In [1]:

%load_ext watermark
%watermark -a 'cs224' -u -d -v -p numpy,pandas,matplotlib,sklearn,h5py,pytest,pvlib

Author: cs224

Last updated: 2022-09-22

Python implementation: CPython
Python version       : 3.8.12
IPython version      : 8.2.0

numpy     : 1.23.1
pandas    : 1.4.2
matplotlib: 3.5.1
sklearn   : 1.0.2
h5py      : 2.10.0
pytest    : 7.1.1
pvlib     : 0.9.2

In [2]:

%matplotlib inline
import numpy as np, scipy, scipy.stats as stats, pandas as pd, matplotlib.pyplot as plt, seaborn as sns
import sklearn, sklearn.pipeline, sklearn.model_selection, sklearn.preprocessing, sklearn.linear_model

pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)
# pd.set_option('display.float_format', lambda x: '%.2f' % x)
np.set_printoptions(edgeitems=10)
np.set_printoptions(linewidth=1000)
np.set_printoptions(suppress=True)
np.core.arrayprint._line_width = 180

SEED = 42
np.random.seed(SEED)

sns.set()

In [3]:

from IPython.display import display, HTML
display(HTML("<style>.container { width:70% !important; }</style>"))

In [4]:

import pvlib, pvlib.iotools, pvlib.modelchain, pvlib.location, pvlib.pvsystem, pvlib.temperature
import pvlib.irradiance

In [5]:

latitude=50.94
longitude=6.96
surface_tilt=45
surface_azimuth=180.0

In [6]:

location = pvlib.location.Location(latitude=latitude, longitude=longitude, tz='Europe/Berlin', altitude=80, name='Cologne Cathedral')
sandia_modules = pvlib.pvsystem.retrieve_sam('SandiaMod')
cec_inverters  = pvlib.pvsystem.retrieve_sam('CECInverter')

def sandia_system_example():
    module = sandia_modules['Canadian_Solar_CS5P_220M___2009_']
    inverter = cec_inverters['ABB__PVI_3_0_OUTD_S_US__208V_']
    temperature_parameters = pvlib.temperature.TEMPERATURE_MODEL_PARAMETERS['sapm']['open_rack_glass_glass']

    system = pvlib.pvsystem.PVSystem(
        surface_tilt=surface_tilt, surface_azimuth=surface_azimuth,
        module_parameters=module, inverter_parameters=inverter,
        temperature_model_parameters=temperature_parameters,
        modules_per_string=7, strings_per_inverter=2
    )

    model_chain = pvlib.modelchain.ModelChain(system, location)
    return model_chain

Get TMY Reference Data¶

We get some TMY Reference Data to be able to verify that our calculations are correct. This is basically a sort of code for a "test setup".

In [7]:

# model chain is only needed to get general information like latitude, longitude and surface measures
model_chain = sandia_system_example()

# get the tmy data
data, months_selected, inputs, metadata = pvlib.iotools.get_pvgis_tmy(model_chain.location.latitude, model_chain.location.longitude, map_variables=True)

# extract the month july from it:
tmy = data.copy().reset_index()
tmy = tmy[(tmy['time(UTC)'] >= '2015-01-01') & (tmy['time(UTC)'] < '2016-01-01')]
tmy = tmy.set_index('time(UTC)')
tmy = tmy[['temp_air', 'ghi', 'dni', 'dhi', 'wind_speed']]
tmy.index.name = 'time'
tmy

Out[7]:

	temp_air	ghi	dni	dhi	wind_speed
time
2015-07-01 00:00:00+00:00	18.90	0.0	0.00	0.0	2.90
2015-07-01 01:00:00+00:00	19.27	0.0	0.00	0.0	3.00
2015-07-01 02:00:00+00:00	19.64	0.0	0.00	0.0	3.10
2015-07-01 03:00:00+00:00	20.01	0.0	0.00	0.0	3.20
2015-07-01 04:00:00+00:00	20.37	56.0	262.46	31.0	3.30
...	...	...	...	...	...
2015-07-31 19:00:00+00:00	16.35	0.0	0.00	0.0	2.01
2015-07-31 20:00:00+00:00	16.74	0.0	0.00	0.0	2.03
2015-07-31 21:00:00+00:00	17.13	0.0	0.00	0.0	2.06
2015-07-31 22:00:00+00:00	17.52	0.0	0.00	0.0	2.09
2015-07-31 23:00:00+00:00	17.90	0.0	0.00	0.0	2.11

744 rows × 5 columns

The terminology is explained here: https://www.3tier.com/en/support/solar-prospecting-tools/what-global-horizontal-irradiance-solar-prospecting/ And I copy it for reference:

The radiation reaching the earth's surface can be represented in a number of different ways. Global Horizontal Irradiance (GHI) is the total amount of shortwave radiation received from above by a surface horizontal to the ground. This value is of particular interest to photovoltaic installations and includes both Direct Normal Irradiance (DNI) and Diffuse Horizontal Irradiance (DHI).

DNI is solar radiation that comes in a straight line from the direction of the sun at its current position in the sky. DHI is solar radiation that does not arrive on a direct path from the sun, but has been scattered by molecules and particles in the atmosphere and comes equally from all directions.

Every month is from a specific year. We find the year that corresponds to July:

In [8]:

year = [m for m in months_selected if m['month'] == 7][0]['year']
year

Out[8]:

Get the PVGIS Hourly Data¶

Now we get the PVGIS hourly data from the year from which we also have the TMY data.

The problem is that TMY data is already in 'ghi', 'dni', 'dhi' format, but the PVGIS hourly data is in POA (Plane of Array) format. We will have to process the POA data to get it into 'ghi', 'dni', 'dhi' format.

In [9]:

pvgis_hourly, inputs, metadata = pvlib.iotools.get_pvgis_hourly(
    model_chain.location.latitude, model_chain.location.longitude, 
    start=year, end=year,
    raddatabase='PVGIS-SARAH2', # https://joint-research-centre.ec.europa.eu/pvgis-photovoltaic-geographical-information-system/getting-started-pvgis/pvgis-user-manual_en#ref-2-using-horizon-information
    components=True, 
    surface_tilt=0, surface_azimuth=0, 
    outputformat='json', usehorizon=True, userhorizon=None, pvcalculation=False, peakpower=None, pvtechchoice='crystSi', mountingplace='free', loss=0, trackingtype=0, optimal_surface_tilt=False, optimalangles=False, 
    url='https://re.jrc.ec.europa.eu/api/v5_2/', # https://joint-research-centre.ec.europa.eu/pvgis-photovoltaic-geographical-information-system/pvgis-releases/pvgis-52_en
    map_variables=True, timeout=30
)
pvgis_hourly['poa_diffuse'] = pvgis_hourly['poa_sky_diffuse'] + pvgis_hourly['poa_ground_diffuse']
pvgis_hourly['poa_global']  = pvgis_hourly['poa_direct']      + pvgis_hourly['poa_diffuse']

pvgis_hourly = pvgis_hourly[['temp_air', 'poa_global', 'poa_direct', 'poa_diffuse', 'wind_speed']].copy()
pvgis_hourly.columns = ['temp_air', 'ghi', 'dni_', 'dhi', 'wind_speed']
pvgis_hourly = pvgis_hourly.loc['2015-07-01':'2015-08-01'].copy()
# This is wrong, but we will correct it below
pvgis_hourly['dni'] = pvgis_hourly['dni_']
pvgis_hourly

Out[9]:

	temp_air	ghi	dni_	dhi	wind_speed	dni
time
2015-07-01 00:10:00+00:00	19.00	0.00	0.0	0.00	3.10	0.0
2015-07-01 01:10:00+00:00	18.64	0.00	0.0	0.00	3.17	0.0
2015-07-01 02:10:00+00:00	18.29	0.00	0.0	0.00	3.17	0.0
2015-07-01 03:10:00+00:00	17.97	0.00	0.0	0.00	3.17	0.0
2015-07-01 04:10:00+00:00	17.78	49.09	24.0	25.09	3.10	24.0
...	...	...	...	...	...	...
2015-08-01 19:10:00+00:00	19.85	0.00	0.0	0.00	2.83	0.0
2015-08-01 20:10:00+00:00	18.50	0.00	0.0	0.00	2.69	0.0
2015-08-01 21:10:00+00:00	17.51	0.00	0.0	0.00	2.48	0.0
2015-08-01 22:10:00+00:00	16.34	0.00	0.0	0.00	2.21	0.0
2015-08-01 23:10:00+00:00	15.32	0.00	0.0	0.00	1.93	0.0

768 rows × 6 columns

First of all we check the data that does not correspond on the orientation of the PV array like wind speed and temperature.

As the TMY data is every hour on the hour but the PVGIS hourly data is every hour plus 10 minutes the two data sets will not match perfectly.

We also check if it makes a difference if we perform an interpolation() plus a bfill() or a bfill() plus a ffill() to estimate the missing data points. The below charts indicate that it does not really matter, but a interpolation() plus a bfill() "feels" more correct.

In [10]:

columns = ['temp_air', 'wind_speed']
fig, axs = plt.subplots(len(columns) * 2, 1, figsize=(32, 8 * len(columns) * 2), dpi=80, facecolor='w', edgecolor='k')

for i, c in enumerate(columns):
    ldf = pd.concat([
        tmy[[c]].rename(columns={c: 'tmy_' + c}), 
        pvgis_hourly[[c]].rename(columns={c: 'pvgis_hourly_' + c}),
        #pvgis_hourly2[[c]].rename(columns=dict(ghi='pvgis_hourly2_' + c))
    ], axis=1).bfill().ffill()
    ldf.plot(ax=axs[2 * i])
    
    ldf = pd.concat([
        tmy[[c]].rename(columns={c: 'tmy_' + c}), 
        pvgis_hourly[[c]].rename(columns={c: 'pvgis_hourly_' + c}),
        #pvgis_hourly2[[c]].rename(columns=dict(ghi='pvgis_hourly2_' + c))
    ], axis=1).interpolate().bfill()
    ldf.plot(ax=axs[2 * i + 1])

After this verification step we will have a look at the data points we're most interested in the irradiance data. I'll use a interpolate() plus bfill() as said above from now on:

In [11]:

columns = ['ghi', 'dni', 'dhi']
fig, axs = plt.subplots(len(columns), 1, figsize=(32, 8 * len(columns)), dpi=80, facecolor='w', edgecolor='k')

for i, c in enumerate(columns):
    ldf = pd.concat([
        tmy[[c]].rename(columns={c: 'tmy_' + c}), 
        pvgis_hourly[[c]].rename(columns={c: 'pvgis_hourly_' + c}),
        #pvgis_hourly2[[c]].rename(columns=dict(ghi='pvgis_hourly2_' + c))
    ], axis=1).interpolate().bfill()
    ldf.plot(ax=axs[i])

As we can see the ghi and the dhi components seem to match nicely. Only the dni component makes trouble.

The link Solar Resource Glossary contains the hints:

Global Horizontal = Direct Normal x cos(Z) + Diffuse Horizontal

where

Z = Solar Zenith Angle at the time of measurement.

The cos(Z) part we can compute via the AOI (angle of incidence) projection functions of pvlib:

In [12]:

solpos = pvlib.solarposition.spa_python(pvgis_hourly.index, model_chain.location.latitude, model_chain.location.longitude, altitude=model_chain.location.altitude)
aoi_projection = pvlib.irradiance.aoi_projection(surface_tilt=0, surface_azimuth=0, solar_zenith=solpos['apparent_zenith'], solar_azimuth=solpos['azimuth'])

pvgis_hourly['aoi_projection'] = aoi_projection

pvgis_hourly['dni'] = pvgis_hourly['dni_']
lds = pvgis_hourly['aoi_projection'] > 0.1
pvgis_hourly.loc[lds,'dni'] = pvgis_hourly.loc[lds, 'dni_'] / pvgis_hourly.loc[lds, 'aoi_projection']
pvgis_hourly

Out[12]:

	temp_air	ghi	dni_	dhi	wind_speed	dni	aoi_projection
time
2015-07-01 00:10:00+00:00	19.00	0.00	0.0	0.00	3.10	0.0	-0.267965
2015-07-01 01:10:00+00:00	18.64	0.00	0.0	0.00	3.17	0.0	-0.226241
2015-07-01 02:10:00+00:00	18.29	0.00	0.0	0.00	3.17	0.0	-0.148318
2015-07-01 03:10:00+00:00	17.97	0.00	0.0	0.00	3.17	0.0	-0.039507
2015-07-01 04:10:00+00:00	17.78	49.09	24.0	25.09	3.10	24.0	0.095439
...	...	...	...	...	...	...	...
2015-08-01 19:10:00+00:00	19.85	0.00	0.0	0.00	2.83	0.0	0.013700
2015-08-01 20:10:00+00:00	18.50	0.00	0.0	0.00	2.69	0.0	-0.129037
2015-08-01 21:10:00+00:00	17.51	0.00	0.0	0.00	2.48	0.0	-0.239136
2015-08-01 22:10:00+00:00	16.34	0.00	0.0	0.00	2.21	0.0	-0.316663
2015-08-01 23:10:00+00:00	15.32	0.00	0.0	0.00	1.93	0.0	-0.356342

768 rows × 7 columns

And now we see that all three components match the TMY data:

In [13]:

columns = ['ghi', 'dni', 'dhi']
fig, axs = plt.subplots(len(columns), 1, figsize=(32, 8 * len(columns)), dpi=80, facecolor='w', edgecolor='k')

for i, c in enumerate(columns):
    ldf = pd.concat([
        tmy[[c]].rename(columns={c: 'tmy_' + c}), 
        pvgis_hourly[[c]].rename(columns={c: 'pvgis_hourly_' + c}),
        #pvgis_hourly2[[c]].rename(columns=dict(ghi='pvgis_hourly2_' + c))
    ], axis=1).interpolate().bfill()
    ldf.plot(ax=axs[i])

Final Test¶

You can call the ModelChain.run_model() or the ModelChain.run_model_from_poa() methods to calculate the energy production. The ModelChain.run_model() is nicer in my opinion as it allows for a clear separtion of the solar data from the PV system configuration. But the pvlib.iotools.get_pvgis_hourly() method returns the data more amenable to the ModelChain.run_model_from_poa() method for direct use.

Both calculations should end-up with the same results. Let's verify.

https://pvlib-python.readthedocs.io/en/stable/reference/modelchain.html#modelchain-runmodel
- ModelChain.run_model
- ModelChain.run_model_from_poa

In [14]:

model_chain.run_model(pvgis_hourly.loc['2015-07-01':'2015-07-14'])
plt.figure(figsize=(32, 8), dpi=80, facecolor='w', edgecolor='k')
ax = plt.subplot(1, 1, 1)
model_chain.results.ac.plot(ax=ax)

Out[14]:

<AxesSubplot:xlabel='time'>

In [15]:

model_chain_ = sandia_system_example()
pvgis_hourly_, inputs, metadata = pvlib.iotools.get_pvgis_hourly(
    model_chain.location.latitude, model_chain.location.longitude, 
    start=year, end=year,
    raddatabase='PVGIS-SARAH2', # https://joint-research-centre.ec.europa.eu/pvgis-photovoltaic-geographical-information-system/getting-started-pvgis/pvgis-user-manual_en#ref-2-using-horizon-information
    components=True, 
    surface_tilt=model_chain.system.arrays[0].mount.surface_tilt, surface_azimuth=model_chain.system.arrays[0].mount.surface_azimuth - 180.0, 
    outputformat='json', usehorizon=True, userhorizon=None, pvcalculation=False, peakpower=None, pvtechchoice='crystSi', mountingplace='free', loss=0, trackingtype=0, optimal_surface_tilt=False, optimalangles=False, 
    url='https://re.jrc.ec.europa.eu/api/v5_2/', # https://joint-research-centre.ec.europa.eu/pvgis-photovoltaic-geographical-information-system/pvgis-releases/pvgis-52_en
    map_variables=True, timeout=30
)
print(model_chain.system.arrays[0].mount.surface_tilt, model_chain.system.arrays[0].mount.surface_azimuth - 180.0)
pvgis_hourly_['poa_diffuse'] = pvgis_hourly_['poa_sky_diffuse'] + pvgis_hourly_['poa_ground_diffuse']
pvgis_hourly_['poa_global']  = pvgis_hourly_['poa_direct']      + pvgis_hourly_['poa_diffuse']

model_chain_.run_model_from_poa(pvgis_hourly_.loc['2015-07-01':'2015-07-14'])
plt.figure(figsize=(32, 8), dpi=80, facecolor='w', edgecolor='k')
ax = plt.subplot(1, 1, 1)
model_chain_.results.ac.plot(ax=ax)

45 0.0

Out[15]:

<AxesSubplot:xlabel='time'>

We can see that both methods match nearly perfectly:

In [16]:

plt.figure(figsize=(32, 8), dpi=80, facecolor='w', edgecolor='k')
ax = plt.subplot(1, 1, 1)

# m1 for method 1 with pv system and solar data separated via the model_chain.run_model()
ldf = model_chain.results.ac.to_frame('m1')

# m2 for method 2 using the plane of array calculation capabilities of model_chain_.run_model_from_poa()
ldf['m2'] = model_chain_.results.ac

ldf.plot(ax=ax)

Out[16]:

<AxesSubplot:xlabel='time'>