%load_ext watermark
%watermark -a 'cs224' -u -d -v -p numpy,pandas,matplotlib,sklearn,h5py,pytest,pvlib
Author: cs224 Last updated: 2022-09-22 Python implementation: CPython Python version : 3.8.12 IPython version : 8.2.0 numpy : 1.23.1 pandas : 1.4.2 matplotlib: 3.5.1 sklearn : 1.0.2 h5py : 2.10.0 pytest : 7.1.1 pvlib : 0.9.2
%matplotlib inline
import numpy as np, scipy, scipy.stats as stats, pandas as pd, matplotlib.pyplot as plt, seaborn as sns
import sklearn, sklearn.pipeline, sklearn.model_selection, sklearn.preprocessing, sklearn.linear_model
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)
# pd.set_option('display.float_format', lambda x: '%.2f' % x)
np.set_printoptions(edgeitems=10)
np.set_printoptions(linewidth=1000)
np.set_printoptions(suppress=True)
np.core.arrayprint._line_width = 180
SEED = 42
np.random.seed(SEED)
sns.set()
from IPython.display import display, HTML
display(HTML("<style>.container { width:70% !important; }</style>"))
import pvlib, pvlib.iotools, pvlib.modelchain, pvlib.location, pvlib.pvsystem, pvlib.temperature
import pvlib.irradiance
latitude=50.94
longitude=6.96
surface_tilt=45
surface_azimuth=180.0
location = pvlib.location.Location(latitude=latitude, longitude=longitude, tz='Europe/Berlin', altitude=80, name='Cologne Cathedral')
sandia_modules = pvlib.pvsystem.retrieve_sam('SandiaMod')
cec_inverters = pvlib.pvsystem.retrieve_sam('CECInverter')
def sandia_system_example():
module = sandia_modules['Canadian_Solar_CS5P_220M___2009_']
inverter = cec_inverters['ABB__PVI_3_0_OUTD_S_US__208V_']
temperature_parameters = pvlib.temperature.TEMPERATURE_MODEL_PARAMETERS['sapm']['open_rack_glass_glass']
system = pvlib.pvsystem.PVSystem(
surface_tilt=surface_tilt, surface_azimuth=surface_azimuth,
module_parameters=module, inverter_parameters=inverter,
temperature_model_parameters=temperature_parameters,
modules_per_string=7, strings_per_inverter=2
)
model_chain = pvlib.modelchain.ModelChain(system, location)
return model_chain
We get some TMY Reference Data to be able to verify that our calculations are correct. This is basically a sort of code for a "test setup".
# model chain is only needed to get general information like latitude, longitude and surface measures
model_chain = sandia_system_example()
# get the tmy data
data, months_selected, inputs, metadata = pvlib.iotools.get_pvgis_tmy(model_chain.location.latitude, model_chain.location.longitude, map_variables=True)
# extract the month july from it:
tmy = data.copy().reset_index()
tmy = tmy[(tmy['time(UTC)'] >= '2015-01-01') & (tmy['time(UTC)'] < '2016-01-01')]
tmy = tmy.set_index('time(UTC)')
tmy = tmy[['temp_air', 'ghi', 'dni', 'dhi', 'wind_speed']]
tmy.index.name = 'time'
tmy
temp_air | ghi | dni | dhi | wind_speed | |
---|---|---|---|---|---|
time | |||||
2015-07-01 00:00:00+00:00 | 18.90 | 0.0 | 0.00 | 0.0 | 2.90 |
2015-07-01 01:00:00+00:00 | 19.27 | 0.0 | 0.00 | 0.0 | 3.00 |
2015-07-01 02:00:00+00:00 | 19.64 | 0.0 | 0.00 | 0.0 | 3.10 |
2015-07-01 03:00:00+00:00 | 20.01 | 0.0 | 0.00 | 0.0 | 3.20 |
2015-07-01 04:00:00+00:00 | 20.37 | 56.0 | 262.46 | 31.0 | 3.30 |
... | ... | ... | ... | ... | ... |
2015-07-31 19:00:00+00:00 | 16.35 | 0.0 | 0.00 | 0.0 | 2.01 |
2015-07-31 20:00:00+00:00 | 16.74 | 0.0 | 0.00 | 0.0 | 2.03 |
2015-07-31 21:00:00+00:00 | 17.13 | 0.0 | 0.00 | 0.0 | 2.06 |
2015-07-31 22:00:00+00:00 | 17.52 | 0.0 | 0.00 | 0.0 | 2.09 |
2015-07-31 23:00:00+00:00 | 17.90 | 0.0 | 0.00 | 0.0 | 2.11 |
744 rows × 5 columns
The terminology is explained here: https://www.3tier.com/en/support/solar-prospecting-tools/what-global-horizontal-irradiance-solar-prospecting/ And I copy it for reference:
The radiation reaching the earth's surface can be represented in a number of different ways. Global Horizontal Irradiance (GHI) is the total amount of shortwave radiation received from above by a surface horizontal to the ground. This value is of particular interest to photovoltaic installations and includes both Direct Normal Irradiance (DNI) and Diffuse Horizontal Irradiance (DHI).
DNI is solar radiation that comes in a straight line from the direction of the sun at its current position in the sky. DHI is solar radiation that does not arrive on a direct path from the sun, but has been scattered by molecules and particles in the atmosphere and comes equally from all directions.
Every month is from a specific year. We find the year that corresponds to July:
year = [m for m in months_selected if m['month'] == 7][0]['year']
year
2015
Now we get the PVGIS hourly data from the year from which we also have the TMY data.
The problem is that TMY data is already in 'ghi', 'dni', 'dhi' format, but the PVGIS hourly data is in POA (Plane of Array) format. We will have to process the POA data to get it into 'ghi', 'dni', 'dhi' format.
pvgis_hourly, inputs, metadata = pvlib.iotools.get_pvgis_hourly(
model_chain.location.latitude, model_chain.location.longitude,
start=year, end=year,
raddatabase='PVGIS-SARAH2', # https://joint-research-centre.ec.europa.eu/pvgis-photovoltaic-geographical-information-system/getting-started-pvgis/pvgis-user-manual_en#ref-2-using-horizon-information
components=True,
surface_tilt=0, surface_azimuth=0,
outputformat='json', usehorizon=True, userhorizon=None, pvcalculation=False, peakpower=None, pvtechchoice='crystSi', mountingplace='free', loss=0, trackingtype=0, optimal_surface_tilt=False, optimalangles=False,
url='https://re.jrc.ec.europa.eu/api/v5_2/', # https://joint-research-centre.ec.europa.eu/pvgis-photovoltaic-geographical-information-system/pvgis-releases/pvgis-52_en
map_variables=True, timeout=30
)
pvgis_hourly['poa_diffuse'] = pvgis_hourly['poa_sky_diffuse'] + pvgis_hourly['poa_ground_diffuse']
pvgis_hourly['poa_global'] = pvgis_hourly['poa_direct'] + pvgis_hourly['poa_diffuse']
pvgis_hourly = pvgis_hourly[['temp_air', 'poa_global', 'poa_direct', 'poa_diffuse', 'wind_speed']].copy()
pvgis_hourly.columns = ['temp_air', 'ghi', 'dni_', 'dhi', 'wind_speed']
pvgis_hourly = pvgis_hourly.loc['2015-07-01':'2015-08-01'].copy()
# This is wrong, but we will correct it below
pvgis_hourly['dni'] = pvgis_hourly['dni_']
pvgis_hourly
temp_air | ghi | dni_ | dhi | wind_speed | dni | |
---|---|---|---|---|---|---|
time | ||||||
2015-07-01 00:10:00+00:00 | 19.00 | 0.00 | 0.0 | 0.00 | 3.10 | 0.0 |
2015-07-01 01:10:00+00:00 | 18.64 | 0.00 | 0.0 | 0.00 | 3.17 | 0.0 |
2015-07-01 02:10:00+00:00 | 18.29 | 0.00 | 0.0 | 0.00 | 3.17 | 0.0 |
2015-07-01 03:10:00+00:00 | 17.97 | 0.00 | 0.0 | 0.00 | 3.17 | 0.0 |
2015-07-01 04:10:00+00:00 | 17.78 | 49.09 | 24.0 | 25.09 | 3.10 | 24.0 |
... | ... | ... | ... | ... | ... | ... |
2015-08-01 19:10:00+00:00 | 19.85 | 0.00 | 0.0 | 0.00 | 2.83 | 0.0 |
2015-08-01 20:10:00+00:00 | 18.50 | 0.00 | 0.0 | 0.00 | 2.69 | 0.0 |
2015-08-01 21:10:00+00:00 | 17.51 | 0.00 | 0.0 | 0.00 | 2.48 | 0.0 |
2015-08-01 22:10:00+00:00 | 16.34 | 0.00 | 0.0 | 0.00 | 2.21 | 0.0 |
2015-08-01 23:10:00+00:00 | 15.32 | 0.00 | 0.0 | 0.00 | 1.93 | 0.0 |
768 rows × 6 columns
First of all we check the data that does not correspond on the orientation of the PV array like wind speed and temperature.
As the TMY data is every hour on the hour but the PVGIS hourly data is every hour plus 10 minutes the two data sets will not match perfectly.
We also check if it makes a difference if we perform an interpolation() plus a bfill() or a bfill() plus a ffill() to estimate the missing data points. The below charts indicate that it does not really matter, but a interpolation() plus a bfill() "feels" more correct.
columns = ['temp_air', 'wind_speed']
fig, axs = plt.subplots(len(columns) * 2, 1, figsize=(32, 8 * len(columns) * 2), dpi=80, facecolor='w', edgecolor='k')
for i, c in enumerate(columns):
ldf = pd.concat([
tmy[[c]].rename(columns={c: 'tmy_' + c}),
pvgis_hourly[[c]].rename(columns={c: 'pvgis_hourly_' + c}),
#pvgis_hourly2[[c]].rename(columns=dict(ghi='pvgis_hourly2_' + c))
], axis=1).bfill().ffill()
ldf.plot(ax=axs[2 * i])
ldf = pd.concat([
tmy[[c]].rename(columns={c: 'tmy_' + c}),
pvgis_hourly[[c]].rename(columns={c: 'pvgis_hourly_' + c}),
#pvgis_hourly2[[c]].rename(columns=dict(ghi='pvgis_hourly2_' + c))
], axis=1).interpolate().bfill()
ldf.plot(ax=axs[2 * i + 1])
After this verification step we will have a look at the data points we're most interested in the irradiance data. I'll use a interpolate() plus bfill() as said above from now on:
columns = ['ghi', 'dni', 'dhi']
fig, axs = plt.subplots(len(columns), 1, figsize=(32, 8 * len(columns)), dpi=80, facecolor='w', edgecolor='k')
for i, c in enumerate(columns):
ldf = pd.concat([
tmy[[c]].rename(columns={c: 'tmy_' + c}),
pvgis_hourly[[c]].rename(columns={c: 'pvgis_hourly_' + c}),
#pvgis_hourly2[[c]].rename(columns=dict(ghi='pvgis_hourly2_' + c))
], axis=1).interpolate().bfill()
ldf.plot(ax=axs[i])
As we can see the ghi
and the dhi
components seem to match nicely. Only the dni
component makes trouble.
The link Solar Resource Glossary contains the hints:
Global Horizontal = Direct Normal x cos(Z) + Diffuse Horizontal
where
Z = Solar Zenith Angle at the time of measurement.
The cos(Z) part we can compute via the AOI (angle of incidence) projection functions of pvlib:
solpos = pvlib.solarposition.spa_python(pvgis_hourly.index, model_chain.location.latitude, model_chain.location.longitude, altitude=model_chain.location.altitude)
aoi_projection = pvlib.irradiance.aoi_projection(surface_tilt=0, surface_azimuth=0, solar_zenith=solpos['apparent_zenith'], solar_azimuth=solpos['azimuth'])
pvgis_hourly['aoi_projection'] = aoi_projection
pvgis_hourly['dni'] = pvgis_hourly['dni_']
lds = pvgis_hourly['aoi_projection'] > 0.1
pvgis_hourly.loc[lds,'dni'] = pvgis_hourly.loc[lds, 'dni_'] / pvgis_hourly.loc[lds, 'aoi_projection']
pvgis_hourly
temp_air | ghi | dni_ | dhi | wind_speed | dni | aoi_projection | |
---|---|---|---|---|---|---|---|
time | |||||||
2015-07-01 00:10:00+00:00 | 19.00 | 0.00 | 0.0 | 0.00 | 3.10 | 0.0 | -0.267965 |
2015-07-01 01:10:00+00:00 | 18.64 | 0.00 | 0.0 | 0.00 | 3.17 | 0.0 | -0.226241 |
2015-07-01 02:10:00+00:00 | 18.29 | 0.00 | 0.0 | 0.00 | 3.17 | 0.0 | -0.148318 |
2015-07-01 03:10:00+00:00 | 17.97 | 0.00 | 0.0 | 0.00 | 3.17 | 0.0 | -0.039507 |
2015-07-01 04:10:00+00:00 | 17.78 | 49.09 | 24.0 | 25.09 | 3.10 | 24.0 | 0.095439 |
... | ... | ... | ... | ... | ... | ... | ... |
2015-08-01 19:10:00+00:00 | 19.85 | 0.00 | 0.0 | 0.00 | 2.83 | 0.0 | 0.013700 |
2015-08-01 20:10:00+00:00 | 18.50 | 0.00 | 0.0 | 0.00 | 2.69 | 0.0 | -0.129037 |
2015-08-01 21:10:00+00:00 | 17.51 | 0.00 | 0.0 | 0.00 | 2.48 | 0.0 | -0.239136 |
2015-08-01 22:10:00+00:00 | 16.34 | 0.00 | 0.0 | 0.00 | 2.21 | 0.0 | -0.316663 |
2015-08-01 23:10:00+00:00 | 15.32 | 0.00 | 0.0 | 0.00 | 1.93 | 0.0 | -0.356342 |
768 rows × 7 columns
And now we see that all three components match the TMY data:
columns = ['ghi', 'dni', 'dhi']
fig, axs = plt.subplots(len(columns), 1, figsize=(32, 8 * len(columns)), dpi=80, facecolor='w', edgecolor='k')
for i, c in enumerate(columns):
ldf = pd.concat([
tmy[[c]].rename(columns={c: 'tmy_' + c}),
pvgis_hourly[[c]].rename(columns={c: 'pvgis_hourly_' + c}),
#pvgis_hourly2[[c]].rename(columns=dict(ghi='pvgis_hourly2_' + c))
], axis=1).interpolate().bfill()
ldf.plot(ax=axs[i])
You can call the ModelChain.run_model() or the ModelChain.run_model_from_poa() methods to calculate the energy production. The ModelChain.run_model() is nicer in my opinion as it allows for a clear separtion of the solar data from the PV system configuration. But the pvlib.iotools.get_pvgis_hourly() method returns the data more amenable to the ModelChain.run_model_from_poa() method for direct use.
Both calculations should end-up with the same results. Let's verify.
model_chain.run_model(pvgis_hourly.loc['2015-07-01':'2015-07-14'])
plt.figure(figsize=(32, 8), dpi=80, facecolor='w', edgecolor='k')
ax = plt.subplot(1, 1, 1)
model_chain.results.ac.plot(ax=ax)
<AxesSubplot:xlabel='time'>
model_chain_ = sandia_system_example()
pvgis_hourly_, inputs, metadata = pvlib.iotools.get_pvgis_hourly(
model_chain.location.latitude, model_chain.location.longitude,
start=year, end=year,
raddatabase='PVGIS-SARAH2', # https://joint-research-centre.ec.europa.eu/pvgis-photovoltaic-geographical-information-system/getting-started-pvgis/pvgis-user-manual_en#ref-2-using-horizon-information
components=True,
surface_tilt=model_chain.system.arrays[0].mount.surface_tilt, surface_azimuth=model_chain.system.arrays[0].mount.surface_azimuth - 180.0,
outputformat='json', usehorizon=True, userhorizon=None, pvcalculation=False, peakpower=None, pvtechchoice='crystSi', mountingplace='free', loss=0, trackingtype=0, optimal_surface_tilt=False, optimalangles=False,
url='https://re.jrc.ec.europa.eu/api/v5_2/', # https://joint-research-centre.ec.europa.eu/pvgis-photovoltaic-geographical-information-system/pvgis-releases/pvgis-52_en
map_variables=True, timeout=30
)
print(model_chain.system.arrays[0].mount.surface_tilt, model_chain.system.arrays[0].mount.surface_azimuth - 180.0)
pvgis_hourly_['poa_diffuse'] = pvgis_hourly_['poa_sky_diffuse'] + pvgis_hourly_['poa_ground_diffuse']
pvgis_hourly_['poa_global'] = pvgis_hourly_['poa_direct'] + pvgis_hourly_['poa_diffuse']
model_chain_.run_model_from_poa(pvgis_hourly_.loc['2015-07-01':'2015-07-14'])
plt.figure(figsize=(32, 8), dpi=80, facecolor='w', edgecolor='k')
ax = plt.subplot(1, 1, 1)
model_chain_.results.ac.plot(ax=ax)
45 0.0
<AxesSubplot:xlabel='time'>
We can see that both methods match nearly perfectly:
plt.figure(figsize=(32, 8), dpi=80, facecolor='w', edgecolor='k')
ax = plt.subplot(1, 1, 1)
# m1 for method 1 with pv system and solar data separated via the model_chain.run_model()
ldf = model_chain.results.ac.to_frame('m1')
# m2 for method 2 using the plane of array calculation capabilities of model_chain_.run_model_from_poa()
ldf['m2'] = model_chain_.results.ac
ldf.plot(ax=ax)
<AxesSubplot:xlabel='time'>