This is the analysis of the first attempt at using an ensemble forecast method for SoG-bloomcast. The ensemble members are SOG runs for the 2014 bloom using real forcing data up to 2014-03-15 and historical forcing data from 1980 through 2010 thereafter; i.e. 30 ensemble members.
import copy
import datetime
import matplotlib.dates
import matplotlib.pyplot as plt
import numpy as np
import bloomcast.ensemble
import bloomcast.utils
import bloomcast.visualization
%matplotlib inline
Bloom peak identification parameters based on:
Allen & Wolfe, 2013 [1]:
"Although the idea of a spring bloom is well-defined, the exact
timing of a real spring bloom is not. In Collins, et al, 2009 [2]
the peak of the bloom was defined as the highest concentration of
phytoplankton unless an earlier bloom (more than 5 days earlier) was
associated with nitrate going to zero. Gower, et al, 2013 [3] using satellite
data chooses a measure of the start of the bloom as the time when
the whole Strait of Georgia has high chlorophyll. The nutritional
quality of the phytoplankton appears to change when they become nutrient
limited Sastri & Dower, 2009 [4]. Thus here we use a definition
that should delineate between nutrient replete spring conditions
and nutrient stressed summer conditions. We use the peak
phytoplankton concentration (averaged from the surface to 3 m
depth) within four days of the average 0-3 m nitrate concentration
going below 0.5 uM (the half-saturation concentration) for two
consecutive days."
[1] Allen, S. E. and M. A. Wolfe, Hindcast of the Timing of the Spring Phytoplankton Bloom in the Strait of Georgia, 1968-2010. Progress in Oceanography, vol 115 (2013), pp. 6-13. http://dx.doi.org/10.1016/j.pocean.2013.05.026
[2] A.K. Collins, S.E. Allen, R. Pawlowicz, The role of wind in determining the timing of the spring bloom in the Strait of Georgia. Canadian Journal of Fisheries and Aquatic Sciences, 66 (2009), pp. 1597–1616. http://dx.doi.org/10.1139/F09-071
[3] Gower, J., King, S., Statham, S., Fox, R., Young, E., The Malaspina Dragon: a new pattern of the early spring bloom in the Strait of Georgia. Progress in Oceanography 115 (2013), pp. 181–188. http://dx.doi.org/10.1016/j.pocean.2013.05.024
[4] A.R. Sastri and J.F. Dower, Interannual variability in chitobiase-based production rates of the crustacean zooplankton community in the Strait of Georgia, British Columbia, Canada. Marine Ecology-Progress Series, 388 (2009), pp. 147–157. http://dx.doi.org/10.3354/meps08111
NITRATE_HALF_SATURATION_CONCENTRATION = 0.5 # uM
PHYTOPLANKTON_PEAK_WINDOW_HALF_WIDTH = 4 # days
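As a compact illustration of that definition, here is a minimal sketch that applies the two-consecutive-low-nitrate-days rule and the ±4 day peak window to synthetic daily values; the numbers are made up for illustration, and the sketch is independent of the SOG results processed below (it uses only the constants just defined and numpy as imported above).
# Synthetic daily values for illustration only; year-day 1 is the first element
nitrate_daily = np.array([12.0, 8.0, 5.0, 2.0, 0.8, 0.4, 0.3, 0.6, 0.2, 0.1])
diatoms_daily = np.array([0.5, 1.0, 2.5, 6.0, 9.5, 11.0, 10.0, 7.5, 5.0, 3.0])
# First day of the first 2 consecutive days at or below the threshold
low = nitrate_daily <= NITRATE_HALF_SATURATION_CONCENTRATION
low_day_1 = np.flatnonzero(low[:-1] & low[1:])[0]
low_day_2 = low_day_1 + 1
# Peak diatom biomass within the window around the 2 low-nitrate days
window_start = max(0, low_day_1 - PHYTOPLANKTON_PEAK_WINDOW_HALF_WIDTH)
window_end = low_day_2 + PHYTOPLANKTON_PEAK_WINDOW_HALF_WIDTH
peak_index = window_start + diatoms_daily[window_start:window_end + 1].argmax()
print('bloom peak on year-day {}'.format(peak_index + 1))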
Ensemble parameter values and initialization:
ensemble_start_year = 1981
ensemble_end_year = 2010
run_start_date = datetime.datetime(2013, 9, 19)
SOG_timestep = 900
bio_filename_root = '../run/timeseries/std_bio_2014_bloomcast.out'
nitrate_ts, diatoms_ts = {}, {}
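Each ensemble member is identified below by a 2-digit/2-digit suffix for its two-year forcing span; e.g. '_0405' is the 2005 bloom member forced by fall 2004 through spring 2005 data. The actual suffixes come from bloomcast.ensemble.two_yr_suffix; this is just a minimal sketch of the assumed format, inferred from the suffixes used later in this notebook.
def two_yr_suffix_sketch(year):
    # Assumed format: e.g. 2005 -> '_0405', 1992 -> '_9192'
    return '_{:02d}{:02d}'.format((year - 1) % 100, year % 100)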
Load nitrate and diatom biomass timeseries results for each ensemble member:
for year in range(ensemble_start_year, ensemble_end_year + 1):
member = bloomcast.ensemble.two_yr_suffix(year)
nitrate_ts[member] = bloomcast.utils.SOG_Timeseries(bio_filename_root + member)
nitrate_ts[member].read_data('time', '3 m avg nitrate concentration')
nitrate_ts[member].calc_mpl_dates(run_start_date)
diatoms_ts[member] = bloomcast.utils.SOG_Timeseries(bio_filename_root + member)
diatoms_ts[member].read_data('time', '3 m avg micro phytoplankton biomass')
diatoms_ts[member].calc_mpl_dates(run_start_date)
nitrate = copy.deepcopy(nitrate_ts)
diatoms = copy.deepcopy(diatoms_ts)
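The objects loaded above are used below only through a small interface: indep_data (hours since run_start_date, until they are replaced by dates), dep_data (the selected result column), mpl_dates, and boolean_slice. For readers without the bloomcast package, here is a minimal stand-in sketch of that interface as it appears to be used in this notebook; it is an assumption based on that usage, not the actual bloomcast.utils.SOG_Timeseries implementation.
class SOGTimeseriesSketch:
    """Stand-in for the subset of the SOG_Timeseries interface used here (assumed)."""

    def __init__(self, hours_since_start, values):
        self.indep_data = np.asarray(hours_since_start)
        self.dep_data = np.asarray(values)

    def calc_mpl_dates(self, run_start_date):
        # Convert hours since the run start into matplotlib date numbers
        self.mpl_dates = np.array(
            [matplotlib.dates.date2num(
                run_start_date + datetime.timedelta(hours=float(hours)))
             for hours in self.indep_data])

    def boolean_slice(self, predicate):
        # Keep only the elements for which the boolean predicate array is True
        self.indep_data = self.indep_data[predicate]
        self.dep_data = self.dep_data[predicate]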
Calculate the date of the peak of the bloom for each ensemble member. Steps:
1. Clip the results so that they start on 1-Jan of the bloom year because we want to be sure to exclude any fall bloom signals.
2. Reduce the results to daily values: daily minimum nitrate concentrations and daily maximum diatom biomasses.
3. Find the first 2 consecutive days in which the nitrate concentration is below the half-saturation concentration threshold.
4. Find the peak diatom biomass within the ±4 day peak window relative to the nitrate depletion period.
def clip_results_to_jan1(nitrate, diatoms, run_start_date):
"""Clip the nitrate concentration and diatom biomass results
so that they start on 1-Jan of the bloom year.
"""
jan1 = datetime.datetime(run_start_date.year + 1, 1, 1)
discard_hours = jan1 - run_start_date
discard_hours = discard_hours.days * 24 + discard_hours.seconds / 3600
for member in nitrate:
predicate = nitrate[member].indep_data >= discard_hours
nitrate[member].boolean_slice(predicate)
diatoms[member].boolean_slice(predicate)
clip_results_to_jan1(nitrate, diatoms, run_start_date)
def reduce_results_to_daily(nitrate, diatoms, run_start_date, SOG_timestep):
"""Reduce the nitrate concentration and diatom biomass results
to daily values.
Nitrate concentrations are daily minimum values.
Diatom biomasses are daily maximum values.
Independent data values are dates.
"""
    # Assume that there are an integral number of SOG time steps in a day;
    # e.g. with SOG_timestep = 900 s, day_slice = 86400 // 900 = 96
day_slice = 86400 // SOG_timestep
jan1 = datetime.date(run_start_date.year + 1, 1, 1)
for member in nitrate:
last_day = nitrate[member].dep_data.shape[0] - day_slice
day_iterator = range(0, last_day, day_slice)
nitrate[member].dep_data = np.array(
[nitrate[member].dep_data[i:i + day_slice].min()
for i in day_iterator])
nitrate[member].indep_data = np.array(
[jan1 + datetime.timedelta(days=i)
for i in range(nitrate[member].dep_data.size)])
last_day = diatoms[member].dep_data.shape[0] - day_slice
day_iterator = range(0, last_day, day_slice)
diatoms[member].dep_data = np.array(
[diatoms[member].dep_data[i:i + day_slice].max()
for i in day_iterator])
diatoms[member].indep_data = np.array(
[jan1 + datetime.timedelta(days=i)
for i in range(diatoms[member].dep_data.size)])
reduce_results_to_daily(nitrate, diatoms, run_start_date, SOG_timestep)
def find_low_nitrate_days(nitrate, threshold):
"""Return the start and end dates of the first 2 day period in
which the nitrate concentration is below the ``threshold``.
"""
first_low_nitrate_days = {}
for member in nitrate:
nitrate[member].boolean_slice(nitrate[member].dep_data <= threshold)
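        # Scan forward for the first pair of consecutive days at or below
        # the threshold; assumes such a pair occurs for every ensemble member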
for i in range(nitrate[member].dep_data.shape[0]):
low_nitrate_day_1 = nitrate[member].indep_data[i]
days = nitrate[member].indep_data[i + 1] - low_nitrate_day_1
if days == datetime.timedelta(days=1):
low_nitrate_day_2 = nitrate[member].indep_data[i + 1]
break
first_low_nitrate_days[member] = (low_nitrate_day_1, low_nitrate_day_2)
return first_low_nitrate_days
first_low_nitrate_days = find_low_nitrate_days(nitrate, NITRATE_HALF_SATURATION_CONCENTRATION)
def find_phytoplankton_peak(diatoms, first_low_nitrate_days, peak_half_width):
"""Return the date within ``peak_half_width`` of the
``first_low_nitrate_days`` on which the diatoms biomass is the
greatest.
"""
half_width_days = datetime.timedelta(days=peak_half_width)
bloom_dates, bloom_biomasses = {}, {}
for member in diatoms:
bloom_window_start = first_low_nitrate_days[member][0] - half_width_days
bloom_window_end = first_low_nitrate_days[member][1] + half_width_days
diatoms[member].boolean_slice(diatoms[member].indep_data >= bloom_window_start)
diatoms[member].boolean_slice(diatoms[member].indep_data <= bloom_window_end)
bloom_date_index = diatoms[member].dep_data.argmax()
bloom_dates[member] = diatoms[member].indep_data[bloom_date_index]
bloom_biomasses[member] = diatoms[member].dep_data[bloom_date_index]
return bloom_dates, bloom_biomasses
bloom_dates, bloom_biomasses = find_phytoplankton_peak(
diatoms, first_low_nitrate_days, PHYTOPLANKTON_PEAK_WINDOW_HALF_WIDTH)
Convert bloom dates to year-day values (1-Jan-2014 is year-day 1) so that we can do statistics on them more easily.
jan1_ordinal = datetime.date(2014, 1, 1).toordinal()
for member, bloom_date in bloom_dates.items():
bloom_dates[member] = {
'date': bloom_date,
'year_day': bloom_date.toordinal() - jan1_ordinal + 1
}
year_days = np.array([bloom_date['year_day'] for bloom_date in bloom_dates.values()])
year_days
array([87, 78, 99, 87, 79, 82, 86, 84, 91, 79, 80, 96, 88, 84, 81, 88, 82, 80, 88, 86, 93, 86, 83, 80, 82, 92, 98, 78, 95, 79])
def todate(year_day, jan1=datetime.date(2014, 1, 1)):
return datetime.date.fromordinal(jan1.toordinal() + int(year_day) - 1)
def percentiles(year_days, q_values, description=''):
year_days_result = np.percentile(year_days, q_values)
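    # A pair of q_values (quartiles, deciles, ...) gives an array result that
    # is widened outward (truncate the lower bound, ceil the upper); a single
    # q_value (the median) gives a scalar, so indexing it raises IndexError
    # and it is just rounded to the nearest day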
try:
year_days_result = (
np.trunc(year_days_result[0]),
np.ceil(year_days_result[1])
)
except IndexError:
year_days_result = np.rint(year_days_result)
output = '{}:'.format(description)
try:
for result in year_days_result:
output = ' '.join((output, '{:.0f}'.format(result)))
output = ' '.join((output, '{:%d-%b}'.format(todate(result))))
except TypeError:
output = ' '.join((output, '{:.0f}'.format(year_days_result)))
output = ' '.join((output, '{:%d-%b}'.format(todate(year_days_result))))
print(output)
percentiles(year_days, 50, 'median')
percentiles(year_days, (25, 75), 'quartiles')
percentiles(year_days, (10, 90), 'deciles')
percentiles(year_days, (5, 95), '5/95 centiles')
percentiles(year_days, (0, 100), 'min/max')
median: 85 26-Mar
quartiles: 80 21-Mar 88 29-Mar
deciles: 79 20-Mar 96 06-Apr
5/95 centiles: 78 19-Mar 98 08-Apr
min/max: 78 19-Mar 99 09-Apr
ax = plt.subplot(1, 1, 1)
counts, bins, patches = ax.hist(year_days, bins=21)
ax.set_xlabel('Year-Day')
ax.set_ylabel('Counts')
ax.set_title('Distribution of Bloom Dates')
Find the ensemble members that correspond to the median, 5th centile, and 95th centile dates. If there are no members for those dates, look at members on the days before and after.
def find_members(bloom_dates, year_day):
for member, bloom_date in bloom_dates.items():
if bloom_date['year_day'] == year_day:
print(year_day, member)
print('members for median on year-day 85:')
find_members(bloom_dates, 85)
find_members(bloom_dates, 84)
find_members(bloom_dates, 86)
print('members for 5th centile on year-day 78:')
find_members(bloom_dates, 78)
print('members for 95th centile on year-day 98:')
find_members(bloom_dates, 98)
members for median on year-day 85:
84 _0405
84 _9697
86 _9495
86 _9394
86 _9293
members for 5th centile on year-day 78:
78 _8283
78 _9192
members for 95th centile on year-day 98:
98 _0809
In the cases where there are multiple members for a date, choose the member based on the most recent weather; i.e. '_0405' rather than '_9697' for the median, and '_9192' rather than '_8283' for the early bound.
median = '_0405'
early_bound = '_9192'
late_bound = '_0809'
colors = {
'axes': '#ebebeb',
'bg': '#2B3E50',
'diatoms': '#7CC643',
'nitrate': '#82AFDC',
}
data_date = datetime.date(2014, 3, 15)
fig, axes_left = plt.subplots(3, 1, figsize=(15, 10), facecolor=colors['bg'], sharex=True)
axes_right = [ax.twinx() for ax in axes_left]
for ax in axes_left:
ax.set_axis_bgcolor(colors['bg'])
for side in 'top bottom left right'.split():
ax.spines[side].set_color(colors['axes'])
ax.tick_params(color=colors['axes'])
for label in ax.get_xticklabels():
label.set_color(colors['axes'])
for label in ax.get_yticklabels():
label.set_color(colors['nitrate'])
for ax in axes_right:
ax.tick_params(color=colors['axes'])
for label in ax.get_yticklabels():
label.set_color(colors['diatoms'])
ax_titles = (
'Early Bound Prediction',
'Median Prediction',
'Late Bound Prediction',
)
for i, title in enumerate(ax_titles):
axes_left[i].set_title(title, color=colors['axes'], loc='left')
for i, bound in ((0, early_bound), (1, median), (2, late_bound)):
axes_left[i].plot(
nitrate_ts[bound].mpl_dates,
nitrate_ts[bound].dep_data,
color=colors['nitrate'],
)
axes_right[i].plot(
diatoms_ts[bound].mpl_dates,
diatoms_ts[bound].dep_data,
color=colors['diatoms'],
)
axes_left[i].axvline(matplotlib.dates.date2num(data_date), color=colors['axes'])
axes_left[i].text(matplotlib.dates.date2num(data_date), 31, 'Actual to Ensemble\nForcing Transition', color=colors['axes'])
d = datetime.datetime.combine(bloom_dates[bound]['date'], datetime.time(12))
bloom_date_line = axes_left[i].axvline(matplotlib.dates.date2num(d), color=colors['diatoms'])
axes_left[i].legend((bloom_date_line,), ('Bloom Date',), fontsize='small')
axes_left[2].set_xlim((
    np.trunc(nitrate_ts[median].mpl_dates[0]),
    np.ceil(nitrate_ts[median].mpl_dates[-1]),
))
for ax in axes_left:
ax.set_yticks(range(0, 31, 5))
ax.grid(color=colors['axes'])
for ax in axes_right:
ax.set_yticks(range(0, 19, 3))
axes_left[1].set_ylabel('3m Avg Nitrate Concentration [uM N]', color=colors['nitrate'])
axes_right[1].set_ylabel('3m Avg Diatom Biomass [uM N]', color=colors['diatoms'])
axes_left[2].xaxis.set_major_locator(matplotlib.dates.MonthLocator())
axes_left[2].xaxis.set_major_formatter(matplotlib.dates.DateFormatter('%j\n%b'))
axes_left[2].set_xlabel('Year-days in 2013 and 2014', color=colors['axes'])
This section is concerned with testing the time series plot generation code that is implemented in the bloomcast.ensemble and bloomcast.visualization modules.
prediction = {
'early': 1992,
'median': 2005,
'late': 2009,
}
def fix_member(member):
    """Map a 2-digit/2-digit member suffix like '_0405' to its bloom year (2005)."""
    num = int(member[-2:])
    return num + 2000 if num <= 10 else num + 1900
new_bloom_dates = {fix_member(member): bloom_date['date'] for member, bloom_date in bloom_dates.items()}
new_nitrate_ts = {fix_member(member): value for member, value in nitrate_ts.items()}
new_diatoms_ts = {fix_member(member): value for member, value in diatoms_ts.items()}
import matplotlib.backends.backend_agg
fig = bloomcast.visualization.nitrate_diatoms_timeseries(
new_nitrate_ts, new_diatoms_ts, colors,
data_date, prediction, new_bloom_dates, ('foo', 'bar'))
canvas = matplotlib.backends.backend_agg.FigureCanvasAgg(fig)
fig
temperature, salinity = {}, {}
for year, suffix in ((1992, '_9192'), (2005, '_0405'), (2009, '_0809')):
    filename = '../run/timeseries/std_phys_2014_bloomcast.out' + suffix
    temperature[year] = bloomcast.utils.SOG_Timeseries(filename)
    temperature[year].read_data('time', '3 m avg temperature')
    temperature[year].calc_mpl_dates(run_start_date)
    salinity[year] = bloomcast.utils.SOG_Timeseries(filename)
    salinity[year].read_data('time', '3 m avg salinity')
    salinity[year].calc_mpl_dates(run_start_date)
colors['temperature'] = '#D83F83'
colors['salinity'] = '#82DCDC'
colors['temperature_lines'] = {
'early': '#F00C27',
'median': '#D83F83',
'late': '#BD9122',
}
colors['salinity_lines'] = {
'early': '#0EB256',
'median': '#82DCDC',
'late': '#224EBD',
}
fig = bloomcast.visualization.temperature_salinity_timeseries(
temperature, salinity, colors,
data_date, prediction, new_bloom_dates, ('3 m Avg Temperature [deg C]', '3 m Avg Salinity [-]'))
canvas = matplotlib.backends.backend_agg.FigureCanvasAgg(fig)
fig
import imp
import bloomcast.wind
imp.reload(bloomcast.wind)
<module 'bloomcast.wind' from '/Users/doug/Documents/devel/SOG-projects/SoG-bloomcast-ensemble/bloomcast/wind.py'>
mld, wind = {}, {}
mld = bloomcast.utils.SOG_Timeseries('../run/timeseries/std_phys_2014_bloomcast.out_0405')
mld.read_data('time', 'mixing layer depth')
mld.calc_mpl_dates(run_start_date)
wind = bloomcast.wind.WindTimeseries('../run/Sandheads_wind')
wind.read_data(run_start_date)
wind.calc_mpl_dates(run_start_date)
colors['mld'] = '#df691a'
colors['wind_speed'] = '#ebebeb'
import imp
imp.reload(bloomcast.visualization)
<module 'bloomcast.visualization' from '/Users/doug/Documents/devel/SOG-projects/SoG-bloomcast-ensemble/bloomcast/visualization.py'>
fig = bloomcast.visualization.mixing_layer_depth_wind_timeseries(
mld, wind, colors,
data_date, ('Mixing Layer Depth [m]', 'Wind Speed [m/s]'))
canvas = matplotlib.backends.backend_agg.FigureCanvasAgg(fig)
fig