import xarray as xr
import numpy as np
import pandas as pd
Create some random data and lists of time-period indexes:
# Two independent synthetic heat fields (2000 time steps on a 6 x 6 grid)
# that share the same time coordinate.
rng = np.random.default_rng()
time = pd.date_range(start="2000-01-01", periods=2000)
heat_rand, heat2_rand = (
    xr.DataArray(
        rng.random((2000, 6, 6)),
        dims=("time", "latitude", "longitude"),
        coords={"time": time},
    )
    for _ in range(2)
)

# Positional time indexes delimiting each period: start, peak and end.
# The fixed positive offsets guarantee start < peak < end, and capping the
# start index at 2000 - 20 keeps every end index inside the time axis.
start = list(rng.integers(low=0, high=2000 - 20, size=100))
peak = [first + 5 for first in start]
end = [top + 10 for top in peak]
Write the function to calculate the mean over all the time periods:
def h_mean(mean_var, period_start, period_end, mean_dim, *, time_dim="time"):
    """Average a DataArray over several index-delimited periods.

    For each pair ``(period_start[i], period_end[i])`` the array is sliced
    positionally along ``time_dim`` (start included, end excluded, as with
    any Python slice) and averaged over ``mean_dim``. The per-period results
    are then stacked along a new ``"period"`` dimension.

    Parameters
    ----------
    mean_var : xarray.DataArray
        Variable to average (e.g. a heat variable).
    period_start : sequence of int
        Start index of each period along ``time_dim``.
    period_end : sequence of int
        End index (exclusive) of each period. Must have the same length as
        ``period_start``.
    mean_dim : str or sequence of str
        Dimension(s) along which to calculate the mean.
    time_dim : str, optional
        Name of the dimension the period indexes refer to. Defaults to
        ``"time"``, which reproduces the original behaviour.

    Returns
    -------
    xarray.DataArray
        Per-period means concatenated along a new ``"period"`` dimension.

    Raises
    ------
    ValueError
        If ``period_start`` and ``period_end`` have different lengths.
    """
    # Pairing the bounds with zip() would silently truncate to the shorter
    # sequence, so reject mismatched inputs explicitly.
    if len(period_start) != len(period_end):
        raise ValueError(
            "period_start and period_end must have the same length "
            f"(got {len(period_start)} and {len(period_end)})"
        )

    # One averaged array per period; isel() takes a mapping so that the
    # sliced dimension can be chosen at run time.
    period_means = [
        mean_var.isel({time_dim: slice(first, last)}).mean(dim=mean_dim)
        for first, last in zip(period_start, period_end)
    ]

    # Stack the per-period results along a brand-new "period" dimension.
    return xr.concat(period_means, dim="period")
Now, to calculate the different means, one just needs to write various calls to that function. There is no need to copy-paste all the lines of the loop every single time. This limits the risk of errors and improves the readability of the code.
# heat_rand averaged over time, from the start to the peak of each period
mean_sp_1 = h_mean(
    mean_var=heat_rand,
    period_start=start,
    period_end=peak,
    mean_dim="time",
)
# heat2_rand averaged over time, from the peak to the end of each period
mean_pe_2 = h_mean(
    mean_var=heat2_rand,
    period_start=peak,
    period_end=end,
    mean_dim="time",
)
# heat_rand averaged over every dimension, from the peak to the end
allmean_pe_1 = h_mean(
    mean_var=heat_rand,
    period_start=peak,
    period_end=end,
    mean_dim=heat_rand.dims,
)
As a bonus, if you have a lot of different averages to calculate, you can collect the arguments of each call in a list or dictionary and store the results in a matching list or dictionary:
# Each label maps to the positional arguments of one h_mean call:
# (variable, period start indexes, period end indexes, dimensions to average).
heat_rand_averages_to_do = dict(
    sp_time=(heat_rand, start, peak, "time"),
    pe_time=(heat_rand, peak, end, "time"),
    sp_all=(heat_rand, start, peak, heat_rand.dims),
    pe_all=(heat_rand, peak, end, heat_rand.dims),
)

# Run every requested average and file the result under the same label.
heat_rand_averages = {}
for label, call_args in heat_rand_averages_to_do.items():
    print(label)  # progress trace: which average is being computed
    heat_rand_averages[label] = h_mean(*call_args)
sp_time pe_time sp_all pe_all
# Display the start-to-peak means of heat_rand taken over all of its
# dimensions: a 1-D result with one value per period.
heat_rand_averages["sp_all"]
<xarray.DataArray (period: 100)> array([0.52116265, 0.50350177, 0.49117636, 0.50797535, 0.50235892, 0.49280181, 0.49295265, 0.49843657, 0.48486577, 0.48250537, 0.49759331, 0.46348957, 0.51427831, 0.51400317, 0.51738406, 0.49128513, 0.54771575, 0.51242504, 0.48683111, 0.47880147, 0.51072864, 0.4879107 , 0.48191024, 0.50773241, 0.46217286, 0.51132989, 0.50716077, 0.46398606, 0.51646143, 0.4700371 , 0.50123077, 0.52576168, 0.48276 , 0.52905521, 0.43456626, 0.47398211, 0.49877843, 0.46385063, 0.48581055, 0.49178077, 0.46965612, 0.51277046, 0.5160116 , 0.51444248, 0.48440463, 0.51714391, 0.50441126, 0.50159757, 0.44967519, 0.52175544, 0.49786862, 0.46715397, 0.49475277, 0.50933782, 0.49695607, 0.50420801, 0.49925864, 0.51016634, 0.50057239, 0.50786069, 0.48936275, 0.48983723, 0.5307329 , 0.4924123 , 0.48256033, 0.52254621, 0.50766511, 0.5308694 , 0.5171582 , 0.45593982, 0.50766511, 0.50943241, 0.50797535, 0.48852579, 0.48122126, 0.50383983, 0.48103436, 0.47889519, 0.48966029, 0.50237752, 0.50583606, 0.5408924 , 0.51395325, 0.52552788, 0.53302423, 0.49976186, 0.54500088, 0.51171402, 0.4881687 , 0.46398606, 0.50050785, 0.51062933, 0.48063893, 0.45373819, 0.48920264, 0.4971618 , 0.48592287, 0.50774901, 0.52479109, 0.50799125]) Dimensions without coordinates: period