#!/usr/bin/env python
# coding: utf-8
"""Back-test a "quarter linearity" forecast on a synthetic monthly series.

For every month the quarter-to-date total is scaled up by the cumulative
share of the quarter that had been reached at the same point of the previous
quarter.  The relative error of that forecast against the actual quarter
total is stored in the ``delta`` column, and the mean absolute ``delta`` over
the first two months of each quarter is reported as a MAPE-style score
(expressed as a fraction, not multiplied by 100).
"""

# In[2]:

import datetime

import numpy as np
import pandas as pd

print(pd.__version__)

# Forecast errors above this bound are excluded from scoring.  They occur when
# the previous quarter's cumulative share is (close to) zero, which makes the
# scaled-up forecast explode (or become infinite).
# NOTE(review): only the upper side is bounded, as in the original filter;
# confirm whether large negative errors should be excluded as well.
MAX_DELTA = 10000


# In[156]:

start = datetime.datetime(2020, 1, 1)
end = datetime.datetime(2022, 1, 1)

# Month-end timestamps between start and end (24 months, i.e. 8 quarters).
# An explicit offset object is used instead of the string alias 'M', which
# newer pandas releases deprecate for month-end offsets.
ts = pd.date_range(start, end, freq=pd.offsets.MonthEnd())


# In[157]:

# One row per month, labelled by monthly period, tagged with its quarter.
df = pd.DataFrame(index=ts.to_period('M'), data={'Quarter': ts.to_period('Q')})


# In[158]:

# Make a sine wave as data.
df['Data'] = np.sin(np.linspace(0, 10, len(ts)))


# In[159]:

by_quarter = df.groupby(by='Quarter')['Data']

# Actual quarter total, broadcast to every month of the quarter.
df['qtrTot'] = by_quarter.transform('sum')
# Quarter-to-date running total.
df['qtd'] = by_quarter.cumsum()
# Share of the quarter total contributed by each month, and the running share.
df['pct'] = df['Data'] / df['qtrTot']
df['cumpct'] = df['qtd'] / df['qtrTot']


# In[161]:

# Cumulative share reached at the same month of the previous quarter.  The
# shift by 3 rows relies on every quarter having exactly three consecutive
# monthly rows, which holds for the index generated above.
df['prev_qtr_pct'] = df['cumpct'].shift(3)

# Forecast of the full-quarter total: quarter-to-date actuals divided by the
# share of the quarter that was complete at this point last quarter.  The
# numerator must be the quarter-to-date total (not the single month's value)
# because the denominator is a cumulative share.
df['prev_qtr_linearity'] = df['qtd'] / df['prev_qtr_pct']


# In[162]:

# Relative error of the forecast against the actual quarter total.
df['delta'] = (df['prev_qtr_linearity'] / df['qtrTot']) - 1

# Score only month 1 / month 2 of each quarter: at the last month the quarter
# is complete and there is nothing left to forecast.  The last month is found
# by position within the quarter rather than by testing ``cumpct == 1``, which
# would depend on exact floating-point equality.
is_last_month = df.groupby(by='Quarter').cumcount(ascending=False) == 0
keep = ~is_last_month & df['delta'].notna() & (df['delta'] <= MAX_DELTA)
scores = df.loc[keep, 'delta']


# In[163]:

# MAPE:
mape = scores.abs().mean()


if __name__ == "__main__":
    # In[160]:
    print(df.head())

    # In[163]:
    print(mape)

    # In[164]:
    scores.abs().plot(kind='bar')

    # In[67]:
    df.groupby(by='Quarter')['Data'].sum().plot()