#!/usr/bin/env python
# coding: utf-8
"""Time-series manipulation walkthrough on Maruti stock data with pandas.

Exported from a Jupyter notebook. The script reads ``maruti_stocks.csv``
from the working directory and demonstrates, in order: datetime handling,
date-based subsetting, plotting, date feature extraction, resampling,
shifting and rolling windows on the ``VWAP`` column.

The ``# In[NN]:`` markers are the original notebook cell boundaries. Bare
expressions such as ``df.head()`` only display output when run as notebook
cells; they are kept so the file can still be used cell by cell.
"""

# In[74]:

# Importing required modules
import datetime as dt
import warnings
from datetime import datetime  # To access datetime

import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from pandas import Series  # To work on series

# Settings for pretty nice plots
plt.style.use('fivethirtyeight')
try:
    # Only defined when running under IPython/Jupyter.
    get_ipython().run_line_magic('matplotlib', 'inline')
except NameError:
    # Plain `python script.py` run: fall back to the default backend.
    pass

# ignore warnings
warnings.filterwarnings('ignore')


# # Importing necessary libraries and the Stock Data

# In[75]:

# Reading in the data
df = pd.read_csv('maruti_stocks.csv')


# In[76]:

# Inspecting the data
df.head()


# In[77]:

# To keep things simple, let's limit the number of columns.
# .copy() gives an independent frame so the column assignment below does not
# write into a slice of `df` (chained-assignment / SettingWithCopy problem).
data = df[['Date', 'Open', 'High', 'Low', 'Close', 'Volume', 'VWAP']].copy()
data.info()


# In[78]:

# Converting string to datetime64.
# pd.to_datetime on the whole column is vectorised; the previous
# .apply(pd.to_datetime) parsed one row at a time.
data['Date'] = pd.to_datetime(data['Date'])
data = data.set_index('Date')
data.head()


# # Manipulating Time Series dataset with Pandas

# ## 1. DateTime Manipulations ⚙️

# In[79]:

my_year = 2019
my_month = 4
my_day = 21
my_hour = 10
my_minute = 5
my_second = 30


# In[80]:

# Creating timestamps by using the above attributes.
test_data = datetime(my_year, my_month, my_day)
test_data


# We have selected only the day, month, and year. We could also include more
# details like an hour, minute, and second, if necessary.

# In[81]:

test_data = datetime(my_year, my_month, my_day, my_hour, my_minute, my_second)
print("The day is : ", test_data.day)
print("The hour is : ", test_data.hour)
print("The month is : ", test_data.month)


# For our stock price dataset, the index column is of the type DatetimeIndex.
# We can use pandas to obtain the minimum and maximum dates in the data.

# In[82]:

print(data.index.max())
print(data.index.min())


# We can also calculate the latest date index location and the earliest date
# index location as follows:

# In[83]:

# Earliest date index location
print('Earliest date index location is: ', data.index.argmin())

# Latest date location
print('Latest date location: ', data.index.argmax())


# ## 2. Subsetting the time series data ✂️

# In[84]:

# df is the original dataframe; copy so the assignments below are safe.
df_vwap = df[['Date', 'VWAP']].copy()
df_vwap['Date'] = pd.to_datetime(df_vwap['Date'])
df_vwap = df_vwap.set_index('Date')
df_vwap.head()


# The data can also be sliced on year, month, or day, as follows:

# In[85]:

# .loc makes it explicit that these are label-based row slices on the
# DatetimeIndex (both endpoints inclusive).

# Slicing on year
vwap_subset = df_vwap.loc['2017':'2020']

# Slicing on month
vwap_subset = df_vwap.loc['2017-01':'2020-12']

# Slicing on day
vwap_subset = df_vwap.loc['2017-01-01':'2020-12-15']


# ## 3. Visualizing the Time Series data 📊

# Visualizing the time series data can offer a better perspective instead of
# merely looking at the numbers.

# In[86]:

df_vwap['VWAP'].plot(figsize=(16, 8), title=' volume weighted average price')


# It appears that Maruti had a more or less steady increase in its stock price
# from 2004 to the mid-2018 window. There seems to be some drop in 2019,
# though. Let's further analyze the data for the year 2018.

# In[87]:

ax = df_vwap.loc['2018', 'VWAP'].plot(figsize=(15, 6))
ax.set_title('Month-wise Trend in 2018')
ax.set_ylabel('VWAP')
ax.xaxis.set_major_locator(mdates.MonthLocator())
ax.xaxis.set_major_formatter(mdates.DateFormatter('%b'))


# We see a dip in the stock prices, particularly around the end of October and
# November. Let's further zoom in on these dates.

# In[88]:

ax = df_vwap.loc['2018-10':'2018-11', 'VWAP'].plot(
    marker='o', linestyle='-', figsize=(15, 6)
)
ax.set_title('Oct-Nov 2018 trend')
ax.set_ylabel('VWAP')
ax.xaxis.set_major_locator(mdates.WeekdayLocator(byweekday=mdates.MONDAY))
ax.xaxis.set_major_formatter(mdates.DateFormatter('%b %d'))


# So there is a dip in stock prices around the last week of October and the
# first week of November. One could investigate it further by finding out if
# there was some special event on that day.

# ## Feature Extraction

# Let's extract time and date features from the Date index.

# In[89]:

# The DatetimeIndex exposes the calendar fields directly, so there is no need
# to move Date back into a column and re-index afterwards.
df_vwap['year'] = df_vwap.index.year
df_vwap['month'] = df_vwap.index.month
df_vwap['day'] = df_vwap.index.day
df_vwap['day of week'] = df_vwap.index.dayofweek
df_vwap.head()


# ## 4. Time resampling ⏳

# In[90]:

# 'A' = calendar year end. NOTE(review): pandas >= 2.2 prefers the alias 'YE';
# 'A' is kept so the script still runs on older pandas.
df_vwap.resample(rule='A').mean()[:5]


# In[91]:

plt.rcParams['figure.figsize'] = (8, 6)
df_vwap['VWAP'].resample('A').mean().plot(kind='bar')
plt.title('Yearly Mean VWAP for Maruti')


# In[92]:

# 'AS' = calendar year start (newer pandas alias: 'YS').
df_vwap['VWAP'].resample('AS').mean().plot(kind='bar', figsize=(10, 4))
plt.title('Yearly start Mean VWAP for Maruti')


# ## 6. Time Shifting ⏲️

# In[94]:

df_vwap.head()


# ### 6.1 Forward Shifting

# In[96]:

df_vwap.shift(1).head()


# ### 6.2 Backward Shifting

# In[97]:

df_vwap.shift(-1).head()


# ### 6.3 Shifting based off Time String Code

# In[98]:

# DataFrame.tshift was deprecated in pandas 1.1 and removed in 2.0.
# shift(..., freq=...) is the documented replacement: it moves the index by
# the given offset and leaves the values untouched.
df_vwap.shift(periods=3, freq='M').head()


# # 7. Rolling Windows 🧈

# In[99]:

df_vwap['VWAP'].plot(figsize=(10, 6))


# In[100]:

# The first 6 rows are NaN: a 7-row window is not full until the 7th row.
df_vwap.rolling(7).mean().head(10)


# In[101]:

# Raw VWAP overlaid with its 30-row rolling mean. Select the column first so
# the rolling mean is computed for VWAP only, not for every feature column.
df_vwap['VWAP'].plot()
df_vwap['VWAP'].rolling(window=30).mean().plot(figsize=(16, 6))


# In[ ]: