In [74]:

# Importing required modules
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import datetime as dt
from datetime import datetime    # To access datetime 
from pandas import Series        # To work on series 

# Settings for pretty nice plots 
plt.style.use('fivethirtyeight')
%matplotlib inline

# ignore warnings
import warnings
warnings.filterwarnings('ignore')

Importing necessary libraries and the Stock Data¶

In [75]:

# Reading in the data
df = pd.read_csv('maruti_stocks.csv')

In [76]:

# Inspecting the data
df.head()

Out[76]:

	Date	Symbol	Series	Prev Close	Open	High	Low	Last	Close	VWAP	Volume	Turnover	Trades	Deliverable Volume	%Deliverble
0	2003-07-09	MARUTI	EQ	125.00	164.90	170.40	155.00	164.0	164.30	165.95	35164283	5.835528e+14	NaN	8537695.0	0.2428
1	2003-07-10	MARUTI	EQ	164.30	167.00	168.70	164.50	167.0	167.00	166.74	10464179	1.744820e+14	NaN	4363947.0	0.4170
2	2003-07-11	MARUTI	EQ	167.00	167.75	174.85	166.25	173.6	173.35	172.45	11740117	2.024622e+14	NaN	3014852.0	0.2568
3	2003-07-14	MARUTI	EQ	173.35	174.25	179.25	174.25	178.6	177.95	177.91	5982324	1.064313e+14	NaN	1949217.0	0.3258
4	2003-07-15	MARUTI	EQ	177.95	200.00	200.00	173.00	176.3	176.20	176.88	6173689	1.092001e+14	NaN	1307694.0	0.2118

In [77]:

#To keep things simple, let's limit the number of columns.
data = df[['Date','Open','High','Low','Close','Volume','VWAP']]
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4305 entries, 0 to 4304
Data columns (total 7 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Date    4305 non-null   object 
 1   Open    4305 non-null   float64
 2   High    4305 non-null   float64
 3   Low     4305 non-null   float64
 4   Close   4305 non-null   float64
 5   Volume  4305 non-null   int64  
 6   VWAP    4305 non-null   float64
dtypes: float64(5), int64(1), object(1)
memory usage: 235.6+ KB

In [78]:

# Converting string to datetime64
data['Date'] = data['Date'].apply(pd.to_datetime)
data.set_index('Date',inplace=True)
data.head()

Out[78]:

	Open	High	Low	Close	Volume	VWAP
Date
2003-07-09	164.90	170.40	155.00	164.30	35164283	165.95
2003-07-10	167.00	168.70	164.50	167.00	10464179	166.74
2003-07-11	167.75	174.85	166.25	173.35	11740117	172.45
2003-07-14	174.25	179.25	174.25	177.95	5982324	177.91
2003-07-15	200.00	200.00	173.00	176.20	6173689	176.88

Manipulating Time Series dataset with Pandas¶

1. DateTime Manipulations ⚙️¶

In [79]:

from datetime import datetime
my_year = 2019
my_month = 4
my_day = 21
my_hour = 10
my_minute = 5
my_second = 30

In [80]:

# Creating timestamps by using the above attributes.
test_data = datetime(my_year,my_month,my_day)
test_data

Out[80]:

datetime.datetime(2019, 4, 21, 0, 0)

We have selected only the day, month, and year. We could also include more details like an hour, minute, and second, if necessary.

In [81]:

test_data = datetime(my_year,my_month,my_day,my_hour,my_minute,my_second)
print("The day is : ",test_data.day)
print("The hour is : ",test_data.hour)
print("The month is : ",test_data.month)

The day is :  21
The hour is :  10
The month is :  4

For our stock price dataset, the index column is of the type. DatetimeIndex. We can use pandas to obtain the minimum and maximum dates in the data.

In [82]:

print(data.index.max())
print(data.index.min())

2020-10-30 00:00:00
2003-07-09 00:00:00

We can also calculate the latest date index location and the Earliest Date index location as follows:

In [83]:

# Earliest date index location
print('Earliest date index location is: ',data.index.argmin())

# Latest date location
print('Latest date location: ',data.index.argmax())

Earliest date index location is:  0
Latest date location:  4304

2. Subsetting the time series data ✂️¶

In [84]:

df_vwap = df[['Date','VWAP']] # df is the original dataframe
df_vwap['Date'] = df_vwap['Date'].apply(pd.to_datetime)
df_vwap.set_index("Date", inplace = True)
df_vwap.head()

Out[84]:

	VWAP
Date
2003-07-09	165.95
2003-07-10	166.74
2003-07-11	172.45
2003-07-14	177.91
2003-07-15	176.88

The data can also be sliced on year, month, or day, as follows:

In [85]:

# Slicing on year
vwap_subset = df_vwap['2017':'2020']

# Slicing on month
vwap_subset = df_vwap['2017-01':'2020-12']

#Slicing on day
vwap_subset = df_vwap['2017-01-01':'2020-12-15']

3. Visualizing the Time Series data 📊¶

Visualizing the time series data can offer a better perspective instead of merely looking at the numbers.

In [86]:

df_vwap['VWAP'].plot(figsize=(16,8),title=' volume weighted average price')

Out[86]:

<matplotlib.axes._subplots.AxesSubplot at 0x7fe54b0668d0>

It appears that Maruti had a more or less steady increase in its stock price from 2004 to the mid-2018 window. There seems to be some drop in 2019, though. Let's further analyze the data for the year 2018.

In [87]:

ax = df_vwap.loc['2018', 'VWAP'].plot(figsize=(15,6))
ax.set_title('Month-wise Trend in 2018'); 
ax.set_ylabel('VWAP');
ax.xaxis.set_major_locator(mdates.MonthLocator())
ax.xaxis.set_major_formatter(mdates.DateFormatter('%b'));

We see a dip in the stock prices, particularly around the end of October and November. Let's further zoom in on these dates.

In [88]:

ax = df_vwap.loc['2018-10':'2018-11','VWAP'].plot(marker='o', linestyle='-',figsize=(15,6))
ax.set_title('Oct-Nov 2018 trend'); 
ax.set_ylabel('VWAP');
ax.xaxis.set_major_locator(mdates.WeekdayLocator(byweekday=mdates.MONDAY))
ax.xaxis.set_major_formatter(mdates.DateFormatter('%b %d'));

So there is a dip in stock prices around the last week of October and the first week of November. One could investigate it further by finding out if there was some special event on that day.

Feature Extraction¶

Let's extract time and date features from the Date column.

In [89]:

df_vwap.reset_index(inplace=True)
df_vwap['year'] = df_vwap.Date.dt.year
df_vwap['month'] = df_vwap.Date.dt.month
df_vwap['day'] = df_vwap.Date.dt.day
df_vwap['day of week'] = df_vwap.Date.dt.dayofweek



#Set Date column as the index column.
df_vwap.set_index('Date', inplace=True)
df_vwap.head()

Out[89]:

	VWAP	year	month	day	day of week
Date
2003-07-09	165.95	2003	7	9	2
2003-07-10	166.74	2003	7	10	3
2003-07-11	172.45	2003	7	11	4
2003-07-14	177.91	2003	7	14	0
2003-07-15	176.88	2003	7	15	1

4. Time resampling ⏳¶

In [90]:

df_vwap.resample(rule = 'A').mean()[:5]

Out[90]:

	VWAP	year	month	day	day of week
Date
2003-12-31	266.608468	2003.0	9.604839	16.298387	2.016129
2004-12-31	430.522835	2004.0	6.559055	15.708661	2.047244
2005-12-31	494.328566	2005.0	6.541833	15.733068	2.000000
2006-12-31	845.582160	2006.0	6.552000	15.976000	2.056000
2007-12-31	887.107871	2007.0	6.554217	15.843373	1.987952

In [91]:

plt.rcParams['figure.figsize'] = (8, 6)
df_vwap['VWAP'].resample('A').mean().plot(kind='bar')
plt.title('Yearly Mean VWAP for Maruti')

Out[91]:

Text(0.5, 1.0, 'Yearly Mean VWAP for Maruti')

In [92]:

df_vwap['VWAP'].resample('AS').mean().plot(kind='bar',figsize = (10,4))
plt.title('Yearly start Mean VWAP for Maruti')

Out[92]:

Text(0.5, 1.0, 'Yearly start Mean VWAP for Maruti')

6. Time Shifting ⏲️¶

In [94]:

df_vwap.head()

Out[94]:

	VWAP	year	month	day	day of week
Date
2003-07-09	165.95	2003	7	9	2
2003-07-10	166.74	2003	7	10	3
2003-07-11	172.45	2003	7	11	4
2003-07-14	177.91	2003	7	14	0
2003-07-15	176.88	2003	7	15	1

6.1 Forward Shifting¶

In [96]:

df_vwap.shift(1).head()

Out[96]:

	VWAP	year	month	day	day of week
Date
2003-07-09	NaN	NaN	NaN	NaN	NaN
2003-07-10	165.95	2003.0	7.0	9.0	2.0
2003-07-11	166.74	2003.0	7.0	10.0	3.0
2003-07-14	172.45	2003.0	7.0	11.0	4.0
2003-07-15	177.91	2003.0	7.0	14.0	0.0

6.2 Backward Shifting¶

In [97]:

df_vwap.shift(-1).head()

Out[97]:

	VWAP	year	month	day	day of week
Date
2003-07-09	166.74	2003.0	7.0	10.0	3.0
2003-07-10	172.45	2003.0	7.0	11.0	4.0
2003-07-11	177.91	2003.0	7.0	14.0	0.0
2003-07-14	176.88	2003.0	7.0	15.0	1.0
2003-07-15	177.59	2003.0	7.0	16.0	2.0

6.3 Shifting based off Time String Code¶

In [98]:

df_vwap.tshift(periods=3, freq = 'M').head()

Out[98]:

	VWAP	year	month	day	day of week
Date
2003-09-30	165.95	2003	7	9	2
2003-09-30	166.74	2003	7	10	3
2003-09-30	172.45	2003	7	11	4
2003-09-30	177.91	2003	7	14	0
2003-09-30	176.88	2003	7	15	1

7. Rolling Windows 🧈¶

In [99]:

df_vwap['VWAP'].plot(figsize = (10,6))

Out[99]:

<matplotlib.axes._subplots.AxesSubplot at 0x7fe54acd9cf8>

In [100]:

df_vwap.rolling(7).mean().head(10)

Out[100]:

	VWAP	year	month	day	day of week
Date
2003-07-09	NaN	NaN	NaN	NaN	NaN
2003-07-10	NaN	NaN	NaN	NaN	NaN
2003-07-11	NaN	NaN	NaN	NaN	NaN
2003-07-14	NaN	NaN	NaN	NaN	NaN
2003-07-15	NaN	NaN	NaN	NaN	NaN
2003-07-16	NaN	NaN	NaN	NaN	NaN
2003-07-17	173.285714	2003.0	7.0	13.142857	2.142857
2003-07-18	173.530000	2003.0	7.0	14.428571	2.428571
2003-07-21	173.520000	2003.0	7.0	16.000000	2.000000
2003-07-22	172.464286	2003.0	7.0	17.571429	1.571429

In [101]:

df_vwap['VWAP'].plot()
df_vwap.rolling(window=30).mean()['VWAP'].plot(figsize=(16, 6))

Out[101]:

<matplotlib.axes._subplots.AxesSubplot at 0x7fe54adec3c8>

In [ ]: