#importing libraries
import numpy as np
import pandas as pd
import plotly as px
import plotly.graph_objects as go
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
# Read the temperature CSV into a DataFrame and preview its first five rows
data = pd.read_csv(filepath_or_buffer='India_Temp_2011.csv')
data.head(n=5)
Year | Annual | Jan | Mar | Jun | Oct | |
---|---|---|---|---|---|---|
0 | 1901 | 28.96 | 23.27 | 31.46 | 31.27 | 27.25 |
1 | 1902 | 29.22 | 25.75 | 31.76 | 31.09 | 26.49 |
2 | 1903 | 28.47 | 24.24 | 30.71 | 30.92 | 26.26 |
3 | 1904 | 28.49 | 23.62 | 30.95 | 30.67 | 26.40 |
4 | 1905 | 28.30 | 22.25 | 30.00 | 31.33 | 26.57 |
# Exploratory data analysis: start with the frame's dimensions as a (rows, columns) tuple
data.shape
(111, 6)
data.columns.values # column labels of the frame, returned as an array
array(['Year', 'Annual', 'Jan', 'Mar', 'Jun', 'Oct'], dtype=object)
data.info() # prints each column's dtype and non-null count, plus memory usage
<class 'pandas.core.frame.DataFrame'> RangeIndex: 111 entries, 0 to 110 Data columns (total 6 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Year 111 non-null int64 1 Annual 111 non-null float64 2 Jan 111 non-null float64 3 Mar 111 non-null float64 4 Jun 111 non-null float64 5 Oct 111 non-null float64 dtypes: float64(5), int64(1) memory usage: 5.3 KB
data.describe() # per-column summary: count, mean, std, min, quartiles (50% is the median) and max
Year | Annual | Jan | Mar | Jun | Oct | |
---|---|---|---|---|---|---|
count | 111.000000 | 111.000000 | 111.000000 | 111.000000 | 111.000000 | 111.000000 |
mean | 1956.000000 | 29.116216 | 24.540901 | 31.444865 | 31.157928 | 27.133514 |
std | 32.186954 | 0.453954 | 0.788018 | 0.639510 | 0.383994 | 0.566916 |
min | 1901.000000 | 28.110000 | 22.250000 | 29.920000 | 30.240000 | 25.740000 |
25% | 1928.500000 | 28.760000 | 24.030000 | 31.025000 | 30.905000 | 26.670000 |
50% | 1956.000000 | 29.070000 | 24.510000 | 31.460000 | 31.160000 | 27.180000 |
75% | 1983.500000 | 29.420000 | 25.030000 | 31.865000 | 31.370000 | 27.515000 |
max | 2011.000000 | 30.290000 | 27.440000 | 33.460000 | 32.240000 | 28.530000 |
# Inspect the target variable: distinct annual temperatures in order of first appearance
data['Annual'].unique()
array([28.96, 29.22, 28.47, 28.49, 28.3 , 28.73, 28.65, 28.83, 28.39, 28.53, 28.62, 28.95, 28.67, 28.66, 28.94, 28.82, 28.11, 28.76, 28.86, 28.8 , 28.74, 28.7 , 28.59, 28.98, 29.15, 29.09, 29.03, 28.71, 28.85, 28.88, 29.46, 28.89, 28.97, 29.37, 28.84, 29.16, 29.43, 28.92, 28.63, 28.64, 29.33, 29.02, 29.31, 28.72, 29.04, 29.41, 29.14, 29.07, 29.61, 29.47, 29.44, 29.26, 29.27, 29.23, 29.63, 29.58, 29.32, 29.11, 29.28, 29.72, 29.55, 29.18, 30.18, 29.05, 29.7 , 29.81, 29.75, 29.99, 30.23, 29.79, 29.6 , 30.06, 29.84, 29.64, 30.29, 30.12, 29.82])
# Frequency of each distinct annual temperature
data['Annual'].value_counts()
28.76 4 28.89 4 28.66 3 28.80 3 28.70 3 .. 30.12 1 28.30 1 28.95 1 29.82 1 28.97 1 Name: Annual, Length: 77, dtype: int64
# Distinct values of the 'Jan' column
data['Jan'].unique()
array([23.27, 25.75, 24.24, 23.62, 22.25, 23.03, 24.23, 24.42, 23.52, 24.2 , 23.9 , 24.88, 24.25, 24.59, 23.22, 24.57, 24.52, 23.57, 23.71, 23.64, 23.91, 24.43, 23.73, 23.94, 24.73, 23.76, 24.21, 23.53, 23.2 , 24.55, 24.51, 24.13, 24.53, 23.41, 24.11, 23.31, 24.46, 24.37, 24.03, 24.02, 23.86, 25.49, 23.99, 24.49, 24.16, 25.17, 24.71, 24.9 , 24.4 , 23.87, 25.43, 25.48, 24.17, 24.29, 24.67, 25.54, 25.31, 23.68, 24.99, 25.19, 25.35, 24.34, 24.12, 24.61, 25.15, 24.36, 25.21, 24.62, 25.29, 24.64, 25.07, 25.39, 24.74, 24.6 , 25.09, 25.68, 26.3 , 24.97, 25.11, 24.82, 25.88, 25.37, 25.32, 24.96, 27.44, 25.73, 24.72, 26.5 , 25.95, 25.33])
# Frequency of each distinct value in the 'Jan' column
data['Jan'].value_counts()
24.99 4 24.51 3 23.62 3 25.49 2 24.90 2 .. 24.59 1 23.91 1 23.90 1 25.35 1 25.75 1 Name: Jan, Length: 90, dtype: int64
# Distinct values of the 'Mar' column
data['Mar'].unique()
array([31.46, 31.76, 30.71, 30.95, 30. , 31.11, 29.92, 31.43, 31.02, 31.14, 30.7 , 31.1 , 30.89, 30.73, 31.06, 31.88, 30.06, 30.68, 31.17, 30.4 , 32.05, 31.21, 31.4 , 31.44, 31.47, 30.21, 30.72, 31.51, 31.72, 30.94, 31.71, 30.42, 31.28, 31.15, 30.84, 31.74, 30.76, 30.66, 32.12, 31.8 , 30.8 , 31.03, 31.19, 31.5 , 31.78, 31.27, 30.67, 31.13, 32.19, 31.89, 30.88, 31.53, 30.41, 31.73, 31.69, 31.16, 31.31, 31.04, 31.24, 32.02, 32.03, 31.58, 31.49, 31.92, 31.62, 31.45, 31.65, 31.57, 31.7 , 32.2 , 31.64, 30.79, 32.51, 31.37, 31.75, 31.35, 31.32, 31.61, 31.85, 32.4 , 32.07, 31.26, 32.45, 32.22, 32.61, 33.06, 32.69, 31.81, 32.08, 32.32, 32.11, 32.57, 33.46])
# Frequency of each distinct value in the 'Mar' column
data['Mar'].value_counts()
30.84 3 31.17 3 31.89 3 31.69 2 32.19 2 .. 31.14 1 30.66 1 31.32 1 30.40 1 30.00 1 Name: Mar, Length: 93, dtype: int64
# Distinct values of the 'Jun' column
data['Jun'].unique()
array([31.27, 31.09, 30.92, 30.67, 31.33, 30.86, 30.8 , 30.72, 30.33, 30.48, 31.14, 31.15, 30.84, 31.51, 30.52, 30.24, 31.11, 31.08, 30.81, 30.9 , 30.98, 30.96, 31.03, 31.16, 31.25, 30.41, 31.22, 30.85, 30.68, 30.59, 31.06, 30.93, 31.37, 30.99, 30.83, 31.48, 31.23, 31.2 , 30.88, 31.13, 31.12, 30.25, 31.07, 31.29, 30.75, 31.28, 30.82, 31.24, 31.32, 31.54, 31.55, 30.91, 31.66, 31.39, 30.66, 31.87, 31.36, 31.34, 31.44, 32.24, 31.18, 31.45, 31.47, 31.31, 32.01, 31.19, 31.77, 31.75, 31.61, 32.02, 31.72, 31.42, 31.84, 31.4 , 31.43])
# Frequency of each distinct value in the 'Jun' column
data['Jun'].value_counts()
31.55 4 31.28 4 31.11 4 31.25 4 30.80 3 .. 31.87 1 30.33 1 30.66 1 30.91 1 32.02 1 Name: Jun, Length: 75, dtype: int64
# Distinct values of the 'Oct' column
data['Oct'].unique()
array([27.25, 26.49, 26.26, 26.4 , 26.57, 27.29, 27.36, 26.64, 26.88, 26.2 , 26.31, 26.42, 27.18, 26.32, 25.74, 26.77, 26.6 , 27.45, 26.43, 26.38, 26.59, 26.61, 26.72, 26.73, 26.27, 26.9 , 26.97, 26.92, 26.94, 26.69, 26.71, 27.05, 27.24, 27.62, 27.23, 27.33, 27.16, 26.82, 27.3 , 26.7 , 26.79, 26.58, 27.77, 27.26, 27.56, 26.46, 26.37, 27.01, 27.03, 26.3 , 26.65, 27. , 27.2 , 27.19, 27.71, 27.5 , 27.17, 27.21, 26.99, 27.76, 27.59, 27.64, 27.67, 27.51, 27.46, 27.35, 27.4 , 27.82, 27.57, 27.49, 27.74, 27.52, 28.52, 27.83, 27.95, 28.53, 28.13, 28.36, 27.7 , 27.65, 27.78, 28.03, 28.29, 27.96, 28.23])
# Frequency of each distinct value in the 'Oct' column
data['Oct'].value_counts()
27.24 3 27.50 3 27.26 3 27.62 2 26.97 2 .. 26.64 1 27.16 1 26.32 1 26.60 1 27.25 1 Name: Oct, Length: 85, dtype: int64
# Data visualisation starts here.
# Heatmap of the null mask: a missing cell would stand out against the rest.
sns.heatmap(
    data=data.isnull(),
    cmap='viridis',
    yticklabels=False,
    cbar=False,
)
<AxesSubplot:>
# Pairwise correlation between all columns, drawn as an un-annotated heatmap
plt.figure(figsize=(6, 4))
sns.heatmap(
    data=data.corr(),
    annot=False,
    cmap='Blues',
)
<AxesSubplot:>
# Correlation matrix restricted to the columns most correlated with 'Annual'.
# k is an upper bound on how many columns are kept; nlargest returns at most
# as many rows as the correlation matrix has.
k = 12
cols = data.corr().nlargest(n=k, columns='Annual')['Annual'].index
cm = data[cols].corr()
plt.figure(figsize=(10, 6))
sns.heatmap(
    data=cm,
    cmap='viridis',
    annot=True,
)
<AxesSubplot:>
#To check Outliers
l = data.columns.values
# One subplot per column on a near-square grid. The column count is derived
# from the number of plotted columns (previously it was derived from the
# character length of a string literal).
number_of_columns = int(np.ceil(np.sqrt(len(l))))
# Number of rows *beyond the first*, hence the "+ 1" wherever the grid is
# built. Integer division with explicit parentheses replaces the old
# `len(l)-1/number_of_columns`, which evaluated as len(l) - (1/n) and produced
# a float that plt.subplot() cannot use as a row count.
number_of_rows = (len(l) - 1) // number_of_columns
# Set the style once, before any axes are created, so every subplot gets it.
sns.set_style('whitegrid')
plt.figure(figsize=(number_of_columns, 5 * (number_of_rows + 1)))
for i, column in enumerate(l, start=1):
    plt.subplot(number_of_rows + 1, number_of_columns, i)
    # Pass the series as `y` so the vertical box is explicit rather than
    # depending on how a positional argument is interpreted.
    sns.boxplot(y=data[column], color='green', orient='v')
# Layout only needs to be tightened once, after all subplots exist.
plt.tight_layout()
#To check distribution-Skewness
plt.figure(figsize=(2 * number_of_columns, 5 * (number_of_rows + 1)))
for i, column in enumerate(l, start=1):
    plt.subplot(number_of_rows + 1, number_of_columns, i)
    # NOTE(review): distplot is deprecated in recent seaborn releases; kept
    # here to preserve the existing histogram + KDE output.
    sns.distplot(data[column], kde=True)
#prepare some data- annual temp
# Take both series from the DataFrame loaded from the CSV instead of keeping a
# hand-typed copy of the same columns, which can silently drift from the file.
# NOTE(review): assumes the CSV holds the values that used to be typed here;
# the rows shown by data.head() match the start of the old literals.
annual = data['Annual'].to_numpy()
x = data['Year'].to_numpy()
# Plot the annual temperature series against the year
fig = go.Figure(data=[go.Scatter(x=x, y=annual)])
# Chart title and axis labels
fig.update_layout(
    title_text='Annual Temperature(1901-2011)',
    xaxis_title_text='Years',
    yaxis_title_text='Temperature (degrees C)',
)
fig.show()
pip install chart_studio
Collecting chart_studio Downloading https://files.pythonhosted.org/packages/ca/ce/330794a6b6ca4b9182c38fc69dd2a9cbff60fd49421cb8648ee5fee352dc/chart_studio-1.1.0-py3-none-any.whl (64kB) |████████████████████████████████| 71kB 5.4MB/s Requirement already satisfied: plotly in /usr/local/lib/python3.6/dist-packages (from chart_studio) (4.4.1) Requirement already satisfied: retrying>=1.3.3 in /usr/local/lib/python3.6/dist-packages (from chart_studio) (1.3.3) Requirement already satisfied: six in /usr/local/lib/python3.6/dist-packages (from chart_studio) (1.15.0) Requirement already satisfied: requests in /usr/local/lib/python3.6/dist-packages (from chart_studio) (2.23.0) Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.6/dist-packages (from requests->chart_studio) (3.0.4) Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.6/dist-packages (from requests->chart_studio) (1.24.3) Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.6/dist-packages (from requests->chart_studio) (2.10) Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.6/dist-packages (from requests->chart_studio) (2020.12.5) Installing collected packages: chart-studio Successfully installed chart-studio-1.1.0
#Hosting visualization using chartstudio
import os

import chart_studio
import chart_studio.plotly as py

# Read the Chart Studio credentials from the environment instead of embedding
# the API key in the notebook. When CHART_STUDIO_API_KEY is not set, nothing is
# written and chart_studio falls back to its previously saved credentials file.
# NOTE(review): the key that used to be hard-coded in this file has been
# exposed and should be regenerated in the Chart Studio account settings.
chart_studio_username = os.environ.get('CHART_STUDIO_USERNAME', 'TROPICSU')
chart_studio_api_key = os.environ.get('CHART_STUDIO_API_KEY')
if chart_studio_api_key:
    chart_studio.tools.set_credentials_file(
        username=chart_studio_username,
        api_key=chart_studio_api_key,
    )
#Push your visualization to your account using this code
py.plot(fig, filename = 'Annual_temp_plot', auto_open=True)
'https://plotly.com/~TROPICSU/2/'
# Write the current figure to a standalone HTML file and open it
# (the original note recommends this route for big datasets)
import plotly.io as pio
pio.write_html(
    fig=fig,
    file='index.html',
    auto_open=True,
)
#prepare some data- JUN TO SEPT
# Take both series from the DataFrame loaded from the CSV instead of keeping a
# hand-typed copy of the same columns, which can silently drift from the file.
# NOTE(review): assumes the 'Jun' column holds the values that used to be
# typed here; the rows shown by data.head() match the start of the old list.
j = data['Jun'].to_numpy()
x = data['Year'].to_numpy()
# Plot the June-September series against the year
fig = go.Figure(data=[go.Scatter(x=x, y=j)])
# Chart title and axis labels
fig.update_layout(
    title_text='June to September Temperature',
    xaxis_title_text='Years',
    yaxis_title_text='Temperature (degrees C)',
)
fig.show()
# Credentials are not set again here: the set_credentials_file call made when
# chart_studio was first configured saves them to its credentials file, so the
# API key does not need to be (and should not be) hard-coded a second time.
#Push your visualization to your account using this code
py.plot(fig, filename = 'June-Sept plot', auto_open=True)
'https://plotly.com/~TROPICSU/6/'
#prepare some data -OCT-DEC
# Take both series from the DataFrame loaded from the CSV instead of keeping a
# hand-typed copy of the same columns, which can silently drift from the file.
# NOTE(review): assumes the 'Oct' column holds the values that used to be
# typed here; the rows shown by data.head() match the start of the old list.
o = data['Oct'].to_numpy()
x = data['Year'].to_numpy()
# Plot the October-December series against the year
fig = go.Figure(data=[go.Scatter(x=x, y=o)])
# Chart title and axis labels
fig.update_layout(
    title_text='October to December Temperature',
    xaxis_title_text='Years',
    yaxis_title_text='Temperature (degrees C)',
)
fig.show()
# Credentials are not set again here: the set_credentials_file call made when
# chart_studio was first configured saves them to its credentials file, so the
# API key does not need to be (and should not be) hard-coded a second time.
#Push your visualization to your account using this code
py.plot(fig, filename = 'Oct-Dec plot', auto_open=True)
'https://plotly.com/~TROPICSU/8/'
#prepare some data - Jan-Feb
col_jan_feb = data['Jan'].to_numpy()
# Take the years from the same DataFrame as the temperatures so the two
# series always have matching length and order, instead of a hand-typed list.
x = data['Year'].to_numpy()
# Plot the January-February series against the year
fig = go.Figure(data=[go.Scatter(x=x, y=col_jan_feb)])
# Chart title and axis labels
fig.update_layout(
    title_text='January to February Temperature',
    xaxis_title_text='Years',
    yaxis_title_text='Temperature (degrees C)',
)
fig.show()
# Credentials are not set again here: the set_credentials_file call made when
# chart_studio was first configured saves them to its credentials file, so the
# API key does not need to be (and should not be) hard-coded a second time.
#Push your visualization to your account using this code
# The filename used to be ' plot' (series name missing); it now follows the
# 'June-Sept plot' / 'Oct-Dec plot' naming of the other uploads.
py.plot(fig, filename = 'Jan-Feb plot', auto_open=True)
'https://plotly.com/~TROPICSU/10/'
#prepare some data - March-May
col_march_may = data['Mar'].to_numpy()
# Take the years from the same DataFrame as the temperatures so the two
# series always have matching length and order, instead of a hand-typed list.
x = data['Year'].to_numpy()
# Plot the March-May series against the year
fig = go.Figure(data=[go.Scatter(x=x, y=col_march_may)])
# Chart title and axis labels
fig.update_layout(
    title_text='March to May Temperature',
    xaxis_title_text='Years',
    yaxis_title_text='Temperature (degrees C)',
)
fig.show()
# Credentials are not set again here: the set_credentials_file call made when
# chart_studio was first configured saves them to its credentials file, so the
# API key does not need to be (and should not be) hard-coded a second time.
#Push your visualization to your account using this code
py.plot(fig, filename = 'march_may', auto_open=True)
'https://plotly.com/~TROPICSU/13/'