In [1]:
#importing libraries 
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly as px
import plotly.graph_objects as go
import seaborn as sns
%matplotlib inline
In [2]:
# Load the temperature dataset and preview its first rows
csv_path = 'India_Temp_2011.csv'
data = pd.read_csv(csv_path)
data.head(5)
Out[2]:
Year Annual Jan Mar Jun Oct
0 1901 28.96 23.27 31.46 31.27 27.25
1 1902 29.22 25.75 31.76 31.09 26.49
2 1903 28.47 24.24 30.71 30.92 26.26
3 1904 28.49 23.62 30.95 30.67 26.40
4 1905 28.30 22.25 30.00 31.33 26.57
In [3]:
# Exploratory data analysis: start with the dataset dimensions as (rows, columns)
(len(data.index), len(data.columns))
Out[3]:
(111, 6)
In [4]:
data.columns.to_numpy()  # the column names as an array
Out[4]:
array(['Year', 'Annual', 'Jan', 'Mar', 'Jun', 'Oct'], dtype=object)
In [ ]:
# Print a summary of the frame: each column's dtype and non-null count
data.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 111 entries, 0 to 110
Data columns (total 6 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Year    111 non-null    int64  
 1   Annual  111 non-null    float64
 2   Jan     111 non-null    float64
 3   Mar     111 non-null    float64
 4   Jun     111 non-null    float64
 5   Oct     111 non-null    float64
dtypes: float64(5), int64(1)
memory usage: 5.3 KB
In [5]:
# Summary statistics per column: count, mean, std, min, quartiles (50% is the median) and max
data.describe(percentiles=[0.25, 0.5, 0.75])
Out[5]:
Year Annual Jan Mar Jun Oct
count 111.000000 111.000000 111.000000 111.000000 111.000000 111.000000
mean 1956.000000 29.116216 24.540901 31.444865 31.157928 27.133514
std 32.186954 0.453954 0.788018 0.639510 0.383994 0.566916
min 1901.000000 28.110000 22.250000 29.920000 30.240000 25.740000
25% 1928.500000 28.760000 24.030000 31.025000 30.905000 26.670000
50% 1956.000000 29.070000 24.510000 31.460000 31.160000 27.180000
75% 1983.500000 29.420000 25.030000 31.865000 31.370000 27.515000
max 2011.000000 30.290000 27.440000 33.460000 32.240000 28.530000
In [6]:
# Target variable: the distinct annual temperature readings
data['Annual'].unique()
Out[6]:
array([28.96, 29.22, 28.47, 28.49, 28.3 , 28.73, 28.65, 28.83, 28.39,
       28.53, 28.62, 28.95, 28.67, 28.66, 28.94, 28.82, 28.11, 28.76,
       28.86, 28.8 , 28.74, 28.7 , 28.59, 28.98, 29.15, 29.09, 29.03,
       28.71, 28.85, 28.88, 29.46, 28.89, 28.97, 29.37, 28.84, 29.16,
       29.43, 28.92, 28.63, 28.64, 29.33, 29.02, 29.31, 28.72, 29.04,
       29.41, 29.14, 29.07, 29.61, 29.47, 29.44, 29.26, 29.27, 29.23,
       29.63, 29.58, 29.32, 29.11, 29.28, 29.72, 29.55, 29.18, 30.18,
       29.05, 29.7 , 29.81, 29.75, 29.99, 30.23, 29.79, 29.6 , 30.06,
       29.84, 29.64, 30.29, 30.12, 29.82])
In [7]:
# How often each annual reading occurs, most frequent first
data['Annual'].value_counts()
Out[7]:
28.76    4
28.89    4
28.66    3
28.80    3
28.70    3
        ..
30.12    1
28.30    1
28.95    1
29.82    1
28.97    1
Name: Annual, Length: 77, dtype: int64
In [8]:
# Distinct readings in the 'Jan' column
data['Jan'].unique()
Out[8]:
array([23.27, 25.75, 24.24, 23.62, 22.25, 23.03, 24.23, 24.42, 23.52,
       24.2 , 23.9 , 24.88, 24.25, 24.59, 23.22, 24.57, 24.52, 23.57,
       23.71, 23.64, 23.91, 24.43, 23.73, 23.94, 24.73, 23.76, 24.21,
       23.53, 23.2 , 24.55, 24.51, 24.13, 24.53, 23.41, 24.11, 23.31,
       24.46, 24.37, 24.03, 24.02, 23.86, 25.49, 23.99, 24.49, 24.16,
       25.17, 24.71, 24.9 , 24.4 , 23.87, 25.43, 25.48, 24.17, 24.29,
       24.67, 25.54, 25.31, 23.68, 24.99, 25.19, 25.35, 24.34, 24.12,
       24.61, 25.15, 24.36, 25.21, 24.62, 25.29, 24.64, 25.07, 25.39,
       24.74, 24.6 , 25.09, 25.68, 26.3 , 24.97, 25.11, 24.82, 25.88,
       25.37, 25.32, 24.96, 27.44, 25.73, 24.72, 26.5 , 25.95, 25.33])
In [9]:
# How often each 'Jan' reading occurs, most frequent first
data['Jan'].value_counts()
Out[9]:
24.99    4
24.51    3
23.62    3
25.49    2
24.90    2
        ..
24.59    1
23.91    1
23.90    1
25.35    1
25.75    1
Name: Jan, Length: 90, dtype: int64
In [10]:
# Distinct readings in the 'Mar' column
data['Mar'].unique()
Out[10]:
array([31.46, 31.76, 30.71, 30.95, 30.  , 31.11, 29.92, 31.43, 31.02,
       31.14, 30.7 , 31.1 , 30.89, 30.73, 31.06, 31.88, 30.06, 30.68,
       31.17, 30.4 , 32.05, 31.21, 31.4 , 31.44, 31.47, 30.21, 30.72,
       31.51, 31.72, 30.94, 31.71, 30.42, 31.28, 31.15, 30.84, 31.74,
       30.76, 30.66, 32.12, 31.8 , 30.8 , 31.03, 31.19, 31.5 , 31.78,
       31.27, 30.67, 31.13, 32.19, 31.89, 30.88, 31.53, 30.41, 31.73,
       31.69, 31.16, 31.31, 31.04, 31.24, 32.02, 32.03, 31.58, 31.49,
       31.92, 31.62, 31.45, 31.65, 31.57, 31.7 , 32.2 , 31.64, 30.79,
       32.51, 31.37, 31.75, 31.35, 31.32, 31.61, 31.85, 32.4 , 32.07,
       31.26, 32.45, 32.22, 32.61, 33.06, 32.69, 31.81, 32.08, 32.32,
       32.11, 32.57, 33.46])
In [11]:
# How often each 'Mar' reading occurs, most frequent first
data['Mar'].value_counts()
Out[11]:
30.84    3
31.17    3
31.89    3
31.69    2
32.19    2
        ..
31.14    1
30.66    1
31.32    1
30.40    1
30.00    1
Name: Mar, Length: 93, dtype: int64
In [12]:
# Distinct readings in the 'Jun' column
data['Jun'].unique()
Out[12]:
array([31.27, 31.09, 30.92, 30.67, 31.33, 30.86, 30.8 , 30.72, 30.33,
       30.48, 31.14, 31.15, 30.84, 31.51, 30.52, 30.24, 31.11, 31.08,
       30.81, 30.9 , 30.98, 30.96, 31.03, 31.16, 31.25, 30.41, 31.22,
       30.85, 30.68, 30.59, 31.06, 30.93, 31.37, 30.99, 30.83, 31.48,
       31.23, 31.2 , 30.88, 31.13, 31.12, 30.25, 31.07, 31.29, 30.75,
       31.28, 30.82, 31.24, 31.32, 31.54, 31.55, 30.91, 31.66, 31.39,
       30.66, 31.87, 31.36, 31.34, 31.44, 32.24, 31.18, 31.45, 31.47,
       31.31, 32.01, 31.19, 31.77, 31.75, 31.61, 32.02, 31.72, 31.42,
       31.84, 31.4 , 31.43])
In [14]:
# How often each 'Jun' reading occurs, most frequent first
data['Jun'].value_counts()
Out[14]:
31.55    4
31.28    4
31.11    4
31.25    4
30.80    3
        ..
31.87    1
30.33    1
30.66    1
30.91    1
32.02    1
Name: Jun, Length: 75, dtype: int64
In [13]:
# Distinct readings in the 'Oct' column
data['Oct'].unique()
Out[13]:
array([27.25, 26.49, 26.26, 26.4 , 26.57, 27.29, 27.36, 26.64, 26.88,
       26.2 , 26.31, 26.42, 27.18, 26.32, 25.74, 26.77, 26.6 , 27.45,
       26.43, 26.38, 26.59, 26.61, 26.72, 26.73, 26.27, 26.9 , 26.97,
       26.92, 26.94, 26.69, 26.71, 27.05, 27.24, 27.62, 27.23, 27.33,
       27.16, 26.82, 27.3 , 26.7 , 26.79, 26.58, 27.77, 27.26, 27.56,
       26.46, 26.37, 27.01, 27.03, 26.3 , 26.65, 27.  , 27.2 , 27.19,
       27.71, 27.5 , 27.17, 27.21, 26.99, 27.76, 27.59, 27.64, 27.67,
       27.51, 27.46, 27.35, 27.4 , 27.82, 27.57, 27.49, 27.74, 27.52,
       28.52, 27.83, 27.95, 28.53, 28.13, 28.36, 27.7 , 27.65, 27.78,
       28.03, 28.29, 27.96, 28.23])
In [15]:
# How often each 'Oct' reading occurs, most frequent first
data['Oct'].value_counts()
Out[15]:
27.24    3
27.50    3
27.26    3
27.62    2
26.97    2
        ..
26.64    1
27.16    1
26.32    1
26.60    1
27.25    1
Name: Oct, Length: 85, dtype: int64
In [ ]:
# Data visualization starts here.
# Missing-value map: the heatmap is drawn over the boolean null mask of the frame.

null_mask = data.isnull()
sns.heatmap(null_mask, cmap='viridis', cbar=False, yticklabels=False)
Out[ ]:
<AxesSubplot:>
In [ ]:
# Pairwise correlation between all columns, shown as an un-annotated heatmap

corr_matrix = data.corr()
plt.figure(figsize=(6, 4))
sns.heatmap(corr_matrix, annot=False, cmap='Blues')
Out[ ]:
<AxesSubplot:>
In [ ]:
# Correlation matrix with the columns ordered by their correlation with 'Annual'
k = 12  # upper bound on how many columns to keep for the heatmap
corr_all = data.corr()
top_by_annual = corr_all.nlargest(k, 'Annual')
cols = top_by_annual.index
cm = data.loc[:, cols].corr()
plt.figure(figsize=(10, 6))
sns.heatmap(cm, cmap='viridis', annot=True)
Out[ ]:
<AxesSubplot:>
In [ ]:
# Check for outliers: one vertical box plot per column, laid out on a grid.

l = data.columns.values
# Near-square grid sized from the NUMBER of columns. The previous version used
# the character length of a column-name string for the width and computed the
# row count as `len(l) - 1/number_of_columns` (a float, because `/` binds
# tighter than `-`), which is not a valid subplot dimension.
number_of_columns = int(np.ceil(np.sqrt(len(l))))
number_of_rows = int(np.ceil(len(l) / number_of_columns))

sns.set_style('whitegrid')  # set once, before the axes are created
box_fig, box_axes = plt.subplots(
    number_of_rows,
    number_of_columns,
    figsize=(3 * number_of_columns, 4 * number_of_rows),
    squeeze=False,  # always hand back a 2-D array of axes
)
box_axes = box_axes.ravel()
for ax, column_name in zip(box_axes, l):
    # Passing the series as `y` gives a vertical box without the positional-data form.
    sns.boxplot(y=data[column_name], color='green', ax=ax)
# Hide any grid cell that has no column to show.
for ax in box_axes[len(l):]:
    ax.set_visible(False)
box_fig.tight_layout()  # once, after every panel has been drawn
In [ ]:
# Check distribution / skewness: a density histogram with a KDE overlay per column.

# The grid is computed here instead of reusing `number_of_rows` /
# `number_of_columns` from the outlier cell, so this cell does not depend on
# values leaked from another cell (the row count there was a float).
dist_columns = data.columns.values
dist_ncols = int(np.ceil(np.sqrt(len(dist_columns))))
dist_nrows = int(np.ceil(len(dist_columns) / dist_ncols))

dist_fig, dist_axes = plt.subplots(
    dist_nrows,
    dist_ncols,
    figsize=(4 * dist_ncols, 4 * dist_nrows),
    squeeze=False,  # always hand back a 2-D array of axes
)
dist_axes = dist_axes.ravel()
for ax, column_name in zip(dist_axes, dist_columns):
    # histplot replaces the deprecated distplot; stat='density' keeps the
    # histogram normalized so it shares a scale with the KDE curve.
    sns.histplot(data[column_name], kde=True, stat='density', ax=ax)
# Hide any grid cell that has no column to show.
for ax in dist_axes[len(dist_columns):]:
    ax.set_visible(False)
dist_fig.tight_layout()
In [16]:
# Prepare the data for the annual temperature plot.
# Both series come straight from the loaded DataFrame instead of a hand-typed
# copy of its rows, so the plot cannot drift from the CSV.
annual = data['Annual'].to_numpy()

x = data['Year'].to_numpy()
In [17]:
# Line plot of the annual temperature readings

annual_trace = go.Scatter(x=x, y=annual)
fig = go.Figure(data=[annual_trace])
# Title and axis labels
fig.update_layout(
    title='Annual Temperature(1901-2011)',
    xaxis_title='Years',
    yaxis_title='Temperature (degrees C)',
)
fig.show()
In [19]:
pip install chart_studio
Collecting chart_studio
  Downloading https://files.pythonhosted.org/packages/ca/ce/330794a6b6ca4b9182c38fc69dd2a9cbff60fd49421cb8648ee5fee352dc/chart_studio-1.1.0-py3-none-any.whl (64kB)
     |████████████████████████████████| 71kB 3.4MB/s 
Requirement already satisfied: retrying>=1.3.3 in /usr/local/lib/python3.6/dist-packages (from chart_studio) (1.3.3)
Requirement already satisfied: requests in /usr/local/lib/python3.6/dist-packages (from chart_studio) (2.23.0)
Requirement already satisfied: six in /usr/local/lib/python3.6/dist-packages (from chart_studio) (1.15.0)
Requirement already satisfied: plotly in /usr/local/lib/python3.6/dist-packages (from chart_studio) (4.4.1)
Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.6/dist-packages (from requests->chart_studio) (2.10)
Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.6/dist-packages (from requests->chart_studio) (2020.12.5)
Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.6/dist-packages (from requests->chart_studio) (1.24.3)
Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.6/dist-packages (from requests->chart_studio) (3.0.4)
Installing collected packages: chart-studio
Successfully installed chart-studio-1.1.0
In [20]:
#Hosting visualization using chartstudio
import chart_studio
import chart_studio.plotly as py
In [21]:
# Read the Chart Studio credentials from the environment so the API key is
# never stored in the notebook. Set CHART_STUDIO_API_KEY (and, optionally,
# CHART_STUDIO_USERNAME) before running; a missing key raises KeyError.
# The key that used to be hard-coded here should be treated as exposed and
# regenerated in the Chart Studio account settings.
chart_studio.tools.set_credentials_file(
    username=os.environ.get('CHART_STUDIO_USERNAME', 'TROPICSU'),
    api_key=os.environ['CHART_STUDIO_API_KEY'],
)
In [22]:
# Upload the current figure to the Chart Studio account
plot_options = {'filename': 'Annual_temp_plot', 'auto_open': True}
py.plot(fig, **plot_options)
Out[22]:
'https://plotly.com/~TROPICSU/2/'
In [24]:
# Write the current figure to index.html (an option for big datasets) and open it
import plotly.io as pio

html_options = dict(file='index.html', auto_open=True)
pio.write_html(fig, **html_options)
In [25]:
# Prepare the data for the JUN TO SEPT plot.
# The readings come from the 'Jun' column of the loaded DataFrame and the years
# from 'Year', instead of a hand-typed copy of the same rows.
j = data['Jun'].tolist()

x = data['Year'].to_numpy()
In [26]:
# Line plot of the JUN-SEPT readings

jun_sept_trace = go.Scatter(x=x, y=j)
fig = go.Figure(data=[jun_sept_trace])

# Title and axis labels
fig.update_layout(
    title='June to September Temperature',
    xaxis_title='Years',
    yaxis_title='Temperature (degrees C)',
)
fig.show()
In [27]:
# Read the Chart Studio credentials from the environment so the API key is
# never stored in the notebook. Set CHART_STUDIO_API_KEY (and, optionally,
# CHART_STUDIO_USERNAME) before running; a missing key raises KeyError.
# The key that used to be hard-coded here should be treated as exposed and
# regenerated in the Chart Studio account settings.
chart_studio.tools.set_credentials_file(
    username=os.environ.get('CHART_STUDIO_USERNAME', 'TROPICSU'),
    api_key=os.environ['CHART_STUDIO_API_KEY'],
)
In [28]:
# Upload the current figure to the Chart Studio account
plot_options = {'filename': 'June-Sept plot', 'auto_open': True}
py.plot(fig, **plot_options)
Out[28]:
'https://plotly.com/~TROPICSU/6/'
In [29]:
# Prepare the data for the OCT-DEC plot.
# The readings come from the 'Oct' column of the loaded DataFrame and the years
# from 'Year', instead of a hand-typed copy of the same rows.

o = data['Oct'].tolist()

x = data['Year'].to_numpy()
In [30]:
# Line plot of the OCT-DEC readings

oct_dec_trace = go.Scatter(x=x, y=o)
fig = go.Figure(data=[oct_dec_trace])
# Title and axis labels
fig.update_layout(
    title='October to December Temperature',
    xaxis_title='Years',
    yaxis_title='Temperature (degrees C)',
)
fig.show()
In [31]:
# Read the Chart Studio credentials from the environment so the API key is
# never stored in the notebook. Set CHART_STUDIO_API_KEY (and, optionally,
# CHART_STUDIO_USERNAME) before running; a missing key raises KeyError.
# The key that used to be hard-coded here should be treated as exposed and
# regenerated in the Chart Studio account settings.
chart_studio.tools.set_credentials_file(
    username=os.environ.get('CHART_STUDIO_USERNAME', 'TROPICSU'),
    api_key=os.environ['CHART_STUDIO_API_KEY'],
)
In [32]:
# Upload the current figure to the Chart Studio account
plot_options = {'filename': 'Oct-Dec plot', 'auto_open': True}
py.plot(fig, **plot_options)
Out[32]:
'https://plotly.com/~TROPICSU/8/'

End of code