#!/usr/bin/env python
# coding: utf-8

# 1.	Replace Not available with NaN value in pandas? (Example.csv)

# In[1]:


import pandas as pd

df=pd.read_csv('Example.csv',na_values=['not available'])
df.head()


# 2.	To get the first 3 rows of a given DataFrame.
# Sample Python dictionary data and list labels:
# exam_data = {'name': ['Anastasia', 'Dima', 'Katherine', 'James', 'Emily', 'Michael', 'Matthew', 'Laura', 'Kevin', 'Jonas'],
# 'score': [12.5, 9, 16.5, np.nan, 9, 20, 14.5, np.nan, 8, 19],
# 'attempts': [1, 3, 2, 3, 2, 3, 1, 1, 2, 1],
# 'qualify': ['yes', 'no', 'yes', 'no', 'no', 'yes', 'yes', 'no', 'no', 'yes']}
# labels = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j']
# 

# In[8]:


dict_data = {'name': list(df.loc[:2,'name']),'class': list(df.loc[:2,'class'])}
dict_data


# In[2]:


import numpy as np
import pandas as pd
dictx= {'name': ['Anastasia', 'Dima', 'Katherine', 'James', 'Emily', 'Michael', 'Matthew', 'Laura', 'Kevin', 'Jonas'], 'score': [12.5, 9, 16.5, np.nan, 9, 20, 14.5, np.nan, 8, 19], 'attempts': [1, 3, 2, 3, 2, 3, 1, 1, 2, 1], 'qualify': ['yes', 'no', 'yes', 'no', 'no', 'yes', 'yes', 'no', 'no', 'yes']}
df2=pd.DataFrame(dictx,index=['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j'])
df2


# In[4]:


df2.iloc[:3]


# 3.	To select the 'name' and 'score' columns from frame

# In[10]:


df2.loc[:,['name','score']]


# 4.	To select the specified columns and rows from a given data frame.  
# 
# Select 'name' and 'score' columns in rows 1, 3, 5, 6 from the following data frame.
# 

# In[11]:


df2.index =[0,1,2,3,4,5,6,7,8,9]


# In[12]:


df2.loc[[1,3,5,6],['name','score']]


# In[13]:


df2.loc[:,['name','score']]


# In[7]:


df2.groupby('name').aggregate({'attempts':max}).reset_index()


# In[3]:


df2.groupby('attempts').max().reset_index().sort_values(by='attempts',ascending=False).head(1)


# 5.	To select the rows where the number of attempts in the examination is greater than 2.  

# In[12]:


df2[df2['attempts']>2]['name']


# In[5]:


df2.loc[df2['attempts']>2]


# In[10]:


df2.loc[df2['attempts']>2,['attempts','name']]


# 6.	To count the number of rows and columns of a DataFrame. 

# In[16]:


df2.shape[0]


# 7.	To select the rows where the score is missing, i.e. is NaN.  

# In[15]:


df2.loc[df2['score'].isna()==True]


# In[22]:


df2['score'].value_counts()


# In[29]:


df2['score'].isna().sum()


# In[18]:


df2[df2['score'].isna()]


# 8.	To select the rows the score is between 15 and 20 (inclusive).  

# In[19]:


df2[df2['score'].between(15,20)]


# 9.	To select the rows where number of attempts in the examination is less than 2 and score greater than 15.  

# In[20]:


df2.loc[(df2['attempts']<2) & (df2['score']>15)]


# 10.	To change the score in row 'd' to 11.5.  

# In[21]:


df2.loc[4,'score']=11.5
df2


# 11.	 To calculate the sum of the examination attempts by the students.  

# In[22]:


df2['attempts'].aggregate(sum)


# 12.	To calculate the mean score for each different student in DataFrame.  

# In[16]:


df2.groupby('name').aggregate({'score':np.mean})


# 13.	To append a new row 'k' to data frame with given values for each column. Now delete the new row and return the original DataFrame.  

# In[18]:


df2.loc['k'] = ['smesh',15,1,'yes']
print(df2)


# In[19]:


df2.loc['k']
df2.drop('k')


# In[26]:


df2['extra']=np.arange(11)
df2


# In[27]:


df2.drop('extra',axis=1,inplace=True)


# 14.	To Sort the data frame first by 'name' in descending order, then by 'score' in ascending order:

# In[28]:


df2.sort_values(by=['name','score'],ascending=[True,False])


# 15.	To replace the 'qualify' column contains the values 'yes' and 'no' with True and False.  

# In[29]:


df3=df2.replace({'yes':True,'no':False})
df3


# 16.	To change the name 'James' to 'Suresh' in name column of the DataFrame.  

# In[30]:


df3.loc[df3['name']=='James','name']='Suresh'


# In[31]:


df2[df2['name']=='James']['name'] ='Suresh'


# In[32]:


df3


# 17.	To delete the 'attempts' column from the DataFrame.  

# In[33]:


df3=df3.drop('attempts',axis=1)


# 18.	To insert a new column in existing DataFrame.  

# In[34]:


df3['color']=np.nan


# Bonus

# In[35]:


df3.loc[[True,False,False,False,False,False,False,False,False,False,False]]


# Iterating through DataFrame

# In[36]:


for key,value in df.iteritems():
    print('key',key)
    print('value',value)


# In[37]:


for key,value in df.iterrows():
    print('key',key)
    print('value',value)


# In[38]:


for value in df.itertuples():
    print('value',value)


# In[39]:


import seaborn as sns
import matplotlib.pyplot as plt
sns.heatmap(df.corr(),cmap='GnBu',annot=np.array([['',''],list(df.corr()['total marks'])]),fmt = 's',linewidths=10,linecolor='r')
plt.show()


# In[40]:


df.corr()


# In[41]:


df.corr()['total marks']


# In[42]:


df.corr().loc['class',['total marks']]


# In[ ]: