# Import pandas package
import pandas as pd
# Employee records laid out column-wise: each key becomes a column label
# and each list supplies that column's values, one entry per row.
data = {
    'Name': ['Jai', 'Princi', 'Gaurav', 'Anuj'],
    'Age': [27, 24, 22, 32],
    'Address': ['Delhi', 'Kanpur', 'Allahabad', 'Kannauj'],
    'Qualification': ['Msc', 'MA', 'MCA', 'Phd'],
}

# Build the DataFrame from the mapping and show the whole table.
df = pd.DataFrame(data=data)
print(df)

# Indexing with a list of labels returns a DataFrame that holds only
# those columns.
print(df[['Name', 'Qualification']])
Name Age Address Qualification 0 Jai 27 Delhi Msc 1 Princi 24 Kanpur MA 2 Gaurav 22 Allahabad MCA 3 Anuj 32 Kannauj Phd Name Qualification 0 Jai Msc 1 Princi MA 2 Gaurav MCA 3 Anuj Phd
import pandas as pd
# Three rows labelled by name, with two integer columns.
df = pd.DataFrame(
    data=[[1, 2], [4, 5], [7, 8]],
    index=['cobra', 'viper', 'sidewinder'],
    columns=['max_speed', 'shield'],
)
df
max_speed | shield | |
---|---|---|
cobra | 1 | 2 |
viper | 4 | 5 |
sidewinder | 7 | 8 |
# List selectors on both axes: the result stays a DataFrame even though it
# holds only the single cell at row 'cobra', column 'shield'.
df.loc[['cobra'],['shield']]
shield | |
---|---|
cobra | 2 |
# A scalar row label with a list of column labels returns the 'cobra' row
# as a Series whose entries are the requested columns.
df.loc['cobra',['max_speed','shield']]
max_speed 1 shield 2 Name: cobra, dtype: int64
# A one-element list of row labels returns a one-row DataFrame with all
# columns.
df.loc[['viper']]
max_speed | shield | |
---|---|---|
viper | 4 | 5 |
# A list of row labels with a scalar column label returns the 'shield'
# values of those rows as a Series.
df.loc[['viper', 'sidewinder'],'shield']
viper 5 sidewinder 8 Name: shield, dtype: int64
# Scalar labels on both axes return the single cell value itself.
df.loc['cobra', 'shield']
2
# Label slices with .loc include BOTH endpoints, so this returns the
# 'max_speed' values for 'cobra' and 'viper'.
df.loc['cobra':'viper', 'max_speed']
cobra 1 viper 4 Name: max_speed, dtype: int64
# A boolean list with one entry per row keeps only the rows marked True
# (here just the last row, 'sidewinder').
df.loc[[False, False, True]]
max_speed | shield | |
---|---|---|
sidewinder | 7 | 8 |
# The comparison yields a boolean Series; .loc keeps the rows where it is
# True, i.e. rows whose 'shield' value is greater than 6.
df.loc[df['shield'] > 6]
max_speed | shield | |
---|---|---|
sidewinder | 7 | 8 |
# Same row filter as the .loc form, written with plain [] indexing.
df[df['shield'] > 6]
max_speed | shield | |
---|---|---|
sidewinder | 7 | 8 |
# Same row filter again, with the condition given as a query string that
# refers to the column by name.
df.query('shield > 6')
max_speed | shield | |
---|---|---|
sidewinder | 7 | 8 |
# Filter rows with a boolean mask and restrict the columns in one step;
# the list selector for columns keeps the result a DataFrame.
df.loc[df['shield'] > 6, ['max_speed']]
max_speed | |
---|---|
sidewinder | 7 |
# Assign 50 to the 'shield' column for the rows labelled 'viper' and
# 'sidewinder'; assignment through .loc modifies df itself.
df.loc[['viper', 'sidewinder'], ['shield']] = 50
df
max_speed | shield | |
---|---|---|
cobra | 1 | 2 |
viper | 4 | 50 |
sidewinder | 7 | 50 |
# Assigning a scalar to a whole row sets every column of 'cobra' to 10.
df.loc['cobra'] = 10
df
max_speed | shield | |
---|---|---|
cobra | 10 | 10 |
viper | 4 | 50 |
sidewinder | 7 | 50 |
# The ':' row selector means "all rows", so the whole 'max_speed' column
# is set to 30.
df.loc[:, 'max_speed'] = 30
df
max_speed | shield | |
---|---|---|
cobra | 30 | 10 |
viper | 30 | 50 |
sidewinder | 30 | 50 |
# Boolean-mask assignment: every column of each row whose 'shield' value
# exceeds 35 is overwritten with 0.
df.loc[df['shield'] > 35] = 0
df
max_speed | shield | |
---|---|---|
cobra | 30 | 10 |
viper | 0 | 0 |
sidewinder | 0 | 0 |
# Same data as before, but the rows carry the integer labels 7, 8 and 9
# instead of names. The result is only displayed, not assigned.
pd.DataFrame([[1, 2], [4, 5], [7, 8]],
index=[7, 8, 9], columns=['max_speed', 'shield'])
max_speed | shield | |
---|---|---|
7 | 1 | 2 |
8 | 4 | 5 |
9 | 7 | 8 |
# Student records, one list per column. The mapping is deliberately not
# named `dict`: that would hide the builtin type for the rest of the script.
student_data = {
    'name': ["aparna", "pankaj", "sudhir", "Geeku"],
    'degree': ["MBA", "BCA", "M.Tech", "MBA"],
    'score': [90, 40, 80, 98],
}

# creating a dataframe from a dictionary
df = pd.DataFrame(student_data)

# iterating over rows using iterrows(): each step yields the row's index
# label together with the row as a Series. Print the pair, then a blank
# separator line.
for row_label, row in df.iterrows():
    print(row_label, row)
    print()
0 name aparna degree MBA score 90 Name: 0, dtype: object 1 name pankaj degree BCA score 40 Name: 1, dtype: object 2 name sudhir degree M.Tech score 80 Name: 2, dtype: object 3 name Geeku degree MBA score 98 Name: 3, dtype: object
# creating a list of dataframe column labels (iterating a DataFrame
# yields its column labels)
columns = list(df)
print(columns)

for column_name in columns:
    # printing the third element of the column; .iloc selects by position,
    # so this is the third row whatever the row labels happen to be
    print(df[column_name].iloc[2])
['name', 'degree', 'score'] sudhir M.Tech 80
# Define a dictionary containing the first group of employee data
data1 = {
    'Name': ['Jai', 'Princi', 'Gaurav', 'Anuj'],
    'Age': [27, 24, 22, 32],
    'Address': ['Nagpur', 'Kanpur', 'Allahabad', 'Kannuaj'],
    'Qualification': ['Msc', 'MA', 'MCA', 'Phd'],
}

# Define a dictionary containing the second group of employee data
data2 = {
    'Name': ['Abhi', 'Ayushi', 'Dhiraj', 'Hitesh'],
    'Age': [17, 14, 12, 52],
    'Address': ['Nagpur', 'Kanpur', 'Allahabad', 'Kannuaj'],
    'Qualification': ['Btech', 'B.A', 'Bcom', 'B.hons'],
}

# Convert each dictionary into a DataFrame; the explicit row labels do not
# overlap (0-3 and 4-7)
df = pd.DataFrame(data1, index=[0, 1, 2, 3])
df1 = pd.DataFrame(data2, index=[4, 5, 6, 7])
print(df, "\n\n", df1)

# Stack the two frames on top of each other (axis=0 is the row axis)
frames = [df, df1]
res1 = pd.concat(frames, axis=0)
res1
Name Age Address Qualification 0 Jai 27 Nagpur Msc 1 Princi 24 Kanpur MA 2 Gaurav 22 Allahabad MCA 3 Anuj 32 Kannuaj Phd Name Age Address Qualification 4 Abhi 17 Nagpur Btech 5 Ayushi 14 Kanpur B.A 6 Dhiraj 12 Allahabad Bcom 7 Hitesh 52 Kannuaj B.hons
Name | Age | Address | Qualification | |
---|---|---|---|---|
0 | Jai | 27 | Nagpur | Msc |
1 | Princi | 24 | Kanpur | MA |
2 | Gaurav | 22 | Allahabad | MCA |
3 | Anuj | 32 | Kannuaj | Phd |
4 | Abhi | 17 | Nagpur | Btech |
5 | Ayushi | 14 | Kanpur | B.A |
6 | Dhiraj | 12 | Allahabad | Bcom |
7 | Hitesh | 52 | Kannuaj | B.hons |
import pandas as pd
# First group of employee data; includes a 'Mobile No' column
data1 = {
    'Name': ['Jai', 'Princi', 'Gaurav', 'Anuj'],
    'Age': [27, 24, 22, 32],
    'Address': ['Nagpur', 'Kanpur', 'Allahabad', 'Kannuaj'],
    'Qualification': ['Msc', 'MA', 'MCA', 'Phd'],
    'Mobile No': [97, 91, 58, 76],
}

# Second group of employee data; includes a 'Salary' column
data2 = {
    'Name': ['Gaurav', 'Anuj', 'Dhiraj', 'Hitesh'],
    'Age': [22, 32, 12, 52],
    'Address': ['Allahabad', 'Kannuaj', 'Allahabad', 'Kannuaj'],
    'Qualification': ['MCA', 'Phd', 'Bcom', 'B.hons'],
    'Salary': [1000, 2000, 3000, 4000],
}

# Convert each dictionary into a DataFrame; row labels 2 and 3 appear in
# both frames
df = pd.DataFrame(data1, index=[0, 1, 2, 3])
df1 = pd.DataFrame(data2, index=[2, 3, 6, 7])
print(df, "\n\n", df1)

# Place the frames side by side (axis=1) and keep only the row labels
# present in both (join='inner')
res2 = pd.concat(objs=[df, df1], join='inner', axis=1)
res2
Name Age Address Qualification Mobile No 0 Jai 27 Nagpur Msc 97 1 Princi 24 Kanpur MA 91 2 Gaurav 22 Allahabad MCA 58 3 Anuj 32 Kannuaj Phd 76 Name Age Address Qualification Salary 2 Gaurav 22 Allahabad MCA 1000 3 Anuj 32 Kannuaj Phd 2000 6 Dhiraj 12 Allahabad Bcom 3000 7 Hitesh 52 Kannuaj B.hons 4000
Name | Age | Address | Qualification | Mobile No | Name | Age | Address | Qualification | Salary | |
---|---|---|---|---|---|---|---|---|---|---|
2 | Gaurav | 22 | Allahabad | MCA | 58 | Gaurav | 22 | Allahabad | MCA | 1000 |
3 | Anuj | 32 | Kannuaj | Phd | 76 | Anuj | 32 | Kannuaj | Phd | 2000 |
# Row-wise concat (default axis) that keeps only the columns present in
# both frames (join='inner'); sort=True asks for the column labels to be
# sorted. Row labels are kept as-is, so 2 and 3 appear twice in the result.
pd.concat([df, df1], join='inner',sort=True)
Address | Age | Name | Qualification | |
---|---|---|---|---|
0 | Nagpur | 27 | Jai | Msc |
1 | Kanpur | 24 | Princi | MA |
2 | Allahabad | 22 | Gaurav | MCA |
3 | Kannuaj | 32 | Anuj | Phd |
2 | Allahabad | 22 | Gaurav | MCA |
3 | Kannuaj | 32 | Anuj | Phd |
6 | Allahabad | 12 | Dhiraj | Bcom |
7 | Kannuaj | 52 | Hitesh | B.hons |
# Side-by-side concat without join='inner': the union of row labels is
# kept, and cells with no data for a label are filled with NaN (which is
# why the numeric columns show up as floats in the output).
res2 = pd.concat([df, df1], axis=1, sort=False)
res2
Name | Age | Address | Qualification | Mobile No | Name | Age | Address | Qualification | Salary | |
---|---|---|---|---|---|---|---|---|---|---|
0 | Jai | 27.0 | Nagpur | Msc | 97.0 | NaN | NaN | NaN | NaN | NaN |
1 | Princi | 24.0 | Kanpur | MA | 91.0 | NaN | NaN | NaN | NaN | NaN |
2 | Gaurav | 22.0 | Allahabad | MCA | 58.0 | Gaurav | 22.0 | Allahabad | MCA | 1000.0 |
3 | Anuj | 32.0 | Kannuaj | Phd | 76.0 | Anuj | 32.0 | Kannuaj | Phd | 2000.0 |
6 | NaN | NaN | NaN | NaN | NaN | Dhiraj | 12.0 | Allahabad | Bcom | 3000.0 |
7 | NaN | NaN | NaN | NaN | NaN | Hitesh | 52.0 | Kannuaj | B.hons | 4000.0 |
# Define a dictionary containing the first group of employee data
data1 = {
    'Name': ['Jai', 'Princi', 'Gaurav', 'Anuj'],
    'Age': [27, 24, 22, 32],
    'Address': ['Nagpur', 'Kanpur', 'Allahabad', 'Kannuaj'],
    'Qualification': ['Msc', 'MA', 'MCA', 'Phd'],
}

# Define a dictionary containing the second group of employee data
data2 = {
    'Name': ['Abhi', 'Ayushi', 'Dhiraj', 'Hitesh'],
    'Age': [17, 14, 12, 52],
    'Address': ['Nagpur', 'Kanpur', 'Allahabad', 'Kannuaj'],
    'Qualification': ['Btech', 'B.A', 'Bcom', 'B.hons'],
}

# Convert each dictionary into a DataFrame with explicit row labels
df = pd.DataFrame(data1, index=[0, 1, 2, 3])
df1 = pd.DataFrame(data2, index=[4, 5, 6, 7])
print(df, "\n\n", df1)

# Append the rows of df1 below df. DataFrame.append() was deprecated in
# pandas 1.4 and removed in pandas 2.0, so pd.concat() is used instead; it
# returns a new frame and leaves df and df1 unchanged.
res = pd.concat([df, df1])
res
Name Age Address Qualification 0 Jai 27 Nagpur Msc 1 Princi 24 Kanpur MA 2 Gaurav 22 Allahabad MCA 3 Anuj 32 Kannuaj Phd Name Age Address Qualification 4 Abhi 17 Nagpur Btech 5 Ayushi 14 Kanpur B.A 6 Dhiraj 12 Allahabad Bcom 7 Hitesh 52 Kannuaj B.hons
Name | Age | Address | Qualification | |
---|---|---|---|---|
0 | Jai | 27 | Nagpur | Msc |
1 | Princi | 24 | Kanpur | MA |
2 | Gaurav | 22 | Allahabad | MCA |
3 | Anuj | 32 | Kannuaj | Phd |
4 | Abhi | 17 | Nagpur | Btech |
5 | Ayushi | 14 | Kanpur | B.A |
6 | Dhiraj | 12 | Allahabad | Bcom |
7 | Hitesh | 52 | Kannuaj | B.hons |
# Column-wise source data for the frame
data = {
    'Name': ['Jai', 'Princi', 'Gaurav', 'Anuj'],
    'Height': [5.1, 6.2, 5.1, 5.2],
    'Qualification': ['Msc', 'MA', 'Msc', 'Msc'],
}
df = pd.DataFrame(data=data)

# Values for the new column, one per existing row
address = ['Delhi', 'Bangalore', 'Chennai', 'Patna']

# Assigning to a label that does not exist yet adds 'Address' as the
# last column of df
df['Address'] = address

# Observe the result
df
Name | Height | Qualification | Address | |
---|---|---|---|---|
0 | Jai | 5.1 | Msc | Delhi |
1 | Princi | 6.2 | MA | Bangalore |
2 | Gaurav | 5.1 | Msc | Chennai |
3 | Anuj | 5.2 | Msc | Patna |
# Column-wise source data for the frame
data = {
    'Name': ['Jai', 'Princi', 'Gaurav', 'Anuj'],
    'Height': [5.1, 6.2, 5.1, 5.2],
    'Qualification': ['Msc', 'MA', 'Msc', 'Msc'],
}
df = pd.DataFrame(data=data)

# DataFrame.insert() adds the column in place at the given position:
# 'Age' becomes the third column (position 2). allow_duplicates=True
# permits the insert even if the label already existed.
df.insert(loc=2, column="Age", value=[21, 23, 24, 21], allow_duplicates=True)

# Observe the result
df
Name | Height | Age | Qualification | |
---|---|---|---|---|
0 | Jai | 5.1 | 21 | Msc |
1 | Princi | 6.2 | 23 | MA |
2 | Gaurav | 5.1 | 24 | Msc |
3 | Anuj | 5.2 | 21 | Msc |
# Column-wise source data for the frame
data = {
    'Name': ['Jai', 'Princi', 'Gaurav', 'Anuj'],
    'Height': [5.1, 6.2, 5.1, 5.2],
    'Qualification': ['Msc', 'MA', 'Msc', 'Msc'],
}
df = pd.DataFrame(data=data)

# assign() returns a NEW frame with the extra column; the keyword name
# becomes the column label, so the column is called 'address' (lower
# case). df itself is left unchanged.
df2 = df.assign(address=['Delhi', 'Bangalore', 'Chennai', 'Patna'])

# Observe the result
df2
Name | Height | Qualification | address | |
---|---|---|---|---|
0 | Jai | 5.1 | Msc | Delhi |
1 | Princi | 6.2 | MA | Bangalore |
2 | Gaurav | 5.1 | Msc | Chennai |
3 | Anuj | 5.2 | Msc | Patna |
# One row per student enrolment; every column is given as a list with
# one value per row.
data = pd.DataFrame(
    {
        'course_name': ['Data Structures', 'Python', 'Machine Learning'],
        'student_name': ['A', 'B', 'C'],
        'student_city': ['Chennai', 'Pune', 'Delhi'],
        'student_gender': ['M', 'F', 'M'],
    }
)

# show the Dataframe
data
# One row per student enrolment
data = pd.DataFrame(
    {
        'course_name': ['Data Structures', 'Python', 'Machine Learning'],
        'student_name': ['A', 'B', 'C'],
        'student_city': ['Chennai', 'Pune', 'Delhi'],
        'student_gender': ['M', 'F', 'M'],
    }
)

# Comparing the column labels with a name gives a boolean array; using it
# as the column selector of .loc keeps all rows and every column except
# 'student_gender'.
df = data.loc[:, data.columns != 'student_gender']

# show the dataframe
df
course_name | student_name | student_city | |
---|---|---|---|
0 | Data Structures | A | Chennai |
1 | Python | B | Pune |
2 | Machine Learning | C | Delhi |
# One row per student enrolment
data = pd.DataFrame(
    {
        'course_name': ['Data Structures', 'Python', 'Machine Learning'],
        'student_name': ['A', 'B', 'C'],
        'student_city': ['Chennai', 'Pune', 'Delhi'],
        'student_gender': ['M', 'F', 'M'],
    }
)

# drop method: returns a new frame without the 'student_city' column and
# leaves `data` unchanged (columns=... is the keyword form of axis=1)
df = data.drop(columns='student_city')

# show the dataframe
df
course_name | student_name | student_gender | |
---|---|---|---|
0 | Data Structures | A | M |
1 | Python | B | F |
2 | Machine Learning | C | M |
# One row per student enrolment
data = pd.DataFrame(
    {
        'course_name': ['Data Structures', 'Python', 'Machine Learning'],
        'student_name': ['A', 'B', 'C'],
        'student_city': ['Chennai', 'Pune', 'Delhi'],
        'student_gender': ['M', 'F', 'M'],
    }
)

# Index.difference() gives the column labels that are NOT in the list;
# selecting with that result keeps every column except 'student_name'.
df = data[data.columns.difference(['student_name'])]

# show the dataframe
df
course_name | student_city | student_gender | |
---|---|---|---|
0 | Data Structures | Chennai | M |
1 | Python | Pune | F |
2 | Machine Learning | Delhi | M |
# On its own, difference() returns an Index of the column labels that
# remain after removing 'student_name'.
data.columns.difference(['student_name'])
Index(['course_name', 'student_city', 'student_gender'], dtype='object')
# Five columns 'A'..'E', five rows each
data = {
    'A': ['A1', 'A2', 'A3', 'A4', 'A5'],
    'B': ['B1', 'B2', 'B3', 'B4', 'B5'],
    'C': ['C1', 'C2', 'C3', 'C4', 'C5'],
    'D': ['D1', 'D2', 'D3', 'D4', 'D5'],
    'E': ['E1', 'E2', 'E3', 'E4', 'E5'],
}

# Convert the dictionary into DataFrame
df = pd.DataFrame(data=data)

# Remove the two columns named 'C' and 'D'. drop() returns a new frame;
# the result is not assigned, so df itself keeps all five columns.
# Equivalent spelling: df.drop(['C', 'D'], axis=1)
df.drop(columns=['C', 'D'])
A | B | E | |
---|---|---|---|
0 | A1 | B1 | E1 |
1 | A2 | B2 | E2 |
2 | A3 | B3 | E3 |
3 | A4 | B4 | E4 |
4 | A5 | B5 | E5 |
# Five columns 'A'..'E', five rows each
data = {
    'A': ['A1', 'A2', 'A3', 'A4', 'A5'],
    'B': ['B1', 'B2', 'B3', 'B4', 'B5'],
    'C': ['C1', 'C2', 'C3', 'C4', 'C5'],
    'D': ['D1', 'D2', 'D3', 'D4', 'D5'],
    'E': ['E1', 'E2', 'E3', 'E4', 'E5'],
}

# Convert the dictionary into DataFrame
df = pd.DataFrame(data)

# Remove every column whose label contains 'A'. The matching labels are
# collected first and dropped in one in-place call, so the frame is never
# modified while its columns are being iterated.
columns_with_a = [col for col in df.columns if 'A' in col]
df.drop(columns=columns_with_a, inplace=True)
df
B | C | D | E | |
---|---|---|---|---|
0 | B1 | C1 | D1 | E1 |
1 | B2 | C2 | D2 | E2 |
2 | B3 | C3 | D3 | E3 |
3 | B4 | C4 | D4 | E4 |
4 | B5 | C5 | D5 | E5 |
# Load the CSV file, using its "Name" column as the row index so rows can
# be looked up by name
data = pd.read_csv("D:\\data\\nba.csv", index_col="Name")

# Retrieve two rows by their index label with .loc
first, second = data.loc["Avery Bradley"], data.loc["R.J. Hunter"]
print(first, "\n\n\n", second)