# Exploratory walk-through of the Titanic-style ``train.csv`` file in Google
# Colab: upload the file, load it with pandas, then sort, filter, inspect
# missing values and plot the distribution of the ``Survived`` column.
#
# NOTE: bare expressions such as ``df.head()`` only render output when they are
# the last statement of a notebook cell; in a plain script they are no-ops.

import csv
import io

import matplotlib.pyplot as plt
import pandas as pd
from google.colab import files

# Open the Colab upload widget. The returned mapping is indexed by file name
# and its values are wrapped in BytesIO below, i.e. they are raw bytes.
uploaded = files.upload()
df2 = pd.read_csv(io.BytesIO(uploaded["train.csv"]))

# Read the same file from disk.
# NOTE(review): assumes the upload also leaves train.csv in the working
# directory -- confirm when running outside Colab.
df = pd.read_csv("train.csv")
df
df.head(5)  # Used to display top 5
df.tail(5)  # Used to display last 5

# Reload keeping only the columns used in the rest of the analysis.
df = pd.read_csv(
    "train.csv",
    usecols=["PassengerId", "Survived", "Pclass", "Name", "Sex", "Age"],
)
df.head()
df.describe()

# sort_values returns a new frame; this first call leaves df untouched, so the
# following head() still shows the original order.
df.sort_values("Age")
df.head()

# Rebind df to the frame sorted by Age, oldest first.
df = df.sort_values("Age", ascending=False)
df.head(5)

# Look up a single passenger by exact name.
result = df[df["Name"] == "Svensson, Mr. Johan"]
result

df["Sex"].value_counts()
df.nunique()

# Rows where Age is below 50 AND Sex is "female".
df_age = df["Age"] < 50
df_sex_mask = df["Sex"] == "female"
df[df_age & df_sex_mask]

# Rows where Sex is "male" OR Age is above 70.
# The original compared against "Male"; the other mask in this script uses
# lowercase "female", so the labels are presumably lowercase and "Male" would
# never match -- confirm against df["Sex"].value_counts().
df_sex = df["Sex"] == "male"
df_age_mask = df["Age"] > 70
df[df_sex | df_age_mask]

# Inspect missing values: rows without an Age, then null counts per column.
null_mask = df["Age"].isnull()
df[null_mask]
df.isnull().sum()

# Preview the frame without the Pclass column (df itself is not modified).
df.drop(labels=["Pclass"], axis=1).head()

# Fill missing ages with the median age. The original called
# df.replace("Nan", ...), which only matches the literal string "Nan" and
# therefore never touches real missing values. Like the original, the result
# is not assigned back, so df keeps its NaNs.
df.fillna({"Age": df["Age"].median()})

# Replace every cell equal to this passenger name with "Tanu" (result is not
# assigned back either).
df.replace("Masselmani, Mrs. Fatima", "Tanu")

count = df["Survived"].value_counts()
print(count)

# Let us see that in percentage.
percentage = df["Survived"].value_counts() * 100 / len(df)
print(percentage)

# Equivalent of the ``%matplotlib inline`` notebook magic, written as plain
# Python so the file still parses outside IPython. get_ipython only exists
# inside an IPython/Colab kernel, hence the NameError guard.
try:
    get_ipython().run_line_magic("matplotlib", "inline")
except NameError:
    pass

color = 0.5  # not used by the plot call below
df["Survived"].value_counts().plot(kind="bar")