# Importing essential libraries for data manipulation and visualization
import pandas as pd          # For handling tabular data
import numpy as np           # For numerical operations
import matplotlib.pyplot as plt  # For data visualization
import seaborn as sns        # For advanced statistical visualizations

# Reading the dataset from a CSV file
df = pd.read_csv('UberData_Analysis.csv')  # Replace this path if the file is located elsewhere
df  # Display the first few rows of the dataframe to get an overview of the dataset

# Checking the structure of the dataset including data types and non-null counts
df.info()  # Helps identify missing values and data types

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1156 entries, 0 to 1155
Data columns (total 7 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   START_DATE  1156 non-null   object 
 1   END_DATE    1155 non-null   object 
 2   CATEGORY    1155 non-null   object 
 3   START       1155 non-null   object 
 4   STOP        1155 non-null   object 
 5   MILES       1156 non-null   float64
 6   PURPOSE     653 non-null    object 
dtypes: float64(1), object(6)
memory usage: 63.3+ KB

# Fill missing values in PURPOSE column with 'Not Available'

df['PURPOSE'].fillna('Not Available',inplace = True)

# View the first 5 rows
df.head()

# Convert START_DATE and END_DATE to datetime format

df['START_DATE'] = pd.to_datetime(df['START_DATE'],errors = 'coerce')
df['END_DATE'] = pd.to_datetime(df['END_DATE'],errors = 'coerce')

# Confirm changes
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1156 entries, 0 to 1155
Data columns (total 7 columns):
 #   Column      Non-Null Count  Dtype         
---  ------      --------------  -----         
 0   START_DATE  421 non-null    datetime64[ns]
 1   END_DATE    420 non-null    datetime64[ns]
 2   CATEGORY    1155 non-null   object        
 3   START       1155 non-null   object        
 4   STOP        1155 non-null   object        
 5   MILES       1156 non-null   float64       
 6   PURPOSE     1156 non-null   object        
dtypes: datetime64[ns](2), float64(1), object(4)
memory usage: 63.3+ KB

#CREATING NEW COLUMN TO SAVE THE TIME IN DIFFRENT COLUMN

from datetime import datetime

df['DATE'] = pd.DatetimeIndex(df['START_DATE']).date
df['TIME'] = pd.DatetimeIndex(df['START_DATE']).hour
df

# Categorize time of day
df['DAY-NIGNT'] = pd.cut(df['TIME'],bins = [0,10,15,19,24], labels = ['Morning','Afternoon','Evening','Night'])
df.head()

# Drop any remaining null values
df.dropna(inplace = True)

#Size of dataset
df.shape

(413, 10)

plt.figure(figsize = (20,5))
plt.subplot(1,2,1)
sns.countplot(df['CATEGORY']) #for finding Category
plt.subplot(1,2,2)
sns.countplot(df['PURPOSE'])#for finding the purpose

<Axes: xlabel='count', ylabel='PURPOSE'>

sns.countplot(df['DAY-NIGNT'])
plt.grid(True,alpha =0.3)

# Extract month number
df['MONTH']=pd.DatetimeIndex(df['START_DATE']).month
# Map month number to month name
month_label = {
    1.0: 'jan',
    2.0: 'feb',
    3.0: 'mar',
    4.0: 'apr',
    5.0: 'may',
    6.0: 'jun',
    7.0: 'jul',
    8.0: 'aug',
    9.0: 'sep',
    10.0: 'oct',
    11.0: 'nov',
    12.0: 'dec'
}
df['MONTH']= df['MONTH'].map(month_label)  
# Count rides by month
mon =  df['MONTH'].value_counts(sort = False)



df.head()

# Create a DataFrame with total ride count and max MILES per month
df2 = pd.DataFrame({
    "MONTHS": mon.values,  # Total rides per month
    "VALUE COUNT": df.groupby("MONTH", sort=False)["MILES"].max().values  # Max miles per month
})

# Plot lineplot
plt.figure(figsize=(10, 5))
p = sns.lineplot(data=df2)
p.set(xlabel="MONTHS", ylabel="VALUE COUNT")
plt.title("Monthly Ride Count vs Max Miles Traveled")
plt.grid(True, alpha=0.3)
plt.show()

df['DAY']= df.START_DATE.dt.weekday # Monday = 0, Sunday = 6
# Map weekday to name
day_label = {
    1: 'mon',
    2: 'tue',
    3: 'wed',
    4: 'thursday',
    5: 'fri',
    6: 'sat',
    7: 'sun',
   
}
df['DAY']= df['DAY'].map(day_label)  
df.head()

# Plot
day_label = df.DAY.value_counts()
sns.barplot(x=day_label.index,y=day_label)
plt.xlabel('DAY')
plt.ylabel ('count')
plt.grid(True,alpha = 0.4)

#  Distance (MILES) Distribution
sns.boxplot(df['MILES'])

<Axes: ylabel='MILES'>

# Filtered to < 100 miles
sns.boxplot(df[df['MILES']<100]['MILES'])

<Axes: ylabel='MILES'>

# Further filtered to < 40 miles
sns.boxplot(df[df['MILES']<40]['MILES'])

<Axes: ylabel='MILES'>

sns.distplot(df[df['MILES']<40]['MILES'])

C:\Users\Asus\AppData\Local\Temp\ipykernel_13736\1171915261.py:1: UserWarning: 

`distplot` is a deprecated function and will be removed in seaborn v0.14.0.

Please adapt your code to use either `displot` (a figure-level function with
similar flexibility) or `histplot` (an axes-level function for histograms).

For a guide to updating your code to use the new functions, please see
https://gist.github.com/mwaskom/de44147ed2974457ad6372750bbe5751

  sns.distplot(df[df['MILES']<40]['MILES'])

<Axes: xlabel='MILES', ylabel='Density'>

	START_DATE	END_DATE	CATEGORY	START	STOP	MILES	PURPOSE
0	01-01-2016 21:11	01-01-2016 21:17	Business	Fort Pierce	Fort Pierce	5.1	Meal/Entertain
1	01-02-2016 01:25	01-02-2016 01:37	Business	Fort Pierce	Fort Pierce	5.0	NaN
2	01-02-2016 20:25	01-02-2016 20:38	Business	Fort Pierce	Fort Pierce	4.8	Errand/Supplies
3	01-05-2016 17:31	01-05-2016 17:45	Business	Fort Pierce	Fort Pierce	4.7	Meeting
4	01-06-2016 14:42	01-06-2016 15:49	Business	Fort Pierce	West Palm Beach	63.7	Customer Visit
...	...	...	...	...	...	...	...
1151	12/31/2016 13:24	12/31/2016 13:42	Business	Kar?chi	Unknown Location	3.9	Temporary Site
1152	12/31/2016 15:03	12/31/2016 15:38	Business	Unknown Location	Unknown Location	16.2	Meeting
1153	12/31/2016 21:32	12/31/2016 21:50	Business	Katunayake	Gampaha	6.4	Temporary Site
1154	12/31/2016 22:08	12/31/2016 23:51	Business	Gampaha	Ilukwatta	48.2	Temporary Site
1155	Totals	NaN	NaN	NaN	NaN	12204.7	NaN

	START_DATE	END_DATE	CATEGORY	START	STOP	MILES	PURPOSE
0	01-01-2016 21:11	01-01-2016 21:17	Business	Fort Pierce	Fort Pierce	5.1	Meal/Entertain
1	01-02-2016 01:25	01-02-2016 01:37	Business	Fort Pierce	Fort Pierce	5.0	Not Available
2	01-02-2016 20:25	01-02-2016 20:38	Business	Fort Pierce	Fort Pierce	4.8	Errand/Supplies
3	01-05-2016 17:31	01-05-2016 17:45	Business	Fort Pierce	Fort Pierce	4.7	Meeting
4	01-06-2016 14:42	01-06-2016 15:49	Business	Fort Pierce	West Palm Beach	63.7	Customer Visit

	START_DATE	END_DATE	CATEGORY	START	STOP	MILES	PURPOSE	DATE	TIME
0	2016-01-01 21:11:00	2016-01-01 21:17:00	Business	Fort Pierce	Fort Pierce	5.1	Meal/Entertain	2016-01-01	21.0
1	2016-01-02 01:25:00	2016-01-02 01:37:00	Business	Fort Pierce	Fort Pierce	5.0	Not Available	2016-01-02	1.0
2	2016-01-02 20:25:00	2016-01-02 20:38:00	Business	Fort Pierce	Fort Pierce	4.8	Errand/Supplies	2016-01-02	20.0
3	2016-01-05 17:31:00	2016-01-05 17:45:00	Business	Fort Pierce	Fort Pierce	4.7	Meeting	2016-01-05	17.0
4	2016-01-06 14:42:00	2016-01-06 15:49:00	Business	Fort Pierce	West Palm Beach	63.7	Customer Visit	2016-01-06	14.0
...	...	...	...	...	...	...	...	...	...
1151	NaT	NaT	Business	Kar?chi	Unknown Location	3.9	Temporary Site	NaT	NaN
1152	NaT	NaT	Business	Unknown Location	Unknown Location	16.2	Meeting	NaT	NaN
1153	NaT	NaT	Business	Katunayake	Gampaha	6.4	Temporary Site	NaT	NaN
1154	NaT	NaT	Business	Gampaha	Ilukwatta	48.2	Temporary Site	NaT	NaN
1155	NaT	NaT	NaN	NaN	NaN	12204.7	Not Available	NaT	NaN

	START_DATE	END_DATE	CATEGORY	START	STOP	MILES	PURPOSE	DATE	TIME	DAY-NIGNT
0	2016-01-01 21:11:00	2016-01-01 21:17:00	Business	Fort Pierce	Fort Pierce	5.1	Meal/Entertain	2016-01-01	21.0	Night
1	2016-01-02 01:25:00	2016-01-02 01:37:00	Business	Fort Pierce	Fort Pierce	5.0	Not Available	2016-01-02	1.0	Morning
2	2016-01-02 20:25:00	2016-01-02 20:38:00	Business	Fort Pierce	Fort Pierce	4.8	Errand/Supplies	2016-01-02	20.0	Night
3	2016-01-05 17:31:00	2016-01-05 17:45:00	Business	Fort Pierce	Fort Pierce	4.7	Meeting	2016-01-05	17.0	Evening
4	2016-01-06 14:42:00	2016-01-06 15:49:00	Business	Fort Pierce	West Palm Beach	63.7	Customer Visit	2016-01-06	14.0	Afternoon

	START_DATE	END_DATE	CATEGORY	START	STOP	MILES	PURPOSE	DATE	TIME	DAY-NIGNT	DAY	MONTH
0	2016-01-01 21:11:00	2016-01-01 21:17:00	Business	Fort Pierce	Fort Pierce	5.1	Meal/Entertain	2016-01-01	21.0	Night	thursday	jan
1	2016-01-02 01:25:00	2016-01-02 01:37:00	Business	Fort Pierce	Fort Pierce	5.0	Not Available	2016-01-02	1.0	Morning	fri	jan
2	2016-01-02 20:25:00	2016-01-02 20:38:00	Business	Fort Pierce	Fort Pierce	4.8	Errand/Supplies	2016-01-02	20.0	Night	fri	jan
3	2016-01-05 17:31:00	2016-01-05 17:45:00	Business	Fort Pierce	Fort Pierce	4.7	Meeting	2016-01-05	17.0	Evening	mon	jan
4	2016-01-06 14:42:00	2016-01-06 15:49:00	Business	Fort Pierce	West Palm Beach	63.7	Customer Visit	2016-01-06	14.0	Afternoon	tue	jan

🚖 Uber Rides Data Analysis – Exploratory Data Analysis (EDA)¶

❓ Problem Statement / Business Questions¶

📂 Load Dataset¶

🧹 Data Cleaning and Preprocessing¶

🕒 Extract Date and Time Features¶

📊 EDA Process Description¶

Q1. In which category do people book the most Uber rides?¶

Q2. For which purpose do people book Uber rides the most?¶

Q3. At what time do people book cabs the most from Uber?¶

Q4. In which months do people book Uber rides less frequently?¶

Q5. On which days of the week do people book Uber rides the most?¶

Q6. How many miles do people usually book a cab for through Uber?¶

✅ Key Insights & Conclusion¶