import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import plotly.io as plio
plio.templates
import plotly.express as px
import plotly.graph_objects as go
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from xgboost import XGBRegressor
import joblib
from warnings import filterwarnings
filterwarnings(action='ignore')
# Load the car listings from the CSV in the working directory and display them.
data = pd.read_csv(filepath_or_buffer='car data.csv')
data
Car_Name | Year | Selling_Price | Present_Price | Driven_kms | Fuel_Type | Selling_type | Transmission | Owner | |
---|---|---|---|---|---|---|---|---|---|
0 | ritz | 2014 | 3.35 | 5.59 | 27000 | Petrol | Dealer | Manual | 0 |
1 | sx4 | 2013 | 4.75 | 9.54 | 43000 | Diesel | Dealer | Manual | 0 |
2 | ciaz | 2017 | 7.25 | 9.85 | 6900 | Petrol | Dealer | Manual | 0 |
3 | wagon r | 2011 | 2.85 | 4.15 | 5200 | Petrol | Dealer | Manual | 0 |
4 | swift | 2014 | 4.60 | 6.87 | 42450 | Diesel | Dealer | Manual | 0 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
296 | city | 2016 | 9.50 | 11.60 | 33988 | Diesel | Dealer | Manual | 0 |
297 | brio | 2015 | 4.00 | 5.90 | 60000 | Petrol | Dealer | Manual | 0 |
298 | city | 2009 | 3.35 | 11.00 | 87934 | Petrol | Dealer | Manual | 0 |
299 | city | 2017 | 11.50 | 12.50 | 9000 | Diesel | Dealer | Manual | 0 |
300 | brio | 2016 | 5.30 | 5.90 | 5464 | Petrol | Dealer | Manual | 0 |
301 rows × 9 columns
# Print the column names, non-null counts and dtypes of the loaded frame.
data.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 301 entries, 0 to 300 Data columns (total 9 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Car_Name 301 non-null object 1 Year 301 non-null int64 2 Selling_Price 301 non-null float64 3 Present_Price 301 non-null float64 4 Driven_kms 301 non-null int64 5 Fuel_Type 301 non-null object 6 Selling_type 301 non-null object 7 Transmission 301 non-null object 8 Owner 301 non-null int64 dtypes: float64(2), int64(3), object(4) memory usage: 21.3+ KB
# Show every row that has an exact copy elsewhere in the frame
# (keep=False flags all members of each duplicate group).
has_twin = data.duplicated(keep=False)
duplicate_rows = data.loc[has_twin]
duplicate_rows
Car_Name | Year | Selling_Price | Present_Price | Driven_kms | Fuel_Type | Selling_type | Transmission | Owner | |
---|---|---|---|---|---|---|---|---|---|
15 | ertiga | 2016 | 7.75 | 10.79 | 43000 | Diesel | Dealer | Manual | 0 |
17 | ertiga | 2016 | 7.75 | 10.79 | 43000 | Diesel | Dealer | Manual | 0 |
51 | fortuner | 2015 | 23.00 | 30.61 | 40000 | Diesel | Dealer | Automatic | 0 |
93 | fortuner | 2015 | 23.00 | 30.61 | 40000 | Diesel | Dealer | Automatic | 0 |
# Select only the repeated occurrences: keep='first' leaves the first row of
# each duplicate group unflagged, so these are the rows to drop.
is_repeat = data.duplicated(keep='first')
duplicated_rows = data.loc[is_repeat]
duplicated_rows
Car_Name | Year | Selling_Price | Present_Price | Driven_kms | Fuel_Type | Selling_type | Transmission | Owner | |
---|---|---|---|---|---|---|---|---|---|
17 | ertiga | 2016 | 7.75 | 10.79 | 43000 | Diesel | Dealer | Manual | 0 |
93 | fortuner | 2015 | 23.00 | 30.61 | 40000 | Diesel | Dealer | Automatic | 0 |
# Remove the repeated occurrences found above; data_cleaned keeps the
# original row labels.
data_cleaned = data.drop(index=duplicated_rows.index)

# Continue with the de-duplicated frame directly instead of re-reading
# 'car_data_cleaned.csv': nothing in this script writes that file, so a fresh
# run would fail with FileNotFoundError. Resetting the index gives the same
# 0..n-1 row labels a CSV round trip would have produced.
data = data_cleaned.reset_index(drop=True)
# Number of rows per distinct Year value, most frequent first.
data['Year'].value_counts()
2015 60 2016 49 2014 38 2017 35 2013 33 2012 23 2011 19 2010 15 2008 7 2009 6 2006 4 2005 4 2003 2 2007 2 2018 1 2004 1 Name: Year, dtype: int64
# Bar chart of the number of rows per Year value.
# The counts are computed once and reused for the x values, bar heights and labels.
year_counts = data.Year.value_counts()
year = year_counts.keys()
no_cars_sold = year_counts.values
labels = list(map(str, no_cars_sold))

fig = px.bar(
    x=year,
    y=no_cars_sold,
    text=labels,
    title='Year vs No. of Cars Sold',
    template='plotly',
    color_discrete_sequence=['darkviolet'],
)
# Axis titles
fig.update_yaxes(title_text='no_cars_sold')
fig.update_xaxes(title_text='Year')
fig.show()
# Bar chart of the 25 most frequent Car_Name values.
top_car_counts = data.Car_Name.value_counts().head(25)
car_name = top_car_counts.keys()
cars_sold = top_car_counts.values
labels = list(map(str, cars_sold))

fig = px.bar(
    x=car_name,
    y=cars_sold,
    text=labels,
    title='Top 25 Car Name vs No. of Cars Sold',
    template='plotly',
    color_discrete_sequence=['aqua'],
)
# Axis titles
fig.update_yaxes(title_text='No. of Cars Sold')
fig.update_xaxes(title_text='Car Name')
fig.show()
# Number of rows per Fuel_Type value (at most the 25 most frequent).
data['Fuel_Type'].value_counts().head(25)
Petrol 239 Diesel 58 CNG 2 Name: Fuel_Type, dtype: int64
# Bar chart of the number of rows per Fuel_Type value.
fuel_counts = data.Fuel_Type.value_counts()
fuel_type = fuel_counts.keys()
cars_count = fuel_counts.values
labels = list(map(str, cars_count))

fig = px.bar(
    x=fuel_type,
    y=cars_count,
    text=labels,
    title='Fuel Type vs Cars count',
    template='plotly',
    color_discrete_sequence=['orange'],
)
# Axis titles
fig.update_yaxes(title_text='No. of Cars')
fig.update_xaxes(title_text='Fuel Type')
fig.show()
# Bar chart of the number of rows per Selling_type value.
selling_counts = data.Selling_type.value_counts()
selling_type = selling_counts.keys()
cars_count = selling_counts.values
labels = list(map(str, cars_count))

fig = px.bar(
    x=selling_type,
    y=cars_count,
    text=labels,
    title='Selling Type vs Cars count',
    template='plotly',
    color_discrete_sequence=['yellow'],
)
# Axis titles
fig.update_yaxes(title_text='No. of Cars')
fig.update_xaxes(title_text='Selling Type')
fig.show()
# Bar chart of the number of rows per Transmission value.
transmission_counts = data.Transmission.value_counts()
transmission_type = transmission_counts.keys()
cars_count = transmission_counts.values
labels = list(map(str, cars_count))

fig = px.bar(
    x=transmission_type,
    y=cars_count,
    text=labels,
    title='Transmission(change of gear) Type vs Cars count',
    template='plotly',
    color_discrete_sequence=['red'],
)
# Axis titles
fig.update_yaxes(title_text='No. of Cars')
fig.update_xaxes(title_text='Transmission Type')
fig.show()
# Bar chart of the number of rows per Owner value.
owner_counts = data.Owner.value_counts()
ownership_type = owner_counts.keys()
cars_count = owner_counts.values
labels = list(map(str, cars_count))

fig = px.bar(
    x=ownership_type,
    y=cars_count,
    text=labels,
    title='Ownership Type vs Cars count',
    template='plotly',
    color_discrete_sequence=['gold'],
)
# Axis titles
fig.update_yaxes(title_text='No. of Cars')
fig.update_xaxes(title_text='Ownership Type')
fig.show()
# Integer-encode the Fuel_Type column into a new Fuel_Type_encoded column:
# Petrol -> 0, Diesel -> 1, CNG -> 2.
category_mapping = {
    'Petrol': 0,
    'Diesel': 1,
    'CNG': 2,
}
data['Fuel_Type_encoded'] = data.Fuel_Type.map(category_mapping)
data['Fuel_Type_encoded'].value_counts()
0 239 1 58 2 2 Name: Fuel_Type_encoded, dtype: int64
# Bar chart of the number of rows per encoded fuel type.
fuel_code_counts = data.Fuel_Type_encoded.value_counts()
fuel_type = fuel_code_counts.keys()
cars_count = fuel_code_counts.values
labels = list(map(str, cars_count))

fig = px.bar(
    x=fuel_type,
    y=cars_count,
    text=labels,
    title='Fuel Type(encoded) vs Cars count',
    template='plotly',
    color_discrete_sequence=['orange'],
)
# Axis titles
fig.update_yaxes(title_text='No. of Cars')
fig.update_xaxes(title_text='Fuel Type(encoded)')
fig.show()
# Integer-encode the Selling_type column into a new Selling_type_encoded column:
# Dealer -> 0, Individual -> 1.
category_mapping = {
    'Dealer': 0,
    'Individual': 1,
}
data['Selling_type_encoded'] = data.Selling_type.map(category_mapping)
data['Selling_type_encoded'].value_counts()
0 193 1 106 Name: Selling_type_encoded, dtype: int64
# Bar chart of the number of rows per encoded selling type.
# The counts are computed once and reused for the x values, bar heights and labels.
selling_code_counts = data.Selling_type_encoded.value_counts()
selling_type = selling_code_counts.keys()
cars_count = selling_code_counts.values
labels = [str(val) for val in cars_count]

fig = px.bar(
    x=selling_type,
    y=cars_count,
    text=labels,
    title='Selling Type(encoded) vs Cars count',
    template='plotly',
    color_discrete_sequence=['yellow'],
)
# The "(encoded)" suffix belongs on the x-axis (the encoded category), as in
# the other encoded charts; the y-axis is a plain count.
fig.update_yaxes(title_text='No. of Cars')
fig.update_xaxes(title_text='Selling Type(encoded)')
fig.show()
# Integer-encode the Transmission column into a new Transmission_encoded column:
# Manual -> 0, Automatic -> 1.
category_mapping = {
    'Manual': 0,
    'Automatic': 1,
}
data['Transmission_encoded'] = data.Transmission.map(category_mapping)
data['Transmission_encoded'].value_counts()
0 260 1 39 Name: Transmission_encoded, dtype: int64
# Bar chart of the number of rows per encoded transmission type.
transmission_code_counts = data.Transmission_encoded.value_counts()
transmission_type = transmission_code_counts.keys()
cars_count = transmission_code_counts.values
labels = list(map(str, cars_count))

fig = px.bar(
    x=transmission_type,
    y=cars_count,
    text=labels,
    title='Transmission(change of gear)(encoded) Type vs Cars count',
    template='plotly',
    color_discrete_sequence=['red'],
)
# Axis titles
fig.update_yaxes(title_text='No. of Cars')
fig.update_xaxes(title_text='Transmission Type(encoded)')
fig.show()
# Re-code the Owner column into contiguous integers in a new Owner_encoded
# column: 0 -> 0, 1 -> 1, 3 -> 2.
category_mapping = {
    0: 0,
    1: 1,
    3: 2,
}
data['Owner_encoded'] = data.Owner.map(category_mapping)
data['Owner_encoded'].value_counts()
0 288 1 10 2 1 Name: Owner_encoded, dtype: int64
# Bar chart of the number of rows per encoded ownership type.
owner_code_counts = data.Owner_encoded.value_counts()
ownership_type = owner_code_counts.keys()
cars_count = owner_code_counts.values
labels = list(map(str, cars_count))

fig = px.bar(
    x=ownership_type,
    y=cars_count,
    text=labels,
    title='Ownership Type(encoded) vs Cars count',
    template='plotly',
    color_discrete_sequence=['gold'],
)
# Axis titles
fig.update_yaxes(title_text='No. of Cars')
fig.update_xaxes(title_text='Ownership Type(encoded)')
fig.show()
# Keep the name, the numeric columns and the integer-encoded categorical
# columns; the original text-valued categorical columns are left out.
selected_columns = [
    'Car_Name',
    'Year',
    'Selling_Price',
    'Present_Price',
    'Driven_kms',
    'Fuel_Type_encoded',
    'Selling_type_encoded',
    'Transmission_encoded',
    'Owner_encoded',
]
new_data = data[selected_columns]
new_data
#new_data.to_csv('cars price encoded.csv', index=False)
Car_Name | Year | Selling_Price | Present_Price | Driven_kms | Fuel_Type_encoded | Selling_type_encoded | Transmission_encoded | Owner_encoded | |
---|---|---|---|---|---|---|---|---|---|
0 | ritz | 2014 | 3.35 | 5.59 | 27000 | 0 | 0 | 0 | 0 |
1 | sx4 | 2013 | 4.75 | 9.54 | 43000 | 1 | 0 | 0 | 0 |
2 | ciaz | 2017 | 7.25 | 9.85 | 6900 | 0 | 0 | 0 | 0 |
3 | wagon r | 2011 | 2.85 | 4.15 | 5200 | 0 | 0 | 0 | 0 |
4 | swift | 2014 | 4.60 | 6.87 | 42450 | 1 | 0 | 0 | 0 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
294 | city | 2016 | 9.50 | 11.60 | 33988 | 1 | 0 | 0 | 0 |
295 | brio | 2015 | 4.00 | 5.90 | 60000 | 0 | 0 | 0 | 0 |
296 | city | 2009 | 3.35 | 11.00 | 87934 | 0 | 0 | 0 | 0 |
297 | city | 2017 | 11.50 | 12.50 | 9000 | 1 | 0 | 0 | 0 |
298 | brio | 2016 | 5.30 | 5.90 | 5464 | 0 | 0 | 0 | 0 |
299 rows × 9 columns
# Pairwise correlation of every column except the first one (Car_Name in
# selected_columns), which is text.
new_data.iloc[:,1:].corr()
Year | Selling_Price | Present_Price | Driven_kms | Fuel_Type_encoded | Selling_type_encoded | Transmission_encoded | Owner_encoded | |
---|---|---|---|---|---|---|---|---|
Year | 1.000000 | 0.234369 | -0.053167 | -0.525714 | 0.046210 | -0.036820 | -0.003434 | -0.170694 |
Selling_Price | 0.234369 | 1.000000 | 0.876305 | 0.028566 | 0.500292 | -0.553851 | 0.348869 | -0.096019 |
Present_Price | -0.053167 | 0.876305 | 1.000000 | 0.205224 | 0.431887 | -0.511779 | 0.334326 | -0.018158 |
Driven_kms | -0.525714 | 0.028566 | 0.205224 | 1.000000 | 0.167287 | -0.101030 | 0.163881 | 0.061924 |
Fuel_Type_encoded | 0.046210 | 0.500292 | 0.431887 | 0.167287 | 1.000000 | -0.347922 | 0.068618 | -0.055526 |
Selling_type_encoded | -0.036820 | -0.553851 | -0.511779 | -0.101030 | -0.347922 | 1.000000 | -0.058669 | 0.123165 |
Transmission_encoded | -0.003434 | 0.348869 | 0.334326 | 0.163881 | 0.068618 | -0.058669 | 1.000000 | 0.020306 |
Owner_encoded | -0.170694 | -0.096019 | -0.018158 | 0.061924 | -0.055526 | 0.123165 | 0.020306 | 1.000000 |
# Annotated heatmap of the correlation matrix (first column skipped).
corr_matrix = new_data.iloc[:, 1:].corr()
sns.heatmap(corr_matrix, annot=True)
<AxesSubplot:>
# Histogram (20 bins) with a KDE overlay of Selling_Price.
plt.figure(figsize=(10, 6))
ax = sns.histplot(new_data['Selling_Price'], bins=20, kde=True)
ax.set_xlabel('Selling Price')
ax.set_ylabel('Frequency')
ax.set_title('Distribution of Selling Prices')
plt.show()
# Histogram (20 bins) with a KDE overlay of Present_Price.
plt.figure(figsize=(10, 6))
ax = sns.histplot(new_data['Present_Price'], bins=20, kde=True, color='darkviolet')
ax.set_xlabel('Present Price')
ax.set_ylabel('Frequency')
ax.set_title('Distribution of Present Prices')
plt.show()
# Histogram (20 bins) with a KDE overlay of Driven_kms.
plt.figure(figsize=(10, 6))
ax = sns.histplot(new_data['Driven_kms'], bins=20, kde=True, color='red')
ax.set_xlabel('Driven kilometers')
ax.set_ylabel('Frequency')
ax.set_title('Distribution of kilometers driven')
plt.show()
# Target is Selling_Price; the features are every remaining column except the
# car name.
y_data = new_data['Selling_Price']
x_data = new_data.drop(columns=['Car_Name', 'Selling_Price'])
x_data
Year | Present_Price | Driven_kms | Fuel_Type_encoded | Selling_type_encoded | Transmission_encoded | Owner_encoded | |
---|---|---|---|---|---|---|---|
0 | 2014 | 5.59 | 27000 | 0 | 0 | 0 | 0 |
1 | 2013 | 9.54 | 43000 | 1 | 0 | 0 | 0 |
2 | 2017 | 9.85 | 6900 | 0 | 0 | 0 | 0 |
3 | 2011 | 4.15 | 5200 | 0 | 0 | 0 | 0 |
4 | 2014 | 6.87 | 42450 | 1 | 0 | 0 | 0 |
... | ... | ... | ... | ... | ... | ... | ... |
294 | 2016 | 11.60 | 33988 | 1 | 0 | 0 | 0 |
295 | 2015 | 5.90 | 60000 | 0 | 0 | 0 | 0 |
296 | 2009 | 11.00 | 87934 | 0 | 0 | 0 | 0 |
297 | 2017 | 12.50 | 9000 | 1 | 0 | 0 | 0 |
298 | 2016 | 5.90 | 5464 | 0 | 0 | 0 | 0 |
299 rows × 7 columns
# Display the target series (Selling_Price).
y_data
0 3.35 1 4.75 2 7.25 3 2.85 4 4.60 ... 294 9.50 295 4.00 296 3.35 297 11.50 298 5.30 Name: Selling_Price, Length: 299, dtype: float64
# Hold out 20% of the rows for evaluation. The split is seeded so that every
# model fitted afterwards is scored on the same, repeatable test set; without
# a seed the scores change from run to run and cannot be compared.
x_train, x_test, y_train, y_test = train_test_split(
    x_data, y_data, test_size=0.2, random_state=42
)

# Model 1: ordinary least-squares linear regression.
model_1 = LinearRegression()
model_1.fit(x_train, y_train)
LinearRegression()
# Score the linear regression model on the held-out rows.
y_pred_1 = model_1.predict(x_test)
mse, r2 = mean_squared_error(y_test, y_pred_1), r2_score(y_test, y_pred_1)
print("Mean Square Error is :", mse)
print("R-Squared score is :", r2)
Mean Square Error is : 4.028076863760065 R-Squared score is : 0.8380235823890909
# Actual vs. predicted selling price for the linear regression model.
axes = plt.figure(figsize=(10, 6)).gca()
axes.scatter(y_test, y_pred_1, alpha=0.5, color='darkviolet')
axes.set_xlabel('Actual Selling Price')
axes.set_ylabel('Predicted Selling Price')
axes.set_title('Actual vs. Predicted Selling Prices (Linear Regression)')
plt.show()
# Model 2: random forest. The estimator is seeded so that its bootstrap
# samples, and therefore its score, are repeatable between runs.
model_2 = RandomForestRegressor(random_state=42)
model_2.fit(x_train, y_train)
RandomForestRegressor()
# Score the random forest model on the held-out rows.
y_pred_2 = model_2.predict(x_test)
mse, r2 = mean_squared_error(y_test, y_pred_2), r2_score(y_test, y_pred_2)
print("Mean Square Error is :", mse)
print("R-Squared score is :", r2)
Mean Square Error is : 7.478897279166666 R-Squared score is : 0.6992597137710552
# Actual vs. predicted selling price for the random forest model.
# The estimator object itself is passed as the legend label.
axes = plt.figure(figsize=(10, 6)).gca()
axes.scatter(y_test, y_pred_2, label=model_2, alpha=0.5, color='green')
axes.set_xlabel("Actual Selling Prices")
axes.set_ylabel("Predicted Selling Prices")
axes.set_title("Actual vs. Predicted Selling Prices")
axes.legend()
plt.show()
# Model 3: ridge regression with default settings (fit returns the estimator).
model_3 = Ridge().fit(x_train, y_train)
model_3
Ridge()
# Score the ridge model on the held-out rows.
y_pred_3 = model_3.predict(x_test)
mse, r2 = mean_squared_error(y_test, y_pred_3), r2_score(y_test, y_pred_3)
print("Mean Square Error is :", mse)
print("R-Squared score is :", r2)
Mean Square Error is : 3.9768973298980606 R-Squared score is : 0.840081606063998
# Actual vs. predicted selling price for the ridge model.
# The estimator object itself is passed as the legend label.
axes = plt.figure(figsize=(10, 6)).gca()
axes.scatter(y_test, y_pred_3, label=model_3, alpha=0.5, color='darkblue')
axes.set_xlabel("Actual Selling Prices")
axes.set_ylabel("Predicted Selling Prices")
axes.set_title("Actual vs. Predicted Selling Prices")
axes.legend()
plt.show()
# Model 4: lasso regression with default settings (fit returns the estimator).
model_4 = Lasso().fit(x_train, y_train)
model_4
Lasso()
# Score the lasso model on the held-out rows.
y_pred_4 = model_4.predict(x_test)
mse, r2 = mean_squared_error(y_test, y_pred_4), r2_score(y_test, y_pred_4)
print("Mean Square Error is :", mse)
print("R-Squared score is :", r2)
Mean Square Error is : 4.489972829825654 R-Squared score is : 0.8194498916620485
# Actual vs. predicted selling price for the lasso model.
# The estimator object itself is passed as the legend label.
axes = plt.figure(figsize=(10, 6)).gca()
axes.scatter(y_test, y_pred_4, label=model_4, alpha=0.5, color='black')
axes.set_xlabel("Actual Selling Prices")
axes.set_ylabel("Predicted Selling Prices")
axes.set_title("Actual vs. Predicted Selling Prices")
axes.legend()
plt.show()
# Model 5: elastic-net regression with default settings (fit returns the estimator).
model_5 = ElasticNet().fit(x_train, y_train)
model_5
ElasticNet()
# Score the elastic-net model on the held-out rows.
y_pred_5 = model_5.predict(x_test)
mse, r2 = mean_squared_error(y_test, y_pred_5), r2_score(y_test, y_pred_5)
print("Mean Square Error is :", mse)
print("R-Squared score is :", r2)
Mean Square Error is : 4.329718572290692 R-Squared score is : 0.825894011628066
# Actual vs. predicted selling price for the elastic-net model.
# The estimator object itself is passed as the legend label.
axes = plt.figure(figsize=(10, 6)).gca()
axes.scatter(y_test, y_pred_5, label=model_5, alpha=0.5, color='gold')
axes.set_xlabel("Actual Selling Prices")
axes.set_ylabel("Predicted Selling Prices")
axes.set_title("Actual vs. Predicted Selling Prices")
axes.legend()
plt.show()
# Model 6: gradient boosting. The estimator is seeded so that repeated runs
# on the same training data produce the same fitted model and score.
model_6 = GradientBoostingRegressor(random_state=42)
model_6.fit(x_train, y_train)
GradientBoostingRegressor()
# Score the gradient boosting model on the held-out rows.
y_pred_6 = model_6.predict(x_test)
mse, r2 = mean_squared_error(y_test, y_pred_6), r2_score(y_test, y_pred_6)
print("Mean Square Error is :", mse)
print("R-Squared score is :", r2)
Mean Square Error is : 3.1284405404718694 R-Squared score is : 0.8741996221538454
# Actual vs. predicted selling price for the gradient boosting model.
# The estimator object itself is passed as the legend label.
axes = plt.figure(figsize=(10, 6)).gca()
axes.scatter(y_test, y_pred_6, label=model_6, alpha=0.5, color='red')
axes.set_xlabel("Actual Selling Prices")
axes.set_ylabel("Predicted Selling Prices")
axes.set_title("Actual vs. Predicted Selling Prices")
axes.legend()
plt.show()
# Model 7: XGBoost regressor with default settings (fit returns the estimator).
model_7 = XGBRegressor().fit(x_train, y_train)
model_7
XGBRegressor(base_score=None, booster=None, callbacks=None, colsample_bylevel=None, colsample_bynode=None, colsample_bytree=None, device=None, early_stopping_rounds=None, enable_categorical=False, eval_metric=None, feature_types=None, gamma=None, grow_policy=None, importance_type=None, interaction_constraints=None, learning_rate=None, max_bin=None, max_cat_threshold=None, max_cat_to_onehot=None, max_delta_step=None, max_depth=None, max_leaves=None, min_child_weight=None, missing=nan, monotone_constraints=None, multi_strategy=None, n_estimators=None, n_jobs=None, num_parallel_tree=None, random_state=None, ...)
# Score the XGBoost model on the held-out rows.
y_pred_7 = model_7.predict(x_test)
mse, r2 = mean_squared_error(y_test, y_pred_7), r2_score(y_test, y_pred_7)
print("Mean Square Error is :", mse)
print("R-Squared score is :", r2)
Mean Square Error is : 2.4098547850320946 R-Squared score is : 0.9030952838676375
# Actual vs. predicted selling price for the XGBoost model.
axes = plt.figure(figsize=(10, 6)).gca()
axes.scatter(y_test, y_pred_7, label='XGBoost Regression Model', alpha=0.5, color='brown')
axes.set_xlabel("Actual Selling Prices")
axes.set_ylabel("Predicted Selling Prices")
axes.set_title("Actual vs. Predicted Selling Prices")
axes.legend()
plt.show()
# Side-by-side R-squared of all seven models on the same held-out rows.
print("R-Squared score of each Model:- ")
scored_predictions = [
    ("\n Linear Regression Model: ", y_pred_1),
    ("\n Random Forest Regression Model: ", y_pred_2),
    ("\n Ridge Regression Model: ", y_pred_3),
    ("\n Lasso Regression Model: ", y_pred_4),
    ("\n ElasticNet Regression Model: ", y_pred_5),
    ("\n Gradient Boosting Regression Model: ", y_pred_6),
    ("\n XGBoost Regression Model: ", y_pred_7),
]
for heading, predictions in scored_predictions:
    print(heading, r2_score(y_test, predictions))
R-Squared score of each Model:- Linear Regression Model: 0.8380235823890909 Random Forest Regression Model: 0.6992597137710552 Ridge Regression Model: 0.840081606063998 Lasso Regression Model: 0.8194498916620485 ElasticNet Regression Model: 0.825894011628066 Gradient Boosting Regression Model: 0.8741996221538454 XGBoost Regression Model: 0.9030952838676375
# Final model: a fresh default XGBoost regressor fitted on the training rows,
# then scored and plotted against the held-out rows.
final_model = XGBRegressor()
final_model.fit(x_train, y_train)

y_pred_final = final_model.predict(x_test)
mse, r2 = mean_squared_error(y_test, y_pred_final), r2_score(y_test, y_pred_final)
print("Mean Square Error is :", mse)
print("R-Squared score is :", r2)

axes = plt.figure(figsize=(10, 6)).gca()
axes.scatter(y_test, y_pred_final, label='XGBoost Regression', alpha=0.5, color='red')
axes.set_xlabel("Actual Selling Prices")
axes.set_ylabel("Predicted Selling Prices")
axes.set_title("Actual vs. Predicted Selling Prices")
axes.legend()
plt.show()
Mean Square Error is : 2.4098547850320946 R-Squared score is : 0.9030952838676375
# Bar chart of the final model's feature importances, largest first
# (at most ten bars), each annotated with its score.
feature_importances = pd.Series(final_model.feature_importances_, index=x_data.columns)
top_10_features = feature_importances.nlargest(10)

plt.figure(figsize=(10, 6))
# Plot the series computed above instead of calling nlargest a second time.
top_10_features.plot(kind='bar')
# The x-axis lists the feature names; the scores are on the y-axis.
plt.xlabel('Feature')
plt.ylabel('Feature Importance Score')
plt.title('Top 10 Features having impact on the selling price of the car(recognised by XGBoost Regression Model)')
# Write each score just above its bar; bars sit at integer positions 0..n-1.
for index, value in enumerate(top_10_features):
    plt.text(index, value, f'{value:.2f}', ha='center', va='bottom')
plt.show()
# Selling_Price and Present_Price plotted against Year in one figure,
# distinguished by marker symbol.
year = new_data['Year']
selling_price = new_data['Selling_Price']
present_price = new_data['Present_Price']

scatter_selling = go.Scatter(
    x=year,
    y=selling_price,
    mode='markers',
    name='Selling Price',
    marker={'symbol': 'circle'},
)
scatter_present = go.Scatter(
    x=year,
    y=present_price,
    mode='markers',
    name='Present Price',
    marker={'symbol': 'x'},
)

fig = go.Figure(data=[scatter_selling, scatter_present])
fig.update_layout(
    xaxis_title='Year',
    yaxis_title='Price',
    title='Trivariate Plot of Selling Price and Present Price vs. Year',
)
fig.show()
# Selling_Price plotted against Year.
year = new_data['Year']
selling_price = new_data['Selling_Price']

scatter_selling = go.Scatter(
    x=year,
    y=selling_price,
    mode='markers',
    name='Selling Price',
    marker={'symbol': 'circle'},
)

fig = go.Figure(data=[scatter_selling])
fig.update_layout(
    xaxis_title='Year',
    yaxis_title='Selling Price',
    title='Bivariate Plot of Selling Price vs. Year',
)
fig.show()
# Present_Price plotted against Year.
year = new_data['Year']
present_price = new_data['Present_Price']

scatter_present = go.Scatter(
    x=year,
    y=present_price,
    mode='markers',
    name='Present Price',
    marker={'symbol': 'x'},
)

fig = go.Figure(data=[scatter_present])
fig.update_layout(
    xaxis_title='Year',
    yaxis_title='Present Price',
    title='Bivariate Plot of Present Price vs. Year',
)
fig.show()
# Persist the fitted final model to disk with joblib.
# NOTE(review): the ".h5" suffix suggests HDF5, but joblib.dump writes its own
# pickle-based format; the name is kept unchanged in case other code loads it.
model_filename = "final_car_prediction_model.h5"
joblib.dump(value=final_model, filename=model_filename)
['final_car_prediction_model.h5']