#!/usr/bin/env python
# coding: utf-8

# Predict SAT scores from GPA using a pre-fitted simple linear regression.
# (Jupyter-notebook export; the In[...] markers preserve the cell structure.)

# In[5]:

import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np

# Fitted regression coefficients: SAT = SLOPE * GPA + INTERCEPT.
# Hoisted to named constants so the formula is written exactly once —
# previously both numbers were duplicated inline at every prediction site.
SLOPE = 245.21763914
INTERCEPT = 1028.64068603


def predict_sat(gpa):
    """Return the predicted SAT score for *gpa* using the fitted line."""
    return SLOPE * gpa + INTERCEPT


# In[6]:

score = pd.read_csv(r"C:\Users\Teni\Desktop\Git-Github\Datasets\Linear Regression\CGPA & SAT score.csv")

# In[7]:

score.head()

# Visually assess if there's a linear relationship between SAT and the GPAs

# In[8]:

sns.scatterplot(data=score, x='GPA', y='SAT');

# In[9]:

sns.regplot(data=score, x='GPA', y='SAT');

# Define the X and y Variable

# In[10]:

X = score['GPA']
y = score['SAT']

# **Say if someone had a GPA of 3.4, what is the predicted SAT Score**

# In[22]:

score

# In[23]:

# GPA = np.linspace(0, 10, 2)

# In[24]:

GPA = 3.4

# In[18]:

pred_SAT = predict_sat(GPA)
pred_SAT

# This means the predicted SAT score will be 1862
#
# As displayed below, it shows that this is close to the real GPA score of those around 3.4 from the original data

# In[21]:

score[score['GPA'] == 3.4]

# **Say if someone had a GPA of 2.91, what is the predicted SAT Score**

# In[52]:

GPA = 2.91

# In[53]:

pred_SAT = predict_sat(GPA)
pred_SAT

# In[54]:

score[(score['GPA'] <= 3.0) & (score['GPA'] >= 2.92)]

# Our predicted SAT score is not far from the real label in the data
#
# *Our model may not be spot on in comparison - but the residual difference is minimal.*