#!/usr/bin/env python
# coding: utf-8

# Predict SAT scores from GPA using a pre-fitted simple linear regression.
# (Jupyter-notebook export; the In[...] markers preserve the cell structure.)

# In[5]:

import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np

# Fitted regression coefficients: SAT = SLOPE * GPA + INTERCEPT.
# Hoisted to named constants so the formula is written exactly once —
# previously both numbers were duplicated inline at every prediction site.
SLOPE = 245.21763914
INTERCEPT = 1028.64068603


def predict_sat(gpa):
    """Return the predicted SAT score for *gpa* using the fitted line."""
    return SLOPE * gpa + INTERCEPT


# In[6]:

score = pd.read_csv(r"C:\Users\Teni\Desktop\Git-Github\Datasets\Linear Regression\CGPA & SAT score.csv")

# In[7]:

score.head()

# Visually assess if there's a linear relationship between SAT and the GPAs

# In[8]:

sns.scatterplot(data=score, x='GPA', y='SAT');

# In[9]:

sns.regplot(data=score, x='GPA', y='SAT');

# Define the X and y Variable

# In[10]:

X = score['GPA']
y = score['SAT']

# **Say if someone had a GPA of 3.4, what is the predicted SAT Score**

# In[22]:

score

# In[23]:

# GPA = np.linspace(0, 10, 2)

# In[24]:

GPA = 3.4

# In[18]:

pred_SAT = predict_sat(GPA)
pred_SAT

# This means the predicted SAT score will be 1862
#
# As displayed below, it shows that this is close to the real GPA score of those around 3.4 from the original data

# In[21]:

score[score['GPA'] == 3.4]

# **Say if someone had a GPA of 2.91, what is the predicted SAT Score**

# In[52]:

GPA = 2.91

# In[53]:

pred_SAT = predict_sat(GPA)
pred_SAT

# In[54]:

score[(score['GPA'] <= 3.0) & (score['GPA'] >= 2.92)]

# Our predicted SAT score is not far from the real label in the data
#
# *Our model may not be spot on in comparison - but the residual difference is minimal.*