# JUST RUN THIS
from google.colab import drive
import pandas as pd

drive.mount('/content/gdrive')

# Load the data
df = pd.read_csv('/content/gdrive/MyDrive/datasets/admission_predict.csv')

# Convert "Chance of Admit" to a True/False "Admitted" column
# (note the trailing space in the original column names)
df["Admitted"] = df["Chance of Admit "] > 0.75
df.drop("Chance of Admit ", axis=1, inplace=True)
df.rename(columns={"LOR ": "LOR"}, inplace=True)

# Explore the data
print(f"Total applicants: {len(df)}")
print(f"Admitted: {df['Admitted'].sum()}")
print(f"Not admitted: {(~df['Admitted']).sum()}")
print("\nColumns:")
print(df.columns.tolist())

# Look at a random sample of the data
print("\nRandom sample of 5 applicants:")
df.sample(5)


# JUST RUN THIS
from sklearn.model_selection import train_test_split

# Split the data (I'm doing this for you!)
df_train, df_test = train_test_split(df, test_size=0.2, random_state=42)
print(f"Training set: {len(df_train)} applicants")
print(f"Test set: {len(df_test)} applicants")


def prepare_features(df_train, df_test):
    # Input: df_train and df_test are DataFrames with all columns
    # Output: Returns X_train, X_test (features only, no Serial No. or Admitted)

    # TODO: Your code here!
    # 1. First, list all the feature columns you want to use
    #    Hint: All columns except 'Serial No.' and 'Admitted'
    #    You can type them out: feature_cols = ['GRE Score', 'TOEFL Score', ...]
    #    Or use: feature_cols = df_train.columns.drop(['Serial No.', 'Admitted']).tolist()
    # 2. Create X_train using double brackets
    #    X_train = df_train[feature_cols]
    # 3. Create X_test the same way
    #    X_test = df_test[feature_cols]
    # 4. Return both (yes, functions can return multiple values!)
    #    return X_train, X_test
    pass

# Test your function
X_train, X_test = prepare_features(df_train, df_test)
print(f"Features shape - Train: {X_train.shape}, Test: {X_test.shape}")
print(f"Features used: {X_train.columns.tolist()}")

# Check to make sure the same features were picked for X_train and X_test
assert X_train.columns.equals(X_test.columns)


def prepare_labels(df_train, df_test):
    # Input: df_train and df_test are DataFrames
    # Output: Returns y_train, y_test (just the Admitted column)

    # TODO: Your code here!
    # 1. Extract the 'Admitted' column from df_train
    #    y_train = df_train['Admitted']  # Single brackets for a Series!
    # 2. Extract the 'Admitted' column from df_test
    #    y_test = df_test['Admitted']
    # 3. Return both values
    #    return y_train, y_test
    pass

# Test your function
y_train, y_test = prepare_labels(df_train, df_test)
print(f"Training: {y_train.sum()} admitted out of {len(y_train)}")
print(f"Testing: {y_test.sum()} admitted out of {len(y_test)}")


from sklearn.linear_model import LogisticRegression

def train_logistic_model(X_train, y_train):
    # Input: X_train (features), y_train (labels)
    # Output: Returns trained model

    # TODO: Your code here!
    # 1. Create a LogisticRegression model
    #    model = LogisticRegression(max_iter=1000)
    #    Note: max_iter=1000 gives the model more iterations to converge
    # 2. Train the model using the fit method
    #    model.fit(X_train, y_train)
    # 3. Return the trained model
    #    return model
    pass

# Train the model
model = train_logistic_model(X_train, y_train)
print("Model trained!")


def make_predictions(model, X_test):
    # Input: model (trained), X_test (features to predict)
    # Output: Returns predictions as a pandas Series

    # TODO: Your code here!
    # 1. Use the model's predict method to get predictions
    #    predictions = model.predict(X_test)
    #    This returns a numpy array of True/False values
    # 2. Wrap the array in a pandas Series, keeping X_test's index
    #    predictions_series = pd.Series(predictions, index=X_test.index)
    #    Matching indexes let us compare the predictions with y_test later
    # 3. Return the Series
    #    return predictions_series
    pass
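# --- Reference sketch (peek only if you're stuck!) ---
# One possible way to fill in the four functions above, built directly from
# the hints in their comments. Your variable names may differ; the only
# addition beyond the hints is keeping X_test's index on the predictions,
# which makes them line up with y_test later.

def prepare_features(df_train, df_test):
    # Every column except the ID and the label is a feature
    feature_cols = df_train.columns.drop(['Serial No.', 'Admitted']).tolist()
    X_train = df_train[feature_cols]
    X_test = df_test[feature_cols]
    return X_train, X_test

def prepare_labels(df_train, df_test):
    # Single brackets give a Series, not a DataFrame
    y_train = df_train['Admitted']
    y_test = df_test['Admitted']
    return y_train, y_test

def train_logistic_model(X_train, y_train):
    # max_iter=1000 gives the solver extra iterations to converge
    model = LogisticRegression(max_iter=1000)
    model.fit(X_train, y_train)
    return model

def make_predictions(model, X_test):
    # predict() returns a numpy array of True/False values
    predictions = model.predict(X_test)
    # Wrap it in a Series that keeps X_test's index
    predictions_series = pd.Series(predictions, index=X_test.index)
    return predictions_series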
# Make predictions
# (the pd.Series wrapper with X_test's index makes the predictions line up
#  with y_test, even if your function returned a plain numpy array)
y_pred = pd.Series(make_predictions(model, X_test), index=X_test.index)
print(f"Predicted {y_pred.sum()} admissions out of {len(y_pred)} applicants")


# JUST RUN THIS
def calculate_confusion_matrix(y_test, y_pred):
    # Input: y_test and y_pred are boolean Series with matching indexes
    # Output: Returns tp, tn, fp, fn
    tp = ((y_test == True) & (y_pred == True)).sum()    # True Positive
    tn = ((y_test == False) & (y_pred == False)).sum()  # True Negative
    fp = ((y_test == False) & (y_pred == True)).sum()   # False Positive
    fn = ((y_test == True) & (y_pred == False)).sum()   # False Negative
    return tp, tn, fp, fn

# Calculate confusion matrix
tp, tn, fp, fn = calculate_confusion_matrix(y_test, y_pred)
print("                | Predicted Positive | Predicted Negative")
print(f"Actual Positive |{tp:>19d} |{fn:>19d}")
print(f"Actual Negative |{fp:>19d} |{tn:>19d}")


def calculate_metrics(tp, tn, fp, fn):
    # Input: Confusion matrix values
    # Output: Returns accuracy, precision, recall (3 values!)

    # TODO: Your code here!
    # Calculate total predictions
    #   total = tp + tn + fp + fn
    # Accuracy: What percentage did we get right?
    #   accuracy = (tp + tn) / total
    # Precision: When we predict admission, how often are we right?
    #   precision = tp / (tp + fp) if (tp + fp) > 0 else 0
    # Recall: Of all actual admissions, what percentage did we catch?
    #   recall = tp / (tp + fn) if (tp + fn) > 0 else 0
    # Return all three metrics
    #   return accuracy, precision, recall
    pass

# Calculate metrics
accuracy, precision, recall = calculate_metrics(tp, tn, fp, fn)
print(f"Accuracy:  {accuracy:.2%} (Overall correctness)")
print(f"Precision: {precision:.2%} (When we predict admit, how often are we right?)")
print(f"Recall:    {recall:.2%} (Of all admits, what percentage did we identify?)")


# JUST RUN THIS
# Let's look at where we went wrong
false_positives = df_test[(y_test == False) & (y_pred == True)]
false_negatives = df_test[(y_test == True) & (y_pred == False)]

print("\nFalse Positives (predicted admit but rejected):")
if len(false_positives) > 0:
    display(false_positives[['GRE Score', 'CGPA', 'Research']].head())

print("\nFalse Negatives (predicted reject but admitted):")
if len(false_negatives) > 0:
    display(false_negatives[['GRE Score', 'CGPA', 'Research']].head())


# BONUS CODE HERE
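# --- Reference sketch for calculate_metrics (peek only if you're stuck!) ---
# One possible solution, following the hints above. The cross-check below is
# optional and separate from the bonus; accuracy_score, precision_score, and
# recall_score are standard sklearn.metrics functions.

def calculate_metrics(tp, tn, fp, fn):
    total = tp + tn + fp + fn
    accuracy = (tp + tn) / total
    # The guards avoid dividing by zero if a class is never predicted
    precision = tp / (tp + fp) if (tp + fp) > 0 else 0
    recall = tp / (tp + fn) if (tp + fn) > 0 else 0
    return accuracy, precision, recall

# Optional sanity check: sklearn's built-in metrics should match yours
from sklearn.metrics import accuracy_score, precision_score, recall_score
print(f"sklearn accuracy:  {accuracy_score(y_test, y_pred):.2%}")
print(f"sklearn precision: {precision_score(y_test, y_pred):.2%}")
print(f"sklearn recall:    {recall_score(y_test, y_pred):.2%}")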