# JUST RUN THIS, no changes needed

from google.colab import drive
import pandas as pd
from textblob import TextBlob

# Make Google Drive visible to this Colab runtime, then load the dataset.
drive.mount('/content/gdrive')
df = pd.read_csv('/content/gdrive/MyDrive/datasets/lyrics.csv')

# FIXUP DATA
# Strip zero-width spaces (U+200B) from the text columns. regex=False makes
# this a plain literal replacement on every pandas version instead of relying
# on the version-dependent default for the `regex` argument.
df["Title"] = df["Title"].str.replace("\u200b", "", regex=False)
df["Lyric"] = df["Lyric"].str.replace("\u200b", "", regex=False)

# Look at the data
print(f"Total songs: {len(df)}")
print(f"Columns: {df.columns.tolist()}")
df.head()

def get_song_lyrics(df, artist, title):
    """Return the lyrics of one song, looked up by artist and title.

    Matching ignores letter case and surrounding whitespace, so
    "dua lipa" finds "Dua Lipa".

    Args:
        df: The lyrics DataFrame; must have 'Artist', 'Title' and 'Lyric'
            columns.
        artist: Name of the artist to look for.
        title: Title of the song to look for.

    Returns:
        The lyrics of the first matching row that has lyrics, or None when
        no such row exists.
    """
    wanted_artist = artist.strip().casefold()
    wanted_title = title.strip().casefold()

    is_match = (
        (df["Artist"].str.strip().str.casefold() == wanted_artist)
        & (df["Title"].str.strip().str.casefold() == wanted_title)
    )

    # Rows whose lyrics cell is empty are useless to the caller, so skip them.
    lyrics = df.loc[is_match, "Lyric"].dropna()
    if lyrics.empty:
        return None
    return lyrics.iloc[0]

def calculate_song_sentiment(lyrics):
    """Return the TextBlob polarity score of a song's lyrics.

    Args:
        lyrics: The song lyrics as a string.

    Returns:
        The polarity as a float between -1 (negative) and 1 (positive).
        Blank lyrics score 0.0. Input that is not a string (for example
        None for a song that was not found, or NaN from an empty CSV cell)
        yields NaN, so missing data is not mistaken for a neutral song.
    """
    if not isinstance(lyrics, str):
        return float("nan")
    if not lyrics.strip():
        # Nothing to analyse; skip building a TextBlob for blank text.
        return 0.0
    return float(TextBlob(lyrics).sentiment.polarity)

# Test with one song
artist = "Dua Lipa"  # Change to your choice!
title = "New Rules"  # Change to your choice!

# Get the lyrics for this song
lyrics = get_song_lyrics(df, artist, title)

# Calculate sentiment, but only when the song was actually found
sentiment = calculate_song_sentiment(lyrics) if lyrics is not None else None

print(f"{artist} - {title}")
if sentiment is None:
    # Formatting None with ':.3f' would raise a TypeError, so report it instead.
    print("Sentiment: n/a (song not found or no score available)")
else:
    print(f"Sentiment: {sentiment:.3f}")
    print("(Negative < 0 < Positive)")

def add_sentiment_column(df):
    """Add a 'sentiment' column holding each song's TextBlob polarity.

    Args:
        df: The lyrics DataFrame; must have a 'Lyric' column.

    Returns:
        The same DataFrame object, modified in place, with a float
        'sentiment' column. Rows without lyrics get NaN; blank lyrics
        get 0.0.
    """
    def polarity(lyrics):
        # Empty CSV cells arrive as NaN/None rather than str; keep them as
        # NaN so they do not drag averages towards zero.
        if not isinstance(lyrics, str):
            return float("nan")
        if not lyrics.strip():
            return 0.0
        return TextBlob(lyrics).sentiment.polarity

    df["sentiment"] = df["Lyric"].map(polarity).astype(float)
    return df

# Score every song in the dataset
df = add_sentiment_column(df)

# Preview the new column next to artist and title to confirm it exists
print("Sentiment column added!")
print(df.loc[:, ['Artist', 'Title', 'sentiment']].head())

def find_extreme_songs(df, artist, n=5):
    """Print an artist's most positive and most negative songs.

    The artist name is matched ignoring letter case and surrounding
    whitespace. Songs without a sentiment score are left out.

    Args:
        df: DataFrame with 'Artist', 'Title' and 'sentiment' columns.
        artist: Name of the artist to analyze.
        n: How many songs to show in each list.

    Returns:
        None; the two rankings are printed.
    """
    by_artist = (
        df["Artist"].str.strip().str.casefold() == artist.strip().casefold()
    )
    # Songs without a score cannot be ranked, so drop them up front.
    scored = df.loc[by_artist, ["Title", "sentiment"]].dropna(
        subset=["sentiment"]
    )
    if scored.empty:
        print(f"No scored songs found for {artist}")
        return

    ranked = scored.sort_values("sentiment", ascending=False)
    sections = (
        (f"Most positive songs by {artist}:", ranked.head(n)),
        # Reverse the tail so the most negative song is listed first.
        (f"\nMost negative songs by {artist}:", ranked.tail(n).iloc[::-1]),
    )
    for heading, songs in sections:
        print(heading)
        for song_title, score in zip(songs["Title"], songs["sentiment"]):
            print(f"  {score:+.3f}  {song_title}")

# Pick an artist and list their most positive and most negative songs
my_artist = "Taylor Swift"  # Change to your choice!
find_extreme_songs(df, artist=my_artist)

def calculate_artist_sentiment(df):
    """Return each artist's average sentiment, sorted from low to high.

    Args:
        df: DataFrame with 'Artist' and 'sentiment' columns.

    Returns:
        A Series indexed by artist holding the mean sentiment of that
        artist's songs, sorted ascending (most negative first). Artists
        with no scored songs are omitted.
    """
    per_artist = df.groupby("Artist")["sentiment"].mean()
    # An artist whose songs all lack a score averages to NaN; sort_values
    # would place those last, where they would pose as the most positive.
    return per_artist.dropna().sort_values()

# Rank artists by average sentiment.
# NOTE: reading tail() as "most positive" assumes the Series is sorted ascending.
artist_sentiments = calculate_artist_sentiment(df)

print("Most Positive Artists:")
print(artist_sentiments.tail(n=10))
print("\nMost Negative Artists:")
print(artist_sentiments.head(n=10))

def analyze_sentiment_by_year(df, artist):
    """Return an artist's average sentiment for each year.

    The artist name is matched ignoring letter case and surrounding
    whitespace. Rows missing either the year or the sentiment score are
    ignored.

    Args:
        df: DataFrame with 'Artist', 'Year' and 'sentiment' columns.
        artist: Name of the artist to analyze.

    Returns:
        A DataFrame with one row per year and the columns 'Year' and
        'sentiment' (the mean score for that year), sorted by year. It is
        empty when the artist has no usable rows.
    """
    by_artist = (
        df["Artist"].str.strip().str.casefold() == artist.strip().casefold()
    )
    usable = df.loc[by_artist, ["Year", "sentiment"]].dropna()

    # as_index=False keeps 'Year' as a regular column so the result can be
    # plotted with x='Year', y='sentiment'.
    yearly = usable.groupby("Year", as_index=False)["sentiment"].mean()
    return yearly.sort_values("Year").reset_index(drop=True)

# Track how the chosen artist's average sentiment moves from year to year
artist_timeline = analyze_sentiment_by_year(df, artist=my_artist)
print(f"\n{my_artist}'s sentiment over time:")
print(artist_timeline)

# Bonus: Plot it!
# artist_timeline.plot(x='Year', y='sentiment', kind='line',
#                     title=f'{my_artist} Sentiment Timeline')