# JUST RUN THIS, no changes needed

from google.colab import drive
import pandas as pd
from textblob import TextBlob

# Mount Google Drive so the dataset stored there can be read from this runtime.
drive.mount('/content/gdrive')
df = pd.read_csv('/content/gdrive/MyDrive/datasets/lyrics.csv')

# FIXUP DATA
# Remove zero-width spaces (U+200B) from titles and lyrics: they are invisible
# but would make exact title comparisons fail. regex=False states that this is
# a literal replacement, so the result does not depend on the installed pandas
# version's default for `regex` (older releases defaulted to pattern matching).
df["Title"] = df["Title"].str.replace("\u200b", "", regex=False)
df["Lyric"] = df["Lyric"].str.replace("\u200b", "", regex=False)

# Look at the data
print(f"Total songs: {len(df)}")
print(f"Columns: {df.columns.tolist()}")
# NOTE: a bare expression is only displayed when it ends a notebook cell.
df.head()

def get_song_lyrics(df, artist, title):
    """Return the lyrics of the song called ``title`` by ``artist``.

    Args:
        df: DataFrame with 'Artist', 'Title' and 'Lyric' columns.
        artist: Artist name; compared for exact equality.
        title: Song title; compared for exact equality.

    Returns:
        The 'Lyric' value of the first row matching both artist and title.

    Raises:
        IndexError: If no row matches both ``artist`` and ``title``.
    """
    is_match = (df['Artist'] == artist) & (df['Title'] == title)
    matches = df.loc[is_match, 'Lyric']
    if matches.empty:
        # Same exception type an out-of-range lookup raises, but the message
        # names the song so a typo in the artist or title is easy to spot.
        raise IndexError(
            f"No song titled {title!r} by {artist!r} found in the dataset"
        )
    return matches.iloc[0]

def calculate_song_sentiment(lyrics):
    """Return the TextBlob polarity score for a song's lyrics.

    Args:
        lyrics: The lyrics text. Anything that is not a string (for example
            the NaN pandas stores for an empty CSV cell) counts as missing.

    Returns:
        ``TextBlob(lyrics).sentiment.polarity`` for string input (values
        below zero read as negative, above zero as positive), or NaN when
        ``lyrics`` is not a string.
    """
    if not isinstance(lyrics, str):
        # NaN rather than 0.0: pandas aggregations such as mean() skip NaN,
        # so a song without lyrics does not count as a neutral song.
        return float("nan")
    return TextBlob(lyrics).sentiment.polarity

# Test with one song
# Both values below are meant to be edited.
artist, title = "Dua Lipa", "New Rules"  # Change to your choice!

# Fetch the song's text, then score it
lyrics = get_song_lyrics(df, artist, title)
sentiment = calculate_song_sentiment(lyrics)

# Report: song, score to three decimals, and a legend for reading the sign
print(
    f"{artist} - {title}",
    f"Sentiment: {sentiment:.3f}",
    "(Negative < 0 < Positive)",
    sep="\n",
)

def add_sentiment_column(df):
    """Score every row's lyrics and store the scores in a 'sentiment' column.

    The column is written onto the DataFrame that was passed in, and that
    same object is returned.

    Args:
        df: DataFrame with a 'Lyric' column.

    Returns:
        ``df`` itself, now carrying the 'sentiment' column.
    """
    scores = df['Lyric'].apply(calculate_song_sentiment)
    df['sentiment'] = scores
    return df

# Score every song in the dataset and keep the result bound to `df`
df = add_sentiment_column(df)

# Sanity check: preview the first rows with the new column next to the
# artist and title it belongs to
print("Sentiment column added!")
print(df.head().loc[:, ['Artist', 'Title', 'sentiment']])

def _print_ranked_songs(songs, n, ascending):
    """Print 'title: sentiment' for the first ``n`` songs ordered by sentiment."""
    ranked = songs.sort_values(by="sentiment", ascending=ascending).head(n)
    for title, sentiment in zip(ranked['Title'], ranked['sentiment']):
        print(f"{title}: {sentiment}")


def find_extreme_songs(df, artist, n=5):
    """Print an artist's ``n`` most negative and ``n`` most positive songs.

    Args:
        df: DataFrame with 'Artist', 'Title' and 'sentiment' columns.
        artist: Artist name; compared for exact equality.
        n: How many songs to list at each end of the ranking.

    Returns:
        None. The two rankings are written to standard output, separated by
        an empty line. An artist with no rows prints only the two headings.
    """
    artist_df = df[df['Artist'] == artist]
    # Rows without a score cannot be ranked; sort_values would otherwise put
    # them at the end of both orderings, where a short list would show them.
    scored_df = artist_df.dropna(subset=['sentiment'])

    print(f"Most negative {artist} songs:")
    _print_ranked_songs(scored_df, n, ascending=True)
    print("")
    print(f"Most positive {artist} songs:")
    _print_ranked_songs(scored_df, n, ascending=False)


# Analyze your chosen artist
# `my_artist` is reused by the later timeline analysis as well.
my_artist = "Taylor Swift"  # Change to your choice!
find_extreme_songs(df=df, artist=my_artist)

def calculate_artist_sentiment(df):
    """Return the mean sentiment of each artist's songs.

    Args:
        df: DataFrame with 'Artist' and 'sentiment' columns.

    Returns:
        A Series of mean 'sentiment' values indexed by 'Artist'.
    """
    songs_by_artist = df.groupby("Artist")
    return songs_by_artist["sentiment"].agg("mean")

# Average sentiment per artist, then show both ends of the ranking
artist_sentiments = calculate_artist_sentiment(df)

# Ten highest averages first
print("Most Positive Artists:")
sorted_positive = artist_sentiments.sort_values(ascending=False)
print(sorted_positive.head(10))

# Ten lowest averages (sort_values sorts ascending by default)
print()
print("Most Negative Artists:")
print(artist_sentiments.sort_values().head(10))

def analyze_sentiment_by_year(df, artist):
    """Return one artist's mean song sentiment for each year.

    Args:
        df: DataFrame with 'Artist', 'Year' and 'sentiment' columns.
        artist: Artist name; compared for exact equality.

    Returns:
        A Series of mean 'sentiment' values indexed by 'Year'; it is empty
        when no row matches ``artist``.
    """
    # Keep only this artist's rows
    is_artist = df["Artist"] == artist
    artist_rows = df.loc[is_artist]

    # Average the scores within each year
    songs_by_year = artist_rows.groupby('Year')
    return songs_by_year['sentiment'].mean()

# Analyze how an artist's sentiment changed over time
artist_timeline = analyze_sentiment_by_year(df, my_artist)
print(f"\n{my_artist}'s sentiment over time:")
print(artist_timeline)

# artist_timeline is a Series indexed by year, so it plots directly: the index
# becomes the x axis and the values the y axis. The x=/y= column selectors
# belong to DataFrame plotting and are not needed for a Series.
if artist_timeline.empty:
    # An artist name with no matching rows yields an empty Series, which
    # pandas cannot plot; say so instead of failing inside the plot call.
    print(f"No songs found for {my_artist}; nothing to plot.")
else:
    artist_timeline.plot(kind='line', title=f'{my_artist} Sentiment Timeline')