# JUST RUN THIS, no changes needed
from google.colab import drive
import pandas as pd
from textblob import TextBlob

drive.mount('/content/gdrive')
df = pd.read_csv('/content/gdrive/MyDrive/datasets/lyrics.csv')

# FIXUP DATA
# Strip zero-width spaces (U+200B) so equality filters on titles match.
# regex=False makes this a literal replacement on every pandas version.
df["Title"] = df["Title"].str.replace("\u200b", "", regex=False)
df["Lyric"] = df["Lyric"].str.replace("\u200b", "", regex=False)

# Look at the data
print(f"Total songs: {len(df)}")
print(f"Columns: {df.columns.tolist()}")
# Only rendered when this is the last expression of a notebook cell.
df.head()


def get_song_lyrics(df: pd.DataFrame, artist: str, title: str) -> str:
    """Return the lyrics of the first row matching *artist* and *title*.

    Args:
        df: Frame with 'Artist', 'Title' and 'Lyric' columns.
        artist: Exact value to match in the 'Artist' column.
        title: Exact value to match in the 'Title' column.

    Returns:
        The 'Lyric' value of the first matching row.

    Raises:
        IndexError: If no row matches both artist and title.
    """
    matches = df.loc[(df['Artist'] == artist) & (df['Title'] == title), 'Lyric']
    if matches.empty:
        # Same exception type an empty `.values[0]` lookup would raise, but
        # with a message that says what was actually missing.
        raise IndexError(f"No song titled {title!r} by {artist!r} found")
    return matches.iloc[0]


def calculate_song_sentiment(lyrics) -> float:
    """Return the TextBlob sentiment polarity of *lyrics*.

    Non-string input (for example the NaN pandas uses for a missing CSV
    cell) yields NaN instead of raising, so one missing lyric does not
    abort a column-wide ``apply``; pandas' ``mean`` skips NaN by default.
    """
    if not isinstance(lyrics, str):
        return float("nan")
    return TextBlob(lyrics).sentiment.polarity


# Test with one song
artist = "Dua Lipa"  # Change to your choice!
title = "New Rules"  # Change to your choice!

# Get the lyrics for this song
lyrics = get_song_lyrics(df, artist, title)

# Calculate sentiment
sentiment = calculate_song_sentiment(lyrics)
print(f"{artist} - {title}")
print(f"Sentiment: {sentiment:.3f}")
print("(Negative < 0 < Positive)")


def add_sentiment_column(df: pd.DataFrame) -> pd.DataFrame:
    """Add a 'sentiment' column computed from 'Lyric' and return *df*.

    The frame is modified in place; the same object is returned.
    """
    df['sentiment'] = df['Lyric'].apply(calculate_song_sentiment)
    return df


# Add sentiment scores to all songs
df = add_sentiment_column(df)

# Check it worked
print("Sentiment column added!")
print(df[['Artist', 'Title', 'sentiment']].head())


def _print_song_sentiments(songs: pd.DataFrame) -> None:
    """Print one 'title: sentiment' line per row of *songs*."""
    for _, row in songs.iterrows():
        print(f"{row['Title']}: {row['sentiment']}")


def find_extreme_songs(df: pd.DataFrame, artist: str, n: int = 5) -> None:
    """Print the *n* most negative and *n* most positive songs by *artist*.

    Expects *df* to already carry a 'sentiment' column.
    """
    artist_df = df[df['Artist'] == artist]
    most_negative = artist_df.sort_values(by="sentiment", ascending=True).head(n)
    most_positive = artist_df.sort_values(by="sentiment", ascending=False).head(n)

    print(f"Most negative {artist} songs:")
    _print_song_sentiments(most_negative)
    print("")
    print(f"Most positive {artist} songs:")
    _print_song_sentiments(most_positive)


# Analyze your chosen artist
my_artist = "Taylor Swift"  # Change to your choice!
find_extreme_songs(df, my_artist)


def calculate_artist_sentiment(df: pd.DataFrame) -> pd.Series:
    """Return the mean 'sentiment' per artist, indexed by 'Artist'."""
    return df.groupby("Artist")["sentiment"].mean()


# Find most positive and negative artists
artist_sentiments = calculate_artist_sentiment(df)
print("Most Positive Artists:")
sorted_positive = artist_sentiments.sort_values(ascending=False)
print(sorted_positive.head(10))
print("\nMost Negative Artists:")
print(artist_sentiments.sort_values(ascending=True).head(10))


def analyze_sentiment_by_year(df: pd.DataFrame, artist: str) -> pd.Series:
    """Return the mean 'sentiment' per 'Year' for the songs of *artist*."""
    # Filter by artist
    artist_df = df[df["Artist"] == artist]
    # Group by 'Year'
    return artist_df.groupby('Year')['sentiment'].mean()


# Analyze how an artist's sentiment changed over time
artist_timeline = analyze_sentiment_by_year(df, my_artist)
print(f"\n{my_artist}'s sentiment over time:")
print(artist_timeline)
artist_timeline.plot(x='Year', y='sentiment', kind='line', title=f'{my_artist} Sentiment Timeline')