# JUST RUN THIS, no changes needed
from google.colab import drive
import pandas as pd
from textblob import TextBlob

drive.mount('/content/gdrive')
df = pd.read_csv('/content/gdrive/MyDrive/datasets/lyrics.csv')

# FIXUP DATA
# Strip zero-width spaces (U+200B) so exact title/lyric comparisons work.
df["Title"] = df["Title"].str.replace("\u200b", "")
df["Lyric"] = df["Lyric"].str.replace("\u200b", "")

# Look at the data
print(f"Total songs: {len(df)}")
print(f"Columns: {df.columns.tolist()}")
df.head()


def get_song_lyrics(df, artist, title):
    """Return the lyrics of one song.

    Args:
        df: The lyrics DataFrame; must have 'Artist', 'Title' and 'Lyric'
            columns.
        artist: Artist name to look for (exact match).
        title: Song title to look for (exact match).

    Returns:
        The 'Lyric' value of the first row matching both artist and title.

    Raises:
        ValueError: If no row matches the given artist and title.
    """
    matches = df.loc[(df["Artist"] == artist) & (df["Title"] == title), "Lyric"]
    if matches.empty:
        # Fail loudly here instead of letting None flow into the sentiment
        # code and blow up later with a confusing error.
        raise ValueError(f"No song titled {title!r} by {artist!r} in the dataset")
    return matches.iloc[0]


def calculate_song_sentiment(lyrics):
    """Return the TextBlob polarity score of a song's lyrics.

    Args:
        lyrics: A string of song lyrics.

    Returns:
        The polarity as a float between -1 (negative) and 1 (positive).
        Missing (non-string, e.g. NaN) or blank lyrics score a neutral 0.0.
    """
    # pandas represents missing text as float NaN, which TextBlob rejects.
    if not isinstance(lyrics, str) or not lyrics.strip():
        return 0.0
    return TextBlob(lyrics).sentiment.polarity


# Test with one song
artist = "Dua Lipa"  # Change to your choice!
title = "New Rules"  # Change to your choice!

# Get the lyrics for this song
lyrics = get_song_lyrics(df, artist, title)

# Calculate sentiment
sentiment = calculate_song_sentiment(lyrics)

print(f"{artist} - {title}")
print(f"Sentiment: {sentiment:.3f}")
print("(Negative < 0 < Positive)")


def add_sentiment_column(df):
    """Return the lyrics DataFrame with a 'sentiment' column added.

    Args:
        df: The lyrics DataFrame; must have a 'Lyric' column.

    Returns:
        A new DataFrame equal to df plus a 'sentiment' column holding the
        polarity of each row's lyrics. The input frame is not modified.
    """
    return df.assign(sentiment=df["Lyric"].apply(calculate_song_sentiment))


# Add sentiment scores to all songs
df = add_sentiment_column(df)

# Check it worked
print("Sentiment column added!")
print(df[['Artist', 'Title', 'sentiment']].head())


def find_extreme_songs(df, artist, n=5):
    """Print an artist's most positive and most negative songs.

    Args:
        df: The DataFrame with 'Artist', 'Title' and 'sentiment' columns.
        artist: The artist name to analyze (exact match).
        n: How many songs to show in each list.

    Returns:
        None; the results are printed.
    """
    # 1. Filter to just this artist's songs
    songs = df.loc[df["Artist"] == artist, ["Title", "sentiment"]]
    if songs.empty:
        print(f"No songs found for {artist}")
        return

    # 2./3. Sort both ways; sort_values places NaN last in either direction,
    # so unscored songs never crowd out real results.
    most_positive = songs.sort_values("sentiment", ascending=False).head(n)
    most_negative = songs.sort_values("sentiment", ascending=True).head(n)

    # 4. Print the top n for each
    print(f"Most positive songs by {artist}:")
    print(most_positive.to_string(index=False))
    print(f"\nMost negative songs by {artist}:")
    print(most_negative.to_string(index=False))


# Analyze your chosen artist
my_artist = "Taylor Swift"  # Change to your choice!
find_extreme_songs(df, my_artist)


def calculate_artist_sentiment(df):
    """Return the average sentiment per artist.

    Args:
        df: The DataFrame with 'Artist' and 'sentiment' columns.

    Returns:
        A Series indexed by artist holding the mean sentiment, sorted in
        ascending order (most negative first, most positive last).
    """
    return df.groupby("Artist")["sentiment"].mean().sort_values()


# Find most positive and negative artists
artist_sentiments = calculate_artist_sentiment(df)

print("Most Positive Artists:")
print(artist_sentiments.tail(10))
print("\nMost Negative Artists:")
print(artist_sentiments.head(10))


def analyze_sentiment_by_year(df, artist):
    """Return an artist's average sentiment for each year.

    Args:
        df: The DataFrame with 'Artist', 'Year' and 'sentiment' columns.
        artist: The artist name to analyze (exact match).

    Returns:
        A DataFrame with 'Year' and 'sentiment' columns, one row per year,
        sorted by year. Empty if the artist has no songs.
    """
    songs = df[df["Artist"] == artist]
    yearly = songs.groupby("Year", as_index=False)["sentiment"].mean()
    return yearly.sort_values("Year").reset_index(drop=True)


# Analyze how an artist's sentiment changed over time
artist_timeline = analyze_sentiment_by_year(df, my_artist)
print(f"\n{my_artist}'s sentiment over time:")
print(artist_timeline)

# Bonus: Plot it!
# artist_timeline.plot(x='Year', y='sentiment', kind='line',
#                      title=f'{my_artist} Sentiment Timeline')