import os
# Environment setup cell. Lines starting with `!` are IPython shell escapes, so
# this cell only runs inside a Jupyter/IPython kernel, not as plain Python.
# Refresh the apt package index quietly (stdout is discarded).
! apt-get update -qq > /dev/null
# Install a headless Java 8 JDK (presumably needed by nlu's JVM-based backend - TODO confirm)
! apt-get install -y openjdk-8-jdk-headless -qq > /dev/null
# Point JAVA_HOME at the JDK 8 directory and put its bin/ first on PATH so that
# `java` resolves to this installation.
os.environ["JAVA_HOME"] = "/usr/lib/jvm/java-8-openjdk-amd64"
os.environ["PATH"] = os.environ["JAVA_HOME"] + "/bin:" + os.environ["PATH"]
# Sanity check: print the version of the `java` binary now found on PATH.
! java -version
# Install nlu pinned to the 2.5rc1 release candidate; pip's stdout is discarded.
! pip install nlu==2.5rc1 -qq > /dev/null
import nlu
debconf: delaying package configuration, since apt-utils is not installed openjdk version "1.8.0_265" OpenJDK Runtime Environment (build 1.8.0_265-8u265-b01-0ubuntu2~18.04-b01) OpenJDK 64-Bit Server VM (build 25.265-b01, mixed mode)
import nlu
import pandas as pd
# Load the airline tweets CSV from the Kaggle input directory into a DataFrame.
tweets_csv_path = '/kaggle/input/twitter-airline-sentiment/Tweets.csv'
df = pd.read_csv(tweets_csv_path)
# Bare expression: in a notebook cell this renders a preview of the frame.
df
tweet_id | airline_sentiment | airline_sentiment_confidence | negativereason | negativereason_confidence | airline | airline_sentiment_gold | name | negativereason_gold | retweet_count | text | tweet_coord | tweet_created | tweet_location | user_timezone | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 570306133677760513 | neutral | 1.0000 | NaN | NaN | Virgin America | NaN | cairdin | NaN | 0 | @VirginAmerica What @dhepburn said. | NaN | 2015-02-24 11:35:52 -0800 | NaN | Eastern Time (US & Canada) |
1 | 570301130888122368 | positive | 0.3486 | NaN | 0.0000 | Virgin America | NaN | jnardino | NaN | 0 | @VirginAmerica plus you've added commercials t... | NaN | 2015-02-24 11:15:59 -0800 | NaN | Pacific Time (US & Canada) |
2 | 570301083672813571 | neutral | 0.6837 | NaN | NaN | Virgin America | NaN | yvonnalynn | NaN | 0 | @VirginAmerica I didn't today... Must mean I n... | NaN | 2015-02-24 11:15:48 -0800 | Lets Play | Central Time (US & Canada) |
3 | 570301031407624196 | negative | 1.0000 | Bad Flight | 0.7033 | Virgin America | NaN | jnardino | NaN | 0 | @VirginAmerica it's really aggressive to blast... | NaN | 2015-02-24 11:15:36 -0800 | NaN | Pacific Time (US & Canada) |
4 | 570300817074462722 | negative | 1.0000 | Can't Tell | 1.0000 | Virgin America | NaN | jnardino | NaN | 0 | @VirginAmerica and it's a really big bad thing... | NaN | 2015-02-24 11:14:45 -0800 | NaN | Pacific Time (US & Canada) |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
14635 | 569587686496825344 | positive | 0.3487 | NaN | 0.0000 | American | NaN | KristenReenders | NaN | 0 | @AmericanAir thank you we got on a different f... | NaN | 2015-02-22 12:01:01 -0800 | NaN | NaN |
14636 | 569587371693355008 | negative | 1.0000 | Customer Service Issue | 1.0000 | American | NaN | itsropes | NaN | 0 | @AmericanAir leaving over 20 minutes Late Flig... | NaN | 2015-02-22 11:59:46 -0800 | Texas | NaN |
14637 | 569587242672398336 | neutral | 1.0000 | NaN | NaN | American | NaN | sanyabun | NaN | 0 | @AmericanAir Please bring American Airlines to... | NaN | 2015-02-22 11:59:15 -0800 | Nigeria,lagos | NaN |
14638 | 569587188687634433 | negative | 1.0000 | Customer Service Issue | 0.6659 | American | NaN | SraJackson | NaN | 0 | @AmericanAir you have my money, you change my ... | NaN | 2015-02-22 11:59:02 -0800 | New Jersey | Eastern Time (US & Canada) |
14639 | 569587140490866689 | neutral | 0.6771 | NaN | 0.0000 | American | NaN | daviddtwu | NaN | 0 | @AmericanAir we have 8 ppl so we need 2 know h... | NaN | 2015-02-22 11:58:51 -0800 | dallas, TX | NaN |
14640 rows × 15 columns
# Fetch the pretrained 'emotion' pipeline (models are downloaded on first use),
# then run it over the tweets DataFrame. The result is used below through its
# 'category' column, grouped by 'user_timezone' and by 'airline'.
emotion_pipeline = nlu.load('emotion')
nlu_emotion_df = emotion_pipeline.predict(df)
classifierdl_use_emotion download started this may take some time. Approximate size to download 20.7 MB [OK!] tfhub_use download started this may take some time. Approximate size to download 923.7 MB [OK!]
# Bar chart of how many tweets received each predicted emotion label.
emotion_totals = nlu_emotion_df['category'].value_counts()
emotion_totals.plot.bar(title='Predicted emotion labels count in dataset')
# Count predicted emotion labels within each user time zone.
counts = nlu_emotion_df.groupby('user_timezone')['category'].value_counts()
# Plot only the (time zone, emotion) pairs that occur more than 10 times.
# NOTE(review): a Series plot may draw on the current axes if a figure is
# already open - keep this chart in its own notebook cell; confirm.
frequent_counts = counts[counts.gt(10)]
frequent_counts.plot.bar(figsize=(25, 8), title='Emotion tweet counts by user time zone')
# Bar chart of predicted emotion label counts broken down by airline.
airline_emotion_counts = nlu_emotion_df.groupby('airline')['category'].value_counts()
airline_emotion_counts.plot.bar(figsize=(20, 8), title='Emotion tweet counts grouped by airline')