#!/usr/bin/env python
# coding: utf-8

# # Import Libraries

# In[1]:

import tweepy
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image  # scipy.misc.imread was removed in SciPy 1.2; PIL loads the mask instead
from wordcloud import WordCloud, STOPWORDS
from textblob import TextBlob

# # Authentication

# In[2]:

consumer_key = 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx'
consumer_secret = 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx'
access_token = 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx-xxxxxxxxxxxxxxxxxxxxxxxxxxx'
access_token_secret = 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx'

auth = tweepy.OAuthHandler(consumer_key, consumer_secret)  # interacting with Twitter's API
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth)  # creating the API object

# # Extracting Tweets

# In[3]:

# Collect up to 2,000 English tweets mentioning "millennials".
# Note: tweepy >= 4 renamed api.search to api.search_tweets.
results = []
for tweet in tweepy.Cursor(api.search, q='millennials', lang='en').items(2000):
    results.append(tweet)

print(type(results))
print(len(results))
# print(results[0].text)  # inspect a sample tweet

# # Store Data in a DataFrame

# In[4]:

def tweets_df(results):
    """Flatten the list of Status objects into a DataFrame, one row per tweet."""
    id_list = [tweet.id for tweet in results]
    data_set = pd.DataFrame(id_list, columns=["id"])
    data_set["text"] = [tweet.text for tweet in results]
    data_set["created_at"] = [tweet.created_at for tweet in results]
    data_set["retweet_count"] = [tweet.retweet_count for tweet in results]
    data_set["user_screen_name"] = [tweet.author.screen_name for tweet in results]
    data_set["user_followers_count"] = [tweet.author.followers_count for tweet in results]
    data_set["user_location"] = [tweet.author.location for tweet in results]
    data_set["Hashtags"] = [tweet.entities.get('hashtags') for tweet in results]
    return data_set

data_set = tweets_df(results)

# # Remove Duplicate Tweets

# In[5]:

# Strip URLs from each tweet, then drop rows whose remaining text is identical
# (retweets of the same status often differ only in the shortened link).
text = data_set["text"]
for i in range(0, len(text)):
    txt = ' '.join(word for word in text[i].split() if not word.startswith('https:'))
    data_set.at[i, 'text2'] = txt  # DataFrame.set_value was removed in pandas 1.0; .at replaces it

data_set.drop_duplicates('text2', inplace=True)
data_set.reset_index(drop=True, inplace=True)
data_set.drop('text', axis=1, inplace=True)
data_set.rename(columns={'text2': 'text'}, inplace=True)

# # Sentiment Analysis of Tweets

# In[6]:

# Score each tweet's polarity in [-1, 1] with TextBlob's default analyzer
# and bucket it into Negative / Neutral / Positive.
text = data_set["text"]
for i in range(0, len(text)):
    sentiment = TextBlob(text[i]).sentiment.polarity
    data_set.at[i, 'Sentiment'] = sentiment
    if sentiment < 0.00:
        data_set.at[i, 'SentimentClass'] = 'Negative'
    elif sentiment > 0.00:
        data_set.at[i, 'SentimentClass'] = 'Positive'
    else:
        data_set.at[i, 'SentimentClass'] = 'Neutral'

# In[7]:

data_set.to_csv("C:\\Users\\kdudani\\Desktop\\Personal\\Social Media Analytics\\Millennials.csv")

# # Extract All Hashtags for All Tweets

# In[8]:

# One row per hashtag occurrence across all tweets; .get(..., []) guards
# against statuses with no 'hashtags' entry.
hashtags = [h['text'] for tweet in results for h in tweet.entities.get('hashtags', [])]
Htag_df = pd.DataFrame({'Hashtag': hashtags})

# In[9]:

Htag_df

# In[16]:

Millennials_Htag_wordcloud = Htag_df.groupby('Hashtag').size()
Millennials_Htag_wordcloud.to_csv("C:\\Users\\kdudani\\Desktop\\Personal\\Social Media Analytics\\Millennials_Htag_wordcloud.csv")
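# # Plot Top Hashtags

# The word cloud below gives a qualitative view; as a quick quantitative check,
# the most frequent hashtags can be plotted first. A minimal sketch, assuming
# Htag_df is populated as above; top-10 is an arbitrary cutoff for illustration.

# In[ ]:

top_hashtags = Htag_df['Hashtag'].value_counts().head(10)  # ten most common hashtags
top_hashtags.plot(kind='barh')  # horizontal bars keep long tags readable
plt.xlabel('Number of occurrences')
plt.title('Top 10 hashtags in tweets mentioning "millennials"')
plt.tight_layout()
plt.show()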
# In[10]:

# Join all hashtags into one string, dropping the search terms themselves
# so they do not dominate the word cloud.
Hashtag_Combined = " ".join(Htag_df['Hashtag'].values.astype(str))
no_millennials = " ".join([word for word in Hashtag_Combined.split()
                           if word not in ('millennials', 'Millennials', 'Boomers', 'GenX')])

# Load the Twitter-bird mask as a grayscale array (replaces the removed
# scipy.misc.imread(..., flatten=True)).
Tweet_mask = np.array(Image.open("C:\\Users\\kdudani\\Desktop\\Personal\\Social Media Analytics\\twitter_mask.png").convert('L'))

# Create a word cloud shaped by the mask
wc = WordCloud(background_color="white", stopwords=STOPWORDS, mask=Tweet_mask)
wc.generate(no_millennials)

plt.imshow(wc)
plt.axis("off")
plt.savefig('C:\\Users\\kdudani\\Desktop\\Personal\\Social Media Analytics\\millennials_Hashtag.png', dpi=300)
plt.show()
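# # Plot Sentiment Distribution

# The sentiment labels from In[6] are written to CSV but never visualised; a bar
# chart of the class counts is a natural closing step. A minimal sketch, assuming
# data_set still holds the 'SentimentClass' column created above.

# In[ ]:

sentiment_counts = data_set['SentimentClass'].value_counts()  # tweets per class
sentiment_counts.plot(kind='bar')
plt.ylabel('Number of tweets')
plt.title('Sentiment of tweets mentioning "millennials"')
plt.tight_layout()
plt.show()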