#!/usr/bin/env python
# coding: utf-8

# Load Classifier
# Notebook export (cells In[1]-In[7]): load a pre-trained TF-IDF vectorizer and
# sentiment classifier, sanity-check them on two samples, then authenticate
# against the Twitter API with tweepy.

# In[1]: Importing the libraries
import numpy as np
import re
import pickle
import nltk
from nltk.corpus import stopwords
from sklearn.datasets import load_files

nltk.download('stopwords')

# In[3]: Using our classifier
# SECURITY NOTE(review): pickle.load executes arbitrary code embedded in the
# file -- only load these model files from a trusted source.
with open('pre-trained-model/tfidfmodel.pickle', 'rb') as f:
    tfidf = pickle.load(f)

with open('pre-trained-model/classifier.pickle', 'rb') as f:
    clf = pickle.load(f)

# Test on new data
# In[4]: expected-positive sample
sample = ["You are a nice person man, have a good life"]
sample = tfidf.transform(sample).toarray()
sentiment = clf.predict(sample)
sentiment

# In[5]: expected-negative sample
sample = ["Logistic regression is not good! "]
sample = tfidf.transform(sample).toarray()
sentiment = clf.predict(sample)
sentiment

# Test on Twitter
# In[6]: Twitter authentication (tweepy 3.x API)
import json
import tweepy
import time
from tweepy import OAuthHandler

# SECURITY NOTE(review): credentials are hard-coded in source; once committed
# they are public and must be revoked/regenerated. Prefer reading them from
# environment variables (os.environ) or a config file excluded from VCS.
consumer_key = 'yoIwFkjZGYDa49aO16XqSNqcN'
consumer_secret = 'gl4LQOItV7Z1aFwNrlvaiKJ3t8o8h99blMIAmnmdHxYjzjRAxO'
access_token = '624310916-E7fDF2IE8P6bfY1oVFglASf6F8RnxMd3vgSXFqnZ'
access_token_secret = 'ID9JcoXHsDcKtvNcnmBGcCQhUlO0wmwAxBJ6LCesiUAas'

auth = OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)

# NOTE(review): wait_on_rate_limit_notify (and api.me()/api.search used later
# in this script) exist only in tweepy < 4.0 -- pin tweepy 3.x, or port the
# script to the tweepy 4 names (verify_credentials / search_tweets).
api = tweepy.API(auth,
                 wait_on_rate_limit=True,
                 wait_on_rate_limit_notify=True,
                 retry_count=3,
                 retry_delay=60)
# In[7]: show the authenticated user.
# api.me() returns the user whose authentication keys were used (tweepy < 4).
user = api.me()
print('Name: ' + user.name)
print('ID: ' + str(user.id))
print('Location: ' + user.location)

# In[8]: fetch up to 100 recent English tweets matching the query,
# excluding retweets; geocode restricts results to a 500 km radius
# around (22.1568, 89.4332).
args = ['Khashoggi']
api = tweepy.API(auth, timeout=10)

list_tweets = []
query = args[0]
if len(args) == 1:
    for status in tweepy.Cursor(api.search,
                                q=query + " -filter:retweets",
                                lang='en',
                                result_type='recent',
                                geocode="22.1568,89.4332,500km").items(100):
        list_tweets.append(status.text)

# In[9]: peek at the raw tweets
list_tweets[:4]

# In[10]: label mapping.
# NOTE(review): 0 -> 'positive', 1 -> 'negative' is unusual (most conventions
# use 1 for positive) -- confirm against the training script that produced
# classifier.pickle.
mapping = {0: 'positive', 1: 'negative'}

# Twitter Data Cleaning
# In[11]: Preprocessing the tweets.
# Regexes are compiled once outside the loop; substitution order is preserved
# exactly: strip t.co URLs, lower-case, expand contractions, then strip
# non-word chars / digits / stray single letters and collapse whitespace.
_URL_PATTERNS = [
    re.compile(r"^https://t.co/[a-zA-Z0-9]*\s"),
    re.compile(r"\s+https://t.co/[a-zA-Z0-9]*\s"),
    re.compile(r"\s+https://t.co/[a-zA-Z0-9]*$"),
]
_CONTRACTIONS = [
    (re.compile(r"that's"), "that is"),
    (re.compile(r"there's"), "there is"),
    (re.compile(r"what's"), "what is"),
    (re.compile(r"where's"), "where is"),
    (re.compile(r"it's"), "it is"),
    (re.compile(r"who's"), "who is"),
    (re.compile(r"i'm"), "i am"),
    (re.compile(r"she's"), "she is"),
    (re.compile(r"he's"), "he is"),
    (re.compile(r"they're"), "they are"),
    (re.compile(r"who're"), "who are"),
    (re.compile(r"ain't"), "am not"),
    (re.compile(r"wouldn't"), "would not"),
    (re.compile(r"shouldn't"), "should not"),
    (re.compile(r"can't"), "can not"),
    (re.compile(r"couldn't"), "could not"),
    (re.compile(r"won't"), "will not"),
]
_CLEANUP = [
    (re.compile(r"\W"), " "),
    (re.compile(r"\d"), " "),
    (re.compile(r"\s+[a-z]\s+"), " "),
    (re.compile(r"\s+[a-z]$"), " "),
    (re.compile(r"^[a-z]\s+"), " "),
    (re.compile(r"\s+"), " "),
]

sent_tweets = []
for tweet in list_tweets:
    for pat in _URL_PATTERNS:
        tweet = pat.sub(" ", tweet)
    tweet = tweet.lower()
    for pat, repl in _CONTRACTIONS:
        tweet = pat.sub(repl, tweet)
    for pat, repl in _CLEANUP:
        tweet = pat.sub(repl, tweet)
    sent = clf.predict(tfidf.transform([tweet]).toarray())
    # BUG FIX: int() on a 1-element ndarray is deprecated and raises a
    # TypeError on NumPy >= 1.25; index the single prediction explicitly.
    sent_tweets.append((tweet, mapping[int(sent[0])]))

sent_tweets[:4]

# Positive/negative Split
# In[12]: count labels; neg is everything that is not positive.
pos = len([s for t, s in sent_tweets if s == 'positive'])
neg = len(sent_tweets) - pos
pos, neg
# In[14]: Visualizing the results as a two-bar chart of the label counts.
import matplotlib.pyplot as plt
import numpy as np

plt.bar(['Positive', 'Negative'], [pos, neg], alpha=0.5)
plt.ylabel('Number')
# BUG FIX: corrected typos in the user-facing title ('Postive', 'NEgative').
plt.title('Number of Positive and Negative Tweets')
plt.show()

# In[15]: one more sanity check on the classifier
sample = ["You are a nice person man, have a good life"]
sample = tfidf.transform(sample).toarray()
sentiment = clf.predict(sample)
sentiment

# In[19]: classify a Turkish comment by translating it to English first
from textblob import TextBlob

# In[20]:
sample = []
yorum = "Kampanya berbat. kötü ya!"  # Turkish sample comment
blob = TextBlob(yorum)
# NOTE(review): TextBlob.translate() was deprecated and then removed in
# textblob >= 0.18 (the Google Translate endpoint it relied on changed);
# pin textblob < 0.18 or switch to a maintained translation API.
sample.append(str(blob.translate(to="en")))

# In[21]:
print(sample)

# In[22]: run the translated text through the same TF-IDF + classifier pipeline
sample = tfidf.transform(sample).toarray()
sentiment = clf.predict(sample)
sentiment