# Lab configuration: API key, project id and bucket used by the cells below.
import os

from googleapiclient.discovery import build

# NOTE(review): the API key is hard-coded and echoed to the cell output, so it
# ends up in any saved or shared copy of this notebook. Replace the values
# marked "CHANGE" with your own and avoid committing a live key.
APIKEY="AIzaSyBQrrl4SZhE3QtxsnbjY2WTdgcBz0G0Rfs" # CHANGE
# print() call form (not the Python 2 print statement) so the cell also parses
# on Python 3 and matches every other print in this file.
print(APIKEY)
PROJECT_ID = "qwiklabs-gcp-14067121d7b1d12c" # CHANGE
print(PROJECT_ID)
BUCKET = "qwiklabs-gcp-14067121d7b1d12c" # CHANGE

# Export bucket and project through the process environment as well.
os.environ['BUCKET'] = BUCKET
os.environ['PROJECT'] = PROJECT_ID

print("\n","Google Cloud API Client credentials established")
AIzaSyBQrrl4SZhE3QtxsnbjY2WTdgcBz0G0Rfs qwiklabs-gcp-14067121d7b1d12c Google Cloud API Client credentials established
def SentimentAnalysis(text):
    """Run sentiment analysis on *text* with the Natural Language API.

    Builds a 'language' / 'v1beta1' discovery client authenticated with the
    module-level APIKEY, submits *text* as a PLAIN_TEXT document to
    analyzeSentiment and returns whatever execute() returns (a dictionary,
    per the sample output in this notebook).
    """
    # Imported inside the function, as in the original; presumably so the
    # function is self-contained when Spark ships it to executors via map().
    from googleapiclient.discovery import build

    client = build('language', 'v1beta1', developerKey=APIKEY)
    document = {'type': 'PLAIN_TEXT', 'content': text}
    request = client.documents().analyzeSentiment(body={'document': document})
    return request.execute()


print("\n","Sentiment Analysis function defined.")
Sentiment Analysis function defined.
import json

# Send a single line through SentimentAnalysis and show the result twice:
# first as the raw Python object, then pretty-printed as JSON.
sampleline = u'There are places I remember, all my life though some have changed.'
results = SentimentAnalysis(sampleline)

print("\n","This is the Python object that is returned; a dictionary.")
print("\n")
print("Function returns :",type(results))
print(results)

print("\n","This is the JSON formatted version of the object")
formatted = json.dumps(results, sort_keys=True, indent=4)
print(formatted)
This is the Python object that is returned; a dictionary. Function returns : <type 'dict'> {u'documentSentiment': {u'polarity': 1, u'magnitude': 0.8, u'score': 0.8}, u'language': u'en', u'sentences': [{u'text': {u'content': u'There are places I remember, all my life though some have changed.', u'beginOffset': -1}, u'sentiment': {u'polarity': 1, u'magnitude': 0.8, u'score': 0.8}}]} This is the JSON formatted version of the object { "documentSentiment": { "magnitude": 0.8, "polarity": 1, "score": 0.8 }, "language": "en", "sentences": [ { "sentiment": { "magnitude": 0.8, "polarity": 1, "score": 0.8 }, "text": { "beginOffset": -1, "content": "There are places I remember, all my life though some have changed." } } ] }
# Sentiment analysis over the smaller sample file.
lines = sc.textFile("/sampledata/road-not-taken.txt")

# map() is a transformation: it only queues SentimentAnalysis to be applied to
# every element of `lines`. Because of lazy execution nothing runs here, so a
# malformed job does not raise at this point.
sentiment = lines.map(SentimentAnalysis)
print(type(sentiment))  # expected: pyspark.rdd.PipelinedRDD

# collect() is an action: it runs the job and is where any errors surface.
# Each element is one API response, gathered into a Python list of dicts.
output = sentiment.collect()

print(type(output))
print("\n")
for record in output:
    print(record)
<class 'pyspark.rdd.PipelinedRDD'> <type 'list'> {u'documentSentiment': {u'polarity': 0, u'magnitude': 0, u'score': 0}, u'language': u'en', u'sentences': []} {u'documentSentiment': {u'polarity': 1, u'magnitude': 0.1, u'score': 0.1}, u'language': u'en', u'sentences': [{u'text': {u'content': u'roads diverged in a yellow wood,', u'beginOffset': -1}, u'sentiment': {u'polarity': 1, u'magnitude': 0.1, u'score': 0.1}}]} {u'documentSentiment': {u'polarity': -1, u'magnitude': 0.4, u'score': -0.4}, u'language': u'en', u'sentences': [{u'text': {u'content': u'And sorry I could not travel both', u'beginOffset': -1}, u'sentiment': {u'polarity': -1, u'magnitude': 0.4, u'score': -0.4}}]} {u'documentSentiment': {u'polarity': 1, u'magnitude': 0.1, u'score': 0.1}, u'language': u'en', u'sentences': [{u'text': {u'content': u'And be one traveler, long I stood', u'beginOffset': -1}, u'sentiment': {u'polarity': 1, u'magnitude': 0.1, u'score': 0.1}}]} {u'documentSentiment': {u'polarity': 1, u'magnitude': 0, u'score': 0}, u'language': u'en', u'sentences': [{u'text': {u'content': u'And looked down one as far as I could', u'beginOffset': -1}, u'sentiment': {u'polarity': 1, u'magnitude': 0, u'score': 0}}]} {u'documentSentiment': {u'polarity': -1, u'magnitude': 0.6, u'score': -0.6}, u'language': u'en', u'sentences': [{u'text': {u'content': u'To where it bent in the undergrowth;', u'beginOffset': -1}, u'sentiment': {u'polarity': -1, u'magnitude': 0.6, u'score': -0.6}}]} {u'documentSentiment': {u'polarity': 0, u'magnitude': 0, u'score': 0}, u'language': u'en', u'sentences': []} {u'documentSentiment': {u'polarity': -1, u'magnitude': 0.1, u'score': -0.1}, u'language': u'en', u'sentences': [{u'text': {u'content': u'Then took the other, as just as fair,', u'beginOffset': -1}, u'sentiment': {u'polarity': -1, u'magnitude': 0.1, u'score': -0.1}}]} {u'documentSentiment': {u'polarity': -1, u'magnitude': 0.1, u'score': -0.1}, u'language': u'en', u'sentences': [{u'text': {u'content': u'And having perhaps 
the better claim,', u'beginOffset': -1}, u'sentiment': {u'polarity': -1, u'magnitude': 0.1, u'score': -0.1}}]} {u'documentSentiment': {u'polarity': 1, u'magnitude': 0.5, u'score': 0.5}, u'language': u'en', u'sentences': [{u'text': {u'content': u'Because it was grassy and wanted wear;', u'beginOffset': -1}, u'sentiment': {u'polarity': 1, u'magnitude': 0.5, u'score': 0.5}}]} {u'documentSentiment': {u'polarity': 1, u'magnitude': 0.4, u'score': 0.4}, u'language': u'en', u'sentences': [{u'text': {u'content': u'Though as for that the passing there', u'beginOffset': -1}, u'sentiment': {u'polarity': 1, u'magnitude': 0.4, u'score': 0.4}}]} {u'documentSentiment': {u'polarity': 1, u'magnitude': 0, u'score': 0}, u'language': u'en', u'sentences': [{u'text': {u'content': u'Had worn them really about the same,', u'beginOffset': -1}, u'sentiment': {u'polarity': 1, u'magnitude': 0, u'score': 0}}]} {u'documentSentiment': {u'polarity': 0, u'magnitude': 0, u'score': 0}, u'language': u'en', u'sentences': []} {u'documentSentiment': {u'polarity': 1, u'magnitude': 0.1, u'score': 0.1}, u'language': u'en', u'sentences': [{u'text': {u'content': u'And both that morning equally lay', u'beginOffset': -1}, u'sentiment': {u'polarity': 1, u'magnitude': 0.1, u'score': 0.1}}]} {u'documentSentiment': {u'polarity': -1, u'magnitude': 0.1, u'score': -0.1}, u'language': u'en', u'sentences': [{u'text': {u'content': u'In leaves no step had trodden black.', u'beginOffset': -1}, u'sentiment': {u'polarity': -1, u'magnitude': 0.1, u'score': -0.1}}]} {u'documentSentiment': {u'polarity': 1, u'magnitude': 0, u'score': 0}, u'language': u'en', u'sentences': [{u'text': {u'content': u'Oh, I kept the first for another day!', u'beginOffset': -1}, u'sentiment': {u'polarity': 1, u'magnitude': 0, u'score': 0}}]} {u'documentSentiment': {u'polarity': -1, u'magnitude': 0, u'score': 0}, u'language': u'en', u'sentences': [{u'text': {u'content': u'Yet knowing how way leads on to way,', u'beginOffset': -1}, u'sentiment': 
{u'polarity': -1, u'magnitude': 0, u'score': 0}}]} {u'documentSentiment': {u'polarity': -1, u'magnitude': 0, u'score': 0}, u'language': u'en', u'sentences': [{u'text': {u'content': u'I doubted if I should ever come back.', u'beginOffset': -1}, u'sentiment': {u'polarity': -1, u'magnitude': 0, u'score': 0}}]} {u'documentSentiment': {u'polarity': 0, u'magnitude': 0, u'score': 0}, u'language': u'en', u'sentences': []} {u'documentSentiment': {u'polarity': -1, u'magnitude': 0.6, u'score': -0.6}, u'language': u'en', u'sentences': [{u'text': {u'content': u'I shall be telling this with a sigh', u'beginOffset': -1}, u'sentiment': {u'polarity': -1, u'magnitude': 0.6, u'score': -0.6}}]} {u'documentSentiment': {u'polarity': 1, u'magnitude': 0.1, u'score': 0.1}, u'language': u'en', u'sentences': [{u'text': {u'content': u'Somewhere ages and ages hence:', u'beginOffset': -1}, u'sentiment': {u'polarity': 1, u'magnitude': 0.1, u'score': 0.1}}]} {u'documentSentiment': {u'polarity': 1, u'magnitude': 0.1, u'score': 0.1}, u'language': u'en', u'sentences': [{u'text': {u'content': u'Two roads diverged in a wood, and I-', u'beginOffset': -1}, u'sentiment': {u'polarity': 1, u'magnitude': 0.1, u'score': 0.1}}]} {u'documentSentiment': {u'polarity': 1, u'magnitude': 0.1, u'score': 0.1}, u'language': u'en', u'sentences': [{u'text': {u'content': u'I took the one less traveled by,', u'beginOffset': -1}, u'sentiment': {u'polarity': 1, u'magnitude': 0.1, u'score': 0.1}}]} {u'documentSentiment': {u'polarity': 1, u'magnitude': 0.2, u'score': 0.2}, u'language': u'en', u'sentences': [{u'text': {u'content': u'And that has made all the difference.', u'beginOffset': -1}, u'sentiment': {u'polarity': 1, u'magnitude': 0.2, u'score': 0.2}}]} {u'documentSentiment': {u'polarity': 0, u'magnitude': 0, u'score': 0}, u'language': u'en', u'sentences': []}
# `output` is a list of dictionaries, one per input line.
# The score and magnitude live one level down, under 'documentSentiment'.
for entry in output:
    doc_sentiment = entry['documentSentiment']
    print("Score: ",doc_sentiment['score'], " Magnitude :",doc_sentiment['magnitude'])
Score: 0 Magnitude : 0 Score: 0.1 Magnitude : 0.1 Score: -0.4 Magnitude : 0.4 Score: 0.1 Magnitude : 0.1 Score: 0 Magnitude : 0 Score: -0.6 Magnitude : 0.6 Score: 0 Magnitude : 0 Score: -0.1 Magnitude : 0.1 Score: -0.1 Magnitude : 0.1 Score: 0.5 Magnitude : 0.5 Score: 0.4 Magnitude : 0.4 Score: 0 Magnitude : 0 Score: 0 Magnitude : 0 Score: 0.1 Magnitude : 0.1 Score: -0.1 Magnitude : 0.1 Score: 0 Magnitude : 0 Score: 0 Magnitude : 0 Score: 0 Magnitude : 0 Score: 0 Magnitude : 0 Score: -0.6 Magnitude : 0.6 Score: 0.1 Magnitude : 0.1 Score: 0.1 Magnitude : 0.1 Score: 0.1 Magnitude : 0.1 Score: 0.2 Magnitude : 0.2 Score: 0 Magnitude : 0
def TailoredAnalysis(text):
    """Run entity analysis on *text* with the Natural Language API.

    Builds a 'language' / 'v1beta1' discovery client authenticated with the
    module-level APIKEY, submits *text* as a PLAIN_TEXT document to
    analyzeEntities and returns whatever execute() returns. Later cells read
    the 'entities' key of that result.
    """
    # Imported inside the function, as in the original; presumably so the
    # function is self-contained when Spark ships it to executors via map().
    from googleapiclient.discovery import build

    client = build('language', 'v1beta1', developerKey=APIKEY)
    document = {'type': 'PLAIN_TEXT', 'content': text}
    request = client.documents().analyzeEntities(body={'document': document})
    return request.execute()


print("\n","Tailored Analysis function defined.")
Tailored Analysis function defined.
from operator import add

# [STEP 1] HDFS
#   To run against the small sample instead, use:
#   lines = sc.textFile("/sampledata/road-not-taken.txt")
#
# [STEP 2] Cloud Storage
#   The bucket name comes from BUCKET (set in the configuration cell), so only
#   the file name needs editing. Available parts:
#   time-machine-P1.txt, time-machine-P2.txt, time-machine-P3.txt,
#   time-machine-P4.txt
lines = sc.textFile("gs://{}/time-machine-P1.txt".format(BUCKET))

# One API response per input line; keep only its list of entities.
entities = lines.map(TailoredAnalysis)
rdd = entities.map(lambda x: x['entities'])

# Flatten the per-line lists into one stream of entities, keep the PERSON
# ones, and count occurrences of each name. The chain is wrapped in
# parentheses so each stage sits on its own line without "\" continuations.
results = (
    rdd.flatMap(lambda x: x)
    .filter(lambda x: x['type'] == u'PERSON')
    .map(lambda x: (x['name'], 1))
    .reduceByKey(add)
)
print(sorted(results.take(25)))
[(u'Cadger', 1), (u'Eight', 1), (u'Hettie Potter', 1), (u'Homer', 1), (u'Nebuchadnezzar', 1), (u'Neither', 1), (u'Phoenician', 1), (u'Psychologist', 23), (u'Simon Newcomb', 1), (u'Watchett', 1), (u'conductors', 1), (u'couple', 1), (u'crowd', 1), (u'driver', 1), (u'eddy', 1), (u'friend', 1), (u'group', 2), (u'historian', 1), (u'host', 1), (u'mathematicians', 1), (u'noun substantives', 1), (u'rest', 1), (u'schoolmaster', 1), (u'some', 1), (u'speaker', 1)]
from operator import add

# [STEP 3] Cloud Storage
#   The bucket name comes from BUCKET (set in the configuration cell), so only
#   the file name needs editing. Available parts:
#   time-machine-P1.txt, time-machine-P2.txt, time-machine-P3.txt,
#   time-machine-P4.txt
lines = sc.textFile("gs://{}/time-machine-P2.txt".format(BUCKET))

# One API response per input line; keep only its list of entities.
entities = lines.map(TailoredAnalysis)
rdd = entities.map(lambda x: x['entities'])

# Flatten the per-line lists into one stream of entities, keep the LOCATION
# ones, and count occurrences of each name. The chain is wrapped in
# parentheses so each stage sits on its own line without "\" continuations.
results = (
    rdd.flatMap(lambda x: x)
    .filter(lambda x: x['type'] == u'LOCATION')
    .map(lambda x: (x['name'], 1))
    .reduceByKey(add)
)
print(sorted(results.take(25)))
[(u'I', 1), (u'London', 3), (u'Oriental', 1), (u'State', 1), (u'Thames valley', 1), (u'beach', 1), (u'buildings', 4), (u'cemeteries', 1), (u'dining-halls', 1), (u'everywhere', 3), (u'hill crest', 1), (u'hill slopes', 1), (u'neighbourhood', 1), (u'palace', 2), (u'planets', 1), (u'plants', 3), (u'railways', 1), (u'ruin', 1), (u'shop', 1), (u'sky', 5), (u'slope', 3), (u'somewhere', 1), (u'state', 1), (u'wells', 7), (u'workrooms', 1)]
# Write the entity counts to Cloud Storage under <bucket>/sampleoutput/.
# The bucket name comes from BUCKET (set in the configuration cell), so no
# manual edit is needed here. repartition(1) collapses the RDD to a single
# partition before the write.
# NOTE(review): saveAsTextFile is expected to fail if the target directory
# already exists; remove a previous sampleoutput/ before re-running.
results.repartition(1).saveAsTextFile("gs://{}/sampleoutput/".format(BUCKET))
print("Output to Cloud Storage is complete.")
Output to Cloud Storage is complete.