In [1]:

# Core data libraries
import pandas as pd
import numpy as np
# Show floats with two decimals in DataFrame/Series displays
pd.set_option('display.float_format', lambda x: '%.2f' % x)

# Plotting stack; figures are rendered inline in the notebook
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
%matplotlib inline
plt.rcParams['savefig.dpi'] = 150  # resolution (dots per inch) used when figures are saved
plt.style.use('ggplot')

# seaborn styling; these calls run after the matplotlib style chosen above
import seaborn as sns
sns.set(color_codes=True)
sns.set_palette(sns.color_palette("husl", 8));  # 8-colour "husl" palette
sns.mpl.rc("figure", figsize=(8,5))  # default figure size in inches, set through matplotlib's rc
sns.set_style('whitegrid')

In [2]:

cd ../

C:\Users\Talha\Documents\WinPython3\projects\News-Sharing-by-Sentiment

In [3]:

# Load both corpora -> pn: published news, tn: tweeted news
pn = pd.read_csv('data/pb-sp.csv', encoding='utf-8')
tn = pd.read_csv('data/tw-sp.csv', encoding='utf-8')
# Alphabetically sorted list of the distinct outlet names found in the published news
outlets = sorted(pn['outlet'].unique().tolist())
# Sentiment polarity (sp) of the article text: positive minus negative emotion score
for frame in (pn, tn):
    frame['sp'] = frame['posemo'] - frame['negemo']
# Same polarity computed from the '_t' columns of the tweeted news
tn['sp_t'] = tn['posemo_t'] - tn['negemo_t']

In [5]:

# Correlation between retweets and emotion scores, restricted to the 'sports' category
is_sports = tn['cat'] == 'sports'
tn.loc[is_sports, ['rt', 'posemo', 'negemo']].corr()

Out[5]:

	rt	posemo	negemo
rt	1.00	0.14	-0.04
posemo	0.14	1.00	-0.09
negemo	-0.04	-0.09	1.00

In [13]:

# Same correlation matrix for the 'politics' and 'middleeast' categories taken together
in_politics_or_me = tn['cat'].isin(['politics', 'middleeast'])
tn.loc[in_politics_or_me, ['rt', 'posemo', 'negemo']].corr()

Out[13]:

	rt	posemo	negemo
rt	1.00	-0.05	0.05
posemo	-0.05	1.00	0.03
negemo	0.05	0.03	1.00

In [4]:

# News tweeted multiple times is split into two groups per URL:
# the least retweeted tweet vs. all the other tweets of the same story.
df4 = tn.groupby('href').filter(lambda g: len(g) > 3)  # news tweeted at least four times
# Only the two columns needed below are handed to apply().
rt_by_href = df4.groupby('href')[['rt', 'sp_t']]
# sort_values() replaces DataFrame.sort(), which no longer exists in pandas.
more_rt = rt_by_href.apply(lambda g: g.sort_values('rt').iloc[1:])  # all the tweets excluding the least retweeted
less_rt = rt_by_href.apply(lambda g: g.sort_values('rt').iloc[0])   # least retweeted tweet of each URL
# Mean tweet-text sentiment polarity of each group
print(more_rt.sp_t.mean(), less_rt.sp_t.mean())

-0.335782241015 -0.08625

In [5]:

df = tn.groupby('href').filter(lambda g: len(g) == 3)  # news tweeted exactly three times
# Within every URL, order the tweets by retweet count so that position 0 is the least retweeted.
# sort_values() replaces DataFrame.sort(), which no longer exists in pandas.
grouped = df.groupby('href')[['rt', 'sp_t', 'cat']].apply(
    lambda g: g.sort_values('rt').reset_index(drop=True))
# One row per URL, one column per (variable, retweet position) pair
unstacked = grouped.unstack()
# Mean tweet sentiment per retweet position; .iloc keeps the slice explicitly positional
ax = unstacked.sp_t.mean().iloc[:4].plot(kind='bar')
ax.set(xlabel='Retweet order (0 is the least retweeted group)',ylabel='Mean Tweet Sentiment',title='Tweet Sentiment vs RT Groups');

In [6]:

# Display the wide `unstacked` table (rt, sp_t and cat, each split into positions 0-2).
# The disabled line below would instead report the sample size (non-null sp_t count) of each position:
#pd.DataFrame([unstacked.sp_t[[i]].dropna().shape[0] for i in range(unstacked.sp_t.columns.shape[0])],columns=['N'])
unstacked

Out[6]:

	rt			sp_t			cat
	0	1	2	0	1	2	0	1	2
href
http://abcnews.go.com/Business/cup-inventor-john-sylvan-admits-expensive-coffee-pods/story?id=29382481	94.00	189.00	326.00	4.35	-3.84	-3.84	business	business	business
http://abcnews.go.com/Business/embattled-sony-pictures-executive-amy-pascal-steps/story?id=28749965	33.00	57.00	74.00	0.00	0.00	0.00	business	business	business
http://abcnews.go.com/Business/kraft-krft-heinz-agree-merge/story?id=29889951	123.00	166.00	208.00	4.55	8.00	7.69	business	business	business
http://abcnews.go.com/Business/make-104-acre-florida-island-dream-home/story?id=29732824	168.00	181.00	232.00	0.00	0.00	0.00	business	business	business
http://abcnews.go.com/Entertainment/bobbi-kristina-brown-alive-found-unresponsive/story?id=28628653	294.00	316.00	714.00	0.00	0.00	0.00	entertainment	entertainment	entertainment
http://abcnews.go.com/Entertainment/duchess-kate-appeals-greater-care-childrens-mental-health/story?id=28988190	210.00	351.00	369.00	0.00	0.00	3.85	entertainment	entertainment	entertainment
http://abcnews.go.com/Entertainment/harrison-ford-injured-california-small-plane-crash/story?id=29425681	186.00	478.00	746.00	0.00	0.00	0.00	entertainment	entertainment	entertainment
http://abcnews.go.com/Entertainment/jon-stewart-leaving-comedy-central/story?id=28875084	118.00	179.00	189.00	5.00	0.00	3.45	entertainment	entertainment	entertainment
http://abcnews.go.com/Entertainment/kevin-costner-god-miracle-bobbi-kristina-brown/story?id=28867469	133.00	155.00	217.00	7.69	7.69	7.69	entertainment	entertainment	entertainment
http://abcnews.go.com/Entertainment/meet-elena-avalor-disneys-latina-princess/story?id=28581447	292.00	810.00	1300.00	0.00	0.00	0.00	entertainment	entertainment	entertainment
http://abcnews.go.com/Entertainment/new-york-times-media-columnist-david-carr-dies-58/story?id=28936813	92.00	96.00	103.00	0.00	0.00	0.00	entertainment	entertainment	entertainment
http://abcnews.go.com/Entertainment/oscars-2015-live-updates-red-carpet/story?id=29075436	144.00	159.00	178.00	0.00	4.55	0.00	entertainment	entertainment	entertainment
http://abcnews.go.com/Entertainment/star-trek-star-leonard-nimoy-dies-83/story?id=29274628	732.00	835.00	2022.00	4.17	0.00	0.00	entertainment	entertainment	entertainment
http://abcnews.go.com/Entertainment/vanilla-ice-arrested-burglary-florida-police/story?id=29058510	71.00	146.00	156.00	0.00	0.00	0.00	entertainment	entertainment	entertainment
http://abcnews.go.com/Health/autism-speaks-urges-parents-vaccinate-children/story?id=28751485	428.00	544.00	565.00	0.00	0.00	0.00	health	health	health
http://abcnews.go.com/Health/blind-golden-retriever-smiley-warms-hearts-therapy-dog/story?id=29533746	311.00	441.00	653.00	7.69	14.29	7.69	health	health	health
http://abcnews.go.com/Health/breakdown-ingredients-childhood-vaccines/story?id=28859870	68.00	71.00	102.00	0.00	0.00	0.00	health	health	health
http://abcnews.go.com/Health/dads-heartfelt-plea-congress-year-leukemia-exposed-measles/story?id=28866376	189.00	207.00	331.00	0.00	0.00	0.00	health	health	health
http://abcnews.go.com/Health/dangerous-bacteria-mysteriously-escapes-louisiana-monkey-lab/story?id=29327907	98.00	100.00	129.00	-6.67	-7.14	-7.14	health	health	health
http://abcnews.go.com/Health/doctors-crawling-finish-line-great-idea/story?id=28998255	342.00	468.00	470.00	0.00	0.00	0.00	health	health	health
http://abcnews.go.com/Health/father-psych-ward-stabbing-victim-mental-patients-treated/story?id=28559283	38.00	42.00	56.00	-5.88	0.00	0.00	health	health	health
http://abcnews.go.com/Health/florida-woman-birth-141-pound-baby/story?id=28784382	195.00	196.00	241.00	0.00	0.00	0.00	health	health	health
http://abcnews.go.com/Health/renowned-neurologist-oliver-sacks-announces-terminal-cancer/story?id=29084210	77.00	134.00	140.00	0.00	0.00	0.00	health	health	health
http://abcnews.go.com/Health/sex-couples-day-biological-children-researchers/story?id=29220568	168.00	169.00	224.00	0.00	0.00	0.00	health	health	health
http://abcnews.go.com/Health/year-girl-dies-catching-flu-vaccine/story?id=28526729	169.00	221.00	266.00	0.00	0.00	0.00	health	health	health
http://abcnews.go.com/International/International/tiny-penguins-tiny-sweaters/story?id=28886035	1384.00	2712.00	3784.00	0.00	0.00	0.00	world	world	world
http://abcnews.go.com/International/air-canada-hard-landing-passengers-lucky-officials/story?id=29984179	58.00	59.00	66.00	12.50	-6.67	0.00	world	world	world
http://abcnews.go.com/International/american-hostages-mother-us-failed-children/story?id=28803264	94.00	109.00	565.00	4.76	4.76	-4.00	world	world	world
http://abcnews.go.com/International/american-soldier-christ-fighting-isis-iraq/story?id=29171878	51.00	57.00	263.00	0.00	-3.57	-3.57	world	world	world
http://abcnews.go.com/International/americans-germanwings-plane-official/story?id=29887148	17.00	32.00	89.00	0.00	-7.14	0.00	world	world	world
...	...	...	...	...	...	...	...	...	...
http://www.washingtonpost.com/news/the-intersect/wp/2015/03/03/the-disturbing-case-of-the-bloggers-who-fake-death-and-disease-for-attention/	59.00	80.00	90.00	-11.11	-11.11	-11.11	news	news	news
http://www.washingtonpost.com/news/the-intersect/wp/2015/03/09/facebook-censored-a-nude-painting-and-it-could-change-the-site-forever/	68.00	117.00	133.00	0.00	0.00	0.00	news	news	news
http://www.washingtonpost.com/news/the-intersect/wp/2015/03/13/what-was-fake-on-the-internet-this-week-putins-death-hillarys-horns-and-marijuana-at-kfc/	47.00	54.00	113.00	-4.76	-7.69	-4.76	news	news	news
http://www.washingtonpost.com/news/the-intersect/wp/2015/03/20/what-was-fake-on-the-internet-this-week-smartwatch-cancer-michael-browns-mom-and-the-true-story-of-unfriended/	26.00	56.00	69.00	0.00	-7.69	-7.69	news	news	news
http://www.washingtonpost.com/news/the-intersect/wp/2015/03/23/what-you-dont-know-about-internet-algorithms-is-hurting-you-and-you-probably-dont-know-very-much/	148.00	181.00	183.00	4.17	4.35	4.35	news	news	news
http://www.washingtonpost.com/news/the-intersect/wp/2015/04/01/what-is-fake-on-the-internet-today-a-comprehensive-updating-list-of-april-fools-pranks-and-hoaxes/	77.00	87.00	126.00	-5.88	-4.17	-4.55	news	news	news
http://www.washingtonpost.com/news/to-your-health/wp/2015/03/09/how-parents-create-narcissistic-children/	114.00	137.00	146.00	10.00	10.00	10.00	news	news	news
http://www.washingtonpost.com/opinions/2015/03/27/87655262-d3f4-11e4-a62f-ee745911a4ff_story.html	47.00	54.00	76.00	-5.00	-5.00	-5.00	opinions	opinions	opinions
http://www.washingtonpost.com/opinions/for-richer-or-poorer-the-challenges-of-marrying-outside-your-class/2015/03/26/cd7ccf72-ccac-11e4-8a46-b1dc9be5a8ff_story.html	50.00	83.00	204.00	6.25	6.25	4.54	opinions	opinions	opinions
http://www.washingtonpost.com/opinions/pro-discrimination-religious-freedom-laws-are-dangerous-to-america/2015/03/29/bdb4ce9e-d66d-11e4-ba28-f2a685dc7f89_story.html	217.00	390.00	435.00	0.00	0.00	0.00	opinions	opinions	opinions
http://www.washingtonpost.com/opinions/who-had-the-worst-week-in-washington-rep-aaron-schock/2015/03/20/66809852-ce6f-11e4-a2a7-9517a3a70506_story.html	21.00	34.00	35.00	-6.25	-6.67	-6.67	opinions	opinions	opinions
http://www.washingtonpost.com/politics/absence-of-2016-competition-for-clinton-raises-stakes-for-democrats/2015/03/11/60fc4ca8-c81d-11e4-a199-6cb5e63819d2_story.html	26.00	36.00	43.00	5.00	5.88	5.26	politics	politics	politics
http://www.washingtonpost.com/politics/hillary-clinton-to-answer-questions-about-use-of-private-e-mail-server/2015/03/10/4c000d00-c735-11e4-a199-6cb5e63819d2_story.html	33.00	43.00	131.00	0.00	0.00	4.00	politics	politics	politics
http://www.washingtonpost.com/politics/how-the-white-house-decides-whose-death-is-worth-presidential-notice/2015/03/12/0c43083a-c83d-11e4-a199-6cb5e63819d2_story.html	47.00	51.00	65.00	0.00	0.00	0.00	politics	politics	politics
http://www.washingtonpost.com/politics/mitt-romney-warms-to-marco-rubio-as-young-senator-cultivates-relationship/2015/03/13/21a769b8-c98d-11e4-a199-6cb5e63819d2_story.html	26.00	49.00	60.00	5.00	0.00	0.00	politics	politics	politics
http://www.washingtonpost.com/politics/police-suspect-arrested-in-shooting-of-two-officers-in-ferguson/2015/03/15/eb3140c2-cb38-11e4-8a46-b1dc9be5a8ff_story.html	37.00	41.00	197.00	0.00	0.00	-4.00	politics	politics	politics
http://www.washingtonpost.com/politics/secret-service-agents-disrupted-bomb-investigation-at-white-house/2015/03/12/0eb74590-c8c4-11e4-aa1a-86135599fb0f_story.html	69.00	88.00	157.00	0.00	4.76	0.00	politics	politics	politics
http://www.washingtonpost.com/politics/secret-service-agents-investigated-for-late-night-car-accident-at-white-house/2015/03/11/9c853906-c7ff-11e4-a199-6cb5e63819d2_story.html	58.00	66.00	85.00	0.00	0.00	0.00	politics	politics	politics
http://www.washingtonpost.com/politics/state-department-reviewing-whether-clinton-e-mail-violated-security-rules/2015/03/05/16d1547e-c378-11e4-9271-610273846239_story.html	43.00	43.00	65.00	0.00	0.00	0.00	politics	politics	politics
http://www.washingtonpost.com/posteverything/wp/2015/03/14/this-is-why-its-impossible-for-the-kremlin-to-lie-about-putins-weird-disappearance/	125.00	126.00	130.00	-5.26	-5.26	-5.26	posteverything	posteverything	posteverything
http://www.washingtonpost.com/posteverything/wp/2015/03/30/youre-not-fooling-everyone-with-your-pretend-laughter/	24.00	26.00	43.00	0.00	0.00	0.00	posteverything	posteverything	posteverything
http://www.washingtonpost.com/world/africa/deep-in-the-rain-forest-hunting-for-the-next-ebola-outbreak/2015/03/19/c1cba80e-b78c-11e4-bc30-a4e75503948a_story.html	57.00	63.00	108.00	4.35	0.00	4.35	world	world	world
http://www.washingtonpost.com/world/after-12-years-in-guantanamo-ex-detainees-find-little-solace-in-uruguay/2015/03/21/4d376006-c1e5-11e4-a188-8e4971d37a8d_story.html	43.00	68.00	108.00	5.00	0.00	0.00	world	world	world
http://www.washingtonpost.com/world/asia_pacific/north-koreas-growing-economy-and-americas-misconceptions-about-it/2015/03/13/b551d2d0-c1a8-11e4-a188-8e4971d37a8d_story.html	55.00	78.00	87.00	0.00	0.00	0.00	world	world	world
http://www.washingtonpost.com/world/europe/come-to-rome-for-the-cathedrals-the-ruins--and-the-red-light-district/2015/03/09/880d0440-bd37-11e4-9dfb-03366e719af8_story.html	37.00	38.00	52.00	4.76	-5.56	4.76	world	world	world
http://www.washingtonpost.com/world/europe/report-co-pilot-on-doomed-flight-had-psychological-treatments-in-past/2015/03/27/b1818c48-d40b-11e4-8b1e-274d670aa9c9_story.html	51.00	67.00	76.00	-4.00	0.00	-4.35	world	world	world
http://www.washingtonpost.com/world/middle_east/the-islamic-state-is-fraying-from-within/2015/03/08/0003a2e0-c276-11e4-a188-8e4971d37a8d_story.html	24.00	54.00	131.00	-11.76	0.00	0.00	world	world	world
http://www.washingtonpost.com/world/negotiators-hold-marathon-all-night-session-in-last-ditch-effort-for-agreement/2015/04/02/68334c88-d8b2-11e4-bf0b-f648b95a6488_story.html	51.00	68.00	102.00	6.25	0.00	5.00	world	world	world
http://www.washingtonpost.com/world/pilot-reportedly-locked-out-of-cockpit-before-plane-crashed-into-alpine-mountainside/2015/03/26/460770d8-d38c-11e4-a62f-ee745911a4ff_story.html	78.00	104.00	255.00	0.00	-5.56	-4.35	world	world	world
http://www.washingtonpost.com/world/plane-carrying-150-crashes-in-france-apparently-no-survivors/2015/03/24/6fe0fc70-d225-11e4-a62f-ee745911a4ff_story.html	103.00	105.00	253.00	0.00	-4.35	0.00	world	world	world

529 rows × 9 columns

In [4]:

# Strip plots of retweet counts, one panel per outlet, for news tweeted at least four times
df4 = tn.groupby('href').filter(lambda grp: len(grp) > 3)
gs = gridspec.GridSpec(3, 3)
# Eight panels on a 3x3 grid: two full rows, then a two-column-wide panel and a single one
panel_specs = [gs[r, c] for r in range(2) for c in range(3)] + [gs[2, :2], gs[2, 2]]
axs = [plt.subplot(spec) for spec in panel_specs]
# Upper y-limits that are fixed for some outlets before plotting
y_caps = {'CNN': 3000, 'ABC': 3000, 'NYT': 2000, 'WPOST': 1000}
for i, o in enumerate(df4.outlet.unique()):
    panel = axs[i]
    panel.set_title(o)
    if o in y_caps:
        panel.set_ylim([0, y_caps[o]])
    g = sns.stripplot(x="href", y="rt", data=df4[df4.outlet == o], ax=panel, jitter=True, size=4)
    # y label only for panels 0, 3, 6; x label only from the seventh panel on
    ylabel = '' if i % 3 else 'Retweet'
    xlabel = '' if i < 6 else 'Tweeted News'
    panel.set(xlabel=xlabel, ylabel=ylabel, ylim=0, xticks=[])

f = plt.gcf()
f.set_size_inches(10, 7, forward=True)
f.suptitle('Retweet Counts of The News Tweeted at Least Four Times', fontsize=18, fontweight='bold')
f.savefig('figs/rt-news-stripplot.png', bbox_inches='tight')

In [5]:

# Retweet statistics, split by whether the tweet has a value in its `url` column
tw = pd.read_csv('data/LIWC/LIWC2015 Results (tweet-texts).csv', encoding='utf-8')
url_missing = tw.url.isnull()
nan = tw[url_missing]    # tweets whose url field is null
url = tw[~url_missing]   # tweets whose url field is set
rt_summary = {
    'no url': nan.rt.describe(),
    'any url': url.rt.describe(),
    'news url': tn.rt.describe(),
}
pd.DataFrame(rt_summary)

Out[5]:

	any url	news url	no url
count	23255.00	16909.00	2344.00
mean	135.47	134.34	206.62
std	355.89	259.36	759.46
min	0.00	1.00	0.00
25%	43.00	45.00	44.00
50%	72.00	74.00	94.00
75%	133.00	134.00	207.00
max	36985.00	11031.00	31123.00

In [14]:

# Retweeted News: every tweeted story is repeated once, plus once more per 50 retweets,
# so that heavily retweeted stories weigh more in the "retweeted" statistics.
# The repetition is done with a single positional take instead of building one small
# DataFrame per row and concatenating them (which took ~3.5 minutes); the repeated rows
# keep their original index labels and the columns keep their original dtypes.
copies = 1 + (tn['rt'] // 50).astype(int)
rn = tn.iloc[np.repeat(np.arange(len(tn)), copies.values)]

In [6]:

# Summary statistics of sentiment polarity for published, tweeted and retweeted news
sp_sources = (('Published', pn), ('Tweeted', tn), ('Retweeted', rn))
pd.DataFrame({label: frame.sp.describe() for label, frame in sp_sources})

Out[6]:

	Published	Retweeted	Tweeted
count	35930.000000	53792.000000	16909.000000
mean	0.514790	0.201119	0.124686
std	1.793846	1.977284	1.914987
min	-10.730000	-8.650000	-8.650000
25%	-0.510000	-0.990000	-1.050000
50%	0.550000	0.180000	0.090000
75%	1.630000	1.360000	1.260000
max	11.320000	14.700000	14.700000

In [7]:

# The categories can be filtered by raising the number of news required per category
filtr = 100
excluded_cats = 'news article storyline bigstory'.split()  # categories left out of the plot
df = tn.groupby('cat').filter(lambda x: len(x) > filtr and x.name not in excluded_cats)
# Per-category aggregate of retweets and sentiment polarity (pivot_table's default aggfunc is the mean)
df = pd.pivot_table(df, values=['sp', 'rt'], index=['cat'])
# x / y / data are passed by keyword: recent seaborn releases reject the positional form
ax = sns.regplot(x='rt', y='sp', data=df)
ax.set(xlabel='Retweet', ylabel='Sentiment Polarity', title='SP vs RT of Categories', ylim=(-1, 2))
# Label every point with its category. The coordinates are looked up by column name so the
# label position does not depend on the column order of the pivot table.
df.apply(lambda r: ax.annotate(r.name, (r['rt'], r['sp']), xytext=(5, -2), textcoords='offset points'), axis=1);
plt.gcf().savefig('figs/cat-rt-sp-scatter.png')

In [8]:

# Mean, median and maximum retweet count of each outlet
rt_by_outlet = pd.pivot_table(tn, values=['rt'], index=['outlet'], aggfunc=[np.mean, np.median, max])
rt_by_outlet.rename(columns={'rt': 'Retweet'})

Out[8]:

	mean	median	max
	Retweet	Retweet	Retweet
outlet
ABC	184.100213	110.5	6994
AP	89.745522	70.0	862
CBSNews	72.585079	48.0	2458
CNN	396.500725	248.0	7752
FoxNews	134.664111	89.0	3122
NBCNews	85.786790	55.0	11031
NYT	139.529051	84.0	8917
WPOST	83.048544	59.0	3683

In [9]:

# Density plot of retweets: one KDE curve per outlet
rt_curves = [sns.kdeplot(tn.loc[tn['outlet'] == o, 'rt'], label=o) for o in outlets]
ax = rt_curves[0]  # return value of the first kdeplot call
ax.set(xlim=(0, 500), title='Retweet Distributions', xlabel='Retweet Count', ylabel='Density');

In [10]:

# Sentiment polarity [posemo - negemo] of published, tweeted and retweeted news
c = 'sp'
for frame, stage in ((pn, 'Published News'), (tn, 'Tweeted News'), (rn, 'Retweeted News')):
    ax = sns.kdeplot(frame[c], label=stage)
# `ax` now holds the return value of the last (retweeted) kdeplot call
ax.set_title('Comparing Sentiment Polarity of News Published/Tweeted/Retweeted - All Outlets Combined')
ax.set(xlim=(-10, 10), xlabel='Sentiment Polarity', ylabel='Density')
plt.savefig('figs/sentiment-comparison-published-tweeted-retweeted.png', bbox_inches='tight')

In [11]:

# KDE of the 'Tone' column for published, tweeted and retweeted news
tone_published = pn['Tone']
tone_tweeted = tn['Tone']
tone_retweeted = rn['Tone']
sns.kdeplot(tone_published, label='Published News')
sns.kdeplot(tone_tweeted, label='Tweeted News')
ax = sns.kdeplot(tone_retweeted, label='Retweeted News')
ax.set(title='Comparing Tone of News Published/Tweeted/Retweeted - All Outlets Combined',
       xlabel='Tone',
       ylabel='Density')
plt.savefig('figs/tone-comparison-published-tweeted-retweeted.png', bbox_inches='tight')

In [12]:

# Sentiment polarity of published news: one KDE curve per outlet
df = pn
outlet_curves = [sns.kdeplot(df.loc[df['outlet'] == o, 'sp'], label=o) for o in outlets]
ax = outlet_curves[0]
ax.set(xlim=(-10, 10),
       title='Sentiment Polarities of Published News',
       xlabel='Sentiment Polarity',
       ylabel='Density')
plt.savefig('figs/sentiment-published.png', bbox_inches='tight')

In [13]:

# Sentiment polarity of tweeted news: one KDE curve per outlet
df = tn
sp_of = lambda name: df[df['outlet'] == name]['sp']
ax = [sns.kdeplot(sp_of(o), label=o) for o in outlets][0]
ax.set(xlim=(-10, 10))
ax.set(title='Sentiment Polarities of Tweeted News', xlabel='Sentiment Polarity', ylabel='Density')
plt.savefig('figs/sentiment-tweeted.png', bbox_inches='tight')

In [14]:

# Sentiment polarity of retweeted news: one KDE curve per outlet
df = rn
sp_by_outlet = [(o, df.loc[df['outlet'] == o, 'sp']) for o in outlets]
drawn = [sns.kdeplot(values, label=name) for name, values in sp_by_outlet]
ax = drawn[0]
ax.set(xlim=(-10, 10))
ax.set_title('Sentiment Polarities of Retweeted News')
ax.set(xlabel='Sentiment Polarity', ylabel='Density')
plt.savefig('figs/sentiment-retweeted.png', bbox_inches='tight')

In [15]:

# Each outlet in a separate plot, on a grid of 4 rows x 2 columns
f,axes = plt.subplots(nrows=4,ncols=2,figsize=(16,16),subplot_kw={'xlim': (-10,10)});
plt.subplots_adjust(hspace=0.3,wspace=0.1)
for i,o in enumerate(outlets):
    # Integer division: on Python 3 `i/2` is a float and cannot be used as an array index
    ax = axes[i//2][i%2]
    ax.set_title(o,fontsize=14)
    ax.set_xlabel('Sentiment Polarity')
    ax.set_ylabel('Density')
    # Published, tweeted and retweeted polarity curves of this outlet on the same axes
    sns.kdeplot(pn[pn['outlet']==o]['sp'],label='all news',ax=ax)
    sns.kdeplot(tn[tn['outlet']==o]['sp'],label='tweeted',ax=ax)
    sns.kdeplot(rn[rn['outlet']==o]['sp'],label='retweeted',ax=ax)
f.suptitle('Sentiment Scores per Outlet', fontsize=20,y=0.93)
plt.savefig('figs/sentiment-per-outlet.png',bbox_inches='tight')

In [16]:

# Mean sentiment polarity per outlet for published, tweeted and retweeted news, side by side
df = None
for label, frame in (('Published', pn), ('Tweeted', tn), ('Retweeted', rn)):
    column = pd.pivot_table(frame, index=['outlet'], values=['sp'], aggfunc=np.mean)
    column = column.rename(columns={'sp': label})
    # The first table seeds the result; the following ones are joined on the outlet index
    df = column if df is None else df.join(column)
df

Out[16]:

	Published	Tweeted	Retweeted
outlet
ABC	0.997326	0.347335	0.568554
AP	0.409545	-0.178651	-0.265285
CBSNews	0.247449	-0.109504	-0.132829
CNN	0.321695	0.224725	0.173701
FoxNews	0.712249	-0.007499	-0.004798
NBCNews	0.860164	-0.264770	-0.093226
NYT	0.756503	0.357766	0.430961
WPOST	0.653282	0.570105	0.542642

In [17]:

# Line plot of the current `df` table, one line per column
ax = df.plot()
ax.set(title='Sentiment Polarities of News Averaged per Outlet',
       ylabel='Sentiment Polarity',
       xlabel='Outlets')
plt.savefig('figs/sentiment-averages-per-outlet.png', bbox_inches='tight')

In [32]:

cs = ['politics','sports']#,'world'
# Solid curves: tweeted news of each category
for cat_name in cs:
    ax = sns.kdeplot(tn[tn['cat']==cat_name]['sp'],label=cat_name+' tweeted')
# Dashed curves: retweeted news, each drawn with the palette colour at its category's position
palette = sns.color_palette()
for i,cat_name in enumerate(cs):
    sns.kdeplot(rn[rn['cat']==cat_name]['sp'],label=cat_name+' retweeted',linestyle='--',color=palette[i])
# Title spelling fixed ("Distribution"); the title ends up in the saved figure
ax.set(xlim=(-7,7),title='Sentiment Distribution of Politics and Sports News',xlabel='Sentiment Polarity',ylabel='Density')
fname= '-'.join(cs)
plt.savefig('figs/tweeted-'+fname+'.png',bbox_inches='tight')

In [19]:

# Leave AP and CBSNews out of the outlet list used by the cells that follow.
# The list is rebuilt with a filter instead of calling list.remove(), so the cell can be
# re-run safely: remove() raises ValueError once the names are already gone.
outlets = [o for o in outlets if o not in ('AP', 'CBSNews')]

In [20]:

# Tweeted vs. retweeted sentiment polarity of 'politics' news, one panel per outlet (3 x 2 grid)
splim=(-10, 10)
sns.mpl.rc("figure", figsize=(10,4))
c='politics'
f,axes = plt.subplots(nrows=3,ncols=2,figsize=(16,12),subplot_kw={'xlim': splim});
plt.subplots_adjust(hspace=0.3,wspace=0.1)
for i,o in enumerate(outlets):
    # Integer division: on Python 3 `i/2` is a float and cannot be used as an array index
    ax = axes[i//2][i%2]
    ax.set_title(o,fontsize=14)
    ax.set_xlabel('Sentiment Polarity')
    ax.set_ylabel('Density')
    # Polarity of this outlet's news in category `c`, tweeted and retweeted
    tw = tn[(tn['cat']==c) & (tn['outlet']==o)]['sp']
    rt = rn[(rn['cat']==c) & (rn['outlet']==o)]['sp']
    sns.kdeplot(tw,label=c+' tweeted',ax=ax)
    sns.kdeplot(rt,label=c+' retweeted',ax=ax)
f.suptitle('Sentiment Scores for "Politics" per Outlet', fontsize=20,y=0.94)
plt.savefig('figs/politics-per-outlet.png',bbox_inches='tight')

In [21]:

# Tweeted vs. retweeted sentiment polarity of 'sports' news, one panel per outlet (3 x 2 grid)
splim=(-10, 10)
sns.mpl.rc("figure", figsize=(10,4))
c='sports'
f,axes = plt.subplots(nrows=3,ncols=2,figsize=(16,12),subplot_kw={'xlim': splim});
plt.subplots_adjust(hspace=0.3,wspace=0.1)
for i,o in enumerate(outlets):
    # Integer division: on Python 3 `i/2` is a float and cannot be used as an array index
    ax = axes[i//2][i%2]
    ax.set_title(o,fontsize=14)
    if o == 'CNN':
        # The CNN panel only gets a centred placeholder text; no curves are drawn for it
        ax.text(0.5, 0.5, 'Category N/A',
        horizontalalignment='center',
        verticalalignment='center',
        fontsize=16, color='red',
        transform=ax.transAxes)
        continue
    ax.set_xlabel('Sentiment Polarity')
    ax.set_ylabel('Density')
    # Polarity of this outlet's news in category `c`, tweeted and retweeted
    tw = tn[(tn['cat']==c) & (tn['outlet']==o)]['sp']
    rt = rn[(rn['cat']==c) & (rn['outlet']==o)]['sp']
    sns.kdeplot(tw,label=c+' tweeted',ax=ax)
    sns.kdeplot(rt,label=c+' retweeted',ax=ax)
f.suptitle('Sentiment Scores for "Sports" per Outlet', fontsize=20,y=0.94)
plt.savefig('figs/sports-per-outlet.png',bbox_inches='tight')

In [22]:

# Mean sentiment polarity of 'politics' news per outlet: tweeted next to retweeted
c = 'politics'
tweeted_mean = pd.pivot_table(tn[tn['cat'] == c], index=['outlet'], values=['sp'], aggfunc=np.mean)
retweeted_mean = pd.pivot_table(rn[rn['cat'] == c], index=['outlet'], values=['sp'], aggfunc=np.mean)
df = tweeted_mean.rename(columns={'sp': 'Tweeted'})
df = df.join(retweeted_mean.rename(columns={'sp': 'Retweeted'}))
df

Out[22]:

	Tweeted	Retweeted
outlet
ABC	0.427611	0.289167
CNN	0.399042	0.256381
FoxNews	0.435667	0.366697
NBCNews	0.863457	1.445029
NYT	0.414118	0.316609
WPOST	0.459786	0.351538

In [23]:

# Mean sentiment polarity of 'sports' news per outlet: tweeted next to retweeted
c = 'sports'
df = None
for label, frame in (('Tweeted', tn), ('Retweeted', rn)):
    means = pd.pivot_table(frame[frame['cat'] == c], index=['outlet'], values=['sp'], aggfunc=np.mean)
    means = means.rename(columns={'sp': label})
    # The tweeted table seeds the result; the retweeted one is joined on the outlet index
    df = means if df is None else df.join(means)
df

Out[23]:

	Tweeted	Retweeted
outlet
ABC	1.908525	2.193559
CNN	2.652000	2.555312
FoxNews	0.740492	1.086916
NBCNews	0.516452	0.525119
NYT	1.554690	1.531263
WPOST	0.653333	0.846667

In [24]:

# Is there any correlation between retweets (rt), the sp polarity and the sp_t polarity?
polarity_cols = ['rt', 'sp', 'sp_t']
tn.loc[:, polarity_cols].corr()

Out[24]:

	rt	sp	sp_t
rt	1.000000	0.024777	0.017971
sp	0.024777	1.000000	0.424538
sp_t	0.017971	0.424538	1.000000