Do Ask HN or Show HN posts receive more comments on average?
import csv
import datetime as dt
# Load the Hacker News export. The first CSV row is kept separately as the
# header row; every remaining row becomes one entry (a list of strings) in
# hn_data.
# newline='' hands line-ending handling to the csv module, which is what the
# csv documentation requires so that quoted fields containing embedded
# newlines are parsed correctly.
with open('hacker_news.csv', newline='') as f:
    reader = csv.reader(f)
    headers = next(reader)
    hn_data = list(reader)

# Quick look at what was loaded: column names, two sample rows, row count.
print(headers)
print(hn_data[0:2])
print(len(hn_data))
# Sort every row into one of three groups according to how the text in
# column 1 begins (compared case-insensitively): "ask hn", "show hn", or
# anything else. Row order within each group follows hn_data.
ask_posts = []
show_posts = []
othr_posts = []
for row in hn_data:
    lowered = row[1].lower()
    if lowered.startswith('ask hn'):
        ask_posts.append(row)
    elif lowered.startswith('show hn'):
        show_posts.append(row)
    else:
        othr_posts.append(row)

# Group sizes, followed by their sum (which should equal len(hn_data)).
for group in (ask_posts, show_posts, othr_posts):
    print(len(group))
print(len(ask_posts) + len(show_posts) + len(othr_posts))
# For each group of posts, add up column 4 (converted from text with int())
# and divide by the number of posts in the group to get the per-post average.
total_ask_comments = sum(int(row[4]) for row in ask_posts)
averg_ask_comments = total_ask_comments / len(ask_posts)

total_show_comments = sum(int(row[4]) for row in show_posts)
averg_show_comments = total_show_comments / len(show_posts)

total_othr_comments = sum(int(row[4]) for row in othr_posts)
averg_othr_comments = total_othr_comments / len(othr_posts)

# Report total then average for ask, show and other posts, in that order.
for figure in (
    total_ask_comments,
    averg_ask_comments,
    total_show_comments,
    averg_show_comments,
    total_othr_comments,
    averg_othr_comments,
):
    print(figure)
From the numbers above, we can see that ask and show posts receive fewer comments than other posts,
and that ask posts receive more comments than show posts.
# Show the column names and one sample ask post for reference.
print(headers)
print(ask_posts[0])

# Replace the text in column 6 of every ask post with a datetime object,
# parsed as month/day/year hour:minute. The rows are modified in place, so
# later steps can read post[6].hour directly.
for post in ask_posts:
    # Only parse values that are still strings: re-running this step (e.g.
    # re-executing the notebook cell) would otherwise hand an already
    # converted datetime to strptime and raise TypeError.
    if isinstance(post[6], str):
        post[6] = dt.datetime.strptime(post[6], '%m/%d/%Y %H:%M')
# One empty bucket for every hour of the day, keyed 0 through 23.
posts_per_hour = dict((hour, []) for hour in range(24))
print(posts_per_hour)
print(len(posts_per_hour))

# File each ask post under the hour of the datetime stored in its column 6.
for ask_post in ask_posts:
    created_hour = ask_post[6].hour
    posts_per_hour[created_hour].append(ask_post)

# Spot check: size of the midnight bucket, and the bucket count (still 24).
print(len(posts_per_hour[0]))
print(len(posts_per_hour))
# Number of ask posts created in each hour.
count_posts_per_hour = {
    hour: len(posts) for hour, posts in posts_per_hour.items()
}
# Sum of column 4 (converted with int()) over the ask posts of each hour.
tot_comment_per_hour = {
    hour: sum(int(post[4]) for post in posts)
    for hour, posts in posts_per_hour.items()
}

print('Amount of posts for each hour:')
for hour, count in count_posts_per_hour.items():
    print(hour, ' >>> ', count)

print('\nTotal comments for each hour:')
for hour, total in tot_comment_per_hour.items():
    print(hour, ' >>> ', total)

print('\nAvg number of comments ask posts receive by hour created:')
for hour, total in tot_comment_per_hour.items():
    count = count_posts_per_hour[hour]
    # An hour with no ask posts has no meaningful average; report 0.00
    # instead of failing with ZeroDivisionError.
    average = total / count if count else 0.0
    print('{:02d} >>> {:.2f}'.format(hour, average))
%matplotlib inline
import matplotlib.pyplot as plt
# Average number of comments per ask post, keyed by hour.
avg_by_hour = {}
for hour, total in tot_comment_per_hour.items():
    count = count_posts_per_hour[hour]
    # Hours without any ask posts get 0.0 rather than raising
    # ZeroDivisionError.
    avg_by_hour[hour] = total / count if count else 0.0

# plt.bar documents x and height as float or array-like, so pass explicit
# lists (hours on the x axis, averages as bar heights) instead of the dict
# and its values view.
pl = plt.bar(list(avg_by_hour.keys()), list(avg_by_hour.values()))
# Rank the hours by average comments: a list of [average, hour] pairs sorted
# in descending order (pairs with equal averages fall back to comparing the
# hour, also descending).
lol = sorted(
    [[average, hour] for hour, average in avg_by_hour.items()],
    reverse=True,
)

print("Top 5 Hours for Ask Posts Comments:")
# This list was previously a bare expression, so its value was discarded when
# the code ran as a script and nothing appeared under the heading; print it.
print([hour for _, hour in lol[:5]])

# Full ranking, best hour first, with the hour rendered as HH:MM.
for average, hour in lol:
    print(
        dt.time(hour).strftime('%H:%M')
        + ' : {:.2f} average comments per post'.format(average)
    )