# 1. Do Ask HN or Show HN posts receive more comments on average?
# 2. Do posts created at a certain time receive more comments on average?
from csv import reader
# Load the Hacker News dataset into memory. The context manager closes the
# file handle as soon as every row has been read, and newline='' is what the
# csv module asks for so that newlines inside quoted fields parse correctly.
with open('hacker_news.csv', newline='') as open_file:
    read = reader(open_file)
    hn = list(read)

# The first row is kept separately as the header; the remaining rows are the
# data used by the rest of the script.
headers = hn[0]
hn = hn[1:]
print(headers)
print(hn[:5])
# Split the rows into three buckets according to how the title (column 1)
# begins, ignoring case: "ask hn", "show hn", or anything else.
ask_posts, show_posts, other_posts = [], [], []
for post in hn:
    lowered_title = post[1].lower()
    if lowered_title.startswith('ask hn'):
        bucket = ask_posts
    elif lowered_title.startswith('show hn'):
        bucket = show_posts
    else:
        bucket = other_posts
    bucket.append(post)

print('Number of Ask_Posts', len(ask_posts))
print('Number of Show_Posts', len(show_posts))
print('Number of Other_Posts', len(other_posts))
print(ask_posts[:5])
# Total and per-post average number of comments on Ask HN posts. Column 4
# holds the comment count as text, so it is converted to int before summing.
total_ask_comments = sum(int(post[4]) for post in ask_posts)
print('Total Ask Comments', total_ask_comments)
avg_ask_comments = total_ask_comments / len(ask_posts)
print('Average Ask Comments', avg_ask_comments)

# The same calculation for Show HN posts.
total_show_comments = sum(int(post[4]) for post in show_posts)
print('Total Show Comments are:', total_show_comments)
avg_show_comments = total_show_comments / len(show_posts)
print('Average show comments', avg_show_comments)
import datetime as dt
# Pair each Ask HN post's creation timestamp (column 6) with its comment
# count (column 4, converted to int).
result_list = [(post[6], int(post[4])) for post in ask_posts]

# Tally, per hour of day, how many Ask HN posts were created and how many
# comments they collected. Keys are the "%H" rendering of the parsed
# timestamp, i.e. two-digit hour strings.
counts_by_hour = {}
comments_by_hour = {}
for created_at, n_comments in result_list:
    hour = dt.datetime.strptime(created_at, "%m/%d/%Y %H:%M").strftime("%H")
    counts_by_hour[hour] = counts_by_hour.get(hour, 0) + 1
    comments_by_hour[hour] = comments_by_hour.get(hour, 0) + n_comments

print(comments_by_hour)
print(counts_by_hour)
# Average number of comments per Ask HN post for each hour, stored as
# [hour, average] pairs in the dictionary's iteration order.
avg_by_hour = [
    [hour, total / counts_by_hour[hour]]
    for hour, total in comments_by_hour.items()
]
print(avg_by_hour)

# Put the average first so that sorting the tuples orders them by average.
swap_avg_by_hour = [(average, hour) for hour, average in avg_by_hour]
print(swap_avg_by_hour)

sorted_swap = sorted(swap_avg_by_hour, reverse=True)
print(sorted_swap)

# Report the five hours with the highest average comment counts.
for average, hour in sorted_swap[:5]:
    label = dt.datetime.strptime(hour, '%H').strftime('%H:%M')
    print('{} {:.2f} average comments per post'.format(label, average))