#!/usr/bin/env python
# coding: utf-8

# # Working on Hacker News articles
#
# Splits the posts in ``hacker_news.csv`` into "Ask HN", "Show HN" and other
# posts, compares the average number of comments on Ask and Show posts, and
# ranks the hours of the day by the average number of comments that Ask posts
# created in that hour receive.

import datetime as dt
from csv import reader

DATA_FILE = 'hacker_news.csv'

# Positions of the columns this script reads from each CSV row.
TITLE_COL = 1
COMMENTS_COL = 4
CREATED_AT_COL = 6

# Layout of the creation timestamp column, e.g. "8/16/2016 9:55".
CREATED_AT_FORMAT = "%m/%d/%Y %H:%M"


def read_rows(path):
    """Return every row of the CSV file at *path* as a list of lists.

    The file is opened in a ``with`` block so the handle is always closed,
    and with ``newline=''`` as the ``csv`` module requires for correct
    handling of newlines embedded in quoted fields.
    """
    with open(path, newline='') as file:
        return list(reader(file))


def split_posts(rows):
    """Partition *rows* into ``(ask_posts, show_posts, other_posts)``.

    A row is an Ask post when its title starts with "ask hn" and a Show post
    when it starts with "show hn"; the comparison ignores case. Every other
    row lands in ``other_posts``. Row order is preserved within each list.
    """
    ask_posts, show_posts, other_posts = [], [], []
    for row in rows:
        title = row[TITLE_COL].lower()
        if title.startswith('ask hn'):
            ask_posts.append(row)
        elif title.startswith('show hn'):
            show_posts.append(row)
        else:
            other_posts.append(row)
    return ask_posts, show_posts, other_posts


def average_comments(posts):
    """Return the mean comment count of *posts*.

    Returns ``0.0`` for an empty list instead of dividing by zero.
    """
    if not posts:
        return 0.0
    return sum(int(row[COMMENTS_COL]) for row in posts) / len(posts)


def hourly_totals(dated_comments):
    """Aggregate ``[created_at, comments]`` pairs by hour of creation.

    Returns ``(counts_by_hour, comments_by_hour)``: two dicts keyed by the
    zero-padded hour string ("00".."23") holding the number of posts and the
    summed comment count for that hour. Keys appear in first-seen order.

    Raises ``ValueError`` if a timestamp does not match CREATED_AT_FORMAT.
    """
    counts_by_hour = {}
    comments_by_hour = {}
    for created_at, comments in dated_comments:
        hour = dt.datetime.strptime(created_at, CREATED_AT_FORMAT).strftime("%H")
        counts_by_hour[hour] = counts_by_hour.get(hour, 0) + 1
        comments_by_hour[hour] = comments_by_hour.get(hour, 0) + comments
    return counts_by_hour, comments_by_hour


def average_by_hour(counts_by_hour, comments_by_hour):
    """Return ``[hour, average comments per post]`` pairs, one per hour."""
    return [
        [hour, comments_by_hour[hour] / counts_by_hour[hour]]
        for hour in comments_by_hour
    ]


def main():
    """Run the whole analysis on DATA_FILE and print each intermediate result."""
    # --- Load the data set -------------------------------------------------
    hn = read_rows(DATA_FILE)
    print(hn[:5])
    print(len(hn))
    if not hn:
        # Nothing to analyse; hn[0] below would raise IndexError.
        raise SystemExit("{} contains no rows".format(DATA_FILE))

    # --- Separate the header row from the data rows -------------------------
    hn_headers = hn[0]
    print(hn_headers)
    hn = hn[1:]
    print(hn[:5])

    # --- Split posts by kind ------------------------------------------------
    ask_posts, show_posts, _ = split_posts(hn)
    # The counts are printed explicitly: a bare ``len(...)`` expression only
    # displays a value inside a notebook cell, never when run as a script.
    print(len(ask_posts))
    print('\n')
    print(len(show_posts))

    print(ask_posts[:2])
    print('\n')
    print(show_posts[:2])

    # --- Average comments per post kind -------------------------------------
    avg_ask_comments = average_comments(ask_posts)
    print(avg_ask_comments)

    avg_show_comments = average_comments(show_posts)
    print(avg_show_comments)

    # --- Ask-post comments by hour of creation ------------------------------
    result_list = [
        [row[CREATED_AT_COL], int(row[COMMENTS_COL])] for row in ask_posts
    ]
    print(len(ask_posts))
    print(len(result_list))

    counts_by_hour, comments_by_hour = hourly_totals(result_list)
    total = sum(comments_by_hour.values())
    print(counts_by_hour)
    print(comments_by_hour)
    print(total)

    avg_by_hour = average_by_hour(counts_by_hour, comments_by_hour)
    print(avg_by_hour)

    # --- Rank the hours -----------------------------------------------------
    # Put the average first so a plain sort orders by it (ties fall back to
    # the hour string).
    swap_avg_by_hour = [[average, hour] for hour, average in avg_by_hour]
    print(swap_avg_by_hour)

    sorted_swap = sorted(swap_avg_by_hour, reverse=True)
    print("Top 5 hours for Ask posts comments")
    for entry in sorted_swap[:5]:
        print(entry)
    print('\n')

    if sorted_swap:
        best_average, best_hour = sorted_swap[0]
        hour_label = dt.datetime.strptime(best_hour, "%H").strftime("%H:%M")
        print("{}: {:.2f} average comments per post".format(hour_label, best_average))
    else:
        print("No Ask HN posts found")


if __name__ == "__main__":
    main()

# # Conclusion
#
# Around 3 pm is the best time to publish an Ask post in order to receive the
# maximum number of comments.