#!/usr/bin/env python
# coding: utf-8
"""Exploring Hacker News.

In this project we analyse two things:

1. Do "Ask HN" or "Show HN" posts receive more comments on average?
2. Do Ask HN posts created at a certain hour receive more comments on
   average?

The script reads ``hacker_news.csv`` from the current working directory and
prints every intermediate result, in the same order as the notebook it was
exported from.
"""

from csv import reader
import datetime as dt

# Input file, resolved relative to the current working directory.
CSV_PATH = 'hacker_news.csv'

# Positions of the columns the analysis reads from each CSV row.
TITLE_COL = 1
NUM_COMMENTS_COL = 4
CREATED_AT_COL = 6

# Layout of the creation timestamp, e.g. "8/16/2016 9:55".
CREATED_AT_FORMAT = "%m/%d/%Y %H:%M"


def load_posts(path=CSV_PATH):
    """Read the CSV at *path* and return ``(headers, rows)``.

    ``headers`` is the first row of the file and ``rows`` is every following
    row, each as a list of strings.  An empty file yields ``([], [])``.
    """
    # NOTE(review): assumes the data file is UTF-8 encoded -- confirm.
    # newline="" lets the csv module handle line endings inside quoted fields.
    with open(path, encoding="utf-8", newline="") as csv_file:
        rows = list(reader(csv_file))
    if not rows:
        return [], []
    return rows[0], rows[1:]


def split_posts(posts):
    """Partition *posts* by title prefix (case-insensitive).

    Returns ``(ask_posts, show_posts, other_posts)``: rows whose title starts
    with "ask hn", rows whose title starts with "show hn", and the rest.
    """
    ask_posts, show_posts, other_posts = [], [], []
    for row in posts:
        title = row[TITLE_COL].lower()
        if title.startswith('ask hn'):
            ask_posts.append(row)
        elif title.startswith('show hn'):
            show_posts.append(row)
        else:
            other_posts.append(row)
    return ask_posts, show_posts, other_posts


def total_comments(posts):
    """Return the sum of the comment counts of *posts* as an int."""
    return sum(int(row[NUM_COMMENTS_COL]) for row in posts)


def mean_comments(total, count):
    """Return ``total / count``, or ``0.0`` when *count* is zero.

    The zero guard keeps the script from crashing when a category has no
    posts at all.
    """
    if not count:
        return 0.0
    return total / count


def tally_by_hour(posts):
    """Aggregate *posts* by the hour of day they were created.

    Returns ``(comments_by_hour, counts_by_hour)``.  Both dicts are keyed by
    the zero-padded hour string ("00".."23"); the first maps to the summed
    comment count, the second to the number of posts.
    """
    comments_by_hour = {}
    counts_by_hour = {}
    for row in posts:
        created_at = dt.datetime.strptime(row[CREATED_AT_COL], CREATED_AT_FORMAT)
        hour = created_at.strftime("%H")
        counts_by_hour[hour] = counts_by_hour.get(hour, 0) + 1
        comments_by_hour[hour] = (
            comments_by_hour.get(hour, 0) + int(row[NUM_COMMENTS_COL])
        )
    return comments_by_hour, counts_by_hour


def average_by_hour(comments_by_hour, counts_by_hour):
    """Return a list of ``[hour, average comments per post]`` pairs."""
    return [
        [hour, comments_by_hour[hour] / counts_by_hour[hour]]
        for hour in comments_by_hour
    ]


def swap_pairs(avg_by_hour):
    """Turn ``[hour, average]`` pairs into ``(average, hour)`` tuples.

    Putting the average first lets a plain ``sorted`` rank the hours by it.
    """
    return [(average, hour) for hour, average in avg_by_hour]


def format_top_hours(sorted_swap, limit=5):
    """Return one report line for each of the first *limit* entries.

    *sorted_swap* holds ``(average, hour)`` tuples, already ordered.
    """
    lines = []
    for average, hour in sorted_swap[:limit]:
        label = dt.datetime.strptime(hour, '%H').strftime('%H:%M')
        lines.append(
            '{} {:.2f} average comments per post'.format(label, average)
        )
    return lines


def main():
    """Run the whole analysis and print each intermediate result."""
    headers, hn = load_posts()
    print(headers)
    print(hn[:5])

    ask_posts, show_posts, other_posts = split_posts(hn)
    print('Number of Ask_Posts', len(ask_posts))
    print('Number of Show_Posts', len(show_posts))
    print('Number of Other_Posts', len(other_posts))
    print(ask_posts[:5])

    # Calculating total and average Ask comments.
    total_ask_comments = total_comments(ask_posts)
    print('Total Ask Comments', total_ask_comments)
    avg_ask_comments = mean_comments(total_ask_comments, len(ask_posts))
    print('Average Ask Comments', avg_ask_comments)

    # Calculating total and average Show comments.
    total_show_comments = total_comments(show_posts)
    print('Total Show Comments are:', total_show_comments)
    avg_show_comments = mean_comments(total_show_comments, len(show_posts))
    print('Average show comments', avg_show_comments)

    # Finding recorded from the original notebook run: apparently Ask HN
    # posts receive more comments per post, based on the calculated average,
    # which is 14.03.

    comments_by_hour, counts_by_hour = tally_by_hour(ask_posts)
    print(comments_by_hour)
    print(counts_by_hour)

    avg_by_hour = average_by_hour(comments_by_hour, counts_by_hour)
    print(avg_by_hour)

    swap_avg_by_hour = swap_pairs(avg_by_hour)
    print(swap_avg_by_hour)

    # Highest average first; ties fall back to the hour string, descending.
    sorted_swap = sorted(swap_avg_by_hour, reverse=True)
    print(sorted_swap)

    for line in format_top_hours(sorted_swap):
        print(line)

    # Finding recorded from the original notebook run: at 15:00 every day we
    # can expect the highest number of comments per Ask HN post, based on
    # our calculations.


if __name__ == "__main__":
    main()