#!/usr/bin/env python
# coding: utf-8

# # Mobile Apps
#
# ## This Project is intended to ...

from csv import reader


def load_csv(path):
    """Read the CSV file at *path* and return its rows as a list of lists.

    The file is closed as soon as all rows have been read. ``newline=""``
    lets the csv module handle line endings embedded in quoted fields.
    """
    # NOTE(review): assumes the data files are UTF-8 encoded — confirm.
    with open(path, encoding="utf-8", newline="") as csv_file:
        return list(reader(csv_file))


def explore_data(dataset, start, end, rows_and_columns=False):
    """Print the rows ``dataset[start:end]``, each followed by blank lines.

    When *rows_and_columns* is true, also print the total number of rows
    and the number of columns (taken from the first row; 0 when the
    dataset is empty).
    """
    for row in dataset[start:end]:
        print(row)
        print('\n')  # adds a new (empty) line after each row

    if rows_and_columns:
        print('Number of rows:', len(dataset))
        print('Number of columns:', len(dataset[0]) if dataset else 0)


def find_malformed_rows(dataset):
    """Return ``(index, row)`` pairs for rows whose length differs from row 0.

    Row 0 is treated as the header. An empty dataset yields an empty list.
    """
    if not dataset:
        return []
    header_len = len(dataset[0])
    return [
        (index, row)
        for index, row in enumerate(dataset)
        if len(row) != header_len
    ]


def find_duplicates(rows, name_col=0):
    """Split the names in *rows* into repeated and first-seen occurrences.

    Returns ``(duplicate_names, unique_names)``: every occurrence of a name
    after its first goes to ``duplicate_names``; ``unique_names`` holds each
    name once, in first-seen order.
    """
    seen = set()  # O(1) membership test instead of scanning a list
    duplicate_names = []
    unique_names = []
    for row in rows:
        name = row[name_col]
        if name in seen:
            duplicate_names.append(name)
        else:
            seen.add(name)
            unique_names.append(name)
    return duplicate_names, unique_names


def max_reviews_by_app(rows, name_col=0, reviews_col=3):
    """Map each app name to the highest review count found among its rows.

    The review count is read from column *reviews_col* and converted with
    ``float``; a value that cannot be converted raises ``ValueError``.
    """
    highest = {}
    for row in rows:
        name = row[name_col]
        n_reviews = float(row[reviews_col])
        # Keyed assignment: ``highest.update(name=...)`` would write the
        # literal key "name" instead of the app's name.
        if name not in highest or highest[name] < n_reviews:
            highest[name] = n_reviews
    return highest


if __name__ == "__main__":
    # In[1]:
    apple = load_csv("AppleStore.csv")
    google = load_csv("googleplaystore.csv")

    # In[2]:
    explore_data(apple, 0, 6, True)
    print("\n")
    print("\n")
    explore_data(google, 0, 6, True)

    # In[3]:
    explore_data(apple, 0, 1)
    print("\n")
    explore_data(google, 0, 1)

    # In[4]:
    # Compare the header with one specific row.
    # NOTE(review): presumably the row flagged by the length check below
    # — confirm the index against the current data file.
    print(google[0])
    print(google[10473])

    # In[5]:
    # Report every row whose column count differs from the header's.
    malformed_rows = find_malformed_rows(google)
    for index, row in malformed_rows:
        print(row)
        print(index)  # prints the row index number

    # In[6]:
    for row in google:
        if row[0] == 'Subway Surfers':
            print(row)

    duplicate_apps, unique_apps = find_duplicates(google[1:])

    print("Number of duplicate apps: ", len(duplicate_apps))
    print("\n")
    print("Examples of duplicate apps: ", duplicate_apps[:15])

    # In[10]:
    # Rows with the wrong column count have shifted columns, so column 3
    # is not guaranteed to hold the review count; leave them out here.
    malformed_indices = {index for index, _ in malformed_rows}
    well_formed_apps = [
        row
        for index, row in enumerate(google)
        if index > 0 and index not in malformed_indices
    ]
    reviews_max = max_reviews_by_app(well_formed_apps)

    print(len(reviews_max))
    print(reviews_max)

    # TODO: build the de-duplicated dataset (google_clean / already_added);
    # the original draft of this step was left unfinished.

    # In[ ]: