from collections import Counter
from csv import reader
# Load both datasets fully into memory as lists of rows (each row is a list of
# strings), then separate the first row (header) from the remaining rows.
#
# `with` closes each file as soon as it has been read; previously both handles
# stayed open for the life of the process.
# newline='' is what the csv module documents for files passed to reader(), so
# that line breaks inside quoted fields are handled by the parser.
# encoding='utf8' is explicit so the result does not depend on the platform's
# default encoding -- NOTE(review): assumes both files are UTF-8; confirm.
with open('AppleStore.csv', encoding='utf8', newline='') as opened_file:  # iOS
    read_file = reader(opened_file)
    ios = list(read_file)
ios_header = ios[0]
ios_body = ios[1:]

with open('googleplaystore.csv', encoding='utf8', newline='') as opened_file:  # Android
    read_file = reader(opened_file)
    android = list(read_file)
android_header = android[0]
android_body = android[1:]
def explore_data(dataset, start, end, rows_and_columns=False):
    """Print the rows ``dataset[start:end]`` and optionally the dataset size.

    Args:
        dataset: Sequence of rows; each row is itself a sized sequence.
        start: Index of the first row to print (slice semantics).
        end: Index one past the last row to print (slice semantics).
        rows_and_columns: When true, also print the total number of rows and
            the number of columns, the latter taken from the first row.

    Returns:
        None. All output goes to stdout.
    """
    for row in dataset[start:end]:
        print(row)
        # print() appends its own newline, so this leaves two blank lines
        # after each row.
        print('\n')

    if rows_and_columns:
        print('NO of rows: ', len(dataset))
        # An empty dataset has no first row to measure; report 0 columns
        # instead of raising IndexError on dataset[0].
        n_columns = len(dataset[0]) if len(dataset) > 0 else 0
        print('NO of columns: ', n_columns)
# Remove duplicate entries, keeping for each app the entry with the highest review count.
# Split Android app names (column 0) into first occurrences and repeats.
#   unique_apps    -- each distinct name once, in first-seen order
#   duplicate_apps -- one entry for every repeated occurrence of a name
duplicate_apps = []
unique_apps = []
seen_names = set()  # mirrors unique_apps; set membership is O(1), list is O(n)
for app in android[1:]:  # skip the header row: it is not an app
    name = app[0]
    if name in seen_names:
        duplicate_apps.append(name)
    else:
        seen_names.add(name)
        unique_apps.append(name)
# Map each Android app name (column 0) to the largest value of column 3
# (parsed as the review count) found across all rows carrying that name.
reviews_max = {}
for row in android[1:]:  # skip the header row
    name = row[0]
    n_reviews = float(row[3])  # raises ValueError if the field is not numeric
    # Record the first count seen for a name, then only strictly larger ones.
    if name not in reviews_max or reviews_max[name] < n_reviews:
        reviews_max[name] = n_reviews
# Build the de-duplicated Android dataset: for each app name keep the first
# row whose review count equals that name's maximum in reviews_max.
android_clean = []
already_added = []   # names kept so far, in the order they were added
added_names = set()  # same names as already_added, for O(1) membership tests
for app in android[1:]:  # skip the header row
    name = app[0]
    n_reviews = float(app[3])
    # The second condition matters when several rows of one app share the
    # maximum review count: only the first such row is kept.
    if reviews_max[name] == n_reviews and name not in added_names:
        android_clean.append(app)
        already_added.append(name)
        added_names.add(name)

explore_data(android_clean, 0, 2, True)
def english_char(string, max_non_ascii=3):
    """Return True if *string* has at most *max_non_ascii* non-ASCII characters.

    A character counts as non-ASCII when its code point is above 127. A few
    such characters are tolerated so that names containing an emoji or a
    symbol such as a trademark sign are still accepted.

    Args:
        string: The text to check.
        max_non_ascii: Largest number of non-ASCII characters still accepted.
            Defaults to 3.

    Returns:
        bool: False when the count exceeds *max_non_ascii*, True otherwise
        (including for the empty string).
    """
    non_ascii = sum(1 for char in string if ord(char) > 127)
    return non_ascii <= max_non_ascii
# Sanity check carried over from interactive use (the result was discarded):
#   english_char('爱奇艺PPS -《欢乐颂2》电视剧热播')  evaluates to False

# Keep only the apps whose name passes english_char().
# The name is column 0 in Android rows and column 1 in iOS rows.
android_english = [app for app in android_clean if english_char(app[0])]
# ios[1:] skips the header row; iterating over all of ios let the header
# through the filter and inflated the iOS count by one.
ios_english = [app for app in ios[1:] if english_char(app[1])]

print(len(android_english))
print('\n')  # was '/n', which printed the two characters "/n" literally
print(len(ios_english))
# Keep only the free apps. The price is compared as the raw CSV string:
# column 7 of an Android row must be exactly '0', column 4 of an iOS row
# must be exactly '0.0'.
free_android = [row for row in android_english if row[7] == '0']
free_ios = [row for row in ios_english if row[4] == '0.0']
print(len(free_android), len(free_ios))
def freq_table(dataset, index):
    """Return the percentage share of each distinct value in one column.

    Args:
        dataset: Iterable of rows; each row must support ``row[index]``.
        index: Position of the column to tabulate (negative indexes allowed).

    Returns:
        dict: Maps each distinct value of the column to its share of all rows
        as a percentage (0-100). Keys appear in first-seen order. An empty
        dataset gives an empty dict.
    """
    counts = Counter(row[index] for row in dataset)
    # Derive the total from the counts so that dataset may be any iterable,
    # including one that can only be consumed once.
    total = sum(counts.values())
    return {value: (count / total) * 100 for value, count in counts.items()}
def display_table(dataset, index):
    """Print the percentage table of column *index*, largest share first.

    The table comes from freq_table(dataset, index). Each line is printed as
    ``value : percentage``. Entries are ordered as descending
    (percentage, value) pairs, so equal percentages are ordered by value,
    also descending.
    """
    percentages = freq_table(dataset, index)
    # Put the percentage first in each pair so the sort is driven by it.
    ranked = [(share, value) for value, share in percentages.items()]
    ranked.sort(reverse=True)
    for share, value in ranked:
        print(value, ':', share)
#explore_data(android, 0, 3)
# Percentage tables for columns 1 and 9 of the free Android apps.
display_table(free_android, 1)
print('\n')  # was '/n', which printed the two characters "/n" literally
display_table(free_android, 9)
# Most common iOS genres: Games, Entertainment.
# We need to look at the number of downloads to build an accurate profile.
# Most common Android genres: Family, Games.
# For each genre of the free iOS apps (the genre is the fifth column from the
# end), print the average of column 5 over that genre's apps.
unique_ios_genres = freq_table(free_ios, -5)

# Accumulate the sum and the app count per genre in one pass over the data,
# instead of rescanning every app once per genre.
rating_totals = {}
rating_counts = {}
for app in free_ios:
    genre = app[-5]
    rating_totals[genre] = rating_totals.get(genre, 0) + float(app[5])
    rating_counts[genre] = rating_counts.get(genre, 0) + 1

# Print in the key order of unique_ios_genres.
for genre in unique_ios_genres:
    average_user_rating = rating_totals[genre] / rating_counts[genre]
    print(average_user_rating, genre)
display_table(free_android, 5)

# For each category of the free Android apps (column 1), print the average of
# the install figure in column 5.
unique_android_genres = freq_table(free_android, 1)

# Accumulate the sum and the app count per category in one pass over the
# data, instead of rescanning every app once per category.
install_totals = {}
install_counts = {}
for app in free_android:
    category = app[1]
    # Strip the thousands separators and the trailing plus sign so the
    # figure parses as a number.
    n_installs = app[5].replace(',', '').replace('+', '')
    install_totals[category] = install_totals.get(category, 0) + float(n_installs)
    install_counts[category] = install_counts.get(category, 0) + 1

# Print in the key order of unique_android_genres.
for category in unique_android_genres:
    average_installs = install_totals[category] / install_counts[category]
    print(category, ':', average_installs)