In this project we look at what makes some apps more engaging than others and where development effort should be focused.
The goal is to better understand what differentiates apps from one another and which categories are most interesting to users.
from csv import reader
# Load both datasets and split each one into its header row and data rows.
# `with` closes each file as soon as it has been read; `newline=''` is what
# the csv module expects so quoted fields with embedded newlines parse
# correctly; the explicit encoding avoids depending on the platform default.
# NOTE(review): assumes both CSV files are UTF-8 encoded -- confirm.
with open('AppleStore.csv', encoding='utf8', newline='') as opened_file:
    read_file_ios = reader(opened_file)
    ios_data = list(read_file_ios)
ios_header = ios_data[0]
ios = ios_data[1:]

with open('googleplaystore.csv', encoding='utf8', newline='') as opened_file_andr:
    read_file_android = reader(opened_file_andr)
    android_data = list(read_file_android)
android_header = android_data[0]
android = android_data[1:]

# Preview the first two data rows of each dataset.
print(ios[:2])
print('\n')
print(android[:2])
[['284882215', 'Facebook', '389879808', 'USD', '0.0', '2974676', '212', '3.5', '3.5', '95.0', '4+', 'Social Networking', '37', '1', '29', '1'], ['389801252', 'Instagram', '113954816', 'USD', '0.0', '2161558', '1289', '4.5', '4.0', '10.23', '12+', 'Photo & Video', '37', '0', '29', '1']] [['Photo Editor & Candy Camera & Grid & ScrapBook', 'ART_AND_DESIGN', '4.1', '159', '19M', '10,000+', 'Free', '0', 'Everyone', 'Art & Design', 'January 7, 2018', '1.0.0', '4.0.3 and up'], ['Coloring book moana', 'ART_AND_DESIGN', '3.9', '967', '14M', '500,000+', 'Free', '0', 'Everyone', 'Art & Design;Pretend Play', 'January 15, 2018', '2.0.0', '4.0.3 and up']]
def explore_data(dataset, start, end, rows_and_columns=False):
    """Print a slice of a dataset and, optionally, its dimensions.

    Args:
        dataset: A list of rows (each row itself a list), without a header.
        start: Index of the first row to print.
        end: Index one past the last row to print.
        rows_and_columns: When true, also print the number of rows and the
            number of columns (measured on the first row).
    """
    for row in dataset[start:end]:
        print(row)
        print('\n')  # visual gap after each printed row

    if rows_and_columns:
        print('Number of rows:', len(dataset))
        # An empty dataset has no first row to measure; report 0 columns
        # instead of raising IndexError.
        n_columns = len(dataset[0]) if len(dataset) > 0 else 0
        print('Number of columns:', n_columns)
# Print the first three iOS rows, followed by the row and column counts.
explore_data(ios,0,3,True)
['284882215', 'Facebook', '389879808', 'USD', '0.0', '2974676', '212', '3.5', '3.5', '95.0', '4+', 'Social Networking', '37', '1', '29', '1'] ['389801252', 'Instagram', '113954816', 'USD', '0.0', '2161558', '1289', '4.5', '4.0', '10.23', '12+', 'Photo & Video', '37', '0', '29', '1'] ['529479190', 'Clash of Clans', '116476928', 'USD', '0.0', '2130805', '579', '4.5', '4.5', '9.24.12', '9+', 'Games', '38', '5', '18', '1'] Number of rows: 7197 Number of columns: 16
# Print the first three Google Play rows, followed by the row and column counts.
explore_data(android,0,3,True)
['Photo Editor & Candy Camera & Grid & ScrapBook', 'ART_AND_DESIGN', '4.1', '159', '19M', '10,000+', 'Free', '0', 'Everyone', 'Art & Design', 'January 7, 2018', '1.0.0', '4.0.3 and up'] ['Coloring book moana', 'ART_AND_DESIGN', '3.9', '967', '14M', '500,000+', 'Free', '0', 'Everyone', 'Art & Design;Pretend Play', 'January 15, 2018', '2.0.0', '4.0.3 and up'] ['U Launcher Lite – FREE Live Cool Themes, Hide Apps', 'ART_AND_DESIGN', '4.7', '87510', '8.7M', '5,000,000+', 'Free', '0', 'Everyone', 'Art & Design', 'August 1, 2018', '1.2.4', '4.0.3 and up'] Number of rows: 10841 Number of columns: 13
# Remove one row from the Google Play data by position.
# NOTE(review): presumably row 10472 is a malformed entry -- confirm against
# the raw CSV. The index is hard-coded, so running this statement a second
# time deletes a different (valid) row; run it exactly once per load.
del android[10472]
# Row count after the deletion.
print(len(android))
10840
The next step is to check whether there are duplicate apps, using Instagram as an example. When an app appears more than once, only the entry with the highest number of reviews will be kept, on the assumption that the more reviews an entry has, the more up to date its data is.
# Show every Google Play row whose app name (first column) is 'Instagram'.
for app in android:
    if app[0] == 'Instagram':
        print(app)
['Instagram', 'SOCIAL', '4.5', '66577313', 'Varies with device', '1,000,000,000+', 'Free', '0', 'Teen', 'Social', 'July 31, 2018', 'Varies with device', 'Varies with device'] ['Instagram', 'SOCIAL', '4.5', '66577446', 'Varies with device', '1,000,000,000+', 'Free', '0', 'Teen', 'Social', 'July 31, 2018', 'Varies with device', 'Varies with device'] ['Instagram', 'SOCIAL', '4.5', '66577313', 'Varies with device', '1,000,000,000+', 'Free', '0', 'Teen', 'Social', 'July 31, 2018', 'Varies with device', 'Varies with device'] ['Instagram', 'SOCIAL', '4.5', '66509917', 'Varies with device', '1,000,000,000+', 'Free', '0', 'Teen', 'Social', 'July 31, 2018', 'Varies with device', 'Varies with device']
As predicted, there are duplicate entries in the list. Next, I count how many duplicates there are and compare that with the number of unique entries.
# Split app names into first occurrences (unique_apps) and repeats
# (duplicate_apps). A name repeated k times adds k - 1 entries to
# duplicate_apps.
duplicate_apps = []
unique_apps = []
# Set mirror of unique_apps: membership tests are O(1) instead of scanning
# the list on every row, while unique_apps still keeps first-seen order.
seen_names = set()

for app in android:
    name = app[0]
    if name in seen_names:
        duplicate_apps.append(name)
    else:
        seen_names.add(name)
        unique_apps.append(name)

print('Number of duplicates:', len(duplicate_apps))
print('\n')
print('Number of uniques:', len(unique_apps))
print('\n')
print('Examples of duplicate apps:', duplicate_apps[:10])
Number of duplicates: 1181 Number of uniques: 9659 Examples of duplicate apps: ['Quick PDF Scanner + OCR FREE', 'Box', 'Google My Business', 'ZOOM Cloud Meetings', 'join.me - Simple Meetings', 'Box', 'Zenefits', 'Google Ads', 'Google My Business', 'Slack']
# De-duplicate the Google Play data, keeping for each app name the row with
# the highest review count.
#
# This needs two passes. Filtering in the same loop that builds reviews_max
# compares each row against the maximum seen *so far*, which always equals
# the first occurrence's own count -- so the first row would be kept even
# when a later duplicate has more reviews.

# Pass 1: highest review count per app name.
reviews_max = {}
for app in android:
    name = app[0]
    n_reviews = float(app[3])
    if name not in reviews_max or reviews_max[name] < n_reviews:
        reviews_max[name] = n_reviews

# Pass 2: keep one row per name whose review count equals that maximum.
android_clean = []
# Names already kept; guards against duplicates that tie on the maximum.
# A set gives O(1) membership checks.
already_added = set()
for app in android:
    name = app[0]
    n_reviews = float(app[3])
    if reviews_max[name] == n_reviews and name not in already_added:
        android_clean.append(app)
        already_added.add(name)

# Both counts should match: one kept row per distinct app name.
print(len(reviews_max))
print(len(android_clean))
9659 9659
def common_english(string):
    """Return True when every character of *string* is ASCII (code point <= 127)."""
    return all(ord(symbol) <= 127 for symbol in string)
# Sanity check: an all-ASCII name, then a name containing non-ASCII characters.
print(common_english('Instagram'))
print(common_english('爱奇艺PPS -《欢乐颂2》电视剧热播'))
True False
def common_english(string, max_non_ascii=3):
    """Return True when *string* has at most *max_non_ascii* non-ASCII characters.

    Tolerating a few non-ASCII characters keeps names that contain a symbol
    or emoji (e.g. a trademark sign) while still rejecting names written
    mostly in a non-Latin script.

    Args:
        string: The text to check.
        max_non_ascii: Largest number of characters with a code point above
            127 that is still accepted. Defaults to 3.

    Returns:
        bool: False if more than *max_non_ascii* characters are non-ASCII,
        True otherwise.
    """
    non_ascii = sum(1 for character in string if ord(character) > 127)
    return non_ascii <= max_non_ascii
# Sanity check: a mostly non-ASCII name, then two names that each contain
# only a single non-ASCII symbol.
print(common_english('爱奇艺PPS -《欢乐颂2》电视剧热播'))
print(common_english('Docs To Go™ Free Office Suite'))
print(common_english('Instachat 😜'))
False True True
# Keep only the apps whose name passes common_english. The name is the first
# column in the Google Play rows and the second column in the iOS rows.
android_english = [app for app in android_clean if common_english(app[0])]
ios_english = [app for app in ios if common_english(app[1])]

explore_data(android_english, 0, 3, True)
print('\n')
explore_data(ios_english, 0, 3, True)
['Photo Editor & Candy Camera & Grid & ScrapBook', 'ART_AND_DESIGN', '4.1', '159', '19M', '10,000+', 'Free', '0', 'Everyone', 'Art & Design', 'January 7, 2018', '1.0.0', '4.0.3 and up'] ['Coloring book moana', 'ART_AND_DESIGN', '3.9', '967', '14M', '500,000+', 'Free', '0', 'Everyone', 'Art & Design;Pretend Play', 'January 15, 2018', '2.0.0', '4.0.3 and up'] ['U Launcher Lite – FREE Live Cool Themes, Hide Apps', 'ART_AND_DESIGN', '4.7', '87510', '8.7M', '5,000,000+', 'Free', '0', 'Everyone', 'Art & Design', 'August 1, 2018', '1.2.4', '4.0.3 and up'] Number of rows: 9614 Number of columns: 13 ['284882215', 'Facebook', '389879808', 'USD', '0.0', '2974676', '212', '3.5', '3.5', '95.0', '4+', 'Social Networking', '37', '1', '29', '1'] ['389801252', 'Instagram', '113954816', 'USD', '0.0', '2161558', '1289', '4.5', '4.0', '10.23', '12+', 'Photo & Video', '37', '0', '29', '1'] ['529479190', 'Clash of Clans', '116476928', 'USD', '0.0', '2130805', '579', '4.5', '4.5', '9.24.12', '9+', 'Games', '38', '5', '18', '1'] Number of rows: 6183 Number of columns: 16
# Keep only the free apps. The price is compared as the raw string found in
# each dataset: '0' in column 7 for Google Play, '0.0' in column 4 for iOS.
android_final = [app for app in android_english if app[7] == '0']
ios_final = [app for app in ios_english if app[4] == '0.0']

print(len(android_final))
print(len(ios_final))
8862 3222
Our goal is to launch the app on both Apple's App Store and Google Play to maximize user reach. The focus is on building free apps for English speakers.
def freq_table(dataset, index):
    """Build a percentage frequency table for one column of a dataset.

    Args:
        dataset: An iterable of rows (each row indexable).
        index: Position of the column to tabulate within each row.

    Returns:
        dict: Maps each distinct value found at ``row[index]`` to the
        percentage (0-100) of rows holding that value, in first-seen order.
        An empty dataset yields an empty dict.
    """
    # Local import so the function is self-contained.
    from collections import Counter

    counts = Counter(row[index] for row in dataset)
    total = sum(counts.values())  # equals the number of rows
    return {value: (count / total) * 100 for value, count in counts.items()}
def display_table(dataset, index):
    """Print the frequency table of one column, highest percentage first.

    Each line has the form ``value : percentage``. Entries are ordered by
    sorting ``(percentage, value)`` pairs in descending order.
    """
    percentages = freq_table(dataset, index)
    ranked = sorted(
        ((share, label) for label, share in percentages.items()),
        reverse=True,
    )
    for share, label in ranked:
        print(label, ':', share)
# Frequency table for the fifth-from-last iOS column, which holds the genre
# in the sample rows (e.g. 'Social Networking', 'Games').
display_table(ios_final, -5)
Games : 58.16263190564867 Entertainment : 7.883302296710118 Photo & Video : 4.9658597144630665 Education : 3.662321539416512 Social Networking : 3.2898820608317814 Shopping : 2.60707635009311 Utilities : 2.5139664804469275 Sports : 2.1415270018621975 Music : 2.0484171322160147 Health & Fitness : 2.0173805090006205 Productivity : 1.7380509000620732 Lifestyle : 1.5828677839851024 News : 1.3345747982619491 Travel : 1.2414649286157666 Finance : 1.1173184357541899 Weather : 0.8690254500310366 Food & Drink : 0.8069522036002483 Reference : 0.5586592178770949 Business : 0.5276225946617008 Book : 0.4345127250155183 Navigation : 0.186219739292365 Medical : 0.186219739292365 Catalogs : 0.12414649286157665
# Frequency table for the fourth-from-last Google Play column, which holds
# the genre in the sample rows (e.g. 'Art & Design').
display_table(android_final, -4)
Tools : 8.429248476641842 Entertainment : 6.070864364703228 Education : 5.348679756262695 Business : 4.5926427443015125 Productivity : 3.8930264048747465 Lifestyle : 3.8930264048747465 Finance : 3.7011961182577298 Medical : 3.5206499661475967 Sports : 3.4642292936131795 Personalization : 3.3175355450236967 Communication : 3.238546603475513 Action : 3.1031369893929135 Health & Fitness : 3.080568720379147 Photography : 2.945159106296547 News & Magazines : 2.798465357707064 Social : 2.663055743624464 Travel & Local : 2.324531708417964 Shopping : 2.2455427668697814 Books & Reference : 2.143985556307831 Simulation : 2.0424283457458814 Dating : 1.8618821936357481 Arcade : 1.8505980591288649 Video Players & Editors : 1.7716091175806816 Casual : 1.7603249830737984 Maps & Navigation : 1.399232678853532 Food & Drink : 1.2412547957571656 Puzzle : 1.128413450688332 Racing : 0.9930038366057323 Role Playing : 0.9365831640713158 Libraries & Demo : 0.9365831640713158 Auto & Vehicles : 0.9252990295644324 Strategy : 0.9140148950575491 House & Home : 0.8350259535093659 Weather : 0.8011735499887158 Events : 0.7109004739336493 Adventure : 0.6770480704129994 Comics : 0.6093432633716994 Beauty : 0.598059128864816 Art & Design : 0.598059128864816 Parenting : 0.49650191830286616 Card : 0.45136538027533285 Casino : 0.4287971112615662 Trivia : 0.41751297675468296 Educational;Education : 0.3949447077409162 Educational : 0.3723764387271496 Board : 0.3723764387271496 Education;Education : 0.3385240352064997 Word : 0.2595350936583164 Casual;Pretend Play : 0.23696682464454977 Music : 0.2031144211238998 Racing;Action & Adventure : 0.16926201760324985 Puzzle;Brain Games : 0.16926201760324985 Entertainment;Music & Video : 0.16926201760324985 Casual;Brain Games : 0.13540961408259986 Casual;Action & Adventure : 0.13540961408259986 Arcade;Action & Adventure : 0.12412547957571654 Action;Action & Adventure : 0.1015572105619499 Educational;Pretend Play : 0.09027307605506657 Board;Brain Games : 
0.09027307605506657 Simulation;Action & Adventure : 0.07898894154818326 Parenting;Education : 0.07898894154818326 Entertainment;Brain Games : 0.07898894154818326 Parenting;Music & Video : 0.06770480704129993 Educational;Brain Games : 0.06770480704129993 Casual;Creativity : 0.06770480704129993 Art & Design;Creativity : 0.06770480704129993 Education;Pretend Play : 0.056420672534416606 Role Playing;Pretend Play : 0.045136538027533285 Education;Creativity : 0.045136538027533285 Role Playing;Action & Adventure : 0.033852403520649964 Puzzle;Action & Adventure : 0.033852403520649964 Entertainment;Creativity : 0.033852403520649964 Entertainment;Action & Adventure : 0.033852403520649964 Educational;Creativity : 0.033852403520649964 Educational;Action & Adventure : 0.033852403520649964 Education;Music & Video : 0.033852403520649964 Education;Brain Games : 0.033852403520649964 Education;Action & Adventure : 0.033852403520649964 Adventure;Action & Adventure : 0.033852403520649964 Video Players & Editors;Music & Video : 0.022568269013766643 Sports;Action & Adventure : 0.022568269013766643 Simulation;Pretend Play : 0.022568269013766643 Puzzle;Creativity : 0.022568269013766643 Music;Music & Video : 0.022568269013766643 Entertainment;Pretend Play : 0.022568269013766643 Casual;Education : 0.022568269013766643 Board;Action & Adventure : 0.022568269013766643 Video Players & Editors;Creativity : 0.011284134506883321 Trivia;Education : 0.011284134506883321 Travel & Local;Action & Adventure : 0.011284134506883321 Tools;Education : 0.011284134506883321 Strategy;Education : 0.011284134506883321 Strategy;Creativity : 0.011284134506883321 Strategy;Action & Adventure : 0.011284134506883321 Simulation;Education : 0.011284134506883321 Role Playing;Brain Games : 0.011284134506883321 Racing;Pretend Play : 0.011284134506883321 Puzzle;Education : 0.011284134506883321 Parenting;Brain Games : 0.011284134506883321 Music & Audio;Music & Video : 0.011284134506883321 Lifestyle;Pretend Play : 
0.011284134506883321 Lifestyle;Education : 0.011284134506883321 Health & Fitness;Education : 0.011284134506883321 Health & Fitness;Action & Adventure : 0.011284134506883321 Entertainment;Education : 0.011284134506883321 Communication;Creativity : 0.011284134506883321 Comics;Creativity : 0.011284134506883321 Casual;Music & Video : 0.011284134506883321 Card;Action & Adventure : 0.011284134506883321 Books & Reference;Education : 0.011284134506883321 Art & Design;Pretend Play : 0.011284134506883321 Art & Design;Action & Adventure : 0.011284134506883321 Arcade;Pretend Play : 0.011284134506883321 Adventure;Education : 0.011284134506883321