Mobile Apps

This Project is intended to ...

In [1]:
opened_fileA = open("AppleStore.csv")
opened_fileG = open("googleplaystore.csv")

from csv import reader

read_fileA = reader(opened_fileA)
read_fileG = reader(opened_fileG)

apple = list(read_fileA)
google = list(read_fileG)

def explore_data(dataset, start, end, rows_and_columns=False):
    dataset_slice = dataset[start:end]    
    for row in dataset_slice:
        print(row)
        print('\n') # adds a new (empty) line after each row

    if rows_and_columns:
        print('Number of rows:', len(dataset))
        print('Number of columns:', len(dataset[0]))
In [2]:
explore_data(apple, 0, 6, True)

print("\n")
print("\n")

explore_data(google, 0, 6, True)
['id', 'track_name', 'size_bytes', 'currency', 'price', 'rating_count_tot', 'rating_count_ver', 'user_rating', 'user_rating_ver', 'ver', 'cont_rating', 'prime_genre', 'sup_devices.num', 'ipadSc_urls.num', 'lang.num', 'vpp_lic']


['284882215', 'Facebook', '389879808', 'USD', '0.0', '2974676', '212', '3.5', '3.5', '95.0', '4+', 'Social Networking', '37', '1', '29', '1']


['389801252', 'Instagram', '113954816', 'USD', '0.0', '2161558', '1289', '4.5', '4.0', '10.23', '12+', 'Photo & Video', '37', '0', '29', '1']


['529479190', 'Clash of Clans', '116476928', 'USD', '0.0', '2130805', '579', '4.5', '4.5', '9.24.12', '9+', 'Games', '38', '5', '18', '1']


['420009108', 'Temple Run', '65921024', 'USD', '0.0', '1724546', '3842', '4.5', '4.0', '1.6.2', '9+', 'Games', '40', '5', '1', '1']


['284035177', 'Pandora - Music & Radio', '130242560', 'USD', '0.0', '1126879', '3594', '4.0', '4.5', '8.4.1', '12+', 'Music', '37', '4', '1', '1']


Number of rows: 7198
Number of columns: 16




['App', 'Category', 'Rating', 'Reviews', 'Size', 'Installs', 'Type', 'Price', 'Content Rating', 'Genres', 'Last Updated', 'Current Ver', 'Android Ver']


['Photo Editor & Candy Camera & Grid & ScrapBook', 'ART_AND_DESIGN', '4.1', '159', '19M', '10,000+', 'Free', '0', 'Everyone', 'Art & Design', 'January 7, 2018', '1.0.0', '4.0.3 and up']


['Coloring book moana', 'ART_AND_DESIGN', '3.9', '967', '14M', '500,000+', 'Free', '0', 'Everyone', 'Art & Design;Pretend Play', 'January 15, 2018', '2.0.0', '4.0.3 and up']


['U Launcher Lite – FREE Live Cool Themes, Hide Apps', 'ART_AND_DESIGN', '4.7', '87510', '8.7M', '5,000,000+', 'Free', '0', 'Everyone', 'Art & Design', 'August 1, 2018', '1.2.4', '4.0.3 and up']


['Sketch - Draw & Paint', 'ART_AND_DESIGN', '4.5', '215644', '25M', '50,000,000+', 'Free', '0', 'Teen', 'Art & Design', 'June 8, 2018', 'Varies with device', '4.2 and up']


['Pixel Draw - Number Art Coloring Book', 'ART_AND_DESIGN', '4.3', '967', '2.8M', '100,000+', 'Free', '0', 'Everyone', 'Art & Design;Creativity', 'June 20, 2018', '1.1', '4.4 and up']


Number of rows: 10842
Number of columns: 13
In [3]:
explore_data(apple, 0, 1)

print("\n")

explore_data(google, 0, 1)
['id', 'track_name', 'size_bytes', 'currency', 'price', 'rating_count_tot', 'rating_count_ver', 'user_rating', 'user_rating_ver', 'ver', 'cont_rating', 'prime_genre', 'sup_devices.num', 'ipadSc_urls.num', 'lang.num', 'vpp_lic']




['App', 'Category', 'Rating', 'Reviews', 'Size', 'Installs', 'Type', 'Price', 'Content Rating', 'Genres', 'Last Updated', 'Current Ver', 'Android Ver']


In [4]:
print (google[0])
print (google[10473])
['App', 'Category', 'Rating', 'Reviews', 'Size', 'Installs', 'Type', 'Price', 'Content Rating', 'Genres', 'Last Updated', 'Current Ver', 'Android Ver']
['Life Made WI-Fi Touchscreen Photo Frame', '1.9', '19', '3.0M', '1,000+', 'Free', '0', 'Everyone', '', 'February 11, 2018', '1.0.19', '4.0 and up']
In [5]:
for row in google:
    headerlen = len(google[0])
    
    if headerlen != len(row):
        print (row)
        print(google.index(row)) # prints the row index number
['Life Made WI-Fi Touchscreen Photo Frame', '1.9', '19', '3.0M', '1,000+', 'Free', '0', 'Everyone', '', 'February 11, 2018', '1.0.19', '4.0 and up']
10473
In [6]:
for row in google:
    name = row[0]
    if name == 'Subway Surfers':
        print(row)

duplicate_apps = []
unique_apps = []

for row in google[1:]:
    name = row[0]
    if name in unique_apps:
        duplicate_apps.append(name)
    else:
        unique_apps.append(name)
        
print("Number of duplicate apps: ", len (duplicate_apps))
print("\n")
print("Examples of duplicate apps: ", duplicate_apps[:15])
['Subway Surfers', 'GAME', '4.5', '27722264', '76M', '1,000,000,000+', 'Free', '0', 'Everyone 10+', 'Arcade', 'July 12, 2018', '1.90.0', '4.1 and up']
['Subway Surfers', 'GAME', '4.5', '27723193', '76M', '1,000,000,000+', 'Free', '0', 'Everyone 10+', 'Arcade', 'July 12, 2018', '1.90.0', '4.1 and up']
['Subway Surfers', 'GAME', '4.5', '27724094', '76M', '1,000,000,000+', 'Free', '0', 'Everyone 10+', 'Arcade', 'July 12, 2018', '1.90.0', '4.1 and up']
['Subway Surfers', 'GAME', '4.5', '27725352', '76M', '1,000,000,000+', 'Free', '0', 'Everyone 10+', 'Arcade', 'July 12, 2018', '1.90.0', '4.1 and up']
['Subway Surfers', 'GAME', '4.5', '27725352', '76M', '1,000,000,000+', 'Free', '0', 'Everyone 10+', 'Arcade', 'July 12, 2018', '1.90.0', '4.1 and up']
['Subway Surfers', 'GAME', '4.5', '27711703', '76M', '1,000,000,000+', 'Free', '0', 'Everyone 10+', 'Arcade', 'July 12, 2018', '1.90.0', '4.1 and up']
Number of duplicate apps:  1181


Examples of duplicate apps:  ['Quick PDF Scanner + OCR FREE', 'Box', 'Google My Business', 'ZOOM Cloud Meetings', 'join.me - Simple Meetings', 'Box', 'Zenefits', 'Google Ads', 'Google My Business', 'Slack', 'FreshBooks Classic', 'Insightly CRM', 'QuickBooks Accounting: Invoicing & Expenses', 'HipChat - Chat Built for Teams', 'Xero Accounting Software']
In [10]:
reviews_max = { }

for row in google[1:]:
    name = row[0]
    n_reviews = float(row[3])
    if name in reviews_max and reviews_max[name] < n_reviews:
        reviews_max.update( name = n_reviews )
    if name not in reviews_max:        
        reviews_max.update( {name: n_reviews} )

print (len(reviews_max))
print (reviews_max)


# google_clean = []
# already_added = []

# for row in google:
#     name = app[0]
#     n_reviews = float(app[0])
#     if n_reivews == 
ValueErrorTraceback (most recent call last)
<ipython-input-10-a2efeb7567d5> in <module>()
      3 for row in google[1:]:
      4     name = row[0]
----> 5     n_reviews = float(row[3])
      6     if name in reviews_max and reviews_max[name] < n_reviews:
      7         reviews_max.update( name = n_reviews )

ValueError: could not convert string to float: '3.0M'
In [ ]: