from csv import reader

# Read both store exports fully into memory.
# The context managers close each file handle as soon as its rows have been
# materialised; newline='' is what the csv module recommends for its inputs.
# NOTE(review): assumes both CSV exports are UTF-8 encoded — confirm.
with open('AppleStore.csv', encoding='utf8', newline='') as open_a_file:
    read_a_file = reader(open_a_file)
    dataset_a = list(read_a_file)

with open('googleplaystore.csv', encoding='utf8', newline='') as open_p_file:
    read_p_file = reader(open_p_file)
    dataset_p = list(read_p_file)

# The first row of each file is treated as the header; the rest are app rows.
ios_header = dataset_a[0]
ios_data = dataset_a[1:]
android_header = dataset_p[0]
android_data = dataset_p[1:]
def explore_data(dataset, start, end, rows_and_columns=False):
    """Print a slice of ``dataset`` and, optionally, its dimensions.

    Args:
        dataset: A sequence of rows (the callers pass header-less lists).
        start: Index of the first row to print.
        end: Index one past the last row to print.
        rows_and_columns: When true, also print the total number of rows
            and the number of columns, measured on the first row.
    """
    for row in dataset[start:end]:
        print(row)
        print('\n')  # leaves a blank gap after each row

    if rows_and_columns:
        print('Number of rows:', len(dataset))
        # An empty dataset has no first row to measure; report zero columns
        # instead of raising IndexError.
        print('Number of columns:', len(dataset[0]) if dataset else 0)
# Preview the first five rows of each store's data together with its size.
for store_title, store_rows in (('Apple Store\n', ios_data),
                                ('Google Play Store\n', android_data)):
    print(store_title)
    explore_data(store_rows, 0, 5, rows_and_columns=True)

# Show the column names of both datasets for reference.
print('ios header', '\n', ios_header, '\n')
print('android header', '\n', android_header)
# Inspect row 10472 of the Play Store data and remove it.
# NOTE(review): presumably this row is malformed, i.e. its column count
# differs from the header's — confirm against the raw CSV.
# The guard makes the deletion safe to re-run: once the row is gone, the row
# that slides into this index is left untouched.
suspect_index = 10472
if (len(android_data) > suspect_index
        and len(android_data[suspect_index]) != len(android_header)):
    print(android_data[suspect_index])
    # Printed explicitly: a bare len(...) expression shows nothing in a script.
    print(len(android_data[suspect_index]))
    del android_data[suspect_index]

# start == end prints no rows, only the updated row/column counts.
explore_data(android_data, 0, 0, rows_and_columns=True)
# Print every Play Store row whose name (column 0) is exactly 'Instagram',
# to see whether the same app appears more than once.
instagram_rows = [app for app in android_data if app[0] == 'Instagram']
for app in instagram_rows:
    print(app)
Step I - Get names of all the apps which have duplicate entries
# Split Play Store app names into first occurrences and repeats.
# unique_apps holds each name once, in first-seen order; duplicate_apps gets
# one entry for every additional occurrence of a name.
unique_apps = []
duplicate_apps = []
seen_names = set()  # set membership keeps the scan linear in the row count
for app in android_data:
    name = app[0]
    if name in seen_names:
        duplicate_apps.append(name)
    else:
        seen_names.add(name)
        unique_apps.append(name)

print(len(duplicate_apps))
print(duplicate_apps[:10])
Step II - Make a dictionary mapping each app name to its highest number of reviews; this identifies the row we want to keep for each app.
# Map each app name (column 0) to the largest review count (column 3) found
# among all of its rows.
reviews_max = {}
for app in android_data:
    name = app[0]
    n_reviews = float(app[3])
    best_so_far = reviews_max.get(name)
    # Record the count for a new name, or when it beats the stored maximum.
    if best_so_far is None or best_so_far < n_reviews:
        reviews_max[name] = n_reviews

print(len(reviews_max))
Step III - Use the dictionary created above to remove duplicate rows, collecting the cleaned data in the android_clean list.
# Keep exactly one row per app: the first row whose review count equals the
# maximum recorded for that name in reviews_max.
android_clean = []
already_added = []   # names kept so far, in insertion order
added_names = set()  # same names, for constant-time membership checks
for app in android_data:
    name = app[0]
    n_reviews = float(app[3])
    # The membership check stops a second row with the same maximum review
    # count from being added again.
    if name not in added_names and n_reviews == reviews_max[name]:
        android_clean.append(app)
        already_added.append(name)
        added_names.add(name)

explore_data(android_clean, 0, 3, rows_and_columns=True)
Step I - We will define a function which filters out non-English strings.
def english_or_not(a_string, max_non_ascii=2):
    """Return whether ``a_string`` looks like English text.

    A character counts as non-ASCII when its code point is above 127. The
    string is accepted as long as it contains at most ``max_non_ascii`` such
    characters, so a name with the odd emoji or trademark sign still passes.

    Args:
        a_string: The text to check.
        max_non_ascii: Largest number of non-ASCII characters tolerated.
            The default of 2 rejects strings with three or more.

    Returns:
        True if the string is within the limit, False otherwise.
    """
    non_ascii_seen = 0
    for ch in a_string:
        if ord(ch) > 127:
            non_ascii_seen += 1
            # Stop scanning as soon as the limit is exceeded.
            if non_ascii_seen > max_non_ascii:
                return False
    return True
# Try the filter on a few sample names: plain ASCII, mostly non-ASCII, and
# ASCII names carrying a single symbol or emoji.
for sample_name in (
    'Instagram',
    '爱奇艺PPS -《欢乐颂2》电视剧热播',
    'Docs To Go™ Free Office Suite',
    'Instachat 😜',
):
    print(english_or_not(sample_name))
**Step II - Delete non-English apps using the above function**
# Keep only the rows whose name (column 0) passes the English check.
english_a_apps = [app for app in android_clean if english_or_not(app[0])]

# NOTE(review): column 0 is also used as the name for iOS rows — verify
# against ios_header that it really holds the app name in this export.
english_ios_apps = [app for app in ios_data if english_or_not(app[0])]
English Android Apps
# First three English Play Store rows, followed by the dataset dimensions.
explore_data(dataset=english_a_apps, start=0, end=3, rows_and_columns=True)
English iOS Apps
# First three English App Store rows, followed by the dataset dimensions.
explore_data(dataset=english_ios_apps, start=0, end=3, rows_and_columns=True)
# Keep only the apps whose price field is the string '0'.
# The price is read from column 7 for Play Store rows and column 5 for
# App Store rows.
free_a_apps = [app for app in english_a_apps if app[7] == '0']
free_ios_apps = [app for app in english_ios_apps if app[5] == '0']
Free English Android Apps
# First three free English Play Store rows, followed by the dataset dimensions.
explore_data(dataset=free_a_apps, start=0, end=3, rows_and_columns=True)
Free English iOS Apps
# First three free English App Store rows, followed by the dataset dimensions.
explore_data(dataset=free_ios_apps, start=0, end=3, rows_and_columns=True)
Our aim is to determine the kinds of apps that are likely to attract more users so that we can leverage the use of in-app advertising.
To minimize risks and overhead, our validation strategy for an app idea is comprised of three steps:
We will find app profiles that are successful in both stores, and begin our analysis by getting a sense of the most common genres in each market.
Play Store - Two columns (`Category` and `Genres`) give us an idea of the genres.
App Store - One column (`prime_genre`) gives us an idea of the genres.
def freq_table(apps_list, index):
    """Return the percentage share of each value found in one column.

    Args:
        apps_list: A sequence of rows, each indexable by ``index``.
        index: Position of the column to tabulate (negative indices work).

    Returns:
        A list of ``(value, percentage)`` tuples sorted by descending share.
        Values with equal shares keep the order in which they first appear
        in ``apps_list``. An empty ``apps_list`` yields an empty list.
    """
    from collections import Counter

    counts = Counter(app[index] for app in apps_list)
    total = sum(counts.values())
    # most_common() orders by descending count and keeps first-seen order
    # for ties, so the percentages come out already sorted.
    return [(value, (count / total) * 100)
            for value, count in counts.most_common()]
# Percentage tables for the free English apps of each store.
# Column -5 of the iOS rows is the genre column used throughout the analysis.
genre_ios_apps = freq_table(apps_list=free_ios_apps, index=-5)
# Column 1 of the Play Store rows is the category column.
category_a_apps = freq_table(apps_list=free_a_apps, index=1)
# NOTE(review): column -4 is presumably the Play Store 'Genres' column — confirm.
genre_a_apps = freq_table(apps_list=free_a_apps, index=-4)
Most Common Genres - iOS
# Display the iOS genre percentage table. As a bare expression this only
# renders output when evaluated interactively (e.g. as a notebook cell).
genre_ios_apps
With more than half (55%) of the share, Games is the most common genre.
The next few top genres are also in the fun segment, namely Entertainment, Photo & Video, and Social Networking.
Most Common Genres - Android
# Display the Play Store genre percentage table. As a bare expression this
# only renders output when evaluated interactively (e.g. as a notebook cell).
genre_a_apps
Most Common Categories - Android
# Display the Play Store category percentage table. As a bare expression this
# only renders output when evaluated interactively (e.g. as a notebook cell).
category_a_apps
In the Play Store, the distribution seems to be more balanced.
Although Games is one of the top categories, it doesn't affect the distribution so much. However, apps for practical purposes like `Tools`, `Education`, `Business`, and `Productivity` are taking the top spots.
We may also notice that the `Genres` column is far more granular (more categories) compared to the `Category` column. Since we want to get the overall picture, it will be better to use the `Category` column from now on.
Being most common doesn't equate to being most popular. Maybe there is more supply than demand. So here, we will analyse popularity by comparing the number of users (installs) per genre.
Play Store: the `Installs` column gives us an idea of the number of installs.
App Store: here, we don't have any column for installs, but the `rating_count_tot` column (user ratings) gives us an idea of the installs in each genre.
Most Popular Apps - iOS
# Genre names, in the order they appear in the iOS frequency table.
categories_ios_list = [entry[0] for entry in genre_ios_apps]

# Average of column 6 (converted to float) over the free iOS apps of each
# genre (column -5). The surrounding text uses this user-rating count as a
# stand-in for installs.
pop_ios_apps_dict = {}
for cat in categories_ios_list:
    ratings_sum = 0
    app_count = 0
    for app in free_ios_apps:
        if app[-5] != cat:
            continue
        app_count += 1
        ratings_sum += float(app[6])
    pop_ios_apps_dict[cat] = ratings_sum / app_count

# (genre, average) pairs, highest average first.
pop_ios_apps_list = sorted(
    pop_ios_apps_dict.items(),
    key=lambda pair: pair[1],
    reverse=True,
)
# Display the iOS genres ranked by average rating count. As a bare expression
# this only renders output when evaluated interactively (e.g. a notebook cell).
pop_ios_apps_list
Most Popular Categories - Android
# Category names, in the order they appear in the Play Store frequency table.
categories_a_list = [entry[0] for entry in category_a_apps]

# Average of column 5 (the installs figure) over the free Play Store apps of
# each category (column 1). Commas and '+' signs are stripped from the field
# before it is converted to a float.
pop_a_apps_dict = {}
for category in categories_a_list:
    installs_sum = 0
    app_count = 0
    for app in free_a_apps:
        if app[1] != category:
            continue
        cleaned_installs = app[5].replace(',', '').replace('+', '')
        installs_sum += float(cleaned_installs)
        app_count += 1
    pop_a_apps_dict[category] = installs_sum / app_count

# (category, average) pairs, highest average first.
pop_a_apps_list = sorted(
    pop_a_apps_dict.items(),
    key=lambda pair: pair[1],
    reverse=True,
)
# Display the Play Store categories ranked by average installs. As a bare
# expression this only renders output when evaluated interactively.
pop_a_apps_list
Here, we can see various categories which are famous and common between both the stores.
We know that most of the app installs and popularity come from a few apps, such as Facebook for Social and YouTube for Music or Video Players. We are also not interested in building a communication app like WhatsApp, Skype, etc. But there is one category which is popular in both stores - the Photo category.
We can recommend developing an app in the Photography category.