#!/usr/bin/env python
# coding: utf-8

# # Mobile App Data

# This project identifies profitable app profiles for the App Store and Google Play markets.

# In[1]:


### Google Play data set ###
from csv import reader

# `with` closes each file as soon as its rows have been read. newline='' is
# how the csv module asks files to be opened for `reader`. The explicit
# encoding keeps non-ASCII app names intact on platforms whose default
# encoding is not UTF-8 (assumes the CSV exports are UTF-8 — TODO confirm).
with open('googleplaystore.csv', encoding='utf8', newline='') as opened_file:
    read_file = reader(opened_file)
    android = list(read_file)
android_header = android[0]
android = android[1:]

### App Store data set ###
with open('AppleStore.csv', encoding='utf8', newline='') as opened_file:
    read_file = reader(opened_file)
    ios = list(read_file)
ios_header = ios[0]
# Row 0 is the header, so the data starts at row 1 (the previous [2:] slice
# silently dropped the first app).
ios = ios[1:]

def explore_data(dataset, start, end, rows_and_columns=False):
    """Print the rows ``dataset[start:end]`` with blank lines after each one.

    Args:
        dataset: A sequence of rows (each row itself a sequence).
        start: Index of the first row to print.
        end: Index one past the last row to print.
        rows_and_columns: When true, also print the total number of rows and
            the number of columns, the latter taken from the first row.
    """
    for record in dataset[start:end]:
        # One newline ends the row, two more leave the same gap that a
        # separate print('\n') would.
        print(record, end='\n\n\n')

    if not rows_and_columns:
        return

    print('Number of rows:', len(dataset))
    print('Number of columns:', len(dataset[0]))

# Show the Google Play column names, then the first three rows together with
# the overall row and column counts.
print(android_header, end='\n\n\n')
explore_data(android, start=0, end=3, rows_and_columns=True)


# In[2]:


# Show the row at index 10472 next to the header and a known-good row so the
# three can be compared by eye.
print(android[10472])  # incorrect row
print('\n')
print(android_header)  # header
print('\n')
print(android[0])      # correct row

# NOTE(review): the index is hard-coded, so executing this delete a second
# time (e.g. re-running the notebook cell) would remove a different row.
del(android[10472])
print(android[10472])  # the row that now occupies index 10472 after the delete


# In[3]:


### Check the Google Play data set for duplicates: print every row whose ###
### first column (read here as the app name) is exactly 'Instagram'.      ###
# Nothing is removed in this cell; it only displays the matching rows.
for app in android:
    name = app[0]
    if name == 'Instagram':
        print(app)


# In[4]:


### I'm not going to remove duplicates randomly.
### This cell only counts them: the first occurrence of each name goes to
### unique_apps, and every later occurrence goes to seen_duplicates ###
seen_duplicates = []
unique_apps = []
seen_names = set()  # same names as unique_apps, kept for O(1) membership tests

for app in android:
    # Assign the current row's name (this was previously a `==` comparison,
    # which left `name` stale and made the counts meaningless).
    name = app[0]
    if name in seen_names:
        seen_duplicates.append(name)
    else:
        seen_names.add(name)
        unique_apps.append(name)

print('Number of duplicates:', len(seen_duplicates))
print('\n')
print('Number of unique', len(unique_apps))
print('Examples of duplicate apps:', seen_duplicates[:15] )


# In[5]:


### Removing duplicate entries: for each app name keep a single row, the
### one with the highest value in column 3 (read here as the review count).
### reviews_max maps name -> highest count seen; android_clean receives the
### surviving rows ###

reviews_max = {}
# Iterate over every row: the header was already stripped when the file was
# loaded, so slicing with [1:] here would skip a real app.
for app in android:
    name = app[0]
    n_reviews = float(app[3])
    if name not in reviews_max or reviews_max[name] < n_reviews:
        reviews_max[name] = n_reviews
# NOTE(review): 1181 is a hard-coded duplicate count — confirm it against the
# number of duplicates printed by the duplicate-counting cell.
print('Expected length:', len(android) - 1181)
print('Actual length:', len(reviews_max))

android_clean = []
# A set gives O(1) membership tests; it is only ever used for `in` checks.
already_added = set()

for app in android:
    name = app[0]
    n_reviews = float(app[3])
    # The second condition handles several rows sharing the same maximum:
    # only the first of them is kept.
    if n_reviews == reviews_max[name] and name not in already_added:
        android_clean.append(app)
        already_added.add(name)


# In[6]:


### preview the first three rows of android_clean, plus its row and column counts ###
explore_data(android_clean, start=0, end=3, rows_and_columns=True)


# In[7]:


## Helper that decides whether a string (an app name) looks English.
## It counts the characters that fall outside the ASCII range (0-127);
## a string with more than 3 such characters is treated as non-English,
## so a name carrying a few symbols or emoji still counts as English.
def english_only(language):
    """Return True when ``language`` has at most three non-ASCII characters.

    A character counts as non-ASCII when its code point is above 127.

    Args:
        language: The string to inspect.

    Returns:
        False if more than three characters are non-ASCII, True otherwise
        (including for the empty string).
    """
    non_ascii = sum(1 for ch in language if ord(ch) > 127)
    return non_ascii <= 3

# Spot-check the helper on a plain ASCII name, a mostly non-ASCII name, and
# two names that carry a single symbol or emoji.
for sample in (
    'Instagram',
    '爱奇艺PPS -《欢乐颂2》电视剧热播',
    'Docs To Go™ Free Office Suite',
    'Instachat 😜',
):
    print(english_only(sample))


# In[8]:


## keep, per store, only the rows whose name passes english_only, then
## preview each filtered list along with its row and column counts ###
android_english = []
ios_english = []

# Android rows: the name is read from column 0.
for app in android_clean:
    name = app[0]
    if english_only(name):
        android_english.append(app)

# iOS rows: the name is read from column 1.
for app in ios:
    name = app[1];
    if english_only(name):
        ios_english.append(app)
        
explore_data(android_english, 0, 3, True)
print('\n')
explore_data(ios_english, 0, 3, True)


# In[9]:


## isolating the free android and iOS apps ###
# NOTE(review): despite the heading, no price filter is applied here; which
# column holds the price cannot be determined from this file — TODO add it
# once confirmed against android_header / ios_header.
#
# The lists are rebuilt from scratch so they hold full rows only. Previously
# the bare `english_only` (a function object, always truthy) was tested
# instead of being called, and name strings were appended on top of the rows
# already collected, corrupting both lists.
android_english = []
for app in android_clean:
    name = app[0]
    if english_only(name):
        android_english.append(app)

ios_english = []
for app in ios:
    name = app[1]
    if english_only(name):
        ios_english.append(app)


# In[10]:


### We want to find an app profile that fits both the App Store and Google Play: by analyzing apps that are successful on both,
### we can gauge the threshold of entry needed to be, and remain, competitive on those platforms ###

# Preview the first three entries of android_english with its row and column counts.
explore_data(android_english, start=0, end=3, rows_and_columns=True)


# In[11]:


### function to generate frequency tables to show percentages ###

def freq_table(dataset, index):
    """Build a percentage frequency table for one column of ``dataset``.

    Args:
        dataset: An iterable of rows; each row must support ``row[index]``.
        index: Position of the column whose values are tallied.

    Returns:
        A dict mapping each distinct column value to the percentage of rows
        (0-100, as a float) that carry it, in first-seen order. An empty
        dataset yields an empty dict.
    """
    counts = {}
    n_rows = 0
    for record in dataset:
        n_rows += 1
        key = record[index]
        counts[key] = counts.get(key, 0) + 1

    # No division happens when the dataset is empty, because counts is empty.
    return {key: (hits / n_rows) * 100 for key, hits in counts.items()}

### function to display the percentages in desc ###
def display_table(dataset, index, label):
    """Print the percentage table of one column, largest share first.

    Args:
        dataset: Rows to pass to ``freq_table``.
        index: Column position to pass to ``freq_table``.
        label: Text shown, upper-cased, in the heading line.
    """
    percentages = freq_table(dataset, index)

    # Pair each share with its value so that sorting orders by share first;
    # ties fall back to comparing the values themselves.
    ranked = [(share, value) for value, share in percentages.items()]
    ranked.sort(reverse=True)

    print("Column: " + "***" + label.upper() + "***")
    for share, value in ranked:
        print(value, ':', share)
    

# In[12]:


# Percentage breakdown of the iOS rows by their fifth-from-last column.
display_table(dataset=ios, index=-5, label="ios")


# In[13]:


# Percentage breakdown of the de-duplicated Android rows by column 1.
display_table(dataset=android_clean, index=1, label="Category")


# In[14]:


# Percentage breakdown of the de-duplicated Android rows by their
# fourth-from-last column.
display_table(dataset=android_clean, index=-4, label="Genres")


# In[15]:


# Bare expression: a notebook echoes the returned table, while a plain script
# run discards it.
freq_table(dataset=ios, index=-5)


# In[27]:


### calculate most popular apps by genre: average of column 5 per genre ###
# NOTE(review): column 5 is read as the number of user ratings (the variable
# below is named user_ratings) — confirm against ios_header.

genres_ios = freq_table(ios, -5)

for genre in genres_ios:
    total = 0
    len_genre = 0
    # Read the genre and rating from the row being iterated. Previously the
    # loop variable was overwritten and a stale global `app` was used, so
    # every row looked identical.
    for app in ios:
        genre_app = app[-5]
        if genre_app == genre:
            user_ratings = float(app[5])
            total += user_ratings
            len_genre += 1
    # Every genre key was produced from a row in ios, so len_genre >= 1 here.
    avg_user_ratings = total / len_genre
    # Report inside the loop so each genre is shown, not only the last one.
    print(genre, ':', avg_user_ratings)


#