#!/usr/bin/env python # coding: utf-8 # # Geoguessr game analysis # In[ ]: from os import environ import atoti as tt # In[ ]: config = "mybinder-config.yml" if "JUPYTERHUB_SERVICE_PREFIX" in environ else "local-config.yml" session = tt.create_session("Geoguessr", config=config) print(session.url) # ## Load the data # In[ ]: scores = session.read_csv("data/scores.csv", keys=["GameId", "Round", "Player"]) games = session.read_csv("data/games.csv", keys=["GameId"]) rounds = session.read_csv("data/rounds.csv", keys=["GameId", "Round"]) continents = session.read_csv("data/continents.csv", keys=["Country"], sep=",") scores.join(games, mapping={"GameId": "GameId"}) scores.join(rounds, mapping={"GameId": "GameId", "Round": "Round"}) rounds.join(continents, mapping={"Country": "Country"}) # In[ ]: session.stores.schema # ## Hierarchy structure # In[ ]: cube = session.create_cube(scores) # In[ ]: m, lvl, h = cube.measures, cube.levels, cube.hierarchies # In[ ]: h["Games"] = [scores["GameId"], scores["Round"]] h["Round Number"] = { "Round Number": scores["Round"]} del h["GameId"] del h["Round"] # In[ ]: h["Round Number"] = { "Round Number": scores["Round"]} # In[ ]: h["Geography"] = [continents["Continent"], rounds["Country"]] del h["Continent"] del h["Country"] h["Geography"].dimension = "Geography" # In[ ]: h # ## New measures # In[ ]: m["score"] = tt.agg.mean( tt.agg.sum(scores["Score"]), scope=tt.scope.origin(lvl["Player"], lvl["GameId"]) ) m["score"].formatter = "DOUBLE[#,###]" # In[ ]: m["avg score"] = tt.agg.mean(scores["Score"]) # In[ ]: session.visualize() # In[ ]: session.visualize("Average Score per country") # In[ ]: session.visualize("Average Score per continent") # In[ ]: m["one"] = 1 m["Number of rounds"] = tt.agg.sum(m["one"], scope=tt.scope.origin(lvl["Round"])) # In[ ]: session.visualize("Most seen countries") # In[ ]: session.visualize("Most seen continent") # In[ ]: m["find country rate"] = tt.filter(m["contributors.COUNT"], lvl["HasCountry"] == True) / m["contributors.COUNT"] m["find country rate"].formatter = "DOUBLE[0.00%]" # In[ ]: session.visualize("Find country rate per user") # In[ ]: m["Best score"] = tt.agg.max(m["score"], scope=tt.scope.origin(lvl["GameId"], lvl["Player"])) m["Worst score"] = tt.agg.min(m["score"], scope=tt.scope.origin(lvl["GameId"], lvl["Player"])) # In[ ]: cube.query(m["Best score"], levels=lvl["Player"]) # In[ ]: m["Winner"] = tt.where( lvl["GameId"] != None, tt.agg.max_member(m["score"], lvl["Player"]) ) # In[ ]: