# default_exp understat
A module for fetching data from understat.com
#hide
from nbdev.showdoc import *
#export
import enum
import re
import json
import requests
import bs4
#export
def fetch_html(url, timeout=30):
    """
    Fetch HTML and decode into a `bs4.BeautifulSoup` object

    url: address of the page to download
    timeout: seconds passed to `requests.get` as its `timeout`; pass `None` to wait
        indefinitely (the behaviour before this parameter existed)

    Raises `requests.HTTPError` (via `raise_for_status`) on a 4xx/5xx response, and
    whatever `requests.get` raises on connection problems or timeouts.
    """
    # Without a timeout a stalled server would block the caller forever
    r = requests.get(url, timeout=timeout)
    r.raise_for_status()
    # The raw bytes are decoded with the 'unicode-escape' codec, so backslash escapes
    # in the page (e.g. `\x22`) become literal characters - presumably so that the
    # JavaScript string literals read by `extract_json` can be parsed as JSON.
    # NOTE(review): this codec treats non-escaped bytes as Latin-1, so raw non-ASCII
    # UTF-8 text elsewhere in the page would be mis-decoded - confirm that is acceptable.
    return bs4.BeautifulSoup(str(r.content, 'unicode-escape'), features='html.parser')
def extract_json(soup, json_var):
    """
    Extract a JSON variable from understat HTML.

    Takes the first `<script>` element whose text mentions `json_var`, looks for a
    `var <json_var> = JSON.parse('...')` assignment anywhere in it and returns the
    decoded JSON payload.

    soup: parsed page exposing `select('script')`, e.g. the result of `fetch_html`
    json_var: name of the JavaScript variable to read, e.g. `'datesData'`

    Raises `ValueError` if no script mentions `json_var`, `AttributeError` if the
    first such script contains no matching assignment, and `json.JSONDecodeError`
    if the captured payload is not valid JSON.
    """
    candidates = [s for s in soup.select('script') if s.string and json_var in s.string]
    if not candidates:
        raise ValueError(f'No <script> element mentions {json_var!r}')
    node = candidates[0]
    # Clean string by collapsing newlines (\n), tabs (\t) and repeated spaces into single spaces
    node_string = ' '.join(node.string.split())
    # Raw string so the regex escapes are not (invalid) string escapes; the variable
    # name is escaped so it is always matched literally
    pattern = rf"var {re.escape(json_var)} = JSON\.parse\('(?P<json>.*?)'\)"
    # `search` rather than `match`: the assignment need not be the first statement in the script
    found = re.search(pattern, node_string)
    if found is None:
        # AttributeError is kept on purpose: it is what calling `.group` on a failed
        # match raised before, so existing `except` clauses keep working
        raise AttributeError(f"No `var {json_var} = JSON.parse('...')` assignment found")
    return json.loads(found.group('json'))
#export
# 'Competition' might be a better name, but let's stick with understat's terminology
class League(enum.Enum):
    """
    Leagues available on understat.

    Each member's value is the league identifier that `Understat.matches`
    places in the `/league/<value>/<season>` part of the page URL.
    """
    EPL = "EPL"
    LA_LIGA = "La_Liga"
    SERIE_A = "Serie_A"
    BUNDESLIGA = "Bundesliga"
    LIGUE_1 = "Ligue_1"
    RPL = "RPL"
#export
class Understat:
    """
    Fetches understat data webpages

    Each public method downloads one page below `base_url` and returns the JSON
    embedded in it, as parsed by `extract_json`.
    """
    def __init__(self, base_url: str='https://understat.com'):
        self.base_url = base_url

    def _page_json(self, path: str, json_var: str):
        """ Download `<base_url>/<path>` and return the JSON assigned to `json_var` on that page. """
        # Drop trailing slashes so a `base_url` like 'https://understat.com/' does not
        # produce '//' in the request URL
        url = f"{self.base_url.rstrip('/')}/{path}"
        return extract_json(fetch_html(url), json_var)

    def matches(self, league: League, season: int):
        """ Fetch match data for a given `league` and `season` (start year). """
        return self._page_json(f'league/{league.value}/{season}', 'datesData')

    def shots(self, match_id: int):
        """ Fetch shot data for the match with the given `match_id`. """
        return self._page_json(f'match/{match_id}', 'shotsData')
Fetch matches from Understat
understat = Understat()
# `matches()` returns the page's parsed `datesData`; index 17 picks one entry from it,
# so despite its plural name `matches` holds a single match below
matches = understat.matches(League.EPL, 2019)[17]
matches
{'id': '11660', 'isResult': True, 'h': {'id': '238', 'title': 'Sheffield United', 'short_title': 'SHE'}, 'a': {'id': '78', 'title': 'Crystal Palace', 'short_title': 'CRY'}, 'goals': {'h': '1', 'a': '0'}, 'xG': {'h': '1.84778', 'a': '0.241912'}, 'datetime': '2019-08-18 14:00:00', 'forecast': {'w': '0.8326', 'd': '0.1408', 'l': '0.0266'}}
Fetch individual match shots
shots = understat.shots(11660)
# Take the home team's shot at index 5 (zero-based, i.e. the sixth entry under 'h')
shots['h'][5]
{'id': '311085', 'minute': '25', 'result': 'BlockedShot', 'X': '0.899000015258789', 'Y': '0.5609999847412109', 'xG': '0.07507339864969254', 'player': "Jack O'Connell", 'h_a': 'h', 'player_id': '7705', 'situation': 'FromCorner', 'season': '2019', 'shotType': 'LeftFoot', 'match_id': '11660', 'h_team': 'Sheffield United', 'a_team': 'Crystal Palace', 'h_goals': '1', 'a_goals': '0', 'date': '2019-08-18 14:00:00', 'player_assisted': 'Oliver Norwood', 'lastAction': 'Pass'}