#!/usr/bin/env python
# coding: utf-8

# # Simple Scraper
#
# Work out how to split the problem up.
#
# Do simple tests of each part, if necessary, then assemble them later on... (an assembled version is sketched at the end of this notebook).

# In[2]:

# Load the web page we want to scrape
import requests

url = 'https://www.ssepd.co.uk/Powertrack/'
r = requests.get(url)


# In[7]:

from bs4 import BeautifulSoup

soup = BeautifulSoup(r.text, 'html.parser')


# Find a div of interest...

# In[68]:

powertracksummary = soup.find_all('div', {'class': 'power-track-summary'})
# How many are there? There should be just one
len(powertracksummary)


# In[69]:

# Grab the single matching div
powertracksummary = powertracksummary[0]


# In[105]:

# "halfrows" because there is another block of divs that contains data for the same data row
halfrows = powertracksummary.find_all('div', {'class': 'row'})


# In[96]:

len(halfrows)


# Okay - there's something wrong with that tag, because there are *not* 21 rows in the table. The `row` class is not uniquely identifying the thing we are interested in, which makes it less than useful...

# ## Make sure you know what you're working with...
#
# (I didn't do this when you were here - I should have checked!)
#
# Let's just look at some of the `row` classed rows...

# In[72]:

# Preview one of them
halfrows[0]


# In[102]:

# Let's look at the last one...
halfrows[-1]


# In[103]:

# For reference, the class list of the last row...
halfrows[-1]['class']


# This is not good, and it can be a massive distractor / time waster if we try to scrape the page assuming the `row` classed rows all have the same structure. In fact, the `row` class is actually grabbing us different sorts of row, so it's not very useful...
#
# We really need to look for an alternative, because the main aim of scraping is to look for repeatable patterns that we can parse information from in a regular, repeated way...
#
# If the `row` class pulls back rows that are all structured the same, we can write a scraper for one row that will work on them all; if the class pulls back two or more sorts of row, we need to detect which sort each one is and scrape each sort separately, which just gets messy. (A quick sketch of that detection idea is appended at the end of this notebook.)

# ## A unique identifier
#
# The `accordion-group` class does identify rows properly:

# In[128]:

accordionrows = powertracksummary.find_all('div', {'class': 'accordion-group'})
len(accordionrows)


# So use that...
#
# Let's try and scrape one row:

# In[130]:

accordionrow = accordionrows[0]


# In[131]:

# The date/time for the row lives in a div with class "date"
accordionrow.find('div', {'class': 'date'})


# In[134]:

# ...which itself contains two divs: one for the date, one for the time
divs = accordionrow.find('div', {'class': 'date'}).find_all('div')
divs


# In[137]:

_date = divs[0].text.strip()
_date


# In[138]:

_time = divs[1].text.strip()
_time


# Example of finding all the dates and times:

# In[139]:

# Create a list to store data from each row, one row per list item
records = []

# Assemble the recipe from the ingredients we started to prepare above
for accordionrow in accordionrows:
    divs = accordionrow.find('div', {'class': 'date'}).find_all('div')
    _date = divs[0].text.strip()
    _time = divs[1].text.strip()
    record = {'time': _time, 'date': _date}
    records.append(record)

records
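# ---
#
# ## Appendix: sketches
#
# As promised above, here's a minimal sketch of how we might detect the different sorts of `row` classed div: group the rows on their full class list and count how many of each kind there are. Only the grouping logic matters here - the actual class values are whatever the page happens to use.

# In[ ]:

from collections import Counter

# A tag's 'class' attribute is a list of class names; use it as the
# grouping key (converted to a tuple so it's hashable)
row_types = Counter(tuple(r.get('class', [])) for r in halfrows)
row_types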
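# The intro said to test each part simply and then assemble the parts later on. Here's a minimal sketch of that assembly as a single function, assuming the page still uses the `power-track-summary` / `accordion-group` / `date` structure explored above. (The function name `scrape_powertrack` is just for illustration.)

# In[ ]:

import requests
from bs4 import BeautifulSoup

def scrape_powertrack(url='https://www.ssepd.co.uk/Powertrack/'):
    """Scrape the date and time of each row on the Powertrack summary page."""
    r = requests.get(url)
    r.raise_for_status()  # fail loudly if the page didn't load
    soup = BeautifulSoup(r.text, 'html.parser')
    summary = soup.find('div', {'class': 'power-track-summary'})
    records = []
    for row in summary.find_all('div', {'class': 'accordion-group'}):
        divs = row.find('div', {'class': 'date'}).find_all('div')
        records.append({'date': divs[0].text.strip(),
                        'time': divs[1].text.strip()})
    return records

#scrape_powertrack()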
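# A list of dicts like `records` also drops straight into a *pandas* dataframe, which makes the scraped data much easier to inspect, sort, and save (this assumes you have pandas installed; it isn't used anywhere else in this notebook).

# In[ ]:

import pandas as pd

df = pd.DataFrame(records)
df


# In[ ]: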