#!/usr/bin/env python
# coding: utf-8

# In[ ]:


# Third-party packages this notebook was written against. Install them from a
# shell before running the file as a script:
#
#     pip install bs4
#
# A bare `pip install ...` line is IPython-only syntax and is a SyntaxError in
# a plain .py file, so the commands are kept here as comments.


# In[ ]:


#     pip install html5lib


# In[ ]:


from selenium import webdriver 
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from bs4 import BeautifulSoup
import time
import requests


# In[ ]:


# In[ ]:


# Request headers passed to requests.get(); the User-Agent is a desktop
# Chrome identification string.
headers = {
    'User-Agent': (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/111.0.0.0 Safari/537.36"
    ),
}
#driver = webdriver.Chrome()
#driver.get(URL)
#time.sleep(3)
#html = driver.page_source
#soup = BeautifulSoup(html, 'html.parser')
#r = requests.get(url=URL, headers=headers)
#driver.switch_to.frame("sp_message_iframe_764224") 
#element = driver.find_element(By.XPATH, '/html/body/div/div[2]/div[3]/div[2]/button').click()


# In[ ]:


# XPath matching any element whose `title` attribute is "ACCEPT ALL".
# The previous expression had an unterminated string literal and tested a
# child element named `title` instead of the attribute.
ACCEPT_ALL_XPATH = '//*[@title="ACCEPT ALL"]'

# The only code that creates `driver` (webdriver.Chrome()) is commented out
# above, so click the button only when a driver has actually been created.
_active_driver = globals().get("driver")
if _active_driver is not None:
    element = _active_driver.find_element(by=By.XPATH, value=ACCEPT_ALL_XPATH)
    element.click()


# In[ ]:


# Empty accumulators appended to by the scraping cells further down.
# Each name gets its own independent list object.
# (position_list is not referenced again in the visible part of the file.)
players_list, age_list, position_list, value_list, badge_list = (
    [] for _ in range(5)
)


# In[ ]:


# Download market-value pages 1-4 and collect the raw <td> cells of interest.
# Every pass overwrites the previous results, so once the loop finishes the
# names below refer to the cells of the last page fetched (page 4).
for pagenum in range(1, 5):
    URL = f"https://www.transfermarkt.co.uk/premier-league/marktwerte/wettbewerb/GB1/page/{pagenum}"
    r = requests.get(URL, headers=headers)
    soup = BeautifulSoup(r.content, 'html.parser')

    # `ages` and `nationality` are both built from the "zentriert" cells.
    players, ages, nationality, values = (
        soup.find_all("td", class_=css_class)
        for css_class in ("hauptlink", "zentriert", "zentriert", "rechts hauptlink")
    )
    

# In[ ]:


# nationality_list is appended to for the first time in this cell, so it is
# created here; without this the append below raises NameError.
nationality_list = []

# Walk market-value pages 1-4 and accumulate the text of the relevant cells.
for pagenum in range(1, 5):
    URL = "https://www.transfermarkt.co.uk/premier-league/marktwerte/wettbewerb/GB1/page/" + str(pagenum)
    r = requests.get(url=URL, headers=headers)
    soup = BeautifulSoup(r.content, 'html.parser')

    # Text of every <td class="hauptlink"> cell on this page.
    players = soup.find_all("td", class_="hauptlink")
    players_list.extend(player.text for player in players)

    # Text of every <td class="zentriert"> cell on this page.
    ages = soup.find_all("td", class_="zentriert")
    age_list.extend(age.text for age in ages)

    # Title of the first <img> inside each "zentriert" cell that has one.
    # Iterate the cells fetched for THIS page; the loop previously walked the
    # stale `nationality` result left over from an earlier cell.
    nationalities = soup.find_all("td", class_="zentriert")
    for nation in nationalities:
        flags = nation.find_all('img')
        if flags:
            first_nationality = flags[0]['title']
            nationality_list.append(first_nationality)

    # Text of every <td class="rechts hauptlink"> cell on this page.
    values = soup.find_all("td", class_="rechts hauptlink")
    value_list.extend(value.text for value in values)


# In[ ]:


# From the most recently parsed page (`soup`), record the title of the first
# <img> in every <td class="zentriert"> cell that contains an image.
badges = soup.find_all("td", class_="zentriert")
for badge in badges:
    flags = badge.find_all('img')
    if flags:
        first_flag = flags[0]
        first_badge = first_flag['title']
        badge_list.append(first_badge)
        # Echo the value just stored (this used to print an unrelated
        # variable, `first_nationality`, set by a different cell).
        print(first_badge)


# In[ ]:


# Bare names below are notebook display expressions: they are evaluated, but
# their value is discarded when the file runs as a script.
nationality_list


# In[ ]:


players


# In[ ]:


# Append the text of every cell currently held in `players`.
players_list.extend(cell.text for cell in players)


# In[ ]:


players_list


# In[ ]:


# Drop every second entry (positions 1, 3, 5, ...) in place, keeping the
# entries at even positions. A single slice deletion replaces the manual
# index loop that deleted while advancing; the resulting list is the same.
del players_list[1::2]
print(players_list)


# In[ ]:


# All <td class="zentriert"> cells of the most recently parsed page.
ages = soup.find_all("td", class_="zentriert")


# In[ ]:


ages  # notebook display only


# In[ ]:


# Append the text of each of those cells to the running list.
age_list.extend(cell.text for cell in ages)


# In[ ]:


age_list  # notebook display only


# In[ ]:


# Take every fourth entry of age_list, starting at position 2.
new_age_list = age_list[2::4]
print(new_age_list)


# In[ ]:


new_age_list  # notebook display only


# In[ ]:


# All <td class="zentriert"> cells of the most recently parsed page.
nationality = soup.find_all("td", class_="zentriert")


# In[ ]:


nationality  # notebook display only


# In[ ]:


# For pages 1-4, look inside the body of the "items" table and record the
# title of the first image found in each <td class="zentriert"> cell.
for pagenum in range(1, 5):
    URL = f"https://www.transfermarkt.co.uk/premier-league/marktwerte/wettbewerb/GB1/page/{pagenum}"
    r = requests.get(URL, headers=headers)
    soup = BeautifulSoup(r.content, 'html.parser')
    table = soup.find("table", class_='items')
    tbody = table.find("tbody")
    badges = tbody.find_all("td", class_="zentriert")
    for badge in badges:
        flags = badge.find_all('img')
        if not flags:
            # Cell has no image; nothing to record.
            continue
        first_badge = flags[0]['title']
        badge_list.append(first_badge)
        print(first_badge)


# In[ ]:


# Notebook display only; has no effect when run as a script.
badge_list


# In[ ]:


# Remove the last five entries of badge_list in place.
del badge_list[-5:]


# In[ ]:


# Hard-coded positions to remove: four runs of five consecutive indices
# (215-219, 160-164, 105-109, 50-54). They are listed from highest to lowest
# so that each deletion leaves the positions still to be removed unchanged.
# NOTE(review): presumably these are unwanted image titles at fixed offsets
# in the scraped data; confirm against the current page layout, since the
# indices silently go stale if the number of scraped entries changes.
indexes_to_delete = [219, 218, 217, 216, 215, 164, 163, 162, 161, 160, 109, 108, 107, 106, 105, 54, 53, 52, 51, 50]

for index in indexes_to_delete:
    del badge_list[index]


# In[ ]:


badge_list  # notebook display only


# In[ ]:


# De-interleave badge_list: entries at even positions become
# nationality_list, entries at odd positions become club_list.
nationality_list, club_list = badge_list[::2], badge_list[1::2]


# In[ ]:


nationality_list  # notebook display only


# In[ ]:


club_list  # notebook display only


# In[ ]:


# For each cell in `nationality`, record the title of its first <img>, if any.
# NOTE(review): this appends to nationality_list, which an earlier cell has
# already assigned from badge_list; confirm the extra entries are wanted.
for nation in nationality:
    # find() returns a single tag (or None), not a list, so its attributes
    # are read directly. The previous `flags['0']` looked up an attribute
    # literally named "0" and raised KeyError.
    flag_img = nation.find('img')
    if flag_img is not None:
        first_nationality = flag_img['title']
        nationality_list.append(first_nationality)
        print(first_nationality)


# In[ ]:


# Drop every second entry (positions 1, 3, 5, ...) in place, keeping the
# entries at even positions. A single slice deletion replaces the manual
# index loop that deleted while advancing; the resulting list is the same.
del nationality_list[1::2]
print(nationality_list)


# In[ ]:


# All <td class="rechts hauptlink"> cells of the most recently parsed page.
values = soup.find_all("td", class_="rechts hauptlink")


# In[ ]:


values  # notebook display only


# In[ ]:


# Append the text of each of those cells to the running list.
value_list.extend(cell.text for cell in values)


# In[ ]:


value_list  # notebook display only


# In[ ]:


# One entry per <tr> in the body of the "items" table on pages 1-4; each
# entry is the list of stripped cell texts of that row.
all_players = []

for pagenum in range(1, 5):
    URL = f"https://www.transfermarkt.co.uk/premier-league/marktwerte/wettbewerb/GB1/page/{pagenum}"
    r = requests.get(URL, headers=headers)
    soup = BeautifulSoup(r.content, 'html.parser')
    tbody = soup.find("table", class_='items').find("tbody")

    for row in tbody.find_all("tr"):
        all_players.append([cell.text.strip() for cell in row.find_all('td')])

    # Progress output after every page: everything gathered so far and the
    # running row count.
    print(all_players)
    print(len(all_players))


# In[ ]:


all_players  # notebook display only


# In[ ]:


# Keep the rows at positions 0, 3, 6, ... of all_players.
final_players = all_players[::3]

print(final_players)


# In[ ]:


final_players[2]  # notebook display only


# In[ ]:


import pandas as pd

# One DataFrame row per entry of final_players, with nine column labels.
# NOTE(review): the 'null*' labels presumably mark cells that are not needed;
# confirm before relying on them.
df = pd.DataFrame(
    data=final_players,
    columns=[
        'Rank',
        'null',
        'null1',
        'Player',
        'Position',
        'null3',
        'Age',
        'null4',
        'Value',
    ],
)


# In[ ]:


df.head()  # notebook display only


# In[ ]:


# In[ ]:


# Attach the nationality titles as a new column.
df['Nationality'] = nationality_list


# In[ ]:


# Attach the club titles as a new column.
df['Club'] = club_list


# In[ ]:


# Page URL with a `{}` placeholder that is filled in via str.format().
URL_TEMPLATE = (
    'https://www.transfermarkt.co.uk'
    '/premier-league/marktwerte/wettbewerb/GB1/page/{}'
)
# Desktop Chrome identification string sent as the User-Agent header.
USER_AGENT = (
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
    'AppleWebKit/537.36 (KHTML, like Gecko) '
    'Chrome/111.0.0.0 Safari/537.36'
)
headers = {'User-Agent': USER_AGENT}


# In[ ]:


def scrape_webpage(url, timeout=30):
    """Download *url* and return its body parsed with BeautifulSoup.

    The request is sent with the module-level ``headers``.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, ``requests`` can block indefinitely on a stalled
            connection.

    Returns:
        A ``BeautifulSoup`` document built with the ``'html.parser'`` parser.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: For connection failures and timeouts.
    """
    response = requests.get(url=url, headers=headers, timeout=timeout)
    # Fail here with a clear HTTP error instead of handing an error page to
    # the caller's table lookups.
    response.raise_for_status()
    return BeautifulSoup(response.content, 'html.parser')


# In[ ]:


# Gather the stripped cell texts of every <tr> in the body of the "items"
# table on pages 1-4, then keep the rows at positions 0, 3, 6, ...
all_players = []
for pagenum in range(1, 5):
    soup = scrape_webpage(URL_TEMPLATE.format(pagenum))
    tbody = soup.find("table", class_='items').find("tbody")
    for row in tbody.find_all("tr"):
        all_players.append([cell.text.strip() for cell in row.find_all('td')])
final_players = all_players[::3]
print(final_players)


# In[ ]:


# Rebuild all_players from pages 1-4 using the shared scrape_webpage() helper
# and URL_TEMPLATE defined earlier in this file, instead of repeating the
# URL string and the requests/BeautifulSoup calls inline.
all_players = []
for pagenum in range(1, 5):
    soup = scrape_webpage(URL_TEMPLATE.format(pagenum))
    table = soup.find("table", class_='items')
    tbody = table.find("tbody")
    rows = tbody.find_all("tr")

    # One list of stripped cell texts per table row.
    for row in rows:
        all_players.append([col.text.strip() for col in row.find_all('td')])

# Keep the rows at positions 0, 3, 6, ...
final_players = all_players[::3]


# In[ ]: