import requests
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import pickle
# Plot styling for the whole notebook: use Matplotlib's 'seaborn-deep' style sheet.
# NOTE(review): newer Matplotlib releases appear to have renamed the seaborn
# style sheets to 'seaborn-v0_8-*' - confirm against the installed version.
plt.style.use('seaborn-deep')
# IPython magics (only valid inside IPython/Jupyter, not plain Python):
# draw figures inline and set the inline figure format to 'retina'.
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
def scrape_table(url="https://www.crypto51.app/", timeout=30):
    """
    Scrapes the crypto51 table.

    Downloads the page at ``url``, takes the first ``<table>`` element in it
    and turns it into a dataframe: column names come from the ``<th>`` cells
    of the table head, and every ``<tr>`` of the table body becomes one row
    of whitespace-stripped cell strings.

    :param url: page to download; defaults to the crypto51 front page
    :param timeout: seconds to wait for the server before giving up
    :return: pandas dataframe of strings, one row per table body row
    :raises requests.HTTPError: if the server answers with an error status
    :raises ValueError: if the page has no table with a head and a body
    """
    # Without an explicit timeout the request may wait indefinitely on a
    # stalled server.
    r = requests.get(url, timeout=timeout)
    # Fail here rather than trying to parse an error page.
    r.raise_for_status()
    soup = BeautifulSoup(r.text, "lxml")

    table = soup.find('table')
    head = table.find('thead') if table is not None else None
    body = table.find('tbody') if table is not None else None
    if head is None or body is None:
        # find() returns None when nothing matches; report that clearly
        # instead of failing later with an AttributeError on None.
        raise ValueError(f"No table with a thead and tbody found at {url}")

    headers = [ele.text.strip() for ele in head.find_all('th')]
    data = [
        [cell.text.strip() for cell in row.find_all('td')]
        for row in body.find_all('tr')
    ]
    return pd.DataFrame(data, columns=headers)
# Scrape the table; this calls scrape_table(), which performs a network request.
raw_df = scrape_table()
# Optional local cache of the scrape: uncomment the first line to save the
# dataframe to disk, or the second one to reload a previously saved copy.
# raw_df.to_pickle("data/raw_crypto51.p")
# raw_df = pd.read_pickle("data/raw_crypto51.p")
# Inspect the first and the last rows of the raw (all-string) table.
raw_df.head()
raw_df.tail()
def process_raw_market_cap(raw_mcap):
    """
    Converts market cap string into integer, removing symbols.

    Accepts plain amounts such as "$1,234,567" and abbreviated amounts such
    as "$4.35 M" or "$112.21 B", where the suffix is one of K, M, B or T.

    :param raw_mcap: market cap string, normally with one leading currency
        symbol and optional thousands separators
    :return: market cap as an int; any fractional remainder is dropped
    :raises ValueError: if the amount or the suffix cannot be interpreted
    """
    from decimal import Decimal, InvalidOperation

    # Power of ten that each magnitude suffix stands for.
    exponents = {"K": 3, "M": 6, "B": 9, "T": 12}

    text = raw_mcap.strip()
    # Drop the currency symbol, but never eat a leading digit when the
    # symbol happens to be missing.
    if text and not text[0].isdigit():
        text = text[1:]
    parts = text.replace(",", "").split()

    if len(parts) == 1:
        num, exponent = parts[0], 0
    elif len(parts) == 2 and parts[1] in exponents:
        num, exponent = parts[0], exponents[parts[1]]
    else:
        # Raise instead of silently returning None for an unknown suffix,
        # which would surface later as missing values in the numeric column.
        raise ValueError(f"Unrecognised market cap: {raw_mcap!r}")

    try:
        # Decimal shifts the decimal point exactly; multiplying a binary
        # float and truncating can land one below the intended value.
        return int(Decimal(num).scaleb(exponent))
    except InvalidOperation as err:
        raise ValueError(f"Unrecognised market cap: {raw_mcap!r}") from err
def process_raw_hashrate(raw_hashrate):
    """
    Converts all hash rate strings to ints in MH/s

    The input is an amount and a unit separated by whitespace, for example
    "36,751 PH/s". The unit is "H/s" with an optional prefix: none, k/K,
    M, G, T, P or E.

    :param raw_hashrate: hash rate string, thousands separators allowed
    :return: hash rate in MH/s as an int; fractions of a MH/s are dropped,
        so rates below 1 MH/s come back as 0
    :raises ValueError: if the string is not "<amount> <prefix>H/s" with a
        numeric amount and a known prefix
    """
    from decimal import Decimal, InvalidOperation

    suffix = 'H/s'
    # Power of ten of each unit prefix relative to MH/s.
    exponents = {"": -6, "k": -3, "K": -3, "M": 0, "G": 3, "T": 6, "P": 9, "E": 12}

    parts = raw_hashrate.replace(',', '').split()
    if len(parts) != 2 or not parts[1].endswith(suffix):
        raise ValueError(f"Unrecognised hash rate: {raw_hashrate!r}")
    num, unit = parts

    prefix = unit[:-len(suffix)]
    if prefix not in exponents:
        # Raise instead of silently returning None for an unhandled prefix,
        # which would leave missing values in the column.
        raise ValueError(f"Unrecognised hash rate unit: {raw_hashrate!r}")

    try:
        # Decimal keeps the scaling exact and also accepts amounts with a
        # decimal point, which int() alone rejects.
        return int(Decimal(num).scaleb(exponents[prefix]))
    except InvalidOperation as err:
        raise ValueError(f"Unrecognised hash rate: {raw_hashrate!r}") from err
# Work on a copy of the scraped table without its last four rows.
# NOTE(review): presumably those rows cannot be parsed by the converters
# below - confirm against raw_df.tail().
processed_df = raw_df.iloc[:-4, :].copy()

# Turn the string columns into numbers.
processed_df["Hash Rate"] = processed_df["Hash Rate"].apply(process_raw_hashrate)
processed_df["Market Cap"] = processed_df["Market Cap"].apply(process_raw_market_cap)
# regex=False makes the replacements literal. "$" is an end-of-string anchor
# when treated as a regular expression, and the default of `regex` is not
# the same in every pandas version, so it is spelled out here.
processed_df["1h Attack Cost"] = (
    processed_df["1h Attack Cost"]
    .str.replace(",", "", regex=False)
    .str.replace("$", "", regex=False)
    .astype(int)
)
# Percentage string -> fraction (e.g. "50%" -> 0.5).
processed_df["NiceHash-able"] = (
    processed_df["NiceHash-able"]
    .str.replace(",", "", regex=False)
    .str.replace("%", "", regex=False)
    .astype(float)
    / 100
)
processed_df.head()

# Log transforms
# Attack cost and NiceHash-able are shifted by one before the log so that a
# zero entry maps to zero; market cap is logged as is.
processed_df["Log Market Cap"] = np.log(processed_df["Market Cap"])
processed_df["Log Attack Cost"] = np.log(1 + processed_df["1h Attack Cost"])
processed_df["Log NiceHash"] = np.log(1 + processed_df["NiceHash-able"])
from scipy.stats import linregress


def _report_fit(heading, xs, ys):
    """Print `heading`, regress `ys` on `xs`, print R^2 and p value, and return the fit."""
    print(heading)
    fit = linregress(xs, ys)
    print("R^2:", fit[2]**2)
    print("p value:", fit[3])
    return fit


# Straight-line fit on the untransformed columns.
_, _, r_value_1, p_value_1, _ = _report_fit(
    "Linear\n=========",
    processed_df["Market Cap"],
    processed_df["1h Attack Cost"],
)

# Straight-line fit in log-log space, i.e. a power law in the original units.
slope, intercept, r_value, p_value, _ = _report_fit(
    "Power law\n=========",
    processed_df["Log Market Cap"],
    processed_df["Log Attack Cost"],
)

# Undo the log on the intercept to get the power-law multiplier.
multiplier = np.exp(intercept)
print(f"\ncost = {multiplier} * mcap ^ {slope:.2f}")
from adjustText import adjust_text

# One 10x6 figure holding the scatter of log attack cost against log market
# cap, with points coloured by the "Log NiceHash" column.
fig, ax = plt.subplots(figsize=(10, 6))
processed_df.plot.scatter(
    x="Log Market Cap",
    y="Log Attack Cost",
    c="Log NiceHash",
    colormap="plasma",
    ax=ax,
)

# Collects the annotation objects created for the point labels.
texts = []
def annotate_df(row):
    """
    Label one data point on the scatter plot.

    Annotates the module-level axes `ax` with the row's Symbol at its
    ("Log Market Cap", "Log Attack Cost") position, nudged by (3, -5)
    points, and appends the resulting annotation to the module-level
    `texts` list.

    :param row: one dataframe row providing Symbol, "Log Market Cap" and
        "Log Attack Cost"
    :return: None
    """
    point = (row["Log Market Cap"], row["Log Attack Cost"])
    label = ax.annotate(
        row.Symbol,
        point,
        xytext=(3, -5),
        textcoords='offset points',
        size=5,
        color='darkslategrey',
    )
    texts.append(label)
# Label every point: annotate_df is run once per row for its side effects.
ab = processed_df.apply(annotate_df, axis="columns")
# adjust_text(texts)

# Trendline: the fitted log-log line drawn dashed in black over a fixed
# x range of 11 to 26 (50 evenly spaced points).
x = np.linspace(11, 26, num=50)
ax.plot(x, slope * x + intercept, "--k")

# plt.savefig("attackcost-mcap.png", dpi=500)
plt.show()