As part of getting a better handle on blockchain data, BigQuery, Altair, and Machine Learning, I pulled some Ethereum transaction data and plotted it.
import os
os.environ["GOOGLE_APPLICATION_CREDENTIALS"]=os.path.expanduser("~/.credentials/Notebook bigquery-c422e406404b.json")
import altair as alt
alt.data_transformers.disable_max_rows()
DataTransformerRegistry.enable('default')
from google.cloud import bigquery
client = bigquery.Client()
query ="""
SELECT
EXTRACT(DATE FROM block_timestamp) AS date,
AVG(value) AS average_value,
AVG(gas_price) AS average_gas_price,
FROM `bigquery-public-data.ethereum_blockchain.transactions`
WHERE
EXTRACT(YEAR FROM block_timestamp) = 2019
GROUP BY date
ORDER BY date
"""
We calculate some basic statistics on raw transaction value data for each day over 2019.
schema = client.get_table("bigquery-public-data.ethereum_blockchain.transactions").schema
schema
[SchemaField('hash', 'STRING', 'REQUIRED', 'Hash of the transaction', (), None), SchemaField('nonce', 'INTEGER', 'REQUIRED', 'The number of transactions made by the sender prior to this one', (), None), SchemaField('transaction_index', 'INTEGER', 'REQUIRED', 'Integer of the transactions index position in the block', (), None), SchemaField('from_address', 'STRING', 'REQUIRED', 'Address of the sender', (), None), SchemaField('to_address', 'STRING', 'NULLABLE', 'Address of the receiver. null when its a contract creation transaction', (), None), SchemaField('value', 'NUMERIC', 'NULLABLE', 'Value transferred in Wei', (), None), SchemaField('gas', 'INTEGER', 'NULLABLE', 'Gas provided by the sender', (), None), SchemaField('gas_price', 'INTEGER', 'NULLABLE', 'Gas price provided by the sender in Wei', (), None), SchemaField('input', 'STRING', 'NULLABLE', 'The data sent along with the transaction', (), None), SchemaField('receipt_cumulative_gas_used', 'INTEGER', 'NULLABLE', 'The total amount of gas used when this transaction was executed in the block', (), None), SchemaField('receipt_gas_used', 'INTEGER', 'NULLABLE', 'The amount of gas used by this specific transaction alone', (), None), SchemaField('receipt_contract_address', 'STRING', 'NULLABLE', 'The contract address created, if the transaction was a contract creation, otherwise null', (), None), SchemaField('receipt_root', 'STRING', 'NULLABLE', '32 bytes of post-transaction stateroot (pre Byzantium)', (), None), SchemaField('receipt_status', 'INTEGER', 'NULLABLE', 'Either 1 (success) or 0 (failure) (post Byzantium)', (), None), SchemaField('block_timestamp', 'TIMESTAMP', 'REQUIRED', 'Timestamp of the block where this transaction was in', (), None), SchemaField('block_number', 'INTEGER', 'REQUIRED', 'Block number where this transaction was in', (), None), SchemaField('block_hash', 'STRING', 'REQUIRED', 'Hash of the block where this transaction was in', (), None)]
values = client.query(query).to_dataframe(dtypes={'average_value': float, 'average_gas_price': float}, date_as_object=False)
values.head()
date | average_value | average_gas_price | |
---|---|---|---|
0 | 2019-01-01 | 3.719103e+18 | 1.431514e+10 |
1 | 2019-01-02 | 4.649915e+18 | 1.349952e+10 |
2 | 2019-01-03 | 4.188781e+18 | 1.269504e+10 |
3 | 2019-01-04 | 6.958368e+18 | 1.418197e+10 |
4 | 2019-01-05 | 8.167590e+18 | 2.410475e+10 |
chart = alt.Chart(values).mark_line().encode(
alt.X('date:T', axis=alt.Axis(format=("%x"), labelAngle=270))
).properties(width=600)
alt.layer(
chart.encode(alt.Y('average_value:Q', axis=alt.Axis(format=",.2e")), color=alt.value('darkred'), opacity=alt.value(0.65)),
chart.encode(alt.Y('average_gas_price', axis=alt.Axis(format=",.2e")))
).resolve_scale(y='independent')