Template for deliverable for https://classes.daveeargle.com/security-analytics-assignments/labs/lab-publicly-accessible-datasets.html
Deliverable:
Use the OpenML phish_url dataset for all tasks in this notebook.
import pandas as pd
def do_openml(url=None) -> pd.DataFrame:
    """Load the phish_url dataset from OpenML into a DataFrame.

    Args:
        url: Optional direct-download location of the CSV. Anything
            ``pd.read_csv`` accepts works (URL string, local path, or
            file-like object). When omitted, ``dataset_url`` below is used.

    Returns:
        The DataFrame produced by ``pd.read_csv``.

    Raises:
        ValueError: If no ``url`` was passed and ``dataset_url`` is unset.
    """
    # Use url hacking to get a direct download url from the OpenML page for the dataset.
    # For this lab, do _not_ use sklearn's fetch_openml().
    # The generalization described in the lab doc does _not work_ consistently.
    # But I'm leaving it as-is because the purpose of this is to teach you url hacking.
    dataset_url = None  # TODO: paste the direct download url here.

    source = dataset_url if url is None else url
    if source is None:
        # Fail with an actionable message instead of the opaque TypeError
        # that a bare pd.read_csv() call produces.
        raise ValueError(
            "do_openml: no dataset URL configured; set dataset_url or pass url=..."
        )
    df = pd.read_csv(source)
    return df
# Run the OpenML loader. This raises until do_openml has a dataset URL to read from.
do_openml()
def do_github(url=None) -> pd.DataFrame:
    """Load the phish_url dataset from a GitHub repository into a DataFrame.

    Args:
        url: Optional direct-download location of the CSV. Anything
            ``pd.read_csv`` accepts works (URL string, local path, or
            file-like object). When omitted, ``dataset_url`` below is used.

    Returns:
        The DataFrame produced by ``pd.read_csv``.

    Raises:
        ValueError: If no ``url`` was passed and ``dataset_url`` is unset.
    """
    # Upload the dataset to a github repository, and use a direct download link for it below
    dataset_url = None  # TODO: paste the direct download link here.

    source = dataset_url if url is None else url
    if source is None:
        # Fail with an actionable message instead of the opaque TypeError
        # that a bare pd.read_csv() call produces.
        raise ValueError(
            "do_github: no dataset URL configured; set dataset_url or pass url=..."
        )
    df = pd.read_csv(source)
    return df
# Run the GitHub loader. This raises until do_github has a dataset URL to read from.
do_github()
def do_cloud_storage(url=None) -> pd.DataFrame:
    """Load the phish_url dataset from a cloud storage provider into a DataFrame.

    Args:
        url: Optional direct-download location of the CSV. Anything
            ``pd.read_csv`` accepts works (URL string, local path, or
            file-like object). When omitted, ``dataset_url`` below is used.

    Returns:
        The DataFrame produced by ``pd.read_csv``.

    Raises:
        ValueError: If no ``url`` was passed and ``dataset_url`` is unset.
    """
    # Upload the phish_url dataset to one of the following cloud storage providers:
    # * Google Drive
    # * Dropbox
    #
    # Use url hacking to get a direct download link for it, and use it below.
    dataset_url = None  # TODO: paste the direct download link here.

    source = dataset_url if url is None else url
    if source is None:
        # Fail with an actionable message instead of the opaque TypeError
        # that a bare pd.read_csv() call produces.
        raise ValueError(
            "do_cloud_storage: no dataset URL configured; "
            "set dataset_url or pass url=..."
        )
    df = pd.read_csv(source)
    return df
# Run the cloud-storage loader. This raises until do_cloud_storage has a dataset URL to read from.
do_cloud_storage()
def do_gcp(url=None) -> pd.DataFrame:
    """Load the phish_url dataset from a public GCP bucket into a DataFrame.

    Args:
        url: Optional direct-download location of the CSV. Anything
            ``pd.read_csv`` accepts works (URL string, local path, or
            file-like object). When omitted, ``dataset_url`` below is used.

    Returns:
        The DataFrame produced by ``pd.read_csv``.

    Raises:
        ValueError: If no ``url`` was passed and ``dataset_url`` is unset.
    """
    # Create a public GCP bucket, and upload the dataset to it.
    # Use a direct download link to load the file below.
    dataset_url = None  # TODO: paste the direct download link here.

    source = dataset_url if url is None else url
    if source is None:
        # Fail with an actionable message instead of the opaque TypeError
        # that a bare pd.read_csv() call produces.
        raise ValueError(
            "do_gcp: no dataset URL configured; set dataset_url or pass url=..."
        )
    df = pd.read_csv(source)
    return df
# Run the GCP loader. This raises until do_gcp has a dataset URL to read from.
do_gcp()
def do_aws(url=None) -> pd.DataFrame:
    """Load the phish_url dataset from a public AWS S3 bucket into a DataFrame.

    Args:
        url: Optional direct-download location of the CSV. Anything
            ``pd.read_csv`` accepts works (URL string, local path, or
            file-like object). When omitted, ``dataset_url`` below is used.

    Returns:
        The DataFrame produced by ``pd.read_csv``.

    Raises:
        ValueError: If no ``url`` was passed and ``dataset_url`` is unset.
    """
    # Create a public aws s3 bucket, and upload the dataset to it.
    # Use a direct download link to load the file below.
    dataset_url = None  # TODO: paste the direct download link here.

    source = dataset_url if url is None else url
    if source is None:
        # Fail with an actionable message instead of the opaque TypeError
        # that a bare pd.read_csv() call produces.
        raise ValueError(
            "do_aws: no dataset URL configured; set dataset_url or pass url=..."
        )
    df = pd.read_csv(source)
    return df
# Run the AWS loader. This raises until do_aws has a dataset URL to read from.
do_aws()