from sklearn.datasets import fetch_openml
# Fetch version 1 of the "titanic" dataset from OpenML. return_X_y=True splits
# the result into a (features, target) pair and as_frame=True requests pandas
# objects rather than arrays.
X, y = fetch_openml(
    name="titanic",
    version=1,
    return_X_y=True,
    as_frame=True,
)
# Bare expression so the notebook cell displays the (X, y) tuple.
X, y
( pclass name sex \ 0 1.0 Allen, Miss. Elisabeth Walton female 1 1.0 Allison, Master. Hudson Trevor male 2 1.0 Allison, Miss. Helen Loraine female 3 1.0 Allison, Mr. Hudson Joshua Creighton male 4 1.0 Allison, Mrs. Hudson J C (Bessie Waldo Daniels) female ... ... ... ... 1304 3.0 Zabour, Miss. Hileni female 1305 3.0 Zabour, Miss. Thamine female 1306 3.0 Zakarian, Mr. Mapriededer male 1307 3.0 Zakarian, Mr. Ortin male 1308 3.0 Zimmerman, Mr. Leo male age sibsp parch ticket fare cabin embarked boat body \ 0 29.0000 0.0 0.0 24160 211.3375 B5 S 2 NaN 1 0.9167 1.0 2.0 113781 151.5500 C22 C26 S 11 NaN 2 2.0000 1.0 2.0 113781 151.5500 C22 C26 S None NaN 3 30.0000 1.0 2.0 113781 151.5500 C22 C26 S None 135.0 4 25.0000 1.0 2.0 113781 151.5500 C22 C26 S None NaN ... ... ... ... ... ... ... ... ... ... 1304 14.5000 1.0 0.0 2665 14.4542 None C None 328.0 1305 NaN 1.0 0.0 2665 14.4542 None C None NaN 1306 26.5000 0.0 0.0 2656 7.2250 None C None 304.0 1307 27.0000 0.0 0.0 2670 7.2250 None C None NaN 1308 29.0000 0.0 0.0 315082 7.8750 None S None NaN home.dest 0 St Louis, MO 1 Montreal, PQ / Chesterville, ON 2 Montreal, PQ / Chesterville, ON 3 Montreal, PQ / Chesterville, ON 4 Montreal, PQ / Chesterville, ON ... ... 1304 None 1305 None 1306 None 1307 None 1308 None [1309 rows x 13 columns], 0 1 1 1 2 0 3 0 4 0 .. 1304 0 1305 0 1306 0 1307 0 1308 0 Name: survived, Length: 1309, dtype: category Categories (2, object): ['0', '1'])
from google.colab import drive
Mounted at /content/drive
# Install the Kaggle command-line client that the `kaggle ...` commands below rely on.
!pip install kaggle
from google.colab import files
# Ask the user to upload one or more files through Colab's upload helper.
# The result is used below as a mapping keyed by file name.
uploaded = files.upload()

# Report each uploaded file's name and the length of its content.
# Iterating .items() avoids a second lookup per key.
for fn, payload in uploaded.items():
    print('User uploaded file "{name}" with length {length} bytes'.format(
        name=fn, length=len(payload)))
# Then move kaggle.json into the folder where the API expects to find it:
# create ~/.kaggle/ if it is missing, move the file there, and chmod 600 so
# only the owner can read or write the credentials file.
!mkdir -p ~/.kaggle/ && mv kaggle.json ~/.kaggle/ && chmod 600 ~/.kaggle/kaggle.json
# List Kaggle datasets, sorted by votes.
!kaggle datasets list --sort-by votes
# List competitions for "titanic".
# NOTE(review): -s is presumably the search term; confirm with `kaggle competitions list -h`.
!kaggle competitions list -s titanic
# Download the data files of the "titanic" competition.
# NOTE(review): recent CLI versions may save a single titanic.zip archive;
# confirm train.csv exists before it is read further down.
!kaggle competitions download -c titanic
import zipfile
from pathlib import Path

import pandas as pd

# Load the Kaggle Titanic training set into a DataFrame.
#
# The Kaggle CLI may deliver the competition files as a single archive
# (titanic.zip) instead of loose CSVs. If train.csv is not present but the
# archive is, extract just that member into the working directory first.
# If train.csv already exists it is read as-is.
train_csv = Path('train.csv')
competition_zip = Path('titanic.zip')
if not train_csv.exists() and competition_zip.exists():
    with zipfile.ZipFile(competition_zip) as archive:
        archive.extract('train.csv')

titanic = pd.read_csv(train_csv)
