from sklearn.datasets import fetch_openml
# Fetch version 1 of the "titanic" dataset from OpenML. With as_frame=True and
# return_X_y=True the features and the target come back as two separate pandas
# objects: X is the feature table and y is the categorical `survived` column.
X, y = fetch_openml("titanic", version=1, as_frame=True, return_X_y=True)
# Bare expression as the last statement of the cell so the notebook displays both objects.
X, y
( pclass name sex \ 0 1.0 Allen, Miss. Elisabeth Walton female 1 1.0 Allison, Master. Hudson Trevor male 2 1.0 Allison, Miss. Helen Loraine female 3 1.0 Allison, Mr. Hudson Joshua Creighton male 4 1.0 Allison, Mrs. Hudson J C (Bessie Waldo Daniels) female ... ... ... ... 1304 3.0 Zabour, Miss. Hileni female 1305 3.0 Zabour, Miss. Thamine female 1306 3.0 Zakarian, Mr. Mapriededer male 1307 3.0 Zakarian, Mr. Ortin male 1308 3.0 Zimmerman, Mr. Leo male age sibsp parch ticket fare cabin embarked boat body \ 0 29.0000 0.0 0.0 24160 211.3375 B5 S 2 NaN 1 0.9167 1.0 2.0 113781 151.5500 C22 C26 S 11 NaN 2 2.0000 1.0 2.0 113781 151.5500 C22 C26 S None NaN 3 30.0000 1.0 2.0 113781 151.5500 C22 C26 S None 135.0 4 25.0000 1.0 2.0 113781 151.5500 C22 C26 S None NaN ... ... ... ... ... ... ... ... ... ... 1304 14.5000 1.0 0.0 2665 14.4542 None C None 328.0 1305 NaN 1.0 0.0 2665 14.4542 None C None NaN 1306 26.5000 0.0 0.0 2656 7.2250 None C None 304.0 1307 27.0000 0.0 0.0 2670 7.2250 None C None NaN 1308 29.0000 0.0 0.0 315082 7.8750 None S None NaN home.dest 0 St Louis, MO 1 Montreal, PQ / Chesterville, ON 2 Montreal, PQ / Chesterville, ON 3 Montreal, PQ / Chesterville, ON 4 Montreal, PQ / Chesterville, ON ... ... 1304 None 1305 None 1306 None 1307 None 1308 None [1309 rows x 13 columns], 0 1 1 1 2 0 3 0 4 0 .. 1304 0 1305 0 1306 0 1307 0 1308 0 Name: survived, Length: 1309, dtype: category Categories (2, object): ['0', '1'])
from google.colab import drive
# Mount Google Drive into the Colab filesystem at /content/drive.
drive.mount('/content/drive')
Mounted at /content/drive
# Install the Kaggle command-line client that the `kaggle ...` commands in this notebook rely on.
!pip install kaggle
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/ Requirement already satisfied: kaggle in /usr/local/lib/python3.7/dist-packages (1.5.12) Requirement already satisfied: urllib3 in /usr/local/lib/python3.7/dist-packages (from kaggle) (1.24.3) Requirement already satisfied: certifi in /usr/local/lib/python3.7/dist-packages (from kaggle) (2022.5.18.1) Requirement already satisfied: tqdm in /usr/local/lib/python3.7/dist-packages (from kaggle) (4.64.0) Requirement already satisfied: requests in /usr/local/lib/python3.7/dist-packages (from kaggle) (2.23.0) Requirement already satisfied: python-slugify in /usr/local/lib/python3.7/dist-packages (from kaggle) (6.1.2) Requirement already satisfied: python-dateutil in /usr/local/lib/python3.7/dist-packages (from kaggle) (2.8.2) Requirement already satisfied: six>=1.10 in /usr/local/lib/python3.7/dist-packages (from kaggle) (1.15.0) Requirement already satisfied: text-unidecode>=1.3 in /usr/local/lib/python3.7/dist-packages (from python-slugify->kaggle) (1.3) Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from requests->kaggle) (3.0.4) Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.7/dist-packages (from requests->kaggle) (2.10)
from google.colab import files

# Open Colab's upload widget; the result is keyed by uploaded file name and
# holds each file's content.
uploaded = files.upload()

# Echo the name and size of every uploaded file as a confirmation.
# (The loop body must be indented under the `for`; without it the cell is a
# syntax error.)
for fn, content in uploaded.items():
    print(f'User uploaded file "{fn}" with length {len(content)} bytes')
# Move kaggle.json into ~/.kaggle/ (created first if it does not exist), the
# folder where the API expects to find it, then restrict the file to
# owner-only read/write (chmod 600).
!mkdir -p ~/.kaggle/ && mv kaggle.json ~/.kaggle/ && chmod 600 ~/.kaggle/kaggle.json
Saving kaggle.json to kaggle.json User uploaded file "kaggle.json" with length 63 bytes
# List Kaggle datasets, sorted by vote count.
!kaggle datasets list --sort-by votes
ref title size lastUpdated downloadCount voteCount usabilityRating ------------------------------------------------------------ -------------------------------------------------- ----- ------------------- ------------- --------- --------------- jessicali9530/animal-crossing-new-horizons-nookplaza-dataset Animal Crossing New Horizons Catalog 577KB 2021-06-08 15:05:09 19102 15629 0.8235294 allen-institute-for-ai/CORD-19-research-challenge COVID-19 Open Research Dataset Challenge (CORD-19) 15GB 2022-04-25 19:32:16 145234 9920 0.88235295 mlg-ulb/creditcardfraud Credit Card Fraud Detection 66MB 2018-03-23 01:17:27 412404 9149 0.85294116 shivamb/netflix-shows Netflix Movies and TV Shows 1MB 2021-09-27 04:44:36 239227 6571 1.0 sudalairajkumar/novel-corona-virus-2019-dataset Novel Corona Virus 2019 Dataset 9MB 2021-06-24 04:27:25 372822 5801 0.9705882 paultimothymooney/chest-xray-pneumonia Chest X-Ray Images (Pneumonia) 2GB 2018-03-24 19:41:59 171502 5067 0.75 datasnaek/youtube-new Trending YouTube Video Statistics 201MB 2019-06-03 00:56:47 175279 4536 0.7941176 gregorut/videogamesales Video Game Sales 381KB 2016-10-26 09:10:49 318873 4532 0.5882353 lava18/google-play-store-apps Google Play Store Apps 2MB 2019-02-03 13:55:47 182845 4025 0.7058824 hugomathien/soccer European Soccer Database 33MB 2016-10-23 22:31:38 154883 3850 0.7058824 timoboz/data-science-cheat-sheets Data Science Cheat Sheets 596MB 2020-02-04 19:42:27 44872 3828 0.875 borismarjanovic/price-volume-data-for-all-us-stocks-etfs Huge Stock Market Dataset 492MB 2017-11-16 14:53:29 87432 3756 0.75 unsdsn/world-happiness World Happiness Report 37KB 2019-11-27 04:41:47 225528 3563 0.85294116 zynicide/wine-reviews Wine Reviews 51MB 2017-11-27 17:08:04 160348 3315 0.7941176 spscientist/students-performance-in-exams Students Performance in Exams 9KB 2018-11-09 18:25:25 170692 3263 0.7058824 uciml/pima-indians-diabetes-database Pima Indians Diabetes Database 9KB 2016-10-06 18:31:56 285971 3107 0.88235295 
neuromusic/avocado-prices Avocado Prices 629KB 2018-06-06 05:28:35 157903 3054 0.9705882 tmdb/tmdb-movie-metadata TMDB 5000 Movie Dataset 9MB 2017-09-28 01:09:12 203902 3041 0.8235294 mczielinski/bitcoin-historical-data Bitcoin Historical Data 100MB 2021-04-11 19:41:13 109469 2986 1.0
# Search Kaggle competitions for the term "titanic".
!kaggle competitions list -s titanic
ref deadline category reward teamCount userHasEntered ----------------- ------------------- --------------- --------- --------- -------------- spaceship-titanic 2030-01-01 00:00:00 Getting Started Knowledge 2167 False titanic 2030-01-01 00:00:00 Getting Started Knowledge 13878 False
# Download the data of the "titanic" competition; it arrives as titanic.zip in the working directory.
!kaggle competitions download -c titanic
Downloading titanic.zip to /content 0% 0.00/34.1k [00:00<?, ?B/s] 100% 34.1k/34.1k [00:00<00:00, 27.2MB/s]
# Extract the archive (gender_submission.csv, test.csv, train.csv) into the working directory.
!unzip titanic.zip
Archive: titanic.zip inflating: gender_submission.csv inflating: test.csv inflating: train.csv
import pandas as pd
# Load train.csv (extracted from titanic.zip) into a DataFrame.
titanic = pd.read_csv('train.csv')
# Bare expression as the last statement of the cell so the notebook renders the table.
titanic
 | PassengerId | Survived | Pclass | Name | Sex | Age | SibSp | Parch | Ticket | Fare | Cabin | Embarked |
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | 0 | 3 | Braund, Mr. Owen Harris | male | 22.0 | 1 | 0 | A/5 21171 | 7.2500 | NaN | S |
1 | 2 | 1 | 1 | Cumings, Mrs. John Bradley (Florence Briggs Th... | female | 38.0 | 1 | 0 | PC 17599 | 71.2833 | C85 | C |
2 | 3 | 1 | 3 | Heikkinen, Miss. Laina | female | 26.0 | 0 | 0 | STON/O2. 3101282 | 7.9250 | NaN | S |
3 | 4 | 1 | 1 | Futrelle, Mrs. Jacques Heath (Lily May Peel) | female | 35.0 | 1 | 0 | 113803 | 53.1000 | C123 | S |
4 | 5 | 0 | 3 | Allen, Mr. William Henry | male | 35.0 | 0 | 0 | 373450 | 8.0500 | NaN | S |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
886 | 887 | 0 | 2 | Montvila, Rev. Juozas | male | 27.0 | 0 | 0 | 211536 | 13.0000 | NaN | S |
887 | 888 | 1 | 1 | Graham, Miss. Margaret Edith | female | 19.0 | 0 | 0 | 112053 | 30.0000 | B42 | S |
888 | 889 | 0 | 3 | Johnston, Miss. Catherine Helen "Carrie" | female | NaN | 1 | 2 | W./C. 6607 | 23.4500 | NaN | S |
889 | 890 | 1 | 1 | Behr, Mr. Karl Howell | male | 26.0 | 0 | 0 | 111369 | 30.0000 | C148 | C |
890 | 891 | 0 | 3 | Dooley, Mr. Patrick | male | 32.0 | 0 | 0 | 370376 | 7.7500 | NaN | Q |
891 rows × 12 columns