import torch
from torch.utils.data import DataLoader, Dataset
import numpy as np
import pandas as pd
# Load the raw iris table once for interactive exploration.
df = pd.read_csv(filepath_or_buffer="Data/iris.csv")
# Peek at five randomly chosen rows (the selection differs on every run).
df.sample(n=5)
 | sepal.length | sepal.width | petal.length | petal.width | species |
---|---|---|---|---|---|
24 | 4.8 | 3.4 | 1.9 | 0.2 | Setosa |
68 | 6.2 | 2.2 | 4.5 | 1.5 | Versicolor |
14 | 5.8 | 4.0 | 1.2 | 0.2 | Setosa |
136 | 6.3 | 3.4 | 5.6 | 2.4 | Virginica |
58 | 6.6 | 2.9 | 4.6 | 1.3 | Versicolor |
# Distinct class labels, in order of first appearance in the file.
df.species.unique()
array(['Setosa', 'Versicolor', 'Virginica'], dtype=object)
# Give every class label an integer code based on its order of first appearance.
{
    label: code
    for code, label in enumerate(df.species.unique())
}
{'Setosa': 0, 'Versicolor': 1, 'Virginica': 2}
class Iris(Dataset):
    """Map-style dataset over an iris CSV file.

    Every column except the target column is stacked into the feature
    tensor ``x``; the target column is encoded as integer class indices
    (Setosa=0, Versicolor=1, Virginica=2) in the tensor ``y``.

    Args:
        target_col_name: Name of the column that holds the species label.
        csv_path: Location of the CSV file to read. Defaults to the path
            this class originally had hard-coded.

    Raises:
        ValueError: If the target column contains a value that is not one
            of the three known species names.
    """

    def __init__(self, target_col_name="species", csv_path="Data/iris.csv"):
        super().__init__()
        self.df = pd.read_csv(csv_path)

        # Features keep whatever dtype pandas/numpy infer from the file.
        features = self.df.drop(target_col_name, axis=1).to_numpy()
        self.x = torch.from_numpy(features)

        replacement_dict = {'Setosa': 0, 'Versicolor': 1, 'Virginica': 2}
        # ``map`` is used instead of ``replace``: replacing strings with ints
        # depends on pandas silently downcasting an object column, which is
        # deprecated. ``map`` yields NaN for unknown labels so they can be
        # reported explicitly.
        labels = self.df[target_col_name].map(replacement_dict)
        unmapped = labels.isna()
        if unmapped.any():
            unknown = self.df.loc[unmapped, target_col_name].unique().tolist()
            raise ValueError(
                f"Unknown label(s) in column {target_col_name!r}: {unknown}"
            )
        # Explicit int64 so the label tensor dtype does not vary by platform.
        self.y = torch.from_numpy(labels.astype("int64").to_numpy())

    def __getitem__(self, index):
        """Return the ``(features, label)`` pair stored at ``index``."""
        return self.x[index], self.y[index]

    def __len__(self):
        """Return the number of samples (rows read from the CSV)."""
        return self.y.shape[0]
# Row and column counts of the raw table.
n_rows, n_cols = df.shape
(n_rows, n_cols)
(150, 5)
# Build the dataset with its default target column spelled out.
iris_data = Iris(target_col_name="species")
# Number of samples the dataset exposes.
len(iris_data)
150
# Batches of 8 served in file order (no shuffling).
iris_data_loader = DataLoader(dataset=iris_data, batch_size=8)
for data in iris_data_loader:
    # Each batch arrives as a (features, labels) pair.
    x = data[0]
    y = data[1]
    print(f"independent col data: {x}, \ntaget_col: {y}")
    # Only the first batch is inspected.
    break
independent col data: tensor([[5.1000, 3.5000, 1.4000, 0.2000], [4.9000, 3.0000, 1.4000, 0.2000], [4.7000, 3.2000, 1.3000, 0.2000], [4.6000, 3.1000, 1.5000, 0.2000], [5.0000, 3.6000, 1.4000, 0.2000], [5.4000, 3.9000, 1.7000, 0.4000], [4.6000, 3.4000, 1.4000, 0.3000], [5.0000, 3.4000, 1.5000, 0.2000]], dtype=torch.float64), taget_col: tensor([0, 0, 0, 0, 0, 0, 0, 0])
# Same batch size, but samples are drawn in a fresh random order on each pass.
iris_data_loader = DataLoader(dataset=iris_data, batch_size=8, shuffle=True)
for data in iris_data_loader:
    # Each batch arrives as a (features, labels) pair.
    x = data[0]
    y = data[1]
    print(f"independent col data: {x}, \ntaget_col: {y}")
    # Only the first batch is inspected.
    break
independent col data: tensor([[4.8000, 3.4000, 1.6000, 0.2000], [5.7000, 2.8000, 4.5000, 1.3000], [5.8000, 2.7000, 5.1000, 1.9000], [6.3000, 2.8000, 5.1000, 1.5000], [7.3000, 2.9000, 6.3000, 1.8000], [6.9000, 3.1000, 4.9000, 1.5000], [4.7000, 3.2000, 1.6000, 0.2000], [6.5000, 2.8000, 4.6000, 1.5000]], dtype=torch.float64), taget_col: tensor([0, 1, 2, 2, 2, 1, 0, 1])