In [9]:

import torch 
from torch.utils.data import DataLoader, Dataset
import numpy as np
import pandas as pd

Loading a structured dataset

In [10]:

# Read the iris CSV into a DataFrame and preview five randomly sampled rows.
# No random_state is passed to sample(), so the rows shown vary between runs.
df = pd.read_csv("Data/iris.csv")
df.sample(5)

Out[10]:

	sepal.length	sepal.width	petal.length	petal.width	species
24	4.8	3.4	1.9	0.2	Setosa
68	6.2	2.2	4.5	1.5	Versicolor
14	5.8	4.0	1.2	0.2	Setosa
136	6.3	3.4	5.6	2.4	Virginica
58	6.6	2.9	4.6	1.3	Versicolor

In [11]:

# List the distinct values present in the "species" column.
df["species"].unique()

Out[11]:

array(['Setosa', 'Versicolor', 'Virginica'], dtype=object)

In [14]:

# Assign each distinct species name an integer id, numbered in order of first appearance.
{label: code for code, label in enumerate(df["species"].unique())}

Out[14]:

{'Setosa': 0, 'Versicolor': 1, 'Virginica': 2}

In [15]:

class Iris(Dataset):
    """Map-style dataset that serves rows of an iris-style CSV as (features, label) pairs.

    Every column except the target column is stacked into the feature tensor
    ``self.x``; the target column is encoded to numeric class ids in ``self.y``.

    Args:
        target_col_name: Name of the label column.
        csv_path: Path of the CSV file to read.
        label_map: Mapping from raw label value to integer class id. When
            omitted, ``{'Setosa': 0, 'Versicolor': 1, 'Virginica': 2}`` is used.

    Raises:
        KeyError: If ``target_col_name`` is not a column of the CSV.
        ValueError: If the target column contains a string label that has no
            entry in ``label_map``.
    """

    def __init__(self, target_col_name="species", csv_path="Data/iris.csv", label_map=None):
        if label_map is None:
            label_map = {'Setosa': 0, 'Versicolor': 1, 'Virginica': 2}

        self.df = pd.read_csv(csv_path)
        if target_col_name not in self.df.columns:
            raise KeyError(
                f"target column {target_col_name!r} not found; "
                f"available columns: {list(self.df.columns)}"
            )

        features = self.df.drop(columns=target_col_name).to_numpy()
        # torch.tensor copies the data, so the tensor never aliases (or
        # inherits read-only flags from) the DataFrame's underlying buffer.
        self.x = torch.tensor(features)

        # Encode labels explicitly instead of relying on Series.replace to
        # downcast an object column to integers. Values that are not keys of
        # label_map pass through unchanged, matching what replace() did.
        encoded = self.df[target_col_name].map(lambda value: label_map.get(value, value))
        unmapped = sorted({value for value in encoded if isinstance(value, str)})
        if unmapped:
            # Fail here with a readable message rather than later inside
            # torch with an opaque "object dtype" TypeError.
            raise ValueError(
                f"labels {unmapped} in column {target_col_name!r} have no entry in label_map"
            )
        self.y = torch.tensor(pd.to_numeric(encoded).to_numpy())

    def __getitem__(self, index):
        """Return the ``(features, label)`` pair stored at ``index``."""
        return self.x[index], self.y[index]

    def __len__(self):
        """Return the number of rows read from the CSV."""
        return len(self.df)

In [17]:

# Dimensions of the raw DataFrame as (rows, columns).
df.shape

Out[17]:

(150, 5)

In [16]:

# Build the dataset with its default arguments and report how many samples it holds.
iris_data = Iris()
len(iris_data)

Out[16]:

In [18]:

# Wrap the dataset in a DataLoader that yields batches of 8 samples; no shuffling is requested here.
iris_data_loader = DataLoader(iris_data, batch_size=8)

In [20]:

# Inspect only the first batch: unpack it into features (x) and labels (y),
# print both, then stop iterating.
for data in iris_data_loader:
    x, y = data
    print(f"independent col data: {x}, \ntaget_col: {y}")
    break

independent col data: tensor([[5.1000, 3.5000, 1.4000, 0.2000],
        [4.9000, 3.0000, 1.4000, 0.2000],
        [4.7000, 3.2000, 1.3000, 0.2000],
        [4.6000, 3.1000, 1.5000, 0.2000],
        [5.0000, 3.6000, 1.4000, 0.2000],
        [5.4000, 3.9000, 1.7000, 0.4000],
        [4.6000, 3.4000, 1.4000, 0.3000],
        [5.0000, 3.4000, 1.5000, 0.2000]], dtype=torch.float64), 
taget_col: tensor([0, 0, 0, 0, 0, 0, 0, 0])

In [21]:

# Rebuild the loader with shuffle=True so samples are drawn in shuffled order rather than file order.
iris_data_loader = DataLoader(iris_data, batch_size=8, shuffle=True)

In [22]:

# Inspect only the first batch from the shuffled loader: unpack it into
# features (x) and labels (y), print both, then stop iterating.
for data in iris_data_loader:
    x, y = data
    print(f"independent col data: {x}, \ntaget_col: {y}")
    break

independent col data: tensor([[4.8000, 3.4000, 1.6000, 0.2000],
        [5.7000, 2.8000, 4.5000, 1.3000],
        [5.8000, 2.7000, 5.1000, 1.9000],
        [6.3000, 2.8000, 5.1000, 1.5000],
        [7.3000, 2.9000, 6.3000, 1.8000],
        [6.9000, 3.1000, 4.9000, 1.5000],
        [4.7000, 3.2000, 1.6000, 0.2000],
        [6.5000, 2.8000, 4.6000, 1.5000]], dtype=torch.float64), 
taget_col: tensor([0, 1, 2, 2, 2, 1, 0, 1])

In [ ]: