import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
# Repository: https://github.com/felixriese/hyperspectral-soilmoisture-dataset
# load dataframe
# The CSV is fetched straight from the dataset's GitHub repository, so this
# step needs network access.
path = "https://raw.githubusercontent.com/felixriese/hyperspectral-soilmoisture-dataset/master/soilmoisture_dataset.csv"
# index_col=0: use the first CSV column as the row index instead of as data.
df = pd.read_csv(filepath_or_buffer=path, index_col=0)
# get hyperspectral bands:
# A column is treated as a hyperspectral band when its label parses as an
# integer; every other column (e.g. "soil_moisture") is left out.
def _is_band_label(label):
    """Return True if *label* can be converted with ``int()``."""
    try:
        int(label)
    except (TypeError, ValueError):
        # Only a failed conversion means "not a band". Any other error is a
        # genuine problem and should surface instead of being swallowed.
        return False
    return True


# Column order of the dataframe is preserved.
hypbands = [col for col in df.columns if _is_band_label(col)]
# split dataset
# Half of the samples are held out for evaluation. The rows are shuffled
# before splitting, and the fixed seed makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    df[hypbands],
    df["soil_moisture"],
    test_size=0.5,
    shuffle=True,
    random_state=42,
)

# Fit a linear regression from the band columns to soil moisture.
# fit() returns the estimator itself, so construction and fitting are chained.
lg = LinearRegression().fit(X_train, y_train)

# Evaluate on the held-out half (score() reports R^2 for scikit-learn
# regressors). The value is neither stored nor printed, so it is only visible
# when run interactively, e.g. as the last line of a notebook cell.
lg.score(X_test, y_test)