Stochastic gradient descent is an iterative algorithm that optimizes an objective function using samples from the dataset. cuML's implementation is mini-batch SGD (MBSGD), a variant that scikit-learn does not provide.
The model accepts array-like inputs either on the host (as NumPy arrays) or on the device (as Numba arrays or any `__cuda_array_interface__`-compliant object), as well as cuDF DataFrames.
For information about cuDF, refer to the documentation: https://rapidsai.github.io/projects/cudf/en/latest/
For information about cuML's mini-batch SGD implementation: https://rapidsai.github.io/projects/cuml/en/latest/api.html#stochastic-gradient-descent
import os
import numpy as np
import pandas as pd
import cudf as gd
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_regression
from sklearn.metrics import mean_squared_error
from cuml.linear_model import MBSGDRegressor as cumlSGD
from sklearn.linear_model import SGDRegressor as skSGD
# --- Benchmark configuration (shared by the sklearn and cuML cells) ---
n_samples = 1 << 20          # 2**20 rows of synthetic data
n_features = 399             # feature columns
learning_rate = 'adaptive'   # learning-rate schedule for both models
penalty = 'elasticnet'       # regularization type
# NOTE(review): sklearn >= 1.2 renamed loss 'squared_loss' to 'squared_error';
# this value assumes an older sklearn (cuML still expects 'squared_loss') —
# confirm against the pinned scikit-learn version.
loss = 'squared_loss'
max_iter = 500               # sklearn max_iter / cuML epochs
%%time
# Build a synthetic regression problem and hold out 20% of it for testing.
X, y = make_regression(n_samples=n_samples, n_features=n_features, random_state=0)
X = pd.DataFrame(X)
y = pd.Series(y)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0
)
%%time
# Copy the training labels and the train/test feature frames into GPU
# memory as cuDF objects so cuML can consume them directly.
y_cudf = gd.Series(y_train)
X_cudf = gd.DataFrame.from_pandas(X_train)
X_cudf_test = gd.DataFrame.from_pandas(X_test)
%%time
# Fit scikit-learn's SGDRegressor on the host (CPU) data as the baseline.
# NOTE(review): sklearn >= 1.2 rejects loss='squared_loss' (renamed to
# 'squared_error') — confirm the pinned sklearn version before running.
sgd_sk = skSGD(
    loss=loss,
    penalty=penalty,
    learning_rate=learning_rate,
    eta0=0.07,
    max_iter=max_iter,
    tol=0.001,
    fit_intercept=True,
)
sgd_sk.fit(X_train, y_train)
%%time
# Score the scikit-learn baseline on the held-out split.
y_sk = sgd_sk.predict(X_test)
error_sk = mean_squared_error(y_test, y_sk)
%%time
# Fit cuML's mini-batch SGD regressor on the GPU-resident cuDF data.
sgd_cuml = cumlSGD(
    loss=loss,
    penalty=penalty,
    learning_rate=learning_rate,
    eta0=0.07,
    epochs=max_iter,    # cuML's counterpart of sklearn's max_iter
    batch_size=512,
    tol=0.001,
)
sgd_cuml.fit(X_cudf, y_cudf)
%%time
# Score the cuML model: predict on the GPU, then bring the result back to
# host memory for the (CPU-side) sklearn MSE computation.
# NOTE(review): cudf.Series.to_array() is deprecated in recent cuDF in
# favor of to_numpy() — confirm against the pinned cudf version.
preds = sgd_cuml.predict(X_cudf_test)
y_cuml = preds.to_array().ravel()
error_cuml = mean_squared_error(y_test, y_cuml)
print("SKL MSE(y): %s" % error_sk)
print("CUML MSE(y): %s" % error_cuml)