"""Train a linear SGD classifier to predict ``RainTomorrow`` from weather data.

Steps: download the CSV, drop incomplete and duplicate rows, drop the ``Date``
column, integer-encode the target, one-hot encode the remaining categorical
columns, hold out 20% of the rows for testing, then fit and score an
``SGDClassifier``.

The split and the classifier are seeded with ``RANDOM_STATE`` so that the
reported accuracy and coefficients are reproducible between runs.
"""

import pandas as pd
from sklearn import preprocessing
from sklearn.linear_model import SGDClassifier
from sklearn.model_selection import train_test_split

# Alias kept so that any later code referring to ``clf`` keeps working.
clf = SGDClassifier

DATA_URL = (
    "https://raw.githubusercontent.com/ujwalnk/MachineLearning101/"
    "main/data/01%20Weather%20Data.csv"
)
TEST_SIZE = 0.2
RANDOM_STATE = 42

# --- Load and inspect -------------------------------------------------------
dataframe = pd.read_csv(DATA_URL)
print(dataframe.head())

# --- Clean ------------------------------------------------------------------
# Drop every row that has at least one missing value, then confirm that no
# nulls remain and show how many rows survived per column.
dataframe = dataframe.dropna()
print(dataframe.isnull().sum())
print(dataframe.count())

dataframe = dataframe.drop(columns="Date")
dataframe = dataframe.drop_duplicates()

# Group the rows by class; the class balance is printed for reference.
dataframe = dataframe.sort_values("RainTomorrow", axis=0, ascending=True)
print(dataframe["RainTomorrow"].value_counts())

# --- Encode -----------------------------------------------------------------
# LabelEncoder maps the target's distinct values to integers 0..n_classes-1.
label_encoder = preprocessing.LabelEncoder()
dataframe["RainTomorrow"] = label_encoder.fit_transform(dataframe["RainTomorrow"])
y = dataframe["RainTomorrow"]

# One-hot encode the remaining non-numeric feature columns.
X = pd.get_dummies(dataframe.drop(columns="RainTomorrow"))
# The original script rebinds ``dataframe`` to the encoded feature matrix;
# that binding is preserved for any code that runs after this section.
dataframe = X

# --- Split ------------------------------------------------------------------
# train_test_split shuffles by default, so the sort above does not leak class
# order into the split; random_state pins the shuffle.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=TEST_SIZE, random_state=RANDOM_STATE
)
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

# --- Train and evaluate -----------------------------------------------------
# NOTE(review): SGD is sensitive to feature scale and the features are fed in
# unscaled here; consider standardising them (e.g. StandardScaler fitted on
# X_train only) -- confirm against the intended baseline before changing.
sgd_model = clf(random_state=RANDOM_STATE)
sgd_model.fit(X_train, y_train)

# Mean accuracy on the held-out rows, followed by the learned feature weights.
print(sgd_model.score(X_test, y_test))
print(sgd_model.coef_)