"""Fit a binomial GLM of Titanic survival on passenger age and class.

Downloads the OpenML "titanic" dataset (version 1), joins the feature
frame and the target on their index, prints the combined data, and prints
the summary of a statsmodels GLM with a Binomial family.
"""

from sklearn.datasets import fetch_openml
import pandas as pd
import numpy as np
import statsmodels.formula.api as smf
import statsmodels.api as sm

# as_frame=True / return_X_y=True: features and target are returned
# separately as pandas objects.
X, y = fetch_openml("titanic", version=1, as_frame=True, return_X_y=True)
X.info()

# Join features and target row-by-row on their index so the formula API
# can reference both by column name.
df = pd.merge(X, y, left_index=True, right_index=True)
print(df)
df.info()

# NOTE(review): fetch_openml is expected to deliver `survived` as a
# categorical with levels "0"/"1" -- confirm with `y.dtype`. A categorical
# response is expanded by patsy into one indicator column per level, and
# the Binomial family reads a two-column response as (successes, failures),
# which would make *non*-survival the modelled event. Casting to int keeps
# a single 0/1 column so the model describes P(survived == 1).
# The cast is applied to a copy so the `df` printed above is untouched.
model_df = df.assign(survived=df["survived"].astype(int))

formula = "survived ~ age + pclass"
# Rows with a missing `age` are dropped by the formula interface's default
# missing-value handling.
model = smf.glm(formula=formula, data=model_df, family=sm.families.Binomial())
result = model.fit()
print(result.summary())