# Exploratory data analysis and an AutoML baseline for the "adult" CSV dataset
# (target column: "income"). Notebook-style script: bare expressions such as
# `df` are only rendered when they are the last statement of a notebook cell.
import os
import subprocess
import sys
import warnings

# Silence warnings before the third-party imports below so that warnings
# emitted while importing them are suppressed as well.
warnings.simplefilter(action='ignore')

import imageio
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

DATA_URL = "https://raw.githubusercontent.com/pplonski/datasets-for-start/master/adult/data.csv"
# Directory (relative to the working directory) where AutoML writes its
# results; the leaderboard is read back from the same place.
RESULTS_PATH = "AutoML_classifier"


def _pip_install(*args):
    """Run ``pip install`` with *args* for the interpreter running this code.

    Using ``sys.executable -m pip`` guarantees the package lands in the
    environment of the current interpreter/kernel.

    Raises:
        subprocess.CalledProcessError: if pip exits with a non-zero status.
    """
    subprocess.check_call([sys.executable, "-m", "pip", "install", *args])


df = pd.read_csv(DATA_URL, skipinitialspace=True)
target = 'income'
df

# Hold out 25% of the rows for evaluation. The features are selected by name
# (everything except the target) rather than by column position, and the
# fixed seed makes the split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    df.drop(columns=[target]), df[target], test_size=0.25, random_state=42
)

# --- sweetviz report -------------------------------------------------------
try:
    import sweetviz as sv
except ImportError:
    # Only a missing package triggers the install; other errors propagate.
    _pip_install('sweetviz', '--user')
    print('Restart runtime to import sweetviz')
    import sweetviz as sv

my_report = sv.analyze(df)
# Generate the HTML report; the default arguments write it to
# "SWEETVIZ_REPORT.html".
my_report.show_html()
# Generate the inline (notebook) report.
my_report.show_notebook()
# my_report_intra = sv.compare_intra(df, df["sex"] == "Male", ["Male", "Female"])
# my_report_intra.show_notebook()

# --- ydata_profiling report ------------------------------------------------
try:
    import ydata_profiling
except ImportError:
    _pip_install('ydata_profiling')
    import ydata_profiling
from ydata_profiling import ProfileReport

profile = ProfileReport(df, title="Profiling Report")
# To generate an HTML report file, save the ProfileReport to an object and use
# the to_file() function:
profile.to_file("ydata_profiling.html")
# The HTML report can be directly embedded in a cell in a similar fashion:
profile.to_notebook_iframe()

# --- mljar-supervised AutoML -----------------------------------------------
try:
    from supervised.automl import AutoML
except ImportError:
    _pip_install('mljar-supervised')
    from supervised.automl import AutoML

automl = AutoML(mode="Perform", results_path=RESULTS_PATH)
# automl = AutoML(mode="Explain",results_path="AutoML_classifier")
#automl = AutoML(results_path="AutoML_classifier")
automl.fit(X_train, y_train)
y_predicted = automl.predict(X_test)
y_predicted

accuracy_score(y_test, y_predicted)
#print(automl.score(X_test1, y_test))

# Read the leaderboard from the directory AutoML was told to write to, instead
# of an absolute path that is only valid when the working directory is /content.
pd.read_csv(os.path.join(RESULTS_PATH, 'leaderboard.csv'))


def show_image(file):
    """Read an image with imageio and display it in an 8x8-inch figure.

    Args:
        file: Location of the image, passed unchanged to ``imageio.imread``.
    """
    im = imageio.imread(file)
    plt.figure(figsize=(8, 8))
    plt.imshow(im)
    plt.show()
import os
import subprocess
import sys

# AutoML is created with results_path="AutoML_classifier", i.e. relative to
# the working directory, so the generated plots are looked up in that same
# relative directory rather than under a hard-coded /content prefix.
automl_results_dir = 'AutoML_classifier'
automl_plots = (
    'features_heatmap.png',
    'ldb_performance_boxplot.png',
    'correlation_heatmap.png',
    'Ensemble/confusion_matrix.png',
    'Ensemble/precision_recall_curve.png',
    'Ensemble/roc_curve.png',
    # NOTE(review): "38_CatBoost" looks like a model folder name from one
    # particular AutoML run; confirm it exists for the current run.
    '38_CatBoost/permutation_importance.png',
)
for plot_file in automl_plots:
    show_image(os.path.join(automl_results_dir, plot_file))

# automl1 = AutoML(algorithms=["CatBoost"],mode="Optuna", start_random_models=3)
# automl1.fit(X_train, y_train)

# --- lazypredict benchmark ---------------------------------------------------
try:
    from lazypredict.Supervised import LazyClassifier
except ImportError:
    # Only a missing package triggers the install; other errors propagate.
    # pip is run through the current interpreter so the package is installed
    # into the environment this code is executing in.
    subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'lazypredict'])
    from lazypredict.Supervised import LazyClassifier

clf = LazyClassifier(verbose=0, ignore_warnings=True, custom_metric=None)
models, predictions = clf.fit(X_train, X_test, y_train, y_test)
models

#!pip install tpot
# X_train1 = pd.get_dummies(X_train).values
# X_test1 = pd.get_dummies(X_test).values
# #X_train1 = X_train
# #X_test1 = X_test
# from tpot import TPOTClassifier
# tpot = TPOTClassifier(generations=2, population_size=50, verbosity=2, random_state=42)
# tpot.fit(X_train1, y_train)
# tpot.export('TPOTClassifier_pipeline.py')

#!pip install -U scikit-learn==0.23.2
#!pip install --use-deprecated=legacy-resolver pycaret[full]
#from pycaret.classification import *
#s = setup(df, target = target)
#best = compare_models()
#print(best)
#plot_model(best)
#evaluate_model(best)