#i "nuget:https://pkgs.dev.azure.com/dnceng/public/_packaging/dotnet5/nuget/v3/index.json"
#i "nuget:https://pkgs.dev.azure.com/dnceng/public/_packaging/dotnet-tools/nuget/v3/index.json"
#r "nuget:Microsoft.ML, 1.5.1"
#r "nuget:Microsoft.ML.AutoML, 0.17.1"
#r "nuget:Microsoft.Data.Analysis, 0.4.0"
#r "nuget: XPlot.Plotly.Interactive, 4.0.2"
using static Microsoft.DotNet.Interactive.Formatting.PocketViewTags;
using Microsoft.DotNet.Interactive.Formatting;
using Microsoft.Data.Analysis;
using XPlot.Plotly;
using Microsoft.AspNetCore.Html;
using Microsoft.DotNet.Interactive.Formatting;
using static Microsoft.DotNet.Interactive.Formatting.PocketViewTags;
Formatter.Register<DataFrame>((df, writer) =>
{
var headers = new List<IHtmlContent>();
headers.Add(th(i("index")));
headers.AddRange(df.Columns.Select(c => (IHtmlContent) th(c.Name)));
var rows = new List<List<IHtmlContent>>();
var take = 20;
for (var i = 0; i < Math.Min(take, df.Rows.Count); i++)
{
var cells = new List<IHtmlContent>();
cells.Add(td(i));
foreach (var obj in df.Rows[i])
{
cells.Add(td(obj));
}
rows.Add(cells);
}
var t = table(
thead(
headers),
tbody(
rows.Select(
r => tr(r))));
writer.Write(t);
}, "text/html");
using System.IO;
using System.Net.Http;
string housingPath = "housing.csv";
if (!File.Exists(housingPath))
{
var contents = await new HttpClient()
.GetStringAsync("https://raw.githubusercontent.com/ageron/handson-ml2/master/datasets/housing/housing.csv");
File.WriteAllText("housing.csv", contents);
}
var housingData = DataFrame.LoadCsv(housingPath);
housingData
housingData.Description()
Chart.Plot(
new Histogram()
{
x = housingData.Columns["median_house_value"],
nbinsx = 20
}
)
var chart = Chart.Plot(
new Scattergl()
{
x = housingData.Columns["longitude"],
y = housingData.Columns["latitude"],
mode = "markers",
marker = new Marker()
{
color = housingData.Columns["median_house_value"],
colorscale = "Jet"
}
}
);
chart.Width = 600;
chart.Height = 600;
display(chart);
static T[] Shuffle<T>(T[] array)
{
Random rand = new Random();
for (int i = 0; i < array.Length; i++)
{
int r = i + rand.Next(array.Length - i);
T temp = array[r];
array[r] = array[i];
array[i] = temp;
}
return array;
}
int[] randomIndices = Shuffle(Enumerable.Range(0, (int)housingData.Rows.Count).ToArray());
int testSize = (int)(housingData.Rows.Count * .1);
int[] trainRows = randomIndices[testSize..];
int[] testRows = randomIndices[..testSize];
DataFrame housing_train = housingData[trainRows];
DataFrame housing_test = housingData[testRows];
display(housing_train.Rows.Count);
display(housing_test.Rows.Count);
using Microsoft.ML;
using Microsoft.ML.Data;
using Microsoft.ML.AutoML;
#!time
var mlContext = new MLContext();
var experiment = mlContext.Auto().CreateRegressionExperiment(maxExperimentTimeInSeconds: 15);
var result = experiment.Execute(housing_train, labelColumnName:"median_house_value");
var scatters = result.RunDetails.Where(d => d.ValidationMetrics != null).GroupBy(
r => r.TrainerName,
(name, details) => new Scattergl()
{
name = name,
x = details.Select(r => r.RuntimeInSeconds),
y = details.Select(r => r.ValidationMetrics.MeanAbsoluteError),
mode = "markers",
marker = new Marker() { size = 12 }
});
var chart = Chart.Plot(scatters);
chart.WithXTitle("Training Time");
chart.WithYTitle("Error");
display(chart);
Console.WriteLine($"Best Trainer:{result.BestRun.TrainerName}");
var testResults = result.BestRun.Model.Transform(housing_test);
var trueValues = testResults.GetColumn<float>("median_house_value");
var predictedValues = testResults.GetColumn<float>("Score");
var predictedVsTrue = new Scattergl()
{
x = trueValues,
y = predictedValues,
mode = "markers",
};
var maximumValue = Math.Max(trueValues.Max(), predictedValues.Max());
var perfectLine = new Scattergl()
{
x = new[] {0, maximumValue},
y = new[] {0, maximumValue},
mode = "lines",
};
var chart = Chart.Plot(new[] {predictedVsTrue, perfectLine });
chart.WithXTitle("True Values");
chart.WithYTitle("Predicted Values");
chart.WithLegend(false);
chart.Width = 600;
chart.Height = 600;
display(chart);