#i "nuget:https://pkgs.dev.azure.com/dnceng/public/_packaging/dotnet5/nuget/v3/index.json"
#i "nuget:https://pkgs.dev.azure.com/dnceng/public/_packaging/dotnet-tools/nuget/v3/index.json"
#r "nuget:Microsoft.ML, 1.7.0"
#r "nuget:Microsoft.ML.AutoML, 0.19.0"
#r "nuget:Microsoft.Data.Analysis, 0.19.0"
#r "nuget:XPlot.Plotly.Interactive, 4.0.6"
using static Microsoft.DotNet.Interactive.Formatting.PocketViewTags;
using Microsoft.DotNet.Interactive.Formatting;
using Microsoft.Data.Analysis;
using XPlot.Plotly;
using System.IO;
using System.Net.Http;
// Local cache location for the California housing dataset.
string housingPath = "housing.csv";
if (!File.Exists(housingPath))
{
    // Download the CSV only when no local copy exists. The client is disposed
    // as soon as the download completes so its handler is not left dangling.
    using var client = new HttpClient();
    var contents = await client
        .GetStringAsync("https://raw.githubusercontent.com/ageron/handson-ml2/master/datasets/housing/housing.csv");
    // Stay asynchronous for the disk write as well instead of blocking.
    await File.WriteAllTextAsync(housingPath, contents);
}
// Load the CSV at housingPath into a DataFrame.
var housingData = DataFrame.LoadCsv(housingPath);
// NOTE(review): the next two expressions carry no terminating semicolon; they look like
// notebook-cell trailing expressions whose value is rendered as cell output — confirm
// the original cell boundaries before reformatting.
housingData
// Result of DataFrame.Description() for the loaded data, shown the same way.
housingData.Description()
// Histogram of the "median_house_value" column using 20 bins on the x axis.
Chart.Plot(
new Histogram()
{
x = housingData.Columns["median_house_value"],
nbinsx = 20
}
)
// Scatter plot of every row positioned by longitude/latitude, with each point
// coloured by its "median_house_value" on the "Jet" colour scale.
var geoMarker = new Marker()
{
    color = housingData.Columns["median_house_value"],
    colorscale = "Jet"
};
var geoTrace = new Scattergl()
{
    x = housingData.Columns["longitude"],
    y = housingData.Columns["latitude"],
    mode = "markers",
    marker = geoMarker
};
var chart = Chart.Plot(geoTrace);
// Square 600x600 canvas.
chart.Height = 600;
chart.Width = 600;
chart.Display();
/// <summary>
/// Randomly permutes <paramref name="array"/> in place using a Fisher–Yates
/// shuffle and returns that same array instance.
/// </summary>
/// <typeparam name="T">Element type of the array.</typeparam>
/// <param name="array">Array to shuffle; its contents are reordered in place.</param>
/// <param name="rand">
/// Optional random source. Pass a seeded <see cref="Random"/> to get a
/// reproducible ordering; when omitted, a new unseeded instance is used.
/// </param>
/// <returns>The same <paramref name="array"/> instance, now shuffled.</returns>
/// <exception cref="ArgumentNullException"><paramref name="array"/> is null.</exception>
static T[] Shuffle<T>(T[] array, Random rand = null)
{
    if (array == null)
    {
        throw new ArgumentNullException(nameof(array));
    }

    rand ??= new Random();

    // The last position can only swap with itself, so stop one short.
    for (int i = 0; i < array.Length - 1; i++)
    {
        // Pick uniformly from the not-yet-fixed tail [i, Length).
        int r = i + rand.Next(array.Length - i);
        (array[i], array[r]) = (array[r], array[i]);
    }

    return array;
}
// Split the rows into train and test sets: shuffle all row indices, hold out
// the first 10% for testing and keep the remainder for training.
long totalRows = housingData.Rows.Count;
int[] randomIndices = Shuffle(Enumerable.Range(0, (int)totalRows).ToArray());
int testSize = (int)(totalRows * .1);
int[] testRows = randomIndices.Take(testSize).ToArray();
int[] trainRows = randomIndices.Skip(testSize).ToArray();
DataFrame housing_train = housingData[trainRows];
DataFrame housing_test = housingData[testRows];
// Show the resulting row counts, train first and then test.
housing_train.Rows.Count.Display();
housing_test.Rows.Count.Display();
using Microsoft.ML;
using Microsoft.ML.Data;
using Microsoft.ML.AutoML;
#!time
// Run an AutoML regression experiment on the training rows, limited to
// 15 seconds, with "median_house_value" as the label column.
MLContext mlContext = new MLContext();
RegressionExperiment experiment = mlContext.Auto()
    .CreateRegressionExperiment(maxExperimentTimeInSeconds: 15);
ExperimentResult<RegressionMetrics> result = experiment.Execute(
    housing_train,
    labelColumnName: "median_house_value");
// One scatter series per trainer: run time on x, validation mean absolute
// error on y. Runs without validation metrics are left out.
var scatters = result.RunDetails
    .Where(run => run.ValidationMetrics != null)
    .GroupBy(run => run.TrainerName)
    .Select(trainerRuns => new Scattergl()
    {
        name = trainerRuns.Key,
        x = trainerRuns.Select(run => run.RuntimeInSeconds),
        y = trainerRuns.Select(run => run.ValidationMetrics.MeanAbsoluteError),
        mode = "markers",
        marker = new Marker() { size = 12 }
    });

var chart = Chart.Plot(scatters);
chart.WithXTitle("Training Time");
chart.WithYTitle("Error");
chart.Display();
// Report which trainer produced the best run of the experiment.
Console.WriteLine($"Best Trainer:{result.BestRun.TrainerName}");
// Apply the best model to the held-out test rows.
var testResults = result.BestRun.Model.Transform(housing_test);
// GetColumn yields a lazily evaluated sequence. Both columns are consumed more
// than once afterwards (Max() and plotting), so materialise each a single time
// instead of re-reading the transformed data on every enumeration.
var trueValues = testResults.GetColumn<float>("median_house_value").ToArray();
var predictedValues = testResults.GetColumn<float>("Score").ToArray();
// Predicted-vs-true scatter for the test rows, with a reference line from the
// origin to the largest observed value (where prediction equals truth).
var predictedVsTrue = new Scattergl()
{
    x = trueValues,
    y = predictedValues,
    mode = "markers"
};

float largestTrue = trueValues.Max();
float largestPredicted = predictedValues.Max();
var maximumValue = Math.Max(largestTrue, largestPredicted);

// The same two endpoints serve as both x and y of the diagonal.
var diagonal = new[] { 0, maximumValue };
var perfectLine = new Scattergl()
{
    x = diagonal,
    y = diagonal,
    mode = "lines"
};

var chart = Chart.Plot(new Scattergl[] { predictedVsTrue, perfectLine });
chart.Width = 600;
chart.Height = 600;
chart.WithXTitle("True Values");
chart.WithYTitle("Predicted Values");
chart.WithLegend(false);
chart.Display();