#i "nuget:https://pkgs.dev.azure.com/dnceng/public/_packaging/dotnet5/nuget/v3/index.json"
#i "nuget:https://pkgs.dev.azure.com/dnceng/public/_packaging/dotnet-tools/nuget/v3/index.json"
#r "nuget:Microsoft.Data.Analysis,0.19.0"
#r "nuget:XPlot.Plotly.Interactive, 4.0.6"
using Microsoft.Data.Analysis;
// The three columns that will make up the DataFrame.
var dateTimes = new PrimitiveDataFrameColumn<DateTime>("DateTimes"); // No length given, so it starts with 0 rows.
var ints = new PrimitiveDataFrameColumn<int>("Ints", 3); // Length 3; every value is null initially.
var strings = new StringDataFrameColumn("Strings", 3); // Length 3; every value is null initially.
// Append 3 values to dateTimes so it matches the length of the other two columns.
// The dates are built with the DateTime constructor instead of DateTime.Parse, which
// interprets its input using the current culture and can therefore yield different
// values on different machines.
// The first two values are intentionally the same day (the GroupBy on "DateTimes" further
// down relies on having a repeated key).
dateTimes.Append(new DateTime(2019, 1, 1));
dateTimes.Append(new DateTime(2019, 1, 1));
dateTimes.Append(new DateTime(2019, 1, 2));
// Build the DataFrame from the three columns; the constructor throws if their lengths differ.
var df = new DataFrame(dateTimes, ints, strings);
df
// To change a value directly through df.
// Columns were passed to the constructor as (dateTimes, ints, strings), so column index 1 is Ints.
df[0, 1] = 10; // 0 is the rowIndex, and 1 is the columnIndex. This sets the 0th value in the Ints column to 10
df
// Modify the ints and strings columns by indexing into the column variables themselves.
// df was built from these same variables, so it is displayed again to show the result.
ints[1] = 100;
strings[1] = "Foo!";
df
// Indexing can throw when types don't match.
// NOTE(review): `ints` is declared as PrimitiveDataFrameColumn<int>, so the assignment below may be
// rejected by the compiler rather than throwing at run time — confirm before uncommenting.
// ints[1] = "this will throw because I am a string";
// Info can be used to figure out the type of data in a column.
df.Info()
// Add 5 to ints through the DataFrame.
// inPlace: true asks Add to update the existing column rather than hand back a new one,
// which is why the result is not assigned to anything.
df["Ints"].Add(5, inPlace: true);
df
// We can also use binary operators. Binary operators produce a copy, so assign it back to our Ints column.
// NOTE(review): after this assignment df["Ints"] presumably refers to the new copy while the `ints`
// variable still refers to the old column — later edits should go through df["Ints"]; confirm.
df["Ints"] = (ints / 5) * 100;
df
// Fill nulls in our columns, if any. Ints[2], Strings[0] and Strings[2] are null:
// only indices 0 and 1 of Ints and index 1 of Strings were assigned above.
// inPlace: true updates the columns held by df instead of returning filled copies.
df["Ints"].FillNulls(-1, inPlace: true);
df["Strings"].FillNulls("Bar", inPlace: true);
df
// To inspect the first row: df.Rows is indexed by row position and the result is typed as DataFrameRow.
DataFrameRow row0 = df.Rows[0];
row0
// Filter rows based on equality.
// Step 1: compare the Strings column against "Bar"; the result is a boolean column (displayed below).
PrimitiveDataFrameColumn<bool> boolFilter = df["Strings"].ElementwiseEquals("Bar");
boolFilter
// Step 2: pass the boolean column to Filter, which returns a new DataFrame
// (presumably containing only the rows where the mask is true — confirm against the API docs).
DataFrame filtered = df.Filter(boolFilter);
filtered
// Sort our dataframe using the Ints column.
// The result of OrderBy is captured in a separate variable; df itself is not reassigned here.
DataFrame sorted = df.OrderBy("Ints");
sorted
// GroupBy: group the rows of df by the values in the DateTimes column.
// The GroupBy object is kept so that several aggregations can be run on the same grouping.
GroupBy groupBy = df.GroupBy("DateTimes");
// Count of values in each group
DataFrame groupCounts = groupBy.Count();
groupCounts
// Alternatively find the sum of the values in each group in Ints.
// Reuses the groupBy object created above; only the "Ints" column is requested.
DataFrame intsGroupSum = groupBy.Sum("Ints");
intsGroupSum
using XPlot.Plotly;
using System.Linq;
#r "nuget:MathNet.Numerics,4.9.0"
using MathNet.Numerics.Distributions;
// Draw 1000 samples from a normal distribution (mean 0, standard deviation 0.1)
// and store them in a double column named "Normal Distribution".
var mean = 0.0;
var stdDev = 0.1;
var normalDist = new Normal(mean, stdDev);
var doubles = new PrimitiveDataFrameColumn<double>(
    "Normal Distribution",
    normalDist.Samples().Take(1000));
// Plot the sampled values as a histogram (nbinsx set to 30) and show it in the notebook output.
display(Chart.Plot(new Histogram { x = doubles, nbinsx = 30 }));