#i "nuget:https://api.nuget.org/v3/index.json"
#i "nuget:https://pkgs.dev.azure.com/dnceng/public/_packaging/dotnet5/nuget/v3/index.json"
#i "nuget:https://pkgs.dev.azure.com/dnceng/public/_packaging/dotnet-tools/nuget/v3/index.json"
#r "nuget:Microsoft.Data.Analysis, 0.21.0"
#r "nuget: Plotly.NET.Interactive, 4.2.0"
#r "nuget: Plotly.Net, 4.2.0"
using Microsoft.Data.Analysis;
// Create the three columns that will make up the DataFrame.
DateTimeDataFrameColumn dateTimes = new DateTimeDataFrameColumn("DateTimes"); // Default length is 0.
Int32DataFrameColumn ints = new Int32DataFrameColumn("Ints", 6); // Makes a column of length 6. Filled with nulls initially
StringDataFrameColumn strings = new StringDataFrameColumn("Strings", 6); // Makes a column of length 6. Filled with nulls initially
// Append 6 values to dateTimes so that its length matches the other two columns.
// The dates are built from explicit year/month/day parts rather than parsed from text:
// DateTime.Parse uses the current culture, so the same string can yield a different date
// (or fail) on a machine whose default calendar or date format differs.
dateTimes.Append(new DateTime(2019, 1, 1));
dateTimes.Append(new DateTime(2019, 1, 1));
dateTimes.Append(new DateTime(2019, 1, 2));
dateTimes.Append(new DateTime(2019, 2, 2));
dateTimes.Append(new DateTime(2019, 2, 2));
dateTimes.Append(new DateTime(2019, 3, 2));
DataFrame df = new DataFrame(dateTimes, ints, strings); // This will throw if the columns are of different lengths
df
// To change a value directly through df, use its [rowIndex, columnIndex] indexer.
// Column index 1 is the Ints column because the DataFrame was constructed with the
// columns in the order dateTimes, ints, strings.
df[0, 1] = 10; // 0 is the rowIndex, and 1 is the columnIndex. This sets the 0th value in the Ints column to 10
df
// Modify ints and strings columns by indexing the column objects themselves.
// NOTE(review): df is displayed afterwards on the assumption that it holds these same
// column objects (not copies), so the edits show up in df too — confirm.
ints[1] = 24;
strings[1] = "Foo!";
df
// Indexing can throw when types don't match.
// ints[1] = "this will throw because I am a string";
// Info can be used to figure out the type of data in a column.
df.Info()
// Add 5 to ints through the DataFrame. inPlace: true asks for the existing column to be
// modified instead of getting the result back as a new column.
df["Ints"].Add(5, inPlace: true);
df
// We can also use binary operators. Binary operators produce a copy, so assign it back to our Ints column
// NOTE(review): the column and both constants are integers, so the division presumably
// truncates (e.g. 29 / 5 would give 5, not 5.8) — confirm that this is intended.
// NOTE(review): after this assignment the `ints` variable presumably still refers to the
// previous column object rather than the one now stored in df — confirm before reusing it.
df["Ints"] = (ints / 5) * 20;
df
// Fill nulls in our columns, if any. Only Ints[0], Ints[1] and Strings[1] have been assigned
// so far, so the remaining entries of both columns should still be null here
// (assuming the arithmetic above leaves null entries as null).
df["Ints"].FillNulls(100, inPlace: true);
df["Strings"].FillNulls("Bar", inPlace: true);
df
// To inspect the first row
DataFrameRow row0 = df.Rows[0];
row0
// Filter rows based on equality: first build a boolean column by comparing every entry
// of the Strings column with "Bar" ...
PrimitiveDataFrameColumn<bool> boolFilter = df["Strings"].ElementwiseEquals("Bar");
boolFilter
// ... then pass that boolean column to Filter. The result is kept in its own variable;
// NOTE(review): df itself is presumably left unchanged by Filter — confirm.
DataFrame filtered = df.Filter(boolFilter);
filtered
// Sort our dataframe using the Ints column. The sorted result is likewise stored in a
// separate DataFrame variable.
DataFrame sorted = df.OrderBy("Ints");
sorted
// Clone the dataframe so that the extra Month column is added to the copy (newDf), not to df.
var newDf = df.Clone();
// Add a new column holding the full month name ("MMMM") of every date.
// The dates are read from df's DateTimes column; newDf was just cloned from df, so it is
// expected to contain the same dates in the same order.
// x?.ToString(...) maps a null date to a null month instead of throwing
// InvalidOperationException, which x.Value would do on a null entry.
StringDataFrameColumn newColumn = new StringDataFrameColumn(
    "Month",
    ((DateTimeDataFrameColumn)df["DateTimes"]).Select(x => x?.ToString("MMMM")));
newDf.Columns.Add(newColumn);
// GroupBy month
GroupBy groupBy = newDf.GroupBy("Month");
// Show grouped data (Head is called with 10 as its argument)
DataFrame groupedDf = groupBy.Head(10);
groupedDf
// Count of values in each group (groupBy was created by grouping on the "Month" column)
DataFrame groupCounts = groupBy.Count();
groupCounts
// Alternatively find the sum of the values in each group in Ints.
// The "Ints" and "Month" columns of this result are what the chart below reads.
DataFrame intsGroupSum = groupBy.Sum("Ints");
intsGroupSum
using Plotly.NET;
using System.Linq;
using Microsoft.FSharp.Core;
// Show the per-month sums as a column chart: the summed Ints are the bar values and the
// month names are passed as the second (optional, hence FSharpOption-wrapped) argument.
// The braces keep the two helper locals out of the notebook's shared variable scope.
{
    // A null sum is charted as 0.
    IEnumerable<int> monthlySums =
        ((Int32DataFrameColumn)intsGroupSum["Ints"]).Select(sum => sum.GetValueOrDefault());
    IEnumerable<string> monthNames = (StringDataFrameColumn)intsGroupSum["Month"];

    display(Chart2D.Chart.Column<int, string, string, string, string>(
        monthlySums,
        new FSharpOption<IEnumerable<string>>(monthNames)));
}