%%classpath add mvn
tech.tablesaw tablesaw-plot 0.11.4
tech.tablesaw tablesaw-smile 0.11.4
tech.tablesaw tablesaw-beakerx 0.11.4

%import tech.tablesaw.aggregate.*
%import tech.tablesaw.api.*
%import tech.tablesaw.api.ml.clustering.*
%import tech.tablesaw.api.ml.regression.*
%import tech.tablesaw.columns.*

// BeakerX/Groovy walkthrough of Tablesaw: loads the tornadoes, whiskey and
// baseball CSV files plus a Quandl data set, then displays tables and plots.
// The Image(...) calls reference the expected-result screenshot for the
// numbered cell that precedes them.

// display Tablesaw tables with BeakerX table display widget
tech.tablesaw.beakerx.TablesawDisplayer.register()

// 3
tornadoes = Table.read().csv("../../../doc/resources/data/tornadoes_2014.csv")

// cell 3 expected result
Image("../../resources/img/groovy/tablesaw/cell3_case1.png")

// 4
// print dataset structure
tornadoes.structure()

// cell 4 expected result
Image("../../resources/img/groovy/tablesaw/cell4_case1.png")

// get header names
tornadoes.columnNames()

// displays the row and column counts
tornadoes.shape()

// 7
// displays the first n rows
tornadoes.first(10)

// cell 7 expected result
Image("../../resources/img/groovy/tablesaw/cell7_case1.png")

// 8
import static tech.tablesaw.api.QueryHelper.column
// keep only the rows of the structure table whose "Column Type" is FLOAT
tornadoes.structure().selectWhere(column("Column Type").isEqualTo("FLOAT"))

// cell 8 expected result
Image("../../resources/img/groovy/tablesaw/cell8_case1.png")

// summarize the data in each column
tornadoes.summary()

// Mapping operations: derive a month column from "Date" and attach it to the table
def month = tornadoes.dateColumn("Date").month()
tornadoes.addColumn(month);
tornadoes.columnNames()

// 11
// Sorting by column
tornadoes.sortOn("-Fatalities")

// cell 11 expected result
Image("../../resources/img/groovy/tablesaw/cell11_case1.png")

// 12
// Descriptive statistics
tornadoes.column("Fatalities").summary()

// cell 12 expected result
Image("../../resources/img/groovy/tablesaw/cell12_case1.png")

// 13
// Performing totals and sub-totals
def injuriesByScale = tornadoes.median("Injuries").by("Scale")
injuriesByScale.setName("Median injuries by Tornado Scale")
injuriesByScale

// cell 13 expected result
Image("../../resources/img/groovy/tablesaw/cell13_case1.png")

// 14
// Cross Tabs
CrossTab.xCount(
    tornadoes,
    tornadoes.categoryColumn("State"),
    tornadoes.shortColumn("Scale"))

// cell 14 expected result
Image("../../resources/img/groovy/tablesaw/cell14_case1.png")

// 15
t = Table.read().csv("../../../doc/resources/data/whiskey.csv")
t.structure()

// cell 15 expected result
Image("../../resources/img/groovy/tablesaw/cell15_case1.png")

// 16
// k-means with k = 5 over the columns at index 2 through 13
model = new Kmeans(
    5,
    t.nCol(2), t.nCol(3), t.nCol(4), t.nCol(5),
    t.nCol(6), t.nCol(7), t.nCol(8), t.nCol(9),
    t.nCol(10), t.nCol(11), t.nCol(12), t.nCol(13)
);

// print cluster formation
model.clustered(t.column("Distillery"));

// cell 16 expected result
Image("../../resources/img/groovy/tablesaw/cell16_case1.png")

// 17
// print centroids for each cluster
model.labeledCentroids();

// cell 17 expected result
Image("../../resources/img/groovy/tablesaw/cell17_case1.png")

// gets the distortion for our model
model.distortion()

// Re-run k-means for every k in [2, rowCount) and record the distortion of
// each model; slot (k - 2) of both arrays belongs to cluster count k.
def n = t.rowCount();
def kValues = new double[n - 2];
def distortions = new double[n - 2];

for (int k = 2; k < n; k++) {
    kValues[k - 2] = k;
    def kmeans = new Kmeans(
        k,
        t.nCol(2), t.nCol(3), t.nCol(4), t.nCol(5),
        t.nCol(6), t.nCol(7), t.nCol(8), t.nCol(9),
        t.nCol(10), t.nCol(11), t.nCol(12), t.nCol(13)
    );
    distortions[k - 2] = kmeans.distortion();
}

// plot distortion against k
def linearYPlot = new Plot(title: "K-means clustering demo", xLabel:"K", yLabel: "distortion")
linearYPlot << new Line(x: kValues, y: distortions)

import static tech.tablesaw.api.QueryHelper.column
baseball = Table.read().csv("../../../doc/resources/data/baseball.csv");

// filter to the data available at the start of the 2002 season
moneyball = baseball.selectWhere(column("year").isLessThan(2002));

wins = moneyball.nCol("W");
year = moneyball.nCol("Year");
playoffs = moneyball.column("Playoffs");

// run difference = runs scored (RS) minus runs allowed (RA), stored as column "RD"
runDifference = moneyball.shortColumn("RS").subtract(moneyball.shortColumn("RA"));
moneyball.addColumn(runDifference);
runDifference.setName("RD");

// scatter of run difference against wins; the local is deliberately not named
// "Plot" so that it does not shadow the Plot class
def rdWinsPlot = new Plot(title: "RD x Wins", xLabel:"RD", yLabel: "W")
rdWinsPlot << new Points(
    x: moneyball.numericColumn("RD").toDoubleArray(),
    y: moneyball.numericColumn("W").toDoubleArray())

// least-squares model trained on wins and run difference
winsModel = LeastSquares.train(wins, runDifference);

// prediction for a single run-difference value of 135
def runDiff = new double[1];
runDiff[0] = 135;
def expectedWins = winsModel.predict(runDiff);

// second model trained on RS, OBP and SLG; its residuals feed the histogram
runsScored2 = LeastSquares.train(moneyball.nCol("RS"), moneyball.nCol("OBP"), moneyball.nCol("SLG"));

new Histogram(
    xLabel:"X",
    yLabel:"Proportion",
    data: Arrays.asList(runsScored2.residuals()),
    binCount: 25);

%classpath add mvn com.jimmoores quandl-tablesaw 2.0.0
%import com.jimmoores.quandl.*
%import com.jimmoores.quandl.tablesaw.*

// 34
TableSawQuandlSession session = TableSawQuandlSession.create();
Table table = session.getDataSet(DataSetRequest.Builder.of("WIKI/AAPL").build());

// Create a new column containing the year
ShortColumn yearColumn = table.dateColumn("Date").year();
yearColumn.setName("Year");
table.addColumn(yearColumn);

// Create max, min and total volume tables aggregated by year
Table summaryMax = table.groupBy("year").max("Adj. Close");
Table summaryMin = table.groupBy("year").min("Adj. Close");
Table summaryVolume = table.groupBy("year").sum("Volume");

// Create a new table from each of these
summary = Table.create(
    "Summary",
    summaryMax.column(0),
    summaryMax.column(1),
    summaryMin.column(1),
    summaryVolume.column(1));

// Add back a DateColumn to the summary...will be used for plotting
DateColumn yearDates = new DateColumn("YearDate");
for (year in summary.column('Year')) {
    // January 1st of each year
    yearDates.append(java.time.LocalDate.of(year, 1, 1));
}
summary.addColumn(yearDates)

summary

// cell 34 expected result
Image("../../resources/img/groovy/tablesaw/cell34_case1.png")

years = summary.column('YearDate').collect()

plot = new TimePlot(title: 'Price Chart for AAPL', xLabel: 'Time', yLabel: 'Max [Adj. Close]')
// second Y axis named 'Volume'; the stems below are attached to it via yAxis
plot << new YAxis(label: 'Volume')
plot << new Points(x: years, y: summary.column('Max [Adj. Close]').collect())
plot << new Line(x: years, y: summary.column('Max [Adj. Close]').collect(), color: Color.blue)
plot << new Stems(x: years, y: summary.column('Sum [Volume]').collect(), yAxis: 'Volume')