%%classpath add mvn
tech.tablesaw tablesaw-beakerx 0.30.3
com.jimmoores quandl-tablesaw 2.0.0
com.github.haifengl smile-core 1.5.2

%import static tech.tablesaw.aggregate.AggregateFunctions.*
%import tech.tablesaw.api.*
%import tech.tablesaw.columns.*
%import smile.clustering.*
%import smile.regression.*

// Walkthrough script: loads three CSV data sets and one Quandl data set into
// Tablesaw tables, summarizes them, fits Smile models and draws BeakerX plots.
// NOTE(review): this looks like a notebook flattened into one script; the
// %%classpath / %import magics presumably need to sit at the top of their own
// cells when pasted back into BeakerX -- confirm.

// display Tablesaw tables with BeakerX table display widget
tech.tablesaw.beakerx.TablesawDisplayer.register()

// ---------------------------------------------------------------------------
// Tornadoes: basic table inspection
// ---------------------------------------------------------------------------
tornadoes = Table.read().csv("../resources/data/tornadoes_2014.csv")

//print dataset structure
tornadoes.structure()

//get header names
tornadoes.columnNames()

//displays the row and column counts
tornadoes.shape()

//displays the first n rows
tornadoes.first(10)

tornadoes.structure()

//summarize the data in each column
tornadoes.summary()

//Mapping operations
def month = tornadoes.dateColumn("Date").month()
tornadoes.addColumns(month);
tornadoes.columnNames()

//Sorting by column
tornadoes.sortOn("-Fatalities")

//Descriptive statistics
tornadoes.column("Fatalities").summary()

//Performing totals and sub-totals
def injuriesByScale = tornadoes.summarize("Injuries", median).by("Scale")
injuriesByScale.setName("Median injuries by Tornado Scale")
injuriesByScale

//Cross Tabs
tornadoes.xTabCounts("State", "Scale")

// ---------------------------------------------------------------------------
// Whiskey: k-means clustering on the flavour columns
// ---------------------------------------------------------------------------
whiskeyData = Table.read().csv("../resources/data/whiskey.csv")
whiskeyData.structure()

// Cluster the twelve flavour columns into 5 groups.
kMeans = new KMeans(
        whiskeyData.as().doubleMatrix(
                "Body", "Sweetness", "Smoky", "Medicinal", "Tobacco", "Honey",
                "Spicy", "Winey", "Nutty", "Malty", "Fruity", "Floral"),
        5)

// Pair every distillery with the cluster label k-means assigned to its row.
Table whiskeyClusters = Table.create(
        "Clusters",
        whiskeyData.stringColumn("Distillery"),
        DoubleColumn.create("Cluster", kMeans.getClusterLabel()));
whiskeyClusters = whiskeyClusters.sortAscendingOn("Cluster", "Distillery");

// ---------------------------------------------------------------------------
// Baseball ("moneyball"): linear regression
// ---------------------------------------------------------------------------
baseball = Table.read().csv("../resources/data/baseball.csv");

// filter to the data available at the start of the 2002 season
moneyball = baseball.where(baseball.numberColumn("year").isLessThan(2002));
wins = moneyball.nCol("W");
year = moneyball.nCol("Year");
playoffs = moneyball.column("Playoffs");

// Run difference = runs scored (RS) minus runs allowed (RA), stored as "RD".
runDifference = moneyball.numberColumn("RS").subtract(moneyball.numberColumn("RA")).setName("RD");
moneyball.addColumns(runDifference);

// Scatter plot of run difference against wins.
// (Local renamed from `Plot` so it no longer shadows the Plot class name.)
def rdWinsPlot = new Plot(title: "RD x Wins", xLabel: "RD", yLabel: "W")
rdWinsPlot << new Points(
        x: moneyball.numberColumn("RD").asDoubleArray(),
        y: moneyball.numberColumn("W").asDoubleArray())

// NOTE(review): the column name passed to numericDataset(...) is presumably the
// response column; here that is "RD" although the variable is called winsModel
// -- confirm whether "W" was intended.
winsModel = new OLS(moneyball.select("W", "RD").smile().numericDataset("RD"));

runsScored = new OLS(moneyball.select("OBP", "SLG", "RS").smile().numericDataset("RS"));

// Histogram of the residuals of the runs-scored model.
new Histogram(xLabel: "X", yLabel: "Proportion", data: Arrays.asList(runsScored.residuals()), binCount: 25);

// ---------------------------------------------------------------------------
// Quandl: AAPL prices aggregated by year
// ---------------------------------------------------------------------------
%import com.jimmoores.quandl.*
%import com.jimmoores.quandl.tablesaw.*

TableSawQuandlSession session = TableSawQuandlSession.create();
Table table = session.getDataSet(DataSetRequest.Builder.of("WIKI/AAPL").build());

// Create a new column containing the year
NumberColumn yearColumn = table.dateColumn("Date").year();
yearColumn.setName("Year");
table.addColumns(yearColumn);

// Create max, min and total volume tables aggregated by year
Table summaryMax = table.summarize("Adj. Close", max).by("year");
Table summaryMin = table.summarize("Adj. Close", min).by("year");
Table summaryVolume = table.summarize("Volume", sum).by("year");

// Create a new table from each of these
summary = Table.create(
        "Summary",
        summaryMax.column(0),
        summaryMax.column(1),
        summaryMin.column(1),
        summaryVolume.column(1));

// Add back a DateColumn to the summary...will be used for plotting
DateColumn yearDates = DateColumn.create("YearDate");
// Loop variable renamed from `year` to avoid reusing the name of the script
// variable `year` assigned in the baseball section above.
for (yearValue in summary.column('Year')) {
    // Represent each year as January 1st of that year.
    yearDates.append(java.time.LocalDate.of((int) yearValue, 1, 1));
}
summary.addColumns(yearDates)
summary

years = summary.column('YearDate').collect()

// Time plot: yearly max adjusted close as points and a line, with the yearly
// volume sum drawn as stems against a second axis labelled 'Volume'.
plot = new TimePlot(title: 'Price Chart for AAPL', xLabel: 'Time', yLabel: 'Max [Adj. Close]')
plot << new YAxis(label: 'Volume')
plot << new Points(x: years, y: summary.column('Max [Adj. Close]').collect())
plot << new Line(x: years, y: summary.column('Max [Adj. Close]').collect(), color: Color.blue)
plot << new Stems(x: years, y: summary.column('Sum [Volume]').collect(), yAxis: 'Volume')