%useLatestDescriptors
%use dataframe

// --- Basics: build a small frame from named columns and inspect it ---
val animal by columnOf("cat", "cat", "snake", "dog", "dog", "cat", "snake", "cat", "dog", "dog")
val age by columnOf(2.5, 3.0, 0.5, Double.NaN, 5.0, 2.0, 4.5, Double.NaN, 7.0, 3.0)
val visits by columnOf(1, 3, 2, 3, 2, 3, 1, 1, 2, 1)
val priority by columnOf("yes", "yes", "no", "yes", "no", "no", "no", "yes", "no", "no")
val df = dataFrameOf(animal, age, visits, priority)
df

df.schema()

df.describe()

// First three rows.
df[0 ..< 3]
// df[0..2]

// or equivalently
df.head(3)
// or
df.take(3)

// Column selection, then row + column selection.
df[animal, age]

df[3, 4, 8][animal, age]

// Row filters.
df.filter { visits > 2 }

df.filter { age.isNaN() }

df.filter { animal == "cat" && age < 3 }

df.filter { age in 2.0..4.0 }

// Overwrite the age in row 5.
df.update { age }.at(5).with { 1.5 }

df.visits.sum()

df.groupBy { animal }.mean { age }

// Append one row, then drop it again.
val modifiedDf = df.append("dog", 5.5, 2, "no")
modifiedDf.dropLast()

df.groupBy { animal }.count()

df.sortBy { age.desc() and visits }

// Turn the "yes"/"no" strings into booleans.
df.convert { priority }.with { it == "yes" }

df.update { animal }.where { it == "dog" }.with { "corgi" }

df.pivot { visits }.groupBy { animal }.mean(skipNA = true) { age }

// --- Keep only rows whose A differs from the previous row ---
val df = dataFrameOf("A")(1, 2, 2, 3, 4, 5, 5, 5, 6, 7, 7)
df

df.filter { prev()?.A != A }

df.filter { diffOrNull { A } != 0 }

df.distinct()

// --- Subtract the row mean from every Double column ---
val df = dataFrameOf("a", "b", "c").randomDouble(5)
df

df.update { colsOf<Double>() }
    .with { it - rowMean() }

// --- Name of the column with the smallest sum ---
val names = ('a'..'j').map { it.toString() }
val df = dataFrameOf(names).randomDouble(5)
df

df.sum().transpose().minBy("value")["name"]

// --- Number of distinct rows ---
val df = dataFrameOf("a", "b", "c").randomInt(30, 0..2)
df.distinct().count()

// --- Per row: name of the column holding the third NaN ---
val nan = Double.NaN
val names = ('a'..'j').map { it.toString() }
val data = listOf(
    0.04, nan, nan, 0.25, nan, 0.43, 0.71, 0.51, nan, nan,
    nan, nan, nan, 0.04, 0.76, nan, nan, 0.67, 0.76, 0.16,
    nan, nan, 0.5, nan, 0.31, 0.4, nan, nan, 0.24, 0.01,
    0.49, nan, nan, 0.62, 0.73, 0.26, 0.85, nan, nan, nan,
    nan, nan, 0.41, nan, 0.05, nan, 0.61, nan, 0.48, 0.68,
)
val df = dataFrameOf(names)(*data.toTypedArray())
df

df.mapToColumn("res") {
    namedValuesOf<Double>()
        .filter { it.value.isNaN() }.drop(2) // skip the first two NaNs
        .firstOrNull()?.name
}

// --- Per group: sum of the three largest values ---
val grps by columnOf("a", "a", "a", "b", "b", "c", "a", "a", "b", "c", "c", "c", "b", "b", "c")
val vals by columnOf(12, 345, 3, 1, 45, 14, 4, 52, 54, 23, 235, 21, 57, 3, 87)
val df = dataFrameOf(grps, vals)
df

df.groupBy { grps }.aggregate {
    vals.sortDesc().take(3).sum() into "res"
}

import kotlin.random.Random

// --- Sum of B for each bucket of ten consecutive A values ---
val random = Random(42)
val list = List(200) { random.nextInt(1, 101) }
val df = dataFrameOf("A", "B")(*list.toTypedArray())
df

df.groupBy { A.map { (it - 1) / 10 } }.sum { B }
    .sortBy { A }
    .convert { A }.with { "(${it * 10}, ${it * 10 + 10}]" }

// --- Distance (in rows) since the last zero in X ---
val df = dataFrameOf("X")(7, 2, 0, 3, 4, 2, 5, 0, 3, 4)
df

df.mapToColumn("Y") {
    if (it.X == 0) 0 else (prev()?.newValue<Int>() ?: 0) + 1
}

// --- Row index and column name of the three largest values ---
val names = ('a'..'h').map { it.toString() }
// val names = (0..7).map { it.toString() }
val random = Random(30)
val list = List(64) { random.nextInt(1, 101) }
val df = dataFrameOf(names)(*list.toTypedArray())
df

df.add("index") { index() }
    .gather { dropLast() }.into("name", "vals") // every column except the freshly added "index"
    .sortByDesc("vals").take(3)["index", "name"]

// --- Replace negative values with the mean of the non-negative values of the same group ---
val random = Random(31)
val lab = listOf("A", "B")
val vals by columnOf(*Array(15) { random.nextInt(-30, 30) })
val grps by columnOf(*Array(15) { lab[random.nextInt(0, 2)] })
val df = dataFrameOf(vals, grps)
df

val means = df.filter { vals >= 0 }
    .groupBy { grps }.mean()
    .pivot { grps }.values { vals }

df.add("patched_values") {
    if (vals < 0) means[grps] else vals.toDouble()
}

// --- Per group: mean over the current and two preceding rows, ignoring NaN ---
val groups by columnOf("a", "a", "b", "b", "a", "b", "b", "b", "a", "b", "a", "b")
val value by columnOf(1.0, 2.0, 3.0, Double.NaN, 2.0, 3.0, Double.NaN, 1.0, 7.0, 3.0, Double.NaN, 8.0)
val df = dataFrameOf(groups, value)
df

df.add("id") { index() } // remember the original order so it can be restored after concat()
    .groupBy { groups }.add("res") {
        relative(-2..0).value.filter { !it.isNaN() }.mean()
    }.concat()
    .sortBy("id")
    .remove("id")

import kotlinx.datetime.*

/**
 * Iterates over dates from [first] towards [last] (inclusive), advancing by [step] days.
 *
 * A positive [step] walks forward and requires `first <= last`; a non-positive [step]
 * requires `first >= last`. When that condition does not hold the iterator is empty.
 * Iteration stops as soon as the next date would pass [last], so a step that does not
 * land exactly on [last] still terminates. A zero step yields at most one element.
 */
class DateRangeIterator(first: LocalDate, last: LocalDate, val step: Int) : Iterator<LocalDate> {
    private val finalElement: LocalDate = last

    // Date returned by the next call to next(); null once the iterator is exhausted.
    private var upcoming: LocalDate? = first.takeIf { isWithinBounds(it) }

    // True while `date` has not passed `finalElement` in the direction of travel.
    private fun isWithinBounds(date: LocalDate): Boolean =
        if (step > 0) date <= finalElement else date >= finalElement

    override fun hasNext(): Boolean = upcoming != null

    /** Returns the next date, or throws [NoSuchElementException] when none is left. */
    override fun next(): LocalDate {
        val current = upcoming ?: throw kotlin.NoSuchElementException()
        upcoming = if (current == finalElement || step == 0) {
            // Do not advance past the last element (also avoids date arithmetic at the bound).
            null
        } else {
            current.plus(step, DateTimeUnit.DayBased(1)).takeIf { isWithinBounds(it) }
        }
        return current
    }
}

/** Day-by-day iteration over a closed date range, both ends inclusive. */
operator fun ClosedRange<LocalDate>.iterator() = DateRangeIterator(this.start, this.endInclusive, 1)

/**
 * Returns every date of the range in ascending order, both ends included.
 *
 * A range whose start equals its end yields that single date; a range whose start is
 * after its end yields an empty list.
 */
fun ClosedRange<LocalDate>.toList(): List<LocalDate> = iterator().asSequence().toList()

// --- One random value per calendar day from 2015-01-01 to 2016-01-01 ---
val start = LocalDate(2015, 1, 1)
val end = LocalDate(2016, 1, 1)
val days = (start..end).toList()
val dti = days.toColumn("dti")
val s = List(dti.size()) { Random.nextDouble() }.toColumn("s")
val df = dataFrameOf(dti, s)
df.head()

// Sum of the values that fall on a Tuesday.
df.filter { dti.dayOfWeek == DayOfWeek.TUESDAY }.sum { s }

// Mean per calendar month.
df.groupBy { dti.map { it.month } named "month" }.mean()

// Row with the largest value within each four-month block.
df.add("month4") {
    when (dti.monthNumber) {
        in 1..4 -> 1
        in 5..8 -> 2
        else -> 3
    }
}.groupBy("month4").aggregate { maxBy(s) into "max" }

import java.time.temporal.WeekFields
import java.util.*

val start = LocalDate(2015, 1, 1)
val end = LocalDate(2016, 12, 31)

// Third Thursday of each month (intent taken from the column name "3thu").
// The third occurrence of a weekday always falls on day 15..21 of the month; testing the
// calendar week-of-month instead picks the second Thursday when a month starts on Fri/Sat.
(start..end).toList().toColumn("3thu").filter {
    it.dayOfMonth in 15..21 && it.dayOfWeek == DayOfWeek.THURSDAY
}

// --- Cleaning a small, messy flights table ---
val fromTo = listOf("LoNDon_paris", "MAdrid_miLAN", "londON_StockhOlm", "Budapest_PaRis", "Brussels_londOn").toColumn("From_To")
val flightNumber = listOf(10045.0, Double.NaN, 10065.0, Double.NaN, 10085.0).toColumn("FlightNumber")
val recentDelays = listOf(listOf(23, 47), listOf<Int>(), listOf(24, 43, 87), listOf(13), listOf(67, 32)).toColumn("RecentDelays")
val airline = listOf("KLM(!)", "{Air France} (12)", "(British Airways. )", "12. Air France", "'Swiss Air'").toColumn("Airline")
var df = dataFrameOf(fromTo, flightNumber, recentDelays, airline)
df

// Fill each missing flight number with the midpoint of its neighbours.
// NOTE: the `!!` calls rely on NaN never appearing in the first or last row, which holds
// for the data above.
df = df.fillNaNs { FlightNumber }
    .with { prev()!!.FlightNumber + (next()!!.FlightNumber - prev()!!.FlightNumber) / 2 }
    .convert { FlightNumber }.toInt()
df

// Split "From_To" into two columns.
var df2 = df.split { From_To }.by("_").into("From", "To")
df2

// Normalise city names to a leading capital followed by lowercase.
df2 = df2.update { From and To }.with { it.lowercase().replaceFirstChar(Char::uppercase) }
df2

// Keep only the first run of letters/whitespace of each airline name.
df2 = df2.update { Airline }.with {
    "([a-zA-Z\\s]+)".toRegex().find(it)?.value ?: ""
}
df2

// Spread the delay lists into columns named delay_<n>.
val prep_df = df2.split { RecentDelays }.into { "delay_$it" }
prep_df