// Kotlin DataFrame quickstart: load the JetBrains repositories CSV, reshape it step by step,
// aggregate it, plot the ten most-starred repositories and export the result to Excel.
//
// NOTE(review): the bare column names used inside the lambdas (`full_name`, `starsCount`, ...)
// look like notebook-generated column accessors; presumably each blank-line-separated step below
// was originally its own notebook cell and should be run that way — confirm against the notebook.

// Line magics must each stand on their own line.
%useLatestDescriptors
%use dataframe@kc25

// Read the dataset straight from the Kotlin DataFrame GitHub repository.
val df = DataFrame.readCsv(
    "https://raw.githubusercontent.com/Kotlin/dataframe/master/data/jetbrains_repositories.csv"
)
df

// Show summary statistics for every column.
df.describe()

// Select "full_name", "stargazers_count" and "topics" columns
val dfSelected = df.select { full_name and stargazers_count and topics }
dfSelected

// Keep only rows where "stargazers_count" value is at least 1000
val dfFiltered = dfSelected.filter { stargazers_count >= 1000 }
dfFiltered

val dfRenamed = dfFiltered
    // Rename "full_name" column into "name"
    .rename { full_name }.into("name")
    // And "stargazers_count" into "starsCount"
    .rename { stargazers_count }.into("starsCount")
dfRenamed

val dfUpdated = dfRenamed
    // Update "name" values with only its second part (after '/')
    .update { name }.with { it.split("/")[1] }
    // Convert "topics" `String` values into `List` by splitting.
    // Note: a value of "[]" becomes a list holding one empty string, not an empty list.
    .convert { topics }.with { it.removeSurrounding("[", "]").split(", ") }
dfUpdated

// Inspect the type of the converted "topics" column.
dfUpdated.topics.type()

// Add a `Boolean` column indicating whether the `name` contains the "intellij" substring
// or the topics include "intellij".
val dfWithIsIntellij = dfUpdated.add("isIntellij") {
    name.contains("intellij") || "intellij" in topics
}
dfWithIsIntellij

// Group the rows by the new "isIntellij" flag.
val groupedByIsIntellij = dfWithIsIntellij.groupBy { isIntellij }
groupedByIsIntellij

// Number of rows in each group.
groupedByIsIntellij.count()

groupedByIsIntellij.aggregate {
    // Compute sum and max of "starsCount" within each group into "sumStars" and "maxStars" columns
    sumOf { starsCount } into "sumStars"
    maxOf { starsCount } into "maxStars"
}

val dfTop10 = dfWithIsIntellij
    // Sort by "starsCount" value descending
    .sortByDesc { starsCount }
    // Keep the first ten rows of the sorted frame
    .take(10)
dfTop10

// Load the Kandy plotting library (line magic, must be on its own line).
%use kandy@kc25

// Bar chart: repository name on the x axis, stars count on the y axis.
dfTop10.plot {
    bars {
        x(name)
        y(starsCount)
    }

    layout.title = "Top 10 JetBrains repositories by stars count"
}

// Write the frame that includes the "isIntellij" column to an Excel file in the working directory.
dfWithIsIntellij.writeExcel("jb_repos.xlsx")