%use dataframe
This example uses the YouTube Data API: https://developers.google.com/youtube/v3/docs. Follow the tutorials there to obtain an API key, and make it available to the notebook through the YOUTUBE_API_KEY environment variable.
// API key for the YouTube Data API, read from the YOUTUBE_API_KEY environment variable.
// Fail fast with a clear message when the variable is missing: otherwise the literal text
// "null" would be interpolated into every request URL built from this value.
val apiKey: String = System.getenv("YOUTUBE_API_KEY")
    ?: error("Environment variable YOUTUBE_API_KEY is not set; it is required to call the YouTube Data API")
/**
 * Reads a single YouTube Data API v3 response as one [DataRow].
 *
 * @param path endpoint plus query string, placed after the `/youtube/v3/` base URL. The API key is
 *   appended as `&key=...`, so [path] must already contain its own `?` query part.
 * @return whatever [DataRow.read] produces for the assembled URL.
 */
fun load(path: String): AnyRow {
    val requestUrl = "https://www.googleapis.com/youtube/v3/$path&key=$apiKey"
    return DataRow.read(requestUrl)
}
/**
 * Loads consecutive result pages for [path] and concatenates them into one frame.
 *
 * The first request uses [path] unchanged; each following request appends
 * `&pageToken=<token>`, where the token is the `nextPageToken` value of the previous page.
 * Loading stops when a page has no `nextPageToken` or when [maxPages] pages have been collected.
 * At least one page is always requested, even if [maxPages] is smaller than 1.
 *
 * @param path endpoint plus query string, as accepted by the single-page `load(path)`.
 * @param maxPages upper bound on the number of pages to collect.
 * @return the collected page rows, concatenated.
 */
fun load(path: String, maxPages: Int): AnyFrame {
    val pages = mutableListOf<AnyRow>()
    var token: String? = null
    while (true) {
        // No token yet means this is the first request, which uses the plain path.
        val page = load(if (token == null) path else "$path&pageToken=$token")
        pages += page
        token = page.getValueOrNull<String>("nextPageToken")
        if (token == null || pages.size >= maxPages) break
    }
    return pages.concat()
}
// Search for "cute cats": up to 5 pages, each requesting 50 results with the `snippet` part.
val df = load(
    path = "search?q=cute%20cats&maxResults=50&part=snippet",
    maxPages = 5,
)
df
// Concatenate the values of the `items` column of `df` into a single frame
// (presumably one nested table of search results per loaded page — confirm).
val items = df.items.concat()
items
// Keep only rows whose id.videoId is not null, reduce them to two columns
// (id.videoId renamed to "id", plus snippet) and drop duplicate rows.
val videos = items.dropNulls { id.videoId }
.select { id.videoId named "id" and snippet }
.distinct()
videos
// Run parse() over the selected video columns.
val parsed = videos.parse()
// Replace every URL-typed column, at any nesting depth, with an IMG(...) limited to maxHeight = 150,
// then add a "video" column holding an IFRAME that points at the YouTube embed URL for the row's id.
val loaded = parsed.convert { colsAtAnyDepth().colsOf<URL>() }.with { IMG(it, maxHeight = 150) }
.add("video") { IFRAME("https://www.youtube.com/embed/$id") }
NOTE: For this example, the DataFrame needs to be rendered as HTML. When running in Kotlin Notebook, this means the "Render DataFrame tables natively" setting must be turned off.
// Regroup the columns: snippet.channelId and snippet.channelTitle are moved under "channel",
// snippet.title and snippet.publishedAt are moved to the top level, and snippet itself is then removed.
val clean = loaded.move { snippet.channelId and snippet.channelTitle }.under("channel")
.move { snippet.title and snippet.publishedAt }.toTop()
.remove { snippet }
clean
// Request statistics for the videos in batches of 50 ids: each batch becomes one `videos`
// request whose `id` parameter is the batch joined with "%2C" (a URL-encoded comma).
val statPages = clean.id.chunked(50).map { idBatch ->
    load("videos?part=statistics&id=${idBatch.joinToString("%2C")}")
}
statPages
// Concatenate the `items` of all statistics responses, keep `id` plus every column
// inside `statistics`, and run parse() on the result.
val stats = statPages.items.concat().select { id and statistics.allCols() }.parse()
stats
// Join the video table with the statistics table.
// NOTE(review): no join columns are given, so this presumably matches on the shared `id` column — confirm.
val joined = clean.join(stats)
joined
// Typed Int column accessor used as the target of the viewCount sum below.
val view by column<Int>()
// Group the videos by channel, order the groups with sortByCount(), and compute per-group values.
val channels = joined.groupBy { channel }.sortByCount().aggregate {
// Sum of viewCount over the group's videos.
viewCount.sum() into view
// Row of the group with the greatest publishedAt; its fields fill the next three columns.
val last = maxBy { publishedAt }
last.title into "last title"
last.publishedAt into "time"
last.viewCount into "viewCount"
// The aggregated result is ordered by `view` descending and then flattened.
}.sortByDesc(view).flatten()
channels
%useLatestDescriptors
%use kandy
// Bar chart of the channels ordered by viewCount descending:
// x is channelTitle cut to its first 10 characters, y is viewCount.
channels.sortBy { viewCount.desc() }.plot {
bars {
x(channelTitle.map { it.take(10) })
y(viewCount)
}
}
// Build a cumulative series: keep publishedAt and viewCount, order by publishedAt,
// convert all remaining columns with toLong(), then apply cumSum to viewCount.
// NOTE(review): the Long representation of publishedAt depends on its parsed type — confirm.
val growth = joined
.select { publishedAt and viewCount }
.sortBy { publishedAt }
.convert { all() }.toLong()
.cumSum { viewCount }
// Area chart of `growth`: publishedAt on x, viewCount (after cumSum) on y.
growth.plot {
area {
x(publishedAt)
y(viewCount)
}
}