%use dataframe, ktor-client
// to see autogenerated code, uncomment the line below:
//%trackExecution generated
// Raw CSV file (titanic.csv) with the Titanic passenger data.
val url = URL("https://gist.githubusercontent.com/cmelchior/0a9f9d95bb3de7a2304f81f2861ad62b/raw/735123034ea54c4a8d2554fff719e70b34287923/titanic.csv")
// Convert the data to a dataframe and rename the columns to more idiomatic Kotlin (camelCase) names.
// This also generates a marker interface for the typed data frame wrapper and extension properties for it.
val df = DataFrame.readCSV(url).renameToCamelCase()
df
passengerId | survived | pclass | name | sex | age | sibSp | parch | ticket | fare | cabin | embarked |
---|---|---|---|---|---|---|---|---|---|---|---|
1 | 0 | 3 | Braund, Mr. Owen Harris | male | 22.000000 | 1 | 0 | A/5 21171 | 7.250000 | null | S |
2 | 1 | 1 | Cumings, Mrs. John Bradley (Florence ... | female | 38.000000 | 1 | 0 | PC 17599 | 71.283300 | C85 | C |
3 | 1 | 3 | Heikkinen, Miss. Laina | female | 26.000000 | 0 | 0 | STON/O2. 3101282 | 7.925000 | null | S |
4 | 1 | 1 | Futrelle, Mrs. Jacques Heath (Lily Ma... | female | 35.000000 | 1 | 0 | 113803 | 53.100000 | C123 | S |
5 | 0 | 3 | Allen, Mr. William Henry | male | 35.000000 | 0 | 0 | 373450 | 8.050000 | null | S |
6 | 0 | 3 | Moran, Mr. James | male | null | 0 | 0 | 330877 | 8.458300 | null | Q |
7 | 0 | 1 | McCarthy, Mr. Timothy J | male | 54.000000 | 0 | 0 | 17463 | 51.862500 | E46 | S |
8 | 0 | 3 | Palsson, Master. Gosta Leonard | male | 2.000000 | 3 | 1 | 349909 | 21.075000 | null | S |
9 | 1 | 3 | Johnson, Mrs. Oscar W (Elisabeth Vilh... | female | 27.000000 | 0 | 2 | 347742 | 11.133300 | null | S |
10 | 1 | 2 | Nasser, Mrs. Nicholas (Adele Achem) | female | 14.000000 | 1 | 0 | 237736 | 30.070800 | null | C |
11 | 1 | 3 | Sandstrom, Miss. Marguerite Rut | female | 4.000000 | 1 | 1 | PP 9549 | 16.700000 | G6 | S |
12 | 1 | 1 | Bonnell, Miss. Elizabeth | female | 58.000000 | 0 | 0 | 113783 | 26.550000 | C103 | S |
13 | 0 | 3 | Saundercock, Mr. William Henry | male | 20.000000 | 0 | 0 | A/5. 2151 | 8.050000 | null | S |
14 | 0 | 3 | Andersson, Mr. Anders Johan | male | 39.000000 | 1 | 5 | 347082 | 31.275000 | null | S |
15 | 0 | 3 | Vestrom, Miss. Hulda Amanda Adolfina | female | 14.000000 | 0 | 0 | 350406 | 7.854200 | null | S |
16 | 1 | 2 | Hewlett, Mrs. (Mary D Kingcome) | female | 55.000000 | 0 | 0 | 248706 | 16.000000 | null | S |
17 | 0 | 3 | Rice, Master. Eugene | male | 2.000000 | 4 | 1 | 382652 | 29.125000 | null | Q |
18 | 1 | 2 | Williams, Mr. Charles Eugene | male | null | 0 | 0 | 244373 | 13.000000 | null | S |
19 | 0 | 3 | Vander Planke, Mrs. Julius (Emelia Ma... | female | 31.000000 | 1 | 0 | 345763 | 18.000000 | null | S |
20 | 1 | 3 | Masselmani, Mrs. Fatima | female | null | 0 | 0 | 2649 | 7.225000 | null | C |
// Get a typed column through its generated extension property.
df.name
name |
---|
Braund, Mr. Owen Harris |
Cumings, Mrs. John Bradley (Florence ... |
Heikkinen, Miss. Laina |
Futrelle, Mrs. Jacques Heath (Lily Ma... |
Allen, Mr. William Henry |
Moran, Mr. James |
McCarthy, Mr. Timothy J |
Palsson, Master. Gosta Leonard |
Johnson, Mrs. Oscar W (Elisabeth Vilh... |
Nasser, Mrs. Nicholas (Adele Achem) |
Sandstrom, Miss. Marguerite Rut |
Bonnell, Miss. Elizabeth |
Saundercock, Mr. William Henry |
Andersson, Mr. Anders Johan |
Vestrom, Miss. Hulda Amanda Adolfina |
Hewlett, Mrs. (Mary D Kingcome) |
Rice, Master. Eugene |
Williams, Mr. Charles Eugene |
Vander Planke, Mrs. Julius (Emelia Ma... |
Masselmani, Mrs. Fatima |
// Select a single column with a selector lambda -> returns a DataFrame containing only that column.
df.select { name }
name |
---|
Braund, Mr. Owen Harris |
Cumings, Mrs. John Bradley (Florence ... |
Heikkinen, Miss. Laina |
Futrelle, Mrs. Jacques Heath (Lily Ma... |
Allen, Mr. William Henry |
Moran, Mr. James |
McCarthy, Mr. Timothy J |
Palsson, Master. Gosta Leonard |
Johnson, Mrs. Oscar W (Elisabeth Vilh... |
Nasser, Mrs. Nicholas (Adele Achem) |
Sandstrom, Miss. Marguerite Rut |
Bonnell, Miss. Elizabeth |
Saundercock, Mr. William Henry |
Andersson, Mr. Anders Johan |
Vestrom, Miss. Hulda Amanda Adolfina |
Hewlett, Mrs. (Mary D Kingcome) |
Rice, Master. Eugene |
Williams, Mr. Charles Eugene |
Vander Planke, Mrs. Julius (Emelia Ma... |
Masselmani, Mrs. Fatima |
// Select several columns by combining them with `and` inside the selector.
df.select { name and age and embarked }
name | age | embarked |
---|---|---|
Braund, Mr. Owen Harris | 22.000000 | S |
Cumings, Mrs. John Bradley (Florence ... | 38.000000 | C |
Heikkinen, Miss. Laina | 26.000000 | S |
Futrelle, Mrs. Jacques Heath (Lily Ma... | 35.000000 | S |
Allen, Mr. William Henry | 35.000000 | S |
Moran, Mr. James | null | Q |
McCarthy, Mr. Timothy J | 54.000000 | S |
Palsson, Master. Gosta Leonard | 2.000000 | S |
Johnson, Mrs. Oscar W (Elisabeth Vilh... | 27.000000 | S |
Nasser, Mrs. Nicholas (Adele Achem) | 14.000000 | C |
Sandstrom, Miss. Marguerite Rut | 4.000000 | S |
Bonnell, Miss. Elizabeth | 58.000000 | S |
Saundercock, Mr. William Henry | 20.000000 | S |
Andersson, Mr. Anders Johan | 39.000000 | S |
Vestrom, Miss. Hulda Amanda Adolfina | 14.000000 | S |
Hewlett, Mrs. (Mary D Kingcome) | 55.000000 | S |
Rice, Master. Eugene | 2.000000 | Q |
Williams, Mr. Charles Eugene | null | S |
Vander Planke, Mrs. Julius (Emelia Ma... | 31.000000 | S |
Masselmani, Mrs. Fatima | null | C |
// Another way to select columns, without a compile-time check: pass the columns as arguments.
df.select(df.name, df.age, df.sex)
name | age | sex |
---|---|---|
Braund, Mr. Owen Harris | 22.000000 | male |
Cumings, Mrs. John Bradley (Florence ... | 38.000000 | female |
Heikkinen, Miss. Laina | 26.000000 | female |
Futrelle, Mrs. Jacques Heath (Lily Ma... | 35.000000 | female |
Allen, Mr. William Henry | 35.000000 | male |
Moran, Mr. James | null | male |
McCarthy, Mr. Timothy J | 54.000000 | male |
Palsson, Master. Gosta Leonard | 2.000000 | male |
Johnson, Mrs. Oscar W (Elisabeth Vilh... | 27.000000 | female |
Nasser, Mrs. Nicholas (Adele Achem) | 14.000000 | female |
Sandstrom, Miss. Marguerite Rut | 4.000000 | female |
Bonnell, Miss. Elizabeth | 58.000000 | female |
Saundercock, Mr. William Henry | 20.000000 | male |
Andersson, Mr. Anders Johan | 39.000000 | male |
Vestrom, Miss. Hulda Amanda Adolfina | 14.000000 | female |
Hewlett, Mrs. (Mary D Kingcome) | 55.000000 | female |
Rice, Master. Eugene | 2.000000 | male |
Williams, Mr. Charles Eugene | null | male |
Vander Planke, Mrs. Julius (Emelia Ma... | 31.000000 | female |
Masselmani, Mrs. Fatima | null | female |
// Select columns by type: every column of type String?
// (for this data set: name, sex, ticket, cabin and embarked).
df.select { colsOf<String?>() }
name | sex | ticket | cabin | embarked |
---|---|---|---|---|
Braund, Mr. Owen Harris | male | A/5 21171 | null | S |
Cumings, Mrs. John Bradley (Florence ... | female | PC 17599 | C85 | C |
Heikkinen, Miss. Laina | female | STON/O2. 3101282 | null | S |
Futrelle, Mrs. Jacques Heath (Lily Ma... | female | 113803 | C123 | S |
Allen, Mr. William Henry | male | 373450 | null | S |
Moran, Mr. James | male | 330877 | null | Q |
McCarthy, Mr. Timothy J | male | 17463 | E46 | S |
Palsson, Master. Gosta Leonard | male | 349909 | null | S |
Johnson, Mrs. Oscar W (Elisabeth Vilh... | female | 347742 | null | S |
Nasser, Mrs. Nicholas (Adele Achem) | female | 237736 | null | C |
Sandstrom, Miss. Marguerite Rut | female | PP 9549 | G6 | S |
Bonnell, Miss. Elizabeth | female | 113783 | C103 | S |
Saundercock, Mr. William Henry | male | A/5. 2151 | null | S |
Andersson, Mr. Anders Johan | male | 347082 | null | S |
Vestrom, Miss. Hulda Amanda Adolfina | female | 350406 | null | S |
Hewlett, Mrs. (Mary D Kingcome) | female | 248706 | null | S |
Rice, Master. Eugene | male | 382652 | null | Q |
Williams, Mr. Charles Eugene | male | 244373 | null | S |
Vander Planke, Mrs. Julius (Emelia Ma... | female | 345763 | null | S |
Masselmani, Mrs. Fatima | female | 2649 | null | C |
// Row indexing is zero-based: df[1] is the second row (passengerId 2).
df[1]
passengerId | survived | pclass | name | sex | age | sibSp | parch | ticket | fare | cabin | embarked |
---|---|---|---|---|---|---|---|---|---|---|---|
2 | 1 | 1 | Cumings, Mrs. John Bradley (Florence ... | female | 38.000000 | 1 | 0 | PC 17599 | 71.283300 | C85 | C |
// Column indexing: the first value of the 'name' column.
df.name[0]
Braund, Mr. Owen Harris
// Same result, this time taking the first row and reading its 'name' value.
df[0].name
Braund, Mr. Owen Harris
// Filter rows by predicate. The predicate receiver is of type TypedDataFrameRow<*> with generated
// extension properties, so columns such as 'sex' can be referenced directly by name.
df.filter { sex == "female" }
passengerId | survived | pclass | name | sex | age | sibSp | parch | ticket | fare | cabin | embarked |
---|---|---|---|---|---|---|---|---|---|---|---|
2 | 1 | 1 | Cumings, Mrs. John Bradley (Florence ... | female | 38.000000 | 1 | 0 | PC 17599 | 71.283300 | C85 | C |
3 | 1 | 3 | Heikkinen, Miss. Laina | female | 26.000000 | 0 | 0 | STON/O2. 3101282 | 7.925000 | null | S |
4 | 1 | 1 | Futrelle, Mrs. Jacques Heath (Lily Ma... | female | 35.000000 | 1 | 0 | 113803 | 53.100000 | C123 | S |
9 | 1 | 3 | Johnson, Mrs. Oscar W (Elisabeth Vilh... | female | 27.000000 | 0 | 2 | 347742 | 11.133300 | null | S |
10 | 1 | 2 | Nasser, Mrs. Nicholas (Adele Achem) | female | 14.000000 | 1 | 0 | 237736 | 30.070800 | null | C |
11 | 1 | 3 | Sandstrom, Miss. Marguerite Rut | female | 4.000000 | 1 | 1 | PP 9549 | 16.700000 | G6 | S |
12 | 1 | 1 | Bonnell, Miss. Elizabeth | female | 58.000000 | 0 | 0 | 113783 | 26.550000 | C103 | S |
15 | 0 | 3 | Vestrom, Miss. Hulda Amanda Adolfina | female | 14.000000 | 0 | 0 | 350406 | 7.854200 | null | S |
16 | 1 | 2 | Hewlett, Mrs. (Mary D Kingcome) | female | 55.000000 | 0 | 0 | 248706 | 16.000000 | null | S |
19 | 0 | 3 | Vander Planke, Mrs. Julius (Emelia Ma... | female | 31.000000 | 1 | 0 | 345763 | 18.000000 | null | S |
20 | 1 | 3 | Masselmani, Mrs. Fatima | female | null | 0 | 0 | 2649 | 7.225000 | null | C |
23 | 1 | 3 | McGowan, Miss. Anna "Annie" | female | 15.000000 | 0 | 0 | 330923 | 8.029200 | null | Q |
25 | 0 | 3 | Palsson, Miss. Torborg Danira | female | 8.000000 | 3 | 1 | 349909 | 21.075000 | null | S |
26 | 1 | 3 | Asplund, Mrs. Carl Oscar (Selma Augus... | female | 38.000000 | 1 | 5 | 347077 | 31.387500 | null | S |
29 | 1 | 3 | O'Dwyer, Miss. Ellen "Nellie" | female | null | 0 | 0 | 330959 | 7.879200 | null | Q |
32 | 1 | 1 | Spencer, Mrs. William Augustus (Marie... | female | null | 1 | 0 | PC 17569 | 146.520800 | B78 | C |
33 | 1 | 3 | Glynn, Miss. Mary Agatha | female | null | 0 | 0 | 335677 | 7.750000 | null | Q |
39 | 0 | 3 | Vander Planke, Miss. Augusta Maria | female | 18.000000 | 2 | 0 | 345764 | 18.000000 | null | S |
40 | 1 | 3 | Nicola-Yarred, Miss. Jamila | female | 14.000000 | 1 | 0 | 2651 | 11.241700 | null | C |
41 | 0 | 3 | Ahlin, Mrs. Johan (Johanna Persdotter... | female | 40.000000 | 1 | 0 | 7546 | 9.475000 | null | S |
// Intentionally failing cell: 'age' is a nullable property, so comparing it with '>' is a compilation error.
df.filter { age > 50 }
org.jetbrains.kotlinx.jupyter.repl.impl.JupyterCompilerImpl.compileSync(JupyterCompilerImpl.kt:201) org.jetbrains.kotlinx.jupyter.repl.impl.InternalEvaluatorImpl.eval(InternalEvaluatorImpl.kt:126) org.jetbrains.kotlinx.jupyter.repl.impl.CellExecutorImpl$execute$1$result$1.invoke(CellExecutorImpl.kt:80) org.jetbrains.kotlinx.jupyter.repl.impl.CellExecutorImpl$execute$1$result$1.invoke(CellExecutorImpl.kt:78) org.jetbrains.kotlinx.jupyter.repl.impl.ReplForJupyterImpl.withHost(ReplForJupyterImpl.kt:762) org.jetbrains.kotlinx.jupyter.repl.impl.CellExecutorImpl.execute-L4Nmkdk(CellExecutorImpl.kt:78) org.jetbrains.kotlinx.jupyter.repl.execution.CellExecutor$DefaultImpls.execute-L4Nmkdk$default(CellExecutor.kt:13) org.jetbrains.kotlinx.jupyter.repl.impl.ReplForJupyterImpl.evaluateUserCode-wNURfNM(ReplForJupyterImpl.kt:585) org.jetbrains.kotlinx.jupyter.repl.impl.ReplForJupyterImpl.access$evaluateUserCode-wNURfNM(ReplForJupyterImpl.kt:138) org.jetbrains.kotlinx.jupyter.repl.impl.ReplForJupyterImpl$evalEx$1.invoke(ReplForJupyterImpl.kt:442) org.jetbrains.kotlinx.jupyter.repl.impl.ReplForJupyterImpl$evalEx$1.invoke(ReplForJupyterImpl.kt:439) org.jetbrains.kotlinx.jupyter.repl.impl.ReplForJupyterImpl.withEvalContext(ReplForJupyterImpl.kt:420) org.jetbrains.kotlinx.jupyter.repl.impl.ReplForJupyterImpl.evalEx(ReplForJupyterImpl.kt:439) org.jetbrains.kotlinx.jupyter.messaging.IdeCompatibleMessageRequestProcessor$processExecuteRequest$1$response$1$1.invoke(IdeCompatibleMessageRequestProcessor.kt:136) org.jetbrains.kotlinx.jupyter.messaging.IdeCompatibleMessageRequestProcessor$processExecuteRequest$1$response$1$1.invoke(IdeCompatibleMessageRequestProcessor.kt:135) org.jetbrains.kotlinx.jupyter.execution.JupyterExecutorImpl$Task.execute(JupyterExecutorImpl.kt:42) org.jetbrains.kotlinx.jupyter.execution.JupyterExecutorImpl$executorThread$1.invoke(JupyterExecutorImpl.kt:82) org.jetbrains.kotlinx.jupyter.execution.JupyterExecutorImpl$executorThread$1.invoke(JupyterExecutorImpl.kt:80) 
kotlin.concurrent.ThreadsKt$thread$thread$1.run(Thread.kt:30)
// Drop the rows where 'age' is missing, keeping only rows where 'age' is not null.
val withAges = df.dropNA { age }
withAges
passengerId | survived | pclass | name | sex | age | sibSp | parch | ticket | fare | cabin | embarked |
---|---|---|---|---|---|---|---|---|---|---|---|
1 | 0 | 3 | Braund, Mr. Owen Harris | male | 22.000000 | 1 | 0 | A/5 21171 | 7.250000 | null | S |
2 | 1 | 1 | Cumings, Mrs. John Bradley (Florence ... | female | 38.000000 | 1 | 0 | PC 17599 | 71.283300 | C85 | C |
3 | 1 | 3 | Heikkinen, Miss. Laina | female | 26.000000 | 0 | 0 | STON/O2. 3101282 | 7.925000 | null | S |
4 | 1 | 1 | Futrelle, Mrs. Jacques Heath (Lily Ma... | female | 35.000000 | 1 | 0 | 113803 | 53.100000 | C123 | S |
5 | 0 | 3 | Allen, Mr. William Henry | male | 35.000000 | 0 | 0 | 373450 | 8.050000 | null | S |
7 | 0 | 1 | McCarthy, Mr. Timothy J | male | 54.000000 | 0 | 0 | 17463 | 51.862500 | E46 | S |
8 | 0 | 3 | Palsson, Master. Gosta Leonard | male | 2.000000 | 3 | 1 | 349909 | 21.075000 | null | S |
9 | 1 | 3 | Johnson, Mrs. Oscar W (Elisabeth Vilh... | female | 27.000000 | 0 | 2 | 347742 | 11.133300 | null | S |
10 | 1 | 2 | Nasser, Mrs. Nicholas (Adele Achem) | female | 14.000000 | 1 | 0 | 237736 | 30.070800 | null | C |
11 | 1 | 3 | Sandstrom, Miss. Marguerite Rut | female | 4.000000 | 1 | 1 | PP 9549 | 16.700000 | G6 | S |
12 | 1 | 1 | Bonnell, Miss. Elizabeth | female | 58.000000 | 0 | 0 | 113783 | 26.550000 | C103 | S |
13 | 0 | 3 | Saundercock, Mr. William Henry | male | 20.000000 | 0 | 0 | A/5. 2151 | 8.050000 | null | S |
14 | 0 | 3 | Andersson, Mr. Anders Johan | male | 39.000000 | 1 | 5 | 347082 | 31.275000 | null | S |
15 | 0 | 3 | Vestrom, Miss. Hulda Amanda Adolfina | female | 14.000000 | 0 | 0 | 350406 | 7.854200 | null | S |
16 | 1 | 2 | Hewlett, Mrs. (Mary D Kingcome) | female | 55.000000 | 0 | 0 | 248706 | 16.000000 | null | S |
17 | 0 | 3 | Rice, Master. Eugene | male | 2.000000 | 4 | 1 | 382652 | 29.125000 | null | Q |
19 | 0 | 3 | Vander Planke, Mrs. Julius (Emelia Ma... | female | 31.000000 | 1 | 0 | 345763 | 18.000000 | null | S |
21 | 0 | 2 | Fynney, Mr. Joseph J | male | 35.000000 | 0 | 0 | 239865 | 26.000000 | null | S |
22 | 1 | 2 | Beesley, Mr. Lawrence | male | 34.000000 | 0 | 0 | 248698 | 13.000000 | D56 | S |
23 | 1 | 3 | McGowan, Miss. Anna "Annie" | female | 15.000000 | 0 | 0 | 330923 | 8.029200 | null | Q |
// Now filtering by age works, because rows without an age were dropped from 'withAges'.
withAges.filter { age > 50 }
passengerId | survived | pclass | name | sex | age | sibSp | parch | ticket | fare | cabin | embarked |
---|---|---|---|---|---|---|---|---|---|---|---|
7 | 0 | 1 | McCarthy, Mr. Timothy J | male | 54.000000 | 0 | 0 | 17463 | 51.862500 | E46 | S |
12 | 1 | 1 | Bonnell, Miss. Elizabeth | female | 58.000000 | 0 | 0 | 113783 | 26.550000 | C103 | S |
16 | 1 | 2 | Hewlett, Mrs. (Mary D Kingcome) | female | 55.000000 | 0 | 0 | 248706 | 16.000000 | null | S |
34 | 0 | 2 | Wheadon, Mr. Edward H | male | 66.000000 | 0 | 0 | C.A. 24579 | 10.500000 | null | S |
55 | 0 | 1 | Ostby, Mr. Engelhart Cornelius | male | 65.000000 | 0 | 1 | 113509 | 61.979200 | B30 | C |
95 | 0 | 3 | Coxon, Mr. Daniel | male | 59.000000 | 0 | 0 | 364500 | 7.250000 | null | S |
97 | 0 | 1 | Goldschmidt, Mr. George B | male | 71.000000 | 0 | 0 | PC 17754 | 34.654200 | A5 | C |
117 | 0 | 3 | Connors, Mr. Patrick | male | 70.500000 | 0 | 0 | 370369 | 7.750000 | null | Q |
125 | 0 | 1 | White, Mr. Percival Wayland | male | 54.000000 | 0 | 1 | 35281 | 77.287500 | D26 | S |
151 | 0 | 2 | Bateman, Rev. Robert James | male | 51.000000 | 0 | 0 | S.O.P. 1166 | 12.525000 | null | S |
153 | 0 | 3 | Meo, Mr. Alfonzo | male | 55.500000 | 0 | 0 | A.5. 11206 | 8.050000 | null | S |
156 | 0 | 1 | Williams, Mr. Charles Duane | male | 51.000000 | 0 | 1 | PC 17597 | 61.379200 | null | C |
171 | 0 | 1 | Van der hoef, Mr. Wyckoff | male | 61.000000 | 0 | 0 | 111240 | 33.500000 | B19 | S |
175 | 0 | 1 | Smith, Mr. James Clinch | male | 56.000000 | 0 | 0 | 17764 | 30.695800 | A7 | C |
196 | 1 | 1 | Lurette, Miss. Elise | female | 58.000000 | 0 | 0 | PC 17569 | 146.520800 | B80 | C |
223 | 0 | 3 | Green, Mr. George Henry | male | 51.000000 | 0 | 0 | 21440 | 8.050000 | null | S |
233 | 0 | 2 | Sjostedt, Mr. Ernst Adolf | male | 59.000000 | 0 | 0 | 237442 | 13.500000 | null | S |
250 | 0 | 2 | Carter, Rev. Ernest Courtenay | male | 54.000000 | 1 | 0 | 244252 | 26.000000 | null | S |
253 | 0 | 1 | Stead, Mr. William Thomas | male | 62.000000 | 0 | 0 | 113514 | 26.550000 | C87 | S |
263 | 0 | 1 | Taussig, Mr. Emil | male | 52.000000 | 1 | 1 | 110413 | 79.650000 | E67 | S |
// Find the oldest woman who survived: keep the surviving female passengers,
// then pick the row with the greatest age.
withAges
    .filter { survived == 1 && sex == "female" }
    .maxBy { age }
passengerId | survived | pclass | name | sex | age | sibSp | parch | ticket | fare | cabin | embarked |
---|---|---|---|---|---|---|---|---|---|---|---|
276 | 1 | 1 | Andrews, Miss. Kornelia Theodosia | female | 63.000000 | 1 | 0 | 13502 | 77.958300 | D7 | S |
// Sort by a single column in descending order (oldest passengers first).
withAges.sortByDesc { age }
passengerId | survived | pclass | name | sex | age | sibSp | parch | ticket | fare | cabin | embarked |
---|---|---|---|---|---|---|---|---|---|---|---|
631 | 1 | 1 | Barkworth, Mr. Algernon Henry Wilson | male | 80.000000 | 0 | 0 | 27042 | 30.000000 | A23 | S |
852 | 0 | 3 | Svensson, Mr. Johan | male | 74.000000 | 0 | 0 | 347060 | 7.775000 | null | S |
97 | 0 | 1 | Goldschmidt, Mr. George B | male | 71.000000 | 0 | 0 | PC 17754 | 34.654200 | A5 | C |
494 | 0 | 1 | Artagaveytia, Mr. Ramon | male | 71.000000 | 0 | 0 | PC 17609 | 49.504200 | null | C |
117 | 0 | 3 | Connors, Mr. Patrick | male | 70.500000 | 0 | 0 | 370369 | 7.750000 | null | Q |
673 | 0 | 2 | Mitchell, Mr. Henry Michael | male | 70.000000 | 0 | 0 | C.A. 24580 | 10.500000 | null | S |
746 | 0 | 1 | Crosby, Capt. Edward Gifford | male | 70.000000 | 1 | 1 | WE/P 5735 | 71.000000 | B22 | S |
34 | 0 | 2 | Wheadon, Mr. Edward H | male | 66.000000 | 0 | 0 | C.A. 24579 | 10.500000 | null | S |
55 | 0 | 1 | Ostby, Mr. Engelhart Cornelius | male | 65.000000 | 0 | 1 | 113509 | 61.979200 | B30 | C |
281 | 0 | 3 | Duane, Mr. Frank | male | 65.000000 | 0 | 0 | 336439 | 7.750000 | null | Q |
457 | 0 | 1 | Millet, Mr. Francis Davis | male | 65.000000 | 0 | 0 | 13509 | 26.550000 | E38 | S |
439 | 0 | 1 | Fortune, Mr. Mark | male | 64.000000 | 1 | 4 | 19950 | 263.000000 | C23 C25 C27 | S |
546 | 0 | 1 | Nicholson, Mr. Arthur Ernest | male | 64.000000 | 0 | 0 | 693 | 26.000000 | null | S |
276 | 1 | 1 | Andrews, Miss. Kornelia Theodosia | female | 63.000000 | 1 | 0 | 13502 | 77.958300 | D7 | S |
484 | 1 | 3 | Turkula, Mrs. (Hedwig) | female | 63.000000 | 0 | 0 | 4134 | 9.587500 | null | S |
253 | 0 | 1 | Stead, Mr. William Thomas | male | 62.000000 | 0 | 0 | 113514 | 26.550000 | C87 | S |
556 | 0 | 1 | Wright, Mr. George | male | 62.000000 | 0 | 0 | 113807 | 26.550000 | null | S |
571 | 1 | 2 | Harris, Mr. George | male | 62.000000 | 0 | 0 | S.W./PP 752 | 10.500000 | null | S |
830 | 1 | 1 | Stone, Mrs. George Nelson (Martha Eve... | female | 62.000000 | 0 | 0 | 113572 | 80.000000 | B28 | null |
171 | 0 | 1 | Van der hoef, Mr. Wyckoff | male | 61.000000 | 0 | 0 | 111240 | 33.500000 | B19 | S |
// Sort by several columns: by age first, then by name among passengers of the same age.
withAges.sortBy { age and name }
passengerId | survived | pclass | name | sex | age | sibSp | parch | ticket | fare | cabin | embarked |
---|---|---|---|---|---|---|---|---|---|---|---|
804 | 1 | 3 | Thomas, Master. Assad Alexander | male | 0.420000 | 0 | 1 | 2625 | 8.516700 | null | C |
756 | 1 | 2 | Hamalainen, Master. Viljo | male | 0.670000 | 1 | 1 | 250649 | 14.500000 | null | S |
645 | 1 | 3 | Baclini, Miss. Eugenie | female | 0.750000 | 2 | 1 | 2666 | 19.258300 | null | C |
470 | 1 | 3 | Baclini, Miss. Helene Barbara | female | 0.750000 | 2 | 1 | 2666 | 19.258300 | null | C |
79 | 1 | 2 | Caldwell, Master. Alden Gates | male | 0.830000 | 0 | 2 | 248738 | 29.000000 | null | S |
832 | 1 | 2 | Richards, Master. George Sibley | male | 0.830000 | 1 | 1 | 29106 | 18.750000 | null | S |
306 | 1 | 1 | Allison, Master. Hudson Trevor | male | 0.920000 | 1 | 2 | 113781 | 151.550000 | C22 C26 | S |
184 | 1 | 2 | Becker, Master. Richard F | male | 1.000000 | 2 | 1 | 230136 | 39.000000 | F4 | S |
789 | 1 | 3 | Dean, Master. Bertram Vere | male | 1.000000 | 1 | 2 | C.A. 2315 | 20.575000 | null | S |
387 | 0 | 3 | Goodwin, Master. Sidney Leonard | male | 1.000000 | 5 | 2 | CA 2144 | 46.900000 | null | S |
173 | 1 | 3 | Johnson, Miss. Eleanor Ileen | female | 1.000000 | 1 | 1 | 347742 | 11.133300 | null | S |
828 | 1 | 2 | Mallet, Master. Andre | male | 1.000000 | 0 | 2 | S.C./PARIS 2079 | 37.004200 | null | C |
382 | 1 | 3 | Nakid, Miss. Maria ("Mary") | female | 1.000000 | 0 | 2 | 2653 | 15.741700 | null | C |
165 | 0 | 3 | Panula, Master. Eino Viljami | male | 1.000000 | 4 | 1 | 3101295 | 39.687500 | null | S |
298 | 0 | 1 | Allison, Miss. Helen Loraine | female | 2.000000 | 1 | 2 | 113781 | 151.550000 | C22 C26 | S |
120 | 0 | 3 | Andersson, Miss. Ellis Anna Maria | female | 2.000000 | 4 | 2 | 347082 | 31.275000 | null | S |
480 | 1 | 3 | Hirvonen, Miss. Hildur E | female | 2.000000 | 0 | 1 | 3101298 | 12.287500 | null | S |
341 | 1 | 2 | Navratil, Master. Edmond Roger | male | 2.000000 | 1 | 1 | 230080 | 26.000000 | F2 | S |
8 | 0 | 3 | Palsson, Master. Gosta Leonard | male | 2.000000 | 3 | 1 | 349909 | 21.075000 | null | S |
825 | 0 | 3 | Panula, Master. Urho Abraham | male | 2.000000 | 4 | 1 | 3101295 | 39.687500 | null | S |
// Another way to sort by several columns: pass the columns as arguments instead of a selector lambda.
withAges.sortBy(withAges.age, withAges.name)
passengerId | survived | pclass | name | sex | age | sibSp | parch | ticket | fare | cabin | embarked |
---|---|---|---|---|---|---|---|---|---|---|---|
804 | 1 | 3 | Thomas, Master. Assad Alexander | male | 0.420000 | 0 | 1 | 2625 | 8.516700 | null | C |
756 | 1 | 2 | Hamalainen, Master. Viljo | male | 0.670000 | 1 | 1 | 250649 | 14.500000 | null | S |
645 | 1 | 3 | Baclini, Miss. Eugenie | female | 0.750000 | 2 | 1 | 2666 | 19.258300 | null | C |
470 | 1 | 3 | Baclini, Miss. Helene Barbara | female | 0.750000 | 2 | 1 | 2666 | 19.258300 | null | C |
79 | 1 | 2 | Caldwell, Master. Alden Gates | male | 0.830000 | 0 | 2 | 248738 | 29.000000 | null | S |
832 | 1 | 2 | Richards, Master. George Sibley | male | 0.830000 | 1 | 1 | 29106 | 18.750000 | null | S |
306 | 1 | 1 | Allison, Master. Hudson Trevor | male | 0.920000 | 1 | 2 | 113781 | 151.550000 | C22 C26 | S |
184 | 1 | 2 | Becker, Master. Richard F | male | 1.000000 | 2 | 1 | 230136 | 39.000000 | F4 | S |
789 | 1 | 3 | Dean, Master. Bertram Vere | male | 1.000000 | 1 | 2 | C.A. 2315 | 20.575000 | null | S |
387 | 0 | 3 | Goodwin, Master. Sidney Leonard | male | 1.000000 | 5 | 2 | CA 2144 | 46.900000 | null | S |
173 | 1 | 3 | Johnson, Miss. Eleanor Ileen | female | 1.000000 | 1 | 1 | 347742 | 11.133300 | null | S |
828 | 1 | 2 | Mallet, Master. Andre | male | 1.000000 | 0 | 2 | S.C./PARIS 2079 | 37.004200 | null | C |
382 | 1 | 3 | Nakid, Miss. Maria ("Mary") | female | 1.000000 | 0 | 2 | 2653 | 15.741700 | null | C |
165 | 0 | 3 | Panula, Master. Eino Viljami | male | 1.000000 | 4 | 1 | 3101295 | 39.687500 | null | S |
298 | 0 | 1 | Allison, Miss. Helen Loraine | female | 2.000000 | 1 | 2 | 113781 | 151.550000 | C22 C26 | S |
120 | 0 | 3 | Andersson, Miss. Ellis Anna Maria | female | 2.000000 | 4 | 2 | 347082 | 31.275000 | null | S |
480 | 1 | 3 | Hirvonen, Miss. Hildur E | female | 2.000000 | 0 | 1 | 3101298 | 12.287500 | null | S |
341 | 1 | 2 | Navratil, Master. Edmond Roger | male | 2.000000 | 1 | 1 | 230080 | 26.000000 | F2 | S |
8 | 0 | 3 | Palsson, Master. Gosta Leonard | male | 2.000000 | 3 | 1 | 349909 | 21.075000 | null | S |
825 | 0 | 3 | Panula, Master. Urho Abraham | male | 2.000000 | 4 | 1 | 3101295 | 39.687500 | null | S |
// Add a new column 'year' (1912 minus the passenger's age) and store the result in a new variable.
val withYear = withAges.add("year") { 1912 - age }
withYear
passengerId | survived | pclass | name | sex | age | sibSp | parch | ticket | fare | cabin | embarked | year |
---|---|---|---|---|---|---|---|---|---|---|---|---|
1 | 0 | 3 | Braund, Mr. Owen Harris | male | 22.000000 | 1 | 0 | A/5 21171 | 7.250000 | null | S | 1890.000000 |
2 | 1 | 1 | Cumings, Mrs. John Bradley (Florence ... | female | 38.000000 | 1 | 0 | PC 17599 | 71.283300 | C85 | C | 1874.000000 |
3 | 1 | 3 | Heikkinen, Miss. Laina | female | 26.000000 | 0 | 0 | STON/O2. 3101282 | 7.925000 | null | S | 1886.000000 |
4 | 1 | 1 | Futrelle, Mrs. Jacques Heath (Lily Ma... | female | 35.000000 | 1 | 0 | 113803 | 53.100000 | C123 | S | 1877.000000 |
5 | 0 | 3 | Allen, Mr. William Henry | male | 35.000000 | 0 | 0 | 373450 | 8.050000 | null | S | 1877.000000 |
7 | 0 | 1 | McCarthy, Mr. Timothy J | male | 54.000000 | 0 | 0 | 17463 | 51.862500 | E46 | S | 1858.000000 |
8 | 0 | 3 | Palsson, Master. Gosta Leonard | male | 2.000000 | 3 | 1 | 349909 | 21.075000 | null | S | 1910.000000 |
9 | 1 | 3 | Johnson, Mrs. Oscar W (Elisabeth Vilh... | female | 27.000000 | 0 | 2 | 347742 | 11.133300 | null | S | 1885.000000 |
10 | 1 | 2 | Nasser, Mrs. Nicholas (Adele Achem) | female | 14.000000 | 1 | 0 | 237736 | 30.070800 | null | C | 1898.000000 |
11 | 1 | 3 | Sandstrom, Miss. Marguerite Rut | female | 4.000000 | 1 | 1 | PP 9549 | 16.700000 | G6 | S | 1908.000000 |
12 | 1 | 1 | Bonnell, Miss. Elizabeth | female | 58.000000 | 0 | 0 | 113783 | 26.550000 | C103 | S | 1854.000000 |
13 | 0 | 3 | Saundercock, Mr. William Henry | male | 20.000000 | 0 | 0 | A/5. 2151 | 8.050000 | null | S | 1892.000000 |
14 | 0 | 3 | Andersson, Mr. Anders Johan | male | 39.000000 | 1 | 5 | 347082 | 31.275000 | null | S | 1873.000000 |
15 | 0 | 3 | Vestrom, Miss. Hulda Amanda Adolfina | female | 14.000000 | 0 | 0 | 350406 | 7.854200 | null | S | 1898.000000 |
16 | 1 | 2 | Hewlett, Mrs. (Mary D Kingcome) | female | 55.000000 | 0 | 0 | 248706 | 16.000000 | null | S | 1857.000000 |
17 | 0 | 3 | Rice, Master. Eugene | male | 2.000000 | 4 | 1 | 382652 | 29.125000 | null | Q | 1910.000000 |
19 | 0 | 3 | Vander Planke, Mrs. Julius (Emelia Ma... | female | 31.000000 | 1 | 0 | 345763 | 18.000000 | null | S | 1881.000000 |
21 | 0 | 2 | Fynney, Mr. Joseph J | male | 35.000000 | 0 | 0 | 239865 | 26.000000 | null | S | 1877.000000 |
22 | 1 | 2 | Beesley, Mr. Lawrence | male | 34.000000 | 0 | 0 | 248698 | 13.000000 | D56 | S | 1878.000000 |
23 | 1 | 3 | McGowan, Miss. Anna "Annie" | female | 15.000000 | 0 | 0 | 330923 | 8.029200 | null | Q | 1897.000000 |
// Check the new column: 'year' is accessed as a property, like the original columns.
withYear.year
year |
---|
1890.000000 |
1874.000000 |
1886.000000 |
1877.000000 |
1877.000000 |
1858.000000 |
1910.000000 |
1885.000000 |
1898.000000 |
1908.000000 |
1854.000000 |
1892.000000 |
1873.000000 |
1898.000000 |
1857.000000 |
1910.000000 |
1881.000000 |
1877.000000 |
1878.000000 |
1897.000000 |
// Add several columns in a single `add` call; each `"name" from { ... }` line defines one new column.
withAges.add {
    "year" from { 1912 - age }
    "died" from { survived == 0 }
}
passengerId | survived | pclass | name | sex | age | sibSp | parch | ticket | fare | cabin | embarked | year | died |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
1 | 0 | 3 | Braund, Mr. Owen Harris | male | 22.000000 | 1 | 0 | A/5 21171 | 7.250000 | null | S | 1890.000000 | true |
2 | 1 | 1 | Cumings, Mrs. John Bradley (Florence ... | female | 38.000000 | 1 | 0 | PC 17599 | 71.283300 | C85 | C | 1874.000000 | false |
3 | 1 | 3 | Heikkinen, Miss. Laina | female | 26.000000 | 0 | 0 | STON/O2. 3101282 | 7.925000 | null | S | 1886.000000 | false |
4 | 1 | 1 | Futrelle, Mrs. Jacques Heath (Lily Ma... | female | 35.000000 | 1 | 0 | 113803 | 53.100000 | C123 | S | 1877.000000 | false |
5 | 0 | 3 | Allen, Mr. William Henry | male | 35.000000 | 0 | 0 | 373450 | 8.050000 | null | S | 1877.000000 | true |
7 | 0 | 1 | McCarthy, Mr. Timothy J | male | 54.000000 | 0 | 0 | 17463 | 51.862500 | E46 | S | 1858.000000 | true |
8 | 0 | 3 | Palsson, Master. Gosta Leonard | male | 2.000000 | 3 | 1 | 349909 | 21.075000 | null | S | 1910.000000 | true |
9 | 1 | 3 | Johnson, Mrs. Oscar W (Elisabeth Vilh... | female | 27.000000 | 0 | 2 | 347742 | 11.133300 | null | S | 1885.000000 | false |
10 | 1 | 2 | Nasser, Mrs. Nicholas (Adele Achem) | female | 14.000000 | 1 | 0 | 237736 | 30.070800 | null | C | 1898.000000 | false |
11 | 1 | 3 | Sandstrom, Miss. Marguerite Rut | female | 4.000000 | 1 | 1 | PP 9549 | 16.700000 | G6 | S | 1908.000000 | false |
12 | 1 | 1 | Bonnell, Miss. Elizabeth | female | 58.000000 | 0 | 0 | 113783 | 26.550000 | C103 | S | 1854.000000 | false |
13 | 0 | 3 | Saundercock, Mr. William Henry | male | 20.000000 | 0 | 0 | A/5. 2151 | 8.050000 | null | S | 1892.000000 | true |
14 | 0 | 3 | Andersson, Mr. Anders Johan | male | 39.000000 | 1 | 5 | 347082 | 31.275000 | null | S | 1873.000000 | true |
15 | 0 | 3 | Vestrom, Miss. Hulda Amanda Adolfina | female | 14.000000 | 0 | 0 | 350406 | 7.854200 | null | S | 1898.000000 | true |
16 | 1 | 2 | Hewlett, Mrs. (Mary D Kingcome) | female | 55.000000 | 0 | 0 | 248706 | 16.000000 | null | S | 1857.000000 | false |
17 | 0 | 3 | Rice, Master. Eugene | male | 2.000000 | 4 | 1 | 382652 | 29.125000 | null | Q | 1910.000000 | true |
19 | 0 | 3 | Vander Planke, Mrs. Julius (Emelia Ma... | female | 31.000000 | 1 | 0 | 345763 | 18.000000 | null | S | 1881.000000 | true |
21 | 0 | 2 | Fynney, Mr. Joseph J | male | 35.000000 | 0 | 0 | 239865 | 26.000000 | null | S | 1877.000000 | true |
22 | 1 | 2 | Beesley, Mr. Lawrence | male | 34.000000 | 0 | 0 | 248698 | 13.000000 | D56 | S | 1878.000000 | false |
23 | 1 | 3 | McGowan, Miss. Anna "Annie" | female | 15.000000 | 0 | 0 | 330923 | 8.029200 | null | Q | 1897.000000 | false |
// Another way to build a new column: column arithmetic (computes 1912 - age for every row).
val birthYear = withAges.age * (-1) + 1912
// The new column can be added to the dataframe with the '+' operator, renamed to 'year' first.
withAges + birthYear.rename("year")
passengerId | survived | pclass | name | sex | age | sibSp | parch | ticket | fare | cabin | embarked | year |
---|---|---|---|---|---|---|---|---|---|---|---|---|
1 | 0 | 3 | Braund, Mr. Owen Harris | male | 22.000000 | 1 | 0 | A/5 21171 | 7.250000 | null | S | 1890.000000 |
2 | 1 | 1 | Cumings, Mrs. John Bradley (Florence ... | female | 38.000000 | 1 | 0 | PC 17599 | 71.283300 | C85 | C | 1874.000000 |
3 | 1 | 3 | Heikkinen, Miss. Laina | female | 26.000000 | 0 | 0 | STON/O2. 3101282 | 7.925000 | null | S | 1886.000000 |
4 | 1 | 1 | Futrelle, Mrs. Jacques Heath (Lily Ma... | female | 35.000000 | 1 | 0 | 113803 | 53.100000 | C123 | S | 1877.000000 |
5 | 0 | 3 | Allen, Mr. William Henry | male | 35.000000 | 0 | 0 | 373450 | 8.050000 | null | S | 1877.000000 |
7 | 0 | 1 | McCarthy, Mr. Timothy J | male | 54.000000 | 0 | 0 | 17463 | 51.862500 | E46 | S | 1858.000000 |
8 | 0 | 3 | Palsson, Master. Gosta Leonard | male | 2.000000 | 3 | 1 | 349909 | 21.075000 | null | S | 1910.000000 |
9 | 1 | 3 | Johnson, Mrs. Oscar W (Elisabeth Vilh... | female | 27.000000 | 0 | 2 | 347742 | 11.133300 | null | S | 1885.000000 |
10 | 1 | 2 | Nasser, Mrs. Nicholas (Adele Achem) | female | 14.000000 | 1 | 0 | 237736 | 30.070800 | null | C | 1898.000000 |
11 | 1 | 3 | Sandstrom, Miss. Marguerite Rut | female | 4.000000 | 1 | 1 | PP 9549 | 16.700000 | G6 | S | 1908.000000 |
12 | 1 | 1 | Bonnell, Miss. Elizabeth | female | 58.000000 | 0 | 0 | 113783 | 26.550000 | C103 | S | 1854.000000 |
13 | 0 | 3 | Saundercock, Mr. William Henry | male | 20.000000 | 0 | 0 | A/5. 2151 | 8.050000 | null | S | 1892.000000 |
14 | 0 | 3 | Andersson, Mr. Anders Johan | male | 39.000000 | 1 | 5 | 347082 | 31.275000 | null | S | 1873.000000 |
15 | 0 | 3 | Vestrom, Miss. Hulda Amanda Adolfina | female | 14.000000 | 0 | 0 | 350406 | 7.854200 | null | S | 1898.000000 |
16 | 1 | 2 | Hewlett, Mrs. (Mary D Kingcome) | female | 55.000000 | 0 | 0 | 248706 | 16.000000 | null | S | 1857.000000 |
17 | 0 | 3 | Rice, Master. Eugene | male | 2.000000 | 4 | 1 | 382652 | 29.125000 | null | Q | 1910.000000 |
19 | 0 | 3 | Vander Planke, Mrs. Julius (Emelia Ma... | female | 31.000000 | 1 | 0 | 345763 | 18.000000 | null | S | 1881.000000 |
21 | 0 | 2 | Fynney, Mr. Joseph J | male | 35.000000 | 0 | 0 | 239865 | 26.000000 | null | S | 1877.000000 |
22 | 1 | 2 | Beesley, Mr. Lawrence | male | 34.000000 | 0 | 0 | 248698 | 13.000000 | D56 | S | 1878.000000 |
23 | 1 | 3 | McGowan, Miss. Anna "Annie" | female | 15.000000 | 0 | 0 | 330923 | 8.029200 | null | Q | 1897.000000 |
// Iterable of columns can also be added with '+'
withAges + withAges.columns().map { it.rename(it.name + " duplicate") }
passengerId | survived | pclass | name | sex | age | sibSp | parch | ticket | fare | cabin | embarked | passengerId duplicate | survived duplicate | pclass duplicate | name duplicate | sex duplicate | age duplicate | sibSp duplicate | parch duplicate | ticket duplicate | fare duplicate | cabin duplicate | embarked duplicate |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
1 | 0 | 3 | Braund, Mr. Owen Harris | male | 22.000000 | 1 | 0 | A/5 21171 | 7.250000 | null | S | 1 | 0 | 3 | Braund, Mr. Owen Harris | male | 22.000000 | 1 | 0 | A/5 21171 | 7.250000 | null | S |
2 | 1 | 1 | Cumings, Mrs. John Bradley (Florence ... | female | 38.000000 | 1 | 0 | PC 17599 | 71.283300 | C85 | C | 2 | 1 | 1 | Cumings, Mrs. John Bradley (Florence ... | female | 38.000000 | 1 | 0 | PC 17599 | 71.283300 | C85 | C |
3 | 1 | 3 | Heikkinen, Miss. Laina | female | 26.000000 | 0 | 0 | STON/O2. 3101282 | 7.925000 | null | S | 3 | 1 | 3 | Heikkinen, Miss. Laina | female | 26.000000 | 0 | 0 | STON/O2. 3101282 | 7.925000 | null | S |
4 | 1 | 1 | Futrelle, Mrs. Jacques Heath (Lily Ma... | female | 35.000000 | 1 | 0 | 113803 | 53.100000 | C123 | S | 4 | 1 | 1 | Futrelle, Mrs. Jacques Heath (Lily Ma... | female | 35.000000 | 1 | 0 | 113803 | 53.100000 | C123 | S |
5 | 0 | 3 | Allen, Mr. William Henry | male | 35.000000 | 0 | 0 | 373450 | 8.050000 | null | S | 5 | 0 | 3 | Allen, Mr. William Henry | male | 35.000000 | 0 | 0 | 373450 | 8.050000 | null | S |
7 | 0 | 1 | McCarthy, Mr. Timothy J | male | 54.000000 | 0 | 0 | 17463 | 51.862500 | E46 | S | 7 | 0 | 1 | McCarthy, Mr. Timothy J | male | 54.000000 | 0 | 0 | 17463 | 51.862500 | E46 | S |
8 | 0 | 3 | Palsson, Master. Gosta Leonard | male | 2.000000 | 3 | 1 | 349909 | 21.075000 | null | S | 8 | 0 | 3 | Palsson, Master. Gosta Leonard | male | 2.000000 | 3 | 1 | 349909 | 21.075000 | null | S |
9 | 1 | 3 | Johnson, Mrs. Oscar W (Elisabeth Vilh... | female | 27.000000 | 0 | 2 | 347742 | 11.133300 | null | S | 9 | 1 | 3 | Johnson, Mrs. Oscar W (Elisabeth Vilh... | female | 27.000000 | 0 | 2 | 347742 | 11.133300 | null | S |
10 | 1 | 2 | Nasser, Mrs. Nicholas (Adele Achem) | female | 14.000000 | 1 | 0 | 237736 | 30.070800 | null | C | 10 | 1 | 2 | Nasser, Mrs. Nicholas (Adele Achem) | female | 14.000000 | 1 | 0 | 237736 | 30.070800 | null | C |
11 | 1 | 3 | Sandstrom, Miss. Marguerite Rut | female | 4.000000 | 1 | 1 | PP 9549 | 16.700000 | G6 | S | 11 | 1 | 3 | Sandstrom, Miss. Marguerite Rut | female | 4.000000 | 1 | 1 | PP 9549 | 16.700000 | G6 | S |
12 | 1 | 1 | Bonnell, Miss. Elizabeth | female | 58.000000 | 0 | 0 | 113783 | 26.550000 | C103 | S | 12 | 1 | 1 | Bonnell, Miss. Elizabeth | female | 58.000000 | 0 | 0 | 113783 | 26.550000 | C103 | S |
13 | 0 | 3 | Saundercock, Mr. William Henry | male | 20.000000 | 0 | 0 | A/5. 2151 | 8.050000 | null | S | 13 | 0 | 3 | Saundercock, Mr. William Henry | male | 20.000000 | 0 | 0 | A/5. 2151 | 8.050000 | null | S |
14 | 0 | 3 | Andersson, Mr. Anders Johan | male | 39.000000 | 1 | 5 | 347082 | 31.275000 | null | S | 14 | 0 | 3 | Andersson, Mr. Anders Johan | male | 39.000000 | 1 | 5 | 347082 | 31.275000 | null | S |
15 | 0 | 3 | Vestrom, Miss. Hulda Amanda Adolfina | female | 14.000000 | 0 | 0 | 350406 | 7.854200 | null | S | 15 | 0 | 3 | Vestrom, Miss. Hulda Amanda Adolfina | female | 14.000000 | 0 | 0 | 350406 | 7.854200 | null | S |
16 | 1 | 2 | Hewlett, Mrs. (Mary D Kingcome) | female | 55.000000 | 0 | 0 | 248706 | 16.000000 | null | S | 16 | 1 | 2 | Hewlett, Mrs. (Mary D Kingcome) | female | 55.000000 | 0 | 0 | 248706 | 16.000000 | null | S |
17 | 0 | 3 | Rice, Master. Eugene | male | 2.000000 | 4 | 1 | 382652 | 29.125000 | null | Q | 17 | 0 | 3 | Rice, Master. Eugene | male | 2.000000 | 4 | 1 | 382652 | 29.125000 | null | Q |
19 | 0 | 3 | Vander Planke, Mrs. Julius (Emelia Ma... | female | 31.000000 | 1 | 0 | 345763 | 18.000000 | null | S | 19 | 0 | 3 | Vander Planke, Mrs. Julius (Emelia Ma... | female | 31.000000 | 1 | 0 | 345763 | 18.000000 | null | S |
21 | 0 | 2 | Fynney, Mr. Joseph J | male | 35.000000 | 0 | 0 | 239865 | 26.000000 | null | S | 21 | 0 | 2 | Fynney, Mr. Joseph J | male | 35.000000 | 0 | 0 | 239865 | 26.000000 | null | S |
22 | 1 | 2 | Beesley, Mr. Lawrence | male | 34.000000 | 0 | 0 | 248698 | 13.000000 | D56 | S | 22 | 1 | 2 | Beesley, Mr. Lawrence | male | 34.000000 | 0 | 0 | 248698 | 13.000000 | D56 | S |
23 | 1 | 3 | McGowan, Miss. Anna "Annie" | female | 15.000000 | 0 | 0 | 330923 | 8.029200 | null | Q | 23 | 1 | 3 | McGowan, Miss. Anna "Annie" | female | 15.000000 | 0 | 0 | 330923 | 8.029200 | null | Q |
// remove single column
df.remove { ticket }
passengerId | survived | pclass | name | sex | age | sibSp | parch | fare | cabin | embarked |
---|---|---|---|---|---|---|---|---|---|---|
1 | 0 | 3 | Braund, Mr. Owen Harris | male | 22.000000 | 1 | 0 | 7.250000 | null | S |
2 | 1 | 1 | Cumings, Mrs. John Bradley (Florence ... | female | 38.000000 | 1 | 0 | 71.283300 | C85 | C |
3 | 1 | 3 | Heikkinen, Miss. Laina | female | 26.000000 | 0 | 0 | 7.925000 | null | S |
4 | 1 | 1 | Futrelle, Mrs. Jacques Heath (Lily Ma... | female | 35.000000 | 1 | 0 | 53.100000 | C123 | S |
5 | 0 | 3 | Allen, Mr. William Henry | male | 35.000000 | 0 | 0 | 8.050000 | null | S |
6 | 0 | 3 | Moran, Mr. James | male | null | 0 | 0 | 8.458300 | null | Q |
7 | 0 | 1 | McCarthy, Mr. Timothy J | male | 54.000000 | 0 | 0 | 51.862500 | E46 | S |
8 | 0 | 3 | Palsson, Master. Gosta Leonard | male | 2.000000 | 3 | 1 | 21.075000 | null | S |
9 | 1 | 3 | Johnson, Mrs. Oscar W (Elisabeth Vilh... | female | 27.000000 | 0 | 2 | 11.133300 | null | S |
10 | 1 | 2 | Nasser, Mrs. Nicholas (Adele Achem) | female | 14.000000 | 1 | 0 | 30.070800 | null | C |
11 | 1 | 3 | Sandstrom, Miss. Marguerite Rut | female | 4.000000 | 1 | 1 | 16.700000 | G6 | S |
12 | 1 | 1 | Bonnell, Miss. Elizabeth | female | 58.000000 | 0 | 0 | 26.550000 | C103 | S |
13 | 0 | 3 | Saundercock, Mr. William Henry | male | 20.000000 | 0 | 0 | 8.050000 | null | S |
14 | 0 | 3 | Andersson, Mr. Anders Johan | male | 39.000000 | 1 | 5 | 31.275000 | null | S |
15 | 0 | 3 | Vestrom, Miss. Hulda Amanda Adolfina | female | 14.000000 | 0 | 0 | 7.854200 | null | S |
16 | 1 | 2 | Hewlett, Mrs. (Mary D Kingcome) | female | 55.000000 | 0 | 0 | 16.000000 | null | S |
17 | 0 | 3 | Rice, Master. Eugene | male | 2.000000 | 4 | 1 | 29.125000 | null | Q |
18 | 1 | 2 | Williams, Mr. Charles Eugene | male | null | 0 | 0 | 13.000000 | null | S |
19 | 0 | 3 | Vander Planke, Mrs. Julius (Emelia Ma... | female | 31.000000 | 1 | 0 | 18.000000 | null | S |
20 | 1 | 3 | Masselmani, Mrs. Fatima | female | null | 0 | 0 | 7.225000 | null | C |
// remove several columns
df.remove { pclass and ticket and cabin and survived }
passengerId | name | sex | age | sibSp | parch | fare | embarked |
---|---|---|---|---|---|---|---|
1 | Braund, Mr. Owen Harris | male | 22.000000 | 1 | 0 | 7.250000 | S |
2 | Cumings, Mrs. John Bradley (Florence ... | female | 38.000000 | 1 | 0 | 71.283300 | C |
3 | Heikkinen, Miss. Laina | female | 26.000000 | 0 | 0 | 7.925000 | S |
4 | Futrelle, Mrs. Jacques Heath (Lily Ma... | female | 35.000000 | 1 | 0 | 53.100000 | S |
5 | Allen, Mr. William Henry | male | 35.000000 | 0 | 0 | 8.050000 | S |
6 | Moran, Mr. James | male | null | 0 | 0 | 8.458300 | Q |
7 | McCarthy, Mr. Timothy J | male | 54.000000 | 0 | 0 | 51.862500 | S |
8 | Palsson, Master. Gosta Leonard | male | 2.000000 | 3 | 1 | 21.075000 | S |
9 | Johnson, Mrs. Oscar W (Elisabeth Vilh... | female | 27.000000 | 0 | 2 | 11.133300 | S |
10 | Nasser, Mrs. Nicholas (Adele Achem) | female | 14.000000 | 1 | 0 | 30.070800 | C |
11 | Sandstrom, Miss. Marguerite Rut | female | 4.000000 | 1 | 1 | 16.700000 | S |
12 | Bonnell, Miss. Elizabeth | female | 58.000000 | 0 | 0 | 26.550000 | S |
13 | Saundercock, Mr. William Henry | male | 20.000000 | 0 | 0 | 8.050000 | S |
14 | Andersson, Mr. Anders Johan | male | 39.000000 | 1 | 5 | 31.275000 | S |
15 | Vestrom, Miss. Hulda Amanda Adolfina | female | 14.000000 | 0 | 0 | 7.854200 | S |
16 | Hewlett, Mrs. (Mary D Kingcome) | female | 55.000000 | 0 | 0 | 16.000000 | S |
17 | Rice, Master. Eugene | male | 2.000000 | 4 | 1 | 29.125000 | Q |
18 | Williams, Mr. Charles Eugene | male | null | 0 | 0 | 13.000000 | S |
19 | Vander Planke, Mrs. Julius (Emelia Ma... | female | 31.000000 | 1 | 0 | 18.000000 | S |
20 | Masselmani, Mrs. Fatima | female | null | 0 | 0 | 7.225000 | C |
// remove several columns by column instances
df.remove(df.passengerId, df.pclass)
survived | name | sex | age | sibSp | parch | ticket | fare | cabin | embarked |
---|---|---|---|---|---|---|---|---|---|
0 | Braund, Mr. Owen Harris | male | 22.000000 | 1 | 0 | A/5 21171 | 7.250000 | null | S |
1 | Cumings, Mrs. John Bradley (Florence ... | female | 38.000000 | 1 | 0 | PC 17599 | 71.283300 | C85 | C |
1 | Heikkinen, Miss. Laina | female | 26.000000 | 0 | 0 | STON/O2. 3101282 | 7.925000 | null | S |
1 | Futrelle, Mrs. Jacques Heath (Lily Ma... | female | 35.000000 | 1 | 0 | 113803 | 53.100000 | C123 | S |
0 | Allen, Mr. William Henry | male | 35.000000 | 0 | 0 | 373450 | 8.050000 | null | S |
0 | Moran, Mr. James | male | null | 0 | 0 | 330877 | 8.458300 | null | Q |
0 | McCarthy, Mr. Timothy J | male | 54.000000 | 0 | 0 | 17463 | 51.862500 | E46 | S |
0 | Palsson, Master. Gosta Leonard | male | 2.000000 | 3 | 1 | 349909 | 21.075000 | null | S |
1 | Johnson, Mrs. Oscar W (Elisabeth Vilh... | female | 27.000000 | 0 | 2 | 347742 | 11.133300 | null | S |
1 | Nasser, Mrs. Nicholas (Adele Achem) | female | 14.000000 | 1 | 0 | 237736 | 30.070800 | null | C |
1 | Sandstrom, Miss. Marguerite Rut | female | 4.000000 | 1 | 1 | PP 9549 | 16.700000 | G6 | S |
1 | Bonnell, Miss. Elizabeth | female | 58.000000 | 0 | 0 | 113783 | 26.550000 | C103 | S |
0 | Saundercock, Mr. William Henry | male | 20.000000 | 0 | 0 | A/5. 2151 | 8.050000 | null | S |
0 | Andersson, Mr. Anders Johan | male | 39.000000 | 1 | 5 | 347082 | 31.275000 | null | S |
0 | Vestrom, Miss. Hulda Amanda Adolfina | female | 14.000000 | 0 | 0 | 350406 | 7.854200 | null | S |
1 | Hewlett, Mrs. (Mary D Kingcome) | female | 55.000000 | 0 | 0 | 248706 | 16.000000 | null | S |
0 | Rice, Master. Eugene | male | 2.000000 | 4 | 1 | 382652 | 29.125000 | null | Q |
1 | Williams, Mr. Charles Eugene | male | null | 0 | 0 | 244373 | 13.000000 | null | S |
0 | Vander Planke, Mrs. Julius (Emelia Ma... | female | 31.000000 | 1 | 0 | 345763 | 18.000000 | null | S |
1 | Masselmani, Mrs. Fatima | female | null | 0 | 0 | 2649 | 7.225000 | null | C |
// '-' operator can also be used for removing columns
df - { passengerId } - { pclass } - { cabin }
survived | name | sex | age | sibSp | parch | ticket | fare | embarked |
---|---|---|---|---|---|---|---|---|
0 | Braund, Mr. Owen Harris | male | 22.000000 | 1 | 0 | A/5 21171 | 7.250000 | S |
1 | Cumings, Mrs. John Bradley (Florence ... | female | 38.000000 | 1 | 0 | PC 17599 | 71.283300 | C |
1 | Heikkinen, Miss. Laina | female | 26.000000 | 0 | 0 | STON/O2. 3101282 | 7.925000 | S |
1 | Futrelle, Mrs. Jacques Heath (Lily Ma... | female | 35.000000 | 1 | 0 | 113803 | 53.100000 | S |
0 | Allen, Mr. William Henry | male | 35.000000 | 0 | 0 | 373450 | 8.050000 | S |
0 | Moran, Mr. James | male | null | 0 | 0 | 330877 | 8.458300 | Q |
0 | McCarthy, Mr. Timothy J | male | 54.000000 | 0 | 0 | 17463 | 51.862500 | S |
0 | Palsson, Master. Gosta Leonard | male | 2.000000 | 3 | 1 | 349909 | 21.075000 | S |
1 | Johnson, Mrs. Oscar W (Elisabeth Vilh... | female | 27.000000 | 0 | 2 | 347742 | 11.133300 | S |
1 | Nasser, Mrs. Nicholas (Adele Achem) | female | 14.000000 | 1 | 0 | 237736 | 30.070800 | C |
1 | Sandstrom, Miss. Marguerite Rut | female | 4.000000 | 1 | 1 | PP 9549 | 16.700000 | S |
1 | Bonnell, Miss. Elizabeth | female | 58.000000 | 0 | 0 | 113783 | 26.550000 | S |
0 | Saundercock, Mr. William Henry | male | 20.000000 | 0 | 0 | A/5. 2151 | 8.050000 | S |
0 | Andersson, Mr. Anders Johan | male | 39.000000 | 1 | 5 | 347082 | 31.275000 | S |
0 | Vestrom, Miss. Hulda Amanda Adolfina | female | 14.000000 | 0 | 0 | 350406 | 7.854200 | S |
1 | Hewlett, Mrs. (Mary D Kingcome) | female | 55.000000 | 0 | 0 | 248706 | 16.000000 | S |
0 | Rice, Master. Eugene | male | 2.000000 | 4 | 1 | 382652 | 29.125000 | Q |
1 | Williams, Mr. Charles Eugene | male | null | 0 | 0 | 244373 | 13.000000 | S |
0 | Vander Planke, Mrs. Julius (Emelia Ma... | female | 31.000000 | 1 | 0 | 345763 | 18.000000 | S |
1 | Masselmani, Mrs. Fatima | female | null | 0 | 0 | 2649 | 7.225000 | C |
// group by single column
df.groupBy { embarked }.count()
embarked | count |
---|---|
S | 644 |
C | 168 |
Q | 77 |
null | 2 |
// group by several columns
df.groupBy{ sex and survived }.count()
sex | survived | count |
---|---|---|
male | 0 | 468 |
female | 1 | 233 |
female | 0 | 81 |
male | 1 | 109 |
// another way
df.groupBy(df.sex, df.survived).count()
sex | survived | count |
---|---|---|
male | 0 | 468 |
female | 1 | 233 |
female | 0 | 81 |
male | 1 | 109 |
// Various summarization operations on a grouped data frame
withAges.groupBy { embarked }.aggregate{
// The body is applied to every group; inside it, methods are called as if on a single DataFrame.
count() into "total count"
// Percentage of rows in the group with survived == 1
(count { survived == 1 }.toDouble() / count() * 100.0) into "survival rate"
age.mean() into "average age" // Column operations are also supported
age.median() into "median age"
// Row with the smallest age in the group; its fields become separate result columns
val youngest = minBy { age }
youngest.name into "youngest"
youngest.age into "youngest age"
// Row with the largest age in the group
val oldest = maxBy { age }
oldest.name into "oldest"
oldest.age into "oldest age"
}
embarked | total count | survival rate | average age | median age | youngest | youngest age | oldest | oldest age |
---|---|---|---|---|---|---|---|---|
S | 554 | 36.281588 | 29.445397 | 28.000000 | Hamalainen, Master. Viljo | 0.670000 | Barkworth, Mr. Algernon Henry Wilson | 80.000000 |
C | 130 | 60.769231 | 30.814769 | 29.000000 | Thomas, Master. Assad Alexander | 0.420000 | Goldschmidt, Mr. George B | 71.000000 |
Q | 28 | 28.571429 | 28.089286 | 27.000000 | Rice, Master. Eugene | 2.000000 | Connors, Mr. Patrick | 70.500000 |
null | 2 | 100.000000 | 50.000000 | 50.000000 | Icard, Miss. Amelie | 38.000000 | Stone, Mrs. George Nelson (Martha Eve... | 62.000000 |
df.size()
891 x 12
withAges.count { age > 50 }
64
// Sort by age and keep the first five rows
withAges.sortBy { age }.take(5)
passengerId | survived | pclass | name | sex | age | sibSp | parch | ticket | fare | cabin | embarked |
---|---|---|---|---|---|---|---|---|---|---|---|
804 | 1 | 3 | Thomas, Master. Assad Alexander | male | 0.420000 | 0 | 1 | 2625 | 8.516700 | null | C |
756 | 1 | 2 | Hamalainen, Master. Viljo | male | 0.670000 | 1 | 1 | 250649 | 14.500000 | null | S |
470 | 1 | 3 | Baclini, Miss. Helene Barbara | female | 0.750000 | 2 | 1 | 2666 | 19.258300 | null | C |
645 | 1 | 3 | Baclini, Miss. Eugenie | female | 0.750000 | 2 | 1 | 2666 | 19.258300 | null | C |
79 | 1 | 2 | Caldwell, Master. Alden Gates | male | 0.830000 | 0 | 2 | 248738 | 29.000000 | null | S |
withAges.sortBy{ age }.takeLast(5)
passengerId | survived | pclass | name | sex | age | sibSp | parch | ticket | fare | cabin | embarked |
---|---|---|---|---|---|---|---|---|---|---|---|
117 | 0 | 3 | Connors, Mr. Patrick | male | 70.500000 | 0 | 0 | 370369 | 7.750000 | null | Q |
97 | 0 | 1 | Goldschmidt, Mr. George B | male | 71.000000 | 0 | 0 | PC 17754 | 34.654200 | A5 | C |
494 | 0 | 1 | Artagaveytia, Mr. Ramon | male | 71.000000 | 0 | 0 | PC 17609 | 49.504200 | null | C |
852 | 0 | 3 | Svensson, Mr. Johan | male | 74.000000 | 0 | 0 | 347060 | 7.775000 | null | S |
631 | 1 | 1 | Barkworth, Mr. Algernon Henry Wilson | male | 80.000000 | 0 | 0 | 27042 | 30.000000 | A23 | S |
// 'rows()' returns an Iterable of rows (DataRow), so any stdlib extension for Iterable can be used on it
df.rows().map { it.name }.take(5)
[Braund, Mr. Owen Harris, Cumings, Mrs. John Bradley (Florence Briggs Thayer), Heikkinen, Miss. Laina, Futrelle, Mrs. Jacques Heath (Lily May Peel), Allen, Mr. William Henry]
// Sample element type: one integer field and one floating-point field
data class Item(val first: Int, val second: Double)

// Sample list of three items
val itemsList = listOf(
    Item(first = 1, second = 2.0),
    Item(first = 2, second = 3.0),
    Item(first = 3, second = 4.0)
)
// List -> DataFrame by reflection
itemsList.toDataFrame()
first | second |
---|---|
1 | 2.000000 |
2 | 3.000000 |
3 | 4.000000 |
// List -> DataFrame by mappings
itemsList.toDataFrame {
"a" from { it.first }
"b" from { it.second }
"c" from { it.first * it.second }
}
a | b | c |
---|---|---|
1 | 2.000000 | 2.000000 |
2 | 3.000000 | 6.000000 |
3 | 4.000000 | 12.000000 |
// Convert a data frame to a list of data class items.
// 'age' is declared nullable because the 'age' column of 'df' contains nulls.
@DataSchema
data class Person(val name: String, val age: Double?)
// Presumably yields one 'Person' per row, matched to columns by property name (confirm against the toListOf docs).
val passengers = df.toListOf<Person>()
// Check type of the element
passengers[0].javaClass
class Line_89_jupyter$Person
// Any regular list operation works, e.g. finding the oldest passenger (a missing age counts as 0.0)
passengers.maxBy { person -> person.age ?: 0.0 }
Person(name=Barkworth, Mr. Algernon Henry Wilson, age=80.0)
// Create a marker interface so that column-specific extensions can be written for data frames.
// It describes a data frame with a String column 'name' and a non-null Double column 'age'.
@DataSchema
interface SimplePerson {
val name: String
val age: Double
}
/**
 * Keeps only the rows whose 'age' is strictly greater than [minAge].
 * Usable on any data frame with the 'name' and 'age' fields of [SimplePerson].
 */
fun DataFrame<SimplePerson>.getOlderThan(minAge: Double): DataFrame<SimplePerson> {
    return filter { age > minAge }
}
// The @DataSchema marker and its extension functions are automatically applied to any data frame
// created after this point whose column names and types match the schema
val updatedWithAges = withAges
// The dataframe is now considered a subtype of `SimplePerson` and can access extension functions.
updatedWithAges.getOlderThan(50.0)
passengerId | survived | pclass | name | sex | age | sibSp | parch | ticket | fare | cabin | embarked |
---|---|---|---|---|---|---|---|---|---|---|---|
7 | 0 | 1 | McCarthy, Mr. Timothy J | male | 54.000000 | 0 | 0 | 17463 | 51.862500 | E46 | S |
12 | 1 | 1 | Bonnell, Miss. Elizabeth | female | 58.000000 | 0 | 0 | 113783 | 26.550000 | C103 | S |
16 | 1 | 2 | Hewlett, Mrs. (Mary D Kingcome) | female | 55.000000 | 0 | 0 | 248706 | 16.000000 | null | S |
34 | 0 | 2 | Wheadon, Mr. Edward H | male | 66.000000 | 0 | 0 | C.A. 24579 | 10.500000 | null | S |
55 | 0 | 1 | Ostby, Mr. Engelhart Cornelius | male | 65.000000 | 0 | 1 | 113509 | 61.979200 | B30 | C |
95 | 0 | 3 | Coxon, Mr. Daniel | male | 59.000000 | 0 | 0 | 364500 | 7.250000 | null | S |
97 | 0 | 1 | Goldschmidt, Mr. George B | male | 71.000000 | 0 | 0 | PC 17754 | 34.654200 | A5 | C |
117 | 0 | 3 | Connors, Mr. Patrick | male | 70.500000 | 0 | 0 | 370369 | 7.750000 | null | Q |
125 | 0 | 1 | White, Mr. Percival Wayland | male | 54.000000 | 0 | 1 | 35281 | 77.287500 | D26 | S |
151 | 0 | 2 | Bateman, Rev. Robert James | male | 51.000000 | 0 | 0 | S.O.P. 1166 | 12.525000 | null | S |
153 | 0 | 3 | Meo, Mr. Alfonzo | male | 55.500000 | 0 | 0 | A.5. 11206 | 8.050000 | null | S |
156 | 0 | 1 | Williams, Mr. Charles Duane | male | 51.000000 | 0 | 1 | PC 17597 | 61.379200 | null | C |
171 | 0 | 1 | Van der hoef, Mr. Wyckoff | male | 61.000000 | 0 | 0 | 111240 | 33.500000 | B19 | S |
175 | 0 | 1 | Smith, Mr. James Clinch | male | 56.000000 | 0 | 0 | 17764 | 30.695800 | A7 | C |
196 | 1 | 1 | Lurette, Miss. Elise | female | 58.000000 | 0 | 0 | PC 17569 | 146.520800 | B80 | C |
223 | 0 | 3 | Green, Mr. George Henry | male | 51.000000 | 0 | 0 | 21440 | 8.050000 | null | S |
233 | 0 | 2 | Sjostedt, Mr. Ernst Adolf | male | 59.000000 | 0 | 0 | 237442 | 13.500000 | null | S |
250 | 0 | 2 | Carter, Rev. Ernest Courtenay | male | 54.000000 | 1 | 0 | 244252 | 26.000000 | null | S |
253 | 0 | 1 | Stead, Mr. William Thomas | male | 62.000000 | 0 | 0 | 113514 | 26.550000 | C87 | S |
263 | 0 | 1 | Taussig, Mr. Emil | male | 52.000000 | 1 | 1 | 110413 | 79.650000 | E67 | S |
import org.jetbrains.kotlinx.dataframe.codeGen.generateInterfaces
// code for marker interface can be auto-generated
withAges.select{ name and age and sex and ticket }.generateInterfaces("Person")
@DataSchema interface Person { val age: kotlin.Double val name: kotlin.String val sex: kotlin.String val ticket: kotlin.String }
// Copy-paste the generated interface and run it.
// It describes data frames that have non-null 'age', 'name', 'sex' and 'ticket' columns.
@DataSchema
interface Person {
val age: kotlin.Double
val name: kotlin.String
val sex: kotlin.String
val ticket: kotlin.String
}
// With the 'Person' interface in scope, an extension method can be declared
// that works for any data frame containing these four columns.
// It adds a "summary" column built from each row's sex, name, age and ticket.
fun DataFrame<Person>.addSummary() =
    add("summary") { "$sex $name $age has ticket $ticket" }
// For example, it works for the 'withAges' data frame, but only after the cell has been evaluated.
val dfWithSummary = withAges
dfWithSummary.addSummary()
org.jetbrains.kotlinx.jupyter.repl.impl.JupyterCompilerImpl.compileSync(JupyterCompilerImpl.kt:201) org.jetbrains.kotlinx.jupyter.repl.impl.InternalEvaluatorImpl.eval(InternalEvaluatorImpl.kt:126) org.jetbrains.kotlinx.jupyter.repl.impl.CellExecutorImpl$execute$1$result$1.invoke(CellExecutorImpl.kt:80) org.jetbrains.kotlinx.jupyter.repl.impl.CellExecutorImpl$execute$1$result$1.invoke(CellExecutorImpl.kt:78) org.jetbrains.kotlinx.jupyter.repl.impl.ReplForJupyterImpl.withHost(ReplForJupyterImpl.kt:762) org.jetbrains.kotlinx.jupyter.repl.impl.CellExecutorImpl.execute-L4Nmkdk(CellExecutorImpl.kt:78) org.jetbrains.kotlinx.jupyter.repl.execution.CellExecutor$DefaultImpls.execute-L4Nmkdk$default(CellExecutor.kt:13) org.jetbrains.kotlinx.jupyter.repl.impl.ReplForJupyterImpl.evaluateUserCode-wNURfNM(ReplForJupyterImpl.kt:585) org.jetbrains.kotlinx.jupyter.repl.impl.ReplForJupyterImpl.access$evaluateUserCode-wNURfNM(ReplForJupyterImpl.kt:138) org.jetbrains.kotlinx.jupyter.repl.impl.ReplForJupyterImpl$evalEx$1.invoke(ReplForJupyterImpl.kt:442) org.jetbrains.kotlinx.jupyter.repl.impl.ReplForJupyterImpl$evalEx$1.invoke(ReplForJupyterImpl.kt:439) org.jetbrains.kotlinx.jupyter.repl.impl.ReplForJupyterImpl.withEvalContext(ReplForJupyterImpl.kt:420) org.jetbrains.kotlinx.jupyter.repl.impl.ReplForJupyterImpl.evalEx(ReplForJupyterImpl.kt:439) org.jetbrains.kotlinx.jupyter.messaging.IdeCompatibleMessageRequestProcessor$processExecuteRequest$1$response$1$1.invoke(IdeCompatibleMessageRequestProcessor.kt:136) org.jetbrains.kotlinx.jupyter.messaging.IdeCompatibleMessageRequestProcessor$processExecuteRequest$1$response$1$1.invoke(IdeCompatibleMessageRequestProcessor.kt:135) org.jetbrains.kotlinx.jupyter.execution.JupyterExecutorImpl$Task.execute(JupyterExecutorImpl.kt:42) org.jetbrains.kotlinx.jupyter.execution.JupyterExecutorImpl$executorThread$1.invoke(JupyterExecutorImpl.kt:82) org.jetbrains.kotlinx.jupyter.execution.JupyterExecutorImpl$executorThread$1.invoke(JupyterExecutorImpl.kt:80) 
kotlin.concurrent.ThreadsKt$thread$thread$1.run(Thread.kt:30)
// When a data frame variable is mutable, a strongly typed wrapper for it
// is generated only once, after the first execution of the cell where it is declared
var nameAndSex = df.select(df.name, df.sex)
nameAndSex
name | sex |
---|---|
Braund, Mr. Owen Harris | male |
Cumings, Mrs. John Bradley (Florence ... | female |
Heikkinen, Miss. Laina | female |
Futrelle, Mrs. Jacques Heath (Lily Ma... | female |
Allen, Mr. William Henry | male |
Moran, Mr. James | male |
McCarthy, Mr. Timothy J | male |
Palsson, Master. Gosta Leonard | male |
Johnson, Mrs. Oscar W (Elisabeth Vilh... | female |
Nasser, Mrs. Nicholas (Adele Achem) | female |
Sandstrom, Miss. Marguerite Rut | female |
Bonnell, Miss. Elizabeth | female |
Saundercock, Mr. William Henry | male |
Andersson, Mr. Anders Johan | male |
Vestrom, Miss. Hulda Amanda Adolfina | female |
Hewlett, Mrs. (Mary D Kingcome) | female |
Rice, Master. Eugene | male |
Williams, Mr. Charles Eugene | male |
Vander Planke, Mrs. Julius (Emelia Ma... | female |
Masselmani, Mrs. Fatima | female |
// let's declare immutable variable, that contains all string columns
val strings = df.select { colsOf<String>() }
strings
name | sex | ticket |
---|---|---|
Braund, Mr. Owen Harris | male | A/5 21171 |
Cumings, Mrs. John Bradley (Florence ... | female | PC 17599 |
Heikkinen, Miss. Laina | female | STON/O2. 3101282 |
Futrelle, Mrs. Jacques Heath (Lily Ma... | female | 113803 |
Allen, Mr. William Henry | male | 373450 |
Moran, Mr. James | male | 330877 |
McCarthy, Mr. Timothy J | male | 17463 |
Palsson, Master. Gosta Leonard | male | 349909 |
Johnson, Mrs. Oscar W (Elisabeth Vilh... | female | 347742 |
Nasser, Mrs. Nicholas (Adele Achem) | female | 237736 |
Sandstrom, Miss. Marguerite Rut | female | PP 9549 |
Bonnell, Miss. Elizabeth | female | 113783 |
Saundercock, Mr. William Henry | male | A/5. 2151 |
Andersson, Mr. Anders Johan | male | 347082 |
Vestrom, Miss. Hulda Amanda Adolfina | female | 350406 |
Hewlett, Mrs. (Mary D Kingcome) | female | 248706 |
Rice, Master. Eugene | male | 382652 |
Williams, Mr. Charles Eugene | male | 244373 |
Vander Planke, Mrs. Julius (Emelia Ma... | female | 345763 |
Masselmani, Mrs. Fatima | female | 2649 |
// 'nameAndSex' is assignable from 'strings',
// because 'strings' has all the columns that are required by type of 'nameAndSex'
nameAndSex = strings
// note, that the actual value of 'nameAndSex' is still a data frame of all string columns
nameAndSex
name | sex | ticket |
---|---|---|
Braund, Mr. Owen Harris | male | A/5 21171 |
Cumings, Mrs. John Bradley (Florence ... | female | PC 17599 |
Heikkinen, Miss. Laina | female | STON/O2. 3101282 |
Futrelle, Mrs. Jacques Heath (Lily Ma... | female | 113803 |
Allen, Mr. William Henry | male | 373450 |
Moran, Mr. James | male | 330877 |
McCarthy, Mr. Timothy J | male | 17463 |
Palsson, Master. Gosta Leonard | male | 349909 |
Johnson, Mrs. Oscar W (Elisabeth Vilh... | female | 347742 |
Nasser, Mrs. Nicholas (Adele Achem) | female | 237736 |
Sandstrom, Miss. Marguerite Rut | female | PP 9549 |
Bonnell, Miss. Elizabeth | female | 113783 |
Saundercock, Mr. William Henry | male | A/5. 2151 |
Andersson, Mr. Anders Johan | male | 347082 |
Vestrom, Miss. Hulda Amanda Adolfina | female | 350406 |
Hewlett, Mrs. (Mary D Kingcome) | female | 248706 |
Rice, Master. Eugene | male | 382652 |
Williams, Mr. Charles Eugene | male | 244373 |
Vander Planke, Mrs. Julius (Emelia Ma... | female | 345763 |
Masselmani, Mrs. Fatima | female | 2649 |
// but typed access to the fields works only for 'name' and 'sex'
nameAndSex.sex // this is OK
sex |
---|
male |
female |
female |
female |
male |
male |
male |
male |
female |
female |
female |
female |
male |
male |
female |
female |
male |
male |
female |
female |
// this fails with compilation error
nameAndSex.ticket
org.jetbrains.kotlinx.jupyter.repl.impl.JupyterCompilerImpl.compileSync(JupyterCompilerImpl.kt:201) org.jetbrains.kotlinx.jupyter.repl.impl.InternalEvaluatorImpl.eval(InternalEvaluatorImpl.kt:126) org.jetbrains.kotlinx.jupyter.repl.impl.CellExecutorImpl$execute$1$result$1.invoke(CellExecutorImpl.kt:80) org.jetbrains.kotlinx.jupyter.repl.impl.CellExecutorImpl$execute$1$result$1.invoke(CellExecutorImpl.kt:78) org.jetbrains.kotlinx.jupyter.repl.impl.ReplForJupyterImpl.withHost(ReplForJupyterImpl.kt:762) org.jetbrains.kotlinx.jupyter.repl.impl.CellExecutorImpl.execute-L4Nmkdk(CellExecutorImpl.kt:78) org.jetbrains.kotlinx.jupyter.repl.execution.CellExecutor$DefaultImpls.execute-L4Nmkdk$default(CellExecutor.kt:13) org.jetbrains.kotlinx.jupyter.repl.impl.ReplForJupyterImpl.evaluateUserCode-wNURfNM(ReplForJupyterImpl.kt:585) org.jetbrains.kotlinx.jupyter.repl.impl.ReplForJupyterImpl.access$evaluateUserCode-wNURfNM(ReplForJupyterImpl.kt:138) org.jetbrains.kotlinx.jupyter.repl.impl.ReplForJupyterImpl$evalEx$1.invoke(ReplForJupyterImpl.kt:442) org.jetbrains.kotlinx.jupyter.repl.impl.ReplForJupyterImpl$evalEx$1.invoke(ReplForJupyterImpl.kt:439) org.jetbrains.kotlinx.jupyter.repl.impl.ReplForJupyterImpl.withEvalContext(ReplForJupyterImpl.kt:420) org.jetbrains.kotlinx.jupyter.repl.impl.ReplForJupyterImpl.evalEx(ReplForJupyterImpl.kt:439) org.jetbrains.kotlinx.jupyter.messaging.IdeCompatibleMessageRequestProcessor$processExecuteRequest$1$response$1$1.invoke(IdeCompatibleMessageRequestProcessor.kt:136) org.jetbrains.kotlinx.jupyter.messaging.IdeCompatibleMessageRequestProcessor$processExecuteRequest$1$response$1$1.invoke(IdeCompatibleMessageRequestProcessor.kt:135) org.jetbrains.kotlinx.jupyter.execution.JupyterExecutorImpl$Task.execute(JupyterExecutorImpl.kt:42) org.jetbrains.kotlinx.jupyter.execution.JupyterExecutorImpl$executorThread$1.invoke(JupyterExecutorImpl.kt:82) org.jetbrains.kotlinx.jupyter.execution.JupyterExecutorImpl$executorThread$1.invoke(JupyterExecutorImpl.kt:80) 
kotlin.concurrent.ThreadsKt$thread$thread$1.run(Thread.kt:30)
nameAndSex["ticket"] // the requested column is still available by column name string
ticket |
---|
A/5 21171 |
PC 17599 |
STON/O2. 3101282 |
113803 |
373450 |
330877 |
17463 |
349909 |
347742 |
237736 |
PP 9549 |
113783 |
A/5. 2151 |
347082 |
350406 |
248706 |
382652 |
244373 |
345763 |
2649 |
// now let's create a variable with two other columns
val nameAndTicket = df.select(df.name, df.ticket)
nameAndTicket
name | ticket |
---|---|
Braund, Mr. Owen Harris | A/5 21171 |
Cumings, Mrs. John Bradley (Florence ... | PC 17599 |
Heikkinen, Miss. Laina | STON/O2. 3101282 |
Futrelle, Mrs. Jacques Heath (Lily Ma... | 113803 |
Allen, Mr. William Henry | 373450 |
Moran, Mr. James | 330877 |
McCarthy, Mr. Timothy J | 17463 |
Palsson, Master. Gosta Leonard | 349909 |
Johnson, Mrs. Oscar W (Elisabeth Vilh... | 347742 |
Nasser, Mrs. Nicholas (Adele Achem) | 237736 |
Sandstrom, Miss. Marguerite Rut | PP 9549 |
Bonnell, Miss. Elizabeth | 113783 |
Saundercock, Mr. William Henry | A/5. 2151 |
Andersson, Mr. Anders Johan | 347082 |
Vestrom, Miss. Hulda Amanda Adolfina | 350406 |
Hewlett, Mrs. (Mary D Kingcome) | 248706 |
Rice, Master. Eugene | 382652 |
Williams, Mr. Charles Eugene | 244373 |
Vander Planke, Mrs. Julius (Emelia Ma... | 345763 |
Masselmani, Mrs. Fatima | 2649 |
nameAndSex = nameAndTicket // this assignment is rejected because of the column mismatch: 'nameAndTicket' has no 'sex' column
org.jetbrains.kotlinx.jupyter.repl.impl.JupyterCompilerImpl.compileSync(JupyterCompilerImpl.kt:201) org.jetbrains.kotlinx.jupyter.repl.impl.InternalEvaluatorImpl.eval(InternalEvaluatorImpl.kt:126) org.jetbrains.kotlinx.jupyter.repl.impl.CellExecutorImpl$execute$1$result$1.invoke(CellExecutorImpl.kt:80) org.jetbrains.kotlinx.jupyter.repl.impl.CellExecutorImpl$execute$1$result$1.invoke(CellExecutorImpl.kt:78) org.jetbrains.kotlinx.jupyter.repl.impl.ReplForJupyterImpl.withHost(ReplForJupyterImpl.kt:762) org.jetbrains.kotlinx.jupyter.repl.impl.CellExecutorImpl.execute-L4Nmkdk(CellExecutorImpl.kt:78) org.jetbrains.kotlinx.jupyter.repl.execution.CellExecutor$DefaultImpls.execute-L4Nmkdk$default(CellExecutor.kt:13) org.jetbrains.kotlinx.jupyter.repl.impl.ReplForJupyterImpl.evaluateUserCode-wNURfNM(ReplForJupyterImpl.kt:585) org.jetbrains.kotlinx.jupyter.repl.impl.ReplForJupyterImpl.access$evaluateUserCode-wNURfNM(ReplForJupyterImpl.kt:138) org.jetbrains.kotlinx.jupyter.repl.impl.ReplForJupyterImpl$evalEx$1.invoke(ReplForJupyterImpl.kt:442) org.jetbrains.kotlinx.jupyter.repl.impl.ReplForJupyterImpl$evalEx$1.invoke(ReplForJupyterImpl.kt:439) org.jetbrains.kotlinx.jupyter.repl.impl.ReplForJupyterImpl.withEvalContext(ReplForJupyterImpl.kt:420) org.jetbrains.kotlinx.jupyter.repl.impl.ReplForJupyterImpl.evalEx(ReplForJupyterImpl.kt:439) org.jetbrains.kotlinx.jupyter.messaging.IdeCompatibleMessageRequestProcessor$processExecuteRequest$1$response$1$1.invoke(IdeCompatibleMessageRequestProcessor.kt:136) org.jetbrains.kotlinx.jupyter.messaging.IdeCompatibleMessageRequestProcessor$processExecuteRequest$1$response$1$1.invoke(IdeCompatibleMessageRequestProcessor.kt:135) org.jetbrains.kotlinx.jupyter.execution.JupyterExecutorImpl$Task.execute(JupyterExecutorImpl.kt:42) org.jetbrains.kotlinx.jupyter.execution.JupyterExecutorImpl$executorThread$1.invoke(JupyterExecutorImpl.kt:82) org.jetbrains.kotlinx.jupyter.execution.JupyterExecutorImpl$executorThread$1.invoke(JupyterExecutorImpl.kt:80) 
kotlin.concurrent.ThreadsKt$thread$thread$1.run(Thread.kt:30)
// unfortunately, there is still a way to get a runtime error here:
// typed wrappers are generated only after a cell has been executed,
// so the following assignment compiles fine, because the return type of 'select' is the same as the type of the 'df' variable,
// although the set of columns was reduced
nameAndSex = df.select(df.name, df.ticket)
// if we try to access the column, we get runtime error
nameAndSex.sex
org.jetbrains.kotlinx.jupyter.repl.impl.JupyterCompilerImpl.compileSync(JupyterCompilerImpl.kt:201) org.jetbrains.kotlinx.jupyter.repl.impl.InternalEvaluatorImpl.eval(InternalEvaluatorImpl.kt:126) org.jetbrains.kotlinx.jupyter.repl.impl.CellExecutorImpl$execute$1$result$1.invoke(CellExecutorImpl.kt:80) org.jetbrains.kotlinx.jupyter.repl.impl.CellExecutorImpl$execute$1$result$1.invoke(CellExecutorImpl.kt:78) org.jetbrains.kotlinx.jupyter.repl.impl.ReplForJupyterImpl.withHost(ReplForJupyterImpl.kt:762) org.jetbrains.kotlinx.jupyter.repl.impl.CellExecutorImpl.execute-L4Nmkdk(CellExecutorImpl.kt:78) org.jetbrains.kotlinx.jupyter.repl.execution.CellExecutor$DefaultImpls.execute-L4Nmkdk$default(CellExecutor.kt:13) org.jetbrains.kotlinx.jupyter.repl.impl.ReplForJupyterImpl.evaluateUserCode-wNURfNM(ReplForJupyterImpl.kt:585) org.jetbrains.kotlinx.jupyter.repl.impl.ReplForJupyterImpl.access$evaluateUserCode-wNURfNM(ReplForJupyterImpl.kt:138) org.jetbrains.kotlinx.jupyter.repl.impl.ReplForJupyterImpl$evalEx$1.invoke(ReplForJupyterImpl.kt:442) org.jetbrains.kotlinx.jupyter.repl.impl.ReplForJupyterImpl$evalEx$1.invoke(ReplForJupyterImpl.kt:439) org.jetbrains.kotlinx.jupyter.repl.impl.ReplForJupyterImpl.withEvalContext(ReplForJupyterImpl.kt:420) org.jetbrains.kotlinx.jupyter.repl.impl.ReplForJupyterImpl.evalEx(ReplForJupyterImpl.kt:439) org.jetbrains.kotlinx.jupyter.messaging.IdeCompatibleMessageRequestProcessor$processExecuteRequest$1$response$1$1.invoke(IdeCompatibleMessageRequestProcessor.kt:136) org.jetbrains.kotlinx.jupyter.messaging.IdeCompatibleMessageRequestProcessor$processExecuteRequest$1$response$1$1.invoke(IdeCompatibleMessageRequestProcessor.kt:135) org.jetbrains.kotlinx.jupyter.execution.JupyterExecutorImpl$Task.execute(JupyterExecutorImpl.kt:42) org.jetbrains.kotlinx.jupyter.execution.JupyterExecutorImpl$executorThread$1.invoke(JupyterExecutorImpl.kt:82) org.jetbrains.kotlinx.jupyter.execution.JupyterExecutorImpl$executorThread$1.invoke(JupyterExecutorImpl.kt:80) 
kotlin.concurrent.ThreadsKt$thread$thread$1.run(Thread.kt:30)
Support operations:
Improve typed wrappers for: