# Show the docstring of the notebook's built-in display() helper.
help(display)
from dataclasses import dataclass


@dataclass
class Element:
    """A chemical element: its atomic number and its name.

    Used below to demonstrate that display() renders a list of
    dataclass instances as a table.
    """

    # Position in the periodic table (1 = Hydrogen, 2 = Helium, ...).
    atomic_number: int
    # Human-readable element name, e.g. "Hydrogen".
    name: str
# Two sample records; display() renders a list of dataclasses as a table.
elements = [Element(1, "Hydrogen"), Element(2, "Helium")]
display(elements)
from collections import namedtuple

# Record type pairing a magic's kind ("line" or "cell") with its name.
Magic = namedtuple("Magic", ["magic_type", "magic_name"])
# Collect every registered IPython line and cell magic into Magic records
# (all line magics first, then all cell magics) and render them as a table.
magics_by_kind = get_ipython().magics_manager.magics
display(
    [
        Magic(kind, name)
        for kind in ("line", "cell")
        for name in magics_by_kind[kind].keys()
    ]
)
# Line magic: print the current working directory of the driver process.
%pwd
%%html
<!-- Cell magic: render raw HTML, including inline SVG, in the cell output. -->
<h1>Hello SVG!</h1>
<svg width="100" height="100">
<circle cx="50" cy="50" r="40" stroke="green" stroke-width="4" fill="yellow" />
</svg>
import ipywidgets as widgets
# Interactive slider — IntSlider(value, min, max): starts at 1, range 1..1,000,000.
record_count = widgets.IntSlider(1, 1, 1000000)
# A bare expression as the last line of a cell renders the widget inline.
record_count
# Uses whatever value the slider holds when THIS cell runs (widget state is
# read once via .value; moving the slider later does not re-trigger this).
spark.range(record_count.value).count()
# Read the sample README as a DataFrame of text lines; collect() pulls all
# Rows to the driver and [:5] keeps the first five.
spark.read.text("dbfs:/databricks-datasets/README.md").collect()[:5]
# %sh runs a shell command on the driver; the same file is reachable through
# the local /dbfs mount point.
%sh ls -lh /dbfs/databricks-datasets/README.md
# Read the same README with plain Python file I/O via the driver-local
# /dbfs mount. Explicit encoding avoids locale-dependent decoding.
with open("/dbfs/databricks-datasets/README.md", encoding="utf-8") as f:
    contents = f.read()
print(contents)
import sys
# Make shared modules in a Repo importable by extending the module search path.
sys.path.append("/Workspace/Repos/some_repo/common")
# Resolves against the path appended above — TODO confirm the repo exists.
import constants
import requests
import json
from pprint import pprint

# Grab the workspace URL and an API token from the notebook context so the
# Databricks REST API can be called from inside the notebook. The context
# chain is fetched once and reused instead of being rebuilt per value.
ctx = dbutils.notebook.entry_point.getDbutils().notebook().getContext()
api_url = ctx.apiUrl().getOrElse(None)
token = ctx.apiToken().getOrElse(None)
print(f"api_url: {api_url}")
# SECURITY: never echo the raw bearer token into notebook output — it is a
# live credential that would persist in the saved notebook/results.
print("token: <redacted>")

# List every cluster in the workspace via the REST 2.0 Clusters API.
resp = requests.get(
    f"{api_url}/api/2.0/clusters/list",
    headers={"Authorization": f"Bearer {token}"},
)
print()
pprint(json.loads(resp.content))
# Install the databricks-cli package into this notebook's Python environment.
%pip install databricks-cli
from databricks_cli.sdk import ClusterService, ApiClient
# Authenticate the SDK client with the workspace URL and token obtained in
# the earlier REST-API cell, then list clusters — same call, typed wrapper.
client = ClusterService(ApiClient(host = api_url, token=token))
clusters = client.list_clusters()
pprint(clusters)
# Placeholder path — replace <<folder-containing-parquet-files>> before running.
# .describe() computes summary statistics; .display() renders them as a table.
spark.read.parquet("<<folder-containing-parquet-files>>").describe().display()
# Build and cache 1M synthetic rows via the DataFrame API; count() forces
# materialization so the cache is actually populated.
spark.range(1_000_000).selectExpr("id", "concat('Person-', id) as name").cache().count()
# Same idea with the lower-level RDD API: 1M (x, x*x) pairs, cached and counted.
spark.sparkContext.parallelize(range(1_000_000)).map(lambda x: (x, x * x)).cache().count()
%scala
// Deliberate data-skew demo: rows evaluated on executor "0" get a
// 1000-element array while all other rows get 10 elements, so after
// explode() one executor processes ~100x more rows than the rest.
import org.apache.spark.sql.functions.{lit, col, array_repeat, when, explode, acos, udf}
import org.apache.spark.SparkEnv
import org.apache.spark.sql.functions.udf
// UDF reporting which executor is evaluating the current row.
val getExecutorId = udf(() => SparkEnv.get.executorId)
spark.udf.register("getExecutorId", getExecutorId)
// NOTE: no comments inside the chain — leading-dot continuation lines.
val df = spark.range(10000000)
.selectExpr("id", "getExecutorId() as executor")
.withColumn("array", when($"executor" === "0", array_repeat(lit(0.5), 1000)).otherwise(array_repeat(lit(0.5), 10)))
.withColumn("item", explode($"array"))
.drop("array")
.withColumn("acos", acos($"item"))
// The "noop" format executes the whole pipeline without writing any output.
df.write.format("noop").mode("overwrite").save()
%scala
// Print the SparkSession and its underlying SparkContext instances.
println(s"Spark session instance : ${spark}")
println(s"Spark context instance : ${spark.sparkContext}")