spark.version

import java.sql.Date
import org.apache.spark.sql.expressions.Window
import org.apache.spark.sql.functions._

// Sample purchases: one row per (purchase_date, device_id).
// Dates are written as ISO strings and parsed with Date.valueOf so the
// resulting column holds java.sql.Date values.
val purchasesDF = Seq(
  "2019-01-01" -> "01",
  "2019-05-10" -> "01",
  "2019-03-05" -> "02",
  "2019-02-20" -> "03",
  "2019-01-20" -> "02"
).map { case (day, deviceId) => (Date.valueOf(day), deviceId) }
  .toDF("purchase_date", "device_id")

// Sample device catalogue: (device_id, device_name, price).
// Device "04" has no row in purchasesDF, so an inner join leaves it out.
val devicesDF = Seq(
  ("01", "notebook", 600.00),
  ("02", "small phone", 100.00),
  ("03", "camera", 150.00),
  ("04", "large phone", 700.00)
).toDF("device_id", "device_name", "price")

%%dataframe
// Join without aliases: both sides of the condition use the same
// unqualified column name.
// NOTE(review): "device_id" exists in both inputs, so this condition is
// expected to be rejected as an ambiguous reference -- presumably kept as
// the motivating example for the aliased joins below; confirm.
purchasesDF.join(
  devicesDF,
  col("device_id") === col("device_id"),
  "inner"
)

%%dataframe %%scan
// Same join, with each side aliased so the two device_id columns can be
// addressed separately as "purchases.device_id" and "devices.device_id".
purchasesDF.as("purchases")
  .join(
    devicesDF.as("devices"),
    col("purchases.device_id") === col("devices.device_id")
  )

// NOTE(review): no column named "xxxx" is created anywhere in this script,
// so this select is expected to fail -- presumably a deliberate probe to see
// the column names the joined result exposes; confirm.
purchasesDF.as("purchases")
  .join(
    devicesDF.as("devices"),
    col("purchases.device_id") === col("devices.device_id")
  )
  .select("xxxx")

%%dataframe %%scan
// Aliased join followed by a drop on the bare column name.
// NOTE(review): both inputs contribute a device_id column; verify whether
// dropping by unqualified name removes one of them or both.
purchasesDF.as("purchases")
  .join(
    devicesDF.as("devices"),
    col("purchases.device_id") === col("devices.device_id")
  )
  .drop("device_id")