%%help %%info %%logs sc.parallelize(range(1000)).count() import os print(os.environ.get('SPARK_HOME', None)) print(os.environ.get('HADOOP_CONF_DIR', None)) %%info sc.parallelize(range(1000)).count() sc.parallelize(range(2000)).count() %%sql show tables %%sql select * from movies_pq_s3 limit 100 %%sql -o ratings select movieid, rating from ratings_pq_s3 %%local %matplotlib inline import matplotlib import seaborn as sns import matplotlib.pyplot as plt sns.distplot(ratings.rating, kde=False, rug=True)