#!/usr/bin/env python
# coding: utf-8

# # Read Cassandra as DataFrame
# > Short guide on how to read data from Cassandra into pandas dataframe format
#
# - toc: true
# - badges: true
# - comments: true
# - categories: [Cassandra]
# - image:

# In[ ]:


# General-purpose imports plus the cqlengine modelling helpers.
import os
from datetime import datetime

import pandas as pd
from cassandra.cqlengine import columns
from cassandra.cqlengine.models import Model


# In[ ]:


# Driver pieces needed to open an authenticated connection.
from cassandra.auth import PlainTextAuthProvider
from cassandra.cluster import Cluster
from cassandra.cqlengine.connection import (
    register_connection,
    set_default_connection,
)
from cassandra.cqlengine.management import sync_table
from cassandra.policies import DCAwareRoundRobinPolicy, TokenAwarePolicy


# In[ ]:


# Connection settings for the Cassandra node queried below.
# NOTE(review): the credentials are hard-coded in the notebook; consider
# loading them from the environment before pointing this at a shared cluster.
CASSANDRA_USERNAME = 'cassandra'
CASSANDRA_PASSWORD = 'cassandra'
CASSANDRA_HOST = '127.0.0.1'
CASSANDRA_PORT = 9042


# In[ ]:


# Build the cluster handle: password authentication, a single contact point,
# and a token-aware policy wrapping a DC-aware round-robin policy.
auth_provider = PlainTextAuthProvider(
    username=CASSANDRA_USERNAME,
    password=CASSANDRA_PASSWORD,
)
cluster = Cluster(
    [CASSANDRA_HOST],
    load_balancing_policy=TokenAwarePolicy(DCAwareRoundRobinPolicy()),
    port=CASSANDRA_PORT,
    auth_provider=auth_provider,
    executor_threads=2,
    protocol_version=4,
)


# In[ ]:


# Open a session and register it with cqlengine as the default connection,
# using the session's string form as the connection name.
session = cluster.connect()
register_connection(str(session), session=session)
set_default_connection(str(session))


# In[ ]:


# Fetch the whole table and materialise it as a DataFrame. The connection is
# only needed for this step, so the cluster is shut down as soon as the rows
# are in memory (or if the query fails) instead of being left open.
try:
    rows = session.execute('select * from demo.click_stream;')
    df = pd.DataFrame(list(rows))
finally:
    cluster.shutdown()
df.head()


# In[ ]:


df.info()


# In[ ]:


df.describe()


# In[ ]:


df.item_id.value_counts()


# In[ ]:


# Persist the snapshot for later use.
df.to_pickle('../recommender/data/logs_test_020521_1.p')