#!/usr/bin/env python
# coding: utf-8

# ## Saving Profiles to S3
# ---

# In[43]:


from whylogs import get_or_create_session
import pandas as pd


# In[44]:


get_ipython().run_line_magic('load_ext', 'autoreload')
get_ipython().run_line_magic('autoreload', '2')


# ## Create a mock s3 server
#
# For this example we will create a fake s3 server using the moto lib. You
# should remove this section if you have your own bucket set up on aws. Make
# sure you have your aws configuration set. By default this mock server
# creates a server in region "us-east-1".

# In[45]:


BUCKET = "super_awesome_bucket"


# In[46]:


from moto import mock_s3
from moto.s3.responses import DEFAULT_REGION_NAME
import boto3

mocks3 = mock_s3()
mocks3.start()
res = boto3.resource('s3', region_name=DEFAULT_REGION_NAME)
res.create_bucket(Bucket=BUCKET)


# ## Load Data
# We can go about this the usual way and load an example csv data set.

# In[47]:


df = pd.read_csv("lending_club_1000.csv")


# ## Config File
# ---
# Setting up whylogs to save your data on s3 can be done in several ways. The
# simplest is to create a config file, where each data format can be saved to
# a specific location, as shown below.

# In[48]:


# The output path is derived from BUCKET so the writers always target the
# bucket created above, even if the bucket name is changed.
CONFIG = f"""
project: s3_example_project
pipeline: latest_results
verbose: false
writers:
- formats:
    - protobuf
  output_path: s3://{BUCKET}/
  path_template: $name/dataset_summary
  filename_template: dataset_summary
  type: s3
- formats:
    - flat
  output_path: s3://{BUCKET}/
  path_template: $name/dataset_summary
  filename_template: dataset_summary
  type: s3
- formats:
    - json
  output_path: s3://{BUCKET}/
  path_template: $name/dataset_summary
  filename_template: dataset_summary
  type: s3
"""


# In[49]:


# Single source of truth for the config location; reused by every cell below.
config_path = ".whylogs.yaml"
with open(config_path, "w") as file:
    file.write(CONFIG)


# Checking the content:

# In[50]:


get_ipython().run_line_magic('cat', config_path)


# If you have a custom name for your config file, or place it in a special
# location, you can use the helper functions below.

# In[51]:


from whylogs.app.session import load_config, session_from_config

config = load_config(config_path)
session = session_from_config(config)

print(session.get_config().to_yaml())


# Otherwise, if the file is located in your home directory or in the
# directory you are running from, you can simply run `get_or_create_session()`.

# In[52]:


session = get_or_create_session()
print(session.get_config().to_yaml())


# ## Logging Data
# ---
# The data can be saved by simply closing a logger, or once a logger goes out
# of scope.

# In[53]:


with session.logger("dataset_test_s3") as logger:
    logger.log_dataframe(df)


# In[54]:


# Use the same region as the mocked bucket so the client does not depend on
# whatever default region the local aws configuration provides.
client = boto3.client('s3', region_name=DEFAULT_REGION_NAME)
objects = client.list_objects(Bucket=BUCKET)
# The "Contents" key is absent from the response when nothing was written,
# so fall back to an empty listing instead of raising KeyError.
[obj["Key"] for obj in objects.get("Contents", [])]


# You can define where the data is saved either through a configuration file
# or by creating a custom writer.
#
# ### Close mock s3 server

# In[55]:


mocks3.stop()


# In[ ]: