#!/usr/bin/env python
# coding: utf-8

# ## Saving Profiles to S3 
# ---

# In[43]:


from whylogs import get_or_create_session
import pandas as pd


# In[44]:


# Load IPython's autoreload extension and switch it to mode '2'.
# `get_ipython` only exists inside an IPython/Jupyter session.
ipython_shell = get_ipython()
ipython_shell.run_line_magic('load_ext', 'autoreload')
ipython_shell.run_line_magic('autoreload', '2')


# ## Create a mock s3 server 
# 

# For this example we will create a fake S3 server using the moto library. You should remove this section if you have your own bucket set up on AWS; in that case, make sure your AWS configuration is set. By default this mock server is created in region "us-east-1".

# In[45]:


# Name of the bucket created on the mock S3 server and listed at the end of
# the example. The `output_path` entries in CONFIG spell out the same name.
BUCKET="super_awesome_bucket"


# In[46]:


from moto import mock_s3
from moto.s3.responses import DEFAULT_REGION_NAME
import boto3

# Start moto's S3 mock (the fake S3 server used for this example). It stays
# active until `mocks3.stop()` is called at the end of the script.
mocks3 = mock_s3()
mocks3.start()
# Create the example bucket through a boto3 resource pinned to moto's
# default region.
res = boto3.resource('s3', region_name=DEFAULT_REGION_NAME)
res.create_bucket(Bucket=BUCKET)


# ## Load Data

# As usual, we start by loading an example CSV dataset.

# In[47]:


# Load the example dataset into a DataFrame. The path is relative, so the CSV
# must be in the current working directory.
df = pd.read_csv("lending_club_1000.csv")


# ## Config File
# ---
# Setting up whylogs to save your data on S3 can be done in several ways. The simplest is to create a config file in which each data format is saved to a specific location, as shown below.

# In[48]:


# whylogs YAML configuration used by this example. It declares three writers
# of type `s3`, one per output format (protobuf, flat, json), all sharing the
# same output path and path/filename templates.
# NOTE: the bucket in `output_path` is hard-coded and duplicates BUCKET; keep
# the two in sync if either is changed.
# NOTE(review): `$name` in `path_template` is presumably substituted by
# whylogs with the dataset/logger name -- confirm against the whylogs docs.
CONFIG = """
project: s3_example_project
pipeline: latest_results
verbose: false
writers:
- formats:
  - protobuf
  output_path: s3://super_awesome_bucket/
  path_template: $name/dataset_summary
  filename_template: dataset_summary
  type: s3
- formats:
  - flat
  output_path: s3://super_awesome_bucket/
  path_template: $name/dataset_summary
  filename_template: dataset_summary
  type: s3
- formats:
  - json
  output_path: s3://super_awesome_bucket/
  path_template: $name/dataset_summary
  filename_template: dataset_summary
  type: s3
"""


# In[49]:


# Write the YAML configuration to disk so whylogs can load it from a file.
# `config_path` is the single place the file location is spelled out for this
# step; the encoding is explicit so the bytes written do not depend on the
# platform's default locale.
config_path = ".whylogs.yaml"
with open(config_path, "w", encoding="utf-8") as config_file:
    config_file.write(CONFIG)


# Checking the content:

# In[50]:


# Echo the file that was just written. Plain Python is used instead of the
# `%cat` line magic so this check also runs outside IPython and on platforms
# where no `cat` alias is available. `end=""` keeps the output identical to
# the file contents (no extra trailing newline).
with open(".whylogs.yaml", encoding="utf-8") as written_config:
    print(written_config.read(), end="")


# If you have a custom name for your config file, or keep it in a non-default location, you can use the helper functions `load_config` and `session_from_config`:

# In[51]:


from whylogs.app.session import load_config, session_from_config
# Explicit route: read the YAML file into a config object, build a session
# from that object, then print the session's configuration rendered as YAML.
config = load_config(".whylogs.yaml")
session = session_from_config(config)
explicit_session_yaml = session.get_config().to_yaml()
print(explicit_session_yaml)


# Otherwise, if the file is located in your home directory or in the directory you are running from, you can simply call `get_or_create_session()`.

# In[52]:


# Implicit route: no path is passed here, so the session is obtained without
# naming the config file explicitly. Print its configuration as YAML to
# confirm what was picked up.
session = get_or_create_session()
discovered_session_yaml = session.get_config().to_yaml()
print(discovered_session_yaml)


# ## Logging Data
# ---
# The data is saved when a logger is closed, or once the logger goes out of scope.

# In[53]:


# Open a logger named "dataset_test_s3" as a context manager and profile the
# DataFrame with it. Leaving the `with` block closes the logger.
# NOTE(review): closing is presumably what triggers the configured S3 writers
# -- confirm against the whylogs docs.
with session.logger("dataset_test_s3") as logger:
    logger.log_dataframe(df)


# In[54]:


# List the keys now present in the bucket to confirm what was written.
# The client is pinned to the same region as the resource that created the
# bucket, so both sides of the example address the mock consistently.
client = boto3.client('s3', region_name=DEFAULT_REGION_NAME)
objects = client.list_objects(Bucket=BUCKET)
# The "Contents" key is absent from the response when the bucket holds no
# objects, so fall back to an empty list instead of raising KeyError.
[obj["Key"] for obj in objects.get("Contents", [])]


# You can configure where the data is saved either through a configuration file or by creating a custom writer.
# 

# ### Close mock s3 server 

# In[55]:


# Shut down the S3 mock that was started with `mocks3.start()` earlier in
# the script.
mocks3.stop()


# In[ ]: