# Standard library
import tempfile
from pathlib import Path

# Installed packages
import pandas as pd

# Testing
from IPython.utils.capture import capture_output

# Our package
from ydata_profiling import ProfileReport
from ydata_profiling.utils.cache import cache_file
# Load the Titanic dataset into a DataFrame.
# NOTE(review): cache_file presumably downloads the URL once and returns the
# path of the local copy -- confirm in ydata_profiling.utils.cache.
TITANIC_URL = (
    "https://raw.githubusercontent.com/datasciencedojo/datasets/master/titanic.csv"
)
file_name = cache_file("titanic.csv", TITANIC_URL)
df = pd.read_csv(file_name)
# Generate the Profiling Report eagerly (lazy=False) with the progress bar
# enabled, capturing everything that gets displayed while it is built.
with capture_output() as out:
    profile = ProfileReport(df, title="Titanic Dataset", progress_bar=True, lazy=False)

# Every captured output must contain "%|" or "FloatProgress" in its text/plain
# representation (presumably the text and widget renderings of a progress bar).
assert all(
    any(v in s.data["text/plain"] for v in ["%|", "FloatProgress"]) for s in out.outputs
)
# Eager construction is expected to display exactly two outputs.
assert len(out.outputs) == 2
# Generate the Profiling Report lazily: the progress bar is enabled, but with
# lazy=True nothing should be displayed until the report is actually rendered.
with capture_output() as out:
    profile = df.profile_report(
        title="Titanic Dataset",
        html={"style": {"full_width": True}},
        progress_bar=True,
        lazy=True,
    )

# Constructing the lazy report alone must not display anything.
assert len(out.outputs) == 0

# Rendering to HTML is the first step that is expected to display output.
with capture_output() as out:
    _ = profile.to_html()

# All three captured outputs must contain "%|" or "FloatProgress" in their
# text/plain representation.
assert all(
    any(v in s.data["text/plain"] for v in ["%|", "FloatProgress"]) for s in out.outputs
)
assert len(out.outputs) == 3

# Writing the already rendered report to disk should only display the export
# step. The file goes to the platform temp directory rather than a hard-coded
# "/tmp", so the script also runs where "/tmp" does not exist.
with capture_output() as out:
    _ = profile.to_file(Path(tempfile.gettempdir()) / "tmpfile.html")

assert "Export report to file" in out.outputs[0].data["text/plain"]
assert len(out.outputs) == 1
# Test caching of the iterative building process: each stage should display
# output only the first time it is computed.

# Platform temp directory for the exported files (instead of a hard-coded
# "/tmp", which does not exist on every platform).
tmp_dir = Path(tempfile.gettempdir())

# A lazy report displays nothing on construction.
with capture_output() as out:
    profile = ProfileReport(df, title="Titanic Dataset", progress_bar=True, lazy=True)

assert len(out.outputs) == 0

# Accessing each stage in turn is expected to display exactly one new output.
# The values are bound to `_` to make clear the access itself is the point.
with capture_output() as out:
    _ = profile.description_set

assert len(out.outputs) == 1

with capture_output() as out:
    _ = profile.report

assert len(out.outputs) == 1

with capture_output() as out:
    _ = profile.html

assert len(out.outputs) == 1

# Change the theme and invalidate only the "rendering" cache: exporting is then
# expected to display two outputs.
with capture_output() as out:
    profile.config.html.style.theme = "united"
    profile.invalidate_cache("rendering")
    profile.to_file(tmp_dir / "cache1.html")

assert len(out.outputs) == 2

# Changing a setting without invalidating anything: reading the HTML again
# must not display any output.
with capture_output() as out:
    profile.config.pool_size = 1
    _ = profile.html

assert len(out.outputs) == 0

# Invalidate the whole cache after changing several settings: exporting is
# expected to display four outputs.
with capture_output() as out:
    profile.config.pool_size = 0
    profile.config.samples.head = 5
    profile.config.samples.tail = 15
    profile.invalidate_cache()
    profile.to_file(tmp_dir / "cache2.html")

assert len(out.outputs) == 4