This notebook plays around with the data generated by running the facial detection script over the whole Tribune collection.
import pandas as pd
import altair as alt
# Load the per-image face counts written out by the detection script.
df = pd.read_csv(filepath_or_buffer='faces_per_image.csv')

# Summary statistics (count, mean, std, quartiles, max) for the numeric columns.
df.describe()
| | faces |
|---|---|
count | 60364.000000 |
mean | 3.821433 |
std | 6.661479 |
min | 0.000000 |
25% | 1.000000 |
50% | 2.000000 |
75% | 4.000000 |
max | 174.000000 |
# Sanity check: which images were reported with more than 100 faces?
# Counts this high look suspicious, so list the rows for manual inspection.
df.loc[df['faces'].gt(100)]
| | faces | image |
|---|---|---|
20402 | 174 | FL4470426.jpg |
28301 | 117 | FL4494940.jpg |
34768 | 145 | FL4517063.jpg |
36351 | 111 | FL4522209.jpg |
36353 | 105 | FL4522212.jpg |
37843 | 109 | FL4526927.jpg |
49708 | 127 | FL4564048.jpg |
50428 | 104 | FL4566376.jpg |
53969 | 132 | FL4577551.jpg |
54041 | 110 | FL4577772.jpg |
54044 | 144 | FL4577775.jpg |
54062 | 112 | FL4577793.jpg |
54172 | 126 | FL4578163.jpg |
54173 | 111 | FL4578164.jpg |
# Switch Altair to its 'json' data transformer before charting the full
# DataFrame (~60k rows per the describe() output above the chart).
# NOTE(review): presumably this is here to avoid Altair's default row limit
# by writing the data to a side-car JSON file -- confirm against Altair docs.
alt.data_transformers.enable('json')

# Histogram of faces-per-image: bin the face counts (at most 100 bins) on the
# x axis and plot the number of images in each bin on the y axis.
alt.Chart(df).mark_bar().encode(
    alt.X('faces:Q', bin=alt.BinParams(maxbins=100)),
    alt.Y('count():Q'),
)
# Total number of detected faces summed over every image in the CSV.
df.faces.sum()
230677