#!/usr/bin/env python
# coding: utf-8

# # Benford's Law 
# ## Purpose
# Take an iterable of numbers and plot the frequency of their leading digits. According to [Benford's Law](https://en.wikipedia.org/wiki/Benford%27s_law) (also called the first-digit law), a "natural" dataset should show the following distribution of leading digits:
# 
# | d 	|  P(d) 	|
# |---	|------:	|
# | 1 	| 30.1% 	|
# | 2 	| 17.6% 	|
# | 3 	| 12.5% 	|
# | 4 	|  9.7% 	|
# | 5 	|  7.9% 	|
# | 6 	|  6.7% 	|
# | 7 	|  5.8% 	|
# | 8 	|  5.1% 	|
# | 9 	|  4.6% 	|
# 
# ## Application
# In data science, this pattern is used to detect fraud, primarily for tax purposes. It can also be used to detect [deepfakes](https://en.wikipedia.org/wiki/Deepfake) or altered images.

# In[1]:


import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# `get_ipython` only exists inside an IPython/Jupyter session. Guard the lookup
# so this exported notebook can also be run as a plain script instead of
# failing with NameError before any work is done.
try:
    _ipython_shell = get_ipython()  # noqa: F821 - injected by IPython at runtime
except NameError:
    _ipython_shell = None

if _ipython_shell is not None:
    # Render matplotlib figures inline in the notebook output.
    _ipython_shell.run_line_magic('matplotlib', 'inline')


# In[2]:


# Load the dataset; the relative path is resolved against the current working
# directory.
world = pd.read_csv('world_population_data.csv')
# Preview the first rows. The result is only displayed when this is the last
# expression of a notebook cell; when run as a plain script it is discarded.
world.head()


# In[3]:


def _leading_digit(value):
    """Return the first nonzero decimal digit of ``str(value)``, or ``None``.

    Signs, currency symbols, leading zeros, decimal points and other
    punctuation are skipped, so ``-42``, ``'$4,200'`` and ``0.042`` all give
    ``'4'``. Scanning stops at the first letter, which covers ``nan``,
    ``None``, free text, and the exponent marker of an all-zero mantissa.
    """
    for char in str(value):
        if char in '123456789':
            return char
        if char.isalpha():
            return None
    return None


def digit_widget(list):
    """Plot the relative frequency of the leading digits found in *list*.

    Each value is converted with ``str`` and reduced to its first nonzero
    digit; values without one (zero, NaN, ``None``, empty or non-numeric
    strings) are ignored. The result is drawn as a bar chart with one bar per
    digit 1-9 whose height is that digit's share of the usable values, so the
    bars can be compared directly with the Benford's Law percentages.

    Args:
        list: Iterable of numbers (or number-like strings). The name shadows
            the builtin but is kept so existing keyword callers still work.

    Returns:
        The result of ``plt.show()``, or ``None`` without plotting (after a
        ``UserWarning``) when no value yields a leading digit.
    """
    # Function-scope imports keep this block self-contained.
    import warnings
    from collections import Counter

    tally = Counter(
        digit for digit in map(_leading_digit, list) if digit is not None
    )
    total = sum(tally.values())
    if not total:
        warnings.warn(
            "digit_widget: no values with a nonzero leading digit; "
            "nothing to plot",
            stacklevel=2,
        )
        return None

    # Always show all nine digits so a missing digit appears as an empty bar
    # rather than shifting the remaining bars.
    digits = [str(d) for d in range(1, 10)]
    shares = [tally[d] / total for d in digits]

    # A bar chart of explicit shares is used instead of a density histogram:
    # histogram density is divided by the bin width, so its bar heights are
    # not probabilities.
    _, ax = plt.subplots()
    ax.bar(digits, shares)
    ax.set_yticks([0.10, 0.20, 0.30])
    ax.set_xlabel('Leading digit')
    ax.set_ylabel('Relative frequency')
    return plt.show()


# In[4]:


# Plot the leading-digit distribution of the 'Population_2020' column.
digit_widget(world['Population_2020'])


# In[5]:


# Plot the leading-digit distribution of the 'Migrants' column.
digit_widget(world['Migrants'])


# In[6]:


# Plot the leading-digit distribution of the 'Net_Change' column.
digit_widget(world['Net_Change'])