See also:
https://towardsdatascience.com/how-to-create-fake-data-with-faker-a835e5b7a9d9
https://faker.readthedocs.io/en/master/
https://www.datacamp.com/tutorial/creating-synthetic-data-with-python-faker-tutorial
!pip install --q Faker
import string
from faker import Faker
import json
import numpy as np
import pandas as pd
# One Faker instance bound to the 'pt_BR' locale. The default-locale
# instance that used to be created first was overwritten before any use.
fake = Faker('pt_BR')

# Draw 10,000 names to see how many of them come out distinct.
fake_name = [fake.name() for _ in range(10000)]
len(fake_name)
# Output: 10000

# Collapse the sample to its distinct values.
lista_unic = set(fake_name)
len(lista_unic)
# Output (from the recorded run; varies between runs): 7928
# Number of rows generated for each synthetic table.
n = 1000

# Field names handed to fake.profile() for every generated profile.
infos = [
    'name',
    'job',
    'company',
    'ssn',
    'address',
    'sex',
    'birthdate',
]
def create_profile(x):
    """Generate ``x`` fake profiles keyed by row number.

    Every profile is produced by ``fake.profile(infos)``, using the
    module-level Faker instance ``fake`` and the module-level field list
    ``infos``.

    Args:
        x: Number of profiles to create.

    Returns:
        A dict mapping each row number in ``range(x)`` to the value
        returned by ``fake.profile(infos)`` for that row. Empty when
        ``x`` is 0.
    """
    # Report the size actually requested; the message previously printed
    # the global ``n`` and was wrong whenever ``x`` differed from it.
    print("Creating profile with ", x, " lines")
    profile_data = {i: fake.profile(infos) for i in range(x)}
    print("done")
    return profile_data
def create_sales(x):
    """Generate ``x`` random sales records keyed by row number.

    Each record is a dict with these keys, in this order:

    * ``price`` - random integer with 10 <= price < 500
    * ``qtd`` - random integer with 1 <= qtd < 10
    * ``product`` - one random uppercase letter from 'A' to 'Z'
    * ``bill`` - ``price * qtd``
    * ``way_of_payment`` - one of 'boleto', 'cartão_cred', 'a vista'

    Values are drawn from NumPy's global random state, so calling
    ``np.random.seed`` beforehand makes the result reproducible.

    Args:
        x: Number of sales records to create.

    Returns:
        A dict mapping each row number in ``range(x)`` to its record.
        Empty when ``x`` is 0.
    """
    # Report the size actually requested; the message previously printed
    # the global ``n`` and was wrong whenever ``x`` differed from it.
    print("Creating sales with ", x, " lines")
    payment_methods = ['boleto', 'cartão_cred', 'a vista']
    sales_data = {}
    for i in range(x):
        price = np.random.randint(10, 500)
        qtd = np.random.randint(1, 10)
        # randint excludes its upper bound, so add 1 to make 'Z' reachable.
        product = chr(np.random.randint(ord('A'), ord('Z') + 1))
        sales_data[i] = {
            'price': price,
            'qtd': qtd,
            'product': product,
            'bill': price * qtd,
            'way_of_payment': np.random.choice(payment_methods),
        }
    print("done")
    return sales_data
# Create fake profiles. from_dict() turns each row number into a column,
# so the frame is transposed to get one row per profile.
profiles = create_profile(n)
df_prof = pd.DataFrame.from_dict(profiles)
df_profile = df_prof.T

# Create fake sales, reshaped the same way.
sales = create_sales(n)
temp_sales = pd.DataFrame.from_dict(sales)
df_sale = temp_sales.T

# Put the profile and sales columns side by side, aligned on row number.
df_fake = pd.concat([df_profile, df_sale], axis=1)

# Extract the UF column: the text after the last ' / ' in each address.
# Iterating the column directly avoids the per-row label lookup, and
# rsplit keeps the result correct if ' / ' also occurs earlier in an address.
lista_uf = [address.rsplit(' / ', 1)[1] for address in df_fake['address']]
df_fake['uf'] = lista_uf

df_fake.drop_duplicates(inplace=True)
df_fake
Creating profile with 1000 lines done Creating sales with 1000 lines done
 | job | company | ssn | name | sex | address | birthdate | price | qtd | product | bill | way_of_payment | uf |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | Instrumentista musical | Jesus | 17650392821 | Erick Silveira | M | Trevo de da Luz, 962\nMorro Dos Macacos\n47974... | 1957-01-10 | 124 | 5 | X | 620 | cartão_cred | AM |
1 | Tecnólogo em rochas ornamentais | Araújo | 67291058449 | Sarah da Cunha | F | Trevo Pereira, 96\nGoiania\n02270785 Barbosa d... | 2017-09-15 | 158 | 2 | E | 316 | cartão_cred | MA |
2 | Ventríloquo | Viana | 12540369898 | Igor Silva | M | Morro Pinto, 42\nVila Paquetá\n42119022 Barbos... | 1931-04-25 | 369 | 4 | X | 1476 | a vista | BA |
3 | Telefonista | Cunha | 19283076559 | Sr. Bruno Fernandes | M | Estação Lima\nVila Fumec\n12545833 da Conceiçã... | 1985-10-26 | 413 | 7 | C | 2891 | boleto | AL |
4 | Borracheiro | Dias S.A. | 45819023714 | Alexandre Pinto | M | Sítio de Pires, 78\nVila Da Paz\n02665127 da R... | 1941-02-03 | 265 | 5 | P | 1325 | cartão_cred | MS |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
995 | Gravurista | Cunha | 03286759465 | Diogo Souza | M | Viaduto Gonçalves\nMarilandia\n60879609 Lima / TO | 1908-04-02 | 175 | 9 | C | 1575 | boleto | TO |
996 | Terapeuta ocupacional | Campos Cardoso e Filhos | 31072856417 | Sra. Valentina Silva | F | Lago de Silveira, 25\nVila Canto Do Sabiá\n198... | 1937-04-15 | 212 | 5 | C | 1060 | cartão_cred | AP |
997 | Árbitro e mediador | Farias Costela e Filhos | 91735680230 | Melissa Ribeiro | F | Trecho Azevedo, 364\nSanta Lúcia\n32417292 Gon... | 1990-12-03 | 175 | 3 | C | 525 | cartão_cred | PE |
998 | Tecnólogo em recursos pesqueiros | Ribeiro Moraes S.A. | 86371950240 | Emanuel Nogueira | M | Quadra de Dias, 96\nEngenho Nogueira\n95213125... | 1944-06-02 | 92 | 9 | F | 828 | a vista | MS |
999 | Meteorologista | Rocha | 14538702960 | Anthony da Mota | M | Residencial de Moura, 13\nJardim Guanabara\n47... | 2015-10-03 | 179 | 8 | C | 1432 | boleto | CE |
1000 rows × 13 columns