# Notebook-only setup step. The leading `!` is an IPython shell escape and is
# not valid Python, so it is kept as a comment here; run it in a shell, or
# re-enable it when executing inside Jupyter/Colab.
# !pip install --q Faker

import json
import string

import numpy as np
import pandas as pd
from faker import Faker

# Generator configured with the 'pt_BR' locale; every fake value below is
# drawn from this single instance.
fake = Faker('pt_BR')

# Draw 10,000 names and collect the distinct ones, to compare how many of the
# generated names are unique.
fake_name = [fake.name() for _ in range(10000)]
len(fake_name)  # only displayed when this is the last expression of a cell

lista_unic = set(fake_name)
len(lista_unic)  # only displayed when this is the last expression of a cell

# Total lines created
n = 1000

# Profile fields requested from `fake.profile`.
infos = ['name', 'job', 'company', 'ssn', 'address', 'sex', 'birthdate']


def create_profile(x: int) -> dict:
    """Generate ``x`` fake profiles.

    Args:
        x: Number of profiles to create.

    Returns:
        A dict mapping each row number ``0 .. x - 1`` to the dict returned by
        ``fake.profile`` for the fields listed in ``infos``.
    """
    # Report the number of rows actually requested (the parameter), not the
    # module-level default ``n``.
    print("Creating profile with ", x, " lines")

    profile_data = {i: fake.profile(fields=infos) for i in range(x)}

    print("done")
    return profile_data


def create_sales(x: int) -> dict:
    """Generate ``x`` random sales records.

    Args:
        x: Number of sales records to create.

    Returns:
        A dict mapping each row number ``0 .. x - 1`` to a dict with the keys
        ``price``, ``qtd``, ``product``, ``bill`` (``price * qtd``) and
        ``way_of_payment``.
    """
    # Report the number of rows actually requested (the parameter), not the
    # module-level default ``n``.
    print("Creating sales with ", x, " lines")

    payment_options = ['boleto', 'cartão_cred', 'a vista']

    sales_data = {}
    for i in range(x):
        # NOTE: np.random.randint excludes its upper bound, so price is in
        # 10..499 and qtd in 1..9.
        price = np.random.randint(10, 500)
        qtd = np.random.randint(1, 10)
        # `+ 1` makes 'Z' reachable; with the exclusive upper bound the
        # product letter previously stopped at 'Y'.
        product = chr(np.random.randint(ord('A'), ord('Z') + 1))

        sales_data[i] = {
            'price': price,
            'qtd': qtd,
            'product': product,
            'bill': price * qtd,
            'way_of_payment': np.random.choice(payment_options),
        }

    print("done")
    return sales_data


# Create fake profile
profiles = create_profile(n)
# from_dict puts one record per column, so transpose to get one row per record.
df_prof = pd.DataFrame.from_dict(profiles)
df_profile = df_prof.T

# Create fake sales
sales = create_sales(n)
temp_sales = pd.DataFrame.from_dict(sales)
df_sale = temp_sales.T

# Both frames share the row labels 0 .. n - 1, so a column-wise concat lines
# each profile up with its sale. The transposes above leave every column as
# `object`; infer_objects() restores numeric dtypes where the values allow it.
df_fake = pd.concat([df_profile, df_sale], axis=1).infer_objects()

# Extracting and creating column UF: the text after the first ' / ' in the
# address. Iterating the column directly avoids depending on the index labels
# being exactly 0 .. n - 1.
lista_uf = [address.split(' / ')[1] for address in df_fake['address']]
df_fake['uf'] = lista_uf

df_fake.drop_duplicates(inplace=True)

df_fake