In this notebook, we'll build up a very simple simulator to generate payments data corresponding to legitimate and fraudulent transactions. (There are many ways you could improve this generator and we'll call some of them out.) We'll start by building up some functionality to run simulations in general.
The next function is all you need to run simple discrete-event simulations. Here's how to use it:
- write each event stream as a generator that `yield`s a tuple consisting of an offset (the amount of time that has passed since the last event of that type) and a result (an arbitrary Python value associated with the event), and
- pass a list of these generators to the `simulate` function, which will yield the next event from all event streams indefinitely.
import heapq
def simulate(event_generators, initial_time=0):
    """Merge several event streams into one time-ordered stream of events.

    Args:
        event_generators: iterable of iterators, each yielding
            ``(offset, result)`` tuples. ``offset`` is the time elapsed since
            that stream's previous event (or since ``initial_time`` for its
            first event); ``result`` is an arbitrary value for the event.
        initial_time: timestamp added to the first offset of every stream.

    Yields:
        ``(timestamp, result)`` tuples in non-decreasing timestamp order.
        Events with equal timestamps are yielded in the order in which they
        were scheduled. A stream that runs out is dropped; the simulation
        ends once every stream is exhausted (never, for infinite streams).
    """
    pq = []
    # Monotonic tie-breaker stored right after the timestamp, so that heap
    # comparisons never fall through to ``result`` (which may be unorderable,
    # e.g. a dict) or to the generator object itself (which is never
    # orderable and would raise TypeError).
    scheduled = 0

    for event in event_generators:
        try:
            offset, result = next(event)
        except StopIteration:
            # An empty stream contributes nothing.
            continue
        heapq.heappush(pq, (offset + initial_time, scheduled, result, event))
        scheduled += 1

    while pq:
        timestamp, _, result, event = heapq.heappop(pq)
        # Schedule the stream's next event before handing this one out.
        try:
            offset, next_result = next(event)
        except StopIteration:
            # Stream exhausted: still emit the event we just popped.
            pass
        else:
            heapq.heappush(pq, (timestamp + offset, scheduled, next_result, event))
            scheduled += 1
        yield (timestamp, result)
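It may be easier to see how this works with an example. In the next three cells, we'll define a simple event stream, combine four such streams into a single simulation, and print the first twenty events.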
from scipy import stats
def bedrockstream(mu, name):
    """Yield an endless stream of ``(offset, name)`` events.

    Each offset is a single draw from a Poisson distribution with mean
    ``mu``; ``name`` is attached unchanged to every event.
    """
    while True:
        gap = stats.poisson.rvs(mu, size=1)[0]
        yield (gap, name)
# Four independent event streams, one per character, merged into one timeline.
sim = simulate([
    bedrockstream(mu, name)
    for mu, name in [(10, "fred"), (12, "betty"), (20, "wilma"), (35, "barney")]
])

# Show the first twenty merged events; range() is listed first so zip stops
# without pulling a twenty-first event from the simulation.
for i, event in zip(range(20), sim):
    print(event)
(9, 'fred') (12, 'betty') (13, 'fred') (22, 'betty') (22, 'wilma') (26, 'barney') (30, 'betty') (32, 'fred') (40, 'betty') (40, 'wilma') (41, 'fred') (48, 'fred') (53, 'betty') (60, 'fred') (63, 'betty') (65, 'wilma') (69, 'barney') (73, 'betty') (76, 'fred') (87, 'betty')
The first thing we have to do is decide what data we'll generate for each transaction. Some interesting possibilities include:
We'll also generate a label for each transaction (`legitimate` or `fraud`). We'll start with a very basic user event stream generator: all of the transactions we generate will be legitimate, and we won't do anything particularly interesting with most of the fields.
import numpy as np
# Merchant ids are the integers 0 .. MERCHANT_COUNT - 1.
MERCHANT_COUNT = 20000

# A small percentage of merchants account for most transactions: roughly one
# in twenty-one merchants is treated as "common".
COMMON_MERCHANT_COUNT = MERCHANT_COUNT // 21

# Drawn with replacement, so the same merchant id may appear more than once.
common_merchants = np.random.choice(
    MERCHANT_COUNT, size=COMMON_MERCHANT_COUNT, replace=True
)
def basic_user_stream(user_id, mu):
    """Yield an endless stream of placeholder legitimate transactions.

    Every event is ``(offset, ("legitimate", user_id, amount, merchant_id,
    entry, foreign))``. Only the merchant and the offset vary: the merchant
    is picked from this user's favourites (a sample one fifth the size of
    ``common_merchants``) and the offset is a Poisson draw with mean ``mu``.
    """
    # These fields are constant in this basic version of the generator.
    fixed_amount = 100.00
    fixed_entry = "chip_and_pin"
    is_foreign = False

    favorites = np.random.choice(common_merchants,
                                 size=len(common_merchants) // 5)
    while True:
        merchant = np.random.choice(favorites, size=1)[0]
        gap = stats.poisson.rvs(mu, size=1)[0]
        yield (gap, ("legitimate", user_id, fixed_amount, merchant,
                     fixed_entry, is_foreign))
# Three users with different mean gaps between transactions.
sim = simulate([
    basic_user_stream(user, mean_gap)
    for user, mean_gap in [(1, 700), (2, 105), (3, 40)]
])

# Print the first twenty merged events.
for i, event in zip(range(20), sim):
    print(event)
(43, ('legitimate', 3, 100.0, 12937, 'chip_and_pin', False)) (85, ('legitimate', 3, 100.0, 12531, 'chip_and_pin', False)) (104, ('legitimate', 2, 100.0, 9627, 'chip_and_pin', False)) (133, ('legitimate', 3, 100.0, 11515, 'chip_and_pin', False)) (176, ('legitimate', 3, 100.0, 3011, 'chip_and_pin', False)) (218, ('legitimate', 2, 100.0, 19708, 'chip_and_pin', False)) (221, ('legitimate', 3, 100.0, 17109, 'chip_and_pin', False)) (259, ('legitimate', 3, 100.0, 8559, 'chip_and_pin', False)) (310, ('legitimate', 3, 100.0, 842, 'chip_and_pin', False)) (323, ('legitimate', 2, 100.0, 18709, 'chip_and_pin', False)) (347, ('legitimate', 3, 100.0, 2361, 'chip_and_pin', False)) (396, ('legitimate', 3, 100.0, 2589, 'chip_and_pin', False)) (431, ('legitimate', 2, 100.0, 19497, 'chip_and_pin', False)) (435, ('legitimate', 3, 100.0, 11085, 'chip_and_pin', False)) (460, ('legitimate', 3, 100.0, 14329, 'chip_and_pin', False)) (498, ('legitimate', 3, 100.0, 16310, 'chip_and_pin', False)) (533, ('legitimate', 2, 100.0, 18873, 'chip_and_pin', False)) (537, ('legitimate', 3, 100.0, 7115, 'chip_and_pin', False)) (579, ('legitimate', 3, 100.0, 16801, 'chip_and_pin', False)) (620, ('legitimate', 3, 100.0, 5686, 'chip_and_pin', False))
- Possible improvement: extend `basic_user_stream` so that users occasionally buy from any merchant.
- Possible improvement: extend `basic_user_stream` to model occasional international travel.

We'll start by building a generator to build a mixture model we can use to make several kinds of transactions: small, medium, and large.
def transaction_amounts(means, percentages, distribution=None):
    """Yield an endless stream of amounts drawn from a mixture model.

    Args:
        means: one location value per mixture component. Each is multiplied
            by 100 before sampling, and samples are floored and divided by
            100 again, so amounts carry at most two decimal places.
        percentages: probability of picking each component; passed as ``p``
            to ``np.random.choice``, so it must have one entry per mean.
        distribution: optional callable ``(m, size) -> array of size
            samples`` used to sample one component. Defaults to a gamma
            distribution with shape 1.1, location ``m`` and scale
            ``min(m, 750)``.

    Yields:
        One amount at a time; samples are generated in batches of 256.
    """
    size = 256
    if distribution is None:
        def distribution(m, sz):
            return stats.gamma.rvs(a=1.1, scale=min(m, 750), loc=m, size=sz)

    positions = np.arange(size)
    while True:
        # One row of candidate samples per component.
        streams = np.stack([distribution(m * 100, size) for m in means])
        component = np.random.choice(len(means), p=percentages, size=size)
        # For each position take the sample from the chosen component.
        # Integer indexing is used instead of np.choose, which limits how
        # many choice arrays it accepts and so fails for large mixtures.
        picked = streams[component, positions]
        yield from np.floor(picked) / 100
# Scratch check: np.floor rounds every element down to a whole number
# (the cell output that follows shows 1., 2., 3.).
a = np.array([1.1,2.1,3.5])
np.floor(a)
array([1., 2., 3.])
Let's plot a histogram of some simulated transaction amounts sampled from three distributions with means corresponding to three kinds of purchases: a latte, take-out for lunch, and a pair of Levi's.
import pandas as pd
import altair as alt
# Altair setup: enable the notebook renderer and the JSON data transformer.
alt.renderers.enable('notebook')
alt.data_transformers.enable('json')

# Sample 80,000 amounts from a three-component mixture.
amt = transaction_amounts(means=[5, 15, 50], percentages=[0.5, 0.35, 0.15])
amounts = [amount for i, amount in zip(range(80000), amt)]
source = pd.DataFrame({"amounts": amounts})

# Histogram of the sampled amounts (at most 100 bins).
alt.Chart(source).mark_bar().encode(
    alt.X("amounts", bin=alt.Bin(maxbins=100)),
    y='count()',
)
We can also plot a broader distribution of transactions:
# Sample 40,000 amounts from a broader, six-component mixture.
amt = transaction_amounts(
    means=[5, 10, 15, 20, 50, 100],
    percentages=[0.35, 0.25, 0.15, 0.1, 0.1, 0.05],
)
amounts = [amount for i, amount in zip(range(40000), amt)]
source = pd.DataFrame({"amounts": amounts})

# Histogram of the sampled amounts (at most 100 bins).
alt.Chart(source).mark_bar().encode(
    alt.X("amounts", bin=alt.Bin(maxbins=100)),
    y='count()',
)
Next up, we'll make a generator to create the entry types:
def legitimate_entry_types():
    """Yield an endless stream of entry-type names for legitimate activity.

    Names are sampled in batches of 256 with fixed weights: online and
    contactless are the most likely, manual entry the least.
    """
    batch_size = 256
    weighted_types = {
        "contactless": 0.25,
        "chip_and_pin": 0.2,
        "swipe": 0.15,
        "manual": 0.05,
        "online": 0.35,
    }
    labels = list(weighted_types)
    weights = list(weighted_types.values())
    while True:
        picks = np.random.choice(len(labels), p=weights, size=batch_size)
        for index in picks:
            yield labels[index]
...and one for selecting merchants (primarily a user's favorite merchants):
def merchant_stream(common_merchants, all_merchants, fav_percentage=0.2, probs=(0.6, 0.37, 0.03)):
    """Yield an endless stream of merchant ids for one user.

    Args:
        common_merchants: sequence of ids of frequently used merchants.
        all_merchants: sequence of all merchant ids.
        fav_percentage: fraction of ``len(common_merchants)`` used as the
            size of this user's favourites sample (drawn with replacement).
        probs: probabilities of drawing the next merchant from, in order,
            the favourites, the common merchants, and all merchants.

    Yields:
        A merchant id as a plain ``int``.
    """
    favorite_count = int(len(common_merchants) * fav_percentage)
    # With a small pool the product above truncates to zero, which would
    # leave an empty favourites pool and make the generator raise as soon as
    # that pool is selected. Keep at least one favourite when possible.
    if favorite_count < 1 and len(common_merchants) > 0:
        favorite_count = 1
    favorite_merchants = np.random.choice(common_merchants, size=favorite_count)

    pools = [favorite_merchants, common_merchants, all_merchants]
    while True:
        pool = pools[np.random.choice(len(pools), p=probs)]
        yield int(np.random.choice(pool))
We can combine all of these to generate a stream of legitimate activity for a single user:
def legitimate_user_stream(user_id, transactions_per_day=12, amount_means=[20,100,500], amount_probs=[0.9,0.075,0.025]):
    """Yield an endless stream of legitimate transactions for one user.

    Every event is ``(offset, ("legitimate", user_id, amount, merchant_id,
    entry, foreign))``. Amounts come from ``transaction_amounts``, entry
    types from ``legitimate_entry_types`` and merchants from
    ``merchant_stream``. Only online transactions can be foreign (with
    probability 0.4). The offset is a geometric draw shifted by
    ``86400 // transactions_per_day`` seconds.
    """
    SECONDS_PER_DAY = 86400

    amount_source = transaction_amounts(amount_means, amount_probs)
    entry_source = legitimate_entry_types()
    merchant_source = merchant_stream(common_merchants, np.arange(MERCHANT_COUNT))

    base_gap = SECONDS_PER_DAY // transactions_per_day
    gap_p = 1 / (base_gap / 10)

    while True:
        amount = next(amount_source)
        entry = next(entry_source)
        # The random draw happens only for online transactions.
        if entry == "online":
            foreign = np.random.choice([True, False], p=[0.4, 0.6])
        else:
            foreign = False
        merchant_id = next(merchant_source)
        gap = stats.geom.rvs(p=gap_p, loc=base_gap, size=1)[0]
        yield (gap, ("legitimate", user_id, amount, merchant_id, entry, foreign))
# Merge the legitimate activity of three users into a single timeline.
sim = simulate([legitimate_user_stream(user) for user in (1, 2, 3)])

# Print the first two hundred merged events.
for i, event in zip(range(200), sim):
    print(event)
(7408, ('legitimate', 1, 26.34, 13610, 'contactless', False)) (7417, ('legitimate', 2, 27.82, 1445, 'chip_and_pin', False)) (7895, ('legitimate', 3, 20.94, 1456, 'chip_and_pin', False)) (14735, ('legitimate', 1, 23.31, 514, 'contactless', False)) (15296, ('legitimate', 3, 103.68, 2930, 'contactless', False)) (15397, ('legitimate', 2, 24.56, 14863, 'online', True)) (22482, ('legitimate', 1, 24.43, 14887, 'online', False)) (23033, ('legitimate', 2, 20.01, 10007, 'contactless', False)) (23812, ('legitimate', 3, 106.4, 3699, 'contactless', False)) (30717, ('legitimate', 1, 37.06, 11025, 'online', False)) (32080, ('legitimate', 2, 23.45, 16013, 'manual', False)) (33635, ('legitimate', 3, 20.17, 16846, 'swipe', False)) (38367, ('legitimate', 1, 23.7, 5183, 'chip_and_pin', False)) (39723, ('legitimate', 2, 22.04, 16275, 'online', False)) (41180, ('legitimate', 3, 23.9, 5793, 'online', True)) (46503, ('legitimate', 1, 25.06, 10416, 'online', False)) (47127, ('legitimate', 2, 24.0, 6489, 'swipe', False)) (50501, ('legitimate', 3, 40.52, 15992, 'online', True)) (54701, ('legitimate', 2, 21.15, 246, 'contactless', False)) (56336, ('legitimate', 1, 29.68, 6719, 'contactless', False)) (58156, ('legitimate', 3, 29.24, 5845, 'chip_and_pin', False)) (62027, ('legitimate', 2, 67.74, 9116, 'online', False)) (63942, ('legitimate', 1, 55.43, 7243, 'online', True)) (66610, ('legitimate', 3, 26.01, 3793, 'online', False)) (69469, ('legitimate', 2, 20.74, 15659, 'chip_and_pin', False)) (72172, ('legitimate', 1, 26.66, 8324, 'contactless', False)) (74762, ('legitimate', 3, 53.7, 8038, 'online', False)) (77407, ('legitimate', 2, 33.29, 10216, 'online', False)) (79896, ('legitimate', 1, 24.05, 6882, 'chip_and_pin', False)) (85155, ('legitimate', 3, 21.09, 1165, 'contactless', False)) (85846, ('legitimate', 2, 38.8, 18234, 'online', False)) (87770, ('legitimate', 1, 23.84, 15444, 'manual', False)) (92374, ('legitimate', 3, 33.8, 6678, 'contactless', False)) (94397, ('legitimate', 2, 22.36, 
11625, 'contactless', False)) (95623, ('legitimate', 1, 31.35, 1931, 'chip_and_pin', False)) (99798, ('legitimate', 3, 20.59, 17237, 'online', False)) (102388, ('legitimate', 2, 23.29, 1454, 'online', True)) (103733, ('legitimate', 1, 21.25, 6196, 'contactless', False)) (109406, ('legitimate', 3, 46.19, 7625, 'chip_and_pin', False)) (111226, ('legitimate', 2, 33.2, 3635, 'online', True)) (112057, ('legitimate', 1, 26.88, 1814, 'online', False)) (118591, ('legitimate', 2, 26.45, 5441, 'chip_and_pin', False)) (119379, ('legitimate', 3, 20.39, 12698, 'online', False)) (119828, ('legitimate', 1, 24.02, 9389, 'online', True)) (125842, ('legitimate', 2, 504.72, 5406, 'manual', False)) (126603, ('legitimate', 3, 25.77, 18420, 'online', False)) (127339, ('legitimate', 1, 62.57, 13248, 'chip_and_pin', False)) (134049, ('legitimate', 3, 126.85, 18892, 'chip_and_pin', False)) (134202, ('legitimate', 2, 38.84, 11808, 'swipe', False)) (134937, ('legitimate', 1, 28.23, 4106, 'contactless', False)) (142161, ('legitimate', 3, 23.37, 3161, 'contactless', False)) (142264, ('legitimate', 2, 23.88, 13441, 'chip_and_pin', False)) (142312, ('legitimate', 1, 22.66, 5307, 'contactless', False)) (149536, ('legitimate', 3, 27.97, 7900, 'contactless', False)) (151041, ('legitimate', 2, 21.13, 7186, 'online', False)) (151394, ('legitimate', 1, 22.09, 11444, 'chip_and_pin', False)) (157247, ('legitimate', 3, 35.36, 3533, 'online', False)) (158603, ('legitimate', 2, 20.87, 5612, 'online', False)) (160414, ('legitimate', 1, 20.96, 424, 'contactless', False)) (165536, ('legitimate', 3, 508.73, 16609, 'swipe', False)) (165983, ('legitimate', 2, 20.16, 5057, 'contactless', False)) (169245, ('legitimate', 1, 23.78, 5742, 'swipe', False)) (173460, ('legitimate', 2, 28.95, 16882, 'chip_and_pin', False)) (173586, ('legitimate', 3, 26.02, 19339, 'online', True)) (176649, ('legitimate', 1, 30.67, 2212, 'swipe', False)) (180847, ('legitimate', 2, 20.72, 246, 'swipe', False)) (181084, ('legitimate', 3, 
20.86, 3069, 'contactless', False)) (184682, ('legitimate', 1, 505.5, 2496, 'contactless', False)) (188350, ('legitimate', 3, 103.24, 19011, 'manual', False)) (189235, ('legitimate', 2, 45.69, 10007, 'contactless', False)) (192379, ('legitimate', 1, 118.27, 252, 'chip_and_pin', False)) (197203, ('legitimate', 2, 29.57, 18014, 'chip_and_pin', False)) (197558, ('legitimate', 3, 39.76, 3487, 'swipe', False)) (200216, ('legitimate', 1, 28.31, 11096, 'online', False)) (204529, ('legitimate', 2, 100.96, 6489, 'contactless', False)) (205066, ('legitimate', 3, 27.36, 17660, 'contactless', False)) (207775, ('legitimate', 1, 24.28, 13593, 'contactless', False)) (212956, ('legitimate', 3, 25.29, 1346, 'swipe', False)) (213482, ('legitimate', 2, 512.4, 19042, 'contactless', False)) (215476, ('legitimate', 1, 30.84, 2496, 'chip_and_pin', False)) (220385, ('legitimate', 3, 21.0, 8362, 'online', False)) (221307, ('legitimate', 2, 21.04, 16058, 'chip_and_pin', False)) (222724, ('legitimate', 1, 30.93, 15809, 'swipe', False)) (227959, ('legitimate', 3, 30.53, 3988, 'contactless', False)) (229982, ('legitimate', 1, 28.6, 668, 'online', True)) (229993, ('legitimate', 2, 31.82, 14860, 'contactless', False)) (235512, ('legitimate', 3, 21.28, 15992, 'online', True)) (237810, ('legitimate', 2, 29.77, 1230, 'online', False)) (238666, ('legitimate', 1, 504.4, 3692, 'online', True)) (243136, ('legitimate', 3, 20.24, 17933, 'swipe', False)) (245076, ('legitimate', 2, 21.5, 17403, 'online', False)) (245949, ('legitimate', 1, 26.43, 4230, 'chip_and_pin', False)) (252278, ('legitimate', 3, 25.47, 16385, 'contactless', False)) (252788, ('legitimate', 2, 52.09, 10364, 'manual', False)) (253948, ('legitimate', 1, 28.78, 5559, 'online', False)) (259832, ('legitimate', 3, 26.82, 3609, 'online', False)) (261967, ('legitimate', 2, 32.88, 4520, 'contactless', False)) (262236, ('legitimate', 1, 32.44, 12607, 'chip_and_pin', False)) (268436, ('legitimate', 3, 29.85, 4470, 'chip_and_pin', False)) (269421, 
('legitimate', 2, 106.69, 13441, 'contactless', False)) (269873, ('legitimate', 1, 41.98, 15691, 'contactless', False)) (275774, ('legitimate', 3, 24.62, 6038, 'contactless', False)) (277318, ('legitimate', 1, 35.63, 19450, 'contactless', False)) (277332, ('legitimate', 2, 32.76, 18482, 'contactless', False)) (283273, ('legitimate', 3, 25.61, 14506, 'swipe', False)) (284747, ('legitimate', 2, 24.7, 19941, 'contactless', False)) (284982, ('legitimate', 1, 25.8, 1687, 'chip_and_pin', False)) (292247, ('legitimate', 2, 32.02, 12572, 'chip_and_pin', False)) (292247, ('legitimate', 3, 35.65, 6103, 'online', False)) (293384, ('legitimate', 1, 21.23, 6951, 'contactless', False)) (299467, ('legitimate', 3, 21.07, 12314, 'swipe', False)) (301881, ('legitimate', 1, 47.21, 12819, 'chip_and_pin', False)) (303674, ('legitimate', 2, 119.57, 16187, 'contactless', False)) (307710, ('legitimate', 3, 41.32, 2372, 'chip_and_pin', False)) (309106, ('legitimate', 1, 50.33, 3049, 'online', False)) (310999, ('legitimate', 2, 32.22, 7243, 'chip_and_pin', False)) (315847, ('legitimate', 3, 103.78, 5981, 'manual', False)) (316497, ('legitimate', 1, 24.68, 12179, 'contactless', False)) (318545, ('legitimate', 2, 511.94, 16924, 'chip_and_pin', False)) (323253, ('legitimate', 3, 21.5, 6678, 'contactless', False)) (323919, ('legitimate', 1, 27.27, 2528, 'swipe', False)) (326344, ('legitimate', 2, 27.39, 16801, 'chip_and_pin', False)) (331786, ('legitimate', 1, 32.29, 2281, 'online', True)) (332684, ('legitimate', 3, 20.9, 10314, 'manual', False)) (334392, ('legitimate', 2, 31.83, 9484, 'contactless', False)) (339065, ('legitimate', 1, 106.69, 6837, 'online', False)) (339936, ('legitimate', 3, 26.63, 16160, 'chip_and_pin', False)) (341934, ('legitimate', 2, 33.69, 5310, 'chip_and_pin', False)) (349037, ('legitimate', 1, 511.87, 3692, 'online', True)) (349293, ('legitimate', 2, 25.25, 4267, 'chip_and_pin', False)) (350517, ('legitimate', 3, 31.55, 19941, 'swipe', False)) (357456, ('legitimate', 
2, 65.74, 8800, 'swipe', False)) (358139, ('legitimate', 3, 29.95, 10574, 'online', False)) (358779, ('legitimate', 1, 29.55, 4891, 'chip_and_pin', False)) (365006, ('legitimate', 2, 32.75, 4450, 'chip_and_pin', False)) (365994, ('legitimate', 1, 24.27, 10308, 'online', True)) (366124, ('legitimate', 3, 22.98, 10314, 'online', True)) (372957, ('legitimate', 2, 27.0, 7432, 'swipe', False)) (374466, ('legitimate', 1, 23.7, 18341, 'contactless', False)) (374528, ('legitimate', 3, 119.87, 2584, 'manual', False)) (380483, ('legitimate', 2, 503.49, 4106, 'online', False)) (381714, ('legitimate', 1, 30.13, 5816, 'online', True)) (382130, ('legitimate', 3, 26.39, 16047, 'online', True)) (388980, ('legitimate', 1, 23.19, 6837, 'online', False)) (389251, ('legitimate', 2, 20.77, 17311, 'chip_and_pin', False)) (389639, ('legitimate', 3, 33.21, 17958, 'contactless', False)) (396658, ('legitimate', 2, 27.58, 3049, 'chip_and_pin', False)) (397381, ('legitimate', 3, 37.64, 5727, 'online', False)) (397460, ('legitimate', 1, 21.6, 16876, 'chip_and_pin', False)) (404077, ('legitimate', 2, 25.54, 3629, 'manual', False)) (405068, ('legitimate', 1, 26.02, 11128, 'manual', False)) (406224, ('legitimate', 3, 39.71, 2483, 'contactless', False)) (412038, ('legitimate', 2, 33.57, 16461, 'chip_and_pin', False)) (412614, ('legitimate', 1, 23.49, 1661, 'chip_and_pin', False)) (414364, ('legitimate', 3, 26.73, 12191, 'swipe', False)) (419392, ('legitimate', 2, 22.42, 5365, 'contactless', False)) (420493, ('legitimate', 1, 108.5, 1814, 'contactless', False)) (422400, ('legitimate', 3, 20.81, 12050, 'chip_and_pin', False)) (427471, ('legitimate', 2, 27.65, 7645, 'online', False)) (427824, ('legitimate', 1, 23.95, 572, 'chip_and_pin', False)) (429892, ('legitimate', 3, 25.62, 9158, 'chip_and_pin', False)) (435210, ('legitimate', 1, 24.28, 7578, 'manual', False)) (435570, ('legitimate', 2, 24.04, 3629, 'swipe', False)) (438726, ('legitimate', 3, 25.96, 18059, 'swipe', False)) (443073, 
('legitimate', 2, 20.15, 10725, 'online', False)) (443128, ('legitimate', 1, 36.46, 8127, 'chip_and_pin', False)) (445967, ('legitimate', 3, 26.79, 2372, 'online', False)) (450456, ('legitimate', 2, 101.42, 15816, 'manual', False)) (450638, ('legitimate', 1, 503.23, 5413, 'online', False)) (453343, ('legitimate', 3, 25.16, 6041, 'online', False)) (457708, ('legitimate', 2, 36.5, 7186, 'contactless', False)) (458310, ('legitimate', 1, 33.48, 5612, 'online', False)) (463065, ('legitimate', 3, 32.81, 12032, 'online', False)) (465102, ('legitimate', 2, 21.96, 12725, 'chip_and_pin', False)) (465878, ('legitimate', 1, 20.72, 19871, 'online', True)) (470780, ('legitimate', 3, 105.6, 16625, 'contactless', False)) (472344, ('legitimate', 2, 25.47, 4528, 'chip_and_pin', False)) (473167, ('legitimate', 1, 101.71, 9143, 'contactless', False)) (478163, ('legitimate', 3, 22.72, 3699, 'online', False)) (480534, ('legitimate', 2, 40.94, 3049, 'manual', False)) (480652, ('legitimate', 1, 22.85, 9919, 'chip_and_pin', False)) (485478, ('legitimate', 3, 100.75, 15320, 'contactless', False)) (488027, ('legitimate', 2, 24.07, 13593, 'online', False)) (488537, ('legitimate', 1, 32.04, 13248, 'online', True)) (493245, ('legitimate', 3, 66.43, 18993, 'contactless', False)) (495539, ('legitimate', 2, 32.47, 6961, 'contactless', False)) (495922, ('legitimate', 1, 500.17, 477, 'chip_and_pin', False)) (501083, ('legitimate', 3, 36.05, 12179, 'online', False)) (502958, ('legitimate', 2, 22.06, 12725, 'contactless', False)) (503456, ('legitimate', 1, 23.54, 9356, 'online', False)) (510238, ('legitimate', 2, 21.05, 11004, 'swipe', False)) (511615, ('legitimate', 1, 21.76, 9898, 'manual', False)) (512643, ('legitimate', 3, 27.02, 517, 'online', True)) (517608, ('legitimate', 2, 28.52, 7133, 'online', True)) (520154, ('legitimate', 3, 23.75, 9567, 'online', True)) (520813, ('legitimate', 1, 21.69, 18244, 'online', False)) (525036, ('legitimate', 2, 112.76, 7052, 'online', False)) (527715, 
('legitimate', 3, 26.02, 15397, 'swipe', False)) (528490, ('legitimate', 1, 24.67, 4087, 'chip_and_pin', False)) (532894, ('legitimate', 2, 39.96, 3629, 'chip_and_pin', False))
We'll start with some basic assumptions:
These will guide the design of a fraudulent transaction generator.
WIP
def fraud_entry_types():
    """Yield an endless stream of entry-type names for fraudulent activity.

    Names are sampled in batches of 256 with fixed weights that favour
    online and manual entry.
    """
    batch_size = 256
    weighted_types = {
        "contactless": 0.05,
        "chip_and_pin": 0.05,
        "swipe": 0.05,
        "manual": 0.35,
        "online": 0.5,
    }
    labels = list(weighted_types)
    weights = list(weighted_types.values())
    while True:
        picks = np.random.choice(len(labels), p=weights, size=batch_size)
        for index in picks:
            yield labels[index]
def fraudulent_user_stream(user_id, transactions_per_day=12, amount_means=[5,10,20], amount_probs=[0.2, 0.2, 0.6]):
    """Yield an endless stream of fraudulent transactions for one user.

    Every event is ``(offset, ("fraud", user_id, amount, merchant_id, entry,
    foreign))``. Amounts come from ``transaction_amounts``, entry types from
    ``fraud_entry_types``, and the merchant is drawn uniformly from all
    ``MERCHANT_COUNT`` ids. Any transaction may be foreign (probability
    0.4). The offset is a geometric draw shifted by
    ``86400 // transactions_per_day * 10`` seconds.

    Args:
        user_id: identifier copied into every event.
        transactions_per_day: controls the spacing between events (see above).
        amount_means: component means passed to ``transaction_amounts``.
        amount_probs: component probabilities passed to ``transaction_amounts``.
    """
    amounts = transaction_amounts(amount_means, amount_probs)
    entry_types = fraud_entry_types()
    SECONDS_PER_DAY = 86400
    loc = SECONDS_PER_DAY // transactions_per_day * 10
    p = 1 / (loc / 10)
    while True:
        # NOTE(review): fraud_delay (90 days plus a gamma-distributed number
        # of days, in seconds) is sampled but never applied to the offset.
        # The draw is kept so the random stream is unchanged.
        # TODO: either use it to delay the onset of fraud or remove it.
        fraud_delay, = np.floor(stats.gamma.rvs(a=6.4, loc=SECONDS_PER_DAY * 90, scale=SECONDS_PER_DAY, size=1))
        fraud_delay = int(fraud_delay)
        amount = next(amounts)
        entry = next(entry_types)
        foreign = np.random.choice([True, False], p=[0.4, 0.6])
        # Plain int, matching the ids produced by merchant_stream.
        merchant_id = int(np.random.choice(MERCHANT_COUNT))
        offset, = stats.geom.rvs(p=p, loc=loc, size=1)
        # Label these events as fraud; they were previously emitted with the
        # "legitimate" label, which made them indistinguishable downstream.
        yield (offset, ("fraud", user_id, amount, merchant_id, entry, foreign))
# Histogram of 10,000 draws from a gamma distribution (shape 6.4, scale
# 80000) shifted by ninety days' worth of seconds.
source = pd.DataFrame({
    "amounts": stats.gamma.rvs(a=6.4, loc=86400 * 90, scale=80000, size=10000),
})
alt.Chart(source).mark_bar().encode(
    alt.X("amounts", bin=alt.Bin(maxbins=100)),
    y='count()',
)
# Scratch check: this set of five weights sums to 1.2 (see the output that
# follows), so it cannot be used as a probability vector as-is.
sum([0.05,0.05,0.25,0.5,0.35])
1.2
# Scratch check: draw a single boolean, True with probability 0.3 and False
# with probability 0.7.
np.random.choice([True, False], p=[0.3, 0.7])
False