from graph_fmt import *
# Open a remote traversal source against the Aerospike Graph Gremlin endpoint.
g = traversal().withRemote(
    DriverRemoteConnection('ws://0.0.0.0:8182/gremlin', 'g')
)

# Start from a clean slate: drop every vertex currently in the graph.
g.V().drop().iterate()

# Invoke the "bulk-load" call with the vertex and edge source directories.
# The evaluation timeout is 86,400,000 ms (24 hours). Kept as a bare
# expression so the value returned by next() is shown as the cell output.
(
    g.with_("evaluationTimeout", 86_400_000)
    .call("bulk-load")
    .with_("aerospike.graphloader.vertices", "data/fraud/vertices")
    .with_("aerospike.graphloader.edges", "data/fraud/edges")
    .next()
)
'Success'
# Read the value map of the "~graph_summary" vertex into a DataFrame.
graph_stats = pd.DataFrame(g.V("~graph_summary").valueMap().toList())
# Render whatever dgraph_sch builds from the summary and the traversal source.
# NOTE(review): dgraph_sch is not defined in this view — presumably it comes
# from the graph_fmt star import; confirm.
display(dgraph_sch(graph_stats, g))
# Bare expression: displayed as the notebook cell's result.
graph_stats
CytoscapeWidget(cytoscape_layout={'name': 'cola'}, cytoscape_style=[{'selector': 'any', 'style': {'content': '…
vertex_count_per_label | edge_properties_per_label | edge_count | vertex_count | vertex_properties_per_label | edge_count_per_label | |
---|---|---|---|---|---|---|
0 | [{'Email': 30, 'Device': 30, 'IP': 30, 'BrowserAgent': 30, 'MailingAddress': 30, 'Member': 30, 'Toolbar': 30}] | [{'used_mailing_address': {'weight'}, 'used_email_for_account': {'weight'}, 'used_toolbar': {'weight'}, 'used_browser_agent': {'weight'}, 'invited_email': {'weight'}, 'used_device': {'weight'}, 'referred_by': {'weight'}, 'used_ip': {'weight'}}] | [329] | [210] | [{'Email': {'email_valid', 'handle_suspicious', 'graph_object_status', 'graph_object_modified_ts', 'graph_object_created_by', 'created_ts', 'graph_object_modified_by', 'region_id'}, 'Device': {'region_id', 'graph_object_status', 'graph_object_modified_ts', 'graph_object_created_by', 'created_ts', 'graph_object_modified_by', 'device_type'}, 'IP': {'graph_object_status', 'ip_likely_city', 'graph_object_modified_ts', 'graph_object_created_by', 'ip_likely_country', 'created_ts', 'graph_object_modified_by', 'region_id'}, 'BrowserAgent': {'graph_object_status', 'graph_object_modified_ts', 'graph_object_created_by', 'created_ts', 'graph_object_modified_by', 'browser_family', 'region_id'}, 'MailingAddress': {'address1', 'graph_object_status', 'graph_object_modified_ts', 'graph_object_created_by', 'address2', 'country', 'zipcode', 'city', 'created_ts', 'graph_object_modified_by', 'avs_state', 'region_id', 'state'}, 'Member': {'first_name', 'graph_object_status', 'graph_object_modified_ts', 'graph_object_created_by', 'signup_date', 'over3_fraud_score', 'last_name', 'account_modified_ts', 'graph_object_modified_by', 'member_status', 'region_id'}, 'Toolbar': {'graph_object_status', 'graph_object_modified_ts', 'graph_object_created_by', 'created_ts', 'graph_object_modified_by', 'region_id'}}] | [{'used_mailing_address': 32, 'used_email_for_account': 30, 'used_toolbar': 30, 'used_browser_agent': 29, 'invited_email': 65, 'used_device': 33, 'referred_by': 74, 'used_ip': 36}] |
# Query 1: given a member vertex id, find every *other* vertex that is linked
# to the same neighbour as that member. The traversal steps out over
# used_ip / used_device / used_mailing_address / referred_by and then back in
# over the same four edge labels.
# The start vertex is labelled "m1" so it can be excluded from its own result,
# and dedup() collapses vertices reached through more than one shared neighbour.
(
    g.V("ME-518945").as_("m1")
    .out("used_ip", "used_device", "used_mailing_address", "referred_by")
    .in_("used_ip", "used_device", "used_mailing_address", "referred_by")
    .where(P.neq("m1"))
    .dedup()
    .toList()
)
[v[ME-526967], v[ME-603335], v[ME-351584], v[ME-548238], v[ME-076805], v[ME-094592]]
# Query 2: print the possible referral tree for the given member.
# Walks incoming "referred_by" edges exactly three hops from the start vertex
# and returns the distinct paths; change the times() argument to tune the depth.
# path() is required here because the tree is read off the full paths.
(
    g.V("ME-518945")
    .repeat(in_("referred_by"))
    .times(3)
    .path()
    .dedup()
    .toList()
)
[path[v[ME-518945], v[ME-498888], v[ME-627893], v[ME-465364]], path[v[ME-518945], v[ME-498888], v[ME-627893], v[ME-477057]], path[v[ME-518945], v[ME-498888], v[ME-477057], v[ME-627893]], path[v[ME-518945], v[ME-498888], v[ME-477057], v[ME-465364]], path[v[ME-518945], v[ME-526967], v[ME-477057], v[ME-627893]], path[v[ME-518945], v[ME-526967], v[ME-477057], v[ME-465364]], path[v[ME-518945], v[ME-526967], v[ME-627893], v[ME-465364]], path[v[ME-518945], v[ME-526967], v[ME-627893], v[ME-477057]], path[v[ME-518945], v[ME-763327], v[ME-465364], v[ME-627893]], path[v[ME-518945], v[ME-763327], v[ME-465364], v[ME-477057]], path[v[ME-518945], v[ME-763327], v[ME-627893], v[ME-465364]], path[v[ME-518945], v[ME-763327], v[ME-627893], v[ME-477057]], path[v[ME-518945], v[ME-627893], v[ME-465364], v[ME-627893]], path[v[ME-518945], v[ME-627893], v[ME-465364], v[ME-477057]], path[v[ME-518945], v[ME-627893], v[ME-477057], v[ME-627893]], path[v[ME-518945], v[ME-627893], v[ME-477057], v[ME-465364]], path[v[ME-518945], v[ME-199470], v[ME-627893], v[ME-465364]], path[v[ME-518945], v[ME-199470], v[ME-627893], v[ME-477057]], path[v[ME-518945], v[ME-477057], v[ME-627893], v[ME-465364]], path[v[ME-518945], v[ME-477057], v[ME-627893], v[ME-477057]], path[v[ME-518945], v[ME-477057], v[ME-465364], v[ME-627893]], path[v[ME-518945], v[ME-477057], v[ME-465364], v[ME-477057]]]
# Query 3: find the member vertices that used ALL THREE of the given IP,
# device and mailing address (through used_ip, used_device,
# used_mailing_address). Vertices are returned instead of paths.
#
# The previous version chained mid-traversal V() steps and then applied
# and_(select(...), select(...), select(...)). Every traverser reaching that
# step already carries all three labels in its path, so the and_() never
# filtered anything and the result was simply every in-neighbour of the
# mailing address. Here the intersection is enforced explicitly: start from
# the members attached to the IP and keep only those that also have an
# outgoing edge to the given device and to the given mailing address.
(
    g.V("194.91.138.20").in_("used_ip")
    .where(out("used_device").hasId("B9-2E-D5-A6-84-C0"))
    .where(out("used_mailing_address").hasId("ADDRS-042305"))
    .dedup()
    .toList()
)
[v[ME-526967], v[ME-094592]]
# Query 4 (building block): given a member id and a score threshold, calculate
# a score from the IP, Device, MailingAddress or BrowserAgent edges of that
# member (used_ip, used_device, used_mailing_address, used_browser_agent).
# Candidate member ids: "ME-799370", "ME-094592"
#
# Sums the 'weight' property of the member's outgoing edges and returns the
# sum only if it is <= 2.0 (an empty list otherwise).
# - values('weight') is named explicitly; an unqualified values() would fold
#   every edge property into the sum as soon as edges carry anything else.
# - P.lte(2.0) replaces the equivalent P.lt(2.0).or_(eq(2.0)).
# NOTE(review): the task statement above asks for scores *greater* than the
# threshold, while this predicate keeps sums at or below 2.0 — confirm intent.
(
    g.V("ME-799370")
    .outE("used_ip", "used_device", "used_mailing_address", "used_browser_agent")
    .values("weight")
    .sum_()
    .is_(P.lte(2.0))
    .toList()
)
[0.6]
# Sum the 'weight' of the member's outgoing usage edges and return it only
# when the total is at most 2.0.
(
    g.V("ME-799370")
    .outE("used_ip", "used_device", "used_mailing_address", "used_browser_agent")
    .values('weight')
    .sum_()
    .is_(P.lte(2.0))
    .toList()
)
[0.6]
#"ME-799370", "ME-094592"
# Same weight-sum check for member "ME-094592": sum the 'weight' property of
# the outgoing usage edges and return it only when the total is <= 2.0.
# values('weight') is named explicitly so no other edge property can leak into
# the sum, and P.lte(2.0) replaces the equivalent P.lt(2.0).or_(eq(2.0)).
(
    g.V("ME-094592")
    .outE("used_ip", "used_device", "used_mailing_address", "used_browser_agent")
    .values("weight")
    .sum_()
    .is_(P.lte(2.0))
    .toList()
)
// For the next query I need help to finish writing it. I do not know how to add a weight to an edge if it
// exists and display the result ONLY IF the sum of the edge weights is greater than a given threshold.
//
// Given the member_id and a score threshold
//
// calculate a score based on the IP, Device, MailingAddress or BrowserAgent that this member shared
// and used with the other members in the past (through used_ip, used_device, used_mailing_address, used_browser_agent)
// and output the score between each pair if the total score between them is greater than the given threshold.
//
// Comment: I edited the input to match the first query's input, since the other member vertices that were provided
// as input did not share any IPs, devices, mailing addresses, or browser agents with other members.
// Side note: I ran the following to add scores randomly to edges:
// List<Object> edgeids = g.E().id().toList();
// for (Object edgeid : edgeids) {
// final Random random = new Random();
// g.E(edgeid).property("score", random.nextDouble()).iterate();
// }
// Query 4: for the given member (m1), follow each usage edge (e1) to the shared
// IP / device / mailing address / browser agent, then each usage edge (e2) back
// to another member (m2), and report score(e1) + score(e2) for that pair.
//
// The start id is "ME-518945" — the first query's input, as the comment above
// states. The earlier literal "ME-3518945" had a stray digit and does not match
// the "ME-" + six-digit ids used everywhere else.
//
// NOTE(review): each row is one m1 -> shared entity -> m2 path, so a pair that
// shares several entities yields several rows rather than one aggregated total.
final String memberId = "ME-518945";
// Only rows whose combined score is strictly above this value are printed.
final double scoreThreshold = 0.2;

List<Map<String, Object>> query4Output =
    g.V(memberId).
        as("m1").
        outE("used_ip", "used_device", "used_mailing_address", "used_browser_agent").
        as("e1").
        properties("score").value().as("score1").
        // Step back onto the edge so otherV() can move to the shared entity.
        select("e1").
        otherV().
        inE("used_ip", "used_device", "used_mailing_address", "used_browser_agent").
        as("e2").
        properties("score").value().as("score2").
        select("e2").
        otherV().
        // Drop the trivial pairing of the member with itself.
        where(P.neq("m1")).
        as("m2").
        project("member1", "member2", "score").
            by(__.select("m1")).
            by(__.select("m2")).
            by(__.math("score1 + score2")).
        toList();

for (Map<String, Object> row : query4Output) {
    Double score = (Double) row.get("score");
    if (score > scoreThreshold) {
        System.out.println(
            "Member 1: " + row.get("member1") +
            " Member 2: " + row.get("member2") +
            " Score: " + score);
    }
}
# List the id of every edge in the graph.
g.E().id_().toList()
import pandas as pd
from ipywidgets import widgets, interact, interactive, fixed, interact_manual, Layout, AppLayout, HBox, VBox
from IPython.display import display
# Three-row, single-column sample frame handed to the interactive callback.
df = pd.DataFrame([1,2,3])
def f(x, df):
    """Fill column ``'Kosten A'`` of *df* with *x*, then print the frame and ``x * x``.

    Args:
        x: Value written to every row of the ``'Kosten A'`` column.
        df: DataFrame to update. It is modified in place, so the caller's
            frame keeps the new column after the call.

    Returns:
        None; the updated frame and the square of *x* are printed.
    """
    # The stray bare ``df`` expression that used to sit here did nothing
    # inside a function body, so it has been removed.
    df['Kosten A'] = x
    y = x * x
    print(df, y)
# Build the interactive UI for f: the (10, 50, 5) tuple is the min/max/step
# abbreviation for x, and fixed(df) passes the frame through as a constant
# argument instead of generating a widget for it.
interact(f, x=(10,50,5), df = fixed(df))
def _int_input(description):
    """Return an enabled ``IntText`` starting at 0, 20% wide, labelled *description*.

    ``description_width`` is set to ``'initial'`` so the label is not limited
    to the default description width.
    """
    return widgets.IntText(
        value=0,
        description=description,
        style={'description_width': 'initial'},
        disabled=False,
        layout=Layout(width='20%'),
    )


al = _int_input('AVG Latency (ms):')
tp = _int_input('Traffic Proportion')
# This field used to carry a copy-paste of tp's label, which made the two
# inputs indistinguishable on screen; label it after what it holds.
qps = _int_input('QPS')

# HBox takes its children as ONE sequence. Passing the widgets as separate
# positional arguments raises TypeError.
HBox([al, tp, qps])
from ipywidgets import interact, HBox, VBox, Output, HTML, Dropdown, Button, Layout, Label
from IPython.display import display, clear_output
import pandas as pd
import matplotlib.pyplot as plt
import datetime as dt
class demo():
    """Small ipywidgets dashboard that filters a fixed sample DataFrame.

    Layout: a title pane (o1) on top, three side-by-side panes below it
    (o2 controls, o3 filtered table, o4 bar chart) and a status pane (o5)
    at the bottom. Constructing the object renders everything immediately.
    """

    def __init__(self):
        """Create the sample data, inject the CSS and display the widgets."""
        self.df = pd.DataFrame({'A':[23,45,26,43,67], 'B':[25,65,85,74,56], 'C':[23,65,87,89,65]})

        # Stylesheet text is kept flush-left so the emitted HTML is unchanged.
        css = """
<style>
/* enlarges the default jupyter cell outputs, can revert by Cell->Current Outputs->Clear */
.container { width:1020 !important; }
/* styles for output widgets */
.o2 {width:400px; border:1px solid #ddd}
.o3 {width:400px; border:1px solid #ddd}
.o4 {width:400px; border:1px solid #ddd}
.o5 {width:800px; }
.o5 span {color:red !important}
/* custom styles for testing */
.style_A {background-color:#fafaaa}
.style_B {background-color:#faaafa}
.style_C {background-color:#aafafa}
</style>
"""
        display(HTML(css))

        self.o1 = Output(layout=Layout(width='400px'))
        # o2..o5 each get a CSS class named after the attribute they live in.
        for css_class in ('o2', 'o3', 'o4', 'o5'):
            pane = Output()
            pane.add_class(css_class)
            setattr(self, css_class, pane)

        # o1 on the top row, o2/o3/o4 side by side on the second, o5 last.
        middle_row = HBox([self.o2, self.o3, self.o4])
        display(VBox([self.o1, middle_row, self.o5]))

        with self.o1:
            display(HTML('<h2>Demo</h2>'))

        with self.o2:
            self.dd_filter = Dropdown(description='Select Filter', options=['', 'A','B','C'])
            self.dd_values = Dropdown(description='Select Value')
            self.btn = Button(description='Run')
            # Wire the callbacks once all three controls exist.
            self.dd_filter.observe(self.fill_values)
            self.btn.on_click(self.display_results)
            display(self.dd_filter, self.dd_values, self.btn)

    def fill_values(self, x):
        """Refresh the value dropdown when the filter dropdown's value changes.

        *x* is the change notification passed by ``observe``; anything other
        than a 'change' of the 'value' trait is ignored.
        """
        if x['type'] != 'change' or x['name'] != 'value':
            return
        column = x['owner'].value
        # The blank entry means "no filter", so there are no values to offer.
        self.dd_values.options = [] if column == '' else self.df[column].values

    def display_results(self, x):
        """Button callback: show the rows whose selected column is >= the chosen value.

        Writes the table to o3, a bar chart to o4 and a timestamp to o5.
        If no filter is selected, only a prompt is written to o3.
        """
        column = self.dd_filter.value
        threshold = self.dd_values.value

        if column == '':
            with self.o3:
                clear_output()
                print('Please select filter')
            return

        matches = self.df[self.df[column] >= threshold]

        with self.o3:
            clear_output()
            summary = Label(value=f'Filter by {column}, with {threshold}, found {matches.shape[0]} observation(s)')
            # Colour the label with the per-column style defined in the CSS.
            summary.add_class(f'style_{column}')
            display(summary)
            display(matches)

        with self.o4:
            clear_output()
            matches.plot(kind='bar')
            plt.show()

        with self.o5:
            clear_output()
            stamp = dt.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            display(Label(value=f'Code last run {stamp}'))
# Instantiate the dashboard; the constructor itself displays the widgets.
d = demo()
from ipywidgets import interact, HBox, VBox, Output
import pandas as pd
import matplotlib.pyplot as plt
@interact
def chart_and_table(filterby=['A','B','C']):
    """Show a bar chart and a table of the selected column side by side.

    The list default is the ``interact`` abbreviation for a dropdown, so
    *filterby* arrives as one of 'A', 'B' or 'C'.
    """
    df = pd.DataFrame({'A':[23,45,26,43,67], 'B':[25,65,85,74,56], 'C':[23,65,87,89,65]})

    chart_pane, table_pane = Output(), Output()
    display(HBox([chart_pane, table_pane]))

    # Single-column frame for the chosen column, shared by both panes.
    selected = df[[filterby]]

    with chart_pane:
        selected.plot(kind='bar')
        plt.show()

    with table_pane:
        display(selected)
from ipywidgets import interact, HBox, VBox, Output, HTML, Layout
import pandas as pd
import matplotlib.pyplot as plt
@interact
def chart_and_table(filterby=['A','B','C']):
    """Show chart, column table, a coloured status line and the full table.

    Layout: chart (400px) | selected column (200px) | status line stacked
    above the full frame. The list default is the ``interact`` abbreviation
    for a dropdown, so *filterby* arrives as one of 'A', 'B' or 'C'.
    """
    df = pd.DataFrame({'A':[23,45,26,43,67], 'B':[25,65,85,74,56], 'C':[23,65,87,89,65]})

    # Stylesheet text is kept flush-left so the emitted HTML is unchanged.
    css = """
<style>
.style_a {background-color:#fafaaa}
.style_b {background-color:#faaafa}
.style_c {background-color:#aafafa}
</style>
"""
    display(HTML(css))

    o1 = Output(layout=Layout(width='400px'))
    o2 = Output(layout=Layout(width='200px'))
    o3 = Output()
    o4 = Output()
    right_column = VBox([o3, o4])
    display(HBox([o1, o2, right_column]))

    # Single-column frame for the chosen column, used by the first two panes.
    selected = df[[filterby]]

    with o1:
        selected.plot(kind='bar')
        plt.show()

    with o2:
        display(selected)

    with o3:
        # Background class per column; any other value leaves o3 unstyled.
        css_class = {'A': 'style_a', 'B': 'style_b', 'C': 'style_c'}.get(filterby)
        if css_class is not None:
            o3.add_class(css_class)
        print('The selected filter is :', filterby)

    with o4:
        display(df)