import pandas as pd
import plotly.express as px
import pygal as pg
from string import Template
from IPython.core.display import display, HTML
%load_ext cypher
%config CypherMagic.uri='http://neo4j:neo@localhost:7474/db/data'
from IPython.display import HTML, Javascript, display
def configure_d3():
    """Register the CDN locations of lodash and d3 with RequireJS.

    Displays a Javascript snippet that calls `require.config`, so that a
    later `require(['d3'])` or `require(['lodash'])` resolves to the
    configured URLs.
    """
    require_config = Javascript("""
require.config({
paths: {
lodash: "https://cdnjs.cloudflare.com/ajax/libs/lodash.js/4.17.15/lodash.min",
d3: "https://d3js.org/d3.v4.min"
}
})""")
    display(require_config)


configure_d3()
# HTML page skeleton used to show pygal charts in the notebook: it loads the
# pygal tooltip scripts and embeds the rendered SVG through the
# `rendered_chart` format field.
# Fix: the second <script> tag previously carried a stray double quote after
# its src attribute, which produced malformed HTML.
base_html = """
<!DOCTYPE html>
<html>
<head>
<script type="text/javascript" src="http://kozea.github.com/pygal.js/javascripts/svg.jquery.js"></script>
<script type="text/javascript" src="https://kozea.github.io/pygal.js/2.0.x/pygal-tooltips.min.js"></script>
</head>
<body>
<figure>
{rendered_chart}
</figure>
</body>
</html>
"""
%%cypher
// Module structure of the Shopizer application.
// For every project with groupId "com.shopizer", returns the artifactId of
// its parent project, its own artifactId and the fqn of the main artifact
// it creates, ordered by the module's artifactId.
MATCH (module:Project{groupId: "com.shopizer"}),
(module)-[:HAS_PARENT]->(parent:Project),
(module)-[:CREATES]->(artifact:Main:Artifact)
RETURN parent.artifactId AS Parent,
module.artifactId AS Module,
artifact.fqn AS Artifact
ORDER BY module.artifactId
%%cypher
// Label all Shopizer nodes.
// Adds the :Shopizer label to every main artifact of group "com.shopizer"
// and to each node such an artifact directly contains, then reports the
// number of distinct contained nodes per artifact.
MATCH (artifact:Main:Artifact{group: "com.shopizer"})
SET artifact:Shopizer
WITH artifact
MATCH (artifact)-[:CONTAINS]->(c)
SET c:Shopizer
RETURN artifact.name AS Artifact,
count(DISTINCT c) AS ContentCount
ORDER BY artifact.name
packageHierarchy = %cypher MATCH (p:Package:Shopizer)-[:CONTAINS*]->(t:Type:Shopizer) \
WITH DISTINCT p.fqn AS packageName \
MATCH (p:Package{fqn: packageName})-[:CONTAINS]->(child:Shopizer) \
WHERE (child:Package AND exists((child)-[:CONTAINS*]->(:Type:Shopizer))) OR child:Type \
WITH p, child, child:Type AS leaf \
RETURN DISTINCT p.fqn AS Parent_Fqn, p.name AS Parent_Name, child.fqn AS Child_Fqn, child.name AS Child_Name, leaf AS Child_Is_Leaf \
ORDER BY Parent_Fqn
package_hierarchy_df = packageHierarchy.get_dataframe()
text = Template(open('vis/circle-packing/circle-packing-diagram.html', 'r').read().replace("\n","")).substitute({
'circle_data': package_hierarchy_df.to_csv(index = False).replace("\r\n","\\n").replace("\n","\\n"),
'container': 'type-packing-diagram'
})
HTML(text)
→ Eigentliche Struktur der Anwendung bleibt unsichtbar
moduleDependencies = %cypher MATCH (a1:Artifact:Shopizer)-[:CONTAINS]->(t1:Type:Shopizer), \
(a2:Artifact:Shopizer)-[:CONTAINS]->(t2:Type:Shopizer), \
(t1)-[dep:DEPENDS_ON]->(t2) \
WHERE a1 <> a2 \
RETURN a1.name AS Source, \
a2.name AS Target, \
COUNT(dep) AS X_Count \
ORDER BY Source DESC
moduleDependenciesData = moduleDependencies.get_dataframe().to_csv(index = False).replace("\r\n","\\n").replace("\n","\\n")
text = Template(open('vis/chord/chord-diagram.html', 'r').read().replace("\n","")).substitute({
'chord_data': moduleDependenciesData,
'container': 'module-chord-diagram'})
HTML(text)
%%cypher
// Create the nodes for the technical layers.
// MERGE creates each :Layer node only if no node with that name exists yet.
MERGE (pres:Layer{name: 'Presentation'})
MERGE (dom:Layer{name: 'Domain'})
MERGE (dat:Layer{name: 'Data'})
%%cypher
// Assign classes to the layers.
// Each entry of layerAssignments is [layer name, artifact name]; every :Type
// directly contained in the named Shopizer artifact is linked to the layer
// via CONTAINS. Returns the number of distinct types per layer.
WITH [
['Presentation','sm-shop'],
['Presentation','sm-shop-model'],
['Domain','sm-core'],
['Domain','sm-core-modules'],
['Data','sm-core-model']
] AS layerAssignments
UNWIND layerAssignments AS layerAssignment
MATCH (l:Layer{name: layerAssignment[0]}),
(a:Artifact:Shopizer{name: layerAssignment[1]}),
(a)-[:CONTAINS]->(t:Type)
MERGE (l)-[:CONTAINS]->(t)
RETURN l.name AS Layer, count(DISTINCT t) AS Classes
%%cypher
// Dependencies between layers.
// Aggregates the type-level DEPENDS_ON relationships for every ordered pair
// of layers (a layer paired with itself is not excluded): the sum of their
// weights and their number are stored on a layer-level DEPENDS_ON
// relationship.
MATCH (l1:Layer)-[:CONTAINS]->(t1:Type),
(l2:Layer)-[:CONTAINS]->(t2:Type),
(t1)-[d:DEPENDS_ON]->(t2)
WITH l1, l2, sum(d.weight) AS weight, count(d) AS count
// MERGE on the bare relationship and SET the aggregates afterwards: with the
// values inside the MERGE pattern, re-running the cell after the underlying
// data changed would add a second relationship instead of updating the
// existing one.
MERGE (l1)-[d:DEPENDS_ON]->(l2)
SET d.weight = weight, d.count = count
RETURN l1.name AS Source, d.weight AS Weight, d.count AS Count, l2.name AS Target
ORDER BY Source, Target
%%cypher
// Frameworks used per layer.
// Looks at the types that layer types depend on and that some artifact
// REQUIRES, skipping java.* types and the listed primitive/void names.
// Each remaining type is reduced to the first three dot-separated segments
// of its fqn; the result lists, per such prefix, the distinct layers using
// it, with the prefixes used by the most layers first.
MATCH (l:Layer)-[:CONTAINS]->(:Type)-[:DEPENDS_ON]->(dep:Type)<-[:REQUIRES]-(a:Artifact)
WHERE NOT (
dep.fqn starts with "java."
or dep.fqn in ["void","byte","int","long","double","boolean", "char"]
)
WITH DISTINCT l, split(dep.fqn, ".") AS dep
WITH dep[0] + "." + dep[1] + "." + dep[2] AS dep, l
ORDER BY l.name
RETURN dep AS Dependency, collect(DISTINCT l.name) AS Layer
ORDER BY size(Layer) DESC
%%cypher
// Identify the packages directly below "com.salesmanager.core.business.services".
// Their names, collected in name order, are returned as a single list of
// bounded-context candidates.
MATCH (p:Package:Shopizer)-[:CONTAINS]->(bC:Package:Shopizer)
WHERE p.fqn = "com.salesmanager.core.business.services"
WITH bC
ORDER BY bC.name
RETURN collect(bC.name) AS BoundedContexts
%%cypher
// Create one node per business capability.
// For each distinct name of a package directly below
// "com.salesmanager.core.business.services", a :BoundedContext node with
// that name is created unless it already exists.
MATCH (p:Package:Shopizer)-[:CONTAINS]->(bC:Package:Shopizer)
WHERE p.fqn = "com.salesmanager.core.business.services"
WITH collect(DISTINCT bC.name) AS boundedContexts
UNWIND boundedContexts AS boundedContext
MERGE (:BoundedContext {name: boundedContext})
%%cypher
// Assign the classes to the bounded contexts.
// A Shopizer type is linked to a bounded context when any Shopizer package
// that (transitively) contains it has the same simple name as the context;
// the package's position in the hierarchy is not checked.
MATCH (bC:BoundedContext),
(p:Package:Shopizer)-[:CONTAINS*]->(t:Type:Shopizer)
WHERE p.name = bC.name
MERGE (bC)-[:CONTAINS]->(t)
%%cypher
// Classes that are not assigned to any bounded context.
// Counts the unassigned Shopizer types beneath each package; because of
// CONTAINS*, a type is counted for every package on its containment path.
MATCH (p:Package)-[:CONTAINS*]->(t:Type:Shopizer)
WHERE NOT EXISTS((:BoundedContext)-[:CONTAINS]->(t))
RETURN p.fqn, count(DISTINCT t) AS Count
ORDER BY Count DESC
// Template for assigning the remaining packages manually: replace the
// placeholder with pairs of the form ["p.fqn","boundedcontext"].
WITH [<liste der zuordnungen, in der Form ["p.fqn","boundedcontext"]>] AS packageMappings
UNWIND packageMappings AS packageMapping
// Index 0 is the package fqn and index 1 the bounded context name, matching
// the pair layout of the placeholder. Both property maps are now closed
// with "}" (the closing braces were missing).
MATCH (bC:BoundedContext{name: packageMapping[1]}),
(p:Package{fqn: packageMapping[0]})-[:CONTAINS]->(t:Type:Shopizer)
MERGE (bC)-[:CONTAINS]->(t)
# Size of each bounded context: links every Shopizer type to the bounded
# context whose name equals the name of a package (transitively) containing
# it, and counts the distinct types matched this way per context.
subdomainSize = %cypher MATCH (bC:BoundedContext), \
(p:Package:Shopizer)-[:CONTAINS*]->(t:Type:Shopizer) \
WHERE p.name = bC.name \
MERGE (bC)-[:CONTAINS]->(t) \
RETURN bC.name AS BoundedContext, \
count(DISTINCT t) AS Classes
df = subdomainSize.get_dataframe()
# Pie chart with one slice per bounded context, sized by its class count.
fig = px.pie(df, values='Classes', names='BoundedContext', title='Größe der einzelnen Bounded Contexts')
fig.show()
# Abhängigkeiten zwischen Bounded Contexts (Domain Layer)
bCRelations = %cypher MATCH (bC1:BoundedContext)-[:CONTAINS]->(t1:Type:Shopizer), \
(dL:Layer{name:"Domain"})-[:CONTAINS]->(t1), \
(bC2:BoundedContext)-[:CONTAINS]->(t2:Type:Shopizer), \
(dL)-[:CONTAINS]->(t2), \
(t1)-[dep:DEPENDS_ON]->(t2) \
RETURN bC1.name AS Source, \
bC2.name AS Target, \
sum(dep.weight) AS X_Count
bounded_context_connections = bCRelations.get_dataframe()
text = Template(open('vis/chord/chord-diagram.html', 'r').read().replace("\n","")).substitute({
'chord_data': bounded_context_connections.to_csv(index = False).replace("\r\n","\\n").replace("\n","\\n"),
'container': 'bc-chord-diagram'})
HTML(text)
%%cypher
// Identify merge commits.
// Every commit with more than one parent commit gets the :Merge label; the
// number of labelled commits is returned.
MATCH (c:Commit)-[:HAS_PARENT]->(p:Commit)
WITH c, count(p) as parents
WHERE parents > 1
SET c:Merge
RETURN count(c)
%%cypher
// Clean up duplicate authors (by name).
// Authors sharing a name are grouped; the first node of each group is kept,
// the commits of the remaining nodes are re-linked to it and those nodes
// are deleted. A duplicate without any COMMITTED relationship is not
// matched and therefore stays in the graph.
// count(duplicate) counts one row per re-linked commit, not distinct nodes.
MATCH (a:Author)
WITH a.name as name, collect(a) as authors
WITH head(authors) as author, tail(authors) as duplicates
UNWIND duplicates as duplicate
MATCH (duplicate)-[:COMMITTED]->(c:Commit)
MERGE (author)-[:COMMITTED]->(c)
DETACH DELETE duplicate
RETURN author.name, count(duplicate)
%%cypher
// Clean up duplicate authors (by e-mail).
// Authors sharing an e-mail address are grouped; the first node of each
// group is kept, the commits of the remaining nodes are re-linked to it and
// those nodes are deleted. A duplicate without any COMMITTED relationship
// is not matched and therefore stays in the graph.
// count(duplicate) counts one row per re-linked commit, not distinct nodes.
// NOTE(review): authors without an email property would share one group and
// be merged into a single author; confirm every Author has an email.
MATCH (a:Author)
WITH a.email as email, collect(a) as authors
WITH head(authors) as author, tail(authors) as duplicates
UNWIND duplicates as duplicate
MATCH (duplicate)-[:COMMITTED]->(c:Commit)
MERGE (author)-[:COMMITTED]->(c)
DETACH DELETE duplicate
RETURN author.name, author.email, count(duplicate)
%%cypher
// Clean up duplicate authors (manual post-processing).
// Each entry is [name to set, e-mail of the author to keep, e-mail of the
// duplicate]: the kept author gets the given name, takes over the
// duplicate's commits, and the duplicate author node is deleted.
// count(duplicate) counts one row per re-linked commit.
WITH [
["Carl Samson", "csamson777@yahoo.com", "c.samson@cgi.com"],
["Carl Samson", "csamson777@yahoo.com", "carlsamson@Carls-MacBook-Pro-2.local"],
["Umesh Awasthi", "UAwasthi@rccl.com", "umeshawasthi@gmail.com"]
] AS authors
UNWIND authors AS duplicateAuthor
MATCH (author:Author{email: duplicateAuthor[1]}),
(duplicate:Author{email: duplicateAuthor[2]})
SET author.name = duplicateAuthor[0]
WITH author, duplicate
MATCH (duplicate)-[:COMMITTED]->(c:Commit)
MERGE (author)-[:COMMITTED]->(c)
DETACH DELETE duplicate
RETURN author.name, author.email, count(duplicate)
# Commits per author: counts, per author, the distinct non-merge commits
# that contain at least one change modifying a file, and keeps only authors
# with more than one such commit, ordered by commit count (descending).
commitsPerAuthor = %cypher MATCH (a:Author)-[:COMMITTED]->(c:Commit), \
(c)-[:CONTAINS_CHANGE]->(:Change)-[:MODIFIES]->(file:File) \
WHERE NOT c:Merge \
WITH a, count(DISTINCT c) AS Commits \
WHERE Commits > 1 \
RETURN a.name as Entwickler, Commits \
ORDER BY Commits DESC
commitsPerAuthor_df = commitsPerAuthor.get_dataframe()
# Visualisation: bar chart with one series (and legend entry) per developer.
bar_chart = pg.Bar(
    show_legend=True,
    human_readable=True,
    fill=True,
    legend_at_bottom=True,
    legend_at_bottom_columns=2,
)
bar_chart.title = 'Entwickler mit den meisten Commits'
for _, author_row in commitsPerAuthor_df.iterrows():
    # Each developer becomes a series holding a single value.
    commit_bar = {"value": author_row['Commits']}
    bar_chart.add(author_row['Entwickler'], [commit_bar])
# Embed the rendered SVG into the HTML skeleton and show it in the notebook.
bar_chart_svg = bar_chart.render(is_unicode=True)
display(HTML(base_html.format(rendered_chart=bar_chart_svg)))
%%cypher
// Index on the file path: creates an index on the relativePath property of
// :File nodes.
CREATE INDEX ON :File(relativePath)
%%cypher
// Link Git :File nodes with Java :Type nodes.
// A file is linked to a type via HAS_SOURCE when its relativePath ends with
// "<fileName of the containing package>/<sourceFileName of the type>".
MATCH (p:Package:Shopizer)-[:CONTAINS]->(t:Type:Shopizer)
WITH t, p.fileName + "/" + t.sourceFileName as sourceFileName
MATCH (f:Git:File)
WHERE f.relativePath ENDS WITH sourceFileName
MERGE (f)-[:HAS_SOURCE]->(t)
%%cypher
// Classes with the highest average cyclomatic complexity.
// Averages cyclomaticComplexity over the methods a Shopizer type declares
// that carry this property, highest average first.
// The relationship type needs its leading colon: a bare [DECLARES] is a
// variable name and matches relationships of any type.
MATCH (t:Type:Shopizer)-[:DECLARES]->(m:Method)
WHERE EXISTS(m.cyclomaticComplexity)
WITH t, sum(m.cyclomaticComplexity) / toFloat(count(m)) AS AverageComplexity
RETURN DISTINCT t.fqn AS Type,
AverageComplexity
ORDER BY AverageComplexity DESC
Robert C. Martin definiert Komplexitätsmetriken für Packages
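Efferent Coupling (Ce): Anzahl der ausgehenden Abhängigkeiten
Afferent Coupling (Ca): Anzahl der eingehenden Abhängigkeiten
Instabilität (I) = Ce / (Ce + Ca)
Abstraktheit (A) = Na / Nc (Na: Anzahl der abstrakten Klassen und Interfaces, Nc: Anzahl aller Klassen)
Distanz (D) = |A + I - 1|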
# Instability per type: I = Ce / (Ce + Ca).
# Ce counts the DEPENDS_ON matches from the type to other Shopizer types;
# Ca counts the DEPENDS_ON matches pointing at the type from any node (the
# source side is not restricted by label). Types without any coupling are
# dropped, which also avoids a division by zero.
instability_query = %cypher MATCH (t:Type:Shopizer) \
OPTIONAL MATCH (t)-[:DEPENDS_ON]->(d:Type:Shopizer) \
WITH t, count(d) AS EfferentCoupling \
OPTIONAL MATCH (e)-[:DEPENDS_ON]->(t) \
WITH t, EfferentCoupling, count(e) AS AfferentCoupling \
WHERE EfferentCoupling + AfferentCoupling > 0 \
RETURN t.fqn AS Type, t.name AS Name, toFloat(EfferentCoupling) / (EfferentCoupling + AfferentCoupling) AS Instability \
ORDER BY Instability DESC
instability_df = instability_query.get_dataframe()
# Abstractness per type: share of the methods a type declares that have
# abstract = true. Only types declaring at least one method are returned.
abstractness_query = %cypher MATCH (t:Type:Shopizer)-[:DECLARES]->(m:Method) \
WITH t, count(m) AS Total \
OPTIONAL MATCH (t)-[:DECLARES]->(m:Method{abstract: true}) \
WITH t, toFloat(count(m)) / Total AS Abstractness \
RETURN t.fqn AS Type, t.name AS Name, Abstractness \
ORDER BY Abstractness DESC
abstractness_df = abstractness_query.get_dataframe()
# Combine both metrics per type. The outer join keeps types that occur in
# only one of the two results; their missing values are replaced by 0.
distance_df = instability_df.merge(abstractness_df, how='outer', on=['Type', 'Name']).fillna(0)
distance_df
# Distance D = |A + I - 1|, largest distance first.
distance_df['Distance'] = (distance_df['Instability'] + distance_df['Abstractness'] - 1).abs()
distance_df = distance_df.sort_values(by='Distance', ascending=False)
# Meant to remove pure utility classes (sinks). Note that the filter keeps
# only types where BOTH Abstractness and Instability are greater than 0, so
# a type is dropped as soon as either metric is 0. Of the remaining rows the
# first 30 (in the current row order of distance_df) are charted.
stackedBar_distance_df = distance_df.loc[
    (distance_df['Abstractness'] > 0) & (distance_df['Instability'] > 0)
].iloc[:30]
stacked_bar_chart = pg.StackedBar(
    show_legend=True,
    human_readable=True,
    fill=False,
    x_label_rotation=45,
    truncate_label=-1,
)
stacked_bar_chart.title = 'Robert C. Martin Metriken'
stacked_bar_chart.x_labels = stackedBar_distance_df['Name'].tolist()
# One stacked series per metric, added in a fixed order.
for metric_name in ('Abstractness', 'Instability', 'Distance'):
    stacked_bar_chart.add(metric_name, stackedBar_distance_df[metric_name].tolist())
stacked_bar_svg = stacked_bar_chart.render(is_unicode=True)
display(HTML(base_html.format(rendered_chart=stacked_bar_svg)))
# One scatter point per type: x = Abstractness, y = Instability, labelled
# with the type's fully qualified name. The columns are read once; the
# previous loop re-transposed the whole DataFrame for every single row.
type_distance_doc = [
    {'value': (abstractness, instability), 'label': type_fqn}
    for abstractness, instability, type_fqn in zip(
        distance_df['Abstractness'].tolist(),
        distance_df['Instability'].tolist(),
        distance_df['Type'].tolist(),
    )
]
xy_chart = pg.XY(stroke=False, x_title='Abstractness', y_title='Instability')
xy_chart.title = 'Robert C. Martin Distance'
xy_chart.add('Abstractness to Instability', type_distance_doc)
# Reference lines, drawn with stroke: the line A + I = 1 and the two lines
# marking off the corners around (0, 0) and (1, 1).
xy_chart.add('Optimum', [(0, 1), (1, 0)], stroke=True)
xy_chart.add('Zone of Pain', [(0, 0.3), (0.3, 0)], stroke=True)
xy_chart.add('Zone of Uselesness', [(1, 0.7), (0.7, 1)], stroke=True)
display(HTML(base_html.format(rendered_chart=xy_chart.render(is_unicode=True))))
module_instability_query = %cypher MATCH (bC1:BoundedContext) \
WITH bC1 \
MATCH (bC1)-[:CONTAINS]->(t:Type:Shopizer)-[:DEPENDS_ON]->(d:Type:Shopizer)<-[:CONTAINS]-(bC2:BoundedContext) \
WHERE bC1 <> bC2 \
WITH bC1, count(d) AS EfferentCoupling \
MATCH (bC1)-[:CONTAINS]->(t:Type:Shopizer)<-[:DEPENDS_ON]-(d:Type:Shopizer)<-[:CONTAINS]-(bC2:BoundedContext) \
WHERE bC1 <> bC2 \
WITH bC1, EfferentCoupling, count(d) AS AfferentCoupling \
WITH bC1, toFloat(EfferentCoupling) / (EfferentCoupling + AfferentCoupling) AS Instability \
RETURN bC1.name AS Name, Instability \
ORDER BY Instability DESC
module_instability_df = module_instability_query.get_dataframe()
# Abstractness per bounded context: share of the contained Shopizer types
# that are labelled :Interface or have an `abstract` property.
# NOTE(review): exists(t.abstract) is true whenever the property is set,
# whatever its value; confirm that non-abstract types do not carry
# abstract = false.
module_abstractness_query = %cypher MATCH (bC:BoundedContext)-[:CONTAINS]->(t:Type:Shopizer) \
WITH bC, count(t) AS Total \
OPTIONAL MATCH (bC)-[:CONTAINS]->(t:Type:Shopizer) \
WHERE t:Interface OR exists(t.abstract) \
WITH bC, toFloat(count(t)) / Total AS Abstractness \
RETURN bC.name AS Name, Abstractness \
ORDER BY Abstractness DESC
module_abstractness_df = module_abstractness_query.get_dataframe()
# Combine both metrics per bounded context. The outer join keeps contexts
# that occur in only one of the two results; missing values become 0.
module_distance_df = pd.merge(module_instability_df, module_abstractness_df, how='outer', on = ['Name'])
module_distance_df = module_distance_df.fillna(0)
# One scatter point per bounded context: x = Abstractness, y = Instability,
# labelled with the context name. The columns are read once; the previous
# loop re-transposed the whole DataFrame for every single row.
module_distance_doc = [
    {'value': (abstractness, instability), 'label': context_name}
    for abstractness, instability, context_name in zip(
        module_distance_df['Abstractness'].tolist(),
        module_distance_df['Instability'].tolist(),
        module_distance_df['Name'].tolist(),
    )
]
xy_module_chart = pg.XY(stroke=False, x_title='Abstractness', y_title='Instability')
xy_module_chart.title = 'Robert C. Martin Distance'
xy_module_chart.add('Abstractness to Instability', module_distance_doc)
# Reference lines, drawn with stroke: the line A + I = 1 and the two lines
# marking off the corners around (0, 0) and (1, 1).
xy_module_chart.add('Optimum', [(0, 1), (1, 0)], stroke=True)
xy_module_chart.add('Zone of Pain', [(0, 0.3), (0.3, 0)], stroke=True)
xy_module_chart.add('Zone of Uselesness', [(1, 0.7), (0.7, 1)], stroke=True)
display(HTML(base_html.format(rendered_chart=xy_module_chart.render(is_unicode=True))))