Make sure the libraries are imported by running the code below (remember to unset the "Reset all runtimes" option before running):
from py2neo import Graph
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
# Select matplotlib's 'fivethirtyeight' style sheet for the plots drawn in this notebook.
plt.style.use('fivethirtyeight')
def _format_three_decimals(value):
    """Return ``value`` rendered with exactly three digits after the decimal point."""
    return '%.3f' % value


# Have pandas render every displayed float through the formatter above.
pd.options.display.float_format = _format_three_decimals
Update the cell below to use the IP Address, Bolt Port, and Password, as you did previously.
# Connection settings for your Neo4j database instance: replace the Bolt URL
# and the password below with your own values, following this template:
# graph = Graph("<Bolt URL>", auth=("neo4j", "<Password>"))
NEO4J_BOLT_URL = "bolt://localhost:7687"
NEO4J_AUTH = ("neo4j", "letmein")
graph = Graph(NEO4J_BOLT_URL, auth=NEO4J_AUTH)
# Cypher query: one row per Article node, holding the number of outgoing
# CITED relationships attached to that node (returned as column `cited`).
query = """
MATCH (a:Article)
RETURN size((a)-[:CITED]->()) AS cited
"""

# Run the query, then load the result into a pandas DataFrame.
cited_cursor = graph.run(query)
cited_df = cited_cursor.to_data_frame()

# Summary statistics of the counts, requesting the 90th and 99th percentiles
# in addition to the quartiles. Kept as the final expression so the notebook
# displays the resulting table.
cited_df.describe(percentiles=[0.25, 0.5, 0.75, 0.9, 0.99])
| | cited |
|---|---|
| count | 51956.000 |
| mean | 0.553 |
| std | 1.301 |
| min | 0.000 |
| 25% | 0.000 |
| 50% | 0.000 |
| 75% | 1.000 |
| 90% | 2.000 |
| 99% | 6.000 |
| max | 51.000 |
# Values of the `cited` column with missing entries removed.
cited_values = cited_df['cited'].dropna()

# Density-normalised histogram of those values, drawn in translucent green.
fig1, ax1 = plt.subplots()
ax1.hist(cited_values, bins=1250, density=True, facecolor='g', alpha=0.75)

# Fit the axes inside the figure area, then render the figure.
plt.tight_layout()
plt.show()
# Cypher query: one row per Author node, holding the number of incoming
# AUTHOR relationships attached to that node (returned as column `published`).
query = """
MATCH (a:Author)
RETURN size((a)<-[:AUTHOR]-()) AS published
"""

# Run the query, then load the result into a pandas DataFrame.
published_cursor = graph.run(query)
published_df = published_cursor.to_data_frame()

# Summary statistics of the counts, requesting the 90th and 99th percentiles
# in addition to the quartiles. Kept as the final expression so the notebook
# displays the resulting table.
published_df.describe(percentiles=[0.25, 0.5, 0.75, 0.9, 0.99])
| | published |
|---|---|
| count | 80299.000 |
| mean | 1.751 |
| std | 2.064 |
| min | 1.000 |
| 25% | 1.000 |
| 50% | 1.000 |
| 75% | 2.000 |
| 90% | 3.000 |
| 99% | 10.000 |
| max | 89.000 |