from IPython.core.magic import register_cell_magic
from IPython.display import Markdown
import datetime
from datetime import date
import glob
import logging
import json
import pandas as pd
import plotly
import plotly.graph_objects as go
import plotly.express as px
import numpy as np
import warnings
@register_cell_magic
def markdown(line, cell):
    """Cell magic: render the cell body as Markdown.

    ``{name}`` placeholders in the cell are interpolated from the
    notebook's global namespace via ``str.format``.
    """
    rendered = cell.format(**globals())
    return Markdown(rendered)
# Notebook-wide housekeeping: mute matplotlib's font-cache log chatter,
# silence all warnings, and widen pandas' console rendering so wide
# DataFrames display without truncation.
logging.getLogger('matplotlib.font_manager').disabled = True
warnings.filterwarnings("ignore")
for _option, _value in (
    ('display.width', 500),
    ('display.max_rows', 50),
    ('display.max_columns', 10),
):
    pd.set_option(_option, _value)
def _cvss_field(entry, field, default='Missing_Data'):
    """Return one CVSS v3.1 ``cvssData`` field from an NVD record.

    Catches IndexError/TypeError in addition to KeyError: the original
    KeyError-only handlers crashed on records whose ``cvssMetricV31``
    list is present but empty.
    """
    try:
        return entry['cve']['metrics']['cvssMetricV31'][0]['cvssData'][field]
    except (KeyError, IndexError, TypeError):
        return default


def _metric_field(entry, field, default='Missing_Data'):
    # Like _cvss_field, but for fields that sit beside 'cvssData'
    # (exploitabilityScore / impactScore).
    try:
        return entry['cve']['metrics']['cvssMetricV31'][0][field]
    except (KeyError, IndexError, TypeError):
        return default


def parse_nvd_entry(entry):
    """Flatten one NVD API 2.0 CVE record into a plain row dict.

    Every missing field degrades to 'Missing_Data' except BaseScore
    (string '0.0', converted to numeric downstream) and Description
    (empty string, so the 'rejected' filter below still works).
    """
    cve_rec = entry['cve']
    try:
        # weaknesses may be absent (KeyError) OR an empty list (IndexError);
        # the original caught only KeyError.
        cwe = cve_rec['weaknesses'][0]['description'][0]['value']
    except (KeyError, IndexError):
        cwe = 'Missing_Data'
    try:
        # descriptions may be an empty list (IndexError) OR absent entirely
        # (KeyError); the original caught only IndexError.
        description = cve_rec['descriptions'][0]['value']
    except (KeyError, IndexError):
        description = ''
    return {
        'CVE': cve_rec['id'],
        'Published': cve_rec.get('published', 'Missing_Data'),
        'AttackVector': _cvss_field(entry, 'attackVector'),
        'AttackComplexity': _cvss_field(entry, 'attackComplexity'),
        'PrivilegesRequired': _cvss_field(entry, 'privilegesRequired'),
        'UserInteraction': _cvss_field(entry, 'userInteraction'),
        'Scope': _cvss_field(entry, 'scope'),
        'ConfidentialityImpact': _cvss_field(entry, 'confidentialityImpact'),
        'IntegrityImpact': _cvss_field(entry, 'integrityImpact'),
        'AvailabilityImpact': _cvss_field(entry, 'availabilityImpact'),
        'BaseScore': _cvss_field(entry, 'baseScore', default='0.0'),
        'BaseSeverity': _cvss_field(entry, 'baseSeverity'),
        'ExploitabilityScore': _metric_field(entry, 'exploitabilityScore'),
        'ImpactScore': _metric_field(entry, 'impactScore'),
        'CWE': cwe,
        'Description': description,
        'Assigner': cve_rec.get('sourceIdentifier', 'Missing_Data'),
    }


# Load every matching NVD dump and flatten it into row dicts.
row_accumulator = []
for filename in glob.glob('nvd.jsonl'):
    with open(filename, 'r', encoding='utf-8') as f:
        nvd_data = json.load(f)
    for entry in nvd_data:
        new_row = parse_nvd_entry(entry)
        # Drop CVEs withdrawn by NVD ("rejected reason" descriptions).
        if not new_row['Description'].startswith('rejected reason'):
            row_accumulator.append(new_row)
# Assemble the flat rows into a DataFrame ordered by publication date.
nvd = pd.DataFrame(row_accumulator)
nvd['Published'] = pd.to_datetime(nvd['Published'])
nvd = nvd.sort_values(by=['Published']).reset_index(drop=True)
# BaseScore arrives as a str/float mix ('0.0' marks missing data):
# coerce it to numeric once and hide the missing-data zeros from the
# statistics. (The original ran the same conversion/replacement three
# times and used np.NaN, which numpy 2.0 removed in favour of np.nan.)
nvd['BaseScore'] = pd.to_numeric(nvd['BaseScore']).replace(0, np.nan)

# Headline figures interpolated into the markdown cells.
nvdcount = nvd['Published'].count()
nvdunique = nvd['Published'].nunique()
startdate = date(2000, 1, 1)
enddate = date.today()
numberofdays = enddate - startdate
per_day = nvdcount / numberofdays.days

# Histogram of CVSS base scores (trailing ';' suppresses notebook echo).
nvd['BaseScore'].plot(kind="hist", title='CVSS Breakdown', color="#001d82", figsize=(16, 8));
# Bucketed counts, rarest bucket first.
nvd['BaseScore'].value_counts(bins=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]).sort_values()
(0.999, 2.0] 20 (2.0, 3.0] 557 (3.0, 4.0] 1795 (4.0, 5.0] 9650 (8.0, 9.0] 14715 (9.0, 10.0] 17140 (5.0, 6.0] 19589 (6.0, 7.0] 19938 (7.0, 8.0] 30974 Name: count, dtype: int64
# Raw counts per attack vector; 'Missing_Data' dominates because older
# CVEs in this dataset carry no CVSS v3.1 metrics.
nvd['AttackVector'].value_counts()
AttackVector Missing_Data 132217 NETWORK 83705 LOCAL 26843 ADJACENT_NETWORK 2643 PHYSICAL 1188 Name: count, dtype: int64
# Same breakdown as proportions (normalize=True), rounded to 2 decimals.
nvd['AttackVector'].value_counts(normalize=True).round(2)
AttackVector Missing_Data 0.54 NETWORK 0.34 LOCAL 0.11 ADJACENT_NETWORK 0.01 PHYSICAL 0.00 Name: proportion, dtype: float64
# Count attack vectors per publication year, newest year first and the
# most common vector first within each year.
nvd['Year'] = nvd['Published'].dt.strftime('%Y')
nvd2 = nvd[['Year', 'AttackVector']].copy()
av_df = (
    nvd2.value_counts()
        .to_frame('AttackVectorCount')
        .reset_index()
        .sort_values(by=['Year', 'AttackVectorCount'], ascending=[False, False])
)
# Render without the integer index column.
av_df.style.hide()
Year | AttackVector | AttackVectorCount |
---|---|---|
2024 | NETWORK | 6879 |
2024 | Missing_Data | 3331 |
2024 | LOCAL | 1500 |
2024 | ADJACENT_NETWORK | 214 |
2024 | PHYSICAL | 71 |
2023 | NETWORK | 21742 |
2023 | LOCAL | 6379 |
2023 | Missing_Data | 2120 |
2023 | ADJACENT_NETWORK | 491 |
2023 | PHYSICAL | 217 |
2022 | NETWORK | 18114 |
2022 | LOCAL | 6176 |
2022 | Missing_Data | 1449 |
2022 | ADJACENT_NETWORK | 450 |
2022 | PHYSICAL | 242 |
2021 | NETWORK | 14041 |
2021 | LOCAL | 5252 |
2021 | Missing_Data | 1904 |
2021 | ADJACENT_NETWORK | 559 |
2021 | PHYSICAL | 194 |
2020 | NETWORK | 12868 |
2020 | LOCAL | 4457 |
2020 | Missing_Data | 899 |
2020 | ADJACENT_NETWORK | 706 |
2020 | PHYSICAL | 292 |
2019 | Missing_Data | 9964 |
2019 | NETWORK | 6727 |
2019 | LOCAL | 1955 |
2019 | ADJACENT_NETWORK | 162 |
2019 | PHYSICAL | 130 |
2018 | Missing_Data | 16483 |
2018 | NETWORK | 1294 |
2018 | LOCAL | 323 |
2018 | ADJACENT_NETWORK | 34 |
2018 | PHYSICAL | 20 |
2017 | Missing_Data | 16594 |
2017 | NETWORK | 1093 |
2017 | LOCAL | 394 |
2017 | ADJACENT_NETWORK | 18 |
2017 | PHYSICAL | 14 |
2016 | Missing_Data | 5794 |
2016 | NETWORK | 500 |
2016 | LOCAL | 215 |
2016 | ADJACENT_NETWORK | 4 |
2016 | PHYSICAL | 4 |
2015 | Missing_Data | 6561 |
2015 | NETWORK | 29 |
2015 | LOCAL | 5 |
2014 | Missing_Data | 7952 |
2014 | NETWORK | 37 |
2014 | LOCAL | 17 |
2014 | PHYSICAL | 2 |
2013 | Missing_Data | 5302 |
2013 | NETWORK | 16 |
2013 | LOCAL | 5 |
2013 | ADJACENT_NETWORK | 1 |
2012 | Missing_Data | 5288 |
2012 | NETWORK | 39 |
2012 | LOCAL | 22 |
2012 | ADJACENT_NETWORK | 2 |
2011 | Missing_Data | 4148 |
2011 | NETWORK | 17 |
2011 | LOCAL | 5 |
2011 | PHYSICAL | 1 |
2011 | ADJACENT_NETWORK | 1 |
2010 | Missing_Data | 4591 |
2010 | NETWORK | 58 |
2010 | LOCAL | 18 |
2009 | Missing_Data | 5711 |
2009 | NETWORK | 38 |
2009 | LOCAL | 29 |
2008 | Missing_Data | 5610 |
2008 | NETWORK | 43 |
2008 | LOCAL | 11 |
2007 | Missing_Data | 6571 |
2007 | NETWORK | 15 |
2007 | LOCAL | 8 |
2007 | ADJACENT_NETWORK | 1 |
2007 | PHYSICAL | 1 |
2006 | Missing_Data | 6645 |
2006 | NETWORK | 10 |
2006 | LOCAL | 4 |
2005 | Missing_Data | 4958 |
2005 | NETWORK | 35 |
2005 | LOCAL | 17 |
2004 | Missing_Data | 2440 |
2004 | NETWORK | 27 |
2004 | LOCAL | 12 |
2003 | Missing_Data | 1532 |
2003 | NETWORK | 13 |
2003 | LOCAL | 3 |
2002 | Missing_Data | 2118 |
2002 | NETWORK | 33 |
2002 | LOCAL | 19 |
2001 | Missing_Data | 1644 |
2001 | NETWORK | 24 |
2001 | LOCAL | 11 |
2000 | Missing_Data | 1011 |
2000 | NETWORK | 6 |
2000 | LOCAL | 3 |
1999 | Missing_Data | 917 |
1999 | NETWORK | 4 |
1999 | LOCAL | 2 |
1998 | Missing_Data | 244 |
1998 | NETWORK | 2 |
1998 | LOCAL | 1 |
1997 | Missing_Data | 252 |
1997 | NETWORK | 1 |
1996 | Missing_Data | 75 |
1995 | Missing_Data | 25 |
1994 | Missing_Data | 26 |
1993 | Missing_Data | 13 |
1992 | Missing_Data | 14 |
1991 | Missing_Data | 15 |
1990 | Missing_Data | 11 |
1989 | Missing_Data | 3 |
1988 | Missing_Data | 2 |
# Interactive year-over-year trend, one line per attack vector.
fig = px.line(
    av_df,
    x="Year",
    y="AttackVectorCount",
    color="AttackVector",
    markers=True,
)
fig.show()