from IPython.core.magic import register_cell_magic
from IPython.display import Markdown
import datetime
from datetime import date
import glob
import logging
import json
import pandas as pd
import plotly
import plotly.graph_objects as go
import plotly.express as px
import numpy as np
import warnings
@register_cell_magic
def markdown(line, cell):
    """Cell magic: render the cell body as Markdown.

    ``{name}`` placeholders in the cell are interpolated from the
    notebook's global namespace via ``str.format``.
    """
    rendered = cell.format(**globals())
    return Markdown(rendered)
# Notebook-wide housekeeping: mute matplotlib's font-cache log chatter,
# silence all warnings, and widen pandas' console rendering so wide
# DataFrames display without truncation.
logging.getLogger('matplotlib.font_manager').disabled = True
warnings.filterwarnings("ignore")
for _option, _value in (
    ('display.width', 500),
    ('display.max_rows', 50),
    ('display.max_columns', 10),
):
    pd.set_option(_option, _value)
def _cvss_field(entry, field, default='Missing_Data'):
    """Return one CVSS v3.1 ``cvssData`` field from an NVD record.

    Catches IndexError/TypeError in addition to KeyError: the original
    KeyError-only handlers crashed on records whose ``cvssMetricV31``
    list is present but empty.
    """
    try:
        return entry['cve']['metrics']['cvssMetricV31'][0]['cvssData'][field]
    except (KeyError, IndexError, TypeError):
        return default


def _metric_field(entry, field, default='Missing_Data'):
    # Like _cvss_field, but for fields that sit beside 'cvssData'
    # (exploitabilityScore / impactScore).
    try:
        return entry['cve']['metrics']['cvssMetricV31'][0][field]
    except (KeyError, IndexError, TypeError):
        return default


def parse_nvd_entry(entry):
    """Flatten one NVD API 2.0 CVE record into a plain row dict.

    Every missing field degrades to 'Missing_Data' except BaseScore
    (string '0.0', converted to numeric downstream) and Description
    (empty string, so the 'rejected' filter below still works).
    """
    cve_rec = entry['cve']
    try:
        # weaknesses may be absent (KeyError) OR an empty list (IndexError);
        # the original caught only KeyError.
        cwe = cve_rec['weaknesses'][0]['description'][0]['value']
    except (KeyError, IndexError):
        cwe = 'Missing_Data'
    try:
        # descriptions may be an empty list (IndexError) OR absent entirely
        # (KeyError); the original caught only IndexError.
        description = cve_rec['descriptions'][0]['value']
    except (KeyError, IndexError):
        description = ''
    return {
        'CVE': cve_rec['id'],
        'Published': cve_rec.get('published', 'Missing_Data'),
        'AttackVector': _cvss_field(entry, 'attackVector'),
        'AttackComplexity': _cvss_field(entry, 'attackComplexity'),
        'PrivilegesRequired': _cvss_field(entry, 'privilegesRequired'),
        'UserInteraction': _cvss_field(entry, 'userInteraction'),
        'Scope': _cvss_field(entry, 'scope'),
        'ConfidentialityImpact': _cvss_field(entry, 'confidentialityImpact'),
        'IntegrityImpact': _cvss_field(entry, 'integrityImpact'),
        'AvailabilityImpact': _cvss_field(entry, 'availabilityImpact'),
        'BaseScore': _cvss_field(entry, 'baseScore', default='0.0'),
        'BaseSeverity': _cvss_field(entry, 'baseSeverity'),
        'ExploitabilityScore': _metric_field(entry, 'exploitabilityScore'),
        'ImpactScore': _metric_field(entry, 'impactScore'),
        'CWE': cwe,
        'Description': description,
        'Assigner': cve_rec.get('sourceIdentifier', 'Missing_Data'),
    }


# Load every matching NVD dump and flatten it into row dicts.
row_accumulator = []
for filename in glob.glob('nvd.jsonl'):
    with open(filename, 'r', encoding='utf-8') as f:
        nvd_data = json.load(f)
    for entry in nvd_data:
        new_row = parse_nvd_entry(entry)
        # Drop CVEs withdrawn by NVD ("rejected reason" descriptions).
        if not new_row['Description'].startswith('rejected reason'):
            row_accumulator.append(new_row)
# Assemble the flat rows into a DataFrame ordered by publication date.
nvd = pd.DataFrame(row_accumulator)
nvd['Published'] = pd.to_datetime(nvd['Published'])
nvd = nvd.sort_values(by=['Published']).reset_index(drop=True)
# BaseScore arrives as a str/float mix ('0.0' marks missing data):
# coerce it to numeric once and hide the missing-data zeros from the
# statistics. (The original ran the same conversion/replacement three
# times and used np.NaN, which numpy 2.0 removed in favour of np.nan.)
nvd['BaseScore'] = pd.to_numeric(nvd['BaseScore']).replace(0, np.nan)

# Headline figures interpolated into the markdown cells.
nvdcount = nvd['Published'].count()
nvdunique = nvd['Published'].nunique()
startdate = date(2000, 1, 1)
enddate = date.today()
numberofdays = enddate - startdate
per_day = nvdcount / numberofdays.days

# Histogram of CVSS base scores (trailing ';' suppresses notebook echo).
nvd['BaseScore'].plot(kind="hist", title='CVSS Breakdown', color="#001d82", figsize=(16, 8));
# Bucketed counts, rarest bucket first.
nvd['BaseScore'].value_counts(bins=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]).sort_values()
(0.999, 2.0] 20 (2.0, 3.0] 557 (3.0, 4.0] 1795 (4.0, 5.0] 9650 (8.0, 9.0] 14715 (9.0, 10.0] 17140 (5.0, 6.0] 19589 (6.0, 7.0] 19938 (7.0, 8.0] 30974 Name: count, dtype: int64
# Raw counts per attack vector; 'Missing_Data' dominates because older
# CVEs in this dataset carry no CVSS v3.1 metrics.
nvd['AttackVector'].value_counts()
AttackVector Missing_Data 132217 NETWORK 83705 LOCAL 26843 ADJACENT_NETWORK 2643 PHYSICAL 1188 Name: count, dtype: int64
# Same breakdown as proportions (normalize=True), rounded to 2 decimals.
nvd['AttackVector'].value_counts(normalize=True).round(2)
AttackVector Missing_Data 0.54 NETWORK 0.34 LOCAL 0.11 ADJACENT_NETWORK 0.01 PHYSICAL 0.00 Name: proportion, dtype: float64
# Count attack vectors per publication year, newest year first and the
# most common vector first within each year.
nvd['Year'] = nvd['Published'].dt.strftime('%Y')
nvd2 = nvd[['Year', 'AttackVector']].copy()
av_df = (
    nvd2.value_counts()
        .to_frame('AttackVectorCount')
        .reset_index()
        .sort_values(by=['Year', 'AttackVectorCount'], ascending=[False, False])
)
# Render without the integer index column.
av_df.style.hide()
Year | AttackVector | AttackVectorCount |
---|---|---|
2024 | NETWORK | 6879 |
2024 | Missing_Data | 3331 |
2024 | LOCAL | 1500 |
2024 | ADJACENT_NETWORK | 214 |
2024 | PHYSICAL | 71 |
2023 | NETWORK | 21742 |
2023 | LOCAL | 6379 |
2023 | Missing_Data | 2120 |
2023 | ADJACENT_NETWORK | 491 |
2023 | PHYSICAL | 217 |
2022 | NETWORK | 18114 |
2022 | LOCAL | 6176 |
2022 | Missing_Data | 1449 |
2022 | ADJACENT_NETWORK | 450 |
2022 | PHYSICAL | 242 |
2021 | NETWORK | 14041 |
2021 | LOCAL | 5252 |
2021 | Missing_Data | 1904 |
2021 | ADJACENT_NETWORK | 559 |
2021 | PHYSICAL | 194 |
2020 | NETWORK | 12868 |
2020 | LOCAL | 4457 |
2020 | Missing_Data | 899 |
2020 | ADJACENT_NETWORK | 706 |
2020 | PHYSICAL | 292 |
2019 | Missing_Data | 9964 |
2019 | NETWORK | 6727 |
2019 | LOCAL | 1955 |
2019 | ADJACENT_NETWORK | 162 |
2019 | PHYSICAL | 130 |
2018 | Missing_Data | 16483 |
2018 | NETWORK | 1294 |
2018 | LOCAL | 323 |
2018 | ADJACENT_NETWORK | 34 |
2018 | PHYSICAL | 20 |
2017 | Missing_Data | 16594 |
2017 | NETWORK | 1093 |
2017 | LOCAL | 394 |
2017 | ADJACENT_NETWORK | 18 |
2017 | PHYSICAL | 14 |
2016 | Missing_Data | 5794 |
2016 | NETWORK | 500 |
2016 | LOCAL | 215 |
2016 | ADJACENT_NETWORK | 4 |
2016 | PHYSICAL | 4 |
2015 | Missing_Data | 6561 |
2015 | NETWORK | 29 |
2015 | LOCAL | 5 |
2014 | Missing_Data | 7952 |
2014 | NETWORK | 37 |
2014 | LOCAL | 17 |
2014 | PHYSICAL | 2 |
2013 | Missing_Data | 5302 |
2013 | NETWORK | 16 |
2013 | LOCAL | 5 |
2013 | ADJACENT_NETWORK | 1 |
2012 | Missing_Data | 5288 |
2012 | NETWORK | 39 |
2012 | LOCAL | 22 |
2012 | ADJACENT_NETWORK | 2 |
2011 | Missing_Data | 4148 |
2011 | NETWORK | 17 |
2011 | LOCAL | 5 |
2011 | PHYSICAL | 1 |
2011 | ADJACENT_NETWORK | 1 |
2010 | Missing_Data | 4591 |
2010 | NETWORK | 58 |
2010 | LOCAL | 18 |
2009 | Missing_Data | 5711 |
2009 | NETWORK | 38 |
2009 | LOCAL | 29 |
2008 | Missing_Data | 5610 |
2008 | NETWORK | 43 |
2008 | LOCAL | 11 |
2007 | Missing_Data | 6571 |
2007 | NETWORK | 15 |
2007 | LOCAL | 8 |
2007 | ADJACENT_NETWORK | 1 |
2007 | PHYSICAL | 1 |
2006 | Missing_Data | 6645 |
2006 | NETWORK | 10 |
2006 | LOCAL | 4 |
2005 | Missing_Data | 4958 |
2005 | NETWORK | 35 |
2005 | LOCAL | 17 |
2004 | Missing_Data | 2440 |
2004 | NETWORK | 27 |
2004 | LOCAL | 12 |
2003 | Missing_Data | 1532 |
2003 | NETWORK | 13 |
2003 | LOCAL | 3 |
2002 | Missing_Data | 2118 |
2002 | NETWORK | 33 |
2002 | LOCAL | 19 |
2001 | Missing_Data | 1644 |
2001 | NETWORK | 24 |
2001 | LOCAL | 11 |
2000 | Missing_Data | 1011 |
2000 | NETWORK | 6 |
2000 | LOCAL | 3 |
1999 | Missing_Data | 917 |
1999 | NETWORK | 4 |
1999 | LOCAL | 2 |
1998 | Missing_Data | 244 |
1998 | NETWORK | 2 |
1998 | LOCAL | 1 |
1997 | Missing_Data | 252 |
1997 | NETWORK | 1 |
1996 | Missing_Data | 75 |
1995 | Missing_Data | 25 |
1994 | Missing_Data | 26 |
1993 | Missing_Data | 13 |
1992 | Missing_Data | 14 |
1991 | Missing_Data | 15 |
1990 | Missing_Data | 11 |
1989 | Missing_Data | 3 |
1988 | Missing_Data | 2 |
# Interactive year-over-year trend, one line per attack vector.
fig = px.line(
    av_df,
    x="Year",
    y="AttackVectorCount",
    color="AttackVector",
    markers=True,
)
fig.show()