#!/usr/bin/env python # coding: utf-8 # # 2020 # --- # In[1]: from IPython.core.magic import register_cell_magic from IPython.display import Markdown import datetime from datetime import date import glob import json import logging import matplotlib.pyplot as plt import numpy as np import pandas as pd import plotly import warnings import calplot from itables import init_notebook_mode, show import itables.options as opt opt.dom = "tpir" opt.style = "table-layout:auto;width:auto" init_notebook_mode(all_interactive=True, connected=True) @register_cell_magic def markdown(line, cell): return Markdown(cell.format(**globals())) logging.getLogger('matplotlib.font_manager').disabled = True warnings.filterwarnings("ignore") pd.set_option('display.width', 500) pd.set_option('display.max_rows', 50) pd.set_option('display.max_columns', 10) # In[2]: row_accumulator = [] for filename in glob.glob('nvd.jsonl'): with open(filename, 'r', encoding='utf-8') as f: nvd_data = json.load(f) for entry in nvd_data: cve = entry['cve']['id'] try: assigner = entry['cve']['sourceIdentifier'] except KeyError: assigner = 'Missing_Data' try: published_date = entry['cve']['published'] except KeyError: published_date = 'Missing_Data' try: attack_vector = entry['cve']['metrics']['cvssMetricV31'][0]['cvssData']['attackVector'] except KeyError: attack_vector = 'Missing_Data' try: attack_complexity = entry['cve']['metrics']['cvssMetricV31'][0]['cvssData']['attackComplexity'] except KeyError: attack_complexity = 'Missing_Data' try: privileges_required = entry['cve']['metrics']['cvssMetricV31'][0]['cvssData']['privilegesRequired'] except KeyError: privileges_required = 'Missing_Data' try: user_interaction = entry['cve']['metrics']['cvssMetricV31'][0]['cvssData']['userInteraction'] except KeyError: user_interaction = 'Missing_Data' try: scope = entry['cve']['metrics']['cvssMetricV31'][0]['cvssData']['scope'] except KeyError: scope = 'Missing_Data' try: confidentiality_impact = entry['cve']['metrics']['cvssMetricV31'][0]['cvssData']['confidentialityImpact'] except KeyError: confidentiality_impact = 'Missing_Data' try: integrity_impact = entry['cve']['metrics']['cvssMetricV31'][0]['cvssData']['integrityImpact'] except KeyError: integrity_impact = 'Missing_Data' try: availability_impact = entry['cve']['metrics']['cvssMetricV31'][0]['cvssData']['availabilityImpact'] except KeyError: availability_impact = 'Missing_Data' try: base_score = entry['cve']['metrics']['cvssMetricV31'][0]['cvssData']['baseScore'] except KeyError: base_score = '0.0' try: base_severity = entry['cve']['metrics']['cvssMetricV31'][0]['cvssData']['baseSeverity'] except KeyError: base_severity = 'Missing_Data' try: exploitability_score = entry['cve']['metrics']['cvssMetricV31'][0]['exploitabilityScore'] except KeyError: exploitability_score = 'Missing_Data' try: impact_score = entry['cve']['metrics']['cvssMetricV31'][0]['impactScore'] except KeyError: impact_score = 'Missing_Data' try: cwe = entry['cve']['weaknesses'][0]['description'][0]['value'] except KeyError: cwe = 'Missing_Data' try: description = entry['cve']['descriptions'][0]['value'] except IndexError: description = '' new_row = { 'CVE': cve, 'Published': published_date, 'AttackVector': attack_vector, 'AttackComplexity': attack_complexity, 'PrivilegesRequired': privileges_required, 'UserInteraction': user_interaction, 'Scope': scope, 'ConfidentialityImpact': confidentiality_impact, 'IntegrityImpact': integrity_impact, 'AvailabilityImpact': availability_impact, 'BaseScore': base_score, 'BaseSeverity': base_severity, 'ExploitabilityScore': exploitability_score, 'ImpactScore': impact_score, 'CWE': cwe, 'Description': description, 'Assigner' : assigner } if not description.startswith('rejected reason'): row_accumulator.append(new_row) nvd = pd.DataFrame(row_accumulator) nvd['Published'] = pd.to_datetime(nvd['Published']) thisyear = ((nvd['Published'] > '2020-01-01') & (nvd['Published'] < '2021-01-01')) nvd = nvd.loc[thisyear] nvd = nvd.sort_values(by=['Published']) nvd = nvd.reset_index(drop=True) nvd['BaseScore'] = pd.to_numeric(nvd['BaseScore']); nvd['BaseScore'] = pd.to_numeric(nvd['BaseScore']); nvd['BaseScore'] = nvd['BaseScore'].replace(0, np.NaN); nvdcount = nvd['Published'].count() nvdunique = nvd['Published'].nunique() startdate = date(2020, 1, 1) enddate = date(2020, 12, 31) numberofdays = enddate - startdate per_day = nvdcount/numberofdays.days # In[3]: Markdown(f"Total Number of CVEs: **{nvd['CVE'].count()}**
Average CVEs Per Day: **{per_day.round(2)}**
Average CVSS Score: **{nvd['BaseScore'].mean().round(2)}**") # # # ## CVE Graphs # In[4]: Month_Graph = nvd['Published'].groupby(nvd.Published.dt.to_period("M")).agg('count') Year_Graph = nvd['Published'].groupby(nvd.Published.dt.to_period("Y")).agg('count') Week_Graph = nvd['Published'].groupby(nvd.Published.dt.to_period("W")).agg('count') Day_Graph = nvd['Published'].groupby(nvd.Published.dt.to_period("D")).agg('count') # ### CVE Calendar # In[5]: dfs = nvd['Published'].apply(lambda x: pd.to_datetime(x, errors='coerce', format='%Y/%m/%d')) df = dfs.value_counts() df = df.to_frame() df.index = df.index.strftime('%m/%d/%Y') df.index = pd.to_datetime(df.index, format='%m/%d/%Y') calplot.calplot(df.T.squeeze(), cmap='jet', dropzero=True, edgecolor="Grey", textcolor="White", textformat='{:.0f}', textfiller='', suptitle='CVEs Per Day', figsize=(25,3)); # ### CVE Per Month Graph # In[6]: cg = Month_Graph.plot.area(colormap='jet', figsize=(16, 8), title='CVEs Per Month') plt.grid() cg.set_ylabel("New CVEs"); cg.set_xlabel("Date"); # # # ### CVE Per Week Graph # In[7]: cg = Week_Graph.plot.area(colormap='jet', figsize=(16, 8), title='CVEs Per Week') plt.grid() cg.set_ylabel("New CVEs"); cg.set_xlabel("Date"); # # # ### CVE Per Day Graph # In[8]: cg = Day_Graph.plot.area(colormap='jet', figsize=(16, 8), title='CVEs Per Day') plt.grid() cg.set_ylabel("New CVEs"); cg.set_xlabel("Date"); # # ## CVSS Data # In[9]: nvd['BaseScore'].plot(kind="hist", colormap='jet', figsize=(16, 8), title='CVSS Scores'); # ## CNA Data # # ### CNA Assigner Graph # In[10]: nvd_frequency = nvd['Assigner'].value_counts() nvd_frequency = nvd_frequency.reset_index() nvd_frequency.columns = ['Assigner', 'CVEs'] nvd_frequency['Percentage'] = round((nvd_frequency['CVEs'] / nvd_frequency['CVEs'].sum()) * 100) nvd_frequency[nvd_frequency.CVEs > 100].head(50) nvd_frequency_no_mitre = nvd_frequency[~nvd_frequency.Assigner.str.contains('cve@mitre.org')] nvd_frequency_no_mitre = nvd_frequency_no_mitre[nvd_frequency_no_mitre.CVEs > 1].head(20) plt.figure(figsize=(10,10)) plt.barh("Assigner", "CVEs", data = nvd_frequency_no_mitre, color="#001d82") plt.xlabel("CVEs"); plt.ylabel("") ; plt.title("Top 20 CNAs"); # # # ## CWE Data # In[11]: nvd_cwe = nvd['CWE'].value_counts() nvd_cwe = nvd_cwe.reset_index() nvd_cwe.columns = ['CWE', 'CVEs'] nvd_cwe_graph = nvd_cwe[nvd_cwe.CVEs > 100].head(25) plt.figure(figsize=(10,10)); plt.barh("CWE", "CVEs", data = nvd_cwe_graph, color="#001d82"); plt.xlabel("CVEs"); plt.ylabel("CWE") ; plt.title("Most Common CWE in CVE Records"); # # # ## More CVE Data # # # # ### Top CNA Assigner # In[12]: show(nvd_frequency, scrollY="400px", scrollCollapse=True, paging=False) # ### Top CWEs # In[13]: show(nvd_cwe, scrollY="400px", scrollCollapse=True, paging=False) # # # # ### CVEs By Identifier # In[14]: print("CVE-1999:\t%s" % len(nvd[nvd['CVE'].str.contains('CVE-1999-')])) print("CVE-2000:\t%s" % len(nvd[nvd['CVE'].str.contains('CVE-2000-')])) print("CVE-2001:\t%s" % len(nvd[nvd['CVE'].str.contains('CVE-2001-')])) print("CVE-2002:\t%s" % len(nvd[nvd['CVE'].str.contains('CVE-2002-')])) print("CVE-2003:\t%s" % len(nvd[nvd['CVE'].str.contains('CVE-2003-')])) print("CVE-2004:\t%s" % len(nvd[nvd['CVE'].str.contains('CVE-2004-')])) print("CVE-2005:\t%s" % len(nvd[nvd['CVE'].str.contains('CVE-2005-')])) print("CVE-2006:\t%s" % len(nvd[nvd['CVE'].str.contains('CVE-2006-')])) print("CVE-2007:\t%s" % len(nvd[nvd['CVE'].str.contains('CVE-2007-')])) print("CVE-2008:\t%s" % len(nvd[nvd['CVE'].str.contains('CVE-2008-')])) print("CVE-2009:\t%s" % len(nvd[nvd['CVE'].str.contains('CVE-2009-')])) print("CVE-2010:\t%s" % len(nvd[nvd['CVE'].str.contains('CVE-2010-')])) print("CVE-2011:\t%s" % len(nvd[nvd['CVE'].str.contains('CVE-2011-')])) print("CVE-2012:\t%s" % len(nvd[nvd['CVE'].str.contains('CVE-2012-')])) print("CVE-2013:\t%s" % len(nvd[nvd['CVE'].str.contains('CVE-2013-')])) print("CVE-2014:\t%s" % len(nvd[nvd['CVE'].str.contains('CVE-2014-')])) print("CVE-2015:\t%s" % len(nvd[nvd['CVE'].str.contains('CVE-2015-')])) print("CVE-2016:\t%s" % len(nvd[nvd['CVE'].str.contains('CVE-2016-')])) print("CVE-2017:\t%s" % len(nvd[nvd['CVE'].str.contains('CVE-2017-')])) print("CVE-2018:\t%s" % len(nvd[nvd['CVE'].str.contains('CVE-2018-')])) print("CVE-2019:\t%s" % len(nvd[nvd['CVE'].str.contains('CVE-2019-')])) print("CVE-2020:\t%s" % len(nvd[nvd['CVE'].str.contains('CVE-2020-')])) # In[15]: Markdown(f"This report is updated automatically every day, last generated on: **{datetime.datetime.now()}**")