# Import necessary libraries
from IPython.core.magic import register_cell_magic
from IPython.display import Markdown
import datetime
from datetime import date
import glob
import json
import logging
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import warnings
from itables import init_notebook_mode, show
import itables.options as opt
# Initialize itables options
opt.dom = "tpir"
opt.style = "table-layout:auto;width:auto"
init_notebook_mode(all_interactive=True, connected=True)
# Register a custom cell magic for markdown
@register_cell_magic
def markdown(line, cell):
    return Markdown(cell.format(**globals()))
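# A minimal usage sketch (illustrative only; `nvdcount` is defined further down in this
# notebook): a cell beginning with the magic renders its body as Markdown, with globals
# interpolated via str.format, e.g.
#   %%markdown
#   There are **{nvdcount}** CVEs in this dataset.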
# Configure logging and warnings
logging.getLogger('matplotlib.font_manager').disabled = True
warnings.filterwarnings("ignore")
# Configure pandas display options
pd.set_option('display.width', 500)
pd.set_option('display.max_rows', 50)
pd.set_option('display.max_columns', 10)
def get_nested_value(entry, keys, default='Missing_Data'):
    try:
        for key in keys:
            entry = entry[key]
        return entry
    except (KeyError, IndexError):
        return default
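# Quick illustration of the fallback behaviour (made-up record, not real data):
# example_entry = {'cve': {'id': 'CVE-2017-0001'}}
# get_nested_value(example_entry, ['cve', 'metrics', 'cvssMetricV31', 0, 'cvssData', 'baseScore'])  # -> 'Missing_Data'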
row_accumulator = []
for filename in glob.glob('nvd.jsonl'):
    with open(filename, 'r', encoding='utf-8') as f:
        nvd_data = json.load(f)
        for entry in nvd_data:
            new_row = {
                'CVE': get_nested_value(entry, ['cve', 'id']),
                'Published': get_nested_value(entry, ['cve', 'published']),
                'AttackVector': get_nested_value(entry, ['cve', 'metrics', 'cvssMetricV31', 0, 'cvssData', 'attackVector']),
                'AttackComplexity': get_nested_value(entry, ['cve', 'metrics', 'cvssMetricV31', 0, 'cvssData', 'attackComplexity']),
                'PrivilegesRequired': get_nested_value(entry, ['cve', 'metrics', 'cvssMetricV31', 0, 'cvssData', 'privilegesRequired']),
                'UserInteraction': get_nested_value(entry, ['cve', 'metrics', 'cvssMetricV31', 0, 'cvssData', 'userInteraction']),
                'Scope': get_nested_value(entry, ['cve', 'metrics', 'cvssMetricV31', 0, 'cvssData', 'scope']),
                'ConfidentialityImpact': get_nested_value(entry, ['cve', 'metrics', 'cvssMetricV31', 0, 'cvssData', 'confidentialityImpact']),
                'IntegrityImpact': get_nested_value(entry, ['cve', 'metrics', 'cvssMetricV31', 0, 'cvssData', 'integrityImpact']),
                'AvailabilityImpact': get_nested_value(entry, ['cve', 'metrics', 'cvssMetricV31', 0, 'cvssData', 'availabilityImpact']),
                'BaseScore': get_nested_value(entry, ['cve', 'metrics', 'cvssMetricV31', 0, 'cvssData', 'baseScore'], '0.0'),
                'BaseSeverity': get_nested_value(entry, ['cve', 'metrics', 'cvssMetricV31', 0, 'cvssData', 'baseSeverity']),
                'ExploitabilityScore': get_nested_value(entry, ['cve', 'metrics', 'cvssMetricV31', 0, 'exploitabilityScore']),
                'ImpactScore': get_nested_value(entry, ['cve', 'metrics', 'cvssMetricV31', 0, 'impactScore']),
                'CWE': get_nested_value(entry, ['cve', 'weaknesses', 0, 'description', 0, 'value']),
                'Description': get_nested_value(entry, ['cve', 'descriptions', 0, 'value'], ''),
                'Assigner': get_nested_value(entry, ['cve', 'sourceIdentifier']),
                'Tag': get_nested_value(entry, ['cve', 'cveTags', 0, 'tags'], np.nan),
                'Status': get_nested_value(entry, ['cve', 'vulnStatus'], '')
            }
            row_accumulator.append(new_row)
nvd = pd.DataFrame(row_accumulator)
nvd = nvd[~nvd.Status.str.contains('Rejected')]
nvd['Published'] = pd.to_datetime(nvd['Published'])
nvd = nvd.sort_values(by=['Published'])
thisyear = ((nvd['Published'] > '2017-01-01') & (nvd['Published'] < '2018-01-01'))
nvd = nvd.loc[thisyear]
nvd = nvd.sort_values(by=['Published'])
nvd = nvd.reset_index(drop=True)
nvd['BaseScore'] = pd.to_numeric(nvd['BaseScore'])
nvd['BaseScore'] = nvd['BaseScore'].replace(0, np.nan)
nvdcount = nvd['Published'].count()
nvdunique = nvd['Published'].nunique()
startdate = date(2017, 1, 1)
enddate = date(2018, 1, 1)
numberofdays = enddate - startdate
per_day = nvdcount/numberofdays.days
Markdown(f"Total Number of CVEs: **{nvd['CVE'].count()}**<br />Average CVEs Per Day: **{per_day.round(2)}**<br />Average CVSS Score: **{nvd['BaseScore'].mean().round(2)}**")
Total Number of CVEs: 14642
Average CVEs Per Day: 40.12
Average CVSS Score: 7.49
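# Aggregate publication counts by month, year, week, and day for the trend charts below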
Month_Graph = nvd['Published'].groupby(nvd.Published.dt.to_period("M")).agg('count')
Year_Graph = nvd['Published'].groupby(nvd.Published.dt.to_period("Y")).agg('count')
Week_Graph = nvd['Published'].groupby(nvd.Published.dt.to_period("W")).agg('count')
Day_Graph = nvd['Published'].groupby(nvd.Published.dt.to_period("D")).agg('count')
cg = Month_Graph.plot.area(colormap='cividis', figsize=(16, 8), title='Number of CVEs Published Per Month')
plt.grid(True, linestyle='--', linewidth=0.5)
cg.set_ylabel("CVEs")
cg.set_xlabel("Month")
# Add watermark
plt.text(0.01, 0.01, 'cve.icu', transform=cg.transAxes, fontsize=12, color='gray', alpha=0.5)
# Adjust layout and display the chart
plt.tight_layout()
plt.show()
cg = Week_Graph.plot.area(colormap='cividis', figsize=(16, 8), title='Number of CVEs Published Per Week')
plt.grid(True, linestyle='--', linewidth=0.5)
cg.set_ylabel("CVEs")
cg.set_xlabel("Week")
# Add watermark
plt.text(0.01, 0.01, 'cve.icu', transform=cg.transAxes, fontsize=12, color='gray', alpha=0.5)
# Adjust layout and display the chart
plt.tight_layout()
plt.show()
cg = Day_Graph.plot.area(colormap='cividis', figsize=(16, 8), title='Number of CVEs Published Per Day')
plt.grid(True, linestyle='--', linewidth=0.5)
cg.set_ylabel("CVEs")
cg.set_xlabel("Day")
# Add watermark
plt.text(0.01, 0.01, 'cve.icu', transform=cg.transAxes, fontsize=12, color='gray', alpha=0.5)
# Adjust layout
plt.tight_layout()
plt.show()
nvd['BaseScore'] = pd.to_numeric(nvd['BaseScore'])
nvd['BaseScore'] = nvd['BaseScore'].replace(0, np.nan)
# Plot the CVSS breakdown with 100 bins
cg = nvd['BaseScore'].plot(kind="hist", bins=100, title='CVSS Breakdown', colormap='cividis', figsize=(16, 8))
plt.grid(True, linestyle='--', linewidth=0.5)
cg.set_ylabel("CVEs")
cg.set_xlabel("CVSS Score")
# Find the most common, least common, and average CVSS scores
most_common_score = nvd['BaseScore'].mode()[0]
least_common_score = nvd['BaseScore'].value_counts().idxmin()
most_common_count = nvd['BaseScore'].value_counts().max()
least_common_count = nvd['BaseScore'].value_counts().min()
average_score = nvd['BaseScore'].mean().round(2)
# Add annotation for the most common, least common, and average CVSS scores
annotation_text = (f'Most Common: {most_common_score} ({most_common_count} CVEs)\n'
f'Least Common: {least_common_score} ({least_common_count} CVEs)\n'
f'Average Score: {average_score}')
plt.text(0.01, 0.98, annotation_text, transform=cg.transAxes, fontsize=10, color='black',
verticalalignment='top', bbox=dict(boxstyle="round,pad=0.3", edgecolor='black', facecolor='white'))
# Adjust layout and display the chart
plt.tight_layout()
plt.show()
# Replace specific assigner ID with email
nvd['Assigner'] = nvd['Assigner'].replace('416baaa9-dc9f-4396-8d5f-8c081fb06d67', 'cve@kernel.org')
# Extract domain names and check for uniqueness
nvd['Domain'] = nvd['Assigner'].apply(lambda x: x.split('@')[-1])
domain_counts = nvd['Domain'].value_counts()
# Modify Assigner column based on domain uniqueness
unique_domains = nvd.groupby('Domain')['Assigner'].nunique()
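# Collapse each assigner e-mail to its bare domain when that domain maps to a single
# assigner ID; otherwise keep the local part for disambiguation, e.g. (hypothetical)
# 'secure@example.com' -> 'example.com', or 'example.com (secure)' if the domain is shared.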
nvd['Assigner'] = nvd.apply(lambda x: x['Domain'] if unique_domains[x['Domain']] == 1 else f"{x['Domain']} ({x['Assigner'].split('@')[0]})", axis=1)
# Calculate frequency of assigners
nvd_frequency = nvd['Assigner'].value_counts().reset_index()
nvd_frequency.columns = ['Assigner', 'counts']
nvd_frequency = nvd_frequency[nvd_frequency.counts > 100].head(50)
# Calculate the number of CVEs published by mitre.org
mitre_cves = nvd_frequency[nvd_frequency['Assigner'].str.contains('mitre.org')]['counts'].sum()
# Remove mitre.org from the frequency list
nvd_frequency_no_mitre = nvd_frequency[~nvd_frequency.Assigner.str.contains('mitre.org')]
nvd_frequency_no_mitre = nvd_frequency_no_mitre[nvd_frequency_no_mitre.counts > 100].head(20)
# Plot the top 20 CNAs
plt.figure(figsize=(16, 8))
plt.barh("Assigner", "counts", data=nvd_frequency_no_mitre, color="#001d82")
plt.xlabel("CVEs")
plt.ylabel("Assigner")
plt.title("Top 20 CNAs")
plt.grid(True, linestyle='--', linewidth=0.5)
# Add a text box indicating mitre.org has been removed and the number of CVEs they published
textstr = f'{mitre_cves:,} CVEs published by MITRE not shown'
plt.text(0.99, 0.98, textstr, transform=plt.gca().transAxes, fontsize=10,
verticalalignment='top', horizontalalignment='right',
bbox=dict(boxstyle="round,pad=0.3", edgecolor='black', facecolor='white'))
# Adjust layout and show the chart
plt.tight_layout()
plt.show()
# Calculate frequency of CWEs
nvd_cwe = nvd['CWE'].value_counts().reset_index()
nvd_cwe.columns = ['CWE', 'counts']
nvd_cwe = nvd_cwe[~nvd_cwe.CWE.str.contains('Missing_')]
nvd_cwe = nvd_cwe[nvd_cwe.counts > 100].head(25)
# Plot the most common CWEs
plt.figure(figsize=(16, 8))
plt.barh("CWE", "counts", data=nvd_cwe, color="#001d82")
plt.xlabel("Count")
plt.ylabel("CWE")
plt.title("Most Common CWE in CVE Records")
plt.grid(True, linestyle='--', linewidth=0.5)
# Adjust layout and show the chart
plt.tight_layout()
plt.show()
nvd_frequency.reset_index(drop=True, inplace=True)
show(nvd_frequency, scrollCollapse=True, paging=True)
nvd_cwe.reset_index(drop=True, inplace=True)
show(nvd_cwe, scrollCollapse=True, paging=True)
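# Count CVEs by the year embedded in the CVE ID (CVE-YYYY-NNNN); IDs assigned in earlier
# years can still be published in 2017, so several ID years may appear in the table below.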
years = range(1980, 2025)
cve_counts = []
for year in years:
count = len(nvd[nvd['CVE'].str.contains(f'CVE-{year}-')])
cve_counts.append({'Year': year, 'Count': count})
cve_df = pd.DataFrame(cve_counts)
# Filter out rows with a count of 0
cve_df = cve_df[cve_df['Count'] > 0]
# Reset the index
cve_df.reset_index(drop=True, inplace=True)
# Display the DataFrame without the index column
show(cve_df, scrollCollapse=True, paging=False)
Markdown(f"This report is updated automatically every day, last generated on: **{datetime.datetime.now()}**")
This report is updated automatically every day, last generated on: 2025-06-04 20:08:26.777307