# @markdown Only execute if not already installed and running a cloud runtime !pip install -q timesketch_api_client !pip install -q vt-py nest_asyncio pandas !pip install -q picatrix # @title Import libraries # @markdown This cell will import all the libraries needed for the running of this colab. import re import requests import pandas as pd from timesketch_api_client import config from picatrix import notebook_init import vt import nest_asyncio # https://github.com/VirusTotal/vt-py/issues/21 nest_asyncio.apply() notebook_init.init() # @title VirusTotal Configuration # @markdown In order to be able to lookup domains/IPs/samples using VirtusTotal we need to get an API key. # @markdown # @markdown If you don't have an API key you must sign up to [VirusTotal Community](https://www.virustotal.com/gui/join-us). # @markdown Once you have a valid VirusTotal Community account you will find your personal API key in your personal settings section. VT_API_KEY = '' # @param {type: "string"} # @markdown If you don't have the API key you will not be able to use the Virustotal API # @markdown to lookup information. # @title Declare functions # @markdown This cell will define few functions that we will use throughout # @markdown this colab. This would be better to define outside of the notebook # @markdown in a library that would be imported, but we keep it here for now. def print_dict(my_dict, space_before=0): """Print the content of a dictionary.""" max_len = max([len(x) for x in my_dict.keys()]) spaces = ' '*space_before format_str = f'{spaces}{{key:{max_len}s}} = {{value}}' for key, value in my_dict.items(): if isinstance(value, dict): print(format_str.format(key=key, value='')) print_dict(value, space_before=space_before + 8) elif isinstance(value, list): value_str = ', '.join(value) print(format_str.format(key=key, value=value_str)) else: print(format_str.format(key=key, value=value)) def ip_info(address): """Print out information about an IP address using the VT API.""" url = 'https://www.virustotal.com/vtapi/v2/ip-address/report' params = { 'apikey': VT_API_KEY, 'ip': address} response = requests.get(url, params=params) j_obj = response.json() def _print_stuff(part): print('') header = part.replace('_', ' ').capitalize() print(f'{header}:') for item in j_obj.get(part, []): print_dict(item, 2) _print_stuff('resolutions') _print_stuff('detected_urls') _print_stuff('detected_referrer_samples') _print_stuff('detected_communicating_samples') _print_stuff('detected_downloaded_samples') # @markdown Get a copy of the Timesketch client object. # @markdown Parameters to configure the client: # @markdown + host_uri: https://demo.timesketch.org # @markdown + username: demo # @markdown + auth_mode: timesketch (username/password) # @markdown + password: demo ts_client = config.get_client(confirm_choices=True) for sketch in ts_client.list_sketches(): if not sketch.name.startswith('Szechuan'): continue print('We found the sketch to use') print(f'[{sketch.id}] {sketch.name} - {sketch.description}') break %timesketch_set_active_sketch 6 timesketch_list_saved_searches_func? for status in sketch.get_analyzer_status(): print(f'Analyzer: {status["analyzer"]} - status: {status["status"]}') print(f'Results: {status["results"]}') print('') search_query = timesketch_query_func( 'parser:"winreg/windows_version"', fields='datetime,key_path,data_type,message,timestamp_desc,parser,display_name,product_name,hostname,timestamp_desc' ) cur_df = search_query.table cur_df[['hostname', 'product_name']] cur_df[cur_df.hostname == 'CITADEL-DC01'].product_name.value_counts() cur_df[cur_df.hostname == 'DESKTOP-SDN1RPT'].product_name.value_counts() cur_df = timesketch_query_func( 'HKEY_LOCAL_MACHINE*System*Select AND hostname:"CITADEL-DC01"', fields=( 'datetime,key_path,data_type,message,timestamp_desc,parser,display_name,' 'product_name,hostname,timestamp_desc,values') ).table for key, value in cur_df[['key_path', 'values']].values: print(f'Key: {key}') print(f'Value: {value}') cur_df['current_value'] = cur_df['values'].str.extract(r'Current: \[[A-Z_]+\] (\d) ') cur_df[['key_path', 'current_value']] cur_df = timesketch_query_func( 'TimeZoneInformation AND hostname:"CITADEL-DC01"', fields='datetime,key_path,data_type,message,timestamp_desc,parser,display_name,product_name,hostname,timestamp_desc,configuration' ).table cur_df pd.set_option('max_colwidth', 400) cur_df[cur_df.key_path.str.contains('ControlSet001')][['configuration']] lines = [] for value in cur_df[cur_df.key_path.str.contains('ControlSet001')]['configuration'].values: items = value.split(':') line_dict = {} key = items[0] for item in items[1:-1]: *values, new_key = item.split() line_dict[key] = ' '.join(values) key = new_key line_dict[key] = items[-1] lines.append(line_dict) time_df = pd.DataFrame(lines) time_df %timesketch_available_aggregators params = { 'field': 'Source', 'limit': 10, 'supported_charts': 'hbarchart', 'chart_title': 'Top 10 Source IP', } aggregation = timesketch_run_aggregator_func( 'field_bucket', parameters=params ) aggregation.chart # Remove the commend and run this code if you are running in colab # but have a local Jupyter kernel running: # alt.renderers.enable('colab') # Remove this comment if you are running in Jupyter and the chart is not displayed # alt.renderers.enable('notebook') aggregation.table params = { 'field': 'Destination', 'limit': 10, 'supported_charts': 'hbarchart', 'chart_title': 'Top 10 Source IP', } aggregation = timesketch_run_aggregator_func('field_bucket', parameters=params) aggregation.chart attacker_dst = timesketch_query_func( 'Source:"194.61.24.102" AND data_type:"pcap:wireshark:entry"', fields='datetime,message,timestamp_desc,Destination,DST port,Source,Protocol,src port').table attacker_dst.head(10) search_obj = timesketch_query_func( 'Source:"194.61.24.102" AND data_type:"pcap:wireshark:entry"', fields='datetime,message,timestamp_desc,Destination,DST port,Source,Protocol,src port') search_obj.max_entries = 150000 attacker_dst = search_obj.table attacker_dst.head(10) attacker_dst.shape attacker_group = attacker_dst[['DST port','Destination', 'Protocol']].groupby( ['DST port','Destination'], as_index=False) attacker_dst_mytable = attacker_group.count() attacker_dst_mytable.rename(columns={'Protocol': 'Count'}, inplace=True) attacker_dst_mytable.sort_values(by=['Count'], ascending=False) attacker_dst = timesketch_query_func( '194.61.24.102 AND data_type:"scapy:pcap:entry"', fields='datetime,message,timestamp_desc,ip_flags,ip_dst,ip_src,payload,tcp_flags,tcp_seq,tcp_sport,tcp_dport,tcp_window').table attacker_dst.head(10) params = { 'field': 'ip_src', 'query_string': 'ip_flags:"evil"', 'supported_charts': 'hbarchart', 'chart_title': 'Source IPs with "evil" bit set', } aggregation = timesketch_run_aggregator_func('query_bucket', parameters=params) aggregation.table name = 'Source IPs with "evil" bit set' aggregation.name = name aggregation.title = name aggregation.save() attacker_dst.iloc[0].message attacker_dst.message.str.slice(start=0, stop=30).unique() attacker_packages = attacker_dst.message.str.slice(start=30).str.split('|', expand=True) attacker_packages.head(3) attacker_packages = attacker_packages[[0, 1, 2]] attacker_packages.columns = ['ether', 'ip', 'transport'] attacker_packages.head(3) attacker_packages[['transport']].head(10) def parse_row(row): items = row.split() protocol = items[0][1:] line_dict = { 'protocol': protocol } for item in items[1:]: key, _, value = item.partition('=') if key == 'options': # We don't want options nor anything after that. break line_dict[key] = value return line_dict proto_df = pd.DataFrame(list(attacker_packages['transport'].apply(parse_row).values)) proto_df['datetime'] = attacker_dst['datetime'] proto_df.head(3) proto_df[['datetime', 'protocol', 'type', 'dport']].head(10) attacker_dst = timesketch_query_func( '(194.61.24.102 AND 10.42.85.10) AND data_type:"scapy:pcap:entry"', fields='datetime,message,timestamp_desc,ip_flags,ip_dst,ip_src,payload,tcp_flags,tcp_seq,tcp_sport,tcp_dport,tcp_window', max_entries=500000).table attacker_dst.head(10) attacker_packages = attacker_dst.message.str.slice(start=30).str.split('|', expand=True) attacker_packages = attacker_packages[[0, 1, 2]] attacker_packages.columns = ['ether', 'ip', 'transport'] proto_df = pd.DataFrame(list(attacker_packages['transport'].apply(parse_row).values)) proto_df['datetime'] = attacker_dst['datetime'] proto_df[['datetime', 'protocol', 'type', 'dport']].head(10) evtx_df = timesketch_query_func( '194.61.24.102 AND data_type:"windows:evtx:record"', fields='*').table evtx_df.head(3) evtx_df.username.value_counts() evtx_df.event_identifier.value_counts() evtx_df.source_name.value_counts() evtx_df[evtx_df.username == 'Administrator'][['datetime', 'event_identifier', 'tag', 'logon_type', 'source_address']] timesketch_query_func( 'source_address:"194.61.24.102" AND data_type:"windows:evtx:record"', fields='logon_type,username').table[['logon_type', 'username']].drop_duplicates() timeframe_df = timesketch_query_func( '*', start_date='2020-09-19T01:00:00', end_date='2020-09-19T04:20:00', max_entries=50000 ).table max_entries = 1500000 timeframe_df = timesketch_query_func( '*', start_date='2020-09-19T01:00:00', end_date='2020-09-19T04:20:00', max_entries=max_entries, fields='*' ).table timeframe_df.shape timeframe_df.data_type.value_counts() group = timeframe_df[ timeframe_df.data_type == 'windows:evtx:record'][['event_identifier', 'timestamp', 'source_name']].groupby( by=['event_identifier', 'source_name'], as_index=False ) group.count().rename(columns={'timestamp': 'count'}).sort_values('count', ascending=False) timeframe_evtx = timeframe_df[timeframe_df.data_type == 'windows:evtx:record'].copy() timeframe_evtx['event_identifier'] = timeframe_evtx.event_identifier.fillna(value=0) timeframe_evtx[timeframe_evtx.event_identifier == 36888].strings.str.join('|').unique() timeframe_evtx = timeframe_df[timeframe_df.data_type == 'windows:evtx:record'].copy() timeframe_evtx['event_identifier'] = timeframe_evtx.event_identifier.fillna(value=0) timeframe_evtx[(timeframe_evtx.event_identifier == 131) & (timeframe_evtx.source_name == 'Microsoft-Windows-RemoteDesktopServices-RdpCoreTS')].strings.str.join('|').unique() timeframe_df.data_type.value_counts() timeframe_df[timeframe_df.data_type == 'windows:prefetch:execution'][['datetime', 'executable', 'run_count']] timeframe_df[timeframe_df.data_type == 'windows:prefetch:execution'].executable.value_counts() timeframe_df[(timeframe_df.data_type == 'windows:prefetch:execution') & (~timeframe_df.run_count.isna()) & (timeframe_df.run_count < 2)][['executable', 'run_count']].drop_duplicates() timeframe_evtx[(timeframe_evtx.event_identifier == 7045) & (timeframe_evtx.source_name == 'Service Control Manager')]['strings'] attacker_dst_http = timesketch_query_func( '(194.61.24.102 AND 10.42.85.10) AND data_type:"scapy:pcap:entry" AND *http* AND *GET*', fields='datetime,message,timestamp_desc,ip_flags,ip_dst,ip_src,payload,tcp_flags,tcp_seq,tcp_sport,tcp_dport,tcp_window').table attacker_dst_http.head(4) attacker_dst.shape attacker_dst_http[attacker_dst_http.message.str.contains(r'GET|POST')].message.str.extract(r'