from IPython.display import HTML
# NOTE(review): this builds an empty HTML fragment; the original markup looks
# like it was stripped during export — confirm whether it can be removed.
HTML('')
# Change into the directory holding the capture (IPython automagic `cd`).
cd pcap/
# Enable inline plots and pull the pylab names used below (title, ylabel,
# xlabel) into the interactive namespace.
%pylab inline
import pandas as pd
# Have tshark write frame.number and frame.len for each packet of
# nitroba.pcap, with a header row, into the file "frame.len".
!tshark -n -r nitroba.pcap -T fields -Eheader=y -e frame.number -e frame.len > frame.len
# Load the tshark output; the header row supplies the column names.
df=pd.read_table("frame.len")
df
# Summary statistics of the frame lengths.
df["frame.len"].describe()
# Plot each frame length as a semi-transparent dot.
# NOTE(review): the x axis is the DataFrame's default row index, not the
# frame.number column — confirm the xlabel below is accurate enough.
df["frame.len"].plot(style=".", alpha=0.2)
title("Frame length")
ylabel("bytes")
xlabel("frame number")
def shark(pcap_file, fields=[], readfilter="", notnull=True):
fields = ["frame.time_epoch"] + fields
fieldspec = " ".join("-e %s" % f for f in fields)
readfilters = fields if notnull else []
if readfilter:
readfilters.append(readfilter)
readspec = "-R '%s'" % " and ".join(f for f in readfilters)
!tshark -r $pcap_file -n -T fields -Eheader=y $readspec $fieldspec > tmp.txt
df = pd.read_table("tmp.txt", index_col = "frame.time_epoch", parse_dates=True, date_parser=datetime.datetime.fromtimestamp)
return df
# Frame length of every packet, indexed by capture timestamp.
surflen=shark("nitroba.pcap", ["frame.len"])
surflen
surflen.plot()
# Sum the frame lengths in one-second bins to get throughput.
# NOTE(review): `how=` is the legacy resample API; newer pandas spells this
# .resample("S").sum() — confirm which pandas version this notebook targets.
bytes_per_second=surflen.resample("S", how="sum")
bytes_per_second.head()
bytes_per_second.plot(title="bytes/s")
# TCP acknowledgement numbers; shark() defaults to notnull=True, so tcp.ack is
# also used as a filter term.
tf=shark("nitroba.pcap", ["tcp.ack"])
tf
# Peek at the raw tshark output that the shark() call above wrote.
!head tmp.txt
tf.plot()
# NOTE(review): pandas.tools.plotting is the legacy module path for lag_plot;
# newer pandas exposes it as pandas.plotting — confirm target version.
from pandas.tools.plotting import lag_plot
lag_plot(tf["tcp.ack"])
tf["tcp.ack"].plot()
# SSL handshake lengths.
ss=shark("nitroba.pcap", ["ssl.handshake.length"])
ss
# Split each value on "," and keep the first piece as an int — presumably a
# packet can report several handshake lengths in one field; verify.
hlen=ss["ssl.handshake.length"].map(lambda x: int(x.split(",")[0]))
hlen.plot()
hlen.head()
hlen.describe()
# Earlier direct tshark invocation for the trailer data, kept for reference;
# the shark() call below is what actually runs.
#!tshark -n -r nitroba.pcap -R "eth.trailer" -T fields -Eheader=y -e frame.number -e frame.time_epoch -e eth.src -e eth.trailer > eth.trailer
# Source MAC address and Ethernet trailer of each packet.
trailer_df = shark("nitroba.pcap", ["eth.src", "eth.trailer"])
trailer_df
trailer=trailer_df["eth.trailer"]
trailer
# How often each distinct trailer value occurs.
trailer.value_counts()
import binascii
def unhex(s, sep=":"):
    """Decode a separator-delimited hex string into its raw bytes.

    *s* is split on *sep*, the pieces are concatenated, and the resulting
    run of hex digits is passed to ``binascii.unhexlify``.
    """
    hex_pairs = s.split(sep)
    hex_digits = "".join(hex_pairs)
    return binascii.unhexlify(hex_digits)
# Try unhex() on a sample colon-separated hex string.
s=unhex("3b:02:a7:19:aa:aa:03:00:80:c2:00:07:00:00:00:02:3b:02")
s
# Add a column holding the decoded form of each eth.trailer value.
trailer_df["unhex"]=trailer_df["eth.trailer"].map(unhex)
trailer_df
def printable(s):
    """Return *s* with every non-alphanumeric character replaced by ``"."``.

    Each element of *s* is kept when its ``isalnum()`` is true and swapped
    for a dot otherwise; an empty input gives an empty string.
    """
    return "".join(ch if ch.isalnum() else "." for ch in s)
# Try printable() on a short sample string.
printable("\x95asd\x33")
# Add a column with the dot-masked form of each decoded trailer.
trailer_df["printable"]=trailer_df["unhex"].map(printable)
# How often each masked trailer occurs.
trailer_df["printable"].value_counts()
# Save the masked trailers to printable.csv without the index.
trailer_df["printable"].to_csv("printable.csv", index=False)
def ratio_printable(s):
    """Return the fraction of characters in *s* that are alphanumeric.

    The result is always a float between 0.0 and 1.0. An empty *s* yields
    0.0 instead of raising ``ZeroDivisionError``.
    """
    # Nothing to measure: define the ratio of an empty input as zero.
    if not s:
        return 0.0
    alnum_count = sum(1 for c in s if c.isalnum())
    # float() keeps the division (and the zero-match case) floating point.
    return float(alnum_count) / len(s)
# Try ratio_printable() on a short sample string.
ratio_printable("a\x93sdfs")
# Add a column with the alphanumeric fraction of each decoded trailer.
trailer_df["ratio_printable"] = trailer_df["unhex"].map(ratio_printable)
# Rows whose trailer is more than half alphanumeric.
trailer_df[trailer_df["ratio_printable"] > 0.5]
# `_` is IPython's previous-output variable — presumably the filtered frame
# from the line above when that line ends its own cell; verify cell layout.
_.printable.value_counts()
trailer_df[trailer_df["ratio_printable"] > 0.5].head(100)