from IPython.display import HTML HTML('') cd pcap/ %pylab inline import pandas as pd !tshark -n -r nitroba.pcap -T fields -Eheader=y -e frame.number -e frame.len > frame.len df=pd.read_table("frame.len") df df["frame.len"].describe() df["frame.len"].plot(style=".", alpha=0.2) title("Frame length") ylabel("bytes") xlabel("frame number") def shark(pcap_file, fields=[], readfilter="", notnull=True): fields = ["frame.time_epoch"] + fields fieldspec = " ".join("-e %s" % f for f in fields) readfilters = fields if notnull else [] if readfilter: readfilters.append(readfilter) readspec = "-R '%s'" % " and ".join(f for f in readfilters) !tshark -r $pcap_file -n -T fields -Eheader=y $readspec $fieldspec > tmp.txt df = pd.read_table("tmp.txt", index_col = "frame.time_epoch", parse_dates=True, date_parser=datetime.datetime.fromtimestamp) return df surflen=shark("nitroba.pcap", ["frame.len"]) surflen surflen.plot() bytes_per_second=surflen.resample("S", how="sum") bytes_per_second.head() bytes_per_second.plot(title="bytes/s") tf=shark("nitroba.pcap", ["tcp.ack"]) tf !head tmp.txt tf.plot() from pandas.tools.plotting import lag_plot lag_plot(tf["tcp.ack"]) tf["tcp.ack"].plot() ss=shark("nitroba.pcap", ["ssl.handshake.length"]) ss hlen=ss["ssl.handshake.length"].map(lambda x: int(x.split(",")[0])) hlen.plot() hlen.head() hlen.describe() #!tshark -n -r nitroba.pcap -R "eth.trailer" -T fields -Eheader=y -e frame.number -e frame.time_epoch -e eth.src -e eth.trailer > eth.trailer trailer_df = shark("nitroba.pcap", ["eth.src", "eth.trailer"]) trailer_df trailer=trailer_df["eth.trailer"] trailer trailer.value_counts() import binascii def unhex(s, sep=":"): return binascii.unhexlify("".join(s.split(sep))) s=unhex("3b:02:a7:19:aa:aa:03:00:80:c2:00:07:00:00:00:02:3b:02") s trailer_df["unhex"]=trailer_df["eth.trailer"].map(unhex) trailer_df def printable(s): chars = [] for c in s: if c.isalnum(): chars.append(c) else: chars.append(".") return "".join(chars) printable("\x95asd\x33") 
# Render each decoded trailer with non-alphanumeric characters masked out.
trailer_df["printable"]=trailer_df["unhex"].map(printable)
trailer_df["printable"].value_counts()

trailer_df["printable"].to_csv("printable.csv", index=False)


def ratio_printable(s):
    """Return the fraction of characters in ``s`` that are alphanumeric.

    The result is a float in ``[0.0, 1.0]``.  An empty ``s`` yields ``0.0``
    instead of raising ``ZeroDivisionError``.

    NOTE(review): relies on each element of ``s`` having ``isalnum()``, i.e.
    ``s`` being a character string -- confirm before feeding Python 3 bytes.
    """
    if not s:
        return 0.0
    # Summing floats keeps the division a true division on Python 2 as well.
    alnum_count = sum(1.0 for c in s if c.isalnum())
    return alnum_count / len(s)


ratio_printable("a\x93sdfs")

trailer_df["ratio_printable"] = trailer_df["unhex"].map(ratio_printable)

# Keep the selection in a named variable instead of relying on IPython's `_`
# (previous cell output), which is not set once the cells are flattened.
mostly_printable = trailer_df[trailer_df["ratio_printable"] > 0.5]
mostly_printable

mostly_printable["printable"].value_counts()

mostly_printable.head(100)