- Gathering system data with multiplatform and platform-dependent tools
- Get information from files, /proc and /sys
- Capture command output
- Use psutil to get I/O, CPU and memory data
- Parse files with a strategy
- Use `with`, `yield` or pipes
import psutil
import glob
import sys
import subprocess
#
# Our code is p3-ready
#
from __future__ import print_function, unicode_literals
def grep(needle, fpath):
    """Return the lines of a file that contain a substring.

    A simple grep implementation.

    goal: an open file is iterable line by line and doesn't
      need splitlines()
    goal: comprehension can filter lists

    @param needle - substring searched in every line
    @param fpath - path of the text file to scan
    @return the list of matching lines, trailing newline included
    """
    # `with` closes the file as soon as the scan is over, instead of
    # leaving the open handle to the garbage collector.
    with open(fpath) as lines:
        return [line for line in lines if needle in line]
# Do we have localhost? List the /etc/hosts lines mentioning it.
grep("localhost", "/etc/hosts")
# The psutil module is very nice
# (imported again here, presumably so that this cell runs on its own)
import psutil
# Works on Windows, Linux and MacOS
psutil.cpu_percent()
# And its output is very easy to manage: keep it in a variable and print it
ret = psutil.disk_io_counters()
print(ret)
# Exercise: what other information
# does psutil provide?
# Use this cell and Jupyter's tab completion to explore it.
# Exercise
def multiplatform_vmstat(count):
    """Exercise skeleton: a vmstat-like function.

    Write a function printing every second:
     - cpu usage%
     - bytes read and written in the given interval

    Hint: use psutil and time.sleep(1)
    Hint: use this cell or try on ipython and *then* write the function
          using %edit vmstat.py

    @param count - how many samples (loop iterations) to print
    @raise NotImplementedError - on the first iteration, until the
           exercise is solved
    """
    for i in range(count):
        # Replace the `raise` with the code computing `cpu_usage` and
        # `bytes_rw`: the print below shows the expected output line.
        raise NotImplementedError
        print(cpu_usage, bytes_rw)
# Ask for 5 samples from the function defined in this notebook.
multiplatform_vmstat(5)
# IPython magic: load course/multiplatform_vmstat.py into this cell
# (presumably the reference solution - verify).
%load course/multiplatform_vmstat.py
# Run your vmstat implementation.
multiplatform_vmstat(5)
#
# subprocess
#
# The check_output function runs a command and returns its stdout
from subprocess import check_output
# It takes a *list* of arguments as an argument! Here str.split() builds it
out = check_output("ping -w1 -c1 www.google.com".split())
# and returns the captured output (a bytes object on Python 3)
print(out)
# If you want to stream command output, use subprocess.Popen
# and check carefully subprocess documentation!
def sh(cmd, shell=False, timeout=0):
    """Return the output of a command string as a list of lines.

    @param cmd - the command line, eg "ls -l /tmp"
    @param shell - if True, cmd is passed verbatim to the system shell
    @param timeout - seconds to wait for the command to end;
           0 (the default) means no timeout. Needs Python 3.3+
    @return the command stdout split in lines (bytes on Python 3)
    @raise ValueError - if a timeout is requested before Python 3.3
    @raise subprocess.CalledProcessError - on a non-zero exit status
    @raise subprocess.TimeoutExpired - if the timeout elapses
    """
    from sys import version_info as python_version

    # The shell parses the command line by itself, so it needs the
    # whole string: on POSIX a list passed with shell=True runs only
    # its first item and hands the rest to the shell as its own args.
    args = cmd if shell else cmd.split()

    if python_version < (3, 3):  # checking... before using...
        # check_output() has no `timeout` keyword on these versions.
        if timeout:
            raise ValueError("Timeout not supported until Python 3.3")
        output = check_output(args, shell=shell)
    else:
        # timeout=0 would expire immediately: "no timeout" is None.
        output = check_output(args, shell=shell, timeout=timeout or None)
    return output.splitlines()
# Exercise:
# implement a multiplatform pgrep-like function.
def ppgrep(program):
    """A multiplatform pgrep-like function (exercise skeleton).

    Prints a list of processes executing 'program'.

    @param program - eg firefox, explorer.exe
    @raise NotImplementedError - until the exercise is solved

    Hint: use subprocess, os and list-comprehension
      eg. items = [x for x in a_list if 'firefox' in x]
    """
    raise NotImplementedError
# IPython magic: load course/pgrep.py into this cell
# (presumably the reference solution for ppgrep - verify).
%load course/pgrep.py
The Linux /proc filesystem is a great place to get data.
In the next example we'll see how to get per-thread information about a process:
# Parsing /proc - 1
def linux_threads(pid):
    """Print selected status fields for every thread of a process.

    Retrieves the data from /proc/<pid>/task/*/status and prints, for
    each file, the list of lines starting with a wanted field name.

    @param pid - id of the process whose threads are inspected
    """
    from glob import glob

    # glob emulates shell expansion of * and ?
    path = "/proc/{}/task/*/status".format(pid)
    # pick a set of fields to gather
    fields = ('Pid', 'Tgid', 'voluntary')  # this is a tuple!
    for t in glob(path):
        # The field names and the per-thread result live in separate
        # variables: rebinding the tuple to the resulting list makes
        # startswith() raise TypeError from the second thread on.
        with open(t) as status:
            # ... and use comprehension to get interesting data.
            t_info = [x
                      for x in status
                      if x.startswith(fields)]  # startswith accepts tuples!
        print(t_info)
# If you're on Linux, try linux_threads on the process with pid 1
pid_of_init = 1 # or systemd ?
linux_threads(pid_of_init)
# On linux /proc/diskstats is the source of I/O information
disk_l = grep("sda", "/proc/diskstats")
print(''.join(disk_l))
# To gather that data we put the header in a multiline string
from course import diskstats_headers as headers
print(*headers, sep='\n')
# Take the 1st entry (sda), split the data...
disk_info = disk_l[0].split()
# ... and tie them with the header.
# On py3 zip() returns a one-shot iterator: store the pairs in a list,
# otherwise the first print would consume them and the second one
# would find nothing left to show.
ret = list(zip(headers, disk_info))
print(ret)
# Try to mangle ret
print('\n'.join(str(x) for x in ret))
# Exercise: transform ret in a dict.
# We can create a reusable commodity class with
from collections import namedtuple
# using the imported `headers` as attributes
# like the one provided by psutil
# NOTE(review): the type name 'DiskStat' differs from the variable
# name DiskStats - presumably unintended, confirm.
DiskStats = namedtuple('DiskStat', headers)
# ... and disk_info as values
# NOTE(review): a namedtuple needs exactly one value per field, so this
# assumes disk_info has as many columns as `headers` - TODO confirm.
dstat = DiskStats(*disk_info)
# Fields are now reachable by name
print(dstat.device, dstat.writes_ms)
# Homework: check further features with
# help(collections)
# Exercise
# Write the following function
def linux_diskstats(partition):
    """Print every second I/O information from /proc/diskstats

    Exercise skeleton: the loop body has to be written.

    @param: partition - eg sda1 or vdx1
    @raise NotImplementedError - until the exercise is solved

    Hint: use the above `grep` function
    Hint: use zip, time.sleep, print() and *magic
    """
    # Names for the counters of a /proc/diskstats line.
    # NOTE(review): the kernel documentation lists 11 counters after the
    # device name while this list has 10 entries - confirm before
    # zipping it with real data.
    diskstats_headers = ('reads reads_merged reads_sectors reads_ms'
                         ' writes writes_merged writes_sectors writes_ms'
                         ' io_in_progress io_ms_weight').split()
    while True:
        # Replace the `raise` with the code gathering `values`: the
        # print below is the template of the expected output line.
        raise NotImplementedError
        print(values, sep="\t")
# IPython magic: load course/linux_diskstats.py into this cell
# (presumably the reference solution - verify).
%load course/linux_diskstats.py
# Using check_output with split() doesn't always work:
# str.split() breaks the command at every blank, quoted or not.
from os import makedirs
# Create a directory whose name contains blanks.
makedirs('/tmp/course/b l a n k s') # on py3 add exist_ok=True to allow re-running this cell
# Here the quoted path is split into several separate arguments.
check_output('ls "/tmp/course/b l a n k s"'.split())
# You can use
from shlex import split
# and the quoted path stays a single argument, as in a shell
cmd = split('dir -a "/tmp/course/b l a n k s"')
check_output(cmd)
# zip_iterables():
# The zip method joins list elements pairwise
#  like a zip fastener
from sys import version_info as python_version
a_list = [0, 1, 2, 3]
b_list = ["a", "b", "c", "d"]
zipper = zip(a_list, b_list)
# On py3 this shows a lazy zip object, not the pairs.
print(zipper)
if python_version >= (3,):
    # Build the list of pairs before comparing it with the expected one.
    zipper = list(zipper)
assert zipper == [(0, "a"), (1, "b"), (2, "c"), (3, "d")]