Download this IPython notebook from GitHub:
https://github.com/DataScienceAtScale/python-at-osu-oct-10 then click on the link in the README
or get it directly via
Use nbviewer to see the static version of the notebook on the web:
You can stop here if you are viewing the static web version.
Download and install Anaconda Python from Continuum Analytics. Use Python 3.4, as there isn't a good reason to use 2.7 as a beginner:
http://continuum.io/downloads#py34
There are other Python options as well, such as Python(X,Y), WinPython, MiniConda version of Anaconda, or using pip to install packages with an existing installed version of Python.
If you are using any of these versions, also make sure that you install:
Launch IPython Notebook in the directory where Python_at_OSU_Oct_10.ipynb is located.
This will open a webpage for the IPython Notebook interface. From the terminal (for example if you downloaded the notebook in Downloads):
$ cd ~/Downloads
$ ls Python_at_OSU_Oct_10.ipynb
Python_at_OSU_Oct_10.ipynb
$ ipython notebook
Alternatively, launch IPython Notebook and copy the notebook file into the directory where ipython was launched:
$ cp ~/Downloads/Python_at_OSU_Oct_10.ipynb .
$ ipython notebook
Open the notebook (Python_at_OSU_Oct_10.ipynb) from the web interface that was opened in your browser:
# So let's get into it
print('Hello world!')
Hello world!
# assignment & print function
x = 0 # you don't have to declare x first
print(x) # calling built-in function
y = 1.0 # just assign a literal to a name
print(y) # calling looks just like C and other Algol-languages
z = 'foo' # a string with quotes
print(z) # no semi-colons or other terminators
w = "bar" # another string with double quotes (either works)
print(w)
0 1.0 foo bar
# more assignment & expressions
x = 1
y = 2
z = 3
r = 'one'
s = 'two'
t = 'three'
f = 1.0
a = x * y + z # expressions look like most other infix notation
print(a)
print(r + s + t) # concatenating strings & expression
# in a function argument
b, c, d = x + f, 4 * f, y ** z # multiple assignments on one line
print(b, c, d) # adding numeric types casts integer to float
print(x + r) # but this doesn't work
# (won't cast a numeric to a string, unlike Javascript)
5 onetwothree 2.0 4.0 8
--------------------------------------------------------------------------- TypeError Traceback (most recent call last) <ipython-input-3-1adb0bd1160d> in <module>() 18 print(b, c, d) # adding numeric types casts integer to float 19 ---> 20 print(x + r) # but this doesn't work 21 # (won't cast a numeric to a string, unlike Javascript) TypeError: unsupported operand type(s) for +: 'int' and 'str'
# conversions and types
x = '1.01'
print(x, type(x)) # type is another built-in function
x = float(x) # x is now a float
print(x, type(x)) # print can take multiple arguments
x = int(x)
print(x, type(x)) # now it's an integer
x = str(x)
print(x, type(x))
1.01 <class 'str'> 1.01 <class 'float'> 1 <class 'int'> 1 <class 'str'>
# lists (vectors, really)
x = [1, 2, 3, 4, 5] # a list of integers
print(x)
y = [1.0, 2.0, 3.0, 4.0, 5.0] # a list of floating point
print(y)
z = ['a', 'b', 'c', 'd', 'e'] # a list of strings
print(z)
w = [1, 'two', 3.0, "four", print, 'last'] # can we mix them?
print(w) # yes, we can
[1, 2, 3, 4, 5] [1.0, 2.0, 3.0, 4.0, 5.0] ['a', 'b', 'c', 'd', 'e'] [1, 'two', 3.0, 'four', <built-in function print>, 'last']
# accessing lists
w = [1, 'two', 3.0, "four", print, 'last']
print(w)
# accessing is array notation
# a Python list is like a "vector" in C++
# random access, including indexing from the end, is constant time, etc.
# but, e.g., deleting from the front is linear time
print(w[0]) # first item
print(w[1]) # second item
print(w[len(w)-1]) # last item, len is a built-in function
print(w[-1]) # this is the last item, too
print(w[-2]) # second to last
w[0] = w[-1] # let's copy the last item to the first
print(w[0])
[1, 'two', 3.0, 'four', <built-in function print>, 'last'] 1 two last last <built-in function print> last
# list slices
w = [1, 2, 3, 4, 5]
# slices - similar to Matlab and Fortran
print(w[2:]) # everything from 2 onwards
print(w[:2]) # right hand index is exclusive (Python uses 0-indexing)
print(w[:2] + w[2:]) # list concatenation
# you can do subranges
print(w[1:3])
# you can do skips
print(w[::2])
# even in reverse
print(w[::-1])
# you can combine them all together
print(w[3:0:-2]) # notice I had to do 3 to 0 by -2 to go in reverse
[3, 4, 5] [1, 2] [1, 2, 3, 4, 5] [2, 3] [1, 3, 5] [5, 4, 3, 2, 1] [4, 2]
w = [1, 2, 3, 4, 5]
# a slice is a copy
v = w[::-1]
v[0] = 'first'
print(v, w)
# such that : means copy
u = v[:]
u[-1] = 'last'
print(u, v)
['first', 4, 3, 2, 1] [1, 2, 3, 4, 5] ['first', 4, 3, 2, 'last'] ['first', 4, 3, 2, 1]
# lists of lists
x = [[1, 2, 3], [4, 5, 6], [7, 8, 9]] # what about lists of lists?
print(x)
print(x[0][0]) # double accessors to get the inner list items
print(x[-1][-1])
print(x[1:][1:]) # except this is probably not what you are expecting
y = [['a', 1, 2], [3.0], [4 + 3, ['seven', ['eight']], 'nine']]
print(y) # nothing stopping arbitrary list nesting
print(y[2][1][1][0]) # anything can go in a list
[[1, 2, 3], [4, 5, 6], [7, 8, 9]] 1 9 [[7, 8, 9]] [['a', 1, 2], [3.0], [7, ['seven', ['eight']], 'nine']] eight
# list iteration
z = [1, 2, 3]
n = []
for i in z: # iterating over a list
n.append(i + 1) # append is a method on a list
# that modifies it in place (it returns None)
print(n, z)
w = [[1, 2], [3, 4], [5, 6]]
s = ''
for i in w: # iterate over a list
for j in i: # iterate over another list
s = s + str(j)
print(s)
q = []
# iterate over two lists in tandem with zip
for i, j in zip(z, n):
print('i:', i, 'j:', j, 'i+j:', i + j)
q.append(i + j)
print(q)
[2, 3, 4] [1, 2, 3] 123456 i: 1 j: 2 i+j: 3 i: 2 j: 3 i+j: 5 i: 3 j: 4 i+j: 7 [3, 5, 7]
# if you want to modify a list in place, use the accessors
# i.e., like how you would in C
z = [1, 2, 3, 4, 5]
for i in range(0, len(z)): # range is a special "list"
# (iterator, actually)
# that gives you integers
z[i] = z[i] + 1
print(z) # basically, that was a for loop from 0 to 5 (exclusive)
z = [1, 2, 3, 4, 5]
for i in z: # this does nothing to z because assigning to i only rebinds the loop variable
i = i + 1
print(z)
# DON'T DO THIS: IT WILL NEVER RETURN
# for i in z: # iterating over a list
# z.append(i + 1) # but you just added to the end of z,
# # so, z keeps growing, such that
# # you will never hit the end of z
# basically, don't try to modify the list while iterating over it
# lots of "bad" things can happen
[2, 3, 4, 5, 6] [1, 2, 3, 4, 5]
# tuples - basically, immutable lists
# there are also sets and frozensets (mutable and immutable sets)
empty = ()
print(empty)
print(len(empty))
a = (1, 2, 3)
print(a)
print(a[0], a[-1], a[1:])
for i in a:
print(i + 1)
a[0] = 'a' # this is going to fail, because tuples are immutable
# strings are immutable, too, as with all other basic data types,
# while containers are mutable (except tuples and frozensets)
s = 'a string'
s[0] = 'a' # this will fail too
() 0 (1, 2, 3) 1 3 (2, 3) 2 3 4
--------------------------------------------------------------------------- TypeError Traceback (most recent call last) <ipython-input-13-3f201cd96ca7> in <module>() 13 print(i + 1) 14 ---> 15 a[0] = 'a' # this is going to fail, because tuples are immutable 16 17 # strings are immutable, too, as with all other basic data types, TypeError: 'tuple' object does not support item assignment
# dicts : maps, hashes, associative arrays
# lists, dicts, and tuples are all accessed in
# similar ways
empty = {}
print(empty)
print(empty.keys()) # dicts have keys
print(empty.values()) # and values (it's a map)
a = {'one': 1, 2: 'two', 'print': print} # we can store all
# sorts of things in a dict,
# just like a list and tuple
print(a)
print(a['one']) # and we can use all sorts of keys
print(a[2])
print(a['print']) # even functions can be fetched
a['print']("hi there, I'm a function in a dict!") # call it
for k in a:
print('key:', k, 'value:', a[k])
print('a key' in a) # boolean is a built-in type: True or False
a['a key'] # this is going to fail
{} dict_keys([]) dict_values([]) {'print': <built-in function print>, 2: 'two', 'one': 1} 1 two <built-in function print> hi there, I'm a function in a dict! key: print value: <built-in function print> key: 2 value: two key: one value: 1 False
--------------------------------------------------------------------------- KeyError Traceback (most recent call last) <ipython-input-14-2361f8fa98e9> in <module>() 19 20 print('a key' in a) # boolean is a built-in type: True or False ---> 21 a['a key'] # this is going to fail KeyError: 'a key'
# simple boolean expressions
s_one = '1'
i_one = 1
f_one = 1.0
i_two = 2
i_other_one = 1
print('"1" = 1?', s_one == i_one) # strings can't equal numerics
print('1 < 2?', i_one < i_two)
print('1.0 > 2?', f_one > i_two)
print('1 = 1.0?', i_one == f_one)
print('"1" = str(1)?', s_one == str(i_one))
print('1 =/= 1?', i_one != i_other_one)
"1" = 1? False 1 < 2? True 1.0 > 2? False 1 = 1.0? True "1" = str(1)? True 1 =/= 1? False
# is vs. equality
a = [1, 2, 3, 4, 5]
b = [1, 2, 3, 4, 5]
c = [1, 2, 3, 4]
d = [2, 2, 3, 4, 5]
print(a == a)
print(a == b) # equality works for lists and containers
print(a == c)
print(a == d)
# but what if you want to know about references?
print(a is a)
print(a is b) # False, they are not the same reference
# x is y -> id(x) == id(y)
print(id(a) == id(a))
print(id(a) == id(b)) # this is equivalent to 'a is b'
True True False False True False True False
# copies vs. references of lists
a = ['A']
b = ['B']
c = ['C']
d = [a, b, c] # list of lists
print('d:', d)
print('a:', a, 'b:', b, 'c:', c)
d[0] = d[-1] # list c isn't copied
print('d:', d) # list c is in both places
print('a:', a, 'b:', b, 'c:', c) # a is still the same,
# and d[0] and d[-1] reference c
temp = a[0]
a[0] = b[0] # this string will be copied
b[0] = temp
print('d:', d) # all the built-in types are copied
# (i.e., int, float, string, etc.)
print('a:', a, 'b:', b, 'c:', c) # but built-in data structures
# (i.e., lists, classes, maps, etc.)
# are referenced
d: [['A'], ['B'], ['C']] a: ['A'] b: ['B'] c: ['C'] d: [['C'], ['B'], ['C']] a: ['A'] b: ['B'] c: ['C'] d: [['C'], ['A'], ['C']] a: ['B'] b: ['A'] c: ['C']
# immutability and is
# what if you try 'is' on two numbers?
a = 1
b = 1
print(a is b) # True? what's going on here?
print(id(a))
print(id(b)) # they point to the same thing
# basic types, such as strings, integers, and floats, may get reused ("interned")
# and are actually immutable - the other types (lists, classes, maps) are mutable
# so, when you call a function, you are always passing by reference
def is_it(x, y):
    """Print the ids of x, y, and a second reference to x.

    Arguments are passed by reference, so binding another name to x does
    not copy it: the first and third ids printed are always equal.
    """
    z = x # get a new reference to x (not a copy)
    print(id(x), id(y), id(z))
is_it(a, b)
is_it([1, 2, 3], [1, 2, 3]) # different references
# but only to a certain point
a = 100000000 # they will have different ids
b = 100000000
print(a is b)
print(a == b) # all of this is done because Python uses a lot of hashing
# for optimization - because you can hash immutable data
# for example, you can only use immutable data for keys in dicts
a = {}
a[[1, 2, 3]] = b # a list is unhashable
True 140368470091648 140368470091648 140368470091648 140368470091648 140368470091648 140368256473544 140368256473800 140368470091648 False True
--------------------------------------------------------------------------- TypeError Traceback (most recent call last) <ipython-input-18-c7ade888dec8> in <module>() 28 # for example, you can only use immutable data for keys in dicts 29 a = {} ---> 30 a[[1, 2, 3]] = b # a list is unhashable TypeError: unhashable type: 'list'
# compound boolean expressions
print(not False)
print(False or True)
print(True and False)
print((False and True) or not True)
True True False False
# if-then-else & block indentation
x = 1
y = 2
z = 3
# see how blocks line up due to spacing?
# PEP 8 says the preferred tab stop is 4 spaces (don't use tabs)
# I prefer 2, personally
if x < 1 or False:
print('not here')
elif y < 2 and True:
print('not here either')
elif z > 0:
print('we got here')
if not x != y:
print('nope')
elif z == 3:
print('here too')
if z < y or y < x:
print('not here either')
else:
print('we got all the way here')
while z > x:
z = z - 1
if y > x:
y = y - x
else:
y = y - 2
while z >= y:
z = z - 1
if x > 0:
x = x + 1
else:
print('nada')
else:
print('not gonna get here')
print(x, y, z)
we got here here too we got all the way here 4 -1 -2
# 0-argument functions and returning
def nop(): # no argument list
    """Do nothing; with no explicit return, the call evaluates to None."""
    pass # do nothing
print(nop) # nop is a function
print(nop()) # call it, functions return None
# if there isn't an explicit return
def one(): # no argument list
    """Return the integer 1."""
    return 1
print(one())
# functions are first-class
temp = nop
nop = one
one = temp
print(nop(), one())
<function nop at 0x7faa0815cbf8> None 1 1 None
# arguments to functions and returning values
def xyz(x): # one argument, no types needed
    """Return x twice, packed into a 2-tuple."""
    return x, x # this means it is returning a tuple
print(xyz(1))
def uvw(u, v): # two arguments
    """Return u and v as an explicitly parenthesized 2-tuple."""
    return (u, v) # we can do it explicitly, too
print(uvw(1, 2))
def abc(a, b, c):
    """Return the three arguments, in order, as a new list."""
    return [a, b, c] # we can return lists
print(abc(1, 2, 3))
# btw, these two are equivalent
i, j = (3, 4)
print(i, j)
(i, j) = 3, 4
print(i, j)
(1, 1) (1, 2) [1, 2, 3] 3 4 3 4
# recursion works just fine
def ye_old_fib(n):
    """Return the n-th Fibonacci number, with fib(0) == fib(1) == 1.

    Any n below 2 hits the base case and returns 1. This is the naive
    doubly-recursive form, kept on purpose to demonstrate recursion.
    """
    if n < 2:
        return 1
    else:
        return ye_old_fib(n - 1) + ye_old_fib(n - 2)
fibs = []
for i in range(0, 10):
fibs = fibs + [ye_old_fib(i)] # append done another way
print(fibs)
[1, 1, 2, 3, 5, 8, 13, 21, 34, 55]
# named arguments and default values
def plus_one(x):
    """Return x + 1."""
    return x + 1
def plus_two(x):
    """Return x + 2."""
    return x + 2
# arguments v and f have default values
def func_caller(v=1, f=plus_one):
    """Call f on v and return the result.

    v defaults to 1 and f defaults to plus_one; either can be given
    positionally (left to right) or by name.
    """
    return f(v) # we can call arguments that are functions
print(func_caller()) # we don't have to pass an argument for v and f
print(func_caller(2)) # they are applied left to right
print(func_caller(2, plus_two))
print(func_caller(f=plus_two)) # we can bypass the order by naming them
2 3 4 3
# scope is function level NOT block level
# Local Function -> Outer Function -> Global -> Python Defined
# assignment determines scope
# you can think of '=' as declaration in Python
value = 1 # global
def modify_value():
    """Show that assignment creates a function-local name.

    Prints 4 (from the inner function) and then 2 (this function's own
    local); a module-level name called value is never touched.
    """
    value = 2 # local scope for value
    def modify_modify_value():
        value = 3 # inner scope
        if True:
            value = 4 # we look at the function scope
        print(value) # this will be 4, not 3
    modify_modify_value()
    print(value) # 2 not 3 or 4
print(value)
modify_value()
print(value)
del value # remove a name from scope and reclaim memory
print(value) # this is going to be undefined
1 4 2 1
--------------------------------------------------------------------------- NameError Traceback (most recent call last) <ipython-input-26-88dc96fa7cc1> in <module>() 24 print(value) 25 del value # remove a name from scope and reclaim memory ---> 26 print(value) # this is going to be undefined NameError: name 'value' is not defined
# assignment establishes scope
if True:
some_thing = 1 # it gets "declared" here, but not "lifted"
print(some_thing)
print(not_lifted + 1) # not_lifted is not in scope yet
if True:
not_lifted = 1
1
--------------------------------------------------------------------------- NameError Traceback (most recent call last) <ipython-input-27-2c757dd657b1> in <module>() 5 print(some_thing) 6 ----> 7 print(not_lifted + 1) # not_lifted is not in scope yet 8 if True: 9 not_lifted = 1 NameError: name 'not_lifted' is not defined
# more scope examples
foo = ['bar'] # global scope
def pretend_modify():
    """Bind a new local foo; any outer foo is left untouched."""
    foo = ['nope'] # new scope established
def actually_modify():
    """Append 'yep' to the foo found in an enclosing scope."""
    foo.append('yep') # we use outer scope
    # because it isn't assignment
def gonna_fail():
    """Deliberately raise UnboundLocalError to show how assignment sets scope.

    The assignment on the second line makes foo local to the whole
    function, so the append on the first line sees an unbound local.
    """
    foo.append('whoops') # this will break because foo isn't in scope
    foo = ['broked'] # because this assignment created a new scope
print(foo)
pretend_modify()
print(foo)
actually_modify()
print(foo)
gonna_fail()
['bar'] ['bar'] ['bar', 'yep']
--------------------------------------------------------------------------- UnboundLocalError Traceback (most recent call last) <ipython-input-28-4c3c00ae6438> in <module>() 19 actually_modify() 20 print(foo) ---> 21 gonna_fail() <ipython-input-28-4c3c00ae6438> in gonna_fail() 11 12 def gonna_fail(): ---> 13 foo.append('whoops') # this will break because foo isn't in scope 14 foo = ['broked'] # because this assignment created a new scope 15 UnboundLocalError: local variable 'foo' referenced before assignment
# files
# 'with' closes the file when the block ends, even if an error occurs
with open('foo.txt', 'w') as f:
    f.write('hi there!\n')
with open('foo.txt', 'r') as g:
    s = g.read(10) # read at most 10 characters
print(s)
# the struct module and ctypes are useful for mass binary
# conversion of data, as well as the numpy fromfile/tofile
# operations
with open('bar.bin', 'wb') as f:
    f.write((10).to_bytes(1, 'little')) # I'd normally use struct
with open('bar.bin', 'rb') as g:
    i = int.from_bytes(g.read(1), 'little') # Or numpy.fromfile
print(i)
hi there! 10
# modules, help, and dir
l = [1, 2, 3, 4, 5]
print(dir(l)) # what's in l?
import sys # sys module (a library, basically)
print(dir(sys)) # what's in the sys module?
from os import * # import everything in os into this namespace
print(dir()) # what's in the global namespace, now?
print(help(sys)) # get module help
print(help(sys.exit)) # get help on sys.exit
['__add__', '__class__', '__contains__', '__delattr__', '__delitem__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__getitem__', '__gt__', '__hash__', '__iadd__', '__imul__', '__init__', '__iter__', '__le__', '__len__', '__lt__', '__mul__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__reversed__', '__rmul__', '__setattr__', '__setitem__', '__sizeof__', '__str__', '__subclasshook__', 'append', 'clear', 'copy', 'count', 'extend', 'index', 'insert', 'pop', 'remove', 'reverse', 'sort'] ['__displayhook__', '__doc__', '__excepthook__', '__interactivehook__', '__loader__', '__name__', '__package__', '__spec__', '__stderr__', '__stdin__', '__stdout__', '_clear_type_cache', '_current_frames', '_debugmallocstats', '_getframe', '_home', '_mercurial', '_xoptions', 'abiflags', 'api_version', 'argv', 'base_exec_prefix', 'base_prefix', 'builtin_module_names', 'byteorder', 'call_tracing', 'callstats', 'copyright', 'displayhook', 'dont_write_bytecode', 'exc_info', 'excepthook', 'exec_prefix', 'executable', 'exit', 'flags', 'float_info', 'float_repr_style', 'getallocatedblocks', 'getcheckinterval', 'getdefaultencoding', 'getdlopenflags', 'getfilesystemencoding', 'getprofile', 'getrecursionlimit', 'getrefcount', 'getsizeof', 'getswitchinterval', 'gettrace', 'hash_info', 'hexversion', 'implementation', 'int_info', 'intern', 'last_traceback', 'last_type', 'last_value', 'maxsize', 'maxunicode', 'meta_path', 'modules', 'path', 'path_hooks', 'path_importer_cache', 'platform', 'prefix', 'ps1', 'ps2', 'ps3', 'setcheckinterval', 'setdlopenflags', 'setprofile', 'setrecursionlimit', 'setswitchinterval', 'settrace', 'stderr', 'stdin', 'stdout', 'thread_info', 'version', 'version_info', 'warnoptions'] ['In', 'Out', 'P_NOWAIT', 'P_NOWAITO', 'P_WAIT', 'SEEK_CUR', 'SEEK_END', 'SEEK_SET', '_', '__', '___', '__builtin__', '__builtins__', '__doc__', '__loader__', '__name__', '__package__', '__spec__', '_dh', '_exit', '_i', '_i1', '_i10', '_i11', 
'_i12', '_i13', '_i14', '_i15', '_i16', '_i17', '_i18', '_i19', '_i2', '_i20', '_i21', '_i22', '_i23', '_i24', '_i25', '_i26', '_i27', '_i28', '_i29', '_i3', '_i30', '_i4', '_i5', '_i6', '_i7', '_i8', '_i9', '_ih', '_ii', '_iii', '_oh', '_sh', 'a', 'abc', 'actually_modify', 'altsep', 'b', 'c', 'curdir', 'd', 'defpath', 'devnull', 'empty', 'environb', 'execl', 'execle', 'execlp', 'execlpe', 'execvp', 'execvpe', 'exit', 'extsep', 'f', 'f_one', 'fdopen', 'fibs', 'foo', 'fsdecode', 'fsencode', 'func_caller', 'fwalk', 'g', 'get_exec_path', 'get_ipython', 'getenv', 'getenvb', 'gonna_fail', 'i', 'i_one', 'i_other_one', 'i_two', 'is_it', 'j', 'k', 'l', 'linesep', 'makedirs', 'modify_value', 'n', 'name', 'nop', 'one', 'pardir', 'path', 'pathsep', 'plus_one', 'plus_two', 'popen', 'pretend_modify', 'putenv', 'q', 'quit', 'r', 'removedirs', 'renames', 's', 's_one', 'sep', 'some_thing', 'spawnl', 'spawnle', 'spawnlp', 'spawnlpe', 'spawnv', 'spawnve', 'spawnvp', 'spawnvpe', 'supports_bytes_environ', 'sys', 't', 'temp', 'u', 'unsetenv', 'uvw', 'v', 'w', 'walk', 'x', 'xyz', 'y', 'ye_old_fib', 'z'] Help on built-in module sys: NAME sys MODULE REFERENCE http://docs.python.org/3.4/library/sys The following documentation is automatically generated from the Python source files. It may be incomplete, incorrect or include features that are considered implementation detail and may vary between Python implementations. When in doubt, consult the module reference at the location listed above. DESCRIPTION This module provides access to some objects used or maintained by the interpreter and to functions that interact strongly with the interpreter. 
Dynamic objects: argv -- command line arguments; argv[0] is the script pathname if known path -- module search path; path[0] is the script directory, else '' modules -- dictionary of loaded modules displayhook -- called to show results in an interactive session excepthook -- called to handle any uncaught exception other than SystemExit To customize printing in an interactive session or to install a custom top-level exception handler, assign other functions to replace these. stdin -- standard input file object; used by input() stdout -- standard output file object; used by print() stderr -- standard error object; used for error messages By assigning other file objects (or objects that behave like files) to these, it is possible to redirect all of the interpreter's I/O. last_type -- type of last uncaught exception last_value -- value of last uncaught exception last_traceback -- traceback of last uncaught exception These three are only available in an interactive session after a traceback has been printed. Static objects: builtin_module_names -- tuple of module names built into this interpreter copyright -- copyright notice pertaining to this interpreter exec_prefix -- prefix used to find the machine-specific Python library executable -- absolute path of the executable binary of the Python interpreter float_info -- a struct sequence with information about the float implementation. float_repr_style -- string indicating the style of repr() output for floats hash_info -- a struct sequence with information about the hash algorithm. hexversion -- version information encoded as a single integer implementation -- Python implementation information. int_info -- a struct sequence with information about the int implementation. maxsize -- the largest supported length of containers. 
maxunicode -- the value of the largest Unicode codepoint platform -- platform identifier prefix -- prefix used to find the Python library thread_info -- a struct sequence with information about the thread implementation. version -- the version of this interpreter as a string version_info -- version information as a named tuple __stdin__ -- the original stdin; don't touch! __stdout__ -- the original stdout; don't touch! __stderr__ -- the original stderr; don't touch! __displayhook__ -- the original displayhook; don't touch! __excepthook__ -- the original excepthook; don't touch! Functions: displayhook() -- print an object to the screen, and save it in builtins._ excepthook() -- print an exception and its traceback to sys.stderr exc_info() -- return thread-safe information about the current exception exit() -- exit the interpreter by raising SystemExit getdlopenflags() -- returns flags to be used for dlopen() calls getprofile() -- get the global profiling function getrefcount() -- return the reference count for an object (plus one :-) getrecursionlimit() -- return the max recursion depth for the interpreter getsizeof() -- return the size of an object in bytes gettrace() -- get the global debug tracing function setcheckinterval() -- control how often the interpreter checks for events setdlopenflags() -- set the flags to be used for dlopen() calls setprofile() -- set the global profiling function setrecursionlimit() -- set the max recursion depth for the interpreter settrace() -- set the global debug tracing function FUNCTIONS __displayhook__ = displayhook(...) displayhook(object) -> None Print an object to sys.stdout and also save it in builtins._ __excepthook__ = excepthook(...) excepthook(exctype, value, traceback) -> None Handle an exception by displaying it with a traceback on sys.stderr. call_tracing(...) call_tracing(func, args) -> object Call func(*args), while tracing is enabled. The tracing state is saved, and restored afterwards. 
This is intended to be called from a debugger from a checkpoint, to recursively debug some other code. callstats(...) callstats() -> tuple of integers Return a tuple of function call statistics, if CALL_PROFILE was defined when Python was built. Otherwise, return None. When enabled, this function returns detailed, implementation-specific details about the number of function calls executed. The return value is a 11-tuple where the entries in the tuple are counts of: 0. all function calls 1. calls to PyFunction_Type objects 2. PyFunction calls that do not create an argument tuple 3. PyFunction calls that do not create an argument tuple and bypass PyEval_EvalCodeEx() 4. PyMethod calls 5. PyMethod calls on bound methods 6. PyType calls 7. PyCFunction calls 8. generator calls 9. All other calls 10. Number of stack pops performed by call_function() exc_info(...) exc_info() -> (type, value, traceback) Return information about the most recent exception caught by an except clause in the current stack frame or in an older stack frame. exit(...) exit([status]) Exit the interpreter by raising SystemExit(status). If the status is omitted or None, it defaults to zero (i.e., success). If the status is an integer, it will be used as the system exit status. If it is another kind of object, it will be printed and the system exit status will be one (i.e., failure). getallocatedblocks(...) getallocatedblocks() -> integer Return the number of memory blocks currently allocated, regardless of their size. getcheckinterval(...) getcheckinterval() -> current check interval; see setcheckinterval(). getdefaultencoding(...) getdefaultencoding() -> string Return the current default string encoding used by the Unicode implementation. getdlopenflags(...) getdlopenflags() -> int Return the current value of the flags that are used for dlopen calls. The flag constants are defined in the os module. getfilesystemencoding(...) 
getfilesystemencoding() -> string Return the encoding used to convert Unicode filenames in operating system filenames. getprofile(...) getprofile() Return the profiling function set with sys.setprofile. See the profiler chapter in the library manual. getrecursionlimit(...) getrecursionlimit() Return the current value of the recursion limit, the maximum depth of the Python interpreter stack. This limit prevents infinite recursion from causing an overflow of the C stack and crashing Python. getrefcount(...) getrefcount(object) -> integer Return the reference count of object. The count returned is generally one higher than you might expect, because it includes the (temporary) reference as an argument to getrefcount(). getsizeof(...) getsizeof(object, default) -> int Return the size of object in bytes. getswitchinterval(...) getswitchinterval() -> current thread switch interval; see setswitchinterval(). gettrace(...) gettrace() Return the global debug tracing function set with sys.settrace. See the debugger chapter in the library manual. intern(...) intern(string) -> string ``Intern'' the given string. This enters the string in the (global) table of interned strings whose purpose is to speed up dictionary lookups. Return the string itself or the previously interned string object with the same value. setcheckinterval(...) setcheckinterval(n) Tell the Python interpreter to check for asynchronous events every n instructions. This also affects how often thread switches occur. setdlopenflags(...) setdlopenflags(n) -> None Set the flags used by the interpreter for dlopen calls, such as when the interpreter loads extension modules. Among other things, this will enable a lazy resolving of symbols when importing a module, if called as sys.setdlopenflags(0). To share symbols across extension modules, call as sys.setdlopenflags(os.RTLD_GLOBAL). Symbolic names for the flag modules can be found in the os module (RTLD_xxx constants, e.g. os.RTLD_LAZY). setprofile(...) 
setprofile(function) Set the profiling function. It will be called on each function call and return. See the profiler chapter in the library manual. setrecursionlimit(...) setrecursionlimit(n) Set the maximum depth of the Python interpreter stack to n. This limit prevents infinite recursion from causing an overflow of the C stack and crashing Python. The highest possible limit is platform- dependent. setswitchinterval(...) setswitchinterval(n) Set the ideal thread switching delay inside the Python interpreter The actual frequency of switching threads can be lower if the interpreter executes long sequences of uninterruptible code (this is implementation-specific and workload-dependent). The parameter must represent the desired switching delay in seconds A typical value is 0.005 (5 milliseconds). settrace(...) settrace(function) Set the global debug tracing function. It will be called on each function call. See the debugger chapter in the library manual. DATA __stderr__ = <_io.TextIOWrapper name='<stderr>' mode='w' encoding='UTF... __stdin__ = <_io.TextIOWrapper name='<stdin>' mode='r' encoding='UTF-8... __stdout__ = <_io.TextIOWrapper name='<stdout>' mode='w' encoding='UTF... abiflags = 'm' api_version = 1013 argv = ['-c', '-f', '/home/woodring/.ipython/profile_default/security/... base_exec_prefix = '/usr' base_prefix = '/usr' builtin_module_names = ('_ast', '_codecs', '_collections', '_functools... byteorder = 'little' copyright = 'Copyright (c) 2001-2014 Python Software Foundati...ematis... displayhook = <IPython.kernel.zmq.displayhook.ZMQShellDisplayHook obje... dont_write_bytecode = False exec_prefix = '/usr' executable = '/usr/bin/python3' flags = sys.flags(debug=0, inspect=0, interactive=0, opt...ing=0, quie... float_info = sys.float_info(max=1.7976931348623157e+308, max_...epsilo... float_repr_style = 'short' hash_info = sys.hash_info(width=64, modulus=2305843009213693...iphash2... 
hexversion = 50594544 implementation = namespace(cache_tag='cpython-34', hexversion=505...in... int_info = sys.int_info(bits_per_digit=30, sizeof_digit=4) last_value = UnboundLocalError("local variable 'foo' referenced before... maxsize = 9223372036854775807 maxunicode = 1114111 meta_path = [<class '_frozen_importlib.BuiltinImporter'>, <class '_fro... modules = {'IPython': <module 'IPython' from '/usr/lib/python3.4/site-... path = ['', '/usr/lib/python34.zip', '/usr/lib/python3.4', '/usr/lib/p... path_hooks = [<class 'zipimport.zipimporter'>, <function FileFinder.pa... path_importer_cache = {'/home/woodring/.ipython/extensions': FileFinde... platform = 'linux' prefix = '/usr' ps1 = 'In : ' ps2 = '...: ' ps3 = 'Out: ' stderr = <IPython.kernel.zmq.iostream.OutStream object> stdin = <_io.TextIOWrapper name='<stdin>' mode='r' encoding='UTF-8'> stdout = <IPython.kernel.zmq.iostream.OutStream object> thread_info = sys.thread_info(name='pthread', lock='semaphore', versio... version = '3.4.2 (default, Oct 8 2014, 13:44:52) \n[GCC 4.9.1 2014090... version_info = sys.version_info(major=3, minor=4, micro=2, releaseleve... warnoptions = [] FILE (built-in) None Help on built-in function exit in module sys: exit(...) exit([status]) Exit the interpreter by raising SystemExit(status). If the status is omitted or None, it defaults to zero (i.e., success). If the status is an integer, it will be used as the system exit status. If it is another kind of object, it will be printed and the system exit status will be one (i.e., failure). None
# import syntax: several ways to reach the same object, sys.path
import sys # bind the whole module to the name "sys" in the global namespace
print(sys.path) # use something in sys
import sys as foobar # same module, bound to the name "foobar" instead
print(foobar.path)
from sys import * # import everything from sys, without having to do sys.something
from sys import path # only import path from sys
print(path)
from sys import path as foobar # import path and rename it (rebinds foobar, which was the module)
print(foobar)
print(sys.path is foobar and path is foobar) # every name refers to the same list object
['', '/usr/lib/python34.zip', '/usr/lib/python3.4', '/usr/lib/python3.4/plat-linux', '/usr/lib/python3.4/lib-dynload', '/home/woodring/.local/lib/python3.4/site-packages', '/usr/lib/python3.4/site-packages', '/usr/lib/python3.4/site-packages/IPython/extensions'] ['', '/usr/lib/python34.zip', '/usr/lib/python3.4', '/usr/lib/python3.4/plat-linux', '/usr/lib/python3.4/lib-dynload', '/home/woodring/.local/lib/python3.4/site-packages', '/usr/lib/python3.4/site-packages', '/usr/lib/python3.4/site-packages/IPython/extensions'] ['', '/usr/lib/python34.zip', '/usr/lib/python3.4', '/usr/lib/python3.4/plat-linux', '/usr/lib/python3.4/lib-dynload', '/home/woodring/.local/lib/python3.4/site-packages', '/usr/lib/python3.4/site-packages', '/usr/lib/python3.4/site-packages/IPython/extensions'] ['', '/usr/lib/python34.zip', '/usr/lib/python3.4', '/usr/lib/python3.4/plat-linux', '/usr/lib/python3.4/lib-dynload', '/home/woodring/.local/lib/python3.4/site-packages', '/usr/lib/python3.4/site-packages', '/usr/lib/python3.4/site-packages/IPython/extensions'] True
# some final things to cover
# (this isn't exhaustive, just a few more useful features)
s = set([1, 1, 2, 3, 4, 4])  # sets: duplicates collapse to one element
print(s)
print(3 in s)  # membership test


def plus_n(n):
    """Return a one-argument function that adds ``n`` to its argument."""
    # lambdas are "anonymous functions", limited to a single expression
    # in Python; the returned lambda remembers the n it was created with
    return lambda x: x + n


plus_two = plus_n(2)  # make a plus two function
print(plus_two, plus_two(1))

# list comprehension: basically map and filter in one expression
# (reduce is found in the functools module)
l = [2 ** i for i in range(0, 9) if i > 4]
print(l)
# also, you can create "generator functions" for lazy
# evaluation
def infinite_evens():
    """Yield the even numbers 2, 4, 6, ... without ever finishing."""
    i = 0
    while True:
        i = i + 2
        yield i  # hand back one value, then pause until the next is requested


evens = infinite_evens()  # lazy, infinite sequence of even numbers
# zip stops as soon as the range runs out, so only ten values are
# ever pulled from the generator
for i, j in zip(range(0, 10), evens):
    print(i, j)
try:  # exception handling
    print(undefined)  # this name was never assigned, so NameError is raised
except NameError:  # name the exception type you expect; a bare "except:"
    # would also swallow unrelated errors
    print('I caught an error!')
finally:  # runs whether or not an exception was raised
    print('and some cleanup')
# classes
class test:
    """Minimal class: stores one value and returns it when the instance is called."""

    def __init__(self, k):
        self.j = k  # attributes are created by assigning to self

    def __call__(self):
        # defining __call__ makes instances callable: t() returns the stored value
        return self.j

    def i(self, x):
        """Store ``x`` as the new value and return ``x + 1``."""
        self.j = x
        return x + 1


t = test(10)
print(t)
print(t())
print(t.i(2))
print(t())
t.foo = print  # attributes can be added to an existing instance
t.foo('bar')
print(dir(t))
{1, 2, 3, 4} True <function plus_n.<locals>.<lambda> at 0x7faa08116e18> 3 [32, 64, 128, 256] 0 2 1 4 2 6 3 8 4 10 5 12 6 14 7 16 8 18 9 20 I caught an error! and some cleanup <__main__.test object at 0x7faa08143be0> 10 3 2 bar ['__call__', '__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__gt__', '__hash__', '__init__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', 'foo', 'i', 'j']
$ python some_program.py
It's as easy as that. If you are on bash, you can put #!/usr/bin/python
on the first line of the program, and then launch it directly if the .py is set as executable.
A better version is #!/usr/bin/env python
, that way if you have different versions of Python, you can change your path to call the correct one.
$ conda install <some package>
$ pip install --user <some package>
Just create a file named your_module.py and you can import your_module
.
Doing nested submodules is a little more involved, but not that hard. Check the python documentation.
You don't really need one, because everything in the .py file will be executed.
Though, if you want an explicit main, here's the code for that:
if __name__ == '__main__':
    pass # your code goes here
This is useful for putting unit tests directly into "library" code.
Those are found in the sys module.
import sys
sys.argv # a list of command arguments
sys.argv[0] # the name of the program
sys.argv[1:] # everything else
len(sys.argv) # number of arguments
The standard library also has modules for parsing command line arguments (for example, argparse), which handle switches, options, help text, etc.
# what's next? numpy
import numpy # also, it is standard convention to do
# import numpy as np
# I prefer typing numpy, as it is more explicit
A = numpy.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]) # build an array from a list
print(A)
# OK, so what's so special about that compared to the list?
[ 1 2 3 4 5 6 7 8 9 10]
# numpy arrays are fast, almost C speed
# as long as you do "large amounts of work"
import time

# plain Python lists: element-wise addition needs an explicit loop
# (real lists, so the comparison is list vs. array rather than range vs. array)
AL = list(range(0, 1000000))
BL = list(range(0, 1000000))
CL = [0] * len(AL)
start = time.time()
for i in range(0, len(AL)):  # deliberately index-based: this loop is what is being timed
    CL[i] = AL[i] + BL[i]
print(time.time() - start)

# numpy arrays: the same addition is a single vectorized expression
A = numpy.array(range(0, 1000000), numpy.int32)
B = numpy.array(range(0, 1000000), numpy.int32)
start = time.time()
C = A + B
print(time.time() - start)
0.8321845531463623 0.001194000244140625
# numpy allows you to use lists (or any sequence) in place of an
# array and it will convert it for you
A = numpy.array([1, 2, 3, 4]) # initialized with a list
B = [1, 2, 3, 4]
C = A + B # the list B is converted to an array for the addition
print(C)
# but, it's better to start with arrays as your data structures
# if you are going to be using them a lot, rather than converting
# creating arrays from scratch
A = numpy.empty((5,)) # length of 5
A = numpy.empty((5,2)) # 5x2 matrix
A = numpy.zeros((5,2)) # 5x2 matrix of zeroes
A = numpy.zeros((5,2), numpy.float64) # 5x2 matrix of zeroes, using doubles
# things need to be the same size (or shape) -- or "broadcastable"
C = A + B # going to fail because A and B aren't the same shape: (5,2) vs (4,)
[2 4 6 8]
--------------------------------------------------------------------------- ValueError Traceback (most recent call last) <ipython-input-41-86277d246645> in <module>() 17 18 # things need to be the name size (or shape) -- or "broadcastable" ---> 19 C = A + B # going to fail because A and B aren't the same shape ValueError: operands could not be broadcast together with shapes (5,2) (4,)
# also, numpy has the ability to get binary data to and from disk
a = numpy.array([1, 2, 3, 4, 5], numpy.int32)
# raw bytes have to go through files opened in binary mode ('wb'/'rb');
# the with-blocks close the file even if the write or read fails
with open('foo.bin', 'wb') as f:
    a.tofile(f)
with open('foo.bin', 'rb') as f:
    b = numpy.fromfile(f, numpy.int32, 5)  # dtype and element count to read back
print(a == b)
[ True True True True True]
# numpy notation is similar to array slicing
# and Matlab and Fortran matrix notation
A = numpy.array(range(0, 10)) # numbers 0..9
V = A[::2] # every second element; this is a view (shallow copy)
V[0] = -10 # slices are views in numpy, so A[0] changes as well
print(V, A)
B = A.copy() # this is a deep copy of A
B[0] = 0 # only the copy changes; A[0] stays -10
print(B, A)
C = A[::2] + B[::2] # both operands have 5 elements
print(C)
C = A[1:9] * B[:8] # both operands have 8 elements
print(C)
C = A[1:-3] - B[2:-2] # negative indices count from the end; both operands have 6 elements
print(C)
C = A / B[:5] # this is going to fail, because they aren't the same shape
[-10 2 4 6 8] [-10 1 2 3 4 5 6 7 8 9] [0 1 2 3 4 5 6 7 8 9] [-10 1 2 3 4 5 6 7 8 9] [-10 4 8 12 16] [ 0 2 6 12 20 30 42 56] [-1 -1 -1 -1 -1 -1]
--------------------------------------------------------------------------- ValueError Traceback (most recent call last) <ipython-input-43-043c324e4d76> in <module>() 20 print(C) 21 ---> 22 C = A / B[:5] # this is going to fail, because they aren't the same shape ValueError: operands could not be broadcast together with shapes (10,) (5,)
# numpy arrays can have more than one dimension
# the default memory layout is C order:
# row-major, the right-most index varies fastest
A = numpy.array(range(8)).reshape((2, 2, 2))  # reshaping must keep the total element count
print(A)
print(A[0, 0, 0])  # one bracket holding all indices, unlike nested lists
print(A[1, 1, 1])
A = A.transpose((0, 2, 1))  # swap the last two axes
print(A)
[[[0 1] [2 3]] [[4 5] [6 7]]] 0 7 [[[0 2] [1 3]] [[4 6] [5 7]]]
# "broadcasting": operands of different shapes can still be combined
A = numpy.array(range(4)).reshape((2, 2))
print(A)  # a 2x2 matrix
A = 1 + A  # the scalar is added to every element
print(A)
v = numpy.array([-1, 1]).reshape((2, 1))  # a column vector
print(v)
A = v * A  # the column is broadcast across the columns of A
print(A)
v = v.reshape((1, 2))  # the same values as a row vector
print(v)
A = A - v  # the row is broadcast down the rows of A
print(A)
[[0 1] [2 3]] [[1 2] [3 4]] [[-1] [ 1]] [[-1 -2] [ 3 4]] [[-1 1]] [[ 0 -3] [ 4 3]]
# broadcasting works for many shape combinations
A = numpy.array(range(3 * 3)).reshape((3, 3))
B = numpy.array(range(3 * 1)).reshape((3, 1))  # (3, 1) column against a (3, 3) matrix
C = A + B
print(C)

B = numpy.array(range(3 * 1)).reshape((1, 3))  # (1, 3) row against the same matrix
C = A + B
print(C)

A = numpy.array(range(2 * 2 * 2)).reshape((2, 2, 2))
B = numpy.array(range(2 * 1)).reshape((1, 1, 2))  # (1, 1, 2) against (2, 2, 2)
C = A + B
print(C)
[[ 0 1 2] [ 4 5 6] [ 8 9 10]] [[ 0 2 4] [ 3 5 7] [ 6 8 10]] [[[0 2] [2 4]] [[4 6] [6 8]]]
# you can use integer arrays to index into arrays
A = numpy.array(range(0, 4))
A = numpy.reshape(A, (2, 2))
I = numpy.array([[0], [1], [1], [0]]) # row indices; the shape of the output
J = numpy.array([[0], [0], [1], [1]]) # column indices; is the same shape as the indices shape
print(A[I,J]) # (4, 1): picks A[0,0], A[1,0], A[1,1], A[0,1]
print((A[I,J])[3,0])
I = numpy.array([0, 1, 1, 0]) # the shape of the output
J = numpy.array([0, 0, 1, 1]) # is the same shape as the indices
print(A[I,J]) # (4,)
print((A[I,J])[3])
B = A[I,J]
B[0] = 1000 # indexing creates a copy, so A is unchanged
print(A, B)
[[0] [2] [3] [1]] 1 [0 2 3 1] 1 [[0 1] [2 3]] [1000 2 3 1]
# you can use boolean arrays to filter out elements
A = numpy.array(range(1, 11)) # numbers 1..10
b = numpy.array([i % 2 == 0 for i in range(1, 11)]) # True at all the even elements
print(A[b]) # b is the same shape as A
# this is stream compaction
# the output size is equal to the number of Trues
print(numpy.where(b, A, 0)) # where generates the same shape as A
# but replaces A with 0 where b is False
print(A == 4) # this works: the comparison is element-wise and gives a boolean array
print(A[b==False]) # and this too: the inverted mask selects the odd elements
print(A[A > 5]) # and this: the mask is built inline
print(numpy.argwhere(A > 5)) # get the indices where A > 5
# there are other options as well, with any, all, logical_*
# for doing all sorts of indexing
[ 2 4 6 8 10] [ 0 2 0 4 0 6 0 8 0 10] [False False False True False False False False False False] [1 3 5 7 9] [ 6 7 8 9 10] [[5] [6] [7] [8] [9]]
# numpy offers far more than +, *, - and /
# see the list of available ufuncs:
# http://docs.scipy.org/doc/numpy/reference/ufuncs.html#available-ufuncs
A = numpy.array(range(4))
print(A.max())  # largest element
print(A.min())  # smallest element
print(numpy.sign(A))  # element-wise sign
print(numpy.cos(A))  # element-wise cosine
print(A > A)  # element-wise comparison
print(A == A)
print(-A)  # element-wise negation
3 0 [0 1 1 1] [ 1. 0.54030231 -0.41614684 -0.9899925 ] [False False False False] [ True True True True] [ 0 -1 -2 -3]
# and a lot of what you want is probably
# in the linear algebra
# http://docs.scipy.org/doc/numpy/reference/routines.linalg.html
from numpy import linalg  # the public submodule numpy.linalg
# (numpy.linalg.linalg is an internal implementation module; don't import from it)
A = numpy.array([[0, 1], [2, 3]])
B = numpy.array([[0, -1], [1, 0]])
print(numpy.dot(A, B))  # matrix multiply; dot lives in numpy itself, not in numpy.linalg
print(numpy.outer(A, B))  # outer product
print(linalg.qr(A))  # qr factorization
print(linalg.svd(A))  # SVD
print(linalg.eig(A))  # eigenvectors and values
print(linalg.inv(A))  # inverse of A
# etc.
[[ 1 0] [ 3 -2]] [[ 0 0 0 0] [ 0 -1 1 0] [ 0 -2 2 0] [ 0 -3 3 0]] (array([[ 0., -1.], [-1., 0.]]), array([[-2., -3.], [ 0., -1.]])) (array([[-0.22975292, -0.97324899], [-0.97324899, 0.22975292]]), array([ 3.70245917, 0.54018151]), array([[-0.52573111, -0.85065081], [ 0.85065081, -0.52573111]])) (array([-0.56155281, 3.56155281]), array([[-0.87192821, -0.27032301], [ 0.48963374, -0.96276969]])) [[-1.5 0.5] [ 1. 0. ]]
# next is scipy
#
# it has lots of specialized functionality
# for scientific computing:
# FFTs, signal processing, integration, statistics,
# interpolation, optimization, graphs, etc.
#
# http://docs.scipy.org/doc/scipy/reference/
from scipy import fftpack
A = numpy.array([0, 1, 2, 3, 4, 3, 2, 1])
print(fftpack.fft(A))  # fft
print(fftpack.ifft(fftpack.fft(A)))  # ifft and fft: recovers A up to rounding error

from scipy import optimize
B = numpy.array([0, 1, 2, 3, 4, 5, 6, 7])


def poly(x, a, b, c):  # the model to fit to
    """Evaluate the quadratic a + b*x + c*x**2 at x."""
    return a + b*x + c*x*x


# B holds the x values and A the observed y values
print(optimize.curve_fit(poly, B, A))  # outputs a, b, c and covariance matrix
[ 16.00000000 +0.00000000e+00j -6.82842712 -2.22044605e-16j 0.00000000 -0.00000000e+00j -1.17157288 -2.22044605e-16j 0.00000000 +0.00000000e+00j -1.17157288 +2.22044605e-16j 0.00000000 +0.00000000e+00j -6.82842712 +2.22044605e-16j] [ 0. +0.00000000e+00j 1. +5.55111512e-17j 2. +0.00000000e+00j 3. -1.11022302e-16j 4. +0.00000000e+00j 3. -5.55111512e-17j 2. +0.00000000e+00j 1. +1.11022302e-16j] (array([-0.33333333, 1.85714286, -0.23809524]), array([[ 0.13492064, -0.07142857, 0.00793651], [-0.07142857, 0.0600907 , -0.00793651], [ 0.00793651, -0.00793651, 0.00113379]]))
# the fun part, plotting the data
%matplotlib inline
# this "magic" is necessary for ipython notebook
# it's not necessary (and will be an error)
# in normal python
# there are other "magic" ipython commands, check the documentation
import matplotlib.pyplot as plt # this is all you need in python
# pyplot is the Matlab like plotting interface
# plt is the standard rename for pyplot
# examples of plots can be found at http://matplotlib.org/gallery.html
import functools
A = numpy.array([0, 1, 2, 3, 4, 3, 2, 1])
B = numpy.array([0, 1, 2, 3, 4, 5, 6, 7])


# our model curve
def poly(x, a, b, c):
    """Evaluate the quadratic a + b*x + c*x**2 at x."""
    return a + b*x + c*x*x


abc, cov = optimize.curve_fit(poly, B, A)  # going to do the least squares fit like before
fixed = functools.partial(poly, a=abc[0], b=abc[1], c=abc[2])  # freeze the fitted coefficients
fixed = numpy.vectorize(fixed)  # create a vectorized version of the function

# the start of a plot
# pyplot is Matlab like, it is a state machine
plt.figure()  # start a new plot
plt.xlabel('x')  # labels
plt.ylabel('y')
plt.plot(B, fixed(B), label='model')  # the x and y values of the model
plt.plot(B, A, 'o', label='original')  # 'o' means plot it with circles
# name each curve with label= and call legend() once after plotting;
# legend('model') would treat the string as a sequence of one-letter labels,
# and a second legend() call replaces the first
plt.legend()
plt.title('least squares fit to quadratic model')  # a title
plt.show()  # show it
# plt.savefig('foo.png') # write it to an image instead
from scipy import fftpack
from scipy import fft
A = numpy.array([0, 1, 0, -1, 0, 1, 0, -1]) # a signal that repeats every 4 samples
f = numpy.abs(fftpack.fft(A)) ** 2 # power spectrum
q = fftpack.fftfreq(A.size, 1.0) # get the frequencies (sample spacing of 1.0)
i = numpy.argsort(q) # get the indices that would sort the frequencies
plt.figure() # plot it
plt.title('power spectrum')
plt.xlabel('frequencies')
plt.ylabel('power')
plt.plot(q[i], f[i]) # both arrays reordered so the frequencies increase along x
plt.show()
# sqlite is awesome to store your data in
# stop using custom text files, and use sqlite
import sqlite3

conn = sqlite3.connect('northwind.db')  # connect to the database
try:
    cursor = conn.cursor()
    # this is a pretty simple query, but you can do all sorts
    # of complex SQL queries, using SQLite
    cursor.execute('select CustomerID from customers')
    data = cursor.fetchall()  # a list of one-element rows
finally:
    conn.close()  # release the database file even if the query fails

# convert the first letter of the customer id to a number (A -> 1, B -> 2, ...)
customers = [ord(i[0][0]) - ord('A') + 1 for i in data]
lo, hi = min(customers), max(customers)
bins = hi - lo + 1  # one bin for every letter from the smallest to the largest
# unit-wide bins centred on the integers; with bins = hi - lo over [lo, hi]
# the last bin is closed on both ends and merges the two largest letters
span = (lo - 0.5, hi + 0.5)

# let's plot the distribution
plt.figure()
plt.title('distribution of customer ids')
plt.xlabel('first letter')
plt.ylabel('count')
plt.hist(customers, bins, range=span)
plt.show()

# what's the most common letter?
counts = numpy.histogram(customers, bins, range=span)[0]
# bin k counts the letter numbered lo + k, and letter numbers are 1-based
print(chr(numpy.argmax(counts) + lo - 1 + ord('A')))
W