In [73]:

from datetime import date, datetime

import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import numpy as np
from dateutil.parser import parse
history = """December, 1989:Implementation started
1990:Internal releases at CWI
February 20, 1991:0.9.0 (released to alt.sources)
February, 1991:0.9.1
September, 1991:0.9.2
December 24, 1991:0.9.4
January 2, 1992:0.9.5 (Macintosh only)
April 6, 1992:0.9.6
January 9, 1993:0.9.8
July 29, 1993:0.9.9
January 26, 1994:1.0.0
February 15, 1994:1.0.2
May 4, 1994:1.0.3
July 14, 1994:1.0.4
October 11, 1994:1.1
November 10, 1994:1.1.1
April 13, 1995:1.2
October 13, 1995:1.3
October 25, 1996:1.4
January 3, 1998:1.5
October 31, 1998:1.5.1
April 13, 1999:1.5.2
September 5, 2000:1.6
October 16, 2000:2.0
February 25, 2001:1.6.1
April 17, 2001:2.1
December 21, 2001:2.2
July 29, 2003:2.3
November 30, 2004:2.4
September 16, 2006:2.5
October 1, 2008:2.6
December 3, 2008:3.0
June 27, 2009: 3.1
July 3, 2010: 2.7
February 20, 2011: 3.2
September 29, 2012: 3.3
March 16, 2014: 3.4
September 13, 2015: 3.5
December 23, 2016: 3.6
June 27, 2018:3.7
January 1, 2020: 2.7 EOL"""

# Parse each "<date>:<label>" line of `history` into two parallel lists.
# Some entries are partial dates ("1990", "December, 1989"). dateutil fills the
# missing fields from `default`, which is *today* when not given, so the points
# would drift from run to run. Pin missing fields to 1 January instead.
PARTIAL_DATE_DEFAULT = datetime(1900, 1, 1)

dates = []
names = []
for entry in history.split('\n'):
    # Split on the first colon only, so a label may itself contain ':'
    datestr, version = entry.split(':', 1)
    dates.append(parse(datestr, default=PARTIAL_DATE_DEFAULT))
    # Some entries have a space after the colon; keep labels uniform
    names.append(version.strip())


def plot_timeline(dates, names, title, spans=()):
    """Draw a labelled timeline of events and save it as a PNG.

    Every event is drawn as a marker on a horizontal baseline with a red stem
    leading to its label. Stem heights cycle through a fixed list of levels
    (alternating below/above the baseline) so neighbouring labels collide less.

    Args:
        dates: Event dates, one per entry in ``names``; must be non-empty
            because the baseline runs from ``min(dates)`` to ``max(dates)``.
        names: Label text for each event, in the same order as ``dates``.
        title: Figure title. Lower-cased, with spaces replaced by underscores,
            it is also the stem of the output file (``<title>.png``).
        spans: Iterable of argument tuples, each unpacked into ``ax.axvspan``
            and shaded with ``alpha=0.2``. Defaults to no spans.

    Returns:
        None. The figure is written to disk as a side effect.
    """
    levels = np.array([-5, 5, -4, 4, -3, 3, -2, 2])
    fig, ax = plt.subplots(figsize=(12, 5))

    # Baseline from the earliest to the latest event
    ax.plot((min(dates), max(dates)), (0, 0), 'k', alpha=.5)

    # Annotate each event in turn
    for ii, (iname, idate) in enumerate(zip(names, dates)):
        level = levels[ii % len(levels)]
        # Labels below the baseline hang from their top edge, labels above
        # sit on their bottom edge, so the text always grows away from the stem
        vert = 'top' if level < 0 else 'bottom'

        ax.scatter(idate, 0, s=100, facecolor='w', edgecolor='k', zorder=9999)
        # Stem from the baseline up (or down) to the label
        ax.plot((idate, idate), (0, level), c='r', alpha=.7)
        # Faint white background keeps the text legible over other stems
        ax.text(idate, level, iname,
                horizontalalignment='right', verticalalignment=vert, fontsize=14,
                backgroundcolor=(1., 1., 1., .3))

    # Shade the requested date ranges
    for args in spans:
        ax.axvspan(*args, alpha=0.2)
    ax.set(title=title)

    # One major tick per year, labelled with the four-digit year
    ax.get_xaxis().set_major_locator(mdates.YearLocator())
    ax.get_xaxis().set_major_formatter(mdates.DateFormatter("%Y"))
    fig.autofmt_xdate()

    # Hide the y axis and the frame for a cleaner look
    plt.setp((ax.get_yticklabels() + ax.get_yticklines() +
              list(ax.spines.values())), visible=False)
    fig.tight_layout()
    fig.savefig(f"{title.lower().replace(' ','_')}.png")


plot_timeline(dates, names, "Python Release Dates",
              [
                  # Lifetime of Python 2.7: from its release (July 3, 2010,
                  # matching the `history` entry) to its end of life
                  (parse("july 3, 2010"), parse("january 1,2020")),
                  # Python 3.0 release until today, limited to the band between
                  # 20% and 80% of the axes height
                  (parse("December 3, 2008"), date.today(), 0.2, 0.8)
              ])

Python 3: More than just `print()`¶

Andrew Bolster
Threat Intelligence Data Scientist (Alert Logic)
Founding Director (Farset Labs)
Pythonista for ~10 years

What We'll Cover¶

History of Python
Significant Features

But TL;DR?¶

As of Python 3.7; in all but one test type (why are you doing crypto in python?), 3 is 20% faster than 2
The language features developed make Python both performant and stable
The breaking changes between 2 and 3 were due to poor historical architectural decisions; there are no plans for breaking changes going forward
2.7 EOL is in less than a year: Most major packages have already dropped (non security) support for it, including:
- Numpy
- Pandas
- matplotlib
- dask
- sympy
Of the Top 360 most popular Python modules only one hasn't migrated to Python 3: apache-beam (Which is a Java-first SDK anyway so stuff 'em)

Timeline¶

Significant Features/Changes¶

print let's just get that out of the way, shall we?
Integer Division
f-strings
υηι¢σ∂є
Iterable Unpacking
Iterators, Generators, nexts, oh my!
changes to dict behaviour
dataclasses

`print()`¶

Probably the most obvious, contentious, but also meaningless change in py3

In [82]:

print("Hello World!")

Hello World!

But it's more than just brackets;

In [83]:

print("Hello", "World", "!")  # Multiple positional arguments, joined with a space by default

Hello World !

In [84]:

print("Hello", "World", "!", sep='\t')  # Custom Separators

Hello	World	!

In [85]:

print("Hello", end=' ')  # Tail override
print("World!")

Hello World!

In [87]:

import sys
print("fatal error", file=sys.stderr)  # Can still do piping to file handlers

fatal error

Integer Division (/ vs //)¶

print as a keyword vs print() as a function is just a bit of syntactic sugar to simplify the cPython API, however... some changes are more subtle and more likely to cause non-trivial bugs when porting 'stable' code

This is one of them...

In [66]:

print('4 / 2 =', 4 / 2)
print('3 / 2 =', 3 / 2)
print('4 // 2 =', 4 // 2)
print('3 // 2 =', 3 // 2)
print('4 / 2.0 =', 4 / 2.0)
print('3 / 2.0 =', 3 / 2.0)
print('3 // 2.0 =', 3 // 2.0)

4 / 2 = 2.0
3 / 2 = 1.5
4 // 2 = 2
3 // 2 = 1
4 / 2.0 = 2.0
3 / 2.0 = 1.5
3 // 2.0 = 1.0

Division now works in a way that you'd expect a duck-type language to; i.e.

/ always returns a float even when it's not numerically necessary
// returns an int when both operands are ints and a float if either operand is a float, but always with an integer value ($\in \mathbb{Z}$)

In [67]:

print('4 / 2 =', type(4 / 2))
print('3 / 2 =', type(3 / 2))
print('4 // 2 =', type(4 // 2))
print('3 // 2 =', type(3 // 2))
print('4 / 2.0 =', type(4 / 2.0))
print('3 / 2.0 =', type(3 / 2.0))
print('3 // 2.0 =', type(3 // 2.0))

4 / 2 = <class 'float'>
3 / 2 = <class 'float'>
4 // 2 = <class 'int'>
3 // 2 = <class 'int'>
4 / 2.0 = <class 'float'>
3 / 2.0 = <class 'float'>
3 // 2.0 = <class 'float'>

Also note that this is not round; this is floor division

In [71]:

print("5 / 6 = ", 5/6)
print("5 // 6 = ", 5//6)
print("‖5/6‖ = ", round(5/6))

5 / 6 =  0.8333333333333334
5 // 6 =  0
‖5/6‖ =  1

And if anyone is wondering about performance...

In [75]:

% % timeit

x = 7//2

14 ns ± 1.15 ns per loop (mean ± std. dev. of 7 runs, 100000000 loops each)

In [78]:

from math import floor

In [79]:

% % timeit

x = floor(7/2)

84.7 ns ± 3.32 ns per loop (mean ± std. dev. of 7 runs, 10000000 loops each)

υηι¢σ∂є¶

From the original 3.0 release notes:

Everything you thought you knew about binary data and Unicode has changed.

All strings (str) are Unicode text, but encoded strings are stored as binary data (bytes)
No more u"" junk
Incorrectly encoded open's will fail loudly

The transition is largely painless unless you're doing something really 'clever' to get around python2's utter failings in interacting with Unicode in sensible ways.

Best of all... this isn't just a string processing change; this is fundamental to the interpreter... so...

In [96]:

from numpy import array, cos, sin


def rotate(vector, angle):
    """Rotate a 2-D ``vector`` anticlockwise about the origin by ``angle`` radians."""
    θ = angle  # Non-ASCII letters are legal identifiers in Python 3
    rotation = array([
        [cos(θ), -sin(θ)],
        [sin(θ), cos(θ)],
    ])
    return rotation @ vector  # `@` is the matrix-multiplication operator


# NOTE: cos/sin work in radians, so this rotates by 90 rad rather than 90 degrees
rotate([1, 0], 90)

Out[96]:

array([-0.44807362,  0.89399666])

Unfortunately you can only use printable characters as variable identifiers, so no emojis, but you can go mad elsewhere:

In [107]:

import emoji  # pip install emoji
this_is_a_regular_string = emoji.emojize(
    "Python 2 is :poop:", use_aliases=True)
print(this_is_a_regular_string)
print(this_is_a_regular_string.replace('is', 'was'))
print(''.join(reversed(this_is_a_regular_string)))  # << Spoilers Ahead

Python 2 is 💩
Python 2 was 💩
💩 si 2 nohtyP

f-strings (py3.6)¶

Replacement for % and str.format() methods
Jinja-like templating of local scope variables
(Basically like having an interpreter inside a string)

In [28]:

thing = 'thing'
print(f"This is a {thing}")

This is a thing

In [30]:

print(f"This is a loud {thing.upper()}")

This is a loud THING

In [44]:

import datetime
from datetime import date


class Person:
    """A named person whose age is derived from their birthday."""

    def __init__(self, first_name: str, last_name: str, birthday: date, gender: str):
        self.first_name, self.last_name = first_name, last_name
        self.birthday, self.gender = birthday, gender

    @property
    def age(self):
        """Age in whole years as of today."""
        today = date.today()
        born = self.birthday
        years = today.year - born.year
        # Take a year off while this year's birthday is still to come
        if (today.month, today.day) < (born.month, born.day):
            years -= 1
        return years

    def __str__(self):
        # Human-friendly form: just the full name
        return f"{self.first_name} {self.last_name}"

    def __repr__(self):
        # Interactive form: full name followed by the current age
        return f"{self.first_name} {self.last_name} ({self.age})"


p = Person("Andrew", "Bolster", date(1988, 5, 17), 'Male')
p

Out[44]:

Andrew Bolster (30)

In [45]:

f"{p}"  # Defaults to __str__

Out[45]:

'Andrew Bolster'

In [46]:

f"{p!r}"  # Can be poked to use __repr__

Out[46]:

'Andrew Bolster (30)'

In [48]:

f"{p}"\
    f" is {p.age}"\
    f" and this is a multiline {'f-string'}"

Out[48]:

'Andrew Bolster is 30 and this is a multiline f-string'

Performance-wise, f-strings are fast: about 30% faster than % and 50% faster than .format()

Also support standard formatting syntax

In [49]:

from math import pi
pi

Out[49]:

3.141592653589793

In [65]:

f"{pi:07.4}"  # {value:width.precision}

Out[65]:

'003.142'

Iterators and stuff¶

`range` behaves like `xrange` used to¶

xrange is dead, long live range

In [113]:

span = range(10000)
len(span), sum(span), max(span), min(span)

Out[113]:

(10000, 49995000, 9999, 0)

In [110]:

4 in span

Out[110]:

True

So what?

range now returns a lazy range object rather than a list (strictly a sequence, not an 'iterator'); elements are not populated until used

In [117]:

stupid_span = range(int(10e16))
len(stupid_span)

Out[117]:

100000000000000000

In [118]:

for i in stupid_span:  # Doesn't blow up memory
    if i ** 2 > 10000:
        break
print(i)

dict methods (.keys(), .values(), .items()), zip, reversed and a load of other functions now return iterators or views rather than lists

In [123]:

zip(range(5), range(5, 0, -1))

Out[123]:

<zip at 0x117b2fd08>

In [126]:

from string import ascii_letters
dict(zip(ascii_letters, range(5)))

Out[126]:

{'a': 0, 'b': 1, 'c': 2, 'd': 3, 'e': 4}

In [127]:

d = dict(zip(ascii_letters, range(5)))
d.keys()

Out[127]:

dict_keys(['a', 'b', 'c', 'd', 'e'])

Note; this is a 'view', not an actual list, even though it looks like it. The motivation for this is that dict.items() etc. in py2 produced realised-views of the values as a fully populated list. This was expensive.

Oh, BTW, `dicts` are now sorted!¶

Previously insertion order was not guaranteed; now dicts will always be returned in the same order as their keys were inserted

In [130]:

del d['a']
d['a'] = -1
d

Out[130]:

{'b': 1, 'c': 2, 'd': 3, 'e': 4, 'a': -1}

However, since this is a view; things don't always work how you'd imagine

In [132]:

list(reversed(range(5)))

Out[132]:

[4, 3, 2, 1, 0]

In [133]:

reversed(d.keys())

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-133-10b791caa95b> in <module>
----> 1 reversed(d.keys())

TypeError: 'dict_keys' object is not reversible

In [136]:

list(reversed(list(d.keys())))  # need to instantiate the view

Out[136]:

['a', 'e', 'd', 'c', 'b']

The downside to all of this is that sometimes your code will be peppered with lists...

Advanced Unpacking¶

In [143]:

# First with sensible lists:
values = [0, 1, 2, 3, 4, 5, 6, 7]
while values:
    first, *values = values
    print(first, values)

0 [1, 2, 3, 4, 5, 6, 7]
1 [2, 3, 4, 5, 6, 7]
2 [3, 4, 5, 6, 7]
3 [4, 5, 6, 7]
4 [5, 6, 7]
5 [6, 7]
6 [7]
7 []

In [142]:

# Now peel an item off both ends at once:
values = [0, 1, 2, 3, 4, 5, 6, 7]
while values:
    # Each pass needs at least two items (head and tail), so this loop only
    # terminates cleanly for an even-length list; a single leftover item
    # would raise ValueError on unpacking
    head, *values, tail = values
    print(head, tail, values)

0 7 [1, 2, 3, 4, 5, 6]
1 6 [2, 3, 4, 5]
2 5 [3, 4]
3 4 []

Type annotations¶

Lazy type hinting;
Great for documenting what you expect and IDE-assist; doesn't do type validation
Extensions available for auto generation of sphinx-docs based on hints
Optional type checking via mypy
Not used for any runtime performance optimisation or anything
Kinda used in dataclasses

In [25]:

def add(a: int, b: int) -> int:
    """Return ``a + b``.

    The ``int`` hints are documentation only; nothing checks them at runtime,
    so any operands that support ``+`` are accepted.
    """
    total = a + b
    return total

add(5, 5)

Out[25]:

10

In [26]:

add('this', 'that')  # badness

Out[26]:

'thisthat'

In [ ]:

from typing import Iterator

def fib(n: int) -> Iterator[int]:
    """Yield the Fibonacci numbers, starting from 0, that are strictly below ``n``."""
    current, following = 0, 1
    while current < n:
        yield current
        # Slide the window one step along the sequence
        current, following = following, current + following

In [74]:

from typing import *
from operator import itemgetter

def listtacular(listicle: Dict[AnyStr,int])->List[AnyStr]:
    """Return the keys of ``listicle`` ordered by ascending value."""
    by_value = sorted(listicle.items(), key=itemgetter(1))
    return [key for key, _ in by_value]

listtacular({'first':1, 'fifth':5, 'second':2, 'forth':4, 'third':3})

Out[74]:

['first', 'second', 'third', 'forth', 'fifth']

`dataclass` (py3.7)¶

Basically, a massive shortcut for building object classes

Highly recommend watching Raymond Hettinger's PyCon 2018 talk https://www.youtube.com/watch?v=T-TwcmT6Rcw

TLDR:

It makes a mutable data holder, in the spirit of collections.namedtuple
It writes boiler-plate code for you, simplifying the process of writing the class.

In [1]:

# Code You write
from dataclasses import dataclass


# Three annotated attributes are the whole class definition; the decorator
# builds the rest of the class from them.
@dataclass
class Color:
    hue: int
    saturation: float
    lightness: float = 0.5  # annotated attribute with a default value

In [ ]:

from dataclasses import Field, _MISSING_TYPE, _DataclassParams

# Hand-written counterpart of the @dataclass version of Color, spelling out
# the boilerplate explicitly.
class Color:
    'Color(hue: int, saturation: float, lightness: float = 0.5)'

    def __init__(self, hue: int, saturation: float, lightness: float = 0.5) -> None:
        self.hue = hue
        self.saturation = saturation
        self.lightness = lightness

    def __repr__(self):
        # Qualified class name followed by every field as name=repr(value)
        return (self.__class__.__qualname__ +
                f"(hue={self.hue!r}, saturation={self.saturation!r}, "
                f"lightness={self.lightness!r})")

    def __eq__(self, other):
        # Only instances of exactly the same class are compared, field by field
        if other.__class__ is self.__class__:
            return (self.hue, self.saturation, self.lightness) == (other.hue, other.saturation, other.lightness)
        # Let Python try the reflected comparison / fall back to identity
        return NotImplemented

    # Defining __eq__ without a hash makes instances unhashable
    __hash__ = None

    # Class-level annotations; only lightness carries a default value
    hue: int
    saturation: float
    lightness: float = 0.5

In [2]:

    # Continuation of the hand-written Color class body (hence the indentation):
    # the bookkeeping attributes that record the decorator options and the
    # per-field settings.
    # NOTE(review): illustrative only; presumably mirrors what @dataclass
    # stores on the class. Confirm against the dataclasses source before
    # relying on these exact constructor arguments.
    __dataclass_params__ = _DataclassParams(
        init=True,
        repr=True,
        eq=True,
        order=False,
        unsafe_hash=False,
        frozen=False)

    # One Field record per attribute; only 'lightness' has a real default
    __dataclass_fields__ = {
        'hue': Field(default=_MISSING_TYPE,
                     default_factory=_MISSING_TYPE,
                     init=True,
                     repr=True,
                     hash=None,
                     compare=True,
                     metadata={}),
        'saturation': Field(default=_MISSING_TYPE,
                            default_factory=_MISSING_TYPE,
                            init=True,
                            repr=True,
                            hash=None,
                            compare=True,
                            metadata={}),
        'lightness': Field(default=0.5,
                           default_factory=_MISSING_TYPE,
                           init=True,
                           repr=True,
                           hash=None,
                           compare=True,
                           metadata={})
    }
    # name and type are assigned after construction rather than passed in
    __dataclass_fields__['hue'].name = 'hue'
    __dataclass_fields__['hue'].type = int
    __dataclass_fields__['saturation'].name = 'saturation'
    __dataclass_fields__['saturation'].type = float
    __dataclass_fields__['lightness'].name = 'lightness'
    __dataclass_fields__['lightness'].type = float

In [67]:

from dataclasses import dataclass
from datetime import date


@dataclass
class Person:  # Basically, gets rid of boring boilerplate
    first_name: str
    last_name: str
    birthday: date
    gender: str


p = Person("Andrew", "Bolster", date(1988, 5, 17), 'Male')
p

Out[67]:

Person(first_name='Andrew', last_name='Bolster', birthday=datetime.date(1988, 5, 17), gender='Male')

In [69]:

# `field` is first used in this cell, so import it here; without this the cell
# only runs if a later cell's import has already been executed.
from dataclasses import dataclass, field


@dataclass
class Person:  # Basically, gets rid of boring boilerplate
    first_name: str
    last_name: str
    # repr=False leaves these two attributes out of the generated __repr__
    birthday: date = field(repr=False)
    gender: str = field(repr=False)


p = Person("Andrew", "Bolster", date(1988, 5, 17), 'Male')
p

Out[69]:

Person(first_name='Andrew', last_name='Bolster')

In [71]:

from dataclasses import dataclass, field
from datetime import date


@dataclass
class Person:  # Basically, gets rid of boring boilerplate
    first_name: str
    last_name: str
    # Stored and compared as usual, but left out of the generated __repr__
    birthday: date = field(repr=False)
    gender: str = field(repr=False)

    @property
    def age(self):
        """Whole years elapsed since ``birthday`` as of today."""
        today = date.today()
        birthday_still_ahead = (
            (today.month, today.day) < (self.birthday.month, self.birthday.day)
        )
        # The boolean subtracts as 0 or 1
        return today.year - self.birthday.year - birthday_still_ahead

    def __str__(self):
        # print() uses this; the generated __repr__ is left untouched
        return f"{self.first_name} {self.last_name} ({self.age})"


p = Person("Andrew", "Bolster", date(1988, 5, 17), 'Male')
p

Out[71]:

Person(first_name='Andrew', last_name='Bolster')

In [72]:

print(p)

Andrew Bolster (30)

But wait, there's more!¶

Passing class decorator arguments to augment output objects; e.g.

'order': adds __lt__/__gt__ etc methods based on tuple-ordering of attributes
'frozen': adds __hash__ method to add immutability / hashability

Also field declarations to provide per-attribute control over these things

In [83]:

from dataclasses import dataclass, field
from datetime import datetime
import uuid

# order=True and frozen=True are what the sorting and dict-key cells rely on;
# the per-field switches below tune repr, hashing and comparison individually.
@dataclass(order=True, frozen=True)
class MP:
    name: str
    gender: str = field(repr=False)
    # metadata is free-form information attached to the field
    salary: int = field(hash=False, repr=False, metadata={'units': 'GBP'})
    age: int = field(hash=False, repr=False)
    party: str = field(hash=True, repr=True, default='Independent')
    # default_factory gives every instance its own fresh list;
    # compare=False keeps the meal log out of comparisons
    ate: list = field(default_factory=list, compare=False, repr=False)
    # A new uuid4 per instance unless one is supplied
    emp_id: uuid.UUID = field(
        default_factory=uuid.uuid4, compare=True, repr=False
    )

    def eats(self, thing):
        """Append ``(thing, current timestamp)`` to the ``ate`` list."""
        # Mutates the existing list in place; no attribute is re-assigned
        self.ate.append((thing, datetime.now()))

In [96]:

e1 = MP(name='Sammy Wilson',
        gender='male', party='DUP',
        salary=77_379,  # Another cool py3 feature ;)
        age=65,

        )
e2 = MP(name='Caroline Lucas',
        gender='female', party='Greens',
        salary=77_379,  # Another cool py3 feature ;)
        age=56,
        )
e1  # Non-repr fields not displayed

Out[96]:

MP(name='Sammy Wilson', party='DUP')

In [97]:

[e1, e2]

Out[97]:

[MP(name='Sammy Wilson', party='DUP'),
 MP(name='Caroline Lucas', party='Greens')]

In [98]:

sorted([e1, e2]) # thanks to 'order'

Out[98]:

[MP(name='Caroline Lucas', party='Greens'),
 MP(name='Sammy Wilson', party='DUP')]

In [99]:

affiliations = {
    e1: 'Brexiteers',
    e2: 'Sane'
}
affiliations # thanks to 'frozen'

Out[99]:

{MP(name='Sammy Wilson', party='DUP'): 'Brexiteers',
 MP(name='Caroline Lucas', party='Greens'): 'Sane'}

In [100]:

e1.eats('fish')
e1.eats('chips')
for e, camp in affiliations.items():
    msg = f"{e.name}, from the {camp} camp, "\
          f"ate {' and '.join([m[0] for m in e.ate]) if e.ate else 'Nothing'}"
    print(msg)

Sammy Wilson, from the Brexiteers camp, ate fish and chips
Caroline Lucas, from the Sane camp, ate Nothing

What we done covered¶

print
/ vs //
unicode
Catching Constructions (i.e. first,*rest = iterable)
changes to dict (i.e. views)
f-strings (including performance)
typing / type hinting
dataclasses

Anything I've missed / Undersold?

Conclusion¶

If you're not using at least Python 3.5, you're missing out

If you're still stuck on 2.7, you're going to be left behind

If you're still starting new projects in 2.7, you deserve all the pain that's coming your way

In [ ]:

Python 3: More than just print()¶

What We'll Cover¶

But TL;DR?¶

Timeline¶

Significant Features/Changes¶

print()¶

Integer Division (/ vs //)¶

υηι¢σ∂є¶

f-strings (py3.6)¶

Iterators and stuff¶

range behaves like xrange used to¶

Oh, BTW, dicts are now sorted!¶

Advanced Unpacking¶

Type annotations¶

dataclass (py3.7)¶

But wait, there's more!¶

What we done covered¶

Conclusion¶

Python 3: More than just `print()`¶

`print()`¶

`range` behaves like `xrange` used to¶

Oh, BTW, `dicts` are now sorted!¶

`dataclass` (py3.7)¶