#!/usr/bin/env python
# coding: utf-8

# # Operator Overloading
# - Enables a class to intercept normal Python operations
# - Makes class instances act more like built-in types
#
# ## When Should Operator Overloading Be Used
# When a class naturally matches, or needs to emulate, a built-in type's interface
#
# ## Performance
# Don't expect a speed advantage.
# In fact, it might be slower (possibly due to the overhead of a function call).

# In[3]:


import timeit

min(timeit.repeat("L = list(range(100)); x = L.__len__()", number=10000, repeat=3))


# In[4]:


min(timeit.repeat("L = list(range(100)); x = len(L)", number=10000, repeat=3))


# ## Commonly Used Operators
# ```
# __init__
# __repr__, __str__
# __call__
# __getattr__
# __setattr__
# __getitem__
# __setitem__
# __len__
# __bool__
# __lt__, __gt__, __le__, __ge__, __eq__, __ne__
# __iter__, __next__
# __contains__
# __index__
# ```

# ## Indexing and Slicing: `__getitem__`, `__setitem__`
#
# ### `__getitem__`
# For instance-indexing operations (e.g. **X[i]**).

# In[5]:


class Indexer(object):
    """Demo class that logs every index or slice request before serving it."""

    data = [5, 6, 7, 8, 9]

    def __getitem__(self, index):
        """Print the received index object and return ``self.data[index]``."""
        print("getitem: ", index)
        return self.data[index]


x = Indexer()
x[2]


# In[6]:


x[2:4]


# Handle the slice object (slice objects have the attributes **start**, **stop** and **step**)

# In[7]:


class Indexer(object):
    """Demo class that tells integer indexing apart from slicing."""

    data = [5, 6, 7, 8, 9]

    def __getitem__(self, index):
        """Report whether `index` is an int or a slice, then return ``self.data[index]``."""
        if isinstance(index, int):
            print("indexing", index)
        else:
            # Anything that is not an int is treated as a slice object here.
            print("slicing", index.start, index.stop, index.step)
        return self.data[index]


x = Indexer()
x[2:4]


# ### Code One, Get a Bunch Free: `__getitem__`
# In the absence of more-specific methods, `__getitem__` may be used in the following cases.
#
# - iteration
# - **in**
# - list comprehensions
# - **map**
# - list and tuple assignment
# - type constructors

# In[8]:


class StepIndexer(object):
    """Demo class whose only protocol method is `__getitem__`.

    The instance is expected to be given a `data` attribute after creation.
    """

    def __getitem__(self, i):
        """Return ``self.data[i]``."""
        return self.data[i]


x = StepIndexer()
x.data = "1234"


# In[9]:


# iteration
for i in x:
    print(i)


# In[10]:


# in
"1" in x


# In[11]:


# list comprehensions
[i for i in x]


# In[12]:


# map
list(map(lambda x: int(x) * 2, x))


# In[13]:


# tuple assignment
(a, b, c, d) = x
a, b, d


# In[14]:


# type constructor
list(x)


# ### `__setitem__`
# For instance-indexing assignment (e.g. **X[i] = 5**).

# In[15]:


class IndexSeter(object):
    """Demo class that routes index assignment to its `data` list."""

    # NOTE: `data` is a class attribute, so the in-place assignment below
    # changes the one list shared by every instance.
    data = [1, 2, 3, 4]

    def __setitem__(self, index, value):
        """Store `value` at ``self.data[index]``."""
        self.data[index] = value


x = IndexSeter()
x[2] = 5
x.data


# ### Slicing and Indexing in Python2
# In Python 2 only, there are also `__getslice__` and `__setslice__`.
# They were removed in Python3.
# Thus, even in Python2, `__getitem__` and `__setitem__` should be used.

# ### Python3's `__index__` is not indexing !!!
# It returns an integer value for an instance and is used by built-ins that convert to digit strings.

# In[16]:


class C(object):
    """Demo class that supplies an integer value through `__index__`."""

    def __index__(self):
        """Return the integer 255."""
        return 255


x = C()
hex(x)


# ## Iterable Objects: `__iter__`, `__next__`
# - Iteration contexts try **`__iter__`** first, then **`__getitem__`**
# - Generally, **`__iter__`** is preferred. It supports general iteration contexts better than **`__getitem__`**
# - When **`__iter__`** is invoked, it's expected to return an iterator object.
#   If it's provided, Python calls this iterator object's **`__next__`** until a **StopIteration** exception is raised
# - It's designed for iteration, not random indexing.
#   Thus, if indexing is needed, **`__getitem__`** should still be used.
#
# ### Single Traversal

# In[17]:


class Squares(object):
    """Single-pass iterable: the object is its own iterator."""

    def __init__(self, start, stop):
        self.value = start - 1
        self.stop = stop

    def __iter__(self):
        """Return self, so every `iter()` call shares one traversal state."""
        return self

    def __next__(self):
        """Return the square of the next value, or raise StopIteration."""
        # `>=` rather than `==` so that start > stop + 1 ends immediately
        # instead of counting upward forever.
        if self.value >= self.stop:
            raise StopIteration
        self.value += 1
        return self.value**2


def test_iter():
    """Request two iterators from one `Squares` object and print their values.

    Uses whichever `Squares` class is bound at call time.
    """
    x = Squares(1, 5)
    I1 = iter(x)
    I2 = iter(x)
    print("I1: ", next(I1))
    print("I1: ", next(I1))
    print("I1: ", next(I1))
    print("I2: ", next(I2))


test_iter()


# ### Multiple Iterators on One Object
#
# #### Method 1: yield
# - When **yield** is used, it returns a new generator object and creates **`__iter__`** and **`__next__`**
# - It's still true even if the generator function with a **yield** happens to be a method named **`__iter__`**

# In[18]:


class Squares(object):
    """Multi-pass iterable: `__iter__` is a generator method."""

    def __init__(self, start, stop):
        self.start = start
        self.stop = stop

    def __iter__(self):
        """Yield the squares of start..stop (inclusive)."""
        for value in range(self.start, self.stop + 1):
            yield value**2


# Defined above
test_iter()


# #### Supplemental class that stores iterator state

# In[19]:


class Squares(object):
    """Multi-pass iterable: each `__iter__` call builds a separate `SquaresIter`."""

    def __init__(self, start, stop):
        self.start = start
        self.stop = stop

    def __iter__(self):
        """Return a new `SquaresIter` for this object's start and stop."""
        return SquaresIter(self.start, self.stop)


class SquaresIter(object):
    """Iterator holding the traversal state for one pass over a `Squares`."""

    def __init__(self, start, stop):
        self.value = start - 1
        self.stop = stop

    def __iter__(self):
        """Return self, as the iterator protocol expects of iterators."""
        return self

    def __next__(self):
        """Return the square of the next value, or raise StopIteration."""
        # `>=` rather than `==` so that start > stop + 1 ends immediately
        # instead of counting upward forever.
        if self.value >= self.stop:
            raise StopIteration
        self.value += 1
        return self.value**2


# Defined above
test_iter()


# ## Membership: `__contains__`, `__iter__`, `__getitem__`
# - **`__contains__`** is called when the **in** membership operator is used
# - It should be used in membership optimization as a special case.
#   In other cases, using **`__iter__`** or **`__getitem__`** is enough
# - **`__contains__`** should define membership as applying to keys for a mapping and as a search for sequences

# In[20]:


class C(object):
    """Demo class that answers `in` tests through `__contains__`."""

    data = [1, 2, 3, 4]

    def __contains__(self, x):
        """Return True if `x` is found in ``self.data``."""
        return x in self.data


x = C()
1 in x


# ## Attribute Access: `__getattr__` and `__setattr__`
#
# They are used when **`object.attribute`** is present

# ### Attribute Reference `__getattr__`
# - It's called whenever you try to qualify an instance with an undefined attribute name (e.g. **object.attribute**).
#   It's not called if Python can find the attribute in its inheritance tree
#
# - Usage
#     - Delegate calls to embedded objects from a proxy controller object
#     - Adapt classes to an interface or accessors for data attributes after the fact - logic in a method that validates or computes an attribute after it's already being used with simple dot notation

# In[21]:


class Empty(object):
    """Demo class that computes the otherwise-undefined attribute `age`."""

    def __getattr__(self, attrname):
        """Return 40 for "age"; raise AttributeError for any other name."""
        if attrname == "age":
            return 40
        else:
            raise AttributeError(attrname)


x = Empty()
x.age


# - In Python2 default classes, operator overloading methods run by built-in operations are routed through generic attribute interception methods like **`__getattr__`**
# - In Python3, methods invoked implicitly by built-in operations are looked up in classes, and the normal instance lookup is skipped entirely
#   (e.g. printing does not trigger **`__getattr__`** (or **`__getattribute__`**) and a default display is used instead)

# ### Attribute Assignment and Deletion: `__setattr__`
# - Intercepts ***all*** attribute assignments
#   If this method is defined or inherited, **`self.attr = value`** becomes **`self.__setattr__('attr', value)`**
#
# #### Loop issue
# Assigning to any **self** attribute calls **`__setattr__`**.
# This holds even when the assignment happens inside **`__setattr__`** itself
# To avoid loops, use the attribute dict, **`self.__dict__['name'] = x`**, or route any attribute assignments to a higher superclass
#

# In[22]:


class Accesscontrol(object):
    """Demo class that only permits assignment to the attribute `age`."""

    def __setattr__(self, attr, value):
        """Store ``value + 10`` for "age"; raise AttributeError for other names."""
        if attr == "age":
            # Write through __dict__ so this store does not re-enter __setattr__.
            self.__dict__[attr] = value + 10
        else:
            raise AttributeError(attr + " not allowed")


x = Accesscontrol()
x.age = 50
x.age


# In[23]:


# This assignment is rejected by __setattr__. The error is caught and shown
# so that the script can continue past this cell.
try:
    x.name = "Bob"
except AttributeError as exc:
    print("AttributeError:", exc)


# ### Other Attribute Management Tools
# - **`__delattr__`**: Used when **del object.attr** is present. (Must avoid recursive loops, as with **`__setattr__`**)
# - **`__getattribute__`**: Intercepts ***all*** attribute fetches, not just those that are undefined. This also needs to avoid loops
# - Descriptors provide a protocol for associating **`__get__`** and **`__set__`** methods of a class with accesses to a specific class attribute

# ## String Representation: `__repr__`, `__str__`
#
# ### `__repr__` vs `__str__`
# - `__str__`
#     - Tried first for **print** and **str**
#     - Should return a user-friendly display
# - `__repr__`
#     - Interactive echoes, **repr**, nested appearances
#     - Should return an as-code string that could be used to re-create the object
#
# ### Usage
# - **`__repr__`** is used everywhere, except by **print** and **str** when **`__str__`** is defined.
# - Both **must return strings**
# - Print falls back on **`__repr__`** if no **`__str__`** is defined, but the inverse is not true
# - Depending on a container's string conversion logic, **`__str__`** might only apply when objects appear at the top level of a print operation
#

# In[24]:


class Printer(object):
    """Demo class that defines only `__str__`."""

    def __init__(self, val):
        self.val = val

    def __str__(self):
        """Return ``str(self.val)``."""
        return str(self.val)


objs = [Printer(2), Printer(3)]
for x in objs:
    print(x)
print(objs)


# In[25]:


class Printer(object):
    """Demo class that defines only `__repr__`."""

    def __init__(self, val):
        self.val = val

    def __repr__(self):
        """Return ``str(self.val)``."""
        return str(self.val)


objs = [Printer(2), Printer(3)]
for x in objs:
    print(x)
print(objs)


# ### Loop in **`__repr__`**
# Displaying the value of a method triggers the **`__repr__`** of the method's class, in order to display the class

# In[3]:


class LoopRepr(object):
    """Demo class whose `__repr__` prints the instance itself."""

    def __init__(self):
        self.data = "loop"

    def __repr__(self):
        """Print self, then return ``self.data``."""
        # With no __str__ defined, print(self) falls back on __repr__,
        # so this call re-enters the method.
        print(self)
        return self.data


r = LoopRepr()
# The print below recurses without end. The error is caught and shown so
# that the script can continue past this cell.
try:
    print(r)
except RecursionError as exc:
    print("RecursionError:", exc)


# ## Binary Operation (e.g. __add__, __radd__, __iadd__)
# The following examples are based on addition.
# Every binary operator has similar overloading methods that work the same way (e.g. **`__mul__`**, **`__rmul__`**, **`__imul__`**).
#
# ### Right-Side Addition
# **`__add__`** does not support an instance appearing on the right side of the + operator

# In[26]:


class Adder(object):
    """Demo class that supports `instance + other` only."""

    def __init__(self, value=0):
        self.val = value

    def __add__(self, other):
        """Return ``self.val + other``."""
        return self.val + other


x = Adder(5)


# In[27]:


x + 2


# In[28]:


# Adder defines no __radd__, so this addition fails. The error is caught and
# shown so that the script can continue past this cell.
try:
    2 + x
except TypeError as exc:
    print("TypeError:", exc)


# Python calls **`__radd__`** only when the object on the right side of the + is your class instance, but the object on the left is not; **`__add__`** handles all the other cases.
# When instances of different classes appear mixed in an expression, Python prefers the class of the one on the left
#
# The order is reversed in **`__radd__`**: self is really on the right of the +, and the other is on the left.
# In[29]:


class Adder(object):
    """Demo class that supports addition from either side of the + operator."""

    def __init__(self, value=0):
        self.val = value

    def __add__(self, other):
        """Return ``self.val + other``."""
        return self.val + other

    def __radd__(self, other):
        """Print the operands, then return ``other + self.val``."""
        print("radd ", self.val, other)
        return other + self.val


x = Adder(5)
2 + x


# #### Reusing `__add__` in `__radd__`
# For truly commutative operations

# In[30]:


class Adder(object):
    """Demo class that reuses `__add__` as its `__radd__`."""

    def __init__(self, val):
        self.val = val

    def __add__(self, other):
        """Return ``self.val + other``."""
        return self.val + other

    # Same function object serves both operand orders.
    __radd__ = __add__


x = Adder(5)
print(x + 2)
print(2 + x)


# ### In-place addition
# To implement +=, code **`__iadd__`** or **`__add__`**. The latter is used if the former is absent
# **`__iadd__`** allows for more efficient in-place changes to be coded.

# In[31]:


class Number(object):
    """Demo class that implements `+=` through `__iadd__`."""

    def __init__(self, val):
        self.val = val

    def __iadd__(self, other):
        """Add `other` to ``self.val`` in place and return self."""
        self.val += other
        return self


x = Number(5)
x += 1
x.val


# ## Call Expressions: `__call__`
# - If defined, Python runs **`__call__`** for function call expressions applied to your instances, passing along whatever positional or keyword arguments were sent
#
# - All the argument-passing modes are supported by **`__call__`**
# - Useful when interfacing with APIs (i.e. libraries)
#     - Allows us to code objects that conform to an expected function call interface, but also retain state information
#     - Many consider such classes to be the best way to retain state information in Python

# In[32]:


class Callee(object):
    """Demo class whose instances can be called like functions."""

    def __call__(self, *pargs, **kargs):
        """Print the positional and keyword arguments that were received."""
        print("Called: ", pargs, kargs)


c = Callee()
c(1, 2, 3)


# ## Comparisons (e.g. `__lt__`, `__gt__`)
# - No implicit relationships among the comparison operators.
#     - e.g.
#       both **`__eq__`** and **`__ne__`** should be defined to ensure that both operators behave correctly

# In[33]:


class Comparator(object):
    """Demo class that defines only the `<` comparison."""

    def __init__(self, val):
        self.val = val

    def __lt__(self, other):
        """Return ``self.val < other``."""
        return self.val < other


x = Comparator(5)
x < 50


# In[34]:


# Comparator defines no __gt__, so this comparison fails. The error is caught
# and shown so that the script can continue past this cell.
try:
    x > 50
except TypeError as exc:
    print("TypeError:", exc)


# ## Boolean Tests: `__bool__`, `__len__`
# - Boolean tests try **`__bool__`** first, then **`__len__`** (zero or not)
# - Python3 renamed the Python2 **`__nonzero__`** to **`__bool__`**
#     - Don't use the wrong name for the Python version in use, or the method will be silently ignored

# In[35]:


class Truth(object):
    """Demo class that defines both `__bool__` and `__len__`."""

    def __bool__(self):
        """Return True."""
        return True

    def __len__(self):
        """Return 1."""
        return 1


x = Truth()
if x:
    print("True")


# ## Object Destruction: `__del__`
# It's the destructor, but it is not recommended to use it,
# for the following reasons
# - No need: Python automatically reclaims all memory space
# - Unpredictable: When an instance will be reclaimed cannot be easily predicted. Python does not guarantee that the destructor will be called for objects that still exist when the interpreter exits
#
# It's often better to code termination activities in an explicitly called method (e.g. **shutdown**)

# ## Other Operator Overloadings
# The following methods will be mentioned in future chapters
# - **`__enter__`**, **`__exit__`**: used in the **with** statement
# - **`__get__`**, **`__set__`**: used in descriptors
# - **`__new__`**: used in metaclasses