#!/usr/bin/env python
# coding: utf-8
#
# Table of Contents
#
# # What is the hidden action/function of `*` in the `dummy` func below?
#
# The `dummy` function below has no real body, yet `*` appears to take the items out of a list and put them into a tuple. What exactly is `*` doing here?
# In[3]:
from pdb import set_trace
# In[4]:
def dummy(*sth):
    """Pause in the debugger so the packed positional arguments can be inspected.

    Args:
        *sth: Any positional arguments. Python collects them into the tuple
            `sth` before the body runs, so `dummy(a, b)` sees `sth == (a, b)`.

    Returns:
        None.
    """
    set_trace()
    # Kept from the original: a statement after `set_trace()`, presumably so
    # the debugger has a line inside this frame to stop on while `sth` is
    # examined.
    pass
# In[5]:
# Each call below pauses in pdb (dummy calls set_trace) so `sth` can be printed.
# No `*` at the call site: the whole list is ONE positional argument,
# so inside dummy `sth == ([{'a':1, 'b':2}],)`.
dummy([{'a':1, 'b':2}])
# In[6]:
dummy(*[{'a':1, 'b':2}]) # `*` unpacks the list into separate positional arguments, so `sth == ({'a':1, 'b':2},)`
# In[7]:
# Three positional arguments, the first of which is still a list:
# `sth == ([{'a':1, 'b':2}], {'c':3, 'd':4}, {'e':5})`.
dummy([{'a':1, 'b':2}], {'c':3, 'd':4}, {'e':5})
# In[8]:
dummy(*[{'a':1, 'b':2}], {'c':3, 'd':4}, {'e':5}) # `*` unpacks the list first, so `sth == ({'a':1, 'b':2}, {'c':3, 'd':4}, {'e':5})`
# # What types of data can be used as `params` for `Optimizer`?
# According to official docs (see the source below), `params`
# - should be `Tensor` and
# - contain both parameters and hyper parameters.
#
# ```python
# class Optimizer(_BaseOptimizer):
# "Base optimizer class for the fastai library, updating `params` with `cbs`"
# _keep_on_clear = ['force_train', 'do_wd']
# def __init__(self,
# params:Tensor, # Parameters and hyper parameters
# cbs:list, # `Optimizer` callbacks
# train_bn:bool=True, # Batch normalization is always trained
# **defaults # Default values to set on hyper parameters
# ):
# ```
# However, in actual source code and tests involving `params`, we can tell that `params`:
# - can be almost anything, a number, a list, a tuple, a range, a generator, and finally a tensor
# - is never used to carry hyper parameters in any example or source code I could find
#
# First, let's see what type of data can be used as `params`
# ```python
# # these two lines of the source code process `params`
# params = L(params)
# self.param_lists = L(L(p) for p in params) if isinstance(params[0], (L,list)) else L([params])
# ```
# In[9]:
from fastai.optimizer import L, listify
# By reading the source of `L` from `L??`, `params` can be anything that can be `listify`ed.
# In[10]:
from fastai.optimizer import _BaseOptimizer, Tensor, Optimizer, noop, test_eq
# In fact, tests from the source have given us examples of `params` being a list, a range and a generator.
# In[11]:
# The 4 examples provided by the official source code
# Each example builds an Optimizer from a different kind of `params` (with
# `noop` in the callbacks position) and checks how the values end up grouped
# in `opt.param_lists`.
opt = Optimizer([1,2,3], noop) # params as a flat list -> a single group
test_eq(opt.param_lists, [[1,2,3]])
opt = Optimizer(range(3), noop) # params as a range -> a single group
test_eq(opt.param_lists, [[0,1,2]])
opt = Optimizer([[1,2],[3]], noop) # params as a list of lists -> one group per inner list
test_eq(opt.param_lists, [[1,2],[3]])
opt = Optimizer(([o,o+1] for o in range(0,4,2)), noop) # params as a generator of lists -> one group per yielded list
test_eq(opt.param_lists, [[0,1],[2,3]])
# I have added examples where `params` is a number, a tuple, or a tensor
# In[12]:
# I have added 2 examples here, with `params` as a tuple and as a number; the tensor examples follow in the next cells
opt = Optimizer((1,2,3), noop) # params as a tuple -> the same single group a flat list gives
test_eq(opt.param_lists, [[1,2,3]])
# Note: `(1)` is just the int 1, not a one-element tuple (that would be `(1,)`).
opt = Optimizer((1), noop) # params as a bare number -> a single one-item group
test_eq(opt.param_lists, [[1]])
# In[13]:
# Two separately constructed tensors built from the same nested list, used as
# stand-in parameters in the cells that follow.
t = Tensor([[1,2],[3,4]])
t1 = Tensor([[1,2],[3,4]])
# In[14]:
opt = Optimizer(t, noop) # params as a single tensor
# Bare expression: shown as the cell output in a notebook; it has no effect
# when this file is run as a plain script.
opt.param_lists
# In[15]:
# The first item is a tensor, not a list. Per the `isinstance(params[0], (L,list))`
# check quoted earlier, the whole input is presumably kept as ONE group here;
# confirm against the displayed output.
opt = Optimizer([t, [t1]], noop) # param as a list of tensors, first item is just a tensor
opt.param_lists
# In[16]:
# The first item is a list, so per the same check each item presumably becomes
# its own group; confirm against the displayed output.
opt = Optimizer([[t], t1], noop) # param as a list of tensors, first item is a list of tensor
opt.param_lists
# # `defaults` provide hyper parameters not `params`
# According to the docs above, `params` is said to be both parameters and hyper parameters. But according to the actual source, hyper parameters like `lr`, `mom` are provided by `defaults` from `**defaults` (user input) or from `cbs` (another user input), not `params`.
#
# In fact, all hyper parameters are processed and stored in `self.hypers` for later use. There are tests in the source that demonstrate it.
# In[19]:
def tst_arg(p, lr=0, **kwargs):
    """Dummy optimizer callback: hand `p` back untouched.

    `lr` and any extra keyword arguments are accepted and ignored.
    """
    return p


# Default hyper-parameter advertised by this callback; the `Optimizer`
# examples further down expect it to show up in `opt.hypers`.
tst_arg.defaults = {"lr": 1e-2}
def tst_arg2(p, lr2=0, **kwargs):
    """Dummy optimizer callback: hand `p` back untouched.

    `lr2` and any extra keyword arguments are accepted and ignored.
    """
    return p


# Default hyper-parameter advertised by this callback; the `Optimizer`
# example further down expects it to show up in `opt.hypers`.
tst_arg2.defaults = {"lr2": 1e-3}
def tst_arg3(p, mom=0, **kwargs):
    """Dummy optimizer callback: hand `p` back untouched.

    `mom` and any extra keyword arguments are accepted and ignored.
    """
    return p


# Default hyper-parameter advertised by this callback; the `Optimizer`
# example further down expects it to show up in `opt.hypers`.
tst_arg3.defaults = {"mom": 0.9}
def tst_arg4(p, **kwargs):
    """Dummy optimizer callback with no `defaults`: hand `p` back untouched.

    Any keyword arguments are accepted and ignored.
    """
    return p
# In[21]:
# No hyper-parameters are passed directly here; the expected `hypers` is the
# union of the three callbacks' `.defaults` dicts.
opt = Optimizer([1,2,3], [tst_arg,tst_arg2, tst_arg3]) # hyper params provided by cbs
test_eq(opt.hypers, [{'lr2': 1e-3, 'mom': 0.9, 'lr': 1e-2}])
test_eq(opt.param_lists, [[1, 2, 3]])
# In[22]:
# `lr=0.1` arrives through `**defaults`; the expected value shows it taking
# precedence over `tst_arg.defaults['lr']` (1e-2).
opt = Optimizer([1,2,3], tst_arg, lr=0.1) # hyper params provided by both cbs and **defaults
test_eq(opt.hypers, [{'lr': 0.1}])
test_eq(opt.param_lists, [[1,2,3]])
# In[ ]: