#!/usr/bin/env python
# coding: utf-8

# # Table of Contents

#
# # What is the hidden action/function of `*` in the `dummy` func below?
#
# The dummy function below has no real body, yet `*` appears to take things
# out of a list and put them into a tuple. What exactly is happening here
# with `*`?

# In[3]:

from pdb import set_trace


# In[4]:

def dummy(*sth):
    """Pause in the debugger so the packed positional arguments can be inspected.

    In a ``def``, ``*sth`` collects every positional argument of the call into
    the tuple ``sth``. At a call site, ``*iterable`` does the reverse: it
    spreads the iterable's items out as separate positional arguments.
    """
    set_trace()  # inspect `sth` here; the function deliberately does nothing else


# In[5]:

# One positional argument (the list itself): sth == ([{'a': 1, 'b': 2}],)
dummy([{'a': 1, 'b': 2}])


# In[6]:

# `*` unpacks the list, so its single dict becomes the only positional
# argument: sth == ({'a': 1, 'b': 2},)
dummy(*[{'a': 1, 'b': 2}])


# In[7]:

# Three positional arguments, the first is still a list:
# sth == ([{'a': 1, 'b': 2}], {'c': 3, 'd': 4}, {'e': 5})
dummy([{'a': 1, 'b': 2}], {'c': 3, 'd': 4}, {'e': 5})


# In[8]:

# `*` unpacks the leading list before the remaining arguments are appended:
# sth == ({'a': 1, 'b': 2}, {'c': 3, 'd': 4}, {'e': 5})
dummy(*[{'a': 1, 'b': 2}], {'c': 3, 'd': 4}, {'e': 5})


# # What types of data can be used as `params` for `Optimizer`?

# According to the official docs (see the source below), `params`
# - should be a `Tensor` and
# - contain both parameters and hyper parameters.
#
# ```python
# class Optimizer(_BaseOptimizer):
#     "Base optimizer class for the fastai library, updating `params` with `cbs`"
#     _keep_on_clear = ['force_train', 'do_wd']
#     def __init__(self,
#         params:Tensor, # Parameters and hyper parameters
#         cbs:list, # `Optimizer` callbacks
#         train_bn:bool=True, # Batch normalization is always trained
#         **defaults # Default values to set on hyper parameters
#     ):
# ```

# However, from the actual source code and the tests involving `params`, we
# can tell that `params`:
# - can be almost anything: a number, a list, a tuple, a range, a generator,
#   and finally a tensor
# - I don't see any example or source code that uses `params` as hyper
#   parameters
#
# First, let's see what types of data can be used as `params`
# ```python
# # there are two lines of source code that process `params`
# params = L(params)
# self.param_lists = L(L(p) for p in params) if isinstance(params[0], (L,list)) else L([params])
# ```

# In[9]:

from fastai.optimizer import L, listify

# By reading the source of `L` from `L??`, `params` can be anything that can
# be `listify`ed.
# In[10]:

from fastai.optimizer import _BaseOptimizer, Tensor, Optimizer, noop, test_eq

# In fact, tests from the source have given us examples of `params` being a
# list, a range and a generator.

# In[11]:

# The 4 examples provided by the official source code
opt = Optimizer([1, 2, 3], noop)  # params as a list
test_eq(opt.param_lists, [[1, 2, 3]])
opt = Optimizer(range(3), noop)  # params as a range
test_eq(opt.param_lists, [[0, 1, 2]])
opt = Optimizer([[1, 2], [3]], noop)  # params as a list of lists
test_eq(opt.param_lists, [[1, 2], [3]])
opt = Optimizer(([o, o + 1] for o in range(0, 4, 2)), noop)  # params as a generator
test_eq(opt.param_lists, [[0, 1], [2, 3]])

# I have added examples where `params` can be a digit, a tuple, a tensor

# In[12]:

# Added examples: `params` as a tuple and as a digit (tensor examples follow
# in the next cells)
opt = Optimizer((1, 2, 3), noop)  # params as a tuple
test_eq(opt.param_lists, [[1, 2, 3]])
# NOTE: `(1)` is just the int 1; parentheses without a comma do not make a tuple
opt = Optimizer((1), noop)  # params as a digit
test_eq(opt.param_lists, [[1]])


# In[13]:

t = Tensor([[1, 2], [3, 4]])
t1 = Tensor([[1, 2], [3, 4]])


# In[14]:

opt = Optimizer(t, noop)  # params as a single tensor
opt.param_lists  # bare expression: shown as the cell output in a notebook


# In[15]:

opt = Optimizer([t, [t1]], noop)  # params as a list of tensors, first item is just a tensor
opt.param_lists


# In[16]:

opt = Optimizer([[t], t1], noop)  # params as a list of tensors, first item is a list of tensors
opt.param_lists


# # `defaults` provide hyper parameters, not `params`

# According to the docs above, `params` is said to be both parameters and
# hyper parameters. But according to the actual source, hyper parameters like
# `lr` and `mom` are provided by `defaults`, either from `**defaults` (user
# input) or from `cbs` (another user input), not by `params`.
#
# In fact, all hyper parameters are processed and stored in `self.hypers` for
# later use. There are tests in the source to demonstrate it.
# In[19]:

# Imported here so this cell and the checks below run on their own.
from fastai.optimizer import Optimizer, test_eq


# Dummy optimizer callbacks: each returns `p` unchanged. The `defaults`
# attribute attached to a callback names the hyper parameter(s) that the
# checks below expect to find in `opt.hypers`.

def tst_arg(p, lr=0, **kwargs):
    """Return `p` untouched; `lr` and any extra keyword arguments are ignored."""
    return p


tst_arg.defaults = {'lr': 1e-2}


def tst_arg2(p, lr2=0, **kwargs):
    """Return `p` untouched; `lr2` and any extra keyword arguments are ignored."""
    return p


tst_arg2.defaults = {'lr2': 1e-3}


def tst_arg3(p, mom=0, **kwargs):
    """Return `p` untouched; `mom` and any extra keyword arguments are ignored."""
    return p


tst_arg3.defaults = {'mom': 0.9}


def tst_arg4(p, **kwargs):
    """Return `p` untouched; this callback has no `defaults` attribute."""
    return p


# In[21]:

# Hyper params provided by the callbacks (`cbs`) only
opt = Optimizer([1, 2, 3], [tst_arg, tst_arg2, tst_arg3])
test_eq(opt.hypers, [{'lr2': 1e-3, 'mom': 0.9, 'lr': 1e-2}])
test_eq(opt.param_lists, [[1, 2, 3]])


# In[22]:

# Hyper params provided by both `cbs` and `**defaults`; the check below shows
# the user-supplied `lr=0.1` ending up in `hypers` instead of the callback's 1e-2
opt = Optimizer([1, 2, 3], tst_arg, lr=0.1)
test_eq(opt.hypers, [{'lr': 0.1}])
test_eq(opt.param_lists, [[1, 2, 3]])


# In[ ]: