#!/usr/bin/env python
# coding: utf-8

# # 케라스 API를 사용한 사용자 정의 모델 만들기 with 텐서플로 2.3+2.4
# 
# DLD(Daejeon Learning Day) 2020을 위해 작성된 노트북입니다.
# 
# * 깃허브 주소: https://github.com/rickiepark/handson-ml2/blob/master/custom_model_in_keras.ipynb
# * 코랩 주소: https://colab.research.google.com/github/rickiepark/handson-ml2/blob/master/custom_model_in_keras.ipynb

# In[1]:


import tensorflow as tf

tf.__version__


# ### MNIST 손글씨 숫자 데이터 적재

# In[2]:


# Load the MNIST train/test splits bundled with Keras.
(X_train, y_train), (X_test, y_test) = tf.keras.datasets.mnist.load_data()

# Flatten every image into a 784-element vector and divide by 255 to rescale
# the pixel values.
X_train = X_train.reshape(-1, 784) / 255.
# Give the test split the same preprocessing so that it has the layout the
# models below are trained on if it is later passed to evaluate()/predict().
X_test = X_test.reshape(-1, 784) / 255.


# In[3]:


X_train.shape


# ### `Sequential()` 클래스와 함수형 API의 관계

# `Sequential()`:

# 시퀀셜 모델에 10개의 유닛을 가진 완전 연결 층을 추가합니다.

# In[4]:


# Hand the layer list to the Sequential constructor: one fully connected
# softmax layer with 10 units that takes 784 input features per sample.
seq_model = tf.keras.Sequential([
    tf.keras.layers.Dense(
        units=10,
        activation='softmax',
        input_shape=(784,),
    ),
])

seq_model.summary()


# In[5]:


# 'rmsprop' is the optimizer compile() uses when none is given; it is spelled
# out here so the training setup is explicit.
seq_model.compile(
    optimizer='rmsprop',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)
seq_model.fit(x=X_train, y=y_train, batch_size=32, epochs=2)


# ### 함수형 API:
# 
# When using the functional API, the input shape has to be declared with `Input()`. Note that the model summary then lists an `InputLayer` even though none was added explicitly.

# In[6]:


# Symbolic input with 784 features per sample. The shape is passed as an
# explicit tuple through the documented `shape` keyword rather than as a bare
# positional integer.
inputs = tf.keras.layers.Input(shape=(784,))

# Calling a layer instance on a tensor goes through the layer's __call__() method.
outputs = tf.keras.layers.Dense(units=10,
                                activation='softmax')(inputs)
# The same thing written in two steps:
# dense = tf.keras.layers.Dense(units=10, activation='softmax')
# outputs = dense(inputs)

func_model = tf.keras.Model(inputs, outputs)

func_model.summary()


# In[7]:


# Same training setup as the Sequential model; 'rmsprop' is the optimizer
# compile() falls back to when none is given, written out explicitly.
func_model.compile(
    optimizer='rmsprop',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)
func_model.fit(x=X_train, y=y_train, batch_size=32, epochs=2)


# What exactly is `Input`? It is a function (not a layer class): it creates an `InputLayer` object and returns the result.

# In[8]:


# Notebook display cell: shows the type of `Input` itself.
type(tf.keras.layers.Input)


# 사실 신경망의 입력층은 입력 그 자체입니다. `InputLayer` 객체의 입력 노드 출력을 그대로 `Dense` 층에 주입할 수 있습니다. 모든 층은 입력과 출력 노드를 정의합니다.

# In[9]:


# Shortcut form of the two statements below:
# inputs = tf.keras.layers.Input(shape=(784,))

# Create the InputLayer explicitly (shape given as a tuple via the
# `input_shape` keyword) and read the outputs recorded on its first inbound
# node. `_inbound_nodes` is a private attribute, used here only to show what
# happens under the hood.
input_layer = tf.keras.layers.InputLayer(input_shape=(784,))
inputs = input_layer._inbound_nodes[0].outputs

outputs = tf.keras.layers.Dense(units=10,
                                activation='softmax')(inputs)

input_layer_model = tf.keras.Model(inputs, outputs)

input_layer_model.summary()


# In[10]:


# Train with the optimizer compile() uses by default ('rmsprop'), named
# explicitly, and the same loss/metric as the other models.
input_layer_model.compile(
    optimizer='rmsprop',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)
input_layer_model.fit(x=X_train, y=y_train, batch_size=32, epochs=2)


# A model built with the functional API includes the `InputLayer` in its `layers` attribute.

# In[11]:


func_model.layers


# A Sequential model, however, does not show the `InputLayer` in its `layers` attribute.

# In[12]:


seq_model.layers


# Models also have a hidden `_self_tracked_trackables` attribute, where the `InputLayer` can be seen (TensorFlow versions before 2.5 use the `_layers` attribute instead).

# In[15]:


seq_model._self_tracked_trackables


# It can also be found through the `_input_layers` attribute.

# In[16]:


seq_model._input_layers, func_model._input_layers


# In[17]:


seq_model._output_layers, func_model._output_layers


# `func_model`, created through the `Model` class, is actually an instance of the `Functional` class. The `Model` class itself is what you use for subclassing.

# In[18]:


func_model.__class__


# A Sequential model is a special case of a functional model. (`Model` --> `Functional` --> `Sequential`)

# ### 사용자 정의 층 만들기

# Subclass `tf.keras.layers.Layer`, create the weights in the `build()` method, and then implement the computation in the `call()` method.

# In[19]:


class MyDense(tf.keras.layers.Layer):
    """Fully connected layer computing ``activation(inputs @ kernel + bias)``.

    Args:
        units: Number of output units (size of the last output dimension).
        activation: Activation identifier resolved through
            ``tf.keras.activations.get``, e.g. ``'softmax'`` or ``'relu'``.
        **kwargs: Any remaining arguments, forwarded to the
            ``tf.keras.layers.Layer`` constructor.
    """

    def __init__(self, units, activation=None, **kwargs):
        # Everything except `units` and `activation` goes to the parent constructor.
        super().__init__(**kwargs)
        self.units = units
        # Look up a predefined activation function from its string name.
        self.activation = tf.keras.activations.get(activation)

    def build(self, input_shape):
        """Create the kernel and bias; invoked via ``__call__()``.

        Creating the weights here rather than in ``__init__`` defers it until
        the size of the last input dimension is known.
        """
        self.kernel = self.add_weight(
            name='kernel',
            shape=[input_shape[-1], self.units],
            initializer='glorot_uniform',  # Keras' default kernel initializer
        )
        self.bias = self.add_weight(
            name='bias',
            shape=[self.units],
            initializer='zeros',
        )
        # Let the base class record that the layer has been built.
        super().build(input_shape)

    def call(self, inputs):
        """Apply the affine transform and activation; invoked via ``__call__()``.

        No ``training`` argument is taken: it is only needed by layers such as
        batch normalization or dropout.
        """
        # Result has shape [batch_size, units].
        z = tf.matmul(inputs, self.kernel) + self.bias
        if self.activation:
            return self.activation(z)
        return z

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created."""
        config = super().get_config()
        config.update({
            'units': self.units,
            'activation': tf.keras.activations.serialize(self.activation),
        })
        return config


# In[20]:


# Symbolic input with 784 features per sample, with the shape given as an
# explicit tuple through the documented `shape` keyword.
inputs = tf.keras.layers.Input(shape=(784,))
# Calling the layer goes through Layer.__call__(), which runs MyDense.build()
# to create the weights and then MyDense.call() to do the computation.
outputs = MyDense(units=10, activation='softmax')(inputs)

my_dense_model = tf.keras.Model(inputs, outputs)

my_dense_model.summary()


# In[21]:


# The optimizer is written out explicitly; 'rmsprop' is what compile() uses
# when the argument is omitted.
my_dense_model.compile(
    optimizer='rmsprop',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)
my_dense_model.fit(x=X_train, y=y_train, batch_size=32, epochs=2)


# ### 사용자 정의 모델 만들기

# In[22]:


class MyModel(tf.keras.Model):
    """Model consisting of a single 10-unit softmax ``MyDense`` layer.

    Subclassing ``tf.keras.Model`` provides methods such as ``fit()``,
    ``compile()``, ``predict()`` and ``evaluate()``.

    Args:
        **kwargs: Forwarded to the ``tf.keras.Model`` constructor
            (for example ``name``).
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.output_layer = MyDense(units=10, activation='softmax')

    def call(self, inputs):
        """Return the output of the softmax layer for ``inputs``."""
        return self.output_layer(inputs)


# In[23]:


my_model = MyModel()

# compile() is inherited from tf.keras.Model; its default optimizer,
# 'rmsprop', is named explicitly here.
my_model.compile(
    optimizer='rmsprop',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)
my_model.fit(x=X_train, y=y_train, batch_size=32, epochs=2)


# ### 사용자 정의 훈련

# In[24]:


class MyCustomStep(MyModel):
    """``MyModel`` with a hand-written training step."""

    def train_step(self, data):
        """Run one optimization step on a single batch.

        Args:
            data: The ``(x, y)`` pair handed over from ``fit()``.

        Returns:
            Dict mapping each metric name to its value so far.
        """
        # Data handed over by fit().
        x, y = data

        # Start recording operations for gradient computation.
        with tf.GradientTape() as tape:
            # Forward pass, explicitly in training mode.
            y_pred = self(x, training=True)
            # Loss configured in compile(), plus any regularization losses
            # the layers have registered.
            loss = self.compiled_loss(
                y, y_pred, regularization_losses=self.losses)

        # Gradients of the loss with respect to the trainable parameters.
        trainable_vars = self.trainable_variables
        gradients = tape.gradient(loss, trainable_vars)
        # Parameter update.
        self.optimizer.apply_gradients(zip(gradients, trainable_vars))

        # In TF 2.4 the two steps above can be written as:
        # self.optimizer.minimize(loss, self.trainable_variables, tape=tape)

        # Update the metrics configured in compile().
        self.compiled_metrics.update_state(y, y_pred)

        # Report the metric values accumulated so far.
        return {m.name: m.result() for m in self.metrics}


# In[25]:


my_custom_step = MyCustomStep()

# fit() drives the overridden train_step(); the optimizer it reads from
# self.optimizer is the compile() default, 'rmsprop', named explicitly here.
my_custom_step.compile(
    optimizer='rmsprop',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)
my_custom_step.fit(x=X_train, y=y_train, batch_size=32, epochs=2)