%install-location $cwd/swift-install
%install '.package(path: "$cwd/FastaiNotebook_02_fully_connected")' FastaiNotebook_02_fully_connected
Installing packages: .package(path: "/home/jupyter/notebooks/swift/FastaiNotebook_02_fully_connected") FastaiNotebook_02_fully_connected With SwiftPM flags: [] Working in: /tmp/tmpf7bhzuzj/swift-install [1/5] Compiling FastaiNotebook_02_fully_connected 01_matmul.swift [2/5] Compiling FastaiNotebook_02_fully_connected 02_fully_connected.swift [3/5] Compiling FastaiNotebook_02_fully_connected 00_load_data.swift [4/5] Compiling FastaiNotebook_02_fully_connected 01a_fastai_layers.swift [5/6] Merging module FastaiNotebook_02_fully_connected [6/7] Compiling jupyterInstalledPackages jupyterInstalledPackages.swift [7/8] Merging module jupyterInstalledPackages [8/8] Linking libjupyterInstalledPackages.so Initializing Swift... Installation complete!
//export
import Foundation
import TensorFlow
import Path
import FastaiNotebook_02_fully_connected
// Load MNIST from ~/.fastai/data as flat float tensors (images + labels for both splits).
var (xTrain, yTrain, xValid, yValid) = loadMNIST(path: Path.home/".fastai"/"data"/"mnist_tst")
// Normalize BOTH splits with the *training* statistics so they share one scale.
let (trainMean, trainStd) = (xTrain.mean(), xTrain.standardDeviation())
xTrain = normalize(xTrain, mean: trainMean, std: trainStd)
xValid = normalize(xValid, mean: trainMean, std: trainStd)
// Reshape the flat vectors into NHWC images for the conv layers: [N, 28, 28, 1].
xTrain = xTrain.reshaped(to: [xTrain.shape[0], 28, 28, 1])
xValid = xValid.reshaped(to: [xValid.shape[0], 28, 28, 1])
print(xTrain.shape, xValid.shape)
[60000, 28, 28, 1] [10000, 28, 28, 1]
// Basic dataset / model quantities.
let images = xTrain.shape[0]
// Number of target classes comes from the *labels*. The previous
// `xValid.max() + 1` measured the max of normalized pixel values,
// which is not a class count (cf. PyTorch original: `c = y_train.max()+1`).
let classes = yTrain.max() + 1
let channels = 32
var layer1 = FAConv2D<Float>(filterShape: (5, 5, 1, channels)) //Conv2D(1, nh, 5)
// Small probe batch of validation images used to inspect activation statistics.
let x = xValid[0..<100]
x.shape
▿ [100, 28, 28, 1] ▿ dimensions : 4 elements - 0 : 100 - 1 : 28 - 2 : 28 - 3 : 1
extension Tensor where Scalar: TensorFlowFloatingPoint {
    /// Convenience probe: the mean and standard deviation of all elements,
    /// returned as a labeled tuple for quick inspection in the notebook.
    func stats() -> (mean: Tensor, std: Tensor) {
        let m = self.mean()
        let s = self.standardDeviation()
        return (mean: m, std: s)
    }
}
// Parameter statistics under the library's default initialization:
// non-zero spread for the filter, all-zero bias.
(filter: layer1.filter.stats(), bias: layer1.bias.stats())
▿ 2 elements ▿ filter : 2 elements - mean : -0.0027464556 - std : 0.19631124 ▿ bias : 2 elements - mean : 0.0 - std : 0.0
// Warm-up forward pass pinned to the CPU device; this inner result is discarded.
withDevice(.cpu){
let result = layer1(x)
}
// Activation statistics of the conv output under the default initialization.
let result = layer1(x)
result.stats()
▿ 2 elements - mean : 0.00048066635 - std : 0.9185965
This code has been moved to notebook 01a, so it is disabled from here on:
// Global Philox RNG shared by the initializers below.
var rng = PhiloxRandomNumberGenerator.global
extension Tensor where Scalar: TensorFlowFloatingPoint {
    /// Kaiming/He *normal* initializer.
    /// - Parameter shape: filter shape, trailing two dims are (in, out) channels.
    /// - Parameter negativeSlope: slope of the assumed leaky-ReLU nonlinearity.
    init(kaimingNormal shape: TensorShape, negativeSlope: Double = 1.0) {
        // Assumes Leaky ReLU nonlinearity: gain = sqrt(2 / (1 + slope^2)).
        let gain = Scalar(sqrt(2.0 / (1.0 + pow(negativeSlope, 2))))
        // Every dim except the trailing (in, out) channel pair is spatial.
        let spatialDimCount = shape.count - 2
        let receptiveField = shape[0..<spatialDimCount].contiguousSize
        // fan_in = input channels * receptive-field size.
        let fanIn = shape[shape.count - 2] * receptiveField
        let stddev = gain / sqrt(Scalar(fanIn))
        self.init(randomNormal: shape, stddev: stddev, generator: &rng)
    }
}
// Re-initialize the filter with Kaiming normal (slope 1 => gain 1), then re-check.
layer1.filter = Tensor(kaimingNormal: layer1.filter.shape, negativeSlope: 1.0)
layer1(x).stats()
▿ 2 elements - mean : -0.002676351 - std : 0.8549339
// export
/// Leaky ReLU: identity on positive inputs, `negativeSlope * x` on negative
/// inputs. The default slope of 0.0 reduces it to a plain ReLU.
func leakyRelu<T: TensorFlowFloatingPoint>(
    _ x: Tensor<T>,
    negativeSlope: Double = 0.0
) -> Tensor<T> {
    let positivePart = max(0, x)
    let negativePart = min(0, x)
    return positivePart + T(negativeSlope) * negativePart
}
// Kaiming normal with slope 0 (gain sqrt(2)); inspect the post-ReLU statistics.
layer1.filter = Tensor(kaimingNormal: layer1.filter.shape, negativeSlope: 0.0)
leakyRelu(layer1(x)).stats()
▿ 2 elements - mean : 0.40438622 - std : 0.8042958
// Fresh conv layer with the library's default initialization, for comparison.
var layer1 = FAConv2D<Float>(filterShape: (5, 5, 1, channels)) //Conv2D(1, nh, 5)
leakyRelu(layer1(x)).stats()
▿ 2 elements - mean : 0.3136924 - std : 0.6081149
layer1.filter.shape
▿ [5, 5, 1, 32] ▿ dimensions : 4 elements - 0 : 5 - 1 : 5 - 2 : 1 - 3 : 32
// Dissect the fan-in / fan-out computation for this [5, 5, 1, 32] filter.
// Every dim except the trailing (in, out) channel pair is spatial.
let spatialDimCount = layer1.filter.rank - 2
// Receptive field = product of the spatial dims (5 * 5 = 25 here).
let receptiveField = layer1.filter.shape[0..<spatialDimCount].contiguousSize
receptiveField
25
let filtersIn = layer1.filter.shape[2]
let filtersOut = layer1.filter.shape[3]
print(filtersIn, filtersOut)
1 32
// fan_in scales with input channels, fan_out with output channels.
let fanIn = filtersIn * receptiveField
let fanOut = filtersOut * receptiveField
print(fanIn, fanOut)
25 800
/// Kaiming gain for a leaky-ReLU nonlinearity: sqrt(2 / (1 + slope^2)).
func gain(_ negativeSlope: Double) -> Double {
    let denominator = 1.0 + negativeSlope * negativeSlope
    return (2.0 / denominator).squareRoot()
}
// gain(1) = 1, gain(0) = sqrt(2), and gain(sqrt(5)) = sqrt(1/3) ~= 0.577.
(gain(1.0), gain(0.0), gain(0.01), gain(0.1), gain(sqrt(5.0)))
▿ 5 elements - .0 : 1.0 - .1 : 1.4142135623730951 - .2 : 1.4141428569978354 - .3 : 1.4071950894605838 - .4 : 0.5773502691896257
// Empirical check: a uniform variable on [-1, 1] has std 1/sqrt(3).
(2 * Tensor<Float>(randomUniform: [10000]) - 1).standardDeviation()
0.5790101
1.0 / sqrt(3.0)
0.5773502691896258
//export
extension Tensor where Scalar: TensorFlowFloatingPoint {
    /// Kaiming/He *uniform* initializer.
    /// - Parameter shape: filter shape, trailing two dims are (in, out) channels.
    /// - Parameter negativeSlope: slope of the assumed leaky-ReLU nonlinearity.
    init(kaimingUniform shape: TensorShape, negativeSlope: Double = 1.0) {
        // Assumes Leaky ReLU nonlinearity: gain = sqrt(2 / (1 + slope^2)).
        let gain = Scalar.init(TensorFlow.sqrt(2.0 / (1.0 + TensorFlow.pow(negativeSlope, 2))))
        // Every dim except the trailing (in, out) channel pair is spatial.
        let spatialDimCount = shape.count - 2
        let receptiveField = shape[0..<spatialDimCount].contiguousSize
        let fanIn = shape[shape.count - 2] * receptiveField
        // Uniform on [-bound, bound] has std bound/sqrt(3), hence the sqrt(3) factor.
        let bound = TensorFlow.sqrt(Scalar(3.0)) * gain / TensorFlow.sqrt(Scalar(fanIn))
        let unit = Tensor(randomUniform: shape, generator: &PhiloxRandomNumberGenerator.global)
        // Map U[0, 1) to U[-1, 1) and scale by the bound.
        self = bound * (2 * unit - 1)
    }
}
// Kaiming uniform with slope 0 overshoots after ReLU (std > 1) ...
layer1.filter = Tensor(kaimingUniform: layer1.filter.shape, negativeSlope: 0.0)
leakyRelu(layer1(x)).stats()
▿ 2 elements - mean : 0.497556 - std : 1.0267977
// ... while a slope of sqrt(5) (PyTorch's conv default) strongly dampens activations.
layer1.filter = Tensor(kaimingUniform: layer1.filter.shape, negativeSlope: sqrt(5.0))
leakyRelu(layer1(x)).stats()
▿ 2 elements - mean : 0.18083005 - std : 0.34926173
/// Small convnet: four stride-2 convolutions shrink the 28x28 input
/// (28 -> 14 -> 7 -> 4 -> 1 spatially), then a flatten yields one value
/// per example, shape [batch, 1].
public struct Model: Layer {
public var conv1 = FAConv2D<Float>(
filterShape: (5, 5, 1, 8), strides: (2, 2), padding: .same, activation: relu
)
public var conv2 = FAConv2D<Float>(
filterShape: (3, 3, 8, 16), strides: (2, 2), padding: .same, activation: relu
)
public var conv3 = FAConv2D<Float>(
filterShape: (3, 3, 16, 32), strides: (2, 2), padding: .same, activation: relu
)
// Final conv maps 32 channels to 1; valid padding and no activation.
public var conv4 = FAConv2D<Float>(
filterShape: (3, 3, 32, 1), strides: (2, 2), padding: .valid
)
public var flatten = Flatten<Float>()
// Differentiable forward pass: convs in order, then flatten.
@differentiable
public func callAsFunction(_ input: Tensor<Float>) -> Tensor<Float> {
return input.sequenced(through: conv1, conv2, conv3, conv4, flatten)
}
}
// Float labels for the probe batch, to match the model's Float output.
let y = Tensor<Float>(yValid[0..<100])
var model = Model()
// Forward statistics under the default initialization.
let prediction = model(x)
prediction.stats()
▿ 2 elements - mean : 0.1729667 - std : 0.12520388
// Gradient statistics of the MSE loss w.r.t. the first conv's filter.
let gradients = gradient(at: model) { model in
meanSquaredError(predicted: model(x), expected: y)
}
gradients.conv1.filter.stats()
▿ 2 elements - mean : -0.056964096 - std : 0.2776651
// Re-initialize every conv filter with kaimingUniform (default slope 1.0).
for keyPath in [\Model.conv1, \Model.conv2, \Model.conv3, \Model.conv4] {
model[keyPath: keyPath].filter = Tensor(kaimingUniform: model[keyPath: keyPath].filter.shape)
}
// Forward and backward statistics after the Kaiming re-initialization.
let prediction = model(x)
prediction.stats()
▿ 2 elements - mean : -0.37689942 - std : 0.32016334
let gradients = gradient(at: model) { model in
meanSquaredError(predicted: model(x), expected: y)
}
gradients.conv1.filter.stats()
▿ 2 elements - mean : 0.100767136 - std : 0.54216325
// Export the cells tagged //export into the FastaiNotebook_ package for reuse.
import NotebookExport
let exporter = NotebookExport(Path.cwd/"02a_why_sqrt5.ipynb")
print(exporter.export(usingPrefix: "FastaiNotebook_"))
success