using LinearAlgebra  # supplies the dot-product operator ⋅ (it is not in Base on Julia ≥ 0.7)

## Set up the "true" w.  We know it, but the learning algorithm will only ever see
## the X and y data built from it below.
w = rand(3)

# Here is the data.  Each "x" is a 3-vector.  Each "y" is a number.
n = 3
x1 = rand(3); y1 = w ⋅ x1  # We are using the dot product (type \cdot+tab)
x2 = rand(3); y2 = w ⋅ x2
x3 = rand(3); y3 = w ⋅ x3

# Gather the "x" data into the rows of a matrix and "y" into a vector.
# [x1 x2 x3] places the samples in columns; the adjoint ' turns them into rows.
X = [x1 x2 x3]'
y = [y1; y2; y3]

# We check that the linear system for the "unknown" w is X*w = y
# (the residual should be zero up to floating-point rounding).
X * w - y

## Recover w with Gaussian Elimination
X \ y

# The true w, for comparison with the solution above.
w

## Recover w with a machine learning package -- 18.06 students might just want to execute as a black box
using Flux

# t ... the model that will be fitted to the data: one Dense layer built as Dense(3, 1)
t = Dense(3, 1)

# Mean-squared error between the model output t(x) and the target y.
function loss(x, y)
    return Flux.mse(t(x), y)
end

# Only the first entry of the model's parameter collection is handed to ADAM
# (presumably the weight matrix W, leaving the bias untrained -- confirm for the Flux version in use).
opt = ADAM(Flux.params(t)[1:1])

# The same (X', y') pair is presented to the trainer over and over.
Flux.train!(
    loss,
    Iterators.repeated((X', y'), 20000),
    opt,
) # 20000 steps of training
println(t.W.data, " : <== estimate after training")

## Adding more data does not help a whole lot

# Build a larger data set: n rows drawn with randn, targets generated from the same w.
n = 3000
X = randn(n, 3)
y = X * w

# Start again from a fresh model, loss and optimiser.
t = Dense(3, 1)

# Mean-squared error between the model output t(x) and the target y.
function loss(x, y)
    return Flux.mse(t(x), y)
end

# As before, only the first entry of the parameter collection is passed to ADAM.
opt = ADAM(Flux.params(t)[1:1])

# The full (X', y') data set is presented repeatedly.
Flux.train!(
    loss,
    Iterators.repeated((X', y'), 2000),
    opt,
) # 2000 steps of training
println(t.W.data, " : <== estimate after training")