// Copyright (c) 2023, The Emergent Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package decoder

//go:generate core generate -add-types

import (
	"fmt"

	"cogentcore.org/core/math32"
	"cogentcore.org/lab/base/mpi"
	"cogentcore.org/lab/tensor"
)

// ActivationFunc is the activation function applied to each decoder
// unit's net input to produce its activation.
type ActivationFunc func(float32) float32
// Linear is a linear neural network, which can be configured with a custom
// activation function. By default it uses the identity function.
// It learns using the delta rule for each output unit.
// See the usage sketch at the end of this file for a minimal example.
type Linear struct {

	// learning rate
	LRate float32 `default:"0.1"`

	// layers to decode
	Layers []Layer

	// unit values -- read this for decoded output
	Units []LinearUnit

	// number of inputs -- total sizes of layer inputs
	NInputs int

	// number of outputs -- number of categories to decode
	NOutputs int

	// input values, copied from layers
	Inputs []float32

	// tensors for holding layer values, indexed by layer name
	ValuesTsrs map[string]*tensor.Float32 `display:"-"`

	// synaptic weights: outer loop is units, inner loop is inputs
	Weights tensor.Float32

	// activation function
	ActivationFn ActivationFunc

	// which pool to use within a layer (-1 = use the entire layer)
	PoolIndex int

	// mpi communicator -- MPI users must set this to their comm -- do direct assignment
	Comm *mpi.Comm `display:"-"`

	// delta weight changes: only for MPI mode -- outer loop is units, inner loop is inputs
	MPIDWts tensor.Float32
}

// Layer is the subset of emer.Layer that is used by this code
type Layer interface {
	Name() string
	UnitValuesTensor(tsr tensor.Values, varNm string, di int) error
	Shape() *tensor.Shape
}

// IdentityFunc returns its input unchanged -- the default activation function.
func IdentityFunc(x float32) float32 { return x }

// LogisticFunc implements the standard logistic function.
// Its outputs are in the range (0, 1).
// Also known as Sigmoid. See https://en.wikipedia.org/wiki/Logistic_function.
func LogisticFunc(x float32) float32 { return 1 / (1 + math32.FastExp(-x)) }

// LinearUnit has variables for Linear decoder unit
type LinearUnit struct {

	// target activation value -- typically 0 or 1 but can be within that range too
	Target float32

	// final activation = ActivationFn(Net) -- this is the decoded output
	Act float32

	// net input = sum x * w
	Net float32
}

// InitLayer initializes detector with number of categories and layers
func (dec *Linear) InitLayer(nOutputs int, layers []Layer, activationFn ActivationFunc) {
	dec.Layers = layers
	nIn := 0
	for _, ly := range dec.Layers {
		nIn += ly.Shape().Len()
	}
	dec.Init(nOutputs, nIn, -1, activationFn)
}

// InitPool initializes detector with number of categories, 1 layer, and
// the index of the pool within that layer to decode.
func (dec *Linear) InitPool(nOutputs int, layer Layer, poolIndex int, activationFn ActivationFunc) {
	dec.Layers = []Layer{layer}
	shape := layer.Shape()
	// TODO: assert that it's a 4D layer
	nIn := shape.DimSize(2) * shape.DimSize(3)
	dec.Init(nOutputs, nIn, poolIndex, activationFn)
}

// Init initializes detector with number of categories and number of inputs
func (dec *Linear) Init(nOutputs, nInputs int, poolIndex int, activationFn ActivationFunc) {
	dec.NInputs = nInputs
	dec.LRate = 0.1
	dec.NOutputs = nOutputs
	dec.Units = make([]LinearUnit, dec.NOutputs)
	dec.Inputs = make([]float32, dec.NInputs)
	dec.Weights.SetShapeSizes(dec.NOutputs, dec.NInputs)
	for i := range dec.Weights.Values {
		dec.Weights.Values[i] = 0.1
	}
	dec.PoolIndex = poolIndex
	dec.ActivationFn = activationFn
}

// Decode decodes the given variable name from layers (forward pass).
// Decoded values are in Units[i].Act -- see also Output to get them into a []float32.
// di is a data parallel index, for networks capable
// of processing input patterns in parallel.
func (dec *Linear) Decode(varNm string, di int) {
	dec.Input(varNm, di)
	dec.Forward()
}

// Output returns the resulting decoded output activation values in the given slice,
// which is automatically resized if not of sufficient size.
func (dec *Linear) Output(acts *[]float32) {
	if cap(*acts) < dec.NOutputs {
		*acts = make([]float32, dec.NOutputs)
	} else if len(*acts) != dec.NOutputs {
		*acts = (*acts)[:dec.NOutputs]
	}
	for ui := range dec.Units {
		u := &dec.Units[ui]
		(*acts)[ui] = u.Act
	}
}

// Train trains the decoder with given target correct answers, as []float32 values.
// Returns SSE (sum squared error) of difference between targets and outputs.
// Also returns and prints an error if targets are not of sufficient length for NOutputs.
func (dec *Linear) Train(targs []float32) (float32, error) {
	err := dec.SetTargets(targs)
	if err != nil {
		return 0, err
	}
	sse := dec.Back()
	return sse, nil
}

// TrainMPI trains the decoder with given target correct answers, as []float32 values.
// Returns SSE (sum squared error) of difference between targets and outputs.
// Also returns and prints an error if targets are not of sufficient length for NOutputs.
// MPI version uses mpi to synchronize weight changes across parallel nodes.
func (dec *Linear) TrainMPI(targs []float32) (float32, error) {
	err := dec.SetTargets(targs)
	if err != nil {
		return 0, err
	}
	sse := dec.BackMPI()
	return sse, nil
}

// SetTargets sets given target correct answers, as []float32 values.
// Also returns and prints an error if targets are not of sufficient length for NOutputs.
func (dec *Linear) SetTargets(targs []float32) error {
	if len(targs) < dec.NOutputs {
		err := fmt.Errorf("decoder.Linear: number of targets < NOutputs: %d < %d", len(targs), dec.NOutputs)
		fmt.Println(err)
		return err
	}
	for ui := range dec.Units {
		u := &dec.Units[ui]
		u.Target = targs[ui]
	}
	return nil
}

// ValuesTsr gets value tensor of given name, creating if not yet made
func (dec *Linear) ValuesTsr(name string) *tensor.Float32 {
	if dec.ValuesTsrs == nil {
		dec.ValuesTsrs = make(map[string]*tensor.Float32)
	}
	tsr, ok := dec.ValuesTsrs[name]
	if !ok {
		tsr = &tensor.Float32{}
		dec.ValuesTsrs[name] = tsr
	}
	return tsr
}

// Input grabs the input from given variable in layers.
// di is a data parallel index, for networks capable
// of processing input patterns in parallel.
func (dec *Linear) Input(varNm string, di int) {
	off := 0
	for _, ly := range dec.Layers {
		tsr := dec.ValuesTsr(ly.Name())
		ly.UnitValuesTensor(tsr, varNm, di)
		if dec.PoolIndex >= 0 {
			shape := ly.Shape()
			y := dec.PoolIndex / shape.DimSize(1)
			x := dec.PoolIndex % shape.DimSize(1)
			tsr = tsr.SubSpace(y, x).(*tensor.Float32)
		}
		for j, v := range tsr.Values {
			dec.Inputs[off+j] = v
		}
		off += ly.Shape().Len()
	}
}
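
// Note on pool indexing (assuming the usual 4D layer shape convention of
// [pools-Y, pools-X, units-Y, units-X], which is what the code above implies):
// PoolIndex is a flat index over the pool grid, so y = PoolIndex / DimSize(1)
// and x = PoolIndex % DimSize(1) select one pool, and SubSpace(y, x) returns
// just that pool's unit values, whose length matches the
// DimSize(2) * DimSize(3) input count computed in InitPool.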

// Forward computes the forward pass from the current Inputs.
func (dec *Linear) Forward() {
	for ui := range dec.Units {
		u := &dec.Units[ui]
		net := float32(0)
		off := ui * dec.NInputs
		for j, in := range dec.Inputs {
			net += dec.Weights.Values[off+j] * in
		}
		u.Net = net
		u.Act = dec.ActivationFn(net)
	}
}

// https://en.wikipedia.org/wiki/Delta_rule
// Delta rule: delta = learning rate * error * input
// We don't need the g' (derivative of activation function) term assuming:
// 1. Identity activation function with SSE loss (because it's 1), OR
// 2. Logistic activation function with Cross Entropy loss (because it cancels out, see
// https://towardsdatascience.com/deriving-backpropagation-with-cross-entropy-loss-d24811edeaf9)
// The fact that we return SSE does not mean we're optimizing SSE.

// Back computes the backward error propagation pass.
// Returns SSE (sum squared error) of difference between targets and outputs.
func (dec *Linear) Back() float32 {
	var sse float32
	for ui := range dec.Units {
		u := &dec.Units[ui]
		err := u.Target - u.Act
		sse += err * err
		del := dec.LRate * err
		off := ui * dec.NInputs
		for j, in := range dec.Inputs {
			dec.Weights.Values[off+j] += del * in
		}
	}
	return sse
}
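
// As an illustrative worked example of one delta-rule update (numbers made up
// for this note): with LRate = 0.1, Target = 1, and Act = 0.6, the error is
// 0.4, so a weight receiving an input of 0.5 changes by 0.1 * 0.4 * 0.5 = 0.02,
// nudging that unit toward a stronger response to this input next time.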

// BackMPI computes the backward error propagation pass,
// synchronizing the weight changes across MPI nodes via Comm.
// Returns SSE (sum squared error) of difference between targets and outputs.
func (dec *Linear) BackMPI() float32 {
	if dec.MPIDWts.Len() != dec.Weights.Len() {
		tensor.SetShapeFrom(&dec.MPIDWts, &dec.Weights)
	}
	var sse float32
	for ui := range dec.Units {
		u := &dec.Units[ui]
		err := u.Target - u.Act
		sse += err * err
		del := dec.LRate * err
		off := ui * dec.NInputs
		for j, in := range dec.Inputs {
			dec.MPIDWts.Values[off+j] = del * in
		}
	}
	dec.Comm.AllReduceF32(mpi.OpSum, dec.MPIDWts.Values, nil)
	for i, dw := range dec.MPIDWts.Values {
		dec.Weights.Values[i] += dw
	}
	return sse
}
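
// linearUsageSketch is an illustrative usage sketch, not part of the original
// file: it drives the decoder directly through Init, Forward, and Train with a
// hand-built input pattern, bypassing the layer-based Input/Decode path (which
// requires a Layer implementation). All names and values below are made up for
// illustration.
func linearUsageSketch() {
	var dec Linear
	dec.Init(2, 4, -1, IdentityFunc) // 2 output categories, 4 inputs, no pool

	// Decode(varNm, di) would normally copy layer values into Inputs;
	// here we set them directly.
	copy(dec.Inputs, []float32{0.1, 0.9, 0.0, 0.5})

	dec.Forward() // compute Units[i].Net and Units[i].Act from Inputs and Weights

	var acts []float32
	dec.Output(&acts) // read decoded activations into a reusable slice

	// train toward a one-hot target for category 1 using the delta rule
	sse, err := dec.Train([]float32{0, 1})
	if err != nil {
		fmt.Println(err)
	}
	fmt.Println(acts, sse)
}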