Skip to content

Commit

Permalink
Batchnorm1d optimized and parameterized point cnn layer in phoneme det
Browse files Browse the repository at this point in the history
  • Loading branch information
Anirudh0707 committed Oct 26, 2020
1 parent 190d3b4 commit 214495c
Show file tree
Hide file tree
Showing 8 changed files with 88 additions and 49 deletions.
16 changes: 10 additions & 6 deletions c_reference/include/conv1d.h
Original file line number Diff line number Diff line change
Expand Up @@ -158,18 +158,22 @@ int avgpool1d(float* output_signal, unsigned out_time, const float* input_signal
* @param[in] input_signal pointer to the input signal. size = in_time * in_channels
* @param[in] in_time number of time steps in the input
* @param[in] in_channels number of input channels. The output will have the same number of channels
* @param[in] mean pointer to the mean for the batch normalization, size = in_channels
* @param[in] var pointer to the variance for the batch normalization, size = in_channels
* @param[in] affine whether the affine operations are applied
* @param[in] gamma pointer to the scaling factors for the post-norm affine operation, size = in_channels. Provide Null/0 if affine is False(non-zero)
* @param[in] beta pointer to the offsets for the post-norm affine operation, size = in_channels. Provide Null/0 if affine is False(non-zero)
* @param[in] mean pointer to the mean for the batch normalization, size = in_channels. if affine_config = 2, then pass a NULL/0
* @param[in] var pointer to the variance for the batch normalization, size = in_channels. if affine_config = 2, then pass a NULL/0
* @param[in] affine_config configuration of the affine operations to apply (one of the cases below)
* if affine_config = 0, then only mean and var are used
* if affine_config = 1, then mean, var, gamma and beta are used for the final computation.
* if affine_config = 2, then only the gamma and beta are used. gamma = original_gamma/sqrt(var), beta = original_beta - gamma * mean/sqrt(var)
* Note: Use affine_config = 2 for faster calculations. The new gamma and beta would need to be pre-computed, stored and passed
* @param[in] gamma pointer to the scaling factors for the post-norm affine operation, size = in_channels. Provide NULL/0 if affine_config is 0
* @param[in] beta pointer to the offsets for the post-norm affine operation, size = in_channels. Provide NULL/0 if affine_config is 0
* @param[in] in_place in-place computation of the batchnorm i.e. the output is stored in-place of the input signal. Storage efficient
* @param[in] eps a very small +ve value to avoid division by 0. For the default value, assign = 0.00001
*/
// Batch normalization over a (in_time x in_channels) signal; see the Doxygen
// block above for the affine_config cases. Declaration only — defined in conv1d.c.
// NOTE(review): the scraped diff left both the old ("unsigned affine") and new
// ("unsigned affine_config") parameter lines in place; only the updated line is kept here.
int batchnorm1d(float* output_signal, float* input_signal,
  unsigned in_time, unsigned in_channels,
  const float* const mean, const float* const var,
  unsigned affine_config, const float* const gamma, const float* const beta,
  unsigned in_place, float eps);

#endif
39 changes: 26 additions & 13 deletions c_reference/include/dscnn.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,18 +4,27 @@
#ifndef __DSCNN_H__
#define __DSCNN_H__

// Function pointer for the Conv layer to be passed as a parameter. (conv1d or conv1d_lr only)
// Argument order matches the conv1d/conv1d_lr call sites in this commit:
// (output_signal, out_time, out_channels, input_signal,
//  in_time, in_channels, padding, kernel_size,
//  params, stride, activation).
// Returns int — presumably a status code; callers here ignore it (TODO confirm).
typedef int (*conv_layer)(float*, unsigned, unsigned, const float*,
unsigned, unsigned, unsigned, unsigned,
const void*, unsigned, unsigned);

/**
* @brief Model definition for the 1D Convolution block applied before the RNN
* @brief sub-layers : batchnorm1d -> conv1d_lr
* @param[out] output_signal pointer to the final output signal, minimum size = out_time * in_channels. out_time has to be calculated based on the reduction from all the conv and pool layers
* @param[in] input_signal pointer to the input signal. size = in_time * in_channels
* @param[in] in_time number of time steps in the input_signal
* @param[in] in_channels number of input channels
* @param[in] mean pointer to the mean for the batch normalization, size = in_channels
* @param[in] var pointer to the variance for the batch normalization, size = in_channels
* @param[in] affine whether the affine operations are applied
* @param[in] gamma pointer to the scaling factors for the post-norm affine operation, size = in_channels
* @param[in] beta pointer to the offsets for the post-norm affine operation, size = in_channels
* @param[in] mean pointer to the mean for the batch normalization, size = in_channels. Pass NULL/0 for affine_config = 2
* @param[in] var pointer to the variance for the batch normalization, size = in_channels. Pass NULL/0 for affine_config = 2
* @param[in] affine_config configuration of the affine operations to apply (one of the cases below)
* if affine_config = 0, then only mean and var are used
* if affine_config = 1, then mean, var, gamma and beta are used for the final computation.
* if affine_config = 2, then only the gamma and beta are used. gamma = original_gamma/sqrt(var), beta = original_beta - gamma * mean/sqrt(var)
* Note: Use affine_config = 2 for faster calculations. The new gamma and beta would need to be pre-computed, stored and passed
* @param[in] gamma pointer to the scaling factors for the post-norm affine operation, size = in_channels. Pass NULL/0 for affine_config = 0
* @param[in] beta pointer to the offsets for the post-norm affine operation, size = in_channels. Pass NULL/0 for affine_config = 0
* @param[in] in_place in-place computation check for the batchnorm. Storage efficient
* @param[in] cnn_hidden hidden state/out_channels dimensions for the low-rank CNN. The final channel size of this block
* @param[in] cnn_padding padding for the low-rank CNN layer. Note: applied to both sides of the input
Expand All @@ -31,7 +40,7 @@
int phon_pred_lr_cnn(float* output_signal, float* input_signal,
unsigned in_time, unsigned in_channels,
const float* const mean, const float* const var,
unsigned affine, float* gamma, float* beta, unsigned in_place,
unsigned affine_config, float* gamma, float* beta, unsigned in_place,
unsigned cnn_hidden, unsigned cnn_padding, unsigned cnn_kernel_size,
const void* cnn_params, unsigned cnn_stride, unsigned cnn_activation);

Expand All @@ -42,11 +51,15 @@ int phon_pred_lr_cnn(float* output_signal, float* input_signal,
* @param[in] input_signal pointer to the input signal. size = in_time * in_channels
* @param[in] in_time number of time steps in the input
* @param[in] in_channels number of input channels
* @param[in] mean pointer to the mean for the batch normalization, size = in_channels
* @param[in] var pointer to the variance for the batch normalization, size = in_channels
* @param[in] affine whether the affine operations are applied
* @param[in] gamma pointer to the scaling factors for the post-norm affine operation, size = in_channels
* @param[in] beta pointer to the offsets for the post-norm affine operation, size = in_channels
* @param[in] mean pointer to the mean for the batch normalization, size = in_channels. Pass NULL/0 for affine_config = 2
* @param[in] var pointer to the variance for the batch normalization, size = in_channels. Pass NULL/0 for affine_config = 2
* @param[in] affine_config configuration of the affine operations to apply (one of the cases below)
* if affine_config = 0, then only mean and var are used
* if affine_config = 1, then mean, var, gamma and beta are used for the final computation.
* if affine_config = 2, then only the gamma and beta are used. gamma = original_gamma/sqrt(var), beta = original_beta - gamma * mean/sqrt(var)
* Note: Use affine_config = 2 for faster calculations. The new gamma and beta would need to be pre-computed, stored and passed
* @param[in] gamma pointer to the scaling factors for the post-norm affine operation, size = in_channels. Pass NULL/0 for affine_config = 0
* @param[in] beta pointer to the offsets for the post-norm affine operation, size = in_channels. Pass NULL/0 for affine_config = 0
* @param[in] in_place in-place computation of the batchnorm. Storage efficient
* @param[in] depth_cnn_padding padding for the depth CNN layer. Note: applied to both sides of the input to the depth CNN
* @param[in] depth_cnn_kernel_size kernel size of the depth CNN
Expand Down Expand Up @@ -77,9 +90,9 @@ int phon_pred_lr_cnn(float* output_signal, float* input_signal,
* 3: relu
*/
int phon_pred_depth_point_lr_cnn(float* output_signal, float* input_signal,
unsigned in_time, unsigned in_channels,
conv_layer point_cnn, unsigned in_time, unsigned in_channels,
const float* const mean, const float* const var,
unsigned affine, const float* const gamma, const float* const beta, unsigned in_place,
unsigned affine_config, const float* const gamma, const float* const beta, unsigned in_place,
unsigned depth_cnn_padding, unsigned depth_cnn_kernel_size,
const void* depth_cnn_params, unsigned depth_cnn_stride, unsigned depth_cnn_activation,
unsigned point_cnn_hidden, unsigned point_cnn_padding, unsigned point_cnn_kernel_size,
Expand Down
34 changes: 28 additions & 6 deletions c_reference/src/conv1d.c
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,7 @@ int avgpool1d(float* output_signal, unsigned out_time, const float* input_signal
unsigned padding, unsigned kernel_size, unsigned stride, unsigned activation) {

// Iterate over the time steps and average them. Similar to Conv1D_Dept with a filter kernel of ones
float scale = 1.0/(float)kernel_size;
for (unsigned t_in = 0, t_out = 0; t_out < out_time; t_out++, t_in += stride) {
for (unsigned ci = 0; ci < in_channels; ci++) {
float sum = 0;
Expand All @@ -183,16 +184,16 @@ int avgpool1d(float* output_signal, unsigned out_time, const float* input_signal
}
}
if (activation == 1) {
output_signal[t_out * in_channels + ci] = sigmoid(sum / (float)kernel_size);
output_signal[t_out * in_channels + ci] = sigmoid(sum * scale);
}
else if (activation == 2) {
output_signal[t_out * in_channels + ci] = tanh(sum / (float)kernel_size);
output_signal[t_out * in_channels + ci] = tanh(sum * scale);
}
else if (activation == 3) {
output_signal[t_out * in_channels + ci] = relu(sum / (float)kernel_size);
output_signal[t_out * in_channels + ci] = relu(sum * scale);
}
else {
output_signal[t_out * in_channels + ci] = sum / (float)kernel_size;
output_signal[t_out * in_channels + ci] = sum * scale;
}
}
}
Expand All @@ -202,10 +203,10 @@ int avgpool1d(float* output_signal, unsigned out_time, const float* input_signal
int batchnorm1d(float* output_signal, float* input_signal,
unsigned in_time, unsigned in_channels,
const float* const mean, const float* const var,
unsigned affine, const float* const gamma , const float* const beta,
unsigned affine_config, const float* const gamma , const float* const beta,
unsigned in_place, float eps) {
// Check if affine values was learnt
if (affine) {
if (affine_config == 1) {
// Check for in-place computation
if (in_place) {
for (unsigned t = 0; t < in_time; t++) {
Expand All @@ -228,6 +229,27 @@ int batchnorm1d(float* output_signal, float* input_signal,
}
}
}
else if (affine_config == 2) {
// Check for in-place computation
if (in_place) {
for (unsigned t = 0; t < in_time; t++) {
for (unsigned d = 0; d < in_channels; d++) {
input_signal[t * in_channels + d] = (gamma[d]
* input_signal[t * in_channels + d])
+ beta[d];
}
}
}
else {
for (unsigned t = 0; t < in_time; t++) {
for (unsigned d = 0; d < in_channels; d++) {
output_signal[t * in_channels + d] = (gamma[d]
* input_signal[t * in_channels + d])
+ beta[d];
}
}
}
}
else {
// Check for in-place computation
if (in_place) {
Expand Down
16 changes: 8 additions & 8 deletions c_reference/src/dscnn.c
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
int phon_pred_lr_cnn(float* output_signal, float* input_signal,
unsigned in_time, unsigned in_channels,
const float* const mean, const float* const var,
unsigned affine, float* gamma, float* beta, unsigned in_place,
unsigned affine_config, float* gamma, float* beta, unsigned in_place,
unsigned cnn_hidden, unsigned cnn_padding, unsigned cnn_kernel_size,
const void* cnn_params, unsigned cnn_stride, unsigned cnn_activation) {

Expand All @@ -20,7 +20,7 @@ int phon_pred_lr_cnn(float* output_signal, float* input_signal,
// BatchNorm
batchnorm1d(0, input_signal,
in_time, in_channels,
mean, var, affine, gamma, beta,
mean, var, affine_config, gamma, beta,
in_place, 0.00001);
// CNN
conv1d_lr(output_signal, out_time, cnn_hidden, input_signal,
Expand All @@ -32,7 +32,7 @@ int phon_pred_lr_cnn(float* output_signal, float* input_signal,
float* norm_out = (float*)malloc(in_time * in_channels * sizeof(float));
batchnorm1d(norm_out, input_signal,
in_time, in_channels,
mean, var, affine, gamma, beta,
mean, var, affine_config, gamma, beta,
in_place, 0.00001);
// CNN
conv1d_lr(output_signal, out_time, cnn_hidden, norm_out,
Expand All @@ -44,9 +44,9 @@ int phon_pred_lr_cnn(float* output_signal, float* input_signal,
}

int phon_pred_depth_point_lr_cnn(float* output_signal, float* input_signal,
unsigned in_time, unsigned in_channels,
conv_layer point_cnn, unsigned in_time, unsigned in_channels,
const float* const mean, const float* const var,
unsigned affine, const float* const gamma, const float* const beta, unsigned in_place,
unsigned affine_config, const float* const gamma, const float* const beta, unsigned in_place,
unsigned depth_cnn_padding, unsigned depth_cnn_kernel_size,
const void* depth_cnn_params, unsigned depth_cnn_stride, unsigned depth_cnn_activation,
unsigned point_cnn_hidden, unsigned point_cnn_padding, unsigned point_cnn_kernel_size,
Expand All @@ -66,7 +66,7 @@ int phon_pred_depth_point_lr_cnn(float* output_signal, float* input_signal,
batchnorm1d(0, act_out,
in_time, in_channels,
mean, var,
affine, gamma, beta,
affine_config, gamma, beta,
in_place, 0.00001);
// Depth CNN
depth_out = (float*)malloc(out_time * in_channels * sizeof(float));
Expand All @@ -81,7 +81,7 @@ int phon_pred_depth_point_lr_cnn(float* output_signal, float* input_signal,
batchnorm1d(norm_out, act_out,
in_time, in_channels,
mean, var,
affine, gamma, beta,
affine_config, gamma, beta,
in_place, 0.00001);
free(act_out);
// Depth CNN
Expand All @@ -96,7 +96,7 @@ int phon_pred_depth_point_lr_cnn(float* output_signal, float* input_signal,
in_time = out_time;
out_time = in_time - point_cnn_kernel_size + 2 * point_cnn_padding + 1;
float* point_out = (float*)malloc(out_time * point_cnn_hidden * sizeof(float));
conv1d_lr(point_out, out_time, point_cnn_hidden, depth_out,
point_cnn(point_out, out_time, point_cnn_hidden, depth_out,
in_time, in_channels, point_cnn_padding, point_cnn_kernel_size,
point_cnn_params, point_cnn_stride, point_cnn_activation);
free(depth_out);
Expand Down
4 changes: 2 additions & 2 deletions c_reference/tests/kws/postcnn_params.h
Git LFS file not shown
4 changes: 2 additions & 2 deletions c_reference/tests/kws/precnn_params.h
Git LFS file not shown
4 changes: 2 additions & 2 deletions c_reference/tests/kws/rnn_params.h
Git LFS file not shown
20 changes: 10 additions & 10 deletions c_reference/tests/kws/test_phoneme_det_cnn_rnn.c
Original file line number Diff line number Diff line change
Expand Up @@ -165,12 +165,12 @@ void phoneme_prediction(float* mem_buf) {
// Use the in-place computation only if the input can be discarded/altered. Else avoid in-place computation for this layer
phon_pred_lr_cnn(cnn1_out, mem_buf,
in_time, PRE_CNN_IN_FEATURES,
BNORM_CNN1_MEAN, BNORM_CNN1_VAR, 0, 0, 0, PRE_CNN_BNORM_INPLACE,
0, 0, PRE_CNN_BNORM_AFFINE, CNN1_SCALE, CNN1_OFFSET, PRE_CNN_BNORM_INPLACE,
PRE_CNN_OUT_FEATURES, PRE_CNN_FILT_PAD, PRE_CNN_FILT,
&conv_params, PRE_CNN_STRIDE, PRE_CNN_FILT_ACT); // regular tanh activation

batchnorm1d(0, cnn1_out, in_time, RNN_IN_FEATURES,
BNORM_RNN_MEAN, BNORM_RNN_VAR, 0, 0, 0, 1, 0.00001); // Currently in-place only and no affine values
0, 0, RNN_BNORM_AFFINE, RNN_SCALE, RNN_OFFSET, 1, 0.00001);

/* Bricked Bi-FastGRNN Block */
out_time = in_time/RNN_HOP + 1;
Expand All @@ -194,8 +194,8 @@ void phoneme_prediction(float* mem_buf) {
out_time = out_time - POST_CNN_POOL + (POST_CNN_POOL_PAD << 1) + 1;
float* cnn2_out = (float*)malloc(out_time * POST_CNN_INTER_FEATURES * sizeof(float));
phon_pred_depth_point_lr_cnn(cnn2_out, rnn_out,
in_time, POST_CNN_INTER_FEATURES,
CNN2_BNORM_MEAN, CNN2_BNORM_VAR, 0, 0, 0, POST_CNN_BNORM_INPLACE,
conv1d_lr, in_time, POST_CNN_INTER_FEATURES,
0, 0, POST_CNN_BNORM_AFFINE, CNN2_SCALE, CNN2_OFFSET, POST_CNN_BNORM_INPLACE,
POST_CNN_DEPTH_PAD, POST_CNN_DEPTH_FILT,
&depth_param_2, POST_CNN_DEPTH_STRIDE, POST_CNN_DEPTH_ACT,
POST_CNN_INTER_FEATURES, POST_CNN_POINT_PAD, POST_CNN_POINT_FILT,
Expand All @@ -209,8 +209,8 @@ void phoneme_prediction(float* mem_buf) {
out_time = out_time - POST_CNN_POOL + (POST_CNN_POOL_PAD << 1) + 1;
float* cnn3_out = (float*)malloc(out_time * POST_CNN_INTER_FEATURES * sizeof(float));
phon_pred_depth_point_lr_cnn(cnn3_out, cnn2_out,
in_time, POST_CNN_INTER_FEATURES,
CNN3_BNORM_MEAN, CNN3_BNORM_VAR, 0, 0, 0, POST_CNN_BNORM_INPLACE,
conv1d_lr, in_time, POST_CNN_INTER_FEATURES,
0, 0, POST_CNN_BNORM_AFFINE, CNN3_SCALE, CNN3_OFFSET, POST_CNN_BNORM_INPLACE,
POST_CNN_DEPTH_PAD, POST_CNN_DEPTH_FILT,
&depth_param_3, POST_CNN_DEPTH_STRIDE, POST_CNN_DEPTH_ACT,
POST_CNN_INTER_FEATURES, POST_CNN_POINT_PAD, POST_CNN_POINT_FILT,
Expand All @@ -224,8 +224,8 @@ void phoneme_prediction(float* mem_buf) {
out_time = out_time - POST_CNN_POOL + (POST_CNN_POOL_PAD << 1) + 1;
float* cnn4_out = (float*)malloc(out_time * POST_CNN_INTER_FEATURES * sizeof(float));
phon_pred_depth_point_lr_cnn(cnn4_out, cnn3_out,
in_time, POST_CNN_INTER_FEATURES,
CNN4_BNORM_MEAN, CNN4_BNORM_VAR, 0, 0, 0, POST_CNN_BNORM_INPLACE,
conv1d_lr, in_time, POST_CNN_INTER_FEATURES,
0, 0, POST_CNN_BNORM_AFFINE, CNN4_SCALE, CNN4_OFFSET, POST_CNN_BNORM_INPLACE,
POST_CNN_DEPTH_PAD, POST_CNN_DEPTH_FILT,
&depth_param_4, POST_CNN_DEPTH_STRIDE, POST_CNN_DEPTH_ACT,
POST_CNN_INTER_FEATURES, POST_CNN_POINT_PAD, POST_CNN_POINT_FILT,
Expand All @@ -239,8 +239,8 @@ void phoneme_prediction(float* mem_buf) {
out_time = out_time - POST_CNN_POOL + (POST_CNN_POOL_PAD << 1) + 1;
float* pred = (float*)malloc(out_time * POST_CNN_OUT_FEATURES * sizeof(float));
phon_pred_depth_point_lr_cnn(pred, cnn4_out,
in_time, POST_CNN_INTER_FEATURES,
CNN5_BNORM_MEAN, CNN5_BNORM_VAR, 0, 0, 0, POST_CNN_BNORM_INPLACE,
conv1d_lr, in_time, POST_CNN_INTER_FEATURES,
0, 0, POST_CNN_BNORM_AFFINE, CNN5_SCALE, CNN5_OFFSET, POST_CNN_BNORM_INPLACE,
POST_CNN_DEPTH_PAD, POST_CNN_DEPTH_FILT,
&depth_param_5, POST_CNN_DEPTH_STRIDE, POST_CNN_DEPTH_ACT,
POST_CNN_OUT_FEATURES, POST_CNN_POINT_PAD, POST_CNN_POINT_FILT,
Expand Down

0 comments on commit 214495c

Please sign in to comment.