---
# trainingparameters.yaml
# Description: Training parameters for the training script
# Model selection
model: 'CNNMNIST' # 'FCMNIST' or 'CNNMNIST' This is the class name of the model as defined in models.py.
dataset: 'MNIST' # 'MNIST', or EMNIST splits: EMNIST_BALANCED, EMNIST_BYCLASS, EMNIST_BYMERGE, EMNIST_LETTERS, EMNIST_DIGITS, EMNIST_MNIST
# Quantization settings
QuantType: '4bitsym' # 'Ternary', 'Binary', 'BinaryBalanced', '2bitsym', '4bit', '4bitsym', '8bit', 'None', 'FP130', 'NF4'
NormType: 'RMS' # 'RMS', 'Lin', 'BatchNorm'
WScale: 'PerTensor' # 'PerTensor', 'PerOutput'
# Clipping parameters - only used for 2 bit and higher quantization
maxw_algo: 'octav' # 'octav', 'prop' Algorithm used to calculate the clipping parameters (maximum weight)
maxw_update_until_epoch: 60 # Update clipping parameters until this epoch, they are frozen afterwards
maxw_quantscale: 0.25 # Used only for maxw_algo='prop'. Determines the relation between stddev of weights and max_weight
# Learning parameters
num_epochs: 60 # 5, 20, 80
batch_size: 64
scheduler: 'Cosine' # 'StepLR', 'Cosine', 'CosineWarmRestarts'
learning_rate: 0.001
# CosineWarmRestarts parameters
# T_0: 5 # Period of the first restart for CosineWarmRestarts - 10+20+40 = 70 epochs, need to step in epoch 69 at minimum LR
# T_mult: 4 # Factor increasing T_i after a restart
# StepLR parameters
# lr_decay: 0.1 # lr_decay and step size for StepLR
# step_size: 10
# halve_lr_epoch: 30 # Epoch at which to halve the learning rate - to be used with Cosine schedule
# Data augmentation
augmentation: true
rotation1: 10 # rotation1 and rotation2 are used for data augmentation
rotation2: 10
elastictransformprobability: 0.0 # probability of applying elastic transform
# channel pruning settings. Requires "MaskLayer" in the model, otherwise these settings have no effect
lambda_l1: 0.0005 # L1 regularization parameter for mask learning
prune_epoch: -1 # Epoch at which to start pruning. -1 means no pruning
prune_groupstoprune: 32 # number of groups to prune
prune_totalgroups: 96 # total number of groups. e.g. if there are 384 channels and 96 groups, then each group has 4 channels
# Model parameters
cnn_width: 64 # Width of CNN layers (CNNMNIST only)
network_width1: 96
network_width2: 64
network_width3: 0
# name
runtag: 'octav' # runtag is prefix for runname