{
  "model_path": "adept_level1",
  "datatype": "adept",
  "dataset": "ADEPT",
  "learning_rate": 0.0001,
  "num_updates": 0,
  "max_epochs": 1000,
  "max_updates": 400000,
  "phases": {
    "start_inner_loop": 60000,
    "shufleslots_end": 30000,
    "entity_pretraining_phase2_end": 50000,
    "entity_pretraining_phase1_end": 30000,
    "background_pretraining_end": 0
  },
  "defaults": {
    "num_workers": 2,
    "prefetch_factor": 2,
    "statistics_offset": 10,
    "load_optimizers": false,
    "teacher_forcing": 10,
    "skip_frames": 3,
    "error_dropout": 0.1
  },
  "bptt": {
    "bptt_start_timestep": 0,
    "bptt_steps": 1,
    "bptt_steps_max": 2,
    "increase_bptt_steps_every": 200000
  },
  "model": {
    "level": 1,
    "batch_size": 16,
    "num_objects": 7,
    "img_channels": 3,
    "input_size": [
      320,
      480
    ],
    "latent_size": [
      20,
      30
    ],
    "gestalt_size": 96,
    "bottleneck": "binar",
    "position_regularizer": 0.01,
    "time_regularizer": 0.1,
    "encoder_regularizer": 0.333333,
    "inner_loop_enabled": false,
    "encoder": {
      "channels": 48,
      "level1_channels": 24,
      "num_layers": 3,
      "reg_lambda": 1e-10
    },
    "predictor": {
      "heads": 2,
      "layers": 2,
      "channels_multiplier": 2,
      "reg_lambda": 1e-10,
      "transformer_type": "standard"
    },
    "decoder": {
      "channels": 48,
      "level1_channels": 24,
      "num_layers": 5
    },
    "background": {
      "learning_rate": 0.0001,
      "learning_rate_old": 0.0001,
      "num_layers": 1,
      "latent_channels": 48,
      "level1_channels": 24,
      "gestalt_size": 8,
      "flow": false
    },
    "update_module": {
      "reg_lambda": 5e-06
    }
  }
}