阶段

层名称

参数

输出数据维度

视觉特征提取(卷积)

Conv2d_1

kernel_size = [7, 7, 3, 16], stride = 2

[126, 126, 16]

MaxPool_1

kernel_size = [5, 5], stride = 2

[62, 62, 16]

Conv2d_2

kernel_size = [3, 3, 16, 32], stride = 1

[60, 60, 32]

MaxPool_2

kernel_size = [5, 5], stride = 2

[29, 29, 32]

Conv2d_3

kernel_size = [3, 3, 32, 32], stride = 1

[27, 27, 32]

MaxPool_3

kernel_size = [5, 5], stride = 2

[12, 12, 32]

Conv2d_4

kernel_size = [3, 3, 32, 64], stride = 1

[10, 10, 64]

MaxPool_4

kernel_size = [5, 5], stride = 2

[4, 4, 64]

数据展开

Flatten

-

[1, 4096]

LSTM

LSTM_1

kernel_size = [4096, 1024]

[1, 1024]

LSTM_2

kernel_size = [1024, 256]

[1, 256]

LSTM_3

kernel_size = [256, 128]

[1, 128]

注意力模块

Attention_1

kernel_size = [128, 16]

[1, 16]

Attention_2

kernel_size = [16, 128]

[1, 128]

全连接

FC_1

kernel_size = [128, 32]

[1, 32]

FC_2

kernel_size = [32, 1]

[1, 1]