NMTModel(
  (encoder): TransformerEncoder(
    (embeddings): Embeddings(
      (make_embedding): Sequential(
        (emb_luts): Elementwise(
          (0): Embedding(50004, 512, padding_idx=1)
        )
        (pe): PositionalEncoding(
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
    )
    (transformer): ModuleList(
      (0): TransformerEncoderLayer(
        (self_attn): MultiHeadedAttention(
          (linear_keys): Linear(in_features=512, out_features=512, bias=True)
          (linear_values): Linear(in_features=512, out_features=512, bias=True)
          (linear_query): Linear(in_features=512, out_features=512, bias=True)
          (softmax): Softmax(dim=-1)
          (dropout): Dropout(p=0.1, inplace=False)
          (final_linear): Linear(in_features=512, out_features=512, bias=True)
        )
        (feed_forward): PositionwiseFeedForward(
          (w_1): Linear(in_features=512, out_features=2048, bias=True)
          (w_2): Linear(in_features=2048, out_features=512, bias=True)
          (layer_norm): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
          (dropout_1): Dropout(p=0.1, inplace=False)
          (relu): ReLU()
          (dropout_2): Dropout(p=0.1, inplace=False)
        )
        (layer_norm): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
        (dropout): Dropout(p=0.1, inplace=False)
      )
      (1): TransformerEncoderLayer(
        (self_attn): MultiHeadedAttention(
          (linear_keys): Linear(in_features=512, out_features=512, bias=True)
          (linear_values): Linear(in_features=512, out_features=512, bias=True)
          (linear_query): Linear(in_features=512, out_features=512, bias=True)
          (softmax): Softmax(dim=-1)
          (dropout): Dropout(p=0.1, inplace=False)
          (final_linear): Linear(in_features=512, out_features=512, bias=True)
        )
        (feed_forward): PositionwiseFeedForward(
          (w_1): Linear(in_features=512, out_features=2048, bias=True)
          (w_2): Linear(in_features=2048, out_features=512, bias=True)
          (layer_norm): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
          (dropout_1): Dropout(p=0.1, inplace=False)
          (relu): ReLU()
          (dropout_2): Dropout(p=0.1, inplace=False)
        )
        (layer_norm): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
        (dropout): Dropout(p=0.1, inplace=False)
      )
      (2): TransformerEncoderLayer(
        (self_attn): MultiHeadedAttention(
          (linear_keys): Linear(in_features=512, out_features=512, bias=True)
          (linear_values): Linear(in_features=512, out_features=512, bias=True)
          (linear_query): Linear(in_features=512, out_features=512, bias=True)
          (softmax): Softmax(dim=-1)
          (dropout): Dropout(p=0.1, inplace=False)
          (final_linear): Linear(in_features=512, out_features=512, bias=True)
        )
        (feed_forward): PositionwiseFeedForward(
          (w_1): Linear(in_features=512, out_features=2048, bias=True)
          (w_2): Linear(in_features=2048, out_features=512, bias=True)
          (layer_norm): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
          (dropout_1): Dropout(p=0.1, inplace=False)
          (relu): ReLU()
          (dropout_2): Dropout(p=0.1, inplace=False)
        )
        (layer_norm): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
        (dropout): Dropout(p=0.1, inplace=False)
      )
      (3): TransformerEncoderLayer(
        (self_attn): MultiHeadedAttention(
          (linear_keys): Linear(in_features=512, out_features=512, bias=True)
          (linear_values): Linear(in_features=512, out_features=512, bias=True)
          (linear_query): Linear(in_features=512, out_features=512, bias=True)
          (softmax): Softmax(dim=-1)
          (dropout): Dropout(p=0.1, inplace=False)
          (final_linear): Linear(in_features=512, out_features=512, bias=True)
        )
        (feed_forward): PositionwiseFeedForward(
          (w_1): Linear(in_features=512, out_features=2048, bias=True)
          (w_2): Linear(in_features=2048, out_features=512, bias=True)
          (layer_norm): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
          (dropout_1): Dropout(p=0.1, inplace=False)
          (relu): ReLU()
          (dropout_2): Dropout(p=0.1, inplace=False)
        )
        (layer_norm): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
        (dropout): Dropout(p=0.1, inplace=False)
      )
      (4): TransformerEncoderLayer(
        (self_attn): MultiHeadedAttention(
          (linear_keys): Linear(in_features=512, out_features=512, bias=True)
          (linear_values): Linear(in_features=512, out_features=512, bias=True)
          (linear_query): Linear(in_features=512, out_features=512, bias=True)
          (softmax): Softmax(dim=-1)
          (dropout): Dropout(p=0.1, inplace=False)
          (final_linear): Linear(in_features=512, out_features=512, bias=True)
        )
        (feed_forward): PositionwiseFeedForward(
          (w_1): Linear(in_features=512, out_features=2048, bias=True)
          (w_2): Linear(in_features=2048, out_features=512, bias=True)
          (layer_norm): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
          (dropout_1): Dropout(p=0.1, inplace=False)
          (relu): ReLU()
          (dropout_2): Dropout(p=0.1, inplace=False)
        )
        (layer_norm): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
        (dropout): Dropout(p=0.1, inplace=False)
      )
      (5): TransformerEncoderLayer(
        (self_attn): MultiHeadedAttention(
          (linear_keys): Linear(in_features=512, out_features=512, bias=True)
          (linear_values): Linear(in_features=512, out_features=512, bias=True)
          (linear_query): Linear(in_features=512, out_features=512, bias=True)
          (softmax): Softmax(dim=-1)
          (dropout): Dropout(p=0.1, inplace=False)
          (final_linear): Linear(in_features=512, out_features=512, bias=True)
        )
        (feed_forward): PositionwiseFeedForward(
          (w_1): Linear(in_features=512, out_features=2048, bias=True)
          (w_2): Linear(in_features=2048, out_features=512, bias=True)
          (layer_norm): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
          (dropout_1): Dropout(p=0.1, inplace=False)
          (relu): ReLU()
          (dropout_2): Dropout(p=0.1, inplace=False)
        )
        (layer_norm): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
        (dropout): Dropout(p=0.1, inplace=False)
      )
    )
    (layer_norm): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
  )
  (decoder): TransformerDecoder(
    (embeddings): Embeddings(
      (make_embedding): Sequential(
        (emb_luts): Elementwise(
          (0): Embedding(50004, 512, padding_idx=1)
        )
        (pe): PositionalEncoding(
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
    )
    (transformer_layers): ModuleList(
      (0): TransformerDecoderLayer(
        (self_attn): MultiHeadedAttention(
          (linear_keys): Linear(in_features=512, out_features=512, bias=True)
          (linear_values): Linear(in_features=512, out_features=512, bias=True)
          (linear_query): Linear(in_features=512, out_features=512, bias=True)
          (softmax): Softmax(dim=-1)
          (dropout): Dropout(p=0.1, inplace=False)
          (final_linear): Linear(in_features=512, out_features=512, bias=True)
        )
        (context_attn): MultiHeadedAttention(
          (linear_keys): Linear(in_features=512, out_features=512, bias=True)
          (linear_values): Linear(in_features=512, out_features=512, bias=True)
          (linear_query): Linear(in_features=512, out_features=512, bias=True)
          (softmax): Softmax(dim=-1)
          (dropout): Dropout(p=0.1, inplace=False)
          (final_linear): Linear(in_features=512, out_features=512, bias=True)
        )
        (feed_forward): PositionwiseFeedForward(
          (w_1): Linear(in_features=512, out_features=2048, bias=True)
          (w_2): Linear(in_features=2048, out_features=512, bias=True)
          (layer_norm): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
          (dropout_1): Dropout(p=0.1, inplace=False)
          (relu): ReLU()
          (dropout_2): Dropout(p=0.1, inplace=False)
        )
        (layer_norm_1): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
        (layer_norm_2): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
        (drop): Dropout(p=0.1, inplace=False)
      )
      (1): TransformerDecoderLayer(
        (self_attn): MultiHeadedAttention(
          (linear_keys): Linear(in_features=512, out_features=512, bias=True)
          (linear_values): Linear(in_features=512, out_features=512, bias=True)
          (linear_query): Linear(in_features=512, out_features=512, bias=True)
          (softmax): Softmax(dim=-1)
          (dropout): Dropout(p=0.1, inplace=False)
          (final_linear): Linear(in_features=512, out_features=512, bias=True)
        )
        (context_attn): MultiHeadedAttention(
          (linear_keys): Linear(in_features=512, out_features=512, bias=True)
          (linear_values): Linear(in_features=512, out_features=512, bias=True)
          (linear_query): Linear(in_features=512, out_features=512, bias=True)
          (softmax): Softmax(dim=-1)
          (dropout): Dropout(p=0.1, inplace=False)
          (final_linear): Linear(in_features=512, out_features=512, bias=True)
        )
        (feed_forward): PositionwiseFeedForward(
          (w_1): Linear(in_features=512, out_features=2048, bias=True)
          (w_2): Linear(in_features=2048, out_features=512, bias=True)
          (layer_norm): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
          (dropout_1): Dropout(p=0.1, inplace=False)
          (relu): ReLU()
          (dropout_2): Dropout(p=0.1, inplace=False)
        )
        (layer_norm_1): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
        (layer_norm_2): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
        (drop): Dropout(p=0.1, inplace=False)
      )
      (2): TransformerDecoderLayer(
        (self_attn): MultiHeadedAttention(
          (linear_keys): Linear(in_features=512, out_features=512, bias=True)
          (linear_values): Linear(in_features=512, out_features=512, bias=True)
          (linear_query): Linear(in_features=512, out_features=512, bias=True)
          (softmax): Softmax(dim=-1)
          (dropout): Dropout(p=0.1, inplace=False)
          (final_linear): Linear(in_features=512, out_features=512, bias=True)
        )
        (context_attn): MultiHeadedAttention(
          (linear_keys): Linear(in_features=512, out_features=512, bias=True)
          (linear_values): Linear(in_features=512, out_features=512, bias=True)
          (linear_query): Linear(in_features=512, out_features=512, bias=True)
          (softmax): Softmax(dim=-1)
          (dropout): Dropout(p=0.1, inplace=False)
          (final_linear): Linear(in_features=512, out_features=512, bias=True)
        )
        (feed_forward): PositionwiseFeedForward(
          (w_1): Linear(in_features=512, out_features=2048, bias=True)
          (w_2): Linear(in_features=2048, out_features=512, bias=True)
          (layer_norm): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
          (dropout_1): Dropout(p=0.1, inplace=False)
          (relu): ReLU()
          (dropout_2): Dropout(p=0.1, inplace=False)
        )
        (layer_norm_1): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
        (layer_norm_2): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
        (drop): Dropout(p=0.1, inplace=False)
      )
      (3): TransformerDecoderLayer(
        (self_attn): MultiHeadedAttention(
          (linear_keys): Linear(in_features=512, out_features=512, bias=True)
          (linear_values): Linear(in_features=512, out_features=512, bias=True)
          (linear_query): Linear(in_features=512, out_features=512, bias=True)
          (softmax): Softmax(dim=-1)
          (dropout): Dropout(p=0.1, inplace=False)
          (final_linear): Linear(in_features=512, out_features=512, bias=True)
        )
        (context_attn): MultiHeadedAttention(
          (linear_keys): Linear(in_features=512, out_features=512, bias=True)
          (linear_values): Linear(in_features=512, out_features=512, bias=True)
          (linear_query): Linear(in_features=512, out_features=512, bias=True)
          (softmax): Softmax(dim=-1)
          (dropout): Dropout(p=0.1, inplace=False)
          (final_linear): Linear(in_features=512, out_features=512, bias=True)
        )
        (feed_forward): PositionwiseFeedForward(
          (w_1): Linear(in_features=512, out_features=2048, bias=True)
          (w_2): Linear(in_features=2048, out_features=512, bias=True)
          (layer_norm): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
          (dropout_1): Dropout(p=0.1, inplace=False)
          (relu): ReLU()
          (dropout_2): Dropout(p=0.1, inplace=False)
        )
        (layer_norm_1): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
        (layer_norm_2): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
        (drop): Dropout(p=0.1, inplace=False)
      )
      (4): TransformerDecoderLayer(
        (self_attn): MultiHeadedAttention(
          (linear_keys): Linear(in_features=512, out_features=512, bias=True)
          (linear_values): Linear(in_features=512, out_features=512, bias=True)
          (linear_query): Linear(in_features=512, out_features=512, bias=True)
          (softmax): Softmax(dim=-1)
          (dropout): Dropout(p=0.1, inplace=False)
          (final_linear): Linear(in_features=512, out_features=512, bias=True)
        )
        (context_attn): MultiHeadedAttention(
          (linear_keys): Linear(in_features=512, out_features=512, bias=True)
          (linear_values): Linear(in_features=512, out_features=512, bias=True)
          (linear_query): Linear(in_features=512, out_features=512, bias=True)
          (softmax): Softmax(dim=-1)
          (dropout): Dropout(p=0.1, inplace=False)
          (final_linear): Linear(in_features=512, out_features=512, bias=True)
        )
        (feed_forward): PositionwiseFeedForward(
          (w_1): Linear(in_features=512, out_features=2048, bias=True)
          (w_2): Linear(in_features=2048, out_features=512, bias=True)
          (layer_norm): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
          (dropout_1): Dropout(p=0.1, inplace=False)
          (relu): ReLU()
          (dropout_2): Dropout(p=0.1, inplace=False)
        )
        (layer_norm_1): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
        (layer_norm_2): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
        (drop): Dropout(p=0.1, inplace=False)
      )
      (5): TransformerDecoderLayer(
        (self_attn): MultiHeadedAttention(
          (linear_keys): Linear(in_features=512, out_features=512, bias=True)
          (linear_values): Linear(in_features=512, out_features=512, bias=True)
          (linear_query): Linear(in_features=512, out_features=512, bias=True)
          (softmax): Softmax(dim=-1)
          (dropout): Dropout(p=0.1, inplace=False)
          (final_linear): Linear(in_features=512, out_features=512, bias=True)
        )
        (context_attn): MultiHeadedAttention(
          (linear_keys): Linear(in_features=512, out_features=512, bias=True)
          (linear_values): Linear(in_features=512, out_features=512, bias=True)
          (linear_query): Linear(in_features=512, out_features=512, bias=True)
          (softmax): Softmax(dim=-1)
          (dropout): Dropout(p=0.1, inplace=False)
          (final_linear): Linear(in_features=512, out_features=512, bias=True)
        )
        (feed_forward): PositionwiseFeedForward(
          (w_1): Linear(in_features=512, out_features=2048, bias=True)
          (w_2): Linear(in_features=2048, out_features=512, bias=True)
          (layer_norm): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
          (dropout_1): Dropout(p=0.1, inplace=False)
          (relu): ReLU()
          (dropout_2): Dropout(p=0.1, inplace=False)
        )
        (layer_norm_1): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
        (layer_norm_2): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
        (drop): Dropout(p=0.1, inplace=False)
      )
    )
    (layer_norm): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
  )
)
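
Note: the tree above is what PyTorch prints for an OpenNMT-py NMTModel: a 6-layer Transformer encoder and 6-layer Transformer decoder with d_model 512, feed-forward size 2048, vocabulary size 50004 (padding index 1), and dropout 0.1. The sketch below is not the original OpenNMT-py code; it uses plain torch.nn with made-up names (ToyNMT, src_emb, tgt_emb, generator), and the head count of 8 is an assumption since it is not visible in the dump. Printing the resulting module yields an analogous nested tree.

# Minimal plain-PyTorch sketch mirroring the hyperparameters visible above.
# Assumptions: nhead=8 (not shown in the dump); class/attribute names are illustrative.
import torch.nn as nn

VOCAB, D_MODEL, D_FF, N_LAYERS, DROPOUT = 50004, 512, 2048, 6, 0.1

class ToyNMT(nn.Module):
    def __init__(self):
        super().__init__()
        # Source/target embedding tables with padding index 1, as in the dump.
        self.src_emb = nn.Embedding(VOCAB, D_MODEL, padding_idx=1)
        self.tgt_emb = nn.Embedding(VOCAB, D_MODEL, padding_idx=1)
        # 6 encoder + 6 decoder layers, d_model 512, feed-forward 2048, dropout 0.1.
        self.transformer = nn.Transformer(
            d_model=D_MODEL, nhead=8,
            num_encoder_layers=N_LAYERS, num_decoder_layers=N_LAYERS,
            dim_feedforward=D_FF, dropout=DROPOUT)
        # Projection back to the vocabulary (OpenNMT-py keeps this in a separate generator).
        self.generator = nn.Linear(D_MODEL, VOCAB)

if __name__ == "__main__":
    # Printing any nn.Module produces a nested module tree like the one above.
    print(ToyNMT())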