|
"submodule_list": "[GPT2MLP(\n (c_fc): Conv1D()\n (c_proj): Conv1D()\n (act): NewGELUActivation()\n (dropout): Dropout(p=0.1, inplace=False)\n), GPT2MLP(\n (c_fc): Conv1D()\n (c_proj): Conv1D()\n (act): NewGELUActivation()\n (dropout): Dropout(p=0.1, inplace=False)\n), GPT2MLP(\n (c_fc): Conv1D()\n (c_proj): Conv1D()\n (act): NewGELUActivation()\n (dropout): Dropout(p=0.1, inplace=False)\n), GPT2MLP(\n (c_fc): Conv1D()\n (c_proj): Conv1D()\n (act): NewGELUActivation()\n (dropout): Dropout(p=0.1, inplace=False)\n), GPT2MLP(\n (c_fc): Conv1D()\n (c_proj): Conv1D()\n (act): NewGELUActivation()\n (dropout): Dropout(p=0.1, inplace=False)\n), GPT2MLP(\n (c_fc): Conv1D()\n (c_proj): Conv1D()\n (act): NewGELUActivation()\n (dropout): Dropout(p=0.1, inplace=False)\n), GPT2MLP(\n (c_fc): Conv1D()\n (c_proj): Conv1D()\n (act): NewGELUActivation()\n (dropout): Dropout(p=0.1, inplace=False)\n), GPT2MLP(\n (c_fc): Conv1D()\n (c_proj): Conv1D()\n (act): NewGELUActivation()\n (dropout): Dropout(p=0.1, inplace=False)\n), GPT2MLP(\n (c_fc): Conv1D()\n (c_proj): Conv1D()\n (act): NewGELUActivation()\n (dropout): Dropout(p=0.1, inplace=False)\n), GPT2MLP(\n (c_fc): Conv1D()\n (c_proj): Conv1D()\n (act): NewGELUActivation()\n (dropout): Dropout(p=0.1, inplace=False)\n), GPT2MLP(\n (c_fc): Conv1D()\n (c_proj): Conv1D()\n (act): NewGELUActivation()\n (dropout): Dropout(p=0.1, inplace=False)\n), GPT2MLP(\n (c_fc): Conv1D()\n (c_proj): Conv1D()\n (act): NewGELUActivation()\n (dropout): Dropout(p=0.1, inplace=False)\n), GPT2SdpaAttention(\n (c_attn): Conv1D()\n (c_proj): Conv1D()\n (attn_dropout): Dropout(p=0.1, inplace=False)\n (resid_dropout): Dropout(p=0.1, inplace=False)\n), GPT2SdpaAttention(\n (c_attn): Conv1D()\n (c_proj): Conv1D()\n (attn_dropout): Dropout(p=0.1, inplace=False)\n (resid_dropout): Dropout(p=0.1, inplace=False)\n), GPT2SdpaAttention(\n (c_attn): Conv1D()\n (c_proj): Conv1D()\n (attn_dropout): Dropout(p=0.1, inplace=False)\n (resid_dropout): Dropout(p=0.1, inplace=False)\n), GPT2SdpaAttention(\n (c_attn): Conv1D()\n (c_proj): Conv1D()\n (attn_dropout): Dropout(p=0.1, inplace=False)\n (resid_dropout): Dropout(p=0.1, inplace=False)\n), GPT2SdpaAttention(\n (c_attn): Conv1D()\n (c_proj): Conv1D()\n (attn_dropout): Dropout(p=0.1, inplace=False)\n (resid_dropout): Dropout(p=0.1, inplace=False)\n), GPT2SdpaAttention(\n (c_attn): Conv1D()\n (c_proj): Conv1D()\n (attn_dropout): Dropout(p=0.1, inplace=False)\n (resid_dropout): Dropout(p=0.1, inplace=False)\n), GPT2SdpaAttention(\n (c_attn): Conv1D()\n (c_proj): Conv1D()\n (attn_dropout): Dropout(p=0.1, inplace=False)\n (resid_dropout): Dropout(p=0.1, inplace=False)\n), GPT2SdpaAttention(\n (c_attn): Conv1D()\n (c_proj): Conv1D()\n (attn_dropout): Dropout(p=0.1, inplace=False)\n (resid_dropout): Dropout(p=0.1, inplace=False)\n), GPT2SdpaAttention(\n (c_attn): Conv1D()\n (c_proj): Conv1D()\n (attn_dropout): Dropout(p=0.1, inplace=False)\n (resid_dropout): Dropout(p=0.1, inplace=False)\n), GPT2SdpaAttention(\n (c_attn): Conv1D()\n (c_proj): Conv1D()\n (attn_dropout): Dropout(p=0.1, inplace=False)\n (resid_dropout): Dropout(p=0.1, inplace=False)\n), GPT2SdpaAttention(\n (c_attn): Conv1D()\n (c_proj): Conv1D()\n (attn_dropout): Dropout(p=0.1, inplace=False)\n (resid_dropout): Dropout(p=0.1, inplace=False)\n), GPT2SdpaAttention(\n (c_attn): Conv1D()\n (c_proj): Conv1D()\n (attn_dropout): Dropout(p=0.1, inplace=False)\n (resid_dropout): Dropout(p=0.1, inplace=False)\n)]" |