{
"architectures": [
"ModernBertForTokenClassification"
],
"attention_bias": false,
"attention_dropout": 0.0,
"auto_map": {
"AutoConfig": "configuration_modernbert.ModernBertConfig",
"AutoModel": "modeling_modernbert.ModernBertModel",
"AutoModelForMaskedLM": "modeling_modernbert.ModernBertForMaskedLM",
"AutoModelForSequenceClassification": "modeling_modernbert.ModernBertForSequenceClassification",
"AutoModelForTokenClassification": "modeling_modernbert.ModernBertForTokenClassification"
},
"bos_token_id": 0,
"classifier_activation": "gelu",
"classifier_bias": false,
"classifier_dropout": 0.0,
"classifier_pooling": "mean",
"cls_token_id": 0,
"custom_pipelines": {
"upos": {
"impl": "upos.BellmanFordTokenClassificationPipeline",
"pt": "AutoModelForTokenClassification"
}
},
"decoder_bias": true,
"deterministic_flash_attn": false,
"embedding_dropout": 0.0,
"eos_token_id": 2,
"global_attn_every_n_layers": 3,
"global_rope_theta": 160000.0,
"gradient_checkpointing": false,
"hidden_activation": "gelu",
"hidden_size": 768,
"id2label": {
"0": "ADP",
"1": "ADP|Foreign=Yes",
"2": "ADP|NounType=Class",
"3": "ADP|Prefix=Yes",
"4": "ADV",
"5": "ADV|Foreign=Yes",
"6": "ADV|NumType=Mult",
"7": "ADV|PartType=Adv",
"8": "ADV|PartType=Enp",
"9": "ADV|PartType=Int",
"10": "ADV|Prefix=Yes",
"11": "AUX",
"12": "AUX|Mood=Imp",
"13": "AUX|Prefix=Yes",
"14": "AUX|VerbType=Cop",
"15": "AUX|Voice=Pass",
"16": "B-ADP",
"17": "B-ADV",
"18": "B-ADV|NumType=Mult",
"19": "B-ADV|PartType=Adv",
"20": "B-ADV|PartType=Enp",
"21": "B-ADV|PronType=Int",
"22": "B-AUX",
"23": "B-AUX|NounType=Class",
"24": "B-AUX|VerbType=Cop",
"25": "B-AUX|Voice=Pass",
"26": "B-CCONJ",
"27": "B-CCONJ|PronType=Prs",
"28": "B-DET",
"29": "B-DET|PartType=Int",
"30": "B-INTJ",
"31": "B-NOUN",
"32": "B-NOUN|Abbr=Yes",
"33": "B-NOUN|Abbr=Yes|Foreign=Yes",
"34": "B-NOUN|Abbr=Yes|Prefix=Yes",
"35": "B-NOUN|Foreign=Yes",
"36": "B-NOUN|Foreign=Yes|NounType=Class",
"37": "B-NOUN|Foreign=Yes|Prefix=Yes",
"38": "B-NOUN|NameType=Com",
"39": "B-NOUN|NameType=Geo",
"40": "B-NOUN|NameType=Oth",
"41": "B-NOUN|NameType=Prs",
"42": "B-NOUN|NounType=Class",
"43": "B-NOUN|NounType=Class|Prefix=Yes",
"44": "B-NOUN|NumType=Mult",
"45": "B-NOUN|Prefix=Yes",
"46": "B-NUM",
"47": "B-NUM|Abbr=Yes",
"48": "B-NUM|Foreign=Yes",
"49": "B-NUM|NumType=Mult",
"50": "B-PART",
"51": "B-PART|Aspect=Perf",
"52": "B-PART|Aspect=Prog",
"53": "B-PART|NounType=Class|PartType=Emp",
"54": "B-PART|NounType=Class|PartType=Emp|Prefix=Yes",
"55": "B-PART|NounType=Class|Prefix=Yes",
"56": "B-PART|NumType=Mult|PartType=Emp",
"57": "B-PART|PartType=Adj",
"58": "B-PART|PartType=Adv",
"59": "B-PART|PartType=Emp",
"60": "B-PART|PartType=Emp|Prefix=Yes",
"61": "B-PART|PartType=Enp",
"62": "B-PART|PartType=Int",
"63": "B-PART|PartType=Neg",
"64": "B-PART|Polarity=Neg",
"65": "B-PART|Prefix=Yes",
"66": "B-PART|PronType=Int",
"67": "B-PRON",
"68": "B-PRON|Person=1",
"69": "B-PRON|Person=2",
"70": "B-PRON|Person=3",
"71": "B-PRON|PronType=Prs",
"72": "B-PRON|PronType=Rcp",
"73": "B-PROPN",
"74": "B-PROPN|Abbr=Yes",
"75": "B-PROPN|Abbr=Yes|Foreign=Yes|NameType=Oth",
"76": "B-PROPN|Foreign=Yes",
"77": "B-PROPN|Foreign=Yes|NameType=Com",
"78": "B-PROPN|Foreign=Yes|NameType=Geo",
"79": "B-PROPN|Foreign=Yes|NameType=Giv",
"80": "B-PROPN|Foreign=Yes|NameType=Oth",
"81": "B-PROPN|Foreign=Yes|NameType=Prs",
"82": "B-PROPN|Foreign=Yes|NameType=Sur",
"83": "B-PROPN|NameType=Com",
"84": "B-PROPN|NameType=Geo",
"85": "B-PROPN|NameType=Giv",
"86": "B-PROPN|NameType=Nat",
"87": "B-PROPN|NameType=Oth",
"88": "B-PROPN|NameType=Pro",
"89": "B-PROPN|NameType=Prs",
"90": "B-PROPN|NameType=Sur",
"91": "B-PUNCT",
"92": "B-SCONJ",
"93": "B-VERB",
"94": "B-VERB|Foreign=Yes",
"95": "B-VERB|PartType=Adj",
"96": "B-VERB|Prefix=Yes",
"97": "B-VERB|VerbType=Cop",
"98": "B-VERB|Voice=Pass",
"99": "B-X",
"100": "CCONJ",
"101": "DET",
"102": "DET|PartType=Emp",
"103": "DET|PronType=Int",
"104": "I-ADP",
"105": "I-ADV",
"106": "I-ADV|NumType=Mult",
"107": "I-ADV|PartType=Adv",
"108": "I-ADV|PartType=Enp",
"109": "I-ADV|PronType=Int",
"110": "I-AUX",
"111": "I-AUX|NounType=Class",
"112": "I-AUX|VerbType=Cop",
"113": "I-AUX|Voice=Pass",
"114": "I-CCONJ",
"115": "I-CCONJ|PronType=Prs",
"116": "I-DET",
"117": "I-DET|PartType=Int",
"118": "I-INTJ",
"119": "I-NOUN",
"120": "I-NOUN|Abbr=Yes",
"121": "I-NOUN|Abbr=Yes|Foreign=Yes",
"122": "I-NOUN|Abbr=Yes|Prefix=Yes",
"123": "I-NOUN|Foreign=Yes",
"124": "I-NOUN|Foreign=Yes|NounType=Class",
"125": "I-NOUN|Foreign=Yes|Prefix=Yes",
"126": "I-NOUN|NameType=Com",
"127": "I-NOUN|NameType=Geo",
"128": "I-NOUN|NameType=Oth",
"129": "I-NOUN|NameType=Prs",
"130": "I-NOUN|NounType=Class",
"131": "I-NOUN|NounType=Class|Prefix=Yes",
"132": "I-NOUN|NumType=Mult",
"133": "I-NOUN|Prefix=Yes",
"134": "I-NUM",
"135": "I-NUM|Abbr=Yes",
"136": "I-NUM|Foreign=Yes",
"137": "I-NUM|NumType=Mult",
"138": "I-PART",
"139": "I-PART|Aspect=Perf",
"140": "I-PART|Aspect=Prog",
"141": "I-PART|NounType=Class|PartType=Emp",
"142": "I-PART|NounType=Class|PartType=Emp|Prefix=Yes",
"143": "I-PART|NounType=Class|Prefix=Yes",
"144": "I-PART|NumType=Mult|PartType=Emp",
"145": "I-PART|PartType=Adj",
"146": "I-PART|PartType=Adv",
"147": "I-PART|PartType=Emp",
"148": "I-PART|PartType=Emp|Prefix=Yes",
"149": "I-PART|PartType=Enp",
"150": "I-PART|PartType=Int",
"151": "I-PART|PartType=Neg",
"152": "I-PART|Polarity=Neg",
"153": "I-PART|Prefix=Yes",
"154": "I-PART|PronType=Int",
"155": "I-PRON",
"156": "I-PRON|Person=1",
"157": "I-PRON|Person=2",
"158": "I-PRON|Person=3",
"159": "I-PRON|PronType=Prs",
"160": "I-PRON|PronType=Rcp",
"161": "I-PROPN",
"162": "I-PROPN|Abbr=Yes",
"163": "I-PROPN|Abbr=Yes|Foreign=Yes|NameType=Oth",
"164": "I-PROPN|Foreign=Yes",
"165": "I-PROPN|Foreign=Yes|NameType=Com",
"166": "I-PROPN|Foreign=Yes|NameType=Geo",
"167": "I-PROPN|Foreign=Yes|NameType=Giv",
"168": "I-PROPN|Foreign=Yes|NameType=Oth",
"169": "I-PROPN|Foreign=Yes|NameType=Prs",
"170": "I-PROPN|Foreign=Yes|NameType=Sur",
"171": "I-PROPN|NameType=Com",
"172": "I-PROPN|NameType=Geo",
"173": "I-PROPN|NameType=Giv",
"174": "I-PROPN|NameType=Nat",
"175": "I-PROPN|NameType=Oth",
"176": "I-PROPN|NameType=Pro",
"177": "I-PROPN|NameType=Prs",
"178": "I-PROPN|NameType=Sur",
"179": "I-PUNCT",
"180": "I-SCONJ",
"181": "I-VERB",
"182": "I-VERB|Foreign=Yes",
"183": "I-VERB|PartType=Adj",
"184": "I-VERB|Prefix=Yes",
"185": "I-VERB|VerbType=Cop",
"186": "I-VERB|Voice=Pass",
"187": "I-X",
"188": "INTJ",
"189": "NOUN",
"190": "NOUN|Foreign=Yes",
"191": "NOUN|NameType=Oth",
"192": "NOUN|NounType=Class",
"193": "NOUN|PartType=Enp",
"194": "NOUN|PartType=Int",
"195": "NOUN|PartType=Res",
"196": "NOUN|Prefix=Yes",
"197": "NUM",
"198": "NUM|NumType=Mult",
"199": "NUM|Prefix=Yes",
"200": "PART",
"201": "PART|Aspect=Perf",
"202": "PART|Aspect=Prog",
"203": "PART|NameType=Oth",
"204": "PART|PartType=Adj",
"205": "PART|PartType=Adv",
"206": "PART|PartType=Emp",
"207": "PART|PartType=Enp",
"208": "PART|PartType=Int",
"209": "PART|PartType=Neg",
"210": "PART|PartType=Res",
"211": "PART|Polarity=Neg",
"212": "PART|Prefix=Yes",
"213": "PART|PronType=Int",
"214": "PRON",
"215": "PRON|NounType=Class",
"216": "PRON|Person=1",
"217": "PRON|Person=2",
"218": "PRON|Person=3",
"219": "PRON|PronType=Int",
"220": "PRON|PronType=Prs",
"221": "PRON|PronType=Rcp",
"222": "PROPN",
"223": "PROPN|Foreign=Yes",
"224": "PROPN|Foreign=Yes|NameType=Prs",
"225": "PROPN|NameType=Com",
"226": "PROPN|NameType=Geo",
"227": "PROPN|NameType=Giv",
"228": "PROPN|NameType=Nat",
"229": "PROPN|NameType=Oth",
"230": "PROPN|NameType=Pro",
"231": "PROPN|NameType=Prs",
"232": "PROPN|NounType=Class",
"233": "PUNCT",
"234": "PUNCT|NounType=Class",
"235": "SCONJ",
"236": "SCONJ|Prefix=Yes",
"237": "SCONJ|VerbType=Cop",
"238": "SYM",
"239": "VERB",
"240": "VERB|Foreign=Yes",
"241": "VERB|Mood=Imp",
"242": "VERB|NounType=Class",
"243": "VERB|Prefix=Yes",
"244": "VERB|VerbType=Cop",
"245": "VERB|Voice=Pass",
"246": "X"
},
"initializer_cutoff_factor": 2.0,
"initializer_range": 0.02,
"intermediate_size": 1152,
"label2id": {
"ADP": 0,
"ADP|Foreign=Yes": 1,
"ADP|NounType=Class": 2,
"ADP|Prefix=Yes": 3,
"ADV": 4,
"ADV|Foreign=Yes": 5,
"ADV|NumType=Mult": 6,
"ADV|PartType=Adv": 7,
"ADV|PartType=Enp": 8,
"ADV|PartType=Int": 9,
"ADV|Prefix=Yes": 10,
"AUX": 11,
"AUX|Mood=Imp": 12,
"AUX|Prefix=Yes": 13,
"AUX|VerbType=Cop": 14,
"AUX|Voice=Pass": 15,
"B-ADP": 16,
"B-ADV": 17,
"B-ADV|NumType=Mult": 18,
"B-ADV|PartType=Adv": 19,
"B-ADV|PartType=Enp": 20,
"B-ADV|PronType=Int": 21,
"B-AUX": 22,
"B-AUX|NounType=Class": 23,
"B-AUX|VerbType=Cop": 24,
"B-AUX|Voice=Pass": 25,
"B-CCONJ": 26,
"B-CCONJ|PronType=Prs": 27,
"B-DET": 28,
"B-DET|PartType=Int": 29,
"B-INTJ": 30,
"B-NOUN": 31,
"B-NOUN|Abbr=Yes": 32,
"B-NOUN|Abbr=Yes|Foreign=Yes": 33,
"B-NOUN|Abbr=Yes|Prefix=Yes": 34,
"B-NOUN|Foreign=Yes": 35,
"B-NOUN|Foreign=Yes|NounType=Class": 36,
"B-NOUN|Foreign=Yes|Prefix=Yes": 37,
"B-NOUN|NameType=Com": 38,
"B-NOUN|NameType=Geo": 39,
"B-NOUN|NameType=Oth": 40,
"B-NOUN|NameType=Prs": 41,
"B-NOUN|NounType=Class": 42,
"B-NOUN|NounType=Class|Prefix=Yes": 43,
"B-NOUN|NumType=Mult": 44,
"B-NOUN|Prefix=Yes": 45,
"B-NUM": 46,
"B-NUM|Abbr=Yes": 47,
"B-NUM|Foreign=Yes": 48,
"B-NUM|NumType=Mult": 49,
"B-PART": 50,
"B-PART|Aspect=Perf": 51,
"B-PART|Aspect=Prog": 52,
"B-PART|NounType=Class|PartType=Emp": 53,
"B-PART|NounType=Class|PartType=Emp|Prefix=Yes": 54,
"B-PART|NounType=Class|Prefix=Yes": 55,
"B-PART|NumType=Mult|PartType=Emp": 56,
"B-PART|PartType=Adj": 57,
"B-PART|PartType=Adv": 58,
"B-PART|PartType=Emp": 59,
"B-PART|PartType=Emp|Prefix=Yes": 60,
"B-PART|PartType=Enp": 61,
"B-PART|PartType=Int": 62,
"B-PART|PartType=Neg": 63,
"B-PART|Polarity=Neg": 64,
"B-PART|Prefix=Yes": 65,
"B-PART|PronType=Int": 66,
"B-PRON": 67,
"B-PRON|Person=1": 68,
"B-PRON|Person=2": 69,
"B-PRON|Person=3": 70,
"B-PRON|PronType=Prs": 71,
"B-PRON|PronType=Rcp": 72,
"B-PROPN": 73,
"B-PROPN|Abbr=Yes": 74,
"B-PROPN|Abbr=Yes|Foreign=Yes|NameType=Oth": 75,
"B-PROPN|Foreign=Yes": 76,
"B-PROPN|Foreign=Yes|NameType=Com": 77,
"B-PROPN|Foreign=Yes|NameType=Geo": 78,
"B-PROPN|Foreign=Yes|NameType=Giv": 79,
"B-PROPN|Foreign=Yes|NameType=Oth": 80,
"B-PROPN|Foreign=Yes|NameType=Prs": 81,
"B-PROPN|Foreign=Yes|NameType=Sur": 82,
"B-PROPN|NameType=Com": 83,
"B-PROPN|NameType=Geo": 84,
"B-PROPN|NameType=Giv": 85,
"B-PROPN|NameType=Nat": 86,
"B-PROPN|NameType=Oth": 87,
"B-PROPN|NameType=Pro": 88,
"B-PROPN|NameType=Prs": 89,
"B-PROPN|NameType=Sur": 90,
"B-PUNCT": 91,
"B-SCONJ": 92,
"B-VERB": 93,
"B-VERB|Foreign=Yes": 94,
"B-VERB|PartType=Adj": 95,
"B-VERB|Prefix=Yes": 96,
"B-VERB|VerbType=Cop": 97,
"B-VERB|Voice=Pass": 98,
"B-X": 99,
"CCONJ": 100,
"DET": 101,
"DET|PartType=Emp": 102,
"DET|PronType=Int": 103,
"I-ADP": 104,
"I-ADV": 105,
"I-ADV|NumType=Mult": 106,
"I-ADV|PartType=Adv": 107,
"I-ADV|PartType=Enp": 108,
"I-ADV|PronType=Int": 109,
"I-AUX": 110,
"I-AUX|NounType=Class": 111,
"I-AUX|VerbType=Cop": 112,
"I-AUX|Voice=Pass": 113,
"I-CCONJ": 114,
"I-CCONJ|PronType=Prs": 115,
"I-DET": 116,
"I-DET|PartType=Int": 117,
"I-INTJ": 118,
"I-NOUN": 119,
"I-NOUN|Abbr=Yes": 120,
"I-NOUN|Abbr=Yes|Foreign=Yes": 121,
"I-NOUN|Abbr=Yes|Prefix=Yes": 122,
"I-NOUN|Foreign=Yes": 123,
"I-NOUN|Foreign=Yes|NounType=Class": 124,
"I-NOUN|Foreign=Yes|Prefix=Yes": 125,
"I-NOUN|NameType=Com": 126,
"I-NOUN|NameType=Geo": 127,
"I-NOUN|NameType=Oth": 128,
"I-NOUN|NameType=Prs": 129,
"I-NOUN|NounType=Class": 130,
"I-NOUN|NounType=Class|Prefix=Yes": 131,
"I-NOUN|NumType=Mult": 132,
"I-NOUN|Prefix=Yes": 133,
"I-NUM": 134,
"I-NUM|Abbr=Yes": 135,
"I-NUM|Foreign=Yes": 136,
"I-NUM|NumType=Mult": 137,
"I-PART": 138,
"I-PART|Aspect=Perf": 139,
"I-PART|Aspect=Prog": 140,
"I-PART|NounType=Class|PartType=Emp": 141,
"I-PART|NounType=Class|PartType=Emp|Prefix=Yes": 142,
"I-PART|NounType=Class|Prefix=Yes": 143,
"I-PART|NumType=Mult|PartType=Emp": 144,
"I-PART|PartType=Adj": 145,
"I-PART|PartType=Adv": 146,
"I-PART|PartType=Emp": 147,
"I-PART|PartType=Emp|Prefix=Yes": 148,
"I-PART|PartType=Enp": 149,
"I-PART|PartType=Int": 150,
"I-PART|PartType=Neg": 151,
"I-PART|Polarity=Neg": 152,
"I-PART|Prefix=Yes": 153,
"I-PART|PronType=Int": 154,
"I-PRON": 155,
"I-PRON|Person=1": 156,
"I-PRON|Person=2": 157,
"I-PRON|Person=3": 158,
"I-PRON|PronType=Prs": 159,
"I-PRON|PronType=Rcp": 160,
"I-PROPN": 161,
"I-PROPN|Abbr=Yes": 162,
"I-PROPN|Abbr=Yes|Foreign=Yes|NameType=Oth": 163,
"I-PROPN|Foreign=Yes": 164,
"I-PROPN|Foreign=Yes|NameType=Com": 165,
"I-PROPN|Foreign=Yes|NameType=Geo": 166,
"I-PROPN|Foreign=Yes|NameType=Giv": 167,
"I-PROPN|Foreign=Yes|NameType=Oth": 168,
"I-PROPN|Foreign=Yes|NameType=Prs": 169,
"I-PROPN|Foreign=Yes|NameType=Sur": 170,
"I-PROPN|NameType=Com": 171,
"I-PROPN|NameType=Geo": 172,
"I-PROPN|NameType=Giv": 173,
"I-PROPN|NameType=Nat": 174,
"I-PROPN|NameType=Oth": 175,
"I-PROPN|NameType=Pro": 176,
"I-PROPN|NameType=Prs": 177,
"I-PROPN|NameType=Sur": 178,
"I-PUNCT": 179,
"I-SCONJ": 180,
"I-VERB": 181,
"I-VERB|Foreign=Yes": 182,
"I-VERB|PartType=Adj": 183,
"I-VERB|Prefix=Yes": 184,
"I-VERB|VerbType=Cop": 185,
"I-VERB|Voice=Pass": 186,
"I-X": 187,
"INTJ": 188,
"NOUN": 189,
"NOUN|Foreign=Yes": 190,
"NOUN|NameType=Oth": 191,
"NOUN|NounType=Class": 192,
"NOUN|PartType=Enp": 193,
"NOUN|PartType=Int": 194,
"NOUN|PartType=Res": 195,
"NOUN|Prefix=Yes": 196,
"NUM": 197,
"NUM|NumType=Mult": 198,
"NUM|Prefix=Yes": 199,
"PART": 200,
"PART|Aspect=Perf": 201,
"PART|Aspect=Prog": 202,
"PART|NameType=Oth": 203,
"PART|PartType=Adj": 204,
"PART|PartType=Adv": 205,
"PART|PartType=Emp": 206,
"PART|PartType=Enp": 207,
"PART|PartType=Int": 208,
"PART|PartType=Neg": 209,
"PART|PartType=Res": 210,
"PART|Polarity=Neg": 211,
"PART|Prefix=Yes": 212,
"PART|PronType=Int": 213,
"PRON": 214,
"PRON|NounType=Class": 215,
"PRON|Person=1": 216,
"PRON|Person=2": 217,
"PRON|Person=3": 218,
"PRON|PronType=Int": 219,
"PRON|PronType=Prs": 220,
"PRON|PronType=Rcp": 221,
"PROPN": 222,
"PROPN|Foreign=Yes": 223,
"PROPN|Foreign=Yes|NameType=Prs": 224,
"PROPN|NameType=Com": 225,
"PROPN|NameType=Geo": 226,
"PROPN|NameType=Giv": 227,
"PROPN|NameType=Nat": 228,
"PROPN|NameType=Oth": 229,
"PROPN|NameType=Pro": 230,
"PROPN|NameType=Prs": 231,
"PROPN|NounType=Class": 232,
"PUNCT": 233,
"PUNCT|NounType=Class": 234,
"SCONJ": 235,
"SCONJ|Prefix=Yes": 236,
"SCONJ|VerbType=Cop": 237,
"SYM": 238,
"VERB": 239,
"VERB|Foreign=Yes": 240,
"VERB|Mood=Imp": 241,
"VERB|NounType=Class": 242,
"VERB|Prefix=Yes": 243,
"VERB|VerbType=Cop": 244,
"VERB|Voice=Pass": 245,
"X": 246
},
"layer_norm_eps": 1e-05,
"local_attention": 128,
"local_rope_theta": 10000.0,
"max_position_embeddings": 8192,
"mlp_bias": false,
"mlp_dropout": 0.0,
"model_type": "modernbert",
"norm_bias": false,
"norm_eps": 1e-05,
"num_attention_heads": 12,
"num_hidden_layers": 22,
"pad_token_id": 1,
"position_embedding_type": "absolute",
"reference_compile": true,
"repad_logits_with_grad": false,
"sep_token_id": 2,
"sparse_pred_ignore_index": -100,
"sparse_prediction": false,
"task_specific_params": {
"esupar_lemmatize": "copy"
},
"tokenizer_class": "DebertaV2TokenizerFast",
"torch_dtype": "float32",
"transformers_version": "4.49.0.dev0",
"vocab_size": 2803
}