GliLem / utils.py
adorkin's picture
Add demo files
15ce174 verified
raw
history blame contribute delete
315 Bytes
def sentence_to_spans(tokenized_sentence: list[str]) -> dict[str, int]:
    """Map each token's character span to its index in the sentence.

    Offsets are computed as if the tokens were joined by a single
    one-character separator (for example ``" ".join(tokenized_sentence)``):
    every token starts one character after the previous token ends.

    Args:
        tokenized_sentence: Tokens in sentence order.

    Returns:
        A dict whose keys are ``"<start>-<end>"`` strings (``start``
        inclusive, ``end`` exclusive) and whose values are the zero-based
        token indices. An empty input yields an empty dict.
    """
    span_to_token_id: dict[str, int] = {}
    offset = 0
    for token_id, token in enumerate(tokenized_sentence):
        end = offset + len(token)
        span_to_token_id[f"{offset}-{end}"] = token_id
        # Step over the one-character separator that follows every token.
        offset = end + 1
    return span_to_token_id