---
license: apache-2.0
---

#### Usage

Installation

```
git clone https://github.com/panuthept/OKEAN.git
cd OKEAN

conda create -n okean python==3.11.4
conda activate okean

# Select the appropriate PyTorch version based on your CUDA version
# CUDA 11.8
conda install pytorch==2.3.0 torchvision==0.18.0 torchaudio==2.3.0 pytorch-cuda=11.8 -c pytorch -c nvidia
# CUDA 12.1
conda install pytorch==2.3.0 torchvision==0.18.0 torchaudio==2.3.0 pytorch-cuda=12.1 -c pytorch -c nvidia
# CPU Only
conda install pytorch==2.3.0 torchvision==0.18.0 torchaudio==2.3.0 cpuonly -c pytorch

pip install -e .
```

Entity Linking

```python
from okean.modules.entity_linking.elq import ELQ

model = ELQ.from_pretrained(
    model_name_or_path="panuthept/okean-elq-wikipedia",
)

texts = [
    "Barack Obama is the former president of the United States.",
    "The Eiffel Tower is located in Paris.",
]
response = model(texts=texts, return_metadata=["id"])
print(response.passages)
>> [
    Passage(
        text='Barack Obama is the former president of the United States.',
        spans=[
            Span(start=0, end=12, surface_form='Barack Obama', confident=0.7972, entity=Entity(identifier=213062, confident=1.0, metadata={'id': {'wikipedia': '534366', 'wikidata': 'Q76'}})),
            Span(start=27, end=57, surface_form='president of the United States', confident=0.5499, entity=Entity(identifier=11887, confident=0.9999, metadata={'id': {'wikipedia': '24113', 'wikidata': 'Q11696'}}))
        ]),
    Passage(
        text='The Eiffel Tower is located in Paris.',
        spans=[
            Span(start=4, end=16, surface_form='Eiffel Tower', confident=0.5214, entity=Entity(identifier=4276, confident=0.9999, metadata={'id': {'wikipedia': '9232', 'wikidata': 'Q243'}})),
            Span(start=31, end=36, surface_form='Paris', confident=0.6658, entity=Entity(identifier=11245, confident=0.9999, metadata={'id': {'wikipedia': '22989', 'wikidata': 'Q90'}}))
        ]
    )
]
```

Entity Disambiguation

```python
from okean.modules.entity_linking.elq import ELQ
from okean.data_types.basic_types import Passage, Span

model = ELQ.from_pretrained(
    model_name_or_path="panuthept/okean-elq-wikipedia",
)

passages = [
    Passage(
        text="Barack Obama is the former president of the United States.",
        spans=[
            Span(start=0, end=12, surface_form="Barack Obama"),
            Span(start=27, end=57, surface_form="president of the United States"),
        ]
    ),
    Passage(
        text="The Eiffel Tower is located in Paris.",
        spans=[
            Span(start=4, end=16, surface_form="Eiffel Tower"),
            Span(start=31, end=36, surface_form="Paris"),
        ]
    ),
]
response = model(passages=passages, return_metadata=["id"])
print(response.passages)
>> [
    Passage(
        text='Barack Obama is the former president of the United States.',
        spans=[
            Span(start=0, end=12, surface_form='Barack Obama', confident=1.0, entity=Entity(identifier=213062, confident=1.0, metadata={'id': {'wikipedia': '534366', 'wikidata': 'Q76'}})),
            Span(start=27, end=57, surface_form='president of the United States', confident=1.0, entity=Entity(identifier=11887, confident=0.9999, metadata={'id': {'wikipedia': '24113', 'wikidata': 'Q11696'}}))
        ]),
    Passage(
        text='The Eiffel Tower is located in Paris.',
        spans=[
            Span(start=4, end=16, surface_form='Eiffel Tower', confident=1.0, entity=Entity(identifier=4276, confident=0.9999, metadata={'id': {'wikipedia': '9232', 'wikidata': 'Q243'}})),
            Span(start=31, end=36, surface_form='Paris', confident=1.0, entity=Entity(identifier=11245, confident=0.9999, metadata={'id': {'wikipedia': '22989', 'wikidata': 'Q90'}}))
        ]
    )
]
```