Spaces: Runtime error
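The script below loads allenai/MolmoE-1B-0924 and runs text extraction on a local image. Note that wrapping from_pretrained in accelerate's init_empty_weights() leaves the weights on the meta device and is a likely cause of the runtime error; from_pretrained with device_map='auto' already loads weights efficiently on its own.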
from transformers import AutoModelForCausalLM, AutoProcessor, GenerationConfig
from PIL import Image
import torch

USE_GPU = True
device = torch.device("cuda" if USE_GPU and torch.cuda.is_available() else "cpu")
processor = AutoProcessor.from_pretrained(
    'allenai/MolmoE-1B-0924',
    trust_remote_code=True,
    torch_dtype='auto',
    device_map='auto' if USE_GPU else None,
    cache_dir="./models/molmo1"
)
# Load the weights directly. Wrapping from_pretrained in accelerate's
# init_empty_weights() leaves the parameters on the meta device, so any
# later .to(device) or generate() call fails at runtime.
model = AutoModelForCausalLM.from_pretrained(
    'allenai/MolmoE-1B-0924',
    trust_remote_code=True,
    torch_dtype='auto',
    device_map='auto' if USE_GPU else None,
    cache_dir="./models/molmo1",
    attn_implementation="eager"
)
if not USE_GPU:
    model.to(device)
image_path = "./public/image.jpg"  # Replace with your image file path
image = Image.open(image_path).convert("RGB")
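# Alternative sketch (hypothetical URL, assuming network access): fetch the
# image over HTTP instead of reading it from disk.
# import requests
# image = Image.open(requests.get("https://example.com/image.jpg", stream=True).raw).convert("RGB")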
# Preprocess the image and prompt; Molmo's remote-code processor exposes
# process() instead of the usual __call__.
inputs = processor.process(
    images=[image],
    text="Extract text"
)
# Move every tensor to the model's device and add a batch dimension.
inputs = {k: v.to(model.device).unsqueeze(0) for k, v in inputs.items()}
print('STARTED')
# Molmo's remote code provides generate_from_batch(); the tokenizer is
# required so stop_strings can be matched during decoding.
output = model.generate_from_batch(
    inputs,
    GenerationConfig(
        max_new_tokens=2000,
        # temperature=0.1,
        # top_p=top_p,
        stop_strings="<|endoftext|>"
    ),
    tokenizer=processor.tokenizer
)
# Only get generated tokens; decode them to text
generated_tokens = output[0, inputs['input_ids'].size(1):]
generated_text = processor.tokenizer.decode(generated_tokens, skip_special_tokens=True)
print(generated_text)
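# A minimal sketch, assuming sampled decoding is wanted instead of greedy:
# re-enabling the temperature/top_p lines commented out above also requires
# do_sample=True, or transformers ignores them. top_p=0.9 is an illustrative
# value, not taken from the original script.
sampled_config = GenerationConfig(
    max_new_tokens=2000,
    do_sample=True,
    temperature=0.1,
    top_p=0.9,
    stop_strings="<|endoftext|>"
)
sampled_output = model.generate_from_batch(inputs, sampled_config, tokenizer=processor.tokenizer)
print(processor.tokenizer.decode(sampled_output[0, inputs['input_ids'].size(1):], skip_special_tokens=True))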