Spaces:

ashisdeveloper
/

Molmo-7B-D-0924

Runtime error

Molmo-7B-D-0924 / app.py

Create app.py

9ff61cd verified about 1 year ago

1.62 kB

	from transformers import AutoModelForCausalLM, AutoProcessor, GenerationConfig, AutoTokenizer, Qwen2TokenizerFast
	from PIL import Image
	import torch
	import requests
	from accelerate import init_empty_weights


	# Demo script: load a Molmo checkpoint, run it on one local image with a
	# text prompt, and print the text the model generates.

	USE_GPU = True

	# Use CUDA only when it is both requested and actually available.
	device = torch.device("cuda" if USE_GPU and torch.cuda.is_available() else "cpu")

	# Shared loading settings, kept in one place so that the processor and the
	# model are always loaded from the same checkpoint and cache directory.
	MODEL_ID = 'allenai/MolmoE-1B-0924'
	CACHE_DIR = "./models/molmo1"
	# With GPU use requested, weight placement is delegated to device_map='auto';
	# otherwise the model is loaded normally and moved to `device` below.
	DEVICE_MAP = 'auto' if USE_GPU else None

	# NOTE: trust_remote_code=True executes Python code shipped in the model
	# repository; only use it with checkpoints you trust.
	processor = AutoProcessor.from_pretrained(
	    MODEL_ID,
	    trust_remote_code=True,
	    torch_dtype='auto',
	    device_map=DEVICE_MAP,
	    cache_dir=CACHE_DIR,
	)

	# from_pretrained must NOT be wrapped in accelerate's init_empty_weights():
	# that context creates parameters on the meta device (no data), which is
	# meant for building an empty skeleton, not for loading a checkpoint.
	model = AutoModelForCausalLM.from_pretrained(
	    MODEL_ID,
	    trust_remote_code=True,
	    torch_dtype='auto',
	    device_map=DEVICE_MAP,
	    cache_dir=CACHE_DIR,
	    attn_implementation="eager",
	)

	if not USE_GPU:
	    model.to(device)

	# Kept from the original script. NOTE(review): from_pretrained is expected to
	# tie weights itself, so this is presumably redundant -- confirm before removing.
	model.tie_weights()

	image_path = "./public/image.jpg"  # Replace with your image file path
	# Close the file handle as soon as the pixel data has been converted;
	# convert() returns a new in-memory image that outlives the `with` block.
	with Image.open(image_path) as source_image:
	    image = source_image.convert("RGB")

	inputs = processor.process(
	    images=[image],
	    text="Extract text",
	)

	# Move every processed tensor to the model's device and add a leading
	# batch dimension of size 1.
	inputs = {k: v.to(model.device).unsqueeze(0) for k, v in inputs.items()}
	print('STARTED')
	output = model.generate_from_batch(
	    inputs,
	    GenerationConfig(
	        max_new_tokens=2000,
	        # Plain "<|endoftext|>": the previous literal had a backslash before
	        # each "|", which Python keeps in the string, so it could never match.
	        stop_strings="<|endoftext|>",
	    ),
	    tokenizer=processor.tokenizer,
	)

	# Only get generated tokens; decode them to text
	generated_tokens = output[0, inputs['input_ids'].size(1):]
	generated_text = processor.tokenizer.decode(generated_tokens, skip_special_tokens=True)

	print(generated_text)