Spaces:

Teklia
/

PyLaia

Running

App Files Files Community

PyLaia / app.py

yschneider

Cache download model location, load images in L mode

6b1f545 unverified almost 2 years ago

raw

history blame

4.2 kB

	from uuid import uuid4
	import gradio as gr
	from laia.scripts.htr.decode_ctc import run as decode
	from laia.common.arguments import CommonArgs, DataArgs, TrainerArgs, DecodeArgs
	import sys
	from tempfile import NamedTemporaryFile, mkdtemp
	from pathlib import Path
	from contextlib import redirect_stdout
	import re
	from huggingface_hub import snapshot_download

	# Scratch directory where uploaded images are written so that the decoder can
	# find them by id (it is passed as the only entry of ``img_dirs``).
	images = Path(mkdtemp())

	# A prediction line is parsed as "<image id> <confidence> <text>".
	# Image ids are the 36-character string form of a UUID.
	IMAGE_ID_PATTERN = r"(?P<image_id>[-a-z0-9]{36})"
	CONFIDENCE_PATTERN = r"(?P<confidence>[0-9.]+)"  # For line
	# The text group has to capture the whole transcription (possibly empty),
	# not a single character, and must not require extra leading whitespace.
	TEXT_PATTERN = r"\s*(?P<text>.*)\s*"
	LINE_PREDICTION = re.compile(rf"{IMAGE_ID_PATTERN} {CONFIDENCE_PATTERN} {TEXT_PATTERN}")
	# Model repositories offered in the interface.
	models_name = ["Teklia/pylaia-rimes"]
	# Cache: model name -> local directory of the downloaded snapshot.
	MODELS = {}
	# Height, in pixels, that every input image is resized to before decoding.
	DEFAULT_HEIGHT = 128


	def get_width(image, height=DEFAULT_HEIGHT):
	    """Return the width that keeps ``image``'s proportions at the given height.

	    The value is ``height`` times ``image.width / image.height`` and is returned
	    unrounded; callers convert it to an integer themselves.
	    """
	    return height * (image.width / image.height)


	def load_model(model_name):
	    """Return the local snapshot directory of ``model_name``.

	    The snapshot is fetched with ``snapshot_download`` the first time a model is
	    requested; the resulting path is remembered in ``MODELS`` and reused after.
	    """
	    snapshot_dir = MODELS.get(model_name)
	    if snapshot_dir is None:
	        snapshot_dir = MODELS[model_name] = Path(snapshot_download(model_name))
	    return snapshot_dir


	def predict(model_name, input_img):
	    """Transcribe a text-line image with the PyLaia model ``model_name``.

	    The image is resized to ``DEFAULT_HEIGHT`` pixels high, written to the
	    ``images`` directory and decoded. The prediction is parsed from the
	    decoder's captured standard output with ``LINE_PREDICTION``.

	    Args:
	        model_name: Name of the model, as accepted by ``load_model``.
	        input_img: PIL image of a single text line.

	    Returns:
	        A tuple ``(resized_image, {"text": ..., "score": ...})`` where both
	        dictionary values are the strings parsed from the decoder output.

	    Raises:
	        gr.Error: If no image was provided.
	        RuntimeError: If the decoder output is not exactly one parsable line.
	    """
	    # Gradio passes None when the form is submitted without an image.
	    if input_img is None:
	        raise gr.Error("Please provide an image of a text line.")

	    model_dir = load_model(model_name)

	    temperature = 2.0
	    batch_size = 1

	    weights_path = model_dir / "weights.ckpt"
	    syms_path = model_dir / "syms.txt"
	    language_model_params = {"language_model_weight": 1.0}
	    # A language model is only used when the snapshot ships its token file.
	    use_language_model = (model_dir / "tokens.txt").exists()
	    if use_language_model:
	        language_model_params.update(
	            {
	                "language_model_path": str(model_dir / "language_model.arpa.gz"),
	                "lexicon_path": str(model_dir / "lexicon.txt"),
	                "tokens_path": str(model_dir / "tokens.txt"),
	            }
	        )

	    common_args = CommonArgs(
	        checkpoint=str(weights_path.relative_to(model_dir)),
	        train_path=str(model_dir),
	        experiment_dirname="",
	    )
	    data_args = DataArgs(batch_size=batch_size, color_mode="L")
	    trainer_args = TrainerArgs(
	        # Disable progress bar else it messes with frontend display
	        progress_bar_refresh_rate=0
	    )
	    decode_args = DecodeArgs(
	        include_img_ids=True,
	        join_string="",
	        convert_spaces=True,
	        print_line_confidence_scores=True,
	        print_word_confidence_scores=False,
	        temperature=temperature,
	        use_language_model=use_language_model,
	        **language_model_params,
	    )

	    # Resize image to the default height if bigger/smaller; keep the width at
	    # least one pixel so extremely narrow images do not collapse to zero.
	    target_width = max(1, int(get_width(input_img)))
	    input_img = input_img.resize((target_width, DEFAULT_HEIGHT))

	    image_id = uuid4()
	    image_path = images / f"{image_id}.jpg"
	    try:
	        input_img.save(str(image_path))
	        with NamedTemporaryFile() as pred_stdout, NamedTemporaryFile() as img_list:
	            # Export image list
	            Path(img_list.name).write_text(str(image_id))

	            # Capture stdout as that's where PyLaia outputs predictions.
	            # Closing the capture file flushes it before it is read back.
	            with open(pred_stdout.name, mode="w") as captured, redirect_stdout(captured):
	                decode(
	                    syms=str(syms_path),
	                    img_list=img_list.name,
	                    img_dirs=[str(images)],
	                    common=common_args,
	                    data=data_args,
	                    trainer=trainer_args,
	                    decode=decode_args,
	                    num_workers=1,
	                )
	            predictions = Path(pred_stdout.name).read_text().strip().splitlines()
	    finally:
	        # The saved copy is only needed while decoding; remove it so the
	        # scratch directory does not grow with every request.
	        image_path.unlink(missing_ok=True)

	    if len(predictions) != 1:
	        raise RuntimeError(
	            f"Expected exactly one prediction line, got {len(predictions)}"
	        )
	    parsed = LINE_PREDICTION.match(predictions[0])
	    if parsed is None:
	        raise RuntimeError(f"Could not parse prediction line: {predictions[0]!r}")
	    _, score, text = parsed.groups()
	    return input_img, {"text": text, "score": score}


	# Web interface: pick a model and upload a line image; shows the resized image
	# and the decoded text with its score.
	gradio_app = gr.Interface(
	    predict,
	    inputs=[
	        gr.Dropdown(models_name, value=models_name[0], label="Models"),
	        gr.Image(
	            label="Upload an image of a line",
	            sources=["upload", "clipboard"],
	            type="pil",
	            height=DEFAULT_HEIGHT,
	            width=2000,
	            image_mode="L",
	        ),
	    ],
	    outputs=[
	        gr.Image(label="Processed Image"),
	        gr.JSON(label="Decoded text"),
	    ],
	    # ``iterdir`` yields entries in arbitrary order, so sort them to keep the
	    # example gallery stable, and skip anything that is not a regular file.
	    examples=[
	        [models_name[0], str(filename)]
	        for filename in sorted(Path("examples").iterdir())
	        if filename.is_file()
	    ],
	    title="Decode the transcription of an image using a PyLaia model",
	    cache_examples=True,
	)

	# Start the server only when the file is run as a script.
	if __name__ == "__main__":
	    gradio_app.launch()