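# Streamlit demo for a Hugging Face Space: downloads the GPTQ-quantized
# FPHam/Jackson_The_Formalizer_V2_13b_GPTQ model, loads it with AutoGPTQ,
# and generates a response to a user-supplied phrase.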
import os

import streamlit as st
import torch
from auto_gptq import AutoGPTQForCausalLM, BaseQuantizeConfig
from huggingface_hub import snapshot_download
from transformers import AutoTokenizer
# Optional memory/CPU tweaks (left disabled):
# torch.cuda.empty_cache()       # clear up some GPU memory
# torch.set_num_threads(1)       # reduce the number of threads PyTorch uses
cwd = os.getcwd()
cachedir = os.path.join(cwd, "cache")

# Create the cache directory if it does not already exist
os.makedirs(cachedir, exist_ok=True)

# huggingface_hub reads HF_HOME when it is imported, so setting it this late
# may not take effect; cache_dir is therefore also passed explicitly to
# snapshot_download() below.
os.environ["HF_HOME"] = cachedir
local_folder = os.path.join(cachedir, "model")
quantized_model_dir = "FPHam/Jackson_The_Formalizer_V2_13b_GPTQ"

# Basename of the quantized checkpoint; AutoGPTQ resolves it relative to
# local_folder, so a bare name is enough here
model_basename = "Jackson2-4bit-128g-GPTQ"

# The repo ships a safetensors checkpoint rather than pytorch_model.bin, so
# check for that file to decide whether the model still needs downloading
model_path = os.path.join(local_folder, model_basename + ".safetensors")
if not os.path.isfile(model_path):
    snapshot_download(
        repo_id=quantized_model_dir,
        local_dir=local_folder,
        local_dir_use_symlinks=False,
        cache_dir=cachedir,
    )
use_strict = False   # relax strict checks when loading the checkpoint
use_triton = False   # use AutoGPTQ's CUDA kernels instead of Triton
# Load the tokenizer
tokenizer = AutoTokenizer.from_pretrained(local_folder, use_fast=False)

# Quantization settings matching the checkpoint
quantize_config = BaseQuantizeConfig(
    bits=4,          # 4-bit quantization
    group_size=128,  # quantization group size
    desc_act=False   # no act-order (activation-order) quantization
)
# Load the quantized model
model = AutoGPTQForCausalLM.from_quantized(
    local_folder,
    use_safetensors=True,
    strict=use_strict,
    model_basename=model_basename,
    device="cuda:0",
    trust_remote_code=True,
    use_triton=use_triton,
    quantize_config=quantize_config
)
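# Note: device="cuda:0" places the whole model on the first GPU, so this app
# needs GPU hardware; on a CPU-only machine from_quantized would fail here.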
# Debugging aid: uncomment to display the model's device placement
# st.write(model.hf_device_map)
user_input = st.text_input("Input a phrase")

# Vicuna-style USER/ASSISTANT prompt (assumed to match the format the model
# was trained on)
prompt_template = f'USER: {user_input}\nASSISTANT:'
# Generate output when the "Generate" button is pressed
if st.button("Generate the prompt"):
    inputs = tokenizer(prompt_template, return_tensors="pt")
    outputs = model.generate(
        input_ids=inputs.input_ids.to("cuda:0"),
        attention_mask=inputs.attention_mask.to("cuda:0"),
        max_new_tokens=512,
        do_sample=True,       # temperature/top_p only apply when sampling
        temperature=0.1,
        top_p=0.95,
        repetition_penalty=1.15
    )
    # Decode only the newly generated tokens, not the echoed prompt
    new_tokens = outputs[0][inputs.input_ids.size(-1):]
    generated_text = tokenizer.decode(new_tokens, skip_special_tokens=True)
    st.text_area("Prompt", value=generated_text)
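
# Note: Streamlit reruns this whole script on every widget interaction, which
# would repeat the 13B model load each time. A minimal sketch of caching the
# load with st.cache_resource (assumes Streamlit >= 1.18; load_model() is a
# hypothetical helper, not part of the original app):
#
#     @st.cache_resource
#     def load_model():
#         tokenizer = AutoTokenizer.from_pretrained(local_folder, use_fast=False)
#         model = AutoGPTQForCausalLM.from_quantized(
#             local_folder, use_safetensors=True, model_basename=model_basename,
#             device="cuda:0", quantize_config=quantize_config
#         )
#         return tokenizer, model
#
#     tokenizer, model = load_model()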