Spaces:

rodrigomasini
/

rephrase

Paused

App Files Files Community

rephrase / app_v2.py

rodrigomasini

Update app_v2.py

214f24f almost 2 years ago

raw

history blame contribute delete

1.77 kB

	########################## FRUSTRATION PHASE ###########################
	import streamlit as st
	from transformers import AutoTokenizer
	from auto_gptq import AutoGPTQForCausalLM, BaseQuantizeConfig
	from huggingface_hub import snapshot_download
	import os

	# Hugging Face repo that holds the already-quantized GPTQ checkpoint, and the
	# local directory (under the current working directory) it is downloaded into.
	pretrained_model_dir = "FPHam/Jackson_The_Formalizer_V2_13b_GPTQ"
	cwd = os.getcwd()
	quantized_model_dir = os.path.join(cwd, "Jackson2-4bit-128g-GPTQ")


	@st.cache_resource
	def _load_model_and_tokenizer(repo_id, local_dir):
	    """Download the quantized checkpoint and load the model and tokenizer.

	    Streamlit re-executes this script from the top on every widget
	    interaction (including each "Generate" click). Caching the loader keeps
	    a single model instance alive for the process instead of re-downloading
	    the snapshot and re-loading the weights onto the GPU on every rerun.

	    Args:
	        repo_id: Hugging Face Hub repository id of the quantized checkpoint.
	        local_dir: Directory the snapshot is materialized into and the
	            model/tokenizer are loaded from.

	    Returns:
	        A ``(model, tokenizer)`` tuple.
	    """
	    # Create the download directory if it doesn't exist
	    os.makedirs(local_dir, exist_ok=True)
	    snapshot_download(repo_id=repo_id, local_dir=local_dir, local_dir_use_symlinks=False)

	    # Load the already-quantized weights; no quantize_config is passed here.
	    loaded_model = AutoGPTQForCausalLM.from_quantized(
	        local_dir,
	        model_basename="Jackson2-4bit-128g-GPTQ",
	        use_safetensors=True,
	        strict=False,
	        device="cuda:0",
	        use_triton=False,
	    )
	    loaded_tokenizer = AutoTokenizer.from_pretrained(local_dir, use_fast=True)
	    return loaded_model, loaded_tokenizer


	# Module-level names are kept so the rest of the script can keep using them.
	model, tokenizer = _load_model_and_tokenizer(pretrained_model_dir, quantized_model_dir)

	# Streamlit UI: a single text box plus a button that runs the model on the
	# entered phrase and prints the decoded output.
	st.title("AutoGPTQ Streamlit App")

	user_input = st.text_input("Input a phrase")

	# Generate output when the "Generate" button is pressed
	if st.button("Generate"):
	    if not user_input.strip():
	        # Nothing meaningful to rephrase; avoid running the model on a blank prompt.
	        st.warning("Please enter a phrase first.")
	    else:
	        # The tokenizer returns CPU tensors; move them to the device the model
	        # lives on, otherwise generate() fails with a device mismatch.
	        inputs = tokenizer(user_input, return_tensors="pt").to(model.device)
	        outputs = model.generate(
	            **inputs,
	            # Same budget as the former `max_length=512 + prompt_length`.
	            max_new_tokens=512,
	            # temperature/top_p only take effect when sampling is enabled;
	            # without this flag decoding is greedy and both are ignored.
	            do_sample=True,
	            temperature=0.1,
	            top_p=0.95,
	            repetition_penalty=1.15,
	        )
	        # outputs[0] contains the prompt tokens followed by the continuation.
	        generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
	        st.text(generated_text)