from typing import Dict, List

from llama_cpp import Llama

# Shared llama.cpp settings: offload up to 100 layers to GPU 0, suppress logging.
llama_args = {"n_gpu_layers": 100, "main_gpu": 0, "verbose": False}


class Model:
    """Common interface for the llama.cpp-backed chat models below."""

    def __init__(self):
        pass

    def __call__(self, msg: str, stop: List[str], max_tokens: int):
        """Run a raw completion on an already formatted prompt."""
        raise NotImplementedError

    def conv(self, msgs: List[Dict[str, str]]) -> str:
        """Render a list of {'role', 'content'} messages into the model's prompt format."""
        raise NotImplementedError

    def starttok(self, user: str) -> str:
        """Return the header that opens a new turn for the given role."""
        raise NotImplementedError

    def start(self) -> str:
        """Return the prompt prefix (e.g. a BOS token), if the model needs one."""
        return ""

    def close(self):
        pass
class Phi35RPMax(Model):
    modelname = "Phi35RPMax-fp16"

    def __init__(self):
        self.llm = Llama.from_pretrained(
            repo_id="ArliAI/Phi-3.5-mini-3.8B-ArliAI-RPMax-v1.1-GGUF",
            filename="ArliAI-RPMax-3.8B-v1.1-fp16.gguf",
            **llama_args,
        )

    def __call__(self, msg: str, stop: List[str], max_tokens: int):
        print("Autocomplete:", msg)
        return self.llm(msg, stop=stop, max_tokens=max_tokens)

    # Phi-3.5 chat format: <|role|>\n{content}<|end|>
    def conv(self, msgs: List[Dict[str, str]]):
        return "\n".join(f"<|{msg['role']}|>\n{msg['content']}<|end|>" for msg in msgs)

    def starttok(self, user: str):
        return f"<|{user}|>\n"

    def close(self):
        self.llm.close()
class Phi35(Model):
    # Name matches the quantization of the file loaded below.
    modelname = "Phi35-f32"

    def __init__(self):
        self.llm = Llama.from_pretrained(
            repo_id="bartowski/Phi-3.5-mini-instruct-GGUF",
            filename="Phi-3.5-mini-instruct-f32.gguf",
            **llama_args,
        )

    def __call__(self, msg: str, stop: List[str], max_tokens: int):
        print("Autocomplete:", msg)
        return self.llm(msg, stop=stop, max_tokens=max_tokens)

    # Same Phi-3.5 chat format as Phi35RPMax.
    def conv(self, msgs: List[Dict[str, str]]):
        return "\n".join(f"<|{msg['role']}|>\n{msg['content']}<|end|>" for msg in msgs)

    def starttok(self, user: str):
        return f"<|{user}|>\n"

    def close(self):
        self.llm.close()
# TODO: Gemma2 requires accepting a license; maybe try it in the future,
# but it doesn't seem worth it.
# class Gemma2(Model):
#     modelname = "Gemma2-2b-it-GGUF"
#
#     def __init__(self):
#         self.llm = Llama.from_pretrained(
#             repo_id="google/gemma-2-2b-it-GGUF",
#             filename="2b_it_v2.gguf",
#         )
#
#     def __call__(self, msg: str, stop: List[str], max_tokens: int):
#         return self.llm(msg, stop=stop, max_tokens=max_tokens)
#
#     # https://ai.google.dev/gemma/docs/formatting?hl=de
#     def conv(self, msgs: List[Dict[str, str]]):
#         return "\n".join(self.formatmessage(msg['content'], msg['role']) for msg in msgs)
#
#     def formatmessage(self, msg: str, role: str):
#         if role == "system":
#             # Gemma2 does not support system messages / isn't trained for them.
#             # TODO: turn them into assistant messages and test whether that improves results.
#             return ""
#         if role == "assistant":
#             role = "model"
#         return f"<start_of_turn>{role}\n{msg}<end_of_turn>"
#
#     def starttok(self, user: str):
#         return f"<start_of_turn>{user}\n"
#
#     def close(self):
#         self.llm.close()
class Llama31uncensored(Model):
    modelname = "Llama31-uncensored-fp16"

    def __init__(self):
        self.llm = Llama.from_pretrained(
            repo_id="Orenguteng/Llama-3.1-8B-Lexi-Uncensored-V2-GGUF",
            filename="Llama-3.1-8B-Lexi-Uncensored_V2_F16.gguf",
            **llama_args,
        )

    def __call__(self, msg: str, stop: List[str], max_tokens: int):
        print("Autocomplete:", msg)
        return self.llm(msg, stop=stop, max_tokens=max_tokens)

    def start(self):
        # Llama 3.1 prompts begin with a single BOS token.
        return "<|begin_of_text|>"

    # Llama 3.1 chat format: <|start_header_id|>role<|end_header_id|>\n\n{content}<|eot_id|>
    def conv(self, msgs: List[Dict[str, str]]):
        return "\n".join(
            f"<|start_header_id|>{msg['role']}<|end_header_id|>\n\n{msg['content']}<|eot_id|>"
            for msg in msgs
        )

    def starttok(self, user: str):
        return f"<|start_header_id|>{user}<|end_header_id|>\n\n"

    def close(self):
        self.llm.close()
class Llama31(Model):
    modelname = "Llama31-IQ4_XS"

    def __init__(self):
        self.llm = Llama.from_pretrained(
            repo_id="lmstudio-community/Meta-Llama-3.1-8B-Instruct-GGUF",
            filename="Meta-Llama-3.1-8B-Instruct-IQ4_XS.gguf",
            **llama_args,
        )

    def __call__(self, msg: str, stop: List[str], max_tokens: int):
        print("Autocomplete:", msg)
        return self.llm(msg, stop=stop, max_tokens=max_tokens)

    def start(self):
        # The BOS token belongs once at the start of the prompt,
        # not in front of every message.
        return "<|begin_of_text|>"

    def conv(self, msgs: List[Dict[str, str]]):
        return "\n".join(
            f"<|start_header_id|>{msg['role']}<|end_header_id|>\n\n{msg['content']}<|eot_id|>"
            for msg in msgs
        )

    def starttok(self, user: str):
        return f"<|start_header_id|>{user}<|end_header_id|>\n\n"

    def close(self):
        self.llm.close()
models = [Phi35RPMax, Phi35, Llama31uncensored, Llama31]
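
# A minimal usage sketch, not part of the Space itself: it shows how start(),
# conv(), and starttok() combine into a prompt, and assumes the completion
# dict shape returned by llama-cpp-python ({"choices": [{"text": ...}]}).
# The example messages and stop string are illustrative only.
if __name__ == "__main__":
    model = Phi35RPMax()
    try:
        msgs = [
            {"role": "system", "content": "You are a terse assistant."},
            {"role": "user", "content": "Name one prime number."},
        ]
        # BOS prefix + formatted history + header that opens the assistant's turn.
        prompt = model.start() + model.conv(msgs) + "\n" + model.starttok("assistant")
        out = model(prompt, stop=["<|end|>"], max_tokens=64)
        print(out["choices"][0]["text"])
    finally:
        model.close()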