# demo_human_gpt/models.py
from typing import Dict, List

from llama_cpp import Llama

# Shared llama.cpp settings: offload up to 100 layers to the first GPU, keep logging quiet.
llama_args = {"n_gpu_layers": 100, "main_gpu": 0, "verbose": False}

class Model:
    """Common interface implemented by the model wrappers below."""

    def __init__(self):
        pass

    def __call__(self, msg: str, stop: List[str], max_tokens: int):
        """Run a raw text completion on the given prompt."""
        raise NotImplementedError

    def conv(self, msgs: List[Dict[str, str]]) -> str:
        """Render a chat history into the model's prompt format."""
        raise NotImplementedError

    def starttok(self, user: str) -> str:
        """Return the header that opens a new turn for `user`."""
        raise NotImplementedError

    def start(self) -> str:
        """Optional prefix emitted once at the very start of a prompt."""
        return ""

    def close(self):
        pass

class Phi35RPMax(Model):
    modelname = "Phi35RPMax-fp16"

    def __init__(self):
        self.llm = Llama.from_pretrained(
            repo_id="ArliAI/Phi-3.5-mini-3.8B-ArliAI-RPMax-v1.1-GGUF",
            filename="ArliAI-RPMax-3.8B-v1.1-fp16.gguf",
            **llama_args,
        )

    def __call__(self, msg: str, stop: List[str], max_tokens: int):
        print("Autocomplete: ", msg)
        return self.llm(msg, stop=stop, max_tokens=max_tokens)

    def conv(self, msgs: List[Dict[str, str]]) -> str:
        # Phi-3.5 chat format: <|role|>\ncontent<|end|>, one block per message.
        return "\n".join(f"<|{msg['role']}|>\n{msg['content']}<|end|>" for msg in msgs)

    def starttok(self, user: str) -> str:
        return f"<|{user}|>\n"

    def close(self):
        self.llm.close()

class Phi35(Model):
    # NOTE: the label says IQ3_XS while __init__ loads the f32 file; kept from the original.
    modelname = "Phi35-IQ3_XS"

    def __init__(self):
        self.llm = Llama.from_pretrained(
            repo_id="bartowski/Phi-3.5-mini-instruct-GGUF",
            filename="Phi-3.5-mini-instruct-f32.gguf",
            **llama_args,
        )

    def __call__(self, msg: str, stop: List[str], max_tokens: int):
        print("Autocomplete: ", msg)
        return self.llm(msg, stop=stop, max_tokens=max_tokens)

    def conv(self, msgs: List[Dict[str, str]]) -> str:
        return "\n".join(f"<|{msg['role']}|>\n{msg['content']}<|end|>" for msg in msgs)

    def starttok(self, user: str) -> str:
        return f"<|{user}|>\n"

    def close(self):
        self.llm.close()
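
# For reference, a sketch of what conv() plus starttok("assistant") render for a
# two-message history in the Phi-3.5 format above (content strings are made up):
#
#   <|system|>
#   You are a helpful assistant.<|end|>
#   <|user|>
#   Hi there!<|end|>
#   <|assistant|>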

# TODO: Gemma2 requires accepting a license; maybe try it in the future, but it
# probably isn't worth it.
# class Gemma2(Model):
#     modelname = "Gemma2-2b-it-GGUF"
#
#     def __init__(self):
#         self.llm = Llama.from_pretrained(
#             repo_id="google/gemma-2-2b-it-GGUF",
#             filename="2b_it_v2.gguf",
#         )
#
#     def __call__(self, msg: str, stop: List[str], max_tokens: int):
#         return self.llm(msg, stop=stop, max_tokens=max_tokens)
#
#     def conv(self, msgs: List[Dict[str, str]]):  # https://ai.google.dev/gemma/docs/formatting?hl=de
#         return "\n".join(f"<|{msg['role']}|>\n{msg['content']}<|end|>" for msg in msgs)
#
#     def formatmessage(self, msg: str, role: str):  # https://ai.google.dev/gemma/docs/formatting?hl=de
#         if role == "system":
#             # Gemma2 does not support system messages / isn't trained for them.
#             # TODO: Turn them into assistant messages and test whether that improves results.
#             return ""
#         if role == "assistant":
#             role = "model"
#         return f"<start_of_turn>{role}\n{msg}<end_of_turn>"
#
#     def starttok(self, user: str):
#         return f"<start_of_turn>{user}\n"
#
#     def close(self):
#         self.llm.close()

class Llama31uncensored(Model):
    modelname = "Llama31-uncensored-fp16"

    def __init__(self):
        self.llm = Llama.from_pretrained(
            repo_id="Orenguteng/Llama-3.1-8B-Lexi-Uncensored-V2-GGUF",
            filename="Llama-3.1-8B-Lexi-Uncensored_V2_F16.gguf",
            **llama_args,
        )

    def __call__(self, msg: str, stop: List[str], max_tokens: int):
        print("Autocomplete: ", msg)
        return self.llm(msg, stop=stop, max_tokens=max_tokens)

    def start(self):
        # Llama 3.1 prompts open with a single <|begin_of_text|> token.
        return "<|begin_of_text|>"

    def conv(self, msgs: List[Dict[str, str]]) -> str:
        # Llama 3.1 chat format: role header, blank line, content, <|eot_id|>.
        return "\n".join(
            f"<|start_header_id|>{msg['role']}<|end_header_id|>\n\n{msg['content']}<|eot_id|>"
            for msg in msgs
        )

    def starttok(self, user: str) -> str:
        return f"<|start_header_id|>{user}<|end_header_id|>\n\n"

    def close(self):
        self.llm.close()
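
# For reference, a sketch of the full prompt start() + conv(history) yields for a
# two-message history in the Llama 3.1 format above (content strings are made up):
#
#   <|begin_of_text|><|start_header_id|>system<|end_header_id|>
#
#   You are a helpful assistant.<|eot_id|>
#   <|start_header_id|>user<|end_header_id|>
#
#   Hi there!<|eot_id|>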

class Llama31(Model):
    modelname = "Llama31-IQ4_XS"

    def __init__(self):
        self.llm = Llama.from_pretrained(
            repo_id="lmstudio-community/Meta-Llama-3.1-8B-Instruct-GGUF",
            filename="Meta-Llama-3.1-8B-Instruct-IQ4_XS.gguf",
            **llama_args,
        )

    def __call__(self, msg: str, stop: List[str], max_tokens: int):
        print("Autocomplete: ", msg)
        return self.llm(msg, stop=stop, max_tokens=max_tokens)

    def start(self):
        # <|begin_of_text|> belongs once at the very start of the prompt; the
        # original repeated it for every message and again in starttok().
        return "<|begin_of_text|>"

    def conv(self, msgs: List[Dict[str, str]]) -> str:
        return "\n".join(
            f"<|start_header_id|>{msg['role']}<|end_header_id|>\n\n{msg['content']}<|eot_id|>"
            for msg in msgs
        )

    def starttok(self, user: str) -> str:
        # The original also omitted the trailing blank line the format expects.
        return f"<|start_header_id|>{user}<|end_header_id|>\n\n"

    def close(self):
        self.llm.close()

models = [Phi35RPMax, Phi35, Llama31uncensored, Llama31]
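
# A minimal usage sketch, not part of the original file: how a caller might
# drive one of these wrappers. The history, stop list, and token budget below
# are illustrative assumptions.
if __name__ == "__main__":
    model = Phi35RPMax()
    try:
        history = [
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": "Hi there!"},
        ]
        # Full prompt: optional start prefix, rendered history, then the header
        # that opens the assistant's next turn.
        prompt = model.start() + model.conv(history) + "\n" + model.starttok("assistant")
        out = model(prompt, stop=["<|end|>"], max_tokens=64)
        print(out["choices"][0]["text"])
    finally:
        model.close()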