from typing import Dict, List

from llama_cpp import Llama

# Shared llama.cpp settings: offload up to 100 layers to GPU 0 and keep logging quiet.
llama_args = {"n_gpu_layers": 100, "main_gpu": 0, "verbose": False}

class Model:
    """Abstract base class wrapping a llama.cpp model together with its chat template."""

    def __init__(self):
        pass

    def __call__(self, msg: str, stop: List[str], max_tokens: int):
        """Run a raw completion on an already-formatted prompt."""
        raise NotImplementedError

    def conv(self, msgs: List[Dict[str, str]]) -> str:
        """Render a list of {'role': ..., 'content': ...} messages into the model's prompt format."""
        raise NotImplementedError

    def starttok(self, user: str) -> str:
        """Return the header that opens a new turn for the given role."""
        raise NotImplementedError

    def start(self) -> str:
        """Return the sequence that begins a prompt (empty for most models)."""
        return ""

    def close(self):
        pass

class Phi35RPMax(Model):
    modelname = "Phi35RPMax-fp16"

    def __init__(self):
        self.llm = Llama.from_pretrained(
            repo_id="ArliAI/Phi-3.5-mini-3.8B-ArliAI-RPMax-v1.1-GGUF",
            filename="ArliAI-RPMax-3.8B-v1.1-fp16.gguf",
            **llama_args,
        )

    def __call__(self, msg: str, stop: List[str], max_tokens: int):
        print("Autocomplete: ", msg)
        return self.llm(msg, stop=stop, max_tokens=max_tokens)

    def conv(self, msgs: List[Dict[str, str]]) -> str:
        # Phi-3.5 chat template: <|role|>\ncontent<|end|>
        return "\n".join(f"<|{msg['role']}|>\n{msg['content']}<|end|>" for msg in msgs)

    def starttok(self, user: str) -> str:
        return f"<|{user}|>\n"

    def close(self):
        self.llm.close()

class Phi35(Model):
    modelname = "Phi35-IQ3_XS"

    def __init__(self):
        self.llm = Llama.from_pretrained(
            repo_id="bartowski/Phi-3.5-mini-instruct-GGUF",
            filename="Phi-3.5-mini-instruct-f32.gguf",
            **llama_args,
        )

    def __call__(self, msg: str, stop: List[str], max_tokens: int):
        print("Autocomplete: ", msg)
        return self.llm(msg, stop=stop, max_tokens=max_tokens)

    def conv(self, msgs: List[Dict[str, str]]) -> str:
        # Same Phi-3.5 chat template as above.
        return "\n".join(f"<|{msg['role']}|>\n{msg['content']}<|end|>" for msg in msgs)

    def starttok(self, user: str) -> str:
        return f"<|{user}|>\n"

    def close(self):
        self.llm.close()
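# For illustration, joining conv() and starttok("assistant") with a newline
# (one plausible way for a caller to combine them; the module itself does not
# show the call site) yields a Phi-3.5 prompt like:
#
#   <|system|>
#   You are a helpful assistant.<|end|>
#   <|user|>
#   Hi!<|end|>
#   <|assistant|>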

# TODO: Gemma2 requires accepting a license; maybe try it in the future, but it
# does not seem worth it.
# class Gemma2(Model):
#     modelname = "Gemma2-2b-it-GGUF"
#
#     def __init__(self):
#         self.llm = Llama.from_pretrained(
#             repo_id="google/gemma-2-2b-it-GGUF",
#             filename="2b_it_v2.gguf",
#         )
#
#     def __call__(self, msg: str, stop: List[str], max_tokens: int):
#         return self.llm(msg, stop=stop, max_tokens=max_tokens)
#
#     def conv(self, msgs: List[Dict[str, str]]) -> str:  # https://ai.google.dev/gemma/docs/formatting?hl=de
#         # Gemma uses <start_of_turn> turns; delegate to formatmessage().
#         return "\n".join(self.formatmessage(msg["content"], msg["role"]) for msg in msgs)
#
#     def formatmessage(self, msg: str, role: str) -> str:  # https://ai.google.dev/gemma/docs/formatting?hl=de
#         if role == "system":
#             # Gemma2 does not support system messages / is not trained for them.
#             # TODO: turn them into assistant messages and test whether that improves results.
#             return ""
#         if role == "assistant":
#             role = "model"
#         return f"<start_of_turn>{role}\n{msg}<end_of_turn>"
#
#     def starttok(self, user: str) -> str:
#         return f"<start_of_turn>{user}\n"
#
#     def close(self):
#         self.llm.close()

class Llama31uncensored(Model):
    modelname = "Llama31-uncensored-fp16"

    def __init__(self):
        self.llm = Llama.from_pretrained(
            repo_id="Orenguteng/Llama-3.1-8B-Lexi-Uncensored-V2-GGUF",
            filename="Llama-3.1-8B-Lexi-Uncensored_V2_F16.gguf",
            **llama_args,
        )

    def __call__(self, msg: str, stop: List[str], max_tokens: int):
        print("Autocomplete: ", msg)
        return self.llm(msg, stop=stop, max_tokens=max_tokens)

    def start(self) -> str:
        # Llama 3.1 prompts begin with a single <|begin_of_text|> token.
        return "<|begin_of_text|>"

    def conv(self, msgs: List[Dict[str, str]]) -> str:
        # Llama 3.1 chat template: <|start_header_id|>role<|end_header_id|>\n\ncontent<|eot_id|>
        return "\n".join(
            f"<|start_header_id|>{msg['role']}<|end_header_id|>\n\n{msg['content']}<|eot_id|>"
            for msg in msgs
        )

    def starttok(self, user: str) -> str:
        return f"<|start_header_id|>{user}<|end_header_id|>\n\n"

    def close(self):
        self.llm.close()
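# For illustration, start() + conv() + "\n" + starttok("assistant") (the same
# assumed joining convention as in the Phi example above) yields a Llama 3.1
# prompt like:
#
#   <|begin_of_text|><|start_header_id|>system<|end_header_id|>
#
#   You are a helpful assistant.<|eot_id|>
#   <|start_header_id|>user<|end_header_id|>
#
#   Hi!<|eot_id|>
#   <|start_header_id|>assistant<|end_header_id|>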

class Llama31(Model):
    modelname = "Llama31-IQ4_XS"

    def __init__(self):
        self.llm = Llama.from_pretrained(
            repo_id="lmstudio-community/Meta-Llama-3.1-8B-Instruct-GGUF",
            filename="Meta-Llama-3.1-8B-Instruct-IQ4_XS.gguf",
            **llama_args,
        )

    def __call__(self, msg: str, stop: List[str], max_tokens: int):
        print("Autocomplete: ", msg)
        return self.llm(msg, stop=stop, max_tokens=max_tokens)

    def start(self) -> str:
        # <|begin_of_text|> belongs once at the very start of the prompt,
        # not repeated before every message or turn header.
        return "<|begin_of_text|>"

    def conv(self, msgs: List[Dict[str, str]]) -> str:
        return "\n".join(
            f"<|start_header_id|>{msg['role']}<|end_header_id|>\n\n{msg['content']}<|eot_id|>"
            for msg in msgs
        )

    def starttok(self, user: str) -> str:
        return f"<|start_header_id|>{user}<|end_header_id|>\n\n"

    def close(self):
        self.llm.close()

# Registry of available model classes (Gemma2 excluded; see the TODO above).
models = [Phi35RPMax, Phi35, Llama31uncensored, Llama31]
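
# Minimal usage sketch (not part of the module above): build a short
# conversation, render it with the model's template, and complete the next
# assistant turn. The newline between conv() and starttok(), the "<|end|>"
# stop token, and max_tokens=64 are assumptions chosen for the Phi-3.5
# template; instantiating Phi35() downloads the GGUF weights on first use.
if __name__ == "__main__":
    model = Phi35()
    msgs = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Say hello in one sentence."},
    ]
    prompt = model.start() + model.conv(msgs) + "\n" + model.starttok("assistant")
    result = model(prompt, stop=["<|end|>"], max_tokens=64)
    # llama-cpp-python returns a completion dict with a "choices" list.
    print(result["choices"][0]["text"])
    model.close()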