Ramikan-BR committed on
Commit
da35f37
1 Parent(s): 678944d

Create app.py

Files changed (1)
  1. app.py +29 -0
app.py ADDED
@@ -0,0 +1,29 @@
+ import os
+ import subprocess
+ 
+ # Install the required packages at startup
+ subprocess.run(["pip", "install", "--upgrade", "pip"], check=True)
+ subprocess.run(["pip", "install", "--upgrade", "torch", "transformers", "accelerate"], check=True)
+ subprocess.run(["pip", "install", "git+https://github.com/TimDettmers/bitsandbytes.git"], check=True)
+ 
+ # Hide all GPUs so inference runs on CPU; this must be set before torch initializes CUDA
+ os.environ["CUDA_VISIBLE_DEVICES"] = ""
+ 
+ import accelerate  # imported only to confirm the runtime install succeeded
+ import bitsandbytes  # imported only to confirm the runtime install succeeded
+ import gradio as gr
+ from transformers import LlamaForCausalLM, LlamaTokenizer
+ 
+ # Load the model and tokenizer
+ model = LlamaForCausalLM.from_pretrained("Ramikan-BR/tinyllama_PY-CODER-bnb-4bit-lora_4k-q4_k_m-v2")
+ tokenizer = LlamaTokenizer.from_pretrained("Ramikan-BR/tinyllama_PY-CODER-bnb-4bit-lora_4k-q4_k_m-v2")
+ 
+ def predict(input_text):
+     # Encode the input text and sample a completion (max_length counts prompt plus generated tokens)
+     input_ids = tokenizer.encode(input_text, return_tensors="pt")
+     output = model.generate(input_ids, max_length=4096, do_sample=True, top_k=50, top_p=0.50, num_return_sequences=1)
+     return tokenizer.decode(output[0], skip_special_tokens=True)
+ 
+ # Create the Gradio interface
+ iface = gr.Interface(fn=predict, inputs="text", outputs="text")
+ iface.launch()
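
The launched Interface exposes a single text-to-text endpoint. Below is a minimal client-side sketch for exercising it, assuming the app is reachable at Gradio's default local URL and that the gradio_client package is installed; the URL and the example prompt are placeholders, not part of this commit:

    from gradio_client import Client

    client = Client("http://127.0.0.1:7860")  # default local address used by iface.launch()
    result = client.predict(
        "Write a Python function that reverses a string.",  # input_text
        api_name="/predict",  # default endpoint name for a single-function gr.Interface
    )
    print(result)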