DoctorSlimm committed on
Commit
a0a6a64
·
verified ·
1 Parent(s): 42fd5b1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +39 -4
app.py CHANGED
@@ -1,14 +1,49 @@
1
- import gradio as gr
2
  import spaces
3
  import torch
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
 
5
# Allocated at import time; equivalent construction of a single-element
# float32 zero tensor, then moved to the GPU.
zero = torch.zeros(1).cuda()
print(zero.device)  # <-- 'cpu' 🤔 (reported device at import time)
7
 
 
 
 
8
@spaces.GPU
def greet(n):
    """Build a greeting string from the module tensor `zero` plus *n*.

    Decorated with @spaces.GPU, so the body runs in the GPU context.
    """
    print(zero.device)  # <-- 'cuda:0' 🤗 (device as seen inside the GPU context)
    greeting = f"Hello {zero + n} Tensor"
    return greeting
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
 
13
# Number-in / text-out front end for greet(). The variable must stay
# named `demo` — Spaces discovers the app through that module attribute.
number_input = gr.Number()
text_output = gr.Text()
demo = gr.Interface(fn=greet, inputs=number_input, outputs=text_output)
demo.launch()
 
1
+ import os
2
  import spaces
3
  import torch
4
+ import gradio as gr
5
+ from huggingface_hub import snapshot_download, login
6
+ from transformers.utils import move_cache
7
+
8
+
9
+
10
+
11
# --- one-time model download, runs at import time ---
LLM_MODEL_DIR = '/model'
LLM_MODEL_ID = "mistral-community/Mistral-7B-v0.2"
LLM_MODEL_REVISION = 'main'

os.makedirs(LLM_MODEL_DIR, exist_ok=True)
# NOTE(review): downloads the full checkpoint into LLM_MODEL_DIR; a token
# kwarg (token=HF_TOKEN) would be needed here for gated/private repos.
snapshot_download(
    LLM_MODEL_ID,
    revision=LLM_MODEL_REVISION,
    local_dir=LLM_MODEL_DIR,
)
move_cache()


# cpu
zero = torch.Tensor([0]).cuda()
print(zero.device)  # <-- 'cpu' 🤔 (device reported at import time)
23
 
24
+
25
# gpu
@spaces.GPU
def greet(user):
    """Generate a completion for the user's prompt with the local model.

    Parameters:
        user: the prompt; coerced to str (the UI may deliver a float).

    Returns:
        The first generated completion as a string, or "" if the model
        produced no output.
    """
    # Imported lazily so module import does not require vllm / a GPU —
    # presumably vllm needs the @spaces.GPU context at init; TODO confirm.
    from vllm import SamplingParams, LLM

    # NOTE(review): the model is constructed on every call, which is slow;
    # kept as-is because construction may need the per-request GPU context.
    model = LLM(LLM_MODEL_DIR)
    sampling_params = SamplingParams(
        temperature=0.3,
        ignore_eos=False,
        max_tokens=512 * 2,
    )

    # vLLM expects string prompts; gr.Number hands us a float, so coerce.
    prompts = [str(user)]
    model_outputs = model.generate(prompts, sampling_params)

    # Flatten every completion of every request output.
    generations = [
        completion.text
        for request_output in model_outputs
        for completion in request_output.outputs
    ]
    # Guard the empty case instead of raising IndexError.
    return generations[0] if generations else ""
47
 
48
# The handler treats its argument as an LLM text prompt, so the input
# widget must be a text box — gr.Number() would feed the model a float.
# `demo` must keep this name: Spaces discovers the app via this attribute.
demo = gr.Interface(fn=greet, inputs=gr.Text(), outputs=gr.Text())
demo.launch()