prithivMLmods committed
Commit 4f16846
1 Parent(s): e96545c

Upload 3 files

Files changed (3)
  1. README.md +17 -0
  2. app.py +73 -0
  3. requirements.txt +1 -0
README.md ADDED
@@ -0,0 +1,17 @@
+ ---
+ title: Llama 3.1 8B Instruct
+ emoji: ⚡
+ colorFrom: gray
+ colorTo: red
+ sdk: gradio
+ sdk_version: 5.9.1
+ app_file: app.py
+ pinned: false
+ license: creativeml-openrail-m
+ llama-recipes: >-
+   https://github.com/huggingface/huggingface-llama-recipes/blob/main/inference-api.ipynb
+ huggingface-llama-recipes: https://github.com/huggingface/huggingface-llama-recipes
+ short_description: Meta-Llama-3.1-8B-Instruct
+ ---
+
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
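
Aside: the block between the `---` markers is standard YAML front matter, so it can be inspected locally. A minimal sketch using pyyaml (an assumed local install, not part of this Space's requirements):

```python
# Minimal sketch: parse the Space's README front matter as YAML to inspect
# fields such as sdk and sdk_version. Assumes `pip install pyyaml` locally.
import yaml

with open("README.md", encoding="utf-8") as f:
    front_matter = yaml.safe_load(f.read().split("---")[1])

print(front_matter["sdk"], front_matter["sdk_version"])  # gradio 5.9.1
```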
app.py ADDED
@@ -0,0 +1,73 @@
+ # Refer to the Llama recipes for more info: https://github.com/huggingface/huggingface-llama-recipes/blob/main/inference-api.ipynb
+ # huggingface-llama-recipes: https://github.com/huggingface/huggingface-llama-recipes/tree/main
+ import gradio as gr
+ from openai import OpenAI
+ import os
+
+ css = '''
+ .gradio-container{max-width: 1000px !important}
+ h1{text-align:center}
+ footer {
+     visibility: hidden
+ }
+ '''
+
+ ACCESS_TOKEN = os.getenv("HF_TOKEN")
+
+ # OpenAI-compatible client pointed at the Hugging Face Inference API.
+ client = OpenAI(
+     base_url="https://api-inference.huggingface.co/v1/",
+     api_key=ACCESS_TOKEN,
+ )
+
+ def respond(
+     message,
+     history: list[tuple[str, str]],
+     system_message,
+     max_tokens,
+     temperature,
+     top_p,
+ ):
+     # Rebuild the conversation in OpenAI chat format, starting with the system prompt.
+     messages = [{"role": "system", "content": system_message}]
+
+     for user_msg, assistant_msg in history:
+         if user_msg:
+             messages.append({"role": "user", "content": user_msg})
+         if assistant_msg:
+             messages.append({"role": "assistant", "content": assistant_msg})
+
+     messages.append({"role": "user", "content": message})
+
+     response = ""
+
+     # Stream the completion; `chunk` avoids shadowing the `message` argument.
+     for chunk in client.chat.completions.create(
+         model="meta-llama/Meta-Llama-3.1-8B-Instruct",
+         max_tokens=max_tokens,
+         stream=True,
+         temperature=temperature,
+         top_p=top_p,
+         messages=messages,
+     ):
+         token = chunk.choices[0].delta.content
+         # The final chunk of a stream may carry no content; guard against None.
+         if token:
+             response += token
+             yield response
+
+ demo = gr.ChatInterface(
+     respond,
+     additional_inputs=[
+         gr.Textbox(value="", label="System message"),
+         gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
+         gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
+         gr.Slider(
+             minimum=0.1,
+             maximum=1.0,
+             value=0.95,
+             step=0.05,
+             label="Top-P",
+         ),
+     ],
+     css=css,
+ )
+
+ if __name__ == "__main__":
+     demo.launch()
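
For a quick credentials/endpoint check outside the Gradio UI, here is a minimal one-shot (non-streaming) sketch against the same OpenAI-compatible Inference API endpoint and model as app.py, assuming `HF_TOKEN` is set in the environment:

```python
# Minimal sketch: one-shot, non-streaming request against the same
# OpenAI-compatible Hugging Face Inference API endpoint used in app.py.
# Assumes HF_TOKEN is set in the environment.
import os
from openai import OpenAI

client = OpenAI(
    base_url="https://api-inference.huggingface.co/v1/",
    api_key=os.getenv("HF_TOKEN"),
)

completion = client.chat.completions.create(
    model="meta-llama/Meta-Llama-3.1-8B-Instruct",
    messages=[{"role": "user", "content": "Say hello in one short sentence."}],
    max_tokens=32,
)
print(completion.choices[0].message.content)
```

Streaming (as in `respond` above) differs only in passing `stream=True` and iterating over the returned chunks.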
requirements.txt ADDED
@@ -0,0 +1 @@
+ openai==1.58.1
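
Note that only the `openai` client is pinned; on Spaces, Gradio itself is supplied by the runtime per the `sdk`/`sdk_version` fields in README.md. For a local run you would likely also need to install gradio; a trivial sketch to confirm both imports resolve (the local install step is an assumption, not part of the Space):

```python
# Local-only sanity check: requirements.txt pins just the openai client because
# the Space runtime installs gradio from README.md's sdk_version. For a local
# run, `pip install gradio openai==1.58.1` first (assumed setup).
import gradio
import openai

print("gradio", gradio.__version__)
print("openai", openai.__version__)
```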