anakin87 committed
Commit 76bc95f · 1 Parent(s): 329c0e6

fa + good defaults + style

Files changed (2):
  1. README.md +6 -9
  2. app.py +27 -29
README.md CHANGED
@@ -1,13 +1,10 @@
 ---
-title: Phi 3.5 Mini ITA
-emoji: 💬🇮🇹
-colorFrom: green
-colorTo: red
+title: Gemma 3 270m IT
+emoji: 💎💬
+colorFrom: powder-blue
+colorTo: royal-blue
 sdk: gradio
 sdk_version: 5.42.0
 app_file: app.py
-license: mit
-short_description: Chat with an Italian Small Model
----
-
-An example chatbot using [Gradio](https://gradio.app), [`huggingface_hub`](https://huggingface.co/docs/huggingface_hub/v0.22.2/en/index), and the [Hugging Face Inference API](https://huggingface.co/docs/api-inference/index).
+short_description: Chat with Gemma 3 270m IT
+---

app.py CHANGED
@@ -8,20 +8,19 @@ import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
 import subprocess
 
-# subprocess.run(
-#     "pip install flash-attn --no-build-isolation",
-#     env={"FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
-#     shell=True,
-# )
+subprocess.run(
+    "pip install flash-attn --no-build-isolation",
+    env={"FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
+    shell=True,
+)
 
 
 DESCRIPTION = """\
-# Phi 3.5 mini ITA 💬 🇮🇹
+# Gemma 3 270m IT 💎💬
 
-Fine-tuned version of Microsoft/Phi-3.5-mini-instruct to improve the performance on the Italian language.
-Small (3.82B parameters) but capable model, with 128k context length.
+Try this mini model by Google.
 
-[🪪 **Model card**](https://huggingface.co/anakin87/Phi-3.5-mini-ITA)
+[🪪 **Model card**](https://huggingface.co/google/gemma-3-270m-it)
 """
 
 MAX_MAX_NEW_TOKENS = 2048
@@ -34,12 +33,13 @@ model_id = "google/gemma-3-270m-it"
 tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True,)
 model = AutoModelForCausalLM.from_pretrained(
     model_id,
-    torch_dtype="auto",
     device_map="auto",
-    attn_implementation="eager"
+    torch_dtype=torch.bfloat16,
+    attn_implementation="flash_attention_2",
+    trust_remote_code=True,
 )
-# model.config.sliding_window = 4096
-# model.eval()
+model.config.sliding_window = 4096
+model.eval()
 
 
 @spaces.GPU(duration=90)
@@ -80,7 +80,7 @@ def generate(
         temperature=temperature,
         num_beams=1,
         repetition_penalty=repetition_penalty,
-        disable_compile=True,
+        disable_compile=True,  # https://ai.google.dev/gemma/docs/core/huggingface_text_full_finetune#test_model_inference
     )
     t = Thread(target=model.generate, kwargs=generate_kwargs)
     t.start()
@@ -111,42 +111,40 @@ chat_interface = gr.ChatInterface(
             minimum=0,
             maximum=4.0,
             step=0.1,
-            value=0.001,
+            value=1.0,  # default from https://huggingface.co/docs/transformers/en/main_classes/text_generation
         ),
         gr.Slider(
             label="Top-p (nucleus sampling)",
             minimum=0.05,
             maximum=1.0,
             step=0.05,
-            value=1.0,
+            value=0.95,  # from https://huggingface.co/google/gemma-3-270m-it/blob/main/generation_config.json
         ),
         gr.Slider(
            label="Top-k",
             minimum=1,
             maximum=1000,
             step=1,
-            value=50,
+            value=64,  # from https://huggingface.co/google/gemma-3-270m-it/blob/main/generation_config.json
         ),
         gr.Slider(
             label="Repetition penalty",
             minimum=1.0,
             maximum=2.0,
             step=0.05,
-            value=1.0,
+            value=1.0,  # default from https://huggingface.co/docs/transformers/en/main_classes/text_generation
         ),
     ],
     stop_btn=None,
-    examples=[
-        ["Ciao! Come stai?"],
-        ["Pro e contro di una relazione a lungo termine. Elenco puntato con max 3 pro e 3 contro sintetici."],
-        ["Quante ore impiega un uomo per mangiare un elicottero?"],
-        ["Come si apre un file JSON in Python?"],
-        ["Fammi un elenco puntato dei pro e contro di vivere in Italia. Massimo 2 pro e 2 contro."],
-        ["Inventa una breve storia con animali sul valore dell'amicizia."],
-        ["Scrivi un articolo di 100 parole sui 'Benefici dell'open-source nella ricerca sull'intelligenza artificiale'"],
-        ["Can you explain briefly to me what is the Python programming language?"],
-        ["How many hours does it take a man to eat a Helicopter?"],
-        ["Write a 100-word article on 'Benefits of Open-Source in AI research'"],
+    examples=[
+        ["Hi! How are you?"],
+        ["Pros and cons of a long-term relationship. Bullet list with max 3 pros and 3 cons, concise."],
+        ["How many hours does it take a man to eat a helicopter?"],
+        ["How do you open a JSON file in Python?"],
+        ["Make a bullet list of pros and cons of living in San Francisco. Maximum 2 pros and 2 cons."],
+        ["Invent a short story with animals about the value of friendship."],
+        ["Can you briefly explain what the Python programming language is?"],
+        ["Write a 100-word article on 'Benefits of Open-Source in AI Research'."],
     ],
     cache_examples=False,
 )
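Note on the attention change: the first hunk re-enables the runtime flash-attn install (the "fa" in the commit message) and switches the model load to `attn_implementation="flash_attention_2"` in bfloat16. A minimal sketch of a more defensive variant, falling back to eager attention when flash-attn is not importable (the `has_flash_attn` check and fallback are assumptions for illustration, not part of this commit):

```python
import importlib.util

import torch
from transformers import AutoModelForCausalLM

model_id = "google/gemma-3-270m-it"

# Assumption: if the flash-attn install failed, fall back to eager attention
# instead of crashing at load time.
has_flash_attn = importlib.util.find_spec("flash_attn") is not None

model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    torch_dtype=torch.bfloat16,
    attn_implementation="flash_attention_2" if has_flash_attn else "eager",
)
```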
 
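Note on the "good defaults": the new slider values mirror the sampling settings shipped with the checkpoint, per the URLs in the inline comments. A quick sketch to verify them against the model's own generation_config.json (assumes network access to the Hub):

```python
from transformers import GenerationConfig

# Fetch the generation defaults bundled with the checkpoint.
gen_config = GenerationConfig.from_pretrained("google/gemma-3-270m-it")

# The commit comments expect top_p=0.95 and top_k=64 here.
print(gen_config.top_p, gen_config.top_k)
```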