Spaces:

FlameF0X
/

SnowflakeCore-G1

Sleeping

App Files Files Community

FlameF0X committed 18 days ago

Commit

18a753b

verified ·

1 Parent(s): fd28877

Update app.py

Browse files

Files changed (1) hide show

app.py +20 -4

app.py CHANGED Viewed

@@ -2,13 +2,18 @@ import gradio as gr
 from transformers import AutoModelForCausalLM, AutoTokenizer
 import torch
 # Load model and tokenizer
 model = AutoModelForCausalLM.from_pretrained(
     "FlameF0X/SnowflakeCore-G1-Tiny2",
     trust_remote_code=True,
     force_download=True,
     use_safetensors=True,
-)
 tokenizer = AutoTokenizer.from_pretrained(
     "FlameF0X/SnowflakeCore-G1-Tiny2",
     trust_remote_code=True,
@@ -17,11 +22,17 @@ tokenizer = AutoTokenizer.from_pretrained(
 )
 def custom_greedy_generate(prompt, max_length=50):
     model.eval()
-    input_ids = tokenizer(prompt, return_tensors="pt").input_ids
     generated = input_ids
     with torch.no_grad():
         for _ in range(max_length):
             outputs = model(input_ids=generated)
             next_token_logits = outputs["logits"][:, -1, :]
             next_token_id = torch.argmax(next_token_logits, dim=-1).unsqueeze(-1)
@@ -31,15 +42,20 @@ def custom_greedy_generate(prompt, max_length=50):
     return tokenizer.decode(generated[0], skip_special_tokens=True)
 def gradio_generate(prompt):
     return custom_greedy_generate(prompt)
 iface = gr.Interface(
     fn=gradio_generate,
     inputs=gr.Textbox(lines=2, placeholder="Enter your prompt here..."),
     outputs=gr.Textbox(label="Generated Text"),
     title="SnowflakeCore-G1-Tiny2 Text Generation",
-    description="Enter a prompt and generate text using the SnowflakeCore-G1-Tiny2 model.",
 )
 if __name__ == "__main__":
-    iface.launch()

 from transformers import AutoModelForCausalLM, AutoTokenizer
 import torch
+# Determine the device to use (GPU if available, otherwise CPU)
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+print(f"Using device: {device}")
 # Load model and tokenizer
+# Move the model to the determined device
 model = AutoModelForCausalLM.from_pretrained(
     "FlameF0X/SnowflakeCore-G1-Tiny2",
     trust_remote_code=True,
     force_download=True,
     use_safetensors=True,
+).to(device) # Move model to GPU or CPU
 tokenizer = AutoTokenizer.from_pretrained(
     "FlameF0X/SnowflakeCore-G1-Tiny2",
     trust_remote_code=True,
 )
 def custom_greedy_generate(prompt, max_length=50):
+    """
+    Generates text using a custom greedy decoding approach.
+    The model and input tensors are moved to the appropriate device (GPU/CPU).
+    """
     model.eval()
+    # Move input_ids to the same device as the model
+    input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(device)
     generated = input_ids
     with torch.no_grad():
         for _ in range(max_length):
+            # Ensure the generated tensor is on the correct device for model input
             outputs = model(input_ids=generated)
             next_token_logits = outputs["logits"][:, -1, :]
             next_token_id = torch.argmax(next_token_logits, dim=-1).unsqueeze(-1)
     return tokenizer.decode(generated[0], skip_special_tokens=True)
 def gradio_generate(prompt):
+    """
+    Wrapper function for Gradio interface.
+    """
     return custom_greedy_generate(prompt)
+# Create the Gradio interface
 iface = gr.Interface(
     fn=gradio_generate,
     inputs=gr.Textbox(lines=2, placeholder="Enter your prompt here..."),
     outputs=gr.Textbox(label="Generated Text"),
     title="SnowflakeCore-G1-Tiny2 Text Generation",
+    description=f"Enter a prompt and generate text using the SnowflakeCore-G1-Tiny2 model. Running on: {device}",
 )
+# Launch the Gradio application
 if __name__ == "__main__":
+    iface.launch()