akashmadisetty committed on
Commit b820bc7 · 1 Parent(s): 45c882e
Files changed (1)
  1. app.py +63 -14
app.py CHANGED
@@ -27,37 +27,66 @@ def load_model(hf_token):
         return "⚠️ Please enter your Hugging Face token to use the model."

     try:
-        # Try both model versions
+        # Try different model versions from smallest to largest
         model_options = [
-            "google/gemma-3-4b-pt",  # Try the quantized PT version first
-            "google/gemma-2b",  # Fallback to 2b model
+            "google/gemma-2b-it",  # Try an instruction-tuned 2B model first (smallest)
+            "google/gemma-2b",  # Try base 2B model next
+            "google/gemma-7b-it",  # Try 7B instruction-tuned model
+            "google/gemma-7b",  # Try base 7B model
+            # Fallback to completely different models if all Gemma models fail
+            "meta-llama/Llama-2-7b-chat-hf",
+            "facebook/opt-1.3b",
+            "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
         ]

+        print(f"Attempting to load models with token starting with: {hf_token[:5]}...")
+
         # Try to load models in order until one works
         for model_name in model_options:
             try:
                 print(f"Attempting to load model: {model_name}")

                 # Load tokenizer
+                print("Loading tokenizer...")
                 global_tokenizer = AutoTokenizer.from_pretrained(
                     model_name,
                     token=hf_token
                 )
+                print("Tokenizer loaded successfully")

-                # Load model with minimal configuration to avoid errors
+                # Load model with minimal configuration
+                print(f"Loading model {model_name}...")
                 global_model = AutoModelForCausalLM.from_pretrained(
                     model_name,
                     torch_dtype=torch.float16,
                     device_map="auto",
                     token=hf_token
                 )
+                print(f"Model {model_name} loaded successfully!")

                 model_loaded = True
                 return f"✅ Model {model_name} loaded successfully!"
             except Exception as specific_e:
                 print(f"Failed to load {model_name}: {specific_e}")
+                import traceback
+                traceback.print_exc()
                 continue

+        # If we get here, all model options failed - try one more option with no token
+        try:
+            print("Trying a public model with no token requirement...")
+            model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
+            global_tokenizer = AutoTokenizer.from_pretrained(model_name)
+            global_model = AutoModelForCausalLM.from_pretrained(
+                model_name,
+                torch_dtype=torch.float16,
+                device_map="auto"
+            )
+            model_loaded = True
+            return f"✅ Fallback model {model_name} loaded successfully! Note: This is not Gemma but a fallback model."
+        except Exception as fallback_e:
+            print(f"Failed to load fallback model: {fallback_e}")
+
         # If we get here, all model options failed
         model_loaded = False
         return "❌ Could not load any model version. Please check your token and try again."
@@ -65,6 +94,10 @@ def load_model(hf_token):
     except Exception as e:
         model_loaded = False
         error_msg = str(e)
+        print(f"Error in load_model: {error_msg}")
+        import traceback
+        traceback.print_exc()
+
         if "401 Client Error" in error_msg:
             return "❌ Authentication failed. Please check your token and make sure you've accepted the model license on Hugging Face."
         else:
@@ -152,7 +185,11 @@ def generate_text(prompt, max_length=1024, temperature=0.7, top_p=0.95):
     """Generate text using the Gemma model"""
    global global_model, global_tokenizer, model_loaded

+    print(f"Generating text with params: max_length={max_length}, temp={temperature}, top_p={top_p}")
+    print(f"Prompt: {prompt[:100]}...")
+
     if not model_loaded or global_model is None or global_tokenizer is None:
+        print("Model not loaded")
         return "⚠️ Model not loaded. Please authenticate with your Hugging Face token."

     if not prompt:
@@ -161,21 +198,35 @@ def generate_text(prompt, max_length=1024, temperature=0.7, top_p=0.95):
     try:
         # Keep generation simple to avoid errors
         inputs = global_tokenizer(prompt, return_tensors="pt").to(global_model.device)
+        print(f"Input token length: {len(inputs.input_ids[0])}")

-        # Use simpler generation parameters that work reliably
-        outputs = global_model.generate(
-            inputs.input_ids,
-            max_length=min(2048, max_length + len(inputs.input_ids[0])),
-            temperature=max(0.3, temperature),  # Prevent too low temperature
-            do_sample=True
-        )
+        # Use even simpler generation parameters
+        generation_args = {
+            "input_ids": inputs.input_ids,
+            "max_length": min(2048, max_length + len(inputs.input_ids[0])),
+            "do_sample": True,
+        }
+
+        # Only add temperature if not too low (can cause issues)
+        if temperature >= 0.3:
+            generation_args["temperature"] = temperature
+
+        print(f"Generation args: {generation_args}")
+
+        # Generate text
+        outputs = global_model.generate(**generation_args)

         # Decode and return the generated text
         generated_text = global_tokenizer.decode(outputs[0], skip_special_tokens=True)
+        print(f"Generated text length: {len(generated_text)}")
         return generated_text
     except Exception as e:
         error_msg = str(e)
         print(f"Generation error: {error_msg}")
+        print(f"Error type: {type(e)}")
+        import traceback
+        traceback.print_exc()
+
         if "probability tensor" in error_msg:
             return "Error: There was a problem with the generation parameters. Try using simpler parameters or a different prompt."
         else:
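For reference, the generation path introduced in this hunk, condensed into a standalone sketch. The function name and signature below are illustrative, not part of app.py; it assumes a tokenizer and model loaded as in load_model() above.

    # Sketch of the new generation path (not part of app.py).
    def generate(tokenizer, model, prompt, max_length=1024, temperature=0.7):
        inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
        generation_args = {
            "input_ids": inputs.input_ids,
            "max_length": min(2048, max_length + len(inputs.input_ids[0])),
            "do_sample": True,
        }
        # Very low temperatures are what triggered the "probability tensor" errors,
        # so temperature is only passed through when it is at or above 0.3.
        if temperature >= 0.3:
            generation_args["temperature"] = temperature
        outputs = model.generate(**generation_args)
        return tokenizer.decode(outputs[0], skip_special_tokens=True)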
@@ -234,11 +285,9 @@ with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue")) as demo:
         with gr.Column(scale=1):
             auth_button = gr.Button("Authenticate", variant="primary")

-    with gr.Group(visible=True) as auth_message_group:
-        auth_status = gr.Markdown("Please authenticate to use the model.")
+    auth_status = gr.Markdown("Please authenticate to use the model.")

     def authenticate(token):
-        auth_message_group.visible = True
         return "Loading model... Please wait, this may take a minute."

     def auth_complete(token):
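The click wiring for these handlers is outside this diff. A typical Gradio pattern consistent with authenticate() and auth_complete() is sketched below as an assumption; hf_token_input is a hypothetical textbox name, not a component shown in the commit.

    # Assumed wiring, not part of this diff: show the loading message first,
    # then run the (slow) model load and update the same status component.
    auth_button.click(
        fn=authenticate,
        inputs=hf_token_input,   # hypothetical token textbox
        outputs=auth_status,
    ).then(
        fn=auth_complete,
        inputs=hf_token_input,
        outputs=auth_status,
    )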
 