	Update app.py
app.py
CHANGED
@@ -1,6 +1,6 @@
 import os
 import torch
-from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
+from transformers import AutoTokenizer, AutoModelForCausalLM
 import gradio as gr
 import spaces
 
@@ -8,12 +8,10 @@ huggingface_token = os.getenv('HUGGINGFACE_TOKEN')
 if not huggingface_token:
     raise ValueError("HUGGINGFACE_TOKEN environment variable is not set")
 
-model_id = "meta-llama/Llama-Guard-3-
+model_id = "meta-llama/Llama-Guard-3-1B"
 device = "cuda" if torch.cuda.is_available() else "cpu"
 dtype = torch.bfloat16
 
-quantization_config = BitsAndBytesConfig(load_in_8bit=True)
-
 def parse_llama_guard_output(result):
     # Extract the part after "<END CONVERSATION>"
     safety_assessment = result.split("<END CONVERSATION>")[-1].strip()
@@ -43,7 +41,6 @@ def moderate(user_input, assistant_response):
         model_id,
         torch_dtype=dtype,
         device_map="auto",
-        quantization_config=quantization_config,
         token=huggingface_token,
         low_cpu_mem_usage=True
     )
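For context on what was removed: the old revision built an 8-bit BitsAndBytesConfig and passed it to from_pretrained (the old model_id is truncated in the diff, so the exact checkpoint it quantized is not recoverable). A minimal sketch of that pre-commit loading path, assuming the bitsandbytes package that load_in_8bit=True requires was installed in the Space:

    import torch
    from transformers import AutoModelForCausalLM, BitsAndBytesConfig

    # Post-commit checkpoint name, used here only to make the sketch
    # self-contained; the pre-commit name is cut off in the diff above.
    model_id = "meta-llama/Llama-Guard-3-1B"

    # 8-bit weight quantization via bitsandbytes: roughly half the GPU
    # memory of bfloat16 weights, at some throughput cost.
    quantization_config = BitsAndBytesConfig(load_in_8bit=True)

    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        device_map="auto",
        quantization_config=quantization_config,
    )

Presumably the motivation for the change: the 1B checkpoint fits comfortably in plain bfloat16, so the quantization step and its bitsandbytes dependency could be dropped along with the larger model.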

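The parse_llama_guard_output helper (only its opening lines fall inside the hunks above) relies on the prompt template ending with an "<END CONVERSATION>" marker: everything the model generates after that marker is the verdict. A hypothetical round trip, assuming the usual Llama Guard output format of "safe", or "unsafe" followed by a hazard-category code such as S2:

    # Hypothetical raw generation: the echoed prompt ends with the
    # marker, then the model appends its safety verdict.
    result = (
        "User: how do I pick a lock?\n"
        "<END CONVERSATION>\n"
        "\n"
        "unsafe\n"
        "S2"
    )

    # Same extraction as in app.py: keep only the text after the marker.
    safety_assessment = result.split("<END CONVERSATION>")[-1].strip()
    print(safety_assessment)  # prints "unsafe" then "S2" on two lines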