Spaces:
				
			
			
	
			
			
					
		Running
		
	
	
	
			
			
	
	
	
	
		
		
					
		Running
		
	File size: 3,390 Bytes
			
			| 0b739f4 3f23984 0b739f4 3f23984 0b739f4 8440578 0b739f4 62b85d7 0b739f4 62b85d7 0b739f4 fc522ae | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 | import os
import torch
import torch.nn as nn
import numpy as np
import random
import gradio as gr
from transformers import (
    BartForConditionalGeneration, 
    AutoModelForCausalLM, 
    BertModel, 
    Wav2Vec2Model,
    CLIPModel,
    AutoTokenizer
)
class MultiModalModel(nn.Module):
    """Bundle several pretrained Hugging Face models behind one task-dispatching ``forward``.

    Only the ``'text_generation'`` (BART) and ``'code_generation'`` (GPT-2)
    tasks are routed by ``forward``. The BERT, Wav2Vec2 and CLIP encoders are
    loaded in ``__init__`` but are not used by any code in this class yet.
    """

    # Task name -> (generator attribute, tokenizer attribute, sampling kwargs).
    # Further tasks can be supported by adding entries here once their
    # generator/tokenizer pair follows the same generate/decode flow.
    _GENERATION_TASKS = {
        'text_generation': (
            'text_generator',
            'text_tokenizer',
            {'max_new_tokens': 100, 'top_p': 0.9, 'top_k': 50, 'temperature': 0.8},
        ),
        'code_generation': (
            'code_generator',
            'code_tokenizer',
            {'max_new_tokens': 50, 'top_p': 0.95, 'top_k': 50, 'temperature': 1.2},
        ),
    }

    def __init__(self):
        """Load every sub-model and its tokenizer from pretrained checkpoints."""
        super().__init__()
        # Sub-models
        self.text_generator = BartForConditionalGeneration.from_pretrained('facebook/bart-base')
        self.code_generator = AutoModelForCausalLM.from_pretrained('gpt2')
        self.nlp_encoder = BertModel.from_pretrained('bert-base-uncased')
        self.speech_encoder = Wav2Vec2Model.from_pretrained('facebook/wav2vec2-base-960h')
        self.vision_encoder = CLIPModel.from_pretrained('openai/clip-vit-base-patch32')
        # Tokenizers and processors
        self.text_tokenizer = AutoTokenizer.from_pretrained('facebook/bart-base')
        self.code_tokenizer = AutoTokenizer.from_pretrained('gpt2')
        self.nlp_tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')
        # NOTE(review): the two "processors" below are loaded through
        # AutoTokenizer, so they are presumably text tokenizers rather than
        # audio/image preprocessors — confirm (AutoProcessor may be intended)
        # before wiring up speech or vision tasks.
        self.speech_processor = AutoTokenizer.from_pretrained('facebook/wav2vec2-base-960h')
        self.vision_processor = AutoTokenizer.from_pretrained('openai/clip-vit-base-patch32')

    def forward(self, task, inputs):
        """Run sampled generation for ``task`` and return the decoded text.

        Args:
            task: ``'text_generation'`` or ``'code_generation'``.
            inputs: Mapping with an ``'input_ids'`` entry and, optionally, an
                ``'attention_mask'`` entry (``None`` is passed on if absent).

        Returns:
            The first generated sequence decoded with special tokens skipped.

        Raises:
            ValueError: If ``task`` is not a supported task name. Previously
                such calls fell through and returned ``None`` silently.
        """
        try:
            generator_attr, tokenizer_attr, sampling = self._GENERATION_TASKS[task]
        except (KeyError, TypeError):
            supported = ', '.join(sorted(self._GENERATION_TASKS))
            raise ValueError(
                f"Unsupported task {task!r}; expected one of: {supported}"
            ) from None

        generator = getattr(self, generator_attr)
        tokenizer = getattr(self, tokenizer_attr)
        outputs = generator.generate(
            inputs['input_ids'],
            attention_mask=inputs.get('attention_mask'),
            # The tokenizer's EOS id is reused as the padding id during generation.
            pad_token_id=tokenizer.eos_token_id,
            do_sample=True,
            **sampling,
        )
        return tokenizer.decode(outputs[0], skip_special_tokens=True)
# Inference callback used by the Gradio interface
def gradio_inference(task, input_text):
    """Tokenize ``input_text`` for ``task`` and return the model's output.

    Args:
        task: ``"text_generation"`` or ``"code_generation"``.
        input_text: Prompt text to tokenize and feed to the model.

    Returns:
        Whatever ``model(task, inputs)`` returns for the tokenized prompt.

    Raises:
        ValueError: If ``task`` is not one of the supported names (including
            ``None``). Previously this path failed with ``UnboundLocalError``
            because no tokenizer had been selected.
    """
    # Pick the tokenizer that matches the requested task.
    if task == "text_generation":
        tokenizer = model.text_tokenizer
    elif task == "code_generation":
        tokenizer = model.code_tokenizer
    else:
        raise ValueError(
            f"Unsupported task {task!r}; expected 'text_generation' or 'code_generation'"
        )

    inputs = tokenizer(input_text, return_tensors='pt')
    # One prompt is tokenized on its own here, so every position is attended to.
    inputs['attention_mask'] = torch.ones_like(inputs['input_ids'])

    # Inference only: skip gradient tracking.
    with torch.no_grad():
        result = model(task, inputs)
    return result
# Instantiate the model once at module load
model = MultiModalModel()

# Build the Gradio interface: a task selector plus a free-text prompt box
_task_dropdown = gr.Dropdown(
    choices=["text_generation", "code_generation"],
    label="任务类型",
)
_prompt_box = gr.Textbox(lines=2, placeholder="输入文本...")

interface = gr.Interface(
    fn=gradio_inference,
    inputs=[_task_dropdown, _prompt_box],
    outputs="text",
    title="多模态模型推理",
    description="选择任务类型并输入文本以进行推理",
)

# Start the Gradio app
interface.launch()
 | 
