import re

# Attribute names recognised by the parsers below. The order here is also the
# key order of the dict returned by parse_attribute_string().
_ATTRIBUTE_NAMES = ("Stress", "Affection", "Darkness")

# "<Name>:" optionally followed by a signed integer or decimal number. The
# number group is optional on purpose: "Darkness: ?" must still match so that
# the attribute can be recorded with a zero value.
_ATTRIBUTE_PATTERNS = {
    name: re.compile(name + r":\s*([+-]?\d+(?:\.\d+)?)?")
    for name in _ATTRIBUTE_NAMES
}

# "<Name> <min>-<max>" (closed range) or "<Name> <min>+" (open-ended range).
_CONDITION_PATTERN = re.compile(
    r"(Darkness|Stress|Affection)\s+(\d+)(?:-(\d+)|\+)"
)


def parse_attribute_string(attribute_str):
    """Extract attribute deltas from a string such as ``"Stress: -1.0"``.

    For each of ``"Stress:"``, ``"Affection:"`` and ``"Darkness:"`` that occurs
    in ``attribute_str``, the signed number following the first occurrence is
    stored under the attribute name. When the label is followed by ``?`` or by
    anything that is not a number, the attribute is recorded as ``0.0``.
    Attributes that do not occur at all are left out of the result.

    Args:
        attribute_str: Text to scan. ``None`` or an empty string yields ``{}``.

    Returns:
        A dict mapping attribute name to a float. Keys are inserted in the
        fixed order Stress, Affection, Darkness, independent of the order in
        which they appear in the input.

    Examples:
        >>> parse_attribute_string("Stress: -1.0, Affection: +0.5")
        {'Stress': -1.0, 'Affection': 0.5}
        >>> parse_attribute_string("Affection: +4.0, Stress: -2.0, Darkness: -1.0")
        {'Stress': -2.0, 'Affection': 4.0, 'Darkness': -1.0}
        >>> parse_attribute_string("Affection: +2.0, Stress: -1.0, Darkness: ?")
        {'Stress': -1.0, 'Affection': 2.0, 'Darkness': 0.0}
        >>> parse_attribute_string("Stress: -1.0")
        {'Stress': -1.0}
    """
    result = {}
    if not attribute_str:
        return result

    for name, pattern in _ATTRIBUTE_PATTERNS.items():
        match = pattern.search(attribute_str)
        if match is None:
            continue
        number = match.group(1)
        # group(1) is None when the label is present but no number follows it.
        # 0.0 (rather than int 0) keeps every value a float; it still == 0.
        result[name] = float(number) if number is not None else 0.0
    return result


def parsing_condition_string(s, max_value=100):
    """Extract an attribute range condition such as ``"Darkness 0-39"``.

    The string may mention one of Darkness, Stress or Affection followed by
    either a closed range ``"<min>-<max>"`` or an open-ended range
    ``"<min>+"``. Only the first such mention is used.

    Args:
        s: Text to scan. ``None`` or an empty string yields ``None``.
        max_value: Upper bound reported for open-ended (``"<min>+"``) ranges.

    Returns:
        A tuple ``(attribute, min_value, max_value)`` with integer bounds, or
        ``None`` when the string contains no attribute condition.

    Examples:
        >>> parsing_condition_string("Random Noon Event: Darkness 0-39")
        ('Darkness', 0, 39)
        >>> parsing_condition_string("Random Noon Event: Stress 0-19")
        ('Stress', 0, 19)
        >>> parsing_condition_string("Random Noon Event: Affection 61+")
        ('Affection', 61, 100)
    """
    if not s:
        return None

    match = _CONDITION_PATTERN.search(s)
    if match is None:
        return None

    attribute, lower, upper = match.groups()
    # `upper` is None for the "<min>+" form; fall back to the default ceiling.
    upper_bound = int(upper) if upper is not None else max_value
    return (attribute, int(lower), upper_bound)


# ------ BGE Embedding -----------
import base64
import struct

import torch

try:
    # Kept at module level so the names stay importable from this module.
    # get_bge_embeddings_zh() imports transformers itself on first use, so a
    # missing installation only matters once an embedding is requested; the
    # stdlib-only helpers below remain usable without it.
    from transformers import AutoModel, AutoTokenizer
except ImportError:
    AutoModel = AutoTokenizer = None

_BGE_ZH_MODEL_NAME = "BAAI/bge-small-zh-v1.5"

# Lazily initialised by get_bge_embeddings_zh() and reused across calls.
_bge_model_zh = None
_bge_tokenizer_zh = None

# One big-endian ("network" order) 32-bit float.
_FLOAT32_BE = struct.Struct("!f")


def get_bge_embeddings_zh(sentences):
    """Embed a batch of sentences with the BAAI/bge-small-zh-v1.5 model.

    The tokenizer and model are loaded on the first call and cached in module
    globals. All sentences are encoded in a single forward pass, so the caller
    is responsible for keeping the batch to a size that fits in memory.
    Inputs are padded, and truncated to at most 512 tokens.

    Args:
        sentences: The sentences to embed. An empty list or tuple returns
            ``[]`` without loading the model.

    Returns:
        A list with one embedding per sentence; each embedding is a list of
        floats taken from the first ([CLS]) token and L2-normalised.
    """
    global _bge_model_zh
    global _bge_tokenizer_zh

    if isinstance(sentences, (list, tuple)) and not sentences:
        return []

    if _bge_model_zh is None:
        from transformers import AutoModel, AutoTokenizer

        _bge_tokenizer_zh = AutoTokenizer.from_pretrained(_BGE_ZH_MODEL_NAME)
        _bge_model_zh = AutoModel.from_pretrained(_BGE_ZH_MODEL_NAME)
        _bge_model_zh.eval()

    # Tokenize sentences.
    encoded_input = _bge_tokenizer_zh(
        sentences,
        padding=True,
        truncation=True,
        return_tensors="pt",
        max_length=512,
    )

    # Compute token embeddings without tracking gradients.
    with torch.no_grad():
        model_output = _bge_model_zh(**encoded_input)
        # CLS pooling: keep the first token's vector of every sequence.
        sentence_embeddings = model_output[0][:, 0]

    # Normalise each embedding to unit L2 length.
    sentence_embeddings = torch.nn.functional.normalize(
        sentence_embeddings, p=2, dim=1
    )
    return sentence_embeddings.cpu().tolist()


def get_bge_embedding_zh(text_or_texts):
    """Embed a single string or a collection of strings.

    Args:
        text_or_texts: Either one ``str`` or a collection of strings.

    Returns:
        One embedding (a list of floats) when given a ``str``; otherwise the
        list of embeddings returned by ``get_bge_embeddings_zh``.
    """
    if isinstance(text_or_texts, str):
        return get_bge_embeddings_zh([text_or_texts])[0]
    return get_bge_embeddings_zh(text_or_texts)


# The encode/decode helpers below are adapted from ChatHaruhi.


def float_array_to_base64(float_arr):
    """Serialise an iterable of floats to a base64 string.

    Each value is packed as a 4-byte big-endian float (so it is stored with
    single precision) and the concatenated bytes are base64-encoded.

    Args:
        float_arr: Iterable of numbers to encode.

    Returns:
        The base64 text as a ``str``; an empty input gives ``""``.
    """
    # join() builds the buffer in one pass instead of repeated bytes +=.
    packed = b"".join(_FLOAT32_BE.pack(value) for value in float_arr)
    return base64.b64encode(packed).decode("utf-8")


def base64_to_float_array(base64_data):
    """Decode a string produced by ``float_array_to_base64``.

    Args:
        base64_data: Base64 text (or bytes) holding 4-byte big-endian floats.

    Returns:
        The decoded values as a list of Python floats.

    Raises:
        struct.error: If the decoded payload is not a multiple of 4 bytes.
    """
    raw = base64.b64decode(base64_data)
    # iter_unpack walks the buffer 4 bytes at a time, yielding 1-tuples.
    return [value for (value,) in _FLOAT32_BE.iter_unpack(raw)]