Spaces:
Runtime error
Runtime error
File size: 4,689 Bytes
4222961 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 |
import re
# 我希望实现一个字符串解析函数,输入是一个string,输出是一个dict,如果字符串中出现
# "Stress:", "Affection:"或者"Darkness:",则把后面的一个有正负的浮点数作为value,对应的字符串作为key,记录在dict中
# 如果后面是?或者非数字,则记录成0
# example input:
# Stress: -1.0, Affection: +0.5
# example output:
# {"Stress":-1,"Affection":0.5 }
# example input:
# Affection: +4.0, Stress: -2.0, Darkness: -1.0
# example output:
# {"Affection": 4, "Stress": -2, "Darkness": -1}
# example input:
# Affection: +2.0, Stress: -1.0, Darkness: ?
# example output:
# {"Affection": 2, "Stress": -1, "Darkness": 0 }
# example input:
# Stress: -1.0
# example output:
# {"Stress":-1}
def parse_attribute_string(attribute_str):
    """Extract Stress / Affection / Darkness values from a text snippet.

    Scans for "<Attr>: <signed float>" occurrences. Each attribute found
    becomes a dict key whose value is the parsed float, or 0 when the
    number is missing or non-numeric (e.g. "Darkness: ?"). Attributes
    not present in the input are omitted from the result.
    """
    parsed = {}
    for attr in ("Stress", "Affection", "Darkness"):
        found = re.search(attr + r":\s*([+-]?\d+(\.\d+)?)?", attribute_str)
        if found is None:
            continue
        raw_value = found.group(1)
        parsed[attr] = 0 if raw_value is None else float(raw_value)
    return parsed
# 我希望实现一个字符串解析函数,输入是一个string,输出是一个tuple,
# max_value = 100,字符串中可能会包含Darkness,Stress或者Affection属性中的一种,
# 如果输入为"Affection 61+", 则输出 ("Affection", 61, 100)
# 如果输入为"Darkness 0-39",则输出 ("Darkness", 0, 39)
# 输出字符串中包含的属性,区间的最小值和最大值。
# 如果不包含任何属性,则输出None
# example_input:
# Random Noon Event: Darkness 0-39
# example_output
# ("Darkness", 0 , 39)
# example_input:
# Random Noon Event: Stress 0-19
# example_output
# ("Stress", 0 , 19)
# example_input:
# Random Noon Event: Affection 61+
# example_output
# ("Affection", 61, 100)
import re
def parsing_condition_string(s):
    """Parse an attribute-range condition like "Darkness 0-39" or "Affection 61+".

    Returns a tuple (attribute, min_value, max_value). An open-ended
    "N+" range uses 100 as the maximum. Returns None when the string
    contains no recognized attribute condition.
    """
    DEFAULT_MAX = 100  # upper bound for open-ended "N+" ranges
    m = re.search(r'(Darkness|Stress|Affection)\s+(\d+)(?:-(\d+)|\+)', s)
    if m is None:
        return None
    attribute = m.group(1)
    lower = int(m.group(2))
    upper = int(m.group(3)) if m.group(3) else DEFAULT_MAX
    return (attribute, lower, upper)
#------ BGE Embedding -----------
from transformers import AutoModel, AutoTokenizer
import torch
_bge_model_zh = None
_bge_tokenizer_zh = None
def get_bge_embeddings_zh( sentences ):
    """Embed a batch of Chinese sentences with BAAI/bge-small-zh-v1.5.

    NOTE: no internal batching — the caller must keep `sentences` small
    enough to fit in memory. Returns one L2-normalized embedding vector
    (list of floats) per input sentence.
    """
    global _bge_model_zh
    global _bge_tokenizer_zh
    # Lazily download and cache the tokenizer/model on first call.
    if _bge_model_zh is None:
        from transformers import AutoTokenizer, AutoModel
        _bge_tokenizer_zh = AutoTokenizer.from_pretrained('BAAI/bge-small-zh-v1.5')
        _bge_model_zh = AutoModel.from_pretrained('BAAI/bge-small-zh-v1.5')
        _bge_model_zh.eval()
    batch = _bge_tokenizer_zh(sentences, padding=True, truncation=True, return_tensors='pt', max_length = 512)
    with torch.no_grad():
        outputs = _bge_model_zh(**batch)
    # CLS-token pooling: take the first token's hidden state per sentence.
    cls_vectors = outputs[0][:, 0]
    cls_vectors = torch.nn.functional.normalize(cls_vectors, p=2, dim=1)
    return cls_vectors.cpu().tolist()
def get_bge_embedding_zh( text_or_texts ):
    """Embed one string (returns a single vector) or a list of strings
    (returns a list of vectors) via get_bge_embeddings_zh."""
    if isinstance(text_or_texts, str):
        # Wrap the single string in a batch of one and unwrap the result.
        return get_bge_embeddings_zh([text_or_texts])[0]
    return get_bge_embeddings_zh(text_or_texts)
# Encode和Decode的代码来自于ChatHaruhi
import base64
import struct
def float_array_to_base64(float_arr):
    """Encode a sequence of numbers as base64 text of big-endian float32 data.

    Each value is packed as a 4-byte big-endian IEEE-754 float ('!f'),
    so round-tripping through base64_to_float_array loses precision
    beyond float32.

    Args:
        float_arr: iterable of numbers.

    Returns:
        ASCII base64 string ('' for an empty input).
    """
    # Materialize once so generators work and len() is available.
    values = tuple(float_arr)
    # Pack the whole array with a single struct call instead of the
    # quadratic `bytes +=` concatenation loop.
    byte_array = struct.pack('!' + str(len(values)) + 'f', *values)
    return base64.b64encode(byte_array).decode('utf-8')
def base64_to_float_array(base64_data):
    """Decode base64 text of big-endian float32 data into a list of floats.

    Inverse of float_array_to_base64.

    Args:
        base64_data: base64 string whose decoded payload is a sequence of
            4-byte big-endian IEEE-754 floats.

    Returns:
        list of floats ([] for empty input).

    Raises:
        struct.error: if the decoded payload length is not a multiple of 4.
    """
    byte_array = base64.b64decode(base64_data)
    # Unpack all 4-byte floats in one counted struct call instead of a
    # per-chunk loop.
    count = len(byte_array) // 4
    return list(struct.unpack('!' + str(count) + 'f', byte_array))
|