Spaces:
Sleeping
Sleeping
How to add a custom prompt: working example (try F5 to debug run)
Browse files
.vscode/launch.json
CHANGED
@@ -21,7 +21,7 @@
|
|
21 |
"gpt-4.1-mini",
|
22 |
"-p",
|
23 |
// "llmbar",
|
24 |
-
"
|
25 |
|
26 |
]
|
27 |
}
|
|
|
21 |
"gpt-4.1-mini",
|
22 |
"-p",
|
23 |
// "llmbar",
|
24 |
+
"post_edit",
|
25 |
|
26 |
]
|
27 |
}
|
README_kr.md
CHANGED
@@ -37,7 +37,6 @@ python main.py -i "rsc/inputs_for_dbg/dbg_400_error_inputs/" -o SOME_WANTED_TARG
|
|
37 |
```
|
38 |
|
39 |
## Requirements
|
40 |
-
`python = 3.11.9` 에서만 테스트 함. `requirements.txt`
|
41 |
```
|
42 |
pip install -r requirements.txt # python 3.11
|
43 |
|
|
|
37 |
```
|
38 |
|
39 |
## Requirements
|
|
|
40 |
```
|
41 |
pip install -r requirements.txt # python 3.11
|
42 |
|
eval_prompt_list.txt
CHANGED
@@ -2,4 +2,5 @@ llmbar
|
|
2 |
llmbar_brief
|
3 |
translation_pair
|
4 |
rag_pair_kr
|
5 |
-
translation_fortunecookie
|
|
|
|
2 |
llmbar_brief
|
3 |
translation_pair
|
4 |
rag_pair_kr
|
5 |
+
translation_fortunecookie
|
6 |
+
post_edit
|
varco_arena/varco_arena_core/prompts/__init__.py
CHANGED
@@ -9,6 +9,7 @@ from .llmbar_brief import LLMBarBriefPrompt
|
|
9 |
from .rag_pair_kr import RagPairKRPrompt
|
10 |
from .translation_pair import TranslationPairPrompt
|
11 |
from .translation_fortunecookie import TranslationNewPrompt
|
|
|
12 |
|
13 |
NAME2PROMPT_CLS = dict(
|
14 |
llmbar_brief=LLMBarBriefPrompt(),
|
@@ -16,6 +17,7 @@ NAME2PROMPT_CLS = dict(
|
|
16 |
translation_pair=TranslationPairPrompt(),
|
17 |
rag_pair_kr=RagPairKRPrompt(),
|
18 |
translation_fortunecookie=TranslationNewPrompt(),
|
|
|
19 |
# contextual_vqa = Contextual_VQA(),
|
20 |
# contextual_ocr = Contextual_OCR(),
|
21 |
)
|
@@ -26,8 +28,9 @@ def load_prompt(
|
|
26 |
"llmbar_brief",
|
27 |
"llmbar",
|
28 |
"translation_pair",
|
29 |
-
"translation_fortunecookie",
|
30 |
"rag_pair_kr",
|
|
|
|
|
31 |
],
|
32 |
task: str = "", # used for further prompt variation (eval prompt might depend on task.)
|
33 |
):
|
|
|
9 |
from .rag_pair_kr import RagPairKRPrompt
|
10 |
from .translation_pair import TranslationPairPrompt
|
11 |
from .translation_fortunecookie import TranslationNewPrompt
|
12 |
+
from .post_edit import PostEditPrompt
|
13 |
|
14 |
NAME2PROMPT_CLS = dict(
|
15 |
llmbar_brief=LLMBarBriefPrompt(),
|
|
|
17 |
translation_pair=TranslationPairPrompt(),
|
18 |
rag_pair_kr=RagPairKRPrompt(),
|
19 |
translation_fortunecookie=TranslationNewPrompt(),
|
20 |
+
post_edit=PostEditPrompt(),
|
21 |
# contextual_vqa = Contextual_VQA(),
|
22 |
# contextual_ocr = Contextual_OCR(),
|
23 |
)
|
|
|
28 |
"llmbar_brief",
|
29 |
"llmbar",
|
30 |
"translation_pair",
|
|
|
31 |
"rag_pair_kr",
|
32 |
+
"translation_fortunecookie",
|
33 |
+
"post_edit",
|
34 |
],
|
35 |
task: str = "", # used for further prompt variation (eval prompt might depend on task.)
|
36 |
):
|
varco_arena/varco_arena_core/prompts/post_edit.py
ADDED
@@ -0,0 +1,90 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import re
|
2 |
+
from typing import *
|
3 |
+
|
4 |
+
from .llmbar import LLMBarPrompt
|
5 |
+
from .prompt_utils import fill_template_over_messsages
|
6 |
+
|
7 |
+
import random
|
8 |
+
|
9 |
+
class PostEditPrompt(LLMBarPrompt):
    """Pairwise judge prompt for post-editing evaluation.

    Reuses the LLMBar machinery (template loading, message filling) but
    with its own template file (``post_edit.yaml``) and a lenient output
    parser that normalizes the judge's decision to "A" or "B".
    """

    def __init__(self, prompt_yaml: str = "post_edit.yaml"):
        super().__init__(prompt_yaml=prompt_yaml)

    def parsed_output(self, response: Any) -> str:
        """Parse the judge model output into a decision token.

        ``post_edit.yaml`` declares the decision tokens as "A"/"B".  This
        parser is lenient: parenthesized ("(A)") and lowercase ("a")
        variants are accepted and normalized to the canonical uppercase
        token.  If the output is ambiguous (both tokens present, or
        neither), it falls back to a uniformly random choice and logs the
        offending response.
        """
        # Strip quote characters the model sometimes wraps the token in.
        input_string = response.choices[0].message.content
        input_string = input_string.replace("'", "").replace('"', "").strip()

        if "(A)" in input_string and "(B)" not in input_string:
            res_tok = "A"
        elif "(B)" in input_string and "(A)" not in input_string:
            res_tok = "B"
        elif "A" in input_string and "B" not in input_string:
            res_tok = "A"
        elif "B" in input_string and "A" not in input_string:
            res_tok = "B"
        # Bug fix: the lowercase branches previously returned "a"/"b",
        # which no downstream decision token ("A"/"B") matches; normalize
        # to the canonical uppercase tokens as the docstring intends.
        elif "a" in input_string and "b" not in input_string:
            res_tok = "A"
        elif "b" in input_string and "a" not in input_string:
            res_tok = "B"
        else:  # both tokens present, or neither
            # Fallback for ambiguous or malformed model output.
            res_tok = random.choice(["A", "B"])
            print("=" * 100)
            print(f"actual_response={input_string}")
            print(f"{res_tok=}")
            print(
                "Response format Error (model side, not code side): Fails to output in expected format. Fallback to random choice: ",
                res_tok,
            )
            print("=" * 100)

        return res_tok

    def complete_prompt(
        self,
        inst: str = None,
        src: str = None,
        out_a: str = None,
        out_b: str = None,
        **kwargs,
    ) -> List[Dict]:
        """Fill the prompt template with the instruction/source pair and
        the two candidate outputs, returning the completed chat messages.
        """

        def _combine_inst_src(inst, src):
            """Return only ``inst`` (or only ``src``) when the other is
            missing; otherwise join them, labeling ``src`` as a glossary.
            """
            if not inst:
                return src
            elif not src:
                return inst
            else:
                inst_src = f"{inst}\n\n**Glossary**: {src}"
                return inst_src

        inst_src = _combine_inst_src(inst, src)

        kwargs_to_fill = dict(
            inst_src=inst_src,
            out_a=out_a,
            out_b=out_b,
        )

        # safe_substitute() (string.Template) over the messages loaded from
        # post_edit.yaml, using the cleaned-up kwargs assembled above.
        complete_prm = fill_template_over_messsages(
            self.prompt_template, **kwargs_to_fill
        )

        return complete_prm  # completed prompt messages

    @staticmethod
    def get_criteria_questions(task: str = None):
        """Only used by LLMBar.  This prompt does not consume ``task``,
        so ``${task}`` must not be used in the *.yaml template.
        """
        raise ValueError(
            f"{__class__.__name__} does not require criteria questions to complete the prompt. It is for LLMBar prompt and its variants"
        )
|
varco_arena/varco_arena_core/prompts/post_edit.yaml
ADDED
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
sampling_parameters:
  stop: []  # if left empty, no stop tokens are added automatically
  temperature: 1.0  # removed when using reasoning models such as o4-mini (per original Korean note — confirm)
  logprobs: true  # removed when using reasoning models such as o4-mini (per original Korean note — confirm)
  top_logprobs: 20  # removed for reasoning models such as o4-mini; deprecated, use is discouraged


decision_tokens:
  prefer_1st: A
  prefer_2nd: B

expected_generation_str: |
  B

prompt_template:
  -
    role: system
    content: |
      You are a meticulous translator and writer, an expert in language, style, and cultural nuances. Your task is to evaluate two responses, A and B, against a user's prompt. Select the response that better fulfills the user's request by strictly adhering to all given constraints, such as contextual information or character details. You must respond with only the letter 'A' or 'B'. Do not include any other words, explanations, or punctuation.
  -
    role: user
    content: |
      **User prompt**: ${inst_src}

      **Response A**: ${out_a}

      **Response B**: ${out_b}

      **Your Judge**:
|