sonsus committed
Commit 7eb9722 · 1 Parent(s): 8882dbc

How to add a custom prompt: working example (try F5 to debug run)

.vscode/launch.json CHANGED
@@ -21,7 +21,7 @@
         "gpt-4.1-mini",
         "-p",
         // "llmbar",
-        "translation_pair",
+        "post_edit",
 
       ]
     }
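
For reference, the `-p` flag picks which eval prompt the F5 debug run uses. A minimal sketch of how the surrounding launch configuration might look (the configuration `name`, `type`, and `program` fields here are assumptions, not part of this commit):

```jsonc
{
  "name": "varco_arena: debug run",  // hypothetical name
  "type": "debugpy",                 // assumed Python debug adapter
  "request": "launch",
  "program": "main.py",              // assumed entry point, per the README usage
  "args": [
    "gpt-4.1-mini",  // judge model, as in the diff above
    "-p",
    "post_edit"      // selects the new prompt added by this commit
  ]
}
```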
README_kr.md CHANGED
@@ -37,7 +37,6 @@ python main.py -i "rsc/inputs_for_dbg/dbg_400_error_inputs/" -o SOME_WANTED_TARG
 ```
 
 ## Requirements
-Tested on `python = 3.11.9`. `requirements.txt`
 ```
 pip install -r requirements.txt # python 3.11
 
eval_prompt_list.txt CHANGED
@@ -2,4 +2,5 @@ llmbar
 llmbar_brief
 translation_pair
 rag_pair_kr
-translation_fortunecookie
+translation_fortunecookie
+post_edit
varco_arena/varco_arena_core/prompts/__init__.py CHANGED
@@ -9,6 +9,7 @@ from .llmbar_brief import LLMBarBriefPrompt
 from .rag_pair_kr import RagPairKRPrompt
 from .translation_pair import TranslationPairPrompt
 from .translation_fortunecookie import TranslationNewPrompt
+from .post_edit import PostEditPrompt
 
 NAME2PROMPT_CLS = dict(
     llmbar_brief=LLMBarBriefPrompt(),
@@ -16,6 +17,7 @@ NAME2PROMPT_CLS = dict(
     translation_pair=TranslationPairPrompt(),
     rag_pair_kr=RagPairKRPrompt(),
     translation_fortunecookie=TranslationNewPrompt(),
+    post_edit=PostEditPrompt(),
     # contextual_vqa = Contextual_VQA(),
     # contextual_ocr = Contextual_OCR(),
 )
@@ -26,8 +28,9 @@ def load_prompt(
         "llmbar_brief",
         "llmbar",
         "translation_pair",
-        "translation_fortunecookie",
         "rag_pair_kr",
+        "translation_fortunecookie",
+        "post_edit",
     ],
     task: str = "", # used for further prompt variation (eval prompt might depend on task.)
 ):
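
With the registration above in place, the new prompt is reachable by name. A minimal usage sketch, assuming `load_prompt` returns the instance registered in `NAME2PROMPT_CLS` (the example inputs are hypothetical):

```python
from varco_arena.varco_arena_core.prompts import load_prompt

prompt = load_prompt("post_edit")  # -> the PostEditPrompt() registered above
messages = prompt.complete_prompt(
    inst="Post-edit the draft translation to follow the style guide.",  # hypothetical
    src="brand names must stay in English",  # hypothetical glossary/source field
    out_a="candidate translation A",
    out_b="candidate translation B",
)
```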
varco_arena/varco_arena_core/prompts/post_edit.py ADDED
@@ -0,0 +1,90 @@
+import random
+import re
+from typing import *
+
+from .llmbar import LLMBarPrompt
+from .prompt_utils import fill_template_over_messsages
+
+
+class PostEditPrompt(LLMBarPrompt):
+    def __init__(self, prompt_yaml: str = "post_edit.yaml"):
+        super().__init__(prompt_yaml=prompt_yaml)
+
+    def parsed_output(self, response: Any) -> str:
+        """
+        Parses the judge output and returns a decision_token from post_edit.yaml
+        as res_tok. Light guardrails fit here too (e.g., map a stray "a" to "A").
+        """
+        # remove ', "
+        input_string = response.choices[0].message.content
+        input_string = input_string.replace("'", "").replace('"', "").strip()
+
+        if "(A)" in input_string and "(B)" not in input_string:
+            res_tok = "A"
+        elif "(B)" in input_string and "(A)" not in input_string:
+            res_tok = "B"
+        elif "A" in input_string and "B" not in input_string:
+            res_tok = "A"
+        elif "B" in input_string and "A" not in input_string:
+            res_tok = "B"
+        elif "a" in input_string and "b" not in input_string:
+            res_tok = "A"  # normalize lowercase model output to the decision token
+        elif "b" in input_string and "a" not in input_string:
+            res_tok = "B"
+        else:  # both exist or neither exists
+            # fallback for ambiguous or malformed model output
+            res_tok = random.choice(["A", "B"])
+            print("=" * 100)
+            print(f"actual_response={input_string}")
+            print(f"{res_tok=}")
+            print("Response format error (model side, not code side): failed to output in the expected format. Falling back to random choice: ", res_tok)
+            print("=" * 100)
+
+        return res_tok
+
+    def complete_prompt(
+        self,
+        inst: str = None,
+        src: str = None,
+        out_a: str = None,
+        out_b: str = None,
+        **kwargs,
+    ) -> List[Dict]:
+        # build inst_src from inst and src
+        def _combine_inst_src(inst, src):
+            """
+            Return inst alone if the src field is missing, src alone if inst is
+            missing; if both exist, join them on "\n\n".
+            """
+            if not inst:
+                return src
+            elif not src:
+                return inst
+            else:
+                inst_src = f"{inst}\n\n**Glossary**: {src}"
+                return inst_src
+
+        inst_src = _combine_inst_src(inst, src)
+
+        kwargs_to_fill = dict(
+            inst_src=inst_src,
+            out_a=out_a,
+            out_b=out_b,
+        )
+
+        # safe_substitute() the string.Template messages read from post_edit.yaml with kwargs_to_fill
+        complete_prm = fill_template_over_messsages(
+            self.prompt_template, **kwargs_to_fill
+        )
+
+        return complete_prm  # the completed prompt
+
+    @staticmethod
+    def get_criteria_questions(task: str = None):
+        """
+        Used only by LLMBar. Since that flow reserves `task`, this override keeps
+        *.yaml files here from using ${task}.
+        """
+        raise ValueError(
+            f"{__class__.__name__} does not require criteria questions to complete the prompt. It is for LLMBar prompt and its variants"
+        )
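
A quick sketch of the guardrails in `parsed_output`, using a stand-in object shaped like an OpenAI chat-completion response (`SimpleNamespace` here is only a test double, and constructing `PostEditPrompt()` assumes `post_edit.yaml` is found on its expected path):

```python
from types import SimpleNamespace

def fake_response(text: str):
    # mimic response.choices[0].message.content
    msg = SimpleNamespace(content=text)
    return SimpleNamespace(choices=[SimpleNamespace(message=msg)])

prompt = PostEditPrompt()
print(prompt.parsed_output(fake_response("(A)")))     # -> "A"
print(prompt.parsed_output(fake_response("'b'")))     # -> "B" after quote stripping and case normalization
print(prompt.parsed_output(fake_response("A or B")))  # ambiguous -> random fallback, loudly logged
```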
varco_arena/varco_arena_core/prompts/post_edit.yaml ADDED
@@ -0,0 +1,29 @@
+sampling_parameters:
+  stop: []  # leave empty to generate until the model stops on its own
+  temperature: 1.0  # dropped when using reasoning models such as o4-mini
+  logprobs: true  # dropped when using reasoning models such as o4-mini
+  top_logprobs: 20  # dropped for reasoning models such as o4-mini; deprecated, use is discouraged
+
+
+decision_tokens:
+  prefer_1st: A
+  prefer_2nd: B
+
+expected_generation_str: |
+  B
+
+prompt_template:
+  -
+    role: system
+    content: |
+      You are a meticulous translator and writer, an expert in language, style, and cultural nuances. Your task is to evaluate two responses, A and B, against a user's prompt. Select the response that better fulfills the user's request by strictly adhering to all given constraints, such as contextual information or character details. You must respond with only the letter 'A' or 'B'. Do not include any other words, explanations, or punctuation.
+  -
+    role: user
+    content: |
+      **User prompt**: ${inst_src}
+
+      **Response A**: ${out_a}
+
+      **Response B**: ${out_b}
+
+      **Your Judgment**:
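
To connect the yaml to the code: `complete_prompt` fills the `${inst_src}`, `${out_a}`, and `${out_b}` placeholders above via `string.Template.safe_substitute`. A minimal illustration of that mechanism (the helper below is an assumption about what `fill_template_over_messsages` does, not the repo's actual implementation):

```python
from string import Template

def fill_messages(messages, **kwargs):
    # apply safe_substitute to each message's content, leaving unknown ${...} intact
    return [
        {**m, "content": Template(m["content"]).safe_substitute(**kwargs)}
        for m in messages
    ]

user_msg = {
    "role": "user",
    "content": "**User prompt**: ${inst_src}\n\n**Response A**: ${out_a}\n\n**Response B**: ${out_b}",
}
filled = fill_messages([user_msg], inst_src="Fix the tone.", out_a="...", out_b="...")
print(filled[0]["content"])
```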