sonsus committed
Commit 7eb9722 · 1 Parent(s): 8882dbc

How to add a custom prompt: working example (try F5 to debug run)

.vscode/launch.json CHANGED
@@ -21,7 +21,7 @@
         "gpt-4.1-mini",
         "-p",
         // "llmbar",
-        "translation_pair",
+        "post_edit",
 
       ]
     }
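
For reference, the `-p` flag picks which eval prompt the F5 debug run uses. A minimal sketch of how the surrounding launch configuration might look (the configuration `name`, `type`, and `program` fields here are assumptions, not part of this commit):

```jsonc
{
  "name": "varco_arena: debug run",  // hypothetical name
  "type": "debugpy",                 // assumed Python debug adapter
  "request": "launch",
  "program": "main.py",              // assumed entry point, per the README usage
  "args": [
    "gpt-4.1-mini",  // judge model, as in the diff above
    "-p",
    "post_edit"      // selects the new prompt added by this commit
  ]
}
```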
README_kr.md CHANGED
@@ -37,7 +37,6 @@ python main.py -i "rsc/inputs_for_dbg/dbg_400_error_inputs/" -o SOME_WANTED_TARG
 ```
 
 ## Requirements
-Tested on `python = 3.11.9`. `requirements.txt`
 ```
 pip install -r requirements.txt # python 3.11
 
eval_prompt_list.txt CHANGED
@@ -2,4 +2,5 @@ llmbar
 llmbar_brief
 translation_pair
 rag_pair_kr
-translation_fortunecookie
+translation_fortunecookie
+post_edit
varco_arena/varco_arena_core/prompts/__init__.py CHANGED
@@ -9,6 +9,7 @@ from .llmbar_brief import LLMBarBriefPrompt
 from .rag_pair_kr import RagPairKRPrompt
 from .translation_pair import TranslationPairPrompt
 from .translation_fortunecookie import TranslationNewPrompt
+from .post_edit import PostEditPrompt
 
 NAME2PROMPT_CLS = dict(
     llmbar_brief=LLMBarBriefPrompt(),
@@ -16,6 +17,7 @@ NAME2PROMPT_CLS = dict(
     translation_pair=TranslationPairPrompt(),
     rag_pair_kr=RagPairKRPrompt(),
     translation_fortunecookie=TranslationNewPrompt(),
+    post_edit=PostEditPrompt(),
     # contextual_vqa = Contextual_VQA(),
     # contextual_ocr = Contextual_OCR(),
 )
@@ -26,8 +28,9 @@ def load_prompt(
         "llmbar_brief",
         "llmbar",
         "translation_pair",
-        "translation_fortunecookie",
         "rag_pair_kr",
+        "translation_fortunecookie",
+        "post_edit",
     ],
     task: str = "", # used for further prompt variation (eval prompt might depend on task.)
 ):
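
With the registration above in place, the new prompt is reachable by name. A minimal usage sketch, assuming `load_prompt` returns the instance registered in `NAME2PROMPT_CLS` (the example inputs are hypothetical):

```python
from varco_arena.varco_arena_core.prompts import load_prompt

prompt = load_prompt("post_edit")  # -> the PostEditPrompt() registered above
messages = prompt.complete_prompt(
    inst="Post-edit the draft translation to follow the style guide.",  # hypothetical
    src="brand names must stay in English",  # hypothetical glossary/source field
    out_a="candidate translation A",
    out_b="candidate translation B",
)
```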
varco_arena/varco_arena_core/prompts/post_edit.py ADDED
@@ -0,0 +1,90 @@
+import random
+import re
+from typing import *
+
+from .llmbar import LLMBarPrompt
+from .prompt_utils import fill_template_over_messsages
+
+
+class PostEditPrompt(LLMBarPrompt):
+    def __init__(self, prompt_yaml: str = "post_edit.yaml"):
+        super().__init__(prompt_yaml=prompt_yaml)
+
+    def parsed_output(self, response: Any) -> str:
+        """
+        Parses the judge output and returns a decision_token from post_edit.yaml
+        as res_tok. Light guardrails fit here too (e.g., map a stray "a" to "A").
+        """
+        # remove ', "
+        input_string = response.choices[0].message.content
+        input_string = input_string.replace("'", "").replace('"', "").strip()
+
+        if "(A)" in input_string and "(B)" not in input_string:
+            res_tok = "A"
+        elif "(B)" in input_string and "(A)" not in input_string:
+            res_tok = "B"
+        elif "A" in input_string and "B" not in input_string:
+            res_tok = "A"
+        elif "B" in input_string and "A" not in input_string:
+            res_tok = "B"
+        elif "a" in input_string and "b" not in input_string:
+            res_tok = "A"  # normalize lowercase model output to the decision token
+        elif "b" in input_string and "a" not in input_string:
+            res_tok = "B"
+        else:  # both exist or neither exists
+            # fallback for ambiguous or malformed model output
+            res_tok = random.choice(["A", "B"])
+            print("=" * 100)
+            print(f"actual_response={input_string}")
+            print(f"{res_tok=}")
+            print("Response format error (model side, not code side): failed to output in the expected format. Falling back to random choice: ", res_tok)
+            print("=" * 100)
+
+        return res_tok
+
+    def complete_prompt(
+        self,
+        inst: str = None,
+        src: str = None,
+        out_a: str = None,
+        out_b: str = None,
+        **kwargs,
+    ) -> List[Dict]:
+        # build inst_src from inst and src
+        def _combine_inst_src(inst, src):
+            """
+            Return inst alone if the src field is missing, src alone if inst is
+            missing; if both exist, join them on "\n\n".
+            """
+            if not inst:
+                return src
+            elif not src:
+                return inst
+            else:
+                inst_src = f"{inst}\n\n**Glossary**: {src}"
+                return inst_src
+
+        inst_src = _combine_inst_src(inst, src)
+
+        kwargs_to_fill = dict(
+            inst_src=inst_src,
+            out_a=out_a,
+            out_b=out_b,
+        )
+
+        # safe_substitute() the string.Template messages read from post_edit.yaml with kwargs_to_fill
+        complete_prm = fill_template_over_messsages(
+            self.prompt_template, **kwargs_to_fill
+        )
+
+        return complete_prm  # the completed prompt
+
+    @staticmethod
+    def get_criteria_questions(task: str = None):
+        """
+        Used only by LLMBar. Since that flow reserves `task`, this override keeps
+        *.yaml files here from using ${task}.
+        """
+        raise ValueError(
+            f"{__class__.__name__} does not require criteria questions to complete the prompt. It is for LLMBar prompt and its variants"
+        )
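
A quick sketch of the guardrails in `parsed_output`, using a stand-in object shaped like an OpenAI chat-completion response (`SimpleNamespace` here is only a test double, and constructing `PostEditPrompt()` assumes `post_edit.yaml` is found on its expected path):

```python
from types import SimpleNamespace

def fake_response(text: str):
    # mimic response.choices[0].message.content
    msg = SimpleNamespace(content=text)
    return SimpleNamespace(choices=[SimpleNamespace(message=msg)])

prompt = PostEditPrompt()
print(prompt.parsed_output(fake_response("(A)")))     # -> "A"
print(prompt.parsed_output(fake_response("'b'")))     # -> "B" after quote stripping and case normalization
print(prompt.parsed_output(fake_response("A or B")))  # ambiguous -> random fallback, loudly logged
```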
varco_arena/varco_arena_core/prompts/post_edit.yaml ADDED
@@ -0,0 +1,29 @@
+sampling_parameters:
+  stop: []  # leave empty to generate until the model stops on its own
+  temperature: 1.0  # dropped when using reasoning models such as o4-mini
+  logprobs: true  # dropped when using reasoning models such as o4-mini
+  top_logprobs: 20  # dropped for reasoning models such as o4-mini; deprecated, use is discouraged
+
+
+decision_tokens:
+  prefer_1st: A
+  prefer_2nd: B
+
+expected_generation_str: |
+  B
+
+prompt_template:
+  -
+    role: system
+    content: |
+      You are a meticulous translator and writer, an expert in language, style, and cultural nuances. Your task is to evaluate two responses, A and B, against a user's prompt. Select the response that better fulfills the user's request by strictly adhering to all given constraints, such as contextual information or character details. You must respond with only the letter 'A' or 'B'. Do not include any other words, explanations, or punctuation.
+  -
+    role: user
+    content: |
+      **User prompt**: ${inst_src}
+
+      **Response A**: ${out_a}
+
+      **Response B**: ${out_b}
+
+      **Your Judgment**:
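
To connect the yaml to the code: `complete_prompt` fills the `${inst_src}`, `${out_a}`, and `${out_b}` placeholders above via `string.Template.safe_substitute`. A minimal illustration of that mechanism (the helper below is an assumption about what `fill_template_over_messsages` does, not the repo's actual implementation):

```python
from string import Template

def fill_messages(messages, **kwargs):
    # apply safe_substitute to each message's content, leaving unknown ${...} intact
    return [
        {**m, "content": Template(m["content"]).safe_substitute(**kwargs)}
        for m in messages
    ]

user_msg = {
    "role": "user",
    "content": "**User prompt**: ${inst_src}\n\n**Response A**: ${out_a}\n\n**Response B**: ${out_b}",
}
filled = fill_messages([user_msg], inst_src="Fix the tone.", out_a="...", out_b="...")
print(filled[0]["content"])
```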