allow the sharegpt handler to also better handle datasets destined for openai finetuning (#1361)
Browse files
* allow the sharegpt handler to also better handle datasets destined for openai finetuning
* make sure to support system role
src/axolotl/prompt_strategies/sharegpt.py
CHANGED
|
@@ -82,7 +82,7 @@ class SimpleShareGPTPromptTokenizingStrategy(ShareGPTPromptTokenizingStrategy):
|
|
| 82 |
basic sharegpt strategy to grab conversations from the sample row
|
| 83 |
"""
|
| 84 |
|
| 85 |
-
_strict =
|
| 86 |
|
| 87 |
@property
|
| 88 |
def strict(self):
|
|
@@ -96,10 +96,25 @@ class SimpleShareGPTPromptTokenizingStrategy(ShareGPTPromptTokenizingStrategy):
|
|
| 96 |
conversations = prompt["conversations"]
|
| 97 |
if self.strict:
|
| 98 |
return conversations
|
| 99 |
-
|
| 100 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 101 |
turns = [
|
| 102 |
-
{"from": role_map[t[
|
|
|
|
| 103 |
]
|
| 104 |
return turns
|
| 105 |
|
|
|
|
| 82 |
basic sharegpt strategy to grab conversations from the sample row
|
| 83 |
"""
|
| 84 |
|
| 85 |
+
_strict = False
|
| 86 |
|
| 87 |
@property
|
| 88 |
def strict(self):
|
|
|
|
| 96 |
conversations = prompt["conversations"]
|
| 97 |
if self.strict:
|
| 98 |
return conversations
|
| 99 |
+
role_key = "from"
|
| 100 |
+
if "role" in conversations[0].keys():
|
| 101 |
+
role_key = "role"
|
| 102 |
+
value_key = "value"
|
| 103 |
+
if "text" in conversations[0].keys():
|
| 104 |
+
value_key = "text"
|
| 105 |
+
elif "content" in conversations[0].keys():
|
| 106 |
+
value_key = "content"
|
| 107 |
+
# remap roles - allow for assistant turn
|
| 108 |
+
role_map = {
|
| 109 |
+
"user": "human",
|
| 110 |
+
"human": "human",
|
| 111 |
+
"assistant": "gpt",
|
| 112 |
+
"gpt": "gpt",
|
| 113 |
+
"system": "system",
|
| 114 |
+
}
|
| 115 |
turns = [
|
| 116 |
+
{"from": role_map[t[role_key]], "value": t[value_key]}
|
| 117 |
+
for t in conversations
|
| 118 |
]
|
| 119 |
return turns
|
| 120 |
|