mtasic85 committed on
Commit
24a69e8
·
1 Parent(s): b34d4e8
scripts/base_datasets.py CHANGED
@@ -147,15 +147,3 @@ base_datasets = [
147
  # 15.6 MB, 24,926
148
  {'kind': 'base', 'path': 'garage-bAInd/Open-Platypus', 'split': 'train', 'format': '{instruction}\n{output}'},
149
  ]
150
-
151
- base_datasets = [
152
- #
153
- # light instructions
154
- #
155
- # 44.3 MB, 51,760
156
- {'kind': 'base', 'path': 'yahma/alpaca-cleaned', 'split': 'train', 'format': '{instruction}\n{input}\n{output}'},
157
- # 11 MB, 12,564
158
- {'kind': 'base', 'path': 'Cleanlab/databricks-dolly-15k-cleanset', 'split': 'train', 'format': '{instruction}\n{context}\n{response}'},
159
- # 15.6 MB, 24,926
160
- {'kind': 'base', 'path': 'garage-bAInd/Open-Platypus', 'split': 'train', 'format': '{instruction}\n{output}'},
161
- ]
 
147
  # 15.6 MB, 24,926
148
  {'kind': 'base', 'path': 'garage-bAInd/Open-Platypus', 'split': 'train', 'format': '{instruction}\n{output}'},
149
  ]
 
 
 
 
 
 
 
 
 
 
 
 
scripts/base_instruct_datasets.py CHANGED
@@ -13,7 +13,7 @@ You are an AI assistant.
13
  Your primary directive is to provide well-reasoned, structured, and extensively detailed responses.
14
 
15
  Formatting Requirements:
16
- - Always structure your replies using: <think>{reasoning}</think>{answer}
17
  - The <think></think> block should contain at least six reasoning steps when applicable.
18
  - If the answer requires minimal thought, the <think></think> block may be left empty.
19
  - The user does not see the <think></think> section. Any information critical to the response must be included in the answer.
@@ -24,7 +24,10 @@ Response Guidelines:
24
  - Scientific and Logical Approach: Your explanations should reflect the depth and precision of the greatest scientific minds.
25
  - Prioritize Reasoning: Always reason through the problem first, unless the answer is trivial.
26
  - Concise yet Complete: Ensure responses are informative, yet to the point without unnecessary elaboration.
27
- - Maintain a professional, intelligent, and analytical tone in all interactions.'''
 
 
 
28
 
29
  base_instruct_datasets = [
30
  # 65.7 MB, 11,578
 
13
  Your primary directive is to provide well-reasoned, structured, and extensively detailed responses.
14
 
15
  Formatting Requirements:
16
+ - Structure your replies using: <think>{reasoning}</think>{answer}
17
  - The <think></think> block should contain at least six reasoning steps when applicable.
18
  - If the answer requires minimal thought, the <think></think> block may be left empty.
19
  - The user does not see the <think></think> section. Any information critical to the response must be included in the answer.
 
24
  - Scientific and Logical Approach: Your explanations should reflect the depth and precision of the greatest scientific minds.
25
  - Prioritize Reasoning: Always reason through the problem first, unless the answer is trivial.
26
  - Concise yet Complete: Ensure responses are informative, yet to the point without unnecessary elaboration.
27
+ - Maintain a professional, intelligent, and analytical tone in all interactions.
28
+
29
+ If user provides <question>...</question> and expects <answer>...</answer>:
30
+ - Structure your replies using: <question>{User’s exact input}</question><think>{reasoning}</think><answer>{answer}</answer>'''
31
 
32
  base_instruct_datasets = [
33
  # 65.7 MB, 11,578
scripts/prepare_base_datasets.py CHANGED
@@ -20,7 +20,12 @@ seqs = [
20
  # (16385, 32769, 32769, 500),
21
  # (32769, 65537, 65537, 250),
22
  # (65537, 131073, 131073, 125),
 
23
  (0, 1073741824, 8193, 2000),
 
 
 
 
24
  ]
25
 
26
  #
 
20
  # (16385, 32769, 32769, 500),
21
  # (32769, 65537, 65537, 250),
22
  # (65537, 131073, 131073, 125),
23
+
24
  (0, 1073741824, 8193, 2000),
25
+ (8193, 16385, 16385, 1000),
26
+ (16385, 32769, 32769, 500),
27
+ (32769, 65537, 65537, 250),
28
+ (65537, 131073, 131073, 125),
29
  ]
30
 
31
  #
scripts/pretrain_base_model_0.yaml CHANGED
@@ -61,7 +61,7 @@ train:
61
  global_batch_size: 512
62
 
63
  # Number of samples per data-parallel rank (type: int, default: 4)
64
- micro_batch_size: 2
65
 
66
  # Number of iterations with learning rate warmup active (type: int, default: 2000)
67
  lr_warmup_steps: 2000
 
61
  global_batch_size: 512
62
 
63
  # Number of samples per data-parallel rank (type: int, default: 4)
64
+ micro_batch_size: 1
65
 
66
  # Number of iterations with learning rate warmup active (type: int, default: 2000)
67
  lr_warmup_steps: 2000