futosane commited on
Commit
32f768a
·
1 Parent(s): ef30a7a

Update space

Browse files
FinGPT.ipynb CHANGED
@@ -23,7 +23,7 @@
23
  },
24
  {
25
  "cell_type": "code",
26
- "execution_count": null,
27
  "metadata": {},
28
  "outputs": [],
29
  "source": [
@@ -54,9 +54,23 @@
54
  },
55
  {
56
  "cell_type": "code",
57
- "execution_count": null,
58
  "metadata": {},
59
- "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
60
  "source": [
61
  "from datasets import load_dataset\n",
62
  "import datasets\n",
@@ -86,9 +100,27 @@
86
  },
87
  {
88
  "cell_type": "code",
89
- "execution_count": null,
90
  "metadata": {},
91
- "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
92
  "source": [
93
  "tmp_dataset = datasets.concatenate_datasets([tfns]*2)\n",
94
  "train_dataset = tmp_dataset\n",
@@ -114,9 +146,24 @@
114
  },
115
  {
116
  "cell_type": "code",
117
- "execution_count": null,
118
  "metadata": {},
119
- "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
120
  "source": [
121
  "import json\n",
122
  "from tqdm.notebook import tqdm\n",
@@ -155,7 +202,7 @@
155
  },
156
  {
157
  "cell_type": "code",
158
- "execution_count": null,
159
  "metadata": {},
160
  "outputs": [],
161
  "source": [
@@ -207,9 +254,24 @@
207
  },
208
  {
209
  "cell_type": "code",
210
- "execution_count": null,
211
  "metadata": {},
212
- "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
213
  "source": [
214
  "# The script then creates a Hugging Face Dataset object from the generator and saves it to disk.\n",
215
  "save_path = './data/dataset_new'\n",
@@ -236,14 +298,14 @@
236
  },
237
  {
238
  "cell_type": "code",
239
- "execution_count": 1,
240
  "metadata": {},
241
  "outputs": [
242
  {
243
  "name": "stderr",
244
  "output_type": "stream",
245
  "text": [
246
- "W0801 20:19:58.973000 23260 site-packages\\torch\\distributed\\elastic\\multiprocessing\\redirects.py:29] NOTE: Redirects are currently not supported in Windows or MacOs.\n"
247
  ]
248
  }
249
  ],
@@ -300,7 +362,7 @@
300
  },
301
  {
302
  "cell_type": "code",
303
- "execution_count": 2,
304
  "metadata": {},
305
  "outputs": [],
306
  "source": [
@@ -321,7 +383,7 @@
321
  },
322
  {
323
  "cell_type": "code",
324
- "execution_count": 3,
325
  "metadata": {},
326
  "outputs": [
327
  {
@@ -335,7 +397,7 @@
335
  {
336
  "data": {
337
  "application/vnd.jupyter.widget-view+json": {
338
- "model_id": "4bc8b4c85e974cfe806fda92d57ad1c3",
339
  "version_major": 2,
340
  "version_minor": 0
341
  },
@@ -456,7 +518,7 @@
456
  },
457
  {
458
  "cell_type": "code",
459
- "execution_count": 5,
460
  "metadata": {},
461
  "outputs": [],
462
  "source": [
@@ -478,7 +540,7 @@
478
  },
479
  {
480
  "cell_type": "code",
481
- "execution_count": 6,
482
  "metadata": {},
483
  "outputs": [
484
  {
@@ -664,7 +726,7 @@
664
  "trainer.train()\n",
665
  "writer.close()\n",
666
  "# save model\n",
667
- "model.save_pretrained(training_args.output_dir)"
668
  ]
669
  },
670
  {
@@ -683,7 +745,7 @@
683
  },
684
  {
685
  "cell_type": "code",
686
- "execution_count": 4,
687
  "metadata": {},
688
  "outputs": [
689
  {
@@ -696,7 +758,7 @@
696
  {
697
  "data": {
698
  "application/vnd.jupyter.widget-view+json": {
699
- "model_id": "014c4259e386457ca0892de97c5a8ec3",
700
  "version_major": 2,
701
  "version_minor": 0
702
  },
@@ -783,7 +845,7 @@
783
  },
784
  {
785
  "cell_type": "code",
786
- "execution_count": 5,
787
  "metadata": {},
788
  "outputs": [
789
  {
@@ -805,21 +867,155 @@
805
  "name": "stderr",
806
  "output_type": "stream",
807
  "text": [
808
- "100%|██████████| 299/299 [00:48<00:00, 6.23it/s]"
809
  ]
810
  },
811
  {
812
  "name": "stdout",
813
  "output_type": "stream",
814
  "text": [
815
- "Acc: 0.8756281407035176. F1 macro: 0.8401912464851741. F1 micro: 0.8756281407035176. F1 weighted (BloombergGPT): 0.8753926635410131. \n"
816
  ]
817
  },
818
  {
819
  "name": "stderr",
820
  "output_type": "stream",
821
  "text": [
822
- "\n"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
823
  ]
824
  }
825
  ],
 
23
  },
24
  {
25
  "cell_type": "code",
26
+ "execution_count": 2,
27
  "metadata": {},
28
  "outputs": [],
29
  "source": [
 
54
  },
55
  {
56
  "cell_type": "code",
57
+ "execution_count": 3,
58
  "metadata": {},
59
+ "outputs": [
60
+ {
61
+ "data": {
62
+ "text/plain": [
63
+ "Dataset({\n",
64
+ " features: ['input', 'output', 'instruction'],\n",
65
+ " num_rows: 9543\n",
66
+ "})"
67
+ ]
68
+ },
69
+ "execution_count": 3,
70
+ "metadata": {},
71
+ "output_type": "execute_result"
72
+ }
73
+ ],
74
  "source": [
75
  "from datasets import load_dataset\n",
76
  "import datasets\n",
 
100
  },
101
  {
102
  "cell_type": "code",
103
+ "execution_count": 4,
104
  "metadata": {},
105
+ "outputs": [
106
+ {
107
+ "name": "stdout",
108
+ "output_type": "stream",
109
+ "text": [
110
+ "19086\n"
111
+ ]
112
+ },
113
+ {
114
+ "data": {
115
+ "text/plain": [
116
+ "(19086, 3)"
117
+ ]
118
+ },
119
+ "execution_count": 4,
120
+ "metadata": {},
121
+ "output_type": "execute_result"
122
+ }
123
+ ],
124
  "source": [
125
  "tmp_dataset = datasets.concatenate_datasets([tfns]*2)\n",
126
  "train_dataset = tmp_dataset\n",
 
146
  },
147
  {
148
  "cell_type": "code",
149
+ "execution_count": 5,
150
  "metadata": {},
151
+ "outputs": [
152
+ {
153
+ "data": {
154
+ "application/vnd.jupyter.widget-view+json": {
155
+ "model_id": "8204ff4d7ae048508ff011ff341df7b3",
156
+ "version_major": 2,
157
+ "version_minor": 0
158
+ },
159
+ "text/plain": [
160
+ "formatting..: 0%| | 0/19086 [00:00<?, ?it/s]"
161
+ ]
162
+ },
163
+ "metadata": {},
164
+ "output_type": "display_data"
165
+ }
166
+ ],
167
  "source": [
168
  "import json\n",
169
  "from tqdm.notebook import tqdm\n",
 
202
  },
203
  {
204
  "cell_type": "code",
205
+ "execution_count": 6,
206
  "metadata": {},
207
  "outputs": [],
208
  "source": [
 
254
  },
255
  {
256
  "cell_type": "code",
257
+ "execution_count": 7,
258
  "metadata": {},
259
+ "outputs": [
260
+ {
261
+ "data": {
262
+ "application/vnd.jupyter.widget-view+json": {
263
+ "model_id": "ee7846a30c5d4d59be6bd5e2cf6c1870",
264
+ "version_major": 2,
265
+ "version_minor": 0
266
+ },
267
+ "text/plain": [
268
+ "Saving the dataset (0/1 shards): 0%| | 0/19086 [00:00<?, ? examples/s]"
269
+ ]
270
+ },
271
+ "metadata": {},
272
+ "output_type": "display_data"
273
+ }
274
+ ],
275
  "source": [
276
  "# The script then creates a Hugging Face Dataset object from the generator and saves it to disk.\n",
277
  "save_path = './data/dataset_new'\n",
 
298
  },
299
  {
300
  "cell_type": "code",
301
+ "execution_count": 8,
302
  "metadata": {},
303
  "outputs": [
304
  {
305
  "name": "stderr",
306
  "output_type": "stream",
307
  "text": [
308
+ "W0804 14:22:55.473000 584 site-packages\\torch\\distributed\\elastic\\multiprocessing\\redirects.py:29] NOTE: Redirects are currently not supported in Windows or MacOs.\n"
309
  ]
310
  }
311
  ],
 
362
  },
363
  {
364
  "cell_type": "code",
365
+ "execution_count": 9,
366
  "metadata": {},
367
  "outputs": [],
368
  "source": [
 
383
  },
384
  {
385
  "cell_type": "code",
386
+ "execution_count": 10,
387
  "metadata": {},
388
  "outputs": [
389
  {
 
397
  {
398
  "data": {
399
  "application/vnd.jupyter.widget-view+json": {
400
+ "model_id": "85d1520006a04d289fe3431da7df7e42",
401
  "version_major": 2,
402
  "version_minor": 0
403
  },
 
518
  },
519
  {
520
  "cell_type": "code",
521
+ "execution_count": null,
522
  "metadata": {},
523
  "outputs": [],
524
  "source": [
 
540
  },
541
  {
542
  "cell_type": "code",
543
+ "execution_count": null,
544
  "metadata": {},
545
  "outputs": [
546
  {
 
726
  "trainer.train()\n",
727
  "writer.close()\n",
728
  "# save model\n",
729
+ "# model.save_pretrained(training_args.output_dir)"
730
  ]
731
  },
732
  {
 
745
  },
746
  {
747
  "cell_type": "code",
748
+ "execution_count": null,
749
  "metadata": {},
750
  "outputs": [
751
  {
 
758
  {
759
  "data": {
760
  "application/vnd.jupyter.widget-view+json": {
761
+ "model_id": "f632e66d18914e13a9febb54f4e9ee42",
762
  "version_major": 2,
763
  "version_minor": 0
764
  },
 
845
  },
846
  {
847
  "cell_type": "code",
848
+ "execution_count": 12,
849
  "metadata": {},
850
  "outputs": [
851
  {
 
867
  "name": "stderr",
868
  "output_type": "stream",
869
  "text": [
870
+ " 0%| | 0/299 [00:01<?, ?it/s]\n"
871
  ]
872
  },
873
  {
874
  "name": "stdout",
875
  "output_type": "stream",
876
  "text": [
877
+ "Unexpected exception formatting exception. Falling back to standard exception\n"
878
  ]
879
  },
880
  {
881
  "name": "stderr",
882
  "output_type": "stream",
883
  "text": [
884
+ "Traceback (most recent call last):\n",
885
+ " File \"d:\\anaconda\\envs\\fingpt-env\\Lib\\site-packages\\IPython\\core\\interactiveshell.py\", line 3667, in run_code\n",
886
+ " exec(code_obj, self.user_global_ns, self.user_ns)\n",
887
+ " File \"C:\\Users\\23524\\AppData\\Local\\Temp\\ipykernel_584\\2487751523.py\", line 5, in <module>\n",
888
+ " res = test_tfns(model, tokenizer, batch_size = batch_size)\n",
889
+ " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
890
+ " File \"d:\\fingpt\\tfns.py\", line 62, in test_tfns\n",
891
+ " res = model.generate(**tokens, max_length=512)\n",
892
+ " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
893
+ " File \"d:\\anaconda\\envs\\fingpt-env\\Lib\\site-packages\\peft\\peft_model.py\", line 1148, in generate\n",
894
+ " outputs = self.base_model.generate(*args, **kwargs)\n",
895
+ " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
896
+ " File \"d:\\anaconda\\envs\\fingpt-env\\Lib\\site-packages\\torch\\utils\\_contextlib.py\", line 120, in decorate_context\n",
897
+ " return func(*args, **kwargs)\n",
898
+ " ^^^^^^^^^^^^^^^^^^^^^\n",
899
+ " File \"d:\\anaconda\\envs\\fingpt-env\\Lib\\site-packages\\transformers\\generation\\utils.py\", line 1522, in generate\n",
900
+ " return self.greedy_search(\n",
901
+ " ^^^^^^^^^^^^^^^^^^^\n",
902
+ " File \"d:\\anaconda\\envs\\fingpt-env\\Lib\\site-packages\\transformers\\generation\\utils.py\", line 2339, in greedy_search\n",
903
+ " outputs = self(\n",
904
+ " ^^^^^\n",
905
+ " File \"d:\\anaconda\\envs\\fingpt-env\\Lib\\site-packages\\torch\\nn\\modules\\module.py\", line 1773, in _wrapped_call_impl\n",
906
+ " return self._call_impl(*args, **kwargs)\n",
907
+ " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
908
+ " File \"d:\\anaconda\\envs\\fingpt-env\\Lib\\site-packages\\torch\\nn\\modules\\module.py\", line 1784, in _call_impl\n",
909
+ " return forward_call(*args, **kwargs)\n",
910
+ " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
911
+ " File \"d:\\anaconda\\envs\\fingpt-env\\Lib\\site-packages\\accelerate\\hooks.py\", line 165, in new_forward\n",
912
+ " output = module._old_forward(*args, **kwargs)\n",
913
+ " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
914
+ " File \"C:\\Users\\23524/.cache\\huggingface\\modules\\transformers_modules\\THUDM\\chatglm2-6b\\d2e2d91789248536a747d9ce60642a336444186c\\modeling_chatglm.py\", line 937, in forward\n",
915
+ " transformer_outputs = self.transformer(\n",
916
+ " ^^^^^^^^^^^^^^^^^\n",
917
+ " File \"d:\\anaconda\\envs\\fingpt-env\\Lib\\site-packages\\torch\\nn\\modules\\module.py\", line 1773, in _wrapped_call_impl\n",
918
+ " return self._call_impl(*args, **kwargs)\n",
919
+ " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
920
+ " File \"d:\\anaconda\\envs\\fingpt-env\\Lib\\site-packages\\torch\\nn\\modules\\module.py\", line 1784, in _call_impl\n",
921
+ " return forward_call(*args, **kwargs)\n",
922
+ " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
923
+ " File \"d:\\anaconda\\envs\\fingpt-env\\Lib\\site-packages\\accelerate\\hooks.py\", line 165, in new_forward\n",
924
+ " output = module._old_forward(*args, **kwargs)\n",
925
+ " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
926
+ " File \"C:\\Users\\23524/.cache\\huggingface\\modules\\transformers_modules\\THUDM\\chatglm2-6b\\d2e2d91789248536a747d9ce60642a336444186c\\modeling_chatglm.py\", line 807, in forward\n",
927
+ " inputs_embeds = self.embedding(input_ids)\n",
928
+ " ^^^^^^^^^^^^^^^^^^^^^^^^^\n",
929
+ " File \"d:\\anaconda\\envs\\fingpt-env\\Lib\\site-packages\\torch\\nn\\modules\\module.py\", line 1773, in _wrapped_call_impl\n",
930
+ " return self._call_impl(*args, **kwargs)\n",
931
+ " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
932
+ " File \"d:\\anaconda\\envs\\fingpt-env\\Lib\\site-packages\\torch\\nn\\modules\\module.py\", line 1784, in _call_impl\n",
933
+ " return forward_call(*args, **kwargs)\n",
934
+ " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
935
+ " File \"d:\\anaconda\\envs\\fingpt-env\\Lib\\site-packages\\accelerate\\hooks.py\", line 165, in new_forward\n",
936
+ " output = module._old_forward(*args, **kwargs)\n",
937
+ " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
938
+ " File \"C:\\Users\\23524/.cache\\huggingface\\modules\\transformers_modules\\THUDM\\chatglm2-6b\\d2e2d91789248536a747d9ce60642a336444186c\\modeling_chatglm.py\", line 723, in forward\n",
939
+ " words_embeddings = self.word_embeddings(input_ids)\n",
940
+ " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
941
+ " File \"d:\\anaconda\\envs\\fingpt-env\\Lib\\site-packages\\torch\\nn\\modules\\module.py\", line 1773, in _wrapped_call_impl\n",
942
+ " return self._call_impl(*args, **kwargs)\n",
943
+ " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
944
+ " File \"d:\\anaconda\\envs\\fingpt-env\\Lib\\site-packages\\torch\\nn\\modules\\module.py\", line 1784, in _call_impl\n",
945
+ " return forward_call(*args, **kwargs)\n",
946
+ " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
947
+ " File \"d:\\anaconda\\envs\\fingpt-env\\Lib\\site-packages\\accelerate\\hooks.py\", line 165, in new_forward\n",
948
+ " output = module._old_forward(*args, **kwargs)\n",
949
+ " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
950
+ " File \"d:\\anaconda\\envs\\fingpt-env\\Lib\\site-packages\\torch\\nn\\modules\\sparse.py\", line 192, in forward\n",
951
+ " return F.embedding(\n",
952
+ " ^^^^^^^^^^^^\n",
953
+ " File \"d:\\anaconda\\envs\\fingpt-env\\Lib\\site-packages\\torch\\nn\\functional.py\", line 2546, in embedding\n",
954
+ " return torch.embedding(weight, input, padding_idx, scale_grad_by_freq, sparse)\n",
955
+ " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
956
+ "torch.AcceleratorError: CUDA error: out of memory\n",
957
+ "Search for `cudaErrorMemoryAllocation' in https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__TYPES.html for more information.\n",
958
+ "CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.\n",
959
+ "For debugging consider passing CUDA_LAUNCH_BLOCKING=1\n",
960
+ "Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.\n",
961
+ "\n",
962
+ "\n",
963
+ "During handling of the above exception, another exception occurred:\n",
964
+ "\n",
965
+ "Traceback (most recent call last):\n",
966
+ " File \"d:\\anaconda\\envs\\fingpt-env\\Lib\\site-packages\\IPython\\core\\interactiveshell.py\", line 2176, in showtraceback\n",
967
+ " stb = self.InteractiveTB.structured_traceback(\n",
968
+ " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
969
+ " File \"d:\\anaconda\\envs\\fingpt-env\\Lib\\site-packages\\IPython\\core\\ultratb.py\", line 1182, in structured_traceback\n",
970
+ " return FormattedTB.structured_traceback(\n",
971
+ " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
972
+ " File \"d:\\anaconda\\envs\\fingpt-env\\Lib\\site-packages\\IPython\\core\\ultratb.py\", line 1053, in structured_traceback\n",
973
+ " return VerboseTB.structured_traceback(\n",
974
+ " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
975
+ " File \"d:\\anaconda\\envs\\fingpt-env\\Lib\\site-packages\\IPython\\core\\ultratb.py\", line 861, in structured_traceback\n",
976
+ " formatted_exceptions: list[list[str]] = self.format_exception_as_a_whole(\n",
977
+ " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
978
+ " File \"d:\\anaconda\\envs\\fingpt-env\\Lib\\site-packages\\IPython\\core\\ultratb.py\", line 773, in format_exception_as_a_whole\n",
979
+ " frames.append(self.format_record(record))\n",
980
+ " ^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
981
+ " File \"d:\\anaconda\\envs\\fingpt-env\\Lib\\site-packages\\IPython\\core\\ultratb.py\", line 651, in format_record\n",
982
+ " _format_traceback_lines(\n",
983
+ " File \"d:\\anaconda\\envs\\fingpt-env\\Lib\\site-packages\\IPython\\core\\tbtools.py\", line 99, in _format_traceback_lines\n",
984
+ " line = stack_line.render(pygmented=has_colors).rstrip(\"\\n\") + \"\\n\"\n",
985
+ " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
986
+ " File \"d:\\anaconda\\envs\\fingpt-env\\Lib\\site-packages\\stack_data\\core.py\", line 360, in render\n",
987
+ " start_line, lines = self.frame_info._pygmented_scope_lines\n",
988
+ " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
989
+ " File \"d:\\anaconda\\envs\\fingpt-env\\Lib\\site-packages\\stack_data\\utils.py\", line 145, in cached_property_wrapper\n",
990
+ " value = obj.__dict__[self.func.__name__] = self.func(obj)\n",
991
+ " ^^^^^^^^^^^^^^\n",
992
+ " File \"d:\\anaconda\\envs\\fingpt-env\\Lib\\site-packages\\stack_data\\core.py\", line 780, in _pygmented_scope_lines\n",
993
+ " lines = _pygmented_with_ranges(formatter, code, ranges)\n",
994
+ " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
995
+ " File \"d:\\anaconda\\envs\\fingpt-env\\Lib\\site-packages\\stack_data\\utils.py\", line 165, in _pygmented_with_ranges\n",
996
+ " return pygments.highlight(code, lexer, formatter).splitlines()\n",
997
+ " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
998
+ " File \"d:\\anaconda\\envs\\fingpt-env\\Lib\\site-packages\\pygments\\__init__.py\", line 82, in highlight\n",
999
+ " return format(lex(code, lexer), formatter, outfile)\n",
1000
+ " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
1001
+ " File \"d:\\anaconda\\envs\\fingpt-env\\Lib\\site-packages\\pygments\\__init__.py\", line 64, in format\n",
1002
+ " formatter.format(tokens, realoutfile)\n",
1003
+ " File \"d:\\anaconda\\envs\\fingpt-env\\Lib\\site-packages\\pygments\\formatters\\terminal256.py\", line 250, in format\n",
1004
+ " return Formatter.format(self, tokensource, outfile)\n",
1005
+ " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
1006
+ " File \"d:\\anaconda\\envs\\fingpt-env\\Lib\\site-packages\\pygments\\formatter.py\", line 124, in format\n",
1007
+ " return self.format_unencoded(tokensource, outfile)\n",
1008
+ " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
1009
+ " File \"d:\\anaconda\\envs\\fingpt-env\\Lib\\site-packages\\pygments\\formatters\\terminal256.py\", line 256, in format_unencoded\n",
1010
+ " for ttype, value in tokensource:\n",
1011
+ " File \"d:\\anaconda\\envs\\fingpt-env\\Lib\\site-packages\\stack_data\\utils.py\", line 158, in get_tokens\n",
1012
+ " for ttype, value in super().get_tokens(text):\n",
1013
+ " File \"d:\\anaconda\\envs\\fingpt-env\\Lib\\site-packages\\pygments\\lexer.py\", line 270, in streamer\n",
1014
+ " for _, t, v in self.get_tokens_unprocessed(text):\n",
1015
+ " File \"d:\\anaconda\\envs\\fingpt-env\\Lib\\site-packages\\pygments\\lexer.py\", line 712, in get_tokens_unprocessed\n",
1016
+ " m = rexmatch(text, pos)\n",
1017
+ " ^^^^^^^^^^^^^^^^^^^\n",
1018
+ "MemoryError\n"
1019
  ]
1020
  }
1021
  ],
data/dataset_new/cache-73a60c7a93c40e4d.arrow DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:7b6320f4c6a40b0bca5fd1336162679cc0774a05571c48f611cb5eb448d8e544
3
- size 124752
 
 
 
 
data/dataset_new/cache-fba1c6867270024b.arrow DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:4c4dcf79222ef1e06ebad7422e5ec4ba475314461029ad502303af90ad7ecf25
3
- size 31424
 
 
 
 
requirements.txt CHANGED
@@ -1 +1,3 @@
1
- huggingface_hub==0.25.2
 
 
 
1
+ huggingface_hub==0.25.2
2
+ transformers
3
+ torch