Saeid commited on
Commit
542f0e8
·
verified ·
1 Parent(s): 07484ed

Training in progress, epoch 0

Browse files
adapter_config.json ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alpha_pattern": {},
3
+ "auto_mapping": {
4
+ "base_model_class": "ViTForImageClassification",
5
+ "parent_library": "transformers.models.vit.modeling_vit"
6
+ },
7
+ "base_model_name_or_path": "/home/cc/polymorph/fine-tuning/results/train-lora/5/base_model",
8
+ "bias": "none",
9
+ "fan_in_fan_out": false,
10
+ "inference_mode": true,
11
+ "init_lora_weights": true,
12
+ "layer_replication": null,
13
+ "layers_pattern": null,
14
+ "layers_to_transform": null,
15
+ "loftq_config": {},
16
+ "lora_alpha": 16,
17
+ "lora_dropout": 0.1,
18
+ "megatron_config": null,
19
+ "megatron_core": "megatron.core",
20
+ "modules_to_save": [
21
+ "classifier"
22
+ ],
23
+ "peft_type": "LORA",
24
+ "r": 16,
25
+ "rank_pattern": {},
26
+ "revision": null,
27
+ "target_modules": [
28
+ "value",
29
+ "query"
30
+ ],
31
+ "task_type": null,
32
+ "use_dora": false,
33
+ "use_rslora": false
34
+ }
adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:491fba07b6662ee825210fa5a398275c9b87e79fd07b27fb4591b8238de6b60c
3
+ size 2372592
preprocessor_config.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_normalize": true,
3
+ "do_rescale": true,
4
+ "do_resize": true,
5
+ "image_mean": [
6
+ 0.5,
7
+ 0.5,
8
+ 0.5
9
+ ],
10
+ "image_processor_type": "ViTImageProcessor",
11
+ "image_std": [
12
+ 0.5,
13
+ 0.5,
14
+ 0.5
15
+ ],
16
+ "resample": 2,
17
+ "rescale_factor": 0.00392156862745098,
18
+ "size": {
19
+ "height": 224,
20
+ "width": 224
21
+ }
22
+ }
runs/Jul17_16-22-01_gpu/events.out.tfevents.1721233323.gpu.2115683.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:303ef896a1277085a548aaa1b48e19fa060b55b06dfa7328fa16d0bf4b3e9344
3
+ size 5374
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e4d5975e4236cbb57f2744c0bb04888a6ce06aa5c95d1f870e20e7dec7483ba8
3
+ size 5240
wandb/debug-internal.log ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2024-07-17 16:22:04,040 INFO StreamThr :2116314 [internal.py:wandb_internal():85] W&B internal server running at pid: 2116314, started at: 2024-07-17 16:22:04.039368
2
+ 2024-07-17 16:22:04,043 DEBUG HandlerThread:2116314 [handler.py:handle_request():158] handle_request: status
3
+ 2024-07-17 16:22:04,046 INFO WriterThread:2116314 [datastore.py:open_for_write():87] open: /home/cc/polymorph/fine-tuning/results/train-lora/5/loras/19/wandb/run-20240717_162204-wglu07sk/run-wglu07sk.wandb
4
+ 2024-07-17 16:22:04,050 DEBUG SenderThread:2116314 [sender.py:send():379] send: header
5
+ 2024-07-17 16:22:04,051 DEBUG SenderThread:2116314 [sender.py:send():379] send: run
6
+ 2024-07-17 16:22:04,205 INFO SenderThread:2116314 [dir_watcher.py:__init__():211] watching files in: /home/cc/polymorph/fine-tuning/results/train-lora/5/loras/19/wandb/run-20240717_162204-wglu07sk/files
7
+ 2024-07-17 16:22:04,205 INFO SenderThread:2116314 [sender.py:_start_run_threads():1188] run started: wglu07sk with start time 1721233324.03891
8
+ 2024-07-17 16:22:04,218 DEBUG HandlerThread:2116314 [handler.py:handle_request():158] handle_request: check_version
9
+ 2024-07-17 16:22:04,219 DEBUG SenderThread:2116314 [sender.py:send_request():406] send_request: check_version
10
+ 2024-07-17 16:22:04,281 DEBUG HandlerThread:2116314 [handler.py:handle_request():158] handle_request: run_start
11
+ 2024-07-17 16:22:04,332 DEBUG HandlerThread:2116314 [system_info.py:__init__():26] System info init
12
+ 2024-07-17 16:22:04,332 DEBUG HandlerThread:2116314 [system_info.py:__init__():41] System info init done
13
+ 2024-07-17 16:22:04,332 INFO HandlerThread:2116314 [system_monitor.py:start():194] Starting system monitor
14
+ 2024-07-17 16:22:04,332 INFO SystemMonitor:2116314 [system_monitor.py:_start():158] Starting system asset monitoring threads
15
+ 2024-07-17 16:22:04,332 INFO HandlerThread:2116314 [system_monitor.py:probe():214] Collecting system info
16
+ 2024-07-17 16:22:04,333 INFO SystemMonitor:2116314 [interfaces.py:start():188] Started cpu monitoring
17
+ 2024-07-17 16:22:04,335 INFO SystemMonitor:2116314 [interfaces.py:start():188] Started disk monitoring
18
+ 2024-07-17 16:22:04,336 INFO SystemMonitor:2116314 [interfaces.py:start():188] Started gpu monitoring
19
+ 2024-07-17 16:22:04,337 INFO SystemMonitor:2116314 [interfaces.py:start():188] Started memory monitoring
20
+ 2024-07-17 16:22:04,338 INFO SystemMonitor:2116314 [interfaces.py:start():188] Started network monitoring
21
+ 2024-07-17 16:22:04,400 DEBUG HandlerThread:2116314 [system_info.py:probe():152] Probing system
22
+ 2024-07-17 16:22:04,405 DEBUG HandlerThread:2116314 [system_info.py:_probe_git():137] Probing git
23
+ 2024-07-17 16:22:04,416 DEBUG HandlerThread:2116314 [system_info.py:_probe_git():145] Probing git done
24
+ 2024-07-17 16:22:04,416 DEBUG HandlerThread:2116314 [system_info.py:probe():200] Probing system done
25
+ 2024-07-17 16:22:04,416 DEBUG HandlerThread:2116314 [system_monitor.py:probe():223] {'os': 'Linux-5.15.0-101-generic-x86_64-with-glibc2.35', 'python': '3.11.9', 'heartbeatAt': '2024-07-17T16:22:04.400954', 'startedAt': '2024-07-17T16:22:04.032523', 'docker': None, 'cuda': None, 'args': (), 'state': 'running', 'program': '/home/cc/polymorph/fine-tuning/main-lora-train.py', 'codePathLocal': 'main-lora-train.py', 'codePath': 'fine-tuning/main-lora-train.py', 'git': {'remote': 'https://github.com/inference-serving/polymorph.git', 'commit': 'e84189a37f0838a7e4ac1496b2345fe84c6a7683'}, 'email': '[email protected]', 'root': '/home/cc/polymorph', 'host': 'gpu', 'username': 'cc', 'executable': '/home/cc/miniconda3/envs/vision/bin/python', 'cpu_count': 24, 'cpu_count_logical': 48, 'cpu_freq': {'current': 2576.3446041666666, 'min': 1000.0, 'max': 3700.0}, 'cpu_freq_per_core': [{'current': 2600.0, 'min': 1000.0, 'max': 3700.0}, {'current': 2600.0, 'min': 1000.0, 'max': 3700.0}, {'current': 2600.0, 'min': 1000.0, 'max': 3700.0}, {'current': 2600.0, 'min': 1000.0, 'max': 3700.0}, {'current': 2600.0, 'min': 1000.0, 'max': 3700.0}, {'current': 2600.0, 'min': 1000.0, 'max': 3700.0}, {'current': 2600.0, 'min': 1000.0, 'max': 3700.0}, {'current': 2600.0, 'min': 1000.0, 'max': 3700.0}, {'current': 2600.0, 'min': 1000.0, 'max': 3700.0}, {'current': 2600.0, 'min': 1000.0, 'max': 3700.0}, {'current': 2600.0, 'min': 1000.0, 'max': 3700.0}, {'current': 2600.0, 'min': 1000.0, 'max': 3700.0}, {'current': 2600.0, 'min': 1000.0, 'max': 3700.0}, {'current': 2600.0, 'min': 1000.0, 'max': 3700.0}, {'current': 2600.0, 'min': 1000.0, 'max': 3700.0}, {'current': 2600.0, 'min': 1000.0, 'max': 3700.0}, {'current': 2600.0, 'min': 1000.0, 'max': 3700.0}, {'current': 2600.0, 'min': 1000.0, 'max': 3700.0}, {'current': 2600.0, 'min': 1000.0, 'max': 3700.0}, {'current': 2600.0, 'min': 1000.0, 'max': 3700.0}, {'current': 2600.0, 'min': 1000.0, 'max': 3700.0}, {'current': 2600.0, 'min': 1000.0, 'max': 3700.0}, {'current': 2600.0, 'min': 1000.0, 'max': 3700.0}, {'current': 2600.0, 'min': 1000.0, 'max': 3700.0}, {'current': 2600.0, 'min': 1000.0, 'max': 3700.0}, {'current': 2600.0, 'min': 1000.0, 'max': 3700.0}, {'current': 2600.0, 'min': 1000.0, 'max': 3700.0}, {'current': 2600.0, 'min': 1000.0, 'max': 3700.0}, {'current': 2600.0, 'min': 1000.0, 'max': 3700.0}, {'current': 2600.0, 'min': 1000.0, 'max': 3700.0}, {'current': 2600.0, 'min': 1000.0, 'max': 3700.0}, {'current': 2600.0, 'min': 1000.0, 'max': 3700.0}, {'current': 2600.0, 'min': 1000.0, 'max': 3700.0}, {'current': 2600.0, 'min': 1000.0, 'max': 3700.0}, {'current': 2600.0, 'min': 1000.0, 'max': 3700.0}, {'current': 2600.0, 'min': 1000.0, 'max': 3700.0}, {'current': 2600.0, 'min': 1000.0, 'max': 3700.0}, {'current': 2600.0, 'min': 1000.0, 'max': 3700.0}, {'current': 2600.0, 'min': 1000.0, 'max': 3700.0}, {'current': 2600.0, 'min': 1000.0, 'max': 3700.0}, {'current': 2600.0, 'min': 1000.0, 'max': 3700.0}, {'current': 2600.0, 'min': 1000.0, 'max': 3700.0}, {'current': 2600.0, 'min': 1000.0, 'max': 3700.0}, {'current': 2600.0, 'min': 1000.0, 'max': 3700.0}, {'current': 2600.0, 'min': 1000.0, 'max': 3700.0}, {'current': 1401.746, 'min': 1000.0, 'max': 3700.0}, {'current': 2600.0, 'min': 1000.0, 'max': 3700.0}, {'current': 2600.0, 'min': 1000.0, 'max': 3700.0}], 'disk': {'/': {'total': 208.95753479003906, 'used': 157.59302139282227}}, 'gpu': 'Quadro RTX 6000', 'gpu_count': 1, 'gpu_devices': [{'name': 'Quadro RTX 6000', 'memory_total': 25769803776}], 'memory': {'total': 187.4629783630371}}
26
+ 2024-07-17 16:22:04,417 INFO HandlerThread:2116314 [system_monitor.py:probe():224] Finished collecting system info
27
+ 2024-07-17 16:22:04,417 INFO HandlerThread:2116314 [system_monitor.py:probe():227] Publishing system info
28
+ 2024-07-17 16:22:04,417 DEBUG HandlerThread:2116314 [system_info.py:_save_conda():209] Saving list of conda packages installed into the current environment
29
+ 2024-07-17 16:22:05,208 INFO Thread-12 :2116314 [dir_watcher.py:_on_file_created():271] file/dir created: /home/cc/polymorph/fine-tuning/results/train-lora/5/loras/19/wandb/run-20240717_162204-wglu07sk/files/conda-environment.yaml
30
+ 2024-07-17 16:22:07,942 DEBUG HandlerThread:2116314 [system_info.py:_save_conda():224] Saving conda packages done
31
+ 2024-07-17 16:22:07,943 INFO HandlerThread:2116314 [system_monitor.py:probe():229] Finished publishing system info
32
+ 2024-07-17 16:22:07,953 DEBUG SenderThread:2116314 [sender.py:send():379] send: files
33
+ 2024-07-17 16:22:07,953 INFO SenderThread:2116314 [sender.py:_save_file():1454] saving file wandb-metadata.json with policy now
34
+ 2024-07-17 16:22:08,106 DEBUG HandlerThread:2116314 [handler.py:handle_request():158] handle_request: python_packages
35
+ 2024-07-17 16:22:08,106 DEBUG SenderThread:2116314 [sender.py:send_request():406] send_request: python_packages
36
+ 2024-07-17 16:22:08,107 DEBUG HandlerThread:2116314 [handler.py:handle_request():158] handle_request: stop_status
37
+ 2024-07-17 16:22:08,108 DEBUG SenderThread:2116314 [sender.py:send_request():406] send_request: stop_status
38
+ 2024-07-17 16:22:08,112 DEBUG HandlerThread:2116314 [handler.py:handle_request():158] handle_request: internal_messages
39
+ 2024-07-17 16:22:08,159 DEBUG SenderThread:2116314 [sender.py:send():379] send: telemetry
40
+ 2024-07-17 16:22:08,159 DEBUG SenderThread:2116314 [sender.py:send():379] send: config
41
+ 2024-07-17 16:22:08,161 DEBUG SenderThread:2116314 [sender.py:send():379] send: telemetry
42
+ 2024-07-17 16:22:08,162 DEBUG SenderThread:2116314 [sender.py:send():379] send: metric
43
+ 2024-07-17 16:22:08,162 DEBUG SenderThread:2116314 [sender.py:send():379] send: telemetry
44
+ 2024-07-17 16:22:08,162 DEBUG SenderThread:2116314 [sender.py:send():379] send: metric
45
+ 2024-07-17 16:22:08,162 WARNING SenderThread:2116314 [sender.py:send_metric():1405] Seen metric with glob (shouldn't happen)
46
+ 2024-07-17 16:22:08,162 DEBUG SenderThread:2116314 [sender.py:send():379] send: telemetry
47
+ 2024-07-17 16:22:08,163 DEBUG SenderThread:2116314 [sender.py:send():379] send: telemetry
48
+ 2024-07-17 16:22:08,163 DEBUG SenderThread:2116314 [sender.py:send():379] send: config
49
+ 2024-07-17 16:22:08,206 INFO Thread-12 :2116314 [dir_watcher.py:_on_file_modified():288] file/dir modified: /home/cc/polymorph/fine-tuning/results/train-lora/5/loras/19/wandb/run-20240717_162204-wglu07sk/files/conda-environment.yaml
50
+ 2024-07-17 16:22:08,207 INFO Thread-12 :2116314 [dir_watcher.py:_on_file_created():271] file/dir created: /home/cc/polymorph/fine-tuning/results/train-lora/5/loras/19/wandb/run-20240717_162204-wglu07sk/files/requirements.txt
51
+ 2024-07-17 16:22:08,207 INFO Thread-12 :2116314 [dir_watcher.py:_on_file_created():271] file/dir created: /home/cc/polymorph/fine-tuning/results/train-lora/5/loras/19/wandb/run-20240717_162204-wglu07sk/files/wandb-metadata.json
52
+ 2024-07-17 16:22:08,216 DEBUG HandlerThread:2116314 [handler.py:handle_request():158] handle_request: log_artifact
53
+ 2024-07-17 16:22:08,216 DEBUG SenderThread:2116314 [sender.py:send_request():406] send_request: log_artifact
54
+ 2024-07-17 16:22:08,259 INFO wandb-upload_0:2116314 [upload_job.py:push():130] Uploaded file /tmp/tmp6xd4bkanwandb/g8dj0aj4-wandb-metadata.json
55
+ 2024-07-17 16:22:08,735 INFO wandb-upload_0:2116314 [upload_job.py:push():88] Uploaded file /tmp/tmpyvr0ja9x/model_architecture.txt
56
+ 2024-07-17 16:22:09,108 DEBUG HandlerThread:2116314 [handler.py:handle_request():158] handle_request: internal_messages
57
+ 2024-07-17 16:22:09,159 INFO SenderThread:2116314 [sender.py:send_request_log_artifact():1518] logged artifact model-wglu07sk - {'id': 'QXJ0aWZhY3Q6OTkxMzgwOTg0', 'state': 'PENDING', 'artifactSequence': {'id': 'QXJ0aWZhY3RDb2xsZWN0aW9uOjI4MjA3NTAwNA==', 'latestArtifact': None}}
58
+ 2024-07-17 16:22:09,160 DEBUG HandlerThread:2116314 [handler.py:handle_request():158] handle_request: status_report
59
+ 2024-07-17 16:22:09,207 INFO Thread-12 :2116314 [dir_watcher.py:_on_file_created():271] file/dir created: /home/cc/polymorph/fine-tuning/results/train-lora/5/loras/19/wandb/run-20240717_162204-wglu07sk/files/output.log
60
+ 2024-07-17 16:22:10,107 DEBUG HandlerThread:2116314 [handler.py:handle_request():158] handle_request: internal_messages
61
+ 2024-07-17 16:22:11,108 DEBUG HandlerThread:2116314 [handler.py:handle_request():158] handle_request: internal_messages
62
+ 2024-07-17 16:22:11,208 INFO Thread-12 :2116314 [dir_watcher.py:_on_file_modified():288] file/dir modified: /home/cc/polymorph/fine-tuning/results/train-lora/5/loras/19/wandb/run-20240717_162204-wglu07sk/files/output.log
63
+ 2024-07-17 16:22:12,108 DEBUG HandlerThread:2116314 [handler.py:handle_request():158] handle_request: internal_messages
64
+ 2024-07-17 16:22:13,108 DEBUG HandlerThread:2116314 [handler.py:handle_request():158] handle_request: internal_messages
65
+ 2024-07-17 16:22:13,209 INFO Thread-12 :2116314 [dir_watcher.py:_on_file_modified():288] file/dir modified: /home/cc/polymorph/fine-tuning/results/train-lora/5/loras/19/wandb/run-20240717_162204-wglu07sk/files/output.log
66
+ 2024-07-17 16:22:13,550 DEBUG HandlerThread:2116314 [handler.py:handle_request():158] handle_request: partial_history
67
+ 2024-07-17 16:22:13,554 DEBUG SenderThread:2116314 [sender.py:send():379] send: metric
68
+ 2024-07-17 16:22:13,556 DEBUG SenderThread:2116314 [sender.py:send():379] send: metric
69
+ 2024-07-17 16:22:13,556 DEBUG SenderThread:2116314 [sender.py:send():379] send: metric
70
+ 2024-07-17 16:22:13,557 DEBUG SenderThread:2116314 [sender.py:send():379] send: metric
71
+ 2024-07-17 16:22:13,557 DEBUG SenderThread:2116314 [sender.py:send():379] send: metric
72
+ 2024-07-17 16:22:13,557 DEBUG SenderThread:2116314 [sender.py:send():379] send: metric
73
+ 2024-07-17 16:22:13,557 DEBUG SenderThread:2116314 [sender.py:send():379] send: history
74
+ 2024-07-17 16:22:13,557 DEBUG SenderThread:2116314 [sender.py:send_request():406] send_request: summary_record
75
+ 2024-07-17 16:22:13,558 INFO SenderThread:2116314 [sender.py:_save_file():1454] saving file wandb-summary.json with policy end
76
+ 2024-07-17 16:22:14,108 DEBUG HandlerThread:2116314 [handler.py:handle_request():158] handle_request: internal_messages
wandb/debug.log ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2024-07-17 16:22:04,034 INFO MainThread:2115683 [wandb_setup.py:_flush():76] Current SDK version is 0.17.4
2
+ 2024-07-17 16:22:04,034 INFO MainThread:2115683 [wandb_setup.py:_flush():76] Configure stats pid to 2115683
3
+ 2024-07-17 16:22:04,034 INFO MainThread:2115683 [wandb_setup.py:_flush():76] Loading settings from /home/cc/.config/wandb/settings
4
+ 2024-07-17 16:22:04,034 INFO MainThread:2115683 [wandb_setup.py:_flush():76] Loading settings from /home/cc/polymorph/fine-tuning/wandb/settings
5
+ 2024-07-17 16:22:04,034 INFO MainThread:2115683 [wandb_setup.py:_flush():76] Loading settings from environment variables: {'root_dir': '/home/cc/polymorph/fine-tuning/results/train-lora/5/loras/19'}
6
+ 2024-07-17 16:22:04,034 INFO MainThread:2115683 [wandb_setup.py:_flush():76] Applying setup settings: {'_disable_service': False}
7
+ 2024-07-17 16:22:04,034 INFO MainThread:2115683 [wandb_setup.py:_flush():76] Inferring run settings from compute environment: {'program_relpath': 'fine-tuning/main-lora-train.py', 'program_abspath': '/home/cc/polymorph/fine-tuning/main-lora-train.py', 'program': '/home/cc/polymorph/fine-tuning/main-lora-train.py'}
8
+ 2024-07-17 16:22:04,034 INFO MainThread:2115683 [wandb_setup.py:_flush():76] Applying login settings: {}
9
+ 2024-07-17 16:22:04,035 INFO MainThread:2115683 [wandb_init.py:_log_setup():529] Logging user logs to /home/cc/polymorph/fine-tuning/results/train-lora/5/loras/19/wandb/run-20240717_162204-wglu07sk/logs/debug.log
10
+ 2024-07-17 16:22:04,035 INFO MainThread:2115683 [wandb_init.py:_log_setup():530] Logging internal logs to /home/cc/polymorph/fine-tuning/results/train-lora/5/loras/19/wandb/run-20240717_162204-wglu07sk/logs/debug-internal.log
11
+ 2024-07-17 16:22:04,035 INFO MainThread:2115683 [wandb_init.py:init():569] calling init triggers
12
+ 2024-07-17 16:22:04,035 INFO MainThread:2115683 [wandb_init.py:init():576] wandb.init called with sweep_config: {}
13
+ config: {}
14
+ 2024-07-17 16:22:04,035 INFO MainThread:2115683 [wandb_init.py:init():619] starting backend
15
+ 2024-07-17 16:22:04,035 INFO MainThread:2115683 [wandb_init.py:init():623] setting up manager
16
+ 2024-07-17 16:22:04,037 INFO MainThread:2115683 [backend.py:_multiprocessing_setup():105] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
17
+ 2024-07-17 16:22:04,038 INFO MainThread:2115683 [wandb_init.py:init():631] backend started and connected
18
+ 2024-07-17 16:22:04,041 INFO MainThread:2115683 [wandb_init.py:init():720] updated telemetry
19
+ 2024-07-17 16:22:04,050 INFO MainThread:2115683 [wandb_init.py:init():753] communicating run to backend with 90.0 second timeout
20
+ 2024-07-17 16:22:04,217 INFO MainThread:2115683 [wandb_run.py:_on_init():2402] communicating current version
21
+ 2024-07-17 16:22:04,270 INFO MainThread:2115683 [wandb_run.py:_on_init():2411] got version response
22
+ 2024-07-17 16:22:04,270 INFO MainThread:2115683 [wandb_init.py:init():804] starting run threads in backend
23
+ 2024-07-17 16:22:08,107 INFO MainThread:2115683 [wandb_run.py:_console_start():2380] atexit reg
24
+ 2024-07-17 16:22:08,107 INFO MainThread:2115683 [wandb_run.py:_redirect():2235] redirect: wrap_raw
25
+ 2024-07-17 16:22:08,107 INFO MainThread:2115683 [wandb_run.py:_redirect():2300] Wrapping output streams.
26
+ 2024-07-17 16:22:08,107 INFO MainThread:2115683 [wandb_run.py:_redirect():2325] Redirects installed.
27
+ 2024-07-17 16:22:08,108 INFO MainThread:2115683 [wandb_init.py:init():847] run started, returning control to user process
28
+ 2024-07-17 16:22:08,110 INFO MainThread:2115683 [wandb_run.py:_config_callback():1382] config_cb None None {'peft_config': {'default': {'peft_type': <PeftType.LORA: 'LORA'>, 'auto_mapping': None, 'base_model_name_or_path': '/home/cc/polymorph/fine-tuning/results/train-lora/5/base_model', 'revision': None, 'task_type': None, 'inference_mode': False, 'r': 16, 'target_modules': {'value', 'query'}, 'lora_alpha': 16, 'lora_dropout': 0.1, 'fan_in_fan_out': False, 'bias': 'none', 'use_rslora': False, 'modules_to_save': ['classifier'], 'init_lora_weights': True, 'layers_to_transform': None, 'layers_pattern': None, 'rank_pattern': {}, 'alpha_pattern': {}, 'megatron_config': None, 'megatron_core': 'megatron.core', 'loftq_config': {}, 'use_dora': False, 'layer_replication': None, 'runtime_config': {'ephemeral_gpu_offload': False}}}, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float32', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': ['ViTForImageClassification'], 'finetuning_task': None, 'id2label': {'0': 'airplane', '1': 'bird'}, 'label2id': {'airplane': '0', 'bird': '1'}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': None, 'pad_token_id': None, 'eos_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': '/home/cc/polymorph/fine-tuning/results/train-lora/5/base_model', 'transformers_version': '4.42.3', 'model_type': 'vit', 'hidden_size': 768, 'num_hidden_layers': 12, 'num_attention_heads': 12, 'intermediate_size': 3072, 'hidden_act': 'gelu', 'hidden_dropout_prob': 0.0, 'attention_probs_dropout_prob': 0.0, 'initializer_range': 0.02, 'layer_norm_eps': 1e-12, 'image_size': 224, 'patch_size': 16, 'num_channels': 3, 'qkv_bias': True, 'encoder_stride': 16, 'output_dir': '/home/cc/polymorph/fine-tuning/results/train-lora/5/loras/19', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'epoch', 'prediction_loss_only': False, 'per_device_train_batch_size': 128, 'per_device_eval_batch_size': 128, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 4, 'eval_accumulation_steps': None, 'eval_delay': 0, 'learning_rate': 0.005, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 5, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': '/home/cc/polymorph/fine-tuning/results/train-lora/5/loras/19/runs/Jul17_16-22-01_gpu', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 10, 'logging_nan_inf_filter': True, 'save_strategy': 'epoch', 'save_steps': 500, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': None, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': '/home/cc/polymorph/fine-tuning/results/train-lora/5/loras/19', 'disable_tqdm': False, 'remove_unused_columns': False, 'label_names': ['labels'], 'load_best_model_at_end': True, 'metric_for_best_model': 'accuracy', 'greater_is_better': True, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': 'epoch', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False}
29
+ 2024-07-17 16:22:08,113 INFO MainThread:2115683 [wandb_config.py:__setitem__():151] config set model/num_parameters = 86391556 - <bound method Run._config_callback of <wandb.sdk.wandb_run.Run object at 0x7fc944562610>>
30
+ 2024-07-17 16:22:08,113 INFO MainThread:2115683 [wandb_run.py:_config_callback():1382] config_cb model/num_parameters 86391556 None
wandb/run-20240717_162204-wglu07sk/files/conda-environment.yaml ADDED
@@ -0,0 +1,248 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: vision
2
+ channels:
3
+ - defaults
4
+ dependencies:
5
+ - _libgcc_mutex=0.1=main
6
+ - _openmp_mutex=5.1=1_gnu
7
+ - bzip2=1.0.8=h5eee18b_6
8
+ - ca-certificates=2024.3.11=h06a4308_0
9
+ - ld_impl_linux-64=2.38=h1181459_1
10
+ - libffi=3.4.4=h6a678d5_1
11
+ - libgcc-ng=11.2.0=h1234567_1
12
+ - libgomp=11.2.0=h1234567_1
13
+ - libstdcxx-ng=11.2.0=h1234567_1
14
+ - libuuid=1.41.5=h5eee18b_0
15
+ - ncurses=6.4=h6a678d5_0
16
+ - openssl=3.0.14=h5eee18b_0
17
+ - pip=24.0=py311h06a4308_0
18
+ - python=3.11.9=h955ad1f_0
19
+ - readline=8.2=h5eee18b_0
20
+ - setuptools=69.5.1=py311h06a4308_0
21
+ - sqlite=3.45.3=h5eee18b_0
22
+ - tk=8.6.14=h39e8969_0
23
+ - wheel=0.43.0=py311h06a4308_0
24
+ - xz=5.4.6=h5eee18b_1
25
+ - zlib=1.2.13=h5eee18b_1
26
+ - pip:
27
+ - absl-py==2.1.0
28
+ - accelerate==0.32.1
29
+ - aiohttp==3.9.5
30
+ - aiosignal==1.3.1
31
+ - annotated-types==0.7.0
32
+ - anyio==4.4.0
33
+ - argon2-cffi==23.1.0
34
+ - argon2-cffi-bindings==21.2.0
35
+ - arrow==1.3.0
36
+ - asttokens==2.4.1
37
+ - async-lru==2.0.4
38
+ - attrs==23.2.0
39
+ - babel==2.15.0
40
+ - beautifulsoup4==4.12.3
41
+ - black==24.4.2
42
+ - bleach==6.1.0
43
+ - certifi==2024.6.2
44
+ - cffi==1.16.0
45
+ - charset-normalizer==3.3.2
46
+ - click==8.1.7
47
+ - comm==0.2.2
48
+ - contourpy==1.2.1
49
+ - cycler==0.12.1
50
+ - datasets==2.20.0
51
+ - debugpy==1.8.1
52
+ - decorator==5.1.1
53
+ - defusedxml==0.7.1
54
+ - dill==0.3.8
55
+ - dnspython==2.6.1
56
+ - docker-pycreds==0.4.0
57
+ - einops==0.8.0
58
+ - email-validator==2.2.0
59
+ - evaluate==0.4.2
60
+ - executing==2.0.1
61
+ - fastapi==0.111.1
62
+ - fastapi-cli==0.0.4
63
+ - fastjsonschema==2.20.0
64
+ - filelock==3.15.4
65
+ - fonttools==4.53.0
66
+ - fqdn==1.5.1
67
+ - frozenlist==1.4.1
68
+ - fsspec==2024.5.0
69
+ - fvcore==0.1.6
70
+ - gitdb==4.0.11
71
+ - gitpython==3.1.43
72
+ - grpcio==1.64.1
73
+ - grpcio-tools==1.64.1
74
+ - h11==0.14.0
75
+ - httpcore==1.0.5
76
+ - httptools==0.6.1
77
+ - httpx==0.27.0
78
+ - huggingface-hub==0.23.4
79
+ - idna==3.7
80
+ - imageio==2.34.2
81
+ - imageio-ffmpeg==0.5.1
82
+ - install==1.3.5
83
+ - iopath==0.1.10
84
+ - ipykernel==6.29.4
85
+ - ipython==8.25.0
86
+ - ipywidgets==8.1.3
87
+ - isoduration==20.11.0
88
+ - jedi==0.19.1
89
+ - jinja2==3.1.4
90
+ - joblib==1.4.2
91
+ - json5==0.9.25
92
+ - jsonpointer==3.0.0
93
+ - jsonschema==4.22.0
94
+ - jsonschema-specifications==2023.12.1
95
+ - jupyter==1.0.0
96
+ - jupyter-client==8.6.2
97
+ - jupyter-console==6.6.3
98
+ - jupyter-core==5.7.2
99
+ - jupyter-events==0.10.0
100
+ - jupyter-lsp==2.2.5
101
+ - jupyter-server==2.14.1
102
+ - jupyter-server-terminals==0.5.3
103
+ - jupyterlab==4.2.2
104
+ - jupyterlab-pygments==0.3.0
105
+ - jupyterlab-server==2.27.2
106
+ - jupyterlab-widgets==3.0.11
107
+ - kaggle==1.6.14
108
+ - kiwisolver==1.4.5
109
+ - lazy-loader==0.4
110
+ - markdown==3.6
111
+ - markdown-it-py==3.0.0
112
+ - markupsafe==2.1.5
113
+ - matplotlib==3.9.0
114
+ - matplotlib-inline==0.1.7
115
+ - mdurl==0.1.2
116
+ - mistune==3.0.2
117
+ - mpmath==1.3.0
118
+ - multidict==6.0.5
119
+ - multiprocess==0.70.16
120
+ - mypy-extensions==1.0.0
121
+ - nbclient==0.10.0
122
+ - nbconvert==7.16.4
123
+ - nbformat==5.10.4
124
+ - nest-asyncio==1.6.0
125
+ - networkx==3.3
126
+ - notebook==7.2.1
127
+ - notebook-shim==0.2.4
128
+ - numpy==1.26.4
129
+ - nvidia-cublas-cu12==12.1.3.1
130
+ - nvidia-cuda-cupti-cu12==12.1.105
131
+ - nvidia-cuda-nvrtc-cu12==12.1.105
132
+ - nvidia-cuda-runtime-cu12==12.1.105
133
+ - nvidia-cudnn-cu12==8.9.2.26
134
+ - nvidia-cufft-cu12==11.0.2.54
135
+ - nvidia-curand-cu12==10.3.2.106
136
+ - nvidia-cusolver-cu12==11.4.5.107
137
+ - nvidia-cusparse-cu12==12.1.0.106
138
+ - nvidia-nccl-cu12==2.20.5
139
+ - nvidia-nvjitlink-cu12==12.5.40
140
+ - nvidia-nvtx-cu12==12.1.105
141
+ - opencv-python==4.10.0.84
142
+ - opencv-python-headless==4.10.0.84
143
+ - overrides==7.7.0
144
+ - packaging==24.1
145
+ - pandas==2.2.2
146
+ - pandocfilters==1.5.1
147
+ - parso==0.8.4
148
+ - pathspec==0.12.1
149
+ - peft==0.11.2.dev0
150
+ - pexpect==4.9.0
151
+ - pillow==10.3.0
152
+ - platformdirs==4.2.2
153
+ - portalocker==2.10.0
154
+ - prometheus-client==0.20.0
155
+ - prompt-toolkit==3.0.47
156
+ - protobuf==5.27.2
157
+ - psutil==6.0.0
158
+ - ptyprocess==0.7.0
159
+ - pure-eval==0.2.2
160
+ - py-cpuinfo==9.0.0
161
+ - pyarrow==16.1.0
162
+ - pyarrow-hotfix==0.6
163
+ - pycocotools==2.0.8
164
+ - pycparser==2.22
165
+ - pydantic==2.8.2
166
+ - pydantic-core==2.20.1
167
+ - pygments==2.18.0
168
+ - pyparsing==3.1.2
169
+ - python-dateutil==2.9.0.post0
170
+ - python-dotenv==1.0.1
171
+ - python-json-logger==2.0.7
172
+ - python-multipart==0.0.9
173
+ - python-slugify==8.0.4
174
+ - pytz==2024.1
175
+ - pyyaml==6.0.1
176
+ - pyzmq==26.0.3
177
+ - qtconsole==5.5.2
178
+ - qtpy==2.4.1
179
+ - ranger==0.10
180
+ - referencing==0.35.1
181
+ - regex==2024.5.15
182
+ - requests==2.32.3
183
+ - rfc3339-validator==0.1.4
184
+ - rfc3986-validator==0.1.1
185
+ - rich==13.7.1
186
+ - rpds-py==0.18.1
187
+ - safetensors==0.4.3
188
+ - scikit-image==0.24.0
189
+ - scikit-learn==1.5.0
190
+ - scipy==1.13.1
191
+ - seaborn==0.13.2
192
+ - send2trash==1.8.3
193
+ - sentry-sdk==2.7.1
194
+ - setproctitle==1.3.3
195
+ - shellingham==1.5.4
196
+ - simplejson==3.19.2
197
+ - six==1.16.0
198
+ - smmap==5.0.1
199
+ - sniffio==1.3.1
200
+ - soupsieve==2.5
201
+ - stack-data==0.6.3
202
+ - starlette==0.37.2
203
+ - sympy==1.12.1
204
+ - tabulate==0.9.0
205
+ - tensorboard==2.17.0
206
+ - tensorboard-data-server==0.7.2
207
+ - termcolor==2.4.0
208
+ - terminado==0.18.1
209
+ - text-unidecode==1.3
210
+ - thop==0.1.1-2209072238
211
+ - threadpoolctl==3.5.0
212
+ - tifffile==2024.6.18
213
+ - timm==1.0.7
214
+ - tinycss2==1.3.0
215
+ - tokenize-rt==5.2.0
216
+ - tokenizers==0.19.1
217
+ - torch==2.3.1
218
+ - torchinfo==1.6.3
219
+ - torchvision==0.18.1
220
+ - tornado==6.4.1
221
+ - tqdm==4.66.4
222
+ - traitlets==5.14.3
223
+ - transformers==4.42.3
224
+ - triton==2.3.1
225
+ - typer==0.12.3
226
+ - types-python-dateutil==2.9.0.20240316
227
+ - typing-extensions==4.12.2
228
+ - tzdata==2024.1
229
+ - ultralytics==8.2.41
230
+ - ultralytics-thop==2.0.0
231
+ - uri-template==1.3.0
232
+ - urllib3==2.2.2
233
+ - uvicorn==0.30.1
234
+ - uvloop==0.19.0
235
+ - vit-pytorch==1.7.0
236
+ - wandb==0.17.4
237
+ - watchfiles==0.22.0
238
+ - wcwidth==0.2.13
239
+ - webcolors==24.6.0
240
+ - webencodings==0.5.1
241
+ - websocket-client==1.8.0
242
+ - websockets==12.0
243
+ - werkzeug==3.0.3
244
+ - widgetsnbextension==4.0.11
245
+ - xxhash==3.4.1
246
+ - yacs==0.1.8
247
+ - yarl==1.9.4
248
+ prefix: /home/cc/miniconda3/envs/vision
wandb/run-20240717_162204-wglu07sk/files/config.yaml ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ wandb_version: 1
2
+
3
+ _wandb:
4
+ desc: null
5
+ value:
6
+ python_version: 3.11.9
7
+ cli_version: 0.17.4
8
+ framework: huggingface
9
+ huggingface_version: 4.42.3
10
+ is_jupyter_run: false
11
+ is_kaggle_kernel: true
12
+ start_time: 1721233324
13
+ t:
14
+ 1:
15
+ - 1
16
+ - 5
17
+ - 11
18
+ - 41
19
+ - 49
20
+ - 51
21
+ - 53
22
+ - 55
23
+ - 71
24
+ - 98
25
+ - 100
26
+ 3:
27
+ - 13
28
+ - 23
29
+ 4: 3.11.9
30
+ 5: 0.17.4
31
+ 6: 4.42.3
32
+ 8:
33
+ - 2
34
+ - 5
35
+ 13: linux-x86_64
wandb/run-20240717_162204-wglu07sk/files/output.log ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+
2
+
wandb/run-20240717_162204-wglu07sk/files/requirements.txt ADDED
@@ -0,0 +1,225 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Babel==2.15.0
2
+ GitPython==3.1.43
3
+ Jinja2==3.1.4
4
+ Markdown==3.6
5
+ MarkupSafe==2.1.5
6
+ PyYAML==6.0.1
7
+ Pygments==2.18.0
8
+ QtPy==2.4.1
9
+ Send2Trash==1.8.3
10
+ Werkzeug==3.0.3
11
+ absl-py==2.1.0
12
+ accelerate==0.32.1
13
+ aiohttp==3.9.5
14
+ aiosignal==1.3.1
15
+ annotated-types==0.7.0
16
+ anyio==4.4.0
17
+ argon2-cffi-bindings==21.2.0
18
+ argon2-cffi==23.1.0
19
+ arrow==1.3.0
20
+ asttokens==2.4.1
21
+ async-lru==2.0.4
22
+ attrs==23.2.0
23
+ beautifulsoup4==4.12.3
24
+ black==24.4.2
25
+ bleach==6.1.0
26
+ certifi==2024.6.2
27
+ cffi==1.16.0
28
+ charset-normalizer==3.3.2
29
+ click==8.1.7
30
+ comm==0.2.2
31
+ contourpy==1.2.1
32
+ cycler==0.12.1
33
+ datasets==2.20.0
34
+ debugpy==1.8.1
35
+ decorator==5.1.1
36
+ defusedxml==0.7.1
37
+ dill==0.3.8
38
+ dnspython==2.6.1
39
+ docker-pycreds==0.4.0
40
+ einops==0.8.0
41
+ email_validator==2.2.0
42
+ evaluate==0.4.2
43
+ executing==2.0.1
44
+ fastapi-cli==0.0.4
45
+ fastapi==0.111.1
46
+ fastjsonschema==2.20.0
47
+ filelock==3.15.4
48
+ fonttools==4.53.0
49
+ fqdn==1.5.1
50
+ frozenlist==1.4.1
51
+ fsspec==2024.5.0
52
+ fvcore==0.1.6
53
+ gitdb==4.0.11
54
+ grpcio-tools==1.64.1
55
+ grpcio==1.64.1
56
+ h11==0.14.0
57
+ httpcore==1.0.5
58
+ httptools==0.6.1
59
+ httpx==0.27.0
60
+ huggingface-hub==0.23.4
61
+ idna==3.7
62
+ imageio-ffmpeg==0.5.1
63
+ imageio==2.34.2
64
+ install==1.3.5
65
+ iopath==0.1.10
66
+ ipykernel==6.29.4
67
+ ipython==8.25.0
68
+ ipywidgets==8.1.3
69
+ isoduration==20.11.0
70
+ jedi==0.19.1
71
+ joblib==1.4.2
72
+ json5==0.9.25
73
+ jsonpointer==3.0.0
74
+ jsonschema-specifications==2023.12.1
75
+ jsonschema==4.22.0
76
+ jupyter-console==6.6.3
77
+ jupyter-events==0.10.0
78
+ jupyter-lsp==2.2.5
79
+ jupyter==1.0.0
80
+ jupyter_client==8.6.2
81
+ jupyter_core==5.7.2
82
+ jupyter_server==2.14.1
83
+ jupyter_server_terminals==0.5.3
84
+ jupyterlab==4.2.2
85
+ jupyterlab_pygments==0.3.0
86
+ jupyterlab_server==2.27.2
87
+ jupyterlab_widgets==3.0.11
88
+ kaggle==1.6.14
89
+ kiwisolver==1.4.5
90
+ lazy_loader==0.4
91
+ markdown-it-py==3.0.0
92
+ matplotlib-inline==0.1.7
93
+ matplotlib==3.9.0
94
+ mdurl==0.1.2
95
+ mistune==3.0.2
96
+ mpmath==1.3.0
97
+ multidict==6.0.5
98
+ multiprocess==0.70.16
99
+ mypy-extensions==1.0.0
100
+ nbclient==0.10.0
101
+ nbconvert==7.16.4
102
+ nbformat==5.10.4
103
+ nest-asyncio==1.6.0
104
+ networkx==3.3
105
+ notebook==7.2.1
106
+ notebook_shim==0.2.4
107
+ numpy==1.26.4
108
+ nvidia-cublas-cu12==12.1.3.1
109
+ nvidia-cuda-cupti-cu12==12.1.105
110
+ nvidia-cuda-nvrtc-cu12==12.1.105
111
+ nvidia-cuda-runtime-cu12==12.1.105
112
+ nvidia-cudnn-cu12==8.9.2.26
113
+ nvidia-cufft-cu12==11.0.2.54
114
+ nvidia-curand-cu12==10.3.2.106
115
+ nvidia-cusolver-cu12==11.4.5.107
116
+ nvidia-cusparse-cu12==12.1.0.106
117
+ nvidia-nccl-cu12==2.20.5
118
+ nvidia-nvjitlink-cu12==12.5.40
119
+ nvidia-nvtx-cu12==12.1.105
120
+ opencv-python-headless==4.10.0.84
121
+ opencv-python==4.10.0.84
122
+ overrides==7.7.0
123
+ packaging==24.1
124
+ pandas==2.2.2
125
+ pandocfilters==1.5.1
126
+ parso==0.8.4
127
+ pathspec==0.12.1
128
+ peft==0.11.2.dev0
129
+ pexpect==4.9.0
130
+ pillow==10.3.0
131
+ pip==24.0
132
+ platformdirs==4.2.2
133
+ polymorph==0.1.0
134
+ portalocker==2.10.0
135
+ prometheus_client==0.20.0
136
+ prompt_toolkit==3.0.47
137
+ protobuf==5.27.2
138
+ psutil==6.0.0
139
+ ptyprocess==0.7.0
140
+ pure-eval==0.2.2
141
+ py-cpuinfo==9.0.0
142
+ pyarrow-hotfix==0.6
143
+ pyarrow==16.1.0
144
+ pycocotools==2.0.8
145
+ pycparser==2.22
146
+ pydantic==2.8.2
147
+ pydantic_core==2.20.1
148
+ pyparsing==3.1.2
149
+ python-dateutil==2.9.0.post0
150
+ python-dotenv==1.0.1
151
+ python-json-logger==2.0.7
152
+ python-multipart==0.0.9
153
+ python-slugify==8.0.4
154
+ pytz==2024.1
155
+ pyzmq==26.0.3
156
+ qtconsole==5.5.2
157
+ ranger==0.10
158
+ referencing==0.35.1
159
+ regex==2024.5.15
160
+ requests==2.32.3
161
+ rfc3339-validator==0.1.4
162
+ rfc3986-validator==0.1.1
163
+ rich==13.7.1
164
+ rpds-py==0.18.1
165
+ safetensors==0.4.3
166
+ scikit-image==0.24.0
167
+ scikit-learn==1.5.0
168
+ scipy==1.13.1
169
+ seaborn==0.13.2
170
+ sentry-sdk==2.7.1
171
+ setproctitle==1.3.3
172
+ setuptools==69.5.1
173
+ shellingham==1.5.4
174
+ simplejson==3.19.2
175
+ six==1.16.0
176
+ smmap==5.0.1
177
+ sniffio==1.3.1
178
+ soupsieve==2.5
179
+ stack-data==0.6.3
180
+ starlette==0.37.2
181
+ sympy==1.12.1
182
+ tabulate==0.9.0
183
+ tensorboard-data-server==0.7.2
184
+ tensorboard==2.17.0
185
+ termcolor==2.4.0
186
+ terminado==0.18.1
187
+ text-unidecode==1.3
188
+ thop==0.1.1-2209072238
189
+ threadpoolctl==3.5.0
190
+ tifffile==2024.6.18
191
+ timm==1.0.7
192
+ tinycss2==1.3.0
193
+ tokenize-rt==5.2.0
194
+ tokenizers==0.19.1
195
+ torch==2.3.1
196
+ torchinfo==1.6.3
197
+ torchvision==0.18.1
198
+ tornado==6.4.1
199
+ tqdm==4.66.4
200
+ traitlets==5.14.3
201
+ transformers==4.42.3
202
+ triton==2.3.1
203
+ typer==0.12.3
204
+ types-python-dateutil==2.9.0.20240316
205
+ typing_extensions==4.12.2
206
+ tzdata==2024.1
207
+ ultralytics-thop==2.0.0
208
+ ultralytics==8.2.41
209
+ uri-template==1.3.0
210
+ urllib3==2.2.2
211
+ uvicorn==0.30.1
212
+ uvloop==0.19.0
213
+ vit-pytorch==1.7.0
214
+ wandb==0.17.4
215
+ watchfiles==0.22.0
216
+ wcwidth==0.2.13
217
+ webcolors==24.6.0
218
+ webencodings==0.5.1
219
+ websocket-client==1.8.0
220
+ websockets==12.0
221
+ wheel==0.43.0
222
+ widgetsnbextension==4.0.11
223
+ xxhash==3.4.1
224
+ yacs==0.1.8
225
+ yarl==1.9.4
wandb/run-20240717_162204-wglu07sk/files/wandb-metadata.json ADDED
@@ -0,0 +1,288 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "os": "Linux-5.15.0-101-generic-x86_64-with-glibc2.35",
3
+ "python": "3.11.9",
4
+ "heartbeatAt": "2024-07-17T16:22:04.400954",
5
+ "startedAt": "2024-07-17T16:22:04.032523",
6
+ "docker": null,
7
+ "cuda": null,
8
+ "args": [],
9
+ "state": "running",
10
+ "program": "/home/cc/polymorph/fine-tuning/main-lora-train.py",
11
+ "codePathLocal": "main-lora-train.py",
12
+ "codePath": "fine-tuning/main-lora-train.py",
13
+ "git": {
14
+ "remote": "https://github.com/inference-serving/polymorph.git",
15
+ "commit": "e84189a37f0838a7e4ac1496b2345fe84c6a7683"
16
+ },
17
+ "email": "[email protected]",
18
+ "root": "/home/cc/polymorph",
19
+ "host": "gpu",
20
+ "username": "cc",
21
+ "executable": "/home/cc/miniconda3/envs/vision/bin/python",
22
+ "cpu_count": 24,
23
+ "cpu_count_logical": 48,
24
+ "cpu_freq": {
25
+ "current": 2576.3446041666666,
26
+ "min": 1000.0,
27
+ "max": 3700.0
28
+ },
29
+ "cpu_freq_per_core": [
30
+ {
31
+ "current": 2600.0,
32
+ "min": 1000.0,
33
+ "max": 3700.0
34
+ },
35
+ {
36
+ "current": 2600.0,
37
+ "min": 1000.0,
38
+ "max": 3700.0
39
+ },
40
+ {
41
+ "current": 2600.0,
42
+ "min": 1000.0,
43
+ "max": 3700.0
44
+ },
45
+ {
46
+ "current": 2600.0,
47
+ "min": 1000.0,
48
+ "max": 3700.0
49
+ },
50
+ {
51
+ "current": 2600.0,
52
+ "min": 1000.0,
53
+ "max": 3700.0
54
+ },
55
+ {
56
+ "current": 2600.0,
57
+ "min": 1000.0,
58
+ "max": 3700.0
59
+ },
60
+ {
61
+ "current": 2600.0,
62
+ "min": 1000.0,
63
+ "max": 3700.0
64
+ },
65
+ {
66
+ "current": 2600.0,
67
+ "min": 1000.0,
68
+ "max": 3700.0
69
+ },
70
+ {
71
+ "current": 2600.0,
72
+ "min": 1000.0,
73
+ "max": 3700.0
74
+ },
75
+ {
76
+ "current": 2600.0,
77
+ "min": 1000.0,
78
+ "max": 3700.0
79
+ },
80
+ {
81
+ "current": 2600.0,
82
+ "min": 1000.0,
83
+ "max": 3700.0
84
+ },
85
+ {
86
+ "current": 2600.0,
87
+ "min": 1000.0,
88
+ "max": 3700.0
89
+ },
90
+ {
91
+ "current": 2600.0,
92
+ "min": 1000.0,
93
+ "max": 3700.0
94
+ },
95
+ {
96
+ "current": 2600.0,
97
+ "min": 1000.0,
98
+ "max": 3700.0
99
+ },
100
+ {
101
+ "current": 2600.0,
102
+ "min": 1000.0,
103
+ "max": 3700.0
104
+ },
105
+ {
106
+ "current": 2600.0,
107
+ "min": 1000.0,
108
+ "max": 3700.0
109
+ },
110
+ {
111
+ "current": 2600.0,
112
+ "min": 1000.0,
113
+ "max": 3700.0
114
+ },
115
+ {
116
+ "current": 2600.0,
117
+ "min": 1000.0,
118
+ "max": 3700.0
119
+ },
120
+ {
121
+ "current": 2600.0,
122
+ "min": 1000.0,
123
+ "max": 3700.0
124
+ },
125
+ {
126
+ "current": 2600.0,
127
+ "min": 1000.0,
128
+ "max": 3700.0
129
+ },
130
+ {
131
+ "current": 2600.0,
132
+ "min": 1000.0,
133
+ "max": 3700.0
134
+ },
135
+ {
136
+ "current": 2600.0,
137
+ "min": 1000.0,
138
+ "max": 3700.0
139
+ },
140
+ {
141
+ "current": 2600.0,
142
+ "min": 1000.0,
143
+ "max": 3700.0
144
+ },
145
+ {
146
+ "current": 2600.0,
147
+ "min": 1000.0,
148
+ "max": 3700.0
149
+ },
150
+ {
151
+ "current": 2600.0,
152
+ "min": 1000.0,
153
+ "max": 3700.0
154
+ },
155
+ {
156
+ "current": 2600.0,
157
+ "min": 1000.0,
158
+ "max": 3700.0
159
+ },
160
+ {
161
+ "current": 2600.0,
162
+ "min": 1000.0,
163
+ "max": 3700.0
164
+ },
165
+ {
166
+ "current": 2600.0,
167
+ "min": 1000.0,
168
+ "max": 3700.0
169
+ },
170
+ {
171
+ "current": 2600.0,
172
+ "min": 1000.0,
173
+ "max": 3700.0
174
+ },
175
+ {
176
+ "current": 2600.0,
177
+ "min": 1000.0,
178
+ "max": 3700.0
179
+ },
180
+ {
181
+ "current": 2600.0,
182
+ "min": 1000.0,
183
+ "max": 3700.0
184
+ },
185
+ {
186
+ "current": 2600.0,
187
+ "min": 1000.0,
188
+ "max": 3700.0
189
+ },
190
+ {
191
+ "current": 2600.0,
192
+ "min": 1000.0,
193
+ "max": 3700.0
194
+ },
195
+ {
196
+ "current": 2600.0,
197
+ "min": 1000.0,
198
+ "max": 3700.0
199
+ },
200
+ {
201
+ "current": 2600.0,
202
+ "min": 1000.0,
203
+ "max": 3700.0
204
+ },
205
+ {
206
+ "current": 2600.0,
207
+ "min": 1000.0,
208
+ "max": 3700.0
209
+ },
210
+ {
211
+ "current": 2600.0,
212
+ "min": 1000.0,
213
+ "max": 3700.0
214
+ },
215
+ {
216
+ "current": 2600.0,
217
+ "min": 1000.0,
218
+ "max": 3700.0
219
+ },
220
+ {
221
+ "current": 2600.0,
222
+ "min": 1000.0,
223
+ "max": 3700.0
224
+ },
225
+ {
226
+ "current": 2600.0,
227
+ "min": 1000.0,
228
+ "max": 3700.0
229
+ },
230
+ {
231
+ "current": 2600.0,
232
+ "min": 1000.0,
233
+ "max": 3700.0
234
+ },
235
+ {
236
+ "current": 2600.0,
237
+ "min": 1000.0,
238
+ "max": 3700.0
239
+ },
240
+ {
241
+ "current": 2600.0,
242
+ "min": 1000.0,
243
+ "max": 3700.0
244
+ },
245
+ {
246
+ "current": 2600.0,
247
+ "min": 1000.0,
248
+ "max": 3700.0
249
+ },
250
+ {
251
+ "current": 2600.0,
252
+ "min": 1000.0,
253
+ "max": 3700.0
254
+ },
255
+ {
256
+ "current": 1401.746,
257
+ "min": 1000.0,
258
+ "max": 3700.0
259
+ },
260
+ {
261
+ "current": 2600.0,
262
+ "min": 1000.0,
263
+ "max": 3700.0
264
+ },
265
+ {
266
+ "current": 2600.0,
267
+ "min": 1000.0,
268
+ "max": 3700.0
269
+ }
270
+ ],
271
+ "disk": {
272
+ "/": {
273
+ "total": 208.95753479003906,
274
+ "used": 157.59302139282227
275
+ }
276
+ },
277
+ "gpu": "Quadro RTX 6000",
278
+ "gpu_count": 1,
279
+ "gpu_devices": [
280
+ {
281
+ "name": "Quadro RTX 6000",
282
+ "memory_total": 25769803776
283
+ }
284
+ ],
285
+ "memory": {
286
+ "total": 187.4629783630371
287
+ }
288
+ }
wandb/run-20240717_162204-wglu07sk/files/wandb-summary.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"eval/loss": 0.5239526629447937, "eval/accuracy": 0.97, "eval/runtime": 0.8085, "eval/samples_per_second": 247.358, "eval/steps_per_second": 2.474, "train/epoch": 0.5714285714285714, "train/global_step": 1, "_timestamp": 1721233333.5496378, "_runtime": 9.510727882385254, "_step": 0}
wandb/run-20240717_162204-wglu07sk/logs/debug-internal.log ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2024-07-17 16:22:04,040 INFO StreamThr :2116314 [internal.py:wandb_internal():85] W&B internal server running at pid: 2116314, started at: 2024-07-17 16:22:04.039368
2
+ 2024-07-17 16:22:04,043 DEBUG HandlerThread:2116314 [handler.py:handle_request():158] handle_request: status
3
+ 2024-07-17 16:22:04,046 INFO WriterThread:2116314 [datastore.py:open_for_write():87] open: /home/cc/polymorph/fine-tuning/results/train-lora/5/loras/19/wandb/run-20240717_162204-wglu07sk/run-wglu07sk.wandb
4
+ 2024-07-17 16:22:04,050 DEBUG SenderThread:2116314 [sender.py:send():379] send: header
5
+ 2024-07-17 16:22:04,051 DEBUG SenderThread:2116314 [sender.py:send():379] send: run
6
+ 2024-07-17 16:22:04,205 INFO SenderThread:2116314 [dir_watcher.py:__init__():211] watching files in: /home/cc/polymorph/fine-tuning/results/train-lora/5/loras/19/wandb/run-20240717_162204-wglu07sk/files
7
+ 2024-07-17 16:22:04,205 INFO SenderThread:2116314 [sender.py:_start_run_threads():1188] run started: wglu07sk with start time 1721233324.03891
8
+ 2024-07-17 16:22:04,218 DEBUG HandlerThread:2116314 [handler.py:handle_request():158] handle_request: check_version
9
+ 2024-07-17 16:22:04,219 DEBUG SenderThread:2116314 [sender.py:send_request():406] send_request: check_version
10
+ 2024-07-17 16:22:04,281 DEBUG HandlerThread:2116314 [handler.py:handle_request():158] handle_request: run_start
11
+ 2024-07-17 16:22:04,332 DEBUG HandlerThread:2116314 [system_info.py:__init__():26] System info init
12
+ 2024-07-17 16:22:04,332 DEBUG HandlerThread:2116314 [system_info.py:__init__():41] System info init done
13
+ 2024-07-17 16:22:04,332 INFO HandlerThread:2116314 [system_monitor.py:start():194] Starting system monitor
14
+ 2024-07-17 16:22:04,332 INFO SystemMonitor:2116314 [system_monitor.py:_start():158] Starting system asset monitoring threads
15
+ 2024-07-17 16:22:04,332 INFO HandlerThread:2116314 [system_monitor.py:probe():214] Collecting system info
16
+ 2024-07-17 16:22:04,333 INFO SystemMonitor:2116314 [interfaces.py:start():188] Started cpu monitoring
17
+ 2024-07-17 16:22:04,335 INFO SystemMonitor:2116314 [interfaces.py:start():188] Started disk monitoring
18
+ 2024-07-17 16:22:04,336 INFO SystemMonitor:2116314 [interfaces.py:start():188] Started gpu monitoring
19
+ 2024-07-17 16:22:04,337 INFO SystemMonitor:2116314 [interfaces.py:start():188] Started memory monitoring
20
+ 2024-07-17 16:22:04,338 INFO SystemMonitor:2116314 [interfaces.py:start():188] Started network monitoring
21
+ 2024-07-17 16:22:04,400 DEBUG HandlerThread:2116314 [system_info.py:probe():152] Probing system
22
+ 2024-07-17 16:22:04,405 DEBUG HandlerThread:2116314 [system_info.py:_probe_git():137] Probing git
23
+ 2024-07-17 16:22:04,416 DEBUG HandlerThread:2116314 [system_info.py:_probe_git():145] Probing git done
24
+ 2024-07-17 16:22:04,416 DEBUG HandlerThread:2116314 [system_info.py:probe():200] Probing system done
25
+ 2024-07-17 16:22:04,416 DEBUG HandlerThread:2116314 [system_monitor.py:probe():223] {'os': 'Linux-5.15.0-101-generic-x86_64-with-glibc2.35', 'python': '3.11.9', 'heartbeatAt': '2024-07-17T16:22:04.400954', 'startedAt': '2024-07-17T16:22:04.032523', 'docker': None, 'cuda': None, 'args': (), 'state': 'running', 'program': '/home/cc/polymorph/fine-tuning/main-lora-train.py', 'codePathLocal': 'main-lora-train.py', 'codePath': 'fine-tuning/main-lora-train.py', 'git': {'remote': 'https://github.com/inference-serving/polymorph.git', 'commit': 'e84189a37f0838a7e4ac1496b2345fe84c6a7683'}, 'email': '[email protected]', 'root': '/home/cc/polymorph', 'host': 'gpu', 'username': 'cc', 'executable': '/home/cc/miniconda3/envs/vision/bin/python', 'cpu_count': 24, 'cpu_count_logical': 48, 'cpu_freq': {'current': 2576.3446041666666, 'min': 1000.0, 'max': 3700.0}, 'cpu_freq_per_core': [{'current': 2600.0, 'min': 1000.0, 'max': 3700.0}, {'current': 2600.0, 'min': 1000.0, 'max': 3700.0}, {'current': 2600.0, 'min': 1000.0, 'max': 3700.0}, {'current': 2600.0, 'min': 1000.0, 'max': 3700.0}, {'current': 2600.0, 'min': 1000.0, 'max': 3700.0}, {'current': 2600.0, 'min': 1000.0, 'max': 3700.0}, {'current': 2600.0, 'min': 1000.0, 'max': 3700.0}, {'current': 2600.0, 'min': 1000.0, 'max': 3700.0}, {'current': 2600.0, 'min': 1000.0, 'max': 3700.0}, {'current': 2600.0, 'min': 1000.0, 'max': 3700.0}, {'current': 2600.0, 'min': 1000.0, 'max': 3700.0}, {'current': 2600.0, 'min': 1000.0, 'max': 3700.0}, {'current': 2600.0, 'min': 1000.0, 'max': 3700.0}, {'current': 2600.0, 'min': 1000.0, 'max': 3700.0}, {'current': 2600.0, 'min': 1000.0, 'max': 3700.0}, {'current': 2600.0, 'min': 1000.0, 'max': 3700.0}, {'current': 2600.0, 'min': 1000.0, 'max': 3700.0}, {'current': 2600.0, 'min': 1000.0, 'max': 3700.0}, {'current': 2600.0, 'min': 1000.0, 'max': 3700.0}, {'current': 2600.0, 'min': 1000.0, 'max': 3700.0}, {'current': 2600.0, 'min': 1000.0, 'max': 3700.0}, {'current': 2600.0, 'min': 1000.0, 'max': 3700.0}, {'current': 2600.0, 'min': 1000.0, 'max': 3700.0}, {'current': 2600.0, 'min': 1000.0, 'max': 3700.0}, {'current': 2600.0, 'min': 1000.0, 'max': 3700.0}, {'current': 2600.0, 'min': 1000.0, 'max': 3700.0}, {'current': 2600.0, 'min': 1000.0, 'max': 3700.0}, {'current': 2600.0, 'min': 1000.0, 'max': 3700.0}, {'current': 2600.0, 'min': 1000.0, 'max': 3700.0}, {'current': 2600.0, 'min': 1000.0, 'max': 3700.0}, {'current': 2600.0, 'min': 1000.0, 'max': 3700.0}, {'current': 2600.0, 'min': 1000.0, 'max': 3700.0}, {'current': 2600.0, 'min': 1000.0, 'max': 3700.0}, {'current': 2600.0, 'min': 1000.0, 'max': 3700.0}, {'current': 2600.0, 'min': 1000.0, 'max': 3700.0}, {'current': 2600.0, 'min': 1000.0, 'max': 3700.0}, {'current': 2600.0, 'min': 1000.0, 'max': 3700.0}, {'current': 2600.0, 'min': 1000.0, 'max': 3700.0}, {'current': 2600.0, 'min': 1000.0, 'max': 3700.0}, {'current': 2600.0, 'min': 1000.0, 'max': 3700.0}, {'current': 2600.0, 'min': 1000.0, 'max': 3700.0}, {'current': 2600.0, 'min': 1000.0, 'max': 3700.0}, {'current': 2600.0, 'min': 1000.0, 'max': 3700.0}, {'current': 2600.0, 'min': 1000.0, 'max': 3700.0}, {'current': 2600.0, 'min': 1000.0, 'max': 3700.0}, {'current': 1401.746, 'min': 1000.0, 'max': 3700.0}, {'current': 2600.0, 'min': 1000.0, 'max': 3700.0}, {'current': 2600.0, 'min': 1000.0, 'max': 3700.0}], 'disk': {'/': {'total': 208.95753479003906, 'used': 157.59302139282227}}, 'gpu': 'Quadro RTX 6000', 'gpu_count': 1, 'gpu_devices': [{'name': 'Quadro RTX 6000', 'memory_total': 25769803776}], 'memory': {'total': 187.4629783630371}}
26
+ 2024-07-17 16:22:04,417 INFO HandlerThread:2116314 [system_monitor.py:probe():224] Finished collecting system info
27
+ 2024-07-17 16:22:04,417 INFO HandlerThread:2116314 [system_monitor.py:probe():227] Publishing system info
28
+ 2024-07-17 16:22:04,417 DEBUG HandlerThread:2116314 [system_info.py:_save_conda():209] Saving list of conda packages installed into the current environment
29
+ 2024-07-17 16:22:05,208 INFO Thread-12 :2116314 [dir_watcher.py:_on_file_created():271] file/dir created: /home/cc/polymorph/fine-tuning/results/train-lora/5/loras/19/wandb/run-20240717_162204-wglu07sk/files/conda-environment.yaml
30
+ 2024-07-17 16:22:07,942 DEBUG HandlerThread:2116314 [system_info.py:_save_conda():224] Saving conda packages done
31
+ 2024-07-17 16:22:07,943 INFO HandlerThread:2116314 [system_monitor.py:probe():229] Finished publishing system info
32
+ 2024-07-17 16:22:07,953 DEBUG SenderThread:2116314 [sender.py:send():379] send: files
33
+ 2024-07-17 16:22:07,953 INFO SenderThread:2116314 [sender.py:_save_file():1454] saving file wandb-metadata.json with policy now
34
+ 2024-07-17 16:22:08,106 DEBUG HandlerThread:2116314 [handler.py:handle_request():158] handle_request: python_packages
35
+ 2024-07-17 16:22:08,106 DEBUG SenderThread:2116314 [sender.py:send_request():406] send_request: python_packages
36
+ 2024-07-17 16:22:08,107 DEBUG HandlerThread:2116314 [handler.py:handle_request():158] handle_request: stop_status
37
+ 2024-07-17 16:22:08,108 DEBUG SenderThread:2116314 [sender.py:send_request():406] send_request: stop_status
38
+ 2024-07-17 16:22:08,112 DEBUG HandlerThread:2116314 [handler.py:handle_request():158] handle_request: internal_messages
39
+ 2024-07-17 16:22:08,159 DEBUG SenderThread:2116314 [sender.py:send():379] send: telemetry
40
+ 2024-07-17 16:22:08,159 DEBUG SenderThread:2116314 [sender.py:send():379] send: config
41
+ 2024-07-17 16:22:08,161 DEBUG SenderThread:2116314 [sender.py:send():379] send: telemetry
42
+ 2024-07-17 16:22:08,162 DEBUG SenderThread:2116314 [sender.py:send():379] send: metric
43
+ 2024-07-17 16:22:08,162 DEBUG SenderThread:2116314 [sender.py:send():379] send: telemetry
44
+ 2024-07-17 16:22:08,162 DEBUG SenderThread:2116314 [sender.py:send():379] send: metric
45
+ 2024-07-17 16:22:08,162 WARNING SenderThread:2116314 [sender.py:send_metric():1405] Seen metric with glob (shouldn't happen)
46
+ 2024-07-17 16:22:08,162 DEBUG SenderThread:2116314 [sender.py:send():379] send: telemetry
47
+ 2024-07-17 16:22:08,163 DEBUG SenderThread:2116314 [sender.py:send():379] send: telemetry
48
+ 2024-07-17 16:22:08,163 DEBUG SenderThread:2116314 [sender.py:send():379] send: config
49
+ 2024-07-17 16:22:08,206 INFO Thread-12 :2116314 [dir_watcher.py:_on_file_modified():288] file/dir modified: /home/cc/polymorph/fine-tuning/results/train-lora/5/loras/19/wandb/run-20240717_162204-wglu07sk/files/conda-environment.yaml
50
+ 2024-07-17 16:22:08,207 INFO Thread-12 :2116314 [dir_watcher.py:_on_file_created():271] file/dir created: /home/cc/polymorph/fine-tuning/results/train-lora/5/loras/19/wandb/run-20240717_162204-wglu07sk/files/requirements.txt
51
+ 2024-07-17 16:22:08,207 INFO Thread-12 :2116314 [dir_watcher.py:_on_file_created():271] file/dir created: /home/cc/polymorph/fine-tuning/results/train-lora/5/loras/19/wandb/run-20240717_162204-wglu07sk/files/wandb-metadata.json
52
+ 2024-07-17 16:22:08,216 DEBUG HandlerThread:2116314 [handler.py:handle_request():158] handle_request: log_artifact
53
+ 2024-07-17 16:22:08,216 DEBUG SenderThread:2116314 [sender.py:send_request():406] send_request: log_artifact
54
+ 2024-07-17 16:22:08,259 INFO wandb-upload_0:2116314 [upload_job.py:push():130] Uploaded file /tmp/tmp6xd4bkanwandb/g8dj0aj4-wandb-metadata.json
55
+ 2024-07-17 16:22:08,735 INFO wandb-upload_0:2116314 [upload_job.py:push():88] Uploaded file /tmp/tmpyvr0ja9x/model_architecture.txt
56
+ 2024-07-17 16:22:09,108 DEBUG HandlerThread:2116314 [handler.py:handle_request():158] handle_request: internal_messages
57
+ 2024-07-17 16:22:09,159 INFO SenderThread:2116314 [sender.py:send_request_log_artifact():1518] logged artifact model-wglu07sk - {'id': 'QXJ0aWZhY3Q6OTkxMzgwOTg0', 'state': 'PENDING', 'artifactSequence': {'id': 'QXJ0aWZhY3RDb2xsZWN0aW9uOjI4MjA3NTAwNA==', 'latestArtifact': None}}
58
+ 2024-07-17 16:22:09,160 DEBUG HandlerThread:2116314 [handler.py:handle_request():158] handle_request: status_report
59
+ 2024-07-17 16:22:09,207 INFO Thread-12 :2116314 [dir_watcher.py:_on_file_created():271] file/dir created: /home/cc/polymorph/fine-tuning/results/train-lora/5/loras/19/wandb/run-20240717_162204-wglu07sk/files/output.log
60
+ 2024-07-17 16:22:10,107 DEBUG HandlerThread:2116314 [handler.py:handle_request():158] handle_request: internal_messages
61
+ 2024-07-17 16:22:11,108 DEBUG HandlerThread:2116314 [handler.py:handle_request():158] handle_request: internal_messages
62
+ 2024-07-17 16:22:11,208 INFO Thread-12 :2116314 [dir_watcher.py:_on_file_modified():288] file/dir modified: /home/cc/polymorph/fine-tuning/results/train-lora/5/loras/19/wandb/run-20240717_162204-wglu07sk/files/output.log
63
+ 2024-07-17 16:22:12,108 DEBUG HandlerThread:2116314 [handler.py:handle_request():158] handle_request: internal_messages
64
+ 2024-07-17 16:22:13,108 DEBUG HandlerThread:2116314 [handler.py:handle_request():158] handle_request: internal_messages
65
+ 2024-07-17 16:22:13,209 INFO Thread-12 :2116314 [dir_watcher.py:_on_file_modified():288] file/dir modified: /home/cc/polymorph/fine-tuning/results/train-lora/5/loras/19/wandb/run-20240717_162204-wglu07sk/files/output.log
66
+ 2024-07-17 16:22:13,550 DEBUG HandlerThread:2116314 [handler.py:handle_request():158] handle_request: partial_history
67
+ 2024-07-17 16:22:13,554 DEBUG SenderThread:2116314 [sender.py:send():379] send: metric
68
+ 2024-07-17 16:22:13,556 DEBUG SenderThread:2116314 [sender.py:send():379] send: metric
69
+ 2024-07-17 16:22:13,556 DEBUG SenderThread:2116314 [sender.py:send():379] send: metric
70
+ 2024-07-17 16:22:13,557 DEBUG SenderThread:2116314 [sender.py:send():379] send: metric
71
+ 2024-07-17 16:22:13,557 DEBUG SenderThread:2116314 [sender.py:send():379] send: metric
72
+ 2024-07-17 16:22:13,557 DEBUG SenderThread:2116314 [sender.py:send():379] send: metric
73
+ 2024-07-17 16:22:13,557 DEBUG SenderThread:2116314 [sender.py:send():379] send: history
74
+ 2024-07-17 16:22:13,557 DEBUG SenderThread:2116314 [sender.py:send_request():406] send_request: summary_record
75
+ 2024-07-17 16:22:13,558 INFO SenderThread:2116314 [sender.py:_save_file():1454] saving file wandb-summary.json with policy end
76
+ 2024-07-17 16:22:14,108 DEBUG HandlerThread:2116314 [handler.py:handle_request():158] handle_request: internal_messages
wandb/run-20240717_162204-wglu07sk/logs/debug.log ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2024-07-17 16:22:04,034 INFO MainThread:2115683 [wandb_setup.py:_flush():76] Current SDK version is 0.17.4
2
+ 2024-07-17 16:22:04,034 INFO MainThread:2115683 [wandb_setup.py:_flush():76] Configure stats pid to 2115683
3
+ 2024-07-17 16:22:04,034 INFO MainThread:2115683 [wandb_setup.py:_flush():76] Loading settings from /home/cc/.config/wandb/settings
4
+ 2024-07-17 16:22:04,034 INFO MainThread:2115683 [wandb_setup.py:_flush():76] Loading settings from /home/cc/polymorph/fine-tuning/wandb/settings
5
+ 2024-07-17 16:22:04,034 INFO MainThread:2115683 [wandb_setup.py:_flush():76] Loading settings from environment variables: {'root_dir': '/home/cc/polymorph/fine-tuning/results/train-lora/5/loras/19'}
6
+ 2024-07-17 16:22:04,034 INFO MainThread:2115683 [wandb_setup.py:_flush():76] Applying setup settings: {'_disable_service': False}
7
+ 2024-07-17 16:22:04,034 INFO MainThread:2115683 [wandb_setup.py:_flush():76] Inferring run settings from compute environment: {'program_relpath': 'fine-tuning/main-lora-train.py', 'program_abspath': '/home/cc/polymorph/fine-tuning/main-lora-train.py', 'program': '/home/cc/polymorph/fine-tuning/main-lora-train.py'}
8
+ 2024-07-17 16:22:04,034 INFO MainThread:2115683 [wandb_setup.py:_flush():76] Applying login settings: {}
9
+ 2024-07-17 16:22:04,035 INFO MainThread:2115683 [wandb_init.py:_log_setup():529] Logging user logs to /home/cc/polymorph/fine-tuning/results/train-lora/5/loras/19/wandb/run-20240717_162204-wglu07sk/logs/debug.log
10
+ 2024-07-17 16:22:04,035 INFO MainThread:2115683 [wandb_init.py:_log_setup():530] Logging internal logs to /home/cc/polymorph/fine-tuning/results/train-lora/5/loras/19/wandb/run-20240717_162204-wglu07sk/logs/debug-internal.log
11
+ 2024-07-17 16:22:04,035 INFO MainThread:2115683 [wandb_init.py:init():569] calling init triggers
12
+ 2024-07-17 16:22:04,035 INFO MainThread:2115683 [wandb_init.py:init():576] wandb.init called with sweep_config: {}
13
+ config: {}
14
+ 2024-07-17 16:22:04,035 INFO MainThread:2115683 [wandb_init.py:init():619] starting backend
15
+ 2024-07-17 16:22:04,035 INFO MainThread:2115683 [wandb_init.py:init():623] setting up manager
16
+ 2024-07-17 16:22:04,037 INFO MainThread:2115683 [backend.py:_multiprocessing_setup():105] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
17
+ 2024-07-17 16:22:04,038 INFO MainThread:2115683 [wandb_init.py:init():631] backend started and connected
18
+ 2024-07-17 16:22:04,041 INFO MainThread:2115683 [wandb_init.py:init():720] updated telemetry
19
+ 2024-07-17 16:22:04,050 INFO MainThread:2115683 [wandb_init.py:init():753] communicating run to backend with 90.0 second timeout
20
+ 2024-07-17 16:22:04,217 INFO MainThread:2115683 [wandb_run.py:_on_init():2402] communicating current version
21
+ 2024-07-17 16:22:04,270 INFO MainThread:2115683 [wandb_run.py:_on_init():2411] got version response
22
+ 2024-07-17 16:22:04,270 INFO MainThread:2115683 [wandb_init.py:init():804] starting run threads in backend
23
+ 2024-07-17 16:22:08,107 INFO MainThread:2115683 [wandb_run.py:_console_start():2380] atexit reg
24
+ 2024-07-17 16:22:08,107 INFO MainThread:2115683 [wandb_run.py:_redirect():2235] redirect: wrap_raw
25
+ 2024-07-17 16:22:08,107 INFO MainThread:2115683 [wandb_run.py:_redirect():2300] Wrapping output streams.
26
+ 2024-07-17 16:22:08,107 INFO MainThread:2115683 [wandb_run.py:_redirect():2325] Redirects installed.
27
+ 2024-07-17 16:22:08,108 INFO MainThread:2115683 [wandb_init.py:init():847] run started, returning control to user process
28
+ 2024-07-17 16:22:08,110 INFO MainThread:2115683 [wandb_run.py:_config_callback():1382] config_cb None None {'peft_config': {'default': {'peft_type': <PeftType.LORA: 'LORA'>, 'auto_mapping': None, 'base_model_name_or_path': '/home/cc/polymorph/fine-tuning/results/train-lora/5/base_model', 'revision': None, 'task_type': None, 'inference_mode': False, 'r': 16, 'target_modules': {'value', 'query'}, 'lora_alpha': 16, 'lora_dropout': 0.1, 'fan_in_fan_out': False, 'bias': 'none', 'use_rslora': False, 'modules_to_save': ['classifier'], 'init_lora_weights': True, 'layers_to_transform': None, 'layers_pattern': None, 'rank_pattern': {}, 'alpha_pattern': {}, 'megatron_config': None, 'megatron_core': 'megatron.core', 'loftq_config': {}, 'use_dora': False, 'layer_replication': None, 'runtime_config': {'ephemeral_gpu_offload': False}}}, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float32', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': ['ViTForImageClassification'], 'finetuning_task': None, 'id2label': {'0': 'airplane', '1': 'bird'}, 'label2id': {'airplane': '0', 'bird': '1'}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': None, 'pad_token_id': None, 'eos_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': '/home/cc/polymorph/fine-tuning/results/train-lora/5/base_model', 'transformers_version': '4.42.3', 'model_type': 'vit', 'hidden_size': 768, 'num_hidden_layers': 12, 'num_attention_heads': 12, 'intermediate_size': 3072, 'hidden_act': 'gelu', 'hidden_dropout_prob': 0.0, 'attention_probs_dropout_prob': 0.0, 'initializer_range': 0.02, 'layer_norm_eps': 1e-12, 'image_size': 224, 'patch_size': 16, 'num_channels': 3, 'qkv_bias': True, 'encoder_stride': 16, 'output_dir': '/home/cc/polymorph/fine-tuning/results/train-lora/5/loras/19', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'epoch', 'prediction_loss_only': False, 'per_device_train_batch_size': 128, 'per_device_eval_batch_size': 128, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 4, 'eval_accumulation_steps': None, 'eval_delay': 0, 'learning_rate': 0.005, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 5, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': '/home/cc/polymorph/fine-tuning/results/train-lora/5/loras/19/runs/Jul17_16-22-01_gpu', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 10, 'logging_nan_inf_filter': True, 'save_strategy': 'epoch', 'save_steps': 500, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': None, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': '/home/cc/polymorph/fine-tuning/results/train-lora/5/loras/19', 'disable_tqdm': False, 'remove_unused_columns': False, 'label_names': ['labels'], 'load_best_model_at_end': True, 'metric_for_best_model': 'accuracy', 'greater_is_better': True, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': 'epoch', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False}
29
+ 2024-07-17 16:22:08,113 INFO MainThread:2115683 [wandb_config.py:__setitem__():151] config set model/num_parameters = 86391556 - <bound method Run._config_callback of <wandb.sdk.wandb_run.Run object at 0x7fc944562610>>
30
+ 2024-07-17 16:22:08,113 INFO MainThread:2115683 [wandb_run.py:_config_callback():1382] config_cb model/num_parameters 86391556 None
wandb/run-20240717_162204-wglu07sk/run-wglu07sk.wandb ADDED
Binary file (11.2 kB). View file