juancauma committed on
Commit
20183f7
·
1 Parent(s): 10185cf

All models added

Browse files
Files changed (33) hide show
  1. data/mwoz_leaderboard_results.json +3 -0
  2. data/tau_leaderboard_results.json +3 -0
  3. process_submissions.py +76 -0
  4. submissions/{hehe.json → 20250130_140218-4o.json} +2 -2
  5. submissions/20250130_140439-4omini.json +3 -0
  6. submissions/20250130_145202-gpt35.json +3 -0
  7. submissions/20250130_183030-claude.json +3 -0
  8. submissions/20250130_184905-mistrallarge.json +3 -0
  9. submissions/20250131_010143-o1mini.json +3 -0
  10. submissions/20250131_012338-llama405.json +3 -0
  11. submissions/20250131_012449-llama70.json +3 -0
  12. submissions/20250131_013711-qwen72b.json +3 -0
  13. submissions/20250131_152226-tau-4o-mini-airline.json +3 -0
  14. submissions/20250131_152338-tau-4o-mini-retail.json +3 -0
  15. submissions/20250131_152422-tau-4o-retail.json +3 -0
  16. submissions/20250131_152503-tau-4o-airline.json +3 -0
  17. submissions/20250131_152610-tau-gpt35-retail.json +3 -0
  18. submissions/20250131_152708-tau-gpt35-airline.json +3 -0
  19. submissions/20250131_152807-tau-sonnet-retail.json +3 -0
  20. submissions/20250202_112945-qwen72b-airline.json +3 -0
  21. submissions/20250202_140527-qwen72b-retail.json +3 -0
  22. submissions/20250204_144222-tau-llama-405b-airline.json +3 -0
  23. submissions/20250205_024823-tau-mistrallarge-airline.json +3 -0
  24. submissions/20250205_030422-tau-sonnet-airline.json +3 -0
  25. submissions/{example.json → 20250205_033820-tau-llama405b-retail.json} +1 -1
  26. submissions/20250205_044403-tau-mistrallarge-retail.json +3 -0
  27. submissions/20250208_024344-tau-llama70b-airline.json +3 -0
  28. submissions/20250208_030407-tau-llama70b-retail.json +3 -0
  29. submissions/20250214_142736-tau-o1-mini-retail.json +3 -0
  30. submissions/20250214_180731-tau-o1-mini-airline.json +3 -0
  31. submissions/20250214_193236-o1.json +3 -0
  32. submissions/20250215_115156-tau-o1-airline.json +3 -0
  33. submissions/20250215_121147-tau-o1-retail.json +3 -0
data/mwoz_leaderboard_results.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:30f233c17f2a9e1068eb5313c1cc1c1e4b622593eb01a40a34b1a95be2824873
3
+ size 3052
data/tau_leaderboard_results.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6dd7553f04d89e492bfec22e2e8f9ab8d7afb269c0b8c027709aedce3ac63aa9
3
+ size 9396
process_submissions.py ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import os
3
+ from pathlib import Path
4
+ import argparse
5
+
6
def process_submissions(input_file, output_dir='submissions'):
    """
    Split a JSON file holding an array of model submissions into one JSON
    file per submission inside ``output_dir``.

    Each array element must be a JSON object with a non-empty string
    ``model_name``; that name (with path separators replaced by ``_``) becomes
    the output file name. Elements that do not satisfy this are skipped with a
    warning instead of aborting the whole batch. Problems are reported on
    stdout; nothing is raised for a missing, unreadable or malformed input
    file.

    Args:
        input_file (str): Path to the input JSON file
        output_dir (str): Directory where individual submission files will be stored

    Returns:
        None
    """
    # parents=True so a nested output directory (e.g. "out/run1") works too;
    # without it mkdir raises FileNotFoundError when the parent is missing.
    Path(output_dir).mkdir(parents=True, exist_ok=True)

    try:
        # JSON is UTF-8 by specification; do not rely on the platform default.
        with open(input_file, 'r', encoding='utf-8') as f:
            submissions = json.load(f)

        if not isinstance(submissions, list):
            print(f"Error: Input file {input_file} must contain a JSON array of submissions")
            return

        written = 0
        skipped = 0
        seen_paths = set()

        for submission in submissions:
            # Non-object entries (numbers, strings, lists, null) cannot carry a
            # model_name; skip them rather than letting a TypeError abort the run.
            if not isinstance(submission, dict) or 'model_name' not in submission:
                print("Warning: Skipping submission without model_name field")
                skipped += 1
                continue

            model_name = submission['model_name']
            if not isinstance(model_name, str) or not model_name.strip():
                print("Warning: Skipping submission with empty or non-string model_name")
                skipped += 1
                continue

            # Neutralise both separator styles so the name can never address a
            # different directory, regardless of the operating system.
            safe_stem = model_name.replace('/', '_').replace('\\', '_')
            safe_filename = f"{safe_stem}.json"
            output_path = os.path.join(output_dir, safe_filename)

            if output_path in seen_paths:
                print(f"Warning: Duplicate model_name '{model_name}' overwrites {output_path}")
            seen_paths.add(output_path)

            with open(output_path, 'w', encoding='utf-8') as f:
                json.dump(submission, f, indent=4)

            print(f"Created submission file: {output_path}")
            written += 1

        # Report what was actually written, not the raw length of the input.
        print(f"\nProcessed {written} submissions successfully!")
        if skipped:
            print(f"Skipped {skipped} invalid submissions")

    except FileNotFoundError:
        print(f"Error: Input file '{input_file}' not found")
    except json.JSONDecodeError:
        print(f"Error: Input file '{input_file}' is not valid JSON")
    except Exception as e:
        # Last-resort boundary for this CLI helper: keep the original contract
        # of reporting (not raising) on unexpected I/O or decoding failures.
        print(f"Error processing submissions: {str(e)}")
52
+
53
def main(argv=None):
    """
    Command-line entry point: parse arguments and split the submissions file.

    Args:
        argv (list[str] | None): Argument strings to parse instead of the
            process command line. ``None`` (the default) makes argparse read
            ``sys.argv[1:]``, which is the original behaviour.
    """
    # Set up argument parser
    parser = argparse.ArgumentParser(
        description='Process a JSON file containing model submissions and split into individual files.'
    )
    parser.add_argument(
        'input_file',
        help='Path to the input JSON file containing model submissions'
    )
    parser.add_argument(
        '--output-dir',
        '-o',
        default='submissions',
        help='Directory where individual submission files will be stored (default: submissions)'
    )

    # Parse arguments (argparse exits with status 2 on invalid usage)
    args = parser.parse_args(argv)

    # Process submissions
    process_submissions(args.input_file, args.output_dir)


if __name__ == "__main__":
    main()
submissions/{hehe.json → 20250130_140218-4o.json} RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3ce0d07af5700813b048417a3f05f4675935489924cde0322f3f3f3b8f2080c6
3
- size 427
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8e6ac4738378227537d66f8f231f943581cca65a5a88badbf7b089d8ddd436aa
3
+ size 276
submissions/20250130_140439-4omini.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:23d9f222f6ed78bfa0c664d31dad08d0da8782d6db7184469e9b5f1a43838bcd
3
+ size 280
submissions/20250130_145202-gpt35.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:241d41c00306dfe554be2e712ef70befb98cb15287f8f362b64039e63d42ebcc
3
+ size 279
submissions/20250130_183030-claude.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:28fd88cfba3985266a31145b0d7f8bbf7523e1486700bb1c0e1b8cdf395ae745
3
+ size 279
submissions/20250130_184905-mistrallarge.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fa32abc214592ed42253f9d09e60049fbded16abf25d27bb58aa883551f52916
3
+ size 286
submissions/20250131_010143-o1mini.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:72f8224b39acdde5456eb76914c7294153b1c3cb5d8f778b05fd646be89d1d4b
3
+ size 280
submissions/20250131_012338-llama405.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:97ad8b202d517c19c62e492a377d58612c8a95870b71ceb8587896a7977415e7
3
+ size 282
submissions/20250131_012449-llama70.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e9202529ccd1b01100e97ab1e49ffe9f80adf890a384bb0c9d54dcb50129044b
3
+ size 281
submissions/20250131_013711-qwen72b.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a5785dea65c6e83d70b8eaae6143aa4d8755c3dfda8fd256d971a67242b444a7
3
+ size 281
submissions/20250131_152226-tau-4o-mini-airline.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:54c876c671383e333599122f0789947b2906fbce489d26feb2fb44df6ca61bb9
3
+ size 430
submissions/20250131_152338-tau-4o-mini-retail.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ccbdb2fa53caa6d08641632a10588c610355a1cd238527b1ab96e40d5314a78c
3
+ size 429
submissions/20250131_152422-tau-4o-retail.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:caffe3b74905c9eb0953b3b7db2ca1caa2c9dbeb67aa9cb56f47c8102df82c73
3
+ size 419
submissions/20250131_152503-tau-4o-airline.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ef952c41710fe34a4eef5f23f504e619bc4565a3eaea9ac587b4a2ea2ea3b660
3
+ size 420
submissions/20250131_152610-tau-gpt35-retail.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4d88dffc671daefb67e80fdfed3b7b71a7adc3e7975754fd6a435a5783e6435d
3
+ size 434
submissions/20250131_152708-tau-gpt35-airline.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:89b7e7b87a74bcfe903d7455c718569fab7f0f593fda15b00ceb40999b101e0e
3
+ size 435
submissions/20250131_152807-tau-sonnet-retail.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:946a9b068da07c43972b41281338689f17caf9a454e6aaefab0dbc199bf434d5
3
+ size 443
submissions/20250202_112945-qwen72b-airline.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e4c42e48a557b297a545d9ab5065736de9d3a843ca196625e3c18574f3925c56
3
+ size 428
submissions/20250202_140527-qwen72b-retail.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9bfbdf41eb3707e9d471ea6e8b0df66e0c30a876e64de13bfac05b19107dafa5
3
+ size 428
submissions/20250204_144222-tau-llama-405b-airline.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:17f183590e9fa041c88100e3ce26a2489cf55afad9dfb65142eed2864fa3b29d
3
+ size 442
submissions/20250205_024823-tau-mistrallarge-airline.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:52aa75b36bbfc50d5e49ad5d0333a71248d833969db0050f1d1b2f71688e9a1c
3
+ size 435
submissions/20250205_030422-tau-sonnet-airline.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:69df986398a9874b7ae42ea16c74d2537012d5c94c146cc7316be3d39929dd6f
3
+ size 434
submissions/{example.json → 20250205_033820-tau-llama405b-retail.json} RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ced189d20e7d370a9ac23c8064a2ad5f9911b7004497d8cdc19c14b82181ae28
3
  size 431
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:55533012c64e2aa9a09eeec6b5d8b1fb898e2d9e772cdac28c39de4c8167d158
3
  size 431
submissions/20250205_044403-tau-mistrallarge-retail.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:147f1a3cd17768bb01b54f4568348054af1ea72363f04f5edfa50773dcda3f80
3
+ size 433
submissions/20250208_024344-tau-llama70b-airline.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a1b87a4e1981c7b8cfb364cc46ae4a16f4f85f9631d9c02e85008655c8b103e4
3
+ size 426
submissions/20250208_030407-tau-llama70b-retail.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fe0c10dbfe0d56a5cd65267a448058aabd8a0e82fdedaf3748f7ffa3d7b7d12e
3
+ size 424
submissions/20250214_142736-tau-o1-mini-retail.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:56519aef51f3d681df9e8f29c792af3f315cb67116c012bc6742a2977793c465
3
+ size 410
submissions/20250214_180731-tau-o1-mini-airline.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:165618025471980ee895ab8a12c04f4ca94eb0df19772c8057de2d415760f684
3
+ size 410
submissions/20250214_193236-o1.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:628356d0e3f25fcb909e50ff54c1a54b0b25ff7d2c8190ae1c87713edd90d238
3
+ size 276
submissions/20250215_115156-tau-o1-airline.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9db8211e9fa6bdb0b97c7341216dcce133cd815e31d0e209a5a5dd452939ffab
3
+ size 426
submissions/20250215_121147-tau-o1-retail.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8c3e438a5019e96efb58d7c17f9b82b6de54da087cb4960445326f8f858a69ab
3
+ size 423