---
# Registry of eval runs and their result artifacts.
# Each list item maps an eval name to one experiment's artifacts:
#   results-file        — JSONL results for the first model run
#   second-results-file — JSONL results for the second model run
#   functions-file      — Python helpers for this eval (absent on some entries;
#                         presumably optional — confirm against the consumer)
#   link                — openai/evals commit that defines the eval
#   description         — one-line human-readable summary
# NOTE(review): filenames ("*-turbo.jsonl" vs "*-4.jsonl") suggest results-file
# is a gpt-3.5-turbo run and second-results-file a gpt-4 run — TODO confirm.
- crossword:
    results-file: ./crossword/crossword-turbo.jsonl
    second-results-file: ./crossword/crossword-4.jsonl
    functions-file: ./crossword/crossword_fns.py
    link: https://github.com/openai/evals/commit/fa192552a19c297c65a8a398ede53b62a289ab9c
    description: Given a clue and a partially filled crossword answer, provide the filled in answer
- country-area:
    results-file: ./country-area/countries-turbo.jsonl
    second-results-file: ./country-area/countries-4.jsonl
    functions-file: ./country-area/country_fns.py
    link: https://github.com/openai/evals/commit/24dae81ae06ebc70808690c7a147f2710e3e54bf
    description: Select the country with the biggest area from a list of countries
# No functions-file for this entry — verify the consumer treats it as optional.
- med-mcqa:
    results-file: ./medmcqa/med-3.5-turbo.jsonl
    second-results-file: ./medmcqa/med-4.jsonl
    link: https://github.com/openai/evals/commit/19b2cf9ff96b08af68f5c3b4d2c90184844a4fe6
    description: Multiple choice questions from different medical areas.
# No functions-file for this entry — verify the consumer treats it as optional.
- aba-mrpc:
    results-file: ./aba-mrpc-true-false/aba-3.5-turbo.jsonl
    second-results-file: ./aba-mrpc-true-false/aba-4.jsonl
    link: https://github.com/openai/evals/commit/9badb0395b696958e539f001e62d377ed0e237de
    description: True/False questions based on the American Bar Association's Model Rules of Professional Conduct
- logic-statements:
    results-file: ./logic/logic-turbo.jsonl
    second-results-file: ./logic/logic-4.jsonl
    link: https://github.com/openai/evals/commit/81b959c37c7ed2ffe0c4c145e9e3eee955d27cc3
    description: Tests of simple logical reasoning statements.