File size: 5,274 Bytes
41b743c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
{
    "qwen2-7b-instruct": {
      "feature": "qwen2-7b-instruct β€’ Chinese & English LLM for language, coding, mathematics and reasoning; costs $0.20 per M input tokens and $0.20 per M output tokens on Together AI.",
      "input_price": 0.20,
      "output_price": 0.20,
      "model": "qwen/qwen2-7b-instruct"
    },
    "qwen2.5-7b-instruct": {
      "feature": "qwen2.5-7b-instruct β€’ upgraded Qwen with stronger multilingual capability; priced at $0.30 /M input and $0.30 /M output.",
      "input_price": 0.20,
      "output_price": 0.20,
      "model": "qwen/qwen2.5-7b-instruct"
    },
    "gemma-7b": {
      "feature": "gemma-7b β€’ Google’s lightweight 7 B model for text and code; Together cost is $0.20 /M input and $0.20 /M output.",
      "input_price": 0.20,
      "output_price": 0.20,
      "model": "google/gemma-7b"
    },
    "codegemma-7b": {
      "feature": "codegemma-7b β€’ Gemma variant focused on code generation & completion; $0.20 /M input, $0.20 /M output.",
      "input_price": 0.20,
      "output_price": 0.20,
      "model": "google/codegemma-7b"
    },
    "gemma-2-9b-it": {
      "feature": "gemma-2-9b-it β€’ 2.9 B instruction-tuned Gemma for general text; ultralow $0.10 /M input and $0.10 /M output.",
      "input_price": 0.10,
      "output_price": 0.10,
      "model": "google/gemma-2-9b-it"
    },
    "llama-3.1-8b-instruct": {
      "feature": "llama-3.1-8b-instruct β€’ Meta’s 8 B Llama-3 series for chat & reasoning; $0.20 /M input and $0.20 /M output.",
      "input_price": 0.20,
      "output_price": 0.20,
      "model": "meta/llama-3.1-8b-instruct"
    },
    "granite-3.0-8b-instruct": {
      "feature": "granite-3.0-8b-instruct β€’ IBM small LLM supporting RAG, summarization & code; $0.20 /M input, $0.20 /M output.",
      "input_price": 0.20,
      "output_price": 0.20,
      "model": "ibm/granite-3.0-8b-instruct"
    },
    "llama3-chatqa-1.5-8b": {
      "feature": "llama3-chatqa-1.5-8b β€’ NVIDIA fine-tuned 8 B for QA & reasoning; $0.20 /M input and output.",
      "input_price": 0.20,
      "output_price": 0.20,
      "model": "nvidia/llama3-chatqa-1.5-8b"
    },
    "mistral-nemo-12b-instruct": {
      "feature": "mistral-nemo-12b-instruct β€’ 12 B model combining Mistral and NeMo tech; $0.30 /M input, $0.30 /M output.",
      "input_price": 0.30,
      "output_price": 0.30,
      "model": "nv-mistralai/mistral-nemo-12b-instruct"
    },
    "mistral-7b-instruct-v0.3": {
      "feature": "mistral-7b-instruct-v0.3 β€’ fast 7 B model for instruction following; $0.20 /M in & out.",
      "input_price": 0.20,
      "output_price": 0.20,
      "model": "mistralai/mistral-7b-instruct-v0.3"
    },
    "llama-3.3-nemotron-super-49b-v1": {
      "feature": "llama-3.3-nemotron-super-49b-v1 β€’ 49 B Nemotron with high accuracy; $0.90 /M input and output.",
      "input_price": 0.90,
      "output_price": 0.90,
      "model": "nvidia/llama-3.3-nemotron-super-49b-v1"
    },
    "llama-3.1-nemotron-51b-instruct": {
      "feature": "llama-3.1-nemotron-51b-instruct β€’ 51 B NVIDIA alignment model; $0.90 /M in & out.",
      "input_price": 0.90,
      "output_price": 0.90,
      "model": "nvidia/llama-3.1-nemotron-51b-instruct"
    },
    "llama3-chatqa-1.5-70b": {
      "feature": "llama3-chatqa-1.5-70b β€’ 70 B chat-optimized Llama; $0.90 /M input and output.",
      "input_price": 0.90,
      "output_price": 0.90,
      "model": "nvidia/llama3-chatqa-1.5-70b"
    },
    "llama-3.1-70b-instruct": {
      "feature": "llama-3.1-70b-instruct β€’ Meta 70 B for complex conversations; $0.90 /M input/output.",
      "input_price": 0.90,
      "output_price": 0.90,
      "model": "meta/llama3-70b-instruct"
    },
    "llama3-70b-instruct": {
      "feature": "llama3-70b-instruct β€’ alternate naming of Meta’s 70 B; $0.90 /M input & output.",
      "input_price": 0.90,
      "output_price": 0.90,
      "model": "meta/llama-3.1-8b-instruct"
    },
    "granite-34b-code-instruct": {
      "feature": "granite-34b-code-instruct β€’ 34 B IBM coder model; $0.80 /M input and output.",
      "input_price": 0.80,
      "output_price": 0.80,
      "model": "ibm/granite-34b-code-instruct"
    },
    "mixtral-8x7b-instruct-v0.1": {
      "feature": "mixtral-8Γ—7b-instruct-v0.1 β€’ 56 B MoE (8Γ—7 B) for creative text; $0.60 /M input/output.",
      "input_price": 0.60,
      "output_price": 0.60,
      "model": "mistralai/mixtral-8x7b-instruct-v0.1"
    },
    "deepseek-r1": {
      "feature": "deepseek-r1 β€’ 671 B-param reasoning powerhouse; Together charges $3 /M input tokens and $7 /M output tokens.",
      "input_price": 0.55,
      "output_price": 2.19,
      "model": "deepseek-ai/deepseek-r1"
    },
    "mixtral-8x22b-instruct-v0.1": {
      "feature": "mixtral-8Γ—22b-instruct-v0.1 β€’ 176 B MoE (8Γ—22 B); $1.20 /M input and output.",
      "input_price": 1.20,
      "output_price": 1.20,
      "model": "mistralai/mixtral-8x22b-instruct-v0.1"
    },
    "palmyra-creative-122b": {
      "feature": "palmyra-creative-122b β€’ 122 B parameter model from Writer, optimized for creative and marketing content generation; $1.80 /M input and $1.80 /M output.",
      "input_price": 1.80,
      "output_price": 1.80,
      "model": "writer/palmyra-creative-122b"
    }
  }