| Rank | Organization | Model | Dataset Score | Market Score (resolved) | Market Score (unresolved) | Market Score (overall) | Overall Resolved Score | Overall Score | Confidence Interval (Overall Score) | p-value (vs. No. 1) | | Pct. imputed |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1 | ForecastBench | Superforecaster median forecast | 0.135 | 0.119 | 0.036 | 0.051 | 0.127 | 0.093 | [0.073, 0.112] | | 0% | 0% |
| 2 | ForecastBench | Public median forecast | 0.167 | 0.165 | 0.022 | 0.048 | 0.166 | 0.107 | [0.09, 0.125] | 0.009 | 30% | 0% |
| 3 | Anthropic | Claude-3-5-Sonnet-20240620 (scratchpad with freeze values) | 0.155 | 0.196 | 0.039 | 0.067 | 0.176 | 0.111 | [0.09, 0.132] | 0.01 | 33% | 0% |
| 4 | OpenAI | GPT-4 (zero shot with freeze values) | 0.169 | 0.184 | 0.028 | 0.056 | 0.177 | 0.113 | [0.093, 0.132] | 0.003 | 37% | 0% |
| 5 | Anthropic | Claude-3-5-Sonnet-20240620 (zero shot with freeze values) | 0.155 | 0.288 | 0.026 | 0.074 | 0.222 | 0.114 | [0.09, 0.139] | 0.008 | 36% | 0% |
| 6 | Anthropic | Claude-3-5-Sonnet-20240620 (scratchpad with news with freeze values) | 0.160 | 0.215 | 0.038 | 0.070 | 0.188 | 0.115 | [0.095, 0.135] | 0.004 | 31% | 0% |
| 7 | OpenAI | GPT-4o (scratchpad with freeze values) | 0.173 | 0.170 | 0.034 | 0.059 | 0.171 | 0.116 | [0.098, 0.133] | 0.002 | 30% | 0% |
| 8 | OpenAI | GPT-4-Turbo-2024-04-09 (zero shot with freeze values) | 0.177 | 0.184 | 0.026 | 0.055 | 0.180 | 0.116 | [0.097, 0.135] | <0.001 | 37% | 0% |
| 9 | OpenAI | GPT-4o (scratchpad with news with freeze values) | 0.179 | 0.119 | 0.045 | 0.059 | 0.149 | 0.119 | [0.101, 0.137] | <0.001 | 28% | 0% |
| 10 | Mistral AI | Mistral-Large-Latest (scratchpad with freeze values) | 0.163 | 0.158 | 0.064 | 0.081 | 0.161 | 0.122 | [0.107, 0.137] | <0.001 | 24% | 0% |
| 11 | Mistral AI | Mistral-Large-Latest (zero shot with freeze values) | 0.173 | 0.196 | 0.047 | 0.074 | 0.184 | 0.123 | [0.102, 0.145] | <0.001 | 28% | 0% |
| 12 | Anthropic | Claude-3-5-Sonnet-20240620 (scratchpad) | 0.155 | 0.252 | 0.058 | 0.093 | 0.204 | 0.124 | [0.103, 0.145] | <0.001 | 29% | 0% |
| 13 | Meta | Llama-3-70b-Chat-Hf (zero shot with freeze values) | 0.175 | 0.177 | 0.053 | 0.076 | 0.176 | 0.125 | [0.104, 0.146] | <0.001 | 30% | 0% |
| 14 | Anthropic | Claude-3-5-Sonnet-20240620 (superforecaster with news 3) | 0.171 | 0.186 | 0.058 | 0.082 | 0.179 | 0.126 | [0.108, 0.145] | <0.001 | 28% | 2% |
| 15 | Anthropic | Claude-3-5-Sonnet-20240620 (superforecaster with news 1) | 0.167 | 0.228 | 0.058 | 0.089 | 0.197 | 0.128 | [0.107, 0.148] | <0.001 | 28% | 0% |
| 16 | Anthropic | Claude-3-Opus-20240229 (zero shot with freeze values) | 0.185 | 0.206 | 0.041 | 0.071 | 0.196 | 0.128 | [0.106, 0.151] | <0.001 | 33% | 0% |
| 17 | OpenAI | GPT-4 (scratchpad with freeze values) | 0.188 | 0.173 | 0.046 | 0.069 | 0.181 | 0.128 | [0.114, 0.143] | <0.001 | 24% | 0% |
| 18 | Anthropic | Claude-3-5-Sonnet-20240620 (scratchpad with news) | 0.160 | 0.222 | 0.069 | 0.097 | 0.191 | 0.128 | [0.108, 0.149] | <0.001 | 26% | 0% |
| 19 | OpenAI | GPT-4o (scratchpad) | 0.173 | 0.199 | 0.059 | 0.084 | 0.186 | 0.128 | [0.111, 0.146] | <0.001 | 26% | 0% |
| 20 | Anthropic | Claude-2.1 (scratchpad with freeze values) | 0.206 | 0.061 | 0.054 | 0.055 | 0.134 | 0.131 | [0.116, 0.145] | <0.001 | 30% | 23% |
| 21 | Qwen | Qwen1.5-110B-Chat (zero shot with freeze values) | 0.196 | 0.170 | 0.042 | 0.065 | 0.183 | 0.131 | [0.113, 0.149] | <0.001 | 29% | 0% |
| 22 | Meta | Llama-3-70b-Chat-Hf (scratchpad with freeze values) | 0.188 | 0.149 | 0.059 | 0.076 | 0.168 | 0.132 | [0.117, 0.147] | <0.001 | 24% | 0% |
| 23 | Google | Gemini-1.5-Pro (zero shot with freeze values) | 0.190 | 0.201 | 0.048 | 0.076 | 0.196 | 0.133 | [0.109, 0.157] | <0.001 | 31% | 0% |
| 24 | OpenAI | GPT-4o (zero shot with freeze values) | 0.205 | 0.185 | 0.036 | 0.063 | 0.195 | 0.134 | [0.112, 0.156] | <0.001 | 35% | 0% |
| 25 | Anthropic | Claude-3-Opus-20240229 (scratchpad with freeze values) | 0.176 | 0.173 | 0.074 | 0.092 | 0.175 | 0.134 | [0.115, 0.152] | <0.001 | 26% | 0% |
| 26 | OpenAI | GPT-4o (scratchpad with news) | 0.179 | 0.195 | 0.065 | 0.089 | 0.187 | 0.134 | [0.113, 0.155] | <0.001 | 25% | 0% |
| 27 | Anthropic | Claude-3-5-Sonnet-20240620 (zero shot) | 0.155 | 0.308 | 0.071 | 0.114 | 0.231 | 0.134 | [0.109, 0.16] | <0.001 | 28% | 0% |
| 28 | Google | Gemini-1.5-Flash (scratchpad with freeze values) | 0.195 | 0.164 | 0.058 | 0.077 | 0.179 | 0.136 | [0.117, 0.155] | <0.001 | 25% | 0% |
| 29 | Google | Gemini-1.5-Flash (zero shot with freeze values) | 0.183 | 0.205 | 0.063 | 0.089 | 0.194 | 0.136 | [0.11, 0.163] | <0.001 | 30% | 0% |
| 30 | OpenAI | GPT-4-Turbo-2024-04-09 (zero shot) | 0.177 | 0.235 | 0.067 | 0.098 | 0.206 | 0.137 | [0.118, 0.156] | <0.001 | 22% | 0% |
| 31 | Mistral AI | Mixtral-8x22B-Instruct-V0.1 (zero shot with freeze values) | 0.195 | 0.183 | 0.056 | 0.079 | 0.189 | 0.137 | [0.114, 0.16] | <0.001 | 32% | 0% |
| 32 | Anthropic | Claude-3-Opus-20240229 (scratchpad) | 0.176 | 0.189 | 0.080 | 0.100 | 0.182 | 0.138 | [0.121, 0.155] | <0.001 | 24% | 0% |
| 33 | OpenAI | GPT-4-Turbo-2024-04-09 (scratchpad with freeze values) | 0.191 | 0.152 | 0.070 | 0.085 | 0.171 | 0.138 | [0.116, 0.16] | <0.001 | 27% | 0% |
| 34 | Mistral AI | Mixtral-8x22B-Instruct-V0.1 (scratchpad with freeze values) | 0.190 | 0.187 | 0.063 | 0.086 | 0.189 | 0.138 | [0.122, 0.154] | <0.001 | 25% | 0% |
| 35 | Anthropic | Claude-2.1 (scratchpad) | 0.206 | 0.111 | 0.061 | 0.070 | 0.159 | 0.138 | [0.121, 0.156] | <0.001 | 28% | 23% |
| 36 | Google | Gemini-1.5-Pro (scratchpad with news with freeze values) | 0.181 | 0.176 | 0.079 | 0.096 | 0.179 | 0.138 | [0.12, 0.157] | <0.001 | 25% | 0% |
| 37 | Google | Gemini-1.5-Pro (scratchpad with freeze values) | 0.179 | 0.222 | 0.072 | 0.099 | 0.200 | 0.139 | [0.121, 0.157] | <0.001 | 26% | 0% |
| 38 | OpenAI | GPT-4 (scratchpad) | 0.188 | 0.176 | 0.079 | 0.096 | 0.182 | 0.142 | [0.128, 0.157] | <0.001 | 20% | 0% |
| 39 | Mistral AI | Mistral-Large-Latest (scratchpad) | 0.163 | 0.230 | 0.099 | 0.123 | 0.197 | 0.143 | [0.126, 0.16] | <0.001 | 24% | 0% |
| 40 | OpenAI | GPT-4-Turbo-2024-04-09 (scratchpad) | 0.191 | 0.206 | 0.071 | 0.095 | 0.198 | 0.143 | [0.126, 0.16] | <0.001 | 23% | 0% |
| 41 | ForecastBench | Imputed Forecaster | 0.250 | 0.107 | 0.021 | 0.037 | 0.179 | 0.143 | [0.129, 0.158] | <0.001 | 34% | 100% |
| 42 | Anthropic | Claude-2.1 (zero shot with freeze values) | 0.220 | 0.184 | 0.041 | 0.067 | 0.202 | 0.144 | [0.124, 0.163] | <0.001 | 31% | 0% |
| 43 | Google | Gemini-1.5-Pro (scratchpad) | 0.179 | 0.233 | 0.081 | 0.109 | 0.206 | 0.144 | [0.125, 0.163] | <0.001 | 24% | 0% |
| 44 | OpenAI | GPT-4 (zero shot) | 0.169 | 0.181 | 0.105 | 0.119 | 0.175 | 0.144 | [0.127, 0.162] | <0.001 | 23% | 0% |
| 45 | Google | Gemini-1.5-Pro (scratchpad with news) | 0.181 | 0.174 | 0.098 | 0.112 | 0.177 | 0.146 | [0.127, 0.166] | <0.001 | 26% | 0% |
| 46 | OpenAI | GPT-4-Turbo-2024-04-09 (scratchpad with news with freeze values) | 0.202 | 0.157 | 0.076 | 0.090 | 0.180 | 0.146 | [0.124, 0.169] | <0.001 | 31% | 0% |
| 47 | Meta | Llama-3-70b-Chat-Hf (zero shot) | 0.175 | 0.233 | 0.095 | 0.120 | 0.204 | 0.148 | [0.128, 0.168] | <0.001 | 24% | 0% |
| 48 | Anthropic | Claude-3-Opus-20240229 (zero shot) | 0.185 | 0.244 | 0.083 | 0.112 | 0.215 | 0.149 | [0.125, 0.173] | <0.001 | 25% | 0% |
| 49 | Qwen | Qwen1.5-110B-Chat (scratchpad with news with freeze values) | 0.196 | 0.182 | 0.085 | 0.103 | 0.189 | 0.149 | [0.132, 0.167] | <0.001 | 25% | 0% |
| 50 | Mistral AI | Mixtral-8x7B-Instruct-V0.1 (zero shot with freeze values) | 0.209 | 0.202 | 0.065 | 0.090 | 0.206 | 0.150 | [0.12, 0.179] | <0.001 | 36% | 0% |
| 51 | OpenAI | GPT-4o (zero shot) | 0.205 | 0.232 | 0.067 | 0.097 | 0.218 | 0.151 | [0.13, 0.171] | <0.001 | 24% | 0% |
| 52 | Meta | Llama-3-8b-Chat-Hf (zero shot with freeze values) | 0.201 | 0.246 | 0.069 | 0.101 | 0.223 | 0.151 | [0.127, 0.175] | <0.001 | 27% | 0% |
| 53 | OpenAI | GPT-4o (superforecaster with news 3) | 0.208 | 0.174 | 0.076 | 0.094 | 0.191 | 0.151 | [0.133, 0.169] | <0.001 | 23% | 4% |
| 54 | Google | Gemini-1.5-Flash (scratchpad) | 0.195 | 0.202 | 0.086 | 0.108 | 0.199 | 0.151 | [0.132, 0.171] | <0.001 | 21% | 0% |
| 55 | Anthropic | Claude-3-Opus-20240229 (superforecaster with news 1) | 0.171 | 0.272 | 0.101 | 0.132 | 0.221 | 0.151 | [0.13, 0.173] | <0.001 | 24% | 0% |
| 56 | Mistral AI | Mistral-Large-Latest (zero shot) | 0.173 | 0.195 | 0.118 | 0.132 | 0.184 | 0.152 | [0.129, 0.176] | <0.001 | 22% | 0% |
| 57 | Mistral AI | Mixtral-8x22B-Instruct-V0.1 (scratchpad with news with freeze values) | 0.201 | 0.181 | 0.088 | 0.105 | 0.191 | 0.153 | [0.133, 0.172] | <0.001 | 23% | 0% |
| 58 | Qwen | Qwen1.5-110B-Chat (scratchpad with freeze values) | 0.208 | 0.205 | 0.074 | 0.098 | 0.206 | 0.153 | [0.136, 0.17] | <0.001 | 21% | 0% |
| 59 | Mistral AI | Mixtral-8x22B-Instruct-V0.1 (scratchpad) | 0.190 | 0.229 | 0.090 | 0.116 | 0.209 | 0.153 | [0.135, 0.17] | <0.001 | 23% | 0% |
| 60 | Google | Gemini-1.5-Pro (superforecaster with news 3) | 0.194 | 0.171 | 0.101 | 0.114 | 0.183 | 0.154 | [0.135, 0.173] | <0.001 | 25% | 0% |
| 61 | Anthropic | Claude-2.1 (scratchpad with news) | 0.212 | 0.190 | 0.075 | 0.096 | 0.201 | 0.154 | [0.135, 0.173] | <0.001 | 26% | 8% |
| 62 | Google | Gemini-1.5-Pro (zero shot) | 0.190 | 0.297 | 0.079 | 0.119 | 0.244 | 0.154 | [0.129, 0.18] | <0.001 | 23% | 0% |
| 63 | Mistral AI | Mixtral-8x7B-Instruct-V0.1 (scratchpad) | 0.200 | 0.131 | 0.104 | 0.109 | 0.166 | 0.155 | [0.136, 0.174] | <0.001 | 28% | 14% |
| 64 | Qwen | Qwen1.5-110B-Chat (scratchpad with news) | 0.196 | 0.188 | 0.099 | 0.115 | 0.192 | 0.155 | [0.137, 0.174] | <0.001 | 24% | 0% |
| 65 | ForecastBench | LLM Crowd (gpt-4o, claude-3.5-sonnet, gemini-1.5-pro) with news | 0.224 | 0.206 | 0.062 | 0.088 | 0.215 | 0.156 | [0.139, 0.172] | <0.001 | 21% | 28% |
| 66 | Anthropic | Claude-3-5-Sonnet-20240620 (superforecaster with news 2) | 0.198 | 0.220 | 0.090 | 0.114 | 0.209 | 0.156 | [0.134, 0.178] | <0.001 | 27% | 0% |
| 67 | Google | Gemini-1.5-Flash (scratchpad with news with freeze values) | 0.203 | 0.232 | 0.080 | 0.108 | 0.218 | 0.156 | [0.133, 0.179] | <0.001 | 24% | 0% |
| 68 | ForecastBench | LLM Crowd (gpt-4o, claude-3.5-sonnet, gemini-1.5-pro) with news | 0.225 | 0.197 | 0.063 | 0.087 | 0.211 | 0.156 | [0.139, 0.173] | <0.001 | 22% | 28% |
| 69 | Qwen | Qwen1.5-110B-Chat (zero shot) | 0.196 | 0.226 | 0.092 | 0.116 | 0.211 | 0.156 | [0.137, 0.175] | <0.001 | 19% | 1% |
| 70 | ForecastBench | LLM Crowd (gpt-4o, claude-3.5-sonnet, gemini-1.5-pro) with news | 0.224 | 0.203 | 0.063 | 0.089 | 0.214 | 0.156 | [0.14, 0.173] | <0.001 | 22% | 28% |
| 71 | OpenAI | GPT-4-Turbo-2024-04-09 (scratchpad with news) | 0.202 | 0.203 | 0.090 | 0.110 | 0.203 | 0.156 | [0.135, 0.178] | <0.001 | 27% | 0% |
| 72 | OpenAI | GPT-4-Turbo-2024-04-09 (superforecaster with news 3) | 0.212 | 0.161 | 0.087 | 0.101 | 0.186 | 0.157 | [0.137, 0.176] | <0.001 | 26% | 8% |
| 73 | Mistral AI | Mixtral-8x22B-Instruct-V0.1 (scratchpad with news) | 0.201 | 0.209 | 0.092 | 0.114 | 0.205 | 0.157 | [0.139, 0.176] | <0.001 | 22% | 0% |
| 74 | Anthropic | Claude-2.1 (scratchpad with news with freeze values) | 0.212 | 0.142 | 0.094 | 0.103 | 0.177 | 0.157 | [0.139, 0.176] | <0.001 | 24% | 3% |
| 75 | OpenAI | GPT-4o (superforecaster with news 1) | 0.201 | 0.255 | 0.086 | 0.116 | 0.228 | 0.159 | [0.133, 0.184] | <0.001 | 27% | 0% |
| 76 | OpenAI | GPT-4-Turbo-2024-04-09 (superforecaster with news 1) | 0.197 | 0.272 | 0.087 | 0.121 | 0.234 | 0.159 | [0.138, 0.181] | <0.001 | 22% | 0% |
| 77 | Qwen | Qwen1.5-110B-Chat (superforecaster with news 1) | 0.201 | 0.303 | 0.077 | 0.118 | 0.252 | 0.159 | [0.136, 0.183] | <0.001 | 23% | 0% |
| 78 | Anthropic | Claude-3-Opus-20240229 (superforecaster with news 3) | 0.192 | 0.129 | 0.126 | 0.127 | 0.161 | 0.159 | [0.139, 0.18] | <0.001 | 22% | 5% |
| 79 | Google | Gemini-1.5-Flash (zero shot) | 0.183 | 0.228 | 0.115 | 0.136 | 0.206 | 0.160 | [0.135, 0.184] | <0.001 | 23% | 0% |
| 80 | Meta | Llama-3-8b-Chat-Hf (scratchpad with freeze values) | 0.224 | 0.170 | 0.078 | 0.095 | 0.197 | 0.160 | [0.145, 0.174] | <0.001 | 23% | 0% |
| 81 | Meta | Llama-3-8b-Chat-Hf (zero shot) | 0.201 | 0.329 | 0.075 | 0.121 | 0.265 | 0.161 | [0.135, 0.187] | <0.001 | 25% | 0% |
| 82 | Qwen | Qwen1.5-110B-Chat (scratchpad) | 0.208 | 0.239 | 0.087 | 0.115 | 0.224 | 0.161 | [0.143, 0.179] | <0.001 | 21% | 0% |
| 83 | Mistral AI | Mistral-Large-Latest (scratchpad with news with freeze values) | 0.207 | 0.200 | 0.097 | 0.116 | 0.203 | 0.162 | [0.141, 0.183] | <0.001 | 24% | 0% |
| 84 | Meta | Llama-3-70b-Chat-Hf (scratchpad) | 0.188 | 0.224 | 0.116 | 0.136 | 0.206 | 0.162 | [0.145, 0.178] | <0.001 | 22% | 0% |
| 85 | Google | Gemini-1.5-Pro (superforecaster with news 1) | 0.206 | 0.267 | 0.086 | 0.119 | 0.236 | 0.162 | [0.141, 0.184] | <0.001 | 26% | 0% |
| 86 | Anthropic | Claude-3-Opus-20240229 (scratchpad with news with freeze values) | 0.205 | 0.212 | 0.103 | 0.123 | 0.208 | 0.164 | [0.142, 0.185] | <0.001 | 24% | 0% |
| 87 | Anthropic | Claude-3-Opus-20240229 (scratchpad with news) | 0.205 | 0.201 | 0.106 | 0.123 | 0.203 | 0.164 | [0.144, 0.185] | <0.001 | 25% | 0% |
| 88 | Google | Gemini-1.5-Flash (scratchpad with news) | 0.203 | 0.203 | 0.108 | 0.125 | 0.203 | 0.164 | [0.142, 0.187] | <0.001 | 24% | 0% |
| 89 | Anthropic | Claude-3-Opus-20240229 (superforecaster with news 2) | 0.187 | 0.243 | 0.119 | 0.141 | 0.215 | 0.164 | [0.141, 0.188] | <0.001 | 24% | 0% |
| 90 | Mistral AI | Mixtral-8x22B-Instruct-V0.1 (superforecaster with news 1) | 0.206 | 0.228 | 0.103 | 0.126 | 0.217 | 0.166 | [0.142, 0.19] | <0.001 | 22% | 0% |
| 91 | Mistral AI | Mixtral-8x22B-Instruct-V0.1 (zero shot) | 0.195 | 0.284 | 0.105 | 0.137 | 0.240 | 0.166 | [0.14, 0.192] | <0.001 | 22% | 0% |
| 92 | Mistral AI | Mistral-Large-Latest (scratchpad with news) | 0.207 | 0.164 | 0.117 | 0.126 | 0.186 | 0.167 | [0.147, 0.186] | <0.001 | 23% | 0% |
| 93 | Mistral AI | Mixtral-8x22B-Instruct-V0.1 (superforecaster with news 3) | 0.221 | 0.168 | 0.101 | 0.113 | 0.194 | 0.167 | [0.15, 0.184] | <0.001 | 19% | 12% |
| 94 | Mistral AI | Mixtral-8x7B-Instruct-V0.1 (zero shot) | 0.209 | 0.233 | 0.105 | 0.128 | 0.221 | 0.168 | [0.14, 0.197] | <0.001 | 25% | 0% |
| 95 | OpenAI | GPT-4o (superforecaster with news 2) | 0.232 | 0.202 | 0.086 | 0.107 | 0.217 | 0.170 | [0.145, 0.194] | <0.001 | 26% | 0% |
| 96 | Mistral AI | Mixtral-8x7B-Instruct-V0.1 (superforecaster with news 1) | 0.238 | 0.207 | 0.080 | 0.103 | 0.222 | 0.171 | [0.148, 0.193] | <0.001 | 29% | 15% |
| 97 | Mistral AI | Mistral-Large-Latest (superforecaster with news 1) | 0.205 | 0.297 | 0.106 | 0.141 | 0.251 | 0.173 | [0.147, 0.198] | <0.001 | 24% | 0% |
| 98 | Qwen | Qwen1.5-110B-Chat (superforecaster with news 3) | 0.226 | 0.207 | 0.101 | 0.121 | 0.216 | 0.173 | [0.156, 0.191] | <0.001 | 23% | 4% |
| 99 | Anthropic | Claude-2.1 (zero shot) | 0.220 | 0.227 | 0.104 | 0.127 | 0.223 | 0.173 | [0.154, 0.192] | <0.001 | 19% | 0% |
| 100 | Mistral AI | Mistral-Large-Latest (superforecaster with news 2) | 0.198 | 0.235 | 0.130 | 0.149 | 0.216 | 0.174 | [0.149, 0.198] | <0.001 | 24% | 0% |
| 101 | Mistral AI | Mixtral-8x7B-Instruct-V0.1 (scratchpad with freeze values) | 0.200 | 0.171 | 0.147 | 0.151 | 0.186 | 0.176 | [0.15, 0.202] | <0.001 | 28% | 11% |
| 102 | Anthropic | Claude-2.1 (superforecaster with news 3) | 0.220 | 0.146 | 0.132 | 0.135 | 0.183 | 0.177 | [0.156, 0.198] | <0.001 | 25% | 3% |
| 103 | Mistral AI | Mixtral-8x22B-Instruct-V0.1 (superforecaster with news 2) | 0.232 | 0.188 | 0.110 | 0.124 | 0.210 | 0.178 | [0.159, 0.198] | <0.001 | 22% | 1% |
| 104 | Meta | Llama-3-8b-Chat-Hf (scratchpad) | 0.224 | 0.229 | 0.114 | 0.135 | 0.227 | 0.180 | [0.163, 0.197] | <0.001 | 22% | 0% |
| 105 | Google | Gemini-1.5-Flash (superforecaster with news 2) | 0.210 | 0.247 | 0.128 | 0.150 | 0.229 | 0.180 | [0.155, 0.205] | <0.001 | 22% | 0% |
| 106 | Mistral AI | Mixtral-8x7B-Instruct-V0.1 (superforecaster with news 2) | 0.249 | 0.185 | 0.098 | 0.114 | 0.217 | 0.181 | [0.155, 0.208] | <0.001 | 32% | 22% |
| 107 | Mistral AI | Mistral-Large-Latest (superforecaster with news 3) | 0.233 | 0.191 | 0.116 | 0.129 | 0.212 | 0.181 | [0.162, 0.201] | <0.001 | 23% | 5% |
| 108 | Mistral AI | Mixtral-8x7B-Instruct-V0.1 (superforecaster with news 3) | 0.242 | 0.166 | 0.113 | 0.123 | 0.204 | 0.182 | [0.162, 0.202] | <0.001 | 28% | 13% |
| 109 | Meta | Llama-2-70b-Chat-Hf (scratchpad with freeze values) | 0.224 | 0.212 | 0.125 | 0.141 | 0.218 | 0.182 | [0.166, 0.199] | <0.001 | 23% | 0% |
| 110 | OpenAI | GPT-4-Turbo-2024-04-09 (superforecaster with news 2) | 0.230 | 0.186 | 0.126 | 0.137 | 0.208 | 0.183 | [0.159, 0.208] | <0.001 | 27% | 1% |
| 111 | Qwen | Qwen1.5-110B-Chat (superforecaster with news 2) | 0.223 | 0.266 | 0.117 | 0.144 | 0.244 | 0.183 | [0.163, 0.204] | <0.001 | 24% | 2% |
| 112 | Google | Gemini-1.5-Flash (superforecaster with news 1) | 0.221 | 0.298 | 0.116 | 0.149 | 0.259 | 0.185 | [0.157, 0.213] | <0.001 | 24% | 0% |
| 113 | Meta | Llama-2-70b-Chat-Hf (zero shot with freeze values) | 0.237 | 0.160 | 0.129 | 0.135 | 0.199 | 0.186 | [0.161, 0.211] | <0.001 | 24% | 0% |
| 114 | Mistral AI | Mixtral-8x7B-Instruct-V0.1 (scratchpad with news with freeze values) | 0.278 | 0.155 | 0.083 | 0.096 | 0.216 | 0.187 | [0.165, 0.208] | <0.001 | 27% | 13% |
| 115 | Google | Gemini-1.5-Flash (superforecaster with news 3) | 0.236 | 0.196 | 0.126 | 0.139 | 0.216 | 0.187 | [0.165, 0.209] | <0.001 | 20% | 7% |
| 116 | Anthropic | Claude-2.1 (superforecaster with news 2) | 0.235 | 0.201 | 0.129 | 0.142 | 0.218 | 0.188 | [0.165, 0.212] | <0.001 | 28% | 11% |
| 117 | Anthropic | Claude-3-Haiku-20240307 (zero shot with freeze values) | 0.288 | 0.152 | 0.076 | 0.090 | 0.220 | 0.189 | [0.171, 0.207] | <0.001 | 26% | 0% |
| 118 | Anthropic | Claude-3-Haiku-20240307 (scratchpad with freeze values) | 0.242 | 0.174 | 0.131 | 0.139 | 0.208 | 0.191 | [0.173, 0.208] | <0.001 | 20% | 0% |
| 119 | Google | Gemini-1.5-Pro (superforecaster with news 2) | 0.236 | 0.280 | 0.119 | 0.148 | 0.258 | 0.192 | [0.163, 0.221] | <0.001 | 22% | 0% |
| 120 | Meta | Llama-2-70b-Chat-Hf (scratchpad) | 0.224 | 0.248 | 0.149 | 0.167 | 0.236 | 0.196 | [0.177, 0.215] | <0.001 | 21% | 0% |
| 121 | Anthropic | Claude-3-Haiku-20240307 (superforecaster with news 2) | 0.242 | 0.187 | 0.142 | 0.150 | 0.214 | 0.196 | [0.177, 0.215] | <0.001 | 21% | 0% |
| 122 | Anthropic | Claude-3-Haiku-20240307 (scratchpad) | 0.242 | 0.214 | 0.135 | 0.150 | 0.228 | 0.196 | [0.178, 0.214] | <0.001 | 22% | 0% |
| 123 | Mistral AI | Mixtral-8x7B-Instruct-V0.1 (scratchpad with news) | 0.278 | 0.197 | 0.097 | 0.115 | 0.238 | 0.196 | [0.173, 0.22] | <0.001 | 26% | 13% |
| 124 | OpenAI | GPT-3.5-Turbo-0125 (scratchpad with freeze values) | 0.246 | 0.274 | 0.124 | 0.151 | 0.260 | 0.199 | [0.18, 0.218] | <0.001 | 22% | 0% |
| 125 | Anthropic | Claude-3-Haiku-20240307 (scratchpad with news with freeze values) | 0.265 | 0.173 | 0.127 | 0.135 | 0.219 | 0.200 | [0.183, 0.217] | <0.001 | 22% | 0% |
| 126 | Anthropic | Claude-3-Haiku-20240307 (scratchpad with news) | 0.265 | 0.202 | 0.127 | 0.141 | 0.233 | 0.203 | [0.185, 0.221] | <0.001 | 22% | 0% |
| 127 | OpenAI | GPT-3.5-Turbo-0125 (scratchpad) | 0.246 | 0.274 | 0.140 | 0.165 | 0.260 | 0.206 | [0.186, 0.225] | <0.001 | 21% | 0% |
| 128 | Anthropic | Claude-3-Haiku-20240307 (zero shot) | 0.288 | 0.174 | 0.116 | 0.127 | 0.231 | 0.207 | [0.188, 0.227] | <0.001 | 23% | 0% |
| 129 | ForecastBench | Always 0.5 | 0.250 | 0.250 | 0.146 | 0.165 | 0.250 | 0.208 | [0.198, 0.218] | <0.001 | 17% | 0% |
| 130 | Anthropic | Claude-2.1 (superforecaster with news 1) | 0.267 | 0.258 | 0.124 | 0.148 | 0.263 | 0.208 | [0.184, 0.231] | <0.001 | 24% | 3% |
| 131 | Meta | Llama-2-70b-Chat-Hf (zero shot) | 0.237 | 0.221 | 0.195 | 0.200 | 0.229 | 0.219 | [0.192, 0.245] | <0.001 | 22% | 1% |
| 132 | Anthropic | Claude-3-Haiku-20240307 (superforecaster with news 3) | 0.268 | 0.224 | 0.168 | 0.178 | 0.246 | 0.223 | [0.204, 0.242] | <0.001 | 20% | 17% |
| 133 | Anthropic | Claude-3-Haiku-20240307 (superforecaster with news 1) | 0.276 | 0.243 | 0.182 | 0.193 | 0.260 | 0.235 | [0.209, 0.261] | <0.001 | 21% | 0% |
| 134 | OpenAI | GPT-3.5-Turbo-0125 (zero shot with freeze values) | 0.399 | 0.184 | 0.067 | 0.088 | 0.292 | 0.244 | [0.215, 0.273] | <0.001 | 37% | 0% |
| 135 | OpenAI | GPT-3.5-Turbo-0125 (zero shot) | 0.399 | 0.218 | 0.163 | 0.173 | 0.308 | 0.286 | [0.256, 0.317] | <0.001 | 24% | 0% |
| 136 | ForecastBench | Random Uniform | 0.356 | 0.318 | 0.207 | 0.227 | 0.337 | 0.292 | [0.255, 0.328] | <0.001 | 24% | 0% |
| 137 | ForecastBench | Always 0 | 0.352 | 0.500 | 0.187 | 0.244 | 0.426 | 0.298 | [0.245, 0.351] | <0.001 | 29% | 0% |
| 138 | ForecastBench | Always 1 | 0.648 | 0.500 | 0.606 | 0.587 | 0.574 | 0.617 | [0.561, 0.673] | <0.001 | 24% | 0% |
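
The two aggregate columns appear to be unweighted means of their components: the Overall Resolved Score matches the mean of the Dataset Score and the Market Score (resolved), and the Overall Score matches the mean of the Dataset Score and the Market Score (overall), to within rounding. The short Python sketch below checks that relationship for three rows of the table; the column interpretation is an assumption inferred from the rounded values, not taken from the ForecastBench code.

```python
# Minimal sketch: verify that the aggregate leaderboard columns are consistent with
# simple averages of their component scores, assuming
#   Overall Resolved Score = mean(Dataset Score, Market Score (resolved))
#   Overall Score          = mean(Dataset Score, Market Score (overall))
# Values are copied from the table above, rounded to three decimals.

rows = [
    # (name, dataset, market_resolved, market_overall, overall_resolved, overall)
    ("Superforecaster median forecast", 0.135, 0.119, 0.051, 0.127, 0.093),
    ("Public median forecast",          0.167, 0.165, 0.048, 0.166, 0.107),
    ("GPT-4 (zero shot with freeze values)", 0.169, 0.184, 0.056, 0.177, 0.113),
]

for name, dataset, mkt_res, mkt_all, overall_res, overall in rows:
    # Allow half a unit in the last reported decimal place for rounding error.
    assert abs((dataset + mkt_res) / 2 - overall_res) <= 0.0005 + 1e-9, name
    assert abs((dataset + mkt_all) / 2 - overall) <= 0.0005 + 1e-9, name
    print(f"{name}: overall resolved ~ {(dataset + mkt_res) / 2:.4f}, overall ~ {(dataset + mkt_all) / 2:.4f}")
```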