{
  "version": "1.0",
  "date": "2026-04-13",
  "threshold": 75,
  "judge": "claude-sonnet-4-6",
  "questions_per_run": 60,
  "models": [
    {"model": "claude-opus-4-6", "average_score": 82.1, "tier1_status": "PASS"},
    {"model": "gpt-4o", "average_score": 77.1, "tier1_status": "PASS"},
    {"model": "openrouter/google/gemini-2.5-flash", "average_score": 73.5, "tier1_status": "FAIL"},
    {"model": "phi4:14b", "average_score": 71.3, "tier1_status": "FAIL"},
    {"model": "gemma3:27b", "average_score": 71.2, "tier1_status": "FAIL"},
    {"model": "x-ai/grok-4", "average_score": 71.2, "tier1_status": "FAIL"},
    {"model": "qwen2.5:32b", "average_score": 69.0, "tier1_status": "FAIL"},
    {"model": "qwen2.5:14b", "average_score": 67.6, "tier1_status": "FAIL"},
    {"model": "qwen2.5:7b", "average_score": 66.7, "tier1_status": "FAIL"},
    {"model": "gemma3:12b", "average_score": 62.5, "tier1_status": "FAIL"},
    {"model": "mistral:latest", "average_score": 54.8, "tier1_status": "FAIL"},
    {"model": "llama3.2:latest", "average_score": 52.2, "tier1_status": "FAIL"},
    {"model": "falcon2:11b", "average_score": 38.2, "tier1_status": "FAIL"}
  ]
}
