Created
October 14, 2025 23:07
-
-
Save karminski/aa56b5b73e2f3070a7530dfe11c2ddb3 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| <!DOCTYPE html> | |
| <html lang="en"> | |
| <head> | |
| <meta charset="UTF-8"> | |
| <meta name="viewport" content="width=device-width, initial-scale=1.0"> | |
| <title>Qwen3-VL Performance Benchmark</title> | |
| <style> | |
| * { | |
| margin: 0; | |
| padding: 0; | |
| box-sizing: border-box; | |
| } | |
| body { | |
| font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', 'Helvetica Neue', Arial, sans-serif; | |
| background: #ffffff; | |
| min-height: 100vh; | |
| padding: 40px 20px; | |
| color: #1d1d1f; | |
| } | |
| .container { | |
| max-width: 1400px; | |
| margin: 0 auto; | |
| } | |
| .card { | |
| background: rgba(255, 255, 255, 0.95); | |
| backdrop-filter: blur(10px); | |
| border-radius: 20px; | |
| box-shadow: 0 20px 60px rgba(0, 0, 0, 0.3); | |
| overflow: hidden; | |
| } | |
| .header { | |
| background: linear-gradient(135deg, #8e9eab 0%, #6d7a86 100%); | |
| color: white; | |
| padding: 40px; | |
| text-align: center; | |
| } | |
| .header h1 { | |
| font-size: 42px; | |
| font-weight: 700; | |
| margin-bottom: 10px; | |
| letter-spacing: -0.5px; | |
| } | |
| .header p { | |
| font-size: 18px; | |
| opacity: 0.9; | |
| font-weight: 400; | |
| } | |
| .stats-section { | |
| padding: 40px; | |
| background: #f5f5f7; | |
| border-bottom: 1px solid #d2d2d7; | |
| } | |
| .stats-title { | |
| font-size: 24px; | |
| font-weight: 600; | |
| margin-bottom: 24px; | |
| color: #1d1d1f; | |
| } | |
| .stats-grid { | |
| display: grid; | |
| grid-template-columns: repeat(auto-fit, minmax(250px, 1fr)); | |
| gap: 20px; | |
| } | |
| .stat-card { | |
| background: white; | |
| padding: 24px; | |
| border-radius: 12px; | |
| box-shadow: 0 4px 12px rgba(0, 0, 0, 0.08); | |
| transition: transform 0.3s ease, box-shadow 0.3s ease; | |
| } | |
| .stat-card:hover { | |
| transform: translateY(-4px); | |
| box-shadow: 0 8px 24px rgba(0, 0, 0, 0.12); | |
| } | |
| .stat-card .model-name { | |
| font-size: 16px; | |
| font-weight: 600; | |
| color: #1d1d1f; | |
| margin-bottom: 8px; | |
| } | |
| .stat-card .win-count { | |
| font-size: 36px; | |
| font-weight: 700; | |
| background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); | |
| -webkit-background-clip: text; | |
| -webkit-text-fill-color: transparent; | |
| background-clip: text; | |
| } | |
| .stat-card .win-label { | |
| font-size: 14px; | |
| color: #86868b; | |
| margin-top: 4px; | |
| } | |
| .stat-card .elo-score { | |
| font-size: 20px; | |
| font-weight: 600; | |
| color: #667eea; | |
| margin-top: 12px; | |
| padding-top: 12px; | |
| border-top: 1px solid #e5e5e7; | |
| } | |
| .stat-card .elo-label { | |
| font-size: 12px; | |
| color: #86868b; | |
| margin-top: 4px; | |
| } | |
| .stat-card .elo-diff { | |
| font-size: 11px; | |
| color: #86868b; | |
| margin-top: 6px; | |
| font-style: italic; | |
| } | |
| .table-section { | |
| padding: 40px; | |
| overflow-x: auto; | |
| } | |
| table { | |
| width: 100%; | |
| border-collapse: collapse; | |
| font-size: 14px; | |
| } | |
| thead { | |
| background: linear-gradient(135deg, #8e9eab 0%, #6d7a86 100%); | |
| color: white; | |
| position: sticky; | |
| top: 0; | |
| z-index: 10; | |
| } | |
| thead th { | |
| padding: 16px 12px; | |
| text-align: left; | |
| font-weight: 600; | |
| font-size: 13px; | |
| letter-spacing: 0.5px; | |
| white-space: nowrap; | |
| } | |
| tbody tr { | |
| border-bottom: 1px solid #e5e5e7; | |
| transition: background-color 0.2s ease; | |
| } | |
| tbody tr:hover { | |
| background-color: #f5f5f7; | |
| } | |
| tbody td { | |
| padding: 14px 12px; | |
| color: #1d1d1f; | |
| } | |
| .category-cell { | |
| font-weight: 600; | |
| color: #6d7a86; | |
| background: #f9f9fb; | |
| } | |
| .benchmark-cell { | |
| font-weight: 500; | |
| } | |
| .score-cell { | |
| text-align: center; | |
| font-weight: 500; | |
| font-variant-numeric: tabular-nums; | |
| } | |
| .best-score { | |
| background: linear-gradient(135deg, #ffd89b 0%, #19547b 100%); | |
| color: white; | |
| font-weight: 700; | |
| border-radius: 6px; | |
| padding: 6px 8px; | |
| display: inline-block; | |
| min-width: 60px; | |
| box-shadow: 0 2px 8px rgba(0, 0, 0, 0.15); | |
| } | |
| .na-cell { | |
| color: #d2d2d7; | |
| text-align: center; | |
| } | |
| .notes-section { | |
| padding: 40px; | |
| background: #f9f9fb; | |
| font-size: 14px; | |
| color: #6e6e73; | |
| line-height: 1.8; | |
| } | |
| .notes-section h3 { | |
| font-size: 18px; | |
| color: #1d1d1f; | |
| margin-bottom: 16px; | |
| font-weight: 600; | |
| } | |
| .notes-section ul { | |
| list-style: none; | |
| padding-left: 0; | |
| } | |
| .notes-section li { | |
| padding-left: 24px; | |
| position: relative; | |
| margin-bottom: 8px; | |
| } | |
| .notes-section li:before { | |
| content: "•"; | |
| position: absolute; | |
| left: 8px; | |
| color: #667eea; | |
| font-weight: bold; | |
| } | |
| @media (max-width: 768px) { | |
| .header h1 { | |
| font-size: 28px; | |
| } | |
| .header p { | |
| font-size: 16px; | |
| } | |
| .stats-section, | |
| .table-section, | |
| .notes-section { | |
| padding: 24px; | |
| } | |
| table { | |
| font-size: 12px; | |
| } | |
| thead th, | |
| tbody td { | |
| padding: 10px 8px; | |
| } | |
| } | |
| .rank-badge { | |
| display: inline-block; | |
| padding: 4px 12px; | |
| border-radius: 20px; | |
| font-size: 12px; | |
| font-weight: 600; | |
| margin-left: 8px; | |
| } | |
| .rank-1 { | |
| background: linear-gradient(135deg, #f093fb 0%, #f5576c 100%); | |
| color: white; | |
| } | |
| .rank-2 { | |
| background: linear-gradient(135deg, #4facfe 0%, #00f2fe 100%); | |
| color: white; | |
| } | |
| .rank-3 { | |
| background: linear-gradient(135deg, #43e97b 0%, #38f9d7 100%); | |
| color: white; | |
| } | |
| .lang-switcher { | |
| position: fixed; | |
| top: 20px; | |
| left: 20px; | |
| z-index: 1000; | |
| background: white; | |
| border-radius: 30px; | |
| box-shadow: 0 4px 20px rgba(0, 0, 0, 0.15); | |
| padding: 8px 12px; | |
| display: flex; | |
| align-items: center; | |
| gap: 8px; | |
| cursor: pointer; | |
| transition: all 0.3s ease; | |
| user-select: none; | |
| } | |
| .lang-switcher:hover { | |
| box-shadow: 0 6px 30px rgba(0, 0, 0, 0.25); | |
| transform: translateY(-2px); | |
| } | |
| .lang-switcher .lang-icon { | |
| font-size: 20px; | |
| } | |
| .lang-switcher .lang-text { | |
| font-size: 14px; | |
| font-weight: 600; | |
| color: #1d1d1f; | |
| } | |
| .footer-logo { | |
| padding: 40px; | |
| margin-top: 40px; | |
| text-align: center; | |
| background: transparent; | |
| } | |
| .footer-logo img { | |
| max-width: 300px; | |
| width: 100%; | |
| height: auto; | |
| opacity: 0.6; | |
| transition: opacity 0.3s ease; | |
| } | |
| .footer-logo img:hover { | |
| opacity: 0.9; | |
| } | |
| @media (max-width: 768px) { | |
| .footer-logo { | |
| padding: 30px 20px; | |
| margin-top: 30px; | |
| } | |
| .footer-logo img { | |
| max-width: 200px; | |
| } | |
| } | |
| </style> | |
| </head> | |
| <body> | |
| <div class="lang-switcher" onclick="toggleLanguage()"> | |
| <span class="lang-icon">🌐</span> | |
| <span class="lang-text" id="langText">中文</span> | |
| </div> | |
| <div class="container"> | |
| <div class="card"> | |
| <div class="header"> | |
| <h1 id="mainTitle">Qwen3-VL Performance Benchmark</h1> | |
| <p id="mainSubtitle">Comprehensive evaluation across multiple vision-language tasks</p> | |
| </div> | |
| <div class="stats-section"> | |
| <div class="stats-title" id="statsTitle">🏆 Model Performance Rankings</div> | |
| <div class="stats-grid" id="statsGrid"></div> | |
| </div> | |
| <div class="table-section"> | |
| <table id="benchmarkTable"> | |
| <thead> | |
| <tr> | |
| <th id="thCategory">Category</th> | |
| <th id="thBenchmark">Benchmark</th> | |
| <th>Qwen3-VL 4B</th> | |
| <th>Qwen3-VL 8B</th> | |
| <th>Qwen3-VL 235B</th> | |
| <th>Gemini2.5</th> | |
| <th>GPT5-Nano</th> | |
| </tr> | |
| </thead> | |
| <tbody id="tableBody"></tbody> | |
| </table> | |
| </div> | |
| <div class="notes-section"> | |
| <h3 id="notesTitle">📝 Notes</h3> | |
| <ul id="notesList"> | |
| <li id="note1">Highlighted scores indicate the best performance among all models for that benchmark</li> | |
| <li id="note2">The ranking section shows how many times each model achieved the highest score</li> | |
| <li id="note3">Qwen3-VL 235B-A22B Thinking is the large-scale thinking model version</li> | |
| <li id="note4">Results on video understanding are measured using a 256k-token context, handling up to 2048 frames</li> | |
| <li id="note5">"—" indicates data not available for that benchmark</li> | |
| </ul> | |
| </div> | |
| </div> | |
| <div class="footer-logo"> | |
| <img src="./assets/images/kcores-llm-arena-logo-black.png" alt="KCORES LLM Arena Logo"> | |
| </div> | |
| </div> | |
| <script> | |
// Language data

// Code of the language currently shown; starts in English.
let currentLang = 'en';

/**
 * UI string catalogue, addressed as i18n[languageCode][stringId].
 *
 * It is assembled from a single table whose rows are
 * [English text, Chinese text], so both translations of a string sit side by
 * side. `langText` is the label of the switcher button, i.e. it names the
 * language the user would switch TO.
 */
const i18n = (() => {
    const phrasebook = {
        langText: ['中文', 'English'],
        mainTitle: ['Qwen3-VL Performance Benchmark', 'Qwen3-VL 性能基准测试'],
        mainSubtitle: ['Comprehensive evaluation across multiple vision-language tasks', '跨多个视觉语言任务的综合评估'],
        statsTitle: ['🏆 Model Performance Rankings', '🏆 模型性能排名'],
        thCategory: ['Category', '分类'],
        thBenchmark: ['Benchmark', '基准测试'],
        notesTitle: ['📝 Notes', '📝 说明'],
        note1: ['Highlighted scores indicate the best performance among all models for that benchmark', '高亮显示的分数表示该基准测试中所有模型的最佳性能'],
        note2: ['The ranking section shows how many times each model achieved the highest score', '排名部分显示每个模型获得最高分的次数'],
        note3: ['Qwen3-VL 235B-A22B Thinking is the large-scale thinking model version', 'Qwen3-VL 235B-A22B Thinking 是大规模思维模型版本'],
        note4: ['Results on video understanding are measured using a 256k-token context, handling up to 2048 frames', '视频理解结果使用 256k token 上下文进行测量,可处理多达 2048 帧'],
        note5: ['"—" indicates data not available for that benchmark', '"—" 表示该基准测试的数据不可用'],
        firstPlaceWins: ['First Place Wins', '第一名次数'],
        eloRating: ['Elo Rating', 'Elo 评分'],
        topAmongQwen: ['Top among Qwen models', 'Qwen 模型中最高'],
        vs: ['vs', '对比']
    };

    const catalogue = { en: {}, zh: {} };
    for (const id of Object.keys(phrasebook)) {
        const [english, chinese] = phrasebook[id];
        catalogue.en[id] = english;
        catalogue.zh[id] = chinese;
    }
    return catalogue;
})();
/**
 * Click handler of the language switcher: flips the active language and
 * repaints all translated text. Any value other than 'en' switches to 'en'.
 */
function toggleLanguage() {
    if (currentLang === 'en') {
        currentLang = 'zh';
    } else {
        currentLang = 'en';
    }
    updateLanguage();
}
/**
 * Applies the strings of the active language (`currentLang`) to the page.
 *
 * Each id listed below is both the id of an element in the document and a key
 * of the `i18n` catalogue; the element's text is replaced by the catalogue
 * entry. The document's `lang` attribute and the window title are updated too,
 * so assistive technology and the browser tab match the language on screen.
 * Ends by re-rendering the stats cards, whose labels are translated as well.
 */
function updateLanguage() {
    const t = i18n[currentLang];
    const translatedIds = [
        'langText',
        'mainTitle',
        'mainSubtitle',
        'statsTitle',
        'thCategory',
        'thBenchmark',
        'notesTitle',
        'note1',
        'note2',
        'note3',
        'note4',
        'note5'
    ];

    // Keep document-level metadata in step with the visible text.
    document.documentElement.lang = currentLang;
    document.title = t.mainTitle;

    // Update static texts
    for (const id of translatedIds) {
        document.getElementById(id).textContent = t[id];
    }

    // Re-render stats grid
    renderStatsGrid();
}
/**
 * Benchmark results, one object per benchmark:
 *   { category, benchmark, scores }
 * `scores` holds one entry per model in the order of the table's model
 * columns; `null` marks a result that is not available.
 *
 * The table below is grouped by category and flattened into the row list,
 * keeping categories and benchmarks in their written order.
 */
const benchmarkData = [
    ["STEM & Puzzle", [
        ["MMMU_VAL", [70.8, 74.1, 80.6, 73.4, 75.8]],
        ["MMMU_Pro", [57.0, 60.4, 69.3, 59.7, 57.2]],
        ["MathVista_mini", [79.5, 81.4, 85.8, 72.8, 71.5]],
        ["MathVision", [60.0, 62.7, 74.6, 52.1, 62.2]],
        ["MathVerse_mini", [75.2, 77.7, 85.0, 69.6, 74.2]],
        ["ZEROBench", [null, null, 4.0, null, null]],
        ["ZEROBench_Sub", [null, null, 27.7, null, null]],
        ["VisuLogic", [null, null, 34.4, null, null]],
        ["MMBenchDEV_EN_V1.1", [86.7, 87.5, 90.6, 82.7, 80.3]]
    ]],
    ["General VQA", [
        ["RealWorldQA", [73.2, 73.5, 81.3, 72.2, 71.8]],
        ["MMStar", [73.2, 75.3, 78.7, 69.1, 68.6]],
        ["SimpleVQA", [48.8, 49.6, 61.3, 54.1, 46.0]]
    ]],
    ["Subjective Experience", [
        ["HallusionBench", [64.1, 65.4, 66.7, 64.5, 58.4]],
        ["MM-MT-Bench", [7.7, 8.0, 8.5, 7.7, 6.6]],
        ["MIABench", [91.0, 91.5, 92.7, 91.6, 89.9]],
        ["MMLongBench-Doc", [44.4, 48.0, 56.2, 46.5, 31.8]],
        ["DocVQA_TEST", [94.2, 95.3, 96.5, 92.5, 88.2]],
        ["InfoVQA_TEST", [83.0, 86.0, 89.5, 81.5, 68.6]]
    ]],
    ["Text Recognition", [
        ["AI2D_TEST", [84.9, 84.9, 89.2, 85.7, 81.9]],
        ["OCRBench", [808.0, 819.0, 875.0, 825.0, 753.0]],
        ["OCRBenchV2", [58.8, 61.55, 65.15, 47.75, 40.85]],
        ["CC_OCR", [null, null, 81.5, null, null]],
        ["CharXiv_(RQ)", [50.3, 53.0, 66.1, 56.1, 50.1]],
        ["RefCOCO-avg", [null, null, 92.4, null, null]],
        ["CountBench", [89.4, 91.5, 93.7, 79.2, 80.0]]
    ]],
    ["2D/3D Grounding", [
        ["ODinW13", [39.4, 39.8, 43.2, null, null]],
        ["ARKitScenes", [46.3, 46.6, 53.7, null, null]],
        ["Hypersim", [11.9, 12.0, 11.0, null, null]],
        ["SUNRGBD", [28.0, 30.4, 34.9, null, null]],
        ["Objectron", [null, null, 71.2, null, null]]
    ]],
    ["Multi-Image", [
        ["BLINK", [63.4, 64.7, 67.1, 64.4, 58.3]],
        ["MUIRBENCH", [75.0, 76.8, 80.1, 64.0, 65.7]],
        ["ERQA", [47.3, 46.8, 52.5, 44.3, 45.8]]
    ]],
    ["Embodied & Spatial", [
        ["EmbSpatialBench", [80.7, 81.1, 84.3, 66.1, 74.2]],
        ["RefSpatialBench", [45.3, 44.6, 69.9, 11.2, 12.6]],
        ["RoboSpatialHome", [63.2, 62.0, 73.9, 50.3, 46.1]],
        ["VSI-Bench", [55.2, 56.6, null, 30.3, 15.4]],
        ["MVBench", [69.3, 69.0, null, null, null]]
    ]],
    ["Video", [
        ["VideoMME", [68.9, 71.8, 79.0, 72.7, 66.2]],
        ["MLVU", [75.7, 75.1, 83.8, 78.5, 69.2]],
        ["LVBench", [53.5, 55.8, 63.6, 60.9, null]],
        ["CharadesSTA", [59.0, 59.9, 63.5, null, null]],
        ["VideoMMU", [69.4, 72.8, 80.0, 69.2, 63.0]],
        ["ScreenSpot", [92.9, 93.6, 95.4, null, null]],
        ["ScreenSpot Pro", [49.2, 46.6, 61.8, null, null]]
    ]],
    ["Agent", [
        ["OSWorldG", [53.9, 56.7, 68.3, null, null]],
        ["OSWorld", [31.4, 33.9, 38.1, null, null]],
        ["AndroidWorld", [52.0, 50.0, null, null, null]]
    ]],
    ["Coding", [
        ["Design2Code", [null, null, 93.4, null, null]]
    ]],
    ["Fine-grained", [
        ["V*", [74.9, 77.5, null, 70.2, null]],
        ["HRBench4K", [73.5, 72.4, null, 77.8, null]],
        ["HRBench8K", [67.1, 68.1, null, 75.5, null]]
    ]]
].reduce((rows, [category, entries]) => {
    for (const [benchmark, scores] of entries) {
        rows.push({ category, benchmark, scores });
    }
    return rows;
}, []);
// Display names of the compared models; position i matches scores[i] of every benchmark row.
const modelNames = [
    "Qwen3-VL 4B Thinking",
    "Qwen3-VL 8B Thinking",
    "Qwen3-VL 235B-A22B",
    "Gemini2.5-Flash-Lite",
    "GPT5-Nano High"
];

// Count first-place finishes per model. Every model holding the top score of a
// benchmark gets a win, so a shared top score counts for each of them.
const wins = [0, 0, 0, 0, 0];
for (const { scores } of benchmarkData) {
    const reported = scores.filter((value) => value !== null);
    if (reported.length === 0) {
        continue; // nobody reported a score for this benchmark
    }
    const best = Math.max(...reported);
    scores.forEach((value, modelIdx) => {
        if (value === best) {
            wins[modelIdx] += 1;
        }
    });
}
// Calculate Elo ratings
/**
 * Derives an Elo-style rating per model by treating each benchmark row as a
 * round-robin: every pair of models that has a score in the row plays one
 * game, won by the higher score (equal scores are a draw).
 *
 * Ratings are adjusted after every single pairing, so the outcome depends on
 * the order of the rows and of the models inside a row.
 *
 * Entries that are not finite numbers (null, undefined, NaN) are treated as
 * "no result" and skipped, as are score columns at or beyond `numModels`;
 * otherwise they would play as draws or turn ratings into NaN.
 *
 * @param {Array<{scores: Array<number|null>}>} benchmarkData Rows to process;
 *     only the `scores` array of each row is read.
 * @param {number} numModels Number of rating slots, i.e. models being compared.
 * @param {{kFactor?: number, initialRating?: number}} [options] Optional
 *     tuning: `kFactor` scales each adjustment (default 32), `initialRating`
 *     is the rating every model starts from (default 1500).
 * @returns {number[]} Final ratings rounded to the nearest integer, indexed
 *     like the `scores` arrays.
 */
function calculateElo(benchmarkData, numModels, { kFactor = 32, initialRating = 1500 } = {}) {
    const elo = new Array(numModels).fill(initialRating);

    // Standard Elo expectation of `own` scoring against `opponent`.
    const expectedScore = (own, opponent) => 1 / (1 + Math.pow(10, (opponent - own) / 400));

    // Process each benchmark as a tournament
    benchmarkData.forEach((row) => {
        const entrants = row.scores
            .map((score, idx) => ({ score, idx }))
            .filter(({ score, idx }) => idx < numModels && Number.isFinite(score));

        // Compare each pair of models (fewer than two entrants => no pairs)
        for (let i = 0; i < entrants.length; i++) {
            for (let j = i + 1; j < entrants.length; j++) {
                const modelA = entrants[i];
                const modelB = entrants[j];

                // Both expectations use the ratings from before this pairing.
                const expectedA = expectedScore(elo[modelA.idx], elo[modelB.idx]);
                const expectedB = expectedScore(elo[modelB.idx], elo[modelA.idx]);

                // Actual scores (1 for win, 0.5 for tie, 0 for loss)
                let actualA = 0.5;
                if (modelA.score > modelB.score) {
                    actualA = 1;
                } else if (modelA.score < modelB.score) {
                    actualA = 0;
                }
                const actualB = 1 - actualA;

                // Update Elo ratings
                elo[modelA.idx] += kFactor * (actualA - expectedA);
                elo[modelB.idx] += kFactor * (actualB - expectedB);
            }
        }
    });

    return elo.map((rating) => Math.round(rating));
}
// One Elo rating per model, in modelNames order. The model count comes from
// modelNames so it cannot drift from the list of models.
const eloRatings = calculateElo(benchmarkData, modelNames.length);

// Create ranking data: one entry per model, ordered by first-place wins.
// Equal win counts are ordered by Elo rating (higher first) so that the rank
// badges do not depend on the order in which the models were declared.
const rankings = modelNames.map((name, idx) => ({
    name,
    wins: wins[idx],
    elo: eloRatings[idx],
    modelIdx: idx
}))
    .sort((a, b) => (b.wins - a.wins) || (b.elo - a.elo));

// Calculate Elo differences between Qwen models:
// the Qwen entries sorted by Elo, best first (index 0 is the reference model).
const qwenModels = [
    { name: "Qwen3-VL 4B", idx: 0, elo: eloRatings[0] },
    { name: "Qwen3-VL 8B", idx: 1, elo: eloRatings[1] },
    { name: "Qwen3-VL 235B", idx: 2, elo: eloRatings[2] }
].sort((a, b) => b.elo - a.elo);
/**
 * Builds the small caption shown under a Qwen model's Elo rating.
 *
 * @param {number} modelIdx Index of the model in `eloRatings`.
 * @returns {string} The translated "top among Qwen" text for a model whose
 *     rating equals the best Qwen rating, "-<gap> <vs> <top model name>" for
 *     the other Qwen models, and "" for any index that is not listed in
 *     `qwenModels`.
 */
function getEloDiff(modelIdx) {
    // Only models listed in qwenModels get a caption.
    const isQwen = qwenModels.some((model) => model.idx === modelIdx);
    if (!isQwen) {
        return "";
    }

    const t = i18n[currentLang];
    const topQwen = qwenModels[0]; // qwenModels is sorted by Elo, best first
    const gap = topQwen.elo - eloRatings[modelIdx];
    if (gap === 0) {
        return t.topAmongQwen;
    }

    // The reference rating is the maximum among Qwen models, so the gap is
    // always a deficit and is shown with a minus sign.
    return `-${gap} ${t.vs} ${topQwen.name}`;
}
// Render stats grid
/**
 * Rebuilds the ranking cards inside the #statsGrid element from `rankings`,
 * using the labels of the active language.
 *
 * The first three entries get a rank badge (#1–#3); a card shows the Elo
 * caption only when getEloDiff returns a non-empty string. The markup for all
 * cards is assembled first and written to the grid in a single assignment, so
 * the grid is parsed once instead of once per card.
 */
function renderStatsGrid() {
    const t = i18n[currentLang];

    const cards = rankings.map((model, position) => {
        const rankBadge = position < 3
            ? `<span class="rank-badge rank-${position + 1}">#${position + 1}</span>`
            : '';
        const eloDiff = getEloDiff(model.modelIdx);
        const eloDiffHtml = eloDiff ? `<div class="elo-diff">${eloDiff}</div>` : '';

        return `
            <div class="stat-card">
                <div class="model-name">${model.name}${rankBadge}</div>
                <div class="win-count">${model.wins}</div>
                <div class="win-label">${t.firstPlaceWins}</div>
                <div class="elo-score">${model.elo}</div>
                <div class="elo-label">${t.eloRating}</div>
                ${eloDiffHtml}
            </div>
        `;
    });

    // An empty ranking list yields '', which clears the grid.
    document.getElementById('statsGrid').innerHTML = cards.join('');
}
// Initial render of the ranking cards.
renderStatsGrid();

// Render table: one <tr> per benchmark, appended to #tableBody.
const tableBody = document.getElementById('tableBody');
let currentCategory = '';
for (const { category, benchmark, scores } of benchmarkData) {
    const tr = document.createElement('tr');

    // Category cell: the label is written only on the first row of a run of
    // equal categories; the following rows get an empty cell.
    const categoryCell = document.createElement('td');
    categoryCell.className = 'category-cell';
    if (category !== currentCategory) {
        categoryCell.textContent = category;
        currentCategory = category;
    }
    tr.appendChild(categoryCell);

    // Benchmark cell
    const benchmarkCell = document.createElement('td');
    benchmarkCell.className = 'benchmark-cell';
    benchmarkCell.textContent = benchmark;
    tr.appendChild(benchmarkCell);

    // Highest reported score of the row (null when nothing was reported).
    const reported = scores.filter((value) => value !== null);
    const best = reported.length > 0 ? Math.max(...reported) : null;

    // Score cells: a dash for missing results, a highlighted badge for every
    // score equal to the row's best, plain text otherwise.
    for (const score of scores) {
        const td = document.createElement('td');
        if (score === null) {
            td.className = 'score-cell na-cell';
            td.textContent = '—';
        } else {
            td.className = 'score-cell';
            if (score === best) {
                td.innerHTML = `<span class="best-score">${score}</span>`;
            } else {
                td.textContent = score;
            }
        }
        tr.appendChild(td);
    }

    tableBody.appendChild(tr);
}
| </script> | |
| </body> | |
| </html> |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment