#!/usr/bin/env node

/**
 * Offline token decoder for Knowledge Mapper response collection.
 *
 * Reads tokens from a CSV or JSON file (exported from the Google Sheet)
 * and decodes each into structured response data.
 *
 * Usage:
 *   node scripts/decode-tokens.js --input tokens.csv --format csv > decoded.csv
 *   node scripts/decode-tokens.js --input tokens.json --format json > decoded.json
 *
 * Input CSV format (from Google Sheet):
 *   Timestamp, Session ID, Token, Response Count, Domain
 *
 * Output includes: session_id, timestamp, question_id, is_correct, is_skipped
 */

import { readFileSync, readdirSync } from 'fs';
import { resolve, dirname } from 'path';
import { fileURLToPath } from 'url';
import { inflate } from 'pako';

const __dirname = dirname(fileURLToPath(import.meta.url));

// ── Inline token decoder (avoids importing browser-only modules) ───────

function base64urlToBytes(str) {
  const b64 = str.replace(/-/g, '+').replace(/_/g, '/');
  const pad = (4 - (b64.length % 4)) % 4;
  const padded = b64 + '='.repeat(pad);
  const binary = atob(padded); // atob is a global in Node 16+
  const bytes = new Uint8Array(binary.length);
  for (let i = 0; i < binary.length; i++) bytes[i] = binary.charCodeAt(i);
  return bytes;
}

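// Token wire format, as inferred from the parser below (not a normative
// spec; the field names here are descriptive only):
//
//   byte 0       version
//   bytes 1..2   entry count, big-endian uint16
//   bytes 3+     one 3-byte record per entry:
//                  uint16 question index (big-endian), then a status byte
//                  (2 = correct, 1 = skipped; anything else reads as incorrect)
//
// The whole buffer is raw-deflate compressed, then base64url-encoded.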
function decodeTokenRaw(base64urlString) {
  try {
    const compressed = base64urlToBytes(base64urlString);
    const bytes = inflate(compressed, { raw: true });
    if (bytes.length < 3) return null; // too short to hold the header

    const version = bytes[0];
    const count = (bytes[1] << 8) | bytes[2]; // big-endian uint16
    const entries = [];

    for (let i = 0; i < count; i++) {
      const offset = 3 + i * 3; // 3-byte header, then 3 bytes per entry
      if (offset + 2 >= bytes.length) break; // truncated token: stop early
      const index = (bytes[offset] << 8) | bytes[offset + 1];
      const value = bytes[offset + 2];
      entries.push({
        index,
        is_correct: value === 2,
        is_skipped: value === 1,
      });
    }

    return { version, entries };
  } catch (err) {
    console.error('[decoder] Failed to decode token:', err.message);
    return null;
  }
}
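
// Example shape (hypothetical token): a successful decode returns
//   { version: 1, entries: [{ index: 42, is_correct: true, is_skipped: false }, ...] }
// and null on any failure, so callers must handle both cases.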

// ── Question index builder ─────────────────────────────────────────────

async function loadQuestionIndex() {
  // Load all domain bundles and merge questions (matching browser boot flow)
  const dataDir = resolve(__dirname, '..', 'data', 'domains');
  const files = readdirSync(dataDir).filter(f => f.endsWith('.json') && f !== 'all.json');

  const allQuestions = new Map();

  // Load all.json first (boot bundle with 50 questions)
  const allBundle = JSON.parse(readFileSync(resolve(dataDir, 'all.json'), 'utf-8'));
  for (const q of allBundle.questions) allQuestions.set(q.id, q);

  // Load all domain bundles to get the full 2500 questions
  for (const file of files) {
    try {
      const bundle = JSON.parse(readFileSync(resolve(dataDir, file), 'utf-8'));
      if (bundle.questions) {
        for (const q of bundle.questions) allQuestions.set(q.id, q);
      }
    } catch { /* skip malformed files */ }
  }

  // Sort deterministically — must match buildIndex() in question-index.js exactly.
  // Uses < / > comparison (code-point order), NOT localeCompare: e.g. 'B' < 'a'
  // by code point, while localeCompare would typically put 'a' first.
  const sorted = [...allQuestions.values()].sort((a, b) => {
    const da = a.domain_ids?.[0] || '';
    const db = b.domain_ids?.[0] || '';
    if (da < db) return -1;
    if (da > db) return 1;
    if (a.id < b.id) return -1;
    if (a.id > b.id) return 1;
    return 0;
  });

  const indexToQuestion = new Map();
  sorted.forEach((q, i) => indexToQuestion.set(i, q));

  return indexToQuestion;
}
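
// Token entries reference questions by position in this sorted order:
// indexToQuestion.get(0) is the first question (by code-point order) of the
// first domain, and so on. If the bundles on disk drift from what the browser
// served when the token was minted, indices may map to the wrong questions.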

// ── Input parsing ──────────────────────────────────────────────────────

function parseCSVInput(content) {
  const lines = content.trim().split('\n');
  // Naive comma split: assumes no field contains a comma (base64url tokens
  // never do; check your sheet's timestamp format before relying on this).
  const records = [];
  for (let i = 1; i < lines.length; i++) { // i = 1 skips the header row
    const parts = lines[i].split(',').map(s => s.trim());
    if (parts.length < 3) continue;
    records.push({
      timestamp: parts[0],
      session_id: parts[1],
      token: parts[2],
      response_count: parseInt(parts[3], 10) || 0,
    });
  }
  return records;
}
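
// Expected CSV input, per the header comment (the data row is hypothetical):
//   Timestamp,Session ID,Token,Response Count,Domain
//   2024-01-15 09:30:00,abc123,pVxTy...,12,all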

function parseJSONInput(content) {
  const data = JSON.parse(content);
  return Array.isArray(data) ? data : [data];
}
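
// JSON input is expected to be an array of records (or a single record) with
// the same fields the CSV path produces, e.g. (hypothetical values):
//   [{ "timestamp": "2024-01-15 09:30:00", "session_id": "abc123",
//      "token": "pVxTy...", "response_count": 12 }]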

// ── Main ───────────────────────────────────────────────────────────────

async function main() {
  const args = process.argv.slice(2);
  let inputFile = null;
  let outputFormat = 'csv';

  for (let i = 0; i < args.length; i++) {
    if (args[i] === '--input' && args[i + 1]) inputFile = args[++i];
    else if (args[i] === '--format' && args[i + 1]) outputFormat = args[++i];
    else if (args[i] === '--help') {
      console.log('Usage: node scripts/decode-tokens.js --input <file> --format <csv|json>');
      process.exit(0);
    }
  }

  if (!inputFile) {
    console.error('Error: --input <file> is required');
    process.exit(1);
  }

  const content = readFileSync(resolve(inputFile), 'utf-8');
  // Input format is detected from the file extension; --format only controls output.
  const isJSON = inputFile.endsWith('.json');
  const records = isJSON ? parseJSONInput(content) : parseCSVInput(content);

  console.error('[decoder] Loading question index...');
  const indexToQuestion = await loadQuestionIndex();
  console.error(`[decoder] Loaded ${indexToQuestion.size} questions`);
  console.error(`[decoder] Decoding ${records.length} tokens...`);

  const decoded = [];

  for (const record of records) {
    const result = decodeTokenRaw(record.token);
    if (!result) {
      console.error(`[decoder] Failed to decode token from session ${record.session_id}`);
      continue;
    }

    for (const entry of result.entries) {
      const q = indexToQuestion.get(entry.index);
      decoded.push({
        session_id: record.session_id,
        timestamp: record.timestamp,
        question_index: entry.index,
        question_id: q?.id || `unknown_${entry.index}`,
        domain: q?.domain_ids?.[0] || 'unknown',
        question_text: q?.question_text || '',
        correct_answer: q?.options?.[q.correct_answer] || '',
        is_correct: entry.is_correct,
        is_skipped: entry.is_skipped,
      });
    }
  }

  // Output
  if (outputFormat === 'json') {
    console.log(JSON.stringify(decoded, null, 2));
  } else {
    // CSV: free-text fields are quoted, with inner quotes doubled
    console.log('session_id,timestamp,domain,question_index,question_id,question_text,correct_answer,is_correct,is_skipped');
    for (const row of decoded) {
      const text = `"${(row.question_text || '').replace(/"/g, '""')}"`;
      const answer = `"${(row.correct_answer || '').replace(/"/g, '""')}"`;
      console.log(`${row.session_id},${row.timestamp},${row.domain},${row.question_index},${row.question_id},${text},${answer},${row.is_correct},${row.is_skipped}`);
    }
  }

  console.error(`[decoder] Decoded ${decoded.length} responses from ${records.length} tokens`);
}

main().catch(err => {
  console.error('Fatal:', err);
  process.exit(1);
});