Newer
Older
TelosDB / .scripts / dump_gguf_specials.cjs
@楽曲作りまくりおじさん 楽曲作りまくりおじさん 8 hours ago 1 KB chore: prepare for history rewrite
/**
 * Diagnostic dump of "special token" related text found in a file.
 *
 * Usage: node dump_gguf_specials.cjs <file>
 *
 * The file is decoded as UTF-8 text and searched for a fixed list of key
 * names and token strings; for the first occurrence of each, a window of the
 * surrounding text is printed. Two regexes additionally try to parse
 * `special_eog_ids` and `special_eos_id` values.
 * NOTE(review): the regex shapes (`arr[...] = [...]`, `u32 = N`) look like
 * model-loader log output rather than raw GGUF binary — confirm intended input.
 *
 * Exit code 2 when no file argument is given.
 */
const fs = require('fs');
const { constants: bufferConstants } = require('buffer');

const p = process.argv[2];
if (!p) { console.error('Usage: node dump_gguf_specials.cjs <file>'); process.exit(2); }

/**
 * Reads at most `maxBytes` bytes from the start of a file.
 *
 * @param {string} filePath - File to read.
 * @param {number} maxBytes - Upper bound on the number of bytes read.
 * @returns {Buffer} The bytes actually read (may be shorter than `maxBytes`).
 */
function readPrefix(filePath, maxBytes) {
  const fd = fs.openSync(filePath, 'r');
  try {
    const chunk = Buffer.allocUnsafe(maxBytes);
    let filled = 0;
    // readSync may return fewer bytes than requested, so loop until full or EOF.
    while (filled < maxBytes) {
      const got = fs.readSync(fd, chunk, filled, maxBytes - filled, filled);
      if (got === 0) break;
      filled += got;
    }
    return chunk.subarray(0, filled);
  } finally {
    fs.closeSync(fd);
  }
}

/**
 * Loads the bytes that will be scanned.
 *
 * A UTF-8 decode never yields more UTF-16 code units than input bytes, so any
 * input of at most MAX_STRING_LENGTH bytes can be decoded safely. Such files
 * (and non-regular files, whose size is not known up front) are read whole,
 * exactly as before. Larger files used to crash the script in
 * readFileSync/toString; for those only the leading bytes are scanned.
 * NOTE(review): this presumes the interesting metadata sits near the start of
 * the file, as in the GGUF layout — confirm for other inputs.
 *
 * @param {string} filePath - File to load.
 * @returns {Buffer} File contents, or a prefix of them for oversized files.
 */
function loadScanBuffer(filePath) {
  const maxBytes = bufferConstants.MAX_STRING_LENGTH;
  const stat = fs.statSync(filePath);
  if (!stat.isFile() || stat.size <= maxBytes) {
    return fs.readFileSync(filePath);
  }
  console.error(`warning: file is ${stat.size} bytes; scanning only the first ${maxBytes} bytes`);
  return readPrefix(filePath, maxBytes);
}

/**
 * Prints the slice of `text` surrounding `pos`, clamped to the text bounds.
 *
 * @param {string} text - Full text being searched.
 * @param {number} pos - Character index of the match.
 * @param {number} before - Characters of context to include before `pos`.
 * @param {number} after - Characters to include starting at `pos`.
 */
function printContext(text, pos, before, after) {
  const start = Math.max(0, pos - before);
  const end = Math.min(text.length, pos + after);
  console.log(text.slice(start, end));
}

const text = loadScanBuffer(p).toString('utf8');

// Metadata-style keys: report the first occurrence of each with wide context.
const keys = ['special_eog_ids', 'special_eos_id', 'special_tokens', 'tokenizer.ggml.tokens'];
for (const key of keys) {
  const pos = text.indexOf(key);
  if (pos === -1) {
    console.log(`${key}: NOT FOUND`);
    continue;
  }
  console.log('='.repeat(40));
  console.log(`Key: ${key} at char ${pos}`);
  printContext(text, pos, 200, 600);
}

// Try to extract the bracketed list that follows `special_eog_ids arr[...] =`.
const eogMatch = text.match(/special_eog_ids\s*arr\[.*?\]\s*=\s*\[(.*?)\]/s);
if (eogMatch) {
  console.log('parsed special_eog_ids:', eogMatch[1].trim());
} else {
  console.log('special_eog_ids pattern not parsed');
}

// Try to extract the integer that follows `special_eos_id u32 =`.
const eosMatch = text.match(/special_eos_id\s*u32\s*=\s*(\d+)/);
if (eosMatch) {
  console.log('parsed special_eos_id:', eosMatch[1]);
} else {
  console.log('special_eos_id pattern not parsed');
}

// Additional searches for tokens mentioned in logs
const toks = ['end_of_turn', '<end_of_turn>', '<eos>', 'eog', 'eog_ids'];
for (const tok of toks) {
  const pos = text.indexOf(tok);
  if (pos === -1) continue;
  console.log('----');
  console.log(`Found token text: ${tok} at ${pos}`);
  printContext(text, pos, 80, 80);
}