修复导出的模型

摘要:在 tokenizer_config.json 所在目录下运行下面的脚本;如果其中的 extra_special_tokens 字段是 list 类型,就删除该字段并写回文件,然后重新加载 tokenizer 验证修复结果。

作者:lh

python - <<'PY'
# Remove a list-valued "extra_special_tokens" entry from tokenizer_config.json.
#
# Run this from the directory that contains tokenizer_config.json: the path
# below is relative to the current working directory.
# NOTE(review): presumably the transformers version used for loading expects
# this field to be a mapping rather than a list -- confirm against the actual
# load error before relying on this workaround.
import json

path = "tokenizer_config.json"

with open(path, "r", encoding="utf-8") as f:
    cfg = json.load(f)

if isinstance(cfg.get("extra_special_tokens"), list):
    print("extra_special_tokens 是 list,删除该字段")
    del cfg["extra_special_tokens"]

    # Write back only after a real change, so an already-valid config is not
    # reformatted and "修复完成" is never reported for a no-op run.
    with open(path, "w", encoding="utf-8") as f:
        # ensure_ascii=False keeps non-ASCII token strings readable on disk.
        json.dump(cfg, f, ensure_ascii=False, indent=2)

    print("修复完成")
else:
    print("extra_special_tokens 不是 list,无需修复")
PY

测试 tokenizer 能否正常加载

python - <<'PY'
# Smoke test: load the tokenizer from the model directory and report success.
# If loading raises, the traceback is shown and "tokenizer ok" is not printed.
from transformers import AutoTokenizer

# Placeholder path -- replace it with the real exported model directory.
model_dir = "/你的模型目录"

# NOTE(review): trust_remote_code=True allows custom code shipped with the
# model to run during loading; only point this at a directory you trust.
tokenizer = AutoTokenizer.from_pretrained(
    model_dir,
    trust_remote_code=True,
)

print("tokenizer ok")
PY