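"""Convert a CPM-9G 11B-base checkpoint (BMT format) into the HuggingFace Llama
format, then run a short generation test on the converted model.

The script copies vocabs.txt, rewrites the model configuration as a Llama-style
config.json, and renames every parameter from the CPM-9G naming scheme to the
corresponding Llama name before saving pytorch_model.bin.
"""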
import json
import shutil
import sys

# Make the local CPM-9G training package importable before importing cpm.*.
sys.path.insert(0, "/home/wangshuo1/projects/CPM-9G/gejiu_train")

import torch
from transformers import LlamaConfig, LlamaForCausalLM

from cpm.cpm9g import CPM9GTokenizer

# Source (BMT) checkpoint directory, HuggingFace output directory, and the
# checkpoint file to convert.
source_path = "/data/public/zwl_data/11b-base/"
target_path = "/home/wangshuo1/projects/CPM-9G/convert_to_hf/11b-base-hf/"
file_name = "11b.pt"

def convert_pkl():
    # Copy the vocabulary file next to the converted weights.
    shutil.copyfile(f"{source_path}vocabs.txt", f"{target_path}vocabs.txt")

    with open(f"{source_path}config.json") as f:
        bmt_config = json.load(f)

    # Build a Llama-style config.json from the BMT config.
    config = {
        "architectures": ["LlamaForCausalLM"],
        "bos_token_id": 1,
        "eos_token_id": 2,
        "hidden_act": "silu",
        "hidden_size": bmt_config["dim_model"],
        "initializer_range": 0.02,
        "intermediate_size": bmt_config["dim_ff"],
        "max_length": 4096,
        "max_position_embeddings": 4096,
        "model_type": "llama",
        "num_attention_heads": bmt_config["num_heads"],
        "num_hidden_layers": bmt_config["num_layers"],
        "num_key_value_heads": bmt_config["num_kv_heads"],
        "pad_token_id": 0,
        "pretraining_tp": 1,
        "rms_norm_eps": 1e-05,
        "rope_scaling": None,
        "tie_word_embeddings": False,
        "torch_dtype": "float32",
        "transformers_version": "4.31.0",
        "use_cache": True,
        "vocab_size": bmt_config["vocab_size"],
    }
    with open(f"{target_path}config.json", "w") as f:
        json.dump(config, f)

    # Load the BMT checkpoint and copy each tensor under its Llama parameter name.
    state = torch.load(f"{source_path}{file_name}", map_location="cpu")
    new_state = {}
    new_state["model.embed_tokens.weight"] = state["input_embedding.weight"]
    new_state["lm_head.weight"] = state["lm_head.weight"]
    new_state["model.norm.weight"] = state["encoder.output_layernorm.weight"]

    layer_num = bmt_config["num_layers"]
    for lid in range(layer_num):
        print(lid)
        # Attention projections.
        new_state[f"model.layers.{lid}.self_attn.q_proj.weight"] = state[f"encoder.layers.{lid}.self_att.self_attention.project_q.weight"]
        new_state[f"model.layers.{lid}.self_attn.k_proj.weight"] = state[f"encoder.layers.{lid}.self_att.self_attention.project_k.weight"]
        new_state[f"model.layers.{lid}.self_attn.v_proj.weight"] = state[f"encoder.layers.{lid}.self_att.self_attention.project_v.weight"]
        new_state[f"model.layers.{lid}.self_attn.o_proj.weight"] = state[f"encoder.layers.{lid}.self_att.self_attention.attention_out.weight"]

        # Feed-forward projections.
        new_state[f"model.layers.{lid}.mlp.gate_proj.weight"] = state[f"encoder.layers.{lid}.ffn.ffn.w_in.w_0.weight"]
        new_state[f"model.layers.{lid}.mlp.up_proj.weight"] = state[f"encoder.layers.{lid}.ffn.ffn.w_in.w_1.weight"]
        new_state[f"model.layers.{lid}.mlp.down_proj.weight"] = state[f"encoder.layers.{lid}.ffn.ffn.w_out.weight"]

        # Layer-norm weights.
        new_state[f"model.layers.{lid}.input_layernorm.weight"] = state[f"encoder.layers.{lid}.self_att.layernorm_before_attention.weight"]
        new_state[f"model.layers.{lid}.post_attention_layernorm.weight"] = state[f"encoder.layers.{lid}.ffn.layernorm_before_ffn.weight"]

    # Release the original state dict before writing the converted one.
    del state
    torch.save(new_state, f"{target_path}pytorch_model.bin")

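# --- Not part of the original script: a minimal sanity-check sketch. ---
# It assumes convert_pkl() has already written pytorch_model.bin and compares a
# few tensor shapes between the BMT checkpoint and the converted state dict.
def check_shapes():
    bmt = torch.load(f"{source_path}{file_name}", map_location="cpu")
    hf = torch.load(f"{target_path}pytorch_model.bin", map_location="cpu")
    assert bmt["input_embedding.weight"].shape == hf["model.embed_tokens.weight"].shape
    assert bmt["lm_head.weight"].shape == hf["lm_head.weight"].shape
    assert (
        bmt["encoder.layers.0.self_att.self_attention.project_q.weight"].shape
        == hf["model.layers.0.self_attn.q_proj.weight"].shape
    )
    print("shape check passed")
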
def test():
    # Load the converted config (not used further) and run a short generation
    # with the original CPM-9G tokenizer to spot-check the converted weights.
    config = LlamaConfig.from_pretrained(f"{target_path}")
    tokenizer = CPM9GTokenizer(f"{target_path}vocabs.txt")
    model = LlamaForCausalLM.from_pretrained(f"{target_path}").cuda()

    text = "请介绍一下清华大学:"  # "Please introduce Tsinghua University:"
    inputs = torch.tensor([[tokenizer.bos_id] + tokenizer.encode(text)]).cuda()
    output = model.generate(inputs, max_length=200)[0].tolist()
    print(tokenizer.decode(output))

if __name__ == "__main__":
    convert_pkl()
    test()
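# Note (not part of the original script): the converted directory can be loaded
# with plain LlamaForCausalLM.from_pretrained(target_path), but tokenization
# still requires CPM9GTokenizer with the copied vocabs.txt, since no HuggingFace
# tokenizer files (tokenizer.json / tokenizer_config.json) are written here.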