fix #763
This commit is contained in:
parent
a9d1fb72f7
commit
370bdb6e43
|
@ -34,6 +34,8 @@ def preprocess_dataset(
|
|||
# build grouped texts with format `X1 X2 X3 ...`
|
||||
if isinstance(getattr(tokenizer, "tokenizer", None), tiktoken.Encoding):
|
||||
kwargs = dict(allowed_special="all") # for tiktoken tokenizer (Qwen)
|
||||
else:
|
||||
kwargs = dict(add_special_tokens=True)
|
||||
|
||||
if hasattr(tokenizer, "add_bos_token") and hasattr(tokenizer, "add_eos_token"):
|
||||
setattr(tokenizer, "add_bos_token", True) # for LLaMA tokenizer
|
||||
|
|
|
@ -74,7 +74,7 @@ def can_quantize(finetuning_type: str) -> Dict[str, Any]:
|
|||
def gen_cmd(args: Dict[str, Any]) -> str:
|
||||
if args.get("do_train", None):
|
||||
args["plot_loss"] = True
|
||||
cmd_lines = ["CUDA_VISIBLE_DEVICES=0 python src/train_bash.py"]
|
||||
cmd_lines = ["CUDA_VISIBLE_DEVICES=0 python src/train_bash.py "]
|
||||
for k, v in args.items():
|
||||
if v is not None and v != "":
|
||||
cmd_lines.append(" --{} {} ".format(k, str(v)))
|
||||
|
|
Loading…
Reference in New Issue