fix up

commit 29ebcd75d5 (parent 15b399a82f)
@@ -11,9 +11,9 @@
   "formatting": "数据集格式(可选,默认:alpaca,可以为 alpaca 或 sharegpt)",
   "ranking": "是否为偏好数据集(可选,默认:False)",
   "subset": "数据集子集的名称(可选,默认:None)",
+  "split": "所使用的数据集切分(可选,默认:train)",
   "folder": "Hugging Face 仓库的文件夹名称(可选,默认:None)",
-  "num_samples": "该数据集中用于训练的样本数量。(可选,默认:None)",
+  "num_samples": "该数据集所使用的样本数量。(可选,默认:None)",
-  "split": "数据集中的要使用的训练测试集切分(可选,默认:train)",
   "columns(可选)": {
     "prompt": "数据集代表提示词的表头名称(默认:instruction)",
     "query": "数据集代表请求的表头名称(默认:input)",
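To make the documented fields concrete, here is a hedged sketch of a `dataset_info.json` entry that uses them; the dataset name and repo are illustrative, not taken from this commit:

```json
{
  "my_dataset": {
    "hf_hub_url": "someone/my-dataset",
    "formatting": "alpaca",
    "ranking": false,
    "split": "train",
    "num_samples": 1000,
    "columns": {
      "prompt": "instruction",
      "query": "input"
    }
  }
}
```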
@@ -181,7 +181,7 @@
       "response": "summary"
     }
   },
-  "adgen_val": {
+  "adgen_eval": {
     "hf_hub_url": "HasturOfficial/adgen",
     "ms_hub_url": "AI-ModelScope/adgen",
     "split": "validation",
@@ -8,7 +8,7 @@ do_predict: true
 finetuning_type: lora

 ### dataset
-dataset: identity,alpaca_en_demo
+eval_dataset: identity,alpaca_en_demo
 template: llama3
 cutoff_len: 1024
 max_samples: 50
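After the rename, prediction-only runs take their data from `eval_dataset`, which matches the stricter `do_predict` validation introduced later in this commit. Assuming this hunk edits one of the repository's example configs, the run would be launched as usual; the file path below is an assumption, not shown in this diff:

```bash
llamafactory-cli train examples/train_lora/llama3_lora_predict.yaml
```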
@@ -61,11 +61,12 @@ def calculate_lr(
             packing=packing,
             output_dir="dummy_dir",
             overwrite_cache=True,
+            do_train=True,
         )
     )
     tokenizer_module = load_tokenizer(model_args)
     tokenizer = tokenizer_module["tokenizer"]
-    dataset_module = get_dataset(model_args, data_args, training_args, stage, **tokenizer_module)
+    trainset = get_dataset(model_args, data_args, training_args, stage, **tokenizer_module)["train_dataset"]
     if stage == "pt":
         data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)
     elif stage == "sft":
@@ -73,7 +74,7 @@ def calculate_lr(
     else:
         raise NotImplementedError("Stage does not supported: {}.".format(stage))

-    dataloader = DataLoader(dataset_module["eval_dataset"], batch_size, shuffle=False, collate_fn=data_collator, pin_memory=True)
+    dataloader = DataLoader(trainset, batch_size, shuffle=False, collate_fn=data_collator, pin_memory=True)
     valid_tokens, total_tokens = 0, 0
     for batch in tqdm(dataloader):
         valid_tokens += torch.sum(batch["labels"] != IGNORE_INDEX).item()
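A sketch of the return shape these scripts now rely on, inferred from this patch rather than from a documented API: `get_dataset` returns a mapping whose `train_dataset` entry is built when `do_train=True` (hence the new flag), with `eval_dataset` populated for eval or predict runs:

```python
# minimal sketch, assuming the return shape implied by this patch
dataset_module = get_dataset(model_args, data_args, training_args, stage, **tokenizer_module)
trainset = dataset_module["train_dataset"]    # built when do_train=True
evalset = dataset_module.get("eval_dataset")  # built for do_eval / do_predict runs
```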
@@ -83,11 +83,12 @@ def cal_ppl(
             train_on_prompt=train_on_prompt,
             output_dir="dummy_dir",
             overwrite_cache=True,
+            do_train=True,
         )
     )
     tokenizer_module = load_tokenizer(model_args)
     tokenizer = tokenizer_module["tokenizer"]
-    dataset_module = get_dataset(model_args, data_args, training_args, stage, **tokenizer_module)
+    trainset = get_dataset(model_args, data_args, training_args, stage, **tokenizer_module)["train_dataset"]
     model = load_model(tokenizer, model_args, finetuning_args, is_trainable=False)
     if stage == "pt":
         data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)
@@ -100,7 +101,7 @@ def cal_ppl(
     else:
         raise NotImplementedError("Stage does not supported: {}.".format(stage))

-    dataloader = DataLoader(dataset_module["eval_dataset"], batch_size, shuffle=False, collate_fn=data_collator, pin_memory=True)
+    dataloader = DataLoader(trainset, batch_size, shuffle=False, collate_fn=data_collator, pin_memory=True)
     criterion = torch.nn.CrossEntropyLoss(reduction="none")
     total_ppl = 0
     perplexities = []
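For intuition, a hedged sketch of per-sample perplexity with `reduction="none"` and `IGNORE_INDEX` masking, using toy tensors in place of a real batch (the rest of `cal_ppl`'s loop is not visible in this hunk):

```python
import torch

IGNORE_INDEX = -100  # matches extras.constants.IGNORE_INDEX used above
criterion = torch.nn.CrossEntropyLoss(reduction="none")

# toy tensors standing in for one batch from the dataloader
logits = torch.randn(2, 8, 32)              # (batch, seq_len, vocab)
labels = torch.randint(0, 32, (2, 8))
labels[:, :3] = IGNORE_INDEX                # prompt tokens carry no loss

shift_logits = logits[:, :-1, :]            # position t predicts token t+1
shift_labels = labels[:, 1:]
token_loss = criterion(shift_logits.transpose(1, 2), shift_labels)  # (batch, seq_len-1)
mask = shift_labels != IGNORE_INDEX         # ignore_index=-100 already zeroes masked losses
ppl = torch.exp((token_loss * mask).sum(-1) / mask.sum(-1))  # per-sample perplexity
print(ppl)
```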
@@ -44,13 +44,14 @@ def length_cdf(
             cutoff_len=1_000_000,
             output_dir="dummy_dir",
             overwrite_cache=True,
+            do_train=True,
         )
     )
     tokenizer_module = load_tokenizer(model_args)
-    dataset_module = get_dataset(model_args, data_args, training_args, stage="sft", **tokenizer_module)
+    trainset = get_dataset(model_args, data_args, training_args, stage="sft", **tokenizer_module)["train_dataset"]
-    total_num = len(dataset_module["eval_dataset"])
+    total_num = len(trainset)
     length_dict = defaultdict(int)
-    for sample in tqdm(dataset_module["eval_dataset"]["input_ids"]):
+    for sample in tqdm(trainset["input_ids"]):
         length_dict[len(sample) // interval * interval] += 1

     length_tuples = list(length_dict.items())
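The tail of `length_cdf` is not shown in this hunk; a hedged sketch of how the bucket counts typically become a cumulative distribution (the print format is an assumption):

```python
from collections import defaultdict

interval = 128                      # illustrative value
lengths = [37, 140, 260, 300, 700]  # stand-ins for tokenized sample lengths
length_dict = defaultdict(int)
for n in lengths:
    length_dict[n // interval * interval] += 1  # bucket each length to its interval floor

count = 0
for bucket, num in sorted(length_dict.items()):
    count += num
    # cumulative share of samples shorter than the bucket's upper edge
    print(f"{count / len(lengths):.2%} of samples are shorter than {bucket + interval} tokens")
```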
@@ -37,7 +37,6 @@
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 # SOFTWARE.

-import inspect
 import json
 import os
 from typing import Any, Dict, List, Optional
@@ -88,18 +87,13 @@ class Evaluator:
         pbar = tqdm(categorys.keys(), desc="Processing subjects", position=0)
         results = {}
         for subject in pbar:
-            if "trust_remote_code" in inspect.signature(load_dataset).parameters:  # for datasets==2.16.0
-                kwargs = {"trust_remote_code": True}
-            else:
-                kwargs = {}
-
             dataset = load_dataset(
                 path=os.path.join(self.eval_args.task_dir, self.eval_args.task),
                 name=subject,
                 cache_dir=self.model_args.cache_dir,
                 download_mode=self.eval_args.download_mode,
                 token=self.model_args.hf_hub_token,
-                **kwargs,
+                trust_remote_code=True,
             )
             pbar.set_postfix_str(categorys[subject]["name"])
             inputs, outputs, labels = [], [], []
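Passing `trust_remote_code=True` unconditionally presumably assumes a `datasets` release new enough to accept the argument (the deleted branch existed for the 2.16.0 transition). A minimal sketch of the same call pattern outside the evaluator; the path and subject are illustrative stand-ins, not values from this diff:

```python
from datasets import load_dataset

dataset = load_dataset(
    path="evaluation/ceval",     # local directory containing the task's loading script
    name="computer_network",     # one subject (config) of the task
    trust_remote_code=True,      # execute the dataset's loading script without prompting
)
```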
@@ -104,7 +104,7 @@ def _verify_model_args(
         raise ValueError("Quantized model only accepts a single adapter. Merge them first.")

     if data_args.template == "yi" and model_args.use_fast_tokenizer:
-        logger.warning("We should use slow tokenizer for the Yi models.")
+        logger.warning("We should use slow tokenizer for the Yi models. Change `use_fast_tokenizer` to False.")
         model_args.use_fast_tokenizer = False

@@ -203,6 +203,14 @@ def get_train_args(args: Optional[Dict[str, Any]] = None) -> _TRAIN_CLS:
     if training_args.do_train and training_args.predict_with_generate:
         raise ValueError("`predict_with_generate` cannot be set as True while training.")

+    if training_args.do_train and data_args.dataset is None:
+        raise ValueError("Please specify dataset for training.")
+
+    if (training_args.do_eval or training_args.do_predict) and (
+        data_args.eval_dataset is None and data_args.val_size < 1e-6
+    ):
+        raise ValueError("Please specify dataset for evaluation.")
+
     if training_args.do_train and model_args.quantization_device_map == "auto":
         raise ValueError("Cannot use device map for quantized models in training.")

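In config terms, the new eval check can be satisfied in either of two ways; a hedged YAML sketch (the keys are real arguments from this codebase, the values are illustrative):

```yaml
do_eval: true
eval_dataset: alpaca_en_demo  # option 1: name a dedicated evaluation dataset
# val_size: 0.1               # option 2: carve a validation fraction off the training data
```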
@@ -242,7 +250,7 @@ def get_train_args(args: Optional[Dict[str, Any]] = None) -> _TRAIN_CLS:
         raise ValueError("Unsloth is incompatible with DeepSpeed ZeRO-3.")

     if data_args.neat_packing and not data_args.packing:
-        logger.warning("`neat_packing` requires `packing` is True. Change it to True.")
+        logger.warning("`neat_packing` requires `packing` is True. Change `packing` to True.")
         data_args.packing = True

     _verify_model_args(model_args, data_args, finetuning_args)
@@ -71,8 +71,6 @@ def llama_attention_forward(
     cos, sin = self.rotary_emb(value_states, position_ids)
     query_states, key_states = apply_rotary_pos_emb(query_states, key_states, cos, sin)

-    past_key_value = getattr(self, "past_key_value", past_key_value)
-
     if past_key_value is not None:
         cache_kwargs = {"sin": sin, "cos": cos, "cache_position": cache_position}
         key_states, value_states = past_key_value.update(key_states, value_states, self.layer_idx, cache_kwargs)
@@ -156,8 +154,6 @@ def llama_flash_attention_2_forward(
     cos, sin = self.rotary_emb(value_states, position_ids)
     query_states, key_states = apply_rotary_pos_emb(query_states, key_states, cos, sin)

-    past_key_value = getattr(self, "past_key_value", past_key_value)
-
     if past_key_value is not None:
         cache_kwargs = {"sin": sin, "cos": cos, "cache_position": cache_position}
         key_states, value_states = past_key_value.update(key_states, value_states, self.layer_idx, cache_kwargs)
@@ -17,7 +17,7 @@

 from typing import TYPE_CHECKING, List, Optional

-from ...data import PairwiseDataCollatorWithPadding, get_dataset, split_dataset
+from ...data import PairwiseDataCollatorWithPadding, get_dataset
 from ...extras.constants import IGNORE_INDEX
 from ...extras.ploting import plot_loss
 from ...hparams import ModelArguments
@@ -70,8 +70,8 @@ def run_dpo(
         finetuning_args=finetuning_args,
         data_collator=data_collator,
         callbacks=callbacks,
-        **tokenizer_module,
         **dataset_module,
+        **tokenizer_module,
     )

     # Training
@@ -17,7 +17,7 @@

 from typing import TYPE_CHECKING, List, Optional

-from ...data import KTODataCollatorWithPadding, get_dataset, split_dataset
+from ...data import KTODataCollatorWithPadding, get_dataset
 from ...extras.constants import IGNORE_INDEX
 from ...extras.ploting import plot_loss
 from ...hparams import ModelArguments
|
@ -67,8 +67,8 @@ def run_kto(
|
||||||
finetuning_args=finetuning_args,
|
finetuning_args=finetuning_args,
|
||||||
data_collator=data_collator,
|
data_collator=data_collator,
|
||||||
callbacks=callbacks,
|
callbacks=callbacks,
|
||||||
**tokenizer_module,
|
|
||||||
**dataset_module,
|
**dataset_module,
|
||||||
|
**tokenizer_module,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Training
|
# Training
|
||||||
|
|
|
@@ -77,9 +77,13 @@ class CustomPPOTrainer(PPOTrainer, Trainer):
         ref_model: Optional["AutoModelForCausalLMWithValueHead"],
         tokenizer: "PreTrainedTokenizer",
         processor: Optional["ProcessorMixin"],
-        dataset: "Dataset",
         data_collator: "DataCollatorWithPadding",
+        train_dataset: Optional["Dataset"] = None,
+        eval_dataset: Optional["Dataset"] = None,
     ) -> None:
+        if eval_dataset is not None:
+            raise NotImplementedError("PPOTrainer does not support eval dataset yet.")
+
         backward_batch_size = training_args.per_device_train_batch_size * training_args.gradient_accumulation_steps
         ppo_config = PPOConfig(
             model_name=model_args.model_name_or_path,
@@ -115,7 +119,7 @@ class CustomPPOTrainer(PPOTrainer, Trainer):
             num_training_steps = training_args.max_steps
         else:
             total_train_batch_size = backward_batch_size * finetuning_args.ppo_buffer_size * training_args.world_size
-            num_training_steps = training_args.num_train_epochs * math.ceil(len(dataset) / total_train_batch_size)
+            num_training_steps = training_args.num_train_epochs * math.ceil(len(train_dataset) / total_train_batch_size)

         optimizer = self.create_optimizer(model, training_args, finetuning_args)
         scheduler = self.create_scheduler(training_args, num_training_steps, optimizer)
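As a sanity check on the step-count arithmetic, with made-up numbers (none of these values come from the commit):

```python
import math

# illustrative values only
per_device_train_batch_size, gradient_accumulation_steps = 4, 8
ppo_buffer_size, world_size = 1, 1
num_train_epochs, train_dataset_len = 3, 3000

backward_batch_size = per_device_train_batch_size * gradient_accumulation_steps  # 32
total_train_batch_size = backward_batch_size * ppo_buffer_size * world_size      # 32
num_training_steps = num_train_epochs * math.ceil(train_dataset_len / total_train_batch_size)
print(num_training_steps)  # 3 * 94 = 282
```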
@@ -126,7 +130,7 @@ class CustomPPOTrainer(PPOTrainer, Trainer):
             model=model,
             ref_model=ref_model,
             tokenizer=tokenizer,
-            dataset=dataset,
+            dataset=train_dataset,
             data_collator=data_collator,
             lr_scheduler=scheduler,
         )
@@ -63,8 +63,8 @@ def run_ppo(
         model=model,
         reward_model=reward_model,
         ref_model=ref_model,
-        dataset=dataset_module["train_dataset"],
         data_collator=data_collator,
+        **dataset_module,
         **tokenizer_module,
     )

@@ -20,7 +20,7 @@ from typing import TYPE_CHECKING, List, Optional

 from transformers import DataCollatorForLanguageModeling

-from ...data import get_dataset, split_dataset
+from ...data import get_dataset
 from ...extras.ploting import plot_loss
 from ...model import load_model, load_tokenizer
 from ..trainer_utils import create_modelcard_and_push
@@ -53,8 +53,8 @@ def run_pt(
         finetuning_args=finetuning_args,
         data_collator=data_collator,
         callbacks=callbacks,
-        **tokenizer_module,
         **dataset_module,
+        **tokenizer_module,
     )

     # Training
@@ -17,7 +17,7 @@

 from typing import TYPE_CHECKING, List, Optional

-from ...data import PairwiseDataCollatorWithPadding, get_dataset, split_dataset
+from ...data import PairwiseDataCollatorWithPadding, get_dataset
 from ...extras.ploting import plot_loss
 from ...model import load_model, load_tokenizer
 from ..callbacks import fix_valuehead_checkpoint
@@ -56,8 +56,8 @@ def run_rm(
         data_collator=data_collator,
         callbacks=callbacks,
         compute_metrics=compute_accuracy,
-        **tokenizer_module,
         **dataset_module,
+        **tokenizer_module,
     )

     # Training
@@ -17,7 +17,7 @@

 from typing import TYPE_CHECKING, List, Optional

-from ...data import SFTDataCollatorWith4DAttentionMask, get_dataset, split_dataset
+from ...data import SFTDataCollatorWith4DAttentionMask, get_dataset
 from ...extras.constants import IGNORE_INDEX
 from ...extras.misc import get_logits_processor
 from ...extras.ploting import plot_loss
@@ -75,8 +75,8 @@ def run_sft(
         callbacks=callbacks,
         compute_metrics=ComputeMetrics(tokenizer) if training_args.predict_with_generate else compute_accuracy,
         preprocess_logits_for_metrics=None if training_args.predict_with_generate else eval_logit_processor,
-        **tokenizer_module,
         **dataset_module,
+        **tokenizer_module,
     )

     # Keyword arguments for `model.generate`
@@ -79,7 +79,7 @@ def create_modelcard_and_push(
         "tags": ["llama-factory", finetuning_args.finetuning_type],
     }
     if data_args.dataset is not None:
-        kwargs["dataset"] = [dataset.strip() for dataset in data_args.dataset.split(",")]
+        kwargs["dataset"] = data_args.dataset

     if model_args.use_unsloth:
         kwargs["tags"] = kwargs["tags"] + ["unsloth"]
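Dropping the split-and-strip here implies that `data_args.dataset` now arrives as an already-parsed list from somewhere upstream. A hedged sketch of what such a post-init parser could look like; `split_arg` is a hypothetical name, not shown in this diff:

```python
from typing import List, Optional

def split_arg(arg: Optional[str]) -> Optional[List[str]]:
    """Hypothetical helper: turn a comma-separated CLI string into a clean list."""
    if isinstance(arg, str):
        return [item.strip() for item in arg.split(",")]
    return arg

# e.g. in a dataclass __post_init__:  self.dataset = split_arg(self.dataset)
```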
@@ -174,8 +174,8 @@ def load_dataset_info(dataset_dir: str) -> Dict[str, Dict[str, Any]]:
     r"""
     Loads dataset_info.json.
     """
-    if dataset_dir == "ONLINE":
-        logger.info("dataset_dir is ONLINE, using online dataset.")
+    if dataset_dir == "ONLINE" or dataset_dir.startswith("REMOTE:"):
+        logger.info("dataset_dir is {}, using online dataset.".format(dataset_dir))
         return {}

     try:
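A quick restatement of the new behavior; the `REMOTE:` payload format is not shown in this diff, so the second argument below is illustrative:

```python
load_dataset_info("ONLINE")              # skips the local dataset_info.json, returns {}
load_dataset_info("REMOTE:org/dataset")  # now also skips it; payload format assumed
load_dataset_info("data")                # anything else still reads <dir>/dataset_info.json
```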
@@ -259,7 +259,7 @@ class Runner:
             use_unsloth=(get("top.booster") == "unsloth"),
             visual_inputs=get("top.visual_inputs"),
             dataset_dir=get("eval.dataset_dir"),
-            dataset=",".join(get("eval.dataset")),
+            eval_dataset=",".join(get("eval.dataset")),
             cutoff_len=get("eval.cutoff_len"),
             max_samples=int(get("eval.max_samples")),
             per_device_eval_batch_size=get("eval.batch_size"),