From eefcd105c12dd916a7b80825fdb1e979f87501d6 Mon Sep 17 00:00:00 2001 From: BUAADreamer <1428195643@qq.com> Date: Thu, 25 Apr 2024 20:09:43 +0800 Subject: [PATCH] rm some --- src/llmtuner/train/sftmm/collator.py | 42 ---------------------------- 1 file changed, 42 deletions(-) delete mode 100644 src/llmtuner/train/sftmm/collator.py diff --git a/src/llmtuner/train/sftmm/collator.py b/src/llmtuner/train/sftmm/collator.py deleted file mode 100644 index 2931dd9c..00000000 --- a/src/llmtuner/train/sftmm/collator.py +++ /dev/null @@ -1,42 +0,0 @@ -from dataclasses import dataclass -from transformers import AutoProcessor - - -@dataclass -class DataCollatorForVis2Seq: - processor: AutoProcessor - - def __call__(self, examples): - texts = [] - images = [] - for example in examples: - if len(example["images"]) > 1: - raise ValueError("This collator only supports one image per example") - messages = example["messages"] - text = self.processor.tokenizer.apply_chat_template( - messages, tokenize=False, add_generation_prompt=False - ) - texts.append(text) - images.append(example["images"][0]) - - batch = self.processor( - text=texts, images=images, return_tensors="pt", padding=True - ) - - labels = batch["input_ids"].clone() - if self.processor.tokenizer.pad_token_id is not None: - labels[labels == self.processor.tokenizer.pad_token_id] = -100 - batch["labels"] = labels - - return batch - - -@dataclass -class DataCollatorForMLLM: - processor: AutoProcessor - - def __call__(self, examples): - print(examples[0].keys()) - print(examples[0]["input_ids"]) - batch = {} - return batch