forked from p04798526/LLaMA-Factory-Mirror
remove checksum and fix ui args
parent d06d56661b
commit 58c522cd5c
@@ -366,7 +366,7 @@ See [examples/README.md](examples/README.md) for advanced usage (including distributed training)
 #### Use local environment
 
 ```bash
-CUDA_VISIBLE_DEVICES=0 GRADIO_SERVER_PORT=7860 GRADIO_SHARE=1 llamafactory-cli webui
+CUDA_VISIBLE_DEVICES=0 GRADIO_SHARE=1 llamafactory-cli webui
 ```
 
 <details><summary>For Alibaba Cloud PAI or AutoDL users</summary>
@@ -374,7 +374,7 @@ CUDA_VISIBLE_DEVICES=0 GRADIO_SERVER_PORT=7860 GRADIO_SHARE=1 llamafactory-cli webui
 If you encounter display problems in LLaMA Board on Alibaba Cloud PAI, try using the following command to set environment variables before starting LLaMA Board:
 
 ```bash
-export GRADIO_ROOT_PATH=/${JUPYTER_NAME}/proxy/7860/
+export GRADIO_SERVER_PORT=7860 GRADIO_ROOT_PATH=/${JUPYTER_NAME}/proxy/7860/
 ```
 
 If you are using AutoDL, please install a specific version of Gradio:
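Gradio itself reads several `GRADIO_*` variables at launch time, which is why exporting them before starting LLaMA Board works. A minimal standalone sketch of the PAI setup above, assuming an installed Gradio version that honors `GRADIO_SERVER_PORT` and `GRADIO_ROOT_PATH`; the echo app and the fallback `JUPYTER_NAME` value are hypothetical:

```python
import os

import gradio as gr

# Hypothetical stand-ins mirroring the PAI instructions above;
# on PAI, JUPYTER_NAME is provided by the hosting environment.
os.environ.setdefault("GRADIO_SERVER_PORT", "7860")
jupyter_name = os.environ.get("JUPYTER_NAME", "demo")
os.environ.setdefault("GRADIO_ROOT_PATH", "/{}/proxy/7860/".format(jupyter_name))

# A trivial echo app standing in for LLaMA Board.
demo = gr.Interface(fn=lambda text: text, inputs="text", outputs="text")
demo.queue().launch()  # picks up the GRADIO_* variables if supported
```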
@@ -366,7 +366,7 @@ CUDA_VISIBLE_DEVICES=0 llamafactory-cli export examples/merge_lora/llama3_lora_sft.yaml
 #### Use local environment
 
 ```bash
-CUDA_VISIBLE_DEVICES=0 GRADIO_SERVER_PORT=7860 GRADIO_SHARE=1 llamafactory-cli webui
+CUDA_VISIBLE_DEVICES=0 GRADIO_SHARE=1 llamafactory-cli webui
 ```
 
 <details><summary>For Alibaba Cloud PAI and AutoDL users</summary>
@@ -374,7 +374,7 @@ CUDA_VISIBLE_DEVICES=0 GRADIO_SERVER_PORT=7860 GRADIO_SHARE=1 llamafactory-cli webui
 If you encounter display problems when using LLaMA Board on Alibaba Cloud PAI, try setting the environment variables with the following command before launching:
 
 ```bash
-export GRADIO_ROOT_PATH=/${JUPYTER_NAME}/proxy/7860/
+export GRADIO_SERVER_PORT=7860 GRADIO_ROOT_PATH=/${JUPYTER_NAME}/proxy/7860/
 ```
 
 If you are using AutoDL, please install the following Gradio version:
@@ -1,27 +1,21 @@
 {
   "alpaca_en": {
-    "file_name": "alpaca_data_en_52k.json",
-    "file_sha1": "607f94a7f581341e59685aef32f531095232cf23"
+    "file_name": "alpaca_data_en_52k.json"
   },
   "alpaca_zh": {
-    "file_name": "alpaca_data_zh_51k.json",
-    "file_sha1": "2ba9827122c158dc256668d42bd1bcb8bc6b786e"
+    "file_name": "alpaca_data_zh_51k.json"
   },
   "alpaca_gpt4_en": {
-    "file_name": "alpaca_gpt4_data_en.json",
-    "file_sha1": "647f4ad447bd993e4b6b6223d1be15208bab694a"
+    "file_name": "alpaca_gpt4_data_en.json"
   },
   "alpaca_gpt4_zh": {
-    "file_name": "alpaca_gpt4_data_zh.json",
-    "file_sha1": "3eaa3bda364ccdd59925d7448a698256c31ef845"
+    "file_name": "alpaca_gpt4_data_zh.json"
   },
   "identity": {
-    "file_name": "identity.json",
-    "file_sha1": "0f67e97fd01612006ab3536cdaf6cfb0d1e7f279"
+    "file_name": "identity.json"
   },
   "oaast_sft_zh": {
     "file_name": "oaast_sft_zh.json",
-    "file_sha1": "a6a91f18f80f37b10ded9cf633fb50c033bf7b9f",
     "columns": {
       "prompt": "instruction",
       "query": "input",
@@ -31,7 +25,6 @@
   },
   "lima": {
     "file_name": "lima.json",
-    "file_sha1": "9db59f6b7007dc4b17529fc63379b9cd61640f37",
     "columns": {
       "prompt": "instruction",
       "query": "input",
@@ -41,7 +34,6 @@
   },
   "glaive_toolcall": {
     "file_name": "glaive_toolcall_10k.json",
-    "file_sha1": "36aea64548fbf6aa300bef411b9221092ed84902",
     "formatting": "sharegpt",
     "columns": {
       "messages": "conversations",
@@ -50,7 +42,6 @@
   },
   "mllm_demo": {
     "file_name": "mllm_demo.json",
-    "file_sha1": "d626cc0ad88a26d0dc9fcb47336821cf486d8bcc",
     "formatting": "sharegpt",
     "columns": {
       "messages": "messages",
@@ -308,7 +299,6 @@
   },
   "oaast_rm_zh": {
     "file_name": "oaast_rm_zh.json",
-    "file_sha1": "1065af1f3784dd61be5e79713a35f427b713a232",
     "columns": {
       "prompt": "instruction",
       "query": "input",
@@ -319,17 +309,14 @@
   },
   "comparison_gpt4_en": {
     "file_name": "comparison_gpt4_data_en.json",
-    "file_sha1": "96fa18313544e22444fe20eead7754b17da452ae",
     "ranking": true
   },
   "comparison_gpt4_zh": {
     "file_name": "comparison_gpt4_data_zh.json",
-    "file_sha1": "515b18ed497199131ddcc1af950345c11dc5c7fd",
     "ranking": true
   },
   "orca_rlhf": {
     "file_name": "orca_rlhf.json",
-    "file_sha1": "acc8f74d16fd1fc4f68e7d86eaa781c2c3f5ba8e",
     "ranking": true,
     "columns": {
       "prompt": "question",
@@ -370,14 +357,12 @@
   },
   "wiki_demo": {
     "file_name": "wiki_demo.txt",
-    "file_sha1": "e70375e28eda542a90c68213640cc371898ce181",
     "columns": {
       "prompt": "text"
     }
   },
   "c4_demo": {
     "file_name": "c4_demo.json",
-    "file_sha1": "a5a0c86759732f9a5238e447fecd74f28a66cca8",
     "columns": {
       "prompt": "text"
     }
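After this change a local-file entry in `dataset_info.json` carries no `file_sha1`; only `file_name` is required, with keys like `formatting`, `ranking`, and `columns` optional. A sketch of the resulting shape, using a hypothetical `my_dataset` entry and plain Python for illustration:

```python
import json

# Hypothetical entry in the post-change shape: no "file_sha1" key.
dataset_info = {
    "my_dataset": {
        "file_name": "my_dataset.json",
        "formatting": "sharegpt",
        "columns": {"messages": "conversations"}
    }
}

entry = dataset_info["my_dataset"]
file_name = entry["file_name"]      # required for local files
columns = entry.get("columns", {})  # optional
print(json.dumps(entry, indent=2))
```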
@@ -11,7 +11,7 @@ from .aligner import align_dataset
 from .parser import get_dataset_list
 from .preprocess import get_preprocess_and_print_func
 from .template import get_template_and_fix_tokenizer
-from .utils import checksum, merge_dataset
+from .utils import merge_dataset
 
 
 if TYPE_CHECKING:
@@ -61,8 +61,6 @@ def load_single_dataset(
 
         if data_path is None:
             raise ValueError("File extension must be txt, csv, json or jsonl.")
 
-        checksum(data_files, dataset_attr.file_sha1)
-
     else:
         raise NotImplementedError
@@ -21,7 +21,6 @@ class DatasetAttr:
     load_from: Literal["hf_hub", "ms_hub", "script", "file"]
     dataset_name: str
     """ extra configs """
-    file_sha1: Optional[str] = None
     subset: Optional[str] = None
     folder: Optional[str] = None
     ranking: bool = False
@@ -99,7 +98,6 @@ def get_dataset_list(data_args: "DataArguments") -> List["DatasetAttr"]:
         else:
             dataset_attr = DatasetAttr("file", dataset_name=dataset_info[name]["file_name"])
 
-        dataset_attr.set_attr("file_sha1", dataset_info[name])
         dataset_attr.set_attr("subset", dataset_info[name])
         dataset_attr.set_attr("folder", dataset_info[name])
         dataset_attr.set_attr("ranking", dataset_info[name], default=False)
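For illustration, a hedged sketch of how `get_dataset_list` wires a `dataset_info.json` entry into a `DatasetAttr` after this change. The simplified dataclass and `set_attr` below are assumptions modeled on the calls shown in the hunk, not the project's actual definitions:

```python
from dataclasses import dataclass
from typing import Any, Dict, Literal, Optional


@dataclass
class DatasetAttr:
    # Simplified stand-in for the real class; note file_sha1 is gone.
    load_from: Literal["hf_hub", "ms_hub", "script", "file"]
    dataset_name: str
    subset: Optional[str] = None
    folder: Optional[str] = None
    ranking: bool = False

    def set_attr(self, key: str, obj: Dict[str, Any], default: Any = None) -> None:
        # Assumed behavior: copy the key from the JSON entry if present.
        setattr(self, key, obj.get(key, default))


info = {"file_name": "my_dataset.json", "ranking": True}  # hypothetical entry
attr = DatasetAttr("file", dataset_name=info["file_name"])
attr.set_attr("subset", info)
attr.set_attr("folder", info)
attr.set_attr("ranking", info, default=False)
print(attr)
```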
@@ -26,21 +26,6 @@ class Role(str, Enum):
     OBSERVATION = "observation"
 
 
-def checksum(data_files: List[str], file_sha1: Optional[str] = None) -> None:
-    if file_sha1 is None:
-        logger.warning("Checksum failed: missing SHA-1 hash value in dataset_info.json.")
-        return
-
-    if len(data_files) != 1:
-        logger.warning("Checksum failed: too many files.")
-        return
-
-    with open(data_files[0], "rb") as f:
-        sha1 = hashlib.sha1(f.read()).hexdigest()
-        if sha1 != file_sha1:
-            logger.warning("Checksum failed: mismatched SHA-1 hash value at {}.".format(data_files[0]))
-
-
 def infer_max_len(source_len: int, target_len: int, max_len: int, reserved_label_len: int) -> Tuple[int, int]:
     max_target_len = int(max_len * (target_len / (source_len + target_len)))
     max_target_len = max(max_target_len, reserved_label_len)
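Since the helper is deleted rather than relocated, anyone who still wants to verify a downloaded dataset file can do so standalone. A minimal sketch equivalent to the removed logic; the expected digest is `alpaca_en`'s former value from `dataset_info.json`, and the file path is a placeholder:

```python
import hashlib


def sha1_of_file(path: str) -> str:
    # Same core operation as the removed checksum() helper:
    # hash the file bytes and compare against a known digest.
    with open(path, "rb") as f:
        return hashlib.sha1(f.read()).hexdigest()


expected = "607f94a7f581341e59685aef32f531095232cf23"  # digest formerly in dataset_info.json
actual = sha1_of_file("data/alpaca_data_en_52k.json")  # placeholder path
if actual != expected:
    print("Checksum mismatch: {}".format(actual))
```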
@@ -71,10 +71,12 @@ def create_web_demo() -> gr.Blocks:
 
 
 def run_web_ui() -> None:
+    gradio_share = bool(int(os.environ.get("GRADIO_SHARE", "0")))
     server_name = os.environ.get("GRADIO_SERVER_NAME", "0.0.0.0")
-    create_ui().queue().launch(server_name=server_name)
+    create_ui().queue().launch(share=gradio_share, server_name=server_name)
 
 
 def run_web_demo() -> None:
+    gradio_share = bool(int(os.environ.get("GRADIO_SHARE", "0")))
     server_name = os.environ.get("GRADIO_SERVER_NAME", "0.0.0.0")
-    create_web_demo().queue().launch(server_name=server_name)
+    create_web_demo().queue().launch(share=gradio_share, server_name=server_name)
@@ -4,8 +4,9 @@ from llmtuner.webui.interface import create_ui
 
 
 def main():
+    gradio_share = bool(int(os.environ.get("GRADIO_SHARE", "0")))
     server_name = os.environ.get("GRADIO_SERVER_NAME", "0.0.0.0")
-    create_ui().queue().launch(server_name=server_name)
+    create_ui().queue().launch(share=gradio_share, server_name=server_name)
 
 
 if __name__ == "__main__":
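One detail of the new `share` handling worth knowing: `bool(int(...))` accepts only integer-like strings, so setting `GRADIO_SHARE=true` would raise a `ValueError` rather than enable sharing. A quick sketch of the parsing behavior:

```python
def read_gradio_share(value: str) -> bool:
    # Mirrors the pattern added above: an integer string such as "0" or "1"
    # is coerced through int() and then bool().
    return bool(int(value))


print(read_gradio_share("0"))  # False, the default
print(read_gradio_share("1"))  # True: launch() creates a public share link
# read_gradio_share("true")   # ValueError: invalid literal for int()
```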