diff --git a/data/README.md b/data/README.md
index 0f14bef8..5a34bcbe 100644
--- a/data/README.md
+++ b/data/README.md
@@ -11,9 +11,9 @@ Currently we support datasets in **alpaca** and **sharegpt** format.
   "formatting": "the format of the dataset. (optional, default: alpaca, can be chosen from {alpaca, sharegpt})",
   "ranking": "whether the dataset is a preference dataset or not. (default: False)",
   "subset": "the name of the subset. (optional, default: None)",
+  "split": "the name of the dataset split to be used. (optional, default: train)",
   "folder": "the name of the folder of the dataset repository on the Hugging Face hub. (optional, default: None)",
-  "num_samples": "the number of samples in the dataset used for training. (optional, default: None)", 
-  "split": "which dataset split to use for training and evaluation (optional, default: train)",
+  "num_samples": "the number of samples to use from the dataset. (optional, default: None)",
   "columns (optional)": {
     "prompt": "the column name in the dataset containing the prompts. (default: instruction)",
     "query": "the column name in the dataset containing the queries. (default: input)",