UBI: preserve corrupted PEBs

Currently UBI erases all corrupted eraseblocks, irrespective of the nature
of the corruption: corruption due to power cuts and non-power cut corruption.
The former case is OK, but the latter is not, because UBI may destroy
potentially important data.

With this patch, when UBI hits a PEB with a corrupted VID header during
scanning, it checks whether the PEB contains only 0xFF data. If yes, it is
safe to erase this PEB and it is put on the 'erase' list. If not, the PEB may
hold important data and it is better to avoid erasing it. Instead, UBI puts
it on the 'corr' list and moves it out of the pool of available PEBs.
IOW, UBI preserves this PEB.
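
The following is a minimal user-space sketch of the classification decision
described above; the helper names (peb_is_all_ff(), classify_corrupted_peb())
are illustrative and are not the actual driver functions:

/* Illustrative sketch only -- not the in-kernel implementation. */
#include <stdbool.h>
#include <stddef.h>

enum peb_fate { PEB_ERASE, PEB_PRESERVE };

/* Return true if every byte of the PEB data area reads back as 0xFF. */
bool peb_is_all_ff(const unsigned char *data, size_t len)
{
	size_t i;

	for (i = 0; i < len; i++)
		if (data[i] != 0xFF)
			return false;
	return true;
}

/*
 * A PEB with a corrupted VID header but all-0xFF data looks like an
 * interrupted write (power cut) and may safely go to the 'erase' list;
 * anything else is preserved on the 'corr' list.
 */
enum peb_fate classify_corrupted_peb(const unsigned char *data, size_t len)
{
	return peb_is_all_ff(data, len) ? PEB_ERASE : PEB_PRESERVE;
}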

Such corrupted PEBs reduce the number of available PEBs, so the more of them
we accumulate, the fewer PEBs are available. The maximum number of preserved
non-power cut corrupted PEBs is 8.
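
The accounting can be seen in the ubi_read_volume_table() hunk below:
preserved corrupted PEBs are simply subtracted from the pool UBI may hand
out, and the various "not enough PEBs" error paths now report them:

	ubi->avail_pebs = ubi->good_peb_count - ubi->corr_peb_count;

	if (ubi->corr_peb_count)
		ubi_err("%d PEBs are corrupted and not used",
			ubi->corr_peb_count);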

This patch is a response to a UBIFS problem where the reporter
(Matthew L. Creech <mlcreech@gmail.com>) observes that the UBIFS index points
to an unmapped LEB. The theory is that the corresponding PEB somehow got
corrupted and UBI wiped it. This patch (actually a series of patches)
tries to make sure such PEBs are preserved, which would make it easier
to analyze the corruption.

Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Author: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Date:   2010-09-03 23:08:15 +03:00
Commit: 5fc01ab693
Parent: feeba4b872
7 changed files with 45 additions and 47 deletions

@@ -591,6 +591,7 @@ static int attach_by_scanning(struct ubi_device *ubi)
ubi->bad_peb_count = si->bad_peb_count;
ubi->good_peb_count = ubi->peb_count - ubi->bad_peb_count;
ubi->corr_peb_count = si->corr_peb_count;
ubi->max_ec = si->max_ec;
ubi->mean_ec = si->mean_ec;
ubi_msg("max. sequence number: %llu", si->max_sqnum);
@@ -972,6 +973,7 @@ int ubi_attach_mtd_dev(struct mtd_info *mtd, int ubi_num, int vid_hdr_offset)
ubi_msg("MTD device size: %llu MiB", ubi->flash_size >> 20);
ubi_msg("number of good PEBs: %d", ubi->good_peb_count);
ubi_msg("number of bad PEBs: %d", ubi->bad_peb_count);
ubi_msg("number of corrupted PEBs: %d", ubi->corr_peb_count);
ubi_msg("max. allowed volumes: %d", ubi->vtbl_slots);
ubi_msg("wear-leveling threshold: %d", CONFIG_MTD_UBI_WL_THRESHOLD);
ubi_msg("number of internal volumes: %d", UBI_INT_VOL_COUNT);

@@ -1201,6 +1201,9 @@ static void print_rsvd_warning(struct ubi_device *ubi,
ubi_warn("cannot reserve enough PEBs for bad PEB handling, reserved %d,"
" need %d", ubi->beb_rsvd_pebs, ubi->beb_rsvd_level);
if (ubi->corr_peb_count)
ubi_warn("%d PEBs are corrupted and not used",
ubi->corr_peb_count);
}
/**
@@ -1263,6 +1266,9 @@ int ubi_eba_init_scan(struct ubi_device *ubi, struct ubi_scan_info *si)
if (ubi->avail_pebs < EBA_RESERVED_PEBS) {
ubi_err("no enough physical eraseblocks (%d, need %d)",
ubi->avail_pebs, EBA_RESERVED_PEBS);
if (ubi->corr_peb_count)
ubi_err("%d PEBs are corrupted and not used",
ubi->corr_peb_count);
err = -ENOSPC;
goto out_free;
}

@@ -706,8 +706,8 @@ int ubi_scan_erase_peb(struct ubi_device *ubi, const struct ubi_scan_info *si,
struct ubi_scan_leb *ubi_scan_get_free_peb(struct ubi_device *ubi,
struct ubi_scan_info *si)
{
int err = 0, i;
struct ubi_scan_leb *seb;
int err = 0;
struct ubi_scan_leb *seb, *tmp_seb;
if (!list_empty(&si->free)) {
seb = list_entry(si->free.next, struct ubi_scan_leb, u.list);
@@ -716,37 +716,27 @@ struct ubi_scan_leb *ubi_scan_get_free_peb(struct ubi_device *ubi,
return seb;
}
for (i = 0; i < 2; i++) {
struct list_head *head;
struct ubi_scan_leb *tmp_seb;
/*
* We try to erase the first physical eraseblock from the erase list
* and pick it if we succeed, or try to erase the next one if not. And
* so forth. We don't want to take care about bad eraseblocks here -
* they'll be handled later.
*/
list_for_each_entry_safe(seb, tmp_seb, &si->erase, u.list) {
if (seb->ec == UBI_SCAN_UNKNOWN_EC)
seb->ec = si->mean_ec;
if (i == 0)
head = &si->erase;
else
head = &si->corr;
err = ubi_scan_erase_peb(ubi, si, seb->pnum, seb->ec+1);
if (err)
continue;
/*
* We try to erase the first physical eraseblock from the @head
* list and pick it if we succeed, or try to erase the
* next one if not. And so forth. We don't want to take care
* about bad eraseblocks here - they'll be handled later.
*/
list_for_each_entry_safe(seb, tmp_seb, head, u.list) {
if (seb->ec == UBI_SCAN_UNKNOWN_EC)
seb->ec = si->mean_ec;
err = ubi_scan_erase_peb(ubi, si, seb->pnum, seb->ec+1);
if (err)
continue;
seb->ec += 1;
list_del(&seb->u.list);
dbg_bld("return PEB %d, EC %d", seb->pnum, seb->ec);
return seb;
}
seb->ec += 1;
list_del(&seb->u.list);
dbg_bld("return PEB %d, EC %d", seb->pnum, seb->ec);
return seb;
}
ubi_err("no eraseblocks found");
ubi_err("no free eraseblocks");
return ERR_PTR(-ENOSPC);
}

@@ -361,6 +361,8 @@ struct ubi_wl_entry;
* @peb_size: physical eraseblock size
* @bad_peb_count: count of bad physical eraseblocks
* @good_peb_count: count of good physical eraseblocks
* @corr_peb_count: count of corrupted physical eraseblocks (preserved and not
* used by UBI)
* @erroneous_peb_count: count of erroneous physical eraseblocks in @erroneous
* @max_erroneous: maximum allowed amount of erroneous physical eraseblocks
* @min_io_size: minimal input/output unit size of the underlying MTD device
@@ -447,6 +449,7 @@ struct ubi_device {
int peb_size;
int bad_peb_count;
int good_peb_count;
int corr_peb_count;
int erroneous_peb_count;
int max_erroneous;
int min_io_size;

@@ -261,6 +261,9 @@ int ubi_create_volume(struct ubi_device *ubi, struct ubi_mkvol_req *req)
/* Reserve physical eraseblocks */
if (vol->reserved_pebs > ubi->avail_pebs) {
dbg_err("not enough PEBs, only %d available", ubi->avail_pebs);
if (ubi->corr_peb_count)
dbg_err("%d PEBs are corrupted and not used",
ubi->corr_peb_count);
err = -ENOSPC;
goto out_unlock;
}
@@ -527,6 +530,9 @@ int ubi_resize_volume(struct ubi_volume_desc *desc, int reserved_pebs)
if (pebs > ubi->avail_pebs) {
dbg_err("not enough PEBs: requested %d, available %d",
pebs, ubi->avail_pebs);
if (ubi->corr_peb_count)
dbg_err("%d PEBs are corrupted and not used",
ubi->corr_peb_count);
spin_unlock(&ubi->volumes_lock);
err = -ENOSPC;
goto out_free;

@@ -662,9 +662,13 @@ static int init_volumes(struct ubi_device *ubi, const struct ubi_scan_info *si,
ubi->vol_count += 1;
vol->ubi = ubi;
if (reserved_pebs > ubi->avail_pebs)
if (reserved_pebs > ubi->avail_pebs) {
ubi_err("not enough PEBs, required %d, available %d",
reserved_pebs, ubi->avail_pebs);
if (ubi->corr_peb_count)
ubi_err("%d PEBs are corrupted and not used",
ubi->corr_peb_count);
}
ubi->rsvd_pebs += reserved_pebs;
ubi->avail_pebs -= reserved_pebs;
@@ -837,7 +841,7 @@ int ubi_read_volume_table(struct ubi_device *ubi, struct ubi_scan_info *si)
return PTR_ERR(ubi->vtbl);
}
ubi->avail_pebs = ubi->good_peb_count;
ubi->avail_pebs = ubi->good_peb_count - ubi->corr_peb_count;
/*
* The layout volume is OK, initialize the corresponding in-RAM data

@@ -1478,22 +1478,6 @@ int ubi_wl_init_scan(struct ubi_device *ubi, struct ubi_scan_info *si)
ubi->lookuptbl[e->pnum] = e;
}
list_for_each_entry(seb, &si->corr, u.list) {
cond_resched();
e = kmem_cache_alloc(ubi_wl_entry_slab, GFP_KERNEL);
if (!e)
goto out_free;
e->pnum = seb->pnum;
e->ec = seb->ec;
ubi->lookuptbl[e->pnum] = e;
if (schedule_erase(ubi, e, 0)) {
kmem_cache_free(ubi_wl_entry_slab, e);
goto out_free;
}
}
ubi_rb_for_each_entry(rb1, sv, &si->volumes, rb) {
ubi_rb_for_each_entry(rb2, seb, &sv->root, u.rb) {
cond_resched();
@@ -1520,6 +1504,9 @@ int ubi_wl_init_scan(struct ubi_device *ubi, struct ubi_scan_info *si)
if (ubi->avail_pebs < WL_RESERVED_PEBS) {
ubi_err("no enough physical eraseblocks (%d, need %d)",
ubi->avail_pebs, WL_RESERVED_PEBS);
if (ubi->corr_peb_count)
ubi_err("%d PEBs are corrupted and not used",
ubi->corr_peb_count);
goto out_free;
}
ubi->avail_pebs -= WL_RESERVED_PEBS;