From e7c1c00cf3fafde7496e31cacd718a51d0e7d70c Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 4 May 2020 19:21:54 -0700 Subject: [PATCH 01/35] pstore: Drop useless try_module_get() for backend There is no reason to be doing a module get/put in pstore_register(), since the module calling pstore_register() cannot be unloaded since it hasn't finished its initialization. Remove it so there is no confusion about how registration ordering works. Link: https://lore.kernel.org/lkml/20200506152114.50375-2-keescook@chromium.org/ Signed-off-by: Kees Cook --- fs/pstore/platform.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c index 408277ee3cdb..44f8b9742263 100644 --- a/fs/pstore/platform.c +++ b/fs/pstore/platform.c @@ -555,8 +555,6 @@ static int pstore_write_user_compat(struct pstore_record *record, */ int pstore_register(struct pstore_info *psi) { - struct module *owner = psi->owner; - if (backend && strcmp(backend, psi->name)) { pr_warn("ignoring unexpected backend '%s'\n", psi->name); return -EPERM; @@ -591,10 +589,6 @@ int pstore_register(struct pstore_info *psi) sema_init(&psinfo->buf_lock, 1); spin_unlock(&pstore_lock); - if (owner && !try_module_get(owner)) { - psinfo = NULL; - return -EINVAL; - } if (psi->flags & PSTORE_FLAGS_DMESG) allocate_buf_for_compression(); @@ -626,8 +620,6 @@ int pstore_register(struct pstore_info *psi) pr_info("Registered %s as persistent store backend\n", psi->name); - module_put(owner); - return 0; } EXPORT_SYMBOL_GPL(pstore_register); From c30b20cd96a7a08e77c12cb3326c6fd801f7fe87 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 5 May 2020 23:29:10 -0700 Subject: [PATCH 02/35] pstore: Rename "pstore_lock" to "psinfo_lock" The name "pstore_lock" sounds very global, but it is only supposed to be used for managing changes to "psinfo", so rename it accordingly. Link: https://lore.kernel.org/lkml/20200506152114.50375-3-keescook@chromium.org/ Signed-off-by: Kees Cook --- fs/pstore/platform.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c index 44f8b9742263..347b6c07f4cf 100644 --- a/fs/pstore/platform.c +++ b/fs/pstore/platform.c @@ -69,10 +69,10 @@ static void pstore_dowork(struct work_struct *); static DECLARE_WORK(pstore_work, pstore_dowork); /* - * pstore_lock just protects "psinfo" during + * psinfo_lock just protects "psinfo" during * calls to pstore_register() */ -static DEFINE_SPINLOCK(pstore_lock); +static DEFINE_SPINLOCK(psinfo_lock); struct pstore_info *psinfo; static char *backend; @@ -574,11 +574,11 @@ int pstore_register(struct pstore_info *psi) return -EINVAL; } - spin_lock(&pstore_lock); + spin_lock(&psinfo_lock); if (psinfo) { pr_warn("backend '%s' already loaded: ignoring '%s'\n", psinfo->name, psi->name); - spin_unlock(&pstore_lock); + spin_unlock(&psinfo_lock); return -EBUSY; } @@ -587,7 +587,7 @@ int pstore_register(struct pstore_info *psi) psinfo = psi; mutex_init(&psinfo->read_mutex); sema_init(&psinfo->buf_lock, 1); - spin_unlock(&pstore_lock); + spin_unlock(&psinfo_lock); if (psi->flags & PSTORE_FLAGS_DMESG) From cab12fd049380b1d0c3ac9f534407a8cc0ac4bba Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 4 May 2020 19:31:36 -0700 Subject: [PATCH 03/35] pstore: Convert "psinfo" locking to mutex Currently pstore can only have a single backend attached at a time, and it tracks the active backend via "psinfo", under a lock. The locking for this does not need to be a spinlock, and in order to avoid may_sleep() issues during future changes to pstore_unregister(), switch to a mutex instead. Link: https://lore.kernel.org/lkml/20200506152114.50375-4-keescook@chromium.org/ Signed-off-by: Kees Cook --- fs/pstore/platform.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c index 347b6c07f4cf..d0ce22237589 100644 --- a/fs/pstore/platform.c +++ b/fs/pstore/platform.c @@ -72,7 +72,7 @@ static DECLARE_WORK(pstore_work, pstore_dowork); * psinfo_lock just protects "psinfo" during * calls to pstore_register() */ -static DEFINE_SPINLOCK(psinfo_lock); +static DEFINE_MUTEX(psinfo_lock); struct pstore_info *psinfo; static char *backend; @@ -574,11 +574,11 @@ int pstore_register(struct pstore_info *psi) return -EINVAL; } - spin_lock(&psinfo_lock); + mutex_lock(&psinfo_lock); if (psinfo) { pr_warn("backend '%s' already loaded: ignoring '%s'\n", psinfo->name, psi->name); - spin_unlock(&psinfo_lock); + mutex_unlock(&psinfo_lock); return -EBUSY; } @@ -587,7 +587,7 @@ int pstore_register(struct pstore_info *psi) psinfo = psi; mutex_init(&psinfo->read_mutex); sema_init(&psinfo->buf_lock, 1); - spin_unlock(&psinfo_lock); + mutex_unlock(&psinfo_lock); if (psi->flags & PSTORE_FLAGS_DMESG) From 47af61ffb19baa8df2b109582f87f7622fbe85cc Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 5 May 2020 23:32:40 -0700 Subject: [PATCH 04/35] pstore: Rename "allpstore" to "records_list" The name "allpstore" doesn't carry much meaning, so rename it to what it actually is: the list of all records present in the filesystem. The lock is also renamed accordingly. Link: https://lore.kernel.org/lkml/20200506152114.50375-5-keescook@chromium.org/ Signed-off-by: Kees Cook --- fs/pstore/inode.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c index d99b5d39aa90..5cc09cb315f9 100644 --- a/fs/pstore/inode.c +++ b/fs/pstore/inode.c @@ -29,8 +29,8 @@ #define PSTORE_NAMELEN 64 -static DEFINE_SPINLOCK(allpstore_lock); -static LIST_HEAD(allpstore); +static DEFINE_SPINLOCK(records_list_lock); +static LIST_HEAD(records_list); struct pstore_private { struct list_head list; @@ -196,9 +196,9 @@ static void pstore_evict_inode(struct inode *inode) clear_inode(inode); if (p) { - spin_lock_irqsave(&allpstore_lock, flags); + spin_lock_irqsave(&records_list_lock, flags); list_del(&p->list); - spin_unlock_irqrestore(&allpstore_lock, flags); + spin_unlock_irqrestore(&records_list_lock, flags); free_pstore_private(p); } } @@ -302,8 +302,8 @@ int pstore_mkfile(struct dentry *root, struct pstore_record *record) WARN_ON(!inode_is_locked(d_inode(root))); - spin_lock_irqsave(&allpstore_lock, flags); - list_for_each_entry(pos, &allpstore, list) { + spin_lock_irqsave(&records_list_lock, flags); + list_for_each_entry(pos, &records_list, list) { if (pos->record->type == record->type && pos->record->id == record->id && pos->record->psi == record->psi) { @@ -311,7 +311,7 @@ int pstore_mkfile(struct dentry *root, struct pstore_record *record) break; } } - spin_unlock_irqrestore(&allpstore_lock, flags); + spin_unlock_irqrestore(&records_list_lock, flags); if (rc) return rc; @@ -343,9 +343,9 @@ int pstore_mkfile(struct dentry *root, struct pstore_record *record) d_add(dentry, inode); - spin_lock_irqsave(&allpstore_lock, flags); - list_add(&private->list, &allpstore); - spin_unlock_irqrestore(&allpstore_lock, flags); + spin_lock_irqsave(&records_list_lock, flags); + list_add(&private->list, &records_list); + spin_unlock_irqrestore(&records_list_lock, flags); return 0; From db23491c77207ef6bec2b232238710de4755db6a Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 4 May 2020 19:07:04 -0700 Subject: [PATCH 05/35] pstore: Convert "records_list" locking to mutex The pstorefs internal list lock doesn't need to be a spinlock and will create problems when trying to access the list in the subsequent patch that will walk the pstorefs records during pstore_unregister(). Change this to a mutex to avoid may_sleep() warnings when unregistering devices. Link: https://lore.kernel.org/lkml/20200506152114.50375-6-keescook@chromium.org/ Signed-off-by: Kees Cook --- fs/pstore/inode.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c index 5cc09cb315f9..92ebcc75434f 100644 --- a/fs/pstore/inode.c +++ b/fs/pstore/inode.c @@ -22,14 +22,13 @@ #include #include #include -#include #include #include "internal.h" #define PSTORE_NAMELEN 64 -static DEFINE_SPINLOCK(records_list_lock); +static DEFINE_MUTEX(records_list_lock); static LIST_HEAD(records_list); struct pstore_private { @@ -192,13 +191,12 @@ static int pstore_unlink(struct inode *dir, struct dentry *dentry) static void pstore_evict_inode(struct inode *inode) { struct pstore_private *p = inode->i_private; - unsigned long flags; clear_inode(inode); if (p) { - spin_lock_irqsave(&records_list_lock, flags); + mutex_lock(&records_list_lock); list_del(&p->list); - spin_unlock_irqrestore(&records_list_lock, flags); + mutex_unlock(&records_list_lock); free_pstore_private(p); } } @@ -297,12 +295,11 @@ int pstore_mkfile(struct dentry *root, struct pstore_record *record) int rc = 0; char name[PSTORE_NAMELEN]; struct pstore_private *private, *pos; - unsigned long flags; size_t size = record->size + record->ecc_notice_size; WARN_ON(!inode_is_locked(d_inode(root))); - spin_lock_irqsave(&records_list_lock, flags); + mutex_lock(&records_list_lock); list_for_each_entry(pos, &records_list, list) { if (pos->record->type == record->type && pos->record->id == record->id && @@ -311,7 +308,7 @@ int pstore_mkfile(struct dentry *root, struct pstore_record *record) break; } } - spin_unlock_irqrestore(&records_list_lock, flags); + mutex_unlock(&records_list_lock); if (rc) return rc; @@ -343,9 +340,9 @@ int pstore_mkfile(struct dentry *root, struct pstore_record *record) d_add(dentry, inode); - spin_lock_irqsave(&records_list_lock, flags); + mutex_lock(&records_list_lock); list_add(&private->list, &records_list); - spin_unlock_irqrestore(&records_list_lock, flags); + mutex_unlock(&records_list_lock); return 0; From 6248a0666c8a408dcc5bd952536274d5bd0f02cb Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 4 May 2020 19:33:54 -0700 Subject: [PATCH 06/35] pstore: Add proper unregister lock checking The pstore backend lock wasn't being used during pstore_unregister(). Add sanity check and locking. Link: https://lore.kernel.org/lkml/20200506152114.50375-7-keescook@chromium.org/ Signed-off-by: Kees Cook --- fs/pstore/platform.c | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c index d0ce22237589..8c0076a1f896 100644 --- a/fs/pstore/platform.c +++ b/fs/pstore/platform.c @@ -69,8 +69,9 @@ static void pstore_dowork(struct work_struct *); static DECLARE_WORK(pstore_work, pstore_dowork); /* - * psinfo_lock just protects "psinfo" during - * calls to pstore_register() + * psinfo_lock protects "psinfo" during calls to + * pstore_register(), pstore_unregister(), and + * the filesystem mount/unmount routines. */ static DEFINE_MUTEX(psinfo_lock); struct pstore_info *psinfo; @@ -587,8 +588,6 @@ int pstore_register(struct pstore_info *psi) psinfo = psi; mutex_init(&psinfo->read_mutex); sema_init(&psinfo->buf_lock, 1); - mutex_unlock(&psinfo_lock); - if (psi->flags & PSTORE_FLAGS_DMESG) allocate_buf_for_compression(); @@ -620,12 +619,25 @@ int pstore_register(struct pstore_info *psi) pr_info("Registered %s as persistent store backend\n", psi->name); + mutex_unlock(&psinfo_lock); return 0; } EXPORT_SYMBOL_GPL(pstore_register); void pstore_unregister(struct pstore_info *psi) { + /* It's okay to unregister nothing. */ + if (!psi) + return; + + mutex_lock(&psinfo_lock); + + /* Only one backend can be registered at a time. */ + if (WARN_ON(psi != psinfo)) { + mutex_unlock(&psinfo_lock); + return; + } + /* Stop timer and make sure all work has finished. */ pstore_update_ms = -1; del_timer_sync(&pstore_timer); @@ -644,6 +656,7 @@ void pstore_unregister(struct pstore_info *psi) psinfo = NULL; backend = NULL; + mutex_unlock(&psinfo_lock); } EXPORT_SYMBOL_GPL(pstore_unregister); From 7a0ad546847a23f92f5e227fa8e4578eaa3a8d0a Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 4 May 2020 19:41:30 -0700 Subject: [PATCH 07/35] pstore: Refactor pstorefs record list removal The "unlink" handling should perform list removal (which can also make sure records don't get double-erased), and the "evict" handling should be responsible only for memory freeing. Link: https://lore.kernel.org/lkml/20200506152114.50375-8-keescook@chromium.org/ Signed-off-by: Kees Cook --- fs/pstore/inode.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c index 92ebcc75434f..5f08b21b7a46 100644 --- a/fs/pstore/inode.c +++ b/fs/pstore/inode.c @@ -177,10 +177,21 @@ static int pstore_unlink(struct inode *dir, struct dentry *dentry) { struct pstore_private *p = d_inode(dentry)->i_private; struct pstore_record *record = p->record; + int rc = 0; if (!record->psi->erase) return -EPERM; + /* Make sure we can't race while removing this file. */ + mutex_lock(&records_list_lock); + if (!list_empty(&p->list)) + list_del_init(&p->list); + else + rc = -ENOENT; + mutex_unlock(&records_list_lock); + if (rc) + return rc; + mutex_lock(&record->psi->read_mutex); record->psi->erase(record); mutex_unlock(&record->psi->read_mutex); @@ -193,12 +204,7 @@ static void pstore_evict_inode(struct inode *inode) struct pstore_private *p = inode->i_private; clear_inode(inode); - if (p) { - mutex_lock(&records_list_lock); - list_del(&p->list); - mutex_unlock(&records_list_lock); - free_pstore_private(p); - } + free_pstore_private(p); } static const struct inode_operations pstore_dir_inode_operations = { @@ -417,6 +423,7 @@ static void pstore_kill_sb(struct super_block *sb) { kill_litter_super(sb); pstore_sb = NULL; + INIT_LIST_HEAD(&records_list); } static struct file_system_type pstore_fs_type = { From 27e5041a87e8af2d0b6452dffe053d0253e914cc Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 4 May 2020 19:43:41 -0700 Subject: [PATCH 08/35] pstore: Add locking around superblock changes Nothing was protecting changes to the pstorefs superblock. Add locking and refactor away is_pstore_mounted(), instead using a helper to add a way to safely lock the pstorefs root inode during filesystem changes. Link: https://lore.kernel.org/lkml/20200506152114.50375-9-keescook@chromium.org/ Signed-off-by: Kees Cook --- fs/pstore/inode.c | 65 +++++++++++++++++++++++++++++--------------- fs/pstore/internal.h | 1 - fs/pstore/platform.c | 5 ++-- 3 files changed, 45 insertions(+), 26 deletions(-) diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c index 5f08b21b7a46..0e76e12fa6d1 100644 --- a/fs/pstore/inode.c +++ b/fs/pstore/inode.c @@ -31,6 +31,9 @@ static DEFINE_MUTEX(records_list_lock); static LIST_HEAD(records_list); +static DEFINE_MUTEX(pstore_sb_lock); +static struct super_block *pstore_sb; + struct pstore_private { struct list_head list; struct pstore_record *record; @@ -282,11 +285,25 @@ static const struct super_operations pstore_ops = { .show_options = pstore_show_options, }; -static struct super_block *pstore_sb; - -bool pstore_is_mounted(void) +static struct dentry *psinfo_lock_root(void) { - return pstore_sb != NULL; + struct dentry *root; + + mutex_lock(&pstore_sb_lock); + /* + * Having no backend is fine -- no records appear. + * Not being mounted is fine -- nothing to do. + */ + if (!psinfo || !pstore_sb) { + mutex_unlock(&pstore_sb_lock); + return NULL; + } + + root = pstore_sb->s_root; + inode_lock(d_inode(root)); + mutex_unlock(&pstore_sb_lock); + + return root; } /* @@ -303,20 +320,18 @@ int pstore_mkfile(struct dentry *root, struct pstore_record *record) struct pstore_private *private, *pos; size_t size = record->size + record->ecc_notice_size; - WARN_ON(!inode_is_locked(d_inode(root))); + if (WARN_ON(!inode_is_locked(d_inode(root)))) + return -EINVAL; + rc = -EEXIST; + /* Skip records that are already present in the filesystem. */ mutex_lock(&records_list_lock); list_for_each_entry(pos, &records_list, list) { if (pos->record->type == record->type && pos->record->id == record->id && - pos->record->psi == record->psi) { - rc = -EEXIST; - break; - } + pos->record->psi == record->psi) + goto fail; } - mutex_unlock(&records_list_lock); - if (rc) - return rc; rc = -ENOMEM; inode = pstore_get_inode(root->d_sb); @@ -346,7 +361,6 @@ int pstore_mkfile(struct dentry *root, struct pstore_record *record) d_add(dentry, inode); - mutex_lock(&records_list_lock); list_add(&private->list, &records_list); mutex_unlock(&records_list_lock); @@ -356,8 +370,8 @@ int pstore_mkfile(struct dentry *root, struct pstore_record *record) free_pstore_private(private); fail_inode: iput(inode); - fail: + mutex_unlock(&records_list_lock); return rc; } @@ -369,16 +383,13 @@ int pstore_mkfile(struct dentry *root, struct pstore_record *record) */ void pstore_get_records(int quiet) { - struct pstore_info *psi = psinfo; struct dentry *root; - if (!psi || !pstore_sb) + root = psinfo_lock_root(); + if (!root) return; - root = pstore_sb->s_root; - - inode_lock(d_inode(root)); - pstore_get_backend_records(psi, root, quiet); + pstore_get_backend_records(psinfo, root, quiet); inode_unlock(d_inode(root)); } @@ -386,8 +397,6 @@ static int pstore_fill_super(struct super_block *sb, void *data, int silent) { struct inode *inode; - pstore_sb = sb; - sb->s_maxbytes = MAX_LFS_FILESIZE; sb->s_blocksize = PAGE_SIZE; sb->s_blocksize_bits = PAGE_SHIFT; @@ -408,6 +417,10 @@ static int pstore_fill_super(struct super_block *sb, void *data, int silent) if (!sb->s_root) return -ENOMEM; + mutex_lock(&pstore_sb_lock); + pstore_sb = sb; + mutex_unlock(&pstore_sb_lock); + pstore_get_records(0); return 0; @@ -421,9 +434,17 @@ static struct dentry *pstore_mount(struct file_system_type *fs_type, static void pstore_kill_sb(struct super_block *sb) { + mutex_lock(&pstore_sb_lock); + WARN_ON(pstore_sb != sb); + kill_litter_super(sb); pstore_sb = NULL; + + mutex_lock(&records_list_lock); INIT_LIST_HEAD(&records_list); + mutex_unlock(&records_list_lock); + + mutex_unlock(&pstore_sb_lock); } static struct file_system_type pstore_fs_type = { diff --git a/fs/pstore/internal.h b/fs/pstore/internal.h index 7062ea4bc57c..fe5f7ef7323f 100644 --- a/fs/pstore/internal.h +++ b/fs/pstore/internal.h @@ -33,7 +33,6 @@ extern void pstore_get_backend_records(struct pstore_info *psi, struct dentry *root, int quiet); extern int pstore_mkfile(struct dentry *root, struct pstore_record *record); -extern bool pstore_is_mounted(void); extern void pstore_record_init(struct pstore_record *record, struct pstore_info *psi); diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c index 8c0076a1f896..624e100fbeca 100644 --- a/fs/pstore/platform.c +++ b/fs/pstore/platform.c @@ -460,7 +460,7 @@ static void pstore_dump(struct kmsg_dumper *dumper, } ret = psinfo->write(&record); - if (ret == 0 && reason == KMSG_DUMP_OOPS && pstore_is_mounted()) + if (ret == 0 && reason == KMSG_DUMP_OOPS) pstore_new_entry = 1; total += record.size; @@ -592,8 +592,7 @@ int pstore_register(struct pstore_info *psi) if (psi->flags & PSTORE_FLAGS_DMESG) allocate_buf_for_compression(); - if (pstore_is_mounted()) - pstore_get_records(0); + pstore_get_records(0); if (psi->flags & PSTORE_FLAGS_DMESG) pstore_register_kmsg(); From 78c83c828c043f2e18929137c1e218e8977349b1 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 5 May 2020 21:36:15 -0700 Subject: [PATCH 09/35] pstore: Do not leave timer disabled for next backend The pstore.update_ms value was being disabled during pstore_unregister(), which would cause any prior value to go unnoticed on the next pstore_register(). Instead, just let del_timer() stop the timer, which was always sufficient. This additionally refactors the timer reset code and allows the timer to be enabled if the module parameter is changed away from the default. Link: https://lore.kernel.org/lkml/20200506152114.50375-10-keescook@chromium.org/ Signed-off-by: Kees Cook --- fs/pstore/platform.c | 34 +++++++++++++++++++--------------- 1 file changed, 19 insertions(+), 15 deletions(-) diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c index 624e100fbeca..327ee70e881d 100644 --- a/fs/pstore/platform.c +++ b/fs/pstore/platform.c @@ -44,7 +44,7 @@ static int pstore_update_ms = -1; module_param_named(update_ms, pstore_update_ms, int, 0600); MODULE_PARM_DESC(update_ms, "milliseconds before pstore updates its content " "(default is -1, which means runtime updates are disabled; " - "enabling this option is not safe, it may lead to further " + "enabling this option may not be safe; it may lead to further " "corruption on Oopses)"); /* Names should be in the same order as the enum pstore_type_id */ @@ -150,6 +150,14 @@ static const char *get_reason_str(enum kmsg_dump_reason reason) } } +static void pstore_timer_kick(void) +{ + if (pstore_update_ms < 0) + return; + + mod_timer(&pstore_timer, jiffies + msecs_to_jiffies(pstore_update_ms)); +} + /* * Should pstore_dump() wait for a concurrent pstore_dump()? If * not, the current pstore_dump() will report a failure to dump @@ -460,8 +468,10 @@ static void pstore_dump(struct kmsg_dumper *dumper, } ret = psinfo->write(&record); - if (ret == 0 && reason == KMSG_DUMP_OOPS) + if (ret == 0 && reason == KMSG_DUMP_OOPS) { pstore_new_entry = 1; + pstore_timer_kick(); + } total += record.size; part++; @@ -604,11 +614,7 @@ int pstore_register(struct pstore_info *psi) pstore_register_pmsg(); /* Start watching for new records, if desired. */ - if (pstore_update_ms >= 0) { - pstore_timer.expires = jiffies + - msecs_to_jiffies(pstore_update_ms); - add_timer(&pstore_timer); - } + pstore_timer_kick(); /* * Update the module parameter backend, so it is visible @@ -637,11 +643,7 @@ void pstore_unregister(struct pstore_info *psi) return; } - /* Stop timer and make sure all work has finished. */ - pstore_update_ms = -1; - del_timer_sync(&pstore_timer); - flush_work(&pstore_work); - + /* Unregister all callbacks. */ if (psi->flags & PSTORE_FLAGS_PMSG) pstore_unregister_pmsg(); if (psi->flags & PSTORE_FLAGS_FTRACE) @@ -651,6 +653,10 @@ void pstore_unregister(struct pstore_info *psi) if (psi->flags & PSTORE_FLAGS_DMESG) pstore_unregister_kmsg(); + /* Stop timer and make sure all work has finished. */ + del_timer_sync(&pstore_timer); + flush_work(&pstore_work); + free_buf_for_compression(); psinfo = NULL; @@ -792,9 +798,7 @@ static void pstore_timefunc(struct timer_list *unused) schedule_work(&pstore_work); } - if (pstore_update_ms >= 0) - mod_timer(&pstore_timer, - jiffies + msecs_to_jiffies(pstore_update_ms)); + pstore_timer_kick(); } static void __init pstore_choose_compression(void) From 609e28bb139e53621521130f0d4aea27a725d465 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 4 May 2020 19:46:53 -0700 Subject: [PATCH 10/35] pstore: Remove filesystem records when backend is unregistered If a backend was unloaded without having first removed all its associated records in pstorefs, subsequent removals would crash while attempting to call into the now missing backend. Add automatic removal from the tree in pstore_unregister(), so that no references to the backend remain. Reported-by: Luis Henriques Link: https://lore.kernel.org/lkml/87o8yrmv69.fsf@suse.com Link: https://lore.kernel.org/lkml/20200506152114.50375-11-keescook@chromium.org/ Signed-off-by: Kees Cook --- fs/pstore/inode.c | 32 ++++++++++++++++++++++++++++++++ fs/pstore/internal.h | 1 + fs/pstore/platform.c | 3 +++ 3 files changed, 36 insertions(+) diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c index 0e76e12fa6d1..c331efe8de95 100644 --- a/fs/pstore/inode.c +++ b/fs/pstore/inode.c @@ -36,6 +36,7 @@ static struct super_block *pstore_sb; struct pstore_private { struct list_head list; + struct dentry *dentry; struct pstore_record *record; size_t total_size; }; @@ -191,6 +192,7 @@ static int pstore_unlink(struct inode *dir, struct dentry *dentry) list_del_init(&p->list); else rc = -ENOENT; + p->dentry = NULL; mutex_unlock(&records_list_lock); if (rc) return rc; @@ -306,6 +308,35 @@ static struct dentry *psinfo_lock_root(void) return root; } +int pstore_put_backend_records(struct pstore_info *psi) +{ + struct pstore_private *pos, *tmp; + struct dentry *root; + int rc = 0; + + root = psinfo_lock_root(); + if (!root) + return 0; + + mutex_lock(&records_list_lock); + list_for_each_entry_safe(pos, tmp, &records_list, list) { + if (pos->record->psi == psi) { + list_del_init(&pos->list); + rc = simple_unlink(d_inode(root), pos->dentry); + if (WARN_ON(rc)) + break; + d_drop(pos->dentry); + dput(pos->dentry); + pos->dentry = NULL; + } + } + mutex_unlock(&records_list_lock); + + inode_unlock(d_inode(root)); + + return rc; +} + /* * Make a regular file in the root directory of our file system. * Load it up with "size" bytes of data from "buf". @@ -352,6 +383,7 @@ int pstore_mkfile(struct dentry *root, struct pstore_record *record) if (!dentry) goto fail_private; + private->dentry = dentry; private->record = record; inode->i_size = private->total_size = size; inode->i_private = private; diff --git a/fs/pstore/internal.h b/fs/pstore/internal.h index fe5f7ef7323f..8efd72d93b10 100644 --- a/fs/pstore/internal.h +++ b/fs/pstore/internal.h @@ -31,6 +31,7 @@ extern void pstore_set_kmsg_bytes(int); extern void pstore_get_records(int); extern void pstore_get_backend_records(struct pstore_info *psi, struct dentry *root, int quiet); +extern int pstore_put_backend_records(struct pstore_info *psi); extern int pstore_mkfile(struct dentry *root, struct pstore_record *record); extern void pstore_record_init(struct pstore_record *record, diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c index 327ee70e881d..398785ab059f 100644 --- a/fs/pstore/platform.c +++ b/fs/pstore/platform.c @@ -657,6 +657,9 @@ void pstore_unregister(struct pstore_info *psi) del_timer_sync(&pstore_timer); flush_work(&pstore_work); + /* Remove all backend records from filesystem tree. */ + pstore_put_backend_records(psi); + free_buf_for_compression(); psinfo = NULL; From b7753fc7f6f5626e51ee78156fd801fb52163af0 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 6 May 2020 16:34:42 -0700 Subject: [PATCH 11/35] pstore: Make sure console capturing will restart The CON_ENABLED flag gets cleared during unregister_console(), so make sure we already reset the console flags before calling register_console(), otherwise unloading and reloading a pstore backend will not restart console logging. Signed-off-by: Kees Cook --- fs/pstore/platform.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c index 398785ab059f..8beaeff72386 100644 --- a/fs/pstore/platform.c +++ b/fs/pstore/platform.c @@ -516,12 +516,16 @@ static void pstore_console_write(struct console *con, const char *s, unsigned c) static struct console pstore_console = { .name = "pstore", .write = pstore_console_write, - .flags = CON_PRINTBUFFER | CON_ENABLED | CON_ANYTIME, .index = -1, }; static void pstore_register_console(void) { + /* + * Always initialize flags here since prior unregister_console() + * calls may have changed settings (specifically CON_ENABLED). + */ + pstore_console.flags = CON_PRINTBUFFER | CON_ENABLED | CON_ANYTIME; register_console(&pstore_console); } From 563ca40ddf400dbf8c6254077f9b6887101d0f08 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 8 May 2020 09:16:02 -0700 Subject: [PATCH 12/35] pstore/platform: Switch pstore_info::name to const In order to more cleanly pass around backend names, make the "name" member const. This means the module param needs to be dynamic (technically, it was before, so this actually cleans up a minor memory leak if a backend was specified and then gets unloaded.) Link: https://lore.kernel.org/lkml/20200510202436.63222-3-keescook@chromium.org/ Signed-off-by: Kees Cook --- fs/pstore/platform.c | 3 ++- include/linux/pstore.h | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c index 8beaeff72386..715396bef0ea 100644 --- a/fs/pstore/platform.c +++ b/fs/pstore/platform.c @@ -624,7 +624,7 @@ int pstore_register(struct pstore_info *psi) * Update the module parameter backend, so it is visible * through /sys/module/pstore/parameters/backend */ - backend = psi->name; + backend = kstrdup(psi->name, GFP_KERNEL); pr_info("Registered %s as persistent store backend\n", psi->name); @@ -667,6 +667,7 @@ void pstore_unregister(struct pstore_info *psi) free_buf_for_compression(); psinfo = NULL; + kfree(backend); backend = NULL; mutex_unlock(&psinfo_lock); } diff --git a/include/linux/pstore.h b/include/linux/pstore.h index e779441e6d26..f6f22b13e04f 100644 --- a/include/linux/pstore.h +++ b/include/linux/pstore.h @@ -170,7 +170,7 @@ struct pstore_record { */ struct pstore_info { struct module *owner; - char *name; + const char *name; struct semaphore buf_lock; char *buf; From d195c39052d1da278a00a6744ce59c383b67b191 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 8 May 2020 09:26:28 -0700 Subject: [PATCH 13/35] pstore/platform: Use backend name for console registration If the pstore backend changes, there's no indication in the logs what the console is (it always says "pstore"). Instead, pass through the active backend's name. (Also adjust the selftest to match.) Link: https://lore.kernel.org/lkml/20200510202436.63222-5-keescook@chromium.org/ Link: https://lore.kernel.org/lkml/20200526135429.GQ12456@shao2-debian Signed-off-by: Kees Cook --- fs/pstore/platform.c | 4 +++- tools/testing/selftests/pstore/pstore_tests | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c index 715396bef0ea..e8690d8606e0 100644 --- a/fs/pstore/platform.c +++ b/fs/pstore/platform.c @@ -514,13 +514,15 @@ static void pstore_console_write(struct console *con, const char *s, unsigned c) } static struct console pstore_console = { - .name = "pstore", .write = pstore_console_write, .index = -1, }; static void pstore_register_console(void) { + /* Show which backend is going to get console writes. */ + strscpy(pstore_console.name, psinfo->name, + sizeof(pstore_console.name)); /* * Always initialize flags here since prior unregister_console() * calls may have changed settings (specifically CON_ENABLED). diff --git a/tools/testing/selftests/pstore/pstore_tests b/tools/testing/selftests/pstore/pstore_tests index 1cef54458aff..2aa9a3852a84 100755 --- a/tools/testing/selftests/pstore/pstore_tests +++ b/tools/testing/selftests/pstore/pstore_tests @@ -10,7 +10,7 @@ . ./common_tests prlog -n "Checking pstore console is registered ... " -dmesg | grep -q "console \[pstore" +dmesg | grep -Eq "console \[(pstore|${backend})" show_result $? prlog -n "Checking /dev/pmsg0 exists ... " From d973f7d83dc7360597373536b34e80eea306d15f Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 8 May 2020 09:25:19 -0700 Subject: [PATCH 14/35] pstore/platform: Move module params after declarations It is easier to see how module params are used if they're near the variables they use. Link: https://lore.kernel.org/lkml/20200510202436.63222-4-keescook@chromium.org/ Signed-off-by: Kees Cook --- fs/pstore/platform.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c index e8690d8606e0..072440457c08 100644 --- a/fs/pstore/platform.c +++ b/fs/pstore/platform.c @@ -77,12 +77,17 @@ static DEFINE_MUTEX(psinfo_lock); struct pstore_info *psinfo; static char *backend; +module_param(backend, charp, 0444); +MODULE_PARM_DESC(backend, "specific backend to use"); + static char *compress = #ifdef CONFIG_PSTORE_COMPRESS_DEFAULT CONFIG_PSTORE_COMPRESS_DEFAULT; #else NULL; #endif +module_param(compress, charp, 0444); +MODULE_PARM_DESC(compress, "compression to use"); /* Compression parameters */ static struct crypto_comp *tfm; @@ -853,11 +858,5 @@ static void __exit pstore_exit(void) } module_exit(pstore_exit) -module_param(compress, charp, 0444); -MODULE_PARM_DESC(compress, "Pstore compression to use"); - -module_param(backend, charp, 0444); -MODULE_PARM_DESC(backend, "Pstore backend to use"); - MODULE_AUTHOR("Tony Luck "); MODULE_LICENSE("GPL"); From f858b57f7dd28b5bb55e9dc3776fff58f38a7c91 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 6 May 2020 13:06:18 -0700 Subject: [PATCH 15/35] pstore/ram: Adjust module param permissions to reflect reality A couple module parameters had 0600 permissions, but changing them would have no impact on ramoops, so switch these to 0400 to reflect reality. Link: https://lore.kernel.org/lkml/20200506211523.15077-7-keescook@chromium.org/ Signed-off-by: Kees Cook --- fs/pstore/ram.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c index 795622190c01..3b399f72e495 100644 --- a/fs/pstore/ram.c +++ b/fs/pstore/ram.c @@ -53,17 +53,17 @@ MODULE_PARM_DESC(mem_size, "size of reserved RAM used to store oops/panic logs"); static unsigned int mem_type; -module_param(mem_type, uint, 0600); +module_param(mem_type, uint, 0400); MODULE_PARM_DESC(mem_type, "set to 1 to try to use unbuffered memory (default 0)"); static int dump_oops = 1; -module_param(dump_oops, int, 0600); +module_param(dump_oops, int, 0400); MODULE_PARM_DESC(dump_oops, "set to 1 to dump oopses, 0 to only dump panics (default 1)"); static int ramoops_ecc; -module_param_named(ecc, ramoops_ecc, int, 0600); +module_param_named(ecc, ramoops_ecc, int, 0400); MODULE_PARM_DESC(ramoops_ecc, "if non-zero, the option enables ECC support and specifies " "ECC buffer size in bytes (1 is a special value, means 16 " From 26961d76ff35aecb1af0fefd283fb5170786a812 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 6 May 2020 11:44:14 -0700 Subject: [PATCH 16/35] pstore/ram: Refactor DT size parsing Refactor device tree size parsing routines to be able to pass a non-zero default value for providing a configurable default for the coming "max_reason" field. Also rename the helpers, since we're not always parsing a size -- we're parsing a u32 and making sure it's not greater than INT_MAX. Link: https://lore.kernel.org/lkml/20200506211523.15077-4-keescook@chromium.org/ Link: https://lore.kernel.org/lkml/20200521205223.175957-1-tyhicks@linux.microsoft.com Signed-off-by: Kees Cook --- fs/pstore/ram.c | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c index 3b399f72e495..8cdafb686736 100644 --- a/fs/pstore/ram.c +++ b/fs/pstore/ram.c @@ -644,19 +644,25 @@ static int ramoops_init_prz(const char *name, return 0; } -static int ramoops_parse_dt_size(struct platform_device *pdev, - const char *propname, u32 *value) +/* Read a u32 from a dt property and make sure it's safe for an int. */ +static int ramoops_parse_dt_u32(struct platform_device *pdev, + const char *propname, + u32 default_value, u32 *value) { u32 val32 = 0; int ret; ret = of_property_read_u32(pdev->dev.of_node, propname, &val32); - if (ret < 0 && ret != -EINVAL) { + if (ret == -EINVAL) { + /* field is missing, use default value. */ + val32 = default_value; + } else if (ret < 0) { dev_err(&pdev->dev, "failed to parse property %s: %d\n", propname, ret); return ret; } + /* Sanity check our results. */ if (val32 > INT_MAX) { dev_err(&pdev->dev, "%s %u > INT_MAX\n", propname, val32); return -EOVERFLOW; @@ -689,21 +695,22 @@ static int ramoops_parse_dt(struct platform_device *pdev, pdata->mem_type = of_property_read_bool(of_node, "unbuffered"); pdata->dump_oops = !of_property_read_bool(of_node, "no-dump-oops"); -#define parse_size(name, field) { \ - ret = ramoops_parse_dt_size(pdev, name, &value); \ +#define parse_u32(name, field, default_value) { \ + ret = ramoops_parse_dt_u32(pdev, name, default_value, \ + &value); \ if (ret < 0) \ return ret; \ field = value; \ } - parse_size("record-size", pdata->record_size); - parse_size("console-size", pdata->console_size); - parse_size("ftrace-size", pdata->ftrace_size); - parse_size("pmsg-size", pdata->pmsg_size); - parse_size("ecc-size", pdata->ecc_info.ecc_size); - parse_size("flags", pdata->flags); + parse_u32("record-size", pdata->record_size, 0); + parse_u32("console-size", pdata->console_size, 0); + parse_u32("ftrace-size", pdata->ftrace_size, 0); + parse_u32("pmsg-size", pdata->pmsg_size, 0); + parse_u32("ecc-size", pdata->ecc_info.ecc_size, 0); + parse_u32("flags", pdata->flags, 0); -#undef parse_size +#undef parse_u32 /* * Some old Chromebooks relied on the kernel setting the From df9bf19d88965758c700f46ef75110336fea8654 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 8 May 2020 08:34:38 -0700 Subject: [PATCH 17/35] pstore/ram: Refactor ftrace buffer merging This changes the ftrace record merging code to be agnostic of pstore/ram, as the first step to making it available as a generic routine for other backends to use, such as pstore/zone. Link: https://lore.kernel.org/lkml/20200510202436.63222-6-keescook@chromium.org/ Signed-off-by: Kees Cook --- fs/pstore/ram.c | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c index 8cdafb686736..dba9b4e3dc3f 100644 --- a/fs/pstore/ram.c +++ b/fs/pstore/ram.c @@ -168,8 +168,9 @@ static bool prz_ok(struct persistent_ram_zone *prz) persistent_ram_ecc_string(prz, NULL, 0)); } -static ssize_t ftrace_log_combine(struct persistent_ram_zone *dest, - struct persistent_ram_zone *src) +static +ssize_t ftrace_log_combine(char **dest_log, size_t *dest_log_size, + const char *src_log, size_t src_log_size) { size_t dest_size, src_size, total, dest_off, src_off; size_t dest_idx = 0, src_idx = 0, merged_idx = 0; @@ -177,19 +178,19 @@ static ssize_t ftrace_log_combine(struct persistent_ram_zone *dest, struct pstore_ftrace_record *drec, *srec, *mrec; size_t record_size = sizeof(struct pstore_ftrace_record); - dest_off = dest->old_log_size % record_size; - dest_size = dest->old_log_size - dest_off; + dest_off = *dest_log_size % record_size; + dest_size = *dest_log_size - dest_off; - src_off = src->old_log_size % record_size; - src_size = src->old_log_size - src_off; + src_off = src_log_size % record_size; + src_size = src_log_size - src_off; total = dest_size + src_size; merged_buf = kmalloc(total, GFP_KERNEL); if (!merged_buf) return -ENOMEM; - drec = (struct pstore_ftrace_record *)(dest->old_log + dest_off); - srec = (struct pstore_ftrace_record *)(src->old_log + src_off); + drec = (struct pstore_ftrace_record *)(*dest_log + dest_off); + srec = (struct pstore_ftrace_record *)(src_log + src_off); mrec = (struct pstore_ftrace_record *)(merged_buf); while (dest_size > 0 && src_size > 0) { @@ -213,9 +214,9 @@ static ssize_t ftrace_log_combine(struct persistent_ram_zone *dest, src_size -= record_size; } - kfree(dest->old_log); - dest->old_log = merged_buf; - dest->old_log_size = total; + kfree(*dest_log); + *dest_log = merged_buf; + *dest_log_size = total; return 0; } @@ -291,7 +292,11 @@ static ssize_t ramoops_pstore_read(struct pstore_record *record) tmp_prz->corrected_bytes += prz_next->corrected_bytes; tmp_prz->bad_blocks += prz_next->bad_blocks; - size = ftrace_log_combine(tmp_prz, prz_next); + + size = ftrace_log_combine(&tmp_prz->old_log, + &tmp_prz->old_log_size, + prz_next->old_log, + prz_next->old_log_size); if (size) goto out; } From 16a583079e937f6f5e6274ef7fda6dbf7dcb669f Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 8 May 2020 08:42:12 -0700 Subject: [PATCH 18/35] pstore/ftrace: Provide ftrace log merging routine Move the ftrace log merging logic out of pstore/ram into pstore/ftrace so other backends can use it, like pstore/zone. Link: https://lore.kernel.org/lkml/20200510202436.63222-7-keescook@chromium.org/ Signed-off-by: Kees Cook --- fs/pstore/ftrace.c | 54 +++++++++++++++++++++++++++++++++++++++++ fs/pstore/internal.h | 9 +++++++ fs/pstore/ram.c | 57 +++----------------------------------------- 3 files changed, 66 insertions(+), 54 deletions(-) diff --git a/fs/pstore/ftrace.c b/fs/pstore/ftrace.c index bfbfc2698070..5c0450701293 100644 --- a/fs/pstore/ftrace.c +++ b/fs/pstore/ftrace.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include "internal.h" @@ -132,3 +133,56 @@ void pstore_unregister_ftrace(void) debugfs_remove_recursive(pstore_ftrace_dir); } + +ssize_t pstore_ftrace_combine_log(char **dest_log, size_t *dest_log_size, + const char *src_log, size_t src_log_size) +{ + size_t dest_size, src_size, total, dest_off, src_off; + size_t dest_idx = 0, src_idx = 0, merged_idx = 0; + void *merged_buf; + struct pstore_ftrace_record *drec, *srec, *mrec; + size_t record_size = sizeof(struct pstore_ftrace_record); + + dest_off = *dest_log_size % record_size; + dest_size = *dest_log_size - dest_off; + + src_off = src_log_size % record_size; + src_size = src_log_size - src_off; + + total = dest_size + src_size; + merged_buf = kmalloc(total, GFP_KERNEL); + if (!merged_buf) + return -ENOMEM; + + drec = (struct pstore_ftrace_record *)(*dest_log + dest_off); + srec = (struct pstore_ftrace_record *)(src_log + src_off); + mrec = (struct pstore_ftrace_record *)(merged_buf); + + while (dest_size > 0 && src_size > 0) { + if (pstore_ftrace_read_timestamp(&drec[dest_idx]) < + pstore_ftrace_read_timestamp(&srec[src_idx])) { + mrec[merged_idx++] = drec[dest_idx++]; + dest_size -= record_size; + } else { + mrec[merged_idx++] = srec[src_idx++]; + src_size -= record_size; + } + } + + while (dest_size > 0) { + mrec[merged_idx++] = drec[dest_idx++]; + dest_size -= record_size; + } + + while (src_size > 0) { + mrec[merged_idx++] = srec[src_idx++]; + src_size -= record_size; + } + + kfree(*dest_log); + *dest_log = merged_buf; + *dest_log_size = total; + + return 0; +} +EXPORT_SYMBOL_GPL(pstore_ftrace_combine_log); diff --git a/fs/pstore/internal.h b/fs/pstore/internal.h index 8efd72d93b10..7fb219042f13 100644 --- a/fs/pstore/internal.h +++ b/fs/pstore/internal.h @@ -12,9 +12,18 @@ extern unsigned long kmsg_bytes; #ifdef CONFIG_PSTORE_FTRACE extern void pstore_register_ftrace(void); extern void pstore_unregister_ftrace(void); +ssize_t pstore_ftrace_combine_log(char **dest_log, size_t *dest_log_size, + const char *src_log, size_t src_log_size); #else static inline void pstore_register_ftrace(void) {} static inline void pstore_unregister_ftrace(void) {} +static inline ssize_t +pstore_ftrace_combine_log(char **dest_log, size_t *dest_log_size, + const char *src_log, size_t src_log_size) +{ + *dest_log_size = 0; + return 0; +} #endif #ifdef CONFIG_PSTORE_PMSG diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c index dba9b4e3dc3f..26a1f502a3ea 100644 --- a/fs/pstore/ram.c +++ b/fs/pstore/ram.c @@ -21,6 +21,7 @@ #include #include #include +#include "internal.h" #define RAMOOPS_KERNMSG_HDR "====" #define MIN_MEM_SIZE 4096UL @@ -168,59 +169,6 @@ static bool prz_ok(struct persistent_ram_zone *prz) persistent_ram_ecc_string(prz, NULL, 0)); } -static -ssize_t ftrace_log_combine(char **dest_log, size_t *dest_log_size, - const char *src_log, size_t src_log_size) -{ - size_t dest_size, src_size, total, dest_off, src_off; - size_t dest_idx = 0, src_idx = 0, merged_idx = 0; - void *merged_buf; - struct pstore_ftrace_record *drec, *srec, *mrec; - size_t record_size = sizeof(struct pstore_ftrace_record); - - dest_off = *dest_log_size % record_size; - dest_size = *dest_log_size - dest_off; - - src_off = src_log_size % record_size; - src_size = src_log_size - src_off; - - total = dest_size + src_size; - merged_buf = kmalloc(total, GFP_KERNEL); - if (!merged_buf) - return -ENOMEM; - - drec = (struct pstore_ftrace_record *)(*dest_log + dest_off); - srec = (struct pstore_ftrace_record *)(src_log + src_off); - mrec = (struct pstore_ftrace_record *)(merged_buf); - - while (dest_size > 0 && src_size > 0) { - if (pstore_ftrace_read_timestamp(&drec[dest_idx]) < - pstore_ftrace_read_timestamp(&srec[src_idx])) { - mrec[merged_idx++] = drec[dest_idx++]; - dest_size -= record_size; - } else { - mrec[merged_idx++] = srec[src_idx++]; - src_size -= record_size; - } - } - - while (dest_size > 0) { - mrec[merged_idx++] = drec[dest_idx++]; - dest_size -= record_size; - } - - while (src_size > 0) { - mrec[merged_idx++] = srec[src_idx++]; - src_size -= record_size; - } - - kfree(*dest_log); - *dest_log = merged_buf; - *dest_log_size = total; - - return 0; -} - static ssize_t ramoops_pstore_read(struct pstore_record *record) { ssize_t size = 0; @@ -293,7 +241,8 @@ static ssize_t ramoops_pstore_read(struct pstore_record *record) prz_next->corrected_bytes; tmp_prz->bad_blocks += prz_next->bad_blocks; - size = ftrace_log_combine(&tmp_prz->old_log, + size = pstore_ftrace_combine_log( + &tmp_prz->old_log, &tmp_prz->old_log_size, prz_next->old_log, prz_next->old_log_size); From 6d3cf962dd1a95df868c547b090bfc4c7977f4be Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 15 May 2020 11:05:43 -0700 Subject: [PATCH 19/35] printk: Collapse shutdown types into a single dump reason To turn the KMSG_DUMP_* reasons into a more ordered list, collapse the redundant KMSG_DUMP_(RESTART|HALT|POWEROFF) reasons into KMSG_DUMP_SHUTDOWN. The current users already don't meaningfully distinguish between them, so there's no need to, as discussed here: https://lore.kernel.org/lkml/CA+CK2bAPv5u1ih5y9t5FUnTyximtFCtDYXJCpuyjOyHNOkRdqw@mail.gmail.com/ Link: https://lore.kernel.org/lkml/20200515184434.8470-2-keescook@chromium.org/ Reviewed-by: Pavel Tatashin Reviewed-by: Petr Mladek Acked-by: Michael Ellerman (powerpc) Signed-off-by: Kees Cook --- arch/powerpc/kernel/nvram_64.c | 4 +--- fs/pstore/platform.c | 8 ++------ include/linux/kmsg_dump.h | 4 +--- kernel/reboot.c | 6 +++--- 4 files changed, 7 insertions(+), 15 deletions(-) diff --git a/arch/powerpc/kernel/nvram_64.c b/arch/powerpc/kernel/nvram_64.c index fb4f61096613..0cd1c88bfc8b 100644 --- a/arch/powerpc/kernel/nvram_64.c +++ b/arch/powerpc/kernel/nvram_64.c @@ -655,9 +655,7 @@ static void oops_to_nvram(struct kmsg_dumper *dumper, int rc = -1; switch (reason) { - case KMSG_DUMP_RESTART: - case KMSG_DUMP_HALT: - case KMSG_DUMP_POWEROFF: + case KMSG_DUMP_SHUTDOWN: /* These are almost always orderly shutdowns. */ return; case KMSG_DUMP_OOPS: diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c index 072440457c08..90d74ebaa70a 100644 --- a/fs/pstore/platform.c +++ b/fs/pstore/platform.c @@ -144,12 +144,8 @@ static const char *get_reason_str(enum kmsg_dump_reason reason) return "Oops"; case KMSG_DUMP_EMERG: return "Emergency"; - case KMSG_DUMP_RESTART: - return "Restart"; - case KMSG_DUMP_HALT: - return "Halt"; - case KMSG_DUMP_POWEROFF: - return "Poweroff"; + case KMSG_DUMP_SHUTDOWN: + return "Shutdown"; default: return "Unknown"; } diff --git a/include/linux/kmsg_dump.h b/include/linux/kmsg_dump.h index 2e7a1e032c71..3f82b5cb2d82 100644 --- a/include/linux/kmsg_dump.h +++ b/include/linux/kmsg_dump.h @@ -25,9 +25,7 @@ enum kmsg_dump_reason { KMSG_DUMP_PANIC, KMSG_DUMP_OOPS, KMSG_DUMP_EMERG, - KMSG_DUMP_RESTART, - KMSG_DUMP_HALT, - KMSG_DUMP_POWEROFF, + KMSG_DUMP_SHUTDOWN, }; /** diff --git a/kernel/reboot.c b/kernel/reboot.c index c4d472b7f1b4..491f1347bf43 100644 --- a/kernel/reboot.c +++ b/kernel/reboot.c @@ -250,7 +250,7 @@ void kernel_restart(char *cmd) pr_emerg("Restarting system\n"); else pr_emerg("Restarting system with command '%s'\n", cmd); - kmsg_dump(KMSG_DUMP_RESTART); + kmsg_dump(KMSG_DUMP_SHUTDOWN); machine_restart(cmd); } EXPORT_SYMBOL_GPL(kernel_restart); @@ -274,7 +274,7 @@ void kernel_halt(void) migrate_to_reboot_cpu(); syscore_shutdown(); pr_emerg("System halted\n"); - kmsg_dump(KMSG_DUMP_HALT); + kmsg_dump(KMSG_DUMP_SHUTDOWN); machine_halt(); } EXPORT_SYMBOL_GPL(kernel_halt); @@ -292,7 +292,7 @@ void kernel_power_off(void) migrate_to_reboot_cpu(); syscore_shutdown(); pr_emerg("Power down\n"); - kmsg_dump(KMSG_DUMP_POWEROFF); + kmsg_dump(KMSG_DUMP_SHUTDOWN); machine_power_off(); } EXPORT_SYMBOL_GPL(kernel_power_off); From b1f6f161b236d0e5a9222fb8b482e65aaff13689 Mon Sep 17 00:00:00 2001 From: Pavel Tatashin Date: Tue, 5 May 2020 11:45:06 -0400 Subject: [PATCH 20/35] printk: honor the max_reason field in kmsg_dumper kmsg_dump() allows to dump kmesg buffer for various system events: oops, panic, reboot, etc. It provides an interface to register a callback call for clients, and in that callback interface there is a field "max_reason", but it was getting ignored when set to any "reason" higher than KMSG_DUMP_OOPS unless "always_kmsg_dump" was passed as kernel parameter. Allow clients to actually control their "max_reason", and keep the current behavior when "max_reason" is not set. Signed-off-by: Pavel Tatashin Link: https://lore.kernel.org/lkml/20200515184434.8470-3-keescook@chromium.org/ Reviewed-by: Petr Mladek Signed-off-by: Kees Cook --- include/linux/kmsg_dump.h | 1 + kernel/printk/printk.c | 15 +++++++++++---- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/include/linux/kmsg_dump.h b/include/linux/kmsg_dump.h index 3f82b5cb2d82..9826014771ab 100644 --- a/include/linux/kmsg_dump.h +++ b/include/linux/kmsg_dump.h @@ -26,6 +26,7 @@ enum kmsg_dump_reason { KMSG_DUMP_OOPS, KMSG_DUMP_EMERG, KMSG_DUMP_SHUTDOWN, + KMSG_DUMP_MAX }; /** diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 9a9b6156270b..a121c2255737 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -3157,12 +3157,19 @@ void kmsg_dump(enum kmsg_dump_reason reason) struct kmsg_dumper *dumper; unsigned long flags; - if ((reason > KMSG_DUMP_OOPS) && !always_kmsg_dump) - return; - rcu_read_lock(); list_for_each_entry_rcu(dumper, &dump_list, list) { - if (dumper->max_reason && reason > dumper->max_reason) + enum kmsg_dump_reason max_reason = dumper->max_reason; + + /* + * If client has not provided a specific max_reason, default + * to KMSG_DUMP_OOPS, unless always_kmsg_dump was set. + */ + if (max_reason == KMSG_DUMP_UNDEF) { + max_reason = always_kmsg_dump ? KMSG_DUMP_MAX : + KMSG_DUMP_OOPS; + } + if (reason > max_reason) continue; /* initialize iterator with data about the stored records */ From fb13cb8a0482105a415e24042209d02a684255e2 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 7 May 2020 19:36:22 -0700 Subject: [PATCH 21/35] printk: Introduce kmsg_dump_reason_str() The pstore subsystem already had a private version of this function. With the coming addition of the pstore/zone driver, this needs to be shared. As it really should live with printk, move it there instead. Link: https://lore.kernel.org/lkml/20200515184434.8470-4-keescook@chromium.org/ Acked-by: Petr Mladek Acked-by: Sergey Senozhatsky Reviewed-by: Pavel Tatashin Signed-off-by: Kees Cook --- fs/pstore/platform.c | 18 +----------------- include/linux/kmsg_dump.h | 7 +++++++ kernel/printk/printk.c | 17 +++++++++++++++++ 3 files changed, 25 insertions(+), 17 deletions(-) diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c index 90d74ebaa70a..5e6c6022deb9 100644 --- a/fs/pstore/platform.c +++ b/fs/pstore/platform.c @@ -135,22 +135,6 @@ enum pstore_type_id pstore_name_to_type(const char *name) } EXPORT_SYMBOL_GPL(pstore_name_to_type); -static const char *get_reason_str(enum kmsg_dump_reason reason) -{ - switch (reason) { - case KMSG_DUMP_PANIC: - return "Panic"; - case KMSG_DUMP_OOPS: - return "Oops"; - case KMSG_DUMP_EMERG: - return "Emergency"; - case KMSG_DUMP_SHUTDOWN: - return "Shutdown"; - default: - return "Unknown"; - } -} - static void pstore_timer_kick(void) { if (pstore_update_ms < 0) @@ -403,7 +387,7 @@ static void pstore_dump(struct kmsg_dumper *dumper, unsigned int part = 1; int ret; - why = get_reason_str(reason); + why = kmsg_dump_reason_str(reason); if (down_trylock(&psinfo->buf_lock)) { /* Failed to acquire lock: give up if we cannot wait. */ diff --git a/include/linux/kmsg_dump.h b/include/linux/kmsg_dump.h index 9826014771ab..3378bcbe585e 100644 --- a/include/linux/kmsg_dump.h +++ b/include/linux/kmsg_dump.h @@ -70,6 +70,8 @@ void kmsg_dump_rewind(struct kmsg_dumper *dumper); int kmsg_dump_register(struct kmsg_dumper *dumper); int kmsg_dump_unregister(struct kmsg_dumper *dumper); + +const char *kmsg_dump_reason_str(enum kmsg_dump_reason reason); #else static inline void kmsg_dump(enum kmsg_dump_reason reason) { @@ -111,6 +113,11 @@ static inline int kmsg_dump_unregister(struct kmsg_dumper *dumper) { return -EINVAL; } + +static inline const char *kmsg_dump_reason_str(enum kmsg_dump_reason reason) +{ + return "Disabled"; +} #endif #endif /* _LINUX_KMSG_DUMP_H */ diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index a121c2255737..14ca4d05d902 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -3144,6 +3144,23 @@ EXPORT_SYMBOL_GPL(kmsg_dump_unregister); static bool always_kmsg_dump; module_param_named(always_kmsg_dump, always_kmsg_dump, bool, S_IRUGO | S_IWUSR); +const char *kmsg_dump_reason_str(enum kmsg_dump_reason reason) +{ + switch (reason) { + case KMSG_DUMP_PANIC: + return "Panic"; + case KMSG_DUMP_OOPS: + return "Oops"; + case KMSG_DUMP_EMERG: + return "Emergency"; + case KMSG_DUMP_SHUTDOWN: + return "Shutdown"; + default: + return "Unknown"; + } +} +EXPORT_SYMBOL_GPL(kmsg_dump_reason_str); + /** * kmsg_dump - dump kernel log to kernel message dumpers. * @reason: the reason (oops, panic etc) for dumping From 3524e688b8ee50b0edc76f0e020727eb6c684dbc Mon Sep 17 00:00:00 2001 From: Pavel Tatashin Date: Tue, 5 May 2020 11:45:07 -0400 Subject: [PATCH 22/35] pstore/platform: Pass max_reason to kmesg dump Add a new member to struct pstore_info for passing information about kmesg dump maximum reason. This allows a finer control of what kmesg dumps are sent to pstore storage backends. Those backends that do not explicitly set this field (keeping it equal to 0), get the default behavior: store only Oopses and Panics, or everything if the printk.always_kmsg_dump boot param is set. Signed-off-by: Pavel Tatashin Link: https://lore.kernel.org/lkml/20200515184434.8470-5-keescook@chromium.org/ Co-developed-by: Kees Cook Signed-off-by: Kees Cook --- fs/pstore/platform.c | 4 +++- include/linux/pstore.h | 7 +++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c index 5e6c6022deb9..a9e297eefdff 100644 --- a/fs/pstore/platform.c +++ b/fs/pstore/platform.c @@ -595,8 +595,10 @@ int pstore_register(struct pstore_info *psi) pstore_get_records(0); - if (psi->flags & PSTORE_FLAGS_DMESG) + if (psi->flags & PSTORE_FLAGS_DMESG) { + pstore_dumper.max_reason = psinfo->max_reason; pstore_register_kmsg(); + } if (psi->flags & PSTORE_FLAGS_CONSOLE) pstore_register_console(); if (psi->flags & PSTORE_FLAGS_FTRACE) diff --git a/include/linux/pstore.h b/include/linux/pstore.h index f6f22b13e04f..eb93a54cff31 100644 --- a/include/linux/pstore.h +++ b/include/linux/pstore.h @@ -96,6 +96,12 @@ struct pstore_record { * * @read_mutex: serializes @open, @read, @close, and @erase callbacks * @flags: bitfield of frontends the backend can accept writes for + * @max_reason: Used when PSTORE_FLAGS_DMESG is set. Contains the + * kmsg_dump_reason enum value. KMSG_DUMP_UNDEF means + * "use existing kmsg_dump() filtering, based on the + * printk.always_kmsg_dump boot param" (which is either + * KMSG_DUMP_OOPS when false, or KMSG_DUMP_MAX when + * true); see printk.always_kmsg_dump for more details. * @data: backend-private pointer passed back during callbacks * * Callbacks: @@ -179,6 +185,7 @@ struct pstore_info { struct mutex read_mutex; int flags; + int max_reason; void *data; int (*open)(struct pstore_info *psi); From 791205e3ec6081a8da6f00621e3453d622dc41e7 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 13 May 2020 14:35:03 -0700 Subject: [PATCH 23/35] pstore/ram: Introduce max_reason and convert dump_oops Now that pstore_register() can correctly pass max_reason to the kmesg dump facility, introduce a new "max_reason" module parameter and "max-reason" Device Tree field. The "dump_oops" module parameter and "dump-oops" Device Tree field are now considered deprecated, but are now automatically converted to their corresponding max_reason values when present, though the new max_reason setting has precedence. For struct ramoops_platform_data, the "dump_oops" member is entirely replaced by a new "max_reason" member, with the only existing user updated in place. Additionally remove the "reason" filter logic from ramoops_pstore_write(), as that is not specifically needed anymore, though technically this is a change in behavior for any ramoops users also setting the printk.always_kmsg_dump boot param, which will cause ramoops to behave as if max_reason was set to KMSG_DUMP_MAX. Co-developed-by: Pavel Tatashin Signed-off-by: Pavel Tatashin Link: https://lore.kernel.org/lkml/20200515184434.8470-6-keescook@chromium.org/ Signed-off-by: Kees Cook --- Documentation/admin-guide/ramoops.rst | 14 ++++-- drivers/platform/chrome/chromeos_pstore.c | 2 +- fs/pstore/ram.c | 58 +++++++++++++++-------- include/linux/pstore_ram.h | 2 +- 4 files changed, 51 insertions(+), 25 deletions(-) diff --git a/Documentation/admin-guide/ramoops.rst b/Documentation/admin-guide/ramoops.rst index 6dbcc5481000..a60a96218ba9 100644 --- a/Documentation/admin-guide/ramoops.rst +++ b/Documentation/admin-guide/ramoops.rst @@ -32,11 +32,17 @@ memory to be mapped strongly ordered, and atomic operations on strongly ordered memory are implementation defined, and won't work on many ARMs such as omaps. The memory area is divided into ``record_size`` chunks (also rounded down to -power of two) and each oops/panic writes a ``record_size`` chunk of +power of two) and each kmesg dump writes a ``record_size`` chunk of information. -Dumping both oopses and panics can be done by setting 1 in the ``dump_oops`` -variable while setting 0 in that variable dumps only the panics. +Limiting which kinds of kmsg dumps are stored can be controlled via +the ``max_reason`` value, as defined in include/linux/kmsg_dump.h's +``enum kmsg_dump_reason``. For example, to store both Oopses and Panics, +``max_reason`` should be set to 2 (KMSG_DUMP_OOPS), to store only Panics +``max_reason`` should be set to 1 (KMSG_DUMP_PANIC). Setting this to 0 +(KMSG_DUMP_UNDEF), means the reason filtering will be controlled by the +``printk.always_kmsg_dump`` boot param: if unset, it'll be KMSG_DUMP_OOPS, +otherwise KMSG_DUMP_MAX. The module uses a counter to record multiple dumps but the counter gets reset on restart (i.e. new dumps after the restart will overwrite old ones). @@ -90,7 +96,7 @@ Setting the ramoops parameters can be done in several different manners: .mem_address = <...>, .mem_type = <...>, .record_size = <...>, - .dump_oops = <...>, + .max_reason = <...>, .ecc = <...>, }; diff --git a/drivers/platform/chrome/chromeos_pstore.c b/drivers/platform/chrome/chromeos_pstore.c index d13770785fb5..fa51153688b4 100644 --- a/drivers/platform/chrome/chromeos_pstore.c +++ b/drivers/platform/chrome/chromeos_pstore.c @@ -57,7 +57,7 @@ static struct ramoops_platform_data chromeos_ramoops_data = { .record_size = 0x40000, .console_size = 0x20000, .ftrace_size = 0x20000, - .dump_oops = 1, + .max_reason = KMSG_DUMP_OOPS, }; static struct platform_device chromeos_ramoops = { diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c index 26a1f502a3ea..ca6d8a867285 100644 --- a/fs/pstore/ram.c +++ b/fs/pstore/ram.c @@ -58,10 +58,10 @@ module_param(mem_type, uint, 0400); MODULE_PARM_DESC(mem_type, "set to 1 to try to use unbuffered memory (default 0)"); -static int dump_oops = 1; -module_param(dump_oops, int, 0400); -MODULE_PARM_DESC(dump_oops, - "set to 1 to dump oopses, 0 to only dump panics (default 1)"); +static int ramoops_max_reason = -1; +module_param_named(max_reason, ramoops_max_reason, int, 0400); +MODULE_PARM_DESC(max_reason, + "maximum reason for kmsg dump (default 2: Oops and Panic) "); static int ramoops_ecc; module_param_named(ecc, ramoops_ecc, int, 0400); @@ -70,6 +70,11 @@ MODULE_PARM_DESC(ramoops_ecc, "ECC buffer size in bytes (1 is a special value, means 16 " "bytes ECC)"); +static int ramoops_dump_oops = -1; +module_param_named(dump_oops, ramoops_dump_oops, int, 0400); +MODULE_PARM_DESC(dump_oops, + "(deprecated: use max_reason instead) set to 1 to dump oopses & panics, 0 to only dump panics"); + struct ramoops_context { struct persistent_ram_zone **dprzs; /* Oops dump zones */ struct persistent_ram_zone *cprz; /* Console zone */ @@ -82,7 +87,6 @@ struct ramoops_context { size_t console_size; size_t ftrace_size; size_t pmsg_size; - int dump_oops; u32 flags; struct persistent_ram_ecc_info ecc_info; unsigned int max_dump_cnt; @@ -336,16 +340,14 @@ static int notrace ramoops_pstore_write(struct pstore_record *record) return -EINVAL; /* - * Out of the various dmesg dump types, ramoops is currently designed - * to only store crash logs, rather than storing general kernel logs. + * We could filter on record->reason here if we wanted to (which + * would duplicate what happened before the "max_reason" setting + * was added), but that would defeat the purpose of a system + * changing printk.always_kmsg_dump, so instead log everything that + * the kmsg dumper sends us, since it should be doing the filtering + * based on the combination of printk.always_kmsg_dump and our + * requested "max_reason". */ - if (record->reason != KMSG_DUMP_OOPS && - record->reason != KMSG_DUMP_PANIC) - return -EINVAL; - - /* Skip Oopes when configured to do so. */ - if (record->reason == KMSG_DUMP_OOPS && !cxt->dump_oops) - return -EINVAL; /* * Explicitly only take the first part of any new crash. @@ -647,7 +649,14 @@ static int ramoops_parse_dt(struct platform_device *pdev, pdata->mem_size = resource_size(res); pdata->mem_address = res->start; pdata->mem_type = of_property_read_bool(of_node, "unbuffered"); - pdata->dump_oops = !of_property_read_bool(of_node, "no-dump-oops"); + /* + * Setting "no-dump-oops" is deprecated and will be ignored if + * "max_reason" is also specified. + */ + if (of_property_read_bool(of_node, "no-dump-oops")) + pdata->max_reason = KMSG_DUMP_PANIC; + else + pdata->max_reason = KMSG_DUMP_OOPS; #define parse_u32(name, field, default_value) { \ ret = ramoops_parse_dt_u32(pdev, name, default_value, \ @@ -663,6 +672,7 @@ static int ramoops_parse_dt(struct platform_device *pdev, parse_u32("pmsg-size", pdata->pmsg_size, 0); parse_u32("ecc-size", pdata->ecc_info.ecc_size, 0); parse_u32("flags", pdata->flags, 0); + parse_u32("max-reason", pdata->max_reason, pdata->max_reason); #undef parse_u32 @@ -746,7 +756,6 @@ static int ramoops_probe(struct platform_device *pdev) cxt->console_size = pdata->console_size; cxt->ftrace_size = pdata->ftrace_size; cxt->pmsg_size = pdata->pmsg_size; - cxt->dump_oops = pdata->dump_oops; cxt->flags = pdata->flags; cxt->ecc_info = pdata->ecc_info; @@ -789,8 +798,10 @@ static int ramoops_probe(struct platform_device *pdev) * the single region size is how to check. */ cxt->pstore.flags = 0; - if (cxt->max_dump_cnt) + if (cxt->max_dump_cnt) { cxt->pstore.flags |= PSTORE_FLAGS_DMESG; + cxt->pstore.max_reason = pdata->max_reason; + } if (cxt->console_size) cxt->pstore.flags |= PSTORE_FLAGS_CONSOLE; if (cxt->max_ftrace_cnt) @@ -826,7 +837,7 @@ static int ramoops_probe(struct platform_device *pdev) mem_size = pdata->mem_size; mem_address = pdata->mem_address; record_size = pdata->record_size; - dump_oops = pdata->dump_oops; + ramoops_max_reason = pdata->max_reason; ramoops_console_size = pdata->console_size; ramoops_pmsg_size = pdata->pmsg_size; ramoops_ftrace_size = pdata->ftrace_size; @@ -909,7 +920,16 @@ static void __init ramoops_register_dummy(void) pdata.console_size = ramoops_console_size; pdata.ftrace_size = ramoops_ftrace_size; pdata.pmsg_size = ramoops_pmsg_size; - pdata.dump_oops = dump_oops; + /* If "max_reason" is set, its value has priority over "dump_oops". */ + if (ramoops_max_reason >= 0) + pdata.max_reason = ramoops_max_reason; + /* Otherwise, if "dump_oops" is set, parse it into "max_reason". */ + else if (ramoops_dump_oops != -1) + pdata.max_reason = ramoops_dump_oops ? KMSG_DUMP_OOPS + : KMSG_DUMP_PANIC; + /* And if neither are explicitly set, use the default. */ + else + pdata.max_reason = KMSG_DUMP_OOPS; pdata.flags = RAMOOPS_FLAG_FTRACE_PER_CPU; /* diff --git a/include/linux/pstore_ram.h b/include/linux/pstore_ram.h index 9cb9b9067298..9f16afec7290 100644 --- a/include/linux/pstore_ram.h +++ b/include/linux/pstore_ram.h @@ -133,7 +133,7 @@ struct ramoops_platform_data { unsigned long console_size; unsigned long ftrace_size; unsigned long pmsg_size; - int dump_oops; + int max_reason; u32 flags; struct persistent_ram_ecc_info ecc_info; }; From acf12c5e58a45cb978d5de4628f767d326c3d740 Mon Sep 17 00:00:00 2001 From: Pavel Tatashin Date: Tue, 5 May 2020 11:45:10 -0400 Subject: [PATCH 24/35] ramoops: Add "max-reason" optional field to ramoops DT node Currently, it is only possible to get kmsg dumps for panic and oops, or just panic, via "no-dump-oops". With "max-reason" it is possible to dump messages for other kmsg_dump events, for example emerg and shutdown. Signed-off-by: Pavel Tatashin Link: https://lore.kernel.org/lkml/20200515184434.8470-7-keescook@chromium.org/ Signed-off-by: Kees Cook --- .../devicetree/bindings/reserved-memory/ramoops.txt | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/reserved-memory/ramoops.txt b/Documentation/devicetree/bindings/reserved-memory/ramoops.txt index 0eba562fe5c6..b7886fea368c 100644 --- a/Documentation/devicetree/bindings/reserved-memory/ramoops.txt +++ b/Documentation/devicetree/bindings/reserved-memory/ramoops.txt @@ -30,7 +30,7 @@ Optional properties: - ecc-size: enables ECC support and specifies ECC buffer size in bytes (defaults to 0: no ECC) -- record-size: maximum size in bytes of each dump done on oops/panic +- record-size: maximum size in bytes of each kmsg dump. (defaults to 0: disabled) - console-size: size in bytes of log buffer reserved for kernel messages @@ -45,7 +45,16 @@ Optional properties: - unbuffered: if present, use unbuffered mappings to map the reserved region (defaults to buffered mappings) -- no-dump-oops: if present, only dump panics (defaults to panics and oops) +- max-reason: if present, sets maximum type of kmsg dump reasons to store + (defaults to 2: log Oopses and Panics). This can be set to INT_MAX to + store all kmsg dumps. See include/linux/kmsg_dump.h KMSG_DUMP_* for other + kmsg dump reason values. Setting this to 0 (KMSG_DUMP_UNDEF), means the + reason filtering will be controlled by the printk.always_kmsg_dump boot + param: if unset, it will be KMSG_DUMP_OOPS, otherwise KMSG_DUMP_MAX. + +- no-dump-oops: deprecated, use max_reason instead. If present, and + max_reason is not specified, it is equivalent to max_reason = 1 + (KMSG_DUMP_PANIC). - flags: if present, pass ramoops behavioral flags (defaults to 0, see include/linux/pstore_ram.h RAMOOPS_FLAG_* for flag values). From d26c3321fe18dc74517dc1f518d584aa33b0a851 Mon Sep 17 00:00:00 2001 From: WeiXiong Liao Date: Wed, 25 Mar 2020 16:54:56 +0800 Subject: [PATCH 25/35] pstore/zone: Introduce common layer to manage storage zones Implement a common set of APIs needed to support pstore storage zones, based on how ramoops is designed. This will be used by pstore/blk with the intention of migrating pstore/ram in the future. Signed-off-by: WeiXiong Liao Link: https://lore.kernel.org/lkml/20200511233229.27745-2-keescook@chromium.org/ Co-developed-by: Kees Cook Signed-off-by: Kees Cook --- fs/pstore/Kconfig | 7 + fs/pstore/Makefile | 3 + fs/pstore/zone.c | 985 ++++++++++++++++++++++++++++++++++++ include/linux/pstore_zone.h | 44 ++ 4 files changed, 1039 insertions(+) create mode 100644 fs/pstore/zone.c create mode 100644 include/linux/pstore_zone.h diff --git a/fs/pstore/Kconfig b/fs/pstore/Kconfig index 8f0369aad22a..98d2457bdd9f 100644 --- a/fs/pstore/Kconfig +++ b/fs/pstore/Kconfig @@ -153,3 +153,10 @@ config PSTORE_RAM "ramoops.ko". For more information, see Documentation/admin-guide/ramoops.rst. + +config PSTORE_ZONE + tristate + depends on PSTORE + help + The common layer for pstore/blk (and pstore/ram in the future) + to manage storage in zones. diff --git a/fs/pstore/Makefile b/fs/pstore/Makefile index 967b5891f325..58a967cbe4af 100644 --- a/fs/pstore/Makefile +++ b/fs/pstore/Makefile @@ -12,3 +12,6 @@ pstore-$(CONFIG_PSTORE_PMSG) += pmsg.o ramoops-objs += ram.o ram_core.o obj-$(CONFIG_PSTORE_RAM) += ramoops.o + +pstore_zone-objs += zone.o +obj-$(CONFIG_PSTORE_ZONE) += pstore_zone.o diff --git a/fs/pstore/zone.c b/fs/pstore/zone.c new file mode 100644 index 000000000000..362d72a24012 --- /dev/null +++ b/fs/pstore/zone.c @@ -0,0 +1,985 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Provide a pstore intermediate backend, organized into kernel memory + * allocated zones that are then mapped and flushed into a single + * contiguous region on a storage backend of some kind (block, mtd, etc). + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "internal.h" + +/** + * struct psz_head - header of zone to flush to storage + * + * @sig: signature to indicate header (PSZ_SIG xor PSZONE-type value) + * @datalen: length of data in @data + * @data: zone data. + */ +struct psz_buffer { +#define PSZ_SIG (0x43474244) /* DBGC */ + uint32_t sig; + atomic_t datalen; + uint8_t data[]; +}; + +/** + * struct psz_kmsg_header - kmsg dump-specific header to flush to storage + * + * @magic: magic num for kmsg dump header + * @time: kmsg dump trigger time + * @compressed: whether conpressed + * @counter: kmsg dump counter + * @reason: the kmsg dump reason (e.g. oops, panic, etc) + * @data: pointer to log data + * + * This is a sub-header for a kmsg dump, trailing after &psz_buffer. + */ +struct psz_kmsg_header { +#define PSTORE_KMSG_HEADER_MAGIC 0x4dfc3ae5 /* Just a random number */ + uint32_t magic; + struct timespec64 time; + bool compressed; + uint32_t counter; + enum kmsg_dump_reason reason; + uint8_t data[]; +}; + +/** + * struct pstore_zone - single stored buffer + * + * @off: zone offset of storage + * @type: front-end type for this zone + * @name: front-end name for this zone + * @buffer: pointer to data buffer managed by this zone + * @oldbuf: pointer to old data buffer + * @buffer_size: bytes in @buffer->data + * @should_recover: whether this zone should recover from storage + * @dirty: whether the data in @buffer dirty + * + * zone structure in memory. + */ +struct pstore_zone { + loff_t off; + const char *name; + enum pstore_type_id type; + + struct psz_buffer *buffer; + struct psz_buffer *oldbuf; + size_t buffer_size; + bool should_recover; + atomic_t dirty; +}; + +/** + * struct psz_context - all about running state of pstore/zone + * + * @kpszs: kmsg dump storage zones + * @kmsg_max_cnt: max count of @kpszs + * @kmsg_read_cnt: counter of total read kmsg dumps + * @kmsg_write_cnt: counter of total kmsg dump writes + * @oops_counter: counter of oops dumps + * @panic_counter: counter of panic dumps + * @recovered: whether finished recovering data from storage + * @on_panic: whether panic is happening + * @pstore_zone_info_lock: lock to @pstore_zone_info + * @pstore_zone_info: information from backend + * @pstore: structure for pstore + */ +struct psz_context { + struct pstore_zone **kpszs; + unsigned int kmsg_max_cnt; + unsigned int kmsg_read_cnt; + unsigned int kmsg_write_cnt; + /* + * These counters should be calculated during recovery. + * It records the oops/panic times after crashes rather than boots. + */ + unsigned int oops_counter; + unsigned int panic_counter; + atomic_t recovered; + atomic_t on_panic; + + /* + * pstore_zone_info_lock protects this entire structure during calls + * to register_pstore_zone()/unregister_pstore_zone(). + */ + struct mutex pstore_zone_info_lock; + struct pstore_zone_info *pstore_zone_info; + struct pstore_info pstore; +}; +static struct psz_context pstore_zone_cxt; + +/** + * enum psz_flush_mode - flush mode for psz_zone_write() + * + * @FLUSH_NONE: do not flush to storage but update data on memory + * @FLUSH_PART: just flush part of data including meta data to storage + * @FLUSH_META: just flush meta data of zone to storage + * @FLUSH_ALL: flush all of zone + */ +enum psz_flush_mode { + FLUSH_NONE = 0, + FLUSH_PART, + FLUSH_META, + FLUSH_ALL, +}; + +static inline int buffer_datalen(struct pstore_zone *zone) +{ + return atomic_read(&zone->buffer->datalen); +} + +static inline bool is_on_panic(void) +{ + return atomic_read(&pstore_zone_cxt.on_panic); +} + +static ssize_t psz_zone_read(struct pstore_zone *zone, char *buf, + size_t len, unsigned long off) +{ + if (!buf || !zone->buffer) + return -EINVAL; + if (off > zone->buffer_size) + return -EINVAL; + len = min_t(size_t, len, zone->buffer_size - off); + memcpy(buf, zone->buffer->data + off, len); + return len; +} + +static int psz_zone_write(struct pstore_zone *zone, + enum psz_flush_mode flush_mode, const char *buf, + size_t len, unsigned long off) +{ + struct pstore_zone_info *info = pstore_zone_cxt.pstore_zone_info; + ssize_t wcnt = 0; + ssize_t (*writeop)(const char *buf, size_t bytes, loff_t pos); + size_t wlen; + + if (off > zone->buffer_size) + return -EINVAL; + + wlen = min_t(size_t, len, zone->buffer_size - off); + if (buf && wlen) { + memcpy(zone->buffer->data + off, buf, wlen); + atomic_set(&zone->buffer->datalen, wlen + off); + } + + /* avoid to damage old records */ + if (!is_on_panic() && !atomic_read(&pstore_zone_cxt.recovered)) + goto dirty; + + writeop = is_on_panic() ? info->panic_write : info->write; + if (!writeop) + goto dirty; + + switch (flush_mode) { + case FLUSH_NONE: + if (unlikely(buf && wlen)) + goto dirty; + return 0; + case FLUSH_PART: + wcnt = writeop((const char *)zone->buffer->data + off, wlen, + zone->off + sizeof(*zone->buffer) + off); + if (wcnt != wlen) + goto dirty; + fallthrough; + case FLUSH_META: + wlen = sizeof(struct psz_buffer); + wcnt = writeop((const char *)zone->buffer, wlen, zone->off); + if (wcnt != wlen) + goto dirty; + break; + case FLUSH_ALL: + wlen = zone->buffer_size + sizeof(*zone->buffer); + wcnt = writeop((const char *)zone->buffer, wlen, zone->off); + if (wcnt != wlen) + goto dirty; + break; + } + + return 0; +dirty: + atomic_set(&zone->dirty, true); + return -EBUSY; +} + +static int psz_flush_dirty_zone(struct pstore_zone *zone) +{ + int ret; + + if (unlikely(!zone)) + return -EINVAL; + + if (unlikely(!atomic_read(&pstore_zone_cxt.recovered))) + return -EBUSY; + + if (!atomic_xchg(&zone->dirty, false)) + return 0; + + ret = psz_zone_write(zone, FLUSH_ALL, NULL, 0, 0); + if (ret) + atomic_set(&zone->dirty, true); + return ret; +} + +static int psz_flush_dirty_zones(struct pstore_zone **zones, unsigned int cnt) +{ + int i, ret; + struct pstore_zone *zone; + + if (!zones) + return -EINVAL; + + for (i = 0; i < cnt; i++) { + zone = zones[i]; + if (!zone) + return -EINVAL; + ret = psz_flush_dirty_zone(zone); + if (ret) + return ret; + } + return 0; +} + +static int psz_move_zone(struct pstore_zone *old, struct pstore_zone *new) +{ + const char *data = (const char *)old->buffer->data; + int ret; + + ret = psz_zone_write(new, FLUSH_ALL, data, buffer_datalen(old), 0); + if (ret) { + atomic_set(&new->buffer->datalen, 0); + atomic_set(&new->dirty, false); + return ret; + } + atomic_set(&old->buffer->datalen, 0); + return 0; +} + +static int psz_kmsg_recover_data(struct psz_context *cxt) +{ + struct pstore_zone_info *info = cxt->pstore_zone_info; + struct pstore_zone *zone = NULL; + struct psz_buffer *buf; + unsigned long i; + ssize_t rcnt; + + if (!info->read) + return -EINVAL; + + for (i = 0; i < cxt->kmsg_max_cnt; i++) { + zone = cxt->kpszs[i]; + if (unlikely(!zone)) + return -EINVAL; + if (atomic_read(&zone->dirty)) { + unsigned int wcnt = cxt->kmsg_write_cnt; + struct pstore_zone *new = cxt->kpszs[wcnt]; + int ret; + + ret = psz_move_zone(zone, new); + if (ret) { + pr_err("move zone from %lu to %d failed\n", + i, wcnt); + return ret; + } + cxt->kmsg_write_cnt = (wcnt + 1) % cxt->kmsg_max_cnt; + } + if (!zone->should_recover) + continue; + buf = zone->buffer; + rcnt = info->read((char *)buf, zone->buffer_size + sizeof(*buf), + zone->off); + if (rcnt != zone->buffer_size + sizeof(*buf)) + return (int)rcnt < 0 ? (int)rcnt : -EIO; + } + return 0; +} + +static int psz_kmsg_recover_meta(struct psz_context *cxt) +{ + struct pstore_zone_info *info = cxt->pstore_zone_info; + struct pstore_zone *zone; + size_t rcnt, len; + struct psz_buffer *buf; + struct psz_kmsg_header *hdr; + struct timespec64 time = { }; + unsigned long i; + /* + * Recover may on panic, we can't allocate any memory by kmalloc. + * So, we use local array instead. + */ + char buffer_header[sizeof(*buf) + sizeof(*hdr)] = {0}; + + if (!info->read) + return -EINVAL; + + len = sizeof(*buf) + sizeof(*hdr); + buf = (struct psz_buffer *)buffer_header; + for (i = 0; i < cxt->kmsg_max_cnt; i++) { + zone = cxt->kpszs[i]; + if (unlikely(!zone)) + return -EINVAL; + + rcnt = info->read((char *)buf, len, zone->off); + if (rcnt != len) { + pr_err("read %s with id %lu failed\n", zone->name, i); + return (int)rcnt < 0 ? (int)rcnt : -EIO; + } + + if (buf->sig != zone->buffer->sig) { + pr_debug("no valid data in kmsg dump zone %lu\n", i); + continue; + } + + if (zone->buffer_size < atomic_read(&buf->datalen)) { + pr_info("found overtop zone: %s: id %lu, off %lld, size %zu\n", + zone->name, i, zone->off, + zone->buffer_size); + continue; + } + + hdr = (struct psz_kmsg_header *)buf->data; + if (hdr->magic != PSTORE_KMSG_HEADER_MAGIC) { + pr_info("found invalid zone: %s: id %lu, off %lld, size %zu\n", + zone->name, i, zone->off, + zone->buffer_size); + continue; + } + + /* + * we get the newest zone, and the next one must be the oldest + * or unused zone, because we do write one by one like a circle. + */ + if (hdr->time.tv_sec >= time.tv_sec) { + time.tv_sec = hdr->time.tv_sec; + cxt->kmsg_write_cnt = (i + 1) % cxt->kmsg_max_cnt; + } + + if (hdr->reason == KMSG_DUMP_OOPS) + cxt->oops_counter = + max(cxt->oops_counter, hdr->counter); + else if (hdr->reason == KMSG_DUMP_PANIC) + cxt->panic_counter = + max(cxt->panic_counter, hdr->counter); + + if (!atomic_read(&buf->datalen)) { + pr_debug("found erased zone: %s: id %lu, off %lld, size %zu, datalen %d\n", + zone->name, i, zone->off, + zone->buffer_size, + atomic_read(&buf->datalen)); + continue; + } + + if (!is_on_panic()) + zone->should_recover = true; + pr_debug("found nice zone: %s: id %lu, off %lld, size %zu, datalen %d\n", + zone->name, i, zone->off, + zone->buffer_size, atomic_read(&buf->datalen)); + } + + return 0; +} + +static int psz_kmsg_recover(struct psz_context *cxt) +{ + int ret; + + if (!cxt->kpszs) + return 0; + + ret = psz_kmsg_recover_meta(cxt); + if (ret) + goto recover_fail; + + ret = psz_kmsg_recover_data(cxt); + if (ret) + goto recover_fail; + + return 0; +recover_fail: + pr_debug("psz_recover_kmsg failed\n"); + return ret; +} + +/** + * psz_recovery() - recover data from storage + * @cxt: the context of pstore/zone + * + * recovery means reading data back from storage after rebooting + * + * Return: 0 on success, others on failure. + */ +static inline int psz_recovery(struct psz_context *cxt) +{ + int ret; + + if (atomic_read(&cxt->recovered)) + return 0; + + ret = psz_kmsg_recover(cxt); + + if (unlikely(ret)) + pr_err("recover failed\n"); + else { + pr_debug("recover end!\n"); + atomic_set(&cxt->recovered, 1); + } + return ret; +} + +static int psz_pstore_open(struct pstore_info *psi) +{ + struct psz_context *cxt = psi->data; + + cxt->kmsg_read_cnt = 0; + return 0; +} + +static inline bool psz_ok(struct pstore_zone *zone) +{ + if (zone && zone->buffer && buffer_datalen(zone)) + return true; + return false; +} + +static inline int psz_kmsg_erase(struct psz_context *cxt, + struct pstore_zone *zone, struct pstore_record *record) +{ + struct psz_buffer *buffer = zone->buffer; + struct psz_kmsg_header *hdr = + (struct psz_kmsg_header *)buffer->data; + + if (unlikely(!psz_ok(zone))) + return 0; + /* this zone is already updated, no need to erase */ + if (record->count != hdr->counter) + return 0; + + atomic_set(&zone->buffer->datalen, 0); + return psz_zone_write(zone, FLUSH_META, NULL, 0, 0); +} + +static int psz_pstore_erase(struct pstore_record *record) +{ + struct psz_context *cxt = record->psi->data; + + switch (record->type) { + case PSTORE_TYPE_DMESG: + if (record->id >= cxt->kmsg_max_cnt) + return -EINVAL; + return psz_kmsg_erase(cxt, cxt->kpszs[record->id], record); + default: + return -EINVAL; + } +} + +static void psz_write_kmsg_hdr(struct pstore_zone *zone, + struct pstore_record *record) +{ + struct psz_context *cxt = record->psi->data; + struct psz_buffer *buffer = zone->buffer; + struct psz_kmsg_header *hdr = + (struct psz_kmsg_header *)buffer->data; + + hdr->magic = PSTORE_KMSG_HEADER_MAGIC; + hdr->compressed = record->compressed; + hdr->time.tv_sec = record->time.tv_sec; + hdr->time.tv_nsec = record->time.tv_nsec; + hdr->reason = record->reason; + if (hdr->reason == KMSG_DUMP_OOPS) + hdr->counter = ++cxt->oops_counter; + else if (hdr->reason == KMSG_DUMP_PANIC) + hdr->counter = ++cxt->panic_counter; + else + hdr->counter = 0; +} + +static inline int notrace psz_kmsg_write_record(struct psz_context *cxt, + struct pstore_record *record) +{ + size_t size, hlen; + struct pstore_zone *zone; + unsigned int zonenum; + + zonenum = cxt->kmsg_write_cnt; + zone = cxt->kpszs[zonenum]; + if (unlikely(!zone)) + return -ENOSPC; + cxt->kmsg_write_cnt = (zonenum + 1) % cxt->kmsg_max_cnt; + + pr_debug("write %s to zone id %d\n", zone->name, zonenum); + psz_write_kmsg_hdr(zone, record); + hlen = sizeof(struct psz_kmsg_header); + size = min_t(size_t, record->size, zone->buffer_size - hlen); + return psz_zone_write(zone, FLUSH_ALL, record->buf, size, hlen); +} + +static int notrace psz_kmsg_write(struct psz_context *cxt, + struct pstore_record *record) +{ + int ret; + + /* + * Explicitly only take the first part of any new crash. + * If our buffer is larger than kmsg_bytes, this can never happen, + * and if our buffer is smaller than kmsg_bytes, we don't want the + * report split across multiple records. + */ + if (record->part != 1) + return -ENOSPC; + + if (!cxt->kpszs) + return -ENOSPC; + + ret = psz_kmsg_write_record(cxt, record); + if (!ret) { + pr_debug("try to flush other dirty zones\n"); + psz_flush_dirty_zones(cxt->kpszs, cxt->kmsg_max_cnt); + } + + /* always return 0 as we had handled it on buffer */ + return 0; +} + +static int notrace psz_pstore_write(struct pstore_record *record) +{ + struct psz_context *cxt = record->psi->data; + + if (record->type == PSTORE_TYPE_DMESG && + record->reason == KMSG_DUMP_PANIC) + atomic_set(&cxt->on_panic, 1); + + switch (record->type) { + case PSTORE_TYPE_DMESG: + return psz_kmsg_write(cxt, record); + default: + return -EINVAL; + } +} + +static struct pstore_zone *psz_read_next_zone(struct psz_context *cxt) +{ + struct pstore_zone *zone = NULL; + + while (cxt->kmsg_read_cnt < cxt->kmsg_max_cnt) { + zone = cxt->kpszs[cxt->kmsg_read_cnt++]; + if (psz_ok(zone)) + return zone; + } + + return NULL; +} + +static int psz_kmsg_read_hdr(struct pstore_zone *zone, + struct pstore_record *record) +{ + struct psz_buffer *buffer = zone->buffer; + struct psz_kmsg_header *hdr = + (struct psz_kmsg_header *)buffer->data; + + if (hdr->magic != PSTORE_KMSG_HEADER_MAGIC) + return -EINVAL; + record->compressed = hdr->compressed; + record->time.tv_sec = hdr->time.tv_sec; + record->time.tv_nsec = hdr->time.tv_nsec; + record->reason = hdr->reason; + record->count = hdr->counter; + return 0; +} + +static ssize_t psz_kmsg_read(struct pstore_zone *zone, + struct pstore_record *record) +{ + ssize_t size, hlen = 0; + + size = buffer_datalen(zone); + /* Clear and skip this kmsg dump record if it has no valid header */ + if (psz_kmsg_read_hdr(zone, record)) { + atomic_set(&zone->buffer->datalen, 0); + atomic_set(&zone->dirty, 0); + return -ENOMSG; + } + size -= sizeof(struct psz_kmsg_header); + + if (!record->compressed) { + char *buf = kasprintf(GFP_KERNEL, "%s: Total %d times\n", + kmsg_dump_reason_str(record->reason), + record->count); + hlen = strlen(buf); + record->buf = krealloc(buf, hlen + size, GFP_KERNEL); + if (!record->buf) { + kfree(buf); + return -ENOMEM; + } + } else { + record->buf = kmalloc(size, GFP_KERNEL); + if (!record->buf) + return -ENOMEM; + } + + size = psz_zone_read(zone, record->buf + hlen, size, + sizeof(struct psz_kmsg_header)); + if (unlikely(size < 0)) { + kfree(record->buf); + return -ENOMSG; + } + + return size + hlen; +} + +static ssize_t psz_pstore_read(struct pstore_record *record) +{ + struct psz_context *cxt = record->psi->data; + ssize_t (*readop)(struct pstore_zone *zone, + struct pstore_record *record); + struct pstore_zone *zone; + ssize_t ret; + + /* before read, we must recover from storage */ + ret = psz_recovery(cxt); + if (ret) + return ret; + +next_zone: + zone = psz_read_next_zone(cxt); + if (!zone) + return 0; + + record->type = zone->type; + switch (record->type) { + case PSTORE_TYPE_DMESG: + readop = psz_kmsg_read; + record->id = cxt->kmsg_read_cnt - 1; + break; + default: + goto next_zone; + } + + ret = readop(zone, record); + if (ret == -ENOMSG) + goto next_zone; + return ret; +} + +static struct psz_context pstore_zone_cxt = { + .pstore_zone_info_lock = + __MUTEX_INITIALIZER(pstore_zone_cxt.pstore_zone_info_lock), + .recovered = ATOMIC_INIT(0), + .on_panic = ATOMIC_INIT(0), + .pstore = { + .owner = THIS_MODULE, + .open = psz_pstore_open, + .read = psz_pstore_read, + .write = psz_pstore_write, + .erase = psz_pstore_erase, + }, +}; + +static void psz_free_zone(struct pstore_zone **pszone) +{ + struct pstore_zone *zone = *pszone; + + if (!zone) + return; + + kfree(zone->buffer); + kfree(zone); + *pszone = NULL; +} + +static void psz_free_zones(struct pstore_zone ***pszones, unsigned int *cnt) +{ + struct pstore_zone **zones = *pszones; + + if (!zones) + return; + + while (*cnt > 0) { + (*cnt)--; + psz_free_zone(&(zones[*cnt])); + } + kfree(zones); + *pszones = NULL; +} + +static void psz_free_all_zones(struct psz_context *cxt) +{ + if (cxt->kpszs) + psz_free_zones(&cxt->kpszs, &cxt->kmsg_max_cnt); +} + +static struct pstore_zone *psz_init_zone(enum pstore_type_id type, + loff_t *off, size_t size) +{ + struct pstore_zone_info *info = pstore_zone_cxt.pstore_zone_info; + struct pstore_zone *zone; + const char *name = pstore_type_to_name(type); + + if (!size) + return NULL; + + if (*off + size > info->total_size) { + pr_err("no room for %s (0x%zx@0x%llx over 0x%lx)\n", + name, size, *off, info->total_size); + return ERR_PTR(-ENOMEM); + } + + zone = kzalloc(sizeof(struct pstore_zone), GFP_KERNEL); + if (!zone) + return ERR_PTR(-ENOMEM); + + zone->buffer = kmalloc(size, GFP_KERNEL); + if (!zone->buffer) { + kfree(zone); + return ERR_PTR(-ENOMEM); + } + memset(zone->buffer, 0xFF, size); + zone->off = *off; + zone->name = name; + zone->type = type; + zone->buffer_size = size - sizeof(struct psz_buffer); + zone->buffer->sig = type ^ PSZ_SIG; + atomic_set(&zone->dirty, 0); + atomic_set(&zone->buffer->datalen, 0); + + *off += size; + + pr_debug("pszone %s: off 0x%llx, %zu header, %zu data\n", zone->name, + zone->off, sizeof(*zone->buffer), zone->buffer_size); + return zone; +} + +static struct pstore_zone **psz_init_zones(enum pstore_type_id type, + loff_t *off, size_t total_size, ssize_t record_size, + unsigned int *cnt) +{ + struct pstore_zone_info *info = pstore_zone_cxt.pstore_zone_info; + struct pstore_zone **zones, *zone; + const char *name = pstore_type_to_name(type); + int c, i; + + *cnt = 0; + if (!total_size || !record_size) + return NULL; + + if (*off + total_size > info->total_size) { + pr_err("no room for zones %s (0x%zx@0x%llx over 0x%lx)\n", + name, total_size, *off, info->total_size); + return ERR_PTR(-ENOMEM); + } + + c = total_size / record_size; + zones = kcalloc(c, sizeof(*zones), GFP_KERNEL); + if (!zones) { + pr_err("allocate for zones %s failed\n", name); + return ERR_PTR(-ENOMEM); + } + memset(zones, 0, c * sizeof(*zones)); + + for (i = 0; i < c; i++) { + zone = psz_init_zone(type, off, record_size); + if (!zone || IS_ERR(zone)) { + pr_err("initialize zones %s failed\n", name); + psz_free_zones(&zones, &i); + return (void *)zone; + } + zones[i] = zone; + } + + *cnt = c; + return zones; +} + +static int psz_alloc_zones(struct psz_context *cxt) +{ + struct pstore_zone_info *info = cxt->pstore_zone_info; + loff_t off = 0; + int err; + size_t size; + + size = info->total_size; + cxt->kpszs = psz_init_zones(PSTORE_TYPE_DMESG, &off, size, + info->kmsg_size, &cxt->kmsg_max_cnt); + if (IS_ERR(cxt->kpszs)) { + err = PTR_ERR(cxt->kpszs); + cxt->kpszs = NULL; + goto fail_out; + } + + return 0; +fail_out: + return err; +} + +/** + * register_pstore_zone() - register to pstore/zone + * + * @info: back-end driver information. See &struct pstore_zone_info. + * + * Only one back-end at one time. + * + * Return: 0 on success, others on failure. + */ +int register_pstore_zone(struct pstore_zone_info *info) +{ + int err = -EINVAL; + struct psz_context *cxt = &pstore_zone_cxt; + + if (info->total_size < 4096) { + pr_warn("total_size must be >= 4096\n"); + return -EINVAL; + } + + if (!info->kmsg_size) { + pr_warn("at least one record size must be non-zero\n"); + return -EINVAL; + } + + if (!info->name || !info->name[0]) + return -EINVAL; + +#define check_size(name, size) { \ + if (info->name > 0 && info->name < (size)) { \ + pr_err(#name " must be over %d\n", (size)); \ + return -EINVAL; \ + } \ + if (info->name & (size - 1)) { \ + pr_err(#name " must be a multiple of %d\n", \ + (size)); \ + return -EINVAL; \ + } \ + } + + check_size(total_size, 4096); + check_size(kmsg_size, SECTOR_SIZE); + +#undef check_size + + /* + * the @read and @write must be applied. + * if no @read, pstore may mount failed. + * if no @write, pstore do not support to remove record file. + */ + if (!info->read || !info->write) { + pr_err("no valid general read/write interface\n"); + return -EINVAL; + } + + mutex_lock(&cxt->pstore_zone_info_lock); + if (cxt->pstore_zone_info) { + pr_warn("'%s' already loaded: ignoring '%s'\n", + cxt->pstore_zone_info->name, info->name); + mutex_unlock(&cxt->pstore_zone_info_lock); + return -EBUSY; + } + cxt->pstore_zone_info = info; + + pr_debug("register %s with properties:\n", info->name); + pr_debug("\ttotal size : %ld Bytes\n", info->total_size); + pr_debug("\tkmsg size : %ld Bytes\n", info->kmsg_size); + + err = psz_alloc_zones(cxt); + if (err) { + pr_err("alloc zones failed\n"); + goto fail_out; + } + + if (info->kmsg_size) { + cxt->pstore.bufsize = cxt->kpszs[0]->buffer_size - + sizeof(struct psz_kmsg_header); + cxt->pstore.buf = kzalloc(cxt->pstore.bufsize, GFP_KERNEL); + if (!cxt->pstore.buf) { + err = -ENOMEM; + goto fail_free; + } + } + cxt->pstore.data = cxt; + + pr_info("registered %s as backend for", info->name); + cxt->pstore.max_reason = info->max_reason; + cxt->pstore.name = info->name; + if (info->kmsg_size) { + cxt->pstore.flags |= PSTORE_FLAGS_DMESG; + pr_cont(" kmsg(%s", + kmsg_dump_reason_str(cxt->pstore.max_reason)); + if (cxt->pstore_zone_info->panic_write) + pr_cont(",panic_write"); + pr_cont(")"); + } + pr_cont("\n"); + + err = pstore_register(&cxt->pstore); + if (err) { + pr_err("registering with pstore failed\n"); + goto fail_free; + } + mutex_unlock(&pstore_zone_cxt.pstore_zone_info_lock); + + return 0; + +fail_free: + kfree(cxt->pstore.buf); + cxt->pstore.buf = NULL; + cxt->pstore.bufsize = 0; + psz_free_all_zones(cxt); +fail_out: + pstore_zone_cxt.pstore_zone_info = NULL; + mutex_unlock(&pstore_zone_cxt.pstore_zone_info_lock); + return err; +} +EXPORT_SYMBOL_GPL(register_pstore_zone); + +/** + * unregister_pstore_zone() - unregister to pstore/zone + * + * @info: back-end driver information. See struct pstore_zone_info. + */ +void unregister_pstore_zone(struct pstore_zone_info *info) +{ + struct psz_context *cxt = &pstore_zone_cxt; + + mutex_lock(&cxt->pstore_zone_info_lock); + if (!cxt->pstore_zone_info) { + mutex_unlock(&cxt->pstore_zone_info_lock); + return; + } + + /* Stop incoming writes from pstore. */ + pstore_unregister(&cxt->pstore); + + /* Clean up allocations. */ + kfree(cxt->pstore.buf); + cxt->pstore.buf = NULL; + cxt->pstore.bufsize = 0; + cxt->pstore_zone_info = NULL; + + psz_free_all_zones(cxt); + + /* Clear counters and zone state. */ + cxt->oops_counter = 0; + cxt->panic_counter = 0; + atomic_set(&cxt->recovered, 0); + atomic_set(&cxt->on_panic, 0); + + mutex_unlock(&cxt->pstore_zone_info_lock); +} +EXPORT_SYMBOL_GPL(unregister_pstore_zone); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("WeiXiong Liao "); +MODULE_AUTHOR("Kees Cook "); +MODULE_DESCRIPTION("Storage Manager for pstore/blk"); diff --git a/include/linux/pstore_zone.h b/include/linux/pstore_zone.h new file mode 100644 index 000000000000..eb005d9ae40c --- /dev/null +++ b/include/linux/pstore_zone.h @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef __PSTORE_ZONE_H_ +#define __PSTORE_ZONE_H_ + +#include + +typedef ssize_t (*pstore_zone_read_op)(char *, size_t, loff_t); +typedef ssize_t (*pstore_zone_write_op)(const char *, size_t, loff_t); +/** + * struct pstore_zone_info - pstore/zone back-end driver structure + * + * @owner: Module which is responsible for this back-end driver. + * @name: Name of the back-end driver. + * @total_size: The total size in bytes pstore/zone can use. It must be greater + * than 4096 and be multiple of 4096. + * @kmsg_size: The size of oops/panic zone. Zero means disabled, otherwise, + * it must be multiple of SECTOR_SIZE(512 Bytes). + * @max_reason: Maximum kmsg dump reason to store. + * @read: The general read operation. Both of the function parameters + * @size and @offset are relative value to storage. + * On success, the number of bytes should be returned, others + * means error. + * @write: The same as @read. + * @panic_write:The write operation only used for panic case. It's optional + * if you do not care panic log. The parameters and return value + * are the same as @read. + */ +struct pstore_zone_info { + struct module *owner; + const char *name; + + unsigned long total_size; + unsigned long kmsg_size; + int max_reason; + pstore_zone_read_op read; + pstore_zone_write_op write; + pstore_zone_write_op panic_write; +}; + +extern int register_pstore_zone(struct pstore_zone_info *info); +extern void unregister_pstore_zone(struct pstore_zone_info *info); + +#endif From 17639f67c1d61aba3c05e7703f75cd468f9d484f Mon Sep 17 00:00:00 2001 From: WeiXiong Liao Date: Wed, 25 Mar 2020 16:54:57 +0800 Subject: [PATCH 26/35] pstore/blk: Introduce backend for block devices pstore/blk is similar to pstore/ram, but uses a block device as the storage rather than persistent ram. The pstore/blk backend solves two common use-cases that used to preclude using pstore/ram: - not all devices have a battery that could be used to persist regular RAM across power failures. - most embedded intelligent equipment have no persistent ram, which increases costs, instead preferring cheaper solutions, like block devices. pstore/blk provides separate configurations for the end user and for the block drivers. User configuration determines how pstore/blk operates, such as record sizes, max kmsg dump reasons, etc. These can be set by Kconfig and/or module parameters, but module parameter have priority over Kconfig. Driver configuration covers all the details about the target block device, such as total size of the device and how to perform read/write operations. These are provided by block drivers, calling pstore_register_blkdev(), including an optional panic_write callback used to bypass regular IO APIs in an effort to avoid potentially destabilized kernel code during a panic. Signed-off-by: WeiXiong Liao Link: https://lore.kernel.org/lkml/20200511233229.27745-3-keescook@chromium.org/ Co-developed-by: Kees Cook Signed-off-by: Kees Cook --- fs/pstore/Kconfig | 64 ++++++ fs/pstore/Makefile | 3 + fs/pstore/blk.c | 434 +++++++++++++++++++++++++++++++++++++ include/linux/pstore_blk.h | 51 +++++ 4 files changed, 552 insertions(+) create mode 100644 fs/pstore/blk.c create mode 100644 include/linux/pstore_blk.h diff --git a/fs/pstore/Kconfig b/fs/pstore/Kconfig index 98d2457bdd9f..958bec75f907 100644 --- a/fs/pstore/Kconfig +++ b/fs/pstore/Kconfig @@ -160,3 +160,67 @@ config PSTORE_ZONE help The common layer for pstore/blk (and pstore/ram in the future) to manage storage in zones. + +config PSTORE_BLK + tristate "Log panic/oops to a block device" + depends on PSTORE + depends on BLOCK + select PSTORE_ZONE + default n + help + This enables panic and oops message to be logged to a block dev + where it can be read back at some later point. + + If unsure, say N. + +config PSTORE_BLK_BLKDEV + string "block device identifier" + depends on PSTORE_BLK + default "" + help + Which block device should be used for pstore/blk. + + It accept the following variants: + 1) device number in hexadecimal representation, + with no leading 0x, for example b302. + 2) /dev/ represents the device number of disk + 3) /dev/ represents the device number + of partition - device number of disk plus the partition number + 4) /dev/p - same as the above, this form is + used when disk name of partitioned disk ends with a digit. + 5) PARTUUID=00112233-4455-6677-8899-AABBCCDDEEFF representing the + unique id of a partition if the partition table provides it. + The UUID may be either an EFI/GPT UUID, or refer to an MSDOS + partition using the format SSSSSSSS-PP, where SSSSSSSS is a zero- + filled hex representation of the 32-bit "NT disk signature", and PP + is a zero-filled hex representation of the 1-based partition number. + 6) PARTUUID=/PARTNROFF= to select a partition in relation + to a partition with a known unique id. + 7) : major and minor number of the device separated by + a colon. + + NOTE that, both Kconfig and module parameters can configure + pstore/blk, but module parameters have priority over Kconfig. + +config PSTORE_BLK_KMSG_SIZE + int "Size in Kbytes of kmsg dump log to store" + depends on PSTORE_BLK + default 64 + help + This just sets size of kmsg dump (oops, panic, etc) log for + pstore/blk. The size is in KB and must be a multiple of 4. + + NOTE that, both Kconfig and module parameters can configure + pstore/blk, but module parameters have priority over Kconfig. + +config PSTORE_BLK_MAX_REASON + int "Maximum kmsg dump reason to store" + depends on PSTORE_BLK + default 2 + help + The maximum reason for kmsg dumps to store. The default is + 2 (KMSG_DUMP_OOPS), see include/linux/kmsg_dump.h's + enum kmsg_dump_reason for more details. + + NOTE that, both Kconfig and module parameters can configure + pstore/blk, but module parameters have priority over Kconfig. diff --git a/fs/pstore/Makefile b/fs/pstore/Makefile index 58a967cbe4af..c270467aeece 100644 --- a/fs/pstore/Makefile +++ b/fs/pstore/Makefile @@ -15,3 +15,6 @@ obj-$(CONFIG_PSTORE_RAM) += ramoops.o pstore_zone-objs += zone.o obj-$(CONFIG_PSTORE_ZONE) += pstore_zone.o + +pstore_blk-objs += blk.o +obj-$(CONFIG_PSTORE_BLK) += pstore_blk.o diff --git a/fs/pstore/blk.c b/fs/pstore/blk.c new file mode 100644 index 000000000000..8f131c6e412e --- /dev/null +++ b/fs/pstore/blk.c @@ -0,0 +1,434 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Implements pstore backend driver that write to block (or non-block) storage + * devices, using the pstore/zone API. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include "../../block/blk.h" +#include +#include +#include +#include +#include +#include +#include +#include + +static long kmsg_size = CONFIG_PSTORE_BLK_KMSG_SIZE; +module_param(kmsg_size, long, 0400); +MODULE_PARM_DESC(kmsg_size, "kmsg dump record size in kbytes"); + +static int max_reason = CONFIG_PSTORE_BLK_MAX_REASON; +module_param(max_reason, int, 0400); +MODULE_PARM_DESC(max_reason, + "maximum reason for kmsg dump (default 2: Oops and Panic)"); + +/* + * blkdev - the block device to use for pstore storage + * + * Usually, this will be a partition of a block device. + * + * blkdev accepts the following variants: + * 1) device number in hexadecimal representation, + * with no leading 0x, for example b302. + * 2) /dev/ represents the device number of disk + * 3) /dev/ represents the device number + * of partition - device number of disk plus the partition number + * 4) /dev/p - same as the above, that form is + * used when disk name of partitioned disk ends on a digit. + * 5) PARTUUID=00112233-4455-6677-8899-AABBCCDDEEFF representing the + * unique id of a partition if the partition table provides it. + * The UUID may be either an EFI/GPT UUID, or refer to an MSDOS + * partition using the format SSSSSSSS-PP, where SSSSSSSS is a zero- + * filled hex representation of the 32-bit "NT disk signature", and PP + * is a zero-filled hex representation of the 1-based partition number. + * 6) PARTUUID=/PARTNROFF= to select a partition in relation to + * a partition with a known unique id. + * 7) : major and minor number of the device separated by + * a colon. + */ +static char blkdev[80] = CONFIG_PSTORE_BLK_BLKDEV; +module_param_string(blkdev, blkdev, 80, 0400); +MODULE_PARM_DESC(blkdev, "block device for pstore storage"); + +/* + * All globals must only be accessed under the pstore_blk_lock + * during the register/unregister functions. + */ +static DEFINE_MUTEX(pstore_blk_lock); +static struct block_device *psblk_bdev; +static struct pstore_zone_info *pstore_zone_info; +static pstore_blk_panic_write_op blkdev_panic_write; + +struct bdev_info { + dev_t devt; + sector_t nr_sects; + sector_t start_sect; +}; + +/** + * struct pstore_device_info - back-end pstore/blk driver structure. + * + * @total_size: The total size in bytes pstore/blk can use. It must be greater + * than 4096 and be multiple of 4096. + * @flags: Refer to macro starting with PSTORE_FLAGS defined in + * linux/pstore.h. It means what front-ends this device support. + * Zero means all backends for compatible. + * @read: The general read operation. Both of the function parameters + * @size and @offset are relative value to bock device (not the + * whole disk). + * On success, the number of bytes should be returned, others + * means error. + * @write: The same as @read. + * @panic_write:The write operation only used for panic case. It's optional + * if you do not care panic log. The parameters and return value + * are the same as @read. + */ +struct pstore_device_info { + unsigned long total_size; + unsigned int flags; + pstore_zone_read_op read; + pstore_zone_write_op write; + pstore_zone_write_op panic_write; +}; + +static int psblk_register_do(struct pstore_device_info *dev) +{ + int ret; + + if (!dev || !dev->total_size || !dev->read || !dev->write) + return -EINVAL; + + mutex_lock(&pstore_blk_lock); + + /* someone already registered before */ + if (pstore_zone_info) { + mutex_unlock(&pstore_blk_lock); + return -EBUSY; + } + pstore_zone_info = kzalloc(sizeof(struct pstore_zone_info), GFP_KERNEL); + if (!pstore_zone_info) { + mutex_unlock(&pstore_blk_lock); + return -ENOMEM; + } + + /* zero means not limit on which backends to attempt to store. */ + if (!dev->flags) + dev->flags = UINT_MAX; + +#define verify_size(name, alignsize, enabled) { \ + long _##name_ = (enabled) ? (name) : 0; \ + _##name_ = _##name_ <= 0 ? 0 : (_##name_ * 1024); \ + if (_##name_ & ((alignsize) - 1)) { \ + pr_info(#name " must align to %d\n", \ + (alignsize)); \ + _##name_ = ALIGN(name, (alignsize)); \ + } \ + name = _##name_ / 1024; \ + pstore_zone_info->name = _##name_; \ + } + + verify_size(kmsg_size, 4096, dev->flags & PSTORE_FLAGS_DMESG); +#undef verify_size + + pstore_zone_info->total_size = dev->total_size; + pstore_zone_info->max_reason = max_reason; + pstore_zone_info->read = dev->read; + pstore_zone_info->write = dev->write; + pstore_zone_info->panic_write = dev->panic_write; + pstore_zone_info->name = KBUILD_MODNAME; + pstore_zone_info->owner = THIS_MODULE; + + ret = register_pstore_zone(pstore_zone_info); + if (ret) { + kfree(pstore_zone_info); + pstore_zone_info = NULL; + } + mutex_unlock(&pstore_blk_lock); + return ret; +} + +static void psblk_unregister_do(struct pstore_device_info *dev) +{ + mutex_lock(&pstore_blk_lock); + if (pstore_zone_info && pstore_zone_info->read == dev->read) { + unregister_pstore_zone(pstore_zone_info); + kfree(pstore_zone_info); + pstore_zone_info = NULL; + } + mutex_unlock(&pstore_blk_lock); +} + +/** + * psblk_get_bdev() - open block device + * + * @holder: Exclusive holder identifier + * @info: Information about bdev to fill in + * + * Return: pointer to block device on success and others on error. + * + * On success, the returned block_device has reference count of one. + */ +static struct block_device *psblk_get_bdev(void *holder, + struct bdev_info *info) +{ + struct block_device *bdev = ERR_PTR(-ENODEV); + fmode_t mode = FMODE_READ | FMODE_WRITE; + sector_t nr_sects; + + lockdep_assert_held(&pstore_blk_lock); + + if (pstore_zone_info) + return ERR_PTR(-EBUSY); + + if (!blkdev[0]) + return ERR_PTR(-ENODEV); + + if (holder) + mode |= FMODE_EXCL; + bdev = blkdev_get_by_path(blkdev, mode, holder); + if (IS_ERR(bdev)) { + dev_t devt; + + devt = name_to_dev_t(blkdev); + if (devt == 0) + return ERR_PTR(-ENODEV); + bdev = blkdev_get_by_dev(devt, mode, holder); + if (IS_ERR(bdev)) + return bdev; + } + + nr_sects = part_nr_sects_read(bdev->bd_part); + if (!nr_sects) { + pr_err("not enough space for '%s'\n", blkdev); + blkdev_put(bdev, mode); + return ERR_PTR(-ENOSPC); + } + + if (info) { + info->devt = bdev->bd_dev; + info->nr_sects = nr_sects; + info->start_sect = get_start_sect(bdev); + } + + return bdev; +} + +static void psblk_put_bdev(struct block_device *bdev, void *holder) +{ + fmode_t mode = FMODE_READ | FMODE_WRITE; + + lockdep_assert_held(&pstore_blk_lock); + + if (!bdev) + return; + + if (holder) + mode |= FMODE_EXCL; + blkdev_put(bdev, mode); +} + +static ssize_t psblk_generic_blk_read(char *buf, size_t bytes, loff_t pos) +{ + struct block_device *bdev = psblk_bdev; + struct file file; + struct kiocb kiocb; + struct iov_iter iter; + struct kvec iov = {.iov_base = buf, .iov_len = bytes}; + + if (!bdev) + return -ENODEV; + + memset(&file, 0, sizeof(struct file)); + file.f_mapping = bdev->bd_inode->i_mapping; + file.f_flags = O_DSYNC | __O_SYNC | O_NOATIME; + file.f_inode = bdev->bd_inode; + file_ra_state_init(&file.f_ra, file.f_mapping); + + init_sync_kiocb(&kiocb, &file); + kiocb.ki_pos = pos; + iov_iter_kvec(&iter, READ, &iov, 1, bytes); + + return generic_file_read_iter(&kiocb, &iter); +} + +static ssize_t psblk_generic_blk_write(const char *buf, size_t bytes, + loff_t pos) +{ + struct block_device *bdev = psblk_bdev; + struct iov_iter iter; + struct kiocb kiocb; + struct file file; + ssize_t ret; + struct kvec iov = {.iov_base = (void *)buf, .iov_len = bytes}; + + if (!bdev) + return -ENODEV; + + /* Console/Ftrace backend may handle buffer until flush dirty zones */ + if (in_interrupt() || irqs_disabled()) + return -EBUSY; + + memset(&file, 0, sizeof(struct file)); + file.f_mapping = bdev->bd_inode->i_mapping; + file.f_flags = O_DSYNC | __O_SYNC | O_NOATIME; + file.f_inode = bdev->bd_inode; + + init_sync_kiocb(&kiocb, &file); + kiocb.ki_pos = pos; + iov_iter_kvec(&iter, WRITE, &iov, 1, bytes); + + inode_lock(bdev->bd_inode); + ret = generic_write_checks(&kiocb, &iter); + if (ret > 0) + ret = generic_perform_write(&file, &iter, pos); + inode_unlock(bdev->bd_inode); + + if (likely(ret > 0)) { + const struct file_operations f_op = {.fsync = blkdev_fsync}; + + file.f_op = &f_op; + kiocb.ki_pos += ret; + ret = generic_write_sync(&kiocb, ret); + } + return ret; +} + +static ssize_t psblk_blk_panic_write(const char *buf, size_t size, + loff_t off) +{ + int ret; + + if (!blkdev_panic_write) + return -EOPNOTSUPP; + + /* size and off must align to SECTOR_SIZE for block device */ + ret = blkdev_panic_write(buf, off >> SECTOR_SHIFT, + size >> SECTOR_SHIFT); + return ret ? -EIO : size; +} + +static int __register_pstore_blk(struct pstore_blk_info *info) +{ + char bdev_name[BDEVNAME_SIZE]; + struct block_device *bdev; + struct pstore_device_info dev; + struct bdev_info binfo; + void *holder = blkdev; + int ret = -ENODEV; + + lockdep_assert_held(&pstore_blk_lock); + + /* hold bdev exclusively */ + memset(&binfo, 0, sizeof(binfo)); + bdev = psblk_get_bdev(holder, &binfo); + if (IS_ERR(bdev)) { + pr_err("failed to open '%s'!\n", blkdev); + return PTR_ERR(bdev); + } + + /* only allow driver matching the @blkdev */ + if (!binfo.devt || MAJOR(binfo.devt) != info->major) { + pr_debug("invalid major %u (expect %u)\n", + info->major, MAJOR(binfo.devt)); + ret = -ENODEV; + goto err_put_bdev; + } + + /* psblk_bdev must be assigned before register to pstore/blk */ + psblk_bdev = bdev; + blkdev_panic_write = info->panic_write; + + /* Copy back block device details. */ + info->devt = binfo.devt; + info->nr_sects = binfo.nr_sects; + info->start_sect = binfo.start_sect; + + memset(&dev, 0, sizeof(dev)); + dev.total_size = info->nr_sects << SECTOR_SHIFT; + dev.flags = info->flags; + dev.read = psblk_generic_blk_read; + dev.write = psblk_generic_blk_write; + dev.panic_write = info->panic_write ? psblk_blk_panic_write : NULL; + + ret = psblk_register_do(&dev); + if (ret) + goto err_put_bdev; + + bdevname(bdev, bdev_name); + pr_info("attached %s%s\n", bdev_name, + info->panic_write ? "" : " (no dedicated panic_write!)"); + return 0; + +err_put_bdev: + psblk_bdev = NULL; + blkdev_panic_write = NULL; + psblk_put_bdev(bdev, holder); + return ret; +} + +/** + * register_pstore_blk() - register block device to pstore/blk + * + * @info: details on the desired block device interface + * + * Return: + * * 0 - OK + * * Others - something error. + */ +int register_pstore_blk(struct pstore_blk_info *info) +{ + int ret; + + mutex_lock(&pstore_blk_lock); + ret = __register_pstore_blk(info); + mutex_unlock(&pstore_blk_lock); + + return ret; +} +EXPORT_SYMBOL_GPL(register_pstore_blk); + +static void __unregister_pstore_blk(unsigned int major) +{ + struct pstore_device_info dev = { .read = psblk_generic_blk_read }; + void *holder = blkdev; + + lockdep_assert_held(&pstore_blk_lock); + if (psblk_bdev && MAJOR(psblk_bdev->bd_dev) == major) { + psblk_unregister_do(&dev); + psblk_put_bdev(psblk_bdev, holder); + blkdev_panic_write = NULL; + psblk_bdev = NULL; + } +} + +/** + * unregister_pstore_blk() - unregister block device from pstore/blk + * + * @major: the major device number of device + */ +void unregister_pstore_blk(unsigned int major) +{ + mutex_lock(&pstore_blk_lock); + __unregister_pstore_blk(major); + mutex_unlock(&pstore_blk_lock); +} +EXPORT_SYMBOL_GPL(unregister_pstore_blk); + +static void __exit pstore_blk_exit(void) +{ + mutex_lock(&pstore_blk_lock); + if (psblk_bdev) + __unregister_pstore_blk(MAJOR(psblk_bdev->bd_dev)); + mutex_unlock(&pstore_blk_lock); +} +module_exit(pstore_blk_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("WeiXiong Liao "); +MODULE_AUTHOR("Kees Cook "); +MODULE_DESCRIPTION("pstore backend for block devices"); diff --git a/include/linux/pstore_blk.h b/include/linux/pstore_blk.h new file mode 100644 index 000000000000..4501977b1336 --- /dev/null +++ b/include/linux/pstore_blk.h @@ -0,0 +1,51 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef __PSTORE_BLK_H_ +#define __PSTORE_BLK_H_ + +#include +#include +#include + +/** + * typedef pstore_blk_panic_write_op - panic write operation to block device + * + * @buf: the data to write + * @start_sect: start sector to block device + * @sects: sectors count on buf + * + * Return: On success, zero should be returned. Others mean error. + * + * Panic write to block device must be aligned to SECTOR_SIZE. + */ +typedef int (*pstore_blk_panic_write_op)(const char *buf, sector_t start_sect, + sector_t sects); + +/** + * struct pstore_blk_info - pstore/blk registration details + * + * @major: Which major device number to support with pstore/blk + * @flags: The supported PSTORE_FLAGS_* from linux/pstore.h. + * @panic_write:The write operation only used for the panic case. + * This can be NULL, but is recommended to avoid losing + * crash data if the kernel's IO path or work queues are + * broken during a panic. + * @devt: The dev_t that pstore/blk has attached to. + * @nr_sects: Number of sectors on @devt. + * @start_sect: Starting sector on @devt. + */ +struct pstore_blk_info { + unsigned int major; + unsigned int flags; + pstore_blk_panic_write_op panic_write; + + /* Filled in by pstore/blk after registration. */ + dev_t devt; + sector_t nr_sects; + sector_t start_sect; +}; + +int register_pstore_blk(struct pstore_blk_info *info); +void unregister_pstore_blk(unsigned int major); + +#endif From 0dc068265a1c5923ffebf40388fbe93050a77ad1 Mon Sep 17 00:00:00 2001 From: WeiXiong Liao Date: Wed, 25 Mar 2020 16:54:59 +0800 Subject: [PATCH 27/35] pstore/zone,blk: Add support for pmsg frontend Add pmsg support to pstore/blk (through pstore/zone). To enable, pmsg_size must be greater than 0 and a multiple of 4096. Signed-off-by: WeiXiong Liao Link: https://lore.kernel.org/lkml/20200511233229.27745-4-keescook@chromium.org/ Co-developed-by: Colin Ian King Signed-off-by: Colin Ian King Link: https://lore.kernel.org/lkml/20200512171932.222102-1-colin.king@canonical.com Co-developed-by: Kees Cook Signed-off-by: Kees Cook --- fs/pstore/Kconfig | 12 ++ fs/pstore/blk.c | 9 ++ fs/pstore/zone.c | 268 ++++++++++++++++++++++++++++++++++-- include/linux/pstore_zone.h | 2 + 4 files changed, 282 insertions(+), 9 deletions(-) diff --git a/fs/pstore/Kconfig b/fs/pstore/Kconfig index 958bec75f907..ef01c48f0ff7 100644 --- a/fs/pstore/Kconfig +++ b/fs/pstore/Kconfig @@ -224,3 +224,15 @@ config PSTORE_BLK_MAX_REASON NOTE that, both Kconfig and module parameters can configure pstore/blk, but module parameters have priority over Kconfig. + +config PSTORE_BLK_PMSG_SIZE + int "Size in Kbytes of pmsg to store" + depends on PSTORE_BLK + depends on PSTORE_PMSG + default 64 + help + This just sets size of pmsg (pmsg_size) for pstore/blk. The size is + in KB and must be a multiple of 4. + + NOTE that, both Kconfig and module parameters can configure + pstore/blk, but module parameters have priority over Kconfig. diff --git a/fs/pstore/blk.c b/fs/pstore/blk.c index 8f131c6e412e..a9e7078f08d6 100644 --- a/fs/pstore/blk.c +++ b/fs/pstore/blk.c @@ -27,6 +27,14 @@ module_param(max_reason, int, 0400); MODULE_PARM_DESC(max_reason, "maximum reason for kmsg dump (default 2: Oops and Panic)"); +#if IS_ENABLED(CONFIG_PSTORE_PMSG) +static long pmsg_size = CONFIG_PSTORE_BLK_PMSG_SIZE; +#else +static long pmsg_size = -1; +#endif +module_param(pmsg_size, long, 0400); +MODULE_PARM_DESC(pmsg_size, "pmsg size in kbytes"); + /* * blkdev - the block device to use for pstore storage * @@ -133,6 +141,7 @@ static int psblk_register_do(struct pstore_device_info *dev) } verify_size(kmsg_size, 4096, dev->flags & PSTORE_FLAGS_DMESG); + verify_size(pmsg_size, 4096, dev->flags & PSTORE_FLAGS_PMSG); #undef verify_size pstore_zone_info->total_size = dev->total_size; diff --git a/fs/pstore/zone.c b/fs/pstore/zone.c index 362d72a24012..2f8e3b349edb 100644 --- a/fs/pstore/zone.c +++ b/fs/pstore/zone.c @@ -27,12 +27,14 @@ * * @sig: signature to indicate header (PSZ_SIG xor PSZONE-type value) * @datalen: length of data in @data + * @start: offset into @data where the beginning of the stored bytes begin * @data: zone data. */ struct psz_buffer { #define PSZ_SIG (0x43474244) /* DBGC */ uint32_t sig; atomic_t datalen; + atomic_t start; uint8_t data[]; }; @@ -88,9 +90,11 @@ struct pstore_zone { * struct psz_context - all about running state of pstore/zone * * @kpszs: kmsg dump storage zones + * @ppsz: pmsg storage zone * @kmsg_max_cnt: max count of @kpszs * @kmsg_read_cnt: counter of total read kmsg dumps * @kmsg_write_cnt: counter of total kmsg dump writes + * @pmsg_read_cnt: counter of total read pmsg zone * @oops_counter: counter of oops dumps * @panic_counter: counter of panic dumps * @recovered: whether finished recovering data from storage @@ -101,9 +105,11 @@ struct pstore_zone { */ struct psz_context { struct pstore_zone **kpszs; + struct pstore_zone *ppsz; unsigned int kmsg_max_cnt; unsigned int kmsg_read_cnt; unsigned int kmsg_write_cnt; + unsigned int pmsg_read_cnt; /* * These counters should be calculated during recovery. * It records the oops/panic times after crashes rather than boots. @@ -143,15 +149,20 @@ static inline int buffer_datalen(struct pstore_zone *zone) return atomic_read(&zone->buffer->datalen); } +static inline int buffer_start(struct pstore_zone *zone) +{ + return atomic_read(&zone->buffer->start); +} + static inline bool is_on_panic(void) { return atomic_read(&pstore_zone_cxt.on_panic); } -static ssize_t psz_zone_read(struct pstore_zone *zone, char *buf, +static ssize_t psz_zone_read_buffer(struct pstore_zone *zone, char *buf, size_t len, unsigned long off) { - if (!buf || !zone->buffer) + if (!buf || !zone || !zone->buffer) return -EINVAL; if (off > zone->buffer_size) return -EINVAL; @@ -160,6 +171,18 @@ static ssize_t psz_zone_read(struct pstore_zone *zone, char *buf, return len; } +static int psz_zone_read_oldbuf(struct pstore_zone *zone, char *buf, + size_t len, unsigned long off) +{ + if (!buf || !zone || !zone->oldbuf) + return -EINVAL; + if (off > zone->buffer_size) + return -EINVAL; + len = min_t(size_t, len, zone->buffer_size - off); + memcpy(buf, zone->oldbuf->data + off, len); + return 0; +} + static int psz_zone_write(struct pstore_zone *zone, enum psz_flush_mode flush_mode, const char *buf, size_t len, unsigned long off) @@ -415,6 +438,93 @@ static int psz_kmsg_recover(struct psz_context *cxt) return ret; } +static int psz_recover_zone(struct psz_context *cxt, struct pstore_zone *zone) +{ + struct pstore_zone_info *info = cxt->pstore_zone_info; + struct psz_buffer *oldbuf, tmpbuf; + int ret = 0; + char *buf; + ssize_t rcnt, len, start, off; + + if (!zone || zone->oldbuf) + return 0; + + if (is_on_panic()) { + /* save data as much as possible */ + psz_flush_dirty_zone(zone); + return 0; + } + + if (unlikely(!info->read)) + return -EINVAL; + + len = sizeof(struct psz_buffer); + rcnt = info->read((char *)&tmpbuf, len, zone->off); + if (rcnt != len) { + pr_debug("read zone %s failed\n", zone->name); + return (int)rcnt < 0 ? (int)rcnt : -EIO; + } + + if (tmpbuf.sig != zone->buffer->sig) { + pr_debug("no valid data in zone %s\n", zone->name); + return 0; + } + + if (zone->buffer_size < atomic_read(&tmpbuf.datalen) || + zone->buffer_size < atomic_read(&tmpbuf.start)) { + pr_info("found overtop zone: %s: off %lld, size %zu\n", + zone->name, zone->off, zone->buffer_size); + /* just keep going */ + return 0; + } + + if (!atomic_read(&tmpbuf.datalen)) { + pr_debug("found erased zone: %s: off %lld, size %zu, datalen %d\n", + zone->name, zone->off, zone->buffer_size, + atomic_read(&tmpbuf.datalen)); + return 0; + } + + pr_debug("found nice zone: %s: off %lld, size %zu, datalen %d\n", + zone->name, zone->off, zone->buffer_size, + atomic_read(&tmpbuf.datalen)); + + len = atomic_read(&tmpbuf.datalen) + sizeof(*oldbuf); + oldbuf = kzalloc(len, GFP_KERNEL); + if (!oldbuf) + return -ENOMEM; + + memcpy(oldbuf, &tmpbuf, sizeof(*oldbuf)); + buf = (char *)oldbuf + sizeof(*oldbuf); + len = atomic_read(&oldbuf->datalen); + start = atomic_read(&oldbuf->start); + off = zone->off + sizeof(*oldbuf); + + /* get part of data */ + rcnt = info->read(buf, len - start, off + start); + if (rcnt != len - start) { + pr_err("read zone %s failed\n", zone->name); + ret = (int)rcnt < 0 ? (int)rcnt : -EIO; + goto free_oldbuf; + } + + /* get the rest of data */ + rcnt = info->read(buf + len - start, start, off); + if (rcnt != start) { + pr_err("read zone %s failed\n", zone->name); + ret = (int)rcnt < 0 ? (int)rcnt : -EIO; + goto free_oldbuf; + } + + zone->oldbuf = oldbuf; + psz_flush_dirty_zone(zone); + return 0; + +free_oldbuf: + kfree(oldbuf); + return ret; +} + /** * psz_recovery() - recover data from storage * @cxt: the context of pstore/zone @@ -431,7 +541,12 @@ static inline int psz_recovery(struct psz_context *cxt) return 0; ret = psz_kmsg_recover(cxt); + if (ret) + goto out; + ret = psz_recover_zone(cxt, cxt->ppsz); + +out: if (unlikely(ret)) pr_err("recover failed\n"); else { @@ -446,9 +561,17 @@ static int psz_pstore_open(struct pstore_info *psi) struct psz_context *cxt = psi->data; cxt->kmsg_read_cnt = 0; + cxt->pmsg_read_cnt = 0; return 0; } +static inline bool psz_old_ok(struct pstore_zone *zone) +{ + if (zone && zone->oldbuf && atomic_read(&zone->oldbuf->datalen)) + return true; + return false; +} + static inline bool psz_ok(struct pstore_zone *zone) { if (zone && zone->buffer && buffer_datalen(zone)) @@ -473,6 +596,25 @@ static inline int psz_kmsg_erase(struct psz_context *cxt, return psz_zone_write(zone, FLUSH_META, NULL, 0, 0); } +static inline int psz_record_erase(struct psz_context *cxt, + struct pstore_zone *zone) +{ + if (unlikely(!psz_old_ok(zone))) + return 0; + + kfree(zone->oldbuf); + zone->oldbuf = NULL; + /* + * if there are new data in zone buffer, that means the old data + * are already invalid. It is no need to flush 0 (erase) to + * block device. + */ + if (!buffer_datalen(zone)) + return psz_zone_write(zone, FLUSH_META, NULL, 0, 0); + psz_flush_dirty_zone(zone); + return 0; +} + static int psz_pstore_erase(struct pstore_record *record) { struct psz_context *cxt = record->psi->data; @@ -482,6 +624,8 @@ static int psz_pstore_erase(struct pstore_record *record) if (record->id >= cxt->kmsg_max_cnt) return -EINVAL; return psz_kmsg_erase(cxt, cxt->kpszs[record->id], record); + case PSTORE_TYPE_PMSG: + return psz_record_erase(cxt, cxt->ppsz); default: return -EINVAL; } @@ -555,6 +699,55 @@ static int notrace psz_kmsg_write(struct psz_context *cxt, return 0; } +static int notrace psz_record_write(struct pstore_zone *zone, + struct pstore_record *record) +{ + size_t start, rem; + bool is_full_data = false; + char *buf; + int cnt; + + if (!zone || !record) + return -ENOSPC; + + if (atomic_read(&zone->buffer->datalen) >= zone->buffer_size) + is_full_data = true; + + cnt = record->size; + buf = record->buf; + if (unlikely(cnt > zone->buffer_size)) { + buf += cnt - zone->buffer_size; + cnt = zone->buffer_size; + } + + start = buffer_start(zone); + rem = zone->buffer_size - start; + if (unlikely(rem < cnt)) { + psz_zone_write(zone, FLUSH_PART, buf, rem, start); + buf += rem; + cnt -= rem; + start = 0; + is_full_data = true; + } + + atomic_set(&zone->buffer->start, cnt + start); + psz_zone_write(zone, FLUSH_PART, buf, cnt, start); + + /** + * psz_zone_write will set datalen as start + cnt. + * It work if actual data length lesser than buffer size. + * If data length greater than buffer size, pmsg will rewrite to + * beginning of zone, which make buffer->datalen wrongly. + * So we should reset datalen as buffer size once actual data length + * greater than buffer size. + */ + if (is_full_data) { + atomic_set(&zone->buffer->datalen, zone->buffer_size); + psz_zone_write(zone, FLUSH_META, NULL, 0, 0); + } + return 0; +} + static int notrace psz_pstore_write(struct pstore_record *record) { struct psz_context *cxt = record->psi->data; @@ -566,6 +759,8 @@ static int notrace psz_pstore_write(struct pstore_record *record) switch (record->type) { case PSTORE_TYPE_DMESG: return psz_kmsg_write(cxt, record); + case PSTORE_TYPE_PMSG: + return psz_record_write(cxt->ppsz, record); default: return -EINVAL; } @@ -581,6 +776,13 @@ static struct pstore_zone *psz_read_next_zone(struct psz_context *cxt) return zone; } + if (cxt->pmsg_read_cnt == 0) { + cxt->pmsg_read_cnt++; + zone = cxt->ppsz; + if (psz_old_ok(zone)) + return zone; + } + return NULL; } @@ -631,7 +833,7 @@ static ssize_t psz_kmsg_read(struct pstore_zone *zone, return -ENOMEM; } - size = psz_zone_read(zone, record->buf + hlen, size, + size = psz_zone_read_buffer(zone, record->buf + hlen, size, sizeof(struct psz_kmsg_header)); if (unlikely(size < 0)) { kfree(record->buf); @@ -641,6 +843,32 @@ static ssize_t psz_kmsg_read(struct pstore_zone *zone, return size + hlen; } +static ssize_t psz_record_read(struct pstore_zone *zone, + struct pstore_record *record) +{ + size_t len; + struct psz_buffer *buf; + + if (!zone || !record) + return -ENOSPC; + + buf = (struct psz_buffer *)zone->oldbuf; + if (!buf) + return -ENOMSG; + + len = atomic_read(&buf->datalen); + record->buf = kmalloc(len, GFP_KERNEL); + if (!record->buf) + return -ENOMEM; + + if (unlikely(psz_zone_read_oldbuf(zone, record->buf, len, 0))) { + kfree(record->buf); + return -ENOMSG; + } + + return len; +} + static ssize_t psz_pstore_read(struct pstore_record *record) { struct psz_context *cxt = record->psi->data; @@ -665,6 +893,9 @@ static ssize_t psz_pstore_read(struct pstore_record *record) readop = psz_kmsg_read; record->id = cxt->kmsg_read_cnt - 1; break; + case PSTORE_TYPE_PMSG: + readop = psz_record_read; + break; default: goto next_zone; } @@ -720,6 +951,8 @@ static void psz_free_all_zones(struct psz_context *cxt) { if (cxt->kpszs) psz_free_zones(&cxt->kpszs, &cxt->kmsg_max_cnt); + if (cxt->ppsz) + psz_free_zone(&cxt->ppsz); } static struct pstore_zone *psz_init_zone(enum pstore_type_id type, @@ -753,8 +986,10 @@ static struct pstore_zone *psz_init_zone(enum pstore_type_id type, zone->type = type; zone->buffer_size = size - sizeof(struct psz_buffer); zone->buffer->sig = type ^ PSZ_SIG; + zone->oldbuf = NULL; atomic_set(&zone->dirty, 0); atomic_set(&zone->buffer->datalen, 0); + atomic_set(&zone->buffer->start, 0); *off += size; @@ -809,19 +1044,28 @@ static int psz_alloc_zones(struct psz_context *cxt) struct pstore_zone_info *info = cxt->pstore_zone_info; loff_t off = 0; int err; - size_t size; + size_t off_size = 0; - size = info->total_size; - cxt->kpszs = psz_init_zones(PSTORE_TYPE_DMESG, &off, size, + off_size += info->pmsg_size; + cxt->ppsz = psz_init_zone(PSTORE_TYPE_PMSG, &off, info->pmsg_size); + if (IS_ERR(cxt->ppsz)) { + err = PTR_ERR(cxt->ppsz); + cxt->ppsz = NULL; + goto free_out; + } + + cxt->kpszs = psz_init_zones(PSTORE_TYPE_DMESG, &off, + info->total_size - off_size, info->kmsg_size, &cxt->kmsg_max_cnt); if (IS_ERR(cxt->kpszs)) { err = PTR_ERR(cxt->kpszs); cxt->kpszs = NULL; - goto fail_out; + goto free_out; } return 0; -fail_out: +free_out: + psz_free_all_zones(cxt); return err; } @@ -844,7 +1088,7 @@ int register_pstore_zone(struct pstore_zone_info *info) return -EINVAL; } - if (!info->kmsg_size) { + if (!info->kmsg_size && !info->pmsg_size) { pr_warn("at least one record size must be non-zero\n"); return -EINVAL; } @@ -866,6 +1110,7 @@ int register_pstore_zone(struct pstore_zone_info *info) check_size(total_size, 4096); check_size(kmsg_size, SECTOR_SIZE); + check_size(pmsg_size, SECTOR_SIZE); #undef check_size @@ -891,6 +1136,7 @@ int register_pstore_zone(struct pstore_zone_info *info) pr_debug("register %s with properties:\n", info->name); pr_debug("\ttotal size : %ld Bytes\n", info->total_size); pr_debug("\tkmsg size : %ld Bytes\n", info->kmsg_size); + pr_debug("\tpmsg size : %ld Bytes\n", info->pmsg_size); err = psz_alloc_zones(cxt); if (err) { @@ -920,6 +1166,10 @@ int register_pstore_zone(struct pstore_zone_info *info) pr_cont(",panic_write"); pr_cont(")"); } + if (info->pmsg_size) { + cxt->pstore.flags |= PSTORE_FLAGS_PMSG; + pr_cont(" pmsg"); + } pr_cont("\n"); err = pstore_register(&cxt->pstore); diff --git a/include/linux/pstore_zone.h b/include/linux/pstore_zone.h index eb005d9ae40c..29c367a3bd80 100644 --- a/include/linux/pstore_zone.h +++ b/include/linux/pstore_zone.h @@ -17,6 +17,7 @@ typedef ssize_t (*pstore_zone_write_op)(const char *, size_t, loff_t); * @kmsg_size: The size of oops/panic zone. Zero means disabled, otherwise, * it must be multiple of SECTOR_SIZE(512 Bytes). * @max_reason: Maximum kmsg dump reason to store. + * @pmsg_size: The size of pmsg zone which is the same as @kmsg_size. * @read: The general read operation. Both of the function parameters * @size and @offset are relative value to storage. * On success, the number of bytes should be returned, others @@ -33,6 +34,7 @@ struct pstore_zone_info { unsigned long total_size; unsigned long kmsg_size; int max_reason; + unsigned long pmsg_size; pstore_zone_read_op read; pstore_zone_write_op write; pstore_zone_write_op panic_write; From cc9c4d1b5597167f8e8c92f6b61e1cda6d01884d Mon Sep 17 00:00:00 2001 From: WeiXiong Liao Date: Wed, 25 Mar 2020 16:55:00 +0800 Subject: [PATCH 28/35] pstore/zone,blk: Add console frontend support Support backend for console. To enable console backend, just make console_size be greater than 0 and a multiple of 4096. Signed-off-by: WeiXiong Liao Link: https://lore.kernel.org/lkml/20200511233229.27745-5-keescook@chromium.org/ Signed-off-by: Kees Cook --- fs/pstore/Kconfig | 18 +++++++-- fs/pstore/blk.c | 12 +++++- fs/pstore/zone.c | 81 ++++++++++++++++++++++++++++++++++--- include/linux/pstore_zone.h | 4 +- 4 files changed, 105 insertions(+), 10 deletions(-) diff --git a/fs/pstore/Kconfig b/fs/pstore/Kconfig index ef01c48f0ff7..126aa6c3ecf2 100644 --- a/fs/pstore/Kconfig +++ b/fs/pstore/Kconfig @@ -180,11 +180,11 @@ config PSTORE_BLK_BLKDEV help Which block device should be used for pstore/blk. - It accept the following variants: + It accepts the following variants: 1) device number in hexadecimal representation, with no leading 0x, for example b302. - 2) /dev/ represents the device number of disk - 3) /dev/ represents the device number + 2) /dev/ represents the device name of disk + 3) /dev/ represents the device name and number of partition - device number of disk plus the partition number 4) /dev/p - same as the above, this form is used when disk name of partitioned disk ends with a digit. @@ -236,3 +236,15 @@ config PSTORE_BLK_PMSG_SIZE NOTE that, both Kconfig and module parameters can configure pstore/blk, but module parameters have priority over Kconfig. + +config PSTORE_BLK_CONSOLE_SIZE + int "Size in Kbytes of console log to store" + depends on PSTORE_BLK + depends on PSTORE_CONSOLE + default 64 + help + This just sets size of console log (console_size) to store via + pstore/blk. The size is in KB and must be a multiple of 4. + + NOTE that, both Kconfig and module parameters can configure + pstore/blk, but module parameters have priority over Kconfig. diff --git a/fs/pstore/blk.c b/fs/pstore/blk.c index a9e7078f08d6..082f6cdaf4bd 100644 --- a/fs/pstore/blk.c +++ b/fs/pstore/blk.c @@ -35,6 +35,14 @@ static long pmsg_size = -1; module_param(pmsg_size, long, 0400); MODULE_PARM_DESC(pmsg_size, "pmsg size in kbytes"); +#if IS_ENABLED(CONFIG_PSTORE_CONSOLE) +static long console_size = CONFIG_PSTORE_BLK_CONSOLE_SIZE; +#else +static long console_size = -1; +#endif +module_param(console_size, long, 0400); +MODULE_PARM_DESC(console_size, "console size in kbytes"); + /* * blkdev - the block device to use for pstore storage * @@ -91,7 +99,8 @@ struct bdev_info { * whole disk). * On success, the number of bytes should be returned, others * means error. - * @write: The same as @read. + * @write: The same as @read, but the following error number: + * -EBUSY means try to write again later. * @panic_write:The write operation only used for panic case. It's optional * if you do not care panic log. The parameters and return value * are the same as @read. @@ -142,6 +151,7 @@ static int psblk_register_do(struct pstore_device_info *dev) verify_size(kmsg_size, 4096, dev->flags & PSTORE_FLAGS_DMESG); verify_size(pmsg_size, 4096, dev->flags & PSTORE_FLAGS_PMSG); + verify_size(console_size, 4096, dev->flags & PSTORE_FLAGS_CONSOLE); #undef verify_size pstore_zone_info->total_size = dev->total_size; diff --git a/fs/pstore/zone.c b/fs/pstore/zone.c index 2f8e3b349edb..ee0b64da410d 100644 --- a/fs/pstore/zone.c +++ b/fs/pstore/zone.c @@ -91,10 +91,12 @@ struct pstore_zone { * * @kpszs: kmsg dump storage zones * @ppsz: pmsg storage zone + * @cpsz: console storage zone * @kmsg_max_cnt: max count of @kpszs * @kmsg_read_cnt: counter of total read kmsg dumps * @kmsg_write_cnt: counter of total kmsg dump writes * @pmsg_read_cnt: counter of total read pmsg zone + * @console_read_cnt: counter of total read console zone * @oops_counter: counter of oops dumps * @panic_counter: counter of panic dumps * @recovered: whether finished recovering data from storage @@ -106,10 +108,12 @@ struct pstore_zone { struct psz_context { struct pstore_zone **kpszs; struct pstore_zone *ppsz; + struct pstore_zone *cpsz; unsigned int kmsg_max_cnt; unsigned int kmsg_read_cnt; unsigned int kmsg_write_cnt; unsigned int pmsg_read_cnt; + unsigned int console_read_cnt; /* * These counters should be calculated during recovery. * It records the oops/panic times after crashes rather than boots. @@ -129,6 +133,9 @@ struct psz_context { }; static struct psz_context pstore_zone_cxt; +static void psz_flush_all_dirty_zones(struct work_struct *); +static DECLARE_DELAYED_WORK(psz_cleaner, psz_flush_all_dirty_zones); + /** * enum psz_flush_mode - flush mode for psz_zone_write() * @@ -237,6 +244,9 @@ static int psz_zone_write(struct pstore_zone *zone, return 0; dirty: atomic_set(&zone->dirty, true); + /* flush dirty zones nicely */ + if (wcnt == -EBUSY && !is_on_panic()) + schedule_delayed_work(&psz_cleaner, msecs_to_jiffies(500)); return -EBUSY; } @@ -293,6 +303,21 @@ static int psz_move_zone(struct pstore_zone *old, struct pstore_zone *new) return 0; } +static void psz_flush_all_dirty_zones(struct work_struct *work) +{ + struct psz_context *cxt = &pstore_zone_cxt; + int ret = 0; + + if (cxt->ppsz) + ret |= psz_flush_dirty_zone(cxt->ppsz); + if (cxt->cpsz) + ret |= psz_flush_dirty_zone(cxt->cpsz); + if (cxt->kpszs) + ret |= psz_flush_dirty_zones(cxt->kpszs, cxt->kmsg_max_cnt); + if (ret && cxt->pstore_zone_info) + schedule_delayed_work(&psz_cleaner, msecs_to_jiffies(1000)); +} + static int psz_kmsg_recover_data(struct psz_context *cxt) { struct pstore_zone_info *info = cxt->pstore_zone_info; @@ -545,6 +570,10 @@ static inline int psz_recovery(struct psz_context *cxt) goto out; ret = psz_recover_zone(cxt, cxt->ppsz); + if (ret) + goto out; + + ret = psz_recover_zone(cxt, cxt->cpsz); out: if (unlikely(ret)) @@ -562,6 +591,7 @@ static int psz_pstore_open(struct pstore_info *psi) cxt->kmsg_read_cnt = 0; cxt->pmsg_read_cnt = 0; + cxt->console_read_cnt = 0; return 0; } @@ -626,8 +656,9 @@ static int psz_pstore_erase(struct pstore_record *record) return psz_kmsg_erase(cxt, cxt->kpszs[record->id], record); case PSTORE_TYPE_PMSG: return psz_record_erase(cxt, cxt->ppsz); - default: - return -EINVAL; + case PSTORE_TYPE_CONSOLE: + return psz_record_erase(cxt, cxt->cpsz); + default: return -EINVAL; } } @@ -690,9 +721,10 @@ static int notrace psz_kmsg_write(struct psz_context *cxt, return -ENOSPC; ret = psz_kmsg_write_record(cxt, record); - if (!ret) { + if (!ret && is_on_panic()) { + /* ensure all data are flushed to storage when panic */ pr_debug("try to flush other dirty zones\n"); - psz_flush_dirty_zones(cxt->kpszs, cxt->kmsg_max_cnt); + psz_flush_all_dirty_zones(NULL); } /* always return 0 as we had handled it on buffer */ @@ -756,9 +788,18 @@ static int notrace psz_pstore_write(struct pstore_record *record) record->reason == KMSG_DUMP_PANIC) atomic_set(&cxt->on_panic, 1); + /* + * if on panic, do not write except panic records + * Fix case that panic_write prints log which wakes up console backend. + */ + if (is_on_panic() && record->type != PSTORE_TYPE_DMESG) + return -EBUSY; + switch (record->type) { case PSTORE_TYPE_DMESG: return psz_kmsg_write(cxt, record); + case PSTORE_TYPE_CONSOLE: + return psz_record_write(cxt->cpsz, record); case PSTORE_TYPE_PMSG: return psz_record_write(cxt->ppsz, record); default: @@ -783,6 +824,13 @@ static struct pstore_zone *psz_read_next_zone(struct psz_context *cxt) return zone; } + if (cxt->console_read_cnt == 0) { + cxt->console_read_cnt++; + zone = cxt->cpsz; + if (psz_old_ok(zone)) + return zone; + } + return NULL; } @@ -893,6 +941,8 @@ static ssize_t psz_pstore_read(struct pstore_record *record) readop = psz_kmsg_read; record->id = cxt->kmsg_read_cnt - 1; break; + case PSTORE_TYPE_CONSOLE: + fallthrough; case PSTORE_TYPE_PMSG: readop = psz_record_read; break; @@ -953,6 +1003,8 @@ static void psz_free_all_zones(struct psz_context *cxt) psz_free_zones(&cxt->kpszs, &cxt->kmsg_max_cnt); if (cxt->ppsz) psz_free_zone(&cxt->ppsz); + if (cxt->cpsz) + psz_free_zone(&cxt->cpsz); } static struct pstore_zone *psz_init_zone(enum pstore_type_id type, @@ -1054,6 +1106,15 @@ static int psz_alloc_zones(struct psz_context *cxt) goto free_out; } + off_size += info->console_size; + cxt->cpsz = psz_init_zone(PSTORE_TYPE_CONSOLE, &off, + info->console_size); + if (IS_ERR(cxt->cpsz)) { + err = PTR_ERR(cxt->cpsz); + cxt->cpsz = NULL; + goto free_out; + } + cxt->kpszs = psz_init_zones(PSTORE_TYPE_DMESG, &off, info->total_size - off_size, info->kmsg_size, &cxt->kmsg_max_cnt); @@ -1088,7 +1149,7 @@ int register_pstore_zone(struct pstore_zone_info *info) return -EINVAL; } - if (!info->kmsg_size && !info->pmsg_size) { + if (!info->kmsg_size && !info->pmsg_size && !info->console_size) { pr_warn("at least one record size must be non-zero\n"); return -EINVAL; } @@ -1111,6 +1172,7 @@ int register_pstore_zone(struct pstore_zone_info *info) check_size(total_size, 4096); check_size(kmsg_size, SECTOR_SIZE); check_size(pmsg_size, SECTOR_SIZE); + check_size(console_size, SECTOR_SIZE); #undef check_size @@ -1137,6 +1199,7 @@ int register_pstore_zone(struct pstore_zone_info *info) pr_debug("\ttotal size : %ld Bytes\n", info->total_size); pr_debug("\tkmsg size : %ld Bytes\n", info->kmsg_size); pr_debug("\tpmsg size : %ld Bytes\n", info->pmsg_size); + pr_debug("\tconsole size : %ld Bytes\n", info->console_size); err = psz_alloc_zones(cxt); if (err) { @@ -1170,6 +1233,10 @@ int register_pstore_zone(struct pstore_zone_info *info) cxt->pstore.flags |= PSTORE_FLAGS_PMSG; pr_cont(" pmsg"); } + if (info->console_size) { + cxt->pstore.flags |= PSTORE_FLAGS_CONSOLE; + pr_cont(" console"); + } pr_cont("\n"); err = pstore_register(&cxt->pstore); @@ -1211,6 +1278,10 @@ void unregister_pstore_zone(struct pstore_zone_info *info) /* Stop incoming writes from pstore. */ pstore_unregister(&cxt->pstore); + /* Flush any pending writes. */ + psz_flush_all_dirty_zones(NULL); + flush_delayed_work(&psz_cleaner); + /* Clean up allocations. */ kfree(cxt->pstore.buf); cxt->pstore.buf = NULL; diff --git a/include/linux/pstore_zone.h b/include/linux/pstore_zone.h index 29c367a3bd80..904ee67f4ba2 100644 --- a/include/linux/pstore_zone.h +++ b/include/linux/pstore_zone.h @@ -18,11 +18,12 @@ typedef ssize_t (*pstore_zone_write_op)(const char *, size_t, loff_t); * it must be multiple of SECTOR_SIZE(512 Bytes). * @max_reason: Maximum kmsg dump reason to store. * @pmsg_size: The size of pmsg zone which is the same as @kmsg_size. + * @console_size:The size of console zone which is the same as @kmsg_size. * @read: The general read operation. Both of the function parameters * @size and @offset are relative value to storage. * On success, the number of bytes should be returned, others * means error. - * @write: The same as @read. + * @write: The same as @read, but -EBUSY means try to write again later. * @panic_write:The write operation only used for panic case. It's optional * if you do not care panic log. The parameters and return value * are the same as @read. @@ -35,6 +36,7 @@ struct pstore_zone_info { unsigned long kmsg_size; int max_reason; unsigned long pmsg_size; + unsigned long console_size; pstore_zone_read_op read; pstore_zone_write_op write; pstore_zone_write_op panic_write; From 34327e9fd213414b35eb70aa512c4e39b2095907 Mon Sep 17 00:00:00 2001 From: WeiXiong Liao Date: Wed, 25 Mar 2020 16:55:01 +0800 Subject: [PATCH 29/35] pstore/zone,blk: Add ftrace frontend support Support backend for ftrace. To enable ftrace backend, just make ftrace_size be greater than 0 and a multiple of 4096. Signed-off-by: WeiXiong Liao Link: https://lore.kernel.org/lkml/20200511233229.27745-6-keescook@chromium.org/ Co-developed-by: Colin Ian King Signed-off-by: Colin Ian King Link: https://lore.kernel.org/lkml/20200512170719.221514-1-colin.king@canonical.com Signed-off-by: Kees Cook --- fs/pstore/Kconfig | 12 ++++ fs/pstore/blk.c | 9 +++ fs/pstore/zone.c | 114 +++++++++++++++++++++++++++++++++++- include/linux/pstore_zone.h | 2 + 4 files changed, 136 insertions(+), 1 deletion(-) diff --git a/fs/pstore/Kconfig b/fs/pstore/Kconfig index 126aa6c3ecf2..c2237984b407 100644 --- a/fs/pstore/Kconfig +++ b/fs/pstore/Kconfig @@ -248,3 +248,15 @@ config PSTORE_BLK_CONSOLE_SIZE NOTE that, both Kconfig and module parameters can configure pstore/blk, but module parameters have priority over Kconfig. + +config PSTORE_BLK_FTRACE_SIZE + int "Size in Kbytes of ftrace log to store" + depends on PSTORE_BLK + depends on PSTORE_FTRACE + default 64 + help + This just sets size of ftrace log (ftrace_size) for pstore/blk. The + size is in KB and must be a multiple of 4. + + NOTE that, both Kconfig and module parameters can configure + pstore/blk, but module parameters have priority over Kconfig. diff --git a/fs/pstore/blk.c b/fs/pstore/blk.c index 082f6cdaf4bd..e0d95fb48428 100644 --- a/fs/pstore/blk.c +++ b/fs/pstore/blk.c @@ -43,6 +43,14 @@ static long console_size = -1; module_param(console_size, long, 0400); MODULE_PARM_DESC(console_size, "console size in kbytes"); +#if IS_ENABLED(CONFIG_PSTORE_FTRACE) +static long ftrace_size = CONFIG_PSTORE_BLK_FTRACE_SIZE; +#else +static long ftrace_size = -1; +#endif +module_param(ftrace_size, long, 0400); +MODULE_PARM_DESC(ftrace_size, "ftrace size in kbytes"); + /* * blkdev - the block device to use for pstore storage * @@ -152,6 +160,7 @@ static int psblk_register_do(struct pstore_device_info *dev) verify_size(kmsg_size, 4096, dev->flags & PSTORE_FLAGS_DMESG); verify_size(pmsg_size, 4096, dev->flags & PSTORE_FLAGS_PMSG); verify_size(console_size, 4096, dev->flags & PSTORE_FLAGS_CONSOLE); + verify_size(ftrace_size, 4096, dev->flags & PSTORE_FLAGS_FTRACE); #undef verify_size pstore_zone_info->total_size = dev->total_size; diff --git a/fs/pstore/zone.c b/fs/pstore/zone.c index ee0b64da410d..2d9f452360bb 100644 --- a/fs/pstore/zone.c +++ b/fs/pstore/zone.c @@ -92,11 +92,14 @@ struct pstore_zone { * @kpszs: kmsg dump storage zones * @ppsz: pmsg storage zone * @cpsz: console storage zone + * @fpszs: ftrace storage zones * @kmsg_max_cnt: max count of @kpszs * @kmsg_read_cnt: counter of total read kmsg dumps * @kmsg_write_cnt: counter of total kmsg dump writes * @pmsg_read_cnt: counter of total read pmsg zone * @console_read_cnt: counter of total read console zone + * @ftrace_max_cnt: max count of @fpszs + * @ftrace_read_cnt: counter of max read ftrace zone * @oops_counter: counter of oops dumps * @panic_counter: counter of panic dumps * @recovered: whether finished recovering data from storage @@ -109,11 +112,14 @@ struct psz_context { struct pstore_zone **kpszs; struct pstore_zone *ppsz; struct pstore_zone *cpsz; + struct pstore_zone **fpszs; unsigned int kmsg_max_cnt; unsigned int kmsg_read_cnt; unsigned int kmsg_write_cnt; unsigned int pmsg_read_cnt; unsigned int console_read_cnt; + unsigned int ftrace_max_cnt; + unsigned int ftrace_read_cnt; /* * These counters should be calculated during recovery. * It records the oops/panic times after crashes rather than boots. @@ -314,6 +320,8 @@ static void psz_flush_all_dirty_zones(struct work_struct *work) ret |= psz_flush_dirty_zone(cxt->cpsz); if (cxt->kpszs) ret |= psz_flush_dirty_zones(cxt->kpszs, cxt->kmsg_max_cnt); + if (cxt->fpszs) + ret |= psz_flush_dirty_zones(cxt->fpszs, cxt->ftrace_max_cnt); if (ret && cxt->pstore_zone_info) schedule_delayed_work(&psz_cleaner, msecs_to_jiffies(1000)); } @@ -550,6 +558,31 @@ static int psz_recover_zone(struct psz_context *cxt, struct pstore_zone *zone) return ret; } +static int psz_recover_zones(struct psz_context *cxt, + struct pstore_zone **zones, unsigned int cnt) +{ + int ret; + unsigned int i; + struct pstore_zone *zone; + + if (!zones) + return 0; + + for (i = 0; i < cnt; i++) { + zone = zones[i]; + if (unlikely(!zone)) + continue; + ret = psz_recover_zone(cxt, zone); + if (ret) + goto recover_fail; + } + + return 0; +recover_fail: + pr_debug("recover %s[%u] failed\n", zone->name, i); + return ret; +} + /** * psz_recovery() - recover data from storage * @cxt: the context of pstore/zone @@ -574,6 +607,10 @@ static inline int psz_recovery(struct psz_context *cxt) goto out; ret = psz_recover_zone(cxt, cxt->cpsz); + if (ret) + goto out; + + ret = psz_recover_zones(cxt, cxt->fpszs, cxt->ftrace_max_cnt); out: if (unlikely(ret)) @@ -592,6 +629,7 @@ static int psz_pstore_open(struct pstore_info *psi) cxt->kmsg_read_cnt = 0; cxt->pmsg_read_cnt = 0; cxt->console_read_cnt = 0; + cxt->ftrace_read_cnt = 0; return 0; } @@ -658,6 +696,10 @@ static int psz_pstore_erase(struct pstore_record *record) return psz_record_erase(cxt, cxt->ppsz); case PSTORE_TYPE_CONSOLE: return psz_record_erase(cxt, cxt->cpsz); + case PSTORE_TYPE_FTRACE: + if (record->id >= cxt->ftrace_max_cnt) + return -EINVAL; + return psz_record_erase(cxt, cxt->fpszs[record->id]); default: return -EINVAL; } } @@ -802,6 +844,13 @@ static int notrace psz_pstore_write(struct pstore_record *record) return psz_record_write(cxt->cpsz, record); case PSTORE_TYPE_PMSG: return psz_record_write(cxt->ppsz, record); + case PSTORE_TYPE_FTRACE: { + int zonenum = smp_processor_id(); + + if (!cxt->fpszs) + return -ENOSPC; + return psz_record_write(cxt->fpszs[zonenum], record); + } default: return -EINVAL; } @@ -817,6 +866,14 @@ static struct pstore_zone *psz_read_next_zone(struct psz_context *cxt) return zone; } + if (cxt->ftrace_read_cnt < cxt->ftrace_max_cnt) + /* + * No need psz_old_ok(). Let psz_ftrace_read() do so for + * combination. psz_ftrace_read() should traverse over + * all zones in case of some zone without data. + */ + return cxt->fpszs[cxt->ftrace_read_cnt++]; + if (cxt->pmsg_read_cnt == 0) { cxt->pmsg_read_cnt++; zone = cxt->ppsz; @@ -891,6 +948,38 @@ static ssize_t psz_kmsg_read(struct pstore_zone *zone, return size + hlen; } +/* try to combine all ftrace zones */ +static ssize_t psz_ftrace_read(struct pstore_zone *zone, + struct pstore_record *record) +{ + struct psz_context *cxt; + struct psz_buffer *buf; + int ret; + + if (!zone || !record) + return -ENOSPC; + + if (!psz_old_ok(zone)) + goto out; + + buf = (struct psz_buffer *)zone->oldbuf; + if (!buf) + return -ENOMSG; + + ret = pstore_ftrace_combine_log(&record->buf, &record->size, + (char *)buf->data, atomic_read(&buf->datalen)); + if (unlikely(ret)) + return ret; + +out: + cxt = record->psi->data; + if (cxt->ftrace_read_cnt < cxt->ftrace_max_cnt) + /* then, read next ftrace zone */ + return -ENOMSG; + record->id = 0; + return record->size ? record->size : -ENOMSG; +} + static ssize_t psz_record_read(struct pstore_zone *zone, struct pstore_record *record) { @@ -941,6 +1030,9 @@ static ssize_t psz_pstore_read(struct pstore_record *record) readop = psz_kmsg_read; record->id = cxt->kmsg_read_cnt - 1; break; + case PSTORE_TYPE_FTRACE: + readop = psz_ftrace_read; + break; case PSTORE_TYPE_CONSOLE: fallthrough; case PSTORE_TYPE_PMSG: @@ -1005,6 +1097,8 @@ static void psz_free_all_zones(struct psz_context *cxt) psz_free_zone(&cxt->ppsz); if (cxt->cpsz) psz_free_zone(&cxt->cpsz); + if (cxt->fpszs) + psz_free_zones(&cxt->fpszs, &cxt->ftrace_max_cnt); } static struct pstore_zone *psz_init_zone(enum pstore_type_id type, @@ -1115,6 +1209,17 @@ static int psz_alloc_zones(struct psz_context *cxt) goto free_out; } + off_size += info->ftrace_size; + cxt->fpszs = psz_init_zones(PSTORE_TYPE_FTRACE, &off, + info->ftrace_size, + info->ftrace_size / nr_cpu_ids, + &cxt->ftrace_max_cnt); + if (IS_ERR(cxt->fpszs)) { + err = PTR_ERR(cxt->fpszs); + cxt->fpszs = NULL; + goto free_out; + } + cxt->kpszs = psz_init_zones(PSTORE_TYPE_DMESG, &off, info->total_size - off_size, info->kmsg_size, &cxt->kmsg_max_cnt); @@ -1149,7 +1254,8 @@ int register_pstore_zone(struct pstore_zone_info *info) return -EINVAL; } - if (!info->kmsg_size && !info->pmsg_size && !info->console_size) { + if (!info->kmsg_size && !info->pmsg_size && !info->console_size && + !info->ftrace_size) { pr_warn("at least one record size must be non-zero\n"); return -EINVAL; } @@ -1173,6 +1279,7 @@ int register_pstore_zone(struct pstore_zone_info *info) check_size(kmsg_size, SECTOR_SIZE); check_size(pmsg_size, SECTOR_SIZE); check_size(console_size, SECTOR_SIZE); + check_size(ftrace_size, SECTOR_SIZE); #undef check_size @@ -1200,6 +1307,7 @@ int register_pstore_zone(struct pstore_zone_info *info) pr_debug("\tkmsg size : %ld Bytes\n", info->kmsg_size); pr_debug("\tpmsg size : %ld Bytes\n", info->pmsg_size); pr_debug("\tconsole size : %ld Bytes\n", info->console_size); + pr_debug("\tftrace size : %ld Bytes\n", info->ftrace_size); err = psz_alloc_zones(cxt); if (err) { @@ -1237,6 +1345,10 @@ int register_pstore_zone(struct pstore_zone_info *info) cxt->pstore.flags |= PSTORE_FLAGS_CONSOLE; pr_cont(" console"); } + if (info->ftrace_size) { + cxt->pstore.flags |= PSTORE_FLAGS_FTRACE; + pr_cont(" ftrace"); + } pr_cont("\n"); err = pstore_register(&cxt->pstore); diff --git a/include/linux/pstore_zone.h b/include/linux/pstore_zone.h index 904ee67f4ba2..6f16b0dd834a 100644 --- a/include/linux/pstore_zone.h +++ b/include/linux/pstore_zone.h @@ -19,6 +19,7 @@ typedef ssize_t (*pstore_zone_write_op)(const char *, size_t, loff_t); * @max_reason: Maximum kmsg dump reason to store. * @pmsg_size: The size of pmsg zone which is the same as @kmsg_size. * @console_size:The size of console zone which is the same as @kmsg_size. + * @ftrace_size:The size of ftrace zone which is the same as @kmsg_size. * @read: The general read operation. Both of the function parameters * @size and @offset are relative value to storage. * On success, the number of bytes should be returned, others @@ -37,6 +38,7 @@ struct pstore_zone_info { int max_reason; unsigned long pmsg_size; unsigned long console_size; + unsigned long ftrace_size; pstore_zone_read_op read; pstore_zone_write_op write; pstore_zone_write_op panic_write; From 649304c936cd4d2a2128bb877044772416c7d4f5 Mon Sep 17 00:00:00 2001 From: WeiXiong Liao Date: Wed, 25 Mar 2020 16:55:02 +0800 Subject: [PATCH 30/35] Documentation: Add details for pstore/blk Add details on using pstore/blk, the new backend of pstore to record dumps to block devices, in Documentation/admin-guide/pstore-blk.rst Signed-off-by: WeiXiong Liao Link: https://lore.kernel.org/lkml/20200511233229.27745-7-keescook@chromium.org/ Signed-off-by: Kees Cook --- Documentation/admin-guide/pstore-blk.rst | 229 +++++++++++++++++++++++ MAINTAINERS | 1 + fs/pstore/Kconfig | 2 + 3 files changed, 232 insertions(+) create mode 100644 Documentation/admin-guide/pstore-blk.rst diff --git a/Documentation/admin-guide/pstore-blk.rst b/Documentation/admin-guide/pstore-blk.rst new file mode 100644 index 000000000000..bef8c7436721 --- /dev/null +++ b/Documentation/admin-guide/pstore-blk.rst @@ -0,0 +1,229 @@ +.. SPDX-License-Identifier: GPL-2.0 + +pstore block oops/panic logger +============================== + +Introduction +------------ + +pstore block (pstore/blk) is an oops/panic logger that writes its logs to a +block device before the system crashes. You can get these log files by +mounting pstore filesystem like:: + + mount -t pstore pstore /sys/fs/pstore + + +pstore block concepts +--------------------- + +pstore/blk provides efficient configuration method for pstore/blk, which +divides all configurations into two parts, configurations for user and +configurations for driver. + +Configurations for user determine how pstore/blk works, such as pmsg_size, +kmsg_size and so on. All of them support both Kconfig and module parameters, +but module parameters have priority over Kconfig. + +Configurations for driver are all about block device, such as total_size +of block device and read/write operations. + +Configurations for user +----------------------- + +All of these configurations support both Kconfig and module parameters, but +module parameters have priority over Kconfig. + +Here is an example for module parameters:: + + pstore_blk.blkdev=179:7 pstore_blk.kmsg_size=64 + +The detail of each configurations may be of interest to you. + +blkdev +~~~~~~ + +The block device to use. Most of the time, it is a partition of block device. +It's required for pstore/blk. + +It accepts the following variants: + +1. device number in hexadecimal represents itself; no + leading 0x, for example b302. +#. /dev/ represents the device number of disk +#. /dev/ represents the device number of partition - device + number of disk plus the partition number +#. /dev/p - same as the above; this form is used when disk + name of partitioned disk ends with a digit. +#. PARTUUID=00112233-4455-6677-8899-AABBCCDDEEFF represents the unique id of + a partition if the partition table provides it. The UUID may be either an + EFI/GPT UUID, or refer to an MSDOS partition using the format SSSSSSSS-PP, + where SSSSSSSS is a zero-filled hex representation of the 32-bit + "NT disk signature", and PP is a zero-filled hex representation of the + 1-based partition number. +#. PARTUUID=/PARTNROFF= to select a partition in relation to a + partition with a known unique id. +#. : major and minor number of the device separated by a colon. + +kmsg_size +~~~~~~~~~ + +The chunk size in KB for oops/panic front-end. It **MUST** be a multiple of 4. +It's optional if you do not care oops/panic log. + +There are multiple chunks for oops/panic front-end depending on the remaining +space except other pstore front-ends. + +pstore/blk will log to oops/panic chunks one by one, and always overwrite the +oldest chunk if there is no more free chunk. + +pmsg_size +~~~~~~~~~ + +The chunk size in KB for pmsg front-end. It **MUST** be a multiple of 4. +It's optional if you do not care pmsg log. + +Unlike oops/panic front-end, there is only one chunk for pmsg front-end. + +Pmsg is a user space accessible pstore object. Writes to */dev/pmsg0* are +appended to the chunk. On reboot the contents are available in +*/sys/fs/pstore/pmsg-pstore-blk-0*. + +console_size +~~~~~~~~~~~~ + +The chunk size in KB for console front-end. It **MUST** be a multiple of 4. +It's optional if you do not care console log. + +Similar to pmsg front-end, there is only one chunk for console front-end. + +All log of console will be appended to the chunk. On reboot the contents are +available in */sys/fs/pstore/console-pstore-blk-0*. + +ftrace_size +~~~~~~~~~~~ + +The chunk size in KB for ftrace front-end. It **MUST** be a multiple of 4. +It's optional if you do not care console log. + +Similar to oops front-end, there are multiple chunks for ftrace front-end +depending on the count of cpu processors. Each chunk size is equal to +ftrace_size / processors_count. + +All log of ftrace will be appended to the chunk. On reboot the contents are +combined and available in */sys/fs/pstore/ftrace-pstore-blk-0*. + +Persistent function tracing might be useful for debugging software or hardware +related hangs. Here is an example of usage:: + + # mount -t pstore pstore /sys/fs/pstore + # mount -t debugfs debugfs /sys/kernel/debug/ + # echo 1 > /sys/kernel/debug/pstore/record_ftrace + # reboot -f + [...] + # mount -t pstore pstore /sys/fs/pstore + # tail /sys/fs/pstore/ftrace-pstore-blk-0 + CPU:0 ts:5914676 c0063828 c0063b94 call_cpuidle <- cpu_startup_entry+0x1b8/0x1e0 + CPU:0 ts:5914678 c039ecdc c006385c cpuidle_enter_state <- call_cpuidle+0x44/0x48 + CPU:0 ts:5914680 c039e9a0 c039ecf0 cpuidle_enter_freeze <- cpuidle_enter_state+0x304/0x314 + CPU:0 ts:5914681 c0063870 c039ea30 sched_idle_set_state <- cpuidle_enter_state+0x44/0x314 + CPU:1 ts:5916720 c0160f59 c015ee04 kernfs_unmap_bin_file <- __kernfs_remove+0x140/0x204 + CPU:1 ts:5916721 c05ca625 c015ee0c __mutex_lock_slowpath <- __kernfs_remove+0x148/0x204 + CPU:1 ts:5916723 c05c813d c05ca630 yield_to <- __mutex_lock_slowpath+0x314/0x358 + CPU:1 ts:5916724 c05ca2d1 c05ca638 __ww_mutex_lock <- __mutex_lock_slowpath+0x31c/0x358 + +max_reason +~~~~~~~~~~ + +Limiting which kinds of kmsg dumps are stored can be controlled via +the ``max_reason`` value, as defined in include/linux/kmsg_dump.h's +``enum kmsg_dump_reason``. For example, to store both Oopses and Panics, +``max_reason`` should be set to 2 (KMSG_DUMP_OOPS), to store only Panics +``max_reason`` should be set to 1 (KMSG_DUMP_PANIC). Setting this to 0 +(KMSG_DUMP_UNDEF), means the reason filtering will be controlled by the +``printk.always_kmsg_dump`` boot param: if unset, it'll be KMSG_DUMP_OOPS, +otherwise KMSG_DUMP_MAX. + +Configurations for driver +------------------------- + +Only a block device driver cares about these configurations. A block device +driver uses ``register_pstore_blk`` to register to pstore/blk. + +.. kernel-doc:: fs/pstore/blk.c + :identifiers: register_pstore_blk + +Compression and header +---------------------- + +Block device is large enough for uncompressed oops data. Actually we do not +recommend data compression because pstore/blk will insert some information into +the first line of oops/panic data. For example:: + + Panic: Total 16 times + +It means that it's OOPS|Panic for the 16th time since the first booting. +Sometimes the number of occurrences of oops|panic since the first booting is +important to judge whether the system is stable. + +The following line is inserted by pstore filesystem. For example:: + + Oops#2 Part1 + +It means that it's OOPS for the 2nd time on the last boot. + +Reading the data +---------------- + +The dump data can be read from the pstore filesystem. The format for these +files is ``dmesg-pstore-blk-[N]`` for oops/panic front-end, +``pmsg-pstore-blk-0`` for pmsg front-end and so on. The timestamp of the +dump file records the trigger time. To delete a stored record from block +device, simply unlink the respective pstore file. + +Attentions in panic read/write APIs +----------------------------------- + +If on panic, the kernel is not going to run for much longer, the tasks will not +be scheduled and most kernel resources will be out of service. It +looks like a single-threaded program running on a single-core computer. + +The following points require special attention for panic read/write APIs: + +1. Can **NOT** allocate any memory. + If you need memory, just allocate while the block driver is initializing + rather than waiting until the panic. +#. Must be polled, **NOT** interrupt driven. + No task schedule any more. The block driver should delay to ensure the write + succeeds, but NOT sleep. +#. Can **NOT** take any lock. + There is no other task, nor any shared resource; you are safe to break all + locks. +#. Just use CPU to transfer. + Do not use DMA to transfer unless you are sure that DMA will not keep lock. +#. Control registers directly. + Please control registers directly rather than use Linux kernel resources. + Do I/O map while initializing rather than wait until a panic occurs. +#. Reset your block device and controller if necessary. + If you are not sure of the state of your block device and controller when + a panic occurs, you are safe to stop and reset them. + +pstore/blk supports psblk_blkdev_info(), which is defined in +*linux/pstore_blk.h*, to get information of using block device, such as the +device number, sector count and start sector of the whole disk. + +pstore block internals +---------------------- + +For developer reference, here are all the important structures and APIs: + +.. kernel-doc:: fs/pstore/zone.c + :internal: + +.. kernel-doc:: include/linux/pstore_zone.h + :internal: + +.. kernel-doc:: fs/pstore/blk.c + :export: + +.. kernel-doc:: include/linux/pstore_blk.h + :internal: diff --git a/MAINTAINERS b/MAINTAINERS index e64e5db31497..9c1f4feff418 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -13660,6 +13660,7 @@ M: Tony Luck S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux.git for-next/pstore F: Documentation/admin-guide/ramoops.rst +F: Documentation/admin-guide/pstore-blk.rst F: Documentation/devicetree/bindings/reserved-memory/ramoops.txt F: drivers/acpi/apei/erst.c F: drivers/firmware/efi/efi-pstore.c diff --git a/fs/pstore/Kconfig b/fs/pstore/Kconfig index c2237984b407..e16a49ebfe54 100644 --- a/fs/pstore/Kconfig +++ b/fs/pstore/Kconfig @@ -171,6 +171,8 @@ config PSTORE_BLK This enables panic and oops message to be logged to a block dev where it can be read back at some later point. + For more information, see Documentation/admin-guide/pstore-blk.rst + If unsure, say N. config PSTORE_BLK_BLKDEV From 335426c6dcdd338d6b7c939c2da15fc9c5dd4959 Mon Sep 17 00:00:00 2001 From: WeiXiong Liao Date: Wed, 25 Mar 2020 16:55:03 +0800 Subject: [PATCH 31/35] pstore/zone: Provide way to skip "broken" zone for MTD devices One requirement to support MTD devices in pstore/zone is having a way to declare certain regions as broken. Add this support to pstore/zone. The MTD driver should return -ENOMSG when encountering a bad region, which tells pstore/zone to skip and try the next one. Signed-off-by: WeiXiong Liao Link: https://lore.kernel.org/lkml/20200511233229.27745-8-keescook@chromium.org/ Co-developed-by: Colin Ian King Signed-off-by: Colin Ian King Link: //lore.kernel.org/lkml/20200512173801.222666-1-colin.king@canonical.com Signed-off-by: Kees Cook --- fs/pstore/blk.c | 10 ++++-- fs/pstore/zone.c | 65 ++++++++++++++++++++++++++++++------- include/linux/pstore_blk.h | 3 +- include/linux/pstore_zone.h | 12 ++++--- 4 files changed, 71 insertions(+), 19 deletions(-) diff --git a/fs/pstore/blk.c b/fs/pstore/blk.c index e0d95fb48428..67343d6aa27a 100644 --- a/fs/pstore/blk.c +++ b/fs/pstore/blk.c @@ -109,9 +109,12 @@ struct bdev_info { * means error. * @write: The same as @read, but the following error number: * -EBUSY means try to write again later. + * -ENOMSG means to try next zone. * @panic_write:The write operation only used for panic case. It's optional - * if you do not care panic log. The parameters and return value - * are the same as @read. + * if you do not care panic log. The parameters are relative + * value to storage. + * On success, the number of bytes should be returned, others + * excluding -ENOMSG mean error. -ENOMSG means to try next zone. */ struct pstore_device_info { unsigned long total_size; @@ -337,6 +340,9 @@ static ssize_t psblk_blk_panic_write(const char *buf, size_t size, /* size and off must align to SECTOR_SIZE for block device */ ret = blkdev_panic_write(buf, off >> SECTOR_SHIFT, size >> SECTOR_SHIFT); + /* try next zone */ + if (ret == -ENOMSG) + return ret; return ret ? -EIO : size; } diff --git a/fs/pstore/zone.c b/fs/pstore/zone.c index 2d9f452360bb..add26b125984 100644 --- a/fs/pstore/zone.c +++ b/fs/pstore/zone.c @@ -249,6 +249,9 @@ static int psz_zone_write(struct pstore_zone *zone, return 0; dirty: + /* no need to mark dirty if going to try next zone */ + if (wcnt == -ENOMSG) + return -ENOMSG; atomic_set(&zone->dirty, true); /* flush dirty zones nicely */ if (wcnt == -EBUSY && !is_on_panic()) @@ -391,7 +394,11 @@ static int psz_kmsg_recover_meta(struct psz_context *cxt) return -EINVAL; rcnt = info->read((char *)buf, len, zone->off); - if (rcnt != len) { + if (rcnt == -ENOMSG) { + pr_debug("%s with id %lu may be broken, skip\n", + zone->name, i); + continue; + } else if (rcnt != len) { pr_err("read %s with id %lu failed\n", zone->name, i); return (int)rcnt < 0 ? (int)rcnt : -EIO; } @@ -725,24 +732,58 @@ static void psz_write_kmsg_hdr(struct pstore_zone *zone, hdr->counter = 0; } +/* + * In case zone is broken, which may occur to MTD device, we try each zones, + * start at cxt->kmsg_write_cnt. + */ static inline int notrace psz_kmsg_write_record(struct psz_context *cxt, struct pstore_record *record) { size_t size, hlen; struct pstore_zone *zone; - unsigned int zonenum; + unsigned int i; - zonenum = cxt->kmsg_write_cnt; - zone = cxt->kpszs[zonenum]; - if (unlikely(!zone)) - return -ENOSPC; - cxt->kmsg_write_cnt = (zonenum + 1) % cxt->kmsg_max_cnt; + for (i = 0; i < cxt->kmsg_max_cnt; i++) { + unsigned int zonenum, len; + int ret; - pr_debug("write %s to zone id %d\n", zone->name, zonenum); - psz_write_kmsg_hdr(zone, record); - hlen = sizeof(struct psz_kmsg_header); - size = min_t(size_t, record->size, zone->buffer_size - hlen); - return psz_zone_write(zone, FLUSH_ALL, record->buf, size, hlen); + zonenum = (cxt->kmsg_write_cnt + i) % cxt->kmsg_max_cnt; + zone = cxt->kpszs[zonenum]; + if (unlikely(!zone)) + return -ENOSPC; + + /* avoid destroying old data, allocate a new one */ + len = zone->buffer_size + sizeof(*zone->buffer); + zone->oldbuf = zone->buffer; + zone->buffer = kzalloc(len, GFP_KERNEL); + if (!zone->buffer) { + zone->buffer = zone->oldbuf; + return -ENOMEM; + } + zone->buffer->sig = zone->oldbuf->sig; + + pr_debug("write %s to zone id %d\n", zone->name, zonenum); + psz_write_kmsg_hdr(zone, record); + hlen = sizeof(struct psz_kmsg_header); + size = min_t(size_t, record->size, zone->buffer_size - hlen); + ret = psz_zone_write(zone, FLUSH_ALL, record->buf, size, hlen); + if (likely(!ret || ret != -ENOMSG)) { + cxt->kmsg_write_cnt = zonenum + 1; + cxt->kmsg_write_cnt %= cxt->kmsg_max_cnt; + /* no need to try next zone, free last zone buffer */ + kfree(zone->oldbuf); + zone->oldbuf = NULL; + return ret; + } + + pr_debug("zone %u may be broken, try next dmesg zone\n", + zonenum); + kfree(zone->buffer); + zone->buffer = zone->oldbuf; + zone->oldbuf = NULL; + } + + return -EBUSY; } static int notrace psz_kmsg_write(struct psz_context *cxt, diff --git a/include/linux/pstore_blk.h b/include/linux/pstore_blk.h index 4501977b1336..ccba8c068752 100644 --- a/include/linux/pstore_blk.h +++ b/include/linux/pstore_blk.h @@ -14,7 +14,8 @@ * @start_sect: start sector to block device * @sects: sectors count on buf * - * Return: On success, zero should be returned. Others mean error. + * Return: On success, zero should be returned. Others excluding -ENOMSG + * mean error. -ENOMSG means to try next zone. * * Panic write to block device must be aligned to SECTOR_SIZE. */ diff --git a/include/linux/pstore_zone.h b/include/linux/pstore_zone.h index 6f16b0dd834a..e79a18e41064 100644 --- a/include/linux/pstore_zone.h +++ b/include/linux/pstore_zone.h @@ -23,11 +23,15 @@ typedef ssize_t (*pstore_zone_write_op)(const char *, size_t, loff_t); * @read: The general read operation. Both of the function parameters * @size and @offset are relative value to storage. * On success, the number of bytes should be returned, others - * means error. - * @write: The same as @read, but -EBUSY means try to write again later. + * mean error. + * @write: The same as @read, but the following error number: + * -EBUSY means try to write again later. + * -ENOMSG means to try next zone. * @panic_write:The write operation only used for panic case. It's optional - * if you do not care panic log. The parameters and return value - * are the same as @read. + * if you do not care panic log. The parameters are relative + * value to storage. + * On success, the number of bytes should be returned, others + * excluding -ENOMSG mean error. -ENOMSG means to try next zone. */ struct pstore_zone_info { struct module *owner; From 1525fb3bb6d69028b3941d34363397c28345ffab Mon Sep 17 00:00:00 2001 From: WeiXiong Liao Date: Wed, 25 Mar 2020 16:55:04 +0800 Subject: [PATCH 32/35] pstore/blk: Provide way to query pstore configuration In order to configure itself, the MTD backend needs to be able to query the current pstore configuration. Introduce pstore_blk_get_config() for this purpose. Signed-off-by: WeiXiong Liao Link: https://lore.kernel.org/lkml/20200511233229.27745-9-keescook@chromium.org/ Co-developed-by: Kees Cook Signed-off-by: Kees Cook --- fs/pstore/blk.c | 37 ++++++++++++++++++++++++++++++------- include/linux/pstore_blk.h | 28 ++++++++++++++++++++++++++++ 2 files changed, 58 insertions(+), 7 deletions(-) diff --git a/fs/pstore/blk.c b/fs/pstore/blk.c index 67343d6aa27a..631bc27c8661 100644 --- a/fs/pstore/blk.c +++ b/fs/pstore/blk.c @@ -94,6 +94,17 @@ struct bdev_info { sector_t start_sect; }; +#define check_size(name, alignsize) ({ \ + long _##name_ = (name); \ + _##name_ = _##name_ <= 0 ? 0 : (_##name_ * 1024); \ + if (_##name_ & ((alignsize) - 1)) { \ + pr_info(#name " must align to %d\n", \ + (alignsize)); \ + _##name_ = ALIGN(name, (alignsize)); \ + } \ + _##name_; \ +}) + /** * struct pstore_device_info - back-end pstore/blk driver structure. * @@ -149,13 +160,11 @@ static int psblk_register_do(struct pstore_device_info *dev) dev->flags = UINT_MAX; #define verify_size(name, alignsize, enabled) { \ - long _##name_ = (enabled) ? (name) : 0; \ - _##name_ = _##name_ <= 0 ? 0 : (_##name_ * 1024); \ - if (_##name_ & ((alignsize) - 1)) { \ - pr_info(#name " must align to %d\n", \ - (alignsize)); \ - _##name_ = ALIGN(name, (alignsize)); \ - } \ + long _##name_; \ + if (enabled) \ + _##name_ = check_size(name, alignsize); \ + else \ + _##name_ = 0; \ name = _##name_ / 1024; \ pstore_zone_info->name = _##name_; \ } @@ -453,6 +462,20 @@ void unregister_pstore_blk(unsigned int major) } EXPORT_SYMBOL_GPL(unregister_pstore_blk); +/* get information of pstore/blk */ +int pstore_blk_get_config(struct pstore_blk_config *info) +{ + strncpy(info->device, blkdev, 80); + info->max_reason = max_reason; + info->kmsg_size = check_size(kmsg_size, 4096); + info->pmsg_size = check_size(pmsg_size, 4096); + info->ftrace_size = check_size(ftrace_size, 4096); + info->console_size = check_size(console_size, 4096); + + return 0; +} +EXPORT_SYMBOL_GPL(pstore_blk_get_config); + static void __exit pstore_blk_exit(void) { mutex_lock(&pstore_blk_lock); diff --git a/include/linux/pstore_blk.h b/include/linux/pstore_blk.h index ccba8c068752..0c40774e71e0 100644 --- a/include/linux/pstore_blk.h +++ b/include/linux/pstore_blk.h @@ -49,4 +49,32 @@ struct pstore_blk_info { int register_pstore_blk(struct pstore_blk_info *info); void unregister_pstore_blk(unsigned int major); +/** + * struct pstore_blk_config - the pstore_blk backend configuration + * + * @device: Name of the desired block device + * @max_reason: Maximum kmsg dump reason to store to block device + * @kmsg_size: Total size of for kmsg dumps + * @pmsg_size: Total size of the pmsg storage area + * @console_size: Total size of the console storage area + * @ftrace_size: Total size for ftrace logging data (for all CPUs) + */ +struct pstore_blk_config { + char device[80]; + enum kmsg_dump_reason max_reason; + unsigned long kmsg_size; + unsigned long pmsg_size; + unsigned long console_size; + unsigned long ftrace_size; +}; + +/** + * pstore_blk_get_config - get a copy of the pstore_blk backend configuration + * + * @info: The sturct pstore_blk_config to be filled in + * + * Failure returns negative error code, and success returns 0. + */ +int pstore_blk_get_config(struct pstore_blk_config *info); + #endif From 7dcb7848ba110ff192efc917d1a6de66b4c9ca4f Mon Sep 17 00:00:00 2001 From: WeiXiong Liao Date: Wed, 25 Mar 2020 16:55:05 +0800 Subject: [PATCH 33/35] pstore/blk: Support non-block storage devices Add support for non-block devices (e.g. MTD). A non-block driver calls pstore_blk_register_device() to register iself. In addition, pstore/zone is updated to handle non-block devices, where an erase must be done before a write. Without this, there is no way to remove records stored to an MTD. Signed-off-by: WeiXiong Liao Link: https://lore.kernel.org/lkml/20200511233229.27745-10-keescook@chromium.org/ Co-developed-by: Kees Cook Signed-off-by: Kees Cook --- Documentation/admin-guide/pstore-blk.rst | 17 ++++- fs/pstore/blk.c | 95 +++++++++++++----------- fs/pstore/zone.c | 8 +- include/linux/pstore_blk.h | 38 ++++++++++ include/linux/pstore_zone.h | 6 ++ 5 files changed, 115 insertions(+), 49 deletions(-) diff --git a/Documentation/admin-guide/pstore-blk.rst b/Documentation/admin-guide/pstore-blk.rst index bef8c7436721..d45341e55e82 100644 --- a/Documentation/admin-guide/pstore-blk.rst +++ b/Documentation/admin-guide/pstore-blk.rst @@ -7,8 +7,8 @@ Introduction ------------ pstore block (pstore/blk) is an oops/panic logger that writes its logs to a -block device before the system crashes. You can get these log files by -mounting pstore filesystem like:: +block device and non-block device before the system crashes. You can get +these log files by mounting pstore filesystem like:: mount -t pstore pstore /sys/fs/pstore @@ -24,8 +24,8 @@ Configurations for user determine how pstore/blk works, such as pmsg_size, kmsg_size and so on. All of them support both Kconfig and module parameters, but module parameters have priority over Kconfig. -Configurations for driver are all about block device, such as total_size -of block device and read/write operations. +Configurations for driver are all about block device and non-block device, +such as total_size of block device and read/write operations. Configurations for user ----------------------- @@ -152,6 +152,15 @@ driver uses ``register_pstore_blk`` to register to pstore/blk. .. kernel-doc:: fs/pstore/blk.c :identifiers: register_pstore_blk +A non-block device driver uses ``register_pstore_device`` with +``struct pstore_device_info`` to register to pstore/blk. + +.. kernel-doc:: fs/pstore/blk.c + :identifiers: register_pstore_device + +.. kernel-doc:: include/linux/pstore_blk.h + :identifiers: pstore_device_info + Compression and header ---------------------- diff --git a/fs/pstore/blk.c b/fs/pstore/blk.c index 631bc27c8661..881b40ed8142 100644 --- a/fs/pstore/blk.c +++ b/fs/pstore/blk.c @@ -105,55 +105,22 @@ struct bdev_info { _##name_; \ }) -/** - * struct pstore_device_info - back-end pstore/blk driver structure. - * - * @total_size: The total size in bytes pstore/blk can use. It must be greater - * than 4096 and be multiple of 4096. - * @flags: Refer to macro starting with PSTORE_FLAGS defined in - * linux/pstore.h. It means what front-ends this device support. - * Zero means all backends for compatible. - * @read: The general read operation. Both of the function parameters - * @size and @offset are relative value to bock device (not the - * whole disk). - * On success, the number of bytes should be returned, others - * means error. - * @write: The same as @read, but the following error number: - * -EBUSY means try to write again later. - * -ENOMSG means to try next zone. - * @panic_write:The write operation only used for panic case. It's optional - * if you do not care panic log. The parameters are relative - * value to storage. - * On success, the number of bytes should be returned, others - * excluding -ENOMSG mean error. -ENOMSG means to try next zone. - */ -struct pstore_device_info { - unsigned long total_size; - unsigned int flags; - pstore_zone_read_op read; - pstore_zone_write_op write; - pstore_zone_write_op panic_write; -}; - -static int psblk_register_do(struct pstore_device_info *dev) +static int __register_pstore_device(struct pstore_device_info *dev) { int ret; + lockdep_assert_held(&pstore_blk_lock); + if (!dev || !dev->total_size || !dev->read || !dev->write) return -EINVAL; - mutex_lock(&pstore_blk_lock); - /* someone already registered before */ - if (pstore_zone_info) { - mutex_unlock(&pstore_blk_lock); + if (pstore_zone_info) return -EBUSY; - } + pstore_zone_info = kzalloc(sizeof(struct pstore_zone_info), GFP_KERNEL); - if (!pstore_zone_info) { - mutex_unlock(&pstore_blk_lock); + if (!pstore_zone_info) return -ENOMEM; - } /* zero means not limit on which backends to attempt to store. */ if (!dev->flags) @@ -179,6 +146,7 @@ static int psblk_register_do(struct pstore_device_info *dev) pstore_zone_info->max_reason = max_reason; pstore_zone_info->read = dev->read; pstore_zone_info->write = dev->write; + pstore_zone_info->erase = dev->erase; pstore_zone_info->panic_write = dev->panic_write; pstore_zone_info->name = KBUILD_MODNAME; pstore_zone_info->owner = THIS_MODULE; @@ -188,20 +156,51 @@ static int psblk_register_do(struct pstore_device_info *dev) kfree(pstore_zone_info); pstore_zone_info = NULL; } - mutex_unlock(&pstore_blk_lock); return ret; } - -static void psblk_unregister_do(struct pstore_device_info *dev) +/** + * register_pstore_device() - register non-block device to pstore/blk + * + * @dev: non-block device information + * + * Return: + * * 0 - OK + * * Others - something error. + */ +int register_pstore_device(struct pstore_device_info *dev) { + int ret; + mutex_lock(&pstore_blk_lock); + ret = __register_pstore_device(dev); + mutex_unlock(&pstore_blk_lock); + + return ret; +} +EXPORT_SYMBOL_GPL(register_pstore_device); + +static void __unregister_pstore_device(struct pstore_device_info *dev) +{ + lockdep_assert_held(&pstore_blk_lock); if (pstore_zone_info && pstore_zone_info->read == dev->read) { unregister_pstore_zone(pstore_zone_info); kfree(pstore_zone_info); pstore_zone_info = NULL; } +} + +/** + * unregister_pstore_device() - unregister non-block device from pstore/blk + * + * @dev: non-block device information + */ +void unregister_pstore_device(struct pstore_device_info *dev) +{ + mutex_lock(&pstore_blk_lock); + __unregister_pstore_device(dev); mutex_unlock(&pstore_blk_lock); } +EXPORT_SYMBOL_GPL(unregister_pstore_device); /** * psblk_get_bdev() - open block device @@ -396,9 +395,10 @@ static int __register_pstore_blk(struct pstore_blk_info *info) dev.flags = info->flags; dev.read = psblk_generic_blk_read; dev.write = psblk_generic_blk_write; + dev.erase = NULL; dev.panic_write = info->panic_write ? psblk_blk_panic_write : NULL; - ret = psblk_register_do(&dev); + ret = __register_pstore_device(&dev); if (ret) goto err_put_bdev; @@ -442,7 +442,7 @@ static void __unregister_pstore_blk(unsigned int major) lockdep_assert_held(&pstore_blk_lock); if (psblk_bdev && MAJOR(psblk_bdev->bd_dev) == major) { - psblk_unregister_do(&dev); + __unregister_pstore_device(&dev); psblk_put_bdev(psblk_bdev, holder); blkdev_panic_write = NULL; psblk_bdev = NULL; @@ -481,6 +481,13 @@ static void __exit pstore_blk_exit(void) mutex_lock(&pstore_blk_lock); if (psblk_bdev) __unregister_pstore_blk(MAJOR(psblk_bdev->bd_dev)); + else { + struct pstore_device_info dev = { }; + + if (pstore_zone_info) + dev.read = pstore_zone_info->read; + __unregister_pstore_device(&dev); + } mutex_unlock(&pstore_blk_lock); } module_exit(pstore_blk_exit); diff --git a/fs/pstore/zone.c b/fs/pstore/zone.c index add26b125984..819428dfa32f 100644 --- a/fs/pstore/zone.c +++ b/fs/pstore/zone.c @@ -660,15 +660,21 @@ static inline int psz_kmsg_erase(struct psz_context *cxt, struct psz_buffer *buffer = zone->buffer; struct psz_kmsg_header *hdr = (struct psz_kmsg_header *)buffer->data; + size_t size; if (unlikely(!psz_ok(zone))) return 0; + /* this zone is already updated, no need to erase */ if (record->count != hdr->counter) return 0; + size = buffer_datalen(zone) + sizeof(*zone->buffer); atomic_set(&zone->buffer->datalen, 0); - return psz_zone_write(zone, FLUSH_META, NULL, 0, 0); + if (cxt->pstore_zone_info->erase) + return cxt->pstore_zone_info->erase(size, zone->off); + else + return psz_zone_write(zone, FLUSH_META, NULL, 0, 0); } static inline int psz_record_erase(struct psz_context *cxt, diff --git a/include/linux/pstore_blk.h b/include/linux/pstore_blk.h index 0c40774e71e0..61e914522b01 100644 --- a/include/linux/pstore_blk.h +++ b/include/linux/pstore_blk.h @@ -49,6 +49,44 @@ struct pstore_blk_info { int register_pstore_blk(struct pstore_blk_info *info); void unregister_pstore_blk(unsigned int major); +/** + * struct pstore_device_info - back-end pstore/blk driver structure. + * + * @total_size: The total size in bytes pstore/blk can use. It must be greater + * than 4096 and be multiple of 4096. + * @flags: Refer to macro starting with PSTORE_FLAGS defined in + * linux/pstore.h. It means what front-ends this device support. + * Zero means all backends for compatible. + * @read: The general read operation. Both of the function parameters + * @size and @offset are relative value to bock device (not the + * whole disk). + * On success, the number of bytes should be returned, others + * means error. + * @write: The same as @read, but the following error number: + * -EBUSY means try to write again later. + * -ENOMSG means to try next zone. + * @erase: The general erase operation for device with special removing + * job. Both of the function parameters @size and @offset are + * relative value to storage. + * Return 0 on success and others on failure. + * @panic_write:The write operation only used for panic case. It's optional + * if you do not care panic log. The parameters are relative + * value to storage. + * On success, the number of bytes should be returned, others + * excluding -ENOMSG mean error. -ENOMSG means to try next zone. + */ +struct pstore_device_info { + unsigned long total_size; + unsigned int flags; + pstore_zone_read_op read; + pstore_zone_write_op write; + pstore_zone_erase_op erase; + pstore_zone_write_op panic_write; +}; + +int register_pstore_device(struct pstore_device_info *dev); +void unregister_pstore_device(struct pstore_device_info *dev); + /** * struct pstore_blk_config - the pstore_blk backend configuration * diff --git a/include/linux/pstore_zone.h b/include/linux/pstore_zone.h index e79a18e41064..1e35eaa33e5e 100644 --- a/include/linux/pstore_zone.h +++ b/include/linux/pstore_zone.h @@ -7,6 +7,7 @@ typedef ssize_t (*pstore_zone_read_op)(char *, size_t, loff_t); typedef ssize_t (*pstore_zone_write_op)(const char *, size_t, loff_t); +typedef ssize_t (*pstore_zone_erase_op)(size_t, loff_t); /** * struct pstore_zone_info - pstore/zone back-end driver structure * @@ -27,6 +28,10 @@ typedef ssize_t (*pstore_zone_write_op)(const char *, size_t, loff_t); * @write: The same as @read, but the following error number: * -EBUSY means try to write again later. * -ENOMSG means to try next zone. + * @erase: The general erase operation for device with special removing + * job. Both of the function parameters @size and @offset are + * relative value to storage. + * Return 0 on success and others on failure. * @panic_write:The write operation only used for panic case. It's optional * if you do not care panic log. The parameters are relative * value to storage. @@ -45,6 +50,7 @@ struct pstore_zone_info { unsigned long ftrace_size; pstore_zone_read_op read; pstore_zone_write_op write; + pstore_zone_erase_op erase; pstore_zone_write_op panic_write; }; From f8feafeaeedbf0a324c373c5fa29a2098a69c458 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 8 May 2020 08:34:01 -0700 Subject: [PATCH 34/35] pstore/blk: Introduce "best_effort" mode In order to use arbitrary block devices as a pstore backend, provide a new module param named "best_effort", which will allow using any block device, even if it has not provided a panic_write callback. Link: https://lore.kernel.org/lkml/20200511233229.27745-12-keescook@chromium.org/ Signed-off-by: Kees Cook --- fs/pstore/blk.c | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/fs/pstore/blk.c b/fs/pstore/blk.c index 881b40ed8142..fcd5563dde06 100644 --- a/fs/pstore/blk.c +++ b/fs/pstore/blk.c @@ -51,6 +51,10 @@ static long ftrace_size = -1; module_param(ftrace_size, long, 0400); MODULE_PARM_DESC(ftrace_size, "ftrace size in kbytes"); +static bool best_effort; +module_param(best_effort, bool, 0400); +MODULE_PARM_DESC(best_effort, "use best effort to write (i.e. do not require storage driver pstore support, default: off)"); + /* * blkdev - the block device to use for pstore storage * @@ -374,7 +378,8 @@ static int __register_pstore_blk(struct pstore_blk_info *info) } /* only allow driver matching the @blkdev */ - if (!binfo.devt || MAJOR(binfo.devt) != info->major) { + if (!binfo.devt || (!best_effort && + MAJOR(binfo.devt) != info->major)) { pr_debug("invalid major %u (expect %u)\n", info->major, MAJOR(binfo.devt)); ret = -ENODEV; @@ -476,6 +481,20 @@ int pstore_blk_get_config(struct pstore_blk_config *info) } EXPORT_SYMBOL_GPL(pstore_blk_get_config); +static int __init pstore_blk_init(void) +{ + struct pstore_blk_info info = { }; + int ret = 0; + + mutex_lock(&pstore_blk_lock); + if (!pstore_zone_info && best_effort && blkdev[0]) + ret = __register_pstore_blk(&info); + mutex_unlock(&pstore_blk_lock); + + return ret; +} +late_initcall(pstore_blk_init); + static void __exit pstore_blk_exit(void) { mutex_lock(&pstore_blk_lock); From 78c08247b9d3e03192f8b359aa079024e805a948 Mon Sep 17 00:00:00 2001 From: WeiXiong Liao Date: Wed, 25 Mar 2020 16:55:06 +0800 Subject: [PATCH 35/35] mtd: Support kmsg dumper based on pstore/blk This introduces mtdpstore, which is similar to mtdoops but more powerful. It uses pstore/blk, and aims to store panic and oops logs to a flash partition, where pstore can later read back and present as files in the mounted pstore filesystem. To make mtdpstore work, the "blkdev" of pstore/blk should be set as MTD device name or MTD device number. For more details, see Documentation/admin-guide/pstore-blk.rst This solves a number of issues: - Work duplication: both of pstore and mtdoops do the same job storing panic/oops log. They have very similar logic, registering to kmsg dumper and storing logs to several chunks one by one. - Layer violations: drivers should provides methods instead of polices. MTD should provide read/write/erase operations, and allow a higher level drivers to provide the chunk management, kmsg dump configuration, etc. - Missing features: pstore provides many additional features, including presenting the logs as files, logging dump time and count, and supporting other frontends like pmsg, console, etc. Signed-off-by: WeiXiong Liao Link: https://lore.kernel.org/lkml/20200511233229.27745-11-keescook@chromium.org/ Link: https://lore.kernel.org/r/1589266715-4168-1-git-send-email-liaoweixiong@allwinnertech.com Signed-off-by: Kees Cook --- Documentation/admin-guide/pstore-blk.rst | 9 +- drivers/mtd/Kconfig | 10 + drivers/mtd/Makefile | 1 + drivers/mtd/mtdpstore.c | 578 +++++++++++++++++++++++ 4 files changed, 596 insertions(+), 2 deletions(-) create mode 100644 drivers/mtd/mtdpstore.c diff --git a/Documentation/admin-guide/pstore-blk.rst b/Documentation/admin-guide/pstore-blk.rst index d45341e55e82..296d5027787a 100644 --- a/Documentation/admin-guide/pstore-blk.rst +++ b/Documentation/admin-guide/pstore-blk.rst @@ -43,9 +43,9 @@ blkdev ~~~~~~ The block device to use. Most of the time, it is a partition of block device. -It's required for pstore/blk. +It's required for pstore/blk. It is also used for MTD device. -It accepts the following variants: +It accepts the following variants for block device: 1. device number in hexadecimal represents itself; no leading 0x, for example b302. @@ -64,6 +64,11 @@ It accepts the following variants: partition with a known unique id. #. : major and minor number of the device separated by a colon. +It accepts the following variants for MTD device: + +1. MTD device name. "pstore" is recommended. +#. MTD device number. + kmsg_size ~~~~~~~~~ diff --git a/drivers/mtd/Kconfig b/drivers/mtd/Kconfig index 42d401ea60ee..6ddab796216d 100644 --- a/drivers/mtd/Kconfig +++ b/drivers/mtd/Kconfig @@ -170,6 +170,16 @@ config MTD_OOPS buffer in a flash partition where it can be read back at some later point. +config MTD_PSTORE + tristate "Log panic/oops to an MTD buffer based on pstore" + depends on PSTORE_BLK + help + This enables panic and oops messages to be logged to a circular + buffer in a flash partition where it can be read back as files after + mounting pstore filesystem. + + If unsure, say N. + config MTD_SWAP tristate "Swap on MTD device support" depends on MTD && SWAP diff --git a/drivers/mtd/Makefile b/drivers/mtd/Makefile index 56cc60ccc477..593d0593a038 100644 --- a/drivers/mtd/Makefile +++ b/drivers/mtd/Makefile @@ -20,6 +20,7 @@ obj-$(CONFIG_RFD_FTL) += rfd_ftl.o obj-$(CONFIG_SSFDC) += ssfdc.o obj-$(CONFIG_SM_FTL) += sm_ftl.o obj-$(CONFIG_MTD_OOPS) += mtdoops.o +obj-$(CONFIG_MTD_PSTORE) += mtdpstore.o obj-$(CONFIG_MTD_SWAP) += mtdswap.o nftl-objs := nftlcore.o nftlmount.o diff --git a/drivers/mtd/mtdpstore.c b/drivers/mtd/mtdpstore.c new file mode 100644 index 000000000000..a4fe6060b960 --- /dev/null +++ b/drivers/mtd/mtdpstore.c @@ -0,0 +1,578 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define dev_fmt(fmt) "mtdoops-pstore: " fmt + +#include +#include +#include +#include +#include + +static struct mtdpstore_context { + int index; + struct pstore_blk_config info; + struct pstore_device_info dev; + struct mtd_info *mtd; + unsigned long *rmmap; /* removed bit map */ + unsigned long *usedmap; /* used bit map */ + /* + * used for panic write + * As there are no block_isbad for panic case, we should keep this + * status before panic to ensure panic_write not failed. + */ + unsigned long *badmap; /* bad block bit map */ +} oops_cxt; + +static int mtdpstore_block_isbad(struct mtdpstore_context *cxt, loff_t off) +{ + int ret; + struct mtd_info *mtd = cxt->mtd; + u64 blknum; + + off = ALIGN_DOWN(off, mtd->erasesize); + blknum = div_u64(off, mtd->erasesize); + + if (test_bit(blknum, cxt->badmap)) + return true; + ret = mtd_block_isbad(mtd, off); + if (ret < 0) { + dev_err(&mtd->dev, "mtd_block_isbad failed, aborting\n"); + return ret; + } else if (ret > 0) { + set_bit(blknum, cxt->badmap); + return true; + } + return false; +} + +static inline int mtdpstore_panic_block_isbad(struct mtdpstore_context *cxt, + loff_t off) +{ + struct mtd_info *mtd = cxt->mtd; + u64 blknum; + + off = ALIGN_DOWN(off, mtd->erasesize); + blknum = div_u64(off, mtd->erasesize); + return test_bit(blknum, cxt->badmap); +} + +static inline void mtdpstore_mark_used(struct mtdpstore_context *cxt, + loff_t off) +{ + struct mtd_info *mtd = cxt->mtd; + u64 zonenum = div_u64(off, cxt->info.kmsg_size); + + dev_dbg(&mtd->dev, "mark zone %llu used\n", zonenum); + set_bit(zonenum, cxt->usedmap); +} + +static inline void mtdpstore_mark_unused(struct mtdpstore_context *cxt, + loff_t off) +{ + struct mtd_info *mtd = cxt->mtd; + u64 zonenum = div_u64(off, cxt->info.kmsg_size); + + dev_dbg(&mtd->dev, "mark zone %llu unused\n", zonenum); + clear_bit(zonenum, cxt->usedmap); +} + +static inline void mtdpstore_block_mark_unused(struct mtdpstore_context *cxt, + loff_t off) +{ + struct mtd_info *mtd = cxt->mtd; + u32 zonecnt = mtd->erasesize / cxt->info.kmsg_size; + u64 zonenum; + + off = ALIGN_DOWN(off, mtd->erasesize); + zonenum = div_u64(off, cxt->info.kmsg_size); + while (zonecnt > 0) { + dev_dbg(&mtd->dev, "mark zone %llu unused\n", zonenum); + clear_bit(zonenum, cxt->usedmap); + zonenum++; + zonecnt--; + } +} + +static inline int mtdpstore_is_used(struct mtdpstore_context *cxt, loff_t off) +{ + u64 zonenum = div_u64(off, cxt->info.kmsg_size); + u64 blknum = div_u64(off, cxt->mtd->erasesize); + + if (test_bit(blknum, cxt->badmap)) + return true; + return test_bit(zonenum, cxt->usedmap); +} + +static int mtdpstore_block_is_used(struct mtdpstore_context *cxt, + loff_t off) +{ + struct mtd_info *mtd = cxt->mtd; + u32 zonecnt = mtd->erasesize / cxt->info.kmsg_size; + u64 zonenum; + + off = ALIGN_DOWN(off, mtd->erasesize); + zonenum = div_u64(off, cxt->info.kmsg_size); + while (zonecnt > 0) { + if (test_bit(zonenum, cxt->usedmap)) + return true; + zonenum++; + zonecnt--; + } + return false; +} + +static int mtdpstore_is_empty(struct mtdpstore_context *cxt, char *buf, + size_t size) +{ + struct mtd_info *mtd = cxt->mtd; + size_t sz; + int i; + + sz = min_t(uint32_t, size, mtd->writesize / 4); + for (i = 0; i < sz; i++) { + if (buf[i] != (char)0xFF) + return false; + } + return true; +} + +static void mtdpstore_mark_removed(struct mtdpstore_context *cxt, loff_t off) +{ + struct mtd_info *mtd = cxt->mtd; + u64 zonenum = div_u64(off, cxt->info.kmsg_size); + + dev_dbg(&mtd->dev, "mark zone %llu removed\n", zonenum); + set_bit(zonenum, cxt->rmmap); +} + +static void mtdpstore_block_clear_removed(struct mtdpstore_context *cxt, + loff_t off) +{ + struct mtd_info *mtd = cxt->mtd; + u32 zonecnt = mtd->erasesize / cxt->info.kmsg_size; + u64 zonenum; + + off = ALIGN_DOWN(off, mtd->erasesize); + zonenum = div_u64(off, cxt->info.kmsg_size); + while (zonecnt > 0) { + clear_bit(zonenum, cxt->rmmap); + zonenum++; + zonecnt--; + } +} + +static int mtdpstore_block_is_removed(struct mtdpstore_context *cxt, + loff_t off) +{ + struct mtd_info *mtd = cxt->mtd; + u32 zonecnt = mtd->erasesize / cxt->info.kmsg_size; + u64 zonenum; + + off = ALIGN_DOWN(off, mtd->erasesize); + zonenum = div_u64(off, cxt->info.kmsg_size); + while (zonecnt > 0) { + if (test_bit(zonenum, cxt->rmmap)) + return true; + zonenum++; + zonecnt--; + } + return false; +} + +static int mtdpstore_erase_do(struct mtdpstore_context *cxt, loff_t off) +{ + struct mtd_info *mtd = cxt->mtd; + struct erase_info erase; + int ret; + + off = ALIGN_DOWN(off, cxt->mtd->erasesize); + dev_dbg(&mtd->dev, "try to erase off 0x%llx\n", off); + erase.len = cxt->mtd->erasesize; + erase.addr = off; + ret = mtd_erase(cxt->mtd, &erase); + if (!ret) + mtdpstore_block_clear_removed(cxt, off); + else + dev_err(&mtd->dev, "erase of region [0x%llx, 0x%llx] on \"%s\" failed\n", + (unsigned long long)erase.addr, + (unsigned long long)erase.len, cxt->info.device); + return ret; +} + +/* + * called while removing file + * + * Avoiding over erasing, do erase block only when the whole block is unused. + * If the block contains valid log, do erase lazily on flush_removed() when + * unregister. + */ +static ssize_t mtdpstore_erase(size_t size, loff_t off) +{ + struct mtdpstore_context *cxt = &oops_cxt; + + if (mtdpstore_block_isbad(cxt, off)) + return -EIO; + + mtdpstore_mark_unused(cxt, off); + + /* If the block still has valid data, mtdpstore do erase lazily */ + if (likely(mtdpstore_block_is_used(cxt, off))) { + mtdpstore_mark_removed(cxt, off); + return 0; + } + + /* all zones are unused, erase it */ + return mtdpstore_erase_do(cxt, off); +} + +/* + * What is security for mtdpstore? + * As there is no erase for panic case, we should ensure at least one zone + * is writable. Otherwise, panic write will fail. + * If zone is used, write operation will return -ENOMSG, which means that + * pstore/blk will try one by one until gets an empty zone. So, it is not + * needed to ensure the next zone is empty, but at least one. + */ +static int mtdpstore_security(struct mtdpstore_context *cxt, loff_t off) +{ + int ret = 0, i; + struct mtd_info *mtd = cxt->mtd; + u32 zonenum = (u32)div_u64(off, cxt->info.kmsg_size); + u32 zonecnt = (u32)div_u64(cxt->mtd->size, cxt->info.kmsg_size); + u32 blkcnt = (u32)div_u64(cxt->mtd->size, cxt->mtd->erasesize); + u32 erasesize = cxt->mtd->erasesize; + + for (i = 0; i < zonecnt; i++) { + u32 num = (zonenum + i) % zonecnt; + + /* found empty zone */ + if (!test_bit(num, cxt->usedmap)) + return 0; + } + + /* If there is no any empty zone, we have no way but to do erase */ + while (blkcnt--) { + div64_u64_rem(off + erasesize, cxt->mtd->size, (u64 *)&off); + + if (mtdpstore_block_isbad(cxt, off)) + continue; + + ret = mtdpstore_erase_do(cxt, off); + if (!ret) { + mtdpstore_block_mark_unused(cxt, off); + break; + } + } + + if (ret) + dev_err(&mtd->dev, "all blocks bad!\n"); + dev_dbg(&mtd->dev, "end security\n"); + return ret; +} + +static ssize_t mtdpstore_write(const char *buf, size_t size, loff_t off) +{ + struct mtdpstore_context *cxt = &oops_cxt; + struct mtd_info *mtd = cxt->mtd; + size_t retlen; + int ret; + + if (mtdpstore_block_isbad(cxt, off)) + return -ENOMSG; + + /* zone is used, please try next one */ + if (mtdpstore_is_used(cxt, off)) + return -ENOMSG; + + dev_dbg(&mtd->dev, "try to write off 0x%llx size %zu\n", off, size); + ret = mtd_write(cxt->mtd, off, size, &retlen, (u_char *)buf); + if (ret < 0 || retlen != size) { + dev_err(&mtd->dev, "write failure at %lld (%zu of %zu written), err %d\n", + off, retlen, size, ret); + return -EIO; + } + mtdpstore_mark_used(cxt, off); + + mtdpstore_security(cxt, off); + return retlen; +} + +static inline bool mtdpstore_is_io_error(int ret) +{ + return ret < 0 && !mtd_is_bitflip(ret) && !mtd_is_eccerr(ret); +} + +/* + * All zones will be read as pstore/blk will read zone one by one when do + * recover. + */ +static ssize_t mtdpstore_read(char *buf, size_t size, loff_t off) +{ + struct mtdpstore_context *cxt = &oops_cxt; + struct mtd_info *mtd = cxt->mtd; + size_t retlen, done; + int ret; + + if (mtdpstore_block_isbad(cxt, off)) + return -ENOMSG; + + dev_dbg(&mtd->dev, "try to read off 0x%llx size %zu\n", off, size); + for (done = 0, retlen = 0; done < size; done += retlen) { + retlen = 0; + + ret = mtd_read(cxt->mtd, off + done, size - done, &retlen, + (u_char *)buf + done); + if (mtdpstore_is_io_error(ret)) { + dev_err(&mtd->dev, "read failure at %lld (%zu of %zu read), err %d\n", + off + done, retlen, size - done, ret); + /* the zone may be broken, try next one */ + return -ENOMSG; + } + + /* + * ECC error. The impact on log data is so small. Maybe we can + * still read it and try to understand. So mtdpstore just hands + * over what it gets and user can judge whether the data is + * valid or not. + */ + if (mtd_is_eccerr(ret)) { + dev_err(&mtd->dev, "ecc error at %lld (%zu of %zu read), err %d\n", + off + done, retlen, size - done, ret); + /* driver may not set retlen when ecc error */ + retlen = retlen == 0 ? size - done : retlen; + } + } + + if (mtdpstore_is_empty(cxt, buf, size)) + mtdpstore_mark_unused(cxt, off); + else + mtdpstore_mark_used(cxt, off); + + mtdpstore_security(cxt, off); + return retlen; +} + +static ssize_t mtdpstore_panic_write(const char *buf, size_t size, loff_t off) +{ + struct mtdpstore_context *cxt = &oops_cxt; + struct mtd_info *mtd = cxt->mtd; + size_t retlen; + int ret; + + if (mtdpstore_panic_block_isbad(cxt, off)) + return -ENOMSG; + + /* zone is used, please try next one */ + if (mtdpstore_is_used(cxt, off)) + return -ENOMSG; + + ret = mtd_panic_write(cxt->mtd, off, size, &retlen, (u_char *)buf); + if (ret < 0 || size != retlen) { + dev_err(&mtd->dev, "panic write failure at %lld (%zu of %zu read), err %d\n", + off, retlen, size, ret); + return -EIO; + } + mtdpstore_mark_used(cxt, off); + + return retlen; +} + +static void mtdpstore_notify_add(struct mtd_info *mtd) +{ + int ret; + struct mtdpstore_context *cxt = &oops_cxt; + struct pstore_blk_config *info = &cxt->info; + unsigned long longcnt; + + if (!strcmp(mtd->name, info->device)) + cxt->index = mtd->index; + + if (mtd->index != cxt->index || cxt->index < 0) + return; + + dev_dbg(&mtd->dev, "found matching MTD device %s\n", mtd->name); + + if (mtd->size < info->kmsg_size * 2) { + dev_err(&mtd->dev, "MTD partition %d not big enough\n", + mtd->index); + return; + } + /* + * kmsg_size must be aligned to 4096 Bytes, which is limited by + * psblk. The default value of kmsg_size is 64KB. If kmsg_size + * is larger than erasesize, some errors will occur since mtdpsotre + * is designed on it. + */ + if (mtd->erasesize < info->kmsg_size) { + dev_err(&mtd->dev, "eraseblock size of MTD partition %d too small\n", + mtd->index); + return; + } + if (unlikely(info->kmsg_size % mtd->writesize)) { + dev_err(&mtd->dev, "record size %lu KB must align to write size %d KB\n", + info->kmsg_size / 1024, + mtd->writesize / 1024); + return; + } + + longcnt = BITS_TO_LONGS(div_u64(mtd->size, info->kmsg_size)); + cxt->rmmap = kcalloc(longcnt, sizeof(long), GFP_KERNEL); + cxt->usedmap = kcalloc(longcnt, sizeof(long), GFP_KERNEL); + + longcnt = BITS_TO_LONGS(div_u64(mtd->size, mtd->erasesize)); + cxt->badmap = kcalloc(longcnt, sizeof(long), GFP_KERNEL); + + cxt->dev.total_size = mtd->size; + /* just support dmesg right now */ + cxt->dev.flags = PSTORE_FLAGS_DMESG; + cxt->dev.read = mtdpstore_read; + cxt->dev.write = mtdpstore_write; + cxt->dev.erase = mtdpstore_erase; + cxt->dev.panic_write = mtdpstore_panic_write; + + ret = register_pstore_device(&cxt->dev); + if (ret) { + dev_err(&mtd->dev, "mtd%d register to psblk failed\n", + mtd->index); + return; + } + cxt->mtd = mtd; + dev_info(&mtd->dev, "Attached to MTD device %d\n", mtd->index); +} + +static int mtdpstore_flush_removed_do(struct mtdpstore_context *cxt, + loff_t off, size_t size) +{ + struct mtd_info *mtd = cxt->mtd; + u_char *buf; + int ret; + size_t retlen; + struct erase_info erase; + + buf = kmalloc(mtd->erasesize, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + /* 1st. read to cache */ + ret = mtd_read(mtd, off, mtd->erasesize, &retlen, buf); + if (mtdpstore_is_io_error(ret)) + goto free; + + /* 2nd. erase block */ + erase.len = mtd->erasesize; + erase.addr = off; + ret = mtd_erase(mtd, &erase); + if (ret) + goto free; + + /* 3rd. write back */ + while (size) { + unsigned int zonesize = cxt->info.kmsg_size; + + /* there is valid data on block, write back */ + if (mtdpstore_is_used(cxt, off)) { + ret = mtd_write(mtd, off, zonesize, &retlen, buf); + if (ret) + dev_err(&mtd->dev, "write failure at %lld (%zu of %u written), err %d\n", + off, retlen, zonesize, ret); + } + + off += zonesize; + size -= min_t(unsigned int, zonesize, size); + } + +free: + kfree(buf); + return ret; +} + +/* + * What does mtdpstore_flush_removed() do? + * When user remove any log file on pstore filesystem, mtdpstore should do + * something to ensure log file removed. If the whole block is no longer used, + * it's nice to erase the block. However if the block still contains valid log, + * what mtdpstore can do is to erase and write the valid log back. + */ +static int mtdpstore_flush_removed(struct mtdpstore_context *cxt) +{ + struct mtd_info *mtd = cxt->mtd; + int ret; + loff_t off; + u32 blkcnt = (u32)div_u64(mtd->size, mtd->erasesize); + + for (off = 0; blkcnt > 0; blkcnt--, off += mtd->erasesize) { + ret = mtdpstore_block_isbad(cxt, off); + if (ret) + continue; + + ret = mtdpstore_block_is_removed(cxt, off); + if (!ret) + continue; + + ret = mtdpstore_flush_removed_do(cxt, off, mtd->erasesize); + if (ret) + return ret; + } + return 0; +} + +static void mtdpstore_notify_remove(struct mtd_info *mtd) +{ + struct mtdpstore_context *cxt = &oops_cxt; + + if (mtd->index != cxt->index || cxt->index < 0) + return; + + mtdpstore_flush_removed(cxt); + + unregister_pstore_device(&cxt->dev); + kfree(cxt->badmap); + kfree(cxt->usedmap); + kfree(cxt->rmmap); + cxt->mtd = NULL; + cxt->index = -1; +} + +static struct mtd_notifier mtdpstore_notifier = { + .add = mtdpstore_notify_add, + .remove = mtdpstore_notify_remove, +}; + +static int __init mtdpstore_init(void) +{ + int ret; + struct mtdpstore_context *cxt = &oops_cxt; + struct pstore_blk_config *info = &cxt->info; + + ret = pstore_blk_get_config(info); + if (unlikely(ret)) + return ret; + + if (strlen(info->device) == 0) { + pr_err("mtd device must be supplied (device name is empty)\n"); + return -EINVAL; + } + if (!info->kmsg_size) { + pr_err("no backend enabled (kmsg_size is 0)\n"); + return -EINVAL; + } + + /* Setup the MTD device to use */ + ret = kstrtoint((char *)info->device, 0, &cxt->index); + if (ret) + cxt->index = -1; + + register_mtd_user(&mtdpstore_notifier); + return 0; +} +module_init(mtdpstore_init); + +static void __exit mtdpstore_exit(void) +{ + unregister_mtd_user(&mtdpstore_notifier); +} +module_exit(mtdpstore_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("WeiXiong Liao "); +MODULE_DESCRIPTION("MTD backend for pstore/blk");