From 416ad3c9c0066405b83ec875b75496523549be09 Mon Sep 17 00:00:00 2001 From: Colin Cross Date: Mon, 6 May 2013 23:50:06 +0000 Subject: [PATCH 01/16] freezer: add unsafe versions of freezable helpers for NFS NFS calls the freezable helpers with locks held, which is unsafe and will cause lockdep warnings when 6aa9707 "lockdep: check that no locks held at freeze time" is reapplied (it was reverted in dbf520a). NFS shouldn't be doing this, but it has long-running syscalls that must hold a lock but also shouldn't block suspend. Until NFS freeze handling is rewritten to use a signal to exit out of the critical section, add new *_unsafe versions of the helpers that will not run the lockdep test when 6aa9707 is reapplied, and call them from NFS. In practice the likely result of holding the lock while freezing is that a second task blocked on the lock will never freeze, aborting suspend, but it is possible to manufacture a case using the cgroup freezer, the lock, and the suspend freezer to create a deadlock. Silencing the lockdep warning here will allow problems to be found in other drivers that may have a more serious deadlock risk, and prevent new problems from being added. Signed-off-by: Colin Cross Acked-by: Pavel Machek Acked-by: Tejun Heo Signed-off-by: Rafael J. Wysocki --- fs/nfs/inode.c | 2 +- fs/nfs/nfs3proc.c | 2 +- fs/nfs/nfs4proc.c | 4 ++-- include/linux/freezer.h | 42 +++++++++++++++++++++++++++++++++++++++++- net/sunrpc/sched.c | 2 +- 5 files changed, 46 insertions(+), 6 deletions(-) diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index c1c7a9d78722..ce727047ee87 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -79,7 +79,7 @@ int nfs_wait_bit_killable(void *word) { if (fatal_signal_pending(current)) return -ERESTARTSYS; - freezable_schedule(); + freezable_schedule_unsafe(); return 0; } EXPORT_SYMBOL_GPL(nfs_wait_bit_killable); diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 43ea96ced28c..ce90eb4775c2 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -33,7 +33,7 @@ nfs3_rpc_wrapper(struct rpc_clnt *clnt, struct rpc_message *msg, int flags) res = rpc_call_sync(clnt, msg, flags); if (res != -EJUKEBOX) break; - freezable_schedule_timeout_killable(NFS_JUKEBOX_RETRY_TIME); + freezable_schedule_timeout_killable_unsafe(NFS_JUKEBOX_RETRY_TIME); res = -ERESTARTSYS; } while (!fatal_signal_pending(current)); return res; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 8fbc10054115..9b18af167815 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -268,7 +268,7 @@ static int nfs4_delay(struct rpc_clnt *clnt, long *timeout) *timeout = NFS4_POLL_RETRY_MIN; if (*timeout > NFS4_POLL_RETRY_MAX) *timeout = NFS4_POLL_RETRY_MAX; - freezable_schedule_timeout_killable(*timeout); + freezable_schedule_timeout_killable_unsafe(*timeout); if (fatal_signal_pending(current)) res = -ERESTARTSYS; *timeout <<= 1; @@ -4528,7 +4528,7 @@ int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4 static unsigned long nfs4_set_lock_task_retry(unsigned long timeout) { - freezable_schedule_timeout_killable(timeout); + freezable_schedule_timeout_killable_unsafe(timeout); timeout <<= 1; if (timeout > NFS4_LOCK_MAXTIMEOUT) return NFS4_LOCK_MAXTIMEOUT; diff --git a/include/linux/freezer.h b/include/linux/freezer.h index e70df40d84f6..5b31e21c485f 100644 --- a/include/linux/freezer.h +++ b/include/linux/freezer.h @@ -46,7 +46,11 @@ extern int freeze_kernel_threads(void); extern void thaw_processes(void); extern void thaw_kernel_threads(void); -static inline bool 
try_to_freeze(void) +/* + * DO NOT ADD ANY NEW CALLERS OF THIS FUNCTION + * If try_to_freeze causes a lockdep warning it means the caller may deadlock + */ +static inline bool try_to_freeze_unsafe(void) { might_sleep(); if (likely(!freezing(current))) @@ -54,6 +58,11 @@ static inline bool try_to_freeze(void) return __refrigerator(false); } +static inline bool try_to_freeze(void) +{ + return try_to_freeze_unsafe(); +} + extern bool freeze_task(struct task_struct *p); extern bool set_freezable(void); @@ -115,6 +124,14 @@ static inline void freezer_count(void) try_to_freeze(); } +/* DO NOT ADD ANY NEW CALLERS OF THIS FUNCTION */ +static inline void freezer_count_unsafe(void) +{ + current->flags &= ~PF_FREEZER_SKIP; + smp_mb(); + try_to_freeze_unsafe(); +} + /** * freezer_should_skip - whether to skip a task when determining frozen * state is reached @@ -152,6 +169,14 @@ static inline bool freezer_should_skip(struct task_struct *p) freezer_count(); \ }) +/* DO NOT ADD ANY NEW CALLERS OF THIS FUNCTION */ +#define freezable_schedule_unsafe() \ +({ \ + freezer_do_not_count(); \ + schedule(); \ + freezer_count_unsafe(); \ +}) + /* Like schedule_timeout_killable(), but should not block the freezer. */ #define freezable_schedule_timeout_killable(timeout) \ ({ \ @@ -162,6 +187,16 @@ static inline bool freezer_should_skip(struct task_struct *p) __retval; \ }) +/* DO NOT ADD ANY NEW CALLERS OF THIS FUNCTION */ +#define freezable_schedule_timeout_killable_unsafe(timeout) \ +({ \ + long __retval; \ + freezer_do_not_count(); \ + __retval = schedule_timeout_killable(timeout); \ + freezer_count_unsafe(); \ + __retval; \ +}) + /* * Freezer-friendly wrappers around wait_event_interruptible(), * wait_event_killable() and wait_event_interruptible_timeout(), originally @@ -225,9 +260,14 @@ static inline void set_freezable(void) {} #define freezable_schedule() schedule() +#define freezable_schedule_unsafe() schedule() + #define freezable_schedule_timeout_killable(timeout) \ schedule_timeout_killable(timeout) +#define freezable_schedule_timeout_killable_unsafe(timeout) \ + schedule_timeout_killable(timeout) + #define wait_event_freezable(wq, condition) \ wait_event_interruptible(wq, condition) diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index f8529fc8e542..8dcfadcef5d3 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -254,7 +254,7 @@ static int rpc_wait_bit_killable(void *word) { if (fatal_signal_pending(current)) return -ERESTARTSYS; - freezable_schedule(); + freezable_schedule_unsafe(); return 0; } From 5853cc2a89f726e21d51ca0fd75757a03126a84b Mon Sep 17 00:00:00 2001 From: Colin Cross Date: Tue, 7 May 2013 17:52:05 +0000 Subject: [PATCH 02/16] freezer: add unsafe versions of freezable helpers for CIFS CIFS calls wait_event_freezekillable_unsafe with a VFS lock held, which is unsafe and will cause lockdep warnings when 6aa9707 "lockdep: check that no locks held at freeze time" is reapplied (it was reverted in dbf520a). CIFS shouldn't be doing this, but it has long-running syscalls that must hold a lock but also shouldn't block suspend. Until CIFS freeze handling is rewritten to use a signal to exit out of the critical section, add a new wait_event_freezekillable_unsafe helper that will not run the lockdep test when 6aa9707 is reapplied, and call it from CIFS. 
In practice the likely result of holding the lock while freezing is that a second task blocked on the lock will never freeze, aborting suspend, but it is possible to manufacture a case using the cgroup freezer, the lock, and the suspend freezer to create a deadlock. Silencing the lockdep warning here will allow problems to be found in other drivers that may have a more serious deadlock risk, and prevent new problems from being added. Acked-by: Pavel Machek Acked-by: Tejun Heo Reviewed-by: Jeff Layton Signed-off-by: Colin Cross Signed-off-by: Rafael J. Wysocki --- fs/cifs/transport.c | 2 +- include/linux/freezer.h | 13 +++++++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index bfbf4700d160..b70aa7c91394 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -447,7 +447,7 @@ wait_for_response(struct TCP_Server_Info *server, struct mid_q_entry *midQ) { int error; - error = wait_event_freezekillable(server->response_q, + error = wait_event_freezekillable_unsafe(server->response_q, midQ->mid_state != MID_REQUEST_SUBMITTED); if (error < 0) return -ERESTARTSYS; diff --git a/include/linux/freezer.h b/include/linux/freezer.h index 5b31e21c485f..d3c038ec9a88 100644 --- a/include/linux/freezer.h +++ b/include/linux/freezer.h @@ -212,6 +212,16 @@ static inline bool freezer_should_skip(struct task_struct *p) __retval; \ }) +/* DO NOT ADD ANY NEW CALLERS OF THIS FUNCTION */ +#define wait_event_freezekillable_unsafe(wq, condition) \ +({ \ + int __retval; \ + freezer_do_not_count(); \ + __retval = wait_event_killable(wq, (condition)); \ + freezer_count_unsafe(); \ + __retval; \ +}) + #define wait_event_freezable(wq, condition) \ ({ \ int __retval; \ @@ -277,6 +287,9 @@ static inline void set_freezable(void) {} #define wait_event_freezekillable(wq, condition) \ wait_event_killable(wq, condition) +#define wait_event_freezekillable_unsafe(wq, condition) \ + wait_event_killable(wq, condition) + #endif /* !CONFIG_FREEZER */ #endif /* FREEZER_H_INCLUDED */ From 1b1d2fb4444231f25ddabc598aa2b5a9c0833fba Mon Sep 17 00:00:00 2001 From: Colin Cross Date: Mon, 6 May 2013 23:50:08 +0000 Subject: [PATCH 03/16] lockdep: remove task argument from debug_check_no_locks_held The only existing caller to debug_check_no_locks_held calls it with 'current' as the task, and the freezer needs to call debug_check_no_locks_held but doesn't already have a current task pointer, so remove the argument. It is already assuming that the current task is relevant by dumping the current stack trace as part of the warning. This was originally part of 6aa9707099c (lockdep: check that no locks held at freeze time) which was reverted in dbf520a9d7d4. Original-author: Mandeep Singh Baines Acked-by: Pavel Machek Acked-by: Tejun Heo Signed-off-by: Colin Cross Signed-off-by: Rafael J. 
Wysocki --- include/linux/debug_locks.h | 4 ++-- kernel/exit.c | 2 +- kernel/lockdep.c | 17 ++++++++--------- 3 files changed, 11 insertions(+), 12 deletions(-) diff --git a/include/linux/debug_locks.h b/include/linux/debug_locks.h index 21ca773f77bf..822c1354f3a6 100644 --- a/include/linux/debug_locks.h +++ b/include/linux/debug_locks.h @@ -51,7 +51,7 @@ struct task_struct; extern void debug_show_all_locks(void); extern void debug_show_held_locks(struct task_struct *task); extern void debug_check_no_locks_freed(const void *from, unsigned long len); -extern void debug_check_no_locks_held(struct task_struct *task); +extern void debug_check_no_locks_held(void); #else static inline void debug_show_all_locks(void) { @@ -67,7 +67,7 @@ debug_check_no_locks_freed(const void *from, unsigned long len) } static inline void -debug_check_no_locks_held(struct task_struct *task) +debug_check_no_locks_held(void) { } #endif diff --git a/kernel/exit.c b/kernel/exit.c index af2eb3cbd499..e59756275000 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -835,7 +835,7 @@ void do_exit(long code) /* * Make sure we are holding no locks: */ - debug_check_no_locks_held(tsk); + debug_check_no_locks_held(); /* * We can do this unlocked here. The futex code uses this flag * just to verify whether the pi state cleanup has been done diff --git a/kernel/lockdep.c b/kernel/lockdep.c index 1f3186b37fd5..e16c45b9ee77 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c @@ -4090,7 +4090,7 @@ void debug_check_no_locks_freed(const void *mem_from, unsigned long mem_len) } EXPORT_SYMBOL_GPL(debug_check_no_locks_freed); -static void print_held_locks_bug(struct task_struct *curr) +static void print_held_locks_bug(void) { if (!debug_locks_off()) return; @@ -4099,22 +4099,21 @@ static void print_held_locks_bug(struct task_struct *curr) printk("\n"); printk("=====================================\n"); - printk("[ BUG: lock held at task exit time! ]\n"); + printk("[ BUG: %s/%d still has locks held! ]\n", + current->comm, task_pid_nr(current)); print_kernel_ident(); printk("-------------------------------------\n"); - printk("%s/%d is exiting with locks still held!\n", - curr->comm, task_pid_nr(curr)); - lockdep_print_held_locks(curr); - + lockdep_print_held_locks(current); printk("\nstack backtrace:\n"); dump_stack(); } -void debug_check_no_locks_held(struct task_struct *task) +void debug_check_no_locks_held(void) { - if (unlikely(task->lockdep_depth > 0)) - print_held_locks_bug(task); + if (unlikely(current->lockdep_depth > 0)) + print_held_locks_bug(); } +EXPORT_SYMBOL_GPL(debug_check_no_locks_held); void debug_show_all_locks(void) { From 0f9548ca10916dec166eaf74c816bded7d8e611d Mon Sep 17 00:00:00 2001 From: Mandeep Singh Baines Date: Mon, 6 May 2013 23:50:09 +0000 Subject: [PATCH 04/16] lockdep: check that no locks held at freeze time We shouldn't try_to_freeze if locks are held. Holding a lock can cause a deadlock if the lock is later acquired in the suspend or hibernate path (e.g. by dpm). Holding a lock can also cause a deadlock in the case of cgroup_freezer if a lock is held inside a frozen cgroup that is later acquired by a process outside that group. History: This patch was originally applied as 6aa9707099c and reverted in dbf520a9d7d4 because NFS was freezing with locks held. It was deemed better to keep the bad freeze point in NFS to allow laptops to suspend consistently. 
The previous patch in this series converts NFS to call _unsafe versions of the freezable helpers so that lockdep doesn't complain about them until a more correct fix can be applied. [akpm@linux-foundation.org: export debug_check_no_locks_held] Signed-off-by: Mandeep Singh Baines Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Acked-by: Pavel Machek Acked-by: Tejun Heo Signed-off-by: Colin Cross Signed-off-by: Rafael J. Wysocki --- include/linux/freezer.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/include/linux/freezer.h b/include/linux/freezer.h index d3c038ec9a88..bcf9e651cc85 100644 --- a/include/linux/freezer.h +++ b/include/linux/freezer.h @@ -3,6 +3,7 @@ #ifndef FREEZER_H_INCLUDED #define FREEZER_H_INCLUDED +#include #include #include #include @@ -60,6 +61,8 @@ static inline bool try_to_freeze_unsafe(void) static inline bool try_to_freeze(void) { + if (!(current->flags & PF_NOFREEZE)) + debug_check_no_locks_held(); return try_to_freeze_unsafe(); } From 18ad0c6297df1d671ecea83b608cd9e432642a05 Mon Sep 17 00:00:00 2001 From: Colin Cross Date: Mon, 6 May 2013 23:50:10 +0000 Subject: [PATCH 05/16] freezer: shorten freezer sleep time using exponential backoff All tasks can easily be frozen in under 10 ms, switch to using an initial 1 ms sleep followed by exponential backoff until 8 ms. Also convert the printed time to ms instead of centiseconds. Acked-by: Pavel Machek Acked-by: Tejun Heo Signed-off-by: Colin Cross Signed-off-by: Rafael J. Wysocki --- kernel/power/process.c | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/kernel/power/process.c b/kernel/power/process.c index 98088e0e71e8..fc0df8486449 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c @@ -30,9 +30,10 @@ static int try_to_freeze_tasks(bool user_only) unsigned int todo; bool wq_busy = false; struct timeval start, end; - u64 elapsed_csecs64; - unsigned int elapsed_csecs; + u64 elapsed_msecs64; + unsigned int elapsed_msecs; bool wakeup = false; + int sleep_usecs = USEC_PER_MSEC; do_gettimeofday(&start); @@ -68,22 +69,25 @@ static int try_to_freeze_tasks(bool user_only) /* * We need to retry, but first give the freezing tasks some - * time to enter the refrigerator. + * time to enter the refrigerator. Start with an initial + * 1 ms sleep followed by exponential backoff until 8 ms. */ - msleep(10); + usleep_range(sleep_usecs / 2, sleep_usecs); + if (sleep_usecs < 8 * USEC_PER_MSEC) + sleep_usecs *= 2; } do_gettimeofday(&end); - elapsed_csecs64 = timeval_to_ns(&end) - timeval_to_ns(&start); - do_div(elapsed_csecs64, NSEC_PER_SEC / 100); - elapsed_csecs = elapsed_csecs64; + elapsed_msecs64 = timeval_to_ns(&end) - timeval_to_ns(&start); + do_div(elapsed_msecs64, NSEC_PER_MSEC); + elapsed_msecs = elapsed_msecs64; if (todo) { printk("\n"); - printk(KERN_ERR "Freezing of tasks %s after %d.%02d seconds " + printk(KERN_ERR "Freezing of tasks %s after %d.%03d seconds " "(%d tasks refusing to freeze, wq_busy=%d):\n", wakeup ? "aborted" : "failed", - elapsed_csecs / 100, elapsed_csecs % 100, + elapsed_msecs / 1000, elapsed_msecs % 1000, todo - wq_busy, wq_busy); if (!wakeup) { @@ -96,8 +100,8 @@ static int try_to_freeze_tasks(bool user_only) read_unlock(&tasklist_lock); } } else { - printk("(elapsed %d.%02d seconds) ", elapsed_csecs / 100, - elapsed_csecs % 100); + printk("(elapsed %d.%03d seconds) ", elapsed_msecs / 1000, + elapsed_msecs % 1000); } return todo ? 
-EBUSY : 0; From 613f5d13b569859171f0896fbc73ee0bfa811fda Mon Sep 17 00:00:00 2001 From: Colin Cross Date: Mon, 6 May 2013 23:50:11 +0000 Subject: [PATCH 06/16] freezer: skip waking up tasks with PF_FREEZER_SKIP set Android goes through suspend/resume very often (every few seconds when on a busy wifi network with the screen off), and a significant portion of the energy used to go in and out of suspend is spent in the freezer. If a task has called freezer_do_not_count(), don't bother waking it up. If it happens to wake up later it will call freezer_count() and immediately enter the refrigerator. Combined with patches to convert freezable helpers to use freezer_do_not_count() and convert common sites where idle userspace tasks are blocked to use the freezable helpers, this reduces the time and energy required to suspend and resume. Acked-by: Tejun Heo Acked-by: Pavel Machek Signed-off-by: Colin Cross Signed-off-by: Rafael J. Wysocki --- kernel/freezer.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/kernel/freezer.c b/kernel/freezer.c index c38893b0efba..8b2afc1c9df0 100644 --- a/kernel/freezer.c +++ b/kernel/freezer.c @@ -110,6 +110,18 @@ bool freeze_task(struct task_struct *p) { unsigned long flags; + /* + * This check can race with freezer_do_not_count, but worst case that + * will result in an extra wakeup being sent to the task. It does not + * race with freezer_count(), the barriers in freezer_count() and + * freezer_should_skip() ensure that either freezer_count() sees + * freezing == true in try_to_freeze() and freezes, or + * freezer_should_skip() sees !PF_FREEZER_SKIP and freezes the task + * normally. + */ + if (freezer_should_skip(p)) + return false; + spin_lock_irqsave(&freezer_lock, flags); if (!freezing(p) || frozen(p)) { spin_unlock_irqrestore(&freezer_lock, flags); From b01235861b84c0f6107d3f9da189c9898fc3caaf Mon Sep 17 00:00:00 2001 From: Colin Cross Date: Mon, 6 May 2013 23:50:12 +0000 Subject: [PATCH 07/16] freezer: convert freezable helpers to freezer_do_not_count() Freezing tasks will wake up almost every userspace task from where it is blocking and force it to run until it hits a call to try_to_freeze(), generally on the exit path from the syscall it is blocking in. On resume each task will run again, usually restarting the syscall and running until it hits the same blocking call as it was originally blocked in. Convert the existing wait_event_freezable* wrappers to use freezer_do_not_count(). Combined with a previous patch, these tasks will not run during suspend or resume unless they wake up for another reason, in which case they will run until they hit the try_to_freeze() in freezer_count(), and then continue processing the wakeup after tasks are thawed. This results in a small change in behavior: previously a race between freezing and a normal wakeup would be won by the wakeup; now the task will freeze and then handle the wakeup after thawing. Acked-by: Tejun Heo Signed-off-by: Colin Cross Signed-off-by: Rafael J. 
Wysocki --- include/linux/freezer.h | 22 +++++++--------------- 1 file changed, 7 insertions(+), 15 deletions(-) diff --git a/include/linux/freezer.h b/include/linux/freezer.h index bcf9e651cc85..c71337af9bbd 100644 --- a/include/linux/freezer.h +++ b/include/linux/freezer.h @@ -228,27 +228,19 @@ static inline bool freezer_should_skip(struct task_struct *p) #define wait_event_freezable(wq, condition) \ ({ \ int __retval; \ - for (;;) { \ - __retval = wait_event_interruptible(wq, \ - (condition) || freezing(current)); \ - if (__retval || (condition)) \ - break; \ - try_to_freeze(); \ - } \ + freezer_do_not_count(); \ + __retval = wait_event_interruptible(wq, (condition)); \ + freezer_count(); \ __retval; \ }) #define wait_event_freezable_timeout(wq, condition, timeout) \ ({ \ long __retval = timeout; \ - for (;;) { \ - __retval = wait_event_interruptible_timeout(wq, \ - (condition) || freezing(current), \ - __retval); \ - if (__retval <= 0 || (condition)) \ - break; \ - try_to_freeze(); \ - } \ + freezer_do_not_count(); \ + __retval = wait_event_interruptible_timeout(wq, (condition), \ + __retval); \ + freezer_count(); \ __retval; \ }) From 8ee492d6595573a0d4be168ebda1c7ceb4ec509d Mon Sep 17 00:00:00 2001 From: Colin Cross Date: Mon, 6 May 2013 23:50:13 +0000 Subject: [PATCH 08/16] freezer: convert freezable helpers to static inline where possible Some of the freezable helpers have to be macros because their condition argument needs to get evaluated every time through the wait loop. Convert the others to static inline to make future changes easier. Acked-by: Tejun Heo Signed-off-by: Colin Cross Signed-off-by: Rafael J. Wysocki --- include/linux/freezer.h | 58 ++++++++++++++++++++--------------------- 1 file changed, 29 insertions(+), 29 deletions(-) diff --git a/include/linux/freezer.h b/include/linux/freezer.h index c71337af9bbd..8430d4c51ece 100644 --- a/include/linux/freezer.h +++ b/include/linux/freezer.h @@ -159,46 +159,46 @@ static inline bool freezer_should_skip(struct task_struct *p) } /* - * These macros are intended to be used whenever you want allow a sleeping + * These functions are intended to be used whenever you want allow a sleeping * task to be frozen. Note that neither return any clear indication of * whether a freeze event happened while in this function. */ /* Like schedule(), but should not block the freezer. */ -#define freezable_schedule() \ -({ \ - freezer_do_not_count(); \ - schedule(); \ - freezer_count(); \ -}) +static inline void freezable_schedule(void) +{ + freezer_do_not_count(); + schedule(); + freezer_count(); +} /* DO NOT ADD ANY NEW CALLERS OF THIS FUNCTION */ -#define freezable_schedule_unsafe() \ -({ \ - freezer_do_not_count(); \ - schedule(); \ - freezer_count_unsafe(); \ -}) +static inline void freezable_schedule_unsafe(void) +{ + freezer_do_not_count(); + schedule(); + freezer_count_unsafe(); +} /* Like schedule_timeout_killable(), but should not block the freezer. 
*/ -#define freezable_schedule_timeout_killable(timeout) \ -({ \ - long __retval; \ - freezer_do_not_count(); \ - __retval = schedule_timeout_killable(timeout); \ - freezer_count(); \ - __retval; \ -}) +static inline long freezable_schedule_timeout_killable(long timeout) +{ + long __retval; + freezer_do_not_count(); + __retval = schedule_timeout_killable(timeout); + freezer_count(); + return __retval; +} /* DO NOT ADD ANY NEW CALLERS OF THIS FUNCTION */ -#define freezable_schedule_timeout_killable_unsafe(timeout) \ -({ \ - long __retval; \ - freezer_do_not_count(); \ - __retval = schedule_timeout_killable(timeout); \ - freezer_count_unsafe(); \ - __retval; \ -}) +static inline long freezable_schedule_timeout_killable_unsafe(long timeout) +{ + long __retval; + freezer_do_not_count(); + __retval = schedule_timeout_killable(timeout); + freezer_count_unsafe(); + return __retval; +} /* * Freezer-friendly wrappers around wait_event_interruptible(), wait_event_killable() and wait_event_interruptible_timeout(), originally From dd5ec0f4e72bed3d0e589e21fdf46eedafc106b7 Mon Sep 17 00:00:00 2001 From: Colin Cross Date: Mon, 6 May 2013 23:50:14 +0000 Subject: [PATCH 09/16] freezer: add new freezable helpers using freezer_do_not_count() Freezing tasks will wake up almost every userspace task from where it is blocking and force it to run until it hits a call to try_to_freeze(), generally on the exit path from the syscall it is blocking in. On resume each task will run again, usually restarting the syscall and running until it hits the same blocking call as it was originally blocked in. To allow tasks to avoid running on every suspend/resume cycle, this patch adds additional freezable wrappers around blocking calls that call freezer_do_not_count(). Combined with the previous patch, these tasks will not run during suspend or resume unless they wake up for another reason, in which case they will run until they hit the try_to_freeze() in freezer_count(), and then continue processing the wakeup after tasks are thawed. Additional patches will convert the most common locations that userspace blocks in to use freezable helpers. Acked-by: Tejun Heo Signed-off-by: Colin Cross Signed-off-by: Rafael J. Wysocki --- include/linux/freezer.h | 61 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) diff --git a/include/linux/freezer.h b/include/linux/freezer.h index 8430d4c51ece..7fd81b8c4897 100644 --- a/include/linux/freezer.h +++ b/include/linux/freezer.h @@ -180,6 +180,32 @@ static inline void freezable_schedule_unsafe(void) freezer_count_unsafe(); } +/* + * Like schedule_timeout(), but should not block the freezer. Do not + * call this with locks held. + */ +static inline long freezable_schedule_timeout(long timeout) +{ + long __retval; + freezer_do_not_count(); + __retval = schedule_timeout(timeout); + freezer_count(); + return __retval; +} + +/* + * Like schedule_timeout_interruptible(), but should not block the freezer. Do not + * call this with locks held. + */ +static inline long freezable_schedule_timeout_interruptible(long timeout) +{ + long __retval; + freezer_do_not_count(); + __retval = schedule_timeout_interruptible(timeout); + freezer_count(); + return __retval; +} + /* Like schedule_timeout_killable(), but should not block the freezer. */ static inline long freezable_schedule_timeout_killable(long timeout) { @@ -200,6 +226,20 @@ static inline long freezable_schedule_timeout_killable_unsafe(long timeout) return __retval; } +/* + * Like schedule_hrtimeout_range(), but should not block the freezer. Do not + * call this with locks held. 
+ */ +static inline int freezable_schedule_hrtimeout_range(ktime_t *expires, + unsigned long delta, const enum hrtimer_mode mode) +{ + int __retval; + freezer_do_not_count(); + __retval = schedule_hrtimeout_range(expires, delta, mode); + freezer_count(); + return __retval; +} + /* * Freezer-friendly wrappers around wait_event_interruptible(), * wait_event_killable() and wait_event_interruptible_timeout(), originally @@ -244,6 +284,16 @@ static inline long freezable_schedule_timeout_killable_unsafe(long timeout) __retval; \ }) +#define wait_event_freezable_exclusive(wq, condition) \ +({ \ + int __retval; \ + freezer_do_not_count(); \ + __retval = wait_event_interruptible_exclusive(wq, condition); \ + freezer_count(); \ + __retval; \ +}) + + #else /* !CONFIG_FREEZER */ static inline bool frozen(struct task_struct *p) { return false; } static inline bool freezing(struct task_struct *p) { return false; } @@ -267,18 +317,29 @@ static inline void set_freezable(void) {} #define freezable_schedule_unsafe() schedule() +#define freezable_schedule_timeout(timeout) schedule_timeout(timeout) + +#define freezable_schedule_timeout_interruptible(timeout) \ + schedule_timeout_interruptible(timeout) + #define freezable_schedule_timeout_killable(timeout) \ schedule_timeout_killable(timeout) #define freezable_schedule_timeout_killable_unsafe(timeout) \ schedule_timeout_killable(timeout) +#define freezable_schedule_hrtimeout_range(expires, delta, mode) \ + schedule_hrtimeout_range(expires, delta, mode) + #define wait_event_freezable(wq, condition) \ wait_event_interruptible(wq, condition) #define wait_event_freezable_timeout(wq, condition, timeout) \ wait_event_interruptible_timeout(wq, condition, timeout) +#define wait_event_freezable_exclusive(wq, condition) \ + wait_event_interruptible_exclusive(wq, condition) + #define wait_event_freezekillable(wq, condition) \ wait_event_killable(wq, condition) From e2610b268bb74d24866a9578e78d8c3de90ed596 Mon Sep 17 00:00:00 2001 From: Colin Cross Date: Mon, 6 May 2013 23:50:15 +0000 Subject: [PATCH 10/16] binder: use freezable blocking calls Avoid waking up every thread sleeping in a binder call during suspend and resume by calling a freezable blocking call. Previous patches modified the freezer to avoid sending wakeups to threads that are blocked in freezable blocking calls. This call was selected to be converted to a freezable call because it doesn't hold any locks or release any resources when interrupted that might be needed by another freezing task or a kernel driver during suspend, and is a common site where idle userspace tasks are blocked. Acked-by: Tejun Heo Signed-off-by: Colin Cross Signed-off-by: Rafael J. 
Wysocki --- drivers/staging/android/binder.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/staging/android/binder.c b/drivers/staging/android/binder.c index 1567ac296b39..1ffc2ebdf612 100644 --- a/drivers/staging/android/binder.c +++ b/drivers/staging/android/binder.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -2140,13 +2141,13 @@ static int binder_thread_read(struct binder_proc *proc, if (!binder_has_proc_work(proc, thread)) ret = -EAGAIN; } else - ret = wait_event_interruptible_exclusive(proc->wait, binder_has_proc_work(proc, thread)); + ret = wait_event_freezable_exclusive(proc->wait, binder_has_proc_work(proc, thread)); } else { if (non_block) { if (!binder_has_thread_work(thread)) ret = -EAGAIN; } else - ret = wait_event_interruptible(thread->wait, binder_has_thread_work(thread)); + ret = wait_event_freezable(thread->wait, binder_has_thread_work(thread)); } binder_lock(__func__); From 1c441e921201d523b5a6036aea22b0b426bf1af2 Mon Sep 17 00:00:00 2001 From: Colin Cross Date: Mon, 6 May 2013 23:50:16 +0000 Subject: [PATCH 11/16] epoll: use freezable blocking call Avoid waking up every thread sleeping in an epoll_wait call during suspend and resume by calling a freezable blocking call. Previous patches modified the freezer to avoid sending wakeups to threads that are blocked in freezable blocking calls. This call was selected to be converted to a freezable call because it doesn't hold any locks or release any resources when interrupted that might be needed by another freezing task or a kernel driver during suspend, and is a common site where idle userspace tasks are blocked. Acked-by: Tejun Heo Signed-off-by: Colin Cross Signed-off-by: Rafael J. Wysocki --- fs/eventpoll.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/eventpoll.c b/fs/eventpoll.c index deecc7294a67..0cff4434880d 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include #include @@ -1602,7 +1603,8 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events, } spin_unlock_irqrestore(&ep->lock, flags); - if (!schedule_hrtimeout_range(to, slack, HRTIMER_MODE_ABS)) + if (!freezable_schedule_hrtimeout_range(to, slack, + HRTIMER_MODE_ABS)) timed_out = 1; spin_lock_irqsave(&ep->lock, flags); From 9745cdb36da83aeec198650b410ca06304cf7928 Mon Sep 17 00:00:00 2001 From: Colin Cross Date: Mon, 6 May 2013 23:50:17 +0000 Subject: [PATCH 12/16] select: use freezable blocking call Avoid waking up every thread sleeping in a select call during suspend and resume by calling a freezable blocking call. Previous patches modified the freezer to avoid sending wakeups to threads that are blocked in freezable blocking calls. This call was selected to be converted to a freezable call because it doesn't hold any locks or release any resources when interrupted that might be needed by another freezing task or a kernel driver during suspend, and is a common site where idle userspace tasks are blocked. Acked-by: Tejun Heo Signed-off-by: Colin Cross Signed-off-by: Rafael J. 
Wysocki --- fs/select.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/select.c b/fs/select.c index 8c1c96c27062..6b14dc7df3a4 100644 --- a/fs/select.c +++ b/fs/select.c @@ -27,6 +27,7 @@ #include #include #include +#include #include @@ -236,7 +237,8 @@ int poll_schedule_timeout(struct poll_wqueues *pwq, int state, set_current_state(state); if (!pwq->triggered) - rc = schedule_hrtimeout_range(expires, slack, HRTIMER_MODE_ABS); + rc = freezable_schedule_hrtimeout_range(expires, slack, + HRTIMER_MODE_ABS); __set_current_state(TASK_RUNNING); /* From 56467c7697f5aef6974501fbe2c3e63674583549 Mon Sep 17 00:00:00 2001 From: Colin Cross Date: Mon, 6 May 2013 23:50:18 +0000 Subject: [PATCH 13/16] futex: use freezable blocking call Avoid waking up every thread sleeping in a futex_wait call during suspend and resume by calling a freezable blocking call. Previous patches modified the freezer to avoid sending wakeups to threads that are blocked in freezable blocking calls. This call was selected to be converted to a freezable call because it doesn't hold any locks or release any resources when interrupted that might be needed by another freezing task or a kernel driver during suspend, and is a common site where idle userspace tasks are blocked. Acked-by: Tejun Heo Acked-by: Thomas Gleixner Acked-by: Darren Hart Signed-off-by: Colin Cross Signed-off-by: Rafael J. Wysocki --- kernel/futex.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/futex.c b/kernel/futex.c index b26dcfc02c94..d710fae8abbe 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -61,6 +61,7 @@ #include #include #include +#include #include @@ -1807,7 +1808,7 @@ static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q, * is no timeout, or if it has yet to expire. */ if (!timeout || timeout->task) - schedule(); + freezable_schedule(); } __set_current_state(TASK_RUNNING); } From b0f8c44f30e58c3aaaaaf864d5c3d3cc2e8a4c2d Mon Sep 17 00:00:00 2001 From: Colin Cross Date: Mon, 6 May 2013 23:50:19 +0000 Subject: [PATCH 14/16] nanosleep: use freezable blocking call Avoid waking up every thread sleeping in a nanosleep call during suspend and resume by calling a freezable blocking call. Previous patches modified the freezer to avoid sending wakeups to threads that are blocked in freezable blocking calls. This call was selected to be converted to a freezable call because it doesn't hold any locks or release any resources when interrupted that might be needed by another freezing task or a kernel driver during suspend, and is a common site where idle userspace tasks are blocked. Acked-by: Tejun Heo Acked-by: Thomas Gleixner Signed-off-by: Colin Cross Signed-off-by: Rafael J. 
Wysocki --- kernel/hrtimer.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index fd4b13b131f8..3ee4d06c6fc2 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -47,6 +47,7 @@ #include #include #include +#include #include @@ -1545,7 +1546,7 @@ static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mod t->task = NULL; if (likely(t->task)) - schedule(); + freezable_schedule(); hrtimer_cancel(&t->timer); mode = HRTIMER_MODE_ABS; From a2d5f1f5d941593e61071dc78e9de228eda5475f Mon Sep 17 00:00:00 2001 From: Colin Cross Date: Mon, 6 May 2013 23:50:20 +0000 Subject: [PATCH 15/16] sigtimedwait: use freezable blocking call Avoid waking up every thread sleeping in a sigtimedwait call during suspend and resume by calling a freezable blocking call. Previous patches modified the freezer to avoid sending wakeups to threads that are blocked in freezable blocking calls. This call was selected to be converted to a freezable call because it doesn't hold any locks or release any resources when interrupted that might be needed by another freezing task or a kernel driver during suspend, and is a common site where idle userspace tasks are blocked. Acked-by: Tejun Heo Signed-off-by: Colin Cross Signed-off-by: Rafael J. Wysocki --- kernel/signal.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/signal.c b/kernel/signal.c index 113411bfe8b1..50e41075ac77 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -2848,7 +2848,7 @@ int do_sigtimedwait(const sigset_t *which, siginfo_t *info, recalc_sigpending(); spin_unlock_irq(&tsk->sighand->siglock); - timeout = schedule_timeout_interruptible(timeout); + timeout = freezable_schedule_timeout_interruptible(timeout); spin_lock_irq(&tsk->sighand->siglock); __set_task_blocked(tsk, &tsk->real_blocked); From 2b15af6f953012aac49984ead3f8ec744d941540 Mon Sep 17 00:00:00 2001 From: Colin Cross Date: Mon, 6 May 2013 23:50:21 +0000 Subject: [PATCH 16/16] af_unix: use freezable blocking calls in read Avoid waking up every thread sleeping in read call on an AF_UNIX socket during suspend and resume by calling a freezable blocking call. Previous patches modified the freezer to avoid sending wakeups to threads that are blocked in freezable blocking calls. This call was selected to be converted to a freezable call because it doesn't hold any locks or release any resources when interrupted that might be needed by another freezing task or a kernel driver during suspend, and is a common site where idle userspace tasks are blocked. Acked-by: Tejun Heo Signed-off-by: Colin Cross Signed-off-by: Rafael J. Wysocki --- net/unix/af_unix.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 826e09938bff..c4ce243824bb 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -114,6 +114,7 @@ #include #include #include +#include struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE]; EXPORT_SYMBOL_GPL(unix_socket_table); @@ -1879,7 +1880,7 @@ static long unix_stream_data_wait(struct sock *sk, long timeo, set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); unix_state_unlock(sk); - timeo = schedule_timeout(timeo); + timeo = freezable_schedule_timeout(timeo); unix_state_lock(sk); clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); }
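
For reference, the sketch below shows the calling convention this series establishes for a freezable kernel thread: opt in with set_freezable(), then block in wait_event_freezable() rather than wait_event_interruptible(), so the sleep is bracketed by freezer_do_not_count()/freezer_count() and the freezer neither wakes the task nor counts it as unfrozen. This is a minimal illustration only, not code from any of the patches; the identifiers example_wq, example_pending and example_thread_fn are made up for the example.

/*
 * Illustrative sketch only -- not part of the patch series above.
 * example_wq, example_pending and example_thread_fn are hypothetical names.
 */
#include <linux/freezer.h>
#include <linux/kthread.h>
#include <linux/wait.h>

static DECLARE_WAIT_QUEUE_HEAD(example_wq);
static bool example_pending;

static int example_thread_fn(void *data)
{
	set_freezable();	/* opt this kthread in to freezing */

	while (!kthread_should_stop()) {
		/*
		 * Sleeps inside freezer_do_not_count()/freezer_count(), so
		 * suspend does not have to wake this task; if it does wake,
		 * it freezes in freezer_count() before handling the event.
		 */
		wait_event_freezable(example_wq,
				     example_pending || kthread_should_stop());

		if (example_pending) {
			example_pending = false;
			/* ... process the pending work ... */
		}
	}
	return 0;
}

Started with kthread_run(example_thread_fn, NULL, "example"), such a thread stays asleep across suspend/resume cycles unless its wakeup condition actually fires, relying on the same freezer_do_not_count() behaviour as the binder, epoll, select, futex, nanosleep, sigtimedwait and af_unix conversions above.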