From 8d7dc9283f399e1fda4e48a1c453f689326d9396 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 14 Apr 2015 19:33:59 -0700 Subject: [PATCH 001/101] rcu: Control grace-period delays directly from value In a misguided attempt to avoid an #ifdef, the use of the gp_init_delay module parameter was conditioned on the corresponding RCU_TORTURE_TEST_SLOW_INIT Kconfig variable, using IS_ENABLED() at the point of use in the code. This meant that the compiler always saw the delay, which meant that RCU_TORTURE_TEST_SLOW_INIT_DELAY had to be unconditionally defined. This in turn caused "make oldconfig" to ask pointless questions about the value of RCU_TORTURE_TEST_SLOW_INIT_DELAY in cases where it was not even used. This commit avoids these pointless questions by defining gp_init_delay under #ifdef. In one branch, gp_init_delay is initialized to RCU_TORTURE_TEST_SLOW_INIT_DELAY and is also a module parameter (thus allowing boot-time modification), and in the other branch gp_init_delay is a const variable initialized by default to zero. This approach also simplifies the code at the delay point by eliminating the IS_DEFINED(). Because gp_init_delay is constant zero in the no-delay case intended for production use, the "gp_init_delay > 0" check causes the delay to become dead code, as desired in this case. In addition, this commit replaces magic constant "10" with the preprocessor variable PER_RCU_NODE_PERIOD, which controls the number of grace periods that are allowed to elapse at full speed before a delay is inserted. Reported-by: Linus Torvalds Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.c | 16 +++++++++------- lib/Kconfig.debug | 1 + 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 233165da782f..8cf7304b2867 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -162,11 +162,14 @@ static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp); static int kthread_prio = CONFIG_RCU_KTHREAD_PRIO; module_param(kthread_prio, int, 0644); -/* Delay in jiffies for grace-period initialization delays. */ -static int gp_init_delay = IS_ENABLED(CONFIG_RCU_TORTURE_TEST_SLOW_INIT) - ? CONFIG_RCU_TORTURE_TEST_SLOW_INIT_DELAY - : 0; +/* Delay in jiffies for grace-period initialization delays, debug only. */ +#ifdef CONFIG_RCU_TORTURE_TEST_SLOW_INIT +static int gp_init_delay = CONFIG_RCU_TORTURE_TEST_SLOW_INIT_DELAY; module_param(gp_init_delay, int, 0644); +#else /* #ifdef CONFIG_RCU_TORTURE_TEST_SLOW_INIT */ +static const int gp_init_delay; +#endif /* #else #ifdef CONFIG_RCU_TORTURE_TEST_SLOW_INIT */ +#define PER_RCU_NODE_PERIOD 10 /* Number of grace periods between delays. */ /* * Track the rcutorture test sequence number and the update version @@ -1843,9 +1846,8 @@ static int rcu_gp_init(struct rcu_state *rsp) raw_spin_unlock_irq(&rnp->lock); cond_resched_rcu_qs(); ACCESS_ONCE(rsp->gp_activity) = jiffies; - if (IS_ENABLED(CONFIG_RCU_TORTURE_TEST_SLOW_INIT) && - gp_init_delay > 0 && - !(rsp->gpnum % (rcu_num_nodes * 10))) + if (gp_init_delay > 0 && + !(rsp->gpnum % (rcu_num_nodes * PER_RCU_NODE_PERIOD))) schedule_timeout_uninterruptible(gp_init_delay); } diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 1ad74c0df01f..5f5ff7d7e5eb 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1268,6 +1268,7 @@ config RCU_TORTURE_TEST_SLOW_INIT_DELAY int "How much to slow down RCU grace-period initialization" range 0 5 default 3 + depends on RCU_TORTURE_TEST_SLOW_INIT help This option specifies the number of jiffies to wait between each rcu_node structure initialization. From c57dcb566d3d866a302a1da2e06344bec31d5bcd Mon Sep 17 00:00:00 2001 From: Ross Lagerwall Date: Thu, 2 Apr 2015 08:39:00 +0100 Subject: [PATCH 002/101] efivarfs: Ensure VariableName is NUL-terminated Some buggy firmware implementations update VariableNameSize on success such that it does not include the final NUL character which results in garbage in the efivarfs name entries. Use kzalloc on the efivar_entry (as is done in efivars.c) to ensure that the name is always NUL-terminated. The buggy firmware is: BIOS Information Vendor: Intel Corp. Version: S1200RP.86B.02.02.0005.102320140911 Release Date: 10/23/2014 BIOS Revision: 4.6 System Information Manufacturer: Intel Corporation Product Name: S1200RP_SE Signed-off-by: Ross Lagerwall Acked-by: Matthew Garrett Cc: Jeremy Kerr Cc: Signed-off-by: Matt Fleming --- fs/efivarfs/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/efivarfs/super.c b/fs/efivarfs/super.c index ddbce42548c9..acf9a67f6770 100644 --- a/fs/efivarfs/super.c +++ b/fs/efivarfs/super.c @@ -121,7 +121,7 @@ static int efivarfs_callback(efi_char16_t *name16, efi_guid_t vendor, int len, i; int err = -ENOMEM; - entry = kmalloc(sizeof(*entry), GFP_KERNEL); + entry = kzalloc(sizeof(*entry), GFP_KERNEL); if (!entry) return err; From 98b228f55014870092c15d7d168fecac69f2f12a Mon Sep 17 00:00:00 2001 From: Roy Franz Date: Wed, 15 Apr 2015 16:32:24 -0700 Subject: [PATCH 003/101] x86/efi: Store upper bits of command line buffer address in ext_cmd_line_ptr Until now, the EFI stub was only setting the 32 bit cmd_line_ptr in the setup_header structure, so on 64 bit platforms this could be truncated. This patch adds setting the upper bits of the buffer address in ext_cmd_line_ptr. This case was likely never hit, as the allocation for this buffer is done at the lowest available address. Only x86_64 kernels have this problem, as the 1-1 mapping mandated by EFI ensures that all memory is 32 bit addressable on 32 bit platforms. The EFI stub does not support mixed mode, so the 32 bit kernel on 64 bit firmware case does not need to be handled. Signed-off-by: Roy Franz Cc: Signed-off-by: Matt Fleming --- arch/x86/boot/compressed/eboot.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c index 92b9a5f2aed6..5999980206bf 100644 --- a/arch/x86/boot/compressed/eboot.c +++ b/arch/x86/boot/compressed/eboot.c @@ -1110,6 +1110,8 @@ struct boot_params *make_boot_params(struct efi_config *c) if (!cmdline_ptr) goto fail; hdr->cmd_line_ptr = (unsigned long)cmdline_ptr; + /* Fill in upper bits of command line address, NOP on 32 bit */ + boot_params->ext_cmd_line_ptr = (u64)(unsigned long)cmdline_ptr >> 32; hdr->ramdisk_image = 0; hdr->ramdisk_size = 0; From 94d4b4765b7ddb8478b0d57663cf7a08e2263bbf Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 23 Nov 2012 19:19:07 +0100 Subject: [PATCH 004/101] x86/mm: Clean up types in xlate_dev_mem_ptr() Pavel Machek reported the following compiler warning on x86/32 CONFIG_HIGHMEM64G=y builds: arch/x86/mm/ioremap.c:344:10: warning: cast to pointer from integer of different size [-Wint-to-pointer-cast] Clean up the types in this function by using a single natural type for internal calculations (unsigned long), to make it more apparent what's happening, and also to remove fragile casts. Reported-by: Pavel Machek Cc: jgross@suse.com Cc: roland@purestorage.com Link: http://lkml.kernel.org/r/20150416080440.GA507@amd Signed-off-by: Ingo Molnar --- arch/x86/mm/ioremap.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index fdf617c00e2f..4bf037b20f47 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -332,18 +332,20 @@ EXPORT_SYMBOL(iounmap); */ void *xlate_dev_mem_ptr(phys_addr_t phys) { - void *addr; - unsigned long start = phys & PAGE_MASK; + unsigned long start = phys & PAGE_MASK; + unsigned long offset = phys & ~PAGE_MASK; + unsigned long vaddr; /* If page is RAM, we can use __va. Otherwise ioremap and unmap. */ if (page_is_ram(start >> PAGE_SHIFT)) return __va(phys); - addr = (void __force *)ioremap_cache(start, PAGE_SIZE); - if (addr) - addr = (void *)((unsigned long)addr | (phys & ~PAGE_MASK)); + vaddr = (unsigned long)ioremap_cache(start, PAGE_SIZE); + /* Only add the offset on success and return NULL if the ioremap() failed: */ + if (vaddr) + vaddr += offset; - return addr; + return (void *)vaddr; } void unxlate_dev_mem_ptr(phys_addr_t phys, void *addr) From 3b6e042188994466ec257b71296b5f85b894dcd9 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 21 Apr 2015 17:26:23 +0200 Subject: [PATCH 005/101] perf/x86/intel: Add cpu_(prepare|starting|dying) for core_pmu The core_pmu does not define cpu_* callbacks, which handles allocation of 'struct cpu_hw_events::shared_regs' data, initialization of debug store and PMU_FL_EXCL_CNTRS counters. While this probably won't happen on bare metal, virtual CPU can define x86_pmu.extra_regs together with PMU version 1 and thus be using core_pmu -> using shared_regs data without it being allocated. That could could leave to following panic: BUG: unable to handle kernel NULL pointer dereference at (null) IP: [] _spin_lock_irqsave+0x1f/0x40 SNIP [] __intel_shared_reg_get_constraints+0x69/0x1e0 [] intel_get_event_constraints+0x9b/0x180 [] x86_schedule_events+0x75/0x1d0 [] ? check_preempt_curr+0x7c/0x90 [] ? try_to_wake_up+0x24e/0x3e0 [] ? default_wake_function+0x12/0x20 [] ? autoremove_wake_function+0x16/0x40 [] ? __wake_up_common+0x59/0x90 [] ? __d_lookup+0xa7/0x150 [] ? do_lookup+0x9f/0x230 [] ? dput+0x9a/0x150 [] ? path_to_nameidata+0x25/0x60 [] ? __link_path_walk+0x7da/0x1000 [] ? x86_pmu_add+0xb9/0x170 [] x86_pmu_commit_txn+0x67/0xc0 [] ? mntput_no_expire+0x30/0x110 [] ? path_put+0x31/0x40 [] ? current_fs_time+0x27/0x30 [] ? mem_cgroup_get_reclaim_stat_from_page+0x20/0x70 [] group_sched_in+0x13a/0x170 [] ? sched_clock+0x9/0x10 [] ctx_sched_in+0x2e8/0x330 [] perf_event_sched_in+0x6b/0xb0 [] perf_event_context_sched_in+0x76/0xc0 [] perf_event_comm+0x1bb/0x2e0 [] set_task_comm+0x69/0x80 [] setup_new_exec+0xe1/0x2e0 [] load_elf_binary+0x3ce/0x1ab0 Adding cpu_(prepare|starting|dying) for core_pmu to have shared_regs data allocated for core_pmu. AFAICS there's no harm to initialize debug store and PMU_FL_EXCL_CNTRS either for core_pmu. Signed-off-by: Jiri Olsa Acked-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/20150421152623.GC13169@krava.redhat.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_intel.c | 66 +++++++++++++++----------- 1 file changed, 38 insertions(+), 28 deletions(-) diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 219d3fb423a1..960e85de13fb 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -2533,34 +2533,6 @@ ssize_t intel_event_sysfs_show(char *page, u64 config) return x86_event_sysfs_show(page, config, event); } -static __initconst const struct x86_pmu core_pmu = { - .name = "core", - .handle_irq = x86_pmu_handle_irq, - .disable_all = x86_pmu_disable_all, - .enable_all = core_pmu_enable_all, - .enable = core_pmu_enable_event, - .disable = x86_pmu_disable_event, - .hw_config = x86_pmu_hw_config, - .schedule_events = x86_schedule_events, - .eventsel = MSR_ARCH_PERFMON_EVENTSEL0, - .perfctr = MSR_ARCH_PERFMON_PERFCTR0, - .event_map = intel_pmu_event_map, - .max_events = ARRAY_SIZE(intel_perfmon_event_map), - .apic = 1, - /* - * Intel PMCs cannot be accessed sanely above 32 bit width, - * so we install an artificial 1<<31 period regardless of - * the generic event period: - */ - .max_period = (1ULL << 31) - 1, - .get_event_constraints = intel_get_event_constraints, - .put_event_constraints = intel_put_event_constraints, - .event_constraints = intel_core_event_constraints, - .guest_get_msrs = core_guest_get_msrs, - .format_attrs = intel_arch_formats_attr, - .events_sysfs_show = intel_event_sysfs_show, -}; - struct intel_shared_regs *allocate_shared_regs(int cpu) { struct intel_shared_regs *regs; @@ -2743,6 +2715,44 @@ static struct attribute *intel_arch3_formats_attr[] = { NULL, }; +static __initconst const struct x86_pmu core_pmu = { + .name = "core", + .handle_irq = x86_pmu_handle_irq, + .disable_all = x86_pmu_disable_all, + .enable_all = core_pmu_enable_all, + .enable = core_pmu_enable_event, + .disable = x86_pmu_disable_event, + .hw_config = x86_pmu_hw_config, + .schedule_events = x86_schedule_events, + .eventsel = MSR_ARCH_PERFMON_EVENTSEL0, + .perfctr = MSR_ARCH_PERFMON_PERFCTR0, + .event_map = intel_pmu_event_map, + .max_events = ARRAY_SIZE(intel_perfmon_event_map), + .apic = 1, + /* + * Intel PMCs cannot be accessed sanely above 32-bit width, + * so we install an artificial 1<<31 period regardless of + * the generic event period: + */ + .max_period = (1ULL<<31) - 1, + .get_event_constraints = intel_get_event_constraints, + .put_event_constraints = intel_put_event_constraints, + .event_constraints = intel_core_event_constraints, + .guest_get_msrs = core_guest_get_msrs, + .format_attrs = intel_arch_formats_attr, + .events_sysfs_show = intel_event_sysfs_show, + + /* + * Virtual (or funny metal) CPU can define x86_pmu.extra_regs + * together with PMU version 1 and thus be using core_pmu with + * shared_regs. We need following callbacks here to allocate + * it properly. + */ + .cpu_prepare = intel_pmu_cpu_prepare, + .cpu_starting = intel_pmu_cpu_starting, + .cpu_dying = intel_pmu_cpu_dying, +}; + static __initconst const struct x86_pmu intel_pmu = { .name = "Intel", .handle_irq = intel_pmu_handle_irq, From 80bcffb376a6890dd7452b12c1ba032f8f24fef6 Mon Sep 17 00:00:00 2001 From: Sonny Rao Date: Mon, 20 Apr 2015 15:34:07 -0700 Subject: [PATCH 006/101] perf/x86/intel/uncore: Add support for Intel Haswell ULT (lower power Mobile Processor) IMC uncore PMUs This uncore is the same as the Haswell desktop part but uses a different PCI ID. Signed-off-by: Sonny Rao Cc: Arnaldo Carvalho de Melo Cc: Bjorn Helgaas Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1429569247-16697-1-git-send-email-sonnyrao@chromium.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c b/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c index 3001015b755c..ca75e70865ef 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c @@ -1,6 +1,9 @@ /* Nehalem/SandBridge/Haswell uncore support */ #include "perf_event_intel_uncore.h" +/* Uncore IMC PCI Id */ +#define PCI_DEVICE_ID_INTEL_HSW_U_IMC 0x0a04 + /* SNB event control */ #define SNB_UNC_CTL_EV_SEL_MASK 0x000000ff #define SNB_UNC_CTL_UMASK_MASK 0x0000ff00 @@ -472,6 +475,10 @@ static const struct pci_device_id hsw_uncore_pci_ids[] = { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_HSW_IMC), .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_HSW_U_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, { /* end: all zeroes */ }, }; @@ -502,6 +509,7 @@ static const struct imc_uncore_pci_dev desktop_imc_pci_ids[] = { IMC_DEV(IVB_IMC, &ivb_uncore_pci_driver), /* 3rd Gen Core processor */ IMC_DEV(IVB_E3_IMC, &ivb_uncore_pci_driver), /* Xeon E3-1200 v2/3rd Gen Core processor */ IMC_DEV(HSW_IMC, &hsw_uncore_pci_driver), /* 4th Gen Core Processor */ + IMC_DEV(HSW_U_IMC, &hsw_uncore_pci_driver), /* 4th Gen Core ULT Mobile Processor */ { /* end marker */ } }; From 0140e6141e4f1d4b15fb469e6912b0e71b7d1cc2 Mon Sep 17 00:00:00 2001 From: Sonny Rao Date: Tue, 21 Apr 2015 12:33:11 -0700 Subject: [PATCH 007/101] perf/x86/intel/uncore: Move PCI IDs for IMC to uncore driver This keeps all the related PCI IDs together in the driver where they are used. Signed-off-by: Sonny Rao Acked-by: Bjorn Helgaas Cc: Arnaldo Carvalho de Melo Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1429644791-25724-1-git-send-email-sonnyrao@chromium.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c | 6 +++++- include/linux/pci_ids.h | 4 ---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c b/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c index ca75e70865ef..4562e9e22c60 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c @@ -1,7 +1,11 @@ /* Nehalem/SandBridge/Haswell uncore support */ #include "perf_event_intel_uncore.h" -/* Uncore IMC PCI Id */ +/* Uncore IMC PCI IDs */ +#define PCI_DEVICE_ID_INTEL_SNB_IMC 0x0100 +#define PCI_DEVICE_ID_INTEL_IVB_IMC 0x0154 +#define PCI_DEVICE_ID_INTEL_IVB_E3_IMC 0x0150 +#define PCI_DEVICE_ID_INTEL_HSW_IMC 0x0c00 #define PCI_DEVICE_ID_INTEL_HSW_U_IMC 0x0a04 /* SNB event control */ diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index e63c02a93f6b..a59385852233 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2539,10 +2539,6 @@ #define PCI_VENDOR_ID_INTEL 0x8086 #define PCI_DEVICE_ID_INTEL_EESSC 0x0008 -#define PCI_DEVICE_ID_INTEL_SNB_IMC 0x0100 -#define PCI_DEVICE_ID_INTEL_IVB_IMC 0x0154 -#define PCI_DEVICE_ID_INTEL_IVB_E3_IMC 0x0150 -#define PCI_DEVICE_ID_INTEL_HSW_IMC 0x0c00 #define PCI_DEVICE_ID_INTEL_PXHD_0 0x0320 #define PCI_DEVICE_ID_INTEL_PXHD_1 0x0321 #define PCI_DEVICE_ID_INTEL_PXH_0 0x0329 From cb24d01d217497fb32467de22d773655f47d3896 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 22 Apr 2015 10:04:23 -0300 Subject: [PATCH 008/101] perf trace: Enable events when doing system wide tracing and starting a workload commit f7aa222ff397 Author: Arnaldo Carvalho de Melo Date: Tue Feb 3 13:25:39 2015 -0300 perf trace: No need to enable evsels for workload started from perf The assumption was that whenever a workload is specified, the attr.enable_on_exec evsel flag would be set, but that is not happening when perf_record_opts.system_wide is set, for instance That resulted in both perf_evlist__enable() and attr.enable_on_exec being not called/set, which made the events to remain disabled while the workload runs, producing no output. Fix it, by calling perf_evlist__enable() in the 'trace' tool when forking and not targetting a workload started from trace v2: Test against !target__none(), as suggested by Namhyung Kim, that is what is used in perf_evsel__config() when deciding if the attr.enable_on_exec flag to be set. More work is needed to cover other cases such as opts->initial_delay. Acked-by: Jiri Olsa Cc: Adrian Hunter Cc: Borislav Petkov Cc: David Ahern Cc: Don Zickus Cc: Frederic Weisbecker Cc: Namhyung Kim Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-27z7169pvfxgj8upic636syv@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index e124741be187..8842218e1856 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -2241,10 +2241,11 @@ static int trace__run(struct trace *trace, int argc, const char **argv) if (err < 0) goto out_error_mmap; + if (!target__none(&trace->opts.target)) + perf_evlist__enable(evlist); + if (forks) perf_evlist__start_workload(evlist); - else - perf_evlist__enable(evlist); trace->multiple_threads = evlist->threads->map[0] == -1 || evlist->threads->nr > 1 || From 02ac5421ddc634767c732f9b6a10a395a9ecfc4f Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 22 Apr 2015 11:11:57 -0300 Subject: [PATCH 009/101] perf trace: Disable events and drain events when forked workload ends We were not checking in the inner event processing loop if the forked workload had finished, which, on a busy system, may make it take a long time trying to drain events, entering a seemingly neverending loop, waiting for the system to get idle enough to make it drain the buffers. Fix it by disabling the events when 'done' is true, in the inner loop, to start draining what is in the buffers. Now: [root@ssdandy ~]# time trace --filter-pids 14003 -a sleep 1 | tail 996.748 ( 0.002 ms): sh/30296 rt_sigprocmask(how: SETMASK, nset: 0x7ffc83418160, sigsetsize: 8) = 0 996.751 ( 0.002 ms): sh/30296 rt_sigprocmask(how: BLOCK, nset: 0x7ffc834181f0, oset: 0x7ffc83418270, sigsetsize: 8) = 0 996.755 ( 0.002 ms): sh/30296 rt_sigaction(sig: INT, act: 0x7ffc83417f50, oact: 0x7ffc83417ff0, sigsetsize: 8) = 0 1004.543 ( 0.362 ms): tail/30198 ... [continued]: read()) = 4096 1004.548 ( 7.791 ms): sh/30296 wait4(upid: -1, stat_addr: 0x7ffc834181a0) ... 1004.975 ( 0.427 ms): tail/30198 read(buf: 0x7633f0, count: 8192) = 4096 1005.390 ( 0.410 ms): tail/30198 read(buf: 0x765410, count: 8192) = 4096 1005.743 ( 0.348 ms): tail/30198 read(buf: 0x7633f0, count: 8192) = 4096 1006.197 ( 0.449 ms): tail/30198 read(buf: 0x765410, count: 8192) = 4096 1006.492 ( 0.290 ms): tail/30198 read(buf: 0x7633f0, count: 8192) = 4096 real 0m1.219s user 0m0.704s sys 0m0.331s [root@ssdandy ~]# Reported-by: Michael Petlan Suggested-by: Jiri Olsa Acked-by: Jiri Olsa Cc: Adrian Hunter Cc: Borislav Petkov Cc: David Ahern Cc: Don Zickus Cc: Frederic Weisbecker Cc: Namhyung Kim Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-p6kpn1b26qcbe47pufpw0tex@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 8842218e1856..e122970361f2 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -2273,6 +2273,11 @@ static int trace__run(struct trace *trace, int argc, const char **argv) if (interrupted) goto out_disable; + + if (done && !draining) { + perf_evlist__disable(evlist); + draining = true; + } } } From 6145c259cd454bcb7a1288f7bbb7b4fbc18175dd Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 23 Apr 2015 14:40:37 +0100 Subject: [PATCH 010/101] perf kmem: Consistently use PRIu64 for printing u64 values MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Building the perf tool for 32-bit ARM results in the following build error due to a combination of an incorrect conversion specifier and compiling with -Werror: builtin-kmem.c: In function ‘print_page_summary’: builtin-kmem.c:644:9: error: format ‘%lu’ expects argument of type ‘long unsigned int’, but argument 3 has type ‘u64’ [-Werror=format=] nr_alloc_freed, (total_alloc_freed_bytes) / 1024); ^ builtin-kmem.c:647:9: error: format ‘%lu’ expects argument of type ‘long unsigned int’, but argument 3 has type ‘u64’ [-Werror=format=] (total_page_alloc_bytes - total_alloc_freed_bytes) / 1024); ^ cc1: all warnings being treated as errors This patch fixes the problem by consistently using PRIu64 for printing out u64 values. Signed-off-by: Will Deacon Cc: David Ahern Cc: Jiri Olsa Cc: Joonsoo Kim Cc: Minchan Kim Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1429796437-1790-1-git-send-email-will.deacon@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-kmem.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c index 63ea01349b6e..a1915b430044 100644 --- a/tools/perf/builtin-kmem.c +++ b/tools/perf/builtin-kmem.c @@ -640,9 +640,9 @@ static void print_page_summary(void) nr_page_frees, total_page_free_bytes / 1024); printf("\n"); - printf("%-30s: %'16lu [ %'16"PRIu64" KB ]\n", "Total alloc+freed requests", + printf("%-30s: %'16"PRIu64" [ %'16"PRIu64" KB ]\n", "Total alloc+freed requests", nr_alloc_freed, (total_alloc_freed_bytes) / 1024); - printf("%-30s: %'16lu [ %'16"PRIu64" KB ]\n", "Total alloc-only requests", + printf("%-30s: %'16"PRIu64" [ %'16"PRIu64" KB ]\n", "Total alloc-only requests", nr_page_allocs - nr_alloc_freed, (total_page_alloc_bytes - total_alloc_freed_bytes) / 1024); printf("%-30s: %'16lu [ %'16"PRIu64" KB ]\n", "Total free-only requests", From de28c15daf60e9625bece22f13a091fac8d05f1d Mon Sep 17 00:00:00 2001 From: Bobby Powers Date: Tue, 21 Apr 2015 19:19:41 -0400 Subject: [PATCH 011/101] tools lib api: Undefine _FORTIFY_SOURCE before setting it Some toolchains (like Hardened Gentoo) define _FORTIFY_SOURCE in the built-in, default args. This causes perf builds to fail with: :0:0: error: "_FORTIFY_SOURCE" redefined [-Werror] : note: this is the location of the previous definition cc1: all warnings being treated as errors To avoid this, undefine _FORTIFY_SOURCE before (possibly re-)defining it in tools/lib/api. v2 applies cleanly on top of already pulled kbuild changes for 4.1-rc1. Signed-off-by: Bobby Powers Acked-by: Jiri Olsa Cc: Dirk Gouders Cc: Michal Marek Cc: Paul Mackerras Cc: Peter Zijlstra Cc: linux-kbuild@vger.kernel.org Link: http://lkml.kernel.org/r/1429658381-3039-1-git-send-email-bobbypowers@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/api/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/lib/api/Makefile b/tools/lib/api/Makefile index d8fe29fc19a4..8bd960658463 100644 --- a/tools/lib/api/Makefile +++ b/tools/lib/api/Makefile @@ -16,7 +16,7 @@ MAKEFLAGS += --no-print-directory LIBFILE = $(OUTPUT)libapi.a CFLAGS := $(EXTRA_WARNINGS) $(EXTRA_CFLAGS) -CFLAGS += -ggdb3 -Wall -Wextra -std=gnu99 -Werror -O6 -D_FORTIFY_SOURCE=2 -fPIC +CFLAGS += -ggdb3 -Wall -Wextra -std=gnu99 -Werror -O6 -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=2 -fPIC CFLAGS += -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 RM = rm -f From 4ad1f4300e3bddf63109aa63cfb2d37e8585ecc7 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Tue, 14 Apr 2015 13:49:33 -0400 Subject: [PATCH 012/101] perf kmem: Fix compiles on RHEL6/OL6 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 0d68bc92c48 breaks compiles on RHEL6/OL6: cc1: warnings being treated as errors builtin-kmem.c: In function ‘search_page_alloc_stat’: builtin-kmem.c:322: error: declaration of ‘stat’ shadows a global declaration node = &parent->rb_left; /usr/include/sys/stat.h:455: error: shadowed declaration is here builtin-kmem.c: In function ‘perf_evsel__process_page_alloc_event’: builtin-kmem.c:378: error: declaration of ‘stat’ shadows a global declaration /usr/include/sys/stat.h:455: error: shadowed declaration is here builtin-kmem.c: In function ‘perf_evsel__process_page_free_event’: builtin-kmem.c:431: error: declaration of ‘stat’ shadows a global declaration /usr/include/sys/stat.h:455: error: shadowed declaration is here Rename local variable to pstat to avoid the name conflict. Signed-off-by: David Ahern Link: http://lkml.kernel.org/r/1429033773-31383-1-git-send-email-david.ahern@oracle.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-kmem.c | 54 +++++++++++++++++++-------------------- 1 file changed, 27 insertions(+), 27 deletions(-) diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c index a1915b430044..1634186d537c 100644 --- a/tools/perf/builtin-kmem.c +++ b/tools/perf/builtin-kmem.c @@ -319,7 +319,7 @@ static int page_stat_cmp(struct page_stat *a, struct page_stat *b) return 0; } -static struct page_stat *search_page_alloc_stat(struct page_stat *stat, bool create) +static struct page_stat *search_page_alloc_stat(struct page_stat *pstat, bool create) { struct rb_node **node = &page_alloc_tree.rb_node; struct rb_node *parent = NULL; @@ -331,7 +331,7 @@ static struct page_stat *search_page_alloc_stat(struct page_stat *stat, bool cre parent = *node; data = rb_entry(*node, struct page_stat, node); - cmp = page_stat_cmp(data, stat); + cmp = page_stat_cmp(data, pstat); if (cmp < 0) node = &parent->rb_left; else if (cmp > 0) @@ -345,10 +345,10 @@ static struct page_stat *search_page_alloc_stat(struct page_stat *stat, bool cre data = zalloc(sizeof(*data)); if (data != NULL) { - data->page = stat->page; - data->order = stat->order; - data->gfp_flags = stat->gfp_flags; - data->migrate_type = stat->migrate_type; + data->page = pstat->page; + data->order = pstat->order; + data->gfp_flags = pstat->gfp_flags; + data->migrate_type = pstat->migrate_type; rb_link_node(&data->node, parent, node); rb_insert_color(&data->node, &page_alloc_tree); @@ -375,7 +375,7 @@ static int perf_evsel__process_page_alloc_event(struct perf_evsel *evsel, unsigned int migrate_type = perf_evsel__intval(evsel, sample, "migratetype"); u64 bytes = kmem_page_size << order; - struct page_stat *stat; + struct page_stat *pstat; struct page_stat this = { .order = order, .gfp_flags = gfp_flags, @@ -401,21 +401,21 @@ static int perf_evsel__process_page_alloc_event(struct perf_evsel *evsel, * This is to find the current page (with correct gfp flags and * migrate type) at free event. */ - stat = search_page(page, true); - if (stat == NULL) + pstat = search_page(page, true); + if (pstat == NULL) return -ENOMEM; - stat->order = order; - stat->gfp_flags = gfp_flags; - stat->migrate_type = migrate_type; + pstat->order = order; + pstat->gfp_flags = gfp_flags; + pstat->migrate_type = migrate_type; this.page = page; - stat = search_page_alloc_stat(&this, true); - if (stat == NULL) + pstat = search_page_alloc_stat(&this, true); + if (pstat == NULL) return -ENOMEM; - stat->nr_alloc++; - stat->alloc_bytes += bytes; + pstat->nr_alloc++; + pstat->alloc_bytes += bytes; order_stats[order][migrate_type]++; @@ -428,7 +428,7 @@ static int perf_evsel__process_page_free_event(struct perf_evsel *evsel, u64 page; unsigned int order = perf_evsel__intval(evsel, sample, "order"); u64 bytes = kmem_page_size << order; - struct page_stat *stat; + struct page_stat *pstat; struct page_stat this = { .order = order, }; @@ -441,8 +441,8 @@ static int perf_evsel__process_page_free_event(struct perf_evsel *evsel, nr_page_frees++; total_page_free_bytes += bytes; - stat = search_page(page, false); - if (stat == NULL) { + pstat = search_page(page, false); + if (pstat == NULL) { pr_debug2("missing free at page %"PRIx64" (order: %d)\n", page, order); @@ -453,18 +453,18 @@ static int perf_evsel__process_page_free_event(struct perf_evsel *evsel, } this.page = page; - this.gfp_flags = stat->gfp_flags; - this.migrate_type = stat->migrate_type; + this.gfp_flags = pstat->gfp_flags; + this.migrate_type = pstat->migrate_type; - rb_erase(&stat->node, &page_tree); - free(stat); + rb_erase(&pstat->node, &page_tree); + free(pstat); - stat = search_page_alloc_stat(&this, false); - if (stat == NULL) + pstat = search_page_alloc_stat(&this, false); + if (pstat == NULL) return -ENOENT; - stat->nr_free++; - stat->free_bytes += bytes; + pstat->nr_free++; + pstat->free_bytes += bytes; return 0; } From 410ceb8f2f1d4edeb02d229ef192e76602005b8b Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Fri, 24 Apr 2015 10:45:16 +0900 Subject: [PATCH 013/101] tools lib traceevent: Fix build failure on 32-bit arch In my i386 build, it failed like this: CC event-parse.o event-parse.c: In function 'print_str_arg': event-parse.c:3868:5: warning: format '%lu' expects argument of type 'long unsigned int', but argument 3 has type 'uint64_t' [-Wformat] Signed-off-by: Namhyung Kim Acked-by: Javi Merino Link: http://lkml.kernel.org/r/20150424020218.GF1905@sejong Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/traceevent/event-parse.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index 12a7e2a40c89..aa21bd55bd8a 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -3865,7 +3865,7 @@ static void print_str_arg(struct trace_seq *s, void *data, int size, } else if (el_size == 4) { trace_seq_printf(s, "%u", *(uint32_t *)num); } else if (el_size == 8) { - trace_seq_printf(s, "%lu", *(uint64_t *)num); + trace_seq_printf(s, "%"PRIu64, *(uint64_t *)num); } else { trace_seq_printf(s, "BAD SIZE:%d 0x%x", el_size, *(uint8_t *)num); From 325d73bf8fea8af2227240b7305253fb052d3a68 Mon Sep 17 00:00:00 2001 From: Bob Liu Date: Fri, 3 Apr 2015 14:42:58 +0800 Subject: [PATCH 014/101] xen/blkback: safely unmap purge persistent grants MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit c43cf3ea8385 ("xen-blkback: safely unmap grants in case they are still in use") use gnttab_unmap_refs_async() to wait until the mapped pages are no longer in use before unmapping them, but that commit missed the persistent case. Purge persistent pages can't be unmapped either unless no longer in use. Signed-off-by: Bob Liu Acked-by: Roger Pau Monné Signed-off-by: David Vrabel --- drivers/block/xen-blkback/blkback.c | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index bd2b3bbbb22c..48e98f2712b5 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -329,8 +329,18 @@ void xen_blkbk_unmap_purged_grants(struct work_struct *work) struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST]; struct page *pages[BLKIF_MAX_SEGMENTS_PER_REQUEST]; struct persistent_gnt *persistent_gnt; - int ret, segs_to_unmap = 0; + int segs_to_unmap = 0; struct xen_blkif *blkif = container_of(work, typeof(*blkif), persistent_purge_work); + struct gntab_unmap_queue_data unmap_data; + struct completion unmap_completion; + + init_completion(&unmap_completion); + + unmap_data.data = &unmap_completion; + unmap_data.done = &free_persistent_gnts_unmap_callback; + unmap_data.pages = pages; + unmap_data.unmap_ops = unmap; + unmap_data.kunmap_ops = NULL; while(!list_empty(&blkif->persistent_purge_list)) { persistent_gnt = list_first_entry(&blkif->persistent_purge_list, @@ -346,17 +356,19 @@ void xen_blkbk_unmap_purged_grants(struct work_struct *work) pages[segs_to_unmap] = persistent_gnt->page; if (++segs_to_unmap == BLKIF_MAX_SEGMENTS_PER_REQUEST) { - ret = gnttab_unmap_refs(unmap, NULL, pages, - segs_to_unmap); - BUG_ON(ret); + unmap_data.count = segs_to_unmap; + gnttab_unmap_refs_async(&unmap_data); + wait_for_completion(&unmap_completion); + put_free_pages(blkif, pages, segs_to_unmap); segs_to_unmap = 0; } kfree(persistent_gnt); } if (segs_to_unmap > 0) { - ret = gnttab_unmap_refs(unmap, NULL, pages, segs_to_unmap); - BUG_ON(ret); + unmap_data.count = segs_to_unmap; + gnttab_unmap_refs_async(&unmap_data); + wait_for_completion(&unmap_completion); put_free_pages(blkif, pages, segs_to_unmap); } } From b44166cd46e28dd608d5baa5873047a40f32919c Mon Sep 17 00:00:00 2001 From: Bob Liu Date: Fri, 3 Apr 2015 14:42:59 +0800 Subject: [PATCH 015/101] xen/grant: introduce func gnttab_unmap_refs_sync() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There are several place using gnttab async unmap and wait for completion, so move the common code to a function gnttab_unmap_refs_sync(). Signed-off-by: Bob Liu Acked-by: Roger Pau Monné Acked-by: Konrad Rzeszutek Wilk Signed-off-by: David Vrabel --- drivers/block/xen-blkback/blkback.c | 31 +++-------------------------- drivers/xen/gntdev.c | 28 +++----------------------- drivers/xen/grant-table.c | 28 ++++++++++++++++++++++++++ include/xen/grant_table.h | 1 + 4 files changed, 35 insertions(+), 53 deletions(-) diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index 48e98f2712b5..713fc9ff1149 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -265,17 +265,6 @@ static void put_persistent_gnt(struct xen_blkif *blkif, atomic_dec(&blkif->persistent_gnt_in_use); } -static void free_persistent_gnts_unmap_callback(int result, - struct gntab_unmap_queue_data *data) -{ - struct completion *c = data->data; - - /* BUG_ON used to reproduce existing behaviour, - but is this the best way to deal with this? */ - BUG_ON(result); - complete(c); -} - static void free_persistent_gnts(struct xen_blkif *blkif, struct rb_root *root, unsigned int num) { @@ -285,12 +274,7 @@ static void free_persistent_gnts(struct xen_blkif *blkif, struct rb_root *root, struct rb_node *n; int segs_to_unmap = 0; struct gntab_unmap_queue_data unmap_data; - struct completion unmap_completion; - init_completion(&unmap_completion); - - unmap_data.data = &unmap_completion; - unmap_data.done = &free_persistent_gnts_unmap_callback; unmap_data.pages = pages; unmap_data.unmap_ops = unmap; unmap_data.kunmap_ops = NULL; @@ -310,8 +294,7 @@ static void free_persistent_gnts(struct xen_blkif *blkif, struct rb_root *root, !rb_next(&persistent_gnt->node)) { unmap_data.count = segs_to_unmap; - gnttab_unmap_refs_async(&unmap_data); - wait_for_completion(&unmap_completion); + BUG_ON(gnttab_unmap_refs_sync(&unmap_data)); put_free_pages(blkif, pages, segs_to_unmap); segs_to_unmap = 0; @@ -332,12 +315,7 @@ void xen_blkbk_unmap_purged_grants(struct work_struct *work) int segs_to_unmap = 0; struct xen_blkif *blkif = container_of(work, typeof(*blkif), persistent_purge_work); struct gntab_unmap_queue_data unmap_data; - struct completion unmap_completion; - init_completion(&unmap_completion); - - unmap_data.data = &unmap_completion; - unmap_data.done = &free_persistent_gnts_unmap_callback; unmap_data.pages = pages; unmap_data.unmap_ops = unmap; unmap_data.kunmap_ops = NULL; @@ -357,9 +335,7 @@ void xen_blkbk_unmap_purged_grants(struct work_struct *work) if (++segs_to_unmap == BLKIF_MAX_SEGMENTS_PER_REQUEST) { unmap_data.count = segs_to_unmap; - gnttab_unmap_refs_async(&unmap_data); - wait_for_completion(&unmap_completion); - + BUG_ON(gnttab_unmap_refs_sync(&unmap_data)); put_free_pages(blkif, pages, segs_to_unmap); segs_to_unmap = 0; } @@ -367,8 +343,7 @@ void xen_blkbk_unmap_purged_grants(struct work_struct *work) } if (segs_to_unmap > 0) { unmap_data.count = segs_to_unmap; - gnttab_unmap_refs_async(&unmap_data); - wait_for_completion(&unmap_completion); + BUG_ON(gnttab_unmap_refs_sync(&unmap_data)); put_free_pages(blkif, pages, segs_to_unmap); } } diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c index d5bb1a33d0a3..89274850741b 100644 --- a/drivers/xen/gntdev.c +++ b/drivers/xen/gntdev.c @@ -327,30 +327,10 @@ static int map_grant_pages(struct grant_map *map) return err; } -struct unmap_grant_pages_callback_data -{ - struct completion completion; - int result; -}; - -static void unmap_grant_callback(int result, - struct gntab_unmap_queue_data *data) -{ - struct unmap_grant_pages_callback_data* d = data->data; - - d->result = result; - complete(&d->completion); -} - static int __unmap_grant_pages(struct grant_map *map, int offset, int pages) { int i, err = 0; struct gntab_unmap_queue_data unmap_data; - struct unmap_grant_pages_callback_data data; - - init_completion(&data.completion); - unmap_data.data = &data; - unmap_data.done= &unmap_grant_callback; if (map->notify.flags & UNMAP_NOTIFY_CLEAR_BYTE) { int pgno = (map->notify.addr >> PAGE_SHIFT); @@ -367,11 +347,9 @@ static int __unmap_grant_pages(struct grant_map *map, int offset, int pages) unmap_data.pages = map->pages + offset; unmap_data.count = pages; - gnttab_unmap_refs_async(&unmap_data); - - wait_for_completion(&data.completion); - if (data.result) - return data.result; + err = gnttab_unmap_refs_sync(&unmap_data); + if (err) + return err; for (i = 0; i < pages; i++) { if (map->unmap_ops[offset+i].status) diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c index 17972fbacddc..b1c7170e5c9e 100644 --- a/drivers/xen/grant-table.c +++ b/drivers/xen/grant-table.c @@ -123,6 +123,11 @@ struct gnttab_ops { int (*query_foreign_access)(grant_ref_t ref); }; +struct unmap_refs_callback_data { + struct completion completion; + int result; +}; + static struct gnttab_ops *gnttab_interface; static int grant_table_version; @@ -863,6 +868,29 @@ void gnttab_unmap_refs_async(struct gntab_unmap_queue_data* item) } EXPORT_SYMBOL_GPL(gnttab_unmap_refs_async); +static void unmap_refs_callback(int result, + struct gntab_unmap_queue_data *data) +{ + struct unmap_refs_callback_data *d = data->data; + + d->result = result; + complete(&d->completion); +} + +int gnttab_unmap_refs_sync(struct gntab_unmap_queue_data *item) +{ + struct unmap_refs_callback_data data; + + init_completion(&data.completion); + item->data = &data; + item->done = &unmap_refs_callback; + gnttab_unmap_refs_async(item); + wait_for_completion(&data.completion); + + return data.result; +} +EXPORT_SYMBOL_GPL(gnttab_unmap_refs_sync); + static int gnttab_map_frames_v1(xen_pfn_t *frames, unsigned int nr_gframes) { int rc; diff --git a/include/xen/grant_table.h b/include/xen/grant_table.h index 143ca5ffab7a..4478f4b4aae2 100644 --- a/include/xen/grant_table.h +++ b/include/xen/grant_table.h @@ -191,6 +191,7 @@ int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops, struct gnttab_unmap_grant_ref *kunmap_ops, struct page **pages, unsigned int count); void gnttab_unmap_refs_async(struct gntab_unmap_queue_data* item); +int gnttab_unmap_refs_sync(struct gntab_unmap_queue_data *item); /* Perform a batch of grant map/copy operations. Retry every batch slot From a526973e0291b245aefe12023a7f775f3c0e59a2 Mon Sep 17 00:00:00 2001 From: Andrew Andrianov Date: Sat, 11 Apr 2015 23:29:19 +0300 Subject: [PATCH 016/101] pinctrl: mvebu: Fix mapping of pin 63 (gpo -> gpio) Signed-off-by: Andrew Andrianov Signed-off-by: Linus Walleij --- drivers/pinctrl/mvebu/pinctrl-armada-370.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/mvebu/pinctrl-armada-370.c b/drivers/pinctrl/mvebu/pinctrl-armada-370.c index 42f930f70de3..03aa58c4cb85 100644 --- a/drivers/pinctrl/mvebu/pinctrl-armada-370.c +++ b/drivers/pinctrl/mvebu/pinctrl-armada-370.c @@ -364,7 +364,7 @@ static struct mvebu_mpp_mode mv88f6710_mpp_modes[] = { MPP_FUNCTION(0x5, "audio", "mclk"), MPP_FUNCTION(0x6, "uart0", "cts")), MPP_MODE(63, - MPP_FUNCTION(0x0, "gpo", NULL), + MPP_FUNCTION(0x0, "gpio", NULL), MPP_FUNCTION(0x1, "spi0", "sck"), MPP_FUNCTION(0x2, "tclk", NULL)), MPP_MODE(64, From dc391502fdbf97a9cabdc58ba8c915175383f681 Mon Sep 17 00:00:00 2001 From: "Ivan T. Ivanov" Date: Fri, 17 Apr 2015 17:50:49 +0300 Subject: [PATCH 017/101] pinctrl: qcom-spmi: Fix pin direction configuration Pin direction configuration was incorrectly overwritten by output and function values in set_mux(). Fix this. Signed-off-by: Ivan T. Ivanov Signed-off-by: Linus Walleij --- drivers/pinctrl/qcom/pinctrl-spmi-gpio.c | 1 + drivers/pinctrl/qcom/pinctrl-spmi-mpp.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/pinctrl/qcom/pinctrl-spmi-gpio.c b/drivers/pinctrl/qcom/pinctrl-spmi-gpio.c index b2d22218a258..de684ca93b5a 100644 --- a/drivers/pinctrl/qcom/pinctrl-spmi-gpio.c +++ b/drivers/pinctrl/qcom/pinctrl-spmi-gpio.c @@ -260,6 +260,7 @@ static int pmic_gpio_set_mux(struct pinctrl_dev *pctldev, unsigned function, val = 1; } + val = val << PMIC_GPIO_REG_MODE_DIR_SHIFT; val |= pad->function << PMIC_GPIO_REG_MODE_FUNCTION_SHIFT; val |= pad->out_value & PMIC_GPIO_REG_MODE_VALUE_SHIFT; diff --git a/drivers/pinctrl/qcom/pinctrl-spmi-mpp.c b/drivers/pinctrl/qcom/pinctrl-spmi-mpp.c index 8f36c5f91949..890df16353b3 100644 --- a/drivers/pinctrl/qcom/pinctrl-spmi-mpp.c +++ b/drivers/pinctrl/qcom/pinctrl-spmi-mpp.c @@ -370,6 +370,7 @@ static int pmic_mpp_set_mux(struct pinctrl_dev *pctldev, unsigned function, } } + val = val << PMIC_MPP_REG_MODE_DIR_SHIFT; val |= pad->function << PMIC_MPP_REG_MODE_FUNCTION_SHIFT; val |= pad->out_value & PMIC_MPP_REG_MODE_VALUE_MASK; From c671835021798c1c40ca0b55b49feff76ed5e0e1 Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Sat, 25 Apr 2015 07:25:03 +0000 Subject: [PATCH 018/101] perf top: Fix a segfault when kernel map is restricted. Perf top raise a warning if a kernel sample is collected but kernel map is restricted. The warning message needs to dereference al.map->dso... However, previous perf_event__preprocess_sample() doesn't always guarantee al.map != NULL, for example, when kernel map is restricted. This patch validates al.map before dereferencing, avoid the segfault. Before this patch: $ cat /proc/sys/kernel/kptr_restrict 1 $ perf top -p 120183 perf: Segmentation fault -------- backtrace -------- /path/to/perf[0x509868] /lib64/libc.so.6(+0x3545f)[0x7f9a1540045f] /path/to/perf[0x448820] /path/to/perf(cmd_top+0xe3c)[0x44a5dc] /path/to/perf[0x4766a2] /path/to/perf(main+0x5f5)[0x42e545] /lib64/libc.so.6(__libc_start_main+0xf4)[0x7f9a153ecbd4] /path/to/perf[0x42e674] And gdb call trace: Program received signal SIGSEGV, Segmentation fault. perf_event__process_sample (machine=0xa44030, sample=0x7fffffffa4c0, evsel=0xa43b00, event=0x7ffff41c3000, tool=0x7fffffffa8a0) at builtin-top.c:736 736 !RB_EMPTY_ROOT(&al.map->dso->symbols[MAP__FUNCTION]) ? (gdb) bt #0 perf_event__process_sample (machine=0xa44030, sample=0x7fffffffa4c0, evsel=0xa43b00, event=0x7ffff41c3000, tool=0x7fffffffa8a0) at builtin-top.c:736 #1 perf_top__mmap_read_idx (top=top@entry=0x7fffffffa8a0, idx=idx@entry=0) at builtin-top.c:855 #2 0x000000000044a5dd in perf_top__mmap_read (top=0x7fffffffa8a0) at builtin-top.c:872 #3 __cmd_top (top=0x7fffffffa8a0) at builtin-top.c:997 #4 cmd_top (argc=, argv=, prefix=) at builtin-top.c:1267 #5 0x00000000004766a3 in run_builtin (p=p@entry=0x8a6ce8 , argc=argc@entry=3, argv=argv@entry=0x7fffffffdf70) at perf.c:371 #6 0x000000000042e546 in handle_internal_command (argv=0x7fffffffdf70, argc=3) at perf.c:430 #7 run_argv (argv=0x7fffffffdcf0, argcp=0x7fffffffdcfc) at perf.c:474 #8 main (argc=3, argv=0x7fffffffdf70) at perf.c:589 (gdb) Signed-off-by: Wang Nan Tested-by: Arnaldo Carvalho de Melo Cc: David Ahern Cc: Jiri Olsa Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Zefan Li Link: http://lkml.kernel.org/r/1429946703-80807-1-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-top.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 1cb3436276d1..6a4d5d41c671 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -733,7 +733,7 @@ static void perf_event__process_sample(struct perf_tool *tool, "Kernel address maps (/proc/{kallsyms,modules}) are restricted.\n\n" "Check /proc/sys/kernel/kptr_restrict.\n\n" "Kernel%s samples will not be resolved.\n", - !RB_EMPTY_ROOT(&al.map->dso->symbols[MAP__FUNCTION]) ? + al.map && !RB_EMPTY_ROOT(&al.map->dso->symbols[MAP__FUNCTION]) ? " modules" : ""); if (use_browser <= 0) sleep(5); From d13855ef18e1852b2c4dc86ddf5759c5b34628cb Mon Sep 17 00:00:00 2001 From: He Kuang Date: Sat, 25 Apr 2015 16:08:58 +0800 Subject: [PATCH 019/101] perf probe: Fix bug with global variables handling There are missing curly braces which causes find_variable() return wrong value when probing with global variables. This problem can be reproduced as following: $ perf probe -v --add='generic_perform_write global_variable_for_test' ... Try to find probe point from debuginfo. Probe point found: generic_perform_write+0 Searching 'global_variable_for_test' variable in context. An error occurred in debuginfo analysis (-2). Error: Failed to add events. Reason: No such file or directory (Code: -2) After this patch: $ perf probe -v --add='generic_perform_write global_variable_for_test' ... Converting variable global_variable_for_test into trace event. global_variable_for_test type is int. Found 1 probe_trace_events. Opening /sys/kernel/debug/tracing/kprobe_events write=1 Added new event: Writing event: p:probe/generic_perform_write _stext+1237464 global_variable_for_test=@global_variable_for_test+0:s32 probe:generic_perform_write (on generic_perform_write with global_variable_for_test) You can now use it in all perf tools, such as: perf record -e probe:generic_perform_write -aR sleep 1 Signed-off-by: He Kuang Acked-by: Masami Hiramatsu Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Wang Nan Link: http://lkml.kernel.org/r/1429949338-18678-1-git-send-email-hekuang@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/probe-finder.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index 44554c3c2220..1c3cc07937d5 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -578,10 +578,12 @@ static int find_variable(Dwarf_Die *sc_die, struct probe_finder *pf) /* Search child die for local variables and parameters. */ if (!die_find_variable_at(sc_die, pf->pvar->var, pf->addr, &vr_die)) { /* Search again in global variables */ - if (!die_find_variable_at(&pf->cu_die, pf->pvar->var, 0, &vr_die)) + if (!die_find_variable_at(&pf->cu_die, pf->pvar->var, + 0, &vr_die)) { pr_warning("Failed to find '%s' in this function.\n", pf->pvar->var); ret = -ENOENT; + } } if (ret >= 0) ret = convert_variable(&vr_die, pf); From 052b0f6eaf8b1f02669884a177bc3ce463133a42 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Fri, 24 Apr 2015 10:00:48 -0700 Subject: [PATCH 020/101] perf bench futex: Fix hung wakeup tasks after requeueing The futex-requeue benchmark can hang because of missing wakeups once the benchmark is done, ie: [Run 1]: Requeued 1024 of 1024 threads in 0.3290 ms perf: couldn't wakeup all tasks (135/1024) This bug, while perhaps suggesting missing wakeups in kernel futex code, is merely a consequence of the crappy FUTEX_CMP_REQUEUE man page, incorrectly mentioning that the number of requeued tasks is in fact returned, not the wakeups. This patch acknowledges this and updates the corresponding futex_wake code around it. Signed-off-by: Davidlohr Bueso Cc: Mel Gorman Link: http://lkml.kernel.org/r/1429894848.10273.44.camel@stgolabs.net Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/bench/futex-requeue.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/tools/perf/bench/futex-requeue.c b/tools/perf/bench/futex-requeue.c index bedff6b5b3cf..ad0d9b5342fb 100644 --- a/tools/perf/bench/futex-requeue.c +++ b/tools/perf/bench/futex-requeue.c @@ -132,6 +132,9 @@ int bench_futex_requeue(int argc, const char **argv, if (!fshared) futex_flag = FUTEX_PRIVATE_FLAG; + if (nrequeue > nthreads) + nrequeue = nthreads; + printf("Run summary [PID %d]: Requeuing %d threads (from [%s] %p to %p), " "%d at a time.\n\n", getpid(), nthreads, fshared ? "shared":"private", &futex1, &futex2, nrequeue); @@ -161,20 +164,18 @@ int bench_futex_requeue(int argc, const char **argv, /* Ok, all threads are patiently blocked, start requeueing */ gettimeofday(&start, NULL); - for (nrequeued = 0; nrequeued < nthreads; nrequeued += nrequeue) { + while (nrequeued < nthreads) { /* * Do not wakeup any tasks blocked on futex1, allowing * us to really measure futex_wait functionality. */ - futex_cmp_requeue(&futex1, 0, &futex2, 0, - nrequeue, futex_flag); + nrequeued += futex_cmp_requeue(&futex1, 0, &futex2, 0, + nrequeue, futex_flag); } + gettimeofday(&end, NULL); timersub(&end, &start, &runtime); - if (nrequeued > nthreads) - nrequeued = nthreads; - update_stats(&requeued_stats, nrequeued); update_stats(&requeuetime_stats, runtime.tv_usec); @@ -184,7 +185,7 @@ int bench_futex_requeue(int argc, const char **argv, } /* everybody should be blocked on futex2, wake'em up */ - nrequeued = futex_wake(&futex2, nthreads, futex_flag); + nrequeued = futex_wake(&futex2, nrequeued, futex_flag); if (nthreads != nrequeued) warnx("couldn't wakeup all tasks (%d/%d)", nrequeued, nthreads); From 24f1ced167e5e011040b4c3aae75aee45a79eed5 Mon Sep 17 00:00:00 2001 From: Petr Holasek Date: Thu, 16 Apr 2015 17:38:17 +0200 Subject: [PATCH 021/101] perf bench numa: Fixes of --quiet argument Corrected description and fixed function of --quiet argument. Signed-off-by: Petr Holasek Reviewed-by: Ingo Molnar Cc: Jiri Olsa Link: http://lkml.kernel.org/r/1429198699-25039-2-git-send-email-pholasek@redhat.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/bench/numa.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c index ebfa163b80b5..cd872e9c3a9c 100644 --- a/tools/perf/bench/numa.c +++ b/tools/perf/bench/numa.c @@ -180,7 +180,7 @@ static const struct option options[] = { OPT_INTEGER('H', "thp" , &p0.thp, "MADV_NOHUGEPAGE < 0 < MADV_HUGEPAGE"), OPT_BOOLEAN('c', "show_convergence", &p0.show_convergence, "show convergence details"), OPT_BOOLEAN('m', "measure_convergence", &p0.measure_convergence, "measure convergence latency"), - OPT_BOOLEAN('q', "quiet" , &p0.show_quiet, "bzero the initial allocations"), + OPT_BOOLEAN('q', "quiet" , &p0.show_quiet, "quiet mode"), OPT_BOOLEAN('S', "serialize-startup", &p0.serialize_startup,"serialize thread startup"), /* Special option string parsing callbacks: */ @@ -1395,7 +1395,7 @@ static void print_res(const char *name, double val, if (!name) name = "main,"; - if (g->p.show_quiet) + if (!g->p.show_quiet) printf(" %-30s %15.3f, %-15s %s\n", name, val, txt_unit, txt_short); else printf(" %14.3f %s\n", val, txt_long); From 1d90a685eb75a56648d7dd22c704a1a6da516de9 Mon Sep 17 00:00:00 2001 From: Petr Holasek Date: Thu, 16 Apr 2015 17:38:19 +0200 Subject: [PATCH 022/101] perf bench numa: Fix immediate meeting of convergence condition This patch fixes the race in the beginning of benchmark run when some threads hasn't got assigned curr_cpu yet so they don't occur in nodes-of-process stats and benchmark concludes that all remaining threads are converged already. The race can be reproduced with small amount of threads and some bigger amount of shared process memory, e.g. one process, two threads and 5GB of process memory. Signed-off-by: Petr Holasek Reviewed-by: Ingo Molnar Cc: Jiri Olsa Link: http://lkml.kernel.org/r/1429198699-25039-4-git-send-email-pholasek@redhat.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/bench/numa.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c index cd872e9c3a9c..ba5efa4710b5 100644 --- a/tools/perf/bench/numa.c +++ b/tools/perf/bench/numa.c @@ -828,6 +828,9 @@ static int count_process_nodes(int process_nr) td = g->threads + task_nr; node = numa_node_of_cpu(td->curr_cpu); + if (node < 0) /* curr_cpu was likely still -1 */ + return 0; + node_present[node] = 1; } @@ -882,6 +885,11 @@ static void calc_convergence_compression(int *strong) for (p = 0; p < g->p.nr_proc; p++) { unsigned int nodes = count_process_nodes(p); + if (!nodes) { + *strong = 0; + return; + } + nodes_min = min(nodes, nodes_min); nodes_max = max(nodes, nodes_max); } From 2a700d8edffdbfb8200332d96c3147e042b337f1 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Mon, 13 Apr 2015 11:18:51 -0300 Subject: [PATCH 023/101] [media] marvell-ccic: fix Y'CbCr ordering Various formats had their byte ordering implemented incorrectly, and the V4L2_PIX_FMT_UYVY is actually impossible to create, instead you get V4L2_PIX_FMT_YVYU. This was working before commit ad6ac452227b7cb93ac79beec092850d178740b1 ("add new formats support for marvell-ccic driver"). That commit broke the original format support and the OLPC XO-1 laptop showed wrong colors ever since (if you are crazy enough to attempt to run the latest kernel on it, like I did). The email addresses of the authors of that patch are no longer valid, so without a way to reach them and ask them about their test setup I am going with what I can test on the OLPC laptop. If this breaks something for someone on their non-OLPC setup, then contact the linux-media mailinglist. My suspicion however is that that commit went in untested. Signed-off-by: Hans Verkuil Acked-by: Jonathan Corbet Cc: # for v3.19 and up Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- drivers/media/platform/marvell-ccic/mcam-core.c | 14 +++++++------- drivers/media/platform/marvell-ccic/mcam-core.h | 8 ++++---- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/media/platform/marvell-ccic/mcam-core.c b/drivers/media/platform/marvell-ccic/mcam-core.c index 9c64b5d01c6a..110fd70c7326 100644 --- a/drivers/media/platform/marvell-ccic/mcam-core.c +++ b/drivers/media/platform/marvell-ccic/mcam-core.c @@ -116,8 +116,8 @@ static struct mcam_format_struct { .planar = false, }, { - .desc = "UYVY 4:2:2", - .pixelformat = V4L2_PIX_FMT_UYVY, + .desc = "YVYU 4:2:2", + .pixelformat = V4L2_PIX_FMT_YVYU, .mbus_code = MEDIA_BUS_FMT_YUYV8_2X8, .bpp = 2, .planar = false, @@ -748,7 +748,7 @@ static void mcam_ctlr_image(struct mcam_camera *cam) switch (fmt->pixelformat) { case V4L2_PIX_FMT_YUYV: - case V4L2_PIX_FMT_UYVY: + case V4L2_PIX_FMT_YVYU: widthy = fmt->width * 2; widthuv = 0; break; @@ -784,15 +784,15 @@ static void mcam_ctlr_image(struct mcam_camera *cam) case V4L2_PIX_FMT_YUV420: case V4L2_PIX_FMT_YVU420: mcam_reg_write_mask(cam, REG_CTRL0, - C0_DF_YUV | C0_YUV_420PL | C0_YUVE_YVYU, C0_DF_MASK); + C0_DF_YUV | C0_YUV_420PL | C0_YUVE_VYUY, C0_DF_MASK); break; case V4L2_PIX_FMT_YUYV: mcam_reg_write_mask(cam, REG_CTRL0, - C0_DF_YUV | C0_YUV_PACKED | C0_YUVE_UYVY, C0_DF_MASK); + C0_DF_YUV | C0_YUV_PACKED | C0_YUVE_NOSWAP, C0_DF_MASK); break; - case V4L2_PIX_FMT_UYVY: + case V4L2_PIX_FMT_YVYU: mcam_reg_write_mask(cam, REG_CTRL0, - C0_DF_YUV | C0_YUV_PACKED | C0_YUVE_YUYV, C0_DF_MASK); + C0_DF_YUV | C0_YUV_PACKED | C0_YUVE_SWAP24, C0_DF_MASK); break; case V4L2_PIX_FMT_JPEG: mcam_reg_write_mask(cam, REG_CTRL0, diff --git a/drivers/media/platform/marvell-ccic/mcam-core.h b/drivers/media/platform/marvell-ccic/mcam-core.h index aa0c6eac254a..7ffdf4dbaf8c 100644 --- a/drivers/media/platform/marvell-ccic/mcam-core.h +++ b/drivers/media/platform/marvell-ccic/mcam-core.h @@ -330,10 +330,10 @@ int mccic_resume(struct mcam_camera *cam); #define C0_YUVE_YVYU 0x00010000 /* Y1CrY0Cb */ #define C0_YUVE_VYUY 0x00020000 /* CrY1CbY0 */ #define C0_YUVE_UYVY 0x00030000 /* CbY1CrY0 */ -#define C0_YUVE_XYUV 0x00000000 /* 420: .YUV */ -#define C0_YUVE_XYVU 0x00010000 /* 420: .YVU */ -#define C0_YUVE_XUVY 0x00020000 /* 420: .UVY */ -#define C0_YUVE_XVUY 0x00030000 /* 420: .VUY */ +#define C0_YUVE_NOSWAP 0x00000000 /* no bytes swapping */ +#define C0_YUVE_SWAP13 0x00010000 /* swap byte 1 and 3 */ +#define C0_YUVE_SWAP24 0x00020000 /* swap byte 2 and 4 */ +#define C0_YUVE_SWAP1324 0x00030000 /* swap bytes 1&3 and 2&4 */ /* Bayer bits 18,19 if needed */ #define C0_EOF_VSYNC 0x00400000 /* Generate EOF by VSYNC */ #define C0_VEDGE_CTRL 0x00800000 /* Detect falling edge of VSYNC */ From 5a9b06a27db6b006605421658418fb8943a6e217 Mon Sep 17 00:00:00 2001 From: Koji Matsuoka Date: Sun, 29 Mar 2015 10:04:56 -0300 Subject: [PATCH 024/101] [media] media: soc_camera: rcar_vin: Fix wait_for_completion When stopping abnormally, a driver can't return from wait_for_completion. This patch resolved this problem by changing wait_for_completion_timeout from wait_for_completion. Signed-off-by: Koji Matsuoka Signed-off-by: Yoshihiro Kaneko Signed-off-by: Guennadi Liakhovetski Signed-off-by: Mauro Carvalho Chehab --- drivers/media/platform/soc_camera/rcar_vin.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/media/platform/soc_camera/rcar_vin.c b/drivers/media/platform/soc_camera/rcar_vin.c index 9351f64dee7b..6460f8e1b07f 100644 --- a/drivers/media/platform/soc_camera/rcar_vin.c +++ b/drivers/media/platform/soc_camera/rcar_vin.c @@ -135,6 +135,8 @@ #define VIN_MAX_WIDTH 2048 #define VIN_MAX_HEIGHT 2048 +#define TIMEOUT_MS 100 + enum chip_id { RCAR_GEN2, RCAR_H1, @@ -820,7 +822,10 @@ static void rcar_vin_wait_stop_streaming(struct rcar_vin_priv *priv) if (priv->state == STOPPING) { priv->request_to_stop = true; spin_unlock_irq(&priv->lock); - wait_for_completion(&priv->capture_stop); + if (!wait_for_completion_timeout( + &priv->capture_stop, + msecs_to_jiffies(TIMEOUT_MS))) + priv->state = STOPPED; spin_lock_irq(&priv->lock); } } From fefad2d54beb8aad6bf4ac6daeb74f86f52565de Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Sun, 12 Apr 2015 09:09:05 -0300 Subject: [PATCH 025/101] [media] v4l: omap4iss: Replace outdated OMAP4 control pad API with syscon The omap4_ctrl_pad_readl and omap4_ctrl_pad_writel functions have been removed by commit efde234674d9 but are still used by the OMAP4 ISS driver, resulting in a compilation breakage: drivers/staging/media/omap4iss/iss_csiphy.c: In function 'omap4iss_csiphy_config': drivers/staging/media/omap4iss/iss_csiphy.c:167:2: error: implicit declaration of function 'omap4_ctrl_pad_writel' [-Werror=implicit-function-declaration] omap4_ctrl_pad_writel(cam_rx_ctrl, Fix the problem by using the syscon API to reaplace the control pad API. Lookup the syscon instance by compatible name for now as the OMAP4 ISS driver doesn't support DT yet. Fixes: efde234674d9 ("ARM: OMAP4+: control: remove support for legacy pad read/write") Signed-off-by: Laurent Pinchart Acked-by: Sakari Alius Signed-off-by: Mauro Carvalho Chehab --- drivers/staging/media/omap4iss/Kconfig | 1 + drivers/staging/media/omap4iss/iss.c | 11 +++++++++++ drivers/staging/media/omap4iss/iss.h | 4 ++++ drivers/staging/media/omap4iss/iss_csiphy.c | 12 +++++++----- 4 files changed, 23 insertions(+), 5 deletions(-) diff --git a/drivers/staging/media/omap4iss/Kconfig b/drivers/staging/media/omap4iss/Kconfig index b78643f907e7..072dac04a750 100644 --- a/drivers/staging/media/omap4iss/Kconfig +++ b/drivers/staging/media/omap4iss/Kconfig @@ -2,6 +2,7 @@ config VIDEO_OMAP4 bool "OMAP 4 Camera support" depends on VIDEO_V4L2=y && VIDEO_V4L2_SUBDEV_API && I2C=y && ARCH_OMAP4 depends on HAS_DMA + select MFD_SYSCON select VIDEOBUF2_DMA_CONTIG ---help--- Driver for an OMAP 4 ISS controller. diff --git a/drivers/staging/media/omap4iss/iss.c b/drivers/staging/media/omap4iss/iss.c index e0ad5e520e2d..7ced940bd807 100644 --- a/drivers/staging/media/omap4iss/iss.c +++ b/drivers/staging/media/omap4iss/iss.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -1386,6 +1387,16 @@ static int iss_probe(struct platform_device *pdev) platform_set_drvdata(pdev, iss); + /* + * TODO: When implementing DT support switch to syscon regmap lookup by + * phandle. + */ + iss->syscon = syscon_regmap_lookup_by_compatible("syscon"); + if (IS_ERR(iss->syscon)) { + ret = PTR_ERR(iss->syscon); + goto error; + } + /* Clocks */ ret = iss_map_mem_resource(pdev, iss, OMAP4_ISS_MEM_TOP); if (ret < 0) diff --git a/drivers/staging/media/omap4iss/iss.h b/drivers/staging/media/omap4iss/iss.h index 734cfeeb0314..35df8b4709e6 100644 --- a/drivers/staging/media/omap4iss/iss.h +++ b/drivers/staging/media/omap4iss/iss.h @@ -29,6 +29,8 @@ #include "iss_ipipe.h" #include "iss_resizer.h" +struct regmap; + #define to_iss_device(ptr_module) \ container_of(ptr_module, struct iss_device, ptr_module) #define to_device(ptr_module) \ @@ -79,6 +81,7 @@ struct iss_reg { /* * struct iss_device - ISS device structure. + * @syscon: Regmap for the syscon register space * @crashed: Bitmask of crashed entities (indexed by entity ID) */ struct iss_device { @@ -93,6 +96,7 @@ struct iss_device { struct resource *res[OMAP4_ISS_MEM_LAST]; void __iomem *regs[OMAP4_ISS_MEM_LAST]; + struct regmap *syscon; u64 raw_dmamask; diff --git a/drivers/staging/media/omap4iss/iss_csiphy.c b/drivers/staging/media/omap4iss/iss_csiphy.c index 7c3d55d811ef..748607f8918f 100644 --- a/drivers/staging/media/omap4iss/iss_csiphy.c +++ b/drivers/staging/media/omap4iss/iss_csiphy.c @@ -13,6 +13,7 @@ #include #include +#include #include "../../../../arch/arm/mach-omap2/control.h" @@ -140,9 +141,11 @@ int omap4iss_csiphy_config(struct iss_device *iss, * - bit [18] : CSIPHY1 CTRLCLK enable * - bit [17:16] : CSIPHY1 config: 00 d-phy, 01/10 ccp2 */ - cam_rx_ctrl = omap4_ctrl_pad_readl( - OMAP4_CTRL_MODULE_PAD_CORE_CONTROL_CAMERA_RX); - + /* + * TODO: When implementing DT support specify the CONTROL_CAMERA_RX + * register offset in the syscon property instead of hardcoding it. + */ + regmap_read(iss->syscon, 0x68, &cam_rx_ctrl); if (subdevs->interface == ISS_INTERFACE_CSI2A_PHY1) { cam_rx_ctrl &= ~(OMAP4_CAMERARX_CSI21_LANEENABLE_MASK | @@ -166,8 +169,7 @@ int omap4iss_csiphy_config(struct iss_device *iss, cam_rx_ctrl |= OMAP4_CAMERARX_CSI22_CTRLCLKEN_MASK; } - omap4_ctrl_pad_writel(cam_rx_ctrl, - OMAP4_CTRL_MODULE_PAD_CORE_CONTROL_CAMERA_RX); + regmap_write(iss->syscon, 0x68, cam_rx_ctrl); /* Reset used lane count */ csi2->phy->used_data_lanes = 0; From 4e637ac212b63f4b5dd1da626aca34ffcbfd5daa Mon Sep 17 00:00:00 2001 From: "Ivan T. Ivanov" Date: Thu, 9 Apr 2015 18:18:37 +0300 Subject: [PATCH 026/101] pinctrl: qcom-spmi-mpp: Fix input value report Fix interpretation of the pmic_mpp_read() return code, negative value means an error. Signed-off-by: Ivan T. Ivanov Reviewed-by: Bjorn Andersson Signed-off-by: Linus Walleij --- drivers/pinctrl/qcom/pinctrl-spmi-mpp.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/pinctrl/qcom/pinctrl-spmi-mpp.c b/drivers/pinctrl/qcom/pinctrl-spmi-mpp.c index 890df16353b3..211b942ad6d5 100644 --- a/drivers/pinctrl/qcom/pinctrl-spmi-mpp.c +++ b/drivers/pinctrl/qcom/pinctrl-spmi-mpp.c @@ -577,10 +577,11 @@ static void pmic_mpp_config_dbg_show(struct pinctrl_dev *pctldev, if (pad->input_enabled) { ret = pmic_mpp_read(state, pad, PMIC_MPP_REG_RT_STS); - if (!ret) { - ret &= PMIC_MPP_REG_RT_STS_VAL_MASK; - pad->out_value = ret; - } + if (ret < 0) + return; + + ret &= PMIC_MPP_REG_RT_STS_VAL_MASK; + pad->out_value = ret; } seq_printf(s, " %-4s", pad->output_enabled ? "out" : "in"); From 2b953a5e994ce279904ec70220f7d4f31d380a0a Mon Sep 17 00:00:00 2001 From: Boris Ostrovsky Date: Tue, 28 Apr 2015 18:46:20 -0400 Subject: [PATCH 027/101] xen: Suspend ticks on all CPUs during suspend Commit 77e32c89a711 ("clockevents: Manage device's state separately for the core") decouples clockevent device's modes from states. With this change when a Xen guest tries to resume, it won't be calling its set_mode op which needs to be done on each VCPU in order to make the hypervisor aware that we are in oneshot mode. This happens because clockevents_tick_resume() (which is an intermediate step of resuming ticks on a processor) doesn't call clockevents_set_state() anymore and because during suspend clockevent devices on all VCPUs (except for the one doing the suspend) are left in ONESHOT state. As result, during resume the clockevents state machine will assume that device is already where it should be and doesn't need to be updated. To avoid this problem we should suspend ticks on all VCPUs during suspend. Signed-off-by: Boris Ostrovsky Signed-off-by: David Vrabel --- arch/x86/xen/suspend.c | 10 ++++++++++ drivers/xen/manage.c | 9 ++++++--- include/xen/xen-ops.h | 1 + 3 files changed, 17 insertions(+), 3 deletions(-) diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c index d9497698645a..53b4c0811f4f 100644 --- a/arch/x86/xen/suspend.c +++ b/arch/x86/xen/suspend.c @@ -88,7 +88,17 @@ static void xen_vcpu_notify_restore(void *data) tick_resume_local(); } +static void xen_vcpu_notify_suspend(void *data) +{ + tick_suspend_local(); +} + void xen_arch_resume(void) { on_each_cpu(xen_vcpu_notify_restore, NULL, 1); } + +void xen_arch_suspend(void) +{ + on_each_cpu(xen_vcpu_notify_suspend, NULL, 1); +} diff --git a/drivers/xen/manage.c b/drivers/xen/manage.c index bf1940706422..9e6a85104a20 100644 --- a/drivers/xen/manage.c +++ b/drivers/xen/manage.c @@ -131,6 +131,8 @@ static void do_suspend(void) goto out_resume; } + xen_arch_suspend(); + si.cancelled = 1; err = stop_machine(xen_suspend, &si, cpumask_of(0)); @@ -148,11 +150,12 @@ static void do_suspend(void) si.cancelled = 1; } + xen_arch_resume(); + out_resume: - if (!si.cancelled) { - xen_arch_resume(); + if (!si.cancelled) xs_resume(); - } else + else xs_suspend_cancel(); dpm_resume_end(si.cancelled ? PMSG_THAW : PMSG_RESTORE); diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h index c643e6a94c9a..0ce4f32017ea 100644 --- a/include/xen/xen-ops.h +++ b/include/xen/xen-ops.h @@ -13,6 +13,7 @@ void xen_arch_post_suspend(int suspend_cancelled); void xen_timer_resume(void); void xen_arch_resume(void); +void xen_arch_suspend(void); void xen_resume_notifier_register(struct notifier_block *nb); void xen_resume_notifier_unregister(struct notifier_block *nb); From 8014bcc86ef112eab9ee1db312dba4e6b608cf89 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Mon, 13 Apr 2015 00:26:35 +0100 Subject: [PATCH 028/101] xen-pciback: Add name prefix to global 'permissive' variable The variable for the 'permissive' module parameter used to be static but was recently changed to be extern. This puts it in the kernel global namespace if the driver is built-in, so its name should begin with a prefix identifying the driver. Signed-off-by: Ben Hutchings Fixes: af6fc858a35b ("xen-pciback: limit guest control of command register") Signed-off-by: David Vrabel --- drivers/xen/xen-pciback/conf_space.c | 6 +++--- drivers/xen/xen-pciback/conf_space.h | 2 +- drivers/xen/xen-pciback/conf_space_header.c | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/xen/xen-pciback/conf_space.c b/drivers/xen/xen-pciback/conf_space.c index 75fe3d466515..9c234209d8b5 100644 --- a/drivers/xen/xen-pciback/conf_space.c +++ b/drivers/xen/xen-pciback/conf_space.c @@ -16,8 +16,8 @@ #include "conf_space.h" #include "conf_space_quirks.h" -bool permissive; -module_param(permissive, bool, 0644); +bool xen_pcibk_permissive; +module_param_named(permissive, xen_pcibk_permissive, bool, 0644); /* This is where xen_pcibk_read_config_byte, xen_pcibk_read_config_word, * xen_pcibk_write_config_word, and xen_pcibk_write_config_byte are created. */ @@ -262,7 +262,7 @@ int xen_pcibk_config_write(struct pci_dev *dev, int offset, int size, u32 value) * This means that some fields may still be read-only because * they have entries in the config_field list that intercept * the write and do nothing. */ - if (dev_data->permissive || permissive) { + if (dev_data->permissive || xen_pcibk_permissive) { switch (size) { case 1: err = pci_write_config_byte(dev, offset, diff --git a/drivers/xen/xen-pciback/conf_space.h b/drivers/xen/xen-pciback/conf_space.h index 2e1d73d1d5d0..62461a8ba1d6 100644 --- a/drivers/xen/xen-pciback/conf_space.h +++ b/drivers/xen/xen-pciback/conf_space.h @@ -64,7 +64,7 @@ struct config_field_entry { void *data; }; -extern bool permissive; +extern bool xen_pcibk_permissive; #define OFFSET(cfg_entry) ((cfg_entry)->base_offset+(cfg_entry)->field->offset) diff --git a/drivers/xen/xen-pciback/conf_space_header.c b/drivers/xen/xen-pciback/conf_space_header.c index c2260a0456c9..ad3d17d29c81 100644 --- a/drivers/xen/xen-pciback/conf_space_header.c +++ b/drivers/xen/xen-pciback/conf_space_header.c @@ -118,7 +118,7 @@ static int command_write(struct pci_dev *dev, int offset, u16 value, void *data) cmd->val = value; - if (!permissive && (!dev_data || !dev_data->permissive)) + if (!xen_pcibk_permissive && (!dev_data || !dev_data->permissive)) return 0; /* Only allow the guest to control certain bits. */ From 2c62e8492ed7358bbe7da51666c7e0f6da9474ee Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Thu, 30 Apr 2015 12:41:28 +0800 Subject: [PATCH 029/101] x86/PCI/ACPI: Make all resources except [io 0xcf8-0xcff] available on PCI bus An IO port or MMIO resource assigned to a PCI host bridge may be consumed by the host bridge itself or available to its child bus/devices. The ACPI specification defines a bit (Producer/Consumer) to tell whether the resource is consumed by the host bridge itself, but firmware hasn't used that bit consistently, so we can't rely on it. Before commit 593669c2ac0f ("x86/PCI/ACPI: Use common ACPI resource interfaces to simplify implementation"), arch/x86/pci/acpi.c ignored all IO port resources defined by acpi_resource_io and acpi_resource_fixed_io to filter out IO ports consumed by the host bridge itself. Commit 593669c2ac0f ("x86/PCI/ACPI: Use common ACPI resource interfaces to simplify implementation") started accepting all IO port and MMIO resources, which caused a regression that IO port resources consumed by the host bridge itself became available to its child devices. Then commit 63f1789ec716 ("x86/PCI/ACPI: Ignore resources consumed by host bridge itself") ignored resources consumed by the host bridge itself by checking the IORESOURCE_WINDOW flag, which accidently removed MMIO resources defined by acpi_resource_memory24, acpi_resource_memory32 and acpi_resource_fixed_memory32. On x86 and IA64 platforms, all IO port and MMIO resources are assumed to be available to child bus/devices except one special case: IO port [0xCF8-0xCFF] is consumed by the host bridge itself to access PCI configuration space. So explicitly filter out PCI CFG IO ports[0xCF8-0xCFF]. This solution will also ease the way to consolidate ACPI PCI host bridge common code from x86, ia64 and ARM64. Related ACPI table are archived at: https://bugzilla.kernel.org/show_bug.cgi?id=94221 Related discussions at: http://patchwork.ozlabs.org/patch/461633/ https://lkml.org/lkml/2015/3/29/304 Fixes: 63f1789ec716 (Ignore resources consumed by host bridge itself) Reported-by: Bernhard Thaler Signed-off-by: Jiang Liu Cc: 4.0+ # 4.0+ Reviewed-by: Bjorn Helgaas Signed-off-by: Rafael J. Wysocki --- arch/x86/pci/acpi.c | 24 ++++++++++++++++++++++-- drivers/acpi/resource.c | 2 +- 2 files changed, 23 insertions(+), 3 deletions(-) diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c index e4695985f9de..d93963340c3c 100644 --- a/arch/x86/pci/acpi.c +++ b/arch/x86/pci/acpi.c @@ -325,6 +325,26 @@ static void release_pci_root_info(struct pci_host_bridge *bridge) kfree(info); } +/* + * An IO port or MMIO resource assigned to a PCI host bridge may be + * consumed by the host bridge itself or available to its child + * bus/devices. The ACPI specification defines a bit (Producer/Consumer) + * to tell whether the resource is consumed by the host bridge itself, + * but firmware hasn't used that bit consistently, so we can't rely on it. + * + * On x86 and IA64 platforms, all IO port and MMIO resources are assumed + * to be available to child bus/devices except one special case: + * IO port [0xCF8-0xCFF] is consumed by the host bridge itself + * to access PCI configuration space. + * + * So explicitly filter out PCI CFG IO ports[0xCF8-0xCFF]. + */ +static bool resource_is_pcicfg_ioport(struct resource *res) +{ + return (res->flags & IORESOURCE_IO) && + res->start == 0xCF8 && res->end == 0xCFF; +} + static void probe_pci_root_info(struct pci_root_info *info, struct acpi_device *device, int busnum, int domain, @@ -346,8 +366,8 @@ static void probe_pci_root_info(struct pci_root_info *info, "no IO and memory resources present in _CRS\n"); else resource_list_for_each_entry_safe(entry, tmp, list) { - if ((entry->res->flags & IORESOURCE_WINDOW) == 0 || - (entry->res->flags & IORESOURCE_DISABLED)) + if ((entry->res->flags & IORESOURCE_DISABLED) || + resource_is_pcicfg_ioport(entry->res)) resource_list_destroy_entry(entry); else entry->res->name = info->name; diff --git a/drivers/acpi/resource.c b/drivers/acpi/resource.c index 5589a6e2a023..8244f013f210 100644 --- a/drivers/acpi/resource.c +++ b/drivers/acpi/resource.c @@ -573,7 +573,7 @@ EXPORT_SYMBOL_GPL(acpi_dev_get_resources); * @ares: Input ACPI resource object. * @types: Valid resource types of IORESOURCE_XXX * - * This is a hepler function to support acpi_dev_get_resources(), which filters + * This is a helper function to support acpi_dev_get_resources(), which filters * ACPI resource objects according to resource types. */ int acpi_dev_filter_resource_type(struct acpi_resource *ares, From 3349fb64b2927407017d970dd5c4daf3c5ad69f8 Mon Sep 17 00:00:00 2001 From: Chris Bainbridge Date: Wed, 29 Apr 2015 21:21:40 +0100 Subject: [PATCH 030/101] ACPI / SBS: Add 5 us delay to fix SBS hangs on MacBook Commit 7bc5a2bad0b8 'ACPI: Support _OSI("Darwin") correctly' caused the MacBook firmware to expose the SBS, resulting in intermittent hangs of several minutes on boot, and failure to detect or report the battery. Fix this by adding a 5 us delay to the start of each SMBUS transaction. This timing is the result of experimentation - hangs were observed with 3 us but never with 5 us. Fixes: 7bc5a2bad0b8 'ACPI: Support _OSI("Darwin") correctly' Link: https://bugzilla.kernel.org/show_bug.cgi?id=94651 Signed-off-by: Chris Bainbridge Cc: 3.18+ # 3.18+ [ rjw: Subject and changelog ] Signed-off-by: Rafael J. Wysocki --- drivers/acpi/sbshc.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/drivers/acpi/sbshc.c b/drivers/acpi/sbshc.c index 26e5b5060523..bf034f8b7c1a 100644 --- a/drivers/acpi/sbshc.c +++ b/drivers/acpi/sbshc.c @@ -14,6 +14,7 @@ #include #include #include +#include #include "sbshc.h" #define PREFIX "ACPI: " @@ -87,6 +88,8 @@ enum acpi_smb_offset { ACPI_SMB_ALARM_DATA = 0x26, /* 2 bytes alarm data */ }; +static bool macbook; + static inline int smb_hc_read(struct acpi_smb_hc *hc, u8 address, u8 *data) { return ec_read(hc->offset + address, data); @@ -132,6 +135,8 @@ static int acpi_smbus_transaction(struct acpi_smb_hc *hc, u8 protocol, } mutex_lock(&hc->lock); + if (macbook) + udelay(5); if (smb_hc_read(hc, ACPI_SMB_PROTOCOL, &temp)) goto end; if (temp) { @@ -257,12 +262,29 @@ extern int acpi_ec_add_query_handler(struct acpi_ec *ec, u8 query_bit, acpi_handle handle, acpi_ec_query_func func, void *data); +static int macbook_dmi_match(const struct dmi_system_id *d) +{ + pr_debug("Detected MacBook, enabling workaround\n"); + macbook = true; + return 0; +} + +static struct dmi_system_id acpi_smbus_dmi_table[] = { + { macbook_dmi_match, "Apple MacBook", { + DMI_MATCH(DMI_BOARD_VENDOR, "Apple"), + DMI_MATCH(DMI_PRODUCT_NAME, "MacBook") }, + }, + { }, +}; + static int acpi_smbus_hc_add(struct acpi_device *device) { int status; unsigned long long val; struct acpi_smb_hc *hc; + dmi_check_system(acpi_smbus_dmi_table); + if (!device) return -EINVAL; From 2375a212ca06d35f90841bc511b3e9ae8a95a82e Mon Sep 17 00:00:00 2001 From: Antonio Ospite Date: Wed, 29 Apr 2015 10:37:24 +0200 Subject: [PATCH 031/101] ACPI / documentation: fix a sentence about GPIO resources The sentence "These resources are used be used to pass ..." contains a suspicious repetition, likely the author meant "These resources can be used to pass ...". Simplify the wording. Signed-off-by: Antonio Ospite Signed-off-by: Rafael J. Wysocki --- Documentation/acpi/enumeration.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/acpi/enumeration.txt b/Documentation/acpi/enumeration.txt index 750401f91341..15dfce708ebf 100644 --- a/Documentation/acpi/enumeration.txt +++ b/Documentation/acpi/enumeration.txt @@ -253,7 +253,7 @@ input driver: GPIO support ~~~~~~~~~~~~ ACPI 5 introduced two new resources to describe GPIO connections: GpioIo -and GpioInt. These resources are used be used to pass GPIO numbers used by +and GpioInt. These resources can be used to pass GPIO numbers used by the device to the driver. ACPI 5.1 extended this with _DSD (Device Specific Data) which made it possible to name the GPIOs among other things. From e944ec2ca00fb0170ba9d7f2aeec32c22dc0d4ec Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 29 Apr 2015 21:08:48 +0900 Subject: [PATCH 032/101] perf report: Fix -T/--threads option to work again The commit 512ae1bd6acb ("perf tools: Consolidate management of default sort orders") changed default value of the 'sort_order' variable to NULL indicating that users don't set any sort keys on the command line. However it missed to update a check in perf_evlist__tty_browse_hists() so that 'perf report -T' cannot show the per-thread values after the normal output. This patch fixes it to work again. Note that the -T option only works on --stdio and neither --sort nor --parent option was given. Signed-off-by: Namhyung Kim Cc: Andi Kleen Cc: David Ahern Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1430309328-28317-1-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-report.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 476cdf7afcca..b63aeda719be 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -329,7 +329,7 @@ static int perf_evlist__tty_browse_hists(struct perf_evlist *evlist, fprintf(stdout, "\n\n"); } - if (sort_order == default_sort_order && + if (sort_order == NULL && parent_pattern == default_parent_pattern) { fprintf(stdout, "#\n# (%s)\n#\n", help); From 5f55d2ae699d1756ad6132786c7f9c27dc456b66 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Tue, 28 Apr 2015 10:23:30 -0600 Subject: [PATCH 033/101] vfio-pci: Log device requests more verbosely Log some clues indicating whether the user is receiving device request interfaces or not listening. This can help indicate why a driver unbind is blocked or explain why QEMU automatically unplugged a device from the VM. Signed-off-by: Alex Williamson --- drivers/vfio/pci/vfio_pci.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index 69fab0fd15ae..e9851add6f4e 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c @@ -907,8 +907,14 @@ static void vfio_pci_request(void *device_data, unsigned int count) mutex_lock(&vdev->igate); if (vdev->req_trigger) { - dev_dbg(&vdev->pdev->dev, "Requesting device from user\n"); + if (!(count % 10)) + dev_notice_ratelimited(&vdev->pdev->dev, + "Relaying device request to user (#%u)\n", + count); eventfd_signal(vdev->req_trigger, 1); + } else if (count == 0) { + dev_warn(&vdev->pdev->dev, + "No device request channel registered, blocked until released by user\n"); } mutex_unlock(&vdev->igate); From db7d4d7f40215843000cb9d441c9149fd42ea36b Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Fri, 1 May 2015 16:31:41 -0600 Subject: [PATCH 034/101] vfio: Fix runaway interruptible timeout Commit 13060b64b819 ("vfio: Add and use device request op for vfio bus drivers") incorrectly makes use of an interruptible timeout. When interrupted, the signal remains pending resulting in subsequent timeouts occurring instantly. This makes the loop spin at a much higher rate than intended. Instead of making this completely non-interruptible, we can change this into a sort of interruptible-once behavior and use the "once" to log debug information. The driver API doesn't allow us to abort and return an error code. Signed-off-by: Alex Williamson Fixes: 13060b64b819 Cc: stable@vger.kernel.org # v4.0 --- drivers/vfio/vfio.c | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c index 0d336625ac71..e1278fe04b1e 100644 --- a/drivers/vfio/vfio.c +++ b/drivers/vfio/vfio.c @@ -710,6 +710,8 @@ void *vfio_del_group_dev(struct device *dev) void *device_data = device->device_data; struct vfio_unbound_dev *unbound; unsigned int i = 0; + long ret; + bool interrupted = false; /* * The group exists so long as we have a device reference. Get @@ -755,9 +757,22 @@ void *vfio_del_group_dev(struct device *dev) vfio_device_put(device); - } while (wait_event_interruptible_timeout(vfio.release_q, - !vfio_dev_present(group, dev), - HZ * 10) <= 0); + if (interrupted) { + ret = wait_event_timeout(vfio.release_q, + !vfio_dev_present(group, dev), HZ * 10); + } else { + ret = wait_event_interruptible_timeout(vfio.release_q, + !vfio_dev_present(group, dev), HZ * 10); + if (ret == -ERESTARTSYS) { + interrupted = true; + dev_warn(dev, + "Device is currently in use, task" + " \"%s\" (%d) " + "blocked until device is released", + current->comm, task_pid_nr(current)); + } + } + } while (ret <= 0); vfio_group_put(group); From 9b071a43553d6b2df4364951639f61076a8dd676 Mon Sep 17 00:00:00 2001 From: Philippe Coval Date: Sat, 2 May 2015 15:14:08 +0200 Subject: [PATCH 035/101] ideapad_laptop: Add Lenovo G40-30 to devices without radio switch Lenovo G40-30 does not provide any physical radio switch to user. Therefore disable the rfkill switch identically to the Yoga 2 approach. (Note for later, models ids are sorted alphabetically). Benefit is to make wireless available again without unloading module. It was tested successfully on 4.1.0-rc1 base with this model: (LENOVO_MT_80FY_BU_idea_FM_Lenovo G40-30). BugLink: https://bugs.launchpad.net/ideapad-laptop/+bug/1450946 Cc: platform-driver-x86@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Philippe Coval Signed-off-by: Darren Hart --- drivers/platform/x86/ideapad-laptop.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/platform/x86/ideapad-laptop.c b/drivers/platform/x86/ideapad-laptop.c index b3d419a84723..b496db87bc05 100644 --- a/drivers/platform/x86/ideapad-laptop.c +++ b/drivers/platform/x86/ideapad-laptop.c @@ -829,6 +829,13 @@ static void ideapad_acpi_notify(acpi_handle handle, u32 event, void *data) * report all radios as hardware-blocked. */ static const struct dmi_system_id no_hw_rfkill_list[] = { + { + .ident = "Lenovo G40-30", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_VERSION, "Lenovo G40-30"), + }, + }, { .ident = "Lenovo Yoga 2 11 / 13 / Pro", .matches = { From 7829fb09a2b4268b30dd9bc782fa5ebee278b137 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 30 Apr 2015 04:13:52 +0200 Subject: [PATCH 036/101] lib: make memzero_explicit more robust against dead store elimination In commit 0b053c951829 ("lib: memzero_explicit: use barrier instead of OPTIMIZER_HIDE_VAR"), we made memzero_explicit() more robust in case LTO would decide to inline memzero_explicit() and eventually find out it could be elimiated as dead store. While using barrier() works well for the case of gcc, recent efforts from LLVMLinux people suggest to use llvm as an alternative to gcc, and there, Stephan found in a simple stand-alone user space example that llvm could nevertheless optimize and thus elimitate the memset(). A similar issue has been observed in the referenced llvm bug report, which is regarded as not-a-bug. Based on some experiments, icc is a bit special on its own, while it doesn't seem to eliminate the memset(), it could do so with an own implementation, and then result in similar findings as with llvm. The fix in this patch now works for all three compilers (also tested with more aggressive optimization levels). Arguably, in the current kernel tree it's more of a theoretical issue, but imho, it's better to be pedantic about it. It's clearly visible with gcc/llvm though, with the below code: if we would have used barrier() only here, llvm would have omitted clearing, not so with barrier_data() variant: static inline void memzero_explicit(void *s, size_t count) { memset(s, 0, count); barrier_data(s); } int main(void) { char buff[20]; memzero_explicit(buff, sizeof(buff)); return 0; } $ gcc -O2 test.c $ gdb a.out (gdb) disassemble main Dump of assembler code for function main: 0x0000000000400400 <+0>: lea -0x28(%rsp),%rax 0x0000000000400405 <+5>: movq $0x0,-0x28(%rsp) 0x000000000040040e <+14>: movq $0x0,-0x20(%rsp) 0x0000000000400417 <+23>: movl $0x0,-0x18(%rsp) 0x000000000040041f <+31>: xor %eax,%eax 0x0000000000400421 <+33>: retq End of assembler dump. $ clang -O2 test.c $ gdb a.out (gdb) disassemble main Dump of assembler code for function main: 0x00000000004004f0 <+0>: xorps %xmm0,%xmm0 0x00000000004004f3 <+3>: movaps %xmm0,-0x18(%rsp) 0x00000000004004f8 <+8>: movl $0x0,-0x8(%rsp) 0x0000000000400500 <+16>: lea -0x18(%rsp),%rax 0x0000000000400505 <+21>: xor %eax,%eax 0x0000000000400507 <+23>: retq End of assembler dump. As gcc, clang, but also icc defines __GNUC__, it's sufficient to define this in compiler-gcc.h only to be picked up. For a fallback or otherwise unsupported compiler, we define it as a barrier. Similarly, for ecc which does not support gcc inline asm. Reference: https://llvm.org/bugs/show_bug.cgi?id=15495 Reported-by: Stephan Mueller Tested-by: Stephan Mueller Signed-off-by: Daniel Borkmann Cc: Theodore Ts'o Cc: Stephan Mueller Cc: Hannes Frederic Sowa Cc: mancha security Cc: Mark Charlebois Cc: Behan Webster Signed-off-by: Herbert Xu --- include/linux/compiler-gcc.h | 16 +++++++++++++++- include/linux/compiler-intel.h | 3 +++ include/linux/compiler.h | 4 ++++ lib/string.c | 2 +- 4 files changed, 23 insertions(+), 2 deletions(-) diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index cdf13ca7cac3..371e560d13cf 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -9,10 +9,24 @@ + __GNUC_MINOR__ * 100 \ + __GNUC_PATCHLEVEL__) - /* Optimization barrier */ + /* The "volatile" is due to gcc bugs */ #define barrier() __asm__ __volatile__("": : :"memory") +/* + * This version is i.e. to prevent dead stores elimination on @ptr + * where gcc and llvm may behave differently when otherwise using + * normal barrier(): while gcc behavior gets along with a normal + * barrier(), llvm needs an explicit input variable to be assumed + * clobbered. The issue is as follows: while the inline asm might + * access any memory it wants, the compiler could have fit all of + * @ptr into memory registers instead, and since @ptr never escaped + * from that, it proofed that the inline asm wasn't touching any of + * it. This version works well with both compilers, i.e. we're telling + * the compiler that the inline asm absolutely may see the contents + * of @ptr. See also: https://llvm.org/bugs/show_bug.cgi?id=15495 + */ +#define barrier_data(ptr) __asm__ __volatile__("": :"r"(ptr) :"memory") /* * This macro obfuscates arithmetic on a variable address so that gcc diff --git a/include/linux/compiler-intel.h b/include/linux/compiler-intel.h index ba147a1727e6..0c9a2f2c2802 100644 --- a/include/linux/compiler-intel.h +++ b/include/linux/compiler-intel.h @@ -13,9 +13,12 @@ /* Intel ECC compiler doesn't support gcc specific asm stmts. * It uses intrinsics to do the equivalent things. */ +#undef barrier_data #undef RELOC_HIDE #undef OPTIMIZER_HIDE_VAR +#define barrier_data(ptr) barrier() + #define RELOC_HIDE(ptr, off) \ ({ unsigned long __ptr; \ __ptr = (unsigned long) (ptr); \ diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 0e41ca0e5927..867722591be2 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -169,6 +169,10 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect); # define barrier() __memory_barrier() #endif +#ifndef barrier_data +# define barrier_data(ptr) barrier() +#endif + /* Unreachable code */ #ifndef unreachable # define unreachable() do { } while (1) diff --git a/lib/string.c b/lib/string.c index a5792019193c..bb3d4b6993c4 100644 --- a/lib/string.c +++ b/lib/string.c @@ -607,7 +607,7 @@ EXPORT_SYMBOL(memset); void memzero_explicit(void *s, size_t count) { memset(s, 0, count); - barrier(); + barrier_data(s); } EXPORT_SYMBOL(memzero_explicit); From f440c4ee3e53f767974fe60bcbc0b6687a5fb53f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81lvaro=20Fern=C3=A1ndez=20Rojas?= Date: Sat, 2 May 2015 12:08:42 +0200 Subject: [PATCH 037/101] hwrng: bcm63xx - Fix driver compilation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - s/clk_didsable_unprepare/clk_disable_unprepare - s/prov/priv - s/error/ret (bcm63xx_rng_probe) Fixes: 6229c16060fe ("hwrng: bcm63xx - make use of devm_hwrng_register") Signed-off-by: Álvaro Fernández Rojas Acked-by: Florian Fainelli Signed-off-by: Herbert Xu --- drivers/char/hw_random/bcm63xx-rng.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/char/hw_random/bcm63xx-rng.c b/drivers/char/hw_random/bcm63xx-rng.c index d1494ecd9e11..4b31f1387f37 100644 --- a/drivers/char/hw_random/bcm63xx-rng.c +++ b/drivers/char/hw_random/bcm63xx-rng.c @@ -57,7 +57,7 @@ static void bcm63xx_rng_cleanup(struct hwrng *rng) val &= ~RNG_EN; __raw_writel(val, priv->regs + RNG_CTRL); - clk_didsable_unprepare(prov->clk); + clk_disable_unprepare(priv->clk); } static int bcm63xx_rng_data_present(struct hwrng *rng, int wait) @@ -97,14 +97,14 @@ static int bcm63xx_rng_probe(struct platform_device *pdev) priv->rng.name = pdev->name; priv->rng.init = bcm63xx_rng_init; priv->rng.cleanup = bcm63xx_rng_cleanup; - prov->rng.data_present = bcm63xx_rng_data_present; + priv->rng.data_present = bcm63xx_rng_data_present; priv->rng.data_read = bcm63xx_rng_data_read; priv->clk = devm_clk_get(&pdev->dev, "ipsec"); if (IS_ERR(priv->clk)) { - error = PTR_ERR(priv->clk); - dev_err(&pdev->dev, "no clock for device: %d\n", error); - return error; + ret = PTR_ERR(priv->clk); + dev_err(&pdev->dev, "no clock for device: %d\n", ret); + return ret; } if (!devm_request_mem_region(&pdev->dev, r->start, @@ -120,11 +120,11 @@ static int bcm63xx_rng_probe(struct platform_device *pdev) return -ENOMEM; } - error = devm_hwrng_register(&pdev->dev, &priv->rng); - if (error) { + ret = devm_hwrng_register(&pdev->dev, &priv->rng); + if (ret) { dev_err(&pdev->dev, "failed to register rng device: %d\n", - error); - return error; + ret); + return ret; } dev_info(&pdev->dev, "registered RNG driver\n"); From a00212e21928640486d3cc939cf4d908e8522016 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 4 May 2015 01:58:27 +0200 Subject: [PATCH 038/101] ACPI / documentation: Fix ambiguity in the GPIO properties document The first paragraph in Documentation/acpi/gpio-properties.txt is ambiguous, so make it more clear. Reported-by: Antonio Ospite Acked-by: Antonio Ospite Acked-by: Mika Westerberg Signed-off-by: Rafael J. Wysocki --- Documentation/acpi/gpio-properties.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Documentation/acpi/gpio-properties.txt b/Documentation/acpi/gpio-properties.txt index ae36fcf86dc7..f35dad11f0de 100644 --- a/Documentation/acpi/gpio-properties.txt +++ b/Documentation/acpi/gpio-properties.txt @@ -1,9 +1,9 @@ _DSD Device Properties Related to GPIO -------------------------------------- -With the release of ACPI 5.1 and the _DSD configuration objecte names -can finally be given to GPIOs (and other things as well) returned by -_CRS. Previously, we were only able to use an integer index to find +With the release of ACPI 5.1, the _DSD configuration object finally +allows names to be given to GPIOs (and other things as well) returned +by _CRS. Previously, we were only able to use an integer index to find the corresponding GPIO, which is pretty error prone (it depends on the _CRS output ordering, for example). From 74d77e50f23123938fbb7987eba71310864e6a7c Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 20 Apr 2015 10:59:17 -0500 Subject: [PATCH 039/101] pinctrl: mediatek: mtk-common: initialize unmask cppcheck detected an uninitialized variable: [drivers/pinctrl/mediatek/pinctrl-mtk-common.c:897]: (error) Uninitialized variable: unmask unmask should be initialized to zero to ensure unmasking only occurs if a previous mask occurred. The current situation is that the unmask variable could contain any random garbage causing random unexpected unmasking. Signed-off-by: Colin Ian King Signed-off-by: Linus Walleij --- drivers/pinctrl/mediatek/pinctrl-mtk-common.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/pinctrl/mediatek/pinctrl-mtk-common.c b/drivers/pinctrl/mediatek/pinctrl-mtk-common.c index 493294c0ebe6..474812e2b0cb 100644 --- a/drivers/pinctrl/mediatek/pinctrl-mtk-common.c +++ b/drivers/pinctrl/mediatek/pinctrl-mtk-common.c @@ -881,6 +881,8 @@ static int mtk_gpio_set_debounce(struct gpio_chip *chip, unsigned offset, if (!mtk_eint_get_mask(pctl, eint_num)) { mtk_eint_mask(d); unmask = 1; + } else { + unmask = 0; } clr_bit = 0xff << eint_offset; From 622532bb2fad8fe342fb685727ae0be566f6be5d Mon Sep 17 00:00:00 2001 From: Witold Szczeponik Date: Fri, 1 May 2015 19:05:20 +0200 Subject: [PATCH 040/101] ACPI / PNP: add two IDs to list for PNPACPI device enumeration Commit eec15edbb0e1 (ACPI / PNP: use device ID list for PNPACPI device enumeration) changed the way how ACPI devices are enumerated and when they are added to the PNP bus. However, it broke the sound card support on (at least) a vintage IBM ThinkPad 600E: with said commit applied, two of the necessary "CSC01xx" devices are not added to the PNP bus and hence can not be found during the initialization of the "snd-cs4236" module. As a consequence, loading "snd-cs4236" causes null pointer exceptions. The attached patch fixes the problem end re-enables sound on the IBM ThinkPad 600E. Fixes: eec15edbb0e1 (ACPI / PNP: use device ID list for PNPACPI device enumeration) Signed-off-by: Witold Szczeponik Cc: 3.16+ # 3.16+ Signed-off-by: Rafael J. Wysocki --- drivers/acpi/acpi_pnp.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/acpi/acpi_pnp.c b/drivers/acpi/acpi_pnp.c index b193f8425999..ff6d8adc9cda 100644 --- a/drivers/acpi/acpi_pnp.c +++ b/drivers/acpi/acpi_pnp.c @@ -304,6 +304,8 @@ static const struct acpi_device_id acpi_pnp_device_ids[] = { {"PNPb006"}, /* cs423x-pnpbios */ {"CSC0100"}, + {"CSC0103"}, + {"CSC0110"}, {"CSC0000"}, {"GIM0100"}, /* Guillemot Turtlebeach something appears to be cs4232 compatible */ /* es18xx-pnpbios */ From 5463e7c18e51152104aba9614e6abfc039a8b710 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 21 Apr 2015 10:40:54 -0700 Subject: [PATCH 041/101] Revert "f2fs: enhance multi-threads performance" This reports performance regression by Yuanhan Liu. The basic idea was to reduce one-point mutex, but it turns out this causes another contention like context swithes. https://lkml.org/lkml/2015/4/21/11 Until finishing the analysis on this issue, I'd like to revert this for a while. This reverts commit 78373b7319abdf15050af5b1632c4c8b8b398f33. --- fs/f2fs/data.c | 7 +++++++ fs/f2fs/f2fs.h | 1 + fs/f2fs/super.c | 1 + 3 files changed, 9 insertions(+) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index b91b0e10678e..1e1aae669fa8 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1513,6 +1513,7 @@ static int f2fs_write_data_pages(struct address_space *mapping, { struct inode *inode = mapping->host; struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + bool locked = false; int ret; long diff; @@ -1533,7 +1534,13 @@ static int f2fs_write_data_pages(struct address_space *mapping, diff = nr_pages_to_write(sbi, DATA, wbc); + if (!S_ISDIR(inode->i_mode)) { + mutex_lock(&sbi->writepages); + locked = true; + } ret = write_cache_pages(mapping, wbc, __f2fs_writepage, mapping); + if (locked) + mutex_unlock(&sbi->writepages); f2fs_submit_merged_bio(sbi, DATA, WRITE); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index d8921cf2ba9a..8de34ab6d5b1 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -625,6 +625,7 @@ struct f2fs_sb_info { struct mutex cp_mutex; /* checkpoint procedure lock */ struct rw_semaphore cp_rwsem; /* blocking FS operations */ struct rw_semaphore node_write; /* locking node writes */ + struct mutex writepages; /* mutex for writepages() */ wait_queue_head_t cp_wait; struct inode_management im[MAX_INO_ENTRY]; /* manage inode cache */ diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 160b88346b24..b2dd1b01f076 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1035,6 +1035,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) sbi->raw_super = raw_super; sbi->raw_super_buf = raw_super_buf; mutex_init(&sbi->gc_mutex); + mutex_init(&sbi->writepages); mutex_init(&sbi->cp_mutex); init_rwsem(&sbi->node_write); clear_sbi_flag(sbi, SBI_POR_DOING); From 7263b1bd0490fca68ee7eedb0b6973cb86d4701c Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 22 Apr 2015 11:03:48 -0700 Subject: [PATCH 042/101] f2fs: fix wrong error hanlder in f2fs_follow_link The page_follow_link_light returns NULL and its error pointer was remained in nd->path. Reported-by: Dan Carpenter Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/namei.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 7e3794edae42..658e8079aaf9 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -298,16 +298,14 @@ static int f2fs_unlink(struct inode *dir, struct dentry *dentry) static void *f2fs_follow_link(struct dentry *dentry, struct nameidata *nd) { - struct page *page; + struct page *page = page_follow_link_light(dentry, nd); - page = page_follow_link_light(dentry, nd); - if (IS_ERR(page)) + if (IS_ERR_OR_NULL(page)) return page; /* this is broken symlink case */ if (*nd_get_link(nd) == 0) { - kunmap(page); - page_cache_release(page); + page_put_link(dentry, nd, page); return ERR_PTR(-ENOENT); } return page; From e8a4a2696fecb398b0288c43c0e0dbb91e265bb2 Mon Sep 17 00:00:00 2001 From: Tahsin Erdogan Date: Mon, 4 May 2015 21:15:31 -0700 Subject: [PATCH 043/101] x86/spinlocks: Fix regression in spinlock contention detection A spinlock is regarded as contended when there is at least one waiter. Currently, the code that checks whether there are any waiters rely on tail value being greater than head. However, this is not true if tail reaches the max value and wraps back to zero, so arch_spin_is_contended() incorrectly returns 0 (not contended) when tail is smaller than head. The original code (before regression) handled this case by casting the (tail - head) to an unsigned value. This change simply restores that behavior. Fixes: d6abfdb20223 ("x86/spinlocks/paravirt: Fix memory corruption on unlock") Signed-off-by: Tahsin Erdogan Cc: peterz@infradead.org Cc: Waiman.Long@hp.com Cc: borntraeger@de.ibm.com Cc: oleg@redhat.com Cc: raghavendra.kt@linux.vnet.ibm.com Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/1430799331-20445-1-git-send-email-tahsin@google.com Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/spinlock.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h index cf87de3fc390..64b611782ef0 100644 --- a/arch/x86/include/asm/spinlock.h +++ b/arch/x86/include/asm/spinlock.h @@ -169,7 +169,7 @@ static inline int arch_spin_is_contended(arch_spinlock_t *lock) struct __raw_tickets tmp = READ_ONCE(lock->tickets); tmp.head &= ~TICKET_SLOWPATH_FLAG; - return (tmp.tail - tmp.head) > TICKET_LOCK_INC; + return (__ticket_t)(tmp.tail - tmp.head) > TICKET_LOCK_INC; } #define arch_spin_is_contended arch_spin_is_contended From 285214409a9e5fceba2215461b4682b6069d8e77 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 20 Apr 2015 14:01:11 -0600 Subject: [PATCH 044/101] RDMA/CMA: Canonize IPv4 on IPV6 sockets properly When accepting a new IPv4 connect to an IPv6 socket, the CMA tries to canonize the address family to IPv4, but does not properly process the listening sockaddr to get the listening port, and does not properly set the address family of the canonized sockaddr. Fixes: e51060f08a61 ("IB: IP address based RDMA connection manager") Cc: Reported-By: Yotam Kenneth Signed-off-by: Jason Gunthorpe Tested-by: Haggai Eran Signed-off-by: Doug Ledford --- drivers/infiniband/core/cma.c | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index d570030d899c..06441a43c3aa 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -859,19 +859,27 @@ static void cma_save_ib_info(struct rdma_cm_id *id, struct rdma_cm_id *listen_id memcpy(&ib->sib_addr, &path->dgid, 16); } +static __be16 ss_get_port(const struct sockaddr_storage *ss) +{ + if (ss->ss_family == AF_INET) + return ((struct sockaddr_in *)ss)->sin_port; + else if (ss->ss_family == AF_INET6) + return ((struct sockaddr_in6 *)ss)->sin6_port; + BUG(); +} + static void cma_save_ip4_info(struct rdma_cm_id *id, struct rdma_cm_id *listen_id, struct cma_hdr *hdr) { - struct sockaddr_in *listen4, *ip4; + struct sockaddr_in *ip4; - listen4 = (struct sockaddr_in *) &listen_id->route.addr.src_addr; ip4 = (struct sockaddr_in *) &id->route.addr.src_addr; - ip4->sin_family = listen4->sin_family; + ip4->sin_family = AF_INET; ip4->sin_addr.s_addr = hdr->dst_addr.ip4.addr; - ip4->sin_port = listen4->sin_port; + ip4->sin_port = ss_get_port(&listen_id->route.addr.src_addr); ip4 = (struct sockaddr_in *) &id->route.addr.dst_addr; - ip4->sin_family = listen4->sin_family; + ip4->sin_family = AF_INET; ip4->sin_addr.s_addr = hdr->src_addr.ip4.addr; ip4->sin_port = hdr->port; } @@ -879,16 +887,15 @@ static void cma_save_ip4_info(struct rdma_cm_id *id, struct rdma_cm_id *listen_i static void cma_save_ip6_info(struct rdma_cm_id *id, struct rdma_cm_id *listen_id, struct cma_hdr *hdr) { - struct sockaddr_in6 *listen6, *ip6; + struct sockaddr_in6 *ip6; - listen6 = (struct sockaddr_in6 *) &listen_id->route.addr.src_addr; ip6 = (struct sockaddr_in6 *) &id->route.addr.src_addr; - ip6->sin6_family = listen6->sin6_family; + ip6->sin6_family = AF_INET6; ip6->sin6_addr = hdr->dst_addr.ip6; - ip6->sin6_port = listen6->sin6_port; + ip6->sin6_port = ss_get_port(&listen_id->route.addr.src_addr); ip6 = (struct sockaddr_in6 *) &id->route.addr.dst_addr; - ip6->sin6_family = listen6->sin6_family; + ip6->sin6_family = AF_INET6; ip6->sin6_addr = hdr->src_addr.ip6; ip6->sin6_port = hdr->port; } From 0b7410471d59ce2ea30453e68c03bdb941d5951e Mon Sep 17 00:00:00 2001 From: Hariprasad S Date: Wed, 22 Apr 2015 01:44:58 +0530 Subject: [PATCH 045/101] iw_cxgb4: Cleanup register defines/MACROS Cleanup macros and register defines for consistency Signed-off-by: Hariprasad Shenai Signed-off-by: Doug Ledford --- drivers/infiniband/hw/cxgb4/cm.c | 4 ++-- drivers/infiniband/hw/cxgb4/t4fw_ri_api.h | 4 +++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 57176ddd4c50..0493cca3ec15 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -675,7 +675,7 @@ static int send_connect(struct c4iw_ep *ep) if (is_t5(ep->com.dev->rdev.lldi.adapter_type)) { opt2 |= T5_OPT_2_VALID_F; opt2 |= CONG_CNTRL_V(CONG_ALG_TAHOE); - opt2 |= CONG_CNTRL_VALID; /* OPT_2_ISS for T5 */ + opt2 |= T5_ISS_F; } t4_set_arp_err_handler(skb, ep, act_open_req_arp_failure); @@ -2214,7 +2214,7 @@ static void accept_cr(struct c4iw_ep *ep, struct sk_buff *skb, u32 isn = (prandom_u32() & ~7UL) - 1; opt2 |= T5_OPT_2_VALID_F; opt2 |= CONG_CNTRL_V(CONG_ALG_TAHOE); - opt2 |= CONG_CNTRL_VALID; /* OPT_2_ISS for T5 */ + opt2 |= T5_ISS_F; rpl5 = (void *)rpl; memset(&rpl5->iss, 0, roundup(sizeof(*rpl5)-sizeof(*rpl), 16)); if (peer2peer) diff --git a/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h b/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h index 5e53327fc647..343e8daf2270 100644 --- a/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h +++ b/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h @@ -848,6 +848,8 @@ enum { /* TCP congestion control algorithms */ #define CONG_CNTRL_V(x) ((x) << CONG_CNTRL_S) #define CONG_CNTRL_G(x) (((x) >> CONG_CNTRL_S) & CONG_CNTRL_M) -#define CONG_CNTRL_VALID (1 << 18) +#define T5_ISS_S 18 +#define T5_ISS_V(x) ((x) << T5_ISS_S) +#define T5_ISS_F T5_ISS_V(1U) #endif /* _T4FW_RI_API_H_ */ From 6198dd8d7a6a7f40dc4599cb0676101d9cb82776 Mon Sep 17 00:00:00 2001 From: Hariprasad S Date: Wed, 22 Apr 2015 01:44:59 +0530 Subject: [PATCH 046/101] iw_cxgb4: 32b platform fixes - get_dma_mr() was using ~0UL which is should be ~0ULL. This causes the DMA MR to get setup incorrectly in hardware. - wr_log_show() needed a 64b divide function div64_u64() instead of doing division directly. - fixed warnings about recasting a pointer to a u64 Signed-off-by: Steve Wise Signed-off-by: Hariprasad Shenai Signed-off-by: Doug Ledford --- drivers/infiniband/hw/cxgb4/cm.c | 2 +- drivers/infiniband/hw/cxgb4/cq.c | 7 +++---- drivers/infiniband/hw/cxgb4/device.c | 6 +++--- drivers/infiniband/hw/cxgb4/mem.c | 6 +++--- drivers/infiniband/hw/cxgb4/qp.c | 10 +++++----- 5 files changed, 15 insertions(+), 16 deletions(-) diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 0493cca3ec15..6fb31bacd5b4 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -3571,7 +3571,7 @@ static void send_fw_pass_open_req(struct c4iw_dev *dev, struct sk_buff *skb, * TP will ignore any value > 0 for MSS index. */ req->tcb.opt0 = cpu_to_be64(MSS_IDX_V(0xF)); - req->cookie = (unsigned long)skb; + req->cookie = (uintptr_t)skb; set_wr_txq(req_skb, CPL_PRIORITY_CONTROL, port_id); ret = cxgb4_ofld_send(dev->rdev.lldi.ports[0], req_skb); diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c index ab7692ac2044..25dbd6986301 100644 --- a/drivers/infiniband/hw/cxgb4/cq.c +++ b/drivers/infiniband/hw/cxgb4/cq.c @@ -55,7 +55,7 @@ static int destroy_cq(struct c4iw_rdev *rdev, struct t4_cq *cq, FW_RI_RES_WR_NRES_V(1) | FW_WR_COMPL_F); res_wr->len16_pkd = cpu_to_be32(DIV_ROUND_UP(wr_len, 16)); - res_wr->cookie = (unsigned long) &wr_wait; + res_wr->cookie = (uintptr_t)&wr_wait; res = res_wr->res; res->u.cq.restype = FW_RI_RES_TYPE_CQ; res->u.cq.op = FW_RI_RES_OP_RESET; @@ -125,7 +125,7 @@ static int create_cq(struct c4iw_rdev *rdev, struct t4_cq *cq, FW_RI_RES_WR_NRES_V(1) | FW_WR_COMPL_F); res_wr->len16_pkd = cpu_to_be32(DIV_ROUND_UP(wr_len, 16)); - res_wr->cookie = (unsigned long) &wr_wait; + res_wr->cookie = (uintptr_t)&wr_wait; res = res_wr->res; res->u.cq.restype = FW_RI_RES_TYPE_CQ; res->u.cq.op = FW_RI_RES_OP_WRITE; @@ -970,8 +970,7 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, int entries, } PDBG("%s cqid 0x%0x chp %p size %u memsize %zu, dma_addr 0x%0llx\n", __func__, chp->cq.cqid, chp, chp->cq.size, - chp->cq.memsize, - (unsigned long long) chp->cq.dma_addr); + chp->cq.memsize, (unsigned long long) chp->cq.dma_addr); return &chp->ibcq; err5: kfree(mm2); diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c index 8fb295e4a9ab..7ed32537eb59 100644 --- a/drivers/infiniband/hw/cxgb4/device.c +++ b/drivers/infiniband/hw/cxgb4/device.c @@ -151,7 +151,7 @@ static int wr_log_show(struct seq_file *seq, void *v) int prev_ts_set = 0; int idx, end; -#define ts2ns(ts) div64_ul((ts) * dev->rdev.lldi.cclk_ps, 1000) +#define ts2ns(ts) div64_u64((ts) * dev->rdev.lldi.cclk_ps, 1000) idx = atomic_read(&dev->rdev.wr_log_idx) & (dev->rdev.wr_log_size - 1); @@ -784,10 +784,10 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev) rdev->lldi.vr->qp.size, rdev->lldi.vr->cq.start, rdev->lldi.vr->cq.size); - PDBG("udb len 0x%x udb base %llx db_reg %p gts_reg %p qpshift %lu " + PDBG("udb len 0x%x udb base %p db_reg %p gts_reg %p qpshift %lu " "qpmask 0x%x cqshift %lu cqmask 0x%x\n", (unsigned)pci_resource_len(rdev->lldi.pdev, 2), - (u64)pci_resource_start(rdev->lldi.pdev, 2), + (void *)pci_resource_start(rdev->lldi.pdev, 2), rdev->lldi.db_reg, rdev->lldi.gts_reg, rdev->qpshift, rdev->qpmask, diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c index 3ef0cf9f5c44..cff815b91707 100644 --- a/drivers/infiniband/hw/cxgb4/mem.c +++ b/drivers/infiniband/hw/cxgb4/mem.c @@ -144,7 +144,7 @@ static int _c4iw_write_mem_inline(struct c4iw_rdev *rdev, u32 addr, u32 len, if (i == (num_wqe-1)) { req->wr.wr_hi = cpu_to_be32(FW_WR_OP_V(FW_ULPTX_WR) | FW_WR_COMPL_F); - req->wr.wr_lo = (__force __be64)(unsigned long) &wr_wait; + req->wr.wr_lo = (__force __be64)&wr_wait; } else req->wr.wr_hi = cpu_to_be32(FW_WR_OP_V(FW_ULPTX_WR)); req->wr.wr_mid = cpu_to_be32( @@ -676,12 +676,12 @@ struct ib_mr *c4iw_get_dma_mr(struct ib_pd *pd, int acc) mhp->attr.zbva = 0; mhp->attr.va_fbo = 0; mhp->attr.page_size = 0; - mhp->attr.len = ~0UL; + mhp->attr.len = ~0ULL; mhp->attr.pbl_size = 0; ret = write_tpt_entry(&rhp->rdev, 0, &stag, 1, php->pdid, FW_RI_STAG_NSMR, mhp->attr.perms, - mhp->attr.mw_bind_enable, 0, 0, ~0UL, 0, 0, 0); + mhp->attr.mw_bind_enable, 0, 0, ~0ULL, 0, 0, 0); if (ret) goto err1; diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index 15cae5a31018..389ced335bc5 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -275,7 +275,7 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq, FW_RI_RES_WR_NRES_V(2) | FW_WR_COMPL_F); res_wr->len16_pkd = cpu_to_be32(DIV_ROUND_UP(wr_len, 16)); - res_wr->cookie = (unsigned long) &wr_wait; + res_wr->cookie = (uintptr_t)&wr_wait; res = res_wr->res; res->u.sqrq.restype = FW_RI_RES_TYPE_SQ; res->u.sqrq.op = FW_RI_RES_OP_WRITE; @@ -1209,7 +1209,7 @@ static int rdma_fini(struct c4iw_dev *rhp, struct c4iw_qp *qhp, wqe->flowid_len16 = cpu_to_be32( FW_WR_FLOWID_V(ep->hwtid) | FW_WR_LEN16_V(DIV_ROUND_UP(sizeof(*wqe), 16))); - wqe->cookie = (unsigned long) &ep->com.wr_wait; + wqe->cookie = (uintptr_t)&ep->com.wr_wait; wqe->u.fini.type = FW_RI_TYPE_FINI; ret = c4iw_ofld_send(&rhp->rdev, skb); @@ -1279,7 +1279,7 @@ static int rdma_init(struct c4iw_dev *rhp, struct c4iw_qp *qhp) FW_WR_FLOWID_V(qhp->ep->hwtid) | FW_WR_LEN16_V(DIV_ROUND_UP(sizeof(*wqe), 16))); - wqe->cookie = (unsigned long) &qhp->ep->com.wr_wait; + wqe->cookie = (uintptr_t)&qhp->ep->com.wr_wait; wqe->u.init.type = FW_RI_TYPE_INIT; wqe->u.init.mpareqbit_p2ptype = @@ -1766,11 +1766,11 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, mm2->len = PAGE_ALIGN(qhp->wq.rq.memsize); insert_mmap(ucontext, mm2); mm3->key = uresp.sq_db_gts_key; - mm3->addr = (__force unsigned long) qhp->wq.sq.udb; + mm3->addr = (__force unsigned long)qhp->wq.sq.udb; mm3->len = PAGE_SIZE; insert_mmap(ucontext, mm3); mm4->key = uresp.rq_db_gts_key; - mm4->addr = (__force unsigned long) qhp->wq.rq.udb; + mm4->addr = (__force unsigned long)qhp->wq.rq.udb; mm4->len = PAGE_SIZE; insert_mmap(ucontext, mm4); if (mm5) { From 09ece8b9e983fe858de6eab7a386d58d194227b6 Mon Sep 17 00:00:00 2001 From: Hariprasad S Date: Wed, 22 Apr 2015 01:45:00 +0530 Subject: [PATCH 047/101] iw_cxgb4: use BAR2 GTS register for T5 kernel mode CQs For T5, we must not use the kdb/kgts registers, in order avoid db drops under extreme loads. Signed-off-by: Steve Wise Signed-off-by: Hariprasad Shenai Signed-off-by: Doug Ledford --- drivers/infiniband/hw/cxgb4/cq.c | 15 +++++++++++---- drivers/infiniband/hw/cxgb4/t4.h | 7 ++++--- 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c index 25dbd6986301..68ddb3710215 100644 --- a/drivers/infiniband/hw/cxgb4/cq.c +++ b/drivers/infiniband/hw/cxgb4/cq.c @@ -156,12 +156,19 @@ static int create_cq(struct c4iw_rdev *rdev, struct t4_cq *cq, goto err4; cq->gen = 1; - cq->gts = rdev->lldi.gts_reg; cq->rdev = rdev; if (user) { - cq->ugts = (u64)pci_resource_start(rdev->lldi.pdev, 2) + - (cq->cqid << rdev->cqshift); - cq->ugts &= PAGE_MASK; + u32 off = (cq->cqid << rdev->cqshift) & PAGE_MASK; + + cq->ugts = (u64)rdev->bar2_pa + off; + } else if (is_t4(rdev->lldi.adapter_type)) { + cq->gts = rdev->lldi.gts_reg; + cq->qid_mask = -1U; + } else { + u32 off = ((cq->cqid << rdev->cqshift) & PAGE_MASK) + 12; + + cq->gts = rdev->bar2_kva + off; + cq->qid_mask = rdev->qpmask; } return 0; err4: diff --git a/drivers/infiniband/hw/cxgb4/t4.h b/drivers/infiniband/hw/cxgb4/t4.h index 871cdcac7be2..7f2a6c244d25 100644 --- a/drivers/infiniband/hw/cxgb4/t4.h +++ b/drivers/infiniband/hw/cxgb4/t4.h @@ -539,6 +539,7 @@ struct t4_cq { size_t memsize; __be64 bits_type_ts; u32 cqid; + u32 qid_mask; int vector; u16 size; /* including status page */ u16 cidx; @@ -563,12 +564,12 @@ static inline int t4_arm_cq(struct t4_cq *cq, int se) set_bit(CQ_ARMED, &cq->flags); while (cq->cidx_inc > CIDXINC_M) { val = SEINTARM_V(0) | CIDXINC_V(CIDXINC_M) | TIMERREG_V(7) | - INGRESSQID_V(cq->cqid); + INGRESSQID_V(cq->cqid & cq->qid_mask); writel(val, cq->gts); cq->cidx_inc -= CIDXINC_M; } val = SEINTARM_V(se) | CIDXINC_V(cq->cidx_inc) | TIMERREG_V(6) | - INGRESSQID_V(cq->cqid); + INGRESSQID_V(cq->cqid & cq->qid_mask); writel(val, cq->gts); cq->cidx_inc = 0; return 0; @@ -601,7 +602,7 @@ static inline void t4_hwcq_consume(struct t4_cq *cq) u32 val; val = SEINTARM_V(0) | CIDXINC_V(cq->cidx_inc) | TIMERREG_V(7) | - INGRESSQID_V(cq->cqid); + INGRESSQID_V(cq->cqid & cq->qid_mask); writel(val, cq->gts); cq->cidx_inc = 0; } From 4a75a86c8d04390f268d7237cc49fe9a8e36efe7 Mon Sep 17 00:00:00 2001 From: Hariprasad S Date: Wed, 22 Apr 2015 01:45:01 +0530 Subject: [PATCH 048/101] iw_cxgb4: enforce qp/cq id requirements Currently the iw_cxgb4 implementation requires the qp and cq qid densities to match as well as the qp and cq id ranges. So fail a device open if the device configuration doesn't meet the requirements. The reason for these restictions has to do with the fact that IQ qid X has a UGTS register in the same bar2 page as EQ qid X. Thus both qids need to be allocated to the same user process for security reasons. The logic that does this (the qpid allocator in iw_cxgb4/resource.c) handles this but requires the above restrictions. Signed-off-by: Steve Wise Signed-off-by: Hariprasad Shenai Signed-off-by: Doug Ledford --- drivers/infiniband/hw/cxgb4/device.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c index 7ed32537eb59..83209bb38285 100644 --- a/drivers/infiniband/hw/cxgb4/device.c +++ b/drivers/infiniband/hw/cxgb4/device.c @@ -764,6 +764,29 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev) c4iw_init_dev_ucontext(rdev, &rdev->uctx); + /* + * This implementation assumes udb_density == ucq_density! Eventually + * we might need to support this but for now fail the open. Also the + * cqid and qpid range must match for now. + */ + if (rdev->lldi.udb_density != rdev->lldi.ucq_density) { + pr_err(MOD "%s: unsupported udb/ucq densities %u/%u\n", + pci_name(rdev->lldi.pdev), rdev->lldi.udb_density, + rdev->lldi.ucq_density); + err = -EINVAL; + goto err1; + } + if (rdev->lldi.vr->qp.start != rdev->lldi.vr->cq.start || + rdev->lldi.vr->qp.size != rdev->lldi.vr->cq.size) { + pr_err(MOD "%s: unsupported qp and cq id ranges " + "qp start %u size %u cq start %u size %u\n", + pci_name(rdev->lldi.pdev), rdev->lldi.vr->qp.start, + rdev->lldi.vr->qp.size, rdev->lldi.vr->cq.size, + rdev->lldi.vr->cq.size); + err = -EINVAL; + goto err1; + } + /* * qpshift is the number of bits to shift the qpid left in order * to get the correct address of the doorbell for that qp. From 6eec177461751f0fe191cf9977cde692b9481d0a Mon Sep 17 00:00:00 2001 From: Tatyana Nikolova Date: Tue, 21 Apr 2015 16:28:10 -0400 Subject: [PATCH 049/101] RDMA/core: Enable the iWarp Port Mapper to provide the actual address of the connecting peer to its clients Add functionality to enable the port mapper on the passive side to provide to its clients the actual (non-mapped) ip/tcp address information of the connecting peer 1) Adding remote_info_cb() to process the address info of the connecting peer The address info is provided by the user space port mapper service when the connection is initiated by the peer 2) Adding a hash list to store the remote address info 3) Adding functionality to add/remove the remote address info After the info has been provided to the port mapper client, it is removed from the hash list Signed-off-by: Tatyana Nikolova Reviewed-by: Steve Wise Signed-off-by: Doug Ledford --- drivers/infiniband/core/iwpm_msg.c | 73 +++++++++- drivers/infiniband/core/iwpm_util.c | 208 +++++++++++++++++++++++----- drivers/infiniband/core/iwpm_util.h | 15 ++ include/rdma/iw_portmap.h | 25 ++++ include/uapi/rdma/rdma_netlink.h | 1 + 5 files changed, 288 insertions(+), 34 deletions(-) diff --git a/drivers/infiniband/core/iwpm_msg.c b/drivers/infiniband/core/iwpm_msg.c index b85ddbc979e0..ab081702566f 100644 --- a/drivers/infiniband/core/iwpm_msg.c +++ b/drivers/infiniband/core/iwpm_msg.c @@ -468,7 +468,8 @@ int iwpm_add_mapping_cb(struct sk_buff *skb, struct netlink_callback *cb) } EXPORT_SYMBOL(iwpm_add_mapping_cb); -/* netlink attribute policy for the response to add and query mapping request */ +/* netlink attribute policy for the response to add and query mapping request + * and response with remote address info */ static const struct nla_policy resp_query_policy[IWPM_NLA_RQUERY_MAPPING_MAX] = { [IWPM_NLA_QUERY_MAPPING_SEQ] = { .type = NLA_U32 }, [IWPM_NLA_QUERY_LOCAL_ADDR] = { .len = sizeof(struct sockaddr_storage) }, @@ -559,6 +560,76 @@ int iwpm_add_and_query_mapping_cb(struct sk_buff *skb, } EXPORT_SYMBOL(iwpm_add_and_query_mapping_cb); +/* + * iwpm_remote_info_cb - Process a port mapper message, containing + * the remote connecting peer address info + */ +int iwpm_remote_info_cb(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct nlattr *nltb[IWPM_NLA_RQUERY_MAPPING_MAX]; + struct sockaddr_storage *local_sockaddr, *remote_sockaddr; + struct sockaddr_storage *mapped_loc_sockaddr, *mapped_rem_sockaddr; + struct iwpm_remote_info *rem_info; + const char *msg_type; + u8 nl_client; + int ret = -EINVAL; + + msg_type = "Remote Mapping info"; + if (iwpm_parse_nlmsg(cb, IWPM_NLA_RQUERY_MAPPING_MAX, + resp_query_policy, nltb, msg_type)) + return ret; + + nl_client = RDMA_NL_GET_CLIENT(cb->nlh->nlmsg_type); + if (!iwpm_valid_client(nl_client)) { + pr_info("%s: Invalid port mapper client = %d\n", + __func__, nl_client); + return ret; + } + atomic_set(&echo_nlmsg_seq, cb->nlh->nlmsg_seq); + + local_sockaddr = (struct sockaddr_storage *) + nla_data(nltb[IWPM_NLA_QUERY_LOCAL_ADDR]); + remote_sockaddr = (struct sockaddr_storage *) + nla_data(nltb[IWPM_NLA_QUERY_REMOTE_ADDR]); + mapped_loc_sockaddr = (struct sockaddr_storage *) + nla_data(nltb[IWPM_NLA_RQUERY_MAPPED_LOC_ADDR]); + mapped_rem_sockaddr = (struct sockaddr_storage *) + nla_data(nltb[IWPM_NLA_RQUERY_MAPPED_REM_ADDR]); + + if (mapped_loc_sockaddr->ss_family != local_sockaddr->ss_family || + mapped_rem_sockaddr->ss_family != remote_sockaddr->ss_family) { + pr_info("%s: Sockaddr family doesn't match the requested one\n", + __func__); + return ret; + } + rem_info = kzalloc(sizeof(struct iwpm_remote_info), GFP_ATOMIC); + if (!rem_info) { + pr_err("%s: Unable to allocate a remote info\n", __func__); + ret = -ENOMEM; + return ret; + } + memcpy(&rem_info->mapped_loc_sockaddr, mapped_loc_sockaddr, + sizeof(struct sockaddr_storage)); + memcpy(&rem_info->remote_sockaddr, remote_sockaddr, + sizeof(struct sockaddr_storage)); + memcpy(&rem_info->mapped_rem_sockaddr, mapped_rem_sockaddr, + sizeof(struct sockaddr_storage)); + rem_info->nl_client = nl_client; + + iwpm_add_remote_info(rem_info); + + iwpm_print_sockaddr(local_sockaddr, + "remote_info: Local sockaddr:"); + iwpm_print_sockaddr(mapped_loc_sockaddr, + "remote_info: Mapped local sockaddr:"); + iwpm_print_sockaddr(remote_sockaddr, + "remote_info: Remote sockaddr:"); + iwpm_print_sockaddr(mapped_rem_sockaddr, + "remote_info: Mapped remote sockaddr:"); + return ret; +} +EXPORT_SYMBOL(iwpm_remote_info_cb); + /* netlink attribute policy for the received request for mapping info */ static const struct nla_policy resp_mapinfo_policy[IWPM_NLA_MAPINFO_REQ_MAX] = { [IWPM_NLA_MAPINFO_ULIB_NAME] = { .type = NLA_STRING, diff --git a/drivers/infiniband/core/iwpm_util.c b/drivers/infiniband/core/iwpm_util.c index 69e9f84c1605..a626795bf9c7 100644 --- a/drivers/infiniband/core/iwpm_util.c +++ b/drivers/infiniband/core/iwpm_util.c @@ -33,8 +33,10 @@ #include "iwpm_util.h" -#define IWPM_HASH_BUCKET_SIZE 512 -#define IWPM_HASH_BUCKET_MASK (IWPM_HASH_BUCKET_SIZE - 1) +#define IWPM_MAPINFO_HASH_SIZE 512 +#define IWPM_MAPINFO_HASH_MASK (IWPM_MAPINFO_HASH_SIZE - 1) +#define IWPM_REMINFO_HASH_SIZE 64 +#define IWPM_REMINFO_HASH_MASK (IWPM_REMINFO_HASH_SIZE - 1) static LIST_HEAD(iwpm_nlmsg_req_list); static DEFINE_SPINLOCK(iwpm_nlmsg_req_lock); @@ -42,31 +44,49 @@ static DEFINE_SPINLOCK(iwpm_nlmsg_req_lock); static struct hlist_head *iwpm_hash_bucket; static DEFINE_SPINLOCK(iwpm_mapinfo_lock); +static struct hlist_head *iwpm_reminfo_bucket; +static DEFINE_SPINLOCK(iwpm_reminfo_lock); + static DEFINE_MUTEX(iwpm_admin_lock); static struct iwpm_admin_data iwpm_admin; int iwpm_init(u8 nl_client) { + int ret = 0; if (iwpm_valid_client(nl_client)) return -EINVAL; mutex_lock(&iwpm_admin_lock); if (atomic_read(&iwpm_admin.refcount) == 0) { - iwpm_hash_bucket = kzalloc(IWPM_HASH_BUCKET_SIZE * + iwpm_hash_bucket = kzalloc(IWPM_MAPINFO_HASH_SIZE * sizeof(struct hlist_head), GFP_KERNEL); if (!iwpm_hash_bucket) { - mutex_unlock(&iwpm_admin_lock); + ret = -ENOMEM; pr_err("%s Unable to create mapinfo hash table\n", __func__); - return -ENOMEM; + goto init_exit; + } + iwpm_reminfo_bucket = kzalloc(IWPM_REMINFO_HASH_SIZE * + sizeof(struct hlist_head), GFP_KERNEL); + if (!iwpm_reminfo_bucket) { + kfree(iwpm_hash_bucket); + ret = -ENOMEM; + pr_err("%s Unable to create reminfo hash table\n", __func__); + goto init_exit; } } atomic_inc(&iwpm_admin.refcount); +init_exit: mutex_unlock(&iwpm_admin_lock); - iwpm_set_valid(nl_client, 1); - return 0; + if (!ret) { + iwpm_set_valid(nl_client, 1); + pr_debug("%s: Mapinfo and reminfo tables are created\n", + __func__); + } + return ret; } EXPORT_SYMBOL(iwpm_init); static void free_hash_bucket(void); +static void free_reminfo_bucket(void); int iwpm_exit(u8 nl_client) { @@ -81,7 +101,8 @@ int iwpm_exit(u8 nl_client) } if (atomic_dec_and_test(&iwpm_admin.refcount)) { free_hash_bucket(); - pr_debug("%s: Mapinfo hash table is destroyed\n", __func__); + free_reminfo_bucket(); + pr_debug("%s: Resources are destroyed\n", __func__); } mutex_unlock(&iwpm_admin_lock); iwpm_set_valid(nl_client, 0); @@ -89,7 +110,7 @@ int iwpm_exit(u8 nl_client) } EXPORT_SYMBOL(iwpm_exit); -static struct hlist_head *get_hash_bucket_head(struct sockaddr_storage *, +static struct hlist_head *get_mapinfo_hash_bucket(struct sockaddr_storage *, struct sockaddr_storage *); int iwpm_create_mapinfo(struct sockaddr_storage *local_sockaddr, @@ -99,9 +120,10 @@ int iwpm_create_mapinfo(struct sockaddr_storage *local_sockaddr, struct hlist_head *hash_bucket_head; struct iwpm_mapping_info *map_info; unsigned long flags; + int ret = -EINVAL; if (!iwpm_valid_client(nl_client)) - return -EINVAL; + return ret; map_info = kzalloc(sizeof(struct iwpm_mapping_info), GFP_KERNEL); if (!map_info) { pr_err("%s: Unable to allocate a mapping info\n", __func__); @@ -115,13 +137,16 @@ int iwpm_create_mapinfo(struct sockaddr_storage *local_sockaddr, spin_lock_irqsave(&iwpm_mapinfo_lock, flags); if (iwpm_hash_bucket) { - hash_bucket_head = get_hash_bucket_head( + hash_bucket_head = get_mapinfo_hash_bucket( &map_info->local_sockaddr, &map_info->mapped_sockaddr); - hlist_add_head(&map_info->hlist_node, hash_bucket_head); + if (hash_bucket_head) { + hlist_add_head(&map_info->hlist_node, hash_bucket_head); + ret = 0; + } } spin_unlock_irqrestore(&iwpm_mapinfo_lock, flags); - return 0; + return ret; } EXPORT_SYMBOL(iwpm_create_mapinfo); @@ -136,9 +161,12 @@ int iwpm_remove_mapinfo(struct sockaddr_storage *local_sockaddr, spin_lock_irqsave(&iwpm_mapinfo_lock, flags); if (iwpm_hash_bucket) { - hash_bucket_head = get_hash_bucket_head( + hash_bucket_head = get_mapinfo_hash_bucket( local_sockaddr, mapped_local_addr); + if (!hash_bucket_head) + goto remove_mapinfo_exit; + hlist_for_each_entry_safe(map_info, tmp_hlist_node, hash_bucket_head, hlist_node) { @@ -152,6 +180,7 @@ int iwpm_remove_mapinfo(struct sockaddr_storage *local_sockaddr, } } } +remove_mapinfo_exit: spin_unlock_irqrestore(&iwpm_mapinfo_lock, flags); return ret; } @@ -166,7 +195,7 @@ static void free_hash_bucket(void) /* remove all the mapinfo data from the list */ spin_lock_irqsave(&iwpm_mapinfo_lock, flags); - for (i = 0; i < IWPM_HASH_BUCKET_SIZE; i++) { + for (i = 0; i < IWPM_MAPINFO_HASH_SIZE; i++) { hlist_for_each_entry_safe(map_info, tmp_hlist_node, &iwpm_hash_bucket[i], hlist_node) { @@ -180,6 +209,96 @@ static void free_hash_bucket(void) spin_unlock_irqrestore(&iwpm_mapinfo_lock, flags); } +static void free_reminfo_bucket(void) +{ + struct hlist_node *tmp_hlist_node; + struct iwpm_remote_info *rem_info; + unsigned long flags; + int i; + + /* remove all the remote info from the list */ + spin_lock_irqsave(&iwpm_reminfo_lock, flags); + for (i = 0; i < IWPM_REMINFO_HASH_SIZE; i++) { + hlist_for_each_entry_safe(rem_info, tmp_hlist_node, + &iwpm_reminfo_bucket[i], hlist_node) { + + hlist_del_init(&rem_info->hlist_node); + kfree(rem_info); + } + } + /* free the hash list */ + kfree(iwpm_reminfo_bucket); + iwpm_reminfo_bucket = NULL; + spin_unlock_irqrestore(&iwpm_reminfo_lock, flags); +} + +static struct hlist_head *get_reminfo_hash_bucket(struct sockaddr_storage *, + struct sockaddr_storage *); + +void iwpm_add_remote_info(struct iwpm_remote_info *rem_info) +{ + struct hlist_head *hash_bucket_head; + unsigned long flags; + + spin_lock_irqsave(&iwpm_reminfo_lock, flags); + if (iwpm_reminfo_bucket) { + hash_bucket_head = get_reminfo_hash_bucket( + &rem_info->mapped_loc_sockaddr, + &rem_info->mapped_rem_sockaddr); + if (hash_bucket_head) + hlist_add_head(&rem_info->hlist_node, hash_bucket_head); + } + spin_unlock_irqrestore(&iwpm_reminfo_lock, flags); +} + +int iwpm_get_remote_info(struct sockaddr_storage *mapped_loc_addr, + struct sockaddr_storage *mapped_rem_addr, + struct sockaddr_storage *remote_addr, + u8 nl_client) +{ + struct hlist_node *tmp_hlist_node; + struct hlist_head *hash_bucket_head; + struct iwpm_remote_info *rem_info = NULL; + unsigned long flags; + int ret = -EINVAL; + + if (!iwpm_valid_client(nl_client)) { + pr_info("%s: Invalid client = %d\n", __func__, nl_client); + return ret; + } + spin_lock_irqsave(&iwpm_reminfo_lock, flags); + if (iwpm_reminfo_bucket) { + hash_bucket_head = get_reminfo_hash_bucket( + mapped_loc_addr, + mapped_rem_addr); + if (!hash_bucket_head) + goto get_remote_info_exit; + hlist_for_each_entry_safe(rem_info, tmp_hlist_node, + hash_bucket_head, hlist_node) { + + if (!iwpm_compare_sockaddr(&rem_info->mapped_loc_sockaddr, + mapped_loc_addr) && + !iwpm_compare_sockaddr(&rem_info->mapped_rem_sockaddr, + mapped_rem_addr)) { + + memcpy(remote_addr, &rem_info->remote_sockaddr, + sizeof(struct sockaddr_storage)); + iwpm_print_sockaddr(remote_addr, + "get_remote_info: Remote sockaddr:"); + + hlist_del_init(&rem_info->hlist_node); + kfree(rem_info); + ret = 0; + break; + } + } + } +get_remote_info_exit: + spin_unlock_irqrestore(&iwpm_reminfo_lock, flags); + return ret; +} +EXPORT_SYMBOL(iwpm_get_remote_info); + struct iwpm_nlmsg_request *iwpm_get_nlmsg_request(__u32 nlmsg_seq, u8 nl_client, gfp_t gfp) { @@ -409,31 +528,54 @@ static u32 iwpm_ipv4_jhash(struct sockaddr_in *ipv4_sockaddr) return hash; } -static struct hlist_head *get_hash_bucket_head(struct sockaddr_storage - *local_sockaddr, - struct sockaddr_storage - *mapped_sockaddr) +static int get_hash_bucket(struct sockaddr_storage *a_sockaddr, + struct sockaddr_storage *b_sockaddr, u32 *hash) { - u32 local_hash, mapped_hash, hash; + u32 a_hash, b_hash; - if (local_sockaddr->ss_family == AF_INET) { - local_hash = iwpm_ipv4_jhash((struct sockaddr_in *) local_sockaddr); - mapped_hash = iwpm_ipv4_jhash((struct sockaddr_in *) mapped_sockaddr); + if (a_sockaddr->ss_family == AF_INET) { + a_hash = iwpm_ipv4_jhash((struct sockaddr_in *) a_sockaddr); + b_hash = iwpm_ipv4_jhash((struct sockaddr_in *) b_sockaddr); - } else if (local_sockaddr->ss_family == AF_INET6) { - local_hash = iwpm_ipv6_jhash((struct sockaddr_in6 *) local_sockaddr); - mapped_hash = iwpm_ipv6_jhash((struct sockaddr_in6 *) mapped_sockaddr); + } else if (a_sockaddr->ss_family == AF_INET6) { + a_hash = iwpm_ipv6_jhash((struct sockaddr_in6 *) a_sockaddr); + b_hash = iwpm_ipv6_jhash((struct sockaddr_in6 *) b_sockaddr); } else { pr_err("%s: Invalid sockaddr family\n", __func__); - return NULL; + return -EINVAL; } - if (local_hash == mapped_hash) /* if port mapper isn't available */ - hash = local_hash; + if (a_hash == b_hash) /* if port mapper isn't available */ + *hash = a_hash; else - hash = jhash_2words(local_hash, mapped_hash, 0); + *hash = jhash_2words(a_hash, b_hash, 0); + return 0; +} - return &iwpm_hash_bucket[hash & IWPM_HASH_BUCKET_MASK]; +static struct hlist_head *get_mapinfo_hash_bucket(struct sockaddr_storage + *local_sockaddr, struct sockaddr_storage + *mapped_sockaddr) +{ + u32 hash; + int ret; + + ret = get_hash_bucket(local_sockaddr, mapped_sockaddr, &hash); + if (ret) + return NULL; + return &iwpm_hash_bucket[hash & IWPM_MAPINFO_HASH_MASK]; +} + +static struct hlist_head *get_reminfo_hash_bucket(struct sockaddr_storage + *mapped_loc_sockaddr, struct sockaddr_storage + *mapped_rem_sockaddr) +{ + u32 hash; + int ret; + + ret = get_hash_bucket(mapped_loc_sockaddr, mapped_rem_sockaddr, &hash); + if (ret) + return NULL; + return &iwpm_reminfo_bucket[hash & IWPM_REMINFO_HASH_MASK]; } static int send_mapinfo_num(u32 mapping_num, u8 nl_client, int iwpm_pid) @@ -512,7 +654,7 @@ int iwpm_send_mapinfo(u8 nl_client, int iwpm_pid) } skb_num++; spin_lock_irqsave(&iwpm_mapinfo_lock, flags); - for (i = 0; i < IWPM_HASH_BUCKET_SIZE; i++) { + for (i = 0; i < IWPM_MAPINFO_HASH_SIZE; i++) { hlist_for_each_entry(map_info, &iwpm_hash_bucket[i], hlist_node) { if (map_info->nl_client != nl_client) @@ -595,7 +737,7 @@ int iwpm_mapinfo_available(void) spin_lock_irqsave(&iwpm_mapinfo_lock, flags); if (iwpm_hash_bucket) { - for (i = 0; i < IWPM_HASH_BUCKET_SIZE; i++) { + for (i = 0; i < IWPM_MAPINFO_HASH_SIZE; i++) { if (!hlist_empty(&iwpm_hash_bucket[i])) { full_bucket = 1; break; diff --git a/drivers/infiniband/core/iwpm_util.h b/drivers/infiniband/core/iwpm_util.h index 9777c869a140..ee2d9ff095be 100644 --- a/drivers/infiniband/core/iwpm_util.h +++ b/drivers/infiniband/core/iwpm_util.h @@ -76,6 +76,14 @@ struct iwpm_mapping_info { u8 nl_client; }; +struct iwpm_remote_info { + struct hlist_node hlist_node; + struct sockaddr_storage remote_sockaddr; + struct sockaddr_storage mapped_loc_sockaddr; + struct sockaddr_storage mapped_rem_sockaddr; + u8 nl_client; +}; + struct iwpm_admin_data { atomic_t refcount; atomic_t nlmsg_seq; @@ -127,6 +135,13 @@ int iwpm_wait_complete_req(struct iwpm_nlmsg_request *nlmsg_request); */ int iwpm_get_nlmsg_seq(void); +/** + * iwpm_add_reminfo - Add remote address info of the connecting peer + * to the remote info hash table + * @reminfo: The remote info to be added + */ +void iwpm_add_remote_info(struct iwpm_remote_info *reminfo); + /** * iwpm_valid_client - Check if the port mapper client is valid * @nl_client: The index of the netlink client diff --git a/include/rdma/iw_portmap.h b/include/rdma/iw_portmap.h index 928b2775e992..fda31673a562 100644 --- a/include/rdma/iw_portmap.h +++ b/include/rdma/iw_portmap.h @@ -147,6 +147,16 @@ int iwpm_add_mapping_cb(struct sk_buff *, struct netlink_callback *); */ int iwpm_add_and_query_mapping_cb(struct sk_buff *, struct netlink_callback *); +/** + * iwpm_remote_info_cb - Process remote connecting peer address info, which + * the port mapper has received from the connecting peer + * + * @cb: Contains the received message (payload and netlink header) + * + * Stores the IPv4/IPv6 address info in a hash table + */ +int iwpm_remote_info_cb(struct sk_buff *, struct netlink_callback *); + /** * iwpm_mapping_error_cb - Process port mapper notification for error * @@ -174,6 +184,21 @@ int iwpm_mapping_info_cb(struct sk_buff *, struct netlink_callback *); */ int iwpm_ack_mapping_info_cb(struct sk_buff *, struct netlink_callback *); +/** + * iwpm_get_remote_info - Get the remote connecting peer address info + * + * @mapped_loc_addr: Mapped local address of the listening peer + * @mapped_rem_addr: Mapped remote address of the connecting peer + * @remote_addr: To store the remote address of the connecting peer + * @nl_client: The index of the netlink client + * + * The remote address info is retrieved and provided to the client in + * the remote_addr. After that it is removed from the hash table + */ +int iwpm_get_remote_info(struct sockaddr_storage *mapped_loc_addr, + struct sockaddr_storage *mapped_rem_addr, + struct sockaddr_storage *remote_addr, u8 nl_client); + /** * iwpm_create_mapinfo - Store local and mapped IPv4/IPv6 address * info in a hash table diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h index de69170a30ce..6e4bb4270ca2 100644 --- a/include/uapi/rdma/rdma_netlink.h +++ b/include/uapi/rdma/rdma_netlink.h @@ -37,6 +37,7 @@ enum { RDMA_NL_IWPM_ADD_MAPPING, RDMA_NL_IWPM_QUERY_MAPPING, RDMA_NL_IWPM_REMOVE_MAPPING, + RDMA_NL_IWPM_REMOTE_INFO, RDMA_NL_IWPM_HANDLE_ERR, RDMA_NL_IWPM_MAPINFO, RDMA_NL_IWPM_MAPINFO_NUM, From 230da36ae919e690dbcc44d1be8a2154214c6e36 Mon Sep 17 00:00:00 2001 From: Tatyana Nikolova Date: Tue, 21 Apr 2015 16:28:25 -0400 Subject: [PATCH 050/101] RDMA/nes: Report the actual address of the remote connecting peer Get the actual (non-mapped) ip/tcp address of the connecting peer from the port mapper and report the address info to the user space application at the time of connection establishment Signed-off-by: Tatyana Nikolova Signed-off-by: Doug Ledford --- drivers/infiniband/hw/nes/nes.c | 1 + drivers/infiniband/hw/nes/nes_cm.c | 63 ++++++++++++++++++++++-------- 2 files changed, 48 insertions(+), 16 deletions(-) diff --git a/drivers/infiniband/hw/nes/nes.c b/drivers/infiniband/hw/nes/nes.c index 3b2a6dc8ea99..9f9d5c563a61 100644 --- a/drivers/infiniband/hw/nes/nes.c +++ b/drivers/infiniband/hw/nes/nes.c @@ -116,6 +116,7 @@ static struct ibnl_client_cbs nes_nl_cb_table[] = { [RDMA_NL_IWPM_REG_PID] = {.dump = iwpm_register_pid_cb}, [RDMA_NL_IWPM_ADD_MAPPING] = {.dump = iwpm_add_mapping_cb}, [RDMA_NL_IWPM_QUERY_MAPPING] = {.dump = iwpm_add_and_query_mapping_cb}, + [RDMA_NL_IWPM_REMOTE_INFO] = {.dump = iwpm_remote_info_cb}, [RDMA_NL_IWPM_HANDLE_ERR] = {.dump = iwpm_mapping_error_cb}, [RDMA_NL_IWPM_MAPINFO] = {.dump = iwpm_mapping_info_cb}, [RDMA_NL_IWPM_MAPINFO_NUM] = {.dump = iwpm_ack_mapping_info_cb} diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c index 6f09a72e78d7..72b43417cbe3 100644 --- a/drivers/infiniband/hw/nes/nes_cm.c +++ b/drivers/infiniband/hw/nes/nes_cm.c @@ -596,27 +596,52 @@ static void nes_form_reg_msg(struct nes_vnic *nesvnic, memcpy(pm_msg->if_name, nesvnic->netdev->name, IWPM_IFNAME_SIZE); } +static void record_sockaddr_info(struct sockaddr_storage *addr_info, + nes_addr_t *ip_addr, u16 *port_num) +{ + struct sockaddr_in *in_addr = (struct sockaddr_in *)addr_info; + + if (in_addr->sin_family == AF_INET) { + *ip_addr = ntohl(in_addr->sin_addr.s_addr); + *port_num = ntohs(in_addr->sin_port); + } +} + /* * nes_record_pm_msg - Save the received mapping info */ static void nes_record_pm_msg(struct nes_cm_info *cm_info, struct iwpm_sa_data *pm_msg) { - struct sockaddr_in *mapped_loc_addr = - (struct sockaddr_in *)&pm_msg->mapped_loc_addr; - struct sockaddr_in *mapped_rem_addr = - (struct sockaddr_in *)&pm_msg->mapped_rem_addr; + record_sockaddr_info(&pm_msg->mapped_loc_addr, + &cm_info->mapped_loc_addr, &cm_info->mapped_loc_port); - if (mapped_loc_addr->sin_family == AF_INET) { - cm_info->mapped_loc_addr = - ntohl(mapped_loc_addr->sin_addr.s_addr); - cm_info->mapped_loc_port = ntohs(mapped_loc_addr->sin_port); - } - if (mapped_rem_addr->sin_family == AF_INET) { - cm_info->mapped_rem_addr = - ntohl(mapped_rem_addr->sin_addr.s_addr); - cm_info->mapped_rem_port = ntohs(mapped_rem_addr->sin_port); - } + record_sockaddr_info(&pm_msg->mapped_rem_addr, + &cm_info->mapped_rem_addr, &cm_info->mapped_rem_port); +} + +/* + * nes_get_reminfo - Get the address info of the remote connecting peer + */ +static int nes_get_remote_addr(struct nes_cm_node *cm_node) +{ + struct sockaddr_storage mapped_loc_addr, mapped_rem_addr; + struct sockaddr_storage remote_addr; + int ret; + + nes_create_sockaddr(htonl(cm_node->mapped_loc_addr), + htons(cm_node->mapped_loc_port), &mapped_loc_addr); + nes_create_sockaddr(htonl(cm_node->mapped_rem_addr), + htons(cm_node->mapped_rem_port), &mapped_rem_addr); + + ret = iwpm_get_remote_info(&mapped_loc_addr, &mapped_rem_addr, + &remote_addr, RDMA_NL_NES); + if (ret) + nes_debug(NES_DBG_CM, "Unable to find remote peer address info\n"); + else + record_sockaddr_info(&remote_addr, &cm_node->rem_addr, + &cm_node->rem_port); + return ret; } /** @@ -1566,9 +1591,14 @@ static struct nes_cm_node *make_cm_node(struct nes_cm_core *cm_core, return NULL; /* set our node specific transport info */ - cm_node->loc_addr = cm_info->loc_addr; + if (listener) { + cm_node->loc_addr = listener->loc_addr; + cm_node->loc_port = listener->loc_port; + } else { + cm_node->loc_addr = cm_info->loc_addr; + cm_node->loc_port = cm_info->loc_port; + } cm_node->rem_addr = cm_info->rem_addr; - cm_node->loc_port = cm_info->loc_port; cm_node->rem_port = cm_info->rem_port; cm_node->mapped_loc_addr = cm_info->mapped_loc_addr; @@ -2151,6 +2181,7 @@ static int handle_ack_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb, cm_node->state = NES_CM_STATE_ESTABLISHED; if (datasize) { cm_node->tcp_cntxt.rcv_nxt = inc_sequence + datasize; + nes_get_remote_addr(cm_node); handle_rcv_mpa(cm_node, skb); } else { /* rcvd ACK only */ dev_kfree_skb_any(skb); From 5b6b8fe64053b2649660ded2f3c5be25ebddbfdb Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Tue, 21 Apr 2015 16:28:41 -0400 Subject: [PATCH 051/101] RDMA/cxgb4: Report the actual address of the remote connecting peer Get the actual (non-mapped) ip/tcp address of the connecting peer from the port mapper Also setup the passive side endpoint to correctly display the actual and mapped addresses for the new connection. Signed-off-by: Steve Wise Signed-off-by: Doug Ledford --- drivers/infiniband/hw/cxgb4/cm.c | 54 +++++++++++++++++++++++++--- drivers/infiniband/hw/cxgb4/device.c | 1 + 2 files changed, 51 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 6fb31bacd5b4..3c3b00e4e7af 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -583,6 +583,22 @@ static void c4iw_record_pm_msg(struct c4iw_ep *ep, sizeof(ep->com.mapped_remote_addr)); } +static int get_remote_addr(struct c4iw_ep *ep) +{ + int ret; + + print_addr(&ep->com, __func__, "get_remote_addr"); + + ret = iwpm_get_remote_info(&ep->com.mapped_local_addr, + &ep->com.mapped_remote_addr, + &ep->com.remote_addr, RDMA_NL_C4IW); + if (ret) + pr_info(MOD "Unable to find remote peer addr info - err %d\n", + ret); + + return ret; +} + static void best_mtu(const unsigned short *mtus, unsigned short mtu, unsigned int *idx, int use_ts, int ipv6) { @@ -2352,27 +2368,57 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb) state_set(&child_ep->com, CONNECTING); child_ep->com.dev = dev; child_ep->com.cm_id = NULL; + + /* + * The mapped_local and mapped_remote addresses get setup with + * the actual 4-tuple. The local address will be based on the + * actual local address of the connection, but on the port number + * of the parent listening endpoint. The remote address is + * setup based on a query to the IWPM since we don't know what it + * originally was before mapping. If no mapping was done, then + * mapped_remote == remote, and mapped_local == local. + */ if (iptype == 4) { struct sockaddr_in *sin = (struct sockaddr_in *) - &child_ep->com.local_addr; + &child_ep->com.mapped_local_addr; + sin->sin_family = PF_INET; sin->sin_port = local_port; sin->sin_addr.s_addr = *(__be32 *)local_ip; - sin = (struct sockaddr_in *)&child_ep->com.remote_addr; + + sin = (struct sockaddr_in *)&child_ep->com.local_addr; + sin->sin_family = PF_INET; + sin->sin_port = ((struct sockaddr_in *) + &parent_ep->com.local_addr)->sin_port; + sin->sin_addr.s_addr = *(__be32 *)local_ip; + + sin = (struct sockaddr_in *)&child_ep->com.mapped_remote_addr; sin->sin_family = PF_INET; sin->sin_port = peer_port; sin->sin_addr.s_addr = *(__be32 *)peer_ip; } else { struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) - &child_ep->com.local_addr; + &child_ep->com.mapped_local_addr; + sin6->sin6_family = PF_INET6; sin6->sin6_port = local_port; memcpy(sin6->sin6_addr.s6_addr, local_ip, 16); - sin6 = (struct sockaddr_in6 *)&child_ep->com.remote_addr; + + sin6 = (struct sockaddr_in6 *)&child_ep->com.local_addr; + sin6->sin6_family = PF_INET6; + sin6->sin6_port = ((struct sockaddr_in6 *) + &parent_ep->com.local_addr)->sin6_port; + memcpy(sin6->sin6_addr.s6_addr, local_ip, 16); + + sin6 = (struct sockaddr_in6 *)&child_ep->com.mapped_remote_addr; sin6->sin6_family = PF_INET6; sin6->sin6_port = peer_port; memcpy(sin6->sin6_addr.s6_addr, peer_ip, 16); } + memcpy(&child_ep->com.remote_addr, &child_ep->com.mapped_remote_addr, + sizeof(child_ep->com.remote_addr)); + get_remote_addr(child_ep); + c4iw_get_ep(&parent_ep->com); child_ep->parent_ep = parent_ep; child_ep->tos = PASS_OPEN_TOS_G(ntohl(req->tos_stid)); diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c index 83209bb38285..1ffbd038c0ae 100644 --- a/drivers/infiniband/hw/cxgb4/device.c +++ b/drivers/infiniband/hw/cxgb4/device.c @@ -93,6 +93,7 @@ static struct ibnl_client_cbs c4iw_nl_cb_table[] = { [RDMA_NL_IWPM_ADD_MAPPING] = {.dump = iwpm_add_mapping_cb}, [RDMA_NL_IWPM_QUERY_MAPPING] = {.dump = iwpm_add_and_query_mapping_cb}, [RDMA_NL_IWPM_HANDLE_ERR] = {.dump = iwpm_mapping_error_cb}, + [RDMA_NL_IWPM_REMOTE_INFO] = {.dump = iwpm_remote_info_cb}, [RDMA_NL_IWPM_MAPINFO] = {.dump = iwpm_mapping_info_cb}, [RDMA_NL_IWPM_MAPINFO_NUM] = {.dump = iwpm_ack_mapping_info_cb} }; From c1d383b5785b1e0fb5fb862864712a7208219e6a Mon Sep 17 00:00:00 2001 From: Guy Shapiro Date: Wed, 15 Apr 2015 18:17:56 +0300 Subject: [PATCH 052/101] IB/core: dma map/unmap locking optimizations Currently, while mapping or unmapping pages for ODP, the umem mutex is locked and unlocked once for each page. Such lock/unlock operation take few tens to hundreds of nsecs. This makes a significant impact when mapping or unmapping few MBs of memory. To avoid this, the mutex should be locked only once per operation, and not per page. Signed-off-by: Guy Shapiro Acked-by: Shachar Raindel Reviewed-by: Sagi Grimberg Signed-off-by: Doug Ledford --- drivers/infiniband/core/umem_odp.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c index 8b8cc6fa0ab0..aba47398880d 100644 --- a/drivers/infiniband/core/umem_odp.c +++ b/drivers/infiniband/core/umem_odp.c @@ -446,7 +446,6 @@ static int ib_umem_odp_map_dma_single_page( int remove_existing_mapping = 0; int ret = 0; - mutex_lock(&umem->odp_data->umem_mutex); /* * Note: we avoid writing if seq is different from the initial seq, to * handle case of a racing notifier. This check also allows us to bail @@ -479,8 +478,6 @@ static int ib_umem_odp_map_dma_single_page( } out: - mutex_unlock(&umem->odp_data->umem_mutex); - /* On Demand Paging - avoid pinning the page */ if (umem->context->invalidate_range || !stored_page) put_page(page); @@ -586,6 +583,7 @@ int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 user_virt, u64 bcnt, bcnt -= min_t(size_t, npages << PAGE_SHIFT, bcnt); user_virt += npages << PAGE_SHIFT; + mutex_lock(&umem->odp_data->umem_mutex); for (j = 0; j < npages; ++j) { ret = ib_umem_odp_map_dma_single_page( umem, k, base_virt_addr, local_page_list[j], @@ -594,6 +592,7 @@ int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 user_virt, u64 bcnt, break; k++; } + mutex_unlock(&umem->odp_data->umem_mutex); if (ret < 0) { /* Release left over pages when handling errors. */ @@ -633,9 +632,9 @@ void ib_umem_odp_unmap_dma_pages(struct ib_umem *umem, u64 virt, * faults from completion. We might be racing with other * invalidations, so we must make sure we free each page only * once. */ + mutex_lock(&umem->odp_data->umem_mutex); for (addr = virt; addr < bound; addr += (u64)umem->page_size) { idx = (addr - ib_umem_start(umem)) / PAGE_SIZE; - mutex_lock(&umem->odp_data->umem_mutex); if (umem->odp_data->page_list[idx]) { struct page *page = umem->odp_data->page_list[idx]; struct page *head_page = compound_head(page); @@ -663,7 +662,7 @@ void ib_umem_odp_unmap_dma_pages(struct ib_umem *umem, u64 virt, umem->odp_data->page_list[idx] = NULL; umem->odp_data->dma_list[idx] = 0; } - mutex_unlock(&umem->odp_data->umem_mutex); } + mutex_unlock(&umem->odp_data->umem_mutex); } EXPORT_SYMBOL(ib_umem_odp_unmap_dma_pages); From 325ad0617adaf163e32dd2d857b90baf65a25b5b Mon Sep 17 00:00:00 2001 From: Guy Shapiro Date: Wed, 15 Apr 2015 18:17:57 +0300 Subject: [PATCH 053/101] IB/core: dma unmap optimizations While unmapping an ODP writable page, the dirty bit of the page is set. In order to do so, the head of the compound page is found. Currently, the compound head is found even on non-writable pages, where it is never used, leading to unnecessary cpu barrier that impacts performance. This patch moves the search for the compound head to be done only when needed. Signed-off-by: Guy Shapiro Acked-by: Shachar Raindel Reviewed-by: Sagi Grimberg Signed-off-by: Doug Ledford --- drivers/infiniband/core/umem_odp.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c index aba47398880d..40becdb3196e 100644 --- a/drivers/infiniband/core/umem_odp.c +++ b/drivers/infiniband/core/umem_odp.c @@ -637,7 +637,6 @@ void ib_umem_odp_unmap_dma_pages(struct ib_umem *umem, u64 virt, idx = (addr - ib_umem_start(umem)) / PAGE_SIZE; if (umem->odp_data->page_list[idx]) { struct page *page = umem->odp_data->page_list[idx]; - struct page *head_page = compound_head(page); dma_addr_t dma = umem->odp_data->dma_list[idx]; dma_addr_t dma_addr = dma & ODP_DMA_ADDR_MASK; @@ -645,7 +644,8 @@ void ib_umem_odp_unmap_dma_pages(struct ib_umem *umem, u64 virt, ib_dma_unmap_page(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); - if (dma & ODP_WRITE_ALLOWED_BIT) + if (dma & ODP_WRITE_ALLOWED_BIT) { + struct page *head_page = compound_head(page); /* * set_page_dirty prefers being called with * the page lock. However, MMU notifiers are @@ -656,6 +656,7 @@ void ib_umem_odp_unmap_dma_pages(struct ib_umem *umem, u64 virt, * be removed. */ set_page_dirty(head_page); + } /* on demand pinning support */ if (!umem->context->invalidate_range) put_page(page); From 87a26e976cb93e26742224bdd39f51f7861aa9b7 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Tue, 21 Apr 2015 14:50:34 -0700 Subject: [PATCH 054/101] IB/qib: add acounting for MTRR There is no good reason not to, we eventually delete it as well. Cc: Toshi Kani Cc: Suresh Siddha Cc: Ingo Molnar Cc: Thomas Gleixner Cc: Juergen Gross Cc: Daniel Vetter Cc: Andy Lutomirski Cc: Dave Airlie Cc: Antonino Daplas Cc: Jean-Christophe Plagniol-Villard Cc: Tomi Valkeinen Cc: Mike Marciniszyn Cc: Roland Dreier Cc: Sean Hefty Cc: Hal Rosenstock Cc: linux-rdma@vger.kernel.org Cc: linux-fbdev@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Luis R. Rodriguez Signed-off-by: Doug Ledford --- drivers/infiniband/hw/qib/qib_wc_x86_64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/qib/qib_wc_x86_64.c b/drivers/infiniband/hw/qib/qib_wc_x86_64.c index 81b225f2300a..fe0850ac6883 100644 --- a/drivers/infiniband/hw/qib/qib_wc_x86_64.c +++ b/drivers/infiniband/hw/qib/qib_wc_x86_64.c @@ -118,7 +118,7 @@ int qib_enable_wc(struct qib_devdata *dd) if (!ret) { int cookie; - cookie = mtrr_add(pioaddr, piolen, MTRR_TYPE_WRCOMB, 0); + cookie = mtrr_add(pioaddr, piolen, MTRR_TYPE_WRCOMB, 1); if (cookie < 0) { { qib_devinfo(dd->pcidev, From d4988623cc605131bed8c77f007082c3555c39ee Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Wed, 22 Apr 2015 11:38:24 -0700 Subject: [PATCH 055/101] IB/qib: use arch_phys_wc_add() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This driver already makes use of ioremap_wc() on PIO buffers, so convert it to use arch_phys_wc_add(). The qib driver uses a mmap() special case for when PAT is not used, this behaviour used to be determined with a module parameter but since we have been asked to just remove that module parameter this checks for the WC cookie, if not set we can assume PAT was used. If its set we do what we used to do for the mmap for when MTRR was enabled. The removal of the module parameter is OK given that Andy notes that even if users of module parameter are still around it will not prevent loading of the module on recent kernels. Cc: Doug Ledford Cc: Toshi Kani Cc: Rickard Strandqvist Cc: Mike Marciniszyn Cc: Roland Dreier Cc: Sean Hefty Cc: Hal Rosenstock Cc: Dennis Dalessandro Cc: Andy Lutomirski Cc: Suresh Siddha Cc: Ingo Molnar Cc: Thomas Gleixner Cc: Juergen Gross Cc: Daniel Vetter Cc: Dave Airlie Cc: Bjorn Helgaas Cc: Antonino Daplas Cc: Jean-Christophe Plagniol-Villard Cc: Tomi Valkeinen Cc: Dave Hansen Cc: Arnd Bergmann Cc: Michael S. Tsirkin Cc: Stefan Bader Cc: konrad.wilk@oracle.com Cc: ville.syrjala@linux.intel.com Cc: david.vrabel@citrix.com Cc: jbeulich@suse.com Cc: Roger Pau Monné Cc: infinipath@intel.com Cc: linux-rdma@vger.kernel.org Cc: linux-fbdev@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: xen-devel@lists.xensource.com Signed-off-by: Luis R. Rodriguez Signed-off-by: Doug Ledford --- drivers/infiniband/hw/qib/qib.h | 1 - drivers/infiniband/hw/qib/qib_file_ops.c | 3 +- drivers/infiniband/hw/qib/qib_iba6120.c | 8 ++--- drivers/infiniband/hw/qib/qib_iba7220.c | 8 ++--- drivers/infiniband/hw/qib/qib_iba7322.c | 41 +++++++++++------------ drivers/infiniband/hw/qib/qib_init.c | 26 ++++---------- drivers/infiniband/hw/qib/qib_wc_x86_64.c | 31 +++-------------- 7 files changed, 39 insertions(+), 79 deletions(-) diff --git a/drivers/infiniband/hw/qib/qib.h b/drivers/infiniband/hw/qib/qib.h index ffd48bfc4923..ba5173e24973 100644 --- a/drivers/infiniband/hw/qib/qib.h +++ b/drivers/infiniband/hw/qib/qib.h @@ -1136,7 +1136,6 @@ extern struct qib_devdata *qib_lookup(int unit); extern u32 qib_cpulist_count; extern unsigned long *qib_cpulist; -extern unsigned qib_wc_pat; extern unsigned qib_cc_table_size; int qib_init(struct qib_devdata *, int); int init_chip_wc_pat(struct qib_devdata *dd, u32); diff --git a/drivers/infiniband/hw/qib/qib_file_ops.c b/drivers/infiniband/hw/qib/qib_file_ops.c index 9ea6c440a00c..725881890c4a 100644 --- a/drivers/infiniband/hw/qib/qib_file_ops.c +++ b/drivers/infiniband/hw/qib/qib_file_ops.c @@ -835,7 +835,8 @@ static int mmap_piobufs(struct vm_area_struct *vma, vma->vm_flags &= ~VM_MAYREAD; vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND; - if (qib_wc_pat) + /* We used PAT if wc_cookie == 0 */ + if (!dd->wc_cookie) vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); ret = io_remap_pfn_range(vma, vma->vm_start, phys >> PAGE_SHIFT, diff --git a/drivers/infiniband/hw/qib/qib_iba6120.c b/drivers/infiniband/hw/qib/qib_iba6120.c index 0d2ba59af30a..4b927809d1a1 100644 --- a/drivers/infiniband/hw/qib/qib_iba6120.c +++ b/drivers/infiniband/hw/qib/qib_iba6120.c @@ -3315,11 +3315,9 @@ static int init_6120_variables(struct qib_devdata *dd) qib_6120_config_ctxts(dd); qib_set_ctxtcnt(dd); - if (qib_wc_pat) { - ret = init_chip_wc_pat(dd, 0); - if (ret) - goto bail; - } + ret = init_chip_wc_pat(dd, 0); + if (ret) + goto bail; set_6120_baseaddrs(dd); /* set chip access pointers now */ ret = 0; diff --git a/drivers/infiniband/hw/qib/qib_iba7220.c b/drivers/infiniband/hw/qib/qib_iba7220.c index 22affda8af88..00b2af211157 100644 --- a/drivers/infiniband/hw/qib/qib_iba7220.c +++ b/drivers/infiniband/hw/qib/qib_iba7220.c @@ -4126,11 +4126,9 @@ static int qib_init_7220_variables(struct qib_devdata *dd) qib_7220_config_ctxts(dd); qib_set_ctxtcnt(dd); /* needed for PAT setup */ - if (qib_wc_pat) { - ret = init_chip_wc_pat(dd, 0); - if (ret) - goto bail; - } + ret = init_chip_wc_pat(dd, 0); + if (ret) + goto bail; set_7220_baseaddrs(dd); /* set chip access pointers now */ ret = 0; diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c index ef97b71c8f7d..f32b4628e991 100644 --- a/drivers/infiniband/hw/qib/qib_iba7322.c +++ b/drivers/infiniband/hw/qib/qib_iba7322.c @@ -6429,6 +6429,7 @@ static int qib_init_7322_variables(struct qib_devdata *dd) unsigned features, pidx, sbufcnt; int ret, mtu; u32 sbufs, updthresh; + resource_size_t vl15off; /* pport structs are contiguous, allocated after devdata */ ppd = (struct qib_pportdata *)(dd + 1); @@ -6677,29 +6678,27 @@ static int qib_init_7322_variables(struct qib_devdata *dd) qib_7322_config_ctxts(dd); qib_set_ctxtcnt(dd); - if (qib_wc_pat) { - resource_size_t vl15off; - /* - * We do not set WC on the VL15 buffers to avoid - * a rare problem with unaligned writes from - * interrupt-flushed store buffers, so we need - * to map those separately here. We can't solve - * this for the rarely used mtrr case. - */ - ret = init_chip_wc_pat(dd, 0); - if (ret) - goto bail; + /* + * We do not set WC on the VL15 buffers to avoid + * a rare problem with unaligned writes from + * interrupt-flushed store buffers, so we need + * to map those separately here. We can't solve + * this for the rarely used mtrr case. + */ + ret = init_chip_wc_pat(dd, 0); + if (ret) + goto bail; - /* vl15 buffers start just after the 4k buffers */ - vl15off = dd->physaddr + (dd->piobufbase >> 32) + - dd->piobcnt4k * dd->align4k; - dd->piovl15base = ioremap_nocache(vl15off, - NUM_VL15_BUFS * dd->align4k); - if (!dd->piovl15base) { - ret = -ENOMEM; - goto bail; - } + /* vl15 buffers start just after the 4k buffers */ + vl15off = dd->physaddr + (dd->piobufbase >> 32) + + dd->piobcnt4k * dd->align4k; + dd->piovl15base = ioremap_nocache(vl15off, + NUM_VL15_BUFS * dd->align4k); + if (!dd->piovl15base) { + ret = -ENOMEM; + goto bail; } + qib_7322_set_baseaddrs(dd); /* set chip access pointers now */ ret = 0; diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c index 2ee36953e234..7e00470adc30 100644 --- a/drivers/infiniband/hw/qib/qib_init.c +++ b/drivers/infiniband/hw/qib/qib_init.c @@ -91,15 +91,6 @@ MODULE_PARM_DESC(krcvqs, "number of kernel receive queues per IB port"); unsigned qib_cc_table_size; module_param_named(cc_table_size, qib_cc_table_size, uint, S_IRUGO); MODULE_PARM_DESC(cc_table_size, "Congestion control table entries 0 (CCA disabled - default), min = 128, max = 1984"); -/* - * qib_wc_pat parameter: - * 0 is WC via MTRR - * 1 is WC via PAT - * If PAT initialization fails, code reverts back to MTRR - */ -unsigned qib_wc_pat = 1; /* default (1) is to use PAT, not MTRR */ -module_param_named(wc_pat, qib_wc_pat, uint, S_IRUGO); -MODULE_PARM_DESC(wc_pat, "enable write-combining via PAT mechanism"); static void verify_interrupt(unsigned long); @@ -1377,8 +1368,7 @@ static void cleanup_device_data(struct qib_devdata *dd) spin_unlock(&dd->pport[pidx].cc_shadow_lock); } - if (!qib_wc_pat) - qib_disable_wc(dd); + qib_disable_wc(dd); if (dd->pioavailregs_dma) { dma_free_coherent(&dd->pcidev->dev, PAGE_SIZE, @@ -1547,14 +1537,12 @@ static int qib_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) goto bail; } - if (!qib_wc_pat) { - ret = qib_enable_wc(dd); - if (ret) { - qib_dev_err(dd, - "Write combining not enabled (err %d): performance may be poor\n", - -ret); - ret = 0; - } + ret = qib_enable_wc(dd); + if (ret) { + qib_dev_err(dd, + "Write combining not enabled (err %d): performance may be poor\n", + -ret); + ret = 0; } qib_verify_pioperf(dd); diff --git a/drivers/infiniband/hw/qib/qib_wc_x86_64.c b/drivers/infiniband/hw/qib/qib_wc_x86_64.c index fe0850ac6883..6d61ef98721c 100644 --- a/drivers/infiniband/hw/qib/qib_wc_x86_64.c +++ b/drivers/infiniband/hw/qib/qib_wc_x86_64.c @@ -116,21 +116,9 @@ int qib_enable_wc(struct qib_devdata *dd) } if (!ret) { - int cookie; - - cookie = mtrr_add(pioaddr, piolen, MTRR_TYPE_WRCOMB, 1); - if (cookie < 0) { - { - qib_devinfo(dd->pcidev, - "mtrr_add() WC for PIO bufs failed (%d)\n", - cookie); - ret = -EINVAL; - } - } else { - dd->wc_cookie = cookie; - dd->wc_base = (unsigned long) pioaddr; - dd->wc_len = (unsigned long) piolen; - } + dd->wc_cookie = arch_phys_wc_add(pioaddr, piolen); + if (dd->wc_cookie < 0) + ret = -EINVAL; } return ret; @@ -142,18 +130,7 @@ int qib_enable_wc(struct qib_devdata *dd) */ void qib_disable_wc(struct qib_devdata *dd) { - if (dd->wc_cookie) { - int r; - - r = mtrr_del(dd->wc_cookie, dd->wc_base, - dd->wc_len); - if (r < 0) - qib_devinfo(dd->pcidev, - "mtrr_del(%lx, %lx, %lx) failed: %d\n", - dd->wc_cookie, dd->wc_base, - dd->wc_len, r); - dd->wc_cookie = 0; /* even on failure */ - } + arch_phys_wc_del(dd->wc_cookie); } /** From d67e199611b986b345ea3087ee2e4a15da1c98b3 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 21 Apr 2015 16:46:28 +0300 Subject: [PATCH 056/101] efi: Fix error handling in add_sysfs_runtime_map_entry() I spotted two (difficult to hit) bugs while reviewing this. 1) There is a double free bug because we unregister "map_kset" in add_sysfs_runtime_map_entry() and also efi_runtime_map_init(). 2) If we fail to allocate "entry" then we should return ERR_PTR(-ENOMEM) instead of NULL. Signed-off-by: Dan Carpenter Cc: Dave Young Cc: Guangyu Sun Cc: Signed-off-by: Matt Fleming --- drivers/firmware/efi/runtime-map.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/firmware/efi/runtime-map.c b/drivers/firmware/efi/runtime-map.c index 87b8e3b900d2..5c55227a34c8 100644 --- a/drivers/firmware/efi/runtime-map.c +++ b/drivers/firmware/efi/runtime-map.c @@ -120,7 +120,8 @@ add_sysfs_runtime_map_entry(struct kobject *kobj, int nr) entry = kzalloc(sizeof(*entry), GFP_KERNEL); if (!entry) { kset_unregister(map_kset); - return entry; + map_kset = NULL; + return ERR_PTR(-ENOMEM); } memcpy(&entry->md, efi_runtime_map + nr * efi_memdesc_size, @@ -132,6 +133,7 @@ add_sysfs_runtime_map_entry(struct kobject *kobj, int nr) if (ret) { kobject_put(&entry->kobj); kset_unregister(map_kset); + map_kset = NULL; return ERR_PTR(ret); } @@ -195,8 +197,6 @@ int __init efi_runtime_map_init(struct kobject *efi_kobj) entry = *(map_entries + j); kobject_put(&entry->kobj); } - if (map_kset) - kset_unregister(map_kset); out: return ret; } From e59d29e88f7b7e3d1231202b0203d0af6f15a440 Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Tue, 28 Apr 2015 08:46:09 +0000 Subject: [PATCH 057/101] perf probe: Fix segfault if passed with ''. Since parse_perf_probe_point() deals with a user passed argument, we should not assume it to be a valid string. Without this patch, if pass '' to perf probe, a segfault raises: $ perf probe -a '' Segmentation fault This patch checks argument of parse_perf_probe_point() before string processing. After this patch: $ perf probe -a '' usage: perf probe [] 'PROBEDEF' ['PROBEDEF' ...] or: perf probe [] --add 'PROBEDEF' [--add 'PROBEDEF' ...] ... Signed-off-by: Wang Nan Acked-by: Masami Hiramatsu Tested-by: Arnaldo Carvalho de Melo Cc: David Ahern Cc: Jiri Olsa Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Zefan Li Link: http://lkml.kernel.org/r/1430210769-94177-1-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/probe-event.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index d8bb616ff57c..d05b77cf35f7 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -1084,6 +1084,8 @@ static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev) * * TODO:Group name support */ + if (!arg) + return -EINVAL; ptr = strpbrk(arg, ";=@+%"); if (ptr && *ptr == '=') { /* Event name */ From 471e70583217728955436a3fa6e5201e5c8c296a Mon Sep 17 00:00:00 2001 From: Honggang LI Date: Wed, 29 Apr 2015 17:40:44 +0800 Subject: [PATCH 058/101] IB/core: change rdma_gid2ip into void function as it always return zero Signed-off-by: Honggang Li Acked-by: Sean Hefty Signed-off-by: Doug Ledford --- drivers/infiniband/core/addr.c | 13 +++---------- include/rdma/ib_addr.h | 3 +-- 2 files changed, 4 insertions(+), 12 deletions(-) diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c index f80da50d84a5..38339d220d7f 100644 --- a/drivers/infiniband/core/addr.c +++ b/drivers/infiniband/core/addr.c @@ -472,13 +472,8 @@ int rdma_addr_find_dmac_by_grh(union ib_gid *sgid, union ib_gid *dgid, u8 *dmac, } sgid_addr, dgid_addr; - ret = rdma_gid2ip(&sgid_addr._sockaddr, sgid); - if (ret) - return ret; - - ret = rdma_gid2ip(&dgid_addr._sockaddr, dgid); - if (ret) - return ret; + rdma_gid2ip(&sgid_addr._sockaddr, sgid); + rdma_gid2ip(&dgid_addr._sockaddr, dgid); memset(&dev_addr, 0, sizeof(dev_addr)); @@ -512,10 +507,8 @@ int rdma_addr_find_smac_by_sgid(union ib_gid *sgid, u8 *smac, u16 *vlan_id) struct sockaddr_in6 _sockaddr_in6; } gid_addr; - ret = rdma_gid2ip(&gid_addr._sockaddr, sgid); + rdma_gid2ip(&gid_addr._sockaddr, sgid); - if (ret) - return ret; memset(&dev_addr, 0, sizeof(dev_addr)); ret = rdma_translate_ip(&gid_addr._sockaddr, &dev_addr, vlan_id); if (ret) diff --git a/include/rdma/ib_addr.h b/include/rdma/ib_addr.h index ce55906b54a0..ac54c27a2bfd 100644 --- a/include/rdma/ib_addr.h +++ b/include/rdma/ib_addr.h @@ -160,7 +160,7 @@ static inline int rdma_ip2gid(struct sockaddr *addr, union ib_gid *gid) } /* Important - sockaddr should be a union of sockaddr_in and sockaddr_in6 */ -static inline int rdma_gid2ip(struct sockaddr *out, union ib_gid *gid) +static inline void rdma_gid2ip(struct sockaddr *out, union ib_gid *gid) { if (ipv6_addr_v4mapped((struct in6_addr *)gid)) { struct sockaddr_in *out_in = (struct sockaddr_in *)out; @@ -173,7 +173,6 @@ static inline int rdma_gid2ip(struct sockaddr *out, union ib_gid *gid) out_in->sin6_family = AF_INET6; memcpy(&out_in->sin6_addr.s6_addr, gid->raw, 16); } - return 0; } static inline void iboe_addr_get_sgid(struct rdma_dev_addr *dev_addr, From 0d0f738f6a11856a704dcd8fd3a008b200f17625 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Sun, 3 May 2015 09:48:26 -0400 Subject: [PATCH 059/101] IB/core: Fix unaligned accesses Addresses the following kernel logs seen during boot of sparc systems: Kernel unaligned access at TPC[103bce50] cm_find_listen+0x34/0xf8 [ib_cm] Kernel unaligned access at TPC[103bce50] cm_find_listen+0x34/0xf8 [ib_cm] Kernel unaligned access at TPC[103bce50] cm_find_listen+0x34/0xf8 [ib_cm] Kernel unaligned access at TPC[103bce50] cm_find_listen+0x34/0xf8 [ib_cm] Kernel unaligned access at TPC[103bce50] cm_find_listen+0x34/0xf8 [ib_cm] Signed-off-by: David Ahern Signed-off-by: Doug Ledford --- drivers/infiniband/core/cm.c | 23 +++++++++++------------ drivers/infiniband/core/cm_msgs.h | 4 ++-- include/rdma/ib_cm.h | 7 ++++--- 3 files changed, 17 insertions(+), 17 deletions(-) diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index e28a494e2a3a..0c1419105ff0 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -437,39 +437,38 @@ static struct cm_id_private * cm_acquire_id(__be32 local_id, __be32 remote_id) return cm_id_priv; } -static void cm_mask_copy(u8 *dst, u8 *src, u8 *mask) +static void cm_mask_copy(u32 *dst, const u32 *src, const u32 *mask) { int i; - for (i = 0; i < IB_CM_COMPARE_SIZE / sizeof(unsigned long); i++) - ((unsigned long *) dst)[i] = ((unsigned long *) src)[i] & - ((unsigned long *) mask)[i]; + for (i = 0; i < IB_CM_COMPARE_SIZE; i++) + dst[i] = src[i] & mask[i]; } static int cm_compare_data(struct ib_cm_compare_data *src_data, struct ib_cm_compare_data *dst_data) { - u8 src[IB_CM_COMPARE_SIZE]; - u8 dst[IB_CM_COMPARE_SIZE]; + u32 src[IB_CM_COMPARE_SIZE]; + u32 dst[IB_CM_COMPARE_SIZE]; if (!src_data || !dst_data) return 0; cm_mask_copy(src, src_data->data, dst_data->mask); cm_mask_copy(dst, dst_data->data, src_data->mask); - return memcmp(src, dst, IB_CM_COMPARE_SIZE); + return memcmp(src, dst, sizeof(src)); } -static int cm_compare_private_data(u8 *private_data, +static int cm_compare_private_data(u32 *private_data, struct ib_cm_compare_data *dst_data) { - u8 src[IB_CM_COMPARE_SIZE]; + u32 src[IB_CM_COMPARE_SIZE]; if (!dst_data) return 0; cm_mask_copy(src, private_data, dst_data->mask); - return memcmp(src, dst_data->data, IB_CM_COMPARE_SIZE); + return memcmp(src, dst_data->data, sizeof(src)); } /* @@ -538,7 +537,7 @@ static struct cm_id_private * cm_insert_listen(struct cm_id_private *cm_id_priv) static struct cm_id_private * cm_find_listen(struct ib_device *device, __be64 service_id, - u8 *private_data) + u32 *private_data) { struct rb_node *node = cm.listen_service_table.rb_node; struct cm_id_private *cm_id_priv; @@ -953,7 +952,7 @@ int ib_cm_listen(struct ib_cm_id *cm_id, __be64 service_id, __be64 service_mask, cm_mask_copy(cm_id_priv->compare_data->data, compare_data->data, compare_data->mask); memcpy(cm_id_priv->compare_data->mask, compare_data->mask, - IB_CM_COMPARE_SIZE); + sizeof(compare_data->mask)); } cm_id->state = IB_CM_LISTEN; diff --git a/drivers/infiniband/core/cm_msgs.h b/drivers/infiniband/core/cm_msgs.h index be068f47e47e..8b76f0ef965e 100644 --- a/drivers/infiniband/core/cm_msgs.h +++ b/drivers/infiniband/core/cm_msgs.h @@ -103,7 +103,7 @@ struct cm_req_msg { /* local ACK timeout:5, rsvd:3 */ u8 alt_offset139; - u8 private_data[IB_CM_REQ_PRIVATE_DATA_SIZE]; + u32 private_data[IB_CM_REQ_PRIVATE_DATA_SIZE / sizeof(u32)]; } __attribute__ ((packed)); @@ -801,7 +801,7 @@ struct cm_sidr_req_msg { __be16 rsvd; __be64 service_id; - u8 private_data[IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE]; + u32 private_data[IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE / sizeof(u32)]; } __attribute__ ((packed)); struct cm_sidr_rep_msg { diff --git a/include/rdma/ib_cm.h b/include/rdma/ib_cm.h index 0e3ff30647d5..39ed2d2fbd51 100644 --- a/include/rdma/ib_cm.h +++ b/include/rdma/ib_cm.h @@ -105,7 +105,8 @@ enum ib_cm_data_size { IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE = 216, IB_CM_SIDR_REP_PRIVATE_DATA_SIZE = 136, IB_CM_SIDR_REP_INFO_LENGTH = 72, - IB_CM_COMPARE_SIZE = 64 + /* compare done u32 at a time */ + IB_CM_COMPARE_SIZE = (64 / sizeof(u32)) }; struct ib_cm_id; @@ -337,8 +338,8 @@ void ib_destroy_cm_id(struct ib_cm_id *cm_id); #define IB_SDP_SERVICE_ID_MASK cpu_to_be64(0xFFFFFFFFFFFF0000ULL) struct ib_cm_compare_data { - u8 data[IB_CM_COMPARE_SIZE]; - u8 mask[IB_CM_COMPARE_SIZE]; + u32 data[IB_CM_COMPARE_SIZE]; + u32 mask[IB_CM_COMPARE_SIZE]; }; /** From 179d03bbfd2ebc63934753a696467d28bf9f5b64 Mon Sep 17 00:00:00 2001 From: Hariprasad S Date: Tue, 5 May 2015 03:55:24 +0530 Subject: [PATCH 060/101] iw_cxgb4: Remove negative advice dmesg warnings Remove these log messages in favor of per-endpoint counters as well as device-global counters that can be inspected via debugfs. Signed-off-by: Steve Wise Signed-off-by: Hariprasad Shenai Signed-off-by: Doug Ledford --- drivers/infiniband/hw/cxgb4/cm.c | 27 +++++++++++++++++--------- drivers/infiniband/hw/cxgb4/device.c | 7 +++++++ drivers/infiniband/hw/cxgb4/iw_cxgb4.h | 7 +++++++ 3 files changed, 32 insertions(+), 9 deletions(-) diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 3c3b00e4e7af..bb95a6c0477b 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -2058,9 +2058,12 @@ static int act_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb) status, status2errno(status)); if (is_neg_adv(status)) { - dev_warn(&dev->rdev.lldi.pdev->dev, - "Connection problems for atid %u status %u (%s)\n", - atid, status, neg_adv_str(status)); + PDBG("%s Connection problems for atid %u status %u (%s)\n", + __func__, atid, status, neg_adv_str(status)); + ep->stats.connect_neg_adv++; + mutex_lock(&dev->rdev.stats.lock); + dev->rdev.stats.neg_adv++; + mutex_unlock(&dev->rdev.stats.lock); return 0; } @@ -2566,9 +2569,13 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb) ep = lookup_tid(t, tid); if (is_neg_adv(req->status)) { - dev_warn(&dev->rdev.lldi.pdev->dev, - "Negative advice on abort - tid %u status %d (%s)\n", - ep->hwtid, req->status, neg_adv_str(req->status)); + PDBG("%s Negative advice on abort- tid %u status %d (%s)\n", + __func__, ep->hwtid, req->status, + neg_adv_str(req->status)); + ep->stats.abort_neg_adv++; + mutex_lock(&dev->rdev.stats.lock); + dev->rdev.stats.neg_adv++; + mutex_unlock(&dev->rdev.stats.lock); return 0; } PDBG("%s ep %p tid %u state %u\n", __func__, ep, ep->hwtid, @@ -3977,9 +3984,11 @@ static int peer_abort_intr(struct c4iw_dev *dev, struct sk_buff *skb) return 0; } if (is_neg_adv(req->status)) { - dev_warn(&dev->rdev.lldi.pdev->dev, - "Negative advice on abort - tid %u status %d (%s)\n", - ep->hwtid, req->status, neg_adv_str(req->status)); + PDBG("%s Negative advice on abort- tid %u status %d (%s)\n", + __func__, ep->hwtid, req->status, + neg_adv_str(req->status)); + ep->stats.abort_neg_adv++; + dev->rdev.stats.neg_adv++; kfree_skb(skb); return 0; } diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c index 1ffbd038c0ae..cf54d6922dc4 100644 --- a/drivers/infiniband/hw/cxgb4/device.c +++ b/drivers/infiniband/hw/cxgb4/device.c @@ -490,6 +490,7 @@ static int stats_show(struct seq_file *seq, void *v) dev->rdev.stats.act_ofld_conn_fails); seq_printf(seq, "PAS_OFLD_CONN_FAILS: %10llu\n", dev->rdev.stats.pas_ofld_conn_fails); + seq_printf(seq, "NEG_ADV_RCVD: %10llu\n", dev->rdev.stats.neg_adv); seq_printf(seq, "AVAILABLE IRD: %10u\n", dev->avail_ird); return 0; } @@ -561,10 +562,13 @@ static int dump_ep(int id, void *p, void *data) cc = snprintf(epd->buf + epd->pos, space, "ep %p cm_id %p qp %p state %d flags 0x%lx " "history 0x%lx hwtid %d atid %d " + "conn_na %u abort_na %u " "%pI4:%d/%d <-> %pI4:%d/%d\n", ep, ep->com.cm_id, ep->com.qp, (int)ep->com.state, ep->com.flags, ep->com.history, ep->hwtid, ep->atid, + ep->stats.connect_neg_adv, + ep->stats.abort_neg_adv, &lsin->sin_addr, ntohs(lsin->sin_port), ntohs(mapped_lsin->sin_port), &rsin->sin_addr, ntohs(rsin->sin_port), @@ -582,10 +586,13 @@ static int dump_ep(int id, void *p, void *data) cc = snprintf(epd->buf + epd->pos, space, "ep %p cm_id %p qp %p state %d flags 0x%lx " "history 0x%lx hwtid %d atid %d " + "conn_na %u abort_na %u " "%pI6:%d/%d <-> %pI6:%d/%d\n", ep, ep->com.cm_id, ep->com.qp, (int)ep->com.state, ep->com.flags, ep->com.history, ep->hwtid, ep->atid, + ep->stats.connect_neg_adv, + ep->stats.abort_neg_adv, &lsin6->sin6_addr, ntohs(lsin6->sin6_port), ntohs(mapped_lsin6->sin6_port), &rsin6->sin6_addr, ntohs(rsin6->sin6_port), diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index d87e1650f643..97bb5550a6cf 100644 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -137,6 +137,7 @@ struct c4iw_stats { u64 tcam_full; u64 act_ofld_conn_fails; u64 pas_ofld_conn_fails; + u64 neg_adv; }; struct c4iw_hw_queue { @@ -814,6 +815,11 @@ struct c4iw_listen_ep { int backlog; }; +struct c4iw_ep_stats { + unsigned connect_neg_adv; + unsigned abort_neg_adv; +}; + struct c4iw_ep { struct c4iw_ep_common com; struct c4iw_ep *parent_ep; @@ -846,6 +852,7 @@ struct c4iw_ep { unsigned int retry_count; int snd_win; int rcv_win; + struct c4iw_ep_stats stats; }; static inline void print_addr(struct c4iw_ep_common *epc, const char *func, From 8f71c1a27b84948720be17fffba71a67a1f0942d Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 5 May 2015 13:01:39 +0200 Subject: [PATCH 061/101] IPoIB/CM: Fix indentation level See also patch "IPoIB/cm: Add connected mode support for devices without SRQs" (commit ID 68e995a29572). Detected by smatch. Signed-off-by: Bart Van Assche Cc: Pradeep Satyanarayana Signed-off-by: Doug Ledford --- drivers/infiniband/ulp/ipoib/ipoib_cm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index 56959adb6c7d..cf32a778e7d0 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -386,8 +386,8 @@ static int ipoib_cm_nonsrq_init_rx(struct net_device *dev, struct ib_cm_id *cm_i rx->rx_ring[i].mapping, GFP_KERNEL)) { ipoib_warn(priv, "failed to allocate receive buffer %d\n", i); - ret = -ENOMEM; - goto err_count; + ret = -ENOMEM; + goto err_count; } ret = ipoib_cm_post_receive_nonsrq(dev, rx, &t->wr, t->sge, i); if (ret) { From 954138dc25dca0263f315c5a3450059df05a4ea1 Mon Sep 17 00:00:00 2001 From: Yann Droneaud Date: Mon, 4 May 2015 14:31:03 +0200 Subject: [PATCH 062/101] MAINTAINERS: add include/rdma/ to InfiniBand subsystem Most headers for InfiniBand/RDMA are located under include/rdma/ and include/uapi/rdma. Signed-off-by: Yann Droneaud Reviewed-by: Ira Weiny Signed-off-by: Doug Ledford --- MAINTAINERS | 2 ++ 1 file changed, 2 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 781e099495d3..c80714ad3114 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5053,6 +5053,8 @@ S: Supported F: Documentation/infiniband/ F: drivers/infiniband/ F: include/uapi/linux/if_infiniband.h +F: include/uapi/rdma/ +F: include/rdma/ INOTIFY M: John McCutchan From b6b2bbe65b2117afd9766faa7cffea4d8f681455 Mon Sep 17 00:00:00 2001 From: Doug Ledford Date: Tue, 5 May 2015 12:57:09 -0400 Subject: [PATCH 063/101] MAINTAINERS: Update InfiniBand subsystem maintainer Since Roland stepped down, the community asked me to take his place, and the nomination was followed by sufficient votes and no dissensions that we can move forward with the change. Signed-off-by: Doug Ledford --- MAINTAINERS | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index c80714ad3114..40c14f14204d 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5042,13 +5042,13 @@ S: Orphan F: drivers/video/fbdev/imsttfb.c INFINIBAND SUBSYSTEM -M: Roland Dreier +M: Doug Ledford M: Sean Hefty M: Hal Rosenstock L: linux-rdma@vger.kernel.org W: http://www.openfabrics.org/ Q: http://patchwork.kernel.org/project/linux-rdma/list/ -T: git git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband.git +T: git git://github.com/dledford/linux.git S: Supported F: Documentation/infiniband/ F: drivers/infiniband/ From 5cec98834989a014a9560b1841649eaca95cf00e Mon Sep 17 00:00:00 2001 From: Boris Ostrovsky Date: Wed, 29 Apr 2015 17:10:12 -0400 Subject: [PATCH 064/101] xen/events: Clear cpu_evtchn_mask before resuming When a guest is resumed, the hypervisor may change event channel assignments. If this happens and the guest uses 2-level events it is possible for the interrupt to be claimed by wrong VCPU since cpu_evtchn_mask bits may be stale. This can happen even though evtchn_2l_bind_to_cpu() attempts to clear old bits: irq_info that is passed in is not necessarily the original one (from pre-migration times) but instead is freshly allocated during resume and so any information about which CPU the channel was bound to is lost. Thus we should clear the mask during resume. We also need to make sure that bits for xenstore and console channels are set when these two subsystems are resumed. While rebind_evtchn_irq() (which is invoked for both of them on a resume) calls irq_set_affinity(), the latter will in fact postpone setting affinity until handling the interrupt. But because cpu_evtchn_mask will have bits for these two cleared we won't be able to take the interrupt. With that in mind, we need to bind those two channels explicitly in rebind_evtchn_irq(). We will keep irq_set_affinity() so that we have a pass through generic irq affinity code later, in case something needs to be updated there as well. (Also replace cpumask_of(0) with cpumask_of(info->cpu) in rebind_evtchn_irq(): it should be set to zero in preceding xen_irq_info_evtchn_setup().) Signed-off-by: Boris Ostrovsky Reported-by: Annie Li Cc: # 3.14+ Signed-off-by: David Vrabel --- drivers/xen/events/events_2l.c | 10 ++++++++++ drivers/xen/events/events_base.c | 5 +++-- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/xen/events/events_2l.c b/drivers/xen/events/events_2l.c index 5db43fc100a4..7dd46312c180 100644 --- a/drivers/xen/events/events_2l.c +++ b/drivers/xen/events/events_2l.c @@ -345,6 +345,15 @@ irqreturn_t xen_debug_interrupt(int irq, void *dev_id) return IRQ_HANDLED; } +static void evtchn_2l_resume(void) +{ + int i; + + for_each_online_cpu(i) + memset(per_cpu(cpu_evtchn_mask, i), 0, sizeof(xen_ulong_t) * + EVTCHN_2L_NR_CHANNELS/BITS_PER_EVTCHN_WORD); +} + static const struct evtchn_ops evtchn_ops_2l = { .max_channels = evtchn_2l_max_channels, .nr_channels = evtchn_2l_max_channels, @@ -356,6 +365,7 @@ static const struct evtchn_ops evtchn_ops_2l = { .mask = evtchn_2l_mask, .unmask = evtchn_2l_unmask, .handle_events = evtchn_2l_handle_events, + .resume = evtchn_2l_resume, }; void __init xen_evtchn_2l_init(void) diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c index 70fba973a107..a1ec564d791c 100644 --- a/drivers/xen/events/events_base.c +++ b/drivers/xen/events/events_base.c @@ -1279,8 +1279,9 @@ void rebind_evtchn_irq(int evtchn, int irq) mutex_unlock(&irq_mapping_update_lock); - /* new event channels are always bound to cpu 0 */ - irq_set_affinity(irq, cpumask_of(0)); + bind_evtchn_to_cpu(evtchn, info->cpu); + /* This will be deferred until interrupt is processed */ + irq_set_affinity(irq, cpumask_of(info->cpu)); /* Unmask the event channel. */ enable_irq(irq); From 16f1cf3ba7303228372d3756677bf7d10e79cf9f Mon Sep 17 00:00:00 2001 From: Boris Ostrovsky Date: Wed, 29 Apr 2015 17:10:13 -0400 Subject: [PATCH 065/101] xen/xenbus: Update xenbus event channel on resume After a resume the hypervisor/tools may change xenbus event channel number. We should re-query it. Signed-off-by: Boris Ostrovsky Cc: Signed-off-by: David Vrabel --- drivers/xen/xenbus/xenbus_probe.c | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c index 564b31584860..5390a674b5e3 100644 --- a/drivers/xen/xenbus/xenbus_probe.c +++ b/drivers/xen/xenbus/xenbus_probe.c @@ -57,6 +57,7 @@ #include #include #include +#include #include #include @@ -735,6 +736,30 @@ static int __init xenstored_local_init(void) return err; } +static int xenbus_resume_cb(struct notifier_block *nb, + unsigned long action, void *data) +{ + int err = 0; + + if (xen_hvm_domain()) { + uint64_t v; + + err = hvm_get_parameter(HVM_PARAM_STORE_EVTCHN, &v); + if (!err && v) + xen_store_evtchn = v; + else + pr_warn("Cannot update xenstore event channel: %d\n", + err); + } else + xen_store_evtchn = xen_start_info->store_evtchn; + + return err; +} + +static struct notifier_block xenbus_resume_nb = { + .notifier_call = xenbus_resume_cb, +}; + static int __init xenbus_init(void) { int err = 0; @@ -793,6 +818,10 @@ static int __init xenbus_init(void) goto out_error; } + if ((xen_store_domain_type != XS_LOCAL) && + (xen_store_domain_type != XS_UNKNOWN)) + xen_resume_notifier_register(&xenbus_resume_nb); + #ifdef CONFIG_XEN_COMPAT_XENFS /* * Create xenfs mountpoint in /proc for compatibility with From b9d934f27c91b878c4b2e64299d6e419a4022f8d Mon Sep 17 00:00:00 2001 From: Boris Ostrovsky Date: Wed, 29 Apr 2015 17:10:14 -0400 Subject: [PATCH 066/101] xen/console: Update console event channel on resume After a resume the hypervisor/tools may change console event channel number. We should re-query it. Signed-off-by: Boris Ostrovsky Cc: Signed-off-by: David Vrabel --- drivers/tty/hvc/hvc_xen.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/drivers/tty/hvc/hvc_xen.c b/drivers/tty/hvc/hvc_xen.c index f1e57425e39f..5bab1c684bb1 100644 --- a/drivers/tty/hvc/hvc_xen.c +++ b/drivers/tty/hvc/hvc_xen.c @@ -299,11 +299,27 @@ static int xen_initial_domain_console_init(void) return 0; } +static void xen_console_update_evtchn(struct xencons_info *info) +{ + if (xen_hvm_domain()) { + uint64_t v; + int err; + + err = hvm_get_parameter(HVM_PARAM_CONSOLE_EVTCHN, &v); + if (!err && v) + info->evtchn = v; + } else + info->evtchn = xen_start_info->console.domU.evtchn; +} + void xen_console_resume(void) { struct xencons_info *info = vtermno_to_xencons(HVC_COOKIE); - if (info != NULL && info->irq) + if (info != NULL && info->irq) { + if (!xen_initial_domain()) + xen_console_update_evtchn(info); rebind_evtchn_irq(info->evtchn, info->irq); + } } static void xencons_disconnect_backend(struct xencons_info *info) From 16e6bd5970c88a2ac018b84a5f1dd5c2ff1fdf2c Mon Sep 17 00:00:00 2001 From: Boris Ostrovsky Date: Wed, 29 Apr 2015 17:10:15 -0400 Subject: [PATCH 067/101] xen/events: Set irq_info->evtchn before binding the channel to CPU in __startup_pirq() .. because bind_evtchn_to_cpu(evtchn, cpu) will map evtchn to 'info' and pass 'info' down to xen_evtchn_port_bind_to_cpu(). Signed-off-by: Boris Ostrovsky Tested-by: Annie Li Cc: Signed-off-by: David Vrabel --- drivers/xen/events/events_base.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c index a1ec564d791c..2b8553bd8715 100644 --- a/drivers/xen/events/events_base.c +++ b/drivers/xen/events/events_base.c @@ -529,8 +529,8 @@ static unsigned int __startup_pirq(unsigned int irq) if (rc) goto err; - bind_evtchn_to_cpu(evtchn, 0); info->evtchn = evtchn; + bind_evtchn_to_cpu(evtchn, 0); rc = xen_evtchn_port_setup(info); if (rc) From a71dbdaa8ca2933391b08e0ae5567083e3af0892 Mon Sep 17 00:00:00 2001 From: Boris Ostrovsky Date: Mon, 4 May 2015 11:02:15 -0400 Subject: [PATCH 068/101] hypervisor/x86/xen: Unset X86_BUG_SYSRET_SS_ATTRS on Xen PV guests Commit 61f01dd941ba ("x86_64, asm: Work around AMD SYSRET SS descriptor attribute issue") makes AMD processors set SS to __KERNEL_DS in __switch_to() to deal with cases when SS is NULL. This breaks Xen PV guests who do not want to load SS with__KERNEL_DS. Since the problem that the commit is trying to address would have to be fixed in the hypervisor (if it in fact exists under Xen) there is no reason to set X86_BUG_SYSRET_SS_ATTRS flag for PV VPCUs here. This can be easily achieved by adding x86_hyper_xen_hvm.set_cpu_features op which will clear this flag. (And since this structure is no longer HVM-specific we should do some renaming). Signed-off-by: Boris Ostrovsky Reported-by: Sander Eikelenboom Signed-off-by: David Vrabel --- arch/x86/include/asm/hypervisor.h | 2 +- arch/x86/kernel/cpu/hypervisor.c | 4 ++-- arch/x86/xen/enlighten.c | 29 +++++++++++++++++++---------- 3 files changed, 22 insertions(+), 13 deletions(-) diff --git a/arch/x86/include/asm/hypervisor.h b/arch/x86/include/asm/hypervisor.h index e42f758a0fbd..055ea9941dd5 100644 --- a/arch/x86/include/asm/hypervisor.h +++ b/arch/x86/include/asm/hypervisor.h @@ -50,7 +50,7 @@ extern const struct hypervisor_x86 *x86_hyper; /* Recognized hypervisors */ extern const struct hypervisor_x86 x86_hyper_vmware; extern const struct hypervisor_x86 x86_hyper_ms_hyperv; -extern const struct hypervisor_x86 x86_hyper_xen_hvm; +extern const struct hypervisor_x86 x86_hyper_xen; extern const struct hypervisor_x86 x86_hyper_kvm; extern void init_hypervisor(struct cpuinfo_x86 *c); diff --git a/arch/x86/kernel/cpu/hypervisor.c b/arch/x86/kernel/cpu/hypervisor.c index 36ce402a3fa5..d820d8eae96b 100644 --- a/arch/x86/kernel/cpu/hypervisor.c +++ b/arch/x86/kernel/cpu/hypervisor.c @@ -27,8 +27,8 @@ static const __initconst struct hypervisor_x86 * const hypervisors[] = { -#ifdef CONFIG_XEN_PVHVM - &x86_hyper_xen_hvm, +#ifdef CONFIG_XEN + &x86_hyper_xen, #endif &x86_hyper_vmware, &x86_hyper_ms_hyperv, diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 94578efd3067..46957ead3060 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1760,6 +1760,9 @@ static struct notifier_block xen_hvm_cpu_notifier = { static void __init xen_hvm_guest_init(void) { + if (xen_pv_domain()) + return; + init_hvm_pv_info(); xen_hvm_init_shared_info(); @@ -1775,6 +1778,7 @@ static void __init xen_hvm_guest_init(void) xen_hvm_init_time_ops(); xen_hvm_init_mmu_ops(); } +#endif static bool xen_nopv = false; static __init int xen_parse_nopv(char *arg) @@ -1784,14 +1788,11 @@ static __init int xen_parse_nopv(char *arg) } early_param("xen_nopv", xen_parse_nopv); -static uint32_t __init xen_hvm_platform(void) +static uint32_t __init xen_platform(void) { if (xen_nopv) return 0; - if (xen_pv_domain()) - return 0; - return xen_cpuid_base(); } @@ -1809,11 +1810,19 @@ bool xen_hvm_need_lapic(void) } EXPORT_SYMBOL_GPL(xen_hvm_need_lapic); -const struct hypervisor_x86 x86_hyper_xen_hvm __refconst = { - .name = "Xen HVM", - .detect = xen_hvm_platform, +static void xen_set_cpu_features(struct cpuinfo_x86 *c) +{ + if (xen_pv_domain()) + clear_cpu_bug(c, X86_BUG_SYSRET_SS_ATTRS); +} + +const struct hypervisor_x86 x86_hyper_xen = { + .name = "Xen", + .detect = xen_platform, +#ifdef CONFIG_XEN_PVHVM .init_platform = xen_hvm_guest_init, - .x2apic_available = xen_x2apic_para_available, -}; -EXPORT_SYMBOL(x86_hyper_xen_hvm); #endif + .x2apic_available = xen_x2apic_para_available, + .set_cpu_features = xen_set_cpu_features, +}; +EXPORT_SYMBOL(x86_hyper_xen); From de71ad2c97862eae1516aa36528cc3b317c17b2f Mon Sep 17 00:00:00 2001 From: Marc Dionne Date: Mon, 4 May 2015 15:16:44 -0300 Subject: [PATCH 069/101] x86: Make cpu_tss available to external modules Commit 75182b1632 ("x86/asm/entry: Switch all C consumers of kernel_stack to this_cpu_sp0()") changed current_thread_info to use this_cpu_sp0, and indirectly made it rely on init_tss which was exported with EXPORT_PER_CPU_SYMBOL_GPL. As a result some macros and inline functions such as set/get_fs, test_thread_flag and variants have been made unusable for external modules. Make cpu_tss exported with EXPORT_PER_CPU_SYMBOL so that these functions are accessible again, as they were previously. Signed-off-by: Marc Dionne Acked-by: Andy Lutomirski Link: http://lkml.kernel.org/r/1430763404-21221-1-git-send-email-marc.dionne@your-file-system.com Signed-off-by: Thomas Gleixner --- arch/x86/kernel/process.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 8213da62b1b7..bfc99b3b6522 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -57,7 +57,7 @@ __visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = { .io_bitmap = { [0 ... IO_BITMAP_LONGS] = ~0 }, #endif }; -EXPORT_PER_CPU_SYMBOL_GPL(cpu_tss); +EXPORT_PER_CPU_SYMBOL(cpu_tss); #ifdef CONFIG_X86_64 static DEFINE_PER_CPU(unsigned char, is_idle); From d467f7a405cf0e7f06ed8d3175607ebb4ed06671 Mon Sep 17 00:00:00 2001 From: Corey Minyard Date: Thu, 26 Mar 2015 13:35:18 -0500 Subject: [PATCH 070/101] ipmi_ssif: Fix the logic on user-supplied addresses Returning zero is success. Signed-off-by: Corey Minyard --- drivers/char/ipmi/ipmi_ssif.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/char/ipmi/ipmi_ssif.c b/drivers/char/ipmi/ipmi_ssif.c index f40e3bd2c69c..1de1914f5f89 100644 --- a/drivers/char/ipmi/ipmi_ssif.c +++ b/drivers/char/ipmi/ipmi_ssif.c @@ -1832,7 +1832,7 @@ static int init_ipmi_ssif(void) rv = new_ssif_client(addr[i], adapter_name[i], dbg[i], slave_addrs[i], SI_HARDCODED); - if (!rv) + if (rv) pr_err(PFX "Couldn't add hardcoded device at addr 0x%x\n", addr[i]); From b0e9aaa99dfb3036829e91d4f0aae449639e221a Mon Sep 17 00:00:00 2001 From: Corey Minyard Date: Tue, 31 Mar 2015 12:48:53 -0500 Subject: [PATCH 071/101] ipmi:ssif: Ignore spaces when comparing I2C adapter names Some of the adapters have spaces in their names, but that's really hard to pass in as a module or kernel parameters. So ignore the spaces. Signed-off-by: Corey Minyard --- Documentation/IPMI.txt | 5 ++++- drivers/char/ipmi/ipmi_ssif.c | 25 ++++++++++++++++++++++--- 2 files changed, 26 insertions(+), 4 deletions(-) diff --git a/Documentation/IPMI.txt b/Documentation/IPMI.txt index 653d5d739d7f..31d1d658827f 100644 --- a/Documentation/IPMI.txt +++ b/Documentation/IPMI.txt @@ -505,7 +505,10 @@ at module load time (for a module) with: The addresses are normal I2C addresses. The adapter is the string name of the adapter, as shown in /sys/class/i2c-adapter/i2c-/name. -It is *NOT* i2c- itself. +It is *NOT* i2c- itself. Also, the comparison is done ignoring +spaces, so if the name is "This is an I2C chip" you can say +adapter_name=ThisisanI2cchip. This is because it's hard to pass in +spaces in kernel parameters. The debug flags are bit flags for each BMC found, they are: IPMI messages: 1, driver state: 2, timing: 4, I2C probe: 8 diff --git a/drivers/char/ipmi/ipmi_ssif.c b/drivers/char/ipmi/ipmi_ssif.c index 1de1914f5f89..3c3b7257867b 100644 --- a/drivers/char/ipmi/ipmi_ssif.c +++ b/drivers/char/ipmi/ipmi_ssif.c @@ -1258,6 +1258,23 @@ static const struct file_operations smi_stats_proc_ops = { .release = single_release, }; +static int strcmp_nospace(char *s1, char *s2) +{ + while (*s1 && *s2) { + while (isspace(*s1)) + s1++; + while (isspace(*s2)) + s2++; + if (*s1 > *s2) + return 1; + if (*s1 < *s2) + return -1; + s1++; + s2++; + } + return 0; +} + static struct ssif_addr_info *ssif_info_find(unsigned short addr, char *adapter_name, bool match_null_name) @@ -1272,8 +1289,10 @@ static struct ssif_addr_info *ssif_info_find(unsigned short addr, /* One is NULL and one is not */ continue; } - if (strcmp(info->adapter_name, adapter_name)) - /* Names to not match */ + if (adapter_name && + strcmp_nospace(info->adapter_name, + adapter_name)) + /* Names do not match */ continue; } found = info; @@ -1407,7 +1426,7 @@ static int ssif_probe(struct i2c_client *client, const struct i2c_device_id *id) } else { no_support: /* Assume no multi-part or PEC support */ - pr_info(PFX "Error fetching SSIF: %d %d %2.2x, your system probably doesn't support this command so using defaults\n", + pr_info(PFX "Error fetching SSIF: %d %d %2.2x, your system probably doesn't support this command so using defaults\n", rv, len, resp[2]); ssif_info->max_xmit_msg_size = 32; From 5e33cd0c5a299772b5ec1a493f0a77548664ae06 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Sun, 22 Feb 2015 10:21:07 -0800 Subject: [PATCH 072/101] ipmi: Remove incorrect use of seq_has_overflowed commit d6c5dc18d863 ("ipmi: Remove uses of return value of seq_printf") incorrectly changed the return value of various proc_show functions to use seq_has_overflowed(). These functions should return 0 on completion rather than 1/true on overflow. 1 is the same as #define SEQ_SKIP which would cause the output to not be emitted (skipped) instead. This is a logical defect only as the length of these outputs are all smaller than the initial allocation done by the seq filesystem. Signed-off-by: Joe Perches Signed-off-by: Corey Minyard --- drivers/char/ipmi/ipmi_msghandler.c | 4 ++-- drivers/char/ipmi/ipmi_si_intf.c | 4 ++-- drivers/char/ipmi/ipmi_ssif.c | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/char/ipmi/ipmi_msghandler.c b/drivers/char/ipmi/ipmi_msghandler.c index 9bb592872532..bf75f6361773 100644 --- a/drivers/char/ipmi/ipmi_msghandler.c +++ b/drivers/char/ipmi/ipmi_msghandler.c @@ -2000,7 +2000,7 @@ static int smi_ipmb_proc_show(struct seq_file *m, void *v) seq_printf(m, " %x", intf->channels[i].address); seq_putc(m, '\n'); - return seq_has_overflowed(m); + return 0; } static int smi_ipmb_proc_open(struct inode *inode, struct file *file) @@ -2023,7 +2023,7 @@ static int smi_version_proc_show(struct seq_file *m, void *v) ipmi_version_major(&intf->bmc->id), ipmi_version_minor(&intf->bmc->id)); - return seq_has_overflowed(m); + return 0; } static int smi_version_proc_open(struct inode *inode, struct file *file) diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c index 5e90a18afbaf..468c75e10330 100644 --- a/drivers/char/ipmi/ipmi_si_intf.c +++ b/drivers/char/ipmi/ipmi_si_intf.c @@ -3080,7 +3080,7 @@ static int smi_type_proc_show(struct seq_file *m, void *v) seq_printf(m, "%s\n", si_to_str[smi->si_type]); - return seq_has_overflowed(m); + return 0; } static int smi_type_proc_open(struct inode *inode, struct file *file) @@ -3153,7 +3153,7 @@ static int smi_params_proc_show(struct seq_file *m, void *v) smi->irq, smi->slave_addr); - return seq_has_overflowed(m); + return 0; } static int smi_params_proc_open(struct inode *inode, struct file *file) diff --git a/drivers/char/ipmi/ipmi_ssif.c b/drivers/char/ipmi/ipmi_ssif.c index 3c3b7257867b..ee3b8c5e7e21 100644 --- a/drivers/char/ipmi/ipmi_ssif.c +++ b/drivers/char/ipmi/ipmi_ssif.c @@ -1200,7 +1200,7 @@ static int smi_type_proc_show(struct seq_file *m, void *v) { seq_puts(m, "ssif\n"); - return seq_has_overflowed(m); + return 0; } static int smi_type_proc_open(struct inode *inode, struct file *file) From b1e65e71535aa128089d4cb1b6d90db7551fcb05 Mon Sep 17 00:00:00 2001 From: Corey Minyard Date: Fri, 10 Apr 2015 20:19:18 -0500 Subject: [PATCH 073/101] ipmi: Don't report err in the SI driver for SSIF devices Really ignore them by returning -ENODEV from the probe, but not doing anything. Signed-off-by: Corey Minyard --- drivers/char/ipmi/ipmi_si_intf.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c index 468c75e10330..461274168d0f 100644 --- a/drivers/char/ipmi/ipmi_si_intf.c +++ b/drivers/char/ipmi/ipmi_si_intf.c @@ -2244,7 +2244,7 @@ static int ipmi_pnp_probe(struct pnp_dev *dev, acpi_handle handle; acpi_status status; unsigned long long tmp; - int rv; + int rv = -EINVAL; acpi_dev = pnp_acpi_device(dev); if (!acpi_dev) @@ -2276,6 +2276,7 @@ static int ipmi_pnp_probe(struct pnp_dev *dev, info->si_type = SI_BT; break; case 4: /* SSIF, just ignore */ + rv = -ENODEV; goto err_free; default: dev_info(&dev->dev, "unknown IPMI type %lld\n", tmp); @@ -2336,7 +2337,7 @@ static int ipmi_pnp_probe(struct pnp_dev *dev, err_free: kfree(info); - return -EINVAL; + return rv; } static void ipmi_pnp_remove(struct pnp_dev *dev) From df6dd1b35b0ec0ac6a5298378ceaf487091f448c Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Mon, 27 Apr 2015 09:45:06 +0200 Subject: [PATCH 074/101] thinkpad_acpi: Fix warning for static not at beginning Fix the following warning: warning: "static" is not at beginning of declaration void static hotkey_mask_warn_incomplete_mask(void) ^ Signed-off-by: Jean Delvare Cc: Henrique de Moraes Holschuh Cc: Darren Hart Signed-off-by: Darren Hart --- drivers/platform/x86/thinkpad_acpi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index 7769575345d8..9bb9ad6d4a1b 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -2115,7 +2115,7 @@ static int hotkey_mask_get(void) return 0; } -void static hotkey_mask_warn_incomplete_mask(void) +static void hotkey_mask_warn_incomplete_mask(void) { /* log only what the user can fix... */ const u32 wantedmask = hotkey_driver_mask & From 99ebbd30e3640f6addb37f222b4d6ad4b807d9ea Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Tue, 5 May 2015 16:23:25 -0700 Subject: [PATCH 075/101] revert "zram: move compact_store() to sysfs functions area" Revert commit c72c6160d967ed26a0b136dbab337f821d233509 It was intended to be a cosmetic change that w/o any functional change and was part of a bigger change: http://lkml.iu.edu/hypermail/linux/kernel/1503.1/01818.html Sergey Senozhatsky Cc: Linus Torvalds Cc: Minchan Kim Cc: Nitin Gupta Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/block/zram/zram_drv.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index c94386aa563d..8dcbced0eafd 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -74,6 +74,27 @@ static inline struct zram *dev_to_zram(struct device *dev) return (struct zram *)dev_to_disk(dev)->private_data; } +static ssize_t compact_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t len) +{ + unsigned long nr_migrated; + struct zram *zram = dev_to_zram(dev); + struct zram_meta *meta; + + down_read(&zram->init_lock); + if (!init_done(zram)) { + up_read(&zram->init_lock); + return -EINVAL; + } + + meta = zram->meta; + nr_migrated = zs_compact(meta->mem_pool); + atomic64_add(nr_migrated, &zram->stats.num_migrated); + up_read(&zram->init_lock); + + return len; +} + static ssize_t disksize_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -1038,6 +1059,7 @@ static const struct block_device_operations zram_devops = { .owner = THIS_MODULE }; +static DEVICE_ATTR_WO(compact); static DEVICE_ATTR_RW(disksize); static DEVICE_ATTR_RO(initstate); static DEVICE_ATTR_WO(reset); @@ -1114,6 +1136,7 @@ static struct attribute *zram_disk_attrs[] = { &dev_attr_num_writes.attr, &dev_attr_failed_reads.attr, &dev_attr_failed_writes.attr, + &dev_attr_compact.attr, &dev_attr_invalid_io.attr, &dev_attr_notify_free.attr, &dev_attr_zero_pages.attr, From 74f3037c4015f3a440dc4cb4e31477875fa9791c Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Tue, 5 May 2015 16:23:28 -0700 Subject: [PATCH 076/101] zram: add Designated Reviewer for zram in MAINTAINERS Sergey Senozhatsky has contributed/reviewed to zram for a long time. He is really helpful for maintaining zram so I want for him to continue helping me as Designated Reviewer unless he hates it. Signed-off-by: Minchan Kim Cc: Sergey Senozhatsky Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 781e099495d3..1c7fd3c85ba7 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -11037,6 +11037,7 @@ F: drivers/media/pci/zoran/ ZRAM COMPRESSED RAM BLOCK DEVICE DRVIER M: Minchan Kim M: Nitin Gupta +R: Sergey Senozhatsky L: linux-kernel@vger.kernel.org S: Maintained F: drivers/block/zram/ From 48b945a19cf6e7e548b2ce545ec88f93284ab276 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Tue, 5 May 2015 16:23:30 -0700 Subject: [PATCH 077/101] MAINTAINERS: add co-maintainer for LED subsystem Add myself (Jacek Anaszewski) as a co-maintainer for the LED subsystem. Signed-off-by: Jacek Anaszewski Acked-by: Bryan Wu Cc: Richard Purdie Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 1c7fd3c85ba7..b399b34a2496 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5805,6 +5805,7 @@ F: drivers/scsi/53c700* LED SUBSYSTEM M: Bryan Wu M: Richard Purdie +M: Jacek Anaszewski L: linux-leds@vger.kernel.org T: git git://git.kernel.org/pub/scm/linux/kernel/git/cooloney/linux-leds.git S: Maintained From 7d616e4ddb9c0754ed6245a43332d5b867e4db11 Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Tue, 5 May 2015 16:23:33 -0700 Subject: [PATCH 078/101] lib: delete lib/find_last_bit.c The file lib/find_last_bit.c was no longer used and supposed to be deleted by commit 8f6f19dd51 ("lib: move find_last_bit to lib/find_next_bit.c") but that delete didn't happen. This gets rid of it. Signed-off-by: Yury Norov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/find_last_bit.c | 41 ----------------------------------------- 1 file changed, 41 deletions(-) delete mode 100644 lib/find_last_bit.c diff --git a/lib/find_last_bit.c b/lib/find_last_bit.c deleted file mode 100644 index 3e3be40c6a6e..000000000000 --- a/lib/find_last_bit.c +++ /dev/null @@ -1,41 +0,0 @@ -/* find_last_bit.c: fallback find next bit implementation - * - * Copyright (C) 2008 IBM Corporation - * Written by Rusty Russell - * (Inspired by David Howell's find_next_bit implementation) - * - * Rewritten by Yury Norov to decrease - * size and improve performance, 2015. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include -#include -#include -#include - -#ifndef find_last_bit - -unsigned long find_last_bit(const unsigned long *addr, unsigned long size) -{ - if (size) { - unsigned long val = BITMAP_LAST_WORD_MASK(size); - unsigned long idx = (size-1) / BITS_PER_LONG; - - do { - val &= addr[idx]; - if (val) - return idx * BITS_PER_LONG + __fls(val); - - val = ~0ul; - } while (idx--); - } - return size; -} -EXPORT_SYMBOL(find_last_bit); - -#endif From 09789e5de18e4e442870b2d700831f5cb802eb05 Mon Sep 17 00:00:00 2001 From: Naoya Horiguchi Date: Tue, 5 May 2015 16:23:35 -0700 Subject: [PATCH 079/101] mm/memory-failure: call shake_page() when error hits thp tail page Currently memory_failure() calls shake_page() to sweep pages out from pcplists only when the victim page is 4kB LRU page or thp head page. But we should do this for a thp tail page too. Consider that a memory error hits a thp tail page whose head page is on a pcplist when memory_failure() runs. Then, the current kernel skips shake_pages() part, so hwpoison_user_mappings() returns without calling split_huge_page() nor try_to_unmap() because PageLRU of the thp head is still cleared due to the skip of shake_page(). As a result, me_huge_page() runs for the thp, which is broken behavior. One effect is a leak of the thp. And another is to fail to isolate the memory error, so later access to the error address causes another MCE, which kills the processes which used the thp. This patch fixes this problem by calling shake_page() for thp tail case. Fixes: 385de35722c9 ("thp: allow a hwpoisoned head page to be put back to LRU") Signed-off-by: Naoya Horiguchi Reviewed-by: Andi Kleen Acked-by: Dean Nelson Cc: Andrea Arcangeli Cc: Hidetoshi Seto Cc: Jin Dongming Cc: [3.4+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory-failure.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/mm/memory-failure.c b/mm/memory-failure.c index d9359b770cd9..22e0f270e4f7 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -1187,10 +1187,10 @@ int memory_failure(unsigned long pfn, int trapno, int flags) * The check (unnecessarily) ignores LRU pages being isolated and * walked by the page reclaim code, however that's not a big loss. */ - if (!PageHuge(p) && !PageTransTail(p)) { - if (!PageLRU(p)) - shake_page(p, 0); - if (!PageLRU(p)) { + if (!PageHuge(p)) { + if (!PageLRU(hpage)) + shake_page(hpage, 0); + if (!PageLRU(hpage)) { /* * shake_page could have turned it free. */ From 01e76903f655a4d88c2e09d3182436c65f6e1213 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 5 May 2015 16:23:38 -0700 Subject: [PATCH 080/101] kasan: show gcc version requirements in Kconfig and Documentation The documentation shows a need for gcc > 4.9.2, but it's really >=. The Kconfig entries don't show require versions so add them. Correct a latter/later typo too. Also mention that gcc 5 required to catch out of bounds accesses to global and stack variables. Signed-off-by: Joe Perches Signed-off-by: Andrey Ryabinin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/kasan.txt | 8 +++++--- lib/Kconfig.kasan | 8 ++++++-- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/Documentation/kasan.txt b/Documentation/kasan.txt index 092fc10961fe..4692241789b1 100644 --- a/Documentation/kasan.txt +++ b/Documentation/kasan.txt @@ -9,7 +9,9 @@ a fast and comprehensive solution for finding use-after-free and out-of-bounds bugs. KASan uses compile-time instrumentation for checking every memory access, -therefore you will need a certain version of GCC > 4.9.2 +therefore you will need a gcc version of 4.9.2 or later. KASan could detect out +of bounds accesses to stack or global variables, but only if gcc 5.0 or later was +used to built the kernel. Currently KASan is supported only for x86_64 architecture and requires that the kernel be built with the SLUB allocator. @@ -23,8 +25,8 @@ To enable KASAN configure kernel with: and choose between CONFIG_KASAN_OUTLINE and CONFIG_KASAN_INLINE. Outline/inline is compiler instrumentation types. The former produces smaller binary the -latter is 1.1 - 2 times faster. Inline instrumentation requires GCC 5.0 or -latter. +latter is 1.1 - 2 times faster. Inline instrumentation requires a gcc version +of 5.0 or later. Currently KASAN works only with the SLUB memory allocator. For better bug detection and nicer report, enable CONFIG_STACKTRACE and put diff --git a/lib/Kconfig.kasan b/lib/Kconfig.kasan index 4fecaedc80a2..777eda7d1ab4 100644 --- a/lib/Kconfig.kasan +++ b/lib/Kconfig.kasan @@ -10,8 +10,11 @@ config KASAN help Enables kernel address sanitizer - runtime memory debugger, designed to find out-of-bounds accesses and use-after-free bugs. - This is strictly debugging feature. It consumes about 1/8 - of available memory and brings about ~x3 performance slowdown. + This is strictly a debugging feature and it requires a gcc version + of 4.9.2 or later. Detection of out of bounds accesses to stack or + global variables requires gcc 5.0 or later. + This feature consumes about 1/8 of available memory and brings about + ~x3 performance slowdown. For better error detection enable CONFIG_STACKTRACE, and add slub_debug=U to boot cmdline. @@ -40,6 +43,7 @@ config KASAN_INLINE memory accesses. This is faster than outline (in some workloads it gives about x2 boost over outline instrumentation), but make kernel's .text size much bigger. + This requires a gcc version of 5.0 or later. endchoice From c71f1e05e62f3eb843b6458eeb7298f269b34b1e Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Tue, 5 May 2015 16:23:41 -0700 Subject: [PATCH 081/101] Documentation: bindings: add abracon,abx80x Document the bindings for abracon,abx80x and related compatibles. Signed-off-by: Alexandre Belloni Cc: Philippe De Muyter Cc: Alessandro Zummo Cc: Arnd Bergmann Cc: Paul Bolle Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- .../bindings/rtc/abracon,abx80x.txt | 30 +++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100644 Documentation/devicetree/bindings/rtc/abracon,abx80x.txt diff --git a/Documentation/devicetree/bindings/rtc/abracon,abx80x.txt b/Documentation/devicetree/bindings/rtc/abracon,abx80x.txt new file mode 100644 index 000000000000..be789685a1c2 --- /dev/null +++ b/Documentation/devicetree/bindings/rtc/abracon,abx80x.txt @@ -0,0 +1,30 @@ +Abracon ABX80X I2C ultra low power RTC/Alarm chip + +The Abracon ABX80X family consist of the ab0801, ab0803, ab0804, ab0805, ab1801, +ab1803, ab1804 and ab1805. The ab0805 is the superset of ab080x and the ab1805 +is the superset of ab180x. + +Required properties: + + - "compatible": should one of: + "abracon,abx80x" + "abracon,ab0801" + "abracon,ab0803" + "abracon,ab0804" + "abracon,ab0805" + "abracon,ab1801" + "abracon,ab1803" + "abracon,ab1804" + "abracon,ab1805" + Using "abracon,abx80x" will enable chip autodetection. + - "reg": I2C bus address of the device + +Optional properties: + +The abx804 and abx805 have a trickle charger that is able to charge the +connected battery or supercap. Both the following properties have to be defined +and valid to enable charging: + + - "abracon,tc-diode": should be "standard" (0.6V) or "schottky" (0.3V) + - "abracon,tc-resistor": should be <0>, <3>, <6> or <11>. 0 disables the output + resistor, the other values are in ohm. From 4d61ff6b9960cb00cf2c12abd5769aa2dd475415 Mon Sep 17 00:00:00 2001 From: Philippe De Muyter Date: Tue, 5 May 2015 16:23:44 -0700 Subject: [PATCH 082/101] rtc: add rtc-abx80x, a driver for the Abracon AB x80x i2c rtc This is a basic driver for the ultra-low-power Abracon AB x80x series of RTC chips. It supports in particular, the supersets AB0805 and AB1805. It allows reading and writing the time, and enables the supercapacitor/ battery charger. [arnd@arndb.de: abx805 depends on i2c] [alexandre.belloni@free-electrons.com: renam buffer from date to buf in abx80x_rtc_read_time()] Signed-off-by: Philippe De Muyter Cc: Alessandro Zummo Signed-off-by: Alexandre Belloni Signed-off-by: Arnd Bergmann Cc: Paul Bolle Cc: Arnd Bergmann Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/rtc/Kconfig | 10 ++ drivers/rtc/Makefile | 1 + drivers/rtc/rtc-abx80x.c | 307 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 318 insertions(+) create mode 100644 drivers/rtc/rtc-abx80x.c diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig index 6149ae01e11f..0fe4ad8826b2 100644 --- a/drivers/rtc/Kconfig +++ b/drivers/rtc/Kconfig @@ -164,6 +164,16 @@ config RTC_DRV_ABB5ZES3 This driver can also be built as a module. If so, the module will be called rtc-ab-b5ze-s3. +config RTC_DRV_ABX80X + tristate "Abracon ABx80x" + help + If you say yes here you get support for Abracon AB080X and AB180X + families of ultra-low-power battery- and capacitor-backed real-time + clock chips. + + This driver can also be built as a module. If so, the module + will be called rtc-abx80x. + config RTC_DRV_AS3722 tristate "ams AS3722 RTC driver" depends on MFD_AS3722 diff --git a/drivers/rtc/Makefile b/drivers/rtc/Makefile index c31731c29762..2b82e2b0311b 100644 --- a/drivers/rtc/Makefile +++ b/drivers/rtc/Makefile @@ -25,6 +25,7 @@ obj-$(CONFIG_RTC_DRV_88PM80X) += rtc-88pm80x.o obj-$(CONFIG_RTC_DRV_AB3100) += rtc-ab3100.o obj-$(CONFIG_RTC_DRV_AB8500) += rtc-ab8500.o obj-$(CONFIG_RTC_DRV_ABB5ZES3) += rtc-ab-b5ze-s3.o +obj-$(CONFIG_RTC_DRV_ABX80X) += rtc-abx80x.o obj-$(CONFIG_RTC_DRV_ARMADA38X) += rtc-armada38x.o obj-$(CONFIG_RTC_DRV_AS3722) += rtc-as3722.o obj-$(CONFIG_RTC_DRV_AT32AP700X)+= rtc-at32ap700x.o diff --git a/drivers/rtc/rtc-abx80x.c b/drivers/rtc/rtc-abx80x.c new file mode 100644 index 000000000000..4337c3bc6ace --- /dev/null +++ b/drivers/rtc/rtc-abx80x.c @@ -0,0 +1,307 @@ +/* + * A driver for the I2C members of the Abracon AB x8xx RTC family, + * and compatible: AB 1805 and AB 0805 + * + * Copyright 2014-2015 Macq S.A. + * + * Author: Philippe De Muyter + * Author: Alexandre Belloni + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#include +#include +#include +#include + +#define ABX8XX_REG_HTH 0x00 +#define ABX8XX_REG_SC 0x01 +#define ABX8XX_REG_MN 0x02 +#define ABX8XX_REG_HR 0x03 +#define ABX8XX_REG_DA 0x04 +#define ABX8XX_REG_MO 0x05 +#define ABX8XX_REG_YR 0x06 +#define ABX8XX_REG_WD 0x07 + +#define ABX8XX_REG_CTRL1 0x10 +#define ABX8XX_CTRL_WRITE BIT(1) +#define ABX8XX_CTRL_12_24 BIT(6) + +#define ABX8XX_REG_CFG_KEY 0x1f +#define ABX8XX_CFG_KEY_MISC 0x9d + +#define ABX8XX_REG_ID0 0x28 + +#define ABX8XX_REG_TRICKLE 0x20 +#define ABX8XX_TRICKLE_CHARGE_ENABLE 0xa0 +#define ABX8XX_TRICKLE_STANDARD_DIODE 0x8 +#define ABX8XX_TRICKLE_SCHOTTKY_DIODE 0x4 + +static u8 trickle_resistors[] = {0, 3, 6, 11}; + +enum abx80x_chip {AB0801, AB0803, AB0804, AB0805, + AB1801, AB1803, AB1804, AB1805, ABX80X}; + +struct abx80x_cap { + u16 pn; + bool has_tc; +}; + +static struct abx80x_cap abx80x_caps[] = { + [AB0801] = {.pn = 0x0801}, + [AB0803] = {.pn = 0x0803}, + [AB0804] = {.pn = 0x0804, .has_tc = true}, + [AB0805] = {.pn = 0x0805, .has_tc = true}, + [AB1801] = {.pn = 0x1801}, + [AB1803] = {.pn = 0x1803}, + [AB1804] = {.pn = 0x1804, .has_tc = true}, + [AB1805] = {.pn = 0x1805, .has_tc = true}, + [ABX80X] = {.pn = 0} +}; + +static struct i2c_driver abx80x_driver; + +static int abx80x_enable_trickle_charger(struct i2c_client *client, + u8 trickle_cfg) +{ + int err; + + /* + * Write the configuration key register to enable access to the Trickle + * register + */ + err = i2c_smbus_write_byte_data(client, ABX8XX_REG_CFG_KEY, + ABX8XX_CFG_KEY_MISC); + if (err < 0) { + dev_err(&client->dev, "Unable to write configuration key\n"); + return -EIO; + } + + err = i2c_smbus_write_byte_data(client, ABX8XX_REG_TRICKLE, + ABX8XX_TRICKLE_CHARGE_ENABLE | + trickle_cfg); + if (err < 0) { + dev_err(&client->dev, "Unable to write trickle register\n"); + return -EIO; + } + + return 0; +} + +static int abx80x_rtc_read_time(struct device *dev, struct rtc_time *tm) +{ + struct i2c_client *client = to_i2c_client(dev); + unsigned char buf[8]; + int err; + + err = i2c_smbus_read_i2c_block_data(client, ABX8XX_REG_HTH, + sizeof(buf), buf); + if (err < 0) { + dev_err(&client->dev, "Unable to read date\n"); + return -EIO; + } + + tm->tm_sec = bcd2bin(buf[ABX8XX_REG_SC] & 0x7F); + tm->tm_min = bcd2bin(buf[ABX8XX_REG_MN] & 0x7F); + tm->tm_hour = bcd2bin(buf[ABX8XX_REG_HR] & 0x3F); + tm->tm_wday = buf[ABX8XX_REG_WD] & 0x7; + tm->tm_mday = bcd2bin(buf[ABX8XX_REG_DA] & 0x3F); + tm->tm_mon = bcd2bin(buf[ABX8XX_REG_MO] & 0x1F) - 1; + tm->tm_year = bcd2bin(buf[ABX8XX_REG_YR]) + 100; + + err = rtc_valid_tm(tm); + if (err < 0) + dev_err(&client->dev, "retrieved date/time is not valid.\n"); + + return err; +} + +static int abx80x_rtc_set_time(struct device *dev, struct rtc_time *tm) +{ + struct i2c_client *client = to_i2c_client(dev); + unsigned char buf[8]; + int err; + + if (tm->tm_year < 100) + return -EINVAL; + + buf[ABX8XX_REG_HTH] = 0; + buf[ABX8XX_REG_SC] = bin2bcd(tm->tm_sec); + buf[ABX8XX_REG_MN] = bin2bcd(tm->tm_min); + buf[ABX8XX_REG_HR] = bin2bcd(tm->tm_hour); + buf[ABX8XX_REG_DA] = bin2bcd(tm->tm_mday); + buf[ABX8XX_REG_MO] = bin2bcd(tm->tm_mon + 1); + buf[ABX8XX_REG_YR] = bin2bcd(tm->tm_year - 100); + buf[ABX8XX_REG_WD] = tm->tm_wday; + + err = i2c_smbus_write_i2c_block_data(client, ABX8XX_REG_HTH, + sizeof(buf), buf); + if (err < 0) { + dev_err(&client->dev, "Unable to write to date registers\n"); + return -EIO; + } + + return 0; +} + +static const struct rtc_class_ops abx80x_rtc_ops = { + .read_time = abx80x_rtc_read_time, + .set_time = abx80x_rtc_set_time, +}; + +static int abx80x_dt_trickle_cfg(struct device_node *np) +{ + const char *diode; + int trickle_cfg = 0; + int i, ret; + u32 tmp; + + ret = of_property_read_string(np, "abracon,tc-diode", &diode); + if (ret) + return ret; + + if (!strcmp(diode, "standard")) + trickle_cfg |= ABX8XX_TRICKLE_STANDARD_DIODE; + else if (!strcmp(diode, "schottky")) + trickle_cfg |= ABX8XX_TRICKLE_SCHOTTKY_DIODE; + else + return -EINVAL; + + ret = of_property_read_u32(np, "abracon,tc-resistor", &tmp); + if (ret) + return ret; + + for (i = 0; i < sizeof(trickle_resistors); i++) + if (trickle_resistors[i] == tmp) + break; + + if (i == sizeof(trickle_resistors)) + return -EINVAL; + + return (trickle_cfg | i); +} + +static int abx80x_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + struct device_node *np = client->dev.of_node; + struct rtc_device *rtc; + int i, data, err, trickle_cfg = -EINVAL; + char buf[7]; + unsigned int part = id->driver_data; + unsigned int partnumber; + unsigned int majrev, minrev; + unsigned int lot; + unsigned int wafer; + unsigned int uid; + + if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C)) + return -ENODEV; + + err = i2c_smbus_read_i2c_block_data(client, ABX8XX_REG_ID0, + sizeof(buf), buf); + if (err < 0) { + dev_err(&client->dev, "Unable to read partnumber\n"); + return -EIO; + } + + partnumber = (buf[0] << 8) | buf[1]; + majrev = buf[2] >> 3; + minrev = buf[2] & 0x7; + lot = ((buf[4] & 0x80) << 2) | ((buf[6] & 0x80) << 1) | buf[3]; + uid = ((buf[4] & 0x7f) << 8) | buf[5]; + wafer = (buf[6] & 0x7c) >> 2; + dev_info(&client->dev, "model %04x, revision %u.%u, lot %x, wafer %x, uid %x\n", + partnumber, majrev, minrev, lot, wafer, uid); + + data = i2c_smbus_read_byte_data(client, ABX8XX_REG_CTRL1); + if (data < 0) { + dev_err(&client->dev, "Unable to read control register\n"); + return -EIO; + } + + err = i2c_smbus_write_byte_data(client, ABX8XX_REG_CTRL1, + ((data & ~ABX8XX_CTRL_12_24) | + ABX8XX_CTRL_WRITE)); + if (err < 0) { + dev_err(&client->dev, "Unable to write control register\n"); + return -EIO; + } + + /* part autodetection */ + if (part == ABX80X) { + for (i = 0; abx80x_caps[i].pn; i++) + if (partnumber == abx80x_caps[i].pn) + break; + if (abx80x_caps[i].pn == 0) { + dev_err(&client->dev, "Unknown part: %04x\n", + partnumber); + return -EINVAL; + } + part = i; + } + + if (partnumber != abx80x_caps[part].pn) { + dev_err(&client->dev, "partnumber mismatch %04x != %04x\n", + partnumber, abx80x_caps[part].pn); + return -EINVAL; + } + + if (np && abx80x_caps[part].has_tc) + trickle_cfg = abx80x_dt_trickle_cfg(np); + + if (trickle_cfg > 0) { + dev_info(&client->dev, "Enabling trickle charger: %02x\n", + trickle_cfg); + abx80x_enable_trickle_charger(client, trickle_cfg); + } + + rtc = devm_rtc_device_register(&client->dev, abx80x_driver.driver.name, + &abx80x_rtc_ops, THIS_MODULE); + + if (IS_ERR(rtc)) + return PTR_ERR(rtc); + + i2c_set_clientdata(client, rtc); + + return 0; +} + +static int abx80x_remove(struct i2c_client *client) +{ + return 0; +} + +static const struct i2c_device_id abx80x_id[] = { + { "abx80x", ABX80X }, + { "ab0801", AB0801 }, + { "ab0803", AB0803 }, + { "ab0804", AB0804 }, + { "ab0805", AB0805 }, + { "ab1801", AB1801 }, + { "ab1803", AB1803 }, + { "ab1804", AB1804 }, + { "ab1805", AB1805 }, + { } +}; +MODULE_DEVICE_TABLE(i2c, abx80x_id); + +static struct i2c_driver abx80x_driver = { + .driver = { + .name = "rtc-abx80x", + }, + .probe = abx80x_probe, + .remove = abx80x_remove, + .id_table = abx80x_id, +}; + +module_i2c_driver(abx80x_driver); + +MODULE_AUTHOR("Philippe De Muyter "); +MODULE_AUTHOR("Alexandre Belloni "); +MODULE_DESCRIPTION("Abracon ABX80X RTC driver"); +MODULE_LICENSE("GPL v2"); From 602498f9aa43d4951eece3fd6ad95a6d0a78d537 Mon Sep 17 00:00:00 2001 From: Naoya Horiguchi Date: Tue, 5 May 2015 16:23:46 -0700 Subject: [PATCH 083/101] mm: soft-offline: fix num_poisoned_pages counting on concurrent events If multiple soft offline events hit one free page/hugepage concurrently, soft_offline_page() can handle the free page/hugepage multiple times, which makes num_poisoned_pages counter increased more than once. This patch fixes this wrong counting by checking TestSetPageHWPoison for normal papes and by checking the return value of dequeue_hwpoisoned_huge_page() for hugepages. Signed-off-by: Naoya Horiguchi Acked-by: Dean Nelson Cc: Andi Kleen Cc: [3.14+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory-failure.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 22e0f270e4f7..501820c815b3 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -1777,12 +1777,12 @@ int soft_offline_page(struct page *page, int flags) } else if (ret == 0) { /* for free pages */ if (PageHuge(page)) { set_page_hwpoison_huge_page(hpage); - dequeue_hwpoisoned_huge_page(hpage); - atomic_long_add(1 << compound_order(hpage), + if (!dequeue_hwpoisoned_huge_page(hpage)) + atomic_long_add(1 << compound_order(hpage), &num_poisoned_pages); } else { - SetPageHWPoison(page); - atomic_long_inc(&num_poisoned_pages); + if (!TestSetPageHWPoison(page)) + atomic_long_inc(&num_poisoned_pages); } } unset_migratetype_isolate(page, MIGRATE_MOVABLE); From 7ea434a4eb49db83d17cc076f2267704c52938ae Mon Sep 17 00:00:00 2001 From: Naoya Horiguchi Date: Tue, 5 May 2015 16:23:49 -0700 Subject: [PATCH 084/101] mm/hwpoison-inject: fix refcounting in no-injection case Hwpoison injection via debugfs:hwpoison/corrupt-pfn takes a refcount of the target page. But current code doesn't release it if the target page is not supposed to be injected, which results in memory leak. This patch simply adds the refcount releasing code. Signed-off-by: Naoya Horiguchi Acked-by: Dean Nelson Cc: Andi Kleen Cc: Andrea Arcangeli Cc: Hidetoshi Seto Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/hwpoison-inject.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/mm/hwpoison-inject.c b/mm/hwpoison-inject.c index 329caf56df22..2b3f933e3282 100644 --- a/mm/hwpoison-inject.c +++ b/mm/hwpoison-inject.c @@ -40,7 +40,7 @@ static int hwpoison_inject(void *data, u64 val) * This implies unable to support non-LRU pages. */ if (!PageLRU(p) && !PageHuge(p)) - return 0; + goto put_out; /* * do a racy check with elevated page count, to make sure PG_hwpoison @@ -52,11 +52,14 @@ static int hwpoison_inject(void *data, u64 val) err = hwpoison_filter(hpage); unlock_page(hpage); if (err) - return 0; + goto put_out; inject: pr_info("Injecting memory failure at pfn %#lx\n", pfn); return memory_failure(pfn, 18, MF_COUNT_INCREASED); +put_out: + put_page(hpage); + return 0; } static int hwpoison_unpoison(void *data, u64 val) From e386eed89c764f102fcc3c0d4c78c65a357f7399 Mon Sep 17 00:00:00 2001 From: Naoya Horiguchi Date: Tue, 5 May 2015 16:23:52 -0700 Subject: [PATCH 085/101] mm/hwpoison-inject: check PageLRU of hpage Hwpoison injector checks PageLRU of the raw target page to find out whether the page is an appropriate target, but current code now filters out thp tail pages, which prevents us from testing for such cases via this interface. So let's check hpage instead of p. Signed-off-by: Naoya Horiguchi Acked-by: Dean Nelson Cc: Andi Kleen Cc: Andrea Arcangeli Cc: Hidetoshi Seto Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/hwpoison-inject.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mm/hwpoison-inject.c b/mm/hwpoison-inject.c index 2b3f933e3282..4ca5fe0042e1 100644 --- a/mm/hwpoison-inject.c +++ b/mm/hwpoison-inject.c @@ -34,12 +34,12 @@ static int hwpoison_inject(void *data, u64 val) if (!hwpoison_filter_enable) goto inject; - if (!PageLRU(p) && !PageHuge(p)) - shake_page(p, 0); + if (!PageLRU(hpage) && !PageHuge(p)) + shake_page(hpage, 0); /* * This implies unable to support non-LRU pages. */ - if (!PageLRU(p) && !PageHuge(p)) + if (!PageLRU(hpage) && !PageHuge(p)) goto put_out; /* From f5b697700c86d7d01489202bfd37d86665754afd Mon Sep 17 00:00:00 2001 From: Daniel Baluta Date: Tue, 5 May 2015 16:23:54 -0700 Subject: [PATCH 086/101] configfs: init configfs module earlier at boot time We need this earlier in the boot process to allow various subsystems to use configfs (e.g Industrial IIO). Also, debugfs is at core_initcall level and configfs should be on the same level from infrastructure point of view. Signed-off-by: Daniel Baluta Suggested-by: Lars-Peter Clausen Reviewed-by: Christoph Hellwig Cc: Al Viro Cc: Joel Becker Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/configfs/mount.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/configfs/mount.c b/fs/configfs/mount.c index da94e41bdbf6..537356742091 100644 --- a/fs/configfs/mount.c +++ b/fs/configfs/mount.c @@ -173,5 +173,5 @@ MODULE_LICENSE("GPL"); MODULE_VERSION("0.0.2"); MODULE_DESCRIPTION("Simple RAM filesystem for user driven kernel subsystem configuration."); -module_init(configfs_init); +core_initcall(configfs_init); module_exit(configfs_exit); From 05836c378c7af9527b98a83746f32c7289a5f3c8 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Tue, 5 May 2015 16:23:57 -0700 Subject: [PATCH 087/101] util_macros.h: have array pointer point to array of constants Using the new find_closest() macro can result in the following sparse warnings. drivers/hwmon/lm85.c:194:16: warning: incorrect type in initializer (different modifiers) drivers/hwmon/lm85.c:194:16: expected int *__fc_a drivers/hwmon/lm85.c:194:16: got int static const [toplevel] * drivers/hwmon/lm85.c:210:16: warning: incorrect type in initializer (different modifiers) drivers/hwmon/lm85.c:210:16: expected int *__fc_a drivers/hwmon/lm85.c:210:16: got int const *map This is because the array passed to find_closest() will typically be declared as array of constants, but the macro declares a non-constant pointer to it. Signed-off-by: Guenter Roeck Cc: Bartosz Golaszewski Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/util_macros.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/util_macros.h b/include/linux/util_macros.h index d5f4fb69dba3..f9b2ce58039b 100644 --- a/include/linux/util_macros.h +++ b/include/linux/util_macros.h @@ -5,7 +5,7 @@ ({ \ typeof(as) __fc_i, __fc_as = (as) - 1; \ typeof(x) __fc_x = (x); \ - typeof(*a) *__fc_a = (a); \ + typeof(*a) const *__fc_a = (a); \ for (__fc_i = 0; __fc_i < __fc_as; __fc_i++) { \ if (__fc_x op DIV_ROUND_CLOSEST(__fc_a[__fc_i] + \ __fc_a[__fc_i + 1], 2)) \ From d8fd150fe3935e1692bf57c66691e17409ebb9c1 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Tue, 5 May 2015 16:24:00 -0700 Subject: [PATCH 088/101] nilfs2: fix sanity check of btree level in nilfs_btree_root_broken() The range check for b-tree level parameter in nilfs_btree_root_broken() is wrong; it accepts the case of "level == NILFS_BTREE_LEVEL_MAX" even though the level is limited to values in the range of 0 to (NILFS_BTREE_LEVEL_MAX - 1). Since the level parameter is read from storage device and used to index nilfs_btree_path array whose element count is NILFS_BTREE_LEVEL_MAX, it can cause memory overrun during btree operations if the boundary value is set to the level parameter on device. This fixes the broken sanity check and adds a comment to clarify that the upper bound NILFS_BTREE_LEVEL_MAX is exclusive. Signed-off-by: Ryusuke Konishi Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nilfs2/btree.c | 2 +- include/linux/nilfs2_fs.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c index 059f37137f9a..919fd5bb14a8 100644 --- a/fs/nilfs2/btree.c +++ b/fs/nilfs2/btree.c @@ -388,7 +388,7 @@ static int nilfs_btree_root_broken(const struct nilfs_btree_node *node, nchildren = nilfs_btree_node_get_nchildren(node); if (unlikely(level < NILFS_BTREE_LEVEL_NODE_MIN || - level > NILFS_BTREE_LEVEL_MAX || + level >= NILFS_BTREE_LEVEL_MAX || nchildren < 0 || nchildren > NILFS_BTREE_ROOT_NCHILDREN_MAX)) { pr_crit("NILFS: bad btree root (inode number=%lu): level = %d, flags = 0x%x, nchildren = %d\n", diff --git a/include/linux/nilfs2_fs.h b/include/linux/nilfs2_fs.h index ff3fea3194c6..9abb763e4b86 100644 --- a/include/linux/nilfs2_fs.h +++ b/include/linux/nilfs2_fs.h @@ -460,7 +460,7 @@ struct nilfs_btree_node { /* level */ #define NILFS_BTREE_LEVEL_DATA 0 #define NILFS_BTREE_LEVEL_NODE_MIN (NILFS_BTREE_LEVEL_DATA + 1) -#define NILFS_BTREE_LEVEL_MAX 14 +#define NILFS_BTREE_LEVEL_MAX 14 /* Max level (exclusive) */ /** * struct nilfs_palloc_group_desc - block group descriptor From b1432a2a35565f538586774a03bf277c27fc267d Mon Sep 17 00:00:00 2001 From: Junxiao Bi Date: Tue, 5 May 2015 16:24:02 -0700 Subject: [PATCH 089/101] ocfs2: dlm: fix race between purge and get lock resource There is a race window in dlm_get_lock_resource(), which may return a lock resource which has been purged. This will cause the process to hang forever in dlmlock() as the ast msg can't be handled due to its lock resource not existing. dlm_get_lock_resource { ... spin_lock(&dlm->spinlock); tmpres = __dlm_lookup_lockres_full(dlm, lockid, namelen, hash); if (tmpres) { spin_unlock(&dlm->spinlock); >>>>>>>> race window, dlm_run_purge_list() may run and purge the lock resource spin_lock(&tmpres->spinlock); ... spin_unlock(&tmpres->spinlock); } } Signed-off-by: Junxiao Bi Cc: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ocfs2/dlm/dlmmaster.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index a6944b25fd5b..fdf4b41d0609 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c @@ -757,6 +757,19 @@ struct dlm_lock_resource * dlm_get_lock_resource(struct dlm_ctxt *dlm, if (tmpres) { spin_unlock(&dlm->spinlock); spin_lock(&tmpres->spinlock); + + /* + * Right after dlm spinlock was released, dlm_thread could have + * purged the lockres. Check if lockres got unhashed. If so + * start over. + */ + if (hlist_unhashed(&tmpres->hash_node)) { + spin_unlock(&tmpres->spinlock); + dlm_lockres_put(tmpres); + tmpres = NULL; + goto lookup; + } + /* Wait on the thread that is mastering the resource */ if (tmpres->owner == DLM_LOCK_RES_OWNER_UNKNOWN) { __dlm_wait_on_lockres(tmpres); From 489405fe5ed38e65f6f82f131a39c67f3bae6045 Mon Sep 17 00:00:00 2001 From: Gregory CLEMENT Date: Tue, 5 May 2015 16:24:05 -0700 Subject: [PATCH 090/101] rtc: armada38x: fix concurrency access in armada38x_rtc_set_time While setting the time, the RTC TIME register should not be accessed. However due to hardware constraints, setting the RTC time involves sleeping during 100ms. This sleep was done outside the critical section protected by the spinlock, so it was possible to read the RTC TIME register and get an incorrect value. This patch introduces a mutex for protecting the RTC TIME access, unlike the spinlock it is allowed to sleep in a critical section protected by a mutex. The RTC STATUS register can still be used from the interrupt handler but it has no effect on setting the time. Signed-off-by: Gregory CLEMENT Acked-by: Alexandre Belloni Acked-by: Andrew Lunn Cc: Alessandro Zummo Cc: [4.0] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/rtc/rtc-armada38x.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/rtc/rtc-armada38x.c b/drivers/rtc/rtc-armada38x.c index 43e04af39e09..cb70ced7e0db 100644 --- a/drivers/rtc/rtc-armada38x.c +++ b/drivers/rtc/rtc-armada38x.c @@ -40,6 +40,13 @@ struct armada38x_rtc { void __iomem *regs; void __iomem *regs_soc; spinlock_t lock; + /* + * While setting the time, the RTC TIME register should not be + * accessed. Setting the RTC time involves sleeping during + * 100ms, so a mutex instead of a spinlock is used to protect + * it + */ + struct mutex mutex_time; int irq; }; @@ -59,8 +66,7 @@ static int armada38x_rtc_read_time(struct device *dev, struct rtc_time *tm) struct armada38x_rtc *rtc = dev_get_drvdata(dev); unsigned long time, time_check, flags; - spin_lock_irqsave(&rtc->lock, flags); - + mutex_lock(&rtc->mutex_time); time = readl(rtc->regs + RTC_TIME); /* * WA for failing time set attempts. As stated in HW ERRATA if @@ -71,7 +77,7 @@ static int armada38x_rtc_read_time(struct device *dev, struct rtc_time *tm) if ((time_check - time) > 1) time_check = readl(rtc->regs + RTC_TIME); - spin_unlock_irqrestore(&rtc->lock, flags); + mutex_unlock(&rtc->mutex_time); rtc_time_to_tm(time_check, tm); @@ -94,19 +100,12 @@ static int armada38x_rtc_set_time(struct device *dev, struct rtc_time *tm) * then wait for 100ms before writing to the time register to be * sure that the data will be taken into account. */ - spin_lock_irqsave(&rtc->lock, flags); - + mutex_lock(&rtc->mutex_time); rtc_delayed_write(0, rtc, RTC_STATUS); - - spin_unlock_irqrestore(&rtc->lock, flags); - msleep(100); - - spin_lock_irqsave(&rtc->lock, flags); - rtc_delayed_write(time, rtc, RTC_TIME); + mutex_unlock(&rtc->mutex_time); - spin_unlock_irqrestore(&rtc->lock, flags); out: return ret; } @@ -230,6 +229,7 @@ static __init int armada38x_rtc_probe(struct platform_device *pdev) return -ENOMEM; spin_lock_init(&rtc->lock); + mutex_init(&rtc->mutex_time); res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "rtc"); rtc->regs = devm_ioremap_resource(&pdev->dev, res); From 15c5725e6b86cb8dfc4ca655a22005cc678a6f6f Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Thu, 16 Apr 2015 21:09:53 +0800 Subject: [PATCH 091/101] ipmi: Remove unused including Remove including that don't need it. Signed-off-by: Wei Yongjun Signed-off-by: Corey Minyard --- drivers/char/ipmi/ipmi_ssif.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/char/ipmi/ipmi_ssif.c b/drivers/char/ipmi/ipmi_ssif.c index ee3b8c5e7e21..f6ea4fa444b3 100644 --- a/drivers/char/ipmi/ipmi_ssif.c +++ b/drivers/char/ipmi/ipmi_ssif.c @@ -31,7 +31,6 @@ * interface into the I2C driver, I believe. */ -#include #if defined(MODVERSIONS) #include #endif From a182a4b2b3e85a559ea2cd3545f4311db41325f2 Mon Sep 17 00:00:00 2001 From: Corey Minyard Date: Wed, 22 Apr 2015 13:25:40 -0500 Subject: [PATCH 092/101] ipmi: Report an error if ACPI _IFT doesn't exist When probing an ACPI table, report a specific error, instead of just returning an error, if _IFT doesn't exist. Signed-off-by: Corey Minyard --- drivers/char/ipmi/ipmi_si_intf.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c index 461274168d0f..b5a1b450471f 100644 --- a/drivers/char/ipmi/ipmi_si_intf.c +++ b/drivers/char/ipmi/ipmi_si_intf.c @@ -2262,8 +2262,10 @@ static int ipmi_pnp_probe(struct pnp_dev *dev, /* _IFT tells us the interface type: KCS, BT, etc */ status = acpi_evaluate_integer(handle, "_IFT", NULL, &tmp); - if (ACPI_FAILURE(status)) + if (ACPI_FAILURE(status)) { + dev_err(&dev->dev, "Could not find ACPI IPMI interface type\n"); goto err_free; + } switch (tmp) { case 1: From 9f8127048ab8b47b43f8aeaaec9fec2da44be9a1 Mon Sep 17 00:00:00 2001 From: Hidehiro Kawai Date: Thu, 23 Apr 2015 11:16:44 +0900 Subject: [PATCH 093/101] ipmi: Fix a problem that messages are not issued in run_to_completion mode start_next_msg() issues a message placed in smi_info->waiting_msg if it is non-NULL. However, sender() sets a message to smi_info->curr_msg and NULL to smi_info->waiting_msg in the context of run_to_completion mode. As the result, it leads an infinite loop by waiting the completion of unissued message when leaving dying message after kernel panic. sender() should set the message to smi_info->waiting_msg not curr_msg. Signed-off-by: Hidehiro Kawai Signed-off-by: Corey Minyard --- drivers/char/ipmi/ipmi_si_intf.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c index b5a1b450471f..8a45e92ff60c 100644 --- a/drivers/char/ipmi/ipmi_si_intf.c +++ b/drivers/char/ipmi/ipmi_si_intf.c @@ -942,8 +942,7 @@ static void sender(void *send_info, * If we are running to completion, start it and run * transactions until everything is clear. */ - smi_info->curr_msg = msg; - smi_info->waiting_msg = NULL; + smi_info->waiting_msg = msg; /* * Run to completion means we are single-threaded, no From 9162052173d2381e2bbabc224c3c1457acb4c54c Mon Sep 17 00:00:00 2001 From: Corey Minyard Date: Fri, 24 Apr 2015 07:46:06 -0500 Subject: [PATCH 094/101] ipmi: Add alert handling to SSIF The SSIF interface can optionally have an SMBus alert come in when data is ready. Unfortunately, the IPMI spec gives wiggle room to the implementer to allow them to always have the alert enabled, even if the driver doesn't enable it. So implement alerts. If you don't in this situation, the SMBus alert handling will constantly complain. Signed-off-by: Corey Minyard --- drivers/char/ipmi/ipmi_ssif.c | 132 +++++++++++++++++++++++++++++----- 1 file changed, 116 insertions(+), 16 deletions(-) diff --git a/drivers/char/ipmi/ipmi_ssif.c b/drivers/char/ipmi/ipmi_ssif.c index f6ea4fa444b3..5b82d9947ec5 100644 --- a/drivers/char/ipmi/ipmi_ssif.c +++ b/drivers/char/ipmi/ipmi_ssif.c @@ -165,6 +165,9 @@ enum ssif_stat_indexes { /* Number of watchdog pretimeouts. */ SSIF_STAT_watchdog_pretimeouts, + /* Number of alers received. */ + SSIF_STAT_alerts, + /* Always add statistics before this value, it must be last. */ SSIF_NUM_STATS }; @@ -213,7 +216,16 @@ struct ssif_info { #define WDT_PRE_TIMEOUT_INT 0x08 unsigned char msg_flags; + u8 global_enables; bool has_event_buffer; + bool supports_alert; + + /* + * Used to tell what we should do with alerts. If we are + * waiting on a response, read the data immediately. + */ + bool got_alert; + bool waiting_alert; /* * If set to true, this will request events the next time the @@ -517,14 +529,10 @@ static int ssif_i2c_send(struct ssif_info *ssif_info, static void msg_done_handler(struct ssif_info *ssif_info, int result, unsigned char *data, unsigned int len); -static void retry_timeout(unsigned long data) +static void start_get(struct ssif_info *ssif_info) { - struct ssif_info *ssif_info = (void *) data; int rv; - if (ssif_info->stopping) - return; - ssif_info->rtc_us_timer = 0; rv = ssif_i2c_send(ssif_info, msg_done_handler, I2C_SMBUS_READ, @@ -539,6 +547,46 @@ static void retry_timeout(unsigned long data) } } +static void retry_timeout(unsigned long data) +{ + struct ssif_info *ssif_info = (void *) data; + unsigned long oflags, *flags; + bool waiting; + + if (ssif_info->stopping) + return; + + flags = ipmi_ssif_lock_cond(ssif_info, &oflags); + waiting = ssif_info->waiting_alert; + ssif_info->waiting_alert = false; + ipmi_ssif_unlock_cond(ssif_info, flags); + + if (waiting) + start_get(ssif_info); +} + + +static void ssif_alert(struct i2c_client *client, unsigned int data) +{ + struct ssif_info *ssif_info = i2c_get_clientdata(client); + unsigned long oflags, *flags; + bool do_get = false; + + ssif_inc_stat(ssif_info, alerts); + + flags = ipmi_ssif_lock_cond(ssif_info, &oflags); + if (ssif_info->waiting_alert) { + ssif_info->waiting_alert = false; + del_timer(&ssif_info->retry_timer); + do_get = true; + } else if (ssif_info->curr_msg) { + ssif_info->got_alert = true; + } + ipmi_ssif_unlock_cond(ssif_info, flags); + if (do_get) + start_get(ssif_info); +} + static int start_resend(struct ssif_info *ssif_info); static void msg_done_handler(struct ssif_info *ssif_info, int result, @@ -558,9 +606,12 @@ static void msg_done_handler(struct ssif_info *ssif_info, int result, if (ssif_info->retries_left > 0) { ssif_inc_stat(ssif_info, receive_retries); + flags = ipmi_ssif_lock_cond(ssif_info, &oflags); + ssif_info->waiting_alert = true; + ssif_info->rtc_us_timer = SSIF_MSG_USEC; mod_timer(&ssif_info->retry_timer, jiffies + SSIF_MSG_JIFFIES); - ssif_info->rtc_us_timer = SSIF_MSG_USEC; + ipmi_ssif_unlock_cond(ssif_info, flags); return; } @@ -649,7 +700,7 @@ static void msg_done_handler(struct ssif_info *ssif_info, int result, if (rv < 0) { if (ssif_info->ssif_debug & SSIF_DEBUG_MSG) pr_info(PFX - "Error from i2c_non_blocking_op(2)\n"); + "Error from ssif_i2c_send\n"); result = -EIO; } else @@ -863,15 +914,32 @@ static void msg_written_handler(struct ssif_info *ssif_info, int result, msg_done_handler(ssif_info, -EIO, NULL, 0); } } else { + unsigned long oflags, *flags; + bool got_alert; + ssif_inc_stat(ssif_info, sent_messages); ssif_inc_stat(ssif_info, sent_messages_parts); - /* Wait a jiffie then request the next message */ - ssif_info->retries_left = SSIF_RECV_RETRIES; - ssif_info->rtc_us_timer = SSIF_MSG_PART_USEC; - mod_timer(&ssif_info->retry_timer, - jiffies + SSIF_MSG_PART_JIFFIES); - return; + flags = ipmi_ssif_lock_cond(ssif_info, &oflags); + got_alert = ssif_info->got_alert; + if (got_alert) { + ssif_info->got_alert = false; + ssif_info->waiting_alert = false; + } + + if (got_alert) { + ipmi_ssif_unlock_cond(ssif_info, flags); + /* The alert already happened, try now. */ + retry_timeout((unsigned long) ssif_info); + } else { + /* Wait a jiffie then request the next message */ + ssif_info->waiting_alert = true; + ssif_info->retries_left = SSIF_RECV_RETRIES; + ssif_info->rtc_us_timer = SSIF_MSG_PART_USEC; + mod_timer(&ssif_info->retry_timer, + jiffies + SSIF_MSG_PART_JIFFIES); + ipmi_ssif_unlock_cond(ssif_info, flags); + } } } @@ -880,6 +948,8 @@ static int start_resend(struct ssif_info *ssif_info) int rv; int command; + ssif_info->got_alert = false; + if (ssif_info->data_len > 32) { command = SSIF_IPMI_MULTI_PART_REQUEST_START; ssif_info->multi_data = ssif_info->data; @@ -1242,6 +1312,8 @@ static int smi_stats_proc_show(struct seq_file *m, void *v) ssif_get_stat(ssif_info, events)); seq_printf(m, "watchdog_pretimeouts: %u\n", ssif_get_stat(ssif_info, watchdog_pretimeouts)); + seq_printf(m, "alerts: %u\n", + ssif_get_stat(ssif_info, alerts)); return 0; } @@ -1324,6 +1396,12 @@ static bool check_acpi(struct ssif_info *ssif_info, struct device *dev) return false; } +/* + * Global enables we care about. + */ +#define GLOBAL_ENABLES_MASK (IPMI_BMC_EVT_MSG_BUFF | IPMI_BMC_RCV_MSG_INTR | \ + IPMI_BMC_EVT_MSG_INTR) + static int ssif_probe(struct i2c_client *client, const struct i2c_device_id *id) { unsigned char msg[3]; @@ -1454,6 +1532,8 @@ static int ssif_probe(struct i2c_client *client, const struct i2c_device_id *id) goto found; } + ssif_info->global_enables = resp[3]; + if (resp[3] & IPMI_BMC_EVT_MSG_BUFF) { ssif_info->has_event_buffer = true; /* buffer is already enabled, nothing to do. */ @@ -1462,18 +1542,37 @@ static int ssif_probe(struct i2c_client *client, const struct i2c_device_id *id) msg[0] = IPMI_NETFN_APP_REQUEST << 2; msg[1] = IPMI_SET_BMC_GLOBAL_ENABLES_CMD; - msg[2] = resp[3] | IPMI_BMC_EVT_MSG_BUFF; + msg[2] = ssif_info->global_enables | IPMI_BMC_EVT_MSG_BUFF; rv = do_cmd(client, 3, msg, &len, resp); if (rv || (len < 2)) { - pr_warn(PFX "Error getting global enables: %d %d %2.2x\n", + pr_warn(PFX "Error setting global enables: %d %d %2.2x\n", rv, len, resp[2]); rv = 0; /* Not fatal */ goto found; } - if (resp[2] == 0) + if (resp[2] == 0) { /* A successful return means the event buffer is supported. */ ssif_info->has_event_buffer = true; + ssif_info->global_enables |= IPMI_BMC_EVT_MSG_BUFF; + } + + msg[0] = IPMI_NETFN_APP_REQUEST << 2; + msg[1] = IPMI_SET_BMC_GLOBAL_ENABLES_CMD; + msg[2] = ssif_info->global_enables | IPMI_BMC_RCV_MSG_INTR; + rv = do_cmd(client, 3, msg, &len, resp); + if (rv || (len < 2)) { + pr_warn(PFX "Error setting global enables: %d %d %2.2x\n", + rv, len, resp[2]); + rv = 0; /* Not fatal */ + goto found; + } + + if (resp[2] == 0) { + /* A successful return means the alert is supported. */ + ssif_info->supports_alert = true; + ssif_info->global_enables |= IPMI_BMC_RCV_MSG_INTR; + } found: ssif_info->intf_num = atomic_inc_return(&next_intf); @@ -1831,6 +1930,7 @@ static struct i2c_driver ssif_i2c_driver = { }, .probe = ssif_probe, .remove = ssif_remove, + .alert = ssif_alert, .id_table = ssif_id, .detect = ssif_detect }; From 3d69d43baa2749c3d187ce70940d7aebe609e149 Mon Sep 17 00:00:00 2001 From: Corey Minyard Date: Wed, 29 Apr 2015 17:59:21 -0500 Subject: [PATCH 095/101] ipmi: Fix multi-part message handling Lots of little fixes for multi-part messages: The values was not being re-initialized, if something went wrong handling a multi-part message and it got left in a bad state, it might be an issue. The commands were not correct when issuing multi-part reads, the code was not passing in the proper value for commands. Also clean up some minor formatting issues. Get the block number from the right location, limit the maximum send message size to 63 bytes and explain why, and fix some minor sylistic issues. Signed-off-by: Corey Minyard --- drivers/char/ipmi/ipmi_ssif.c | 51 ++++++++++++++++++++++++++--------- 1 file changed, 38 insertions(+), 13 deletions(-) diff --git a/drivers/char/ipmi/ipmi_ssif.c b/drivers/char/ipmi/ipmi_ssif.c index 5b82d9947ec5..207689c444a8 100644 --- a/drivers/char/ipmi/ipmi_ssif.c +++ b/drivers/char/ipmi/ipmi_ssif.c @@ -489,13 +489,13 @@ static int ipmi_ssif_thread(void *data) if (ssif_info->i2c_read_write == I2C_SMBUS_WRITE) { result = i2c_smbus_write_block_data( - ssif_info->client, SSIF_IPMI_REQUEST, + ssif_info->client, ssif_info->i2c_command, ssif_info->i2c_data[0], ssif_info->i2c_data + 1); ssif_info->done_handler(ssif_info, result, NULL, 0); } else { result = i2c_smbus_read_block_data( - ssif_info->client, SSIF_IPMI_RESPONSE, + ssif_info->client, ssif_info->i2c_command, ssif_info->i2c_data); if (result < 0) ssif_info->done_handler(ssif_info, result, @@ -534,6 +534,7 @@ static void start_get(struct ssif_info *ssif_info) int rv; ssif_info->rtc_us_timer = 0; + ssif_info->multi_pos = 0; rv = ssif_i2c_send(ssif_info, msg_done_handler, I2C_SMBUS_READ, SSIF_IPMI_RESPONSE, @@ -631,9 +632,9 @@ static void msg_done_handler(struct ssif_info *ssif_info, int result, ssif_inc_stat(ssif_info, received_message_parts); /* Remove the multi-part read marker. */ - for (i = 0; i < (len-2); i++) - ssif_info->data[i] = data[i+2]; len -= 2; + for (i = 0; i < len; i++) + ssif_info->data[i] = data[i+2]; ssif_info->multi_len = len; ssif_info->multi_pos = 1; @@ -660,9 +661,9 @@ static void msg_done_handler(struct ssif_info *ssif_info, int result, goto continue_op; } - blocknum = data[ssif_info->multi_len]; + blocknum = data[0]; - if (ssif_info->multi_len+len-1 > IPMI_MAX_MSG_LENGTH) { + if (ssif_info->multi_len + len - 1 > IPMI_MAX_MSG_LENGTH) { /* Received message too big, abort the operation. */ result = -E2BIG; if (ssif_info->ssif_debug & SSIF_DEBUG_MSG) @@ -672,15 +673,15 @@ static void msg_done_handler(struct ssif_info *ssif_info, int result, } /* Remove the blocknum from the data. */ - for (i = 0; i < (len-1); i++) - ssif_info->data[i+ssif_info->multi_len] = data[i+1]; len--; + for (i = 0; i < len; i++) + ssif_info->data[i + ssif_info->multi_len] = data[i + 1]; ssif_info->multi_len += len; if (blocknum == 0xff) { /* End of read */ len = ssif_info->multi_len; data = ssif_info->data; - } else if ((blocknum+1) != ssif_info->multi_pos) { + } else if (blocknum + 1 != ssif_info->multi_pos) { /* * Out of sequence block, just abort. Block * numbers start at zero for the second block, @@ -880,7 +881,11 @@ static void msg_written_handler(struct ssif_info *ssif_info, int result, } if (ssif_info->multi_data) { - /* In the middle of a multi-data write. */ + /* + * In the middle of a multi-data write. See the comment + * in the SSIF_MULTI_n_PART case in the probe function + * for details on the intricacies of this. + */ int left; ssif_inc_stat(ssif_info, sent_messages_parts); @@ -984,7 +989,7 @@ static int start_send(struct ssif_info *ssif_info, return -E2BIG; ssif_info->retries_left = SSIF_SEND_RETRIES; - memcpy(ssif_info->data+1, data, len); + memcpy(ssif_info->data + 1, data, len); ssif_info->data_len = len; return start_resend(ssif_info); } @@ -1487,13 +1492,33 @@ static int ssif_probe(struct i2c_client *client, const struct i2c_device_id *id) break; case SSIF_MULTI_2_PART: - if (ssif_info->max_xmit_msg_size > 64) - ssif_info->max_xmit_msg_size = 64; + if (ssif_info->max_xmit_msg_size > 63) + ssif_info->max_xmit_msg_size = 63; if (ssif_info->max_recv_msg_size > 62) ssif_info->max_recv_msg_size = 62; break; case SSIF_MULTI_n_PART: + /* + * The specification is rather confusing at + * this point, but I think I understand what + * is meant. At least I have a workable + * solution. With multi-part messages, you + * cannot send a message that is a multiple of + * 32-bytes in length, because the start and + * middle messages are 32-bytes and the end + * message must be at least one byte. You + * can't fudge on an extra byte, that would + * screw up things like fru data writes. So + * we limit the length to 63 bytes. That way + * a 32-byte message gets sent as a single + * part. A larger message will be a 32-byte + * start and the next message is always going + * to be 1-31 bytes in length. Not ideal, but + * it should work. + */ + if (ssif_info->max_xmit_msg_size > 63) + ssif_info->max_xmit_msg_size = 63; break; default: From c88d47480d300eaad80c213d50c9bf6077fc49bc Mon Sep 17 00:00:00 2001 From: Bobby Powers Date: Mon, 27 Apr 2015 08:10:41 -0700 Subject: [PATCH 096/101] x86/fpu: Always restore_xinit_state() when use_eager_cpu() The following commit: f893959b0898 ("x86/fpu: Don't abuse drop_init_fpu() in flush_thread()") removed drop_init_fpu() usage from flush_thread(). This seems to break things for me - the Go 1.4 test suite fails all over the place with floating point comparision errors (offending commit found through bisection). The functional change was that flush_thread() after this commit only calls restore_init_xstate() when both use_eager_fpu() and !used_math() are true. drop_init_fpu() (now fpu_reset_state()) calls restore_init_xstate() regardless of whether current used_math() - apply the same logic here. Switch used_math() -> tsk_used_math(tsk) to consistently use the grabbed tsk instead of current, like in the rest of flush_thread(). Tested-by: Dave Hansen Signed-off-by: Bobby Powers Signed-off-by: Borislav Petkov Acked-by: Oleg Nesterov Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Pekka Riikonen Cc: Quentin Casasnovas Cc: Rik van Riel Cc: Suresh Siddha Cc: Thomas Gleixner Fixes: f893959b ("x86/fpu: Don't abuse drop_init_fpu() in flush_thread()") Link: http://lkml.kernel.org/r/1430147441-9820-1-git-send-email-bobbypowers@gmail.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/process.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index bfc99b3b6522..6e338e3b1dc0 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -156,11 +156,13 @@ void flush_thread(void) /* FPU state will be reallocated lazily at the first use. */ drop_fpu(tsk); free_thread_xstate(tsk); - } else if (!used_math()) { - /* kthread execs. TODO: cleanup this horror. */ - if (WARN_ON(init_fpu(tsk))) - force_sig(SIGKILL, tsk); - user_fpu_begin(); + } else { + if (!tsk_used_math(tsk)) { + /* kthread execs. TODO: cleanup this horror. */ + if (WARN_ON(init_fpu(tsk))) + force_sig(SIGKILL, tsk); + user_fpu_begin(); + } restore_init_xstate(); } } From 8746515d7f04c9ea94cf43e2db1fd2cfca93276d Mon Sep 17 00:00:00 2001 From: Stefano Stabellini Date: Fri, 24 Apr 2015 10:16:40 +0100 Subject: [PATCH 097/101] xen: Add __GFP_DMA flag when xen_swiotlb_init gets free pages on ARM Make sure that xen_swiotlb_init allocates buffers that are DMA capable when at least one memblock is available below 4G. Otherwise we assume that all devices on the SoC can cope with >4G addresses. We do this on ARM and ARM64, where dom0 is mapped 1:1, so pfn == mfn in this case. No functional changes on x86. From: Chen Baozi Signed-off-by: Chen Baozi Signed-off-by: Stefano Stabellini Tested-by: Chen Baozi Acked-by: Konrad Rzeszutek Wilk Signed-off-by: David Vrabel --- arch/arm/include/asm/xen/page.h | 1 + arch/arm/xen/mm.c | 15 +++++++++++++++ arch/x86/include/asm/xen/page.h | 5 +++++ drivers/xen/swiotlb-xen.c | 2 +- 4 files changed, 22 insertions(+), 1 deletion(-) diff --git a/arch/arm/include/asm/xen/page.h b/arch/arm/include/asm/xen/page.h index 2f7e6ff67d51..0b579b2f4e0e 100644 --- a/arch/arm/include/asm/xen/page.h +++ b/arch/arm/include/asm/xen/page.h @@ -110,5 +110,6 @@ static inline bool set_phys_to_machine(unsigned long pfn, unsigned long mfn) bool xen_arch_need_swiotlb(struct device *dev, unsigned long pfn, unsigned long mfn); +unsigned long xen_get_swiotlb_free_pages(unsigned int order); #endif /* _ASM_ARM_XEN_PAGE_H */ diff --git a/arch/arm/xen/mm.c b/arch/arm/xen/mm.c index 793551d15f1d..498325074a06 100644 --- a/arch/arm/xen/mm.c +++ b/arch/arm/xen/mm.c @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include @@ -21,6 +22,20 @@ #include #include +unsigned long xen_get_swiotlb_free_pages(unsigned int order) +{ + struct memblock_region *reg; + gfp_t flags = __GFP_NOWARN; + + for_each_memblock(memory, reg) { + if (reg->base < (phys_addr_t)0xffffffff) { + flags |= __GFP_DMA; + break; + } + } + return __get_free_pages(flags, order); +} + enum dma_cache_op { DMA_UNMAP, DMA_MAP, diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h index 358dcd338915..c44a5d53e464 100644 --- a/arch/x86/include/asm/xen/page.h +++ b/arch/x86/include/asm/xen/page.h @@ -269,4 +269,9 @@ static inline bool xen_arch_need_swiotlb(struct device *dev, return false; } +static inline unsigned long xen_get_swiotlb_free_pages(unsigned int order) +{ + return __get_free_pages(__GFP_NOWARN, order); +} + #endif /* _ASM_X86_XEN_PAGE_H */ diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c index 810ad419e34c..4c549323c605 100644 --- a/drivers/xen/swiotlb-xen.c +++ b/drivers/xen/swiotlb-xen.c @@ -235,7 +235,7 @@ int __ref xen_swiotlb_init(int verbose, bool early) #define SLABS_PER_PAGE (1 << (PAGE_SHIFT - IO_TLB_SHIFT)) #define IO_TLB_MIN_SLABS ((1<<20) >> IO_TLB_SHIFT) while ((SLABS_PER_PAGE << order) > IO_TLB_MIN_SLABS) { - xen_io_tlb_start = (void *)__get_free_pages(__GFP_NOWARN, order); + xen_io_tlb_start = (void *)xen_get_swiotlb_free_pages(order); if (xen_io_tlb_start) break; order--; From c5272a28566b00cce79127ad382406e0a8650690 Mon Sep 17 00:00:00 2001 From: Doug Anderson Date: Fri, 1 May 2015 09:01:27 -0700 Subject: [PATCH 098/101] pinctrl: Don't just pretend to protect pinctrl_maps, do it for real Way back, when the world was a simpler place and there was no war, no evil, and no kernel bugs, there was just a single pinctrl lock. That was how the world was when (57291ce pinctrl: core device tree mapping table parsing support) was written. In that case, there were instances where the pinctrl mutex was already held when pinctrl_register_map() was called, hence a "locked" parameter was passed to the function to indicate that the mutex was already locked (so we shouldn't lock it again). A few years ago in (42fed7b pinctrl: move subsystem mutex to pinctrl_dev struct), we switched to a separate pinctrl_maps_mutex. ...but (oops) we forgot to re-think about the whole "locked" parameter for pinctrl_register_map(). Basically the "locked" parameter appears to still refer to whether the bigger pinctrl_dev mutex is locked, but we're using it to skip locks of our (now separate) pinctrl_maps_mutex. That's kind of a bad thing(TM). Probably nobody noticed because most of the calls to pinctrl_register_map happen at boot time and we've got synchronous device probing. ...and even cases where we're asynchronous don't end up actually hitting the race too often. ...but after banging my head against the wall for a bug that reproduced 1 out of 1000 reboots and lots of looking through kgdb, I finally noticed this. Anyway, we can now safely remove the "locked" parameter and go back to a war-free, evil-free, and kernel-bug-free world. Fixes: 42fed7ba44e4 ("pinctrl: move subsystem mutex to pinctrl_dev struct") Signed-off-by: Doug Anderson Signed-off-by: Linus Walleij --- drivers/pinctrl/core.c | 10 ++++------ drivers/pinctrl/core.h | 2 +- drivers/pinctrl/devicetree.c | 2 +- 3 files changed, 6 insertions(+), 8 deletions(-) diff --git a/drivers/pinctrl/core.c b/drivers/pinctrl/core.c index 89dca77ca038..18ee2089df4a 100644 --- a/drivers/pinctrl/core.c +++ b/drivers/pinctrl/core.c @@ -1110,7 +1110,7 @@ void devm_pinctrl_put(struct pinctrl *p) EXPORT_SYMBOL_GPL(devm_pinctrl_put); int pinctrl_register_map(struct pinctrl_map const *maps, unsigned num_maps, - bool dup, bool locked) + bool dup) { int i, ret; struct pinctrl_maps *maps_node; @@ -1178,11 +1178,9 @@ int pinctrl_register_map(struct pinctrl_map const *maps, unsigned num_maps, maps_node->maps = maps; } - if (!locked) - mutex_lock(&pinctrl_maps_mutex); + mutex_lock(&pinctrl_maps_mutex); list_add_tail(&maps_node->node, &pinctrl_maps); - if (!locked) - mutex_unlock(&pinctrl_maps_mutex); + mutex_unlock(&pinctrl_maps_mutex); return 0; } @@ -1197,7 +1195,7 @@ int pinctrl_register_map(struct pinctrl_map const *maps, unsigned num_maps, int pinctrl_register_mappings(struct pinctrl_map const *maps, unsigned num_maps) { - return pinctrl_register_map(maps, num_maps, true, false); + return pinctrl_register_map(maps, num_maps, true); } void pinctrl_unregister_map(struct pinctrl_map const *map) diff --git a/drivers/pinctrl/core.h b/drivers/pinctrl/core.h index 75476b3d87da..b24ea846c867 100644 --- a/drivers/pinctrl/core.h +++ b/drivers/pinctrl/core.h @@ -183,7 +183,7 @@ static inline struct pin_desc *pin_desc_get(struct pinctrl_dev *pctldev, } int pinctrl_register_map(struct pinctrl_map const *maps, unsigned num_maps, - bool dup, bool locked); + bool dup); void pinctrl_unregister_map(struct pinctrl_map const *map); extern int pinctrl_force_sleep(struct pinctrl_dev *pctldev); diff --git a/drivers/pinctrl/devicetree.c b/drivers/pinctrl/devicetree.c index eda13de2e7c0..0bbf7d71b281 100644 --- a/drivers/pinctrl/devicetree.c +++ b/drivers/pinctrl/devicetree.c @@ -92,7 +92,7 @@ static int dt_remember_or_free_map(struct pinctrl *p, const char *statename, dt_map->num_maps = num_maps; list_add_tail(&dt_map->node, &p->dt_maps); - return pinctrl_register_map(map, num_maps, false, true); + return pinctrl_register_map(map, num_maps, false); } struct pinctrl_dev *of_pinctrl_get(struct device_node *np) From 8b10c5e2b59ef2a80a07ab594a3b4987a4676211 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 1 May 2015 16:08:46 +0200 Subject: [PATCH 099/101] perf: Annotate inherited event ctx->mutex recursion While fuzzing Sasha tripped over another ctx->mutex recursion lockdep splat. Annotate this. Reported-by: Sasha Levin Signed-off-by: Peter Zijlstra (Intel) Cc: Borislav Petkov Cc: H. Peter Anvin Cc: Jiri Olsa Cc: Linus Torvalds Cc: Thomas Gleixner Cc: Vince Weaver Signed-off-by: Ingo Molnar --- kernel/events/core.c | 41 +++++++++++++++++++++++++++++++++-------- 1 file changed, 33 insertions(+), 8 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 81aa3a4ece9f..1a3bf48743ce 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -913,10 +913,30 @@ static void put_ctx(struct perf_event_context *ctx) * Those places that change perf_event::ctx will hold both * perf_event_ctx::mutex of the 'old' and 'new' ctx value. * - * Lock ordering is by mutex address. There is one other site where - * perf_event_context::mutex nests and that is put_event(). But remember that - * that is a parent<->child context relation, and migration does not affect - * children, therefore these two orderings should not interact. + * Lock ordering is by mutex address. There are two other sites where + * perf_event_context::mutex nests and those are: + * + * - perf_event_exit_task_context() [ child , 0 ] + * __perf_event_exit_task() + * sync_child_event() + * put_event() [ parent, 1 ] + * + * - perf_event_init_context() [ parent, 0 ] + * inherit_task_group() + * inherit_group() + * inherit_event() + * perf_event_alloc() + * perf_init_event() + * perf_try_init_event() [ child , 1 ] + * + * While it appears there is an obvious deadlock here -- the parent and child + * nesting levels are inverted between the two. This is in fact safe because + * life-time rules separate them. That is an exiting task cannot fork, and a + * spawning task cannot (yet) exit. + * + * But remember that that these are parent<->child context relations, and + * migration does not affect children, therefore these two orderings should not + * interact. * * The change in perf_event::ctx does not affect children (as claimed above) * because the sys_perf_event_open() case will install a new event and break @@ -3657,9 +3677,6 @@ static void perf_remove_from_owner(struct perf_event *event) } } -/* - * Called when the last reference to the file is gone. - */ static void put_event(struct perf_event *event) { struct perf_event_context *ctx; @@ -3697,6 +3714,9 @@ int perf_event_release_kernel(struct perf_event *event) } EXPORT_SYMBOL_GPL(perf_event_release_kernel); +/* + * Called when the last reference to the file is gone. + */ static int perf_release(struct inode *inode, struct file *file) { put_event(file->private_data); @@ -7364,7 +7384,12 @@ static int perf_try_init_event(struct pmu *pmu, struct perf_event *event) return -ENODEV; if (event->group_leader != event) { - ctx = perf_event_ctx_lock(event->group_leader); + /* + * This ctx->mutex can nest when we're called through + * inheritance. See the perf_event_ctx_lock_nested() comment. + */ + ctx = perf_event_ctx_lock_nested(event->group_leader, + SINGLE_DEPTH_NESTING); BUG_ON(!ctx); } From 6d374056354a742eed4d0050498101e56e794c4b Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Tue, 21 Apr 2015 05:34:41 -0400 Subject: [PATCH 100/101] perf/x86/intel: Fix SLM cache event list iTLB-load-misses and LLC-load-misses count incorrectly on SLM. There is no ITLB.MISSES support on SLM. Event PAGE_WALKS.I_SIDE_WALK should be used to count iTLB-load-misses. This event counts when an instruction (I) page walk is completed or started. Since a page walk implies a TLB miss, the number of TLB misses can be counted by counting the number of pagewalks. DMND_DATA_RD counts both demand and DCU prefetch data reads. However, LLC-load-misses should only count demand reads. There is no way to not include prefetches with a single counter on SLM. So the LLC-load-misses support should be removed on SLM. Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Cc: Borislav Petkov Cc: H. Peter Anvin Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1429608881-5055-1-git-send-email-kan.liang@intel.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_intel.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 960e85de13fb..3998131d1a68 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -1134,7 +1134,7 @@ static __initconst const u64 slm_hw_cache_extra_regs [ C(LL ) ] = { [ C(OP_READ) ] = { [ C(RESULT_ACCESS) ] = SLM_DMND_READ|SLM_LLC_ACCESS, - [ C(RESULT_MISS) ] = SLM_DMND_READ|SLM_LLC_MISS, + [ C(RESULT_MISS) ] = 0, }, [ C(OP_WRITE) ] = { [ C(RESULT_ACCESS) ] = SLM_DMND_WRITE|SLM_LLC_ACCESS, @@ -1184,8 +1184,7 @@ static __initconst const u64 slm_hw_cache_event_ids [ C(OP_READ) ] = { /* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */ [ C(RESULT_ACCESS) ] = 0x01b7, - /* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */ - [ C(RESULT_MISS) ] = 0x01b7, + [ C(RESULT_MISS) ] = 0, }, [ C(OP_WRITE) ] = { /* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */ @@ -1217,7 +1216,7 @@ static __initconst const u64 slm_hw_cache_event_ids [ C(ITLB) ] = { [ C(OP_READ) ] = { [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */ - [ C(RESULT_MISS) ] = 0x0282, /* ITLB.MISSES */ + [ C(RESULT_MISS) ] = 0x40205, /* PAGE_WALKS.I_SIDE_WALKS */ }, [ C(OP_WRITE) ] = { [ C(RESULT_ACCESS) ] = -1, From 44b11fee51711ca85aa2b121a49bf029d18a3722 Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Thu, 23 Apr 2015 09:07:09 +0200 Subject: [PATCH 101/101] perf/x86/rapl: Enable Broadwell-U RAPL support This patch enables RAPL counters (energy consumption counters) support for Intel Broadwell-U processors (Model 61): To use: $ perf stat -a -I 1000 -e power/energy-cores/,power/energy-pkg/,power/energy-ram/ sleep 10 Signed-off-by: Stephane Eranian Cc: Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Thomas Gleixner Cc: jacob.jun.pan@linux.intel.com Cc: kan.liang@intel.com Cc: peterz@infradead.org Cc: sonnyrao@chromium.org Link: http://lkml.kernel.org/r/20150423070709.GA4970@thinkpad Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_intel_rapl.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/kernel/cpu/perf_event_intel_rapl.c b/arch/x86/kernel/cpu/perf_event_intel_rapl.c index 999289b94025..358c54ad20d4 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_rapl.c +++ b/arch/x86/kernel/cpu/perf_event_intel_rapl.c @@ -722,6 +722,7 @@ static int __init rapl_pmu_init(void) break; case 60: /* Haswell */ case 69: /* Haswell-Celeron */ + case 61: /* Broadwell */ rapl_cntr_mask = RAPL_IDX_HSW; rapl_pmu_events_group.attrs = rapl_events_hsw_attr; break;