From 4a262b14c57d6ec88982d40273e742f7b9151560 Mon Sep 17 00:00:00 2001 From: Shawn Lin Date: Wed, 12 Oct 2016 20:31:41 +0800 Subject: [PATCH 001/319] clk: rockchip: don't return NULL when failing to register ddrclk branch rockchip_clk_register_ddrclk should not return NULL when failing to call clk_register, otherwise rockchip_clk_register_branches prints "unknown clock type". The actual case is that it's a known clock type but we fail to register it, which may makes user confuse the reason of failure. And the pr_err here is pointless as rockchip_clk_register_branches will also print the similar message. Signed-off-by: Shawn Lin Signed-off-by: Heiko Stuebner --- drivers/clk/rockchip/clk-ddr.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/clk/rockchip/clk-ddr.c b/drivers/clk/rockchip/clk-ddr.c index 8feba93672c5..e8075359366b 100644 --- a/drivers/clk/rockchip/clk-ddr.c +++ b/drivers/clk/rockchip/clk-ddr.c @@ -144,11 +144,8 @@ struct clk *rockchip_clk_register_ddrclk(const char *name, int flags, ddrclk->ddr_flag = ddr_flag; clk = clk_register(NULL, &ddrclk->hw); - if (IS_ERR(clk)) { - pr_err("%s: could not register ddrclk %s\n", __func__, name); + if (IS_ERR(clk)) kfree(ddrclk); - return NULL; - } return clk; } From faead41cc7213ccef5a58c1bf518ac24816fe8a6 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 22 Sep 2016 10:31:41 +0200 Subject: [PATCH 002/319] iwlwifi: pcie: mark command queue lock with separate lockdep class Emmanuel reports that when CMD_WANT_ASYNC_CALLBACK is used by mvm, the callback will be called with the command queue lock held, and mvm will try to stop all (other) TX queues, which acquires their locks - this caused a false lockdep recursive locking report. Suppress this report by marking the command queue lock with a new, separate, lock class so lockdep can tell the difference between the two types of queues. Fixes: 156f92f2b471 ("iwlwifi: block the queues when we send ADD_STA for uAPSD") Reported-by: Emmanuel Grumbach Signed-off-by: Johannes Berg Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/pcie/tx.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c index e9a278b60dfd..5f840f16f40b 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c @@ -592,6 +592,7 @@ static int iwl_pcie_txq_alloc(struct iwl_trans *trans, static int iwl_pcie_txq_init(struct iwl_trans *trans, struct iwl_txq *txq, int slots_num, u32 txq_id) { + struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); int ret; txq->need_update = false; @@ -606,6 +607,13 @@ static int iwl_pcie_txq_init(struct iwl_trans *trans, struct iwl_txq *txq, return ret; spin_lock_init(&txq->lock); + + if (txq_id == trans_pcie->cmd_queue) { + static struct lock_class_key iwl_pcie_cmd_queue_lock_class; + + lockdep_set_class(&txq->lock, &iwl_pcie_cmd_queue_lock_class); + } + __skb_queue_head_init(&txq->overflow_q); /* From 276c4b4b74b6d5bc3cab35534409f3ad32464b78 Mon Sep 17 00:00:00 2001 From: Luca Coelho Date: Wed, 28 Sep 2016 11:32:35 +0300 Subject: [PATCH 003/319] iwlwifi: mvm: use ssize_t for len in iwl_debugfs_mem_read() In iwl_dbgfs_mem_read(), the len variable may become negative and is compared to < 0 (an error case). Comparing size_t (which is unsigned) to < 0 causes a warning on certain platforms (like i386): drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c:1561:5-8: WARNING: Unsigned expression compared with zero: len < 0 To prevent that, use ssize_t for len instead. Fixes: commit 2b55f43f8e47 ("iwlwifi: mvm: Add mem debugfs entry") Reported-by: kbuild test robot Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c index 539d718df797..06805a63f091 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c @@ -1529,8 +1529,8 @@ static ssize_t iwl_dbgfs_mem_read(struct file *file, char __user *user_buf, .data = { &cmd, }, .len = { sizeof(cmd) }, }; - size_t delta, len; - ssize_t ret; + size_t delta; + ssize_t ret, len; hcmd.id = iwl_cmd_id(*ppos >> 24 ? UMAC_RD_WR : LMAC_RD_WR, DEBUG_GROUP, 0); From 85cd69b8f1f7e289fe931a82889e673fd0f04842 Mon Sep 17 00:00:00 2001 From: Luca Coelho Date: Wed, 5 Oct 2016 11:24:12 +0300 Subject: [PATCH 004/319] iwlwifi: mvm: fix d3_test with unified D0/D3 images When a unified D0/D3 image is used, we don't restart the FW in the D0->D3->D0 transitions. Therefore, the d3_test functionality should not call ieee8021_restart_hw() when the resuming either. Fixes: commit 23ae61282b88 ("iwlwifi: mvm: Do not switch to D3 image on suspend") Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/d3.c | 25 ++++++++++++--------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/d3.c b/drivers/net/wireless/intel/iwlwifi/mvm/d3.c index 4fdc3dad3e85..0e17cb238643 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/d3.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/d3.c @@ -2271,7 +2271,8 @@ static void iwl_mvm_d3_test_disconn_work_iter(void *_data, u8 *mac, static int iwl_mvm_d3_test_release(struct inode *inode, struct file *file) { struct iwl_mvm *mvm = inode->i_private; - int remaining_time = 10; + bool unified_image = fw_has_capa(&mvm->fw->ucode_capa, + IWL_UCODE_TLV_CAPA_CNSLDTD_D3_D0_IMG); mvm->d3_test_active = false; @@ -2282,18 +2283,22 @@ static int iwl_mvm_d3_test_release(struct inode *inode, struct file *file) mvm->trans->system_pm_mode = IWL_PLAT_PM_MODE_DISABLED; iwl_abort_notification_waits(&mvm->notif_wait); - ieee80211_restart_hw(mvm->hw); + if (!unified_image) { + int remaining_time = 10; - /* wait for restart and disconnect all interfaces */ - while (test_bit(IWL_MVM_STATUS_IN_HW_RESTART, &mvm->status) && - remaining_time > 0) { - remaining_time--; - msleep(1000); + ieee80211_restart_hw(mvm->hw); + + /* wait for restart and disconnect all interfaces */ + while (test_bit(IWL_MVM_STATUS_IN_HW_RESTART, &mvm->status) && + remaining_time > 0) { + remaining_time--; + msleep(1000); + } + + if (remaining_time == 0) + IWL_ERR(mvm, "Timed out waiting for HW restart!\n"); } - if (remaining_time == 0) - IWL_ERR(mvm, "Timed out waiting for HW restart to finish!\n"); - ieee80211_iterate_active_interfaces_atomic( mvm->hw, IEEE80211_IFACE_ITER_NORMAL, iwl_mvm_d3_test_disconn_work_iter, mvm->keep_vif); From 5bfadc8255e2cd92be7538fd7dfa777c27f58be0 Mon Sep 17 00:00:00 2001 From: Haim Dreyfuss Date: Mon, 12 Sep 2016 10:24:19 +0300 Subject: [PATCH 005/319] iwlwifi: mvm: comply with fw_restart mod param on suspend If the suspend flow fails, we restart the hardware to go back to the D0 image (with non-unified images), but we don't comply with the fw_restart module parameter. If something goes wrong when starting the D3 image, we may want to debug it, so we should comply with the fw_restart flag to avoid clearing everything up and losing the firmware state when the error occurred. Signed-off-by: Haim Dreyfuss Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/d3.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/d3.c b/drivers/net/wireless/intel/iwlwifi/mvm/d3.c index 0e17cb238643..03a8fc586548 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/d3.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/d3.c @@ -1254,7 +1254,10 @@ static int __iwl_mvm_suspend(struct ieee80211_hw *hw, out: if (ret < 0) { iwl_mvm_ref(mvm, IWL_MVM_REF_UCODE_DOWN); - ieee80211_restart_hw(mvm->hw); + if (mvm->restart_fw > 0) { + mvm->restart_fw--; + ieee80211_restart_hw(mvm->hw); + } iwl_mvm_free_nd(mvm); } out_noreset: From 3a732c65de427fdae67a243fd331356034b5a1e8 Mon Sep 17 00:00:00 2001 From: Sara Sharon Date: Sun, 9 Oct 2016 17:34:24 +0300 Subject: [PATCH 006/319] iwlwifi: mvm: wake the wait queue when the RX sync counter is zero When we sync the RX queues the driver waits to receive echo notification on all the RX queues. The wait queue is set with timeout until all queues have received the notification. However, iwl_mvm_rx_queue_notif() never woke up the wait queue, with the result of the counter value being checked only when the timeout expired. This may cause a latency of up to 1 second. Fixes: 0636b938214c ("iwlwifi: mvm: implement driver RX queues sync command") Signed-off-by: Sara Sharon Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c | 3 +-- drivers/net/wireless/intel/iwlwifi/mvm/mvm.h | 1 + drivers/net/wireless/intel/iwlwifi/mvm/ops.c | 1 + drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c | 3 ++- 4 files changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c index 318efd814037..1db1dc13e988 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c @@ -4121,7 +4121,6 @@ void iwl_mvm_sync_rx_queues_internal(struct iwl_mvm *mvm, struct iwl_mvm_internal_rxq_notif *notif, u32 size) { - DECLARE_WAIT_QUEUE_HEAD_ONSTACK(notif_waitq); u32 qmask = BIT(mvm->trans->num_rx_queues) - 1; int ret; @@ -4143,7 +4142,7 @@ void iwl_mvm_sync_rx_queues_internal(struct iwl_mvm *mvm, } if (notif->sync) - ret = wait_event_timeout(notif_waitq, + ret = wait_event_timeout(mvm->rx_sync_waitq, atomic_read(&mvm->queue_sync_counter) == 0, HZ); WARN_ON_ONCE(!ret); diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h index d17cbf603f7c..c60703e0c246 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h @@ -937,6 +937,7 @@ struct iwl_mvm { /* sync d0i3_tx queue and IWL_MVM_STATUS_IN_D0I3 status flag */ spinlock_t d0i3_tx_lock; wait_queue_head_t d0i3_exit_waitq; + wait_queue_head_t rx_sync_waitq; /* BT-Coex */ struct iwl_bt_coex_profile_notif last_bt_notif; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c index 05fe6dd1a2c8..4d35deb628bc 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c @@ -619,6 +619,7 @@ iwl_op_mode_mvm_start(struct iwl_trans *trans, const struct iwl_cfg *cfg, spin_lock_init(&mvm->refs_lock); skb_queue_head_init(&mvm->d0i3_tx); init_waitqueue_head(&mvm->d0i3_exit_waitq); + init_waitqueue_head(&mvm->rx_sync_waitq); atomic_set(&mvm->queue_sync_counter, 0); diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c index a57c6ef5bc14..6c802cee900c 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c @@ -547,7 +547,8 @@ void iwl_mvm_rx_queue_notif(struct iwl_mvm *mvm, struct iwl_rx_cmd_buffer *rxb, "Received expired RX queue sync message\n"); return; } - atomic_dec(&mvm->queue_sync_counter); + if (!atomic_dec_return(&mvm->queue_sync_counter)) + wake_up(&mvm->rx_sync_waitq); } switch (internal_notif->type) { From aa156c8aee22a24865bf94d0c4a5f604f687fa7d Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Fri, 14 Oct 2016 10:15:35 -0300 Subject: [PATCH 007/319] rtc: asm9260: fix module autoload If the driver is built as a module, autoload won't work because the module alias information is not filled so user-space can't match the registered device with the corresponding module. Export the module alias information using the MODULE_DEVICE_TABLE() macro. Before this patch: $ modinfo drivers/rtc/rtc-asm9260.ko | grep alias $ After this patch: $ modinfo drivers/rtc/rtc-asm9260.ko | grep alias alias: of:N*T*Calphascale,asm9260-rtcC* alias: of:N*T*Calphascale,asm9260-rtc Signed-off-by: Javier Martinez Canillas Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-asm9260.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/rtc/rtc-asm9260.c b/drivers/rtc/rtc-asm9260.c index 18a93d3e3f93..d36534965635 100644 --- a/drivers/rtc/rtc-asm9260.c +++ b/drivers/rtc/rtc-asm9260.c @@ -327,6 +327,7 @@ static const struct of_device_id asm9260_dt_ids[] = { { .compatible = "alphascale,asm9260-rtc", }, {} }; +MODULE_DEVICE_TABLE(of, asm9260_dt_ids); static struct platform_driver asm9260_rtc_driver = { .probe = asm9260_rtc_probe, From a3a0673b9db6fad2a3f7874c34e4b5cbc5fa01c6 Mon Sep 17 00:00:00 2001 From: LABBE Corentin Date: Tue, 18 Oct 2016 16:39:54 +0200 Subject: [PATCH 008/319] rtc: cmos: remove all __exit_p annotations I got the following stack trace under qemu: [ 7.575243] BUG: unable to handle kernel NULL pointer dereference at 0000000000000010 [ 7.596098] IP: [] cmos_set_alarm+0x38/0x280 [ 7.615699] PGD 3ccbe067 [ 7.615923] PUD 3daf2067 [ 7.635156] PMD 0 [ 7.654358] Oops: 0000 [#1] SMP [ 7.673869] Modules linked in: [ 7.693235] CPU: 0 PID: 1701 Comm: hwclock Tainted: G W 4.9.0-rc1+ #24 [ 7.712455] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.9.3-0-ge2fc41e-prebuilt.qemu-project.org 04/01/2014 [ 7.753569] task: ffff88003d88dc40 task.stack: ffffc90000224000 [ 7.773743] RIP: 0010:[] [] cmos_set_alarm+0x38/0x280 [ 7.794893] RSP: 0018:ffffc90000227c10 EFLAGS: 00010296 [ 7.815890] RAX: 000000000000001d RBX: ffffc90000227d28 RCX: ffffffff8182be78 [ 7.836057] RDX: 0000000000000001 RSI: 0000000000000202 RDI: 0000000000000202 [ 7.856612] RBP: ffffc90000227c48 R08: 0000000000000000 R09: 0000000000000001 [ 7.877561] R10: 00000000000001c0 R11: 00000000000001c0 R12: 0000000000000000 [ 7.897072] R13: ffff88003d96f400 R14: ffff88003dac6410 R15: ffff88003dac6420 [ 7.917403] FS: 00007f77f42d9700(0000) GS:ffff88003fc00000(0000) knlGS:0000000000000000 [ 7.938293] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 7.958364] CR2: 0000000000000010 CR3: 000000003ccbb000 CR4: 00000000000006f0 [ 7.978028] Stack: [ 7.997120] ffff88003dac6000 ffff88003dac6410 0000000058049d01 ffffc90000227d28 [ 8.016993] ffff88003dac6000 ffff88003dac6410 ffff88003dac6420 ffffc90000227c98 [ 8.039505] ffffffff814f225d 0000001800227c98 000000090000002a 0000000900000011 [ 8.059985] Call Trace: [ 8.080110] [] __rtc_set_alarm+0x8d/0xa0 [ 8.099421] [] rtc_timer_enqueue+0x119/0x190 [ 8.119925] [] rtc_update_irq_enable+0xbe/0x100 [ 8.140583] [] rtc_dev_ioctl+0x3c0/0x480 [ 8.161162] [] ? user_path_at_empty+0x3a/0x50 [ 8.182717] [] do_vfs_ioctl+0x96/0x5c0 [ 8.204624] [] ? vfs_stat+0x16/0x20 [ 8.225994] [] ? SyS_newstat+0x15/0x30 [ 8.247043] [] SyS_ioctl+0x47/0x80 [ 8.267191] [] entry_SYSCALL_64_fastpath+0x1a/0xa9 [ 8.288719] Code: 6a 81 48 89 e5 41 57 41 56 41 55 49 89 fd 41 54 53 48 89 f3 48 c7 c6 20 c4 78 81 48 83 ec 10 e8 8f 00 ef ff 4d 8b a5 a0 00 00 00 <41> 8b 44 24 10 85 c0 0f 8e 2b 02 00 00 4c 89 ef 31 c0 b9 53 01 [ 8.335233] RIP [] cmos_set_alarm+0x38/0x280 [ 8.357096] RSP [ 8.379051] CR2: 0000000000000010 [ 8.401736] ---[ end trace 5cbcd83a1f225ed3 ]--- This occur only when CONFIG_DEBUG_TEST_DRIVER_REMOVE is enabled and CONFIG_RTC_DRV_CMOS builtin. When cmos_set_alarm() is called dev is NULL and so trigger the deref via cmos->irq The problem comes from that the device is removed but no remove function are called due to _exit_p(). This patch remove all _exit_p() annotation. Signed-off-by: Corentin Labbe Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-cmos.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c index dd3d59806ffa..6f0e12e66296 100644 --- a/drivers/rtc/rtc-cmos.c +++ b/drivers/rtc/rtc-cmos.c @@ -776,7 +776,7 @@ static void cmos_do_shutdown(int rtc_irq) spin_unlock_irq(&rtc_lock); } -static void __exit cmos_do_remove(struct device *dev) +static void cmos_do_remove(struct device *dev) { struct cmos_rtc *cmos = dev_get_drvdata(dev); struct resource *ports; @@ -1129,7 +1129,7 @@ static int cmos_pnp_probe(struct pnp_dev *pnp, const struct pnp_device_id *id) pnp_irq(pnp, 0)); } -static void __exit cmos_pnp_remove(struct pnp_dev *pnp) +static void cmos_pnp_remove(struct pnp_dev *pnp) { cmos_do_remove(&pnp->dev); } @@ -1161,7 +1161,7 @@ static struct pnp_driver cmos_pnp_driver = { .name = (char *) driver_name, .id_table = rtc_ids, .probe = cmos_pnp_probe, - .remove = __exit_p(cmos_pnp_remove), + .remove = cmos_pnp_remove, .shutdown = cmos_pnp_shutdown, /* flag ensures resume() gets called, and stops syslog spam */ @@ -1238,7 +1238,7 @@ static int __init cmos_platform_probe(struct platform_device *pdev) return cmos_do_probe(&pdev->dev, resource, irq); } -static int __exit cmos_platform_remove(struct platform_device *pdev) +static int cmos_platform_remove(struct platform_device *pdev) { cmos_do_remove(&pdev->dev); return 0; @@ -1263,7 +1263,7 @@ static void cmos_platform_shutdown(struct platform_device *pdev) MODULE_ALIAS("platform:rtc_cmos"); static struct platform_driver cmos_platform_driver = { - .remove = __exit_p(cmos_platform_remove), + .remove = cmos_platform_remove, .shutdown = cmos_platform_shutdown, .driver = { .name = driver_name, From e0d9727c111a5917a1184c71c1a8e6f78c7fc41d Mon Sep 17 00:00:00 2001 From: Luca Coelho Date: Thu, 13 Oct 2016 10:07:07 +0300 Subject: [PATCH 009/319] iwlwifi: pcie: fix SPLC structure parsing The SPLC data parsing is too restrictive and was not trying find the correct element for WiFi. This causes problems with some BIOSes where the SPLC method exists, but doesn't have a WiFi entry on the first element of the list. The domain type values are also incorrect according to the specification. Fix this by complying with the actual specification. Additionally, replace all occurrences of SPLX to SPLC, since SPLX is only a structure internal to the ACPI tables, and may not even exist. Fixes: bcb079a14d75 ("iwlwifi: pcie: retrieve and parse ACPI power limitations") Reported-by: Chris Rorvick Tested-by: Paul Bolle Tested-by: Chris Rorvick Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/pcie/drv.c | 81 +++++++++++-------- 1 file changed, 49 insertions(+), 32 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c index 001be406a3d3..2f8134b2a504 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c @@ -541,48 +541,64 @@ static const struct pci_device_id iwl_hw_card_ids[] = { MODULE_DEVICE_TABLE(pci, iwl_hw_card_ids); #ifdef CONFIG_ACPI -#define SPL_METHOD "SPLC" -#define SPL_DOMAINTYPE_MODULE BIT(0) -#define SPL_DOMAINTYPE_WIFI BIT(1) -#define SPL_DOMAINTYPE_WIGIG BIT(2) -#define SPL_DOMAINTYPE_RFEM BIT(3) +#define ACPI_SPLC_METHOD "SPLC" +#define ACPI_SPLC_DOMAIN_WIFI (0x07) -static u64 splx_get_pwr_limit(struct iwl_trans *trans, union acpi_object *splx) +static u64 splc_get_pwr_limit(struct iwl_trans *trans, union acpi_object *splc) { - union acpi_object *limits, *domain_type, *power_limit; + union acpi_object *data_pkg, *dflt_pwr_limit; + int i; - if (splx->type != ACPI_TYPE_PACKAGE || - splx->package.count != 2 || - splx->package.elements[0].type != ACPI_TYPE_INTEGER || - splx->package.elements[0].integer.value != 0) { - IWL_ERR(trans, "Unsupported splx structure\n"); + /* We need at least two elements, one for the revision and one + * for the data itself. Also check that the revision is + * supported (currently only revision 0). + */ + if (splc->type != ACPI_TYPE_PACKAGE || + splc->package.count < 2 || + splc->package.elements[0].type != ACPI_TYPE_INTEGER || + splc->package.elements[0].integer.value != 0) { + IWL_DEBUG_INFO(trans, + "Unsupported structure returned by the SPLC method. Ignoring.\n"); return 0; } - limits = &splx->package.elements[1]; - if (limits->type != ACPI_TYPE_PACKAGE || - limits->package.count < 2 || - limits->package.elements[0].type != ACPI_TYPE_INTEGER || - limits->package.elements[1].type != ACPI_TYPE_INTEGER) { - IWL_ERR(trans, "Invalid limits element\n"); + /* loop through all the packages to find the one for WiFi */ + for (i = 1; i < splc->package.count; i++) { + union acpi_object *domain; + + data_pkg = &splc->package.elements[i]; + + /* Skip anything that is not a package with the right + * amount of elements (i.e. at least 2 integers). + */ + if (data_pkg->type != ACPI_TYPE_PACKAGE || + data_pkg->package.count < 2 || + data_pkg->package.elements[0].type != ACPI_TYPE_INTEGER || + data_pkg->package.elements[1].type != ACPI_TYPE_INTEGER) + continue; + + domain = &data_pkg->package.elements[0]; + if (domain->integer.value == ACPI_SPLC_DOMAIN_WIFI) + break; + + data_pkg = NULL; + } + + if (!data_pkg) { + IWL_DEBUG_INFO(trans, + "No element for the WiFi domain returned by the SPLC method.\n"); return 0; } - domain_type = &limits->package.elements[0]; - power_limit = &limits->package.elements[1]; - if (!(domain_type->integer.value & SPL_DOMAINTYPE_WIFI)) { - IWL_DEBUG_INFO(trans, "WiFi power is not limited\n"); - return 0; - } - - return power_limit->integer.value; + dflt_pwr_limit = &data_pkg->package.elements[1]; + return dflt_pwr_limit->integer.value; } static void set_dflt_pwr_limit(struct iwl_trans *trans, struct pci_dev *pdev) { acpi_handle pxsx_handle; acpi_handle handle; - struct acpi_buffer splx = {ACPI_ALLOCATE_BUFFER, NULL}; + struct acpi_buffer splc = {ACPI_ALLOCATE_BUFFER, NULL}; acpi_status status; pxsx_handle = ACPI_HANDLE(&pdev->dev); @@ -593,23 +609,24 @@ static void set_dflt_pwr_limit(struct iwl_trans *trans, struct pci_dev *pdev) } /* Get the method's handle */ - status = acpi_get_handle(pxsx_handle, (acpi_string)SPL_METHOD, &handle); + status = acpi_get_handle(pxsx_handle, (acpi_string)ACPI_SPLC_METHOD, + &handle); if (ACPI_FAILURE(status)) { - IWL_DEBUG_INFO(trans, "SPL method not found\n"); + IWL_DEBUG_INFO(trans, "SPLC method not found\n"); return; } /* Call SPLC with no arguments */ - status = acpi_evaluate_object(handle, NULL, NULL, &splx); + status = acpi_evaluate_object(handle, NULL, NULL, &splc); if (ACPI_FAILURE(status)) { IWL_ERR(trans, "SPLC invocation failed (0x%x)\n", status); return; } - trans->dflt_pwr_limit = splx_get_pwr_limit(trans, splx.pointer); + trans->dflt_pwr_limit = splc_get_pwr_limit(trans, splc.pointer); IWL_DEBUG_INFO(trans, "Default power limit set to %lld\n", trans->dflt_pwr_limit); - kfree(splx.pointer); + kfree(splc.pointer); } #else /* CONFIG_ACPI */ From 5a143db8c4a28dab6423cb6197e9f1389da375f2 Mon Sep 17 00:00:00 2001 From: Luca Coelho Date: Wed, 5 Oct 2016 09:28:53 +0300 Subject: [PATCH 010/319] iwlwifi: mvm: fix netdetect starting/stopping for unified images With unified images, we need to make sure the net-detect scan is stopped after resuming, since we don't restart the FW. Also, we need to make sure we check if there are enough scan slots available to run it, as we do with other scans. Fixes: commit 23ae61282b88 ("iwlwifi: mvm: Do not switch to D3 image on suspend") Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/d3.c | 19 +++++++++++ drivers/net/wireless/intel/iwlwifi/mvm/scan.c | 33 +++++++++++++++---- 2 files changed, 46 insertions(+), 6 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/d3.c b/drivers/net/wireless/intel/iwlwifi/mvm/d3.c index 03a8fc586548..b88e2048ae0b 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/d3.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/d3.c @@ -1087,6 +1087,15 @@ iwl_mvm_netdetect_config(struct iwl_mvm *mvm, ret = iwl_mvm_switch_to_d3(mvm); if (ret) return ret; + } else { + /* In theory, we wouldn't have to stop a running sched + * scan in order to start another one (for + * net-detect). But in practice this doesn't seem to + * work properly, so stop any running sched_scan now. + */ + ret = iwl_mvm_scan_stop(mvm, IWL_MVM_SCAN_SCHED, true); + if (ret) + return ret; } /* rfkill release can be either for wowlan or netdetect */ @@ -2091,6 +2100,16 @@ static int __iwl_mvm_resume(struct iwl_mvm *mvm, bool test) iwl_mvm_update_changed_regdom(mvm); if (mvm->net_detect) { + /* If this is a non-unified image, we restart the FW, + * so no need to stop the netdetect scan. If that + * fails, continue and try to get the wake-up reasons, + * but trigger a HW restart by keeping a failure code + * in ret. + */ + if (unified_image) + ret = iwl_mvm_scan_stop(mvm, IWL_MVM_SCAN_NETDETECT, + false); + iwl_mvm_query_netdetect_reasons(mvm, vif); /* has unlocked the mutex, so skip that */ goto out; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/scan.c b/drivers/net/wireless/intel/iwlwifi/mvm/scan.c index f279fdd6eb44..fa9743205491 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/scan.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/scan.c @@ -1199,6 +1199,9 @@ static int iwl_mvm_num_scans(struct iwl_mvm *mvm) static int iwl_mvm_check_running_scans(struct iwl_mvm *mvm, int type) { + bool unified_image = fw_has_capa(&mvm->fw->ucode_capa, + IWL_UCODE_TLV_CAPA_CNSLDTD_D3_D0_IMG); + /* This looks a bit arbitrary, but the idea is that if we run * out of possible simultaneous scans and the userspace is * trying to run a scan type that is already running, we @@ -1225,12 +1228,30 @@ static int iwl_mvm_check_running_scans(struct iwl_mvm *mvm, int type) return -EBUSY; return iwl_mvm_scan_stop(mvm, IWL_MVM_SCAN_REGULAR, true); case IWL_MVM_SCAN_NETDETECT: - /* No need to stop anything for net-detect since the - * firmware is restarted anyway. This way, any sched - * scans that were running will be restarted when we - * resume. - */ - return 0; + /* For non-unified images, there's no need to stop + * anything for net-detect since the firmware is + * restarted anyway. This way, any sched scans that + * were running will be restarted when we resume. + */ + if (!unified_image) + return 0; + + /* If this is a unified image and we ran out of scans, + * we need to stop something. Prefer stopping regular + * scans, because the results are useless at this + * point, and we should be able to keep running + * another scheduled scan while suspended. + */ + if (mvm->scan_status & IWL_MVM_SCAN_REGULAR_MASK) + return iwl_mvm_scan_stop(mvm, IWL_MVM_SCAN_REGULAR, + true); + if (mvm->scan_status & IWL_MVM_SCAN_SCHED_MASK) + return iwl_mvm_scan_stop(mvm, IWL_MVM_SCAN_SCHED, + true); + + /* fall through, something is wrong if no scan was + * running but we ran out of scans. + */ default: WARN_ON(1); break; From 446647d4b9b90335f29a06ec21a05b508e269866 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Wed, 19 Oct 2016 13:27:40 +0300 Subject: [PATCH 011/319] ideapad-laptop: Add another DMI entry for Yoga 900 This particular laptop has its motherboard replaced and after that, even with the latest BIOS, some DMI identification strings have become "INVALID". This includes DMI_PRODUCT_VERSION which results Wifi being blocked. It seems that DMI_BOARD_NAME is still valid so use that as an alternative for Lenovo Yoga 900. Signed-off-by: Mika Westerberg Signed-off-by: Darren Hart --- drivers/platform/x86/ideapad-laptop.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/platform/x86/ideapad-laptop.c b/drivers/platform/x86/ideapad-laptop.c index a2323941e677..a7614fc542b5 100644 --- a/drivers/platform/x86/ideapad-laptop.c +++ b/drivers/platform/x86/ideapad-laptop.c @@ -933,6 +933,13 @@ static const struct dmi_system_id no_hw_rfkill_list[] = { DMI_MATCH(DMI_PRODUCT_VERSION, "Lenovo YOGA 900"), }, }, + { + .ident = "Lenovo Yoga 900", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_BOARD_NAME, "VIUU4"), + }, + }, { .ident = "Lenovo YOGA 910-13IKB", .matches = { From 1c80e9603fe8341ed5bea696747d07083d5e0476 Mon Sep 17 00:00:00 2001 From: Azael Avalos Date: Thu, 25 Aug 2016 12:50:55 -0600 Subject: [PATCH 012/319] toshiba-wmi: Fix loading the driver on non Toshiba laptops Bug 150611 uncovered that the WMI ID used by the toshiba-wmi driver is not Toshiba specific, and as such, the driver was being loaded on non Toshiba laptops too. This patch adds a DMI matching list checking for TOSHIBA as the vendor, refusing to load if it is not. Also the WMI GUID was renamed, dropping the TOSHIBA_ prefix, to better reflect that such GUID is not a Toshiba specific one. Cc: # 4.4+ Signed-off-by: Azael Avalos Signed-off-by: Darren Hart --- drivers/platform/x86/toshiba-wmi.c | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/drivers/platform/x86/toshiba-wmi.c b/drivers/platform/x86/toshiba-wmi.c index feac4576b837..2df07ee8f3c3 100644 --- a/drivers/platform/x86/toshiba-wmi.c +++ b/drivers/platform/x86/toshiba-wmi.c @@ -24,14 +24,15 @@ #include #include #include +#include MODULE_AUTHOR("Azael Avalos"); MODULE_DESCRIPTION("Toshiba WMI Hotkey Driver"); MODULE_LICENSE("GPL"); -#define TOSHIBA_WMI_EVENT_GUID "59142400-C6A3-40FA-BADB-8A2652834100" +#define WMI_EVENT_GUID "59142400-C6A3-40FA-BADB-8A2652834100" -MODULE_ALIAS("wmi:"TOSHIBA_WMI_EVENT_GUID); +MODULE_ALIAS("wmi:"WMI_EVENT_GUID); static struct input_dev *toshiba_wmi_input_dev; @@ -63,6 +64,16 @@ static void toshiba_wmi_notify(u32 value, void *context) kfree(response.pointer); } +static struct dmi_system_id toshiba_wmi_dmi_table[] __initdata = { + { + .ident = "Toshiba laptop", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "TOSHIBA"), + }, + }, + {} +}; + static int __init toshiba_wmi_input_setup(void) { acpi_status status; @@ -81,7 +92,7 @@ static int __init toshiba_wmi_input_setup(void) if (err) goto err_free_dev; - status = wmi_install_notify_handler(TOSHIBA_WMI_EVENT_GUID, + status = wmi_install_notify_handler(WMI_EVENT_GUID, toshiba_wmi_notify, NULL); if (ACPI_FAILURE(status)) { err = -EIO; @@ -95,7 +106,7 @@ static int __init toshiba_wmi_input_setup(void) return 0; err_remove_notifier: - wmi_remove_notify_handler(TOSHIBA_WMI_EVENT_GUID); + wmi_remove_notify_handler(WMI_EVENT_GUID); err_free_keymap: sparse_keymap_free(toshiba_wmi_input_dev); err_free_dev: @@ -105,7 +116,7 @@ static int __init toshiba_wmi_input_setup(void) static void toshiba_wmi_input_destroy(void) { - wmi_remove_notify_handler(TOSHIBA_WMI_EVENT_GUID); + wmi_remove_notify_handler(WMI_EVENT_GUID); sparse_keymap_free(toshiba_wmi_input_dev); input_unregister_device(toshiba_wmi_input_dev); } @@ -114,7 +125,8 @@ static int __init toshiba_wmi_init(void) { int ret; - if (!wmi_has_guid(TOSHIBA_WMI_EVENT_GUID)) + if (!wmi_has_guid(WMI_EVENT_GUID) || + !dmi_check_system(toshiba_wmi_dmi_table)) return -ENODEV; ret = toshiba_wmi_input_setup(); @@ -130,7 +142,7 @@ static int __init toshiba_wmi_init(void) static void __exit toshiba_wmi_exit(void) { - if (wmi_has_guid(TOSHIBA_WMI_EVENT_GUID)) + if (wmi_has_guid(WMI_EVENT_GUID)) toshiba_wmi_input_destroy(); } From 368e21aebe9535c1643b272aaa9819298a6bc3e5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 19 Oct 2016 21:02:04 +0300 Subject: [PATCH 013/319] rtc: cmos: Don't enable interrupts in the middle of the interrupt handler MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Using spin_lock_irq()/spin_unlock_irq() from within the interrupt handler is a no-no. Let's save/restore the flags to avoid turning on interrupts prematurely. We hit this in a bunch of our CI systems, but for whatever reason I wasn't able to reproduce on my own machine, so this fix is just based on the backtrace. [ 202.634918] WARNING: CPU: 0 PID: 0 at kernel/locking/lockdep.c:2729 trace_hardirqs_on_caller+0x113/0x1b0 [ 202.634919] DEBUG_LOCKS_WARN_ON(current->hardirq_context) [ 202.634929] Modules linked in: snd_hda_intel i915 x86_pkg_temp_thermal intel_powerclamp coretemp crct10dif_pclmul crc32_pclmul ghash_clmulni_intel lpc_ich snd_hda_codec_realtek snd_hda_codec_generic snd_hda_codec_hdmi snd_hda_codec snd_hwdep i2c_designware_platform i2c_designware_core snd_hda_core mei_me mei snd_pcm r8169 mii sdhci_acpi sdhci mmc_core i2c_hid [last unloaded: i915] [ 202.634930] CPU: 0 PID: 0 Comm: swapper/0 Tainted: G U 4.9.0-rc1-CI-CI_DRM_1734+ #1 [ 202.634931] Hardware name: GIGABYTE M4HM87P-00/M4HM87P-00, BIOS F6 12/10/2014 [ 202.634933] ffff88011ea03d68 ffffffff8142dce5 ffff88011ea03db8 0000000000000000 [ 202.634934] ffff88011ea03da8 ffffffff8107e496 00000aa900000002 ffffffff81e249a0 [ 202.634935] ffffffff81815637 ffffffff82e7c280 0000000000000000 0000000000000004 [ 202.634936] Call Trace: [ 202.634939] [ 202.634939] [] dump_stack+0x67/0x92 [ 202.634941] [] __warn+0xc6/0xe0 [ 202.634944] [] ? _raw_spin_unlock_irq+0x27/0x50 [ 202.634945] [] warn_slowpath_fmt+0x4a/0x50 [ 202.634946] [] trace_hardirqs_on_caller+0x113/0x1b0 [ 202.634948] [] trace_hardirqs_on+0xd/0x10 [ 202.634949] [] _raw_spin_unlock_irq+0x27/0x50 [ 202.634951] [] rtc_handler+0x32/0xa0 [ 202.634954] [] acpi_ev_fixed_event_detect+0xd4/0xfb [ 202.634956] [] acpi_ev_sci_xrupt_handler+0xf/0x2d [ 202.634957] [] acpi_irq+0x11/0x2c [ 202.634960] [] __handle_irq_event_percpu+0x58/0x370 [ 202.634961] [] handle_irq_event_percpu+0x1e/0x50 [ 202.634962] [] handle_irq_event+0x34/0x60 [ 202.634963] [] handle_fasteoi_irq+0xa6/0x170 [ 202.634966] [] handle_irq+0x15/0x20 [ 202.634967] [] do_IRQ+0x68/0x130 [ 202.634968] [] common_interrupt+0x89/0x89 [ 202.634970] [ 202.634970] [] ? mwait_idle+0x93/0x210 [ 202.634971] [] ? mwait_idle+0x8a/0x210 [ 202.634972] [] arch_cpu_idle+0xa/0x10 [ 202.634973] [] default_idle_call+0x1e/0x30 [ 202.634974] [] cpu_startup_entry+0x17c/0x1f0 [ 202.634976] [] rest_init+0x127/0x130 [ 202.634978] [] start_kernel+0x3f6/0x403 [ 202.634980] [] x86_64_start_reservations+0x2a/0x2c [ 202.634981] [] x86_64_start_kernel+0x173/0x186 [ 202.634982] ---[ end trace 293c99618fa08d34 ]--- Cc: Gabriele Mazzotta Cc: Alexandre Belloni Fixes: 983bf1256edb ("rtc: cmos: Clear ACPI-driven alarms upon resume") Signed-off-by: Ville Syrjälä Reviewed-by: Chris Wilson Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-cmos.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c index 6f0e12e66296..7030d7cd3861 100644 --- a/drivers/rtc/rtc-cmos.c +++ b/drivers/rtc/rtc-cmos.c @@ -996,8 +996,9 @@ static u32 rtc_handler(void *context) struct cmos_rtc *cmos = dev_get_drvdata(dev); unsigned char rtc_control = 0; unsigned char rtc_intr; + unsigned long flags; - spin_lock_irq(&rtc_lock); + spin_lock_irqsave(&rtc_lock, flags); if (cmos_rtc.suspend_ctrl) rtc_control = CMOS_READ(RTC_CONTROL); if (rtc_control & RTC_AIE) { @@ -1006,7 +1007,7 @@ static u32 rtc_handler(void *context) rtc_intr = CMOS_READ(RTC_INTR_FLAGS); rtc_update_irq(cmos->rtc, 1, rtc_intr); } - spin_unlock_irq(&rtc_lock); + spin_unlock_irqrestore(&rtc_lock, flags); pm_wakeup_event(dev, 0); acpi_clear_event(ACPI_EVENT_RTC); From b77428b12b55437b28deae738d9ce8b2e0663b55 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 24 Oct 2016 14:21:18 +1100 Subject: [PATCH 014/319] xfs: defer should abort intent items if the trans roll fails If the deferred ops transaction roll fails, we need to abort the intent items if we haven't already logged a done item for it, regardless of whether or not the deferred ops has had a transaction committed. Dave found this while running generic/388. Move the tracepoint to make it easier to track object lifetimes. Reported-by: Dave Chinner Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner --- fs/xfs/libxfs/xfs_defer.c | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/fs/xfs/libxfs/xfs_defer.c b/fs/xfs/libxfs/xfs_defer.c index 613c5cf19436..5c2929f94bd3 100644 --- a/fs/xfs/libxfs/xfs_defer.c +++ b/fs/xfs/libxfs/xfs_defer.c @@ -199,9 +199,9 @@ xfs_defer_intake_work( struct xfs_defer_pending *dfp; list_for_each_entry(dfp, &dop->dop_intake, dfp_list) { - trace_xfs_defer_intake_work(tp->t_mountp, dfp); dfp->dfp_intent = dfp->dfp_type->create_intent(tp, dfp->dfp_count); + trace_xfs_defer_intake_work(tp->t_mountp, dfp); list_sort(tp->t_mountp, &dfp->dfp_work, dfp->dfp_type->diff_items); list_for_each(li, &dfp->dfp_work) @@ -221,21 +221,14 @@ xfs_defer_trans_abort( struct xfs_defer_pending *dfp; trace_xfs_defer_trans_abort(tp->t_mountp, dop); - /* - * If the transaction was committed, drop the intent reference - * since we're bailing out of here. The other reference is - * dropped when the intent hits the AIL. If the transaction - * was not committed, the intent is freed by the intent item - * unlock handler on abort. - */ - if (!dop->dop_committed) - return; - /* Abort intent items. */ + /* Abort intent items that don't have a done item. */ list_for_each_entry(dfp, &dop->dop_pending, dfp_list) { trace_xfs_defer_pending_abort(tp->t_mountp, dfp); - if (!dfp->dfp_done) + if (dfp->dfp_intent && !dfp->dfp_done) { dfp->dfp_type->abort_intent(dfp->dfp_intent); + dfp->dfp_intent = NULL; + } } /* Shut down FS. */ From 86a6c211d676add579a75b7e172a72bb3e2c21f8 Mon Sep 17 00:00:00 2001 From: Benjamin Coddington Date: Wed, 15 Jun 2016 15:02:55 -0400 Subject: [PATCH 015/319] NFS: Trim extra slash in v4 nfs_path A NFSv4 mount of a subdirectory will show an extra slash (as in 'server://path') in proc's mountinfo which will not match the device name and path. This can cause problems for programs searching for the mount. Fix this by checking for a leading slash in the dentry path, if so trim away any trailing slashes in the device name. Signed-off-by: Benjamin Coddington Signed-off-by: Anna Schumaker --- fs/nfs/namespace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index c8162c660c44..5551e8ef67fd 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c @@ -98,7 +98,7 @@ char *nfs_path(char **p, struct dentry *dentry, char *buffer, ssize_t buflen, return end; } namelen = strlen(base); - if (flags & NFS_PATH_CANONICAL) { + if (*end == '/') { /* Strip off excess slashes in base string */ while (namelen > 0 && base[namelen - 1] == '/') namelen--; From 68a564006a21ae59c7c51b4359e2e8efa42ae4af Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 18 Oct 2016 00:05:35 +0200 Subject: [PATCH 016/319] NFSv4.1: work around -Wmaybe-uninitialized warning A bugfix introduced a harmless gcc warning in nfs4_slot_seqid_in_use if we enable -Wmaybe-uninitialized again: fs/nfs/nfs4session.c:203:54: error: 'cur_seq' may be used uninitialized in this function [-Werror=maybe-uninitialized] gcc is not smart enough to conclude that the IS_ERR/PTR_ERR pair results in a nonzero return value here. Using PTR_ERR_OR_ZERO() instead makes this clear to the compiler. The warning originally did not appear in v4.8 as it was globally disabled, but the bugfix that introduced the warning got backported to stable kernels which again enable it, and this is now the only warning in the v4.7 builds. Fixes: e09c978aae5b ("NFSv4.1: Fix Oopsable condition in server callback races") Signed-off-by: Arnd Bergmann Cc: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/nfs4session.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/fs/nfs/nfs4session.c b/fs/nfs/nfs4session.c index b62973045a3e..150c5a1879bf 100644 --- a/fs/nfs/nfs4session.c +++ b/fs/nfs/nfs4session.c @@ -178,12 +178,14 @@ static int nfs4_slot_get_seqid(struct nfs4_slot_table *tbl, u32 slotid, __must_hold(&tbl->slot_tbl_lock) { struct nfs4_slot *slot; + int ret; slot = nfs4_lookup_slot(tbl, slotid); - if (IS_ERR(slot)) - return PTR_ERR(slot); - *seq_nr = slot->seq_nr; - return 0; + ret = PTR_ERR_OR_ZERO(slot); + if (!ret) + *seq_nr = slot->seq_nr; + + return ret; } /* From 34eee70a7b82b09dbda4cb453e0e21d460dae226 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 24 Oct 2016 17:22:01 +0200 Subject: [PATCH 017/319] staging: iio: ad5933: avoid uninitialized variable in error case The ad5933_i2c_read function returns an error code to indicate whether it could read data or not. However ad5933_work() ignores this return code and just accesses the data unconditionally, which gets detected by gcc as a possible bug: drivers/staging/iio/impedance-analyzer/ad5933.c: In function 'ad5933_work': drivers/staging/iio/impedance-analyzer/ad5933.c:649:16: warning: 'status' may be used uninitialized in this function [-Wmaybe-uninitialized] This adds minimal error handling so we only evaluate the data if it was correctly read. Link: https://patchwork.kernel.org/patch/8110281/ Signed-off-by: Arnd Bergmann Acked-by: Lars-Peter Clausen Cc: Signed-off-by: Jonathan Cameron --- drivers/staging/iio/impedance-analyzer/ad5933.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/drivers/staging/iio/impedance-analyzer/ad5933.c b/drivers/staging/iio/impedance-analyzer/ad5933.c index 5eecf1cb1028..3892a7470410 100644 --- a/drivers/staging/iio/impedance-analyzer/ad5933.c +++ b/drivers/staging/iio/impedance-analyzer/ad5933.c @@ -655,6 +655,7 @@ static void ad5933_work(struct work_struct *work) __be16 buf[2]; int val[2]; unsigned char status; + int ret; mutex_lock(&indio_dev->mlock); if (st->state == AD5933_CTRL_INIT_START_FREQ) { @@ -662,19 +663,22 @@ static void ad5933_work(struct work_struct *work) ad5933_cmd(st, AD5933_CTRL_START_SWEEP); st->state = AD5933_CTRL_START_SWEEP; schedule_delayed_work(&st->work, st->poll_time_jiffies); - mutex_unlock(&indio_dev->mlock); - return; + goto out; } - ad5933_i2c_read(st->client, AD5933_REG_STATUS, 1, &status); + ret = ad5933_i2c_read(st->client, AD5933_REG_STATUS, 1, &status); + if (ret) + goto out; if (status & AD5933_STAT_DATA_VALID) { int scan_count = bitmap_weight(indio_dev->active_scan_mask, indio_dev->masklength); - ad5933_i2c_read(st->client, + ret = ad5933_i2c_read(st->client, test_bit(1, indio_dev->active_scan_mask) ? AD5933_REG_REAL_DATA : AD5933_REG_IMAG_DATA, scan_count * 2, (u8 *)buf); + if (ret) + goto out; if (scan_count == 2) { val[0] = be16_to_cpu(buf[0]); @@ -686,8 +690,7 @@ static void ad5933_work(struct work_struct *work) } else { /* no data available - try again later */ schedule_delayed_work(&st->work, st->poll_time_jiffies); - mutex_unlock(&indio_dev->mlock); - return; + goto out; } if (status & AD5933_STAT_SWEEP_DONE) { @@ -700,7 +703,7 @@ static void ad5933_work(struct work_struct *work) ad5933_cmd(st, AD5933_CTRL_INC_FREQ); schedule_delayed_work(&st->work, st->poll_time_jiffies); } - +out: mutex_unlock(&indio_dev->mlock); } From d3532ea6ce4ea501e421d130555e59edc2945f99 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 18 Oct 2016 00:13:40 +0200 Subject: [PATCH 018/319] brcmfmac: avoid maybe-uninitialized warning in brcmf_cfg80211_start_ap MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A bugfix added a sanity check around the assignment and use of the 'is_11d' variable, which looks correct to me, but as the function is rather complex already, this confuses the compiler to the point where it can no longer figure out if the variable is always initialized correctly: brcm80211/brcmfmac/cfg80211.c: In function ‘brcmf_cfg80211_start_ap’: brcm80211/brcmfmac/cfg80211.c:4586:10: error: ‘is_11d’ may be used uninitialized in this function [-Werror=maybe-uninitialized] This adds an initialization for the newly introduced case in which the variable should not really be used, in order to make the warning go away. Fixes: b3589dfe0212 ("brcmfmac: ignore 11d configuration errors") Cc: Hante Meuleman Cc: Arend van Spriel Cc: Kalle Valo Signed-off-by: Arnd Bergmann Signed-off-by: Kalle Valo --- drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c index b777e1b2f87a..78d9966a3957 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c @@ -4516,7 +4516,7 @@ brcmf_cfg80211_start_ap(struct wiphy *wiphy, struct net_device *ndev, /* store current 11d setting */ if (brcmf_fil_cmd_int_get(ifp, BRCMF_C_GET_REGULATORY, &ifp->vif->is_11d)) { - supports_11d = false; + is_11d = supports_11d = false; } else { country_ie = brcmf_parse_tlvs((u8 *)settings->beacon.tail, settings->beacon.tail_len, From bb6a6e8e091353770074608c1d1bfde0e20b8154 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Sat, 22 Oct 2016 18:51:24 +0800 Subject: [PATCH 019/319] netfilter: nft_dynset: fix panic if NFT_SET_HASH is not enabled When CONFIG_NFT_SET_HASH is not enabled and I input the following rule: "nft add rule filter output flow table test {ip daddr counter }", kernel panic happened on my system: BUG: unable to handle kernel NULL pointer dereference at (null) IP: [< (null)>] (null) [...] Call Trace: [] ? nft_dynset_eval+0x56/0x100 [nf_tables] [] nft_do_chain+0xfb/0x4e0 [nf_tables] [] ? nf_conntrack_tuple_taken+0x61/0x210 [nf_conntrack] [] ? get_unique_tuple+0x136/0x560 [nf_nat] [] ? __nf_ct_ext_add_length+0x111/0x130 [nf_conntrack] [] ? nf_nat_setup_info+0x87/0x3b0 [nf_nat] [] ? ipt_do_table+0x327/0x610 [] ? __nf_nat_alloc_null_binding+0x57/0x80 [nf_nat] [] nft_ipv4_output+0xaf/0xd0 [nf_tables_ipv4] [] nf_iterate+0x55/0x60 [] nf_hook_slow+0x73/0xd0 Because in rbtree type set, ops->update is not implemented. So just keep it simple, in such case, report -EOPNOTSUPP to the user space. Fixes: 22fe54d5fefc ("netfilter: nf_tables: add support for dynamic set updates") Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_dynset.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c index 517f08767a3c..bfdb689664b0 100644 --- a/net/netfilter/nft_dynset.c +++ b/net/netfilter/nft_dynset.c @@ -139,6 +139,9 @@ static int nft_dynset_init(const struct nft_ctx *ctx, return PTR_ERR(set); } + if (set->ops->update == NULL) + return -EOPNOTSUPP; + if (set->flags & NFT_SET_CONSTANT) return -EBUSY; From 61f9e2924f4981d626b3a931fed935f2fa3cb4de Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Sat, 22 Oct 2016 18:51:25 +0800 Subject: [PATCH 020/319] netfilter: nf_tables: fix *leak* when expr clone fail When nft_expr_clone failed, a series of problems will happen: 1. module refcnt will leak, we call __module_get at the beginning but we forget to put it back if ops->clone returns fail 2. memory will be leaked, if clone fail, we just return NULL and forget to free the alloced element 3. set->nelems will become incorrect when set->size is specified. If clone fail, we should decrease the set->nelems Now this patch fixes these problems. And fortunately, clone fail will only happen on counter expression when memory is exhausted. Fixes: 086f332167d6 ("netfilter: nf_tables: add clone interface to expression operations") Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 6 ++++-- net/netfilter/nf_tables_api.c | 11 ++++++----- net/netfilter/nft_dynset.c | 16 ++++++++++------ net/netfilter/nft_set_hash.c | 4 ++-- net/netfilter/nft_set_rbtree.c | 2 +- 5 files changed, 23 insertions(+), 16 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 5031e072567b..741dcded5b4f 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -542,7 +542,8 @@ void *nft_set_elem_init(const struct nft_set *set, const struct nft_set_ext_tmpl *tmpl, const u32 *key, const u32 *data, u64 timeout, gfp_t gfp); -void nft_set_elem_destroy(const struct nft_set *set, void *elem); +void nft_set_elem_destroy(const struct nft_set *set, void *elem, + bool destroy_expr); /** * struct nft_set_gc_batch_head - nf_tables set garbage collection batch @@ -693,7 +694,6 @@ static inline int nft_expr_clone(struct nft_expr *dst, struct nft_expr *src) { int err; - __module_get(src->ops->type->owner); if (src->ops->clone) { dst->ops = src->ops; err = src->ops->clone(dst, src); @@ -702,6 +702,8 @@ static inline int nft_expr_clone(struct nft_expr *dst, struct nft_expr *src) } else { memcpy(dst, src, src->ops->size); } + + __module_get(src->ops->type->owner); return 0; } diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 24db22257586..86e48aeb20be 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -3452,14 +3452,15 @@ void *nft_set_elem_init(const struct nft_set *set, return elem; } -void nft_set_elem_destroy(const struct nft_set *set, void *elem) +void nft_set_elem_destroy(const struct nft_set *set, void *elem, + bool destroy_expr) { struct nft_set_ext *ext = nft_set_elem_ext(set, elem); nft_data_uninit(nft_set_ext_key(ext), NFT_DATA_VALUE); if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA)) nft_data_uninit(nft_set_ext_data(ext), set->dtype); - if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPR)) + if (destroy_expr && nft_set_ext_exists(ext, NFT_SET_EXT_EXPR)) nf_tables_expr_destroy(NULL, nft_set_ext_expr(ext)); kfree(elem); @@ -3812,7 +3813,7 @@ void nft_set_gc_batch_release(struct rcu_head *rcu) gcb = container_of(rcu, struct nft_set_gc_batch, head.rcu); for (i = 0; i < gcb->head.cnt; i++) - nft_set_elem_destroy(gcb->head.set, gcb->elems[i]); + nft_set_elem_destroy(gcb->head.set, gcb->elems[i], true); kfree(gcb); } EXPORT_SYMBOL_GPL(nft_set_gc_batch_release); @@ -4030,7 +4031,7 @@ static void nf_tables_commit_release(struct nft_trans *trans) break; case NFT_MSG_DELSETELEM: nft_set_elem_destroy(nft_trans_elem_set(trans), - nft_trans_elem(trans).priv); + nft_trans_elem(trans).priv, true); break; } kfree(trans); @@ -4171,7 +4172,7 @@ static void nf_tables_abort_release(struct nft_trans *trans) break; case NFT_MSG_NEWSETELEM: nft_set_elem_destroy(nft_trans_elem_set(trans), - nft_trans_elem(trans).priv); + nft_trans_elem(trans).priv, true); break; } kfree(trans); diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c index bfdb689664b0..31ca94793aa9 100644 --- a/net/netfilter/nft_dynset.c +++ b/net/netfilter/nft_dynset.c @@ -44,18 +44,22 @@ static void *nft_dynset_new(struct nft_set *set, const struct nft_expr *expr, ®s->data[priv->sreg_key], ®s->data[priv->sreg_data], timeout, GFP_ATOMIC); - if (elem == NULL) { - if (set->size) - atomic_dec(&set->nelems); - return NULL; - } + if (elem == NULL) + goto err1; ext = nft_set_elem_ext(set, elem); if (priv->expr != NULL && nft_expr_clone(nft_set_ext_expr(ext), priv->expr) < 0) - return NULL; + goto err2; return elem; + +err2: + nft_set_elem_destroy(set, elem, false); +err1: + if (set->size) + atomic_dec(&set->nelems); + return NULL; } static void nft_dynset_eval(const struct nft_expr *expr, diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c index 3794cb2fc788..88d9fc8343e7 100644 --- a/net/netfilter/nft_set_hash.c +++ b/net/netfilter/nft_set_hash.c @@ -120,7 +120,7 @@ static bool nft_hash_update(struct nft_set *set, const u32 *key, return true; err2: - nft_set_elem_destroy(set, he); + nft_set_elem_destroy(set, he, true); err1: return false; } @@ -332,7 +332,7 @@ static int nft_hash_init(const struct nft_set *set, static void nft_hash_elem_destroy(void *ptr, void *arg) { - nft_set_elem_destroy((const struct nft_set *)arg, ptr); + nft_set_elem_destroy((const struct nft_set *)arg, ptr, true); } static void nft_hash_destroy(const struct nft_set *set) diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c index 38b5bda242f8..36493a7cae88 100644 --- a/net/netfilter/nft_set_rbtree.c +++ b/net/netfilter/nft_set_rbtree.c @@ -266,7 +266,7 @@ static void nft_rbtree_destroy(const struct nft_set *set) while ((node = priv->root.rb_node) != NULL) { rb_erase(node, &priv->root); rbe = rb_entry(node, struct nft_rbtree_elem, node); - nft_set_elem_destroy(set, rbe); + nft_set_elem_destroy(set, rbe, true); } } From dab45060a56a9732b027d2031c1b6100bc75eea2 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Sat, 22 Oct 2016 18:51:26 +0800 Subject: [PATCH 021/319] netfilter: nf_tables: fix race when create new element in dynset Packets may race when create the new element in nft_hash_update: CPU0 CPU1 lookup_fast - fail lookup_fast - fail new - ok new - ok insert - ok insert - fail(EEXIST) So when race happened, we reuse the existing element. Otherwise, these *racing* packets will not be handled properly. Fixes: 22fe54d5fefc ("netfilter: nf_tables: add support for dynamic set updates") Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_set_hash.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c index 88d9fc8343e7..a3dface3e6e6 100644 --- a/net/netfilter/nft_set_hash.c +++ b/net/netfilter/nft_set_hash.c @@ -98,7 +98,7 @@ static bool nft_hash_update(struct nft_set *set, const u32 *key, const struct nft_set_ext **ext) { struct nft_hash *priv = nft_set_priv(set); - struct nft_hash_elem *he; + struct nft_hash_elem *he, *prev; struct nft_hash_cmp_arg arg = { .genmask = NFT_GENMASK_ANY, .set = set, @@ -112,9 +112,18 @@ static bool nft_hash_update(struct nft_set *set, const u32 *key, he = new(set, expr, regs); if (he == NULL) goto err1; - if (rhashtable_lookup_insert_key(&priv->ht, &arg, &he->node, - nft_hash_params)) + + prev = rhashtable_lookup_get_insert_key(&priv->ht, &arg, &he->node, + nft_hash_params); + if (IS_ERR(prev)) goto err2; + + /* Another cpu may race to insert the element with the same key */ + if (prev) { + nft_set_elem_destroy(set, he, true); + he = prev; + } + out: *ext = &he->ext; return true; From 444f901742d054a4cd5ff045871eac5131646cfb Mon Sep 17 00:00:00 2001 From: Ulrich Weber Date: Mon, 24 Oct 2016 18:07:23 +0200 Subject: [PATCH 022/319] netfilter: nf_conntrack_sip: extend request line validation on SIP requests, so a fragmented TCP SIP packet from an allow header starting with INVITE,NOTIFY,OPTIONS,REFER,REGISTER,UPDATE,SUBSCRIBE Content-Length: 0 will not bet interpreted as an INVITE request. Also Request-URI must start with an alphabetic character. Confirm with RFC 3261 Request-Line = Method SP Request-URI SP SIP-Version CRLF Fixes: 30f33e6dee80 ("[NETFILTER]: nf_conntrack_sip: support method specific request/response handling") Signed-off-by: Ulrich Weber Acked-by: Marco Angaroni Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_sip.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index 621b81c7bddc..c3fc14e021ec 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -1436,9 +1436,12 @@ static int process_sip_request(struct sk_buff *skb, unsigned int protoff, handler = &sip_handlers[i]; if (handler->request == NULL) continue; - if (*datalen < handler->len || + if (*datalen < handler->len + 2 || strncasecmp(*dptr, handler->method, handler->len)) continue; + if ((*dptr)[handler->len] != ' ' || + !isalpha((*dptr)[handler->len+1])) + continue; if (ct_sip_get_header(ct, *dptr, 0, *datalen, SIP_HDR_CSEQ, &matchoff, &matchlen) <= 0) { From f1d505bb762e30bf316ff5d3b604914649d6aed3 Mon Sep 17 00:00:00 2001 From: "John W. Linville" Date: Tue, 25 Oct 2016 15:56:39 -0400 Subject: [PATCH 023/319] netfilter: nf_tables: fix type mismatch with error return from nft_parse_u32_check Commit 36b701fae12ac ("netfilter: nf_tables: validate maximum value of u32 netlink attributes") introduced nft_parse_u32_check with a return value of "unsigned int", yet on error it returns "-ERANGE". This patch corrects the mismatch by changing the return value to "int", which happens to match the actual users of nft_parse_u32_check already. Found by Coverity, CID 1373930. Note that commit 21a9e0f1568ea ("netfilter: nft_exthdr: fix error handling in nft_exthdr_init()) attempted to address the issue, but did not address the return type of nft_parse_u32_check. Signed-off-by: John W. Linville Cc: Laura Garcia Liebana Cc: Pablo Neira Ayuso Cc: Dan Carpenter Fixes: 36b701fae12ac ("netfilter: nf_tables: validate maximum value...") Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 2 +- net/netfilter/nf_tables_api.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 741dcded5b4f..d79d1e9b9546 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -145,7 +145,7 @@ static inline enum nft_registers nft_type_to_reg(enum nft_data_types type) return type == NFT_DATA_VERDICT ? NFT_REG_VERDICT : NFT_REG_1 * NFT_REG_SIZE / NFT_REG32_SIZE; } -unsigned int nft_parse_u32_check(const struct nlattr *attr, int max, u32 *dest); +int nft_parse_u32_check(const struct nlattr *attr, int max, u32 *dest); unsigned int nft_parse_register(const struct nlattr *attr); int nft_dump_register(struct sk_buff *skb, unsigned int attr, unsigned int reg); diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 86e48aeb20be..365d31b86816 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -4422,7 +4422,7 @@ static int nf_tables_check_loops(const struct nft_ctx *ctx, * Otherwise a 0 is returned and the attribute value is stored in the * destination variable. */ -unsigned int nft_parse_u32_check(const struct nlattr *attr, int max, u32 *dest) +int nft_parse_u32_check(const struct nlattr *attr, int max, u32 *dest) { u32 val; From cdb436d181d21af4d273b49ec7734eecd6a37fe9 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 26 Oct 2016 23:46:17 +0200 Subject: [PATCH 024/319] netfilter: conntrack: avoid excess memory allocation This is now a fixed-size extension, so we don't need to pass a variable alloc size. This (harmless) error results in allocating 32 instead of the needed 16 bytes for this extension as the size gets passed twice. Fixes: 23014011ba420 ("netfilter: conntrack: support a fixed size of 128 distinct labels") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_labels.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/include/net/netfilter/nf_conntrack_labels.h b/include/net/netfilter/nf_conntrack_labels.h index 498814626e28..1723a67c0b0a 100644 --- a/include/net/netfilter/nf_conntrack_labels.h +++ b/include/net/netfilter/nf_conntrack_labels.h @@ -30,8 +30,7 @@ static inline struct nf_conn_labels *nf_ct_labels_ext_add(struct nf_conn *ct) if (net->ct.labels_used == 0) return NULL; - return nf_ct_ext_add_length(ct, NF_CT_EXT_LABELS, - sizeof(struct nf_conn_labels), GFP_ATOMIC); + return nf_ct_ext_add(ct, NF_CT_EXT_LABELS, GFP_ATOMIC); #else return NULL; #endif From 5c4a9129b81027eca12aeaf2fa9defb45150f533 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Wed, 26 Oct 2016 08:12:20 +0200 Subject: [PATCH 025/319] clk/samsung: Use CLK_OF_DECLARE_DRIVER initialization method for CLKOUT The Exynos PMU node is an interrupt, clock and PMU (Power Management Unit) controller, and these functionalities are supported by different drivers that matches the same compatible strings. Since commit 989eafd0b609 ("clk: core: Avoid double initialization of clocks") the OF core flags clock controllers registered with the CLK_OF_DECLARE() macro as OF_POPULATED, so platform devices with the same compatible string will not be registered. This prevents the PMU platform device to be created, so the Exynos PMU driver is never probed. This breaks (among other things) Suspend-to-RAM. Fix this by changing CLKOUT driver initialization method to CLK_OF_DECLARE_DRIVER(), which doesn't clear the OF_POPULATED flag, so later a platform device is created and the Exynos PMU platform driver can be be probed properly. Fixes: 989eafd0b609 ("clk: core: Avoid double initialization of clocks") Signed-off-by: Marek Szyprowski Reviewed-by: Javier Martinez Canillas Reviewed-by: Chanwoo Choi Signed-off-by: Stephen Boyd --- drivers/clk/samsung/clk-exynos-clkout.c | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/drivers/clk/samsung/clk-exynos-clkout.c b/drivers/clk/samsung/clk-exynos-clkout.c index 96fab6cfb202..6c6afb87b4ce 100644 --- a/drivers/clk/samsung/clk-exynos-clkout.c +++ b/drivers/clk/samsung/clk-exynos-clkout.c @@ -132,28 +132,34 @@ static void __init exynos_clkout_init(struct device_node *node, u32 mux_mask) pr_err("%s: failed to register clkout clock\n", __func__); } +/* + * We use CLK_OF_DECLARE_DRIVER initialization method to avoid setting + * the OF_POPULATED flag on the pmu device tree node, so later the + * Exynos PMU platform device can be properly probed with PMU driver. + */ + static void __init exynos4_clkout_init(struct device_node *node) { exynos_clkout_init(node, EXYNOS4_CLKOUT_MUX_MASK); } -CLK_OF_DECLARE(exynos4210_clkout, "samsung,exynos4210-pmu", +CLK_OF_DECLARE_DRIVER(exynos4210_clkout, "samsung,exynos4210-pmu", exynos4_clkout_init); -CLK_OF_DECLARE(exynos4212_clkout, "samsung,exynos4212-pmu", +CLK_OF_DECLARE_DRIVER(exynos4212_clkout, "samsung,exynos4212-pmu", exynos4_clkout_init); -CLK_OF_DECLARE(exynos4412_clkout, "samsung,exynos4412-pmu", +CLK_OF_DECLARE_DRIVER(exynos4412_clkout, "samsung,exynos4412-pmu", exynos4_clkout_init); -CLK_OF_DECLARE(exynos3250_clkout, "samsung,exynos3250-pmu", +CLK_OF_DECLARE_DRIVER(exynos3250_clkout, "samsung,exynos3250-pmu", exynos4_clkout_init); static void __init exynos5_clkout_init(struct device_node *node) { exynos_clkout_init(node, EXYNOS5_CLKOUT_MUX_MASK); } -CLK_OF_DECLARE(exynos5250_clkout, "samsung,exynos5250-pmu", +CLK_OF_DECLARE_DRIVER(exynos5250_clkout, "samsung,exynos5250-pmu", exynos5_clkout_init); -CLK_OF_DECLARE(exynos5410_clkout, "samsung,exynos5410-pmu", +CLK_OF_DECLARE_DRIVER(exynos5410_clkout, "samsung,exynos5410-pmu", exynos5_clkout_init); -CLK_OF_DECLARE(exynos5420_clkout, "samsung,exynos5420-pmu", +CLK_OF_DECLARE_DRIVER(exynos5420_clkout, "samsung,exynos5420-pmu", exynos5_clkout_init); -CLK_OF_DECLARE(exynos5433_clkout, "samsung,exynos5433-pmu", +CLK_OF_DECLARE_DRIVER(exynos5433_clkout, "samsung,exynos5433-pmu", exynos5_clkout_init); From 5747620257812530adda58cbff591fede6fb261e Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 24 Oct 2016 17:34:32 +0200 Subject: [PATCH 026/319] netfilter: ip_vs_sync: fix bogus maybe-uninitialized warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Building the ip_vs_sync code with CONFIG_OPTIMIZE_INLINING on x86 confuses the compiler to the point where it produces a rather dubious warning message: net/netfilter/ipvs/ip_vs_sync.c:1073:33: error: ‘opt.init_seq’ may be used uninitialized in this function [-Werror=maybe-uninitialized] struct ip_vs_sync_conn_options opt; ^~~ net/netfilter/ipvs/ip_vs_sync.c:1073:33: error: ‘opt.delta’ may be used uninitialized in this function [-Werror=maybe-uninitialized] net/netfilter/ipvs/ip_vs_sync.c:1073:33: error: ‘opt.previous_delta’ may be used uninitialized in this function [-Werror=maybe-uninitialized] net/netfilter/ipvs/ip_vs_sync.c:1073:33: error: ‘*((void *)&opt+12).init_seq’ may be used uninitialized in this function [-Werror=maybe-uninitialized] net/netfilter/ipvs/ip_vs_sync.c:1073:33: error: ‘*((void *)&opt+12).delta’ may be used uninitialized in this function [-Werror=maybe-uninitialized] net/netfilter/ipvs/ip_vs_sync.c:1073:33: error: ‘*((void *)&opt+12).previous_delta’ may be used uninitialized in this function [-Werror=maybe-uninitialized] The problem appears to be a combination of a number of factors, including the __builtin_bswap32 compiler builtin being slightly odd, having a large amount of code inlined into a single function, and the way that some functions only get partially inlined here. I've spent way too much time trying to work out a way to improve the code, but the best I've come up with is to add an explicit memset right before the ip_vs_seq structure is first initialized here. When the compiler works correctly, this has absolutely no effect, but in the case that produces the warning, the warning disappears. In the process of analysing this warning, I also noticed that we use memcpy to copy the larger ip_vs_sync_conn_options structure over two members of the ip_vs_conn structure. This works because the layout is identical, but seems error-prone, so I'm changing this in the process to directly copy the two members. This change seemed to have no effect on the object code or the warning, but it deals with the same data, so I kept the two changes together. Signed-off-by: Arnd Bergmann Acked-by: Julian Anastasov Signed-off-by: Simon Horman Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipvs/ip_vs_sync.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index 1b07578bedf3..9350530c16c1 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -283,6 +283,7 @@ struct ip_vs_sync_buff { */ static void ntoh_seq(struct ip_vs_seq *no, struct ip_vs_seq *ho) { + memset(ho, 0, sizeof(*ho)); ho->init_seq = get_unaligned_be32(&no->init_seq); ho->delta = get_unaligned_be32(&no->delta); ho->previous_delta = get_unaligned_be32(&no->previous_delta); @@ -917,8 +918,10 @@ static void ip_vs_proc_conn(struct netns_ipvs *ipvs, struct ip_vs_conn_param *pa kfree(param->pe_data); } - if (opt) - memcpy(&cp->in_seq, opt, sizeof(*opt)); + if (opt) { + cp->in_seq = opt->in_seq; + cp->out_seq = opt->out_seq; + } atomic_set(&cp->in_pkts, sysctl_sync_threshold(ipvs)); cp->state = state; cp->old_state = cp->state; From 06b113e9f28f8657715919087a3f54b77d1634ed Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Fri, 28 Oct 2016 09:59:38 -0700 Subject: [PATCH 027/319] clk: xgene: Don't call __pa on ioremaped address ioremaped addresses are not linearly mapped so the physical address can not be figured out via __pa. More generally, there is no guarantee that backing value of an ioremapped address is a physical address at all. The value here is only used for debugging so just drop the call to __pa on the ioremapped address. Fixes: 6ae5fd381251 ("clk: xgene: Silence sparse warnings") Signed-off-by: Laura Abbott Acked-by: Loc Ho Signed-off-by: Stephen Boyd --- drivers/clk/clk-xgene.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/clk/clk-xgene.c b/drivers/clk/clk-xgene.c index 5daddf5ecc4b..bc37030e38ba 100644 --- a/drivers/clk/clk-xgene.c +++ b/drivers/clk/clk-xgene.c @@ -463,22 +463,20 @@ static int xgene_clk_enable(struct clk_hw *hw) struct xgene_clk *pclk = to_xgene_clk(hw); unsigned long flags = 0; u32 data; - phys_addr_t reg; if (pclk->lock) spin_lock_irqsave(pclk->lock, flags); if (pclk->param.csr_reg != NULL) { pr_debug("%s clock enabled\n", clk_hw_get_name(hw)); - reg = __pa(pclk->param.csr_reg); /* First enable the clock */ data = xgene_clk_read(pclk->param.csr_reg + pclk->param.reg_clk_offset); data |= pclk->param.reg_clk_mask; xgene_clk_write(data, pclk->param.csr_reg + pclk->param.reg_clk_offset); - pr_debug("%s clock PADDR base %pa clk offset 0x%08X mask 0x%08X value 0x%08X\n", - clk_hw_get_name(hw), ®, + pr_debug("%s clk offset 0x%08X mask 0x%08X value 0x%08X\n", + clk_hw_get_name(hw), pclk->param.reg_clk_offset, pclk->param.reg_clk_mask, data); @@ -488,8 +486,8 @@ static int xgene_clk_enable(struct clk_hw *hw) data &= ~pclk->param.reg_csr_mask; xgene_clk_write(data, pclk->param.csr_reg + pclk->param.reg_csr_offset); - pr_debug("%s CSR RESET PADDR base %pa csr offset 0x%08X mask 0x%08X value 0x%08X\n", - clk_hw_get_name(hw), ®, + pr_debug("%s csr offset 0x%08X mask 0x%08X value 0x%08X\n", + clk_hw_get_name(hw), pclk->param.reg_csr_offset, pclk->param.reg_csr_mask, data); } From 646cccb55b26b95b981ea9a63512260d0c21cac3 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 26 Oct 2016 16:41:39 -0400 Subject: [PATCH 028/319] drm/amdgpu: add support for new smc firmware on tonga Newer tonga parts require new smc firmware. Reviewed-by: Huang Rui Reviewed-by: Eric Huang Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c | 6 +++++- drivers/gpu/drm/amd/amdgpu/vi.c | 1 + 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c index 7a8bfa34682f..d1267ea1d631 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c @@ -798,7 +798,11 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device, strcpy(fw_name, "amdgpu/topaz_smc.bin"); break; case CHIP_TONGA: - strcpy(fw_name, "amdgpu/tonga_smc.bin"); + if (((adev->pdev->device == 0x6939) && (adev->pdev->revision == 0xf1)) || + ((adev->pdev->device == 0x6938) && (adev->pdev->revision == 0xf1))) + strcpy(fw_name, "amdgpu/tonga_k_smc.bin"); + else + strcpy(fw_name, "amdgpu/tonga_smc.bin"); break; case CHIP_FIJI: strcpy(fw_name, "amdgpu/fiji_smc.bin"); diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c index 7c13090df7c0..12404fc47520 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi.c +++ b/drivers/gpu/drm/amd/amdgpu/vi.c @@ -81,6 +81,7 @@ MODULE_FIRMWARE("amdgpu/topaz_smc.bin"); MODULE_FIRMWARE("amdgpu/tonga_smc.bin"); +MODULE_FIRMWARE("amdgpu/tonga_k_smc.bin"); MODULE_FIRMWARE("amdgpu/fiji_smc.bin"); MODULE_FIRMWARE("amdgpu/polaris10_smc.bin"); MODULE_FIRMWARE("amdgpu/polaris10_smc_sk.bin"); From 3b496626ee8f07919256a4e99cddf42ecd4ba891 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 27 Oct 2016 18:33:00 -0400 Subject: [PATCH 029/319] drm/amdgpu: add support for new smc firmware on iceland Newer iceland parts require new smc firmware. Reviewed-by: Huang Rui Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c | 7 ++++++- drivers/gpu/drm/amd/amdgpu/vi.c | 1 + 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c index d1267ea1d631..662976292535 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c @@ -795,7 +795,12 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device, if (!adev->pm.fw) { switch (adev->asic_type) { case CHIP_TOPAZ: - strcpy(fw_name, "amdgpu/topaz_smc.bin"); + if (((adev->pdev->device == 0x6900) && (adev->pdev->revision == 0x81)) || + ((adev->pdev->device == 0x6900) && (adev->pdev->revision == 0x83)) || + ((adev->pdev->device == 0x6907) && (adev->pdev->revision == 0x87))) + strcpy(fw_name, "amdgpu/topaz_k_smc.bin"); + else + strcpy(fw_name, "amdgpu/topaz_smc.bin"); break; case CHIP_TONGA: if (((adev->pdev->device == 0x6939) && (adev->pdev->revision == 0xf1)) || diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c index 12404fc47520..f62f1a74f890 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi.c +++ b/drivers/gpu/drm/amd/amdgpu/vi.c @@ -80,6 +80,7 @@ #include "dce_virtual.h" MODULE_FIRMWARE("amdgpu/topaz_smc.bin"); +MODULE_FIRMWARE("amdgpu/topaz_k_smc.bin"); MODULE_FIRMWARE("amdgpu/tonga_smc.bin"); MODULE_FIRMWARE("amdgpu/tonga_k_smc.bin"); MODULE_FIRMWARE("amdgpu/fiji_smc.bin"); From d304286abbbe7ed6228a553a56ba054e900907eb Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Tue, 25 Oct 2016 23:07:38 +0200 Subject: [PATCH 030/319] iio: st_sensors: fix scale configuration for h3lis331dl fix scale configuration/parsing for h3lis331dl accel driver when sensitivity is higher than 1(m/s^2)/digit Signed-off-by: Lorenzo Bianconi Fixes: 1e52fefc9b0c ("iio: accel: Add support for the h3lis331dl accelerometer") Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/accel/st_accel_core.c | 12 ++++++++---- drivers/iio/common/st_sensors/st_sensors_core.c | 8 +++++--- 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/drivers/iio/accel/st_accel_core.c b/drivers/iio/accel/st_accel_core.c index da3fb069ec5c..ce69048c88e9 100644 --- a/drivers/iio/accel/st_accel_core.c +++ b/drivers/iio/accel/st_accel_core.c @@ -743,8 +743,8 @@ static int st_accel_read_raw(struct iio_dev *indio_dev, return IIO_VAL_INT; case IIO_CHAN_INFO_SCALE: - *val = 0; - *val2 = adata->current_fullscale->gain; + *val = adata->current_fullscale->gain / 1000000; + *val2 = adata->current_fullscale->gain % 1000000; return IIO_VAL_INT_PLUS_MICRO; case IIO_CHAN_INFO_SAMP_FREQ: *val = adata->odr; @@ -763,9 +763,13 @@ static int st_accel_write_raw(struct iio_dev *indio_dev, int err; switch (mask) { - case IIO_CHAN_INFO_SCALE: - err = st_sensors_set_fullscale_by_gain(indio_dev, val2); + case IIO_CHAN_INFO_SCALE: { + int gain; + + gain = val * 1000000 + val2; + err = st_sensors_set_fullscale_by_gain(indio_dev, gain); break; + } case IIO_CHAN_INFO_SAMP_FREQ: if (val2) return -EINVAL; diff --git a/drivers/iio/common/st_sensors/st_sensors_core.c b/drivers/iio/common/st_sensors/st_sensors_core.c index 285a64a589d7..975a1f19f747 100644 --- a/drivers/iio/common/st_sensors/st_sensors_core.c +++ b/drivers/iio/common/st_sensors/st_sensors_core.c @@ -612,7 +612,7 @@ EXPORT_SYMBOL(st_sensors_sysfs_sampling_frequency_avail); ssize_t st_sensors_sysfs_scale_avail(struct device *dev, struct device_attribute *attr, char *buf) { - int i, len = 0; + int i, len = 0, q, r; struct iio_dev *indio_dev = dev_get_drvdata(dev); struct st_sensor_data *sdata = iio_priv(indio_dev); @@ -621,8 +621,10 @@ ssize_t st_sensors_sysfs_scale_avail(struct device *dev, if (sdata->sensor_settings->fs.fs_avl[i].num == 0) break; - len += scnprintf(buf + len, PAGE_SIZE - len, "0.%06u ", - sdata->sensor_settings->fs.fs_avl[i].gain); + q = sdata->sensor_settings->fs.fs_avl[i].gain / 1000000; + r = sdata->sensor_settings->fs.fs_avl[i].gain % 1000000; + + len += scnprintf(buf + len, PAGE_SIZE - len, "%u.%06u ", q, r); } mutex_unlock(&indio_dev->mlock); buf[len - 1] = '\n'; From 0b944d3a4bba6b25f43aed530f4fa85c04d162a6 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 30 Oct 2016 11:42:01 -0500 Subject: [PATCH 031/319] aio: hold an extra file reference over AIO read/write operations Otherwise we might dereference an already freed file and/or inode when aio_complete is called before we return from the read_iter or write_iter method. Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- fs/aio.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/aio.c b/fs/aio.c index 1157e13a36d6..0aa71d338c04 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -1460,6 +1460,7 @@ static ssize_t aio_run_iocb(struct kiocb *req, unsigned opcode, return ret; } + get_file(file); if (rw == WRITE) file_start_write(file); @@ -1467,6 +1468,7 @@ static ssize_t aio_run_iocb(struct kiocb *req, unsigned opcode, if (rw == WRITE) file_end_write(file); + fput(file); kfree(iovec); break; From 723c038475b78edc9327eb952f95f9881cc9d79d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 30 Oct 2016 11:42:02 -0500 Subject: [PATCH 032/319] fs: remove the never implemented aio_fsync file operation Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- Documentation/filesystems/Locking | 1 - Documentation/filesystems/vfs.txt | 1 - fs/aio.c | 14 -------------- fs/ntfs/dir.c | 2 -- include/linux/fs.h | 1 - 5 files changed, 19 deletions(-) diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index 14cdc101d165..1b5f15653b1b 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking @@ -447,7 +447,6 @@ prototypes: int (*flush) (struct file *); int (*release) (struct inode *, struct file *); int (*fsync) (struct file *, loff_t start, loff_t end, int datasync); - int (*aio_fsync) (struct kiocb *, int datasync); int (*fasync) (int, struct file *, int); int (*lock) (struct file *, int, struct file_lock *); ssize_t (*readv) (struct file *, const struct iovec *, unsigned long, diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index d619c8d71966..b5039a00caaf 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt @@ -828,7 +828,6 @@ struct file_operations { int (*flush) (struct file *, fl_owner_t id); int (*release) (struct inode *, struct file *); int (*fsync) (struct file *, loff_t, loff_t, int datasync); - int (*aio_fsync) (struct kiocb *, int datasync); int (*fasync) (int, struct file *, int); int (*lock) (struct file *, int, struct file_lock *); ssize_t (*sendpage) (struct file *, struct page *, int, size_t, loff_t *, int); diff --git a/fs/aio.c b/fs/aio.c index 0aa71d338c04..2a6030af6ba5 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -1472,20 +1472,6 @@ static ssize_t aio_run_iocb(struct kiocb *req, unsigned opcode, kfree(iovec); break; - case IOCB_CMD_FDSYNC: - if (!file->f_op->aio_fsync) - return -EINVAL; - - ret = file->f_op->aio_fsync(req, 1); - break; - - case IOCB_CMD_FSYNC: - if (!file->f_op->aio_fsync) - return -EINVAL; - - ret = file->f_op->aio_fsync(req, 0); - break; - default: pr_debug("EINVAL: no operation provided\n"); return -EINVAL; diff --git a/fs/ntfs/dir.c b/fs/ntfs/dir.c index a18613579001..0ee19ecc982d 100644 --- a/fs/ntfs/dir.c +++ b/fs/ntfs/dir.c @@ -1544,8 +1544,6 @@ const struct file_operations ntfs_dir_ops = { .iterate = ntfs_readdir, /* Read directory contents. */ #ifdef NTFS_RW .fsync = ntfs_dir_fsync, /* Sync a directory to disk. */ - /*.aio_fsync = ,*/ /* Sync all outstanding async - i/o operations on a kiocb. */ #endif /* NTFS_RW */ /*.ioctl = ,*/ /* Perform function on the mounted filesystem. */ diff --git a/include/linux/fs.h b/include/linux/fs.h index 16d2b6e874d6..ff7bcd9e8398 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1709,7 +1709,6 @@ struct file_operations { int (*flush) (struct file *, fl_owner_t id); int (*release) (struct inode *, struct file *); int (*fsync) (struct file *, loff_t, loff_t, int datasync); - int (*aio_fsync) (struct kiocb *, int datasync); int (*fasync) (int, struct file *, int); int (*lock) (struct file *, int, struct file_lock *); ssize_t (*sendpage) (struct file *, struct page *, int, size_t, loff_t *, int); From 89319d31d2d097da8e27fb0e0ae9d532f4f16827 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 30 Oct 2016 11:42:03 -0500 Subject: [PATCH 033/319] fs: remove aio_run_iocb Pass the ABI iocb structure to aio_setup_rw and let it handle the non-vectored I/O case as well. With that and a new helper for the AIO return value handling we can now define new aio_read and aio_write helpers that implement reads and writes in a self-contained way without duplicating too much code. Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- fs/aio.c | 188 ++++++++++++++++++++++++++++--------------------------- 1 file changed, 97 insertions(+), 91 deletions(-) diff --git a/fs/aio.c b/fs/aio.c index 2a6030af6ba5..c19755187ca5 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -1392,110 +1392,100 @@ SYSCALL_DEFINE1(io_destroy, aio_context_t, ctx) return -EINVAL; } -typedef ssize_t (rw_iter_op)(struct kiocb *, struct iov_iter *); - -static int aio_setup_vectored_rw(int rw, char __user *buf, size_t len, - struct iovec **iovec, - bool compat, - struct iov_iter *iter) +static int aio_setup_rw(int rw, struct iocb *iocb, struct iovec **iovec, + bool vectored, bool compat, struct iov_iter *iter) { + void __user *buf = (void __user *)(uintptr_t)iocb->aio_buf; + size_t len = iocb->aio_nbytes; + + if (!vectored) { + ssize_t ret = import_single_range(rw, buf, len, *iovec, iter); + *iovec = NULL; + return ret; + } #ifdef CONFIG_COMPAT if (compat) - return compat_import_iovec(rw, - (struct compat_iovec __user *)buf, - len, UIO_FASTIOV, iovec, iter); + return compat_import_iovec(rw, buf, len, UIO_FASTIOV, iovec, + iter); #endif - return import_iovec(rw, (struct iovec __user *)buf, - len, UIO_FASTIOV, iovec, iter); + return import_iovec(rw, buf, len, UIO_FASTIOV, iovec, iter); } -/* - * aio_run_iocb: - * Performs the initial checks and io submission. - */ -static ssize_t aio_run_iocb(struct kiocb *req, unsigned opcode, - char __user *buf, size_t len, bool compat) +static inline ssize_t aio_ret(struct kiocb *req, ssize_t ret) { - struct file *file = req->ki_filp; - ssize_t ret; - int rw; - fmode_t mode; - rw_iter_op *iter_op; - struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs; - struct iov_iter iter; - - switch (opcode) { - case IOCB_CMD_PREAD: - case IOCB_CMD_PREADV: - mode = FMODE_READ; - rw = READ; - iter_op = file->f_op->read_iter; - goto rw_common; - - case IOCB_CMD_PWRITE: - case IOCB_CMD_PWRITEV: - mode = FMODE_WRITE; - rw = WRITE; - iter_op = file->f_op->write_iter; - goto rw_common; -rw_common: - if (unlikely(!(file->f_mode & mode))) - return -EBADF; - - if (!iter_op) - return -EINVAL; - - if (opcode == IOCB_CMD_PREADV || opcode == IOCB_CMD_PWRITEV) - ret = aio_setup_vectored_rw(rw, buf, len, - &iovec, compat, &iter); - else { - ret = import_single_range(rw, buf, len, iovec, &iter); - iovec = NULL; - } - if (!ret) - ret = rw_verify_area(rw, file, &req->ki_pos, - iov_iter_count(&iter)); - if (ret < 0) { - kfree(iovec); - return ret; - } - - get_file(file); - if (rw == WRITE) - file_start_write(file); - - ret = iter_op(req, &iter); - - if (rw == WRITE) - file_end_write(file); - fput(file); - kfree(iovec); - break; - - default: - pr_debug("EINVAL: no operation provided\n"); - return -EINVAL; - } - - if (ret != -EIOCBQUEUED) { + switch (ret) { + case -EIOCBQUEUED: + return ret; + case -ERESTARTSYS: + case -ERESTARTNOINTR: + case -ERESTARTNOHAND: + case -ERESTART_RESTARTBLOCK: /* * There's no easy way to restart the syscall since other AIO's * may be already running. Just fail this IO with EINTR. */ - if (unlikely(ret == -ERESTARTSYS || ret == -ERESTARTNOINTR || - ret == -ERESTARTNOHAND || - ret == -ERESTART_RESTARTBLOCK)) - ret = -EINTR; + ret = -EINTR; + /*FALLTHRU*/ + default: aio_complete(req, ret, 0); + return 0; } +} - return 0; +static ssize_t aio_read(struct kiocb *req, struct iocb *iocb, bool vectored, + bool compat) +{ + struct file *file = req->ki_filp; + struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs; + struct iov_iter iter; + ssize_t ret; + + if (unlikely(!(file->f_mode & FMODE_READ))) + return -EBADF; + if (unlikely(!file->f_op->read_iter)) + return -EINVAL; + + ret = aio_setup_rw(READ, iocb, &iovec, vectored, compat, &iter); + if (ret) + return ret; + ret = rw_verify_area(READ, file, &req->ki_pos, iov_iter_count(&iter)); + if (!ret) + ret = aio_ret(req, file->f_op->read_iter(req, &iter)); + kfree(iovec); + return ret; +} + +static ssize_t aio_write(struct kiocb *req, struct iocb *iocb, bool vectored, + bool compat) +{ + struct file *file = req->ki_filp; + struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs; + struct iov_iter iter; + ssize_t ret; + + if (unlikely(!(file->f_mode & FMODE_WRITE))) + return -EBADF; + if (unlikely(!file->f_op->write_iter)) + return -EINVAL; + + ret = aio_setup_rw(WRITE, iocb, &iovec, vectored, compat, &iter); + if (ret) + return ret; + ret = rw_verify_area(WRITE, file, &req->ki_pos, iov_iter_count(&iter)); + if (!ret) { + file_start_write(file); + ret = aio_ret(req, file->f_op->write_iter(req, &iter)); + file_end_write(file); + } + kfree(iovec); + return ret; } static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, struct iocb *iocb, bool compat) { struct aio_kiocb *req; + struct file *file; ssize_t ret; /* enforce forwards compatibility on users */ @@ -1518,7 +1508,7 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, if (unlikely(!req)) return -EAGAIN; - req->common.ki_filp = fget(iocb->aio_fildes); + req->common.ki_filp = file = fget(iocb->aio_fildes); if (unlikely(!req->common.ki_filp)) { ret = -EBADF; goto out_put_req; @@ -1553,13 +1543,29 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, req->ki_user_iocb = user_iocb; req->ki_user_data = iocb->aio_data; - ret = aio_run_iocb(&req->common, iocb->aio_lio_opcode, - (char __user *)(unsigned long)iocb->aio_buf, - iocb->aio_nbytes, - compat); - if (ret) - goto out_put_req; + get_file(file); + switch (iocb->aio_lio_opcode) { + case IOCB_CMD_PREAD: + ret = aio_read(&req->common, iocb, false, compat); + break; + case IOCB_CMD_PWRITE: + ret = aio_write(&req->common, iocb, false, compat); + break; + case IOCB_CMD_PREADV: + ret = aio_read(&req->common, iocb, true, compat); + break; + case IOCB_CMD_PWRITEV: + ret = aio_write(&req->common, iocb, true, compat); + break; + default: + pr_debug("invalid aio operation %d\n", iocb->aio_lio_opcode); + ret = -EINVAL; + break; + } + fput(file); + if (ret && ret != -EIOCBQUEUED) + goto out_put_req; return 0; out_put_req: put_reqs_available(ctx, 1); From 70fe2f48152e60664809e2fed76bbb50c9fa2aa3 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Sun, 30 Oct 2016 11:42:04 -0500 Subject: [PATCH 034/319] aio: fix freeze protection of aio writes Currently we dropped freeze protection of aio writes just after IO was submitted. Thus aio write could be in flight while the filesystem was frozen and that could result in unexpected situation like aio completion wanting to convert extent type on frozen filesystem. Testcase from Dmitry triggering this is like: for ((i=0;i<60;i++));do fsfreeze -f /mnt ;sleep 1;fsfreeze -u /mnt;done & fio --bs=4k --ioengine=libaio --iodepth=128 --size=1g --direct=1 \ --runtime=60 --filename=/mnt/file --name=rand-write --rw=randwrite Fix the problem by dropping freeze protection only once IO is completed in aio_complete(). Reported-by: Dmitry Monakhov Signed-off-by: Jan Kara [hch: forward ported on top of various VFS and aio changes] Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- fs/aio.c | 19 ++++++++++++++++++- include/linux/fs.h | 1 + 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/fs/aio.c b/fs/aio.c index c19755187ca5..428484f2f841 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -1078,6 +1078,17 @@ static void aio_complete(struct kiocb *kiocb, long res, long res2) unsigned tail, pos, head; unsigned long flags; + if (kiocb->ki_flags & IOCB_WRITE) { + struct file *file = kiocb->ki_filp; + + /* + * Tell lockdep we inherited freeze protection from submission + * thread. + */ + __sb_writers_acquired(file_inode(file)->i_sb, SB_FREEZE_WRITE); + file_end_write(file); + } + /* * Special case handling for sync iocbs: * - events go directly into the iocb for fast handling @@ -1473,9 +1484,15 @@ static ssize_t aio_write(struct kiocb *req, struct iocb *iocb, bool vectored, return ret; ret = rw_verify_area(WRITE, file, &req->ki_pos, iov_iter_count(&iter)); if (!ret) { + req->ki_flags |= IOCB_WRITE; file_start_write(file); ret = aio_ret(req, file->f_op->write_iter(req, &iter)); - file_end_write(file); + /* + * We release freeze protection in aio_complete(). Fool lockdep + * by telling it the lock got released so that it doesn't + * complain about held lock when we return to userspace. + */ + __sb_writers_release(file_inode(file)->i_sb, SB_FREEZE_WRITE); } kfree(iovec); return ret; diff --git a/include/linux/fs.h b/include/linux/fs.h index ff7bcd9e8398..dc0478c07b2a 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -321,6 +321,7 @@ struct writeback_control; #define IOCB_HIPRI (1 << 3) #define IOCB_DSYNC (1 << 4) #define IOCB_SYNC (1 << 5) +#define IOCB_WRITE (1 << 6) struct kiocb { struct file *ki_filp; From b9a321b48af40e0606009df8aff0a8c65dfbbfd8 Mon Sep 17 00:00:00 2001 From: Mark Lord Date: Sun, 30 Oct 2016 19:28:27 -0400 Subject: [PATCH 035/319] r8152: Fix broken RX checksums. The r8152 driver has been broken since (approx) 3.16.xx when support was added for hardware RX checksums on newer chip versions. Symptoms include random segfaults and silent data corruption over NFS. The hardware checksum logig does not work on the VER_02 dongles I have here when used with a slow embedded system CPU. Google reveals others reporting similar issues on Raspberry Pi. So, disable hardware RX checksum support for VER_02, and fix an obvious coding error for IPV6 checksums in the same function. Because this bug results in silent data corruption, it is a good candidate for back-porting to -stable >= 3.16.xx. Signed-off-by: Mark Lord Signed-off-by: David S. Miller --- drivers/net/usb/r8152.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 44d439f50961..75c516889645 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -1730,7 +1730,7 @@ static u8 r8152_rx_csum(struct r8152 *tp, struct rx_desc *rx_desc) u8 checksum = CHECKSUM_NONE; u32 opts2, opts3; - if (tp->version == RTL_VER_01) + if (tp->version == RTL_VER_01 || tp->version == RTL_VER_02) goto return_result; opts2 = le32_to_cpu(rx_desc->opts2); @@ -1745,7 +1745,7 @@ static u8 r8152_rx_csum(struct r8152 *tp, struct rx_desc *rx_desc) checksum = CHECKSUM_NONE; else checksum = CHECKSUM_UNNECESSARY; - } else if (RD_IPV6_CS) { + } else if (opts2 & RD_IPV6_CS) { if ((opts2 & RD_UDP_CS) && !(opts3 & UDPF)) checksum = CHECKSUM_UNNECESSARY; else if ((opts2 & RD_TCP_CS) && !(opts3 & TCPF)) From 9b9d7cdd0a20a8c26a022604580f93516ad69c36 Mon Sep 17 00:00:00 2001 From: Vivek Gautam Date: Fri, 21 Oct 2016 16:21:07 +0530 Subject: [PATCH 036/319] usb: dwc3: Fix error handling for core init Fixing the sequence of events in dwc3_core_init() error exit path. dwc3_core_exit() call is also removed from the error path since, whatever it's doing is already done. Fixes: c499ff7 usb: dwc3: core: re-factor init and exit paths Cc: Felipe Balbi Cc: Greg KH Cc: Stable # 4.8+ Signed-off-by: Vivek Gautam Signed-off-by: Felipe Balbi --- drivers/usb/dwc3/core.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c index 7287a763cd0c..fea446900cad 100644 --- a/drivers/usb/dwc3/core.c +++ b/drivers/usb/dwc3/core.c @@ -769,15 +769,14 @@ static int dwc3_core_init(struct dwc3 *dwc) return 0; err4: - phy_power_off(dwc->usb2_generic_phy); + phy_power_off(dwc->usb3_generic_phy); err3: - phy_power_off(dwc->usb3_generic_phy); + phy_power_off(dwc->usb2_generic_phy); err2: usb_phy_set_suspend(dwc->usb2_phy, 1); usb_phy_set_suspend(dwc->usb3_phy, 1); - dwc3_core_exit(dwc); err1: usb_phy_shutdown(dwc->usb2_phy); From 4accb8a1ee7d82f02bcbacba0e50995c531918d4 Mon Sep 17 00:00:00 2001 From: Felipe Balbi Date: Fri, 21 Oct 2016 15:23:59 +0300 Subject: [PATCH 037/319] usb: dwc3: st: add missing include dwc3-st uses pinctrl_pm_select_*_state() however it doesn't include the necessary header. Fix the build break caused by that, by simply including the missing header. Signed-off-by: Felipe Balbi --- drivers/usb/dwc3/dwc3-st.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/dwc3/dwc3-st.c b/drivers/usb/dwc3/dwc3-st.c index 89a2f712fdfe..aaaf256f71dd 100644 --- a/drivers/usb/dwc3/dwc3-st.c +++ b/drivers/usb/dwc3/dwc3-st.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include "core.h" From c17c3cdff10b9f59ef1244a14604f10949f17117 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Sat, 29 Oct 2016 22:03:05 +0800 Subject: [PATCH 038/319] netfilter: nf_tables: destroy the set if fail to add transaction When the memory is exhausted, then we will fail to add the NFT_MSG_NEWSET transaction. In such case, we should destroy the set before we free it. Fixes: 958bee14d071 ("netfilter: nf_tables: use new transaction infrastructure to handle sets") Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 365d31b86816..7d6a626b08f1 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -2956,12 +2956,14 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk, err = nft_trans_set_add(&ctx, NFT_MSG_NEWSET, set); if (err < 0) - goto err2; + goto err3; list_add_tail_rcu(&set->list, &table->sets); table->use++; return 0; +err3: + ops->destroy(set); err2: kfree(set); err1: From b73b8a1ba598236296a46103d81c10d629d9a470 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Sat, 29 Oct 2016 22:09:51 +0800 Subject: [PATCH 039/319] netfilter: nft_dup: do not use sreg_dev if the user doesn't specify it The NFTA_DUP_SREG_DEV attribute is not a must option, so we should use it in routing lookup only when the user specify it. Fixes: d877f07112f1 ("netfilter: nf_tables: add nft_dup expression") Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/nft_dup_ipv4.c | 6 ++++-- net/ipv6/netfilter/nft_dup_ipv6.c | 6 ++++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/net/ipv4/netfilter/nft_dup_ipv4.c b/net/ipv4/netfilter/nft_dup_ipv4.c index bf855e64fc45..0c01a270bf9f 100644 --- a/net/ipv4/netfilter/nft_dup_ipv4.c +++ b/net/ipv4/netfilter/nft_dup_ipv4.c @@ -28,7 +28,7 @@ static void nft_dup_ipv4_eval(const struct nft_expr *expr, struct in_addr gw = { .s_addr = (__force __be32)regs->data[priv->sreg_addr], }; - int oif = regs->data[priv->sreg_dev]; + int oif = priv->sreg_dev ? regs->data[priv->sreg_dev] : -1; nf_dup_ipv4(pkt->net, pkt->skb, pkt->hook, &gw, oif); } @@ -59,7 +59,9 @@ static int nft_dup_ipv4_dump(struct sk_buff *skb, const struct nft_expr *expr) { struct nft_dup_ipv4 *priv = nft_expr_priv(expr); - if (nft_dump_register(skb, NFTA_DUP_SREG_ADDR, priv->sreg_addr) || + if (nft_dump_register(skb, NFTA_DUP_SREG_ADDR, priv->sreg_addr)) + goto nla_put_failure; + if (priv->sreg_dev && nft_dump_register(skb, NFTA_DUP_SREG_DEV, priv->sreg_dev)) goto nla_put_failure; diff --git a/net/ipv6/netfilter/nft_dup_ipv6.c b/net/ipv6/netfilter/nft_dup_ipv6.c index 8bfd470cbe72..831f86e1ec08 100644 --- a/net/ipv6/netfilter/nft_dup_ipv6.c +++ b/net/ipv6/netfilter/nft_dup_ipv6.c @@ -26,7 +26,7 @@ static void nft_dup_ipv6_eval(const struct nft_expr *expr, { struct nft_dup_ipv6 *priv = nft_expr_priv(expr); struct in6_addr *gw = (struct in6_addr *)®s->data[priv->sreg_addr]; - int oif = regs->data[priv->sreg_dev]; + int oif = priv->sreg_dev ? regs->data[priv->sreg_dev] : -1; nf_dup_ipv6(pkt->net, pkt->skb, pkt->hook, gw, oif); } @@ -57,7 +57,9 @@ static int nft_dup_ipv6_dump(struct sk_buff *skb, const struct nft_expr *expr) { struct nft_dup_ipv6 *priv = nft_expr_priv(expr); - if (nft_dump_register(skb, NFTA_DUP_SREG_ADDR, priv->sreg_addr) || + if (nft_dump_register(skb, NFTA_DUP_SREG_ADDR, priv->sreg_addr)) + goto nla_put_failure; + if (priv->sreg_dev && nft_dump_register(skb, NFTA_DUP_SREG_DEV, priv->sreg_dev)) goto nla_put_failure; From c5f0627488be996e833038bdba01e45698ddaa26 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Tue, 11 Oct 2016 13:41:02 -0500 Subject: [PATCH 040/319] driver core: skip removal test for non-removable drivers Some drivers do not support removal/unbinding. These drivers should have drv->suppress_bind_attrs set to true, so use that to skip the removal test. This doesn't fix anything reported so far, but should prevent some other cases. Some drivers will need fixes to set suppress_bind_attrs to avoid this test. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=177021 Fixes: bea5b158ff0d ("driver core: add test of driver remove calls during probe") Reported-by: Laszlo Ersek Signed-off-by: Rob Herring Signed-off-by: Greg Kroah-Hartman --- drivers/base/dd.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/base/dd.c b/drivers/base/dd.c index d22a7260f42b..8937a7ad7165 100644 --- a/drivers/base/dd.c +++ b/drivers/base/dd.c @@ -324,7 +324,8 @@ static int really_probe(struct device *dev, struct device_driver *drv) { int ret = -EPROBE_DEFER; int local_trigger_count = atomic_read(&deferred_trigger_count); - bool test_remove = IS_ENABLED(CONFIG_DEBUG_TEST_DRIVER_REMOVE); + bool test_remove = IS_ENABLED(CONFIG_DEBUG_TEST_DRIVER_REMOVE) && + !drv->suppress_bind_attrs; if (defer_all_probes) { /* From bdacd1b426db83ac8ecf21aef1848120ffe53c07 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Tue, 11 Oct 2016 13:41:03 -0500 Subject: [PATCH 041/319] driver core: fix smatch warning on dev->bus check Commit d42a09802174 (driver core: skip removal test for non-removable drivers) introduced a smatch warning: drivers/base/dd.c:386 really_probe() warn: variable dereferenced before check 'dev->bus' (see line 373) Fix the warning by removing the dev->bus NULL check. dev->bus will never be NULL, so the check was unnecessary. Reported-by: Dan Carpenter Signed-off-by: Rob Herring Signed-off-by: Greg Kroah-Hartman --- drivers/base/dd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/base/dd.c b/drivers/base/dd.c index 8937a7ad7165..d76cd97a98b6 100644 --- a/drivers/base/dd.c +++ b/drivers/base/dd.c @@ -384,7 +384,7 @@ static int really_probe(struct device *dev, struct device_driver *drv) if (test_remove) { test_remove = false; - if (dev->bus && dev->bus->remove) + if (dev->bus->remove) dev->bus->remove(dev); else if (drv->remove) drv->remove(dev); From 066f1f0b4719eb4573ef09bfc63c2bbb6f7676ca Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 31 Oct 2016 10:41:49 -0400 Subject: [PATCH 042/319] drm/radeon: disable runtime pm in certain cases MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If the platform does not support hybrid graphics or ATPX dGPU power control. bug: https://bugzilla.kernel.org/show_bug.cgi?id=51381 Acked-by: Christian König Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/radeon/radeon_device.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index eb92aef46e3c..621af069a3d2 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -104,6 +104,14 @@ static const char radeon_family_name[][16] = { "LAST", }; +#if defined(CONFIG_VGA_SWITCHEROO) +bool radeon_has_atpx_dgpu_power_cntl(void); +bool radeon_is_atpx_hybrid(void); +#else +static inline bool radeon_has_atpx_dgpu_power_cntl(void) { return false; } +static inline bool radeon_is_atpx_hybrid(void) { return false; } +#endif + #define RADEON_PX_QUIRK_DISABLE_PX (1 << 0) #define RADEON_PX_QUIRK_LONG_WAKEUP (1 << 1) @@ -160,6 +168,11 @@ static void radeon_device_handle_px_quirks(struct radeon_device *rdev) if (rdev->px_quirk_flags & RADEON_PX_QUIRK_DISABLE_PX) rdev->flags &= ~RADEON_IS_PX; + + /* disable PX is the system doesn't support dGPU power control or hybrid gfx */ + if (!radeon_is_atpx_hybrid() && + !radeon_has_atpx_dgpu_power_cntl()) + rdev->flags &= ~RADEON_IS_PX; } /** From 84b1528e8cef55274f0df20e93513b3060ce495a Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 31 Oct 2016 11:02:31 -0400 Subject: [PATCH 043/319] drm/amdgpu: disable runtime pm in certain cases MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If the platform does not support hybrid graphics or ATPX dGPU power control. Acked-by: Christian König Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 203d98b00555..3938fca1ea8e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -99,6 +99,8 @@ int amdgpu_driver_load_kms(struct drm_device *dev, unsigned long flags) if ((amdgpu_runtime_pm != 0) && amdgpu_has_atpx() && + (amdgpu_is_atpx_hybrid() || + amdgpu_has_atpx_dgpu_power_cntl()) && ((flags & AMD_IS_APU) == 0)) flags |= AMD_IS_PX; From 0a6e21056eaa859353945c4b164f3ef574d84271 Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Mon, 31 Oct 2016 12:19:39 -0400 Subject: [PATCH 044/319] drm/radeon: Fix kernel panic on shutdown Since commit a481daa88fd4 ("drm/radeon: always apply pci shutdown callbacks"), a Dell Latitude D600 laptop has crashed on shutdown. The PCI Identification of the graphics adapter is "VGA compatible controller [0300]: Advanced Micro Devices, Inc. [AMD/ATI] RV250/M9 GL [Mobility FireGL 9000/Radeon 9000] [1002:4c66] (rev 01)". Prior to commit b0c80bd5d2e3 ("drm/radeon: fix up dp aux tear down (v2)"), I have no idea where the panic happened as the screen was blanked before the crash. Since that more recent change, the panic has been in routine radeon_connector_unregister(), and has been shown to be due to a NULL value in the ddc_bus member of struct drm_connector. Bug: https://bugzilla.kernel.org/show_bug.cgi?id=178421 Fixes: a481daa88fd4 ("drm/radeon: always apply pci shutdown callbacks") Signed-off-by: Larry Finger Cc: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon_connectors.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/radeon/radeon_connectors.c b/drivers/gpu/drm/radeon/radeon_connectors.c index e18839d52e3e..27affbde058c 100644 --- a/drivers/gpu/drm/radeon/radeon_connectors.c +++ b/drivers/gpu/drm/radeon/radeon_connectors.c @@ -931,7 +931,7 @@ static void radeon_connector_unregister(struct drm_connector *connector) { struct radeon_connector *radeon_connector = to_radeon_connector(connector); - if (radeon_connector->ddc_bus->has_aux) { + if (radeon_connector->ddc_bus && radeon_connector->ddc_bus->has_aux) { drm_dp_aux_unregister(&radeon_connector->ddc_bus->aux); radeon_connector->ddc_bus->has_aux = false; } From 582ab27a063a506ccb55fc48afcc325342a2deba Mon Sep 17 00:00:00 2001 From: Alexander Usyskin Date: Mon, 31 Oct 2016 19:02:39 +0200 Subject: [PATCH 045/319] mei: bus: fix received data size check in NFC fixup NFC version reply size checked against only header size, not against full message size. That may lead potentially to uninitialized memory access in version data. That leads to warnings when version data is accessed: drivers/misc/mei/bus-fixup.c: warning: '*((void *)&ver+11)' may be used uninitialized in this function [-Wuninitialized]: => 212:2 Reported in Build regressions/improvements in v4.9-rc3 https://lkml.org/lkml/2016/10/30/57 Fixes: 59fcd7c63abf (mei: nfc: Initial nfc implementation) Signed-off-by: Alexander Usyskin Signed-off-by: Tomas Winkler Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mei/bus-fixup.c | 2 +- drivers/nfc/mei_phy.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/misc/mei/bus-fixup.c b/drivers/misc/mei/bus-fixup.c index e9e6ea3ab73c..75b9d4ac8b1e 100644 --- a/drivers/misc/mei/bus-fixup.c +++ b/drivers/misc/mei/bus-fixup.c @@ -178,7 +178,7 @@ static int mei_nfc_if_version(struct mei_cl *cl, ret = 0; bytes_recv = __mei_cl_recv(cl, (u8 *)reply, if_version_length); - if (bytes_recv < 0 || bytes_recv < sizeof(struct mei_nfc_reply)) { + if (bytes_recv < if_version_length) { dev_err(bus->dev, "Could not read IF version\n"); ret = -EIO; goto err; diff --git a/drivers/nfc/mei_phy.c b/drivers/nfc/mei_phy.c index 83deda4bb4d6..6f9563a96488 100644 --- a/drivers/nfc/mei_phy.c +++ b/drivers/nfc/mei_phy.c @@ -133,7 +133,7 @@ static int mei_nfc_if_version(struct nfc_mei_phy *phy) return -ENOMEM; bytes_recv = mei_cldev_recv(phy->cldev, (u8 *)reply, if_version_length); - if (bytes_recv < 0 || bytes_recv < sizeof(struct mei_nfc_reply)) { + if (bytes_recv < 0 || bytes_recv < if_version_length) { pr_err("Could not read IF version\n"); r = -EIO; goto err; From eef2b41122425dbddd16d70371bd000f601161d6 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 31 Oct 2016 12:27:52 -0400 Subject: [PATCH 046/319] drm/amdgpu: make sure ddc_bus is valid in connector unregister This should only happen on boards TV connectors which do not have a ddc bus for those connectors. None of the asics supported by amdgpu support tv, so we shouldn't hit this, but check to be on the safe side (e.g., bios bug for example). Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c index e3281d4e3e41..086aa5c9c634 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c @@ -769,7 +769,7 @@ static void amdgpu_connector_unregister(struct drm_connector *connector) { struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector); - if (amdgpu_connector->ddc_bus->has_aux) { + if (amdgpu_connector->ddc_bus && amdgpu_connector->ddc_bus->has_aux) { drm_dp_aux_unregister(&amdgpu_connector->ddc_bus->aux); amdgpu_connector->ddc_bus->has_aux = false; } From 91efdb2718e0c5ff014f0cf98cac99f088a9a4d2 Mon Sep 17 00:00:00 2001 From: Grazvydas Ignotas Date: Sat, 29 Oct 2016 23:32:44 +0300 Subject: [PATCH 047/319] drm/amd/powerplay: don't succeed in getters if fan is missing Otherwise callers end up using uninitialized data. Reviewed-by: Edward O'Callaghan Signed-off-by: Grazvydas Ignotas Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/hwmgr/smu7_thermal.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_thermal.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_thermal.c index fb6c6f6106d5..29d0319b22e6 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_thermal.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_thermal.c @@ -30,7 +30,7 @@ int smu7_fan_ctrl_get_fan_speed_info(struct pp_hwmgr *hwmgr, struct phm_fan_speed_info *fan_speed_info) { if (hwmgr->thermal_controller.fanInfo.bNoFan) - return 0; + return -ENODEV; fan_speed_info->supports_percent_read = true; fan_speed_info->supports_percent_write = true; @@ -60,7 +60,7 @@ int smu7_fan_ctrl_get_fan_speed_percent(struct pp_hwmgr *hwmgr, uint64_t tmp64; if (hwmgr->thermal_controller.fanInfo.bNoFan) - return 0; + return -ENODEV; duty100 = PHM_READ_VFPF_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, CG_FDO_CTRL1, FMAX_DUTY100); @@ -89,7 +89,7 @@ int smu7_fan_ctrl_get_fan_speed_rpm(struct pp_hwmgr *hwmgr, uint32_t *speed) if (hwmgr->thermal_controller.fanInfo.bNoFan || (hwmgr->thermal_controller.fanInfo. ucTachometerPulsesPerRevolution == 0)) - return 0; + return -ENODEV; tach_period = PHM_READ_VFPF_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, CG_TACH_STATUS, TACH_PERIOD); From c24784f01549ecdf23fc00d0588423bcf8956714 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Fri, 28 Oct 2016 17:04:07 +0200 Subject: [PATCH 048/319] drm/amd: fix scheduler fence teardown order v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some fences might be alive even after we have stopped the scheduler leading to warnings about leaked objects from the SLUB allocator. Fix this by allocating/freeing the SLUB allocator from the module init/fini functions just like we do it for hw fences. v2: make variable static, add link to bug Fixes: https://bugs.freedesktop.org/show_bug.cgi?id=97500 Reported-by: Grazvydas Ignotas Signed-off-by: Christian König Reviewed-by: Alex Deucher (v1) Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 2 ++ drivers/gpu/drm/amd/scheduler/gpu_scheduler.c | 13 ------------- drivers/gpu/drm/amd/scheduler/gpu_scheduler.h | 6 +++--- drivers/gpu/drm/amd/scheduler/sched_fence.c | 19 +++++++++++++++++++ 4 files changed, 24 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 71ed27eb3dde..73f2415630f8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -737,6 +737,7 @@ static int __init amdgpu_init(void) { amdgpu_sync_init(); amdgpu_fence_slab_init(); + amd_sched_fence_slab_init(); if (vgacon_text_force()) { DRM_ERROR("VGACON disables amdgpu kernel modesetting.\n"); return -EINVAL; @@ -756,6 +757,7 @@ static void __exit amdgpu_exit(void) drm_pci_exit(driver, pdriver); amdgpu_unregister_atpx_handler(); amdgpu_sync_fini(); + amd_sched_fence_slab_fini(); amdgpu_fence_slab_fini(); } diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c index 910b8d5b21c5..ffe1f85ce300 100644 --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c @@ -34,9 +34,6 @@ static bool amd_sched_entity_is_ready(struct amd_sched_entity *entity); static void amd_sched_wakeup(struct amd_gpu_scheduler *sched); static void amd_sched_process_job(struct fence *f, struct fence_cb *cb); -struct kmem_cache *sched_fence_slab; -atomic_t sched_fence_slab_ref = ATOMIC_INIT(0); - /* Initialize a given run queue struct */ static void amd_sched_rq_init(struct amd_sched_rq *rq) { @@ -618,13 +615,6 @@ int amd_sched_init(struct amd_gpu_scheduler *sched, INIT_LIST_HEAD(&sched->ring_mirror_list); spin_lock_init(&sched->job_list_lock); atomic_set(&sched->hw_rq_count, 0); - if (atomic_inc_return(&sched_fence_slab_ref) == 1) { - sched_fence_slab = kmem_cache_create( - "amd_sched_fence", sizeof(struct amd_sched_fence), 0, - SLAB_HWCACHE_ALIGN, NULL); - if (!sched_fence_slab) - return -ENOMEM; - } /* Each scheduler will run on a seperate kernel thread */ sched->thread = kthread_run(amd_sched_main, sched, sched->name); @@ -645,7 +635,4 @@ void amd_sched_fini(struct amd_gpu_scheduler *sched) { if (sched->thread) kthread_stop(sched->thread); - rcu_barrier(); - if (atomic_dec_and_test(&sched_fence_slab_ref)) - kmem_cache_destroy(sched_fence_slab); } diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h index 7cbbbfb502ef..51068e6c3d9a 100644 --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h @@ -30,9 +30,6 @@ struct amd_gpu_scheduler; struct amd_sched_rq; -extern struct kmem_cache *sched_fence_slab; -extern atomic_t sched_fence_slab_ref; - /** * A scheduler entity is a wrapper around a job queue or a group * of other entities. Entities take turns emitting jobs from their @@ -145,6 +142,9 @@ void amd_sched_entity_fini(struct amd_gpu_scheduler *sched, struct amd_sched_entity *entity); void amd_sched_entity_push_job(struct amd_sched_job *sched_job); +int amd_sched_fence_slab_init(void); +void amd_sched_fence_slab_fini(void); + struct amd_sched_fence *amd_sched_fence_create( struct amd_sched_entity *s_entity, void *owner); void amd_sched_fence_scheduled(struct amd_sched_fence *fence); diff --git a/drivers/gpu/drm/amd/scheduler/sched_fence.c b/drivers/gpu/drm/amd/scheduler/sched_fence.c index 3653b5a40494..88fc2d662579 100644 --- a/drivers/gpu/drm/amd/scheduler/sched_fence.c +++ b/drivers/gpu/drm/amd/scheduler/sched_fence.c @@ -27,6 +27,25 @@ #include #include "gpu_scheduler.h" +static struct kmem_cache *sched_fence_slab; + +int amd_sched_fence_slab_init(void) +{ + sched_fence_slab = kmem_cache_create( + "amd_sched_fence", sizeof(struct amd_sched_fence), 0, + SLAB_HWCACHE_ALIGN, NULL); + if (!sched_fence_slab) + return -ENOMEM; + + return 0; +} + +void amd_sched_fence_slab_fini(void) +{ + rcu_barrier(); + kmem_cache_destroy(sched_fence_slab); +} + struct amd_sched_fence *amd_sched_fence_create(struct amd_sched_entity *entity, void *owner) { From 245ae5e915853ced749eb47a343749cf0a9c4109 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Fri, 28 Oct 2016 17:39:08 +0200 Subject: [PATCH 049/319] drm/amdgpu: add some error handling to amdgpu_init v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Just to be clean should we ever run into -ENOMEM during module init. v2: fix typo in commit message Signed-off-by: Christian König Reviewed-by: Alex Deucher (v1) Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 26 ++++++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 73f2415630f8..02ff0747197c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -735,9 +735,20 @@ static struct pci_driver amdgpu_kms_pci_driver = { static int __init amdgpu_init(void) { - amdgpu_sync_init(); - amdgpu_fence_slab_init(); - amd_sched_fence_slab_init(); + int r; + + r = amdgpu_sync_init(); + if (r) + goto error_sync; + + r = amdgpu_fence_slab_init(); + if (r) + goto error_fence; + + r = amd_sched_fence_slab_init(); + if (r) + goto error_sched; + if (vgacon_text_force()) { DRM_ERROR("VGACON disables amdgpu kernel modesetting.\n"); return -EINVAL; @@ -749,6 +760,15 @@ static int __init amdgpu_init(void) amdgpu_register_atpx_handler(); /* let modprobe override vga console setting */ return drm_pci_init(driver, pdriver); + +error_sched: + amdgpu_fence_slab_fini(); + +error_fence: + amdgpu_sync_fini(); + +error_sync: + return r; } static void __exit amdgpu_exit(void) From f89c56ce710afa65e1b2ead555b52c4807f34ff7 Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Wed, 26 Oct 2016 11:21:14 +0200 Subject: [PATCH 050/319] ipv6: Don't use ufo handling on later transformed packets Similar to commit c146066ab802 ("ipv4: Don't use ufo handling on later transformed packets"), don't perform UFO on packets that will be IPsec transformed. To detect it we rely on the fact that headerlen in dst_entry is non-zero only for transformation bundles (xfrm_dst objects). Unwanted segmentation can be observed with a NETIF_F_UFO capable device, such as a dummy device: DEV=dum0 LEN=1493 ip li add $DEV type dummy ip addr add fc00::1/64 dev $DEV nodad ip link set $DEV up ip xfrm policy add dir out src fc00::1 dst fc00::2 \ tmpl src fc00::1 dst fc00::2 proto esp spi 1 ip xfrm state add src fc00::1 dst fc00::2 \ proto esp spi 1 enc 'aes' 0x0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b tcpdump -n -nn -i $DEV -t & socat /dev/zero,readbytes=$LEN udp6:[fc00::2]:$LEN tcpdump output before: IP6 fc00::1 > fc00::2: frag (0|1448) ESP(spi=0x00000001,seq=0x1), length 1448 IP6 fc00::1 > fc00::2: frag (1448|48) IP6 fc00::1 > fc00::2: ESP(spi=0x00000001,seq=0x2), length 88 ... and after: IP6 fc00::1 > fc00::2: frag (0|1448) ESP(spi=0x00000001,seq=0x1), length 1448 IP6 fc00::1 > fc00::2: frag (1448|80) Fixes: e89e9cf539a2 ("[IPv4/IPv6]: UFO Scatter-gather approach") Signed-off-by: Jakub Sitnicki Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- net/ipv6/ip6_output.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 6001e781164e..59eb4ed99ce8 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1366,7 +1366,7 @@ static int __ip6_append_data(struct sock *sk, if (((length > mtu) || (skb && skb_is_gso(skb))) && (sk->sk_protocol == IPPROTO_UDP) && - (rt->dst.dev->features & NETIF_F_UFO) && + (rt->dst.dev->features & NETIF_F_UFO) && !rt->dst.header_len && (sk->sk_type == SOCK_DGRAM) && !udp_get_no_check6_tx(sk)) { err = ip6_ufo_append_data(sk, queue, getfrag, from, length, hh_len, fragheaderlen, exthdrlen, From 19bda36c4299ce3d7e5bce10bebe01764a655a6d Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 28 Oct 2016 18:18:01 +0800 Subject: [PATCH 051/319] ipv6: add mtu lock check in __ip6_rt_update_pmtu Prior to this patch, ipv6 didn't do mtu lock check in ip6_update_pmtu. It leaded to that mtu lock doesn't really work when receiving the pkt of ICMPV6_PKT_TOOBIG. This patch is to add mtu lock check in __ip6_rt_update_pmtu just as ipv4 did in __ip_rt_update_pmtu. Acked-by: Hannes Frederic Sowa Signed-off-by: Xin Long Signed-off-by: David S. Miller --- net/ipv6/route.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 947ed1ded026..7403d90dcb38 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1364,6 +1364,9 @@ static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk, if (rt6->rt6i_flags & RTF_LOCAL) return; + if (dst_metric_locked(dst, RTAX_MTU)) + return; + dst_confirm(dst); mtu = max_t(u32, mtu, IPV6_MIN_MTU); if (mtu >= dst_mtu(dst)) From ce6dd23329b1ee6a794acf5f7e40f8e89b8317ee Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 28 Oct 2016 18:43:11 +0200 Subject: [PATCH 052/319] dctcp: avoid bogus doubling of cwnd after loss If a congestion control module doesn't provide .undo_cwnd function, tcp_undo_cwnd_reduction() will set cwnd to tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh << 1); ... which makes sense for reno (it sets ssthresh to half the current cwnd), but it makes no sense for dctcp, which sets ssthresh based on the current congestion estimate. This can cause severe growth of cwnd (eventually overflowing u32). Fix this by saving last cwnd on loss and restore cwnd based on that, similar to cubic and other algorithms. Fixes: e3118e8359bb7c ("net: tcp: add DCTCP congestion control algorithm") Cc: Lawrence Brakmo Cc: Andrew Shewmaker Cc: Glenn Judd Acked-by: Daniel Borkmann Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- net/ipv4/tcp_dctcp.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c index 10d728b6804c..ab37c6775630 100644 --- a/net/ipv4/tcp_dctcp.c +++ b/net/ipv4/tcp_dctcp.c @@ -56,6 +56,7 @@ struct dctcp { u32 next_seq; u32 ce_state; u32 delayed_ack_reserved; + u32 loss_cwnd; }; static unsigned int dctcp_shift_g __read_mostly = 4; /* g = 1/2^4 */ @@ -96,6 +97,7 @@ static void dctcp_init(struct sock *sk) ca->dctcp_alpha = min(dctcp_alpha_on_init, DCTCP_MAX_ALPHA); ca->delayed_ack_reserved = 0; + ca->loss_cwnd = 0; ca->ce_state = 0; dctcp_reset(tp, ca); @@ -111,9 +113,10 @@ static void dctcp_init(struct sock *sk) static u32 dctcp_ssthresh(struct sock *sk) { - const struct dctcp *ca = inet_csk_ca(sk); + struct dctcp *ca = inet_csk_ca(sk); struct tcp_sock *tp = tcp_sk(sk); + ca->loss_cwnd = tp->snd_cwnd; return max(tp->snd_cwnd - ((tp->snd_cwnd * ca->dctcp_alpha) >> 11U), 2U); } @@ -308,12 +311,20 @@ static size_t dctcp_get_info(struct sock *sk, u32 ext, int *attr, return 0; } +static u32 dctcp_cwnd_undo(struct sock *sk) +{ + const struct dctcp *ca = inet_csk_ca(sk); + + return max(tcp_sk(sk)->snd_cwnd, ca->loss_cwnd); +} + static struct tcp_congestion_ops dctcp __read_mostly = { .init = dctcp_init, .in_ack_event = dctcp_update_alpha, .cwnd_event = dctcp_cwnd_event, .ssthresh = dctcp_ssthresh, .cong_avoid = tcp_reno_cong_avoid, + .undo_cwnd = dctcp_cwnd_undo, .set_state = dctcp_state, .get_info = dctcp_get_info, .flags = TCP_CONG_NEEDS_ECN, From e551c32d57c88923f99f8f010e89ca7ed0735e83 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 28 Oct 2016 13:40:24 -0700 Subject: [PATCH 053/319] net: clear sk_err_soft in sk_clone_lock() At accept() time, it is possible the parent has a non zero sk_err_soft, leftover from a prior error. Make sure we do not leave this value in the child, as it makes future getsockopt(SO_ERROR) calls quite unreliable. Signed-off-by: Eric Dumazet Acked-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller --- net/core/sock.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/core/sock.c b/net/core/sock.c index c73e28fc9c2a..df171acfe232 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1543,6 +1543,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) RCU_INIT_POINTER(newsk->sk_reuseport_cb, NULL); newsk->sk_err = 0; + newsk->sk_err_soft = 0; newsk->sk_priority = 0; newsk->sk_incoming_cpu = raw_smp_processor_id(); atomic64_set(&newsk->sk_cookie, 0); From 4f2e4ad56a65f3b7d64c258e373cb71e8d2499f4 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 29 Oct 2016 11:02:36 -0700 Subject: [PATCH 054/319] net: mangle zero checksum in skb_checksum_help() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Sending zero checksum is ok for TCP, but not for UDP. UDPv6 receiver should by default drop a frame with a 0 checksum, and UDPv4 would not verify the checksum and might accept a corrupted packet. Simply replace such checksum by 0xffff, regardless of transport. This error was caught on SIT tunnels, but seems generic. Signed-off-by: Eric Dumazet Cc: Maciej Żenczykowski Cc: Willem de Bruijn Acked-by: Maciej Żenczykowski Signed-off-by: David S. Miller --- net/core/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/dev.c b/net/core/dev.c index 820bac239738..eaad4c28069f 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2484,7 +2484,7 @@ int skb_checksum_help(struct sk_buff *skb) goto out; } - *(__sum16 *)(skb->data + offset) = csum_fold(csum); + *(__sum16 *)(skb->data + offset) = csum_fold(csum) ?: CSUM_MANGLED_0; out_set_summed: skb->ip_summed = CHECKSUM_NONE; out: From cbbf049a7c346180cc61ae0a9245c5d749d20a12 Mon Sep 17 00:00:00 2001 From: "Mintz, Yuval" Date: Sun, 30 Oct 2016 10:25:42 +0200 Subject: [PATCH 055/319] qede: Fix statistics' strings for Tx/Rx queues When an interface is configured to use Tx/Rx-only queues, the length of the statistics would be shortened to accomodate only the statistics required per-each queue, and the values would be provided accordingly. However, the strings provided would still contain both Tx and Rx strings for each one of the queues [regardless of its configuration], which might lead to out-of-bound access when filling the buffers as well as incorrect statistics presented. Fixes: 9a4d7e86acf3 ("qede: Add support for Tx/Rx-only queues.") Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- .../net/ethernet/qlogic/qede/qede_ethtool.c | 25 ++++++++++++------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c index 12251a1032d1..7567cc464b88 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c +++ b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c @@ -175,16 +175,23 @@ static void qede_get_strings_stats(struct qede_dev *edev, u8 *buf) for (i = 0, k = 0; i < QEDE_QUEUE_CNT(edev); i++) { int tc; - for (j = 0; j < QEDE_NUM_RQSTATS; j++) - sprintf(buf + (k + j) * ETH_GSTRING_LEN, - "%d: %s", i, qede_rqstats_arr[j].string); - k += QEDE_NUM_RQSTATS; - for (tc = 0; tc < edev->num_tc; tc++) { - for (j = 0; j < QEDE_NUM_TQSTATS; j++) + if (edev->fp_array[i].type & QEDE_FASTPATH_RX) { + for (j = 0; j < QEDE_NUM_RQSTATS; j++) sprintf(buf + (k + j) * ETH_GSTRING_LEN, - "%d.%d: %s", i, tc, - qede_tqstats_arr[j].string); - k += QEDE_NUM_TQSTATS; + "%d: %s", i, + qede_rqstats_arr[j].string); + k += QEDE_NUM_RQSTATS; + } + + if (edev->fp_array[i].type & QEDE_FASTPATH_TX) { + for (tc = 0; tc < edev->num_tc; tc++) { + for (j = 0; j < QEDE_NUM_TQSTATS; j++) + sprintf(buf + (k + j) * + ETH_GSTRING_LEN, + "%d.%d: %s", i, tc, + qede_tqstats_arr[j].string); + k += QEDE_NUM_TQSTATS; + } } } From 46d0847cdd4a3fc1920e56827b9189b9a105d362 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Sun, 30 Oct 2016 10:09:22 +0100 Subject: [PATCH 056/319] mlxsw: spectrum: Fix incorrect reuse of MID entries In the device, a MID entry represents a group of local ports, which can later be bound to a MDB entry. The lookup of an existing MID entry is currently done using the provided MC MAC address and VID, from the Linux bridge. However, this can result in an incorrect reuse of the same MID index in different VLAN-unaware bridges (same IP MC group and VID 0). Fix this by performing the lookup based on FID instead of VID, which is unique across different bridges. Fixes: 3a49b4fde2a1 ("mlxsw: Adding layer 2 multicast support") Signed-off-by: Ido Schimmel Acked-by: Elad Raz Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum.h | 2 +- .../ethernet/mellanox/mlxsw/spectrum_switchdev.c | 14 +++++++------- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index 9b22863a924b..97bbc1d21df8 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -115,7 +115,7 @@ struct mlxsw_sp_rif { struct mlxsw_sp_mid { struct list_head list; unsigned char addr[ETH_ALEN]; - u16 vid; + u16 fid; u16 mid; unsigned int ref_count; }; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index 5e00c79e8133..1e2c8eca3af1 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -929,12 +929,12 @@ static int mlxsw_sp_port_smid_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 mid, static struct mlxsw_sp_mid *__mlxsw_sp_mc_get(struct mlxsw_sp *mlxsw_sp, const unsigned char *addr, - u16 vid) + u16 fid) { struct mlxsw_sp_mid *mid; list_for_each_entry(mid, &mlxsw_sp->br_mids.list, list) { - if (ether_addr_equal(mid->addr, addr) && mid->vid == vid) + if (ether_addr_equal(mid->addr, addr) && mid->fid == fid) return mid; } return NULL; @@ -942,7 +942,7 @@ static struct mlxsw_sp_mid *__mlxsw_sp_mc_get(struct mlxsw_sp *mlxsw_sp, static struct mlxsw_sp_mid *__mlxsw_sp_mc_alloc(struct mlxsw_sp *mlxsw_sp, const unsigned char *addr, - u16 vid) + u16 fid) { struct mlxsw_sp_mid *mid; u16 mid_idx; @@ -958,7 +958,7 @@ static struct mlxsw_sp_mid *__mlxsw_sp_mc_alloc(struct mlxsw_sp *mlxsw_sp, set_bit(mid_idx, mlxsw_sp->br_mids.mapped); ether_addr_copy(mid->addr, addr); - mid->vid = vid; + mid->fid = fid; mid->mid = mid_idx; mid->ref_count = 0; list_add_tail(&mid->list, &mlxsw_sp->br_mids.list); @@ -991,9 +991,9 @@ static int mlxsw_sp_port_mdb_add(struct mlxsw_sp_port *mlxsw_sp_port, if (switchdev_trans_ph_prepare(trans)) return 0; - mid = __mlxsw_sp_mc_get(mlxsw_sp, mdb->addr, mdb->vid); + mid = __mlxsw_sp_mc_get(mlxsw_sp, mdb->addr, fid); if (!mid) { - mid = __mlxsw_sp_mc_alloc(mlxsw_sp, mdb->addr, mdb->vid); + mid = __mlxsw_sp_mc_alloc(mlxsw_sp, mdb->addr, fid); if (!mid) { netdev_err(dev, "Unable to allocate MC group\n"); return -ENOMEM; @@ -1137,7 +1137,7 @@ static int mlxsw_sp_port_mdb_del(struct mlxsw_sp_port *mlxsw_sp_port, u16 mid_idx; int err = 0; - mid = __mlxsw_sp_mc_get(mlxsw_sp, mdb->addr, mdb->vid); + mid = __mlxsw_sp_mc_get(mlxsw_sp, mdb->addr, fid); if (!mid) { netdev_err(dev, "Unable to remove port from MC DB\n"); return -EINVAL; From 460d2830b00db407be2b72ed792eb3596f245192 Mon Sep 17 00:00:00 2001 From: Lukas Resch Date: Mon, 10 Oct 2016 08:07:32 +0000 Subject: [PATCH 057/319] can: sja1000: plx_pci: Add support for Moxa CAN devices This patch adds support for Moxa CAN devices. Signed-off-by: Lukas Resch Signed-off-by: Christoph Zehentner Signed-off-by: Marc Kleine-Budde --- drivers/net/can/sja1000/plx_pci.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/drivers/net/can/sja1000/plx_pci.c b/drivers/net/can/sja1000/plx_pci.c index 3eb7430dffbf..f8ff25c8ee2e 100644 --- a/drivers/net/can/sja1000/plx_pci.c +++ b/drivers/net/can/sja1000/plx_pci.c @@ -142,6 +142,9 @@ struct plx_pci_card { #define CTI_PCI_VENDOR_ID 0x12c4 #define CTI_PCI_DEVICE_ID_CRG001 0x0900 +#define MOXA_PCI_VENDOR_ID 0x1393 +#define MOXA_PCI_DEVICE_ID 0x0100 + static void plx_pci_reset_common(struct pci_dev *pdev); static void plx9056_pci_reset_common(struct pci_dev *pdev); static void plx_pci_reset_marathon_pci(struct pci_dev *pdev); @@ -258,6 +261,14 @@ static struct plx_pci_card_info plx_pci_card_info_elcus = { /* based on PLX9030 */ }; +static struct plx_pci_card_info plx_pci_card_info_moxa = { + "MOXA", 2, + PLX_PCI_CAN_CLOCK, PLX_PCI_OCR, PLX_PCI_CDR, + {0, 0x00, 0x00}, { {0, 0x00, 0x80}, {1, 0x00, 0x80} }, + &plx_pci_reset_common + /* based on PLX9052 */ +}; + static const struct pci_device_id plx_pci_tbl[] = { { /* Adlink PCI-7841/cPCI-7841 */ @@ -357,6 +368,13 @@ static const struct pci_device_id plx_pci_tbl[] = { 0, 0, (kernel_ulong_t)&plx_pci_card_info_elcus }, + { + /* moxa */ + MOXA_PCI_VENDOR_ID, MOXA_PCI_DEVICE_ID, + PCI_ANY_ID, PCI_ANY_ID, + 0, 0, + (kernel_ulong_t)&plx_pci_card_info_moxa + }, { 0,} }; MODULE_DEVICE_TABLE(pci, plx_pci_tbl); From deb507f91f1adbf64317ad24ac46c56eeccfb754 Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Mon, 24 Oct 2016 21:11:26 +0200 Subject: [PATCH 058/319] can: bcm: fix warning in bcm_connect/proc_register Andrey Konovalov reported an issue with proc_register in bcm.c. As suggested by Cong Wang this patch adds a lock_sock() protection and a check for unsuccessful proc_create_data() in bcm_connect(). Reference: http://marc.info/?l=linux-netdev&m=147732648731237 Reported-by: Andrey Konovalov Suggested-by: Cong Wang Signed-off-by: Oliver Hartkopp Acked-by: Cong Wang Tested-by: Andrey Konovalov Cc: linux-stable Signed-off-by: Marc Kleine-Budde --- net/can/bcm.c | 32 +++++++++++++++++++++++--------- 1 file changed, 23 insertions(+), 9 deletions(-) diff --git a/net/can/bcm.c b/net/can/bcm.c index 8e999ffdf28b..8af9d25ff988 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -1549,24 +1549,31 @@ static int bcm_connect(struct socket *sock, struct sockaddr *uaddr, int len, struct sockaddr_can *addr = (struct sockaddr_can *)uaddr; struct sock *sk = sock->sk; struct bcm_sock *bo = bcm_sk(sk); + int ret = 0; if (len < sizeof(*addr)) return -EINVAL; - if (bo->bound) - return -EISCONN; + lock_sock(sk); + + if (bo->bound) { + ret = -EISCONN; + goto fail; + } /* bind a device to this socket */ if (addr->can_ifindex) { struct net_device *dev; dev = dev_get_by_index(&init_net, addr->can_ifindex); - if (!dev) - return -ENODEV; - + if (!dev) { + ret = -ENODEV; + goto fail; + } if (dev->type != ARPHRD_CAN) { dev_put(dev); - return -ENODEV; + ret = -ENODEV; + goto fail; } bo->ifindex = dev->ifindex; @@ -1577,17 +1584,24 @@ static int bcm_connect(struct socket *sock, struct sockaddr *uaddr, int len, bo->ifindex = 0; } - bo->bound = 1; - if (proc_dir) { /* unique socket address as filename */ sprintf(bo->procname, "%lu", sock_i_ino(sk)); bo->bcm_proc_read = proc_create_data(bo->procname, 0644, proc_dir, &bcm_proc_fops, sk); + if (!bo->bcm_proc_read) { + ret = -ENOMEM; + goto fail; + } } - return 0; + bo->bound = 1; + +fail: + release_sock(sk); + + return ret; } static int bcm_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, From 87557efc27f6a50140fb20df06a917f368ce3c66 Mon Sep 17 00:00:00 2001 From: Dongli Zhang Date: Mon, 31 Oct 2016 13:38:29 +0800 Subject: [PATCH 059/319] xen-netfront: do not cast grant table reference to signed short While grant reference is of type uint32_t, xen-netfront erroneously casts it to signed short in BUG_ON(). This would lead to the xen domU panic during boot-up or migration when it is attached with lots of paravirtual devices. Signed-off-by: Dongli Zhang Signed-off-by: David S. Miller --- drivers/net/xen-netfront.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index e17879dd5d5a..189a28dcd80d 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -304,7 +304,7 @@ static void xennet_alloc_rx_buffers(struct netfront_queue *queue) queue->rx_skbs[id] = skb; ref = gnttab_claim_grant_reference(&queue->gref_rx_head); - BUG_ON((signed short)ref < 0); + WARN_ON_ONCE(IS_ERR_VALUE((unsigned long)ref)); queue->grant_rx_ref[id] = ref; page = skb_frag_page(&skb_shinfo(skb)->frags[0]); @@ -428,7 +428,7 @@ static void xennet_tx_setup_grant(unsigned long gfn, unsigned int offset, id = get_id_from_freelist(&queue->tx_skb_freelist, queue->tx_skbs); tx = RING_GET_REQUEST(&queue->tx, queue->tx.req_prod_pvt++); ref = gnttab_claim_grant_reference(&queue->gref_tx_head); - BUG_ON((signed short)ref < 0); + WARN_ON_ONCE(IS_ERR_VALUE((unsigned long)ref)); gnttab_grant_foreign_access_ref(ref, queue->info->xbdev->otherend_id, gfn, GNTMAP_readonly); From cd26da4ff4eb7189921d4e7ad87e8adebb7b416b Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 31 Oct 2016 20:32:31 +0800 Subject: [PATCH 060/319] sctp: hold transport instead of assoc in sctp_diag In sctp_transport_lookup_process(), Commit 1cceda784980 ("sctp: fix the issue sctp_diag uses lock_sock in rcu_read_lock") moved cb() out of rcu lock, but it put transport and hold assoc instead, and ignore that cb() still uses transport. It may cause a use-after-free issue. This patch is to hold transport instead of assoc there. Fixes: 1cceda784980 ("sctp: fix the issue sctp_diag uses lock_sock in rcu_read_lock") Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- net/sctp/socket.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 9fbb6feb8c27..71b75f9d9c1b 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -4480,12 +4480,9 @@ int sctp_transport_lookup_process(int (*cb)(struct sctp_transport *, void *), if (!transport || !sctp_transport_hold(transport)) goto out; - sctp_association_hold(transport->asoc); - sctp_transport_put(transport); - rcu_read_unlock(); err = cb(transport, p); - sctp_association_put(transport->asoc); + sctp_transport_put(transport); out: return err; From 7c17fcc726903ffed1716351efdc617e752533ed Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 31 Oct 2016 20:32:32 +0800 Subject: [PATCH 061/319] sctp: return back transport in __sctp_rcv_init_lookup Prior to this patch, it used a local variable to save the transport that is looked up by __sctp_lookup_association(), and didn't return it back. But in sctp_rcv, it is used to initialize chunk->transport. So when hitting this, even if it found the transport, it was still initializing chunk->transport with null instead. This patch is to return the transport back through transport pointer that is from __sctp_rcv_lookup_harder(). Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- net/sctp/input.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/sctp/input.c b/net/sctp/input.c index a2ea1d1cc06a..8e0bc58eec20 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -1021,7 +1021,6 @@ static struct sctp_association *__sctp_rcv_init_lookup(struct net *net, struct sctphdr *sh = sctp_hdr(skb); union sctp_params params; sctp_init_chunk_t *init; - struct sctp_transport *transport; struct sctp_af *af; /* @@ -1052,7 +1051,7 @@ static struct sctp_association *__sctp_rcv_init_lookup(struct net *net, af->from_addr_param(paddr, params.addr, sh->source, 0); - asoc = __sctp_lookup_association(net, laddr, paddr, &transport); + asoc = __sctp_lookup_association(net, laddr, paddr, transportp); if (asoc) return asoc; } From dae399d7fdee84d8f5227a9711d95bb4e9a05d4e Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 31 Oct 2016 20:32:33 +0800 Subject: [PATCH 062/319] sctp: hold transport instead of assoc when lookup assoc in rx path Prior to this patch, in rx path, before calling lock_sock, it needed to hold assoc when got it by __sctp_lookup_association, in case other place would free/put assoc. But in __sctp_lookup_association, it lookup and hold transport, then got assoc by transport->assoc, then hold assoc and put transport. It means it didn't hold transport, yet it was returned and later on directly assigned to chunk->transport. Without the protection of sock lock, the transport may be freed/put by other places, which would cause a use-after-free issue. This patch is to fix this issue by holding transport instead of assoc. As holding transport can make sure to access assoc is also safe, and actually it looks up assoc by searching transport rhashtable, to hold transport here makes more sense. Note that the function will be renamed later on on another patch. Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- include/net/sctp/sctp.h | 2 +- net/sctp/input.c | 32 ++++++++++++++++---------------- net/sctp/ipv6.c | 2 +- 3 files changed, 18 insertions(+), 18 deletions(-) diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 87a7f42e7639..31acc3f4f132 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -152,7 +152,7 @@ void sctp_unhash_endpoint(struct sctp_endpoint *); struct sock *sctp_err_lookup(struct net *net, int family, struct sk_buff *, struct sctphdr *, struct sctp_association **, struct sctp_transport **); -void sctp_err_finish(struct sock *, struct sctp_association *); +void sctp_err_finish(struct sock *, struct sctp_transport *); void sctp_icmp_frag_needed(struct sock *, struct sctp_association *, struct sctp_transport *t, __u32 pmtu); void sctp_icmp_redirect(struct sock *, struct sctp_transport *, diff --git a/net/sctp/input.c b/net/sctp/input.c index 8e0bc58eec20..a01a56ec8b8c 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -181,9 +181,10 @@ int sctp_rcv(struct sk_buff *skb) * bound to another interface, via SO_BINDTODEVICE, treat it as OOTB */ if (sk->sk_bound_dev_if && (sk->sk_bound_dev_if != af->skb_iif(skb))) { - if (asoc) { - sctp_association_put(asoc); + if (transport) { + sctp_transport_put(transport); asoc = NULL; + transport = NULL; } else { sctp_endpoint_put(ep); ep = NULL; @@ -269,8 +270,8 @@ int sctp_rcv(struct sk_buff *skb) bh_unlock_sock(sk); /* Release the asoc/ep ref we took in the lookup calls. */ - if (asoc) - sctp_association_put(asoc); + if (transport) + sctp_transport_put(transport); else sctp_endpoint_put(ep); @@ -283,8 +284,8 @@ int sctp_rcv(struct sk_buff *skb) discard_release: /* Release the asoc/ep ref we took in the lookup calls. */ - if (asoc) - sctp_association_put(asoc); + if (transport) + sctp_transport_put(transport); else sctp_endpoint_put(ep); @@ -300,6 +301,7 @@ int sctp_backlog_rcv(struct sock *sk, struct sk_buff *skb) { struct sctp_chunk *chunk = SCTP_INPUT_CB(skb)->chunk; struct sctp_inq *inqueue = &chunk->rcvr->inqueue; + struct sctp_transport *t = chunk->transport; struct sctp_ep_common *rcvr = NULL; int backloged = 0; @@ -351,7 +353,7 @@ int sctp_backlog_rcv(struct sock *sk, struct sk_buff *skb) done: /* Release the refs we took in sctp_add_backlog */ if (SCTP_EP_TYPE_ASSOCIATION == rcvr->type) - sctp_association_put(sctp_assoc(rcvr)); + sctp_transport_put(t); else if (SCTP_EP_TYPE_SOCKET == rcvr->type) sctp_endpoint_put(sctp_ep(rcvr)); else @@ -363,6 +365,7 @@ int sctp_backlog_rcv(struct sock *sk, struct sk_buff *skb) static int sctp_add_backlog(struct sock *sk, struct sk_buff *skb) { struct sctp_chunk *chunk = SCTP_INPUT_CB(skb)->chunk; + struct sctp_transport *t = chunk->transport; struct sctp_ep_common *rcvr = chunk->rcvr; int ret; @@ -373,7 +376,7 @@ static int sctp_add_backlog(struct sock *sk, struct sk_buff *skb) * from us */ if (SCTP_EP_TYPE_ASSOCIATION == rcvr->type) - sctp_association_hold(sctp_assoc(rcvr)); + sctp_transport_hold(t); else if (SCTP_EP_TYPE_SOCKET == rcvr->type) sctp_endpoint_hold(sctp_ep(rcvr)); else @@ -537,15 +540,15 @@ struct sock *sctp_err_lookup(struct net *net, int family, struct sk_buff *skb, return sk; out: - sctp_association_put(asoc); + sctp_transport_put(transport); return NULL; } /* Common cleanup code for icmp/icmpv6 error handler. */ -void sctp_err_finish(struct sock *sk, struct sctp_association *asoc) +void sctp_err_finish(struct sock *sk, struct sctp_transport *t) { bh_unlock_sock(sk); - sctp_association_put(asoc); + sctp_transport_put(t); } /* @@ -641,7 +644,7 @@ void sctp_v4_err(struct sk_buff *skb, __u32 info) } out_unlock: - sctp_err_finish(sk, asoc); + sctp_err_finish(sk, transport); } /* @@ -952,11 +955,8 @@ static struct sctp_association *__sctp_lookup_association( goto out; asoc = t->asoc; - sctp_association_hold(asoc); *pt = t; - sctp_transport_put(t); - out: return asoc; } @@ -986,7 +986,7 @@ int sctp_has_association(struct net *net, struct sctp_transport *transport; if ((asoc = sctp_lookup_association(net, laddr, paddr, &transport))) { - sctp_association_put(asoc); + sctp_transport_put(transport); return 1; } diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index f473779e8b1c..176af3080a2b 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -198,7 +198,7 @@ static void sctp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, } out_unlock: - sctp_err_finish(sk, asoc); + sctp_err_finish(sk, transport); out: if (likely(idev != NULL)) in6_dev_put(idev); From 091c531b09c151c2d712a8f347009ca3698a2467 Mon Sep 17 00:00:00 2001 From: Ray Jui Date: Mon, 17 Oct 2016 18:41:41 -0700 Subject: [PATCH 063/319] pinctrl: iproc: Fix iProc and NSP GPIO support Since commit 44a7185c2ae6 ("of/platform: Add common method to populate default bus"), ARM64 platform devices are populated at the arch_initcall_sync level; as a result, the platform_driver_probe calls in both the iProc and NSP GPIO drivers fail with -ENODEV since by that time the platform device was not yet registered. Replace platform_driver_probe with platform_driver_register, that allow the device to be register later Fixes: 44a7185c2ae6 ("of/platform: Add common method to populate default bus") Signed-off-by: Ray Jui Tested-by: Eric Anholt Signed-off-by: Linus Walleij --- drivers/pinctrl/bcm/pinctrl-iproc-gpio.c | 2 +- drivers/pinctrl/bcm/pinctrl-nsp-gpio.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/pinctrl/bcm/pinctrl-iproc-gpio.c b/drivers/pinctrl/bcm/pinctrl-iproc-gpio.c index 7f7700716398..5d1e505c3c63 100644 --- a/drivers/pinctrl/bcm/pinctrl-iproc-gpio.c +++ b/drivers/pinctrl/bcm/pinctrl-iproc-gpio.c @@ -844,6 +844,6 @@ static struct platform_driver iproc_gpio_driver = { static int __init iproc_gpio_init(void) { - return platform_driver_probe(&iproc_gpio_driver, iproc_gpio_probe); + return platform_driver_register(&iproc_gpio_driver); } arch_initcall_sync(iproc_gpio_init); diff --git a/drivers/pinctrl/bcm/pinctrl-nsp-gpio.c b/drivers/pinctrl/bcm/pinctrl-nsp-gpio.c index 35783db1c10b..c8deb8be1da7 100644 --- a/drivers/pinctrl/bcm/pinctrl-nsp-gpio.c +++ b/drivers/pinctrl/bcm/pinctrl-nsp-gpio.c @@ -741,6 +741,6 @@ static struct platform_driver nsp_gpio_driver = { static int __init nsp_gpio_init(void) { - return platform_driver_probe(&nsp_gpio_driver, nsp_gpio_probe); + return platform_driver_register(&nsp_gpio_driver); } arch_initcall_sync(nsp_gpio_init); From c35e7790dcbe74b0c85f4c26bbe7e15510422684 Mon Sep 17 00:00:00 2001 From: Patrice Chotard Date: Tue, 18 Oct 2016 09:16:28 +0200 Subject: [PATCH 064/319] pinctrl: st: don't specify default interrupt trigger Thanks to 332e99d5ae4 which now alerts of default trigger usage when configuring interrupts. Signed-off-by: Patrice Chotard Acked-by: Peter Griffin Signed-off-by: Linus Walleij --- drivers/pinctrl/pinctrl-st.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/pinctrl-st.c b/drivers/pinctrl/pinctrl-st.c index 99da4cf91031..b7bb37167969 100644 --- a/drivers/pinctrl/pinctrl-st.c +++ b/drivers/pinctrl/pinctrl-st.c @@ -1512,7 +1512,7 @@ static int st_gpiolib_register_bank(struct st_pinctrl *info, if (info->irqmux_base || gpio_irq > 0) { err = gpiochip_irqchip_add(&bank->gpio_chip, &st_gpio_irqchip, 0, handle_simple_irq, - IRQ_TYPE_LEVEL_LOW); + IRQ_TYPE_NONE); if (err) { gpiochip_remove(&bank->gpio_chip); dev_info(dev, "could not add irqchip\n"); From 1064a2b41579d77af16164f5dd7bc6948ba1d5be Mon Sep 17 00:00:00 2001 From: Alexandre TORGUE Date: Thu, 20 Oct 2016 15:26:51 +0200 Subject: [PATCH 065/319] pinctrl: stm32: remove dependency with interrupt controller This patch allows to probe stm32 pinctrl driver even if no interrupt controller is defined to manage gpio irqs. Signed-off-by: Alexandre TORGUE Signed-off-by: Linus Walleij --- drivers/pinctrl/stm32/pinctrl-stm32.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/pinctrl/stm32/pinctrl-stm32.c b/drivers/pinctrl/stm32/pinctrl-stm32.c index 200667f08c37..efc43711ff5c 100644 --- a/drivers/pinctrl/stm32/pinctrl-stm32.c +++ b/drivers/pinctrl/stm32/pinctrl-stm32.c @@ -1092,9 +1092,11 @@ int stm32_pctl_probe(struct platform_device *pdev) return -EINVAL; } - ret = stm32_pctrl_dt_setup_irq(pdev, pctl); - if (ret) - return ret; + if (of_find_property(np, "interrupt-parent", NULL)) { + ret = stm32_pctrl_dt_setup_irq(pdev, pctl); + if (ret) + return ret; + } for_each_child_of_node(np, child) if (of_property_read_bool(child, "gpio-controller")) From 0553d8d0b03ad58f9917460c40a2e2b680f5bfdb Mon Sep 17 00:00:00 2001 From: Alexandre TORGUE Date: Thu, 20 Oct 2016 15:26:52 +0200 Subject: [PATCH 066/319] pinctrl: stm32: move gpio irqs binding to optional stm32 pinctrl driver could be probed even if no interrupt controller is defined to manage gpio irqs. Entries related to gpio irq management are moved to optional. Signed-off-by: Alexandre TORGUE Signed-off-by: Linus Walleij --- .../devicetree/bindings/pinctrl/st,stm32-pinctrl.txt | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/Documentation/devicetree/bindings/pinctrl/st,stm32-pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/st,stm32-pinctrl.txt index f9753c416974..b24583aa34c3 100644 --- a/Documentation/devicetree/bindings/pinctrl/st,stm32-pinctrl.txt +++ b/Documentation/devicetree/bindings/pinctrl/st,stm32-pinctrl.txt @@ -14,11 +14,6 @@ Required properies: - #size-cells : The value of this property must be 1 - ranges : defines mapping between pin controller node (parent) to gpio-bank node (children). - - interrupt-parent: phandle of the interrupt parent to which the external - GPIO interrupts are forwarded to. - - st,syscfg: Should be phandle/offset pair. The phandle to the syscon node - which includes IRQ mux selection register, and the offset of the IRQ mux - selection register. - pins-are-numbered: Specify the subnodes are using numbered pinmux to specify pins. @@ -37,6 +32,11 @@ Required properties: Optional properties: - reset: : Reference to the reset controller + - interrupt-parent: phandle of the interrupt parent to which the external + GPIO interrupts are forwarded to. + - st,syscfg: Should be phandle/offset pair. The phandle to the syscon node + which includes IRQ mux selection register, and the offset of the IRQ mux + selection register. Example: #include From 9999fe5df58773489b9564467b5c8cfb364e0b80 Mon Sep 17 00:00:00 2001 From: Stefan Agner Date: Tue, 18 Oct 2016 14:09:15 -0700 Subject: [PATCH 067/319] pinctrl: imx: reset group index on probe Group index is incremented on every new group parsed. Since the field is part of struct imx_pinctrl_soc_info, which is typically a global variable passed by the individual pinctrl-imx.c based driver, it does not get cleared automatically when re-probing the driver. This lead imx_pinctrl_parse_functions passing a group pointer which is outside of the allocated group space on second probe and onwards. Typically this ended up in a NULL pointer dereference when accessing the name field like this: Unable to handle kernel NULL pointer dereference at virtual address 00000000 ... PC is at strcmp+0x18/0x44 LR is at imx_dt_node_to_map+0xc4/0x290 Avoid this by setting group_index to 0 on probe. This has been observed when using DEBUG_TEST_DRIVER_REMOVE. Signed-off-by: Stefan Agner Signed-off-by: Linus Walleij --- drivers/pinctrl/freescale/pinctrl-imx.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/pinctrl/freescale/pinctrl-imx.c b/drivers/pinctrl/freescale/pinctrl-imx.c index 47613201269a..79c4e14a5a75 100644 --- a/drivers/pinctrl/freescale/pinctrl-imx.c +++ b/drivers/pinctrl/freescale/pinctrl-imx.c @@ -687,6 +687,7 @@ static int imx_pinctrl_probe_dt(struct platform_device *pdev, if (!info->functions) return -ENOMEM; + info->group_index = 0; if (flat_funcs) { info->ngroups = of_get_child_count(np); } else { From 6227e9f0e9ee988e2cfb425b02ef17f233b1d8d7 Mon Sep 17 00:00:00 2001 From: Alexey Brodkin Date: Mon, 31 Oct 2016 15:59:11 +0300 Subject: [PATCH 068/319] ARC: Enable PERF_EVENTS in nSIM driven platforms Now when we have properly working performance counters in nSIM even with interrupt support (fix should be a part of upcoming nSIM engineering build 2016.12-005) we may enable perf support by default for all platforms that use nSIM for ARC cores simulation. Note 1: PCT node was missing for some reason in nsimosci.dts while all other nSIM-related .dts files already had PCT node for quite some time, so adding it now. Note 2: All defconfigs were regenerated with "make savedefconfig" which led to some clean-ups in nsimosci_hs_smp_defconfig: CONFIG_FRAMEBUFFER_CONSOLE=y was removed because it is automatically selected now by DRM. Signed-off-by: Alexey Brodkin Signed-off-by: Vineet Gupta --- arch/arc/boot/dts/axc001.dtsi | 2 +- arch/arc/boot/dts/nsim_700.dts | 2 +- arch/arc/boot/dts/nsimosci.dts | 4 ++++ arch/arc/configs/nsim_700_defconfig | 1 + arch/arc/configs/nsim_hs_defconfig | 1 + arch/arc/configs/nsim_hs_smp_defconfig | 1 + arch/arc/configs/nsimosci_defconfig | 1 + arch/arc/configs/nsimosci_hs_defconfig | 1 + arch/arc/configs/nsimosci_hs_smp_defconfig | 3 +-- 9 files changed, 12 insertions(+), 4 deletions(-) diff --git a/arch/arc/boot/dts/axc001.dtsi b/arch/arc/boot/dts/axc001.dtsi index 6ae2c476ad82..53ce226f77a5 100644 --- a/arch/arc/boot/dts/axc001.dtsi +++ b/arch/arc/boot/dts/axc001.dtsi @@ -71,7 +71,7 @@ debug_uart: dw-apb-uart@0x5000 { reg-io-width = <4>; }; - arcpmu0: pmu { + arcpct0: pct { compatible = "snps,arc700-pct"; }; }; diff --git a/arch/arc/boot/dts/nsim_700.dts b/arch/arc/boot/dts/nsim_700.dts index ce0ccd20b5bf..5ee96b067c08 100644 --- a/arch/arc/boot/dts/nsim_700.dts +++ b/arch/arc/boot/dts/nsim_700.dts @@ -69,7 +69,7 @@ phy0: ethernet-phy@0 { }; }; - arcpmu0: pmu { + arcpct0: pct { compatible = "snps,arc700-pct"; }; }; diff --git a/arch/arc/boot/dts/nsimosci.dts b/arch/arc/boot/dts/nsimosci.dts index bcf603142a33..3c391ba565ed 100644 --- a/arch/arc/boot/dts/nsimosci.dts +++ b/arch/arc/boot/dts/nsimosci.dts @@ -83,5 +83,9 @@ eth0: ethernet@f0003000 { reg = <0xf0003000 0x44>; interrupts = <7>; }; + + arcpct0: pct { + compatible = "snps,arc700-pct"; + }; }; }; diff --git a/arch/arc/configs/nsim_700_defconfig b/arch/arc/configs/nsim_700_defconfig index 7314f538847b..b0066a749d4c 100644 --- a/arch/arc/configs/nsim_700_defconfig +++ b/arch/arc/configs/nsim_700_defconfig @@ -14,6 +14,7 @@ CONFIG_BLK_DEV_INITRD=y CONFIG_INITRAMFS_SOURCE="../arc_initramfs/" CONFIG_KALLSYMS_ALL=y CONFIG_EMBEDDED=y +CONFIG_PERF_EVENTS=y # CONFIG_SLUB_DEBUG is not set # CONFIG_COMPAT_BRK is not set CONFIG_KPROBES=y diff --git a/arch/arc/configs/nsim_hs_defconfig b/arch/arc/configs/nsim_hs_defconfig index 65ab9fbf83f2..ebe9ebb92933 100644 --- a/arch/arc/configs/nsim_hs_defconfig +++ b/arch/arc/configs/nsim_hs_defconfig @@ -14,6 +14,7 @@ CONFIG_BLK_DEV_INITRD=y CONFIG_INITRAMFS_SOURCE="../../arc_initramfs_hs/" CONFIG_KALLSYMS_ALL=y CONFIG_EMBEDDED=y +CONFIG_PERF_EVENTS=y # CONFIG_SLUB_DEBUG is not set # CONFIG_COMPAT_BRK is not set CONFIG_KPROBES=y diff --git a/arch/arc/configs/nsim_hs_smp_defconfig b/arch/arc/configs/nsim_hs_smp_defconfig index 3b3990cddbe1..4bde43278be6 100644 --- a/arch/arc/configs/nsim_hs_smp_defconfig +++ b/arch/arc/configs/nsim_hs_smp_defconfig @@ -12,6 +12,7 @@ CONFIG_BLK_DEV_INITRD=y CONFIG_INITRAMFS_SOURCE="../arc_initramfs_hs/" CONFIG_KALLSYMS_ALL=y CONFIG_EMBEDDED=y +CONFIG_PERF_EVENTS=y # CONFIG_SLUB_DEBUG is not set # CONFIG_COMPAT_BRK is not set CONFIG_KPROBES=y diff --git a/arch/arc/configs/nsimosci_defconfig b/arch/arc/configs/nsimosci_defconfig index 98cf20933bbb..f6fb3d26557e 100644 --- a/arch/arc/configs/nsimosci_defconfig +++ b/arch/arc/configs/nsimosci_defconfig @@ -14,6 +14,7 @@ CONFIG_BLK_DEV_INITRD=y CONFIG_INITRAMFS_SOURCE="../arc_initramfs/" CONFIG_KALLSYMS_ALL=y CONFIG_EMBEDDED=y +CONFIG_PERF_EVENTS=y # CONFIG_SLUB_DEBUG is not set # CONFIG_COMPAT_BRK is not set CONFIG_KPROBES=y diff --git a/arch/arc/configs/nsimosci_hs_defconfig b/arch/arc/configs/nsimosci_hs_defconfig index ddf8b96d494e..b9f0fe00044b 100644 --- a/arch/arc/configs/nsimosci_hs_defconfig +++ b/arch/arc/configs/nsimosci_hs_defconfig @@ -14,6 +14,7 @@ CONFIG_BLK_DEV_INITRD=y CONFIG_INITRAMFS_SOURCE="../arc_initramfs_hs/" CONFIG_KALLSYMS_ALL=y CONFIG_EMBEDDED=y +CONFIG_PERF_EVENTS=y # CONFIG_SLUB_DEBUG is not set # CONFIG_COMPAT_BRK is not set CONFIG_KPROBES=y diff --git a/arch/arc/configs/nsimosci_hs_smp_defconfig b/arch/arc/configs/nsimosci_hs_smp_defconfig index ceb90745326e..6da71ba253a9 100644 --- a/arch/arc/configs/nsimosci_hs_smp_defconfig +++ b/arch/arc/configs/nsimosci_hs_smp_defconfig @@ -10,6 +10,7 @@ CONFIG_IKCONFIG_PROC=y # CONFIG_PID_NS is not set CONFIG_BLK_DEV_INITRD=y CONFIG_INITRAMFS_SOURCE="../arc_initramfs_hs/" +CONFIG_PERF_EVENTS=y # CONFIG_COMPAT_BRK is not set CONFIG_KPROBES=y CONFIG_MODULES=y @@ -34,7 +35,6 @@ CONFIG_INET=y # CONFIG_INET_XFRM_MODE_TRANSPORT is not set # CONFIG_INET_XFRM_MODE_TUNNEL is not set # CONFIG_INET_XFRM_MODE_BEET is not set -# CONFIG_INET_LRO is not set # CONFIG_IPV6 is not set # CONFIG_WIRELESS is not set CONFIG_DEVTMPFS=y @@ -72,7 +72,6 @@ CONFIG_SERIAL_OF_PLATFORM=y # CONFIG_HWMON is not set CONFIG_DRM=y CONFIG_DRM_ARCPGU=y -CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_LOGO=y # CONFIG_HID is not set # CONFIG_USB_SUPPORT is not set From 8f6d9eb2a3f38f1acd04efa0aeb8b81f5373c923 Mon Sep 17 00:00:00 2001 From: Noam Camus Date: Sun, 30 Oct 2016 09:48:42 +0200 Subject: [PATCH 069/319] ARC: [SMP] avoid overriding present cpumask At smp_prepare_cpus() we set present cpu mask as part of init for all CPUs at range [0-max_cpus]. This is done without checking if this mask is already being set. At platform of eznps this mask is already being initialized at smp_init_cpus() by using hook plat_smp_ops.init_early_smp(). So to avoid overriding of present cpu mask we check the number of bits which are set in this mask. At the begin only bit for boot CPU is set so if number of bits already set is no more than one we can be assure that there is no overriding of this mask. Signed-off-by: Noam Camus Signed-off-by: Vineet Gupta --- arch/arc/kernel/smp.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/arch/arc/kernel/smp.c b/arch/arc/kernel/smp.c index f183cc648851..f00029e9cbe4 100644 --- a/arch/arc/kernel/smp.c +++ b/arch/arc/kernel/smp.c @@ -67,11 +67,13 @@ void __init smp_prepare_cpus(unsigned int max_cpus) int i; /* - * Initialise the present map, which describes the set of CPUs - * actually populated at the present time. + * if platform didn't set the present map already, do it now + * boot cpu is set to present already by init/main.c */ - for (i = 0; i < max_cpus; i++) - set_cpu_present(i, true); + if (num_present_cpus() <= 1) { + for (i = 0; i < max_cpus; i++) + set_cpu_present(i, true); + } } void __init smp_cpus_done(unsigned int max_cpus) From fcdefccac976ee51dd6071832b842d8fb41c479c Mon Sep 17 00:00:00 2001 From: Andy Gospodarek Date: Mon, 31 Oct 2016 13:32:03 -0400 Subject: [PATCH 070/319] bgmac: stop clearing DMA receive control register right after it is set Current bgmac code initializes some DMA settings in the receive control register for some hardware and then immediately clears those settings. Not clearing those settings results in ~420Mbps *improvement* in throughput; this system can now receive frames at line-rate on Broadcom 5871x hardware compared to ~520Mbps today. I also tested a few other values but found there to be no discernible difference in CPU utilization even if burst size and prefetching values are different. On the hardware tested there was no need to keep the code that cleared all but bits 16-17, but since there is a wide variety of hardware that used this driver (I did not look at all hardware docs for hardware using this IP block), I find it wise to move this call up and clear bits just after reading the default value from the hardware rather than completely removing it. This is a good candidate for -stable >=3.14 since that is when the code that was supposed to improve performance (but did not) was introduced. Signed-off-by: Andy Gospodarek Fixes: 56ceecde1f29 ("bgmac: initialize the DMA controller of core...") Cc: Hauke Mehrtens Acked-by: Hauke Mehrtens Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bgmac.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bgmac.c b/drivers/net/ethernet/broadcom/bgmac.c index 31ca204b38d2..91cbf92de971 100644 --- a/drivers/net/ethernet/broadcom/bgmac.c +++ b/drivers/net/ethernet/broadcom/bgmac.c @@ -307,6 +307,10 @@ static void bgmac_dma_rx_enable(struct bgmac *bgmac, u32 ctl; ctl = bgmac_read(bgmac, ring->mmio_base + BGMAC_DMA_RX_CTL); + + /* preserve ONLY bits 16-17 from current hardware value */ + ctl &= BGMAC_DMA_RX_ADDREXT_MASK; + if (bgmac->feature_flags & BGMAC_FEAT_RX_MASK_SETUP) { ctl &= ~BGMAC_DMA_RX_BL_MASK; ctl |= BGMAC_DMA_RX_BL_128 << BGMAC_DMA_RX_BL_SHIFT; @@ -317,7 +321,6 @@ static void bgmac_dma_rx_enable(struct bgmac *bgmac, ctl &= ~BGMAC_DMA_RX_PT_MASK; ctl |= BGMAC_DMA_RX_PT_1 << BGMAC_DMA_RX_PT_SHIFT; } - ctl &= BGMAC_DMA_RX_ADDREXT_MASK; ctl |= BGMAC_DMA_RX_ENABLE; ctl |= BGMAC_DMA_RX_PARITY_DISABLE; ctl |= BGMAC_DMA_RX_OVERFLOW_CONT; From c25badc9ceb612c6cc227a6fc4b0aaf678e3bcf9 Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Wed, 19 Oct 2016 15:53:52 -0700 Subject: [PATCH 071/319] cpupower: Correct return type of cpu_power_is_cpu_online() in cpufreq-set When converting to a shared library in ac5a181d065d ("cpupower: Add cpuidle parts into library"), cpu_freq_cpu_exists() was converted to cpupower_is_cpu_online(). cpu_req_cpu_exists() returned 0 on success and -ENOSYS on failure whereas cpupower_is_cpu_online returns 1 on success. Check for the correct return value in cpufreq-set. Link: https://bugzilla.redhat.com/show_bug.cgi?id=1374212 Fixes: ac5a181d065d (cpupower: Add cpuidle parts into library) Reported-by: Julian Seward Signed-off-by: Laura Abbott Acked-by: Thomas Renninger Cc: 4.7+ # 4.7+ Signed-off-by: Rafael J. Wysocki --- tools/power/cpupower/utils/cpufreq-set.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/tools/power/cpupower/utils/cpufreq-set.c b/tools/power/cpupower/utils/cpufreq-set.c index b4bf76971dc9..1eef0aed6423 100644 --- a/tools/power/cpupower/utils/cpufreq-set.c +++ b/tools/power/cpupower/utils/cpufreq-set.c @@ -296,7 +296,7 @@ int cmd_freq_set(int argc, char **argv) struct cpufreq_affected_cpus *cpus; if (!bitmask_isbitset(cpus_chosen, cpu) || - cpupower_is_cpu_online(cpu)) + cpupower_is_cpu_online(cpu) != 1) continue; cpus = cpufreq_get_related_cpus(cpu); @@ -316,10 +316,7 @@ int cmd_freq_set(int argc, char **argv) cpu <= bitmask_last(cpus_chosen); cpu++) { if (!bitmask_isbitset(cpus_chosen, cpu) || - cpupower_is_cpu_online(cpu)) - continue; - - if (cpupower_is_cpu_online(cpu) != 1) + cpupower_is_cpu_online(cpu) != 1) continue; printf(_("Setting cpu: %d\n"), cpu); From fd9afd3cbe404998d732be6cc798f749597c5114 Mon Sep 17 00:00:00 2001 From: Felipe Balbi Date: Tue, 1 Nov 2016 13:20:22 +0200 Subject: [PATCH 072/319] usb: gadget: u_ether: remove interrupt throttling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit According to Dave Miller "the networking stack has a hard requirement that all SKBs which are transmitted must have their completion signalled in a fininte amount of time. This is because, until the SKB is freed by the driver, it holds onto socket, netfilter, and other subsystem resources." In summary, this means that using TX IRQ throttling for the networking gadgets is, at least, complex and we should avoid it for the time being. Cc: Reported-by: Ville Syrjälä Tested-by: Ville Syrjälä Suggested-by: David Miller Signed-off-by: Felipe Balbi --- drivers/usb/gadget/function/u_ether.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/drivers/usb/gadget/function/u_ether.c b/drivers/usb/gadget/function/u_ether.c index fe1811650dbc..5d1bd13a56c1 100644 --- a/drivers/usb/gadget/function/u_ether.c +++ b/drivers/usb/gadget/function/u_ether.c @@ -588,14 +588,6 @@ static netdev_tx_t eth_start_xmit(struct sk_buff *skb, req->length = length; - /* throttle high/super speed IRQ rate back slightly */ - if (gadget_is_dualspeed(dev->gadget)) - req->no_interrupt = (((dev->gadget->speed == USB_SPEED_HIGH || - dev->gadget->speed == USB_SPEED_SUPER)) && - !list_empty(&dev->tx_reqs)) - ? ((atomic_read(&dev->tx_qlen) % dev->qmult) != 0) - : 0; - retval = usb_ep_queue(in, req, GFP_ATOMIC); switch (retval) { default: From d6124b409ca33c100170ffde51cd8dff761454a1 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 1 Nov 2016 12:13:31 +0100 Subject: [PATCH 073/319] uwb: fix device reference leaks This subsystem consistently fails to drop the device reference taken by class_find_device(). Note that some of these lookup functions already take a reference to the returned data, while others claim no reference is needed (or does not seem need one). Fixes: 183b9b592a62 ("uwb: add the UWB stack (core files)") Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/uwb/lc-rc.c | 16 +++++++++++++--- drivers/uwb/pal.c | 2 ++ 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/drivers/uwb/lc-rc.c b/drivers/uwb/lc-rc.c index d059ad4d0dbd..97ee1b46db69 100644 --- a/drivers/uwb/lc-rc.c +++ b/drivers/uwb/lc-rc.c @@ -56,8 +56,11 @@ static struct uwb_rc *uwb_rc_find_by_index(int index) struct uwb_rc *rc = NULL; dev = class_find_device(&uwb_rc_class, NULL, &index, uwb_rc_index_match); - if (dev) + if (dev) { rc = dev_get_drvdata(dev); + put_device(dev); + } + return rc; } @@ -467,7 +470,9 @@ struct uwb_rc *__uwb_rc_try_get(struct uwb_rc *target_rc) if (dev) { rc = dev_get_drvdata(dev); __uwb_rc_get(rc); + put_device(dev); } + return rc; } EXPORT_SYMBOL_GPL(__uwb_rc_try_get); @@ -520,8 +525,11 @@ struct uwb_rc *uwb_rc_get_by_grandpa(const struct device *grandpa_dev) dev = class_find_device(&uwb_rc_class, NULL, grandpa_dev, find_rc_grandpa); - if (dev) + if (dev) { rc = dev_get_drvdata(dev); + put_device(dev); + } + return rc; } EXPORT_SYMBOL_GPL(uwb_rc_get_by_grandpa); @@ -553,8 +561,10 @@ struct uwb_rc *uwb_rc_get_by_dev(const struct uwb_dev_addr *addr) struct uwb_rc *rc = NULL; dev = class_find_device(&uwb_rc_class, NULL, addr, find_rc_dev); - if (dev) + if (dev) { rc = dev_get_drvdata(dev); + put_device(dev); + } return rc; } diff --git a/drivers/uwb/pal.c b/drivers/uwb/pal.c index c1304b8d4985..678e93741ae1 100644 --- a/drivers/uwb/pal.c +++ b/drivers/uwb/pal.c @@ -97,6 +97,8 @@ static bool uwb_rc_class_device_exists(struct uwb_rc *target_rc) dev = class_find_device(&uwb_rc_class, NULL, target_rc, find_rc); + put_device(dev); + return (dev != NULL); } From f6b2db084b65b9dc0f910bc48d5f77c0e5166dc6 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Tue, 1 Nov 2016 00:01:59 -0700 Subject: [PATCH 074/319] vmbus: make sysfs names consistent with PCI In commit 9a56e5d6a0ba ("Drivers: hv: make VMBus bus ids persistent") the name of vmbus devices in sysfs changed to be (in 4.9-rc1): /sys/bus/vmbus/vmbus-6aebe374-9ba0-11e6-933c-00259086b36b The prefix ("vmbus-") is redundant and differs from how PCI is represented in sysfs. Therefore simplify to: /sys/bus/vmbus/6aebe374-9ba0-11e6-933c-00259086b36b Please merge this before 4.9 is released and the old format has to live forever. Signed-off-by: Stephen Hemminger Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- drivers/hv/vmbus_drv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index a259e18d22d5..0276d2ef06ee 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -961,7 +961,7 @@ int vmbus_device_register(struct hv_device *child_device_obj) { int ret = 0; - dev_set_name(&child_device_obj->device, "vmbus-%pUl", + dev_set_name(&child_device_obj->device, "%pUl", child_device_obj->channel->offermsg.offer.if_instance.b); child_device_obj->device.bus = &hv_bus; From b5a4a3eb4ec7382780aa153224780b9ecdc76ceb Mon Sep 17 00:00:00 2001 From: Iyappan Subramanian Date: Mon, 31 Oct 2016 16:00:26 -0700 Subject: [PATCH 075/319] drivers: net: xgene: fix: Disable coalescing on v1 hardware Since ethernet v1 hardware has a bug related to coalescing, disabling this feature. Signed-off-by: Iyappan Subramanian Signed-off-by: Toan Le Signed-off-by: David S. Miller --- drivers/net/ethernet/apm/xgene/xgene_enet_hw.c | 12 ------------ drivers/net/ethernet/apm/xgene/xgene_enet_main.c | 3 ++- 2 files changed, 2 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c b/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c index c481f104a8fe..5390ae89136c 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c @@ -204,17 +204,6 @@ static u32 xgene_enet_ring_len(struct xgene_enet_desc_ring *ring) return num_msgs; } -static void xgene_enet_setup_coalescing(struct xgene_enet_desc_ring *ring) -{ - u32 data = 0x7777; - - xgene_enet_ring_wr32(ring, CSR_PBM_COAL, 0x8e); - xgene_enet_ring_wr32(ring, CSR_PBM_CTICK1, data); - xgene_enet_ring_wr32(ring, CSR_PBM_CTICK2, data << 16); - xgene_enet_ring_wr32(ring, CSR_THRESHOLD0_SET1, 0x40); - xgene_enet_ring_wr32(ring, CSR_THRESHOLD1_SET1, 0x80); -} - void xgene_enet_parse_error(struct xgene_enet_desc_ring *ring, struct xgene_enet_pdata *pdata, enum xgene_enet_err_code status) @@ -929,5 +918,4 @@ struct xgene_ring_ops xgene_ring1_ops = { .clear = xgene_enet_clear_ring, .wr_cmd = xgene_enet_wr_cmd, .len = xgene_enet_ring_len, - .coalesce = xgene_enet_setup_coalescing, }; diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c index 429f18fc5503..8158d4698734 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c @@ -1188,7 +1188,8 @@ static int xgene_enet_create_desc_rings(struct net_device *ndev) tx_ring->dst_ring_num = xgene_enet_dst_ring_num(cp_ring); } - pdata->ring_ops->coalesce(pdata->tx_ring[0]); + if (pdata->ring_ops->coalesce) + pdata->ring_ops->coalesce(pdata->tx_ring[0]); pdata->tx_qcnt_hi = pdata->tx_ring[0]->slots - 128; return 0; From f126df8503275facc96dd05a818086afbb89b77d Mon Sep 17 00:00:00 2001 From: Iyappan Subramanian Date: Mon, 31 Oct 2016 16:00:27 -0700 Subject: [PATCH 076/319] drivers: net: xgene: fix: Coalescing values for v2 hardware Changing the interrupt trigger region id to 2 and the corresponding threshold set0/set1 values to 8/16. Signed-off-by: Iyappan Subramanian Signed-off-by: Toan Le Signed-off-by: David S. Miller --- drivers/net/ethernet/apm/xgene/xgene_enet_hw.h | 2 ++ drivers/net/ethernet/apm/xgene/xgene_enet_ring2.c | 12 +++++++----- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_hw.h b/drivers/net/ethernet/apm/xgene/xgene_enet_hw.h index 8456337a237d..06e598c8bc16 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_hw.h +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_hw.h @@ -55,8 +55,10 @@ enum xgene_enet_rm { #define PREFETCH_BUF_EN BIT(21) #define CSR_RING_ID_BUF 0x000c #define CSR_PBM_COAL 0x0014 +#define CSR_PBM_CTICK0 0x0018 #define CSR_PBM_CTICK1 0x001c #define CSR_PBM_CTICK2 0x0020 +#define CSR_PBM_CTICK3 0x0024 #define CSR_THRESHOLD0_SET1 0x0030 #define CSR_THRESHOLD1_SET1 0x0034 #define CSR_RING_NE_INT_MODE 0x017c diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_ring2.c b/drivers/net/ethernet/apm/xgene/xgene_enet_ring2.c index 2b76732add5d..af51dd5844ce 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_ring2.c +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_ring2.c @@ -30,7 +30,7 @@ static void xgene_enet_ring_init(struct xgene_enet_desc_ring *ring) ring_cfg[0] |= SET_VAL(X2_INTLINE, ring->id & RING_BUFNUM_MASK); ring_cfg[3] |= SET_BIT(X2_DEQINTEN); } - ring_cfg[0] |= SET_VAL(X2_CFGCRID, 1); + ring_cfg[0] |= SET_VAL(X2_CFGCRID, 2); addr >>= 8; ring_cfg[2] |= QCOHERENT | SET_VAL(RINGADDRL, addr); @@ -192,13 +192,15 @@ static u32 xgene_enet_ring_len(struct xgene_enet_desc_ring *ring) static void xgene_enet_setup_coalescing(struct xgene_enet_desc_ring *ring) { - u32 data = 0x7777; + u32 data = 0x77777777; xgene_enet_ring_wr32(ring, CSR_PBM_COAL, 0x8e); + xgene_enet_ring_wr32(ring, CSR_PBM_CTICK0, data); xgene_enet_ring_wr32(ring, CSR_PBM_CTICK1, data); - xgene_enet_ring_wr32(ring, CSR_PBM_CTICK2, data << 16); - xgene_enet_ring_wr32(ring, CSR_THRESHOLD0_SET1, 0x40); - xgene_enet_ring_wr32(ring, CSR_THRESHOLD1_SET1, 0x80); + xgene_enet_ring_wr32(ring, CSR_PBM_CTICK2, data); + xgene_enet_ring_wr32(ring, CSR_PBM_CTICK3, data); + xgene_enet_ring_wr32(ring, CSR_THRESHOLD0_SET1, 0x08); + xgene_enet_ring_wr32(ring, CSR_THRESHOLD1_SET1, 0x10); } struct xgene_ring_ops xgene_ring2_ops = { From 7bb9f731d1026bd48b84cee7853cba7f5678193c Mon Sep 17 00:00:00 2001 From: Timur Tabi Date: Mon, 31 Oct 2016 18:18:42 -0500 Subject: [PATCH 077/319] net: qcom/emac: use correct value for SGMII_LN_UCDR_SO_GAIN_MODE0 The documentation says that SGMII_LN_UCDR_SO_GAIN_MODE0 should be set to 0, not 6, on the Qualcomm Technologies QDF2432. Signed-off-by: Timur Tabi Signed-off-by: David S. Miller --- drivers/net/ethernet/qualcomm/emac/emac-sgmii.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/qualcomm/emac/emac-sgmii.c b/drivers/net/ethernet/qualcomm/emac/emac-sgmii.c index 75c1b530e39e..72fe343c7a36 100644 --- a/drivers/net/ethernet/qualcomm/emac/emac-sgmii.c +++ b/drivers/net/ethernet/qualcomm/emac/emac-sgmii.c @@ -421,7 +421,7 @@ static const struct emac_reg_write sgmii_v2_laned[] = { /* CDR Settings */ {EMAC_SGMII_LN_UCDR_FO_GAIN_MODE0, UCDR_STEP_BY_TWO_MODE0 | UCDR_xO_GAIN_MODE(10)}, - {EMAC_SGMII_LN_UCDR_SO_GAIN_MODE0, UCDR_xO_GAIN_MODE(6)}, + {EMAC_SGMII_LN_UCDR_SO_GAIN_MODE0, UCDR_xO_GAIN_MODE(0)}, {EMAC_SGMII_LN_UCDR_SO_CONFIG, UCDR_ENABLE | UCDR_SO_SATURATION(12)}, /* TX/RX Settings */ From e7947ea770d0de434d38a0f823e660d3fd4bebb5 Mon Sep 17 00:00:00 2001 From: Isaac Boukris Date: Tue, 1 Nov 2016 02:41:35 +0200 Subject: [PATCH 078/319] unix: escape all null bytes in abstract unix domain socket Abstract unix domain socket may embed null characters, these should be translated to '@' when printed out to proc the same way the null prefix is currently being translated. This helps for tools such as netstat, lsof and the proc based implementation in ss to show all the significant bytes of the name (instead of getting cut at the first null occurrence). Signed-off-by: Isaac Boukris Signed-off-by: David S. Miller --- net/unix/af_unix.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 145082e2ba36..5d1c14a2f268 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -2812,7 +2812,8 @@ static int unix_seq_show(struct seq_file *seq, void *v) i++; } for ( ; i < len; i++) - seq_putc(seq, u->addr->name->sun_path[i]); + seq_putc(seq, u->addr->name->sun_path[i] ?: + '@'); } unix_state_unlock(s); seq_putc(seq, '\n'); From 42fb18fd5852dba9c43ac008558e4bc8062bda57 Mon Sep 17 00:00:00 2001 From: Christophe Jaillet Date: Tue, 1 Nov 2016 08:10:53 +0100 Subject: [PATCH 079/319] net/mlx5: Simplify a test 'create_root_ns()' does not return an error pointer, so the test can be simplified to be more consistent. Signed-off-by: Christophe JAILLET Acked-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 89696048b045..914e5466f729 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -1690,7 +1690,7 @@ static int init_root_ns(struct mlx5_flow_steering *steering) { steering->root_ns = create_root_ns(steering, FS_FT_NIC_RX); - if (IS_ERR_OR_NULL(steering->root_ns)) + if (!steering->root_ns) goto cleanup; if (init_root_tree(steering, &root_fs, &steering->root_ns->ns.node)) From b9b84fc07dbc6ac74d85f1b1a401b41c403fecb1 Mon Sep 17 00:00:00 2001 From: Russell King Date: Tue, 1 Nov 2016 10:50:01 +0000 Subject: [PATCH 080/319] net: mv643xx_eth: ensure coalesce settings survive read-modify-write The coalesce settings behave badly when changing just one value: ... # ethtool -c eth0 rx-usecs: 249 ... # ethtool -C eth0 tx-usecs 250 ... # ethtool -c eth0 rx-usecs: 248 This occurs due to rounding errors when calculating the microseconds value - the divisons round down. This causes (eg) the rx-usecs to decrease by one every time the tx-usecs value is set as per the above. Fix this by making the divison round-to-nearest. Signed-off-by: Russell King Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/mv643xx_eth.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/marvell/mv643xx_eth.c b/drivers/net/ethernet/marvell/mv643xx_eth.c index bf5cc55ba24c..5b12022adf1f 100644 --- a/drivers/net/ethernet/marvell/mv643xx_eth.c +++ b/drivers/net/ethernet/marvell/mv643xx_eth.c @@ -1381,6 +1381,7 @@ static unsigned int get_rx_coal(struct mv643xx_eth_private *mp) temp = (val & 0x003fff00) >> 8; temp *= 64000000; + temp += mp->t_clk / 2; do_div(temp, mp->t_clk); return (unsigned int)temp; @@ -1417,6 +1418,7 @@ static unsigned int get_tx_coal(struct mv643xx_eth_private *mp) temp = (rdlp(mp, TX_FIFO_URGENT_THRESHOLD) & 0x3fff0) >> 4; temp *= 64000000; + temp += mp->t_clk / 2; do_div(temp, mp->t_clk); return (unsigned int)temp; From 8ec4b736d709562193566156c0dd40e327df2cbb Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Sat, 29 Oct 2016 08:10:03 -0200 Subject: [PATCH 081/319] Documentation/ABI: ibm_rtl: The "What:" fields are incomplete The "What:" field at the ABI should describe the location of the ABI, e. g. the position under a mounted sysfs. However, this file has only the basename without the path. Fix it. Cc: Vernon Mauery Cc: platform-driver-x86@vger.kernel.org Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Darren Hart --- Documentation/ABI/testing/sysfs-devices-system-ibm-rtl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-devices-system-ibm-rtl b/Documentation/ABI/testing/sysfs-devices-system-ibm-rtl index b82deeaec314..470def06ab0a 100644 --- a/Documentation/ABI/testing/sysfs-devices-system-ibm-rtl +++ b/Documentation/ABI/testing/sysfs-devices-system-ibm-rtl @@ -1,4 +1,4 @@ -What: state +What: /sys/devices/system/ibm_rtl/state Date: Sep 2010 KernelVersion: 2.6.37 Contact: Vernon Mauery @@ -10,7 +10,7 @@ Description: The state file allows a means by which to change in and Users: The ibm-prtm userspace daemon uses this interface. -What: version +What: /sys/devices/system/ibm_rtl/version Date: Sep 2010 KernelVersion: 2.6.37 Contact: Vernon Mauery From 1fdd14279eab2e9f79745631379f0c53cb8f9a5a Mon Sep 17 00:00:00 2001 From: "tang.junhui" Date: Fri, 28 Oct 2016 15:54:07 +0800 Subject: [PATCH 082/319] scsi: scsi_dh_alua: fix missing kref_put() in alua_rtpg_work() Reference count of pg leaks in alua_rtpg_work() since kref_put() is not called to decrease the reference count of pg when the condition pg->rtpg_sdev==NULL satisfied (actually it is easy to satisfy), it would cause memory of pg leakage. Signed-off-by: tang.junhui Cc: Reviewed-by: Hannes Reinecke Signed-off-by: Martin K. Petersen --- drivers/scsi/device_handler/scsi_dh_alua.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/device_handler/scsi_dh_alua.c b/drivers/scsi/device_handler/scsi_dh_alua.c index 241829e59668..f375167f16ea 100644 --- a/drivers/scsi/device_handler/scsi_dh_alua.c +++ b/drivers/scsi/device_handler/scsi_dh_alua.c @@ -793,6 +793,7 @@ static void alua_rtpg_work(struct work_struct *work) WARN_ON(pg->flags & ALUA_PG_RUN_RTPG); WARN_ON(pg->flags & ALUA_PG_RUN_STPG); spin_unlock_irqrestore(&pg->lock, flags); + kref_put(&pg->kref, release_port_group); return; } if (pg->flags & ALUA_SYNC_STPG) From 6d3a56ed098566bc83d6c2afa74b4199c12ea074 Mon Sep 17 00:00:00 2001 From: Sreekanth Reddy Date: Fri, 28 Oct 2016 10:09:12 +0530 Subject: [PATCH 083/319] scsi: mpt3sas: Fix for block device of raid exists even after deleting raid disk While merging mpt3sas & mpt2sas code, we added the is_warpdrive check condition on the wrong line --------------------------------------------------------------------------- scsih_target_alloc(struct scsi_target *starget) sas_target_priv_data->handle = raid_device->handle; sas_target_priv_data->sas_address = raid_device->wwid; sas_target_priv_data->flags |= MPT_TARGET_FLAGS_VOLUME; - raid_device->starget = starget; + sas_target_priv_data->raid_device = raid_device; + if (ioc->is_warpdrive) + raid_device->starget = starget; } spin_unlock_irqrestore(&ioc->raid_device_lock, flags); return 0; ------------------------------------------------------------------------------ That check should be for the line sas_target_priv_data->raid_device = raid_device; Due to above hunk, we are not initializing raid_device's starget for raid volumes, and so during raid disk deletion driver is not calling scsi_remove_target() API as driver observes starget field of raid_device's structure as NULL. Signed-off-by: Sreekanth Reddy Cc: # v4.4+ Fixes: 7786ab6aff9 ("mpt3sas: Ported WarpDrive product SSS6200 support") Signed-off-by: Martin K. Petersen --- drivers/scsi/mpt3sas/mpt3sas_scsih.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c index 209a969a979d..8aa769a2d919 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c +++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c @@ -1273,9 +1273,9 @@ scsih_target_alloc(struct scsi_target *starget) sas_target_priv_data->handle = raid_device->handle; sas_target_priv_data->sas_address = raid_device->wwid; sas_target_priv_data->flags |= MPT_TARGET_FLAGS_VOLUME; - sas_target_priv_data->raid_device = raid_device; if (ioc->is_warpdrive) - raid_device->starget = starget; + sas_target_priv_data->raid_device = raid_device; + raid_device->starget = starget; } spin_unlock_irqrestore(&ioc->raid_device_lock, flags); return 0; From aac173e9618faadf8f92af6cc05e64f7acc64d79 Mon Sep 17 00:00:00 2001 From: David Jeffery Date: Fri, 28 Oct 2016 12:27:26 -0400 Subject: [PATCH 084/319] scsi: vmw_pvscsi: return SUCCESS for successful command aborts The vmw_pvscsi driver reports most successful aborts as FAILED to the scsi error handler. This is do to a misunderstanding of how completion_done() works and its interaction with a successful wait using wait_for_completion_timeout(). The vmw_pvscsi driver is expecting completion_done() to always return true if complete() has been called on the completion structure. But completion_done() returns true after complete() has been called only if no function like wait_for_completion_timeout() has seen the completion and cleared it as part of successfully waiting for the completion. Instead of using completion_done(), vmw_pvscsi should just use the return value from wait_for_completion_timeout() to know if the wait timed out or not. [mkp: bumped driver version per request] Signed-off-by: David Jeffery Reviewed-by: Laurence Oberman Reviewed-by: Ewan D. Milne Acked-by: Jim Gill Signed-off-by: Martin K. Petersen --- drivers/scsi/vmw_pvscsi.c | 5 +++-- drivers/scsi/vmw_pvscsi.h | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/vmw_pvscsi.c b/drivers/scsi/vmw_pvscsi.c index 4a0d3cdc607c..15ca09cd16f3 100644 --- a/drivers/scsi/vmw_pvscsi.c +++ b/drivers/scsi/vmw_pvscsi.c @@ -793,6 +793,7 @@ static int pvscsi_abort(struct scsi_cmnd *cmd) unsigned long flags; int result = SUCCESS; DECLARE_COMPLETION_ONSTACK(abort_cmp); + int done; scmd_printk(KERN_DEBUG, cmd, "task abort on host %u, %p\n", adapter->host->host_no, cmd); @@ -824,10 +825,10 @@ static int pvscsi_abort(struct scsi_cmnd *cmd) pvscsi_abort_cmd(adapter, ctx); spin_unlock_irqrestore(&adapter->hw_lock, flags); /* Wait for 2 secs for the completion. */ - wait_for_completion_timeout(&abort_cmp, msecs_to_jiffies(2000)); + done = wait_for_completion_timeout(&abort_cmp, msecs_to_jiffies(2000)); spin_lock_irqsave(&adapter->hw_lock, flags); - if (!completion_done(&abort_cmp)) { + if (!done) { /* * Failed to abort the command, unmark the fact that it * was requested to be aborted. diff --git a/drivers/scsi/vmw_pvscsi.h b/drivers/scsi/vmw_pvscsi.h index c097d2ccbde3..d41292ef85f2 100644 --- a/drivers/scsi/vmw_pvscsi.h +++ b/drivers/scsi/vmw_pvscsi.h @@ -26,7 +26,7 @@ #include -#define PVSCSI_DRIVER_VERSION_STRING "1.0.6.0-k" +#define PVSCSI_DRIVER_VERSION_STRING "1.0.7.0-k" #define PVSCSI_MAX_NUM_SG_ENTRIES_PER_SEGMENT 128 From df3d422cbac685da882e4c239dfda07de33d431b Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 1 Nov 2016 08:19:57 -0600 Subject: [PATCH 085/319] scsi: scsi_dh_alua: Fix a reference counting bug The code at the end of alua_rtpg_work() is as follows: scsi_device_put(sdev); kref_put(&pg->kref, release_port_group); In other words, alua_rtpg_queue() must hold an sdev reference and a pg reference before queueing rtpg work. If no rtpg work is queued no additional references should be held when alua_rtpg_queue() returns. If no rtpg work is queued, ensure that alua_rtpg_queue() only gives up the sdev reference if that reference was obtained by the same alua_rtpg_queue() call. Signed-off-by: Bart Van Assche Reported-by: Tang Junhui Cc: Hannes Reinecke Cc: Tang Junhui Cc: Reviewed-by: Hannes Reinecke Signed-off-by: Martin K. Petersen --- drivers/scsi/device_handler/scsi_dh_alua.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/device_handler/scsi_dh_alua.c b/drivers/scsi/device_handler/scsi_dh_alua.c index f375167f16ea..7bb20684e9fa 100644 --- a/drivers/scsi/device_handler/scsi_dh_alua.c +++ b/drivers/scsi/device_handler/scsi_dh_alua.c @@ -891,6 +891,7 @@ static void alua_rtpg_queue(struct alua_port_group *pg, /* Do not queue if the worker is already running */ if (!(pg->flags & ALUA_PG_RUNNING)) { kref_get(&pg->kref); + sdev = NULL; start_queue = 1; } } @@ -902,7 +903,8 @@ static void alua_rtpg_queue(struct alua_port_group *pg, if (start_queue && !queue_delayed_work(alua_wq, &pg->rtpg_work, msecs_to_jiffies(ALUA_RTPG_DELAY_MSECS))) { - scsi_device_put(sdev); + if (sdev) + scsi_device_put(sdev); kref_put(&pg->kref, release_port_group); } } From a5dd506e1584e91f3e7500ab9a165aa1b49eabd4 Mon Sep 17 00:00:00 2001 From: Bill Kuzeja Date: Fri, 21 Oct 2016 16:45:27 -0400 Subject: [PATCH 086/319] scsi: qla2xxx: Fix scsi scan hang triggered if adapter fails during init A system can get hung task timeouts if a qlogic board fails during initialization (if the board breaks again or fails the init). The hang involves the scsi scan. In a nutshell, since commit beb9e315e6e0 ("qla2xxx: Prevent removal and board_disable race"): ...it is possible to have freed ha (base_vha->hw) early by a call to qla2x00_remove_one when pdev->enable_cnt equals zero: if (!atomic_read(&pdev->enable_cnt)) { scsi_host_put(base_vha->host); kfree(ha); pci_set_drvdata(pdev, NULL); return; Almost always, the scsi_host_put above frees the vha structure (attached to the end of the Scsi_Host we're putting) since it's the last put, and life is good. However, if we are entering this routine because the adapter has broken sometime during initialization AND a scsi scan is already in progress (and has done its own scsi_host_get), vha will not be freed. What's worse, the scsi scan will access the freed ha structure through qla2xxx_scan_finished: if (time > vha->hw->loop_reset_delay * HZ) return 1; The scsi scan keeps checking to see if a scan is complete by calling qla2xxx_scan_finished. There is a timeout value that limits the length of time a scan can take (hw->loop_reset_delay, usually set to 5 seconds), but this definition is in the data structure (hw) that can get freed early. This can yield unpredictable results, the worst of which is that the scsi scan can hang indefinitely. This happens when the freed structure gets reused and loop_reset_delay gets overwritten with garbage, which the scan obliviously uses as its timeout value. The fix for this is simple: at the top of qla2xxx_scan_finished, check for the UNLOADING bit in the vha structure (_vha is not freed at this point). If UNLOADING is set, we exit the scan for this adapter immediately. After this last reference to the ha structure, we'll exit the scan for this adapter, and continue on. This problem is hard to hit, but I have run into it doing negative testing many times now (with a test specifically designed to bring it out), so I can verify that this fix works. My testing has been against a RHEL7 driver variant, but the bug and patch are equally relevant to to the upstream driver. Fixes: beb9e315e6e0 ("qla2xxx: Prevent removal and board_disable race") Cc: # v3.18+ Signed-off-by: Bill Kuzeja Acked-by: Himanshu Madhani Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_os.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index ace65db1d2a2..4d99c3b37687 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -2341,6 +2341,8 @@ qla2xxx_scan_finished(struct Scsi_Host *shost, unsigned long time) { scsi_qla_host_t *vha = shost_priv(shost); + if (test_bit(UNLOADING, &vha->dpc_flags)) + return 1; if (!vha->host) return 1; if (time > vha->hw->loop_reset_delay * HZ) From 5c2f117a22e46a4afee6ddee29b653a7a2a6b41f Mon Sep 17 00:00:00 2001 From: Emil Lundmark Date: Wed, 12 Oct 2016 12:31:40 +0200 Subject: [PATCH 087/319] clk: imx: fix integer overflow in AV PLL round rate Since 'parent_rate * mfn' may overflow 32 bits, the result should be stored using 64 bits. The problem was discovered when trying to set the rate of the audio PLL (pll4_post_div) on an i.MX6Q. The desired rate was 196.608 MHz, but the actual rate returned was 192.000570 MHz. The round rate function should have been able to return 196.608 MHz, i.e., the desired rate. Fixes: ba7f4f557eb6 ("clk: imx: correct AV PLL rate formula") Cc: Anson Huang Signed-off-by: Emil Lundmark Reviewed-by: Fabio Estevam Acked-by: Shawn Guo Signed-off-by: Stephen Boyd --- drivers/clk/imx/clk-pllv3.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/clk/imx/clk-pllv3.c b/drivers/clk/imx/clk-pllv3.c index 19f9b622981a..7a6acc3e4a92 100644 --- a/drivers/clk/imx/clk-pllv3.c +++ b/drivers/clk/imx/clk-pllv3.c @@ -223,7 +223,7 @@ static unsigned long clk_pllv3_av_recalc_rate(struct clk_hw *hw, temp64 *= mfn; do_div(temp64, mfd); - return (parent_rate * div) + (u32)temp64; + return parent_rate * div + (unsigned long)temp64; } static long clk_pllv3_av_round_rate(struct clk_hw *hw, unsigned long rate, @@ -247,7 +247,11 @@ static long clk_pllv3_av_round_rate(struct clk_hw *hw, unsigned long rate, do_div(temp64, parent_rate); mfn = temp64; - return parent_rate * div + parent_rate * mfn / mfd; + temp64 = (u64)parent_rate; + temp64 *= mfn; + do_div(temp64, mfd); + + return parent_rate * div + (unsigned long)temp64; } static int clk_pllv3_av_set_rate(struct clk_hw *hw, unsigned long rate, From 7c1c5413a7bdf1c9adc8d979521f1b8286366aef Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Mon, 17 Oct 2016 13:42:23 -0500 Subject: [PATCH 088/319] clk: qoriq: Don't allow CPU clocks higher than starting value The boot-time frequency of a CPU is considered its rated maximum, as we have no other source of such information. However, this was previously only used for chips with 80% restrictions on secondary PLLs. This usually wasn't a problem because most chips/configs boot with a divider of /1, with other dividers being used only for dynamic frequency reduction. However, at least one config (LS1021A at less than 1 GHz) uses a different divider for top speed. This was causing cpufreq to set a frequency beyond the chip's rated speed. This is fixed by applying a 100%-of-initial-speed limit to all CPU PLLs, similar to the existing 80% limit that only applied to some. Signed-off-by: Scott Wood Cc: stable@vger.kernel.org Signed-off-by: Stephen Boyd --- drivers/clk/clk-qoriq.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/clk/clk-qoriq.c b/drivers/clk/clk-qoriq.c index 20b105584f82..80ae2a51452d 100644 --- a/drivers/clk/clk-qoriq.c +++ b/drivers/clk/clk-qoriq.c @@ -700,6 +700,7 @@ static struct clk * __init create_mux_common(struct clockgen *cg, struct mux_hwclock *hwc, const struct clk_ops *ops, unsigned long min_rate, + unsigned long max_rate, unsigned long pct80_rate, const char *fmt, int idx) { @@ -728,6 +729,8 @@ static struct clk * __init create_mux_common(struct clockgen *cg, continue; if (rate < min_rate) continue; + if (rate > max_rate) + continue; parent_names[j] = div->name; hwc->parent_to_clksel[j] = i; @@ -759,7 +762,7 @@ static struct clk * __init create_one_cmux(struct clockgen *cg, int idx) struct mux_hwclock *hwc; const struct clockgen_pll_div *div; unsigned long plat_rate, min_rate; - u64 pct80_rate; + u64 max_rate, pct80_rate; u32 clksel; hwc = kzalloc(sizeof(*hwc), GFP_KERNEL); @@ -787,8 +790,8 @@ static struct clk * __init create_one_cmux(struct clockgen *cg, int idx) return NULL; } - pct80_rate = clk_get_rate(div->clk); - pct80_rate *= 8; + max_rate = clk_get_rate(div->clk); + pct80_rate = max_rate * 8; do_div(pct80_rate, 10); plat_rate = clk_get_rate(cg->pll[PLATFORM_PLL].div[PLL_DIV1].clk); @@ -798,7 +801,7 @@ static struct clk * __init create_one_cmux(struct clockgen *cg, int idx) else min_rate = plat_rate / 2; - return create_mux_common(cg, hwc, &cmux_ops, min_rate, + return create_mux_common(cg, hwc, &cmux_ops, min_rate, max_rate, pct80_rate, "cg-cmux%d", idx); } @@ -813,7 +816,7 @@ static struct clk * __init create_one_hwaccel(struct clockgen *cg, int idx) hwc->reg = cg->regs + 0x20 * idx + 0x10; hwc->info = cg->info.hwaccel[idx]; - return create_mux_common(cg, hwc, &hwaccel_ops, 0, 0, + return create_mux_common(cg, hwc, &hwaccel_ops, 0, ULONG_MAX, 0, "cg-hwaccel%d", idx); } From a29e52a6e66f4c0c895e7083e4bad2e7957f1fb5 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Sat, 17 Sep 2016 15:54:13 +0000 Subject: [PATCH 089/319] clk: mmp: mmp2: fix return value check in mmp2_clk_init() Fix the retrn value check which testing the wrong variable in mmp2_clk_init(). Fixes: 1ec770d92a62 ("clk: mmp: add mmp2 DT support for clock driver") Signed-off-by: Wei Yongjun Signed-off-by: Stephen Boyd --- drivers/clk/mmp/clk-of-mmp2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/mmp/clk-of-mmp2.c b/drivers/clk/mmp/clk-of-mmp2.c index 3a51fff1b0e7..9adaf48aea23 100644 --- a/drivers/clk/mmp/clk-of-mmp2.c +++ b/drivers/clk/mmp/clk-of-mmp2.c @@ -313,7 +313,7 @@ static void __init mmp2_clk_init(struct device_node *np) } pxa_unit->apmu_base = of_iomap(np, 1); - if (!pxa_unit->mpmu_base) { + if (!pxa_unit->apmu_base) { pr_err("failed to map apmu registers\n"); return; } From deab07261d54b4db7b627d38e0efac97f176c6d6 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Sat, 17 Sep 2016 15:54:28 +0000 Subject: [PATCH 090/319] clk: mmp: pxa168: fix return value check in pxa168_clk_init() Fix the retrn value check which testing the wrong variable in pxa168_clk_init(). Fixes: ab08aefcd12d ("clk: mmp: add pxa168 DT support for clock driver") Signed-off-by: Wei Yongjun Signed-off-by: Stephen Boyd --- drivers/clk/mmp/clk-of-pxa168.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/mmp/clk-of-pxa168.c b/drivers/clk/mmp/clk-of-pxa168.c index 87f2317b2a00..f110c02e83cb 100644 --- a/drivers/clk/mmp/clk-of-pxa168.c +++ b/drivers/clk/mmp/clk-of-pxa168.c @@ -262,7 +262,7 @@ static void __init pxa168_clk_init(struct device_node *np) } pxa_unit->apmu_base = of_iomap(np, 1); - if (!pxa_unit->mpmu_base) { + if (!pxa_unit->apmu_base) { pr_err("failed to map apmu registers\n"); return; } From 10f2bfb092e3b49000526c02cfe8b2abbbdbb752 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Sat, 17 Sep 2016 15:55:56 +0000 Subject: [PATCH 091/319] clk: mmp: pxa910: fix return value check in pxa910_clk_init() Fix the retrn value check which testing the wrong variable in pxa910_clk_init(). Fixes: 2bc61da9f7ff ("clk: mmp: add pxa910 DT support for clock driver") Signed-off-by: Wei Yongjun Signed-off-by: Stephen Boyd --- drivers/clk/mmp/clk-of-pxa910.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/clk/mmp/clk-of-pxa910.c b/drivers/clk/mmp/clk-of-pxa910.c index e22a67f76d93..64d1ef49caeb 100644 --- a/drivers/clk/mmp/clk-of-pxa910.c +++ b/drivers/clk/mmp/clk-of-pxa910.c @@ -282,7 +282,7 @@ static void __init pxa910_clk_init(struct device_node *np) } pxa_unit->apmu_base = of_iomap(np, 1); - if (!pxa_unit->mpmu_base) { + if (!pxa_unit->apmu_base) { pr_err("failed to map apmu registers\n"); return; } @@ -294,7 +294,7 @@ static void __init pxa910_clk_init(struct device_node *np) } pxa_unit->apbcp_base = of_iomap(np, 3); - if (!pxa_unit->mpmu_base) { + if (!pxa_unit->apbcp_base) { pr_err("failed to map apbcp registers\n"); return; } From ceb75787bc75d0a7b88519ab8a68067ac690f55a Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 1 Nov 2016 11:49:56 +0100 Subject: [PATCH 092/319] PM / sleep: fix device reference leak in test_suspend Make sure to drop the reference taken by class_find_device() after opening the RTC device. Fixes: 77437fd4e61f (pm: boot time suspend selftest) Signed-off-by: Johan Hovold Signed-off-by: Rafael J. Wysocki --- kernel/power/suspend_test.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/power/suspend_test.c b/kernel/power/suspend_test.c index 084452e34a12..bdff5ed57f10 100644 --- a/kernel/power/suspend_test.c +++ b/kernel/power/suspend_test.c @@ -203,8 +203,10 @@ static int __init test_suspend(void) /* RTCs have initialized by now too ... can we use one? */ dev = class_find_device(rtc_class, NULL, NULL, has_wakealarm); - if (dev) + if (dev) { rtc = rtc_class_open(dev_name(dev)); + put_device(dev); + } if (!rtc) { printk(warn_no_rtc); return 0; From 8d23ea403044efe97553dd64228b172c8ffca047 Mon Sep 17 00:00:00 2001 From: Archit Taneja Date: Tue, 25 Oct 2016 12:17:59 +0530 Subject: [PATCH 093/319] drm/msm/dsi: Queue HPD helper work in attach/detach callbacks The msm/dsi host drivers calls drm_helper_hpd_irq_event in the mipi_dsi_host attach/detatch callbacks. mipi_dsi_attach()/mipi_dsi_detach() from a panel/bridge driver could be called from a context where the drm_device's mode_config.mutex is already held, resulting in a deadlock. Queue it as work instead. Signed-off-by: Archit Taneja Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/dsi/dsi_host.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/msm/dsi/dsi_host.c b/drivers/gpu/drm/msm/dsi/dsi_host.c index f05ed0e1f3d6..6f240021705b 100644 --- a/drivers/gpu/drm/msm/dsi/dsi_host.c +++ b/drivers/gpu/drm/msm/dsi/dsi_host.c @@ -139,6 +139,7 @@ struct msm_dsi_host { u32 err_work_state; struct work_struct err_work; + struct work_struct hpd_work; struct workqueue_struct *workqueue; /* DSI 6G TX buffer*/ @@ -1294,6 +1295,14 @@ static void dsi_sw_reset_restore(struct msm_dsi_host *msm_host) wmb(); /* make sure dsi controller enabled again */ } +static void dsi_hpd_worker(struct work_struct *work) +{ + struct msm_dsi_host *msm_host = + container_of(work, struct msm_dsi_host, hpd_work); + + drm_helper_hpd_irq_event(msm_host->dev); +} + static void dsi_err_worker(struct work_struct *work) { struct msm_dsi_host *msm_host = @@ -1480,7 +1489,7 @@ static int dsi_host_attach(struct mipi_dsi_host *host, DBG("id=%d", msm_host->id); if (msm_host->dev) - drm_helper_hpd_irq_event(msm_host->dev); + queue_work(msm_host->workqueue, &msm_host->hpd_work); return 0; } @@ -1494,7 +1503,7 @@ static int dsi_host_detach(struct mipi_dsi_host *host, DBG("id=%d", msm_host->id); if (msm_host->dev) - drm_helper_hpd_irq_event(msm_host->dev); + queue_work(msm_host->workqueue, &msm_host->hpd_work); return 0; } @@ -1748,6 +1757,7 @@ int msm_dsi_host_init(struct msm_dsi *msm_dsi) /* setup workqueue */ msm_host->workqueue = alloc_ordered_workqueue("dsi_drm_work", 0); INIT_WORK(&msm_host->err_work, dsi_err_worker); + INIT_WORK(&msm_host->hpd_work, dsi_hpd_worker); msm_dsi->host = &msm_host->base; msm_dsi->id = msm_host->id; From 73b65b197024f3f9ef16a28cc9533c011e0a3f6d Mon Sep 17 00:00:00 2001 From: Archit Taneja Date: Thu, 27 Oct 2016 11:57:15 +0530 Subject: [PATCH 094/319] drm/msm: Set CLK_IGNORE_UNUSED flag for PLL clocks The DSI/HDMI PLLs in MSM require resources like interface clocks, power domains to be enabled before we can access their registers. The clock framework doesn't have a mechanism at the moment where we can tie such resources to a clock, so we make sure that the KMS driver enables these resources whenever a PLL is expected to be in use. One place where we can't ensure the resource dependencies are met is when the clock framework tries to disable unused clocks. The KMS driver doesn't know when the clock framework calls the is_enabled clk_op, and hence can't enable interface clocks/power domains beforehand. We set the CLK_IGNORE_UNUSED flag for PLL clocks for now. This needs to be revisited, since bootloaders can enable display, and we would want to disable the PLL clocks if there isn't a display driver using them. Cc: Stephen Boyd Signed-off-by: Archit Taneja Acked-by: Stephen Boyd Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/dsi/pll/dsi_pll_28nm.c | 1 + drivers/gpu/drm/msm/dsi/pll/dsi_pll_28nm_8960.c | 1 + drivers/gpu/drm/msm/hdmi/hdmi_phy_8996.c | 1 + drivers/gpu/drm/msm/hdmi/hdmi_pll_8960.c | 1 + 4 files changed, 4 insertions(+) diff --git a/drivers/gpu/drm/msm/dsi/pll/dsi_pll_28nm.c b/drivers/gpu/drm/msm/dsi/pll/dsi_pll_28nm.c index 598fdaff0a41..26e3a01a99c2 100644 --- a/drivers/gpu/drm/msm/dsi/pll/dsi_pll_28nm.c +++ b/drivers/gpu/drm/msm/dsi/pll/dsi_pll_28nm.c @@ -521,6 +521,7 @@ static int pll_28nm_register(struct dsi_pll_28nm *pll_28nm) .parent_names = (const char *[]){ "xo" }, .num_parents = 1, .name = vco_name, + .flags = CLK_IGNORE_UNUSED, .ops = &clk_ops_dsi_pll_28nm_vco, }; struct device *dev = &pll_28nm->pdev->dev; diff --git a/drivers/gpu/drm/msm/dsi/pll/dsi_pll_28nm_8960.c b/drivers/gpu/drm/msm/dsi/pll/dsi_pll_28nm_8960.c index 38c90e1eb002..49008451085b 100644 --- a/drivers/gpu/drm/msm/dsi/pll/dsi_pll_28nm_8960.c +++ b/drivers/gpu/drm/msm/dsi/pll/dsi_pll_28nm_8960.c @@ -412,6 +412,7 @@ static int pll_28nm_register(struct dsi_pll_28nm *pll_28nm) struct clk_init_data vco_init = { .parent_names = (const char *[]){ "pxo" }, .num_parents = 1, + .flags = CLK_IGNORE_UNUSED, .ops = &clk_ops_dsi_pll_28nm_vco, }; struct device *dev = &pll_28nm->pdev->dev; diff --git a/drivers/gpu/drm/msm/hdmi/hdmi_phy_8996.c b/drivers/gpu/drm/msm/hdmi/hdmi_phy_8996.c index aa94a553794f..143eab46ba68 100644 --- a/drivers/gpu/drm/msm/hdmi/hdmi_phy_8996.c +++ b/drivers/gpu/drm/msm/hdmi/hdmi_phy_8996.c @@ -702,6 +702,7 @@ static struct clk_init_data pll_init = { .ops = &hdmi_8996_pll_ops, .parent_names = hdmi_pll_parents, .num_parents = ARRAY_SIZE(hdmi_pll_parents), + .flags = CLK_IGNORE_UNUSED, }; int msm_hdmi_pll_8996_init(struct platform_device *pdev) diff --git a/drivers/gpu/drm/msm/hdmi/hdmi_pll_8960.c b/drivers/gpu/drm/msm/hdmi/hdmi_pll_8960.c index 92da69aa6187..99590758c68b 100644 --- a/drivers/gpu/drm/msm/hdmi/hdmi_pll_8960.c +++ b/drivers/gpu/drm/msm/hdmi/hdmi_pll_8960.c @@ -424,6 +424,7 @@ static struct clk_init_data pll_init = { .ops = &hdmi_pll_ops, .parent_names = hdmi_pll_parents, .num_parents = ARRAY_SIZE(hdmi_pll_parents), + .flags = CLK_IGNORE_UNUSED, }; int msm_hdmi_pll_8960_init(struct platform_device *pdev) From 31e4801aa2e59d4b42dc0fd42846a3aa7a6361af Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Thu, 13 Oct 2016 12:43:17 -0400 Subject: [PATCH 095/319] drm/msm/mdp5: handle non-fullscreen base plane case If the bottom-most layer is not fullscreen, we need to use the BASE mixer stage for solid fill (ie. MDP5_CTL_BLEND_OP_FLAG_BORDER_OUT). The blend_setup() code pretty much handled this already, we just had to figure this out in _atomic_check() and assign the stages appropriately. Also fix the case where there are zero enabled planes, where we also need to enable BORDER_OUT. Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c | 46 ++++++++++++++---------- 1 file changed, 28 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c index fa2be7ce9468..c205c360e16d 100644 --- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c +++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c @@ -223,12 +223,7 @@ static void blend_setup(struct drm_crtc *crtc) plane_cnt++; } - /* - * If there is no base layer, enable border color. - * Although it's not possbile in current blend logic, - * put it here as a reminder. - */ - if (!pstates[STAGE_BASE] && plane_cnt) { + if (!pstates[STAGE_BASE]) { ctl_blend_flags |= MDP5_CTL_BLEND_OP_FLAG_BORDER_OUT; DBG("Border Color is enabled"); } @@ -365,6 +360,15 @@ static int pstate_cmp(const void *a, const void *b) return pa->state->zpos - pb->state->zpos; } +/* is there a helper for this? */ +static bool is_fullscreen(struct drm_crtc_state *cstate, + struct drm_plane_state *pstate) +{ + return (pstate->crtc_x <= 0) && (pstate->crtc_y <= 0) && + ((pstate->crtc_x + pstate->crtc_w) >= cstate->mode.hdisplay) && + ((pstate->crtc_y + pstate->crtc_h) >= cstate->mode.vdisplay); +} + static int mdp5_crtc_atomic_check(struct drm_crtc *crtc, struct drm_crtc_state *state) { @@ -375,21 +379,11 @@ static int mdp5_crtc_atomic_check(struct drm_crtc *crtc, struct plane_state pstates[STAGE_MAX + 1]; const struct mdp5_cfg_hw *hw_cfg; const struct drm_plane_state *pstate; - int cnt = 0, i; + int cnt = 0, base = 0, i; DBG("%s: check", mdp5_crtc->name); - /* verify that there are not too many planes attached to crtc - * and that we don't have conflicting mixer stages: - */ - hw_cfg = mdp5_cfg_get_hw_config(mdp5_kms->cfg); drm_atomic_crtc_state_for_each_plane_state(plane, pstate, state) { - if (cnt >= (hw_cfg->lm.nb_stages)) { - dev_err(dev->dev, "too many planes!\n"); - return -EINVAL; - } - - pstates[cnt].plane = plane; pstates[cnt].state = to_mdp5_plane_state(pstate); @@ -399,8 +393,24 @@ static int mdp5_crtc_atomic_check(struct drm_crtc *crtc, /* assign a stage based on sorted zpos property */ sort(pstates, cnt, sizeof(pstates[0]), pstate_cmp, NULL); + /* if the bottom-most layer is not fullscreen, we need to use + * it for solid-color: + */ + if ((cnt > 0) && !is_fullscreen(state, &pstates[0].state->base)) + base++; + + /* verify that there are not too many planes attached to crtc + * and that we don't have conflicting mixer stages: + */ + hw_cfg = mdp5_cfg_get_hw_config(mdp5_kms->cfg); + + if ((cnt + base) >= hw_cfg->lm.nb_stages) { + dev_err(dev->dev, "too many planes!\n"); + return -EINVAL; + } + for (i = 0; i < cnt; i++) { - pstates[i].state->stage = STAGE_BASE + i; + pstates[i].state->stage = STAGE_BASE + i + base; DBG("%s: assign pipe %s on stage=%d", mdp5_crtc->name, pipe2name(mdp5_plane_pipe(pstates[i].plane)), pstates[i].state->stage); From 07cd2e36263af34f7f0b48e36eff5d4b77d5756a Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Mon, 17 Oct 2016 12:00:21 -0400 Subject: [PATCH 096/319] drm/msm/mdp5: no scaling support on RGBn pipes for 8x16 Looks like cut/paste error from the other device cfgs (which do support scaling on RGBn pipes). Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/mdp/mdp5/mdp5_cfg.c | 2 +- drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c | 9 +++------ 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_cfg.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_cfg.c index ac9e4cde1380..0e2bc5636c81 100644 --- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_cfg.c +++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_cfg.c @@ -272,7 +272,7 @@ const struct mdp5_cfg_hw msm8x16_config = { .count = 2, .base = { 0x14000, 0x16000 }, .caps = MDP_PIPE_CAP_HFLIP | MDP_PIPE_CAP_VFLIP | - MDP_PIPE_CAP_SCALE | MDP_PIPE_CAP_DECIMATION, + MDP_PIPE_CAP_DECIMATION, }, .pipe_dma = { .count = 1, diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c index 951c002b05df..83bf997dda03 100644 --- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c +++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c @@ -292,8 +292,7 @@ static int mdp5_plane_atomic_check(struct drm_plane *plane, format = to_mdp_format(msm_framebuffer_format(state->fb)); if (MDP_FORMAT_IS_YUV(format) && !pipe_supports_yuv(mdp5_plane->caps)) { - dev_err(plane->dev->dev, - "Pipe doesn't support YUV\n"); + DBG("Pipe doesn't support YUV\n"); return -EINVAL; } @@ -301,8 +300,7 @@ static int mdp5_plane_atomic_check(struct drm_plane *plane, if (!(mdp5_plane->caps & MDP_PIPE_CAP_SCALE) && (((state->src_w >> 16) != state->crtc_w) || ((state->src_h >> 16) != state->crtc_h))) { - dev_err(plane->dev->dev, - "Pipe doesn't support scaling (%dx%d -> %dx%d)\n", + DBG("Pipe doesn't support scaling (%dx%d -> %dx%d)\n", state->src_w >> 16, state->src_h >> 16, state->crtc_w, state->crtc_h); @@ -313,8 +311,7 @@ static int mdp5_plane_atomic_check(struct drm_plane *plane, vflip = !!(state->rotation & DRM_REFLECT_Y); if ((vflip && !(mdp5_plane->caps & MDP_PIPE_CAP_VFLIP)) || (hflip && !(mdp5_plane->caps & MDP_PIPE_CAP_HFLIP))) { - dev_err(plane->dev->dev, - "Pipe doesn't support flip\n"); + DBG("Pipe doesn't support flip\n"); return -EINVAL; } From 67cba0fbb484bbc1af42f2804662a80008ba61e9 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Mon, 24 Oct 2016 11:46:44 -0400 Subject: [PATCH 097/319] drm/msm/mdp5: 8x16 actually has 8 mixer stages Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/mdp/mdp5/mdp5_cfg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_cfg.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_cfg.c index 0e2bc5636c81..8b4e3004f451 100644 --- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_cfg.c +++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_cfg.c @@ -282,7 +282,7 @@ const struct mdp5_cfg_hw msm8x16_config = { .lm = { .count = 2, /* LM0 and LM3 */ .base = { 0x44000, 0x47000 }, - .nb_stages = 5, + .nb_stages = 8, .max_width = 2048, .max_height = 0xFFFF, }, From 45788f1f5534fb02063ca077719592c2c3ba621e Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Tue, 1 Nov 2016 15:09:58 +0200 Subject: [PATCH 098/319] MAINTAINERS: Update MELLANOX MLX5 core VPI driver maintainers Add myself as a maintainer for mlx5 core driver as well. Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 4012c2f98617..53964ad4f2de 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8053,6 +8053,7 @@ F: drivers/infiniband/hw/mlx4/ F: include/linux/mlx4/ MELLANOX MLX5 core VPI driver +M: Saeed Mahameed M: Matan Barak M: Leon Romanovsky L: netdev@vger.kernel.org From 23f4ffedb7d751c7e298732ba91ca75d224bc1a6 Mon Sep 17 00:00:00 2001 From: Eli Cooper Date: Tue, 1 Nov 2016 23:45:12 +0800 Subject: [PATCH 099/319] ip6_tunnel: Clear IP6CB in ip6tunnel_xmit() skb->cb may contain data from previous layers. In the observed scenario, the garbage data were misinterpreted as IP6CB(skb)->frag_max_size, so that small packets sent through the tunnel are mistakenly fragmented. This patch unconditionally clears the control buffer in ip6tunnel_xmit(), which affects ip6_tunnel, ip6_udp_tunnel and ip6_gre. Currently none of these tunnels set IP6CB(skb)->flags, otherwise it needs to be done earlier. Cc: stable@vger.kernel.org Signed-off-by: Eli Cooper Signed-off-by: David S. Miller --- include/net/ip6_tunnel.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/net/ip6_tunnel.h b/include/net/ip6_tunnel.h index 20ed9699fcd4..1b1cf33cbfb0 100644 --- a/include/net/ip6_tunnel.h +++ b/include/net/ip6_tunnel.h @@ -146,6 +146,7 @@ static inline void ip6tunnel_xmit(struct sock *sk, struct sk_buff *skb, { int pkt_len, err; + memset(skb->cb, 0, sizeof(struct inet6_skb_parm)); pkt_len = skb->len - skb_inner_network_offset(skb); err = ip6_local_out(dev_net(skb_dst(skb)->dev), sk, skb); if (unlikely(net_xmit_eval(err))) From 4fd19c15decedd06d707e2691c24fce08700e2b1 Mon Sep 17 00:00:00 2001 From: Eli Cooper Date: Tue, 1 Nov 2016 23:45:13 +0800 Subject: [PATCH 100/319] ip6_udp_tunnel: remove unused IPCB related codes Some IPCB fields are currently set in udp_tunnel6_xmit_skb(), which are never used before it reaches ip6tunnel_xmit(), and past that point the control buffer is no longer interpreted as IPCB. This clears these unused IPCB related codes. Currently there is no skb scrubbing in ip6_udp_tunnel, otherwise IPCB(skb)->opt might need to be cleared for IPv4 packets, as shown in 5146d1f1511 ("tunnel: Clear IPCB(skb)->opt before dst_link_failure called"). Signed-off-by: Eli Cooper Signed-off-by: David S. Miller --- net/ipv6/ip6_udp_tunnel.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/net/ipv6/ip6_udp_tunnel.c b/net/ipv6/ip6_udp_tunnel.c index a7520528ecd2..b283f293ee4a 100644 --- a/net/ipv6/ip6_udp_tunnel.c +++ b/net/ipv6/ip6_udp_tunnel.c @@ -88,9 +88,6 @@ int udp_tunnel6_xmit_skb(struct dst_entry *dst, struct sock *sk, uh->len = htons(skb->len); - memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); - IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED - | IPSKB_REROUTED); skb_dst_set(skb, dst); udp6_set_csum(nocheck, skb, saddr, daddr, skb->len); From 269ebce4531b8edc4224259a02143181a1c1d77c Mon Sep 17 00:00:00 2001 From: Dongli Zhang Date: Wed, 2 Nov 2016 09:04:33 +0800 Subject: [PATCH 101/319] xen-netfront: cast grant table reference first to type int IS_ERR_VALUE() in commit 87557efc27f6a50140fb20df06a917f368ce3c66 ("xen-netfront: do not cast grant table reference to signed short") would not return true for error code unless we cast ref first to type int. Signed-off-by: Dongli Zhang Signed-off-by: David S. Miller --- drivers/net/xen-netfront.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index 189a28dcd80d..bf2744e1e3db 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -304,7 +304,7 @@ static void xennet_alloc_rx_buffers(struct netfront_queue *queue) queue->rx_skbs[id] = skb; ref = gnttab_claim_grant_reference(&queue->gref_rx_head); - WARN_ON_ONCE(IS_ERR_VALUE((unsigned long)ref)); + WARN_ON_ONCE(IS_ERR_VALUE((unsigned long)(int)ref)); queue->grant_rx_ref[id] = ref; page = skb_frag_page(&skb_shinfo(skb)->frags[0]); @@ -428,7 +428,7 @@ static void xennet_tx_setup_grant(unsigned long gfn, unsigned int offset, id = get_id_from_freelist(&queue->tx_skb_freelist, queue->tx_skbs); tx = RING_GET_REQUEST(&queue->tx, queue->tx.req_prod_pvt++); ref = gnttab_claim_grant_reference(&queue->gref_tx_head); - WARN_ON_ONCE(IS_ERR_VALUE((unsigned long)ref)); + WARN_ON_ONCE(IS_ERR_VALUE((unsigned long)(int)ref)); gnttab_grant_foreign_access_ref(ref, queue->info->xbdev->otherend_id, gfn, GNTMAP_readonly); From cfbd950d5e6e649c6c1a88925feada64f890c894 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Thu, 20 Oct 2016 08:46:32 +0200 Subject: [PATCH 102/319] video: ARM CLCD: fix Vexpress regression The CLCD does not come up on Versatile Express as it does not (currently) have a syscon node for controlling the block apart from the CLCD itself. Make sure the .init() function can bail out without an error making it probe again. Reported-by: Amit Pundir Signed-off-by: Linus Walleij Tested-by: Amit Pundir Tested-by: Nicolae Rosia Signed-off-by: Tomi Valkeinen --- drivers/video/fbdev/amba-clcd-versatile.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/video/fbdev/amba-clcd-versatile.c b/drivers/video/fbdev/amba-clcd-versatile.c index 19ad8645d93c..e5d9bfc1703a 100644 --- a/drivers/video/fbdev/amba-clcd-versatile.c +++ b/drivers/video/fbdev/amba-clcd-versatile.c @@ -526,8 +526,8 @@ int versatile_clcd_init_panel(struct clcd_fb *fb, np = of_find_matching_node_and_match(NULL, versatile_clcd_of_match, &clcd_id); if (!np) { - dev_err(dev, "no Versatile syscon node\n"); - return -ENODEV; + /* Vexpress does not have this */ + return 0; } versatile_clcd_type = (enum versatile_clcd)clcd_id->data; From a79a812131b07254c09cf325ec68c0d05aaed0b5 Mon Sep 17 00:00:00 2001 From: Alexey Brodkin Date: Thu, 3 Nov 2016 18:06:13 +0300 Subject: [PATCH 103/319] arc: Implement arch-specific dma_map_ops.mmap We used to use generic implementation of dma_map_ops.mmap which is dma_common_mmap() but that only worked for simpler cached mappings when vaddr = paddr. If a driver requests uncached DMA buffer kernel maps it to virtual address so that MMU gets involved and page uncached status takes into account. In that case usage of dma_common_mmap() lead to mapping of vaddr to vaddr for user-space which is obviously wrong. For more detals please refer to verbose explanation here [1]. So here we implement our own version of mmap() which always deals with dma_addr and maps underlying memory to user-space properly (note that DMA buffer mapped to user-space is always uncached because there's no way to properly manage cache from user-space). [1] https://lkml.org/lkml/2016/10/26/973 Reviewed-by: Catalin Marinas Cc: Marek Szyprowski Cc: #4.5+ Signed-off-by: Alexey Brodkin Signed-off-by: Vineet Gupta --- arch/arc/mm/dma.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/arch/arc/mm/dma.c b/arch/arc/mm/dma.c index 60aab5a7522b..cd8aad8226dd 100644 --- a/arch/arc/mm/dma.c +++ b/arch/arc/mm/dma.c @@ -105,6 +105,31 @@ static void arc_dma_free(struct device *dev, size_t size, void *vaddr, __free_pages(page, get_order(size)); } +static int arc_dma_mmap(struct device *dev, struct vm_area_struct *vma, + void *cpu_addr, dma_addr_t dma_addr, size_t size, + unsigned long attrs) +{ + unsigned long user_count = vma_pages(vma); + unsigned long count = PAGE_ALIGN(size) >> PAGE_SHIFT; + unsigned long pfn = __phys_to_pfn(plat_dma_to_phys(dev, dma_addr)); + unsigned long off = vma->vm_pgoff; + int ret = -ENXIO; + + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + + if (dma_mmap_from_coherent(dev, vma, cpu_addr, size, &ret)) + return ret; + + if (off < count && user_count <= (count - off)) { + ret = remap_pfn_range(vma, vma->vm_start, + pfn + off, + user_count << PAGE_SHIFT, + vma->vm_page_prot); + } + + return ret; +} + /* * streaming DMA Mapping API... * CPU accesses page via normal paddr, thus needs to explicitly made @@ -193,6 +218,7 @@ static int arc_dma_supported(struct device *dev, u64 dma_mask) struct dma_map_ops arc_dma_ops = { .alloc = arc_dma_alloc, .free = arc_dma_free, + .mmap = arc_dma_mmap, .map_page = arc_dma_map_page, .map_sg = arc_dma_map_sg, .sync_single_for_device = arc_dma_sync_single_for_device, From 14135f30e33ce37b22529f73660d7369cf424375 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Tue, 1 Nov 2016 16:04:36 -0700 Subject: [PATCH 104/319] inet: fix sleeping inside inet_wait_for_connect() Andrey reported this kernel warning: WARNING: CPU: 0 PID: 4608 at kernel/sched/core.c:7724 __might_sleep+0x14c/0x1a0 kernel/sched/core.c:7719 do not call blocking ops when !TASK_RUNNING; state=1 set at [] prepare_to_wait+0xbc/0x210 kernel/sched/wait.c:178 Modules linked in: CPU: 0 PID: 4608 Comm: syz-executor Not tainted 4.9.0-rc2+ #320 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 ffff88006625f7a0 ffffffff81b46914 ffff88006625f818 0000000000000000 ffffffff84052960 0000000000000000 ffff88006625f7e8 ffffffff81111237 ffff88006aceac00 ffffffff00001e2c ffffed000cc4beff ffffffff84052960 Call Trace: [< inline >] __dump_stack lib/dump_stack.c:15 [] dump_stack+0xb3/0x10f lib/dump_stack.c:51 [] __warn+0x1a7/0x1f0 kernel/panic.c:550 [] warn_slowpath_fmt+0xac/0xd0 kernel/panic.c:565 [] __might_sleep+0x14c/0x1a0 kernel/sched/core.c:7719 [< inline >] slab_pre_alloc_hook mm/slab.h:393 [< inline >] slab_alloc_node mm/slub.c:2634 [< inline >] slab_alloc mm/slub.c:2716 [] __kmalloc_track_caller+0x150/0x2a0 mm/slub.c:4240 [] kmemdup+0x24/0x50 mm/util.c:113 [] dccp_feat_clone_sp_val.part.5+0x4f/0xe0 net/dccp/feat.c:374 [< inline >] dccp_feat_clone_sp_val net/dccp/feat.c:1141 [< inline >] dccp_feat_change_recv net/dccp/feat.c:1141 [] dccp_feat_parse_options+0xaa1/0x13d0 net/dccp/feat.c:1411 [] dccp_parse_options+0x721/0x1010 net/dccp/options.c:128 [] dccp_rcv_state_process+0x200/0x15b0 net/dccp/input.c:644 [] dccp_v4_do_rcv+0xf4/0x1a0 net/dccp/ipv4.c:681 [< inline >] sk_backlog_rcv ./include/net/sock.h:872 [] __release_sock+0x126/0x3a0 net/core/sock.c:2044 [] release_sock+0x59/0x1c0 net/core/sock.c:2502 [< inline >] inet_wait_for_connect net/ipv4/af_inet.c:547 [] __inet_stream_connect+0x5d2/0xbb0 net/ipv4/af_inet.c:617 [] inet_stream_connect+0x55/0xa0 net/ipv4/af_inet.c:656 [] SYSC_connect+0x244/0x2f0 net/socket.c:1533 [] SyS_connect+0x24/0x30 net/socket.c:1514 [] entry_SYSCALL_64_fastpath+0x1f/0xc2 arch/x86/entry/entry_64.S:209 Unlike commit 26cabd31259ba43f68026ce3f62b78094124333f ("sched, net: Clean up sk_wait_event() vs. might_sleep()"), the sleeping function is called before schedule_timeout(), this is indeed a bug. Fix this by moving the wait logic to the new API, it is similar to commit ff960a731788a7408b6f66ec4fd772ff18833211 ("netdev, sched/wait: Fix sleeping inside wait event"). Reported-by: Andrey Konovalov Cc: Andrey Konovalov Cc: Eric Dumazet Cc: Peter Zijlstra Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/ipv4/af_inet.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 9648c97e541f..5ddf5cda07f4 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -533,9 +533,9 @@ EXPORT_SYMBOL(inet_dgram_connect); static long inet_wait_for_connect(struct sock *sk, long timeo, int writebias) { - DEFINE_WAIT(wait); + DEFINE_WAIT_FUNC(wait, woken_wake_function); - prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); + add_wait_queue(sk_sleep(sk), &wait); sk->sk_write_pending += writebias; /* Basic assumption: if someone sets sk->sk_err, he _must_ @@ -545,13 +545,12 @@ static long inet_wait_for_connect(struct sock *sk, long timeo, int writebias) */ while ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) { release_sock(sk); - timeo = schedule_timeout(timeo); + timeo = wait_woken(&wait, TASK_INTERRUPTIBLE, timeo); lock_sock(sk); if (signal_pending(current) || !timeo) break; - prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); } - finish_wait(sk_sleep(sk), &wait); + remove_wait_queue(sk_sleep(sk), &wait); sk->sk_write_pending -= writebias; return timeo; } From 0b53df1e9e07984a93ad3454686740fc2f4d6b4b Mon Sep 17 00:00:00 2001 From: Hariprasad Shenai Date: Wed, 2 Nov 2016 10:52:53 +0530 Subject: [PATCH 105/319] cxgb4: correct device ID of T6 adapter Signed-off-by: Hariprasad Shenai Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h b/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h index 50812a1d67bd..df1573c4a659 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h @@ -178,9 +178,9 @@ CH_PCI_DEVICE_ID_TABLE_DEFINE_BEGIN CH_PCI_ID_TABLE_FENTRY(0x6005), CH_PCI_ID_TABLE_FENTRY(0x6006), CH_PCI_ID_TABLE_FENTRY(0x6007), + CH_PCI_ID_TABLE_FENTRY(0x6008), CH_PCI_ID_TABLE_FENTRY(0x6009), CH_PCI_ID_TABLE_FENTRY(0x600d), - CH_PCI_ID_TABLE_FENTRY(0x6010), CH_PCI_ID_TABLE_FENTRY(0x6011), CH_PCI_ID_TABLE_FENTRY(0x6014), CH_PCI_ID_TABLE_FENTRY(0x6015), From 9512925a2cc2b1cd0206bb93bad200a69716f998 Mon Sep 17 00:00:00 2001 From: "Mintz, Yuval" Date: Wed, 2 Nov 2016 16:36:46 +0200 Subject: [PATCH 106/319] qede: Correctly map aggregation replacement pages Driver allocates replacement buffers before-hand to make sure whenever an aggregation begins there would be a replacement for the Rx buffers, as we can't release the buffer until aggregation is terminated and driver logic assumes the Rx rings are always full. For every other Rx page that's being allocated [I.e., regular] the page is being completely mapped while for the replacement buffers only the first portion of the page is being mapped. This means that: a. Once replacement buffer replenishes the regular Rx ring, assuming there's more than a single packet on page we'd post unmapped memory toward HW [assuming mapping is actually done in granularity smaller than page]. b. Unmaps are being done for the entire page, which is incorrect. Fixes: 55482edc25f06 ("qede: Add slowpath/fastpath support and enable hardware GRO") Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qede/qede_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c index 7def29aaf65c..85f46dbecd5b 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_main.c +++ b/drivers/net/ethernet/qlogic/qede/qede_main.c @@ -2839,7 +2839,7 @@ static int qede_alloc_sge_mem(struct qede_dev *edev, struct qede_rx_queue *rxq) } mapping = dma_map_page(&edev->pdev->dev, replace_buf->data, 0, - rxq->rx_buf_size, DMA_FROM_DEVICE); + PAGE_SIZE, DMA_FROM_DEVICE); if (unlikely(dma_mapping_error(&edev->pdev->dev, mapping))) { DP_NOTICE(edev, "Failed to map TPA replacement buffer\n"); From ac9e70b17ecd7c6e933ff2eaf7ab37429e71bf4d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 2 Nov 2016 07:53:17 -0700 Subject: [PATCH 107/319] tcp: fix potential memory corruption MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Imagine initial value of max_skb_frags is 17, and last skb in write queue has 15 frags. Then max_skb_frags is lowered to 14 or smaller value. tcp_sendmsg() will then be allowed to add additional page frags and eventually go past MAX_SKB_FRAGS, overflowing struct skb_shared_info. Fixes: 5f74f82ea34c ("net:Add sysctl_max_skb_frags") Signed-off-by: Eric Dumazet Cc: Hans Westgaard Ry Cc: Håkon Bugge Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 3251fe71f39f..18238ef8135a 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1241,7 +1241,7 @@ int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) if (!skb_can_coalesce(skb, i, pfrag->page, pfrag->offset)) { - if (i == sysctl_max_skb_frags || !sg) { + if (i >= sysctl_max_skb_frags || !sg) { tcp_mark_push(tp, skb); goto new_segment; } From da96786e26c3ae47316db2b92046b11268c4379c Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 2 Nov 2016 12:08:25 -0700 Subject: [PATCH 108/319] net: tcp: check skb is non-NULL for exact match on lookups Andrey reported the following error report while running the syzkaller fuzzer: general protection fault: 0000 [#1] SMP KASAN Dumping ftrace buffer: (ftrace buffer empty) Modules linked in: CPU: 0 PID: 648 Comm: syz-executor Not tainted 4.9.0-rc3+ #333 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 task: ffff8800398c4480 task.stack: ffff88003b468000 RIP: 0010:[] [< inline >] inet_exact_dif_match include/net/tcp.h:808 RIP: 0010:[] [] __inet_lookup_listener+0xb6/0x500 net/ipv4/inet_hashtables.c:219 RSP: 0018:ffff88003b46f270 EFLAGS: 00010202 RAX: 0000000000000004 RBX: 0000000000004242 RCX: 0000000000000001 RDX: 0000000000000000 RSI: ffffc90000e3c000 RDI: 0000000000000054 RBP: ffff88003b46f2d8 R08: 0000000000004000 R09: ffffffff830910e7 R10: 0000000000000000 R11: 000000000000000a R12: ffffffff867fa0c0 R13: 0000000000004242 R14: 0000000000000003 R15: dffffc0000000000 FS: 00007fb135881700(0000) GS:ffff88003ec00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000020cc3000 CR3: 000000006d56a000 CR4: 00000000000006f0 Stack: 0000000000000000 000000000601a8c0 0000000000000000 ffffffff00004242 424200003b9083c2 ffff88003def4041 ffffffff84e7e040 0000000000000246 ffff88003a0911c0 0000000000000000 ffff88003a091298 ffff88003b9083ae Call Trace: [] tcp_v4_send_reset+0x584/0x1700 net/ipv4/tcp_ipv4.c:643 [] tcp_v4_rcv+0x198b/0x2e50 net/ipv4/tcp_ipv4.c:1718 [] ip_local_deliver_finish+0x332/0xad0 net/ipv4/ip_input.c:216 ... MD5 has a code path that calls __inet_lookup_listener with a null skb, so inet{6}_exact_dif_match needs to check skb against null before pulling the flag. Fixes: a04a480d4392 ("net: Require exact match for TCP socket lookups if dif is l3mdev") Reported-by: Andrey Konovalov Signed-off-by: David Ahern Tested-by: Andrey Konovalov Signed-off-by: David S. Miller --- include/linux/ipv6.h | 2 +- include/net/tcp.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index ca1ad9ebbc92..a0649973ee5b 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -149,7 +149,7 @@ static inline bool inet6_exact_dif_match(struct net *net, struct sk_buff *skb) { #if defined(CONFIG_NET_L3_MASTER_DEV) if (!net->ipv4.sysctl_tcp_l3mdev_accept && - ipv6_l3mdev_skb(IP6CB(skb)->flags)) + skb && ipv6_l3mdev_skb(IP6CB(skb)->flags)) return true; #endif return false; diff --git a/include/net/tcp.h b/include/net/tcp.h index 5b82d4d94834..304a8e17bc87 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -805,7 +805,7 @@ static inline bool inet_exact_dif_match(struct net *net, struct sk_buff *skb) { #if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV) if (!net->ipv4.sysctl_tcp_l3mdev_accept && - ipv4_l3mdev_skb(TCP_SKB_CB(skb)->header.h4.flags)) + skb && ipv4_l3mdev_skb(TCP_SKB_CB(skb)->header.h4.flags)) return true; #endif return false; From 9ee6c5dc816aa8256257f2cd4008a9291ec7e985 Mon Sep 17 00:00:00 2001 From: Lance Richardson Date: Wed, 2 Nov 2016 16:36:17 -0400 Subject: [PATCH 109/319] ipv4: allow local fragmentation in ip_finish_output_gso() Some configurations (e.g. geneve interface with default MTU of 1500 over an ethernet interface with 1500 MTU) result in the transmission of packets that exceed the configured MTU. While this should be considered to be a "bad" configuration, it is still allowed and should not result in the sending of packets that exceed the configured MTU. Fix by dropping the assumption in ip_finish_output_gso() that locally originated gso packets will never need fragmentation. Basic testing using iperf (observing CPU usage and bandwidth) have shown no measurable performance impact for traffic not requiring fragmentation. Fixes: c7ba65d7b649 ("net: ip: push gso skb forwarding handling down the stack") Reported-by: Jan Tluka Signed-off-by: Lance Richardson Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- include/net/ip.h | 3 +-- net/ipv4/ip_forward.c | 2 +- net/ipv4/ip_output.c | 6 ++---- net/ipv4/ip_tunnel_core.c | 11 ----------- net/ipv4/ipmr.c | 2 +- 5 files changed, 5 insertions(+), 19 deletions(-) diff --git a/include/net/ip.h b/include/net/ip.h index 5413883ac47f..d3a107850a41 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -47,8 +47,7 @@ struct inet_skb_parm { #define IPSKB_REROUTED BIT(4) #define IPSKB_DOREDIRECT BIT(5) #define IPSKB_FRAG_PMTU BIT(6) -#define IPSKB_FRAG_SEGS BIT(7) -#define IPSKB_L3SLAVE BIT(8) +#define IPSKB_L3SLAVE BIT(7) u16 frag_max_size; }; diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c index 8b4ffd216839..9f0a7b96646f 100644 --- a/net/ipv4/ip_forward.c +++ b/net/ipv4/ip_forward.c @@ -117,7 +117,7 @@ int ip_forward(struct sk_buff *skb) if (opt->is_strictroute && rt->rt_uses_gateway) goto sr_failed; - IPCB(skb)->flags |= IPSKB_FORWARDED | IPSKB_FRAG_SEGS; + IPCB(skb)->flags |= IPSKB_FORWARDED; mtu = ip_dst_mtu_maybe_forward(&rt->dst, true); if (ip_exceeds_mtu(skb, mtu)) { IP_INC_STATS(net, IPSTATS_MIB_FRAGFAILS); diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 03e7f7310423..49714010ac2e 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -239,11 +239,9 @@ static int ip_finish_output_gso(struct net *net, struct sock *sk, struct sk_buff *segs; int ret = 0; - /* common case: fragmentation of segments is not allowed, - * or seglen is <= mtu + /* common case: seglen is <= mtu */ - if (((IPCB(skb)->flags & IPSKB_FRAG_SEGS) == 0) || - skb_gso_validate_mtu(skb, mtu)) + if (skb_gso_validate_mtu(skb, mtu)) return ip_finish_output2(net, sk, skb); /* Slowpath - GSO segment length is exceeding the dst MTU. diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index 777bc1883870..fed3d29f9eb3 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -63,7 +63,6 @@ void iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb, int pkt_len = skb->len - skb_inner_network_offset(skb); struct net *net = dev_net(rt->dst.dev); struct net_device *dev = skb->dev; - int skb_iif = skb->skb_iif; struct iphdr *iph; int err; @@ -73,16 +72,6 @@ void iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb, skb_dst_set(skb, &rt->dst); memset(IPCB(skb), 0, sizeof(*IPCB(skb))); - if (skb_iif && !(df & htons(IP_DF))) { - /* Arrived from an ingress interface, got encapsulated, with - * fragmentation of encapulating frames allowed. - * If skb is gso, the resulting encapsulated network segments - * may exceed dst mtu. - * Allow IP Fragmentation of segments. - */ - IPCB(skb)->flags |= IPSKB_FRAG_SEGS; - } - /* Push down and install the IP header. */ skb_push(skb, sizeof(struct iphdr)); skb_reset_network_header(skb); diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 5f006e13de56..27089f5ebbb1 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -1749,7 +1749,7 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt, vif->dev->stats.tx_bytes += skb->len; } - IPCB(skb)->flags |= IPSKB_FORWARDED | IPSKB_FRAG_SEGS; + IPCB(skb)->flags |= IPSKB_FORWARDED; /* RFC1584 teaches, that DVMRP/PIM router must deliver packets locally * not only before forwarding, but after forwarding on all output From 79d8665b9545e128637c51cf7febde9c493b6481 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 2 Nov 2016 14:41:50 -0700 Subject: [PATCH 110/319] tcp: fix return value for partial writes After my commit, tcp_sendmsg() might restart its loop after processing socket backlog. If sk_err is set, we blindly return an error, even though we copied data to user space before. We should instead return number of bytes that could be copied, otherwise user space might resend data and corrupt the stream. This might happen if another thread is using recvmsg(MSG_ERRQUEUE) to process timestamps. Issue was diagnosed by Soheil and Willem, big kudos to them ! Fixes: d41a69f1d390f ("tcp: make tcp_sendmsg() aware of socket backlog") Signed-off-by: Eric Dumazet Cc: Willem de Bruijn Cc: Soheil Hassas Yeganeh Cc: Yuchung Cheng Cc: Neal Cardwell Tested-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 18238ef8135a..814af89c1bd3 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1164,7 +1164,7 @@ int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) err = -EPIPE; if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) - goto out_err; + goto do_error; sg = !!(sk->sk_route_caps & NETIF_F_SG); From c3f24cfb3e508c70c26ee8569d537c8ca67a36c6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 2 Nov 2016 17:14:41 -0700 Subject: [PATCH 111/319] dccp: do not release listeners too soon Andrey Konovalov reported following error while fuzzing with syzkaller : IPv4: Attempt to release alive inet socket ffff880068e98940 kasan: CONFIG_KASAN_INLINE enabled kasan: GPF could be caused by NULL-ptr deref or user memory access general protection fault: 0000 [#1] SMP KASAN Modules linked in: CPU: 1 PID: 3905 Comm: a.out Not tainted 4.9.0-rc3+ #333 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 task: ffff88006b9e0000 task.stack: ffff880068770000 RIP: 0010:[] [] selinux_socket_sock_rcv_skb+0xff/0x6a0 security/selinux/hooks.c:4639 RSP: 0018:ffff8800687771c8 EFLAGS: 00010202 RAX: ffff88006b9e0000 RBX: 1ffff1000d0eee3f RCX: 1ffff1000d1d312a RDX: 1ffff1000d1d31a6 RSI: dffffc0000000000 RDI: 0000000000000010 RBP: ffff880068777360 R08: 0000000000000000 R09: 0000000000000002 R10: dffffc0000000000 R11: 0000000000000006 R12: ffff880068e98940 R13: 0000000000000002 R14: ffff880068777338 R15: 0000000000000000 FS: 00007f00ff760700(0000) GS:ffff88006cd00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000020008000 CR3: 000000006a308000 CR4: 00000000000006e0 Stack: ffff8800687771e0 ffffffff812508a5 ffff8800686f3168 0000000000000007 ffff88006ac8cdfc ffff8800665ea500 0000000041b58ab3 ffffffff847b5480 ffffffff819eac60 ffff88006b9e0860 ffff88006b9e0868 ffff88006b9e07f0 Call Trace: [] security_sock_rcv_skb+0x75/0xb0 security/security.c:1317 [] sk_filter_trim_cap+0x67/0x10e0 net/core/filter.c:81 [] __sk_receive_skb+0x30/0xa00 net/core/sock.c:460 [] dccp_v4_rcv+0xdb2/0x1910 net/dccp/ipv4.c:873 [] ip_local_deliver_finish+0x332/0xad0 net/ipv4/ip_input.c:216 [< inline >] NF_HOOK_THRESH ./include/linux/netfilter.h:232 [< inline >] NF_HOOK ./include/linux/netfilter.h:255 [] ip_local_deliver+0x1c2/0x4b0 net/ipv4/ip_input.c:257 [< inline >] dst_input ./include/net/dst.h:507 [] ip_rcv_finish+0x750/0x1c40 net/ipv4/ip_input.c:396 [< inline >] NF_HOOK_THRESH ./include/linux/netfilter.h:232 [< inline >] NF_HOOK ./include/linux/netfilter.h:255 [] ip_rcv+0x96f/0x12f0 net/ipv4/ip_input.c:487 [] __netif_receive_skb_core+0x1897/0x2a50 net/core/dev.c:4213 [] __netif_receive_skb+0x2a/0x170 net/core/dev.c:4251 [] netif_receive_skb_internal+0x1b3/0x390 net/core/dev.c:4279 [] netif_receive_skb+0x48/0x250 net/core/dev.c:4303 [] tun_get_user+0xbd5/0x28a0 drivers/net/tun.c:1308 [] tun_chr_write_iter+0xda/0x190 drivers/net/tun.c:1332 [< inline >] new_sync_write fs/read_write.c:499 [] __vfs_write+0x334/0x570 fs/read_write.c:512 [] vfs_write+0x17b/0x500 fs/read_write.c:560 [< inline >] SYSC_write fs/read_write.c:607 [] SyS_write+0xd4/0x1a0 fs/read_write.c:599 [] entry_SYSCALL_64_fastpath+0x1f/0xc2 It turns out DCCP calls __sk_receive_skb(), and this broke when lookups no longer took a reference on listeners. Fix this issue by adding a @refcounted parameter to __sk_receive_skb(), so that sock_put() is used only when needed. Fixes: 3b24d854cb35 ("tcp/dccp: do not touch listener sk_refcnt under synflood") Signed-off-by: Eric Dumazet Reported-by: Andrey Konovalov Tested-by: Andrey Konovalov Signed-off-by: David S. Miller --- include/net/sock.h | 4 ++-- net/core/sock.c | 5 +++-- net/dccp/ipv4.c | 2 +- net/dccp/ipv6.c | 3 ++- 4 files changed, 8 insertions(+), 6 deletions(-) diff --git a/include/net/sock.h b/include/net/sock.h index 73c6b008f1b7..92b269709b9a 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1596,11 +1596,11 @@ static inline void sock_put(struct sock *sk) void sock_gen_put(struct sock *sk); int __sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested, - unsigned int trim_cap); + unsigned int trim_cap, bool refcounted); static inline int sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested) { - return __sk_receive_skb(sk, skb, nested, 1); + return __sk_receive_skb(sk, skb, nested, 1, true); } static inline void sk_tx_queue_set(struct sock *sk, int tx_queue) diff --git a/net/core/sock.c b/net/core/sock.c index df171acfe232..5e3ca414357e 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -453,7 +453,7 @@ int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) EXPORT_SYMBOL(sock_queue_rcv_skb); int __sk_receive_skb(struct sock *sk, struct sk_buff *skb, - const int nested, unsigned int trim_cap) + const int nested, unsigned int trim_cap, bool refcounted) { int rc = NET_RX_SUCCESS; @@ -487,7 +487,8 @@ int __sk_receive_skb(struct sock *sk, struct sk_buff *skb, bh_unlock_sock(sk); out: - sock_put(sk); + if (refcounted) + sock_put(sk); return rc; discard_and_relse: kfree_skb(skb); diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 345a3aeb8c7e..dff7cfab1da4 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -868,7 +868,7 @@ static int dccp_v4_rcv(struct sk_buff *skb) goto discard_and_relse; nf_reset(skb); - return __sk_receive_skb(sk, skb, 1, dh->dccph_doff * 4); + return __sk_receive_skb(sk, skb, 1, dh->dccph_doff * 4, refcounted); no_dccp_socket: if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 3828f94b234c..09c4e19aa285 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -738,7 +738,8 @@ static int dccp_v6_rcv(struct sk_buff *skb) if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) goto discard_and_relse; - return __sk_receive_skb(sk, skb, 1, dh->dccph_doff * 4) ? -1 : 0; + return __sk_receive_skb(sk, skb, 1, dh->dccph_doff * 4, + refcounted) ? -1 : 0; no_dccp_socket: if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) From 346da62cc186c4b4b1ac59f87f4482b47a047388 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 2 Nov 2016 18:04:24 -0700 Subject: [PATCH 112/319] dccp: do not send reset to already closed sockets Andrey reported following warning while fuzzing with syzkaller WARNING: CPU: 1 PID: 21072 at net/dccp/proto.c:83 dccp_set_state+0x229/0x290 Kernel panic - not syncing: panic_on_warn set ... CPU: 1 PID: 21072 Comm: syz-executor Not tainted 4.9.0-rc1+ #293 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 ffff88003d4c7738 ffffffff81b474f4 0000000000000003 dffffc0000000000 ffffffff844f8b00 ffff88003d4c7804 ffff88003d4c7800 ffffffff8140c06a 0000000041b58ab3 ffffffff8479ab7d ffffffff8140beae ffffffff8140cd00 Call Trace: [< inline >] __dump_stack lib/dump_stack.c:15 [] dump_stack+0xb3/0x10f lib/dump_stack.c:51 [] panic+0x1bc/0x39d kernel/panic.c:179 [] __warn+0x1cc/0x1f0 kernel/panic.c:542 [] warn_slowpath_null+0x2c/0x40 kernel/panic.c:585 [] dccp_set_state+0x229/0x290 net/dccp/proto.c:83 [] dccp_close+0x612/0xc10 net/dccp/proto.c:1016 [] inet_release+0xef/0x1c0 net/ipv4/af_inet.c:415 [] sock_release+0x8e/0x1d0 net/socket.c:570 [] sock_close+0x16/0x20 net/socket.c:1017 [] __fput+0x29d/0x720 fs/file_table.c:208 [] ____fput+0x15/0x20 fs/file_table.c:244 [] task_work_run+0xf8/0x170 kernel/task_work.c:116 [< inline >] exit_task_work include/linux/task_work.h:21 [] do_exit+0x883/0x2ac0 kernel/exit.c:828 [] do_group_exit+0x10e/0x340 kernel/exit.c:931 [] get_signal+0x634/0x15a0 kernel/signal.c:2307 [] do_signal+0x8d/0x1a30 arch/x86/kernel/signal.c:807 [] exit_to_usermode_loop+0xe5/0x130 arch/x86/entry/common.c:156 [< inline >] prepare_exit_to_usermode arch/x86/entry/common.c:190 [] syscall_return_slowpath+0x1a8/0x1e0 arch/x86/entry/common.c:259 [] entry_SYSCALL_64_fastpath+0xc0/0xc2 Dumping ftrace buffer: (ftrace buffer empty) Kernel Offset: disabled Fix this the same way we did for TCP in commit 565b7b2d2e63 ("tcp: do not send reset to already closed sockets") Signed-off-by: Eric Dumazet Reported-by: Andrey Konovalov Tested-by: Andrey Konovalov Signed-off-by: David S. Miller --- net/dccp/proto.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 41e65804ddf5..9fe25bf63296 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -1009,6 +1009,10 @@ void dccp_close(struct sock *sk, long timeout) __kfree_skb(skb); } + /* If socket has been already reset kill it. */ + if (sk->sk_state == DCCP_CLOSED) + goto adjudge_to_death; + if (data_was_unread) { /* Unread data was tossed, send an appropriate Reset Code */ DCCP_WARN("ABORT with %u bytes unread\n", data_was_unread); From 6706a97fec963d6cb3f7fc2978ec1427b4651214 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 2 Nov 2016 19:00:40 -0700 Subject: [PATCH 113/319] dccp: fix out of bound access in dccp_v4_err() dccp_v4_err() does not use pskb_may_pull() and might access garbage. We only need 4 bytes at the beginning of the DCCP header, like TCP, so the 8 bytes pulled in icmp_socket_deliver() are more than enough. This patch might allow to process more ICMP messages, as some routers are still limiting the size of reflected bytes to 28 (RFC 792), instead of extended lengths (RFC 1812 4.3.2.3) Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/dccp/ipv4.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index dff7cfab1da4..b567c8725aea 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -235,7 +235,7 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info) { const struct iphdr *iph = (struct iphdr *)skb->data; const u8 offset = iph->ihl << 2; - const struct dccp_hdr *dh = (struct dccp_hdr *)(skb->data + offset); + const struct dccp_hdr *dh; struct dccp_sock *dp; struct inet_sock *inet; const int type = icmp_hdr(skb)->type; @@ -245,11 +245,13 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info) int err; struct net *net = dev_net(skb->dev); - if (skb->len < offset + sizeof(*dh) || - skb->len < offset + __dccp_basic_hdr_len(dh)) { - __ICMP_INC_STATS(net, ICMP_MIB_INERRORS); - return; - } + /* Only need dccph_dport & dccph_sport which are the first + * 4 bytes in dccp header. + * Our caller (icmp_socket_deliver()) already pulled 8 bytes for us. + */ + BUILD_BUG_ON(offsetofend(struct dccp_hdr, dccph_sport) > 8); + BUILD_BUG_ON(offsetofend(struct dccp_hdr, dccph_dport) > 8); + dh = (struct dccp_hdr *)(skb->data + offset); sk = __inet_lookup_established(net, &dccp_hashinfo, iph->daddr, dh->dccph_dport, From 93636d1f1f162ae89ae4f2a22a83bf4fd960724e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 2 Nov 2016 20:21:20 -0700 Subject: [PATCH 114/319] netlink: netlink_diag_dump() runs without locks A recent commit removed locking from netlink_diag_dump() but forgot one error case. ===================================== [ BUG: bad unlock balance detected! ] 4.9.0-rc3+ #336 Not tainted ------------------------------------- syz-executor/4018 is trying to release lock ([ 36.220068] nl_table_lock ) at: [] netlink_diag_dump+0x1a3/0x250 net/netlink/diag.c:182 but there are no more locks to release! other info that might help us debug this: 3 locks held by syz-executor/4018: #0: [ 36.220068] ( sock_diag_mutex[ 36.220068] ){+.+.+.} , at: [ 36.220068] [] sock_diag_rcv+0x1b/0x40 #1: [ 36.220068] ( sock_diag_table_mutex[ 36.220068] ){+.+.+.} , at: [ 36.220068] [] sock_diag_rcv_msg+0x140/0x3a0 #2: [ 36.220068] ( nlk->cb_mutex[ 36.220068] ){+.+.+.} , at: [ 36.220068] [] netlink_dump+0x50/0xac0 stack backtrace: CPU: 1 PID: 4018 Comm: syz-executor Not tainted 4.9.0-rc3+ #336 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 ffff8800645df688 ffffffff81b46934 ffffffff84eb3e78 ffff88006ad85800 ffffffff82dc8683 ffffffff84eb3e78 ffff8800645df6b8 ffffffff812043ca dffffc0000000000 ffff88006ad85ff8 ffff88006ad85fd0 00000000ffffffff Call Trace: [< inline >] __dump_stack lib/dump_stack.c:15 [] dump_stack+0xb3/0x10f lib/dump_stack.c:51 [] print_unlock_imbalance_bug+0x17a/0x1a0 kernel/locking/lockdep.c:3388 [< inline >] __lock_release kernel/locking/lockdep.c:3512 [] lock_release+0x8e8/0xc60 kernel/locking/lockdep.c:3765 [< inline >] __raw_read_unlock ./include/linux/rwlock_api_smp.h:225 [] _raw_read_unlock+0x1a/0x30 kernel/locking/spinlock.c:255 [] netlink_diag_dump+0x1a3/0x250 net/netlink/diag.c:182 [] netlink_dump+0x397/0xac0 net/netlink/af_netlink.c:2110 Fixes: ad202074320c ("netlink: Use rhashtable walk interface in diag dump") Signed-off-by: Eric Dumazet Reported-by: Andrey Konovalov Tested-by: Andrey Konovalov Signed-off-by: David S. Miller --- net/netlink/diag.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/net/netlink/diag.c b/net/netlink/diag.c index b2f0e986a6f4..a5546249fb10 100644 --- a/net/netlink/diag.c +++ b/net/netlink/diag.c @@ -178,11 +178,8 @@ static int netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) } cb->args[1] = i; } else { - if (req->sdiag_protocol >= MAX_LINKS) { - read_unlock(&nl_table_lock); - rcu_read_unlock(); + if (req->sdiag_protocol >= MAX_LINKS) return -ENOENT; - } err = __netlink_diag_dump(skb, cb, req->sdiag_protocol, s_num); } From 1aa9d1a0e7eefcc61696e147d123453fc0016005 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 2 Nov 2016 20:30:48 -0700 Subject: [PATCH 115/319] ipv6: dccp: fix out of bound access in dccp_v6_err() dccp_v6_err() does not use pskb_may_pull() and might access garbage. We only need 4 bytes at the beginning of the DCCP header, like TCP, so the 8 bytes pulled in icmpv6_notify() are more than enough. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/dccp/ipv6.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 09c4e19aa285..b2a43af967e5 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -70,7 +70,7 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, int offset, __be32 info) { const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data; - const struct dccp_hdr *dh = (struct dccp_hdr *)(skb->data + offset); + const struct dccp_hdr *dh; struct dccp_sock *dp; struct ipv6_pinfo *np; struct sock *sk; @@ -78,12 +78,13 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, __u64 seq; struct net *net = dev_net(skb->dev); - if (skb->len < offset + sizeof(*dh) || - skb->len < offset + __dccp_basic_hdr_len(dh)) { - __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), - ICMP6_MIB_INERRORS); - return; - } + /* Only need dccph_dport & dccph_sport which are the first + * 4 bytes in dccp header. + * Our caller (icmpv6_notify()) already pulled 8 bytes for us. + */ + BUILD_BUG_ON(offsetofend(struct dccp_hdr, dccph_sport) > 8); + BUILD_BUG_ON(offsetofend(struct dccp_hdr, dccph_dport) > 8); + dh = (struct dccp_hdr *)(skb->data + offset); sk = __inet6_lookup_established(net, &dccp_hashinfo, &hdr->daddr, dh->dccph_dport, From 29ab5a3b94c87382da06db88e96119911d557293 Mon Sep 17 00:00:00 2001 From: "Guilherme G. Piccoli" Date: Thu, 3 Nov 2016 08:16:20 -0200 Subject: [PATCH 116/319] ehea: fix operation state report Currently the ehea driver is missing a call to netif_carrier_off() before the interface bring-up; this is necessary in order to initialize the __LINK_STATE_NOCARRIER bit in the net_device state field. Otherwise, we observe state UNKNOWN on "ip address" command output. This patch adds a call to netif_carrier_off() on ehea's net device open callback. Reported-by: Xiong Zhou Reference-ID: IBM bz #137702, Red Hat bz #1089134 Signed-off-by: Guilherme G. Piccoli Signed-off-by: Douglas Miller Signed-off-by: David S. Miller --- drivers/net/ethernet/ibm/ehea/ehea_main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/ibm/ehea/ehea_main.c b/drivers/net/ethernet/ibm/ehea/ehea_main.c index 54efa9a5167b..bd719e25dd76 100644 --- a/drivers/net/ethernet/ibm/ehea/ehea_main.c +++ b/drivers/net/ethernet/ibm/ehea/ehea_main.c @@ -2446,6 +2446,8 @@ static int ehea_open(struct net_device *dev) netif_info(port, ifup, dev, "enabling port\n"); + netif_carrier_off(dev); + ret = ehea_up(dev); if (!ret) { port_napi_enable(port); From 990ff4d84408fc55942ca6644f67e361737b3d8e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 3 Nov 2016 08:59:46 -0700 Subject: [PATCH 117/319] ipv6: dccp: add missing bind_conflict to dccp_ipv6_mapped While fuzzing kernel with syzkaller, Andrey reported a nasty crash in inet6_bind() caused by DCCP lacking a required method. Fixes: ab1e0a13d7029 ("[SOCK] proto: Add hashinfo member to struct proto") Signed-off-by: Eric Dumazet Reported-by: Andrey Konovalov Tested-by: Andrey Konovalov Cc: Arnaldo Carvalho de Melo Acked-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- net/dccp/ipv6.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index b2a43af967e5..715e5d1dc107 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -958,6 +958,7 @@ static const struct inet_connection_sock_af_ops dccp_ipv6_mapped = { .getsockopt = ipv6_getsockopt, .addr2sockaddr = inet6_csk_addr2sockaddr, .sockaddr_len = sizeof(struct sockaddr_in6), + .bind_conflict = inet6_csk_bind_conflict, #ifdef CONFIG_COMPAT .compat_setsockopt = compat_ipv6_setsockopt, .compat_getsockopt = compat_ipv6_getsockopt, From 00ffc1ba02d876478c125e4305f9a02d40c6d284 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Thu, 3 Nov 2016 09:42:35 -0700 Subject: [PATCH 118/319] genetlink: fix a memory leak on error path In __genl_register_family(), when genl_validate_assign_mc_groups() fails, we forget to free the memory we possibly allocate for family->attrbuf. Note, some callers call genl_unregister_family() to clean up on error path, it doesn't work because the family is inserted to the global list in the nearly last step. Cc: Jakub Kicinski Cc: Johannes Berg Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/netlink/genetlink.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 23cc12639ba7..49c28e8ef01b 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -404,7 +404,7 @@ int __genl_register_family(struct genl_family *family) err = genl_validate_assign_mc_groups(family); if (err) - goto errout_locked; + goto errout_free; list_add_tail(&family->family_list, genl_family_chain(family->id)); genl_unlock_all(); @@ -417,6 +417,8 @@ int __genl_register_family(struct genl_family *family) return 0; +errout_free: + kfree(family->attrbuf); errout_locked: genl_unlock_all(); errout: From 243d52126184b072a18fe2130ce0008f8aa3a340 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Thu, 3 Nov 2016 09:42:36 -0700 Subject: [PATCH 119/319] taskstats: fix the length of cgroupstats_cmd_get_policy cgroupstats_cmd_get_policy is [CGROUPSTATS_CMD_ATTR_MAX+1], taskstats_cmd_get_policy[TASKSTATS_CMD_ATTR_MAX+1], but their family.maxattr is TASKSTATS_CMD_ATTR_MAX. CGROUPSTATS_CMD_ATTR_MAX is less than TASKSTATS_CMD_ATTR_MAX, so we could end up accessing out-of-bound. Change cgroupstats_cmd_get_policy to TASKSTATS_CMD_ATTR_MAX+1, this is safe because the rest are initialized to 0's. Reported-by: Andrey Konovalov Tested-by: Andrey Konovalov Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- kernel/taskstats.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/kernel/taskstats.c b/kernel/taskstats.c index b3f05ee20d18..cbb387a265db 100644 --- a/kernel/taskstats.c +++ b/kernel/taskstats.c @@ -54,7 +54,11 @@ static const struct nla_policy taskstats_cmd_get_policy[TASKSTATS_CMD_ATTR_MAX+1 [TASKSTATS_CMD_ATTR_REGISTER_CPUMASK] = { .type = NLA_STRING }, [TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK] = { .type = NLA_STRING },}; -static const struct nla_policy cgroupstats_cmd_get_policy[CGROUPSTATS_CMD_ATTR_MAX+1] = { +/* + * We have to use TASKSTATS_CMD_ATTR_MAX here, it is the maxattr in the family. + * Make sure they are always aligned. + */ +static const struct nla_policy cgroupstats_cmd_get_policy[TASKSTATS_CMD_ATTR_MAX+1] = { [CGROUPSTATS_CMD_ATTR_FD] = { .type = NLA_U32 }, }; From 16976085a114ae293c6fa7a463d74600ffcfeb4b Mon Sep 17 00:00:00 2001 From: Archit Taneja Date: Thu, 3 Nov 2016 17:36:18 +0530 Subject: [PATCH 120/319] drm/msm: Fix error handling crashes seen when VRAM allocation fails If VRAM allocation fails, the error handling path crashes in msm_drm_uninit(). The following changes are made to fix this: msm_gem_shrinker_cleanup() is fixed to unregister the shrinker only if it was init-ed in the first place. Before calling kms->funcs->destroy(), we check if kms->funcs is also non-NULL. This is needed for MDP5, since during msm_drm_int(), priv->kms becomes non-NULL early, but msm_kms_init() is called on it only later in mdp5_kms_init(). Signed-off-by: Archit Taneja Signed-off-by: Rob Clark Reviewed-by: Andy Gross --- drivers/gpu/drm/msm/msm_drv.c | 2 +- drivers/gpu/drm/msm/msm_gem_shrinker.c | 7 +++++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c index fb5c0b0a7594..46568fc80848 100644 --- a/drivers/gpu/drm/msm/msm_drv.c +++ b/drivers/gpu/drm/msm/msm_drv.c @@ -228,7 +228,7 @@ static int msm_drm_uninit(struct device *dev) flush_workqueue(priv->atomic_wq); destroy_workqueue(priv->atomic_wq); - if (kms) + if (kms && kms->funcs) kms->funcs->destroy(kms); if (gpu) { diff --git a/drivers/gpu/drm/msm/msm_gem_shrinker.c b/drivers/gpu/drm/msm/msm_gem_shrinker.c index 283d2841ba58..192b2d3a79cb 100644 --- a/drivers/gpu/drm/msm/msm_gem_shrinker.c +++ b/drivers/gpu/drm/msm/msm_gem_shrinker.c @@ -163,6 +163,9 @@ void msm_gem_shrinker_init(struct drm_device *dev) void msm_gem_shrinker_cleanup(struct drm_device *dev) { struct msm_drm_private *priv = dev->dev_private; - WARN_ON(unregister_vmap_purge_notifier(&priv->vmap_notifier)); - unregister_shrinker(&priv->shrinker); + + if (priv->shrinker.nr_deferred) { + WARN_ON(unregister_vmap_purge_notifier(&priv->vmap_notifier)); + unregister_shrinker(&priv->shrinker); + } } From 94d0e5980d6791b9f98a9b6c586c1f7cb76b2178 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 18 Oct 2016 18:37:49 +0100 Subject: [PATCH 121/319] arm/arm64: KVM: Perform local TLB invalidation when multiplexing vcpus on a single CPU Architecturally, TLBs are private to the (physical) CPU they're associated with. But when multiple vcpus from the same VM are being multiplexed on the same CPU, the TLBs are not private to the vcpus (and are actually shared across the VMID). Let's consider the following scenario: - vcpu-0 maps PA to VA - vcpu-1 maps PA' to VA If run on the same physical CPU, vcpu-1 can hit TLB entries generated by vcpu-0 accesses, and access the wrong physical page. The solution to this is to keep a per-VM map of which vcpu ran last on each given physical CPU, and invalidate local TLBs when switching to a different vcpu from the same VM. Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier --- arch/arm/include/asm/kvm_asm.h | 1 + arch/arm/include/asm/kvm_host.h | 3 +++ arch/arm/include/asm/kvm_hyp.h | 1 + arch/arm/kvm/arm.c | 27 ++++++++++++++++++++++++++- arch/arm/kvm/hyp/tlb.c | 15 +++++++++++++++ arch/arm64/include/asm/kvm_asm.h | 1 + arch/arm64/include/asm/kvm_host.h | 3 +++ arch/arm64/include/asm/kvm_mmu.h | 2 +- arch/arm64/kvm/hyp/tlb.c | 15 +++++++++++++++ 9 files changed, 66 insertions(+), 2 deletions(-) diff --git a/arch/arm/include/asm/kvm_asm.h b/arch/arm/include/asm/kvm_asm.h index d7ea6bcb29bf..8ef05381984b 100644 --- a/arch/arm/include/asm/kvm_asm.h +++ b/arch/arm/include/asm/kvm_asm.h @@ -66,6 +66,7 @@ extern char __kvm_hyp_vector[]; extern void __kvm_flush_vm_context(void); extern void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa); extern void __kvm_tlb_flush_vmid(struct kvm *kvm); +extern void __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu); extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu); diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index 2d19e02d03fd..d5423ab15ed5 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -57,6 +57,9 @@ struct kvm_arch { /* VTTBR value associated with below pgd and vmid */ u64 vttbr; + /* The last vcpu id that ran on each physical CPU */ + int __percpu *last_vcpu_ran; + /* Timer */ struct arch_timer_kvm timer; diff --git a/arch/arm/include/asm/kvm_hyp.h b/arch/arm/include/asm/kvm_hyp.h index 343135ede5fa..58508900c4bb 100644 --- a/arch/arm/include/asm/kvm_hyp.h +++ b/arch/arm/include/asm/kvm_hyp.h @@ -71,6 +71,7 @@ #define ICIALLUIS __ACCESS_CP15(c7, 0, c1, 0) #define ATS1CPR __ACCESS_CP15(c7, 0, c8, 0) #define TLBIALLIS __ACCESS_CP15(c8, 0, c3, 0) +#define TLBIALL __ACCESS_CP15(c8, 0, c7, 0) #define TLBIALLNSNHIS __ACCESS_CP15(c8, 4, c3, 4) #define PRRR __ACCESS_CP15(c10, 0, c2, 0) #define NMRR __ACCESS_CP15(c10, 0, c2, 1) diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 08bb84f2ad58..19b5f5c1c0ff 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -114,11 +114,18 @@ void kvm_arch_check_processor_compat(void *rtn) */ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) { - int ret = 0; + int ret, cpu; if (type) return -EINVAL; + kvm->arch.last_vcpu_ran = alloc_percpu(typeof(*kvm->arch.last_vcpu_ran)); + if (!kvm->arch.last_vcpu_ran) + return -ENOMEM; + + for_each_possible_cpu(cpu) + *per_cpu_ptr(kvm->arch.last_vcpu_ran, cpu) = -1; + ret = kvm_alloc_stage2_pgd(kvm); if (ret) goto out_fail_alloc; @@ -141,6 +148,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) out_free_stage2_pgd: kvm_free_stage2_pgd(kvm); out_fail_alloc: + free_percpu(kvm->arch.last_vcpu_ran); + kvm->arch.last_vcpu_ran = NULL; return ret; } @@ -168,6 +177,9 @@ void kvm_arch_destroy_vm(struct kvm *kvm) { int i; + free_percpu(kvm->arch.last_vcpu_ran); + kvm->arch.last_vcpu_ran = NULL; + for (i = 0; i < KVM_MAX_VCPUS; ++i) { if (kvm->vcpus[i]) { kvm_arch_vcpu_free(kvm->vcpus[i]); @@ -312,6 +324,19 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { + int *last_ran; + + last_ran = this_cpu_ptr(vcpu->kvm->arch.last_vcpu_ran); + + /* + * We might get preempted before the vCPU actually runs, but + * over-invalidation doesn't affect correctness. + */ + if (*last_ran != vcpu->vcpu_id) { + kvm_call_hyp(__kvm_tlb_flush_local_vmid, vcpu); + *last_ran = vcpu->vcpu_id; + } + vcpu->cpu = cpu; vcpu->arch.host_cpu_context = this_cpu_ptr(kvm_host_cpu_state); diff --git a/arch/arm/kvm/hyp/tlb.c b/arch/arm/kvm/hyp/tlb.c index 729652854f90..6d810af2d9fd 100644 --- a/arch/arm/kvm/hyp/tlb.c +++ b/arch/arm/kvm/hyp/tlb.c @@ -55,6 +55,21 @@ void __hyp_text __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa) __kvm_tlb_flush_vmid(kvm); } +void __hyp_text __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu) +{ + struct kvm *kvm = kern_hyp_va(kern_hyp_va(vcpu)->kvm); + + /* Switch to requested VMID */ + write_sysreg(kvm->arch.vttbr, VTTBR); + isb(); + + write_sysreg(0, TLBIALL); + dsb(nsh); + isb(); + + write_sysreg(0, VTTBR); +} + void __hyp_text __kvm_flush_vm_context(void) { write_sysreg(0, TLBIALLNSNHIS); diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index 18f746551bf6..ec3553eb9349 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -54,6 +54,7 @@ extern char __kvm_hyp_vector[]; extern void __kvm_flush_vm_context(void); extern void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa); extern void __kvm_tlb_flush_vmid(struct kvm *kvm); +extern void __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu); extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu); diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index bd94e6766759..e5050388e062 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -62,6 +62,9 @@ struct kvm_arch { /* VTTBR value associated with above pgd and vmid */ u64 vttbr; + /* The last vcpu id that ran on each physical CPU */ + int __percpu *last_vcpu_ran; + /* The maximum number of vCPUs depends on the used GIC model */ int max_vcpus; diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index a79b969c26fc..6f72fe8b0e3e 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -128,7 +128,7 @@ static inline unsigned long __kern_hyp_va(unsigned long v) return v; } -#define kern_hyp_va(v) (typeof(v))(__kern_hyp_va((unsigned long)(v))) +#define kern_hyp_va(v) ((typeof(v))(__kern_hyp_va((unsigned long)(v)))) /* * We currently only support a 40bit IPA. diff --git a/arch/arm64/kvm/hyp/tlb.c b/arch/arm64/kvm/hyp/tlb.c index 9cc0ea784ae6..88e2f2b938f0 100644 --- a/arch/arm64/kvm/hyp/tlb.c +++ b/arch/arm64/kvm/hyp/tlb.c @@ -64,6 +64,21 @@ void __hyp_text __kvm_tlb_flush_vmid(struct kvm *kvm) write_sysreg(0, vttbr_el2); } +void __hyp_text __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu) +{ + struct kvm *kvm = kern_hyp_va(kern_hyp_va(vcpu)->kvm); + + /* Switch to requested VMID */ + write_sysreg(kvm->arch.vttbr, vttbr_el2); + isb(); + + asm volatile("tlbi vmalle1" : : ); + dsb(nsh); + isb(); + + write_sysreg(0, vttbr_el2); +} + void __hyp_text __kvm_flush_vm_context(void) { dsb(ishst); From 112b0b8f8f6e18d4695d21457961c0e1b322a1d7 Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Tue, 1 Nov 2016 18:00:08 +0000 Subject: [PATCH 122/319] KVM: arm/arm64: vgic: Prevent access to invalid SPIs In our VGIC implementation we limit the number of SPIs to a number that the userland application told us. Accordingly we limit the allocation of memory for virtual IRQs to that number. However in our MMIO dispatcher we didn't check if we ever access an IRQ beyond that limit, leading to out-of-bound accesses. Add a test against the number of allocated SPIs in check_region(). Adjust the VGIC_ADDR_TO_INT macro to avoid an actual division, which is not implemented on ARM(32). [maz: cleaned-up original patch] Cc: stable@vger.kernel.org Reviewed-by: Christoffer Dall Signed-off-by: Andre Przywara Signed-off-by: Marc Zyngier --- virt/kvm/arm/vgic/vgic-mmio.c | 41 +++++++++++++++++++++++------------ virt/kvm/arm/vgic/vgic-mmio.h | 14 ++++++------ 2 files changed, 34 insertions(+), 21 deletions(-) diff --git a/virt/kvm/arm/vgic/vgic-mmio.c b/virt/kvm/arm/vgic/vgic-mmio.c index e18b30ddcdce..ebe1b9fa3c4d 100644 --- a/virt/kvm/arm/vgic/vgic-mmio.c +++ b/virt/kvm/arm/vgic/vgic-mmio.c @@ -453,17 +453,33 @@ struct vgic_io_device *kvm_to_vgic_iodev(const struct kvm_io_device *dev) return container_of(dev, struct vgic_io_device, dev); } -static bool check_region(const struct vgic_register_region *region, +static bool check_region(const struct kvm *kvm, + const struct vgic_register_region *region, gpa_t addr, int len) { - if ((region->access_flags & VGIC_ACCESS_8bit) && len == 1) - return true; - if ((region->access_flags & VGIC_ACCESS_32bit) && - len == sizeof(u32) && !(addr & 3)) - return true; - if ((region->access_flags & VGIC_ACCESS_64bit) && - len == sizeof(u64) && !(addr & 7)) - return true; + int flags, nr_irqs = kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS; + + switch (len) { + case sizeof(u8): + flags = VGIC_ACCESS_8bit; + break; + case sizeof(u32): + flags = VGIC_ACCESS_32bit; + break; + case sizeof(u64): + flags = VGIC_ACCESS_64bit; + break; + default: + return false; + } + + if ((region->access_flags & flags) && IS_ALIGNED(addr, len)) { + if (!region->bits_per_irq) + return true; + + /* Do we access a non-allocated IRQ? */ + return VGIC_ADDR_TO_INTID(addr, region->bits_per_irq) < nr_irqs; + } return false; } @@ -477,7 +493,7 @@ static int dispatch_mmio_read(struct kvm_vcpu *vcpu, struct kvm_io_device *dev, region = vgic_find_mmio_region(iodev->regions, iodev->nr_regions, addr - iodev->base_addr); - if (!region || !check_region(region, addr, len)) { + if (!region || !check_region(vcpu->kvm, region, addr, len)) { memset(val, 0, len); return 0; } @@ -510,10 +526,7 @@ static int dispatch_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *dev, region = vgic_find_mmio_region(iodev->regions, iodev->nr_regions, addr - iodev->base_addr); - if (!region) - return 0; - - if (!check_region(region, addr, len)) + if (!region || !check_region(vcpu->kvm, region, addr, len)) return 0; switch (iodev->iodev_type) { diff --git a/virt/kvm/arm/vgic/vgic-mmio.h b/virt/kvm/arm/vgic/vgic-mmio.h index 4c34d39d44a0..84961b4e4422 100644 --- a/virt/kvm/arm/vgic/vgic-mmio.h +++ b/virt/kvm/arm/vgic/vgic-mmio.h @@ -50,15 +50,15 @@ extern struct kvm_io_device_ops kvm_io_gic_ops; #define VGIC_ADDR_IRQ_MASK(bits) (((bits) * 1024 / 8) - 1) /* - * (addr & mask) gives us the byte offset for the INT ID, so we want to - * divide this with 'bytes per irq' to get the INT ID, which is given - * by '(bits) / 8'. But we do this with fixed-point-arithmetic and - * take advantage of the fact that division by a fraction equals - * multiplication with the inverted fraction, and scale up both the - * numerator and denominator with 8 to support at most 64 bits per IRQ: + * (addr & mask) gives us the _byte_ offset for the INT ID. + * We multiply this by 8 the get the _bit_ offset, then divide this by + * the number of bits to learn the actual INT ID. + * But instead of a division (which requires a "long long div" implementation), + * we shift by the binary logarithm of . + * This assumes that is a power of two. */ #define VGIC_ADDR_TO_INTID(addr, bits) (((addr) & VGIC_ADDR_IRQ_MASK(bits)) * \ - 64 / (bits) / 8) + 8 >> ilog2(bits)) /* * Some VGIC registers store per-IRQ information, with a different number From d42c79701a3ee5c38fbbc82f98a140420bd40134 Mon Sep 17 00:00:00 2001 From: Shih-Wei Li Date: Thu, 27 Oct 2016 15:08:13 +0000 Subject: [PATCH 123/319] KVM: arm/arm64: vgic: Kick VCPUs when queueing already pending IRQs In cases like IPI, we could be queueing an interrupt for a VCPU that is already running and is not about to exit, because the VCPU has entered the VM with the interrupt pending and would not trap on EOI'ing that interrupt. This could result to delays in interrupt deliveries or even loss of interrupts. To guarantee prompt interrupt injection, here we have to try to kick the VCPU. Signed-off-by: Shih-Wei Li Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier --- virt/kvm/arm/vgic/vgic.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c index 2893d5ba523a..6440b56ec90e 100644 --- a/virt/kvm/arm/vgic/vgic.c +++ b/virt/kvm/arm/vgic/vgic.c @@ -273,6 +273,18 @@ bool vgic_queue_irq_unlock(struct kvm *kvm, struct vgic_irq *irq) * no more work for us to do. */ spin_unlock(&irq->irq_lock); + + /* + * We have to kick the VCPU here, because we could be + * queueing an edge-triggered interrupt for which we + * get no EOI maintenance interrupt. In that case, + * while the IRQ is already on the VCPU's AP list, the + * VCPU could have EOI'ed the original interrupt and + * won't see this one until it exits for some other + * reason. + */ + if (vcpu) + kvm_vcpu_kick(vcpu); return false; } From 87dc02551c509703123a60dd7f043d75e92a6aed Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Fri, 4 Nov 2016 01:48:42 +0200 Subject: [PATCH 124/319] net/mlx5e: Fix XDP error path of mlx5e_open_channel() In case of mlx5e_open_rq fails the error handling will jump to label err_close_xdp_sq and will try to close the xdp_sq unconditionally. xdp_sq is valid only in case of XDP use cases, i.e priv->xdp_prog is not null. To fix this in this patch we test xdp_sq validity prior to closing it. In addition we now close the xdp_sq.cq as well. Fixes: b5503b994ed5 ("net/mlx5e: XDP TX forwarding support") Signed-off-by: Saeed Mahameed Reported-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index f4c687ce4c59..c83619d081d8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -1512,7 +1512,10 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, return 0; err_close_xdp_sq: - mlx5e_close_sq(&c->xdp_sq); + if (priv->xdp_prog) { + mlx5e_close_sq(&c->xdp_sq); + mlx5e_close_cq(&c->xdp_sq.cq); + } err_close_sqs: mlx5e_close_sqs(c); From d7a0ecab380c62ccd9fbe2e08dd720f7a367d6ee Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Fri, 4 Nov 2016 01:48:43 +0200 Subject: [PATCH 125/319] net/mlx5e: Re-arrange XDP SQ/CQ creation In mlx5e_open_channel CQs must be created before napi is enabled. Here we move the XDP CQ creation to satisfy that fact. mlx5e_close_channel is already working according to the right order. Fixes: b5503b994ed5 ("net/mlx5e: XDP TX forwarding support") Signed-off-by: Saeed Mahameed Reported-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlx5/core/en_main.c | 32 +++++++++---------- 1 file changed, 15 insertions(+), 17 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index c83619d081d8..84e8b250e2af 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -1445,6 +1445,7 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, c->netdev = priv->netdev; c->mkey_be = cpu_to_be32(priv->mdev->mlx5e_res.mkey.key); c->num_tc = priv->params.num_tc; + c->xdp = !!priv->xdp_prog; if (priv->params.rx_am_enabled) rx_cq_profile = mlx5e_am_get_def_profile(priv->params.rx_cq_period_mode); @@ -1468,6 +1469,12 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, if (err) goto err_close_tx_cqs; + /* XDP SQ CQ params are same as normal TXQ sq CQ params */ + err = c->xdp ? mlx5e_open_cq(c, &cparam->tx_cq, &c->xdp_sq.cq, + priv->params.tx_cq_moderation) : 0; + if (err) + goto err_close_rx_cq; + napi_enable(&c->napi); err = mlx5e_open_sq(c, 0, &cparam->icosq, &c->icosq); @@ -1488,21 +1495,10 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, } } - if (priv->xdp_prog) { - /* XDP SQ CQ params are same as normal TXQ sq CQ params */ - err = mlx5e_open_cq(c, &cparam->tx_cq, &c->xdp_sq.cq, - priv->params.tx_cq_moderation); - if (err) - goto err_close_sqs; + err = c->xdp ? mlx5e_open_sq(c, 0, &cparam->xdp_sq, &c->xdp_sq) : 0; + if (err) + goto err_close_sqs; - err = mlx5e_open_sq(c, 0, &cparam->xdp_sq, &c->xdp_sq); - if (err) { - mlx5e_close_cq(&c->xdp_sq.cq); - goto err_close_sqs; - } - } - - c->xdp = !!priv->xdp_prog; err = mlx5e_open_rq(c, &cparam->rq, &c->rq); if (err) goto err_close_xdp_sq; @@ -1512,10 +1508,8 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, return 0; err_close_xdp_sq: - if (priv->xdp_prog) { + if (c->xdp) mlx5e_close_sq(&c->xdp_sq); - mlx5e_close_cq(&c->xdp_sq.cq); - } err_close_sqs: mlx5e_close_sqs(c); @@ -1525,6 +1519,10 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, err_disable_napi: napi_disable(&c->napi); + if (c->xdp) + mlx5e_close_cq(&c->xdp_sq.cq); + +err_close_rx_cq: mlx5e_close_cq(&c->rq.cq); err_close_tx_cqs: From abd3277287c7743d3999b801c6769e8ad1b381dd Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Fri, 4 Nov 2016 01:48:44 +0200 Subject: [PATCH 126/319] net/mlx5e: Disallow changing name-space for VF representors VF reps should be altogether on the same NS as they were created. Signed-off-by: Or Gerlitz Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 7fe6559e4ab3..bf1c09ca73c0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -308,7 +308,7 @@ static void mlx5e_build_rep_netdev(struct net_device *netdev) netdev->switchdev_ops = &mlx5e_rep_switchdev_ops; #endif - netdev->features |= NETIF_F_VLAN_CHALLENGED | NETIF_F_HW_TC; + netdev->features |= NETIF_F_VLAN_CHALLENGED | NETIF_F_HW_TC | NETIF_F_NETNS_LOCAL; netdev->hw_features |= NETIF_F_HW_TC; eth_hw_addr_random(netdev); From 358d79a47e5a8db83925241629252cfe64f225f7 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Fri, 4 Nov 2016 01:48:45 +0200 Subject: [PATCH 127/319] net/mlx5e: Handle matching on vlan priority for offloaded TC rules We ignored the vlan priority in offloaded TC rules matching part, fix that. Fixes: 095b6cfd69ce ('net/mlx5e: Add TC vlan match parsing') Signed-off-by: Or Gerlitz Reported-by: Paul Blakey Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index ce8c54d18906..6bb21b31cfeb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -237,12 +237,15 @@ static int parse_cls_flower(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec skb_flow_dissector_target(f->dissector, FLOW_DISSECTOR_KEY_VLAN, f->mask); - if (mask->vlan_id) { + if (mask->vlan_id || mask->vlan_priority) { MLX5_SET(fte_match_set_lyr_2_4, headers_c, vlan_tag, 1); MLX5_SET(fte_match_set_lyr_2_4, headers_v, vlan_tag, 1); MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_vid, mask->vlan_id); MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_vid, key->vlan_id); + + MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_prio, mask->vlan_priority); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_prio, key->vlan_priority); } } From ee39fbc4447d5c42640963b559bf68490cb45308 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Fri, 4 Nov 2016 01:48:46 +0200 Subject: [PATCH 128/319] net/mlx5: E-Switch, Set the actions for offloaded rules properly As for the current generation of the mlx5 HW (CX4/CX4-Lx) per flow vlan push/pop actions are emulated, we must not program them to the firmware. Fixes: f5f82476090f ('net/mlx5: E-Switch, Support VLAN actions in the offloads mode') Signed-off-by: Or Gerlitz Reported-by: Paul Blakey Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index c55ad8d00c05..d239f5d0ea36 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -57,7 +57,8 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, if (esw->mode != SRIOV_OFFLOADS) return ERR_PTR(-EOPNOTSUPP); - action = attr->action; + /* per flow vlan pop/push is emulated, don't set that into the firmware */ + action = attr->action & ~(MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH | MLX5_FLOW_CONTEXT_ACTION_VLAN_POP); if (action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) { dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT; From 0e97a34083a03c931d4e9b34ba7899c29a09dce6 Mon Sep 17 00:00:00 2001 From: Huy Nguyen Date: Fri, 4 Nov 2016 01:48:47 +0200 Subject: [PATCH 129/319] net/mlx5: Fix invalid pointer reference when prof_sel parameter is invalid When prof_sel is invalid, mlx5_core_warn is called but the mlx5_core_dev is not initialized yet. Solution is moving the prof_sel code after dev->pdev assignment Fixes: 2974ab6e8bd8 ('net/mlx5: Improve driver log messages') Signed-off-by: Huy Nguyen Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/main.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index d5433c49b2b0..3eb931585b3e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1226,6 +1226,9 @@ static int init_one(struct pci_dev *pdev, pci_set_drvdata(pdev, dev); + dev->pdev = pdev; + dev->event = mlx5_core_event; + if (prof_sel < 0 || prof_sel >= ARRAY_SIZE(profile)) { mlx5_core_warn(dev, "selected profile out of range, selecting default (%d)\n", @@ -1233,8 +1236,6 @@ static int init_one(struct pci_dev *pdev, prof_sel = MLX5_DEFAULT_PROF; } dev->profile = &profile[prof_sel]; - dev->pdev = pdev; - dev->event = mlx5_core_event; INIT_LIST_HEAD(&priv->ctx_list); spin_lock_init(&priv->ctx_lock); From 56211121c0825cd188caad05574fdc518d5cac6f Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Mon, 31 Oct 2016 16:57:32 +0200 Subject: [PATCH 130/319] pinctrl: cherryview: Serialize register access in suspend/resume If async suspend is enabled, the driver may access registers concurrently with another instance which may fail because of the bug in Cherryview GPIO hardware. Prevent this by taking the shared lock while accessing the hardware in suspend and resume hooks. Cc: stable@vger.kernel.org Signed-off-by: Mika Westerberg Signed-off-by: Linus Walleij --- drivers/pinctrl/intel/pinctrl-cherryview.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/pinctrl/intel/pinctrl-cherryview.c b/drivers/pinctrl/intel/pinctrl-cherryview.c index 30389f4ccab4..097d835b3a50 100644 --- a/drivers/pinctrl/intel/pinctrl-cherryview.c +++ b/drivers/pinctrl/intel/pinctrl-cherryview.c @@ -1656,8 +1656,11 @@ static int chv_pinctrl_suspend(struct device *dev) { struct platform_device *pdev = to_platform_device(dev); struct chv_pinctrl *pctrl = platform_get_drvdata(pdev); + unsigned long flags; int i; + raw_spin_lock_irqsave(&chv_lock, flags); + pctrl->saved_intmask = readl(pctrl->regs + CHV_INTMASK); for (i = 0; i < pctrl->community->npins; i++) { @@ -1678,6 +1681,8 @@ static int chv_pinctrl_suspend(struct device *dev) ctx->padctrl1 = readl(reg); } + raw_spin_unlock_irqrestore(&chv_lock, flags); + return 0; } @@ -1685,8 +1690,11 @@ static int chv_pinctrl_resume(struct device *dev) { struct platform_device *pdev = to_platform_device(dev); struct chv_pinctrl *pctrl = platform_get_drvdata(pdev); + unsigned long flags; int i; + raw_spin_lock_irqsave(&chv_lock, flags); + /* * Mask all interrupts before restoring per-pin configuration * registers because we don't know in which state BIOS left them @@ -1731,6 +1739,8 @@ static int chv_pinctrl_resume(struct device *dev) chv_writel(0xffff, pctrl->regs + CHV_INTSTAT); chv_writel(pctrl->saved_intmask, pctrl->regs + CHV_INTMASK); + raw_spin_unlock_irqrestore(&chv_lock, flags); + return 0; } #endif From d2cdf5dc58f6970e9d9d26e47974c21fe87983f3 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Mon, 31 Oct 2016 16:57:33 +0200 Subject: [PATCH 131/319] pinctrl: cherryview: Prevent possible interrupt storm on resume When the system is suspended to S3 the BIOS might re-initialize certain GPIO pins back to their original state or it may re-program interrupt mask of others. For example Acer TravelMate B116-M had BIOS bug where certain GPIO pin (MF_ISH_GPIO_5) was programmed to trigger on high level, and the pin state was high once the BIOS gave control to the OS on resume. This triggers lots of messages like: irq 117, desc: ffff88017a61e600, depth: 1, count: 0, unhandled: 0 ->handle_irq(): ffffffff8109b613, handle_bad_irq+0x0/0x1e0 ->irq_data.chip(): ffffffffa0020180, chv_pinctrl_exit+0x2d84/0x12 [pinctrl_cherryview] ->action(): (null) IRQ_NOPROBE set We reset the mask back to known state in chv_pinctrl_resume() but that is called only after device interrupts have already been enabled. Now, this particular issue was fixed by upgrading the BIOS to the latest (v1.23) but not everybody upgrades their BIOSes so we fix it up in the driver as well. Prevent the possible interrupt storm by moving suspend and resume hooks to be called at _noirq time instead. Since device interrupts are still disabled we can restore the mask back to known state before interrupt storm happens. Cc: stable@vger.kernel.org Reported-by: Christian Steiner Signed-off-by: Mika Westerberg Signed-off-by: Linus Walleij --- drivers/pinctrl/intel/pinctrl-cherryview.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/pinctrl/intel/pinctrl-cherryview.c b/drivers/pinctrl/intel/pinctrl-cherryview.c index 097d835b3a50..c43b1e9a06af 100644 --- a/drivers/pinctrl/intel/pinctrl-cherryview.c +++ b/drivers/pinctrl/intel/pinctrl-cherryview.c @@ -1652,7 +1652,7 @@ static int chv_pinctrl_probe(struct platform_device *pdev) } #ifdef CONFIG_PM_SLEEP -static int chv_pinctrl_suspend(struct device *dev) +static int chv_pinctrl_suspend_noirq(struct device *dev) { struct platform_device *pdev = to_platform_device(dev); struct chv_pinctrl *pctrl = platform_get_drvdata(pdev); @@ -1686,7 +1686,7 @@ static int chv_pinctrl_suspend(struct device *dev) return 0; } -static int chv_pinctrl_resume(struct device *dev) +static int chv_pinctrl_resume_noirq(struct device *dev) { struct platform_device *pdev = to_platform_device(dev); struct chv_pinctrl *pctrl = platform_get_drvdata(pdev); @@ -1746,7 +1746,8 @@ static int chv_pinctrl_resume(struct device *dev) #endif static const struct dev_pm_ops chv_pinctrl_pm_ops = { - SET_LATE_SYSTEM_SLEEP_PM_OPS(chv_pinctrl_suspend, chv_pinctrl_resume) + SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(chv_pinctrl_suspend_noirq, + chv_pinctrl_resume_noirq) }; static const struct acpi_device_id chv_pinctrl_acpi_match[] = { From 3984903a2e3906d3def220e688040ce93368200a Mon Sep 17 00:00:00 2001 From: Lokesh Vutla Date: Thu, 27 Oct 2016 11:27:25 +0530 Subject: [PATCH 132/319] rtc: omap: Fix selecting external osc RTC can be clocked from an external 32KHz oscillator, or from the Peripheral PLL. The RTC has an internal oscillator buffer to support direct operation with a crystal. ---------------------------------------- | Device --------- | | | | | | | RTCSS | | | --------- | | | OSC |<------| RTC | | | | |------>| OSC |--- | | | | -------- | | | | | ----|clk | | | -------- | | | | | | PRCM |--- | | | | -------- -------- | ---------------------------------------- The RTC functional clock is sourced by default from the clock derived from the Peripheral PLL. In order to select source as external osc clk the following changes needs to be done: - Enable the RTC OSC (RTC_OSC_REG[4]OSC32K_GZ = 0) - Enable the clock mux(RTC_OSC_REG[6]K32CLK_EN = 1) - Select the external clock source (RTC_OSC_REG[3]32KCLK_SEL = 1) Fixes: 399cf0f63f6f2 ("rtc: omap: Add external clock enabling support") Signed-off-by: Keerthy Signed-off-by: Lokesh Vutla Signed-off-by: Dave Gerlach Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-omap.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/rtc/rtc-omap.c b/drivers/rtc/rtc-omap.c index b04ea9b5ae67..dddaa60871b9 100644 --- a/drivers/rtc/rtc-omap.c +++ b/drivers/rtc/rtc-omap.c @@ -113,6 +113,7 @@ /* OMAP_RTC_OSC_REG bit fields: */ #define OMAP_RTC_OSC_32KCLK_EN BIT(6) #define OMAP_RTC_OSC_SEL_32KCLK_SRC BIT(3) +#define OMAP_RTC_OSC_OSC32K_GZ_DISABLE BIT(4) /* OMAP_RTC_IRQWAKEEN bit fields: */ #define OMAP_RTC_IRQWAKEEN_ALARM_WAKEEN BIT(1) @@ -786,8 +787,9 @@ static int omap_rtc_probe(struct platform_device *pdev) */ if (rtc->has_ext_clk) { reg = rtc_read(rtc, OMAP_RTC_OSC_REG); - rtc_write(rtc, OMAP_RTC_OSC_REG, - reg | OMAP_RTC_OSC_SEL_32KCLK_SRC); + reg &= ~OMAP_RTC_OSC_OSC32K_GZ_DISABLE; + reg |= OMAP_RTC_OSC_32KCLK_EN | OMAP_RTC_OSC_SEL_32KCLK_SRC; + rtc_writel(rtc, OMAP_RTC_OSC_REG, reg); } rtc->type->lock(rtc); From efce21fc43e00a76aee7b0a1eda73730ed2d5d3a Mon Sep 17 00:00:00 2001 From: Tero Kristo Date: Thu, 27 Oct 2016 11:27:26 +0530 Subject: [PATCH 133/319] rtc: omap: prevent disabling of clock/module during suspend If RTC is running from an internal clock source, the RTC module can't be disabled; otherwise it stops ticking completely. Current suspend handler implementation disables the clock/module unconditionally, instead fix this by disabling the clock only if we are running on external clock source, which is not affected by suspend. The prevention of disabling the clock must be done via implementing the runtime_pm handlers for the device, and returning an error code from the runtime suspend handler; otherwise OMAP core PM will disable the clocks for the driver. Signed-off-by: Tero Kristo Signed-off-by: Keerthy Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-omap.c | 32 ++++++++++++++++++++++++++------ 1 file changed, 26 insertions(+), 6 deletions(-) diff --git a/drivers/rtc/rtc-omap.c b/drivers/rtc/rtc-omap.c index dddaa60871b9..51e52446eacb 100644 --- a/drivers/rtc/rtc-omap.c +++ b/drivers/rtc/rtc-omap.c @@ -147,6 +147,7 @@ struct omap_rtc { u8 interrupts_reg; bool is_pmic_controller; bool has_ext_clk; + bool is_suspending; const struct omap_rtc_device_type *type; struct pinctrl_dev *pctldev; }; @@ -900,8 +901,7 @@ static int omap_rtc_suspend(struct device *dev) rtc_write(rtc, OMAP_RTC_INTERRUPTS_REG, 0); rtc->type->lock(rtc); - /* Disable the clock/module */ - pm_runtime_put_sync(dev); + rtc->is_suspending = true; return 0; } @@ -910,9 +910,6 @@ static int omap_rtc_resume(struct device *dev) { struct omap_rtc *rtc = dev_get_drvdata(dev); - /* Enable the clock/module so that we can access the registers */ - pm_runtime_get_sync(dev); - rtc->type->unlock(rtc); if (device_may_wakeup(dev)) disable_irq_wake(rtc->irq_alarm); @@ -920,11 +917,34 @@ static int omap_rtc_resume(struct device *dev) rtc_write(rtc, OMAP_RTC_INTERRUPTS_REG, rtc->interrupts_reg); rtc->type->lock(rtc); + rtc->is_suspending = false; + return 0; } #endif -static SIMPLE_DEV_PM_OPS(omap_rtc_pm_ops, omap_rtc_suspend, omap_rtc_resume); +#ifdef CONFIG_PM +static int omap_rtc_runtime_suspend(struct device *dev) +{ + struct omap_rtc *rtc = dev_get_drvdata(dev); + + if (rtc->is_suspending && !rtc->has_ext_clk) + return -EBUSY; + + return 0; +} + +static int omap_rtc_runtime_resume(struct device *dev) +{ + return 0; +} +#endif + +static const struct dev_pm_ops omap_rtc_pm_ops = { + SET_SYSTEM_SLEEP_PM_OPS(omap_rtc_suspend, omap_rtc_resume) + SET_RUNTIME_PM_OPS(omap_rtc_runtime_suspend, + omap_rtc_runtime_resume, NULL) +}; static void omap_rtc_shutdown(struct platform_device *pdev) { From 98430c7aad6a3fdedcc78a0d6780dabb6580dc38 Mon Sep 17 00:00:00 2001 From: Randy Li Date: Tue, 25 Oct 2016 22:15:34 +0800 Subject: [PATCH 134/319] phy: Add reset callback for not generic phy Add a dummy function for phy_reset in case the CONFIG_GENERIC_PHY is disabled. Signed-off-by: Randy Li Signed-off-by: Kishon Vijay Abraham I --- include/linux/phy/phy.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/include/linux/phy/phy.h b/include/linux/phy/phy.h index ee1bed7dbfc6..78bb0d7f6b11 100644 --- a/include/linux/phy/phy.h +++ b/include/linux/phy/phy.h @@ -253,6 +253,13 @@ static inline int phy_set_mode(struct phy *phy, enum phy_mode mode) return -ENOSYS; } +static inline int phy_reset(struct phy *phy) +{ + if (!phy) + return 0; + return -ENOSYS; +} + static inline int phy_get_bus_width(struct phy *phy) { return -ENOSYS; From 766325427fdb2a6816778b39331637761bccf13a Mon Sep 17 00:00:00 2001 From: Axel Haslam Date: Thu, 3 Nov 2016 17:03:07 +0100 Subject: [PATCH 135/319] phy: da8xx-usb: rename the ohci device to ohci-da8xx The ohci device name has changed in the board configuraion files, hence, change the phy lookup table to match the new name. Signed-off-by: Axel Haslam Signed-off-by: Kishon Vijay Abraham I --- drivers/phy/phy-da8xx-usb.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/phy/phy-da8xx-usb.c b/drivers/phy/phy-da8xx-usb.c index 32ae78c8ca17..c85fb0b59729 100644 --- a/drivers/phy/phy-da8xx-usb.c +++ b/drivers/phy/phy-da8xx-usb.c @@ -198,7 +198,8 @@ static int da8xx_usb_phy_probe(struct platform_device *pdev) } else { int ret; - ret = phy_create_lookup(d_phy->usb11_phy, "usb-phy", "ohci.0"); + ret = phy_create_lookup(d_phy->usb11_phy, "usb-phy", + "ohci-da8xx"); if (ret) dev_warn(dev, "Failed to create usb11 phy lookup\n"); ret = phy_create_lookup(d_phy->usb20_phy, "usb-phy", @@ -216,7 +217,7 @@ static int da8xx_usb_phy_remove(struct platform_device *pdev) if (!pdev->dev.of_node) { phy_remove_lookup(d_phy->usb20_phy, "usb-phy", "musb-da8xx"); - phy_remove_lookup(d_phy->usb11_phy, "usb-phy", "ohci.0"); + phy_remove_lookup(d_phy->usb11_phy, "usb-phy", "ohci-da8xx"); } return 0; From 232c260982ab0100cbb29055933e45b928252687 Mon Sep 17 00:00:00 2001 From: Shawn Lin Date: Thu, 13 Oct 2016 12:42:13 +0800 Subject: [PATCH 136/319] phy-rockchip-pcie: remove deassert of phy_rst from exit callback The deassert of phy_rst from exit callback is incorrect as when doing phy_exit, we expect the phy_rst is on asserted state which was done by power_off callback, but not deasserted state. Meanwhile when disabling clk_pciephy_ref, the assert/deassert signal can't actually take effect on the phy. So let's fix it anyway. Signed-off-by: Shawn Lin Reviewed-by: Heiko Stuebner Signed-off-by: Kishon Vijay Abraham I --- drivers/phy/phy-rockchip-pcie.c | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/drivers/phy/phy-rockchip-pcie.c b/drivers/phy/phy-rockchip-pcie.c index a2b4c6b58aea..6904633cad68 100644 --- a/drivers/phy/phy-rockchip-pcie.c +++ b/drivers/phy/phy-rockchip-pcie.c @@ -249,21 +249,10 @@ static int rockchip_pcie_phy_init(struct phy *phy) static int rockchip_pcie_phy_exit(struct phy *phy) { struct rockchip_pcie_phy *rk_phy = phy_get_drvdata(phy); - int err = 0; clk_disable_unprepare(rk_phy->clk_pciephy_ref); - err = reset_control_deassert(rk_phy->phy_rst); - if (err) { - dev_err(&phy->dev, "deassert phy_rst err %d\n", err); - goto err_reset; - } - - return err; - -err_reset: - clk_prepare_enable(rk_phy->clk_pciephy_ref); - return err; + return 0; } static const struct phy_ops ops = { From 4320f9d4c1831fd4d244a9de8f81bc27ea67699c Mon Sep 17 00:00:00 2001 From: Icenowy Zheng Date: Sat, 29 Oct 2016 00:27:01 +0800 Subject: [PATCH 137/319] phy: sun4i: check PMU presence when poking unknown bit of pmu Allwinner SoC's PHY 0, when used as OTG controller, have no pmu part. The code that poke some unknown bit of PMU for H3/A64 didn't check the PHY, and will cause kernel oops when PHY 0 is used. This patch will check whether the pmu is not NULL before poking. Fixes: b3e0d141ca9f (phy: sun4i: add support for A64 usb phy) Signed-off-by: Icenowy Zheng Acked-by: Maxime Ripard Reviewed-by: Hans de Goede Signed-off-by: Kishon Vijay Abraham I --- drivers/phy/phy-sun4i-usb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/phy/phy-sun4i-usb.c b/drivers/phy/phy-sun4i-usb.c index b9342a2af7b3..fec34f5213c4 100644 --- a/drivers/phy/phy-sun4i-usb.c +++ b/drivers/phy/phy-sun4i-usb.c @@ -264,7 +264,7 @@ static int sun4i_usb_phy_init(struct phy *_phy) return ret; } - if (data->cfg->enable_pmu_unk1) { + if (phy->pmu && data->cfg->enable_pmu_unk1) { val = readl(phy->pmu + REG_PMU_UNK1); writel(val & ~2, phy->pmu + REG_PMU_UNK1); } From 8af644a7d6846f48d6b72be5d4a3c6eb16bd33c8 Mon Sep 17 00:00:00 2001 From: Song Hongyan Date: Tue, 25 Oct 2016 01:06:03 +0000 Subject: [PATCH 138/319] iio: orientation: hid-sensor-rotation: Add PM function (fix non working driver) This fix makes newer ISH hubs work. Previous ones worked by lucky coincidence. Rotation sensor function does not work due to miss PM function. Add common hid sensor iio pm function for rotation sensor. Further clarification from Srinivas: If CONFIG_PM is not defined, then this prevents this sensor to function. So above commit caused this. This sensor was supposed to be always on to trigger wake up in prior external hubs. But with the new ISH hub this is not the case. Signed-off-by: Song Hongyan Fixes: 2b89635e9a9e ("iio: hid_sensor_hub: Common PM functions") Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/orientation/hid-sensor-rotation.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iio/orientation/hid-sensor-rotation.c b/drivers/iio/orientation/hid-sensor-rotation.c index b98b9d94d184..a97e802ca523 100644 --- a/drivers/iio/orientation/hid-sensor-rotation.c +++ b/drivers/iio/orientation/hid-sensor-rotation.c @@ -335,6 +335,7 @@ static struct platform_driver hid_dev_rot_platform_driver = { .id_table = hid_dev_rot_ids, .driver = { .name = KBUILD_MODNAME, + .pm = &hid_sensor_pm_ops, }, .probe = hid_dev_rot_probe, .remove = hid_dev_rot_remove, From 6f77199e9e4b84340c751c585691d7642a47d226 Mon Sep 17 00:00:00 2001 From: Song Hongyan Date: Tue, 25 Oct 2016 01:30:07 +0000 Subject: [PATCH 139/319] iio: hid-sensors: Increase the precision of scale to fix wrong reading interpretation. While testing, it was observed that on some platforms the scale value from iio sysfs for gyroscope is always 0 (E.g. Yoga 260). This results in the final angular velocity component values to be zeros. This is caused by insufficient precision of scale value displayed in sysfs. If the precision is changed to nano from current micro, then this is sufficient to display the scale value on this platform. Since this can be a problem for all other HID sensors, increase scale precision of all HID sensors to nano from current micro. Results on Yoga 260: name scale before scale now -------------------------------------------- gyro_3d 0.000000 0.000000174 als 0.001000 0.001000000 magn_3d 0.000001 0.000001000 accel_3d 0.000009 0.000009806 Signed-off-by: Song Hongyan Acked-by: Srinivas Pandruvada Cc: Signed-off-by: Jonathan Cameron --- .../hid-sensors/hid-sensor-attributes.c | 56 +++++++++---------- 1 file changed, 28 insertions(+), 28 deletions(-) diff --git a/drivers/iio/common/hid-sensors/hid-sensor-attributes.c b/drivers/iio/common/hid-sensors/hid-sensor-attributes.c index dc33c1dd5191..b5beea53d6f6 100644 --- a/drivers/iio/common/hid-sensors/hid-sensor-attributes.c +++ b/drivers/iio/common/hid-sensors/hid-sensor-attributes.c @@ -30,26 +30,26 @@ static struct { u32 usage_id; int unit; /* 0 for default others from HID sensor spec */ int scale_val0; /* scale, whole number */ - int scale_val1; /* scale, fraction in micros */ + int scale_val1; /* scale, fraction in nanos */ } unit_conversion[] = { - {HID_USAGE_SENSOR_ACCEL_3D, 0, 9, 806650}, + {HID_USAGE_SENSOR_ACCEL_3D, 0, 9, 806650000}, {HID_USAGE_SENSOR_ACCEL_3D, HID_USAGE_SENSOR_UNITS_METERS_PER_SEC_SQRD, 1, 0}, {HID_USAGE_SENSOR_ACCEL_3D, - HID_USAGE_SENSOR_UNITS_G, 9, 806650}, + HID_USAGE_SENSOR_UNITS_G, 9, 806650000}, - {HID_USAGE_SENSOR_GYRO_3D, 0, 0, 17453}, + {HID_USAGE_SENSOR_GYRO_3D, 0, 0, 17453293}, {HID_USAGE_SENSOR_GYRO_3D, HID_USAGE_SENSOR_UNITS_RADIANS_PER_SECOND, 1, 0}, {HID_USAGE_SENSOR_GYRO_3D, - HID_USAGE_SENSOR_UNITS_DEGREES_PER_SECOND, 0, 17453}, + HID_USAGE_SENSOR_UNITS_DEGREES_PER_SECOND, 0, 17453293}, - {HID_USAGE_SENSOR_COMPASS_3D, 0, 0, 1000}, + {HID_USAGE_SENSOR_COMPASS_3D, 0, 0, 1000000}, {HID_USAGE_SENSOR_COMPASS_3D, HID_USAGE_SENSOR_UNITS_GAUSS, 1, 0}, - {HID_USAGE_SENSOR_INCLINOMETER_3D, 0, 0, 17453}, + {HID_USAGE_SENSOR_INCLINOMETER_3D, 0, 0, 17453293}, {HID_USAGE_SENSOR_INCLINOMETER_3D, - HID_USAGE_SENSOR_UNITS_DEGREES, 0, 17453}, + HID_USAGE_SENSOR_UNITS_DEGREES, 0, 17453293}, {HID_USAGE_SENSOR_INCLINOMETER_3D, HID_USAGE_SENSOR_UNITS_RADIANS, 1, 0}, @@ -57,7 +57,7 @@ static struct { {HID_USAGE_SENSOR_ALS, HID_USAGE_SENSOR_UNITS_LUX, 1, 0}, {HID_USAGE_SENSOR_PRESSURE, 0, 100, 0}, - {HID_USAGE_SENSOR_PRESSURE, HID_USAGE_SENSOR_UNITS_PASCAL, 0, 1000}, + {HID_USAGE_SENSOR_PRESSURE, HID_USAGE_SENSOR_UNITS_PASCAL, 0, 1000000}, }; static int pow_10(unsigned power) @@ -266,15 +266,15 @@ EXPORT_SYMBOL(hid_sensor_write_raw_hyst_value); /* * This fuction applies the unit exponent to the scale. * For example: - * 9.806650 ->exp:2-> val0[980]val1[665000] - * 9.000806 ->exp:2-> val0[900]val1[80600] - * 0.174535 ->exp:2-> val0[17]val1[453500] - * 1.001745 ->exp:0-> val0[1]val1[1745] - * 1.001745 ->exp:2-> val0[100]val1[174500] - * 1.001745 ->exp:4-> val0[10017]val1[450000] - * 9.806650 ->exp:-2-> val0[0]val1[98066] + * 9.806650000 ->exp:2-> val0[980]val1[665000000] + * 9.000806000 ->exp:2-> val0[900]val1[80600000] + * 0.174535293 ->exp:2-> val0[17]val1[453529300] + * 1.001745329 ->exp:0-> val0[1]val1[1745329] + * 1.001745329 ->exp:2-> val0[100]val1[174532900] + * 1.001745329 ->exp:4-> val0[10017]val1[453290000] + * 9.806650000 ->exp:-2-> val0[0]val1[98066500] */ -static void adjust_exponent_micro(int *val0, int *val1, int scale0, +static void adjust_exponent_nano(int *val0, int *val1, int scale0, int scale1, int exp) { int i; @@ -285,32 +285,32 @@ static void adjust_exponent_micro(int *val0, int *val1, int scale0, if (exp > 0) { *val0 = scale0 * pow_10(exp); res = 0; - if (exp > 6) { + if (exp > 9) { *val1 = 0; return; } for (i = 0; i < exp; ++i) { - x = scale1 / pow_10(5 - i); + x = scale1 / pow_10(8 - i); res += (pow_10(exp - 1 - i) * x); - scale1 = scale1 % pow_10(5 - i); + scale1 = scale1 % pow_10(8 - i); } *val0 += res; *val1 = scale1 * pow_10(exp); } else if (exp < 0) { exp = abs(exp); - if (exp > 6) { + if (exp > 9) { *val0 = *val1 = 0; return; } *val0 = scale0 / pow_10(exp); rem = scale0 % pow_10(exp); res = 0; - for (i = 0; i < (6 - exp); ++i) { - x = scale1 / pow_10(5 - i); - res += (pow_10(5 - exp - i) * x); - scale1 = scale1 % pow_10(5 - i); + for (i = 0; i < (9 - exp); ++i) { + x = scale1 / pow_10(8 - i); + res += (pow_10(8 - exp - i) * x); + scale1 = scale1 % pow_10(8 - i); } - *val1 = rem * pow_10(6 - exp) + res; + *val1 = rem * pow_10(9 - exp) + res; } else { *val0 = scale0; *val1 = scale1; @@ -332,14 +332,14 @@ int hid_sensor_format_scale(u32 usage_id, unit_conversion[i].unit == attr_info->units) { exp = hid_sensor_convert_exponent( attr_info->unit_expo); - adjust_exponent_micro(val0, val1, + adjust_exponent_nano(val0, val1, unit_conversion[i].scale_val0, unit_conversion[i].scale_val1, exp); break; } } - return IIO_VAL_INT_PLUS_MICRO; + return IIO_VAL_INT_PLUS_NANO; } EXPORT_SYMBOL(hid_sensor_format_scale); From 6d6d36bc6e77f8b1f86d81884ad5149931bb4acd Mon Sep 17 00:00:00 2001 From: Eryu Guan Date: Tue, 1 Nov 2016 15:43:07 +0800 Subject: [PATCH 140/319] mm/filemap: don't allow partially uptodate page for pipes Starting from 4.9-rc1 kernel, I started noticing some test failures of sendfile(2) and splice(2) (sendfile0N and splice01 from LTP) when testing on sub-page block size filesystems (tested both XFS and ext4), these syscalls start to return EIO in the tests. e.g. sendfile02 1 TFAIL : sendfile02.c:133: sendfile(2) failed to return expected value, expected: 26, got: -1 sendfile02 2 TFAIL : sendfile02.c:133: sendfile(2) failed to return expected value, expected: 24, got: -1 sendfile02 3 TFAIL : sendfile02.c:133: sendfile(2) failed to return expected value, expected: 22, got: -1 sendfile02 4 TFAIL : sendfile02.c:133: sendfile(2) failed to return expected value, expected: 20, got: -1 This is because that in sub-page block size cases, we don't need the whole page to be uptodate, only the part we care about is uptodate is OK (if fs has ->is_partially_uptodate defined). But page_cache_pipe_buf_confirm() doesn't have the ability to check the partially-uptodate case, it needs the whole page to be uptodate. So it returns EIO in this case. This is a regression introduced by commit 82c156f85384 ("switch generic_file_splice_read() to use of ->read_iter()"). Prior to the change, generic_file_splice_read() doesn't allow partially-uptodate page either, so it worked fine. Fix it by skipping the partially-uptodate check if we're working on a pipe in do_generic_file_read(), so we read the whole page from disk as long as the page is not uptodate. Signed-off-by: Eryu Guan Signed-off-by: Al Viro --- mm/filemap.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/mm/filemap.c b/mm/filemap.c index c7fe2f16503f..50b52fe51937 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1732,6 +1732,9 @@ static ssize_t do_generic_file_read(struct file *filp, loff_t *ppos, if (inode->i_blkbits == PAGE_SHIFT || !mapping->a_ops->is_partially_uptodate) goto page_not_up_to_date; + /* pipes can't handle partially uptodate pages */ + if (unlikely(iter->type & ITER_PIPE)) + goto page_not_up_to_date; if (!trylock_page(page)) goto page_not_up_to_date; /* Did it get truncated before we got the lock? */ From 772918524dfb8c8869120728c1b1109a2d49493c Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 30 Oct 2016 05:28:27 +0100 Subject: [PATCH 141/319] nbd: Fix error handling 'blk_mq_alloc_request()' returns an error pointer in case of error, not NULL. So test it with IS_ERR. Fixes: fd8383fd88a2 ("nbd: convert to blkmq") Signed-off-by: Christophe JAILLET Signed-off-by: Jens Axboe --- drivers/block/nbd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 19a16b2dbb91..7a1048755914 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -599,7 +599,7 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd, return -EINVAL; sreq = blk_mq_alloc_request(bdev_get_queue(bdev), WRITE, 0); - if (!sreq) + if (IS_ERR(sreq)) return -ENOMEM; mutex_unlock(&nbd->tx_lock); From 9a76a3ac3b9b6326a317c59126cdf9e758f85375 Mon Sep 17 00:00:00 2001 From: Jaehoon Chung Date: Mon, 31 Oct 2016 11:49:41 +0900 Subject: [PATCH 142/319] Documentation: synopsys-dw-mshc: add binding for reset-names Add reset-names property for binding dw-mmc controller. It might be used together with "reset" property. - Note: It must be "reset" as name. Fixes: d6786fefe816 ("mmc: dw_mmc: add reset support to dwmmc host controller") Signed-off-by: Jaehoon Chung Acked-by: John Stultz Signed-off-by: Ulf Hansson --- Documentation/devicetree/bindings/mmc/synopsys-dw-mshc.txt | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Documentation/devicetree/bindings/mmc/synopsys-dw-mshc.txt b/Documentation/devicetree/bindings/mmc/synopsys-dw-mshc.txt index 4e00e859e885..bfa461aaac99 100644 --- a/Documentation/devicetree/bindings/mmc/synopsys-dw-mshc.txt +++ b/Documentation/devicetree/bindings/mmc/synopsys-dw-mshc.txt @@ -43,6 +43,9 @@ Optional properties: reset signal present internally in some host controller IC designs. See Documentation/devicetree/bindings/reset/reset.txt for details. +* reset-names: request name for using "resets" property. Must be "reset". + (It will be used together with "resets" property.) + * clocks: from common clock binding: handle to biu and ciu clocks for the bus interface unit clock and the card interface unit clock. @@ -103,6 +106,8 @@ board specific portions as listed below. interrupts = <0 75 0>; #address-cells = <1>; #size-cells = <0>; + resets = <&rst 20>; + reset-names = "reset"; }; [board specific internal DMA resources] From 3a667e3ff78ae81afc49343c753864cc1ede2f77 Mon Sep 17 00:00:00 2001 From: Jaehoon Chung Date: Mon, 31 Oct 2016 11:49:42 +0900 Subject: [PATCH 143/319] mmc: dw_mmc: add the "reset" as name of reset controller Add the "reset" as name of reset controller. This is for preventing the wrong operation. Even if some SoC has reset controller, doesn't define "resets" in device-tree. Then it might be waiting for reset controller and it should be stuck. Fixes: d6786fefe816 ("mmc: dw_mmc: add reset support to dwmmc host controller") Signed-off-by: Jaehoon Chung Acked-by: John Stultz Signed-off-by: Ulf Hansson --- drivers/mmc/host/dw_mmc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mmc/host/dw_mmc.c b/drivers/mmc/host/dw_mmc.c index 4fcbc4012ed0..50a674be6655 100644 --- a/drivers/mmc/host/dw_mmc.c +++ b/drivers/mmc/host/dw_mmc.c @@ -2940,7 +2940,7 @@ static struct dw_mci_board *dw_mci_parse_dt(struct dw_mci *host) return ERR_PTR(-ENOMEM); /* find reset controller when exist */ - pdata->rstc = devm_reset_control_get_optional(dev, NULL); + pdata->rstc = devm_reset_control_get_optional(dev, "reset"); if (IS_ERR(pdata->rstc)) { if (PTR_ERR(pdata->rstc) == -EPROBE_DEFER) return ERR_PTR(-EPROBE_DEFER); From 355f1a39183467d48f20d90adce3b4aa75c6b260 Mon Sep 17 00:00:00 2001 From: David Lechner Date: Tue, 1 Nov 2016 15:47:04 -0500 Subject: [PATCH 144/319] usb: musb: da8xx: Don't print phy error on -EPROBE_DEFER This suppresses printing the error message "failed to get phy" in the kernel log when the error is -EPROBE_DEFER. This prevents usless noise in the kernel log. Signed-off-by: David Lechner Signed-off-by: Bin Liu Signed-off-by: Greg Kroah-Hartman --- drivers/usb/musb/da8xx.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/musb/da8xx.c b/drivers/usb/musb/da8xx.c index 210b7e43a6fd..2440f88e07a3 100644 --- a/drivers/usb/musb/da8xx.c +++ b/drivers/usb/musb/da8xx.c @@ -479,7 +479,8 @@ static int da8xx_probe(struct platform_device *pdev) glue->phy = devm_phy_get(&pdev->dev, "usb-phy"); if (IS_ERR(glue->phy)) { - dev_err(&pdev->dev, "failed to get phy\n"); + if (PTR_ERR(glue->phy) != -EPROBE_DEFER) + dev_err(&pdev->dev, "failed to get phy\n"); return PTR_ERR(glue->phy); } From 2e52ec2394fb9486fd496737a84d629b2c992db2 Mon Sep 17 00:00:00 2001 From: Kirill Esipov Date: Tue, 1 Nov 2016 15:47:05 -0500 Subject: [PATCH 145/319] usb: musb: remove duplicated actions Removing unnecessary duplicated actions that we've got while merging: Commit 19915e623458 ("Merge 4.1-rc7 into usb-next") [ b-liu@ti.com: added 'Commit' in the commit message ] Signed-off-by: Kirill Esipov Signed-off-by: Bin Liu Signed-off-by: Greg Kroah-Hartman --- drivers/usb/musb/musb_core.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/usb/musb/musb_core.c b/drivers/usb/musb/musb_core.c index 27dadc0d9114..e01116e4c067 100644 --- a/drivers/usb/musb/musb_core.c +++ b/drivers/usb/musb/musb_core.c @@ -2114,11 +2114,6 @@ musb_init_controller(struct device *dev, int nIrq, void __iomem *ctrl) musb->io.ep_offset = musb_flat_ep_offset; musb->io.ep_select = musb_flat_ep_select; } - /* And override them with platform specific ops if specified. */ - if (musb->ops->ep_offset) - musb->io.ep_offset = musb->ops->ep_offset; - if (musb->ops->ep_select) - musb->io.ep_select = musb->ops->ep_select; /* At least tusb6010 has its own offsets */ if (musb->ops->ep_offset) From c289d0eff3d5a594c577c0dc162412a2cad075a4 Mon Sep 17 00:00:00 2001 From: Babu Moger Date: Fri, 21 Oct 2016 15:25:05 -0700 Subject: [PATCH 146/319] drivers/usb: Skip auto handoff for TI and RENESAS usb controllers Never seen XHCI auto handoff working on TI and RENESAS cards. Eventually, we force handoff. This code forces the handoff unconditionally. It saves 5 seconds boot time for each card. Signed-off-by: Babu Moger Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/pci-quirks.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/usb/host/pci-quirks.c b/drivers/usb/host/pci-quirks.c index d793f548dfe2..a9a1e4c40480 100644 --- a/drivers/usb/host/pci-quirks.c +++ b/drivers/usb/host/pci-quirks.c @@ -995,6 +995,14 @@ static void quirk_usb_handoff_xhci(struct pci_dev *pdev) } val = readl(base + ext_cap_offset); + /* Auto handoff never worked for these devices. Force it and continue */ + if ((pdev->vendor == PCI_VENDOR_ID_TI && pdev->device == 0x8241) || + (pdev->vendor == PCI_VENDOR_ID_RENESAS + && pdev->device == 0x0014)) { + val = (val | XHCI_HC_OS_OWNED) & ~XHCI_HC_BIOS_OWNED; + writel(val, base + ext_cap_offset); + } + /* If the BIOS owns the HC, signal that the OS wants it, and wait */ if (val & XHCI_HC_BIOS_OWNED) { writel(val | XHCI_HC_OS_OWNED, base + ext_cap_offset); From 7309aa847ead3fa561663b16779a0dde8c64cc7c Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Wed, 2 Nov 2016 14:42:52 +0100 Subject: [PATCH 147/319] cdc-acm: fix uninitialized variable variable struct usb_cdc_parsed_header h may be used uninitialized in acm_probe. In kernel 4.8. /* handle quirks deadly to normal probing*/ if (quirks == NO_UNION_NORMAL) ... goto skip_normal_probe; } we bypass call to cdc_parse_cdc_header(&h, intf, buffer, buflen); but later use h in if (h.usb_cdc_country_functional_desc) { /* export the country data */ Signed-off-by: Oliver Neukum CC: stable@vger.kernel.org Reported-by: Victor Sologoubov Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/cdc-acm.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c index 78f0f85bebdc..4ad4ca44058b 100644 --- a/drivers/usb/class/cdc-acm.c +++ b/drivers/usb/class/cdc-acm.c @@ -1161,6 +1161,8 @@ static int acm_probe(struct usb_interface *intf, if (quirks == IGNORE_DEVICE) return -ENODEV; + memset(&h, 0x00, sizeof(struct usb_cdc_parsed_header)); + num_rx_buf = (quirks == SINGLE_RX_URB) ? 1 : ACM_NR; /* handle quirks deadly to normal probing*/ From 33c027ae3cfd8fefb6cccccc5c6b2c07d80891ce Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Thu, 3 Nov 2016 12:48:43 +0800 Subject: [PATCH 148/319] staging: sm750fb: Fix bugs introduced by early commits Early commit 30ca5cb63c56965 ("staging: sm750fb: change definition of PANEL_PLANE_TL fields") and 27b047bbe1ee9c0 ("staging: sm750fb: change definition of PANEL_PLANE_BR fields") modify the register bit fields definitions. But the modifications are wrong, because the bit mask of "bit field 10:0" is not 0xeff, but 0x7ff. The wrong definition bugs makes display very strange. Signed-off-by: Huacai Chen Cc: stable # 4.6+ Signed-off-by: Greg Kroah-Hartman --- drivers/staging/sm750fb/ddk750_reg.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/staging/sm750fb/ddk750_reg.h b/drivers/staging/sm750fb/ddk750_reg.h index 955247979aaa..4ed6d8d7712a 100644 --- a/drivers/staging/sm750fb/ddk750_reg.h +++ b/drivers/staging/sm750fb/ddk750_reg.h @@ -601,13 +601,13 @@ #define PANEL_PLANE_TL 0x08001C #define PANEL_PLANE_TL_TOP_SHIFT 16 -#define PANEL_PLANE_TL_TOP_MASK (0xeff << 16) -#define PANEL_PLANE_TL_LEFT_MASK 0xeff +#define PANEL_PLANE_TL_TOP_MASK (0x7ff << 16) +#define PANEL_PLANE_TL_LEFT_MASK 0x7ff #define PANEL_PLANE_BR 0x080020 #define PANEL_PLANE_BR_BOTTOM_SHIFT 16 -#define PANEL_PLANE_BR_BOTTOM_MASK (0xeff << 16) -#define PANEL_PLANE_BR_RIGHT_MASK 0xeff +#define PANEL_PLANE_BR_BOTTOM_MASK (0x7ff << 16) +#define PANEL_PLANE_BR_RIGHT_MASK 0x7ff #define PANEL_HORIZONTAL_TOTAL 0x080024 #define PANEL_HORIZONTAL_TOTAL_TOTAL_SHIFT 16 From a33547cc764ca994d27a8fcc5fc61fbf4b2f7361 Mon Sep 17 00:00:00 2001 From: Andrew Jeffery Date: Thu, 3 Nov 2016 01:07:56 +1030 Subject: [PATCH 149/319] pinctrl-aspeed-g5: Never set SCU90[6] MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If a pin depending on bit 6 in SCU90 is requested for GPIO, the export will succeed but changes to the GPIO's value will not be accepted by the hardware. This is because the pinmux driver has misconfigured the SCU by writing 1 to the reserved bit. The description of SCU90[6] from the datasheet is 'Reserved, must keep at value ”0”'. The fix is to switch pinmux from the bit-flipping macro to explicitly configuring the .enable and .disable values to zero. The patch has been tested on an AST2500 EVB. Fixes: 56e57cb6c07f (pinctrl: Add pinctrl-aspeed-g5 driver) Reported-by: Uma Yadlapati Signed-off-by: Andrew Jeffery Reviewed-by: Joel Stanley Signed-off-by: Linus Walleij --- drivers/pinctrl/aspeed/pinctrl-aspeed-g5.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/aspeed/pinctrl-aspeed-g5.c b/drivers/pinctrl/aspeed/pinctrl-aspeed-g5.c index c8c72e8259d3..87b46390b695 100644 --- a/drivers/pinctrl/aspeed/pinctrl-aspeed-g5.c +++ b/drivers/pinctrl/aspeed/pinctrl-aspeed-g5.c @@ -26,7 +26,7 @@ #define ASPEED_G5_NR_PINS 228 -#define COND1 SIG_DESC_BIT(SCU90, 6, 0) +#define COND1 { SCU90, BIT(6), 0, 0 } #define COND2 { SCU94, GENMASK(1, 0), 0, 0 } #define B14 0 From 55abe8165f31ffb83ce8b24da959b61362dca4c4 Mon Sep 17 00:00:00 2001 From: Ian Abbott Date: Thu, 27 Oct 2016 20:28:36 +0100 Subject: [PATCH 150/319] staging: comedi: ni_tio: fix buggy ni_tio_clock_period_ps() return value `ni_tio_clock_period_ps()` used to return the clock period in picoseconds, and had a `BUG()` call for invalid cases. It was changed to pass the clock period back via a pointer parameter and return an error for the invalid cases. Unfortunately the code to handle user-specified clock sources with user-specified clock period is still returning the clock period the old way, which can lead to the caller not getting the clock period, or seeing an unexpected error. Fix it by passing the clock period via the pointer parameter and returning `0`. Fixes: b42ca86ad605 ("staging: comedi: ni_tio: remove BUG() checks for ni_tio_get_clock_src()") Signed-off-by: Ian Abbott Cc: # 4.7+ Signed-off-by: Greg Kroah-Hartman --- drivers/staging/comedi/drivers/ni_tio.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/staging/comedi/drivers/ni_tio.c b/drivers/staging/comedi/drivers/ni_tio.c index 7043eb0543f6..5ab49a798164 100644 --- a/drivers/staging/comedi/drivers/ni_tio.c +++ b/drivers/staging/comedi/drivers/ni_tio.c @@ -207,7 +207,8 @@ static int ni_tio_clock_period_ps(const struct ni_gpct *counter, * clock period is specified by user with prescaling * already taken into account. */ - return counter->clock_period_ps; + *period_ps = counter->clock_period_ps; + return 0; } switch (generic_clock_source & NI_GPCT_PRESCALE_MODE_CLOCK_SRC_MASK) { From d9966f1de990577b49903061cfcea2d1e85353fb Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 1 Nov 2016 11:55:02 +0100 Subject: [PATCH 151/319] staging: greybus: arche-platform: fix device reference leak Make sure to drop the device reference taken by of_find_device_by_node() before returning from arche_platform_change_state(). Note that this code is expected to be removed, but let's fix up the leak nonetheless. Fixes: 886aba558b9e ("greybus: arche-platform: Export fn to allow...") Signed-off-by: Johan Hovold Acked-by: Viresh Kumar Reviewed-by: Vaibhav Hiremath Signed-off-by: Greg Kroah-Hartman --- drivers/staging/greybus/arche-platform.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/staging/greybus/arche-platform.c b/drivers/staging/greybus/arche-platform.c index 34307ac3f255..d33d6fe078ad 100644 --- a/drivers/staging/greybus/arche-platform.c +++ b/drivers/staging/greybus/arche-platform.c @@ -186,6 +186,7 @@ int arche_platform_change_state(enum arche_platform_state state, exit: spin_unlock_irqrestore(&arche_pdata->wake_lock, flags); mutex_unlock(&arche_pdata->platform_state_mutex); + put_device(&pdev->dev); of_node_put(np); return ret; } From d8f8a74d5fece355d2234e1731231d1aebc66b38 Mon Sep 17 00:00:00 2001 From: Paul Fertser Date: Thu, 27 Oct 2016 17:22:08 +0300 Subject: [PATCH 152/319] drivers: staging: nvec: remove bogus reset command for PS/2 interface This command was sent behind serio's back and the answer to it was confusing atkbd probe function which lead to the elantech touchpad getting detected as a keyboard. To prevent this from happening just let every party do its part of the job. Signed-off-by: Paul Fertser Acked-by: Marc Dietrich Cc: stable # 3.4+ Signed-off-by: Greg Kroah-Hartman --- drivers/staging/nvec/nvec_ps2.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/staging/nvec/nvec_ps2.c b/drivers/staging/nvec/nvec_ps2.c index a324322ee0ad..d63c4efa8074 100644 --- a/drivers/staging/nvec/nvec_ps2.c +++ b/drivers/staging/nvec/nvec_ps2.c @@ -106,7 +106,6 @@ static int nvec_mouse_probe(struct platform_device *pdev) { struct nvec_chip *nvec = dev_get_drvdata(pdev->dev.parent); struct serio *ser_dev; - char mouse_reset[] = { NVEC_PS2, SEND_COMMAND, PSMOUSE_RST, 3 }; ser_dev = devm_kzalloc(&pdev->dev, sizeof(struct serio), GFP_KERNEL); if (!ser_dev) @@ -127,9 +126,6 @@ static int nvec_mouse_probe(struct platform_device *pdev) serio_register_port(ser_dev); - /* mouse reset */ - nvec_write_async(nvec, mouse_reset, sizeof(mouse_reset)); - return 0; } From 17c1c9ba15b238ef79b51cf40d855c05b58d5934 Mon Sep 17 00:00:00 2001 From: Paul Fertser Date: Thu, 27 Oct 2016 17:22:09 +0300 Subject: [PATCH 153/319] Revert "staging: nvec: ps2: change serio type to passthrough" This reverts commit 36b30d6138f4677514aca35ab76c20c1604baaad. This is necessary to detect paz00 (ac100) touchpad properly as one speaking ETPS/2 protocol. Without it X.org's synaptics driver doesn't work as the touchpad is detected as an ImPS/2 mouse instead. Commit ec6184b1c717b8768122e25fe6d312f609cc1bb4 changed the way auto-detection is performed on ports marked as pass through and made the issue apparent. A pass through port is an additional PS/2 port used to connect a slave device to a master device that is using PS/2 to communicate with the host (so slave's PS/2 communication is tunneled over master's PS/2 link). "Synaptics PS/2 TouchPad Interfacing Guide" describes such a setup (PS/2 PASS-THROUGH OPTION section). Since paz00's embedded controller is not connected to a PS/2 port itself, the PS/2 interface it exposes is not a pass-through one. Signed-off-by: Paul Fertser Acked-by: Marc Dietrich Fixes: 36b30d6138f4 ("staging: nvec: ps2: change serio type to passthrough") Cc: stable # 3.4+ Signed-off-by: Greg Kroah-Hartman --- drivers/staging/nvec/nvec_ps2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/nvec/nvec_ps2.c b/drivers/staging/nvec/nvec_ps2.c index d63c4efa8074..910e87b761e7 100644 --- a/drivers/staging/nvec/nvec_ps2.c +++ b/drivers/staging/nvec/nvec_ps2.c @@ -111,7 +111,7 @@ static int nvec_mouse_probe(struct platform_device *pdev) if (!ser_dev) return -ENOMEM; - ser_dev->id.type = SERIO_PS_PSTHRU; + ser_dev->id.type = SERIO_8042; ser_dev->write = ps2_sendcommand; ser_dev->start = ps2_startstreaming; ser_dev->stop = ps2_stopstreaming; From 68fae2f3df455f53d0dfe33483a49020b3b758f3 Mon Sep 17 00:00:00 2001 From: Marc Dietrich Date: Tue, 1 Nov 2016 13:59:40 +0100 Subject: [PATCH 154/319] staging: nvec: remove managed resource from PS2 driver This basicly reverts commit e534f3e9 (staging:nvec: Introduce the use of the managed version of kzalloc). Serio struct should never by managed because it is refcounted. Doing so will lead to a double free oops on module remove. Signed-off-by: Marc Dietrich Fixes: e534f3e9429f ("staging:nvec: Introduce the use of the managed version of kzalloc") Cc: stable # 3.15+ Signed-off-by: Greg Kroah-Hartman --- drivers/staging/nvec/nvec_ps2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/nvec/nvec_ps2.c b/drivers/staging/nvec/nvec_ps2.c index 910e87b761e7..499952c8ef39 100644 --- a/drivers/staging/nvec/nvec_ps2.c +++ b/drivers/staging/nvec/nvec_ps2.c @@ -107,7 +107,7 @@ static int nvec_mouse_probe(struct platform_device *pdev) struct nvec_chip *nvec = dev_get_drvdata(pdev->dev.parent); struct serio *ser_dev; - ser_dev = devm_kzalloc(&pdev->dev, sizeof(struct serio), GFP_KERNEL); + ser_dev = kzalloc(sizeof(struct serio), GFP_KERNEL); if (!ser_dev) return -ENOMEM; From e8a6123e9ead1b0d40349809e51de9341312fe08 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Sun, 23 Oct 2016 13:55:34 +0200 Subject: [PATCH 155/319] x86/platform/intel-mid: Retrofit pci_platform_pm_ops ->get_state hook Commit cc7cc02bada8 ("PCI: Query platform firmware for device power state") augmented struct pci_platform_pm_ops with a ->get_state hook and implemented it for acpi_pci_platform_pm, the only pci_platform_pm_ops existing till v4.7. However v4.8 introduced another pci_platform_pm_ops for Intel Mobile Internet Devices with commit 5823d0893ec2 ("x86/platform/intel-mid: Add Power Management Unit driver"). It is missing the ->get_state hook, which is fatal since pci_set_platform_pm() enforces its presence. Andy Shevchenko reports that without the present commit, such a device "crashes without even a character printed out on serial console and reboots (since watchdog)". Retrofit mid_pci_platform_pm with the missing callback to fix the breakage. Acked-and-tested-by: Andy Shevchenko Fixes: cc7cc02bada8 ("PCI: Query platform firmware for device power state") Signed-off-by: Lukas Wunner Acked-by: Bjorn Helgaas Cc: linux-pci@vger.kernel.org Cc: Andy Shevchenko Link: http://lkml.kernel.org/r/7c1567d4c49303a4aada94ba16275cbf56b8976b.1477221514.git.lukas@wunner.de Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/intel-mid.h | 1 + arch/x86/platform/intel-mid/pwr.c | 19 +++++++++++++++++++ drivers/pci/pci-mid.c | 6 ++++++ 3 files changed, 26 insertions(+) diff --git a/arch/x86/include/asm/intel-mid.h b/arch/x86/include/asm/intel-mid.h index 5b6753d1f7f4..49da9f497b90 100644 --- a/arch/x86/include/asm/intel-mid.h +++ b/arch/x86/include/asm/intel-mid.h @@ -17,6 +17,7 @@ extern int intel_mid_pci_init(void); extern int intel_mid_pci_set_power_state(struct pci_dev *pdev, pci_power_t state); +extern pci_power_t intel_mid_pci_get_power_state(struct pci_dev *pdev); extern void intel_mid_pwr_power_off(void); diff --git a/arch/x86/platform/intel-mid/pwr.c b/arch/x86/platform/intel-mid/pwr.c index 5d3b45ad1c03..67375dda451c 100644 --- a/arch/x86/platform/intel-mid/pwr.c +++ b/arch/x86/platform/intel-mid/pwr.c @@ -272,6 +272,25 @@ int intel_mid_pci_set_power_state(struct pci_dev *pdev, pci_power_t state) } EXPORT_SYMBOL_GPL(intel_mid_pci_set_power_state); +pci_power_t intel_mid_pci_get_power_state(struct pci_dev *pdev) +{ + struct mid_pwr *pwr = midpwr; + int id, reg, bit; + u32 power; + + if (!pwr || !pwr->available) + return PCI_UNKNOWN; + + id = intel_mid_pwr_get_lss_id(pdev); + if (id < 0) + return PCI_UNKNOWN; + + reg = (id * LSS_PWS_BITS) / 32; + bit = (id * LSS_PWS_BITS) % 32; + power = mid_pwr_get_state(pwr, reg); + return (__force pci_power_t)((power >> bit) & 3); +} + void intel_mid_pwr_power_off(void) { struct mid_pwr *pwr = midpwr; diff --git a/drivers/pci/pci-mid.c b/drivers/pci/pci-mid.c index 55f453de562e..c7f3408e3148 100644 --- a/drivers/pci/pci-mid.c +++ b/drivers/pci/pci-mid.c @@ -29,6 +29,11 @@ static int mid_pci_set_power_state(struct pci_dev *pdev, pci_power_t state) return intel_mid_pci_set_power_state(pdev, state); } +static pci_power_t mid_pci_get_power_state(struct pci_dev *pdev) +{ + return intel_mid_pci_get_power_state(pdev); +} + static pci_power_t mid_pci_choose_state(struct pci_dev *pdev) { return PCI_D3hot; @@ -52,6 +57,7 @@ static bool mid_pci_need_resume(struct pci_dev *dev) static struct pci_platform_pm_ops mid_pci_platform_pm = { .is_manageable = mid_pci_power_manageable, .set_state = mid_pci_set_power_state, + .get_state = mid_pci_get_power_state, .choose_state = mid_pci_choose_state, .sleep_wake = mid_pci_sleep_wake, .run_wake = mid_pci_run_wake, From 6ebebeab5185f50d55c6a24d0abe47e5dac1b191 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 2 Nov 2016 15:49:08 +0200 Subject: [PATCH 156/319] mmc: sdhci: Fix CMD line reset interfering with ongoing data transfer CMD line reset during an ongoing data transfer can cause the data transfer to hang. Fix by delaying the reset until the data transfer is finished. Signed-off-by: Adrian Hunter Cc: stable@vger.kernel.org # v4.8+ Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci.c | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index 71654b90227f..fd0f24cddad4 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -2286,10 +2286,8 @@ static bool sdhci_request_done(struct sdhci_host *host) for (i = 0; i < SDHCI_MAX_MRQS; i++) { mrq = host->mrqs_done[i]; - if (mrq) { - host->mrqs_done[i] = NULL; + if (mrq) break; - } } if (!mrq) { @@ -2320,6 +2318,17 @@ static bool sdhci_request_done(struct sdhci_host *host) * upon error conditions. */ if (sdhci_needs_reset(host, mrq)) { + /* + * Do not finish until command and data lines are available for + * reset. Note there can only be one other mrq, so it cannot + * also be in mrqs_done, otherwise host->cmd and host->data_cmd + * would both be null. + */ + if (host->cmd || host->data_cmd) { + spin_unlock_irqrestore(&host->lock, flags); + return true; + } + /* Some controllers need this kick or reset won't work here */ if (host->quirks & SDHCI_QUIRK_CLOCK_BEFORE_RESET) /* This is to force an update */ @@ -2327,10 +2336,8 @@ static bool sdhci_request_done(struct sdhci_host *host) /* Spec says we should do both at the same time, but Ricoh controllers do not like that. */ - if (!host->cmd) - sdhci_do_reset(host, SDHCI_RESET_CMD); - if (!host->data_cmd) - sdhci_do_reset(host, SDHCI_RESET_DATA); + sdhci_do_reset(host, SDHCI_RESET_CMD); + sdhci_do_reset(host, SDHCI_RESET_DATA); host->pending_reset = false; } @@ -2338,6 +2345,8 @@ static bool sdhci_request_done(struct sdhci_host *host) if (!sdhci_has_requests(host)) sdhci_led_deactivate(host); + host->mrqs_done[i] = NULL; + mmiowb(); spin_unlock_irqrestore(&host->lock, flags); From 69b962a65a547690a356f9f76bc4f53db538ac49 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 2 Nov 2016 15:49:09 +0200 Subject: [PATCH 157/319] mmc: sdhci: Fix unexpected data interrupt handling In the busy response case (i.e. !host->data), an unexpected data interrupt would result in clearing the data command as though it had completed but without informing the upper layers and thus resulting in a hang. Fix by only clearing the data command for data interrupts that are expected. Signed-off-by: Adrian Hunter Cc: stable@vger.kernel.org # v4.8+ Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index fd0f24cddad4..0dd3c31e23bb 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -2521,9 +2521,6 @@ static void sdhci_data_irq(struct sdhci_host *host, u32 intmask) if (!host->data) { struct mmc_command *data_cmd = host->data_cmd; - if (data_cmd) - host->data_cmd = NULL; - /* * The "data complete" interrupt is also used to * indicate that a busy state has ended. See comment @@ -2531,11 +2528,13 @@ static void sdhci_data_irq(struct sdhci_host *host, u32 intmask) */ if (data_cmd && (data_cmd->flags & MMC_RSP_BUSY)) { if (intmask & SDHCI_INT_DATA_TIMEOUT) { + host->data_cmd = NULL; data_cmd->error = -ETIMEDOUT; sdhci_finish_mrq(host, data_cmd->mrq); return; } if (intmask & SDHCI_INT_DATA_END) { + host->data_cmd = NULL; /* * Some cards handle busy-end interrupt * before the command completed, so make From fe5fb2e3b58f866f82a80fe7c7866ea6d0b306b9 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 2 Nov 2016 15:49:10 +0200 Subject: [PATCH 158/319] mmc: sdhci: Reset cmd and data circuits after tuning failure To prevent subsequent commands failing, ensure the cmd and data circuits are reset after a tuning timeout. Signed-off-by: Adrian Hunter Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index 0dd3c31e23bb..542aabc48032 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -2086,6 +2086,10 @@ static int sdhci_execute_tuning(struct mmc_host *mmc, u32 opcode) if (!host->tuning_done) { pr_info(DRIVER_NAME ": Timeout waiting for Buffer Read Ready interrupt during tuning procedure, falling back to fixed sampling clock\n"); + + sdhci_do_reset(host, SDHCI_RESET_CMD); + sdhci_do_reset(host, SDHCI_RESET_DATA); + ctrl = sdhci_readw(host, SDHCI_HOST_CONTROL2); ctrl &= ~SDHCI_CTRL_TUNED_CLK; ctrl &= ~SDHCI_CTRL_EXEC_TUNING; From 086b0ddbeff5317026417c29752129d28a216c66 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 2 Nov 2016 15:49:11 +0200 Subject: [PATCH 159/319] mmc: sdhci: Fix missing enhanced strobe setting during runtime resume Restore enhanced strobe setting during runtime resume. Signed-off-by: Adrian Hunter Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index 542aabc48032..42ef3ebb1d8c 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -2924,6 +2924,10 @@ int sdhci_runtime_resume_host(struct sdhci_host *host) spin_unlock_irqrestore(&host->lock, flags); } + if ((mmc->caps2 & MMC_CAP2_HS400_ES) && + mmc->ops->hs400_enhanced_strobe) + mmc->ops->hs400_enhanced_strobe(mmc, &mmc->ios); + spin_lock_irqsave(&host->lock, flags); host->runtime_suspended = false; From be55f7bee39ed019713c5a1e930214e6d744fe0f Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 4 Nov 2016 13:41:05 +0200 Subject: [PATCH 160/319] mmc: mmc_test: Fix "Commands during non-blocking write" tests mmc_test_check_result_async() requires that struct mmc_async_req is contained within struct mmc_test_async_req. Fix the "Commands during non-blocking write" tests so that is the case. Fixes: 4bbb9aac9a9a ("mmc: mmc_test: Add tests for sending commands during transfer") Signed-off-by: Adrian Hunter Tested-by: Ritesh Harjani Signed-off-by: Ulf Hansson --- drivers/mmc/card/mmc_test.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/mmc/card/mmc_test.c b/drivers/mmc/card/mmc_test.c index 5a8dc5a76e0d..3678220964fe 100644 --- a/drivers/mmc/card/mmc_test.c +++ b/drivers/mmc/card/mmc_test.c @@ -2347,7 +2347,7 @@ static int mmc_test_ongoing_transfer(struct mmc_test_card *test, struct mmc_test_req *rq = mmc_test_req_alloc(); struct mmc_host *host = test->card->host; struct mmc_test_area *t = &test->area; - struct mmc_async_req areq; + struct mmc_test_async_req test_areq = { .test = test }; struct mmc_request *mrq; unsigned long timeout; bool expired = false; @@ -2363,8 +2363,8 @@ static int mmc_test_ongoing_transfer(struct mmc_test_card *test, mrq->sbc = &rq->sbc; mrq->cap_cmd_during_tfr = true; - areq.mrq = mrq; - areq.err_check = mmc_test_check_result_async; + test_areq.areq.mrq = mrq; + test_areq.areq.err_check = mmc_test_check_result_async; mmc_test_prepare_mrq(test, mrq, t->sg, t->sg_len, dev_addr, t->blocks, 512, write); @@ -2378,7 +2378,7 @@ static int mmc_test_ongoing_transfer(struct mmc_test_card *test, /* Start ongoing data request */ if (use_areq) { - mmc_start_req(host, &areq, &ret); + mmc_start_req(host, &test_areq.areq, &ret); if (ret) goto out_free; } else { From fe1b5700c70faac6e027982d59667bc6020de5a8 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Fri, 4 Nov 2016 18:32:33 +0100 Subject: [PATCH 161/319] mmc: mmc: Use 500ms as the default generic CMD6 timeout In the eMMC 4.51 version of the spec, an EXT_CSD field called GENERIC_CMD6_TIME[248] was added. This allows cards to specify the maximum time it may need to move out from its busy state, when a CMD6 command has been sent. In cases when the card is compliant to versions < 4.51 of the eMMC spec, obviously the core needs to use a fall-back value for this timeout, which currently is set to 10 minutes. This value is completely in the wrong range and importantly in some cases it causes a card initialization to take more than 10 minute to complete. Earlier this scenario was avoided as the mmc core used CMD13 to poll the card, to find out when it stopped signaling busy. Commit 08573eaf1a70 ("mmc: mmc: do not use CMD13 to get status after speed mode switch") changed this behavior. Instead of reverting that commit, which would cause other issues, let's instead start by picking a simple solution for the problem, by using a 500ms default generic CMD6 timeout. The reason for using exactly 500ms, comes from observations that shows it's quite common for cards to specify 250ms. 500ms is two times that value so likely it should be enough for most cards. Cc: # v4.8+ Fixes: 08573eaf1a70 ("mmc: mmc: do not use CMD13 to get status after speed mode switch") Signed-off-by: Ulf Hansson Tested-by: Stephen Boyd Tested-by: Linus Walleij --- drivers/mmc/core/mmc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c index 39fc5b2b96c5..df19777068a6 100644 --- a/drivers/mmc/core/mmc.c +++ b/drivers/mmc/core/mmc.c @@ -26,6 +26,8 @@ #include "mmc_ops.h" #include "sd_ops.h" +#define DEFAULT_CMD6_TIMEOUT_MS 500 + static const unsigned int tran_exp[] = { 10000, 100000, 1000000, 10000000, 0, 0, 0, 0 @@ -571,6 +573,7 @@ static int mmc_decode_ext_csd(struct mmc_card *card, u8 *ext_csd) card->erased_byte = 0x0; /* eMMC v4.5 or later */ + card->ext_csd.generic_cmd6_time = DEFAULT_CMD6_TIMEOUT_MS; if (card->ext_csd.rev >= 6) { card->ext_csd.feature_support |= MMC_DISCARD_FEATURE; From f91346e8b5f46aaf12f1df26e87140584ffd1b3f Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Sat, 5 Nov 2016 17:45:07 -0200 Subject: [PATCH 162/319] mmc: mxs: Initialize the spinlock prior to using it An interrupt may occur right after devm_request_irq() is called and prior to the spinlock initialization, leading to a kernel oops, as the interrupt handler uses the spinlock. In order to prevent this problem, move the spinlock initialization prior to requesting the interrupts. Fixes: e4243f13d10e (mmc: mxs-mmc: add mmc host driver for i.MX23/28) Signed-off-by: Fabio Estevam Reviewed-by: Marek Vasut Signed-off-by: Ulf Hansson --- drivers/mmc/host/mxs-mmc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/mmc/host/mxs-mmc.c b/drivers/mmc/host/mxs-mmc.c index d839147e591d..44ecebd1ea8c 100644 --- a/drivers/mmc/host/mxs-mmc.c +++ b/drivers/mmc/host/mxs-mmc.c @@ -661,13 +661,13 @@ static int mxs_mmc_probe(struct platform_device *pdev) platform_set_drvdata(pdev, mmc); + spin_lock_init(&host->lock); + ret = devm_request_irq(&pdev->dev, irq_err, mxs_mmc_irq_handler, 0, dev_name(&pdev->dev), host); if (ret) goto out_free_dma; - spin_lock_init(&host->lock); - ret = mmc_add_host(mmc); if (ret) goto out_free_dma; From 8d83bc22b259e5526625b6d298f637786c71129f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 11 Oct 2016 20:52:46 +0300 Subject: [PATCH 163/319] drm/i915: Respect alternate_ddc_pin for all DDI ports MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The VBT provides the platform a way to mix and match the DDI ports vs. GMBUS pins. Currently we only trust the VBT for DDI E, which I suppose has no standard GMBUS pin assignment. However, there are machines out there that use a non-standard mapping for the other ports as well. Let's start trusting the VBT on this one for all ports on DDI platforms. I've structured the code such that other platforms could easily start using this as well, by simply filling in the ddi_port_info. IIRC there may be CHV system that might actually need this. v2: Include a commit message, include a debug message during init Cc: stable@vger.kernel.org Cc: Maarten Maathuis Tested-by: Maarten Maathuis Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=97877 Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/1476208368-5710-3-git-send-email-ville.syrjala@linux.intel.com Reviewed-by: Jim Bride (cherry picked from commit e4ab73a13291fc844c9e24d5c347bd95818544d2) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_hdmi.c | 84 ++++++++++++++++++------------- 1 file changed, 48 insertions(+), 36 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c index f40a35f2913a..13c306173f27 100644 --- a/drivers/gpu/drm/i915/intel_hdmi.c +++ b/drivers/gpu/drm/i915/intel_hdmi.c @@ -1799,6 +1799,50 @@ intel_hdmi_add_properties(struct intel_hdmi *intel_hdmi, struct drm_connector *c intel_hdmi->aspect_ratio = HDMI_PICTURE_ASPECT_NONE; } +static u8 intel_hdmi_ddc_pin(struct drm_i915_private *dev_priv, + enum port port) +{ + const struct ddi_vbt_port_info *info = + &dev_priv->vbt.ddi_port_info[port]; + u8 ddc_pin; + + if (info->alternate_ddc_pin) { + DRM_DEBUG_KMS("Using DDC pin 0x%x for port %c (VBT)\n", + info->alternate_ddc_pin, port_name(port)); + return info->alternate_ddc_pin; + } + + switch (port) { + case PORT_B: + if (IS_BROXTON(dev_priv)) + ddc_pin = GMBUS_PIN_1_BXT; + else + ddc_pin = GMBUS_PIN_DPB; + break; + case PORT_C: + if (IS_BROXTON(dev_priv)) + ddc_pin = GMBUS_PIN_2_BXT; + else + ddc_pin = GMBUS_PIN_DPC; + break; + case PORT_D: + if (IS_CHERRYVIEW(dev_priv)) + ddc_pin = GMBUS_PIN_DPD_CHV; + else + ddc_pin = GMBUS_PIN_DPD; + break; + default: + MISSING_CASE(port); + ddc_pin = GMBUS_PIN_DPB; + break; + } + + DRM_DEBUG_KMS("Using DDC pin 0x%x for port %c (platform default)\n", + ddc_pin, port_name(port)); + + return ddc_pin; +} + void intel_hdmi_init_connector(struct intel_digital_port *intel_dig_port, struct intel_connector *intel_connector) { @@ -1808,7 +1852,6 @@ void intel_hdmi_init_connector(struct intel_digital_port *intel_dig_port, struct drm_device *dev = intel_encoder->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); enum port port = intel_dig_port->port; - uint8_t alternate_ddc_pin; DRM_DEBUG_KMS("Adding HDMI connector on port %c\n", port_name(port)); @@ -1826,12 +1869,10 @@ void intel_hdmi_init_connector(struct intel_digital_port *intel_dig_port, connector->doublescan_allowed = 0; connector->stereo_allowed = 1; + intel_hdmi->ddc_bus = intel_hdmi_ddc_pin(dev_priv, port); + switch (port) { case PORT_B: - if (IS_BROXTON(dev_priv)) - intel_hdmi->ddc_bus = GMBUS_PIN_1_BXT; - else - intel_hdmi->ddc_bus = GMBUS_PIN_DPB; /* * On BXT A0/A1, sw needs to activate DDIA HPD logic and * interrupts to check the external panel connection. @@ -1842,46 +1883,17 @@ void intel_hdmi_init_connector(struct intel_digital_port *intel_dig_port, intel_encoder->hpd_pin = HPD_PORT_B; break; case PORT_C: - if (IS_BROXTON(dev_priv)) - intel_hdmi->ddc_bus = GMBUS_PIN_2_BXT; - else - intel_hdmi->ddc_bus = GMBUS_PIN_DPC; intel_encoder->hpd_pin = HPD_PORT_C; break; case PORT_D: - if (WARN_ON(IS_BROXTON(dev_priv))) - intel_hdmi->ddc_bus = GMBUS_PIN_DISABLED; - else if (IS_CHERRYVIEW(dev_priv)) - intel_hdmi->ddc_bus = GMBUS_PIN_DPD_CHV; - else - intel_hdmi->ddc_bus = GMBUS_PIN_DPD; intel_encoder->hpd_pin = HPD_PORT_D; break; case PORT_E: - /* On SKL PORT E doesn't have seperate GMBUS pin - * We rely on VBT to set a proper alternate GMBUS pin. */ - alternate_ddc_pin = - dev_priv->vbt.ddi_port_info[PORT_E].alternate_ddc_pin; - switch (alternate_ddc_pin) { - case DDC_PIN_B: - intel_hdmi->ddc_bus = GMBUS_PIN_DPB; - break; - case DDC_PIN_C: - intel_hdmi->ddc_bus = GMBUS_PIN_DPC; - break; - case DDC_PIN_D: - intel_hdmi->ddc_bus = GMBUS_PIN_DPD; - break; - default: - MISSING_CASE(alternate_ddc_pin); - } intel_encoder->hpd_pin = HPD_PORT_E; break; - case PORT_A: - intel_encoder->hpd_pin = HPD_PORT_A; - /* Internal port only for eDP. */ default: - BUG(); + MISSING_CASE(port); + return; } if (IS_VALLEYVIEW(dev) || IS_CHERRYVIEW(dev)) { From cdffe3e252bb55e82ee89bbdfe8d2f18b6157c28 Mon Sep 17 00:00:00 2001 From: Lyude Date: Wed, 26 Oct 2016 12:36:09 -0400 Subject: [PATCH 164/319] drm/i915/vlv: Prevent enabling hpd polling in late suspend MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit One of the CI machines began to run into issues with the hpd poller suddenly waking up in the midst of the late suspend phase. It looks like this is getting caused by the fact we now deinitialize power wells in late suspend, which means that intel_hpd_poll_init() gets called in late suspend causing polling to get re-enabled. So, when deinitializing power wells on valleyview we now refrain from enabling polling in the midst of suspend. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=98040 Fixes: 19625e85c6ec ("drm/i915: Enable polling when we don't have hpd") Signed-off-by: Lyude Cc: Ville Syrjälä Cc: Jani Saarinen Cc: Petry Latvala Reviewed-by: Ville Syrjälä Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1477499769-1966-1-git-send-email-lyude@redhat.com (cherry picked from commit b64b540931483cca3200d98756bed6ad0e01d75c) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_runtime_pm.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c index 6c11168facd6..a38c2fefe85a 100644 --- a/drivers/gpu/drm/i915/intel_runtime_pm.c +++ b/drivers/gpu/drm/i915/intel_runtime_pm.c @@ -1139,7 +1139,9 @@ static void vlv_display_power_well_deinit(struct drm_i915_private *dev_priv) intel_power_sequencer_reset(dev_priv); - intel_hpd_poll_init(dev_priv); + /* Prevent us from re-enabling polling on accident in late suspend */ + if (!dev_priv->drm.dev->power.is_suspended) + intel_hpd_poll_init(dev_priv); } static void vlv_display_power_well_enable(struct drm_i915_private *dev_priv, From fbb21c5202ae7f1e71e832b1af59fb047da6383e Mon Sep 17 00:00:00 2001 From: Dhinakaran Pandiyan Date: Tue, 1 Nov 2016 11:47:59 -0700 Subject: [PATCH 165/319] drm/i915/dp: BDW cdclk fix for DP audio MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit According to BSpec, cdclk for BDW has to be not less than 432 MHz with DP audio enabled, port width x4, and link rate HBR2 (5.4 GHz). With cdclk less than 432 MHz, enabling audio leads to pipe FIFO underruns and displays cycling on/off. From BSpec: "Display» BDW-SKL» dpr» [Register] DP_TP_CTL [BDW+,EXCLUDE(CHV)] Workaround : Do not use DisplayPort with CDCLK less than 432 MHz, audio enabled, port width x4, and link rate HBR2 (5.4 GHz), or else there may be audio corruption or screen corruption." Since, some DP configurations (e.g., MST) use port width x4 and HBR2 link rate, let's increase the cdclk to >= 432 MHz to enable audio for those cases. v4: Changed commit message v3: Combine BDW pixel rate adjustments into a function (Jani) v2: Restrict fix to BDW Retain the set cdclk across modesets (Ville) Cc: stable@vger.kernel.org Signed-off-by: Dhinakaran Pandiyan Reviewed-by: Ville Syrjälä Reviewed-by: Jani Nikula Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1478026080-2925-1-git-send-email-dhinakaran.pandiyan@intel.com (cherry picked from commit b30ce9e0552aa017ac6f2243f3c2d8e36fe52e69) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_display.c | 27 ++++++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 0ad1879bfd9d..4f57f8c6074e 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -10243,6 +10243,27 @@ static void bxt_modeset_commit_cdclk(struct drm_atomic_state *old_state) bxt_set_cdclk(to_i915(dev), req_cdclk); } +static int bdw_adjust_min_pipe_pixel_rate(struct intel_crtc_state *crtc_state, + int pixel_rate) +{ + /* pixel rate mustn't exceed 95% of cdclk with IPS on BDW */ + if (crtc_state->ips_enabled) + pixel_rate = DIV_ROUND_UP(pixel_rate * 100, 95); + + /* BSpec says "Do not use DisplayPort with CDCLK less than + * 432 MHz, audio enabled, port width x4, and link rate + * HBR2 (5.4 GHz), or else there may be audio corruption or + * screen corruption." + */ + if (intel_crtc_has_dp_encoder(crtc_state) && + crtc_state->has_audio && + crtc_state->port_clock >= 540000 && + crtc_state->lane_count == 4) + pixel_rate = max(432000, pixel_rate); + + return pixel_rate; +} + /* compute the max rate for new configuration */ static int ilk_max_pixel_rate(struct drm_atomic_state *state) { @@ -10268,9 +10289,9 @@ static int ilk_max_pixel_rate(struct drm_atomic_state *state) pixel_rate = ilk_pipe_pixel_rate(crtc_state); - /* pixel rate mustn't exceed 95% of cdclk with IPS on BDW */ - if (IS_BROADWELL(dev_priv) && crtc_state->ips_enabled) - pixel_rate = DIV_ROUND_UP(pixel_rate * 100, 95); + if (IS_BROADWELL(dev_priv)) + pixel_rate = bdw_adjust_min_pipe_pixel_rate(crtc_state, + pixel_rate); intel_state->min_pixclk[i] = pixel_rate; } From 61e0c5438866d0e737937fc35d752538960e1e9f Mon Sep 17 00:00:00 2001 From: Dhinakaran Pandiyan Date: Wed, 2 Nov 2016 13:13:21 -0700 Subject: [PATCH 166/319] drm/i915/dp: Extend BDW DP audio workaround to GEN9 platforms According to BSpec, cdclk for BDW has to be not less than 432 MHz with DP audio enabled, port width x4, and link rate HBR2 (5.4 GHz). With cdclk less than 432 MHz, enabling audio leads to pipe FIFO underruns and displays cycling on/off. Let's apply this work around to GEN9 platforms too, as it fixes the same issue. v2: Move drm_device to drm_i915_private conversion Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=97907 Cc: stable@vger.kernel.org Cc: Libin Yang Signed-off-by: Dhinakaran Pandiyan Reviewed-by: Jani Nikula Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1478117601-19122-1-git-send-email-dhinakaran.pandiyan@intel.com (cherry picked from commit 9c7540241885838cfc7fa58c4a8bd75be0303ed1) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_display.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 4f57f8c6074e..81c11499bcf0 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -10246,8 +10246,10 @@ static void bxt_modeset_commit_cdclk(struct drm_atomic_state *old_state) static int bdw_adjust_min_pipe_pixel_rate(struct intel_crtc_state *crtc_state, int pixel_rate) { + struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); + /* pixel rate mustn't exceed 95% of cdclk with IPS on BDW */ - if (crtc_state->ips_enabled) + if (IS_BROADWELL(dev_priv) && crtc_state->ips_enabled) pixel_rate = DIV_ROUND_UP(pixel_rate * 100, 95); /* BSpec says "Do not use DisplayPort with CDCLK less than @@ -10289,7 +10291,7 @@ static int ilk_max_pixel_rate(struct drm_atomic_state *state) pixel_rate = ilk_pipe_pixel_rate(crtc_state); - if (IS_BROADWELL(dev_priv)) + if (IS_BROADWELL(dev_priv) || IS_GEN9(dev_priv)) pixel_rate = bdw_adjust_min_pipe_pixel_rate(crtc_state, pixel_rate); From c4b8c570447a7bc171829532269878345b3ea9d0 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 7 Nov 2016 10:54:43 +0000 Subject: [PATCH 167/319] drm/i915: Round tile chunks up for constructing partial VMAs When we split a large object up into chunks for GTT faulting (because we can't fit the whole object into the aperture) we have to align our cuts with the fence registers. Each partial VMA must cover a complete set of tile rows or the offset into each partial VMA is not aligned with the whole image. Currently we enforce a minimum size on each partial VMA, but this minimum size itself was not aligned to the tile row causing distortion. Reported-by: Andreas Reis Reported-by: Chris Clayton Reported-by: Norbert Preining Tested-by: Norbert Preining Tested-by: Chris Clayton Fixes: 03af84fe7f48 ("drm/i915: Choose partial chunksize based on tile row size") Fixes: a61007a83a46 ("drm/i915: Fix partial GGTT faulting") # enabling patch Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=98402 Testcase: igt/gem_mmap_gtt/medium-copy-odd Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Cc: Jani Nikula Cc: # v4.9-rc1+ Link: http://patchwork.freedesktop.org/patch/msgid/20161107105443.27855-1-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen (cherry picked from commit 0ef723cbceb6dce8116e75d44c5b8679b2eba69a) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_gem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 23960de81b57..44b3f01faed3 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1806,7 +1806,7 @@ int i915_gem_fault(struct vm_area_struct *area, struct vm_fault *vmf) /* Use a partial view if it is bigger than available space */ chunk_size = MIN_CHUNK_PAGES; if (i915_gem_object_is_tiled(obj)) - chunk_size = max(chunk_size, tile_row_pages(obj)); + chunk_size = roundup(chunk_size, tile_row_pages(obj)); memset(&view, 0, sizeof(view)); view.type = I915_GGTT_VIEW_PARTIAL; From 54905ab5fe7aa453610e31cec640e528aaedb2e2 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 7 Nov 2016 11:01:28 +0000 Subject: [PATCH 168/319] drm/i915: Limit Valleyview and earlier to only using mappable scanout MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Valleyview appears to be limited to only scanning out from the first 512MiB of the Global GTT. Lets presume that this behaviour was inherited from the display block copied from g4x (not Ironlake) and all earlier generations are similarly affected, though testing suggests different symptoms. For simplicity, impose that these platforms must scanout from the mappable region. (For extra simplicity, use HAS_GMCH_DISPLAY even though this catches Cherryview which does not appear to be limited to the low aperture for its scanout.) v2: Use HAS_GMCH_DISPLAY() to more clearly convey my intent about limiting this workaround to the old style of display engine. v3: Update changelog to reflect testing by Ville Syrjälä v4: Include the changes to the comments as well Reported-by: Luis Botello Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=98036 Fixes: 2efb813d5388 ("drm/i915: Fallback to using unmappable memory for scanout") Signed-off-by: Chris Wilson Cc: Akash Goel Cc: Joonas Lahtinen Cc: Ville Syrjälä Cc: # v4.9-rc1+ Link: http://patchwork.freedesktop.org/patch/msgid/20161107110128.28762-1-chris@chris-wilson.co.uk Reviewed-by: Ville Syrjälä (cherry picked from commit 767a222e47cc13239d38018887f911fec06169ea) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_gem.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 44b3f01faed3..91ab7e9d6d2e 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3543,8 +3543,22 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, if (view->type == I915_GGTT_VIEW_NORMAL) vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, PIN_MAPPABLE | PIN_NONBLOCK); - if (IS_ERR(vma)) - vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, 0); + if (IS_ERR(vma)) { + struct drm_i915_private *i915 = to_i915(obj->base.dev); + unsigned int flags; + + /* Valleyview is definitely limited to scanning out the first + * 512MiB. Lets presume this behaviour was inherited from the + * g4x display engine and that all earlier gen are similarly + * limited. Testing suggests that it is a little more + * complicated than this. For example, Cherryview appears quite + * happy to scanout from anywhere within its global aperture. + */ + flags = 0; + if (HAS_GMCH_DISPLAY(i915)) + flags = PIN_MAPPABLE; + vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, flags); + } if (IS_ERR(vma)) goto err_unpin_display; From 757124d95c42bb579d67df51e51789849929ee31 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 3 Nov 2016 17:47:51 -0400 Subject: [PATCH 169/319] drm/amdgpu: fix crash in acp_hw_fini On CZ/ST systems with AZ rather than ACP audio, we need to bail early in hw_fini since there is nothing to do. bug: https://bugs.freedesktop.org/show_bug.cgi?id=98276 Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c index 892d60fb225b..2057683f7b59 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c @@ -395,9 +395,12 @@ static int acp_hw_fini(void *handle) { int i, ret; struct device *dev; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + /* return early if no ACP */ + if (!adev->acp.acp_genpd) + return 0; + for (i = 0; i < ACP_DEVS ; i++) { dev = get_mfd_cell_dev(adev->acp.acp_cell[i].name, i); ret = pm_genpd_remove_device(&adev->acp.acp_genpd->gpd, dev); From 17ae1c650c1ecf8dc8e16d54b0f68a345965f43f Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 3 Nov 2016 18:40:19 +0100 Subject: [PATCH 170/319] phy: fix device reference leaks Make sure to drop the reference taken by bus_find_device_by_name() before returning from phy_connect() and phy_attach(). Note that both function still take a reference to the phy device through phy_attach_direct(). Fixes: e13934563db0 ("[PATCH] PHY Layer fixup") Cc: Florian Fainelli Signed-off-by: Johan Hovold Signed-off-by: David S. Miller --- drivers/net/phy/phy_device.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index e977ba931878..1a4bf8acad78 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -723,6 +723,7 @@ struct phy_device *phy_connect(struct net_device *dev, const char *bus_id, phydev = to_phy_device(d); rc = phy_connect_direct(dev, phydev, handler, interface); + put_device(d); if (rc) return ERR_PTR(rc); @@ -953,6 +954,7 @@ struct phy_device *phy_attach(struct net_device *dev, const char *bus_id, phydev = to_phy_device(d); rc = phy_attach_direct(dev, phydev, phydev->dev_flags, interface); + put_device(d); if (rc) return ERR_PTR(rc); From c7262aaace1b17a650598063e3b9ee1785fde377 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 3 Nov 2016 18:40:20 +0100 Subject: [PATCH 171/319] net: ethernet: ti: cpsw: fix device and of_node leaks Make sure to drop the references taken by of_get_child_by_name() and bus_find_device() before returning from cpsw_phy_sel(). Note that holding a reference to the cpsw-phy-sel device does not prevent the devres-managed private data from going away. Fixes: 5892cd135e16 ("drivers: net: cpsw-phy-sel: Add new driver...") Cc: Mugunthan V N Cc: Grygorii Strashko Cc: linux-omap@vger.kernel.org Signed-off-by: Johan Hovold Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/cpsw-phy-sel.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/ti/cpsw-phy-sel.c b/drivers/net/ethernet/ti/cpsw-phy-sel.c index 054a8dd23dae..ba1e45ff6aae 100644 --- a/drivers/net/ethernet/ti/cpsw-phy-sel.c +++ b/drivers/net/ethernet/ti/cpsw-phy-sel.c @@ -176,9 +176,12 @@ void cpsw_phy_sel(struct device *dev, phy_interface_t phy_mode, int slave) } dev = bus_find_device(&platform_bus_type, NULL, node, match); + of_node_put(node); priv = dev_get_drvdata(dev); priv->cpsw_phy_sel(priv, phy_mode, slave); + + put_device(dev); } EXPORT_SYMBOL_GPL(cpsw_phy_sel); From 6bed0118012ea350acbe606ab3ae0ed3d60ed5f3 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 3 Nov 2016 18:40:21 +0100 Subject: [PATCH 172/319] net: ethernet: ti: davinci_emac: fix device reference leak Make sure to drop the references taken by bus_find_device() before returning from emac_dev_open(). Note that phy_connect still takes a reference to the phy device. Fixes: 5d69e0076a72 ("net: davinci_emac: switch to new mdio") Cc: Mugunthan V N Cc: Grygorii Strashko Cc: linux-omap@vger.kernel.org Signed-off-by: Johan Hovold Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/davinci_emac.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/ti/davinci_emac.c b/drivers/net/ethernet/ti/davinci_emac.c index 2fd94a5bc1f3..84fbe5714f8b 100644 --- a/drivers/net/ethernet/ti/davinci_emac.c +++ b/drivers/net/ethernet/ti/davinci_emac.c @@ -1410,6 +1410,7 @@ static int emac_dev_open(struct net_device *ndev) int i = 0; struct emac_priv *priv = netdev_priv(ndev); struct phy_device *phydev = NULL; + struct device *phy = NULL; ret = pm_runtime_get_sync(&priv->pdev->dev); if (ret < 0) { @@ -1488,19 +1489,20 @@ static int emac_dev_open(struct net_device *ndev) /* use the first phy on the bus if pdata did not give us a phy id */ if (!phydev && !priv->phy_id) { - struct device *phy; - phy = bus_find_device(&mdio_bus_type, NULL, NULL, match_first_device); - if (phy) + if (phy) { priv->phy_id = dev_name(phy); + if (!priv->phy_id || !*priv->phy_id) + put_device(phy); + } } if (!phydev && priv->phy_id && *priv->phy_id) { phydev = phy_connect(ndev, priv->phy_id, &emac_adjust_link, PHY_INTERFACE_MODE_MII); - + put_device(phy); /* reference taken by bus_find_device */ if (IS_ERR(phydev)) { dev_err(emac_dev, "could not connect to phy %s\n", priv->phy_id); From 2271150bfb814b72ec57ae2fdf66e39da2eafafd Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 3 Nov 2016 18:40:22 +0100 Subject: [PATCH 173/319] net: hns: fix device reference leaks Make sure to drop the reference taken by class_find_device() in hnae_get_handle() on errors and when later releasing the handle. Fixes: 6fe6611ff275 ("net: add Hisilicon Network Subsystem...") Cc: Yisen Zhuang Cc: Salil Mehta Signed-off-by: Johan Hovold Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns/hnae.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hns/hnae.c b/drivers/net/ethernet/hisilicon/hns/hnae.c index c54c6fac0d1d..b6ed818f78ff 100644 --- a/drivers/net/ethernet/hisilicon/hns/hnae.c +++ b/drivers/net/ethernet/hisilicon/hns/hnae.c @@ -332,8 +332,10 @@ struct hnae_handle *hnae_get_handle(struct device *owner_dev, return ERR_PTR(-ENODEV); handle = dev->ops->get_handle(dev, port_id); - if (IS_ERR(handle)) + if (IS_ERR(handle)) { + put_device(&dev->cls_dev); return handle; + } handle->dev = dev; handle->owner_dev = owner_dev; @@ -356,6 +358,8 @@ struct hnae_handle *hnae_get_handle(struct device *owner_dev, for (j = i - 1; j >= 0; j--) hnae_fini_queue(handle->qs[j]); + put_device(&dev->cls_dev); + return ERR_PTR(-ENOMEM); } EXPORT_SYMBOL(hnae_get_handle); @@ -377,6 +381,8 @@ void hnae_put_handle(struct hnae_handle *h) dev->ops->put_handle(h); module_put(dev->owner); + + put_device(&dev->cls_dev); } EXPORT_SYMBOL(hnae_put_handle); From 7233bc84a3aeda835d334499dc00448373caf5c0 Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Thu, 3 Nov 2016 17:03:41 -0200 Subject: [PATCH 174/319] sctp: assign assoc_id earlier in __sctp_connect sctp_wait_for_connect() currently already holds the asoc to keep it alive during the sleep, in case another thread release it. But Andrey Konovalov and Dmitry Vyukov reported an use-after-free in such situation. Problem is that __sctp_connect() doesn't get a ref on the asoc and will do a read on the asoc after calling sctp_wait_for_connect(), but by then another thread may have closed it and the _put on sctp_wait_for_connect will actually release it, causing the use-after-free. Fix is, instead of doing the read after waiting for the connect, do it before so, and avoid this issue as the socket is still locked by then. There should be no issue on returning the asoc id in case of failure as the application shouldn't trust on that number in such situations anyway. This issue doesn't exist in sctp_sendmsg() path. Reported-by: Dmitry Vyukov Reported-by: Andrey Konovalov Tested-by: Andrey Konovalov Signed-off-by: Marcelo Ricardo Leitner Reviewed-by: Xin Long Acked-by: Neil Horman Signed-off-by: David S. Miller --- net/sctp/socket.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 71b75f9d9c1b..faa48ff5cf4b 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -1214,9 +1214,12 @@ static int __sctp_connect(struct sock *sk, timeo = sock_sndtimeo(sk, f_flags & O_NONBLOCK); - err = sctp_wait_for_connect(asoc, &timeo); - if ((err == 0 || err == -EINPROGRESS) && assoc_id) + if (assoc_id) *assoc_id = asoc->assoc_id; + err = sctp_wait_for_connect(asoc, &timeo); + /* Note: the asoc may be freed after the return of + * sctp_wait_for_connect. + */ /* Don't free association on exit. */ asoc = NULL; From 483bed2b0ddd12ec33fc9407e0c6e1088e77a97c Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 4 Nov 2016 00:01:19 +0100 Subject: [PATCH 175/319] bpf: fix htab map destruction when extra reserve is in use Commit a6ed3ea65d98 ("bpf: restore behavior of bpf_map_update_elem") added an extra per-cpu reserve to the hash table map to restore old behaviour from pre prealloc times. When non-prealloc is in use for a map, then problem is that once a hash table extra element has been linked into the hash-table, and the hash table is destroyed due to refcount dropping to zero, then htab_map_free() -> delete_all_elements() will walk the whole hash table and drop all elements via htab_elem_free(). The problem is that the element from the extra reserve is first fed to the wrong backend allocator and eventually freed twice. Fixes: a6ed3ea65d98 ("bpf: restore behavior of bpf_map_update_elem") Reported-by: Dmitry Vyukov Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- kernel/bpf/hashtab.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index 570eeca7bdfa..ad1bc67aff1b 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -687,7 +687,8 @@ static void delete_all_elements(struct bpf_htab *htab) hlist_for_each_entry_safe(l, n, head, hash_node) { hlist_del_rcu(&l->hash_node); - htab_elem_free(htab, l); + if (l->state != HTAB_EXTRA_ELEM_USED) + htab_elem_free(htab, l); } } } From 20b2b24f91f70e7d3f0918c077546cb21bd73a87 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 4 Nov 2016 00:56:31 +0100 Subject: [PATCH 176/319] bpf: fix map not being uncharged during map creation failure In map_create(), we first find and create the map, then once that suceeded, we charge it to the user's RLIMIT_MEMLOCK, and then fetch a new anon fd through anon_inode_getfd(). The problem is, once the latter fails f.e. due to RLIMIT_NOFILE limit, then we only destruct the map via map->ops->map_free(), but without uncharging the previously locked memory first. That means that the user_struct allocation is leaked as well as the accounted RLIMIT_MEMLOCK memory not released. Make the label names in the fix consistent with bpf_prog_load(). Fixes: aaac3ba95e4c ("bpf: charge user for creation of BPF maps and programs") Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- kernel/bpf/syscall.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 228f962447a5..237f3d6a7ddc 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -194,7 +194,7 @@ static int map_create(union bpf_attr *attr) err = bpf_map_charge_memlock(map); if (err) - goto free_map; + goto free_map_nouncharge; err = bpf_map_new_fd(map); if (err < 0) @@ -204,6 +204,8 @@ static int map_create(union bpf_attr *attr) return err; free_map: + bpf_map_uncharge_memlock(map); +free_map_nouncharge: map->ops->map_free(map); return err; } From e6e335bf3a400bc3b5a65322a891318a25749769 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Mon, 7 Nov 2016 10:36:46 -0800 Subject: [PATCH 177/319] ARC: change return value of userspace cmpxchg assist syscall The original syscall only used to return errno to indicate if cmpxchg succeeded. It was not returning the "previous" value which typical cmpxchg callers are interested in to build their slowpaths or retry loops. Given user preemption in syscall return path etc, it is not wise to check this in userspace afterwards, but should be what kernel actually observed in the syscall. So change the syscall interface to always return the previous value and additionally set Z flag to indicate whether operation succeeded or not (just like ARM implementation when they used to have this syscall) The flag approach avoids having to put_user errno which is nice given the use case for this syscall cares mostly about the "previous" value. Signed-off-by: Vineet Gupta --- arch/arc/include/asm/arcregs.h | 2 ++ arch/arc/kernel/process.c | 20 +++++++++++--------- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/arch/arc/include/asm/arcregs.h b/arch/arc/include/asm/arcregs.h index 7f3f9f63708c..1bd24ec3e350 100644 --- a/arch/arc/include/asm/arcregs.h +++ b/arch/arc/include/asm/arcregs.h @@ -43,12 +43,14 @@ #define STATUS_AE_BIT 5 /* Exception active */ #define STATUS_DE_BIT 6 /* PC is in delay slot */ #define STATUS_U_BIT 7 /* User/Kernel mode */ +#define STATUS_Z_BIT 11 #define STATUS_L_BIT 12 /* Loop inhibit */ /* These masks correspond to the status word(STATUS_32) bits */ #define STATUS_AE_MASK (1<status32 &= ~STATUS_Z_MASK; + if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int))) return -EFAULT; preempt_disable(); - ret = __get_user(uval, uaddr); - if (ret) + if (__get_user(uval, uaddr)) goto done; - if (uval != expected) - ret = -EAGAIN; - else - ret = __put_user(new, uaddr); + if (uval == expected) { + if (!__put_user(new, uaddr)) + regs->status32 |= STATUS_Z_MASK; + } done: preempt_enable(); - return ret; + return uval; } void arch_cpu_idle(void) From 922cc171998ac3dbe74d57011ef7ed57e9b0d7df Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Mon, 31 Oct 2016 14:09:52 -0700 Subject: [PATCH 178/319] ARC: timer: rtc: implement read loop in "C" vs. inline asm The current code doesn't even compile as somehow the inline assembly can't see the register names defined as ARC_RTC_* I'm pretty sure It worked when I first got it merged, but the tools were definitely different then. So better to write this in "C" anyways. CC: stable@vger.kernel.org #4.2+ Acked-by: Daniel Lezcano Signed-off-by: Vineet Gupta --- arch/arc/kernel/time.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/arch/arc/kernel/time.c b/arch/arc/kernel/time.c index f927b8dc6edd..c10390d1ddb6 100644 --- a/arch/arc/kernel/time.c +++ b/arch/arc/kernel/time.c @@ -152,14 +152,17 @@ static cycle_t arc_read_rtc(struct clocksource *cs) cycle_t full; } stamp; - - __asm__ __volatile( - "1: \n" - " lr %0, [AUX_RTC_LOW] \n" - " lr %1, [AUX_RTC_HIGH] \n" - " lr %2, [AUX_RTC_CTRL] \n" - " bbit0.nt %2, 31, 1b \n" - : "=r" (stamp.low), "=r" (stamp.high), "=r" (status)); + /* + * hardware has an internal state machine which tracks readout of + * low/high and updates the CTRL.status if + * - interrupt/exception taken between the two reads + * - high increments after low has been read + */ + do { + stamp.low = read_aux_reg(AUX_RTC_LOW); + stamp.high = read_aux_reg(AUX_RTC_HIGH); + status = read_aux_reg(AUX_RTC_CTRL); + } while (!(status & _BITUL(31))); return stamp.full; } From bb29dd84333a96f309c6d0f88b285b5b78927058 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Wed, 26 Oct 2016 10:33:31 -0400 Subject: [PATCH 179/319] SUNRPC: Fix suspicious RCU usage We need to hold the rcu_read_lock() when calling rcu_dereference(), otherwise we can't guarantee that the object being dereferenced still exists. Fixes: 39e5d2df ("SUNRPC search xprt switch for sockaddr") Signed-off-by: Anna Schumaker --- net/sunrpc/clnt.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 34dd7b26ee5f..62a482790937 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -2753,14 +2753,18 @@ EXPORT_SYMBOL_GPL(rpc_cap_max_reconnect_timeout); void rpc_clnt_xprt_switch_put(struct rpc_clnt *clnt) { + rcu_read_lock(); xprt_switch_put(rcu_dereference(clnt->cl_xpi.xpi_xpswitch)); + rcu_read_unlock(); } EXPORT_SYMBOL_GPL(rpc_clnt_xprt_switch_put); void rpc_clnt_xprt_switch_add_xprt(struct rpc_clnt *clnt, struct rpc_xprt *xprt) { + rcu_read_lock(); rpc_xprt_switch_add_xprt(rcu_dereference(clnt->cl_xpi.xpi_xpswitch), xprt); + rcu_read_unlock(); } EXPORT_SYMBOL_GPL(rpc_clnt_xprt_switch_add_xprt); @@ -2770,9 +2774,8 @@ bool rpc_clnt_xprt_switch_has_addr(struct rpc_clnt *clnt, struct rpc_xprt_switch *xps; bool ret; - xps = rcu_dereference(clnt->cl_xpi.xpi_xpswitch); - rcu_read_lock(); + xps = rcu_dereference(clnt->cl_xpi.xpi_xpswitch); ret = rpc_xprt_switch_has_addr(xps, sap); rcu_read_unlock(); return ret; From 8ef3295530ddc969ea9a3f307d94df97fcbc0629 Mon Sep 17 00:00:00 2001 From: Petr Vandrovec Date: Mon, 7 Nov 2016 12:11:29 -0800 Subject: [PATCH 180/319] NFS: Ignore connections that have cl_rpcclient uninitialized cl_rpcclient starts as ERR_PTR(-EINVAL), and connections like that are floating freely through the system. Most places check whether pointer is valid before dereferencing it, but newly added code in nfs_match_client does not. Which causes crashes when more than one NFS mount point is present. Signed-off-by: Petr Vandrovec Signed-off-by: Anna Schumaker --- fs/nfs/client.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 7555ba889d1f..ebecfb8fba06 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -314,7 +314,8 @@ static struct nfs_client *nfs_match_client(const struct nfs_client_initdata *dat /* Match the full socket address */ if (!rpc_cmp_addr_port(sap, clap)) /* Match all xprt_switch full socket addresses */ - if (!rpc_clnt_xprt_switch_has_addr(clp->cl_rpcclient, + if (IS_ERR(clp->cl_rpcclient) || + !rpc_clnt_xprt_switch_has_addr(clp->cl_rpcclient, sap)) continue; From 192747166a468dd8fb5d47ad9d5048c138c1fc25 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Wed, 26 Oct 2016 15:54:31 -0400 Subject: [PATCH 181/319] NFS: Don't print a pNFS error if we aren't using pNFS We used to check for a valid layout type id before verifying pNFS flags as an indicator for if we are using pNFS. This changed in 3132e49ece with the introduction of multiple layout types, since now we are passing an array of ids instead of just one. Since then, users have been seeing a KERN_ERR printk show up whenever mounting NFS v4 without pNFS. This patch restores the original behavior of exiting set_pnfs_layoutdriver() early if we aren't using pNFS. Fixes 3132e49ece ("pnfs: track multiple layout types in fsinfo structure") Reviewed-by: Jeff Layton Signed-off-by: Anna Schumaker --- fs/nfs/pnfs.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 56b2d96f9103..259ef85f435a 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -146,6 +146,8 @@ set_pnfs_layoutdriver(struct nfs_server *server, const struct nfs_fh *mntfh, u32 id; int i; + if (fsinfo->nlayouttypes == 0) + goto out_no_driver; if (!(server->nfs_client->cl_exchange_flags & (EXCHGID4_FLAG_USE_NON_PNFS | EXCHGID4_FLAG_USE_PNFS_MDS))) { printk(KERN_ERR "NFS: %s: cl_exchange_flags 0x%x\n", From 0ac84b72c0ed96ace1d8973a06f0120a3b905177 Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Mon, 7 Nov 2016 10:48:16 -0700 Subject: [PATCH 182/319] fs/nfs: Fix used uninitialized warn in nfs4_slot_seqid_in_use() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix the following warn: fs/nfs/nfs4session.c: In function ‘nfs4_slot_seqid_in_use’: fs/nfs/nfs4session.c:203:54: warning: ‘cur_seq’ may be used uninitialized in this function [-Wmaybe-uninitialized] if (nfs4_slot_get_seqid(tbl, slotid, &cur_seq) == 0 && ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^~ cur_seq == seq_nr && test_bit(slotid, tbl->used_slots)) ~~~~~~~~~~~~~~~~~ Signed-off-by: Shuah Khan Signed-off-by: Anna Schumaker --- fs/nfs/nfs4session.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/nfs4session.c b/fs/nfs/nfs4session.c index 150c5a1879bf..a61350f75c74 100644 --- a/fs/nfs/nfs4session.c +++ b/fs/nfs/nfs4session.c @@ -198,7 +198,7 @@ static int nfs4_slot_get_seqid(struct nfs4_slot_table *tbl, u32 slotid, static bool nfs4_slot_seqid_in_use(struct nfs4_slot_table *tbl, u32 slotid, u32 seq_nr) { - u32 cur_seq; + u32 cur_seq = 0; bool ret = false; spin_lock(&tbl->slot_tbl_lock); From 5d41ce29e3b91ef305f88d23f72b3359de329cec Mon Sep 17 00:00:00 2001 From: David Ahern Date: Thu, 3 Nov 2016 16:17:26 -0700 Subject: [PATCH 183/319] net: icmp6_send should use dst dev to determine L3 domain icmp6_send is called in response to some event. The skb may not have the device set (skb->dev is NULL), but it is expected to have a dst set. Update icmp6_send to use the dst on the skb to determine L3 domain. Fixes: ca254490c8dfd ("net: Add VRF support to IPv6 stack") Signed-off-by: David Ahern Signed-off-by: David S. Miller --- net/ipv6/icmp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index bd59c343d35f..7370ad2e693a 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -448,7 +448,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, if (__ipv6_addr_needs_scope_id(addr_type)) iif = skb->dev->ifindex; else - iif = l3mdev_master_ifindex(skb->dev); + iif = l3mdev_master_ifindex(skb_dst(skb)->dev); /* * Must not send error if the source does not uniquely From f3358507c11999c91abf54744658bccd49b5879c Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Fri, 4 Nov 2016 12:55:36 +0200 Subject: [PATCH 184/319] virtio-net: drop legacy features in virtio 1 mode Virtio 1.0 spec says VIRTIO_F_ANY_LAYOUT and VIRTIO_NET_F_GSO are legacy-only feature bits. Do not negotiate them in virtio 1 mode. Note this is a spec violation so we need to backport it to stable/downstream kernels. Cc: stable@vger.kernel.org Signed-off-by: Michael S. Tsirkin Reviewed-by: Cornelia Huck Acked-by: Jason Wang Signed-off-by: David S. Miller --- drivers/net/virtio_net.c | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index fad84f3f4109..fd8b1e62301f 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -2038,23 +2038,33 @@ static struct virtio_device_id id_table[] = { { 0 }, }; +#define VIRTNET_FEATURES \ + VIRTIO_NET_F_CSUM, VIRTIO_NET_F_GUEST_CSUM, \ + VIRTIO_NET_F_MAC, \ + VIRTIO_NET_F_HOST_TSO4, VIRTIO_NET_F_HOST_UFO, VIRTIO_NET_F_HOST_TSO6, \ + VIRTIO_NET_F_HOST_ECN, VIRTIO_NET_F_GUEST_TSO4, VIRTIO_NET_F_GUEST_TSO6, \ + VIRTIO_NET_F_GUEST_ECN, VIRTIO_NET_F_GUEST_UFO, \ + VIRTIO_NET_F_MRG_RXBUF, VIRTIO_NET_F_STATUS, VIRTIO_NET_F_CTRL_VQ, \ + VIRTIO_NET_F_CTRL_RX, VIRTIO_NET_F_CTRL_VLAN, \ + VIRTIO_NET_F_GUEST_ANNOUNCE, VIRTIO_NET_F_MQ, \ + VIRTIO_NET_F_CTRL_MAC_ADDR, \ + VIRTIO_NET_F_MTU + static unsigned int features[] = { - VIRTIO_NET_F_CSUM, VIRTIO_NET_F_GUEST_CSUM, - VIRTIO_NET_F_GSO, VIRTIO_NET_F_MAC, - VIRTIO_NET_F_HOST_TSO4, VIRTIO_NET_F_HOST_UFO, VIRTIO_NET_F_HOST_TSO6, - VIRTIO_NET_F_HOST_ECN, VIRTIO_NET_F_GUEST_TSO4, VIRTIO_NET_F_GUEST_TSO6, - VIRTIO_NET_F_GUEST_ECN, VIRTIO_NET_F_GUEST_UFO, - VIRTIO_NET_F_MRG_RXBUF, VIRTIO_NET_F_STATUS, VIRTIO_NET_F_CTRL_VQ, - VIRTIO_NET_F_CTRL_RX, VIRTIO_NET_F_CTRL_VLAN, - VIRTIO_NET_F_GUEST_ANNOUNCE, VIRTIO_NET_F_MQ, - VIRTIO_NET_F_CTRL_MAC_ADDR, + VIRTNET_FEATURES, +}; + +static unsigned int features_legacy[] = { + VIRTNET_FEATURES, + VIRTIO_NET_F_GSO, VIRTIO_F_ANY_LAYOUT, - VIRTIO_NET_F_MTU, }; static struct virtio_driver virtio_net_driver = { .feature_table = features, .feature_table_size = ARRAY_SIZE(features), + .feature_table_legacy = features_legacy, + .feature_table_size_legacy = ARRAY_SIZE(features_legacy), .driver.name = KBUILD_MODNAME, .driver.owner = THIS_MODULE, .id_table = id_table, From 8e0140a2d7c9f55b794a5fce22e05350a435b965 Mon Sep 17 00:00:00 2001 From: Fabian Mewes Date: Fri, 4 Nov 2016 13:16:14 +0100 Subject: [PATCH 185/319] Documentation: networking: dsa: Update tagging protocols Add Qualcomm QCA tagging introduced in cafdc45c9 to the list of supported protocols. Signed-off-by: Fabian Mewes Reviewed-by: Andrew Lunn Acked-by: Florian Fainelli Signed-off-by: David S. Miller --- Documentation/networking/dsa/dsa.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Documentation/networking/dsa/dsa.txt b/Documentation/networking/dsa/dsa.txt index 6d6c07cf1a9a..63912ef34606 100644 --- a/Documentation/networking/dsa/dsa.txt +++ b/Documentation/networking/dsa/dsa.txt @@ -67,13 +67,14 @@ Note that DSA does not currently create network interfaces for the "cpu" and Switch tagging protocols ------------------------ -DSA currently supports 4 different tagging protocols, and a tag-less mode as +DSA currently supports 5 different tagging protocols, and a tag-less mode as well. The different protocols are implemented in: net/dsa/tag_trailer.c: Marvell's 4 trailer tag mode (legacy) net/dsa/tag_dsa.c: Marvell's original DSA tag net/dsa/tag_edsa.c: Marvell's enhanced DSA tag net/dsa/tag_brcm.c: Broadcom's 4 bytes tag +net/dsa/tag_qca.c: Qualcomm's 2 bytes tag The exact format of the tag protocol is vendor specific, but in general, they all contain something which: From fd0285a39b1cb496f60210a9a00ad33a815603e7 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Fri, 4 Nov 2016 15:11:57 -0400 Subject: [PATCH 186/319] fib_trie: Correct /proc/net/route off by one error The display of /proc/net/route has had a couple issues due to the fact that when I originally rewrote most of fib_trie I made it so that the iterator was tracking the next value to use instead of the current. In addition it had an off by 1 error where I was tracking the first piece of data as position 0, even though in reality that belonged to the SEQ_START_TOKEN. This patch updates the code so the iterator tracks the last reported position and key instead of the next expected position and key. In addition it shifts things so that all of the leaves start at 1 instead of trying to report leaves starting with offset 0 as being valid. With these two issues addressed this should resolve any off by one errors that were present in the display of /proc/net/route. Fixes: 25b97c016b26 ("ipv4: off-by-one in continuation handling in /proc/net/route") Cc: Andy Whitcroft Reported-by: Jason Baron Tested-by: Jason Baron Signed-off-by: Alexander Duyck Signed-off-by: David S. Miller --- net/ipv4/fib_trie.c | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 31cef3602585..4cff74d4133f 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -2413,22 +2413,19 @@ static struct key_vector *fib_route_get_idx(struct fib_route_iter *iter, struct key_vector *l, **tp = &iter->tnode; t_key key; - /* use cache location of next-to-find key */ + /* use cached location of previously found key */ if (iter->pos > 0 && pos >= iter->pos) { - pos -= iter->pos; key = iter->key; } else { - iter->pos = 0; + iter->pos = 1; key = 0; } - while ((l = leaf_walk_rcu(tp, key)) != NULL) { + pos -= iter->pos; + + while ((l = leaf_walk_rcu(tp, key)) && (pos-- > 0)) { key = l->key + 1; iter->pos++; - - if (--pos <= 0) - break; - l = NULL; /* handle unlikely case of a key wrap */ @@ -2437,7 +2434,7 @@ static struct key_vector *fib_route_get_idx(struct fib_route_iter *iter, } if (l) - iter->key = key; /* remember it */ + iter->key = l->key; /* remember it */ else iter->pos = 0; /* forget it */ @@ -2465,7 +2462,7 @@ static void *fib_route_seq_start(struct seq_file *seq, loff_t *pos) return fib_route_get_idx(iter, *pos); iter->pos = 0; - iter->key = 0; + iter->key = KEY_MAX; return SEQ_START_TOKEN; } @@ -2474,7 +2471,7 @@ static void *fib_route_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct fib_route_iter *iter = seq->private; struct key_vector *l = NULL; - t_key key = iter->key; + t_key key = iter->key + 1; ++*pos; @@ -2483,7 +2480,7 @@ static void *fib_route_seq_next(struct seq_file *seq, void *v, loff_t *pos) l = leaf_walk_rcu(&iter->tnode, key); if (l) { - iter->key = l->key + 1; + iter->key = l->key; iter->pos++; } else { iter->pos = 0; From 7ee7e87dfb158e79019ea1d5ea1b0e6f2bc93ee4 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 7 Nov 2016 19:57:00 +0100 Subject: [PATCH 187/319] genirq: Use irq type from irqdata instead of irqdesc The type flags in the irq descriptor are there for historical reasons and only updated via irq_modify_status() or irq_set_type(). Both functions also update the type flags in irqdata. __setup_irq() is the only left over user of the type flags in the irq descriptor. If __setup_irq() is called with empty irq type flags, then the type flags are retrieved from irqdata. If an interrupt is shared, then the type flags are compared with the type flags stored in the irq descriptor. On x86 the ioapic does not have a irq_set_type() callback because the type is defined in the BIOS tables and cannot be changed. The type is stored in irqdata at setup time without updating the type data in the irq descriptor. As a result the comparison described above fails. There is no point in updating the irq descriptor flags because the only relevant storage is irqdata. Use the type flags from irqdata for both retrieval and comparison in __setup_irq() instead. Aside of that the print out in case of non matching type flags has the old and new type flags arguments flipped. Fix that as well. For correctness sake the flags stored in the irq descriptor should be removed, but this is beyond the scope of this bugfix and will be done in a later patch. Fixes: 4b357daed698 ("genirq: Look-up trigger type if not specified by caller") Reported-and-tested-by: Mika Westerberg Signed-off-by: Thomas Gleixner Cc: Marc Zyngier Cc: Jon Hunter Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/alpine.DEB.2.20.1611072020360.3501@nanos Signed-off-by: Thomas Gleixner --- kernel/irq/manage.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 9c4d30483264..6b669593e7eb 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -1341,12 +1341,12 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) } else if (new->flags & IRQF_TRIGGER_MASK) { unsigned int nmsk = new->flags & IRQF_TRIGGER_MASK; - unsigned int omsk = irq_settings_get_trigger_mask(desc); + unsigned int omsk = irqd_get_trigger_type(&desc->irq_data); if (nmsk != omsk) /* hope the handler works with current trigger mode */ pr_warn("irq %d uses trigger mode %u; requested %u\n", - irq, nmsk, omsk); + irq, omsk, nmsk); } *old_ptr = new; From a29d126027c781eaa305ecae432bb04d41e198c5 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 8 Nov 2016 14:52:18 +0100 Subject: [PATCH 188/319] drm/amdgpu/powerplay/smu7: fix unintialized data usage A recent bugfix replaced an out-of-bounds access with direct use of unintialized data: drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c: In function 'smu7_patch_limits_vddc': drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c:2033:6: error: 'vddc' may be used uninitialized in this function [-Werror=maybe-uninitialized] drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c:2146:11: note: 'vddc' was declared here drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c:2033:6: error: 'vddci' may be used uninitialized in this function [-Werror=maybe-uninitialized] drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c:2146:17: note: 'vddci' was declared here uint32_t vddc, vddci; This initializes the data as before using the correct type. Fixes: 77f7f71f5be1 ("drm/amdgpu/powerplay/smu7: fix static checker warning") Signed-off-by: Arnd Bergmann Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c index 75854021f403..51fb86f20c21 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c @@ -2133,9 +2133,11 @@ static int smu7_patch_limits_vddc(struct pp_hwmgr *hwmgr, struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); if (tab) { + vddc = tab->vddc; smu7_patch_ppt_v0_with_vdd_leakage(hwmgr, &vddc, &data->vddc_leakage); tab->vddc = vddc; + vddci = tab->vddci; smu7_patch_ppt_v0_with_vdd_leakage(hwmgr, &vddci, &data->vddci_leakage); tab->vddci = vddci; From f20024d8ba6bc8abf8d0ec12eabfdedd9935fff2 Mon Sep 17 00:00:00 2001 From: Andrew Shadura Date: Thu, 3 Nov 2016 11:09:24 +0100 Subject: [PATCH 189/319] drm/amd/powerplay: return false instead of -EINVAL Returning -EINVAL from a bool-returning function phm_check_smc_update_required_for_display_configuration has an unexpected effect of returning true, which is probably not what was intended. Replace -EINVAL by false. The only place this function is called from is psm_adjust_power_state_dynamic in drivers/gpu/drm/amd/powerplay/eventmgr/psm.c:106: if (!equal || phm_check_smc_update_required_for_display_configuration(hwmgr)) { phm_apply_state_adjust_rules(hwmgr, requested, pcurrent); phm_set_power_state(hwmgr, &pcurrent->hardware, &requested->hardware); hwmgr->current_ps = requested; } It seems to expect a boolean value here. This issue has been found using the following Coccinelle semantic patch written by Peter Senna Tschudin: @@ identifier f; constant C; typedef bool; @@ bool f (...){ <+... * return -C; ...+> } Reviewed-by: Alex Deucher Signed-off-by: Andrew Shadura Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c b/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c index 14f8c1f4da3d..0723758ed065 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c @@ -272,7 +272,7 @@ bool phm_check_smc_update_required_for_display_configuration(struct pp_hwmgr *hw PHM_FUNC_CHECK(hwmgr); if (hwmgr->hwmgr_func->check_smc_update_required_for_display_configuration == NULL) - return -EINVAL; + return false; return hwmgr->hwmgr_func->check_smc_update_required_for_display_configuration(hwmgr); } From 76a08404742e6da79f1e5002ac39033dc79d94da Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Tue, 8 Nov 2016 08:47:14 -0800 Subject: [PATCH 190/319] Revert "ARC: build: retire old toggles" This has caused a bunch of build failures at a few sites, with GNU 2015.12 and older as the assembler seems to need -mlock to be able to grok llock/scond instructions for ARC700 builds. different places since the older tools still seem to release of tools which most people are using seem to trip with the -mlock flag not being passed. This reverts commit c3005475889c7c730638f95d13be3360f0b33e98. --- arch/arc/Makefile | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arc/Makefile b/arch/arc/Makefile index 864adad52280..aa82d13d4213 100644 --- a/arch/arc/Makefile +++ b/arch/arc/Makefile @@ -50,6 +50,9 @@ atleast_gcc44 := $(call cc-ifversion, -ge, 0404, y) cflags-$(atleast_gcc44) += -fsection-anchors +cflags-$(CONFIG_ARC_HAS_LLSC) += -mlock +cflags-$(CONFIG_ARC_HAS_SWAPE) += -mswape + ifdef CONFIG_ISA_ARCV2 ifndef CONFIG_ARC_HAS_LL64 From 66619433d0fdf3e7d9f573993217b0d2cc3044fa Mon Sep 17 00:00:00 2001 From: Noam Camus Date: Tue, 8 Nov 2016 11:58:23 +0200 Subject: [PATCH 191/319] ARC: [plat-eznps] remove IPI clear from SMP operations Today we register to plat_smp_ops.clear() method which actually is acking the IPI. However this is already taking care by our irqchip driver specifically by the irq_chip.irq_eoi() method. This is perfect timing where it should be done and no special handling is needed at plat_smp_ops.clear(). Signed-off-by: Noam Camus Signed-off-by: Vineet Gupta --- arch/arc/plat-eznps/smp.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/arch/arc/plat-eznps/smp.c b/arch/arc/plat-eznps/smp.c index 5e901f86e4bd..56a4c8522f11 100644 --- a/arch/arc/plat-eznps/smp.c +++ b/arch/arc/plat-eznps/smp.c @@ -140,16 +140,10 @@ static void eznps_init_per_cpu(int cpu) mtm_enable_core(cpu); } -static void eznps_ipi_clear(int irq) -{ - write_aux_reg(CTOP_AUX_IACK, 1 << irq); -} - struct plat_smp_ops plat_smp_ops = { .info = smp_cpuinfo_buf, .init_early_smp = eznps_init_cpumasks, .cpu_kick = eznps_smp_wakeup_cpu, .ipi_send = eznps_ipi_send, .init_per_cpu = eznps_init_per_cpu, - .ipi_clear = eznps_ipi_clear, }; From 19dbc76228899be555b84a09fd3a364c2ce86bbb Mon Sep 17 00:00:00 2001 From: Noam Camus Date: Tue, 8 Nov 2016 15:20:59 +0200 Subject: [PATCH 192/319] ARC: [plat-eznps] set default baud for early console For CONFIG_SERIAL_EARLYCON we need 800MHz for NPS SoC The early console driver uses BASE_BAUD and not using dtb. The default of 50MHz is NOT good for NPS SoC. Signed-off-by: Noam Camus Signed-off-by: Vineet Gupta --- arch/arc/kernel/devtree.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arc/kernel/devtree.c b/arch/arc/kernel/devtree.c index f1e07c2344f8..3b67f538f142 100644 --- a/arch/arc/kernel/devtree.c +++ b/arch/arc/kernel/devtree.c @@ -31,6 +31,8 @@ static void __init arc_set_early_base_baud(unsigned long dt_root) arc_base_baud = 166666666; /* Fixed 166.6MHz clk (TB10x) */ else if (of_flat_dt_is_compatible(dt_root, "snps,arc-sdp")) arc_base_baud = 33333333; /* Fixed 33MHz clk (AXS10x) */ + else if (of_flat_dt_is_compatible(dt_root, "ezchip,arc-nps")) + arc_base_baud = 800000000; /* Fixed 800MHz clk (NPS) */ else arc_base_baud = 50000000; /* Fixed default 50MHz */ } From 34e71e4cbb8eb467dbcfb3afbd2b95ff2b08f482 Mon Sep 17 00:00:00 2001 From: Yuriy Kolerov Date: Tue, 8 Nov 2016 10:08:31 +0300 Subject: [PATCH 193/319] ARC: IRQ: Do not use hwirq as virq and vice versa This came up when reviewing code to address missing IRQ affinity setting in AXS103 platform and/or implementing hierarchical IRQ domains - smp_ipi_irq_setup() callers pass hwirq but in turn calls request_percpu_irq() which expects a linux virq. So invoke irq_find_mapping() to do the conversion (also explicitify this in code by renaming the args appropriately) - idu_of_init()/idu_cascade_isr() were similarly using linux virq where hwirq is expected, so do the conversion using irqd_to_hwirq() helper Signed-off-by: Yuriy Kolerov [vgupta: made changelog a bit concise a bit] Signed-off-by: Vineet Gupta --- arch/arc/include/asm/smp.h | 4 ++-- arch/arc/kernel/mcip.c | 19 +++++++++---------- arch/arc/kernel/smp.c | 13 +++++++++---- 3 files changed, 20 insertions(+), 16 deletions(-) diff --git a/arch/arc/include/asm/smp.h b/arch/arc/include/asm/smp.h index 89fdd1b0a76e..0861007d9ef3 100644 --- a/arch/arc/include/asm/smp.h +++ b/arch/arc/include/asm/smp.h @@ -37,9 +37,9 @@ extern const char *arc_platform_smp_cpuinfo(void); * API expected BY platform smp code (FROM arch smp code) * * smp_ipi_irq_setup: - * Takes @cpu and @irq to which the arch-common ISR is hooked up + * Takes @cpu and @hwirq to which the arch-common ISR is hooked up */ -extern int smp_ipi_irq_setup(int cpu, int irq); +extern int smp_ipi_irq_setup(int cpu, irq_hw_number_t hwirq); /* * struct plat_smp_ops - SMP callbacks provided by platform to ARC SMP diff --git a/arch/arc/kernel/mcip.c b/arch/arc/kernel/mcip.c index c424d5abc318..6d18bb871926 100644 --- a/arch/arc/kernel/mcip.c +++ b/arch/arc/kernel/mcip.c @@ -207,16 +207,15 @@ static struct irq_chip idu_irq_chip = { }; -static int idu_first_irq; +static irq_hw_number_t idu_first_hwirq; static void idu_cascade_isr(struct irq_desc *desc) { - struct irq_domain *domain = irq_desc_get_handler_data(desc); - unsigned int core_irq = irq_desc_get_irq(desc); - unsigned int idu_irq; + struct irq_domain *idu_domain = irq_desc_get_handler_data(desc); + irq_hw_number_t core_hwirq = irqd_to_hwirq(irq_desc_get_irq_data(desc)); + irq_hw_number_t idu_hwirq = core_hwirq - idu_first_hwirq; - idu_irq = core_irq - idu_first_irq; - generic_handle_irq(irq_find_mapping(domain, idu_irq)); + generic_handle_irq(irq_find_mapping(idu_domain, idu_hwirq)); } static int idu_irq_map(struct irq_domain *d, unsigned int virq, irq_hw_number_t hwirq) @@ -282,7 +281,7 @@ idu_of_init(struct device_node *intc, struct device_node *parent) struct irq_domain *domain; /* Read IDU BCR to confirm nr_irqs */ int nr_irqs = of_irq_count(intc); - int i, irq; + int i, virq; struct mcip_bcr mp; READ_BCR(ARC_REG_MCIP_BCR, mp); @@ -303,11 +302,11 @@ idu_of_init(struct device_node *intc, struct device_node *parent) * however we need it to get the parent virq and set IDU handler * as first level isr */ - irq = irq_of_parse_and_map(intc, i); + virq = irq_of_parse_and_map(intc, i); if (!i) - idu_first_irq = irq; + idu_first_hwirq = irqd_to_hwirq(irq_get_irq_data(virq)); - irq_set_chained_handler_and_data(irq, idu_cascade_isr, domain); + irq_set_chained_handler_and_data(virq, idu_cascade_isr, domain); } __mcip_cmd(CMD_IDU_ENABLE, 0); diff --git a/arch/arc/kernel/smp.c b/arch/arc/kernel/smp.c index f00029e9cbe4..88674d972c9d 100644 --- a/arch/arc/kernel/smp.c +++ b/arch/arc/kernel/smp.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -353,20 +354,24 @@ irqreturn_t do_IPI(int irq, void *dev_id) */ static DEFINE_PER_CPU(int, ipi_dev); -int smp_ipi_irq_setup(int cpu, int irq) +int smp_ipi_irq_setup(int cpu, irq_hw_number_t hwirq) { int *dev = per_cpu_ptr(&ipi_dev, cpu); + unsigned int virq = irq_find_mapping(NULL, hwirq); + + if (!virq) + panic("Cannot find virq for root domain and hwirq=%lu", hwirq); /* Boot cpu calls request, all call enable */ if (!cpu) { int rc; - rc = request_percpu_irq(irq, do_IPI, "IPI Interrupt", dev); + rc = request_percpu_irq(virq, do_IPI, "IPI Interrupt", dev); if (rc) - panic("Percpu IRQ request failed for %d\n", irq); + panic("Percpu IRQ request failed for %u\n", virq); } - enable_percpu_irq(irq, 0); + enable_percpu_irq(virq, 0); return 0; } From 0a0a047def15b7c8bcd27671d2be2de3d37fb30d Mon Sep 17 00:00:00 2001 From: Yuriy Kolerov Date: Tue, 8 Nov 2016 10:08:32 +0300 Subject: [PATCH 194/319] ARCv2: MCIP: Use IDU_M_DISTRI_DEST mode if there is only 1 destination core ARC linux uses 2 distribution modes for common interrupts: round robin mode (IDU_M_DISTRI_RR) and a simple destination mode (IDU_M_DISTRI_DEST). The first one is used when more than 1 cores may handle a common interrupt and the second one is used when only 1 core may handle a common interrupt. However idu_irq_set_affinity() always sets IDU_M_DISTRI_RR for all affinity values. But there is no sense in setting of such mode if only 1 core must handle a common interrupt. Signed-off-by: Yuriy Kolerov Signed-off-by: Vineet Gupta --- arch/arc/kernel/mcip.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/arch/arc/kernel/mcip.c b/arch/arc/kernel/mcip.c index 6d18bb871926..f39142acc89e 100644 --- a/arch/arc/kernel/mcip.c +++ b/arch/arc/kernel/mcip.c @@ -181,6 +181,8 @@ idu_irq_set_affinity(struct irq_data *data, const struct cpumask *cpumask, { unsigned long flags; cpumask_t online; + unsigned int destination_bits; + unsigned int distribution_mode; /* errout if no online cpu per @cpumask */ if (!cpumask_and(&online, cpumask, cpu_online_mask)) @@ -188,8 +190,15 @@ idu_irq_set_affinity(struct irq_data *data, const struct cpumask *cpumask, raw_spin_lock_irqsave(&mcip_lock, flags); - idu_set_dest(data->hwirq, cpumask_bits(&online)[0]); - idu_set_mode(data->hwirq, IDU_M_TRIG_LEVEL, IDU_M_DISTRI_RR); + destination_bits = cpumask_bits(&online)[0]; + idu_set_dest(data->hwirq, destination_bits); + + if (ffs(destination_bits) == fls(destination_bits)) + distribution_mode = IDU_M_DISTRI_DEST; + else + distribution_mode = IDU_M_DISTRI_RR; + + idu_set_mode(data->hwirq, IDU_M_TRIG_LEVEL, distribution_mode); raw_spin_unlock_irqrestore(&mcip_lock, flags); From 16d917b130d782b94fa02afc7bdf0d4aae689da4 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Tue, 8 Nov 2016 14:25:24 -0600 Subject: [PATCH 195/319] PCI: Don't attempt to claim shadow copies of ROM If we're using a shadow copy of a PCI device ROM, the shadow copy is in RAM and the device never sees accesses to it and doesn't respond to it. We don't have to route the shadow range to the PCI device, and the device doesn't have to claim the range. Previously we treated the shadow copy as though it were the ROM BAR, and we failed to claim it because the region wasn't routed to the device: pci 0000:01:00.0: Video device with shadowed ROM at [mem 0x000c0000-0x000dffff] pci_bus 0000:01: Allocating resources pci 0000:01:00.0: can't claim BAR 6 [mem 0x000c0000-0x000dffff]: no compatible bridge window The failure path of pcibios_allocate_dev_rom_resource() cleared out the resource start address, which also caused the following ioremap() warning: WARNING: CPU: 0 PID: 116 at /build/linux-akdJXO/linux-4.8.0/arch/x86/mm/ioremap.c:121 __ioremap_caller+0x1ec/0x370 ioremap on RAM at 0x0000000000000000 - 0x000000000001ffff Handle an option ROM shadow copy as RAM, without trying to insert it into the iomem resource tree. This fixes a regression caused by 0c0e0736acad ("PCI: Set ROM shadow location in arch code, not in PCI core"), which appeared in v4.6. The regression causes video device initialization to fail. This was reported on AMD Turks, but it likely affects others as well. Fixes: 0c0e0736acad ("PCI: Set ROM shadow location in arch code, not in PCI core") Reported-and-tested-by: Vecu Bosseur Link: https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1627496 Link: https://bugzilla.kernel.org/show_bug.cgi?id=175391 Link: https://bugzilla.redhat.com/show_bug.cgi?id=1352272 Signed-off-by: Bjorn Helgaas CC: stable@vger.kernel.org # v4.6+ --- drivers/pci/setup-res.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/pci/setup-res.c b/drivers/pci/setup-res.c index 66c4d8f42233..9526e341988b 100644 --- a/drivers/pci/setup-res.c +++ b/drivers/pci/setup-res.c @@ -121,6 +121,14 @@ int pci_claim_resource(struct pci_dev *dev, int resource) return -EINVAL; } + /* + * If we have a shadow copy in RAM, the PCI device doesn't respond + * to the shadow range, so we don't need to claim it, and upstream + * bridges don't need to route the range to the device. + */ + if (res->flags & IORESOURCE_ROM_SHADOW) + return 0; + root = pci_find_parent_resource(dev, res); if (!root) { dev_info(&dev->dev, "can't claim BAR %d %pR: no compatible bridge window\n", From 8fbfef7f505bba60fb57078b7621270ee57cd1c4 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Thu, 3 Nov 2016 17:14:03 -0700 Subject: [PATCH 196/319] ipvs: use IPVS_CMD_ATTR_MAX for family.maxattr family.maxattr is the max index for policy[], the size of ops[] is determined with ARRAY_SIZE(). Reported-by: Andrey Konovalov Tested-by: Andrey Konovalov Cc: Pablo Neira Ayuso Signed-off-by: Cong Wang Signed-off-by: Simon Horman Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipvs/ip_vs_ctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index c3c809b2e712..a6e44ef2ec9a 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -2845,7 +2845,7 @@ static struct genl_family ip_vs_genl_family = { .hdrsize = 0, .name = IPVS_GENL_NAME, .version = IPVS_GENL_VERSION, - .maxattr = IPVS_CMD_MAX, + .maxattr = IPVS_CMD_ATTR_MAX, .netnsok = true, /* Make ipvsadm to work on netns */ }; From fb9c9649a1d0a65a8f94f784aa18252a0dd584c1 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sat, 29 Oct 2016 03:01:50 +0200 Subject: [PATCH 197/319] netfilter: connmark: ignore skbs with magic untracked conntrack objects The (percpu) untracked conntrack entries can end up with nonzero connmarks. The 'untracked' conntrack objects are merely a way to distinguish INVALID (i.e. protocol connection tracker says payload doesn't meet some requirements or packet was never seen by the connection tracking code) from packets that are intentionally not tracked (some icmpv6 types such as neigh solicitation, or by using 'iptables -j CT --notrack' option). Untracked conntrack objects are implementation detail, we might as well use invalid magic address instead to tell INVALID and UNTRACKED apart. Check skb->nfct for untracked dummy and behave as if skb->nfct is NULL. Reported-by: XU Tianwen Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/xt_connmark.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/netfilter/xt_connmark.c b/net/netfilter/xt_connmark.c index 69f78e96fdb4..b83e158e116a 100644 --- a/net/netfilter/xt_connmark.c +++ b/net/netfilter/xt_connmark.c @@ -44,7 +44,7 @@ connmark_tg(struct sk_buff *skb, const struct xt_action_param *par) u_int32_t newmark; ct = nf_ct_get(skb, &ctinfo); - if (ct == NULL) + if (ct == NULL || nf_ct_is_untracked(ct)) return XT_CONTINUE; switch (info->mode) { @@ -97,7 +97,7 @@ connmark_mt(const struct sk_buff *skb, struct xt_action_param *par) const struct nf_conn *ct; ct = nf_ct_get(skb, &ctinfo); - if (ct == NULL) + if (ct == NULL || nf_ct_is_untracked(ct)) return false; return ((ct->mark & info->mask) == info->mark) ^ info->invert; From 6114cc516dcc0d311badb83ad7db5aa4b611bea6 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 3 Nov 2016 14:44:42 +0100 Subject: [PATCH 198/319] netfilter: conntrack: fix CT target for UNSPEC helpers Thomas reports its not possible to attach the H.245 helper: iptables -t raw -A PREROUTING -p udp -j CT --helper H.245 iptables: No chain/target/match by that name. xt_CT: No such helper "H.245" This is because H.245 registers as NFPROTO_UNSPEC, but the CT target passes NFPROTO_IPV4/IPV6 to nf_conntrack_helper_try_module_get. We should treat UNSPEC as wildcard and ignore the l3num instead. Reported-by: Thomas Woerner Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_helper.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index 336e21559e01..7341adf7059d 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -138,9 +138,14 @@ __nf_conntrack_helper_find(const char *name, u16 l3num, u8 protonum) for (i = 0; i < nf_ct_helper_hsize; i++) { hlist_for_each_entry_rcu(h, &nf_ct_helper_hash[i], hnode) { - if (!strcmp(h->name, name) && - h->tuple.src.l3num == l3num && - h->tuple.dst.protonum == protonum) + if (strcmp(h->name, name)) + continue; + + if (h->tuple.src.l3num != NFPROTO_UNSPEC && + h->tuple.src.l3num != l3num) + continue; + + if (h->tuple.dst.protonum == protonum) return h; } } From e0df8cae6c16b9ba66a005079aa754b9eedc6efa Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 4 Nov 2016 16:54:58 +0100 Subject: [PATCH 199/319] netfilter: conntrack: refine gc worker heuristics Nicolas Dichtel says: After commit b87a2f9199ea ("netfilter: conntrack: add gc worker to remove timed-out entries"), netlink conntrack deletion events may be sent with a huge delay. Nicolas further points at this line: goal = min(nf_conntrack_htable_size / GC_MAX_BUCKETS_DIV, GC_MAX_BUCKETS); and indeed, this isn't optimal at all. Rationale here was to ensure that we don't block other work items for too long, even if nf_conntrack_htable_size is huge. But in order to have some guarantee about maximum time period where a scan of the full conntrack table completes we should always use a fixed slice size, so that once every N scans the full table has been examined at least once. We also need to balance this vs. the case where the system is either idle (i.e., conntrack table (almost) empty) or very busy (i.e. eviction happens from packet path). So, after some discussion with Nicolas: 1. want hard guarantee that we scan entire table at least once every X s -> need to scan fraction of table (get rid of upper bound) 2. don't want to eat cycles on idle or very busy system -> increase interval if we did not evict any entries 3. don't want to block other worker items for too long -> make fraction really small, and prefer small scan interval instead 4. Want reasonable short time where we detect timed-out entry when system went idle after a burst of traffic, while not doing scans all the time. -> Store next gc scan in worker, increasing delays when no eviction happened and shrinking delay when we see timed out entries. The old gc interval is turned into a max number, scans can now happen every jiffy if stale entries are present. Longest possible time period until an entry is evicted is now 2 minutes in worst case (entry expires right after it was deemed 'not expired'). Reported-by: Nicolas Dichtel Signed-off-by: Florian Westphal Acked-by: Nicolas Dichtel Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_core.c | 49 ++++++++++++++++++++++++++----- 1 file changed, 41 insertions(+), 8 deletions(-) diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index df2f5a3901df..0f87e5d21be7 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -76,6 +76,7 @@ struct conntrack_gc_work { struct delayed_work dwork; u32 last_bucket; bool exiting; + long next_gc_run; }; static __read_mostly struct kmem_cache *nf_conntrack_cachep; @@ -83,9 +84,11 @@ static __read_mostly spinlock_t nf_conntrack_locks_all_lock; static __read_mostly DEFINE_SPINLOCK(nf_conntrack_locks_all_lock); static __read_mostly bool nf_conntrack_locks_all; +/* every gc cycle scans at most 1/GC_MAX_BUCKETS_DIV part of table */ #define GC_MAX_BUCKETS_DIV 64u -#define GC_MAX_BUCKETS 8192u -#define GC_INTERVAL (5 * HZ) +/* upper bound of scan intervals */ +#define GC_INTERVAL_MAX (2 * HZ) +/* maximum conntracks to evict per gc run */ #define GC_MAX_EVICTS 256u static struct conntrack_gc_work conntrack_gc_work; @@ -936,13 +939,13 @@ static noinline int early_drop(struct net *net, unsigned int _hash) static void gc_worker(struct work_struct *work) { unsigned int i, goal, buckets = 0, expired_count = 0; - unsigned long next_run = GC_INTERVAL; - unsigned int ratio, scanned = 0; struct conntrack_gc_work *gc_work; + unsigned int ratio, scanned = 0; + unsigned long next_run; gc_work = container_of(work, struct conntrack_gc_work, dwork.work); - goal = min(nf_conntrack_htable_size / GC_MAX_BUCKETS_DIV, GC_MAX_BUCKETS); + goal = nf_conntrack_htable_size / GC_MAX_BUCKETS_DIV; i = gc_work->last_bucket; do { @@ -982,17 +985,47 @@ static void gc_worker(struct work_struct *work) if (gc_work->exiting) return; + /* + * Eviction will normally happen from the packet path, and not + * from this gc worker. + * + * This worker is only here to reap expired entries when system went + * idle after a busy period. + * + * The heuristics below are supposed to balance conflicting goals: + * + * 1. Minimize time until we notice a stale entry + * 2. Maximize scan intervals to not waste cycles + * + * Normally, expired_count will be 0, this increases the next_run time + * to priorize 2) above. + * + * As soon as a timed-out entry is found, move towards 1) and increase + * the scan frequency. + * In case we have lots of evictions next scan is done immediately. + */ ratio = scanned ? expired_count * 100 / scanned : 0; - if (ratio >= 90 || expired_count == GC_MAX_EVICTS) + if (ratio >= 90 || expired_count == GC_MAX_EVICTS) { + gc_work->next_gc_run = 0; next_run = 0; + } else if (expired_count) { + gc_work->next_gc_run /= 2U; + next_run = msecs_to_jiffies(1); + } else { + if (gc_work->next_gc_run < GC_INTERVAL_MAX) + gc_work->next_gc_run += msecs_to_jiffies(1); + + next_run = gc_work->next_gc_run; + } gc_work->last_bucket = i; - schedule_delayed_work(&gc_work->dwork, next_run); + queue_delayed_work(system_long_wq, &gc_work->dwork, next_run); } static void conntrack_gc_work_init(struct conntrack_gc_work *gc_work) { INIT_DELAYED_WORK(&gc_work->dwork, gc_worker); + gc_work->next_gc_run = GC_INTERVAL_MAX; gc_work->exiting = false; } @@ -1885,7 +1918,7 @@ int nf_conntrack_init_start(void) nf_ct_untracked_status_or(IPS_CONFIRMED | IPS_UNTRACKED); conntrack_gc_work_init(&conntrack_gc_work); - schedule_delayed_work(&conntrack_gc_work.dwork, GC_INTERVAL); + queue_delayed_work(system_long_wq, &conntrack_gc_work.dwork, GC_INTERVAL_MAX); return 0; From 58c78e104d937c1f560fb10ed9bb2dcde0db4fcf Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Sun, 6 Nov 2016 14:40:01 +0800 Subject: [PATCH 200/319] netfilter: nf_tables: fix oops when inserting an element into a verdict map Dalegaard says: The following ruleset, when loaded with 'nft -f bad.txt' ----snip---- flush ruleset table ip inlinenat { map sourcemap { type ipv4_addr : verdict; } chain postrouting { ip saddr vmap @sourcemap accept } } add chain inlinenat test add element inlinenat sourcemap { 100.123.10.2 : jump test } ----snip---- results in a kernel oops: BUG: unable to handle kernel paging request at 0000000000001344 IP: [] nf_tables_check_loops+0x114/0x1f0 [nf_tables] [...] Call Trace: [] ? nft_data_init+0x13e/0x1a0 [nf_tables] [] nft_validate_register_store+0x60/0xb0 [nf_tables] [] nft_add_set_elem+0x545/0x5e0 [nf_tables] [] ? nft_table_lookup+0x30/0x60 [nf_tables] [] ? nla_strcmp+0x40/0x50 [] nf_tables_newsetelem+0x11e/0x210 [nf_tables] [] ? nla_validate+0x60/0x80 [] nfnetlink_rcv+0x354/0x5a7 [nfnetlink] Because we forget to fill the net pointer in bind_ctx, so dereferencing it may cause kernel crash. Reported-by: Dalegaard Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 7d6a626b08f1..026581b04ea8 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -3568,6 +3568,7 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, dreg = nft_type_to_reg(set->dtype); list_for_each_entry(binding, &set->bindings, list) { struct nft_ctx bind_ctx = { + .net = ctx->net, .afi = ctx->afi, .table = ctx->table, .chain = (struct nft_chain *)binding->chain, From 69e2d1e6c0af0dd7f18cfd434b008844568641a9 Mon Sep 17 00:00:00 2001 From: Varun Prakash Date: Sat, 5 Nov 2016 21:49:28 +0530 Subject: [PATCH 201/319] scsi: libcxgbi: fix incorrect DDP resource cleanup Before calling task_release_itt() task data is memset to zero because of which DDP context information is lost resulting in incorrect DDP resource cleanup, to fix this call task_release_itt() before memset. Signed-off-by: Varun Prakash Signed-off-by: Martin K. Petersen --- drivers/scsi/cxgbi/libcxgbi.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/cxgbi/libcxgbi.c b/drivers/scsi/cxgbi/libcxgbi.c index d1421139e6ea..2ffe029ff2b6 100644 --- a/drivers/scsi/cxgbi/libcxgbi.c +++ b/drivers/scsi/cxgbi/libcxgbi.c @@ -2081,9 +2081,10 @@ void cxgbi_cleanup_task(struct iscsi_task *task) /* never reached the xmit task callout */ if (tdata->skb) __kfree_skb(tdata->skb); - memset(tdata, 0, sizeof(*tdata)); task_release_itt(task, task->hdr_itt); + memset(tdata, 0, sizeof(*tdata)); + iscsi_tcp_cleanup_task(task); } EXPORT_SYMBOL_GPL(cxgbi_cleanup_task); From 04dfaa53a0b6e66b328a5bc549e3af8f8b6eac02 Mon Sep 17 00:00:00 2001 From: Mauricio Faria de Oliveira Date: Mon, 7 Nov 2016 17:53:30 -0200 Subject: [PATCH 202/319] scsi: qla2xxx: do not queue commands when unloading When the driver is unloading, in qla2x00_remove_one(), there is a single call/point in time to abort ongoing commands, qla2x00_abort_all_cmds(), which is still several steps away from the call to scsi_remove_host(). If more commands continue to arrive and be processed during that interval, when the driver is tearing down and releasing its structures, it might potentially hit an oops due to invalid memory access: Unable to handle kernel paging request for data at address 0x00000138 <...> NIP [d000000004700a40] qla2xxx_queuecommand+0x80/0x3f0 [qla2xxx] LR [d000000004700a10] qla2xxx_queuecommand+0x50/0x3f0 [qla2xxx] So, fail commands in qla2xxx_queuecommand() if the UNLOADING bit is set. Signed-off-by: Mauricio Faria de Oliveira Acked-by: Himanshu Madhani Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_os.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index 4d99c3b37687..e5db47443668 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -707,6 +707,11 @@ qla2xxx_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd) srb_t *sp; int rval; + if (unlikely(test_bit(UNLOADING, &base_vha->dpc_flags))) { + cmd->result = DID_NO_CONNECT << 16; + goto qc24_fail_command; + } + if (ha->flags.eeh_busy) { if (ha->flags.pci_channel_io_perm_failure) { ql_dbg(ql_dbg_aer, vha, 0x9010, From 1535aa75a3d8320374f82d198e3900d5b0969d7e Mon Sep 17 00:00:00 2001 From: Mauricio Faria de Oliveira Date: Mon, 7 Nov 2016 17:53:31 -0200 Subject: [PATCH 203/319] scsi: qla2xxx: fix invalid DMA access after command aborts in PCI device remove If a command is aborted in the kernel but not in the adapter, it might be considered complete and its DMA memory released, but it is still alive in the adapter, which will trigger an invalid DMA access upon its completion (in the DMA operations to deliver the command response to the driver). On powerpc platforms with IOMMU/EEH capabilities, the problem is observed during PCI device removal with ongoing IO requests -- which might trigger an EEH event very often, pointing to a 'TCE Request Page Access Error'. In that path, which is qla2x00_remove_one(), the commands are aborted in qla2x00_abort_all_cmds(), which does not perform an abort in the adapter as is done in qla2xxx_eh_abort() for example. So, this patch changes qla2x00_abort_all_cmds() to abort commands in the adapter too, with a call to qla2xxx_eh_abort(), which already implements all the logic to submit abort requests and handle responses. Reported-by: Naresh Bannoth Signed-off-by: Mauricio Faria de Oliveira Acked-by: Himanshu Madhani Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_os.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index e5db47443668..567fa080e261 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -1456,6 +1456,15 @@ qla2x00_abort_all_cmds(scsi_qla_host_t *vha, int res) for (cnt = 1; cnt < req->num_outstanding_cmds; cnt++) { sp = req->outstanding_cmds[cnt]; if (sp) { + /* Get a reference to the sp and drop the lock. + * The reference ensures this sp->done() call + * - and not the call in qla2xxx_eh_abort() - + * ends the SCSI command (with result 'res'). + */ + sp_get(sp); + spin_unlock_irqrestore(&ha->hardware_lock, flags); + qla2xxx_eh_abort(GET_CMD_SP(sp)); + spin_lock_irqsave(&ha->hardware_lock, flags); req->outstanding_cmds[cnt] = NULL; sp->done(vha, sp, res); } From 5ced937b7d8dae2c6a536112abaad6352769a931 Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Tue, 8 Nov 2016 17:04:10 +0100 Subject: [PATCH 204/319] drm/imx: disable planes before DC If the DC clock is disabled before the attached IDMACs are properly stopped the IDMACs may hang the IPU or even the whole system. Make sure the IDMACs are in safe state by disabling the planes before removal of the DC clock. Also set the atomic parameter to false to stop calling the atomic_begin hook, which does nothing useful as we immediately afterwards turn off vblank interrupts and possibly send the pending vblank event. Fixes: 33f14235302f (drm/imx: atomic phase 1: Use transitional atomic CRTC and plane helpers) Signed-off-by: Lucas Stach Signed-off-by: Philipp Zabel --- drivers/gpu/drm/imx/ipuv3-crtc.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/imx/ipuv3-crtc.c b/drivers/gpu/drm/imx/ipuv3-crtc.c index 4e1ae3fc462d..6be515a9fb69 100644 --- a/drivers/gpu/drm/imx/ipuv3-crtc.c +++ b/drivers/gpu/drm/imx/ipuv3-crtc.c @@ -68,6 +68,12 @@ static void ipu_crtc_atomic_disable(struct drm_crtc *crtc, ipu_dc_disable_channel(ipu_crtc->dc); ipu_di_disable(ipu_crtc->di); + /* + * Planes must be disabled before DC clock is removed, as otherwise the + * attached IDMACs will be left in undefined state, possibly hanging + * the IPU or even system. + */ + drm_atomic_helper_disable_planes_on_crtc(old_crtc_state, false); ipu_dc_disable(ipu); spin_lock_irq(&crtc->dev->event_lock); @@ -77,9 +83,6 @@ static void ipu_crtc_atomic_disable(struct drm_crtc *crtc, } spin_unlock_irq(&crtc->dev->event_lock); - /* always disable planes on the CRTC */ - drm_atomic_helper_disable_planes_on_crtc(old_crtc_state, true); - drm_crtc_vblank_off(crtc); } From 9cba9844547731d2f14d79485c43192ffaa37b76 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 25 Oct 2016 01:21:10 +0900 Subject: [PATCH 205/319] perf hist browser: Fix hierarchy column counts The perf report/top on TUI supports horizontal scrolling using LEFT and RIGHT keys. But it calculate the number of columns incorrectly when hierarchy mode is enabled so that keep pressing RIGHT key can make the output disappeared. In the hierarchy mode, all sort keys are collapsed into a single column, so it needs to be applied when calculating column numbers. Reported-and-Tested-by: Markus Trippelsdorf Signed-off-by: Namhyung Kim Tested-by: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20161024162110.17918-1-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/hists.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index 4ffff7be9299..5adedc1a09d3 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -2076,8 +2076,21 @@ void hist_browser__init(struct hist_browser *browser, browser->b.use_navkeypressed = true; browser->show_headers = symbol_conf.show_hist_headers; - hists__for_each_format(hists, fmt) + if (symbol_conf.report_hierarchy) { + struct perf_hpp_list_node *fmt_node; + + /* count overhead columns (in the first node) */ + fmt_node = list_first_entry(&hists->hpp_formats, + struct perf_hpp_list_node, list); + perf_hpp_list__for_each_format(&fmt_node->hpp, fmt) + ++browser->b.columns; + + /* add a single column for whole hierarchy sort keys*/ ++browser->b.columns; + } else { + hists__for_each_format(hists, fmt) + ++browser->b.columns; + } hists__reset_column_width(hists); } From 3d9f4683929a968dc9b9493f4e608b109ad292a2 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 8 Nov 2016 22:08:30 +0900 Subject: [PATCH 206/319] perf hists browser: Fix indentation of folded sign on --hierarchy It should indent 2 spaces for folded sign and a whitespace. Signed-off-by: Namhyung Kim Tested-by: Markus Trippelsdorf Cc: Jiri Olsa Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20161108130833.9263-2-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/hists.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index 5adedc1a09d3..225ef2a15a13 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -1337,8 +1337,8 @@ static int hist_browser__show_hierarchy_entry(struct hist_browser *browser, } if (first) { - ui_browser__printf(&browser->b, "%c", folded_sign); - width--; + ui_browser__printf(&browser->b, "%c ", folded_sign); + width -= 2; first = false; } else { ui_browser__printf(&browser->b, " "); @@ -1555,7 +1555,7 @@ static int hists_browser__scnprintf_hierarchy_headers(struct hist_browser *brows int indent = hists->nr_hpp_node - 2; bool first_node, first_col; - ret = scnprintf(buf, size, " "); + ret = scnprintf(buf, size, " "); if (advance_hpp_check(&dummy_hpp, ret)) return ret; From 131d51eb1d17aac3a604cf929fd99ff4dd34f495 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 8 Nov 2016 22:08:31 +0900 Subject: [PATCH 207/319] perf hists browser: Show folded sign properly on --hierarchy When horizontal scrolling is used in hierarchy mode, the folded signed disappears at the right most column. Committer note: To test it, run 'perf top --hierarchy, see the '+' symbol at the first column, then press the right arrow key, the '+' symbol will disappear, this patch fixes that. Signed-off-by: Namhyung Kim Tested-by: Arnaldo Carvalho de Melo Tested-by: Markus Trippelsdorf Cc: Jiri Olsa Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20161108130833.9263-3-namhyung@kernel.org [ Move 'width -= 2' invariant to right after the if/else ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/hists.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index 225ef2a15a13..e767fbd17ad2 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -1381,7 +1381,13 @@ static int hist_browser__show_hierarchy_entry(struct hist_browser *browser, } perf_hpp_list__for_each_format(entry->hpp_list, fmt) { - ui_browser__write_nstring(&browser->b, "", 2); + if (first) { + ui_browser__printf(&browser->b, "%c ", folded_sign); + first = false; + } else { + ui_browser__write_nstring(&browser->b, "", 2); + } + width -= 2; /* From b9bf911e990a189f89147ee6b66660a153ed0125 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 8 Nov 2016 22:08:32 +0900 Subject: [PATCH 208/319] perf hists browser: Fix column indentation on --hierarchy When horizontall scrolling is used in hierarchy mode, the the right most column has unnecessary indentation. Actually it's needed only if some of left (overhead) columns were shown. Signed-off-by: Namhyung Kim Tested-by: Arnaldo Carvalho de Melo Tested-by: Markus Trippelsdorf Cc: Jiri Olsa Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20161108130833.9263-4-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/hists.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index e767fbd17ad2..a53fef0c673b 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -1361,8 +1361,10 @@ static int hist_browser__show_hierarchy_entry(struct hist_browser *browser, width -= hpp.buf - s; } - ui_browser__write_nstring(&browser->b, "", hierarchy_indent); - width -= hierarchy_indent; + if (!first) { + ui_browser__write_nstring(&browser->b, "", hierarchy_indent); + width -= hierarchy_indent; + } if (column >= browser->b.horiz_scroll) { char s[2048]; @@ -1565,6 +1567,7 @@ static int hists_browser__scnprintf_hierarchy_headers(struct hist_browser *brows if (advance_hpp_check(&dummy_hpp, ret)) return ret; + first_node = true; /* the first hpp_list_node is for overhead columns */ fmt_node = list_first_entry(&hists->hpp_formats, struct perf_hpp_list_node, list); @@ -1579,12 +1582,16 @@ static int hists_browser__scnprintf_hierarchy_headers(struct hist_browser *brows ret = scnprintf(dummy_hpp.buf, dummy_hpp.size, " "); if (advance_hpp_check(&dummy_hpp, ret)) break; + + first_node = false; } - ret = scnprintf(dummy_hpp.buf, dummy_hpp.size, "%*s", - indent * HIERARCHY_INDENT, ""); - if (advance_hpp_check(&dummy_hpp, ret)) - return ret; + if (!first_node) { + ret = scnprintf(dummy_hpp.buf, dummy_hpp.size, "%*s", + indent * HIERARCHY_INDENT, ""); + if (advance_hpp_check(&dummy_hpp, ret)) + return ret; + } first_node = true; list_for_each_entry_continue(fmt_node, &hists->hpp_formats, list) { From c72ab446cac1d6c9551fd26c4cfef1b2fc5041fd Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 8 Nov 2016 22:08:33 +0900 Subject: [PATCH 209/319] perf hists: Fix column length on --hierarchy Markus reported that there's a weird behavior on perf top --hierarchy regarding the column length. Looking at the code, I found a dubious code which affects the symptoms. When --hierarchy option is used, the last column length might be inaccurate since it skips to update the length on leaf entries. I cannot remember why it did and looks like a leftover from previous version during the development. Anyway, updating the column length often is not harmful. So let's move the code out. Reported-and-Tested-by: Markus Trippelsdorf Signed-off-by: Namhyung Kim Cc: Jiri Olsa Cc: Peter Zijlstra Fixes: 1a3906a7e6b9 ("perf hists: Resort hist entries with hierarchy") Link: http://lkml.kernel.org/r/20161108130833.9263-5-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/hist.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index b02992efb513..a69f027368ef 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -1600,18 +1600,18 @@ static void hists__hierarchy_output_resort(struct hists *hists, if (prog) ui_progress__update(prog, 1); + hists->nr_entries++; + if (!he->filtered) { + hists->nr_non_filtered_entries++; + hists__calc_col_len(hists, he); + } + if (!he->leaf) { hists__hierarchy_output_resort(hists, prog, &he->hroot_in, &he->hroot_out, min_callchain_hits, use_callchain); - hists->nr_entries++; - if (!he->filtered) { - hists->nr_non_filtered_entries++; - hists__calc_col_len(hists, he); - } - continue; } From b0b6e86846093c5f8820386bc01515f857dd8faa Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Tue, 8 Nov 2016 09:35:06 +0100 Subject: [PATCH 210/319] x86/cpu/AMD: Fix cpu_llc_id for AMD Fam17h systems cpu_llc_id (Last Level Cache ID) derivation on AMD Fam17h has an underflow bug when extracting the socket_id value. It starts from 0 so subtracting 1 from it will result in an invalid value. This breaks scheduling topology later on since the cpu_llc_id will be incorrect. For example, the the cpu_llc_id of the *other* CPU in the loops in set_cpu_sibling_map() underflows and we're generating the funniest thread_siblings masks and then when I run 8 threads of nbench, they get spread around the LLC domains in a very strange pattern which doesn't give you the normal scheduling spread one would expect for performance. Other things like EDAC use cpu_llc_id so they will be b0rked too. So, the APIC ID is preset in APICx020 for bits 3 and above: they contain the core complex, node and socket IDs. The LLC is at the core complex level so we can find a unique cpu_llc_id by right shifting the APICID by 3 because then the least significant bit will be the Core Complex ID. Tested-by: Borislav Petkov Signed-off-by: Yazen Ghannam [ Cleaned up and extended the commit message. ] Signed-off-by: Borislav Petkov Acked-by: Thomas Gleixner Cc: # v4.4.. Cc: Aravind Gopalakrishnan Cc: Linus Torvalds Cc: Peter Zijlstra Fixes: 3849e91f571d ("x86/AMD: Fix last level cache topology for AMD Fam17h systems") Link: http://lkml.kernel.org/r/20161108083506.rvqb5h4chrcptj7d@pd.tnic Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/amd.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index b81fe2d63e15..1e81a37c034e 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -347,7 +347,6 @@ static void amd_detect_cmp(struct cpuinfo_x86 *c) #ifdef CONFIG_SMP unsigned bits; int cpu = smp_processor_id(); - unsigned int socket_id, core_complex_id; bits = c->x86_coreid_bits; /* Low order bits define the core id (index of core in socket) */ @@ -365,10 +364,7 @@ static void amd_detect_cmp(struct cpuinfo_x86 *c) if (c->x86 != 0x17 || !cpuid_edx(0x80000006)) return; - socket_id = (c->apicid >> bits) - 1; - core_complex_id = (c->apicid & ((1 << bits) - 1)) >> 3; - - per_cpu(cpu_llc_id, cpu) = (socket_id << 3) | core_complex_id; + per_cpu(cpu_llc_id, cpu) = c->apicid >> 3; #endif } From 5e5ec1759dd663a1d5a2f10930224dd009e500e8 Mon Sep 17 00:00:00 2001 From: Sumit Saxena Date: Wed, 9 Nov 2016 02:59:42 -0800 Subject: [PATCH 211/319] scsi: megaraid_sas: fix macro MEGASAS_IS_LOGICAL to avoid regression This patch will fix regression caused by commit 1e793f6fc0db ("scsi: megaraid_sas: Fix data integrity failure for JBOD (passthrough) devices"). The problem was that the MEGASAS_IS_LOGICAL macro did not have braces and as a result the driver ended up exposing a lot of non-existing SCSI devices (all SCSI commands to channels 1,2,3 were returned as SUCCESS-DID_OK by driver). [mkp: clarified patch description] Fixes: 1e793f6fc0db920400574211c48f9157a37e3945 Reported-by: Jens Axboe CC: stable@vger.kernel.org Signed-off-by: Kashyap Desai Signed-off-by: Sumit Saxena Tested-by: Sumit Saxena Reviewed-by: Tomas Henzl Tested-by: Jens Axboe Signed-off-by: Martin K. Petersen --- drivers/scsi/megaraid/megaraid_sas.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/megaraid/megaraid_sas.h b/drivers/scsi/megaraid/megaraid_sas.h index ca86c885dfaa..3aaea713bf37 100644 --- a/drivers/scsi/megaraid/megaraid_sas.h +++ b/drivers/scsi/megaraid/megaraid_sas.h @@ -2233,7 +2233,7 @@ struct megasas_instance_template { }; #define MEGASAS_IS_LOGICAL(scp) \ - (scp->device->channel < MEGASAS_MAX_PD_CHANNELS) ? 0 : 1 + ((scp->device->channel < MEGASAS_MAX_PD_CHANNELS) ? 0 : 1) #define MEGASAS_DEV_INDEX(scp) \ (((scp->device->channel % 2) * MEGASAS_MAX_DEV_PER_CHANNEL) + \ From aa5fd0fb77486b8a6764ead8627baa14790e4280 Mon Sep 17 00:00:00 2001 From: Gao Feng Date: Fri, 4 Nov 2016 10:28:49 +0800 Subject: [PATCH 212/319] driver: macvlan: Destroy new macvlan port if macvlan_common_newlink failed. When there is no existing macvlan port in lowdev, one new macvlan port would be created. But it doesn't be destoried when something failed later. It casues some memleak. Now add one flag to indicate if new macvlan port is created. Signed-off-by: Gao Feng Signed-off-by: David S. Miller --- drivers/net/macvlan.c | 31 ++++++++++++++++++++++--------- 1 file changed, 22 insertions(+), 9 deletions(-) diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index 3234fcdea317..d2d6f12a112f 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -1278,6 +1278,7 @@ int macvlan_common_newlink(struct net *src_net, struct net_device *dev, struct net_device *lowerdev; int err; int macmode; + bool create = false; if (!tb[IFLA_LINK]) return -EINVAL; @@ -1304,12 +1305,18 @@ int macvlan_common_newlink(struct net *src_net, struct net_device *dev, err = macvlan_port_create(lowerdev); if (err < 0) return err; + create = true; } port = macvlan_port_get_rtnl(lowerdev); /* Only 1 macvlan device can be created in passthru mode */ - if (port->passthru) - return -EINVAL; + if (port->passthru) { + /* The macvlan port must be not created this time, + * still goto destroy_macvlan_port for readability. + */ + err = -EINVAL; + goto destroy_macvlan_port; + } vlan->lowerdev = lowerdev; vlan->dev = dev; @@ -1325,24 +1332,28 @@ int macvlan_common_newlink(struct net *src_net, struct net_device *dev, vlan->flags = nla_get_u16(data[IFLA_MACVLAN_FLAGS]); if (vlan->mode == MACVLAN_MODE_PASSTHRU) { - if (port->count) - return -EINVAL; + if (port->count) { + err = -EINVAL; + goto destroy_macvlan_port; + } port->passthru = true; eth_hw_addr_inherit(dev, lowerdev); } if (data && data[IFLA_MACVLAN_MACADDR_MODE]) { - if (vlan->mode != MACVLAN_MODE_SOURCE) - return -EINVAL; + if (vlan->mode != MACVLAN_MODE_SOURCE) { + err = -EINVAL; + goto destroy_macvlan_port; + } macmode = nla_get_u32(data[IFLA_MACVLAN_MACADDR_MODE]); err = macvlan_changelink_sources(vlan, macmode, data); if (err) - return err; + goto destroy_macvlan_port; } err = register_netdevice(dev); if (err < 0) - return err; + goto destroy_macvlan_port; dev->priv_flags |= IFF_MACVLAN; err = netdev_upper_dev_link(lowerdev, dev); @@ -1357,7 +1368,9 @@ int macvlan_common_newlink(struct net *src_net, struct net_device *dev, unregister_netdev: unregister_netdevice(dev); - +destroy_macvlan_port: + if (create) + macvlan_port_destroy(port->dev); return err; } EXPORT_SYMBOL_GPL(macvlan_common_newlink); From 3023898b7d4aac65987bd2f485cc22390aae6f78 Mon Sep 17 00:00:00 2001 From: Soheil Hassas Yeganeh Date: Fri, 4 Nov 2016 15:36:49 -0400 Subject: [PATCH 213/319] sock: fix sendmmsg for partial sendmsg MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Do not send the next message in sendmmsg for partial sendmsg invocations. sendmmsg assumes that it can continue sending the next message when the return value of the individual sendmsg invocations is positive. It results in corrupting the data for TCP, SCTP, and UNIX streams. For example, sendmmsg([["abcd"], ["efgh"]]) can result in a stream of "aefgh" if the first sendmsg invocation sends only the first byte while the second sendmsg goes through. Datagram sockets either send the entire datagram or fail, so this patch affects only sockets of type SOCK_STREAM and SOCK_SEQPACKET. Fixes: 228e548e6020 ("net: Add sendmmsg socket system call") Signed-off-by: Soheil Hassas Yeganeh Signed-off-by: Eric Dumazet Signed-off-by: Willem de Bruijn Signed-off-by: Neal Cardwell Acked-by: Maciej Żenczykowski Signed-off-by: David S. Miller --- net/socket.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/socket.c b/net/socket.c index 5a9bf5ee2464..272518b087c8 100644 --- a/net/socket.c +++ b/net/socket.c @@ -2038,6 +2038,8 @@ int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen, if (err) break; ++datagrams; + if (msg_data_left(&msg_sys)) + break; cond_resched(); } From fb56be83e43d0bb0cc9e8c35a6a9cac853231ba2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maciej=20=C5=BBenczykowski?= Date: Fri, 4 Nov 2016 14:51:54 -0700 Subject: [PATCH 214/319] net-ipv6: on device mtu change do not add mtu to mtu-less routes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Routes can specify an mtu explicitly or inherit the mtu from the underlying device - this inheritance is implemented in dst->ops->mtu handlers ip6_mtu() and ip6_blackhole_mtu(). Currently changing the mtu of a device adds mtu explicitly to routes using that device. ie. # ip link set dev lo mtu 65536 # ip -6 route add local 2000::1 dev lo # ip -6 route get 2000::1 local 2000::1 dev lo table local src ... metric 1024 pref medium # ip link set dev lo mtu 65535 # ip -6 route get 2000::1 local 2000::1 dev lo table local src ... metric 1024 mtu 65535 pref medium # ip link set dev lo mtu 65536 # ip -6 route get 2000::1 local 2000::1 dev lo table local src ... metric 1024 mtu 65536 pref medium # ip -6 route del local 2000::1 After this patch the route entry no longer changes unless it already has an mtu. There is no need: this inheritance is already done in ip6_mtu() # ip link set dev lo mtu 65536 # ip -6 route add local 2000::1 dev lo # ip -6 route add local 2000::2 dev lo mtu 2000 # ip -6 route get 2000::1; ip -6 route get 2000::2 local 2000::1 dev lo table local src ... metric 1024 pref medium local 2000::2 dev lo table local src ... metric 1024 mtu 2000 pref medium # ip link set dev lo mtu 65535 # ip -6 route get 2000::1; ip -6 route get 2000::2 local 2000::1 dev lo table local src ... metric 1024 pref medium local 2000::2 dev lo table local src ... metric 1024 mtu 2000 pref medium # ip link set dev lo mtu 1501 # ip -6 route get 2000::1; ip -6 route get 2000::2 local 2000::1 dev lo table local src ... metric 1024 pref medium local 2000::2 dev lo table local src ... metric 1024 mtu 1501 pref medium # ip link set dev lo mtu 65536 # ip -6 route get 2000::1; ip -6 route get 2000::2 local 2000::1 dev lo table local src ... metric 1024 pref medium local 2000::2 dev lo table local src ... metric 1024 mtu 65536 pref medium # ip -6 route del local 2000::1 # ip -6 route del local 2000::2 This is desirable because changing device mtu and then resetting it to the previous value shouldn't change the user visible routing table. Signed-off-by: Maciej Żenczykowski CC: Eric Dumazet Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- net/ipv6/route.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 7403d90dcb38..1b57e11e6e0d 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2761,6 +2761,7 @@ static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg) PMTU discouvery. */ if (rt->dst.dev == arg->dev && + dst_metric_raw(&rt->dst, RTAX_MTU) && !dst_metric_locked(&rt->dst, RTAX_MTU)) { if (rt->rt6i_flags & RTF_CACHE) { /* For RTF_CACHE with rt6i_pmtu == 0 From f91d718156fe93d0cf684cacf5f247c35a825d79 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Sun, 6 Nov 2016 18:05:06 +0200 Subject: [PATCH 215/319] Revert "net/mlx4_en: Fix panic during reboot" This reverts commit 9d2afba058722d40cc02f430229c91611c0e8d16. The original issue would possibly exist if an external module tried calling our "ethtool_ops" without checking if it still exists. The right way of solving it is by simply doing the check in the caller side. Currently, no action is required as there's no such use case. Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 12c99a2655f2..3a47e83d3e07 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -2202,7 +2202,6 @@ void mlx4_en_destroy_netdev(struct net_device *dev) if (!shutdown) free_netdev(dev); - dev->ethtool_ops = NULL; } static int mlx4_en_change_mtu(struct net_device *dev, int new_mtu) From d667f78514c656a6a8bf0b3d6134a7fe5cd4d317 Mon Sep 17 00:00:00 2001 From: Benjamin Poirier Date: Mon, 7 Nov 2016 17:57:56 +0800 Subject: [PATCH 216/319] bna: Add synchronization for tx ring. We received two reports of BUG_ON in bnad_txcmpl_process() where hw_consumer_index appeared to be ahead of producer_index. Out of order write/read of these variables could explain these reports. bnad_start_xmit(), as a producer of tx descriptors, has a few memory barriers sprinkled around writes to producer_index and the device's doorbell but they're not paired with anything in bnad_txcmpl_process(), a consumer. Since we are synchronizing with a device, we must use mandatory barriers, not smp_*. Also, I didn't see the purpose of the last smp_mb() in bnad_start_xmit(). Signed-off-by: Benjamin Poirier Signed-off-by: David S. Miller --- drivers/net/ethernet/brocade/bna/bnad.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/brocade/bna/bnad.c b/drivers/net/ethernet/brocade/bna/bnad.c index f9df4b5ae90e..f42f672b0e7e 100644 --- a/drivers/net/ethernet/brocade/bna/bnad.c +++ b/drivers/net/ethernet/brocade/bna/bnad.c @@ -177,6 +177,7 @@ bnad_txcmpl_process(struct bnad *bnad, struct bna_tcb *tcb) return 0; hw_cons = *(tcb->hw_consumer_index); + rmb(); cons = tcb->consumer_index; q_depth = tcb->q_depth; @@ -3094,7 +3095,7 @@ bnad_start_xmit(struct sk_buff *skb, struct net_device *netdev) BNA_QE_INDX_INC(prod, q_depth); tcb->producer_index = prod; - smp_mb(); + wmb(); if (unlikely(!test_bit(BNAD_TXQ_TX_STARTED, &tcb->flags))) return NETDEV_TX_OK; @@ -3102,7 +3103,6 @@ bnad_start_xmit(struct sk_buff *skb, struct net_device *netdev) skb_tx_timestamp(skb); bna_txq_prod_indx_doorbell(tcb); - smp_mb(); return NETDEV_TX_OK; } From cdb26d3387f0cdf7b2a2eea581385173547ef21f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Mon, 7 Nov 2016 13:53:27 +0100 Subject: [PATCH 217/319] net: bgmac: fix reversed checks for clock control flag MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This fixes regression introduced by patch adding feature flags. It was already reported and patch followed (it got accepted) but it appears it was incorrect. Instead of fixing reversed condition it broke a good one. This patch was verified to actually fix SoC hanges caused by bgmac on BCM47186B0. Fixes: db791eb2970b ("net: ethernet: bgmac: convert to feature flags") Fixes: 4af1474e6198 ("net: bgmac: Fix errant feature flag check") Cc: Jon Mason Signed-off-by: Rafał Miłecki Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bgmac.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bgmac.c b/drivers/net/ethernet/broadcom/bgmac.c index 91cbf92de971..49f4cafe5438 100644 --- a/drivers/net/ethernet/broadcom/bgmac.c +++ b/drivers/net/ethernet/broadcom/bgmac.c @@ -1049,9 +1049,9 @@ static void bgmac_enable(struct bgmac *bgmac) mode = (bgmac_read(bgmac, BGMAC_DEV_STATUS) & BGMAC_DS_MM_MASK) >> BGMAC_DS_MM_SHIFT; - if (!(bgmac->feature_flags & BGMAC_FEAT_CLKCTLST) || mode != 0) + if (bgmac->feature_flags & BGMAC_FEAT_CLKCTLST || mode != 0) bgmac_set(bgmac, BCMA_CLKCTLST, BCMA_CLKCTLST_FORCEHT); - if (bgmac->feature_flags & BGMAC_FEAT_CLKCTLST && mode == 2) + if (!(bgmac->feature_flags & BGMAC_FEAT_CLKCTLST) && mode == 2) bgmac_cco_ctl_maskset(bgmac, 1, ~0, BGMAC_CHIPCTL_1_RXC_DLL_BYPASS); From d49597fd3bc7d9534de55e9256767f073be1b33a Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 9 Nov 2016 16:35:51 +0100 Subject: [PATCH 218/319] x86/cpu: Deal with broken firmware (VMWare/XEN) Both ACPI and MP specifications require that the APIC id in the respective tables must be the same as the APIC id in CPUID. The kernel retrieves the physical package id from the APIC id during the ACPI/MP table scan and builds the physical to logical package map. The physical package id which is used after a CPU comes up is retrieved from CPUID. So we rely on ACPI/MP tables and CPUID agreeing in that respect. There exist VMware and XEN implementations which violate the spec. As a result the physical to logical package map, which relies on the ACPI/MP tables does not work on those systems, because the CPUID initialized physical package id does not match the firmware id. This causes system crashes and malfunction due to invalid package mappings. The only way to cure this is to sanitize the physical package id after the CPUID enumeration and yell when the APIC ids are different. Fix up the initial APIC id, which is fine as it is only used printout purposes. If the physical package IDs differ yell and use the package information from the ACPI/MP tables so the existing logical package map just works. Chas provided the resulting dmesg output for his affected 4 virtual sockets, 1 core per socket VM: [Firmware Bug]: CPU1: APIC id mismatch. Firmware: 1 CPUID: 2 [Firmware Bug]: CPU1: Using firmware package id 1 instead of 2 .... Reported-and-tested-by: "Charles (Chas) Williams" , Reported-by: M. Vefa Bicakci Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: Sebastian Andrzej Siewior Cc: Borislav Petkov Cc: Alok Kataria Cc: Boris Ostrovsky Cc: #4.6+ Link: http://lkml.kernel.org/r/alpine.DEB.2.20.1611091613540.3501@nanos Signed-off-by: Thomas Gleixner --- arch/x86/kernel/cpu/common.c | 32 ++++++++++++++++++++++++++++++-- 1 file changed, 30 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 9bd910a7dd0a..cc9e980c68ec 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -978,6 +978,35 @@ static void x86_init_cache_qos(struct cpuinfo_x86 *c) } } +/* + * The physical to logical package id mapping is initialized from the + * acpi/mptables information. Make sure that CPUID actually agrees with + * that. + */ +static void sanitize_package_id(struct cpuinfo_x86 *c) +{ +#ifdef CONFIG_SMP + unsigned int pkg, apicid, cpu = smp_processor_id(); + + apicid = apic->cpu_present_to_apicid(cpu); + pkg = apicid >> boot_cpu_data.x86_coreid_bits; + + if (apicid != c->initial_apicid) { + pr_err(FW_BUG "CPU%u: APIC id mismatch. Firmware: %x CPUID: %x\n", + cpu, apicid, c->initial_apicid); + c->initial_apicid = apicid; + } + if (pkg != c->phys_proc_id) { + pr_err(FW_BUG "CPU%u: Using firmware package id %u instead of %u\n", + cpu, pkg, c->phys_proc_id); + c->phys_proc_id = pkg; + } + c->logical_proc_id = topology_phys_to_logical_pkg(pkg); +#else + c->logical_proc_id = 0; +#endif +} + /* * This does the hard work of actually picking apart the CPU stuff... */ @@ -1103,8 +1132,7 @@ static void identify_cpu(struct cpuinfo_x86 *c) #ifdef CONFIG_NUMA numa_add_cpu(smp_processor_id()); #endif - /* The boot/hotplug time assigment got cleared, restore it */ - c->logical_proc_id = topology_phys_to_logical_pkg(c->phys_proc_id); + sanitize_package_id(c); } /* From 1571875beecd5de9657f73931449bda1b1329b6f Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Thu, 3 Nov 2016 16:21:26 +0200 Subject: [PATCH 219/319] ACPI / platform: Add support for build-in properties MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We have a couple of drivers, acpi_apd.c and acpi_lpss.c, that need to pass extra build-in properties to the devices they create. Previously the drivers added those properties to the struct device which is member of the struct acpi_device, but that does not work. Those properties need to be assigned to the struct device of the platform device instead in order for them to become available to the drivers. To fix this, this patch changes acpi_create_platform_device function to take struct property_entry pointer as parameter. Fixes: 20a875e2e86e (serial: 8250_dw: Add quirk for APM X-Gene SoC) Signed-off-by: Heikki Krogerus Tested-by: Yazen Ghannam Tested-by: Jérôme de Bretagne Reviewed-by: Andy Shevchenko Signed-off-by: Rafael J. Wysocki --- drivers/acpi/acpi_apd.c | 10 ++-------- drivers/acpi/acpi_lpss.c | 10 ++-------- drivers/acpi/acpi_platform.c | 5 ++++- drivers/acpi/dptf/int340x_thermal.c | 4 ++-- drivers/acpi/scan.c | 2 +- drivers/platform/x86/intel-hid.c | 2 +- drivers/platform/x86/intel-vbtn.c | 2 +- include/linux/acpi.h | 3 ++- 8 files changed, 15 insertions(+), 23 deletions(-) diff --git a/drivers/acpi/acpi_apd.c b/drivers/acpi/acpi_apd.c index 5f112d811e42..5959ed996a75 100644 --- a/drivers/acpi/acpi_apd.c +++ b/drivers/acpi/acpi_apd.c @@ -117,7 +117,7 @@ static int acpi_apd_create_device(struct acpi_device *adev, int ret; if (!dev_desc) { - pdev = acpi_create_platform_device(adev); + pdev = acpi_create_platform_device(adev, NULL); return IS_ERR_OR_NULL(pdev) ? PTR_ERR(pdev) : 1; } @@ -134,14 +134,8 @@ static int acpi_apd_create_device(struct acpi_device *adev, goto err_out; } - if (dev_desc->properties) { - ret = device_add_properties(&adev->dev, dev_desc->properties); - if (ret) - goto err_out; - } - adev->driver_data = pdata; - pdev = acpi_create_platform_device(adev); + pdev = acpi_create_platform_device(adev, dev_desc->properties); if (!IS_ERR_OR_NULL(pdev)) return 1; diff --git a/drivers/acpi/acpi_lpss.c b/drivers/acpi/acpi_lpss.c index 552010288135..373657f7e35a 100644 --- a/drivers/acpi/acpi_lpss.c +++ b/drivers/acpi/acpi_lpss.c @@ -395,7 +395,7 @@ static int acpi_lpss_create_device(struct acpi_device *adev, dev_desc = (const struct lpss_device_desc *)id->driver_data; if (!dev_desc) { - pdev = acpi_create_platform_device(adev); + pdev = acpi_create_platform_device(adev, NULL); return IS_ERR_OR_NULL(pdev) ? PTR_ERR(pdev) : 1; } pdata = kzalloc(sizeof(*pdata), GFP_KERNEL); @@ -451,14 +451,8 @@ static int acpi_lpss_create_device(struct acpi_device *adev, goto err_out; } - if (dev_desc->properties) { - ret = device_add_properties(&adev->dev, dev_desc->properties); - if (ret) - goto err_out; - } - adev->driver_data = pdata; - pdev = acpi_create_platform_device(adev); + pdev = acpi_create_platform_device(adev, dev_desc->properties); if (!IS_ERR_OR_NULL(pdev)) { return 1; } diff --git a/drivers/acpi/acpi_platform.c b/drivers/acpi/acpi_platform.c index 159f7f19abce..56e70da83c6c 100644 --- a/drivers/acpi/acpi_platform.c +++ b/drivers/acpi/acpi_platform.c @@ -33,6 +33,7 @@ static const struct acpi_device_id forbidden_id_list[] = { /** * acpi_create_platform_device - Create platform device for ACPI device node * @adev: ACPI device node to create a platform device for. + * @properties: Optional collection of build-in properties. * * Check if the given @adev can be represented as a platform device and, if * that's the case, create and register a platform device, populate its common @@ -40,7 +41,8 @@ static const struct acpi_device_id forbidden_id_list[] = { * * Name of the platform device will be the same as @adev's. */ -struct platform_device *acpi_create_platform_device(struct acpi_device *adev) +struct platform_device *acpi_create_platform_device(struct acpi_device *adev, + struct property_entry *properties) { struct platform_device *pdev = NULL; struct platform_device_info pdevinfo; @@ -88,6 +90,7 @@ struct platform_device *acpi_create_platform_device(struct acpi_device *adev) pdevinfo.res = resources; pdevinfo.num_res = count; pdevinfo.fwnode = acpi_fwnode_handle(adev); + pdevinfo.properties = properties; if (acpi_dma_supported(adev)) pdevinfo.dma_mask = DMA_BIT_MASK(32); diff --git a/drivers/acpi/dptf/int340x_thermal.c b/drivers/acpi/dptf/int340x_thermal.c index 33505c651f62..86364097e236 100644 --- a/drivers/acpi/dptf/int340x_thermal.c +++ b/drivers/acpi/dptf/int340x_thermal.c @@ -34,11 +34,11 @@ static int int340x_thermal_handler_attach(struct acpi_device *adev, const struct acpi_device_id *id) { if (IS_ENABLED(CONFIG_INT340X_THERMAL)) - acpi_create_platform_device(adev); + acpi_create_platform_device(adev, NULL); /* Intel SoC DTS thermal driver needs INT3401 to set IRQ descriptor */ else if (IS_ENABLED(CONFIG_INTEL_SOC_DTS_THERMAL) && id->driver_data == INT3401_DEVICE) - acpi_create_platform_device(adev); + acpi_create_platform_device(adev, NULL); return 1; } diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index ad9fc84a8601..3d31ae3a482d 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -1734,7 +1734,7 @@ static void acpi_default_enumeration(struct acpi_device *device) &is_spi_i2c_slave); acpi_dev_free_resource_list(&resource_list); if (!is_spi_i2c_slave) { - acpi_create_platform_device(device); + acpi_create_platform_device(device, NULL); acpi_device_set_enumerated(device); } else { blocking_notifier_call_chain(&acpi_reconfig_chain, diff --git a/drivers/platform/x86/intel-hid.c b/drivers/platform/x86/intel-hid.c index ed5874217ee7..12dbb5063376 100644 --- a/drivers/platform/x86/intel-hid.c +++ b/drivers/platform/x86/intel-hid.c @@ -264,7 +264,7 @@ check_acpi_dev(acpi_handle handle, u32 lvl, void *context, void **rv) return AE_OK; if (acpi_match_device_ids(dev, ids) == 0) - if (acpi_create_platform_device(dev)) + if (acpi_create_platform_device(dev, NULL)) dev_info(&dev->dev, "intel-hid: created platform device\n"); diff --git a/drivers/platform/x86/intel-vbtn.c b/drivers/platform/x86/intel-vbtn.c index 146d02f8c9bc..78080763df51 100644 --- a/drivers/platform/x86/intel-vbtn.c +++ b/drivers/platform/x86/intel-vbtn.c @@ -164,7 +164,7 @@ check_acpi_dev(acpi_handle handle, u32 lvl, void *context, void **rv) return AE_OK; if (acpi_match_device_ids(dev, ids) == 0) - if (acpi_create_platform_device(dev)) + if (acpi_create_platform_device(dev, NULL)) dev_info(&dev->dev, "intel-vbtn: created platform device\n"); diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 632ec16a855e..c09936f55166 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -546,7 +546,8 @@ int acpi_device_uevent_modalias(struct device *, struct kobj_uevent_env *); int acpi_device_modalias(struct device *, char *, int); void acpi_walk_dep_device_list(acpi_handle handle); -struct platform_device *acpi_create_platform_device(struct acpi_device *); +struct platform_device *acpi_create_platform_device(struct acpi_device *, + struct property_entry *); #define ACPI_PTR(_ptr) (_ptr) static inline void acpi_device_set_enumerated(struct acpi_device *adev) From 3e884493448131179a5b7cae1ddca1028ffaecc8 Mon Sep 17 00:00:00 2001 From: Timur Tabi Date: Mon, 7 Nov 2016 10:51:40 -0600 Subject: [PATCH 220/319] net: qcom/emac: configure the external phy to allow pause frames Pause frames are used to enable flow control. A MAC can send and receive pause frames in order to throttle traffic. However, the PHY must be configured to allow those frames to pass through. Reviewed-by: Florian Fainelli Signed-off-by: Timur Tabi Signed-off-by: David S. Miller --- drivers/net/ethernet/qualcomm/emac/emac-mac.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/qualcomm/emac/emac-mac.c b/drivers/net/ethernet/qualcomm/emac/emac-mac.c index 6fb3bee904d3..70a55dcc431d 100644 --- a/drivers/net/ethernet/qualcomm/emac/emac-mac.c +++ b/drivers/net/ethernet/qualcomm/emac/emac-mac.c @@ -1003,6 +1003,12 @@ int emac_mac_up(struct emac_adapter *adpt) writel((u32)~DIS_INT, adpt->base + EMAC_INT_STATUS); writel(adpt->irq.mask, adpt->base + EMAC_INT_MASK); + /* Enable pause frames. Without this feature, the EMAC has been shown + * to receive (and drop) frames with FCS errors at gigabit connections. + */ + adpt->phydev->supported |= SUPPORTED_Pause | SUPPORTED_Asym_Pause; + adpt->phydev->advertising |= SUPPORTED_Pause | SUPPORTED_Asym_Pause; + adpt->phydev->irq = PHY_IGNORE_INTERRUPT; phy_start(adpt->phydev); From df63022e182de4041b65ae22df1950d3416b577e Mon Sep 17 00:00:00 2001 From: Timur Tabi Date: Mon, 7 Nov 2016 10:51:41 -0600 Subject: [PATCH 221/319] net: qcom/emac: enable flow control if requested If the PHY has been configured to allow pause frames, then the MAC should be configured to generate and/or accept those frames. Signed-off-by: Timur Tabi Signed-off-by: David S. Miller --- drivers/net/ethernet/qualcomm/emac/emac-mac.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/qualcomm/emac/emac-mac.c b/drivers/net/ethernet/qualcomm/emac/emac-mac.c index 70a55dcc431d..0b4deb31e742 100644 --- a/drivers/net/ethernet/qualcomm/emac/emac-mac.c +++ b/drivers/net/ethernet/qualcomm/emac/emac-mac.c @@ -575,10 +575,11 @@ void emac_mac_start(struct emac_adapter *adpt) mac |= TXEN | RXEN; /* enable RX/TX */ - /* We don't have ethtool support yet, so force flow-control mode - * to 'full' always. - */ - mac |= TXFC | RXFC; + /* Configure MAC flow control to match the PHY's settings. */ + if (phydev->pause) + mac |= RXFC; + if (phydev->pause != phydev->asym_pause) + mac |= TXFC; /* setup link speed */ mac &= ~SPEED_MASK; From 9d1a6c4ea43e48c7880c85971c17939b56832d8a Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 7 Nov 2016 12:03:09 -0800 Subject: [PATCH 222/319] net: icmp_route_lookup should use rt dev to determine L3 domain icmp_send is called in response to some event. The skb may not have the device set (skb->dev is NULL), but it is expected to have an rt. Update icmp_route_lookup to use the rt on the skb to determine L3 domain. Fixes: 613d09b30f8b ("net: Use VRF device index for lookups on TX") Signed-off-by: David Ahern Signed-off-by: David S. Miller --- net/ipv4/icmp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 38abe70e595f..48734ee6293f 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -477,7 +477,7 @@ static struct rtable *icmp_route_lookup(struct net *net, fl4->flowi4_proto = IPPROTO_ICMP; fl4->fl4_icmp_type = type; fl4->fl4_icmp_code = code; - fl4->flowi4_oif = l3mdev_master_ifindex(skb_in->dev); + fl4->flowi4_oif = l3mdev_master_ifindex(skb_dst(skb_in)->dev); security_skb_classify_flow(skb_in, flowi4_to_flowi(fl4)); rt = __ip_route_output_key_hash(net, fl4, @@ -502,7 +502,7 @@ static struct rtable *icmp_route_lookup(struct net *net, if (err) goto relookup_failed; - if (inet_addr_type_dev_table(net, skb_in->dev, + if (inet_addr_type_dev_table(net, skb_dst(skb_in)->dev, fl4_dec.saddr) == RTN_LOCAL) { rt2 = __ip_route_output_key(net, &fl4_dec); if (IS_ERR(rt2)) From 6dbcd8fb5968fda3a5fba019dfb0c80c3139627b Mon Sep 17 00:00:00 2001 From: John Allen Date: Mon, 7 Nov 2016 14:27:28 -0600 Subject: [PATCH 223/319] ibmvnic: Start completion queue negotiation at server-provided optimum values Use the opt_* fields to determine the starting point for negotiating the number of tx/rx completion queues with the vnic server. These contain the number of queues that the vnic server estimates that it will be able to allocate. While renegotiation may still occur, using the opt_* fields will reduce the number of times this needs to happen and will prevent driver probe timeout on systems using large numbers of ibmvnic client devices per vnic port. Signed-off-by: John Allen Signed-off-by: David S. Miller --- drivers/net/ethernet/ibm/ibmvnic.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 5f44c5520fbc..f6c9b6d38ac7 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -1505,9 +1505,8 @@ static void init_sub_crqs(struct ibmvnic_adapter *adapter, int retry) adapter->max_rx_add_entries_per_subcrq > entries_page ? entries_page : adapter->max_rx_add_entries_per_subcrq; - /* Choosing the maximum number of queues supported by firmware*/ - adapter->req_tx_queues = adapter->max_tx_queues; - adapter->req_rx_queues = adapter->max_rx_queues; + adapter->req_tx_queues = adapter->opt_tx_comp_sub_queues; + adapter->req_rx_queues = adapter->opt_rx_comp_queues; adapter->req_rx_add_queues = adapter->max_rx_add_queues; adapter->req_mtu = adapter->max_mtu; From 4053ab1bf98dd128344b9e67ef139f931a967ae1 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 7 Nov 2016 22:09:07 +0100 Subject: [PATCH 224/319] vxlan: hide unused local variable A bugfix introduced a harmless warning in v4.9-rc4: drivers/net/vxlan.c: In function 'vxlan_group_used': drivers/net/vxlan.c:947:21: error: unused variable 'sock6' [-Werror=unused-variable] This hides the variable inside of the same #ifdef that is around its user. The extraneous initialization is removed at the same time, it was accidentally introduced in the same commit. Fixes: c6fcc4fc5f8b ("vxlan: avoid using stale vxlan socket.") Signed-off-by: Arnd Bergmann Acked-by: Jiri Benc Signed-off-by: David S. Miller --- drivers/net/vxlan.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index f3c2fa3ab0d5..24532cdebb00 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -944,7 +944,9 @@ static bool vxlan_group_used(struct vxlan_net *vn, struct vxlan_dev *dev) { struct vxlan_dev *vxlan; struct vxlan_sock *sock4; - struct vxlan_sock *sock6 = NULL; +#if IS_ENABLED(CONFIG_IPV6) + struct vxlan_sock *sock6; +#endif unsigned short family = dev->default_dst.remote_ip.sa.sa_family; sock4 = rtnl_dereference(dev->vn4_sock); From d8e9e5e80e882b4f90cba7edf1e6cb7376e52e54 Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Wed, 9 Nov 2016 22:52:58 +0100 Subject: [PATCH 225/319] drbd: Fix kernel_sendmsg() usage - potential NULL deref Don't pass a size larger than iov_len to kernel_sendmsg(). Otherwise it will cause a NULL pointer deref when kernel_sendmsg() returns with rv < size. DRBD as external module has been around in the kernel 2.4 days already. We used to be compatible to 2.4 and very early 2.6 kernels, we used to use rv = sock_sendmsg(sock, &msg, iov.iov_len); then later changed to rv = kernel_sendmsg(sock, &msg, &iov, 1, size); when we should have used rv = kernel_sendmsg(sock, &msg, &iov, 1, iov.iov_len); tcp_sendmsg() used to totally ignore the size parameter. 57be5bd ip: convert tcp_sendmsg() to iov_iter primitives changes that, and exposes our long standing error. Even with this error exposed, to trigger the bug, we would need to have an environment (config or otherwise) causing us to not use sendpage() for larger transfers, a failing connection, and have it fail "just at the right time". Apparently that was unlikely enough for most, so this went unnoticed for years. Still, it is known to trigger at least some of these, and suspected for the others: [0] http://lists.linbit.com/pipermail/drbd-user/2016-July/023112.html [1] http://lists.linbit.com/pipermail/drbd-dev/2016-March/003362.html [2] https://forums.grsecurity.net/viewtopic.php?f=3&t=4546 [3] https://ubuntuforums.org/showthread.php?t=2336150 [4] http://e2.howsolveproblem.com/i/1175162/ This should go into 4.9, and into all stable branches since and including v4.0, which is the first to contain the exposing change. It is correct for all stable branches older than that as well (which contain the DRBD driver; which is 2.6.33 and up). It requires a small "conflict" resolution for v4.4 and earlier, with v4.5 we dropped the comment block immediately preceding the kernel_sendmsg(). Fixes: b411b3637fa7 ("The DRBD driver") Cc: # 2.6.33.x- Cc: viro@zeniv.linux.org.uk Cc: christoph.lechleitner@iteg.at Cc: wolfgang.glas@iteg.at Reported-by: Christoph Lechleitner Tested-by: Christoph Lechleitner Signed-off-by: Richard Weinberger [changed oneliner to be "obvious" without context; more verbose message] Signed-off-by: Lars Ellenberg Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 100be556e613..83482721bc01 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -1871,7 +1871,7 @@ int drbd_send(struct drbd_connection *connection, struct socket *sock, drbd_update_congested(connection); } do { - rv = kernel_sendmsg(sock, &msg, &iov, 1, size); + rv = kernel_sendmsg(sock, &msg, &iov, 1, iov.iov_len); if (rv == -EAGAIN) { if (we_should_drop_the_connection(connection, sock)) break; From f567e950bf51290755a2539ff2aaef4c26f735d3 Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Mon, 7 Nov 2016 23:22:19 +0100 Subject: [PATCH 226/319] rtnl: reset calcit fptr in rtnl_unregister() To avoid having dangling function pointers left behind, reset calcit in rtnl_unregister(), too. This is no issue so far, as only the rtnl core registers a netlink handler with a calcit hook which won't be unregistered, but may become one if new code makes use of the calcit hook. Fixes: c7ac8679bec9 ("rtnetlink: Compute and store minimum ifinfo...") Cc: Jeff Kirsher Cc: Greg Rose Signed-off-by: Mathias Krause Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index fb7348f13501..db313ec7af32 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -275,6 +275,7 @@ int rtnl_unregister(int protocol, int msgtype) rtnl_msg_handlers[protocol][msgindex].doit = NULL; rtnl_msg_handlers[protocol][msgindex].dumpit = NULL; + rtnl_msg_handlers[protocol][msgindex].calcit = NULL; return 0; } From 8da3cf2a49a6d0ca5e620c6a5eee49b99a3f0880 Mon Sep 17 00:00:00 2001 From: Allan Chou Date: Tue, 8 Nov 2016 16:08:01 -0600 Subject: [PATCH 227/319] Net Driver: Add Cypress GX3 VID=04b4 PID=3610. Add support for Cypress GX3 SuperSpeed to Gigabit Ethernet Bridge Controller (Vendor=04b4 ProdID=3610). Patch verified on x64 linux kernel 4.7.4, 4.8.6, 4.9-rc4 systems with the Kensington SD4600P USB-C Universal Dock with Power, which uses the Cypress GX3 SuperSpeed to Gigabit Ethernet Bridge Controller. A similar patch was signed-off and tested-by Allan Chou on 2015-12-01. Allan verified his similar patch on x86 Linux kernel 4.1.6 system with Cypress GX3 SuperSpeed to Gigabit Ethernet Bridge Controller. Tested-by: Allan Chou Tested-by: Chris Roth Tested-by: Artjom Simon Signed-off-by: Allan Chou Signed-off-by: Chris Roth Signed-off-by: David S. Miller --- drivers/net/usb/ax88179_178a.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/drivers/net/usb/ax88179_178a.c b/drivers/net/usb/ax88179_178a.c index e6338c16081a..8a6675d92b98 100644 --- a/drivers/net/usb/ax88179_178a.c +++ b/drivers/net/usb/ax88179_178a.c @@ -1656,6 +1656,19 @@ static const struct driver_info ax88178a_info = { .tx_fixup = ax88179_tx_fixup, }; +static const struct driver_info cypress_GX3_info = { + .description = "Cypress GX3 SuperSpeed to Gigabit Ethernet Controller", + .bind = ax88179_bind, + .unbind = ax88179_unbind, + .status = ax88179_status, + .link_reset = ax88179_link_reset, + .reset = ax88179_reset, + .stop = ax88179_stop, + .flags = FLAG_ETHER | FLAG_FRAMING_AX, + .rx_fixup = ax88179_rx_fixup, + .tx_fixup = ax88179_tx_fixup, +}; + static const struct driver_info dlink_dub1312_info = { .description = "D-Link DUB-1312 USB 3.0 to Gigabit Ethernet Adapter", .bind = ax88179_bind, @@ -1717,6 +1730,10 @@ static const struct usb_device_id products[] = { /* ASIX AX88178A 10/100/1000 */ USB_DEVICE(0x0b95, 0x178a), .driver_info = (unsigned long)&ax88178a_info, +}, { + /* Cypress GX3 SuperSpeed to Gigabit Ethernet Bridge Controller */ + USB_DEVICE(0x04b4, 0x3610), + .driver_info = (unsigned long)&cypress_GX3_info, }, { /* D-Link DUB-1312 USB 3.0 to Gigabit Ethernet Adapter */ USB_DEVICE(0x2001, 0x4a00), From 9b6c14d51bd2304b92f842e96172a9cc822fc77c Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 9 Nov 2016 09:07:26 -0800 Subject: [PATCH 228/319] net: tcp response should set oif only if it is L3 master Lorenzo noted an Android unit test failed due to e0d56fdd7342: "The expectation in the test was that the RST replying to a SYN sent to a closed port should be generated with oif=0. In other words it should not prefer the interface where the SYN came in on, but instead should follow whatever the routing table says it should do." Revert the change to ip_send_unicast_reply and tcp_v6_send_response such that the oif in the flow is set to the skb_iif only if skb_iif is an L3 master. Fixes: e0d56fdd7342 ("net: l3mdev: remove redundant calls") Reported-by: Lorenzo Colitti Signed-off-by: David Ahern Tested-by: Lorenzo Colitti Acked-by: Lorenzo Colitti Signed-off-by: David S. Miller --- net/ipv4/ip_output.c | 3 ++- net/ipv6/tcp_ipv6.c | 8 ++++++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 49714010ac2e..9403fa3850be 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1577,7 +1577,8 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb, } oif = arg->bound_dev_if; - oif = oif ? : skb->skb_iif; + if (!oif && netif_index_is_l3_master(net, skb->skb_iif)) + oif = skb->skb_iif; flowi4_init_output(&fl4, oif, IP4_REPLY_MARK(net, skb->mark), diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 5a27ab4eab39..6ca23c2e76f7 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -818,8 +818,12 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 fl6.flowi6_proto = IPPROTO_TCP; if (rt6_need_strict(&fl6.daddr) && !oif) fl6.flowi6_oif = tcp_v6_iif(skb); - else - fl6.flowi6_oif = oif ? : skb->skb_iif; + else { + if (!oif && netif_index_is_l3_master(net, skb->skb_iif)) + oif = skb->skb_iif; + + fl6.flowi6_oif = oif; + } fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark); fl6.fl6_dport = t1->dest; From 18266403f3fe507f0246faa1d5432333a2f139ca Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 8 Nov 2016 13:10:57 +0100 Subject: [PATCH 229/319] USB: cdc-acm: fix TIOCMIWAIT The TIOCMIWAIT implementation would return -EINVAL if any of the three supported signals were included in the mask. Instead of returning an error in case TIOCM_CTS is included, simply drop the mask check completely, which is in accordance with how other drivers implement this ioctl. Fixes: 5a6a62bdb925 ("cdc-acm: add TIOCMIWAIT") Cc: stable Signed-off-by: Johan Hovold Acked-by: Oliver Neukum Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/cdc-acm.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c index 4ad4ca44058b..fada988512a1 100644 --- a/drivers/usb/class/cdc-acm.c +++ b/drivers/usb/class/cdc-acm.c @@ -932,8 +932,6 @@ static int wait_serial_change(struct acm *acm, unsigned long arg) DECLARE_WAITQUEUE(wait, current); struct async_icount old, new; - if (arg & (TIOCM_DSR | TIOCM_RI | TIOCM_CD)) - return -EINVAL; do { spin_lock_irq(&acm->read_lock); old = acm->oldcount; From b13d14339baaaa720e7e5448855f33ba501917aa Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Sun, 30 Oct 2016 23:19:24 +0100 Subject: [PATCH 230/319] ppdev: fix double-free of pp->pdev->name free_pardevice() is called by parport_unregister_device() and already frees pp->pdev->name, don't try to do it again. This bug causes kernel crashes. I found and verified this with KASAN and some added pr_emerg()s: [ 60.316568] pp_release: pp->pdev->name == ffff88039cb264c0 [ 60.316692] free_pardevice: freeing par_dev->name at ffff88039cb264c0 [ 60.316706] pp_release: kfree(ffff88039cb264c0) [ 60.316714] ========================================================== [ 60.316722] BUG: Double free or freeing an invalid pointer [ 60.316731] Unexpected shadow byte: 0xFB [ 60.316801] Object at ffff88039cb264c0, in cache kmalloc-32 size: 32 [ 60.316813] Allocated: [ 60.316824] PID = 1695 [ 60.316869] Freed: [ 60.316880] PID = 1695 [ 60.316935] ========================================================== Signed-off-by: Jann Horn Acked-by: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman --- drivers/char/ppdev.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/char/ppdev.c b/drivers/char/ppdev.c index d23368874710..6af1ce04b3da 100644 --- a/drivers/char/ppdev.c +++ b/drivers/char/ppdev.c @@ -748,10 +748,7 @@ static int pp_release(struct inode *inode, struct file *file) } if (pp->pdev) { - const char *name = pp->pdev->name; - parport_unregister_device(pp->pdev); - kfree(name); pp->pdev = NULL; pr_debug(CHRDEV "%x: unregistered pardevice\n", minor); } From 62bdf94a2049822ef8c6d4b0e83cd9c3a1663ab4 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 7 Nov 2016 16:16:24 -0500 Subject: [PATCH 231/319] xprtrdma: Fix DMAR failure in frwr_op_map() after reconnect When a LOCALINV WR is flushed, the frmr is marked STALE, then frwr_op_unmap_sync DMA-unmaps the frmr's SGL. These STALE frmrs are then recovered when frwr_op_map hunts for an INVALID frmr to use. All other cases that need frmr recovery leave that SGL DMA-mapped. The FRMR recovery path unconditionally DMA-unmaps the frmr's SGL. To avoid DMA unmapping the SGL twice for flushed LOCAL_INV WRs, alter the recovery logic (rather than the hot frwr_op_unmap_sync path) to distinguish among these cases. This solution also takes care of the case where multiple LOCAL_INV WRs are issued for the same rpcrdma_req, some complete successfully, but some are flushed. Reported-by: Vasco Steinmetz Signed-off-by: Chuck Lever Tested-by: Vasco Steinmetz Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/frwr_ops.c | 37 ++++++++++++++++++++------------- net/sunrpc/xprtrdma/xprt_rdma.h | 3 ++- 2 files changed, 24 insertions(+), 16 deletions(-) diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c index 210949562786..26b26beef2d4 100644 --- a/net/sunrpc/xprtrdma/frwr_ops.c +++ b/net/sunrpc/xprtrdma/frwr_ops.c @@ -44,18 +44,20 @@ * being done. * * When the underlying transport disconnects, MRs are left in one of - * three states: + * four states: * * INVALID: The MR was not in use before the QP entered ERROR state. - * (Or, the LOCAL_INV WR has not completed or flushed yet). - * - * STALE: The MR was being registered or unregistered when the QP - * entered ERROR state, and the pending WR was flushed. * * VALID: The MR was registered before the QP entered ERROR state. * - * When frwr_op_map encounters STALE and VALID MRs, they are recovered - * with ib_dereg_mr and then are re-initialized. Beause MR recovery + * FLUSHED_FR: The MR was being registered when the QP entered ERROR + * state, and the pending WR was flushed. + * + * FLUSHED_LI: The MR was being invalidated when the QP entered ERROR + * state, and the pending WR was flushed. + * + * When frwr_op_map encounters FLUSHED and VALID MRs, they are recovered + * with ib_dereg_mr and then are re-initialized. Because MR recovery * allocates fresh resources, it is deferred to a workqueue, and the * recovered MRs are placed back on the rb_mws list when recovery is * complete. frwr_op_map allocates another MR for the current RPC while @@ -177,12 +179,15 @@ __frwr_reset_mr(struct rpcrdma_ia *ia, struct rpcrdma_mw *r) static void frwr_op_recover_mr(struct rpcrdma_mw *mw) { + enum rpcrdma_frmr_state state = mw->frmr.fr_state; struct rpcrdma_xprt *r_xprt = mw->mw_xprt; struct rpcrdma_ia *ia = &r_xprt->rx_ia; int rc; rc = __frwr_reset_mr(ia, mw); - ib_dma_unmap_sg(ia->ri_device, mw->mw_sg, mw->mw_nents, mw->mw_dir); + if (state != FRMR_FLUSHED_LI) + ib_dma_unmap_sg(ia->ri_device, + mw->mw_sg, mw->mw_nents, mw->mw_dir); if (rc) goto out_release; @@ -262,10 +267,8 @@ frwr_op_maxpages(struct rpcrdma_xprt *r_xprt) } static void -__frwr_sendcompletion_flush(struct ib_wc *wc, struct rpcrdma_frmr *frmr, - const char *wr) +__frwr_sendcompletion_flush(struct ib_wc *wc, const char *wr) { - frmr->fr_state = FRMR_IS_STALE; if (wc->status != IB_WC_WR_FLUSH_ERR) pr_err("rpcrdma: %s: %s (%u/0x%x)\n", wr, ib_wc_status_msg(wc->status), @@ -288,7 +291,8 @@ frwr_wc_fastreg(struct ib_cq *cq, struct ib_wc *wc) if (wc->status != IB_WC_SUCCESS) { cqe = wc->wr_cqe; frmr = container_of(cqe, struct rpcrdma_frmr, fr_cqe); - __frwr_sendcompletion_flush(wc, frmr, "fastreg"); + frmr->fr_state = FRMR_FLUSHED_FR; + __frwr_sendcompletion_flush(wc, "fastreg"); } } @@ -308,7 +312,8 @@ frwr_wc_localinv(struct ib_cq *cq, struct ib_wc *wc) if (wc->status != IB_WC_SUCCESS) { cqe = wc->wr_cqe; frmr = container_of(cqe, struct rpcrdma_frmr, fr_cqe); - __frwr_sendcompletion_flush(wc, frmr, "localinv"); + frmr->fr_state = FRMR_FLUSHED_LI; + __frwr_sendcompletion_flush(wc, "localinv"); } } @@ -328,8 +333,10 @@ frwr_wc_localinv_wake(struct ib_cq *cq, struct ib_wc *wc) /* WARNING: Only wr_cqe and status are reliable at this point */ cqe = wc->wr_cqe; frmr = container_of(cqe, struct rpcrdma_frmr, fr_cqe); - if (wc->status != IB_WC_SUCCESS) - __frwr_sendcompletion_flush(wc, frmr, "localinv"); + if (wc->status != IB_WC_SUCCESS) { + frmr->fr_state = FRMR_FLUSHED_LI; + __frwr_sendcompletion_flush(wc, "localinv"); + } complete(&frmr->fr_linv_done); } diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index 0d35b761c883..6e1bba358203 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -216,7 +216,8 @@ struct rpcrdma_rep { enum rpcrdma_frmr_state { FRMR_IS_INVALID, /* ready to be used */ FRMR_IS_VALID, /* in use */ - FRMR_IS_STALE, /* failed completion */ + FRMR_FLUSHED_FR, /* flushed FASTREG WR */ + FRMR_FLUSHED_LI, /* flushed LOCALINV WR */ }; struct rpcrdma_frmr { From 0a866d38ef3c13cf5834b78df21ee4c39bb5a689 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 9 Nov 2016 18:01:01 -0500 Subject: [PATCH 232/319] drm/amd/powerplay: propagate errors in phm_get_voltage_evv_on_sclk Missing for one case. bugs: https://bugzilla.kernel.org/show_bug.cgi?id=185681 https://bugs.freedesktop.org/show_bug.cgi?id=98357 Reviewed-by: Rex Zhu Reviewed-by: Huang Rui Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c index 2ba7937d2545..520665e1ef12 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c @@ -711,7 +711,7 @@ int phm_get_voltage_evv_on_sclk(struct pp_hwmgr *hwmgr, uint8_t voltage_type, int ret = 0; if (hwmgr->chip_id < CHIP_POLARIS10) { - atomctrl_get_voltage_evv_on_sclk(hwmgr, voltage_type, sclk, id, voltage); + ret = atomctrl_get_voltage_evv_on_sclk(hwmgr, voltage_type, sclk, id, voltage); if (*voltage >= 2000 || *voltage == 0) *voltage = 1150; } else { From 90ebf11857c2743fab1b2b64140aff24e256e758 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 9 Nov 2016 18:04:54 -0500 Subject: [PATCH 233/319] drm/amd/powerplay: update phm_get_voltage_evv_on_sclk for iceland Was missing the handling for iceland. bugs: https://bugzilla.kernel.org/show_bug.cgi?id=185681 https://bugs.freedesktop.org/show_bug.cgi?id=98357 Reviewed-by: Rex Zhu Reviewed-by: Huang Rui Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c index 520665e1ef12..e03dcb6ea9c1 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c @@ -710,7 +710,9 @@ int phm_get_voltage_evv_on_sclk(struct pp_hwmgr *hwmgr, uint8_t voltage_type, uint32_t vol; int ret = 0; - if (hwmgr->chip_id < CHIP_POLARIS10) { + if (hwmgr->chip_id < CHIP_TONGA) { + ret = atomctrl_get_voltage_evv(hwmgr, id, voltage); + } else if (hwmgr->chip_id < CHIP_POLARIS10) { ret = atomctrl_get_voltage_evv_on_sclk(hwmgr, voltage_type, sclk, id, voltage); if (*voltage >= 2000 || *voltage == 0) *voltage = 1150; From 0f12f73c5175d39445fa10cdca4481f80efdef49 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 9 Nov 2016 17:52:42 -0500 Subject: [PATCH 234/319] drm/amd/powerplay/smu7: fix checks in smu7_get_evv_voltages (v2) Only check if the tables exist in relevant configs. This fixes a failure on V0 tables. v2: fix version check as suggested by Rex bugs: https://bugzilla.kernel.org/show_bug.cgi?id=185681 https://bugs.freedesktop.org/show_bug.cgi?id=98357 Reviewed-by: Rex Zhu Reviewed-by: Huang Rui Signed-off-by: Alex Deucher --- .../gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c index 51fb86f20c21..f41cddf2a16b 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c @@ -1460,19 +1460,19 @@ static int smu7_get_evv_voltages(struct pp_hwmgr *hwmgr) struct phm_ppt_v1_clock_voltage_dependency_table *sclk_table = NULL; - if (table_info == NULL) - return -EINVAL; - - sclk_table = table_info->vdd_dep_on_sclk; - for (i = 0; i < SMU7_MAX_LEAKAGE_COUNT; i++) { vv_id = ATOM_VIRTUAL_VOLTAGE_ID0 + i; if (data->vdd_gfx_control == SMU7_VOLTAGE_CONTROL_BY_SVID2) { - if (0 == phm_get_sclk_for_voltage_evv(hwmgr, + if ((hwmgr->pp_table_version == PP_TABLE_V1) + && !phm_get_sclk_for_voltage_evv(hwmgr, table_info->vddgfx_lookup_table, vv_id, &sclk)) { if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_ClockStretcher)) { + if (table_info == NULL) + return -EINVAL; + sclk_table = table_info->vdd_dep_on_sclk; + for (j = 1; j < sclk_table->count; j++) { if (sclk_table->entries[j].clk == sclk && sclk_table->entries[j].cks_enable == 0) { @@ -1498,12 +1498,15 @@ static int smu7_get_evv_voltages(struct pp_hwmgr *hwmgr) } } } else { - if ((hwmgr->pp_table_version == PP_TABLE_V0) || !phm_get_sclk_for_voltage_evv(hwmgr, table_info->vddc_lookup_table, vv_id, &sclk)) { if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_ClockStretcher)) { + if (table_info == NULL) + return -EINVAL; + sclk_table = table_info->vdd_dep_on_sclk; + for (j = 1; j < sclk_table->count; j++) { if (sclk_table->entries[j].clk == sclk && sclk_table->entries[j].cks_enable == 0) { From 0ace81ec7192201af48528c309ee0b4103021f55 Mon Sep 17 00:00:00 2001 From: Lance Richardson Date: Wed, 9 Nov 2016 15:04:39 -0500 Subject: [PATCH 235/319] ipv4: update comment to document GSO fragmentation cases. This is a follow-up to commit 9ee6c5dc816a ("ipv4: allow local fragmentation in ip_finish_output_gso()"), updating the comment documenting cases in which fragmentation is needed for egress GSO packets. Suggested-by: Shmulik Ladkani Reviewed-by: Shmulik Ladkani Signed-off-by: Lance Richardson Signed-off-by: David S. Miller --- net/ipv4/ip_output.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 9403fa3850be..105908d841a3 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -244,12 +244,18 @@ static int ip_finish_output_gso(struct net *net, struct sock *sk, if (skb_gso_validate_mtu(skb, mtu)) return ip_finish_output2(net, sk, skb); - /* Slowpath - GSO segment length is exceeding the dst MTU. + /* Slowpath - GSO segment length exceeds the egress MTU. * - * This can happen in two cases: - * 1) TCP GRO packet, DF bit not set - * 2) skb arrived via virtio-net, we thus get TSO/GSO skbs directly - * from host network stack. + * This can happen in several cases: + * - Forwarding of a TCP GRO skb, when DF flag is not set. + * - Forwarding of an skb that arrived on a virtualization interface + * (virtio-net/vhost/tap) with TSO/GSO size set by other network + * stack. + * - Local GSO skb transmitted on an NETIF_F_TSO tunnel stacked over an + * interface with a smaller MTU. + * - Arriving GRO skb (or GSO skb in a virtualized environment) that is + * bridged to a NETIF_F_TSO tunnel stacked over an interface with an + * insufficent MTU. */ features = netif_skb_features(skb); BUILD_BUG_ON(sizeof(*IPCB(skb)) > SKB_SGO_CB_OFFSET); From 31a3a7b5b26f75fbe82de10ca99f2b673f6c26b4 Mon Sep 17 00:00:00 2001 From: Shawn Lin Date: Thu, 10 Nov 2016 11:14:37 -0600 Subject: [PATCH 236/319] PCI: rockchip: Add three new resets as required properties pm_rst, aclk_rst, pclk_rst was controlled by ROM code so the software wasn't needed to control it again in theory. But it didn't work properly, so we do need to do it again and add enough delay between the assert of pm_rst and the deassert of pm_rst. The Soc intergrated with this controller, rk3399, is still under MP test internally, so the backward compatibility won't be a big deal. Signed-off-by: Shawn Lin Signed-off-by: Bjorn Helgaas Reviewed-by: Heiko Stuebner Acked-by: Rob Herring --- .../devicetree/bindings/pci/rockchip-pcie.txt | 11 +++- drivers/pci/host/pcie-rockchip.c | 62 +++++++++++++++++++ 2 files changed, 70 insertions(+), 3 deletions(-) diff --git a/Documentation/devicetree/bindings/pci/rockchip-pcie.txt b/Documentation/devicetree/bindings/pci/rockchip-pcie.txt index ba67b39939c1..71aeda1ca055 100644 --- a/Documentation/devicetree/bindings/pci/rockchip-pcie.txt +++ b/Documentation/devicetree/bindings/pci/rockchip-pcie.txt @@ -26,13 +26,16 @@ Required properties: - "sys" - "legacy" - "client" -- resets: Must contain five entries for each entry in reset-names. +- resets: Must contain seven entries for each entry in reset-names. See ../reset/reset.txt for details. - reset-names: Must include the following names - "core" - "mgmt" - "mgmt-sticky" - "pipe" + - "pm" + - "aclk" + - "pclk" - pinctrl-names : The pin control state names - pinctrl-0: The "default" pinctrl state - #interrupt-cells: specifies the number of cells needed to encode an @@ -86,8 +89,10 @@ pcie0: pcie@f8000000 { reg = <0x0 0xf8000000 0x0 0x2000000>, <0x0 0xfd000000 0x0 0x1000000>; reg-names = "axi-base", "apb-base"; resets = <&cru SRST_PCIE_CORE>, <&cru SRST_PCIE_MGMT>, - <&cru SRST_PCIE_MGMT_STICKY>, <&cru SRST_PCIE_PIPE>; - reset-names = "core", "mgmt", "mgmt-sticky", "pipe"; + <&cru SRST_PCIE_MGMT_STICKY>, <&cru SRST_PCIE_PIPE> , + <&cru SRST_PCIE_PM>, <&cru SRST_P_PCIE>, <&cru SRST_A_PCIE>; + reset-names = "core", "mgmt", "mgmt-sticky", "pipe", + "pm", "pclk", "aclk"; phys = <&pcie_phy>; phy-names = "pcie-phy"; pinctrl-names = "default"; diff --git a/drivers/pci/host/pcie-rockchip.c b/drivers/pci/host/pcie-rockchip.c index e0b22dab9b7a..e04f69beb42d 100644 --- a/drivers/pci/host/pcie-rockchip.c +++ b/drivers/pci/host/pcie-rockchip.c @@ -190,6 +190,9 @@ struct rockchip_pcie { struct reset_control *mgmt_rst; struct reset_control *mgmt_sticky_rst; struct reset_control *pipe_rst; + struct reset_control *pm_rst; + struct reset_control *aclk_rst; + struct reset_control *pclk_rst; struct clk *aclk_pcie; struct clk *aclk_perf_pcie; struct clk *hclk_pcie; @@ -408,6 +411,44 @@ static int rockchip_pcie_init_port(struct rockchip_pcie *rockchip) gpiod_set_value(rockchip->ep_gpio, 0); + err = reset_control_assert(rockchip->aclk_rst); + if (err) { + dev_err(dev, "assert aclk_rst err %d\n", err); + return err; + } + + err = reset_control_assert(rockchip->pclk_rst); + if (err) { + dev_err(dev, "assert pclk_rst err %d\n", err); + return err; + } + + err = reset_control_assert(rockchip->pm_rst); + if (err) { + dev_err(dev, "assert pm_rst err %d\n", err); + return err; + } + + udelay(10); + + err = reset_control_deassert(rockchip->pm_rst); + if (err) { + dev_err(dev, "deassert pm_rst err %d\n", err); + return err; + } + + err = reset_control_deassert(rockchip->aclk_rst); + if (err) { + dev_err(dev, "deassert mgmt_sticky_rst err %d\n", err); + return err; + } + + err = reset_control_deassert(rockchip->pclk_rst); + if (err) { + dev_err(dev, "deassert mgmt_sticky_rst err %d\n", err); + return err; + } + err = phy_init(rockchip->phy); if (err < 0) { dev_err(dev, "fail to init phy, err %d\n", err); @@ -781,6 +822,27 @@ static int rockchip_pcie_parse_dt(struct rockchip_pcie *rockchip) return PTR_ERR(rockchip->pipe_rst); } + rockchip->pm_rst = devm_reset_control_get(dev, "pm"); + if (IS_ERR(rockchip->pm_rst)) { + if (PTR_ERR(rockchip->pm_rst) != -EPROBE_DEFER) + dev_err(dev, "missing pm reset property in node\n"); + return PTR_ERR(rockchip->pm_rst); + } + + rockchip->pclk_rst = devm_reset_control_get(dev, "pclk"); + if (IS_ERR(rockchip->pclk_rst)) { + if (PTR_ERR(rockchip->pclk_rst) != -EPROBE_DEFER) + dev_err(dev, "missing pclk reset property in node\n"); + return PTR_ERR(rockchip->pclk_rst); + } + + rockchip->aclk_rst = devm_reset_control_get(dev, "aclk"); + if (IS_ERR(rockchip->aclk_rst)) { + if (PTR_ERR(rockchip->aclk_rst) != -EPROBE_DEFER) + dev_err(dev, "missing aclk reset property in node\n"); + return PTR_ERR(rockchip->aclk_rst); + } + rockchip->ep_gpio = devm_gpiod_get(dev, "ep", GPIOD_OUT_HIGH); if (IS_ERR(rockchip->ep_gpio)) { dev_err(dev, "missing ep-gpios property in node\n"); From 4d3222f7071c01104b43300f1c10b55723df8f68 Mon Sep 17 00:00:00 2001 From: Shawn Lin Date: Thu, 10 Nov 2016 11:14:46 -0600 Subject: [PATCH 237/319] arm64: dts: rockchip: add three new resets for rk3399 PCIe controller pm_rst, aclk_rst and pclk_rst should be controlled by driver, so we need to add these three resets for PCIe controller. Signed-off-by: Shawn Lin Signed-off-by: Bjorn Helgaas Acked-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3399.dtsi | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3399.dtsi b/arch/arm64/boot/dts/rockchip/rk3399.dtsi index b65c193dc64e..7afbfb0f96a3 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399.dtsi @@ -300,8 +300,11 @@ pcie0: pcie@f8000000 { ranges = <0x83000000 0x0 0xfa000000 0x0 0xfa000000 0x0 0x600000 0x81000000 0x0 0xfa600000 0x0 0xfa600000 0x0 0x100000>; resets = <&cru SRST_PCIE_CORE>, <&cru SRST_PCIE_MGMT>, - <&cru SRST_PCIE_MGMT_STICKY>, <&cru SRST_PCIE_PIPE>; - reset-names = "core", "mgmt", "mgmt-sticky", "pipe"; + <&cru SRST_PCIE_MGMT_STICKY>, <&cru SRST_PCIE_PIPE>, + <&cru SRST_PCIE_PM>, <&cru SRST_P_PCIE>, + <&cru SRST_A_PCIE>; + reset-names = "core", "mgmt", "mgmt-sticky", "pipe", + "pm", "pclk", "aclk"; status = "disabled"; pcie0_intc: interrupt-controller { From 8d1d8fcb21cfc4a65731760c3100920f929e8f3d Mon Sep 17 00:00:00 2001 From: Ram Amrani Date: Wed, 9 Nov 2016 22:48:43 +0200 Subject: [PATCH 238/319] qed: configure ll2 RoCE v1/v2 flavor correctly Currently RoCE v2 won't operate with RDMA CM due to missing setting of the roce-flavour in the ll2 configuration. This patch properly sets the flavour, and deletes incorrect HSI that doesn't [yet] exist. Fixes: abd49676c707 ("qed: Add RoCE ll2 & GSI support") Signed-off-by: Ram Amrani Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_hsi.h | 3 --- drivers/net/ethernet/qlogic/qed/qed_ll2.c | 1 + 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_hsi.h b/drivers/net/ethernet/qlogic/qed/qed_hsi.h index 72eee29c677f..2777d5bb4380 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_hsi.h +++ b/drivers/net/ethernet/qlogic/qed/qed_hsi.h @@ -727,9 +727,6 @@ struct core_tx_bd_flags { #define CORE_TX_BD_FLAGS_L4_PROTOCOL_SHIFT 6 #define CORE_TX_BD_FLAGS_L4_PSEUDO_CSUM_MODE_MASK 0x1 #define CORE_TX_BD_FLAGS_L4_PSEUDO_CSUM_MODE_SHIFT 7 -#define CORE_TX_BD_FLAGS_ROCE_FLAV_MASK 0x1 -#define CORE_TX_BD_FLAGS_ROCE_FLAV_SHIFT 12 - }; struct core_tx_bd { diff --git a/drivers/net/ethernet/qlogic/qed/qed_ll2.c b/drivers/net/ethernet/qlogic/qed/qed_ll2.c index 63e1a1b0ef8e..f95385cbbd40 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_ll2.c +++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.c @@ -1119,6 +1119,7 @@ static void qed_ll2_prepare_tx_packet_set_bd(struct qed_hwfn *p_hwfn, start_bd->bd_flags.as_bitfield |= CORE_TX_BD_FLAGS_START_BD_MASK << CORE_TX_BD_FLAGS_START_BD_SHIFT; SET_FIELD(start_bd->bitfield0, CORE_TX_BD_NBDS, num_of_bds); + SET_FIELD(start_bd->bitfield0, CORE_TX_BD_ROCE_FLAV, type); DMA_REGPAIR_LE(start_bd->addr, first_frag); start_bd->nbytes = cpu_to_le16(first_frag_len); From 5c5f26090840951b4102d9a1e6db9aac41101e5a Mon Sep 17 00:00:00 2001 From: Ram Amrani Date: Wed, 9 Nov 2016 22:48:44 +0200 Subject: [PATCH 239/319] qed: Correct rdma params configuration Previous fix has broken RoCE support as the rdma_pf_params are now being set into the parameters only after the params are alrady assigned into the hw-function. Fixes: 0189efb8f4f8 ("qed*: Fix Kconfig dependencies with INFINIBAND_QEDR") Signed-off-by: Ram Amrani Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_main.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c index c418360ba02a..333c7442e48a 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_main.c +++ b/drivers/net/ethernet/qlogic/qed/qed_main.c @@ -839,20 +839,19 @@ static void qed_update_pf_params(struct qed_dev *cdev, { int i; + if (IS_ENABLED(CONFIG_QED_RDMA)) { + params->rdma_pf_params.num_qps = QED_ROCE_QPS; + params->rdma_pf_params.min_dpis = QED_ROCE_DPIS; + /* divide by 3 the MRs to avoid MF ILT overflow */ + params->rdma_pf_params.num_mrs = RDMA_MAX_TIDS; + params->rdma_pf_params.gl_pi = QED_ROCE_PROTOCOL_INDEX; + } + for (i = 0; i < cdev->num_hwfns; i++) { struct qed_hwfn *p_hwfn = &cdev->hwfns[i]; p_hwfn->pf_params = *params; } - - if (!IS_ENABLED(CONFIG_QED_RDMA)) - return; - - params->rdma_pf_params.num_qps = QED_ROCE_QPS; - params->rdma_pf_params.min_dpis = QED_ROCE_DPIS; - /* divide by 3 the MRs to avoid MF ILT overflow */ - params->rdma_pf_params.num_mrs = RDMA_MAX_TIDS; - params->rdma_pf_params.gl_pi = QED_ROCE_PROTOCOL_INDEX; } static int qed_slowpath_start(struct qed_dev *cdev, From 33b1341cd1bf5c89e7ef332aa8ac3ed614a3d942 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 10 Nov 2016 12:31:04 +0100 Subject: [PATCH 240/319] mlxsw: spectrum_router: Fix handling of neighbour structure __neigh_create function works in a different way than assumed. It passes "n" as a parameter to ndo_neigh_construct. But this "n" might be destroyed right away before __neigh_create() returns in case there is already another neighbour struct in the hashtable with the same dev and primary key. That is not expected by mlxsw_sp_router_neigh_construct() and the stored "n" points to freed memory, eventually leading to crash. Fix this by doing tight 1:1 coupling between neighbour struct and internal driver neigh_entry. That allows to narrow down the key in internal driver hashtable to do lookups by "n" only. Fixes: 6cf3c971dc84 ("mlxsw: spectrum_router: Add private neigh table") Signed-off-by: Jiri Pirko Acked-by: Ido Schimmel Signed-off-by: David S. Miller --- .../ethernet/mellanox/mlxsw/spectrum_router.c | 95 +++++++------------ 1 file changed, 34 insertions(+), 61 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 4573da2c5560..28630129065d 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -600,15 +600,13 @@ static void mlxsw_sp_vrs_fini(struct mlxsw_sp *mlxsw_sp) } struct mlxsw_sp_neigh_key { - unsigned char addr[sizeof(struct in6_addr)]; - struct net_device *dev; + struct neighbour *n; }; struct mlxsw_sp_neigh_entry { struct rhash_head ht_node; struct mlxsw_sp_neigh_key key; u16 rif; - struct neighbour *n; bool offloaded; struct delayed_work dw; struct mlxsw_sp_port *mlxsw_sp_port; @@ -646,19 +644,15 @@ mlxsw_sp_neigh_entry_remove(struct mlxsw_sp *mlxsw_sp, static void mlxsw_sp_router_neigh_update_hw(struct work_struct *work); static struct mlxsw_sp_neigh_entry * -mlxsw_sp_neigh_entry_create(const void *addr, size_t addr_len, - struct net_device *dev, u16 rif, - struct neighbour *n) +mlxsw_sp_neigh_entry_create(struct neighbour *n, u16 rif) { struct mlxsw_sp_neigh_entry *neigh_entry; neigh_entry = kzalloc(sizeof(*neigh_entry), GFP_ATOMIC); if (!neigh_entry) return NULL; - memcpy(neigh_entry->key.addr, addr, addr_len); - neigh_entry->key.dev = dev; + neigh_entry->key.n = n; neigh_entry->rif = rif; - neigh_entry->n = n; INIT_DELAYED_WORK(&neigh_entry->dw, mlxsw_sp_router_neigh_update_hw); INIT_LIST_HEAD(&neigh_entry->nexthop_list); return neigh_entry; @@ -671,13 +665,11 @@ mlxsw_sp_neigh_entry_destroy(struct mlxsw_sp_neigh_entry *neigh_entry) } static struct mlxsw_sp_neigh_entry * -mlxsw_sp_neigh_entry_lookup(struct mlxsw_sp *mlxsw_sp, const void *addr, - size_t addr_len, struct net_device *dev) +mlxsw_sp_neigh_entry_lookup(struct mlxsw_sp *mlxsw_sp, struct neighbour *n) { - struct mlxsw_sp_neigh_key key = {{ 0 } }; + struct mlxsw_sp_neigh_key key; - memcpy(key.addr, addr, addr_len); - key.dev = dev; + key.n = n; return rhashtable_lookup_fast(&mlxsw_sp->router.neigh_ht, &key, mlxsw_sp_neigh_ht_params); } @@ -689,26 +681,20 @@ int mlxsw_sp_router_neigh_construct(struct net_device *dev, struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; struct mlxsw_sp_neigh_entry *neigh_entry; struct mlxsw_sp_rif *r; - u32 dip; int err; if (n->tbl != &arp_tbl) return 0; - dip = ntohl(*((__be32 *) n->primary_key)); - neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, &dip, sizeof(dip), - n->dev); - if (neigh_entry) { - WARN_ON(neigh_entry->n != n); + neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n); + if (neigh_entry) return 0; - } r = mlxsw_sp_rif_find_by_dev(mlxsw_sp, n->dev); if (WARN_ON(!r)) return -EINVAL; - neigh_entry = mlxsw_sp_neigh_entry_create(&dip, sizeof(dip), n->dev, - r->rif, n); + neigh_entry = mlxsw_sp_neigh_entry_create(n, r->rif); if (!neigh_entry) return -ENOMEM; err = mlxsw_sp_neigh_entry_insert(mlxsw_sp, neigh_entry); @@ -727,14 +713,11 @@ void mlxsw_sp_router_neigh_destroy(struct net_device *dev, struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; struct mlxsw_sp_neigh_entry *neigh_entry; - u32 dip; if (n->tbl != &arp_tbl) return; - dip = ntohl(*((__be32 *) n->primary_key)); - neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, &dip, sizeof(dip), - n->dev); + neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n); if (!neigh_entry) return; mlxsw_sp_neigh_entry_remove(mlxsw_sp, neigh_entry); @@ -862,7 +845,7 @@ static void mlxsw_sp_router_neighs_update_nh(struct mlxsw_sp *mlxsw_sp) * is active regardless of the traffic. */ if (!list_empty(&neigh_entry->nexthop_list)) - neigh_event_send(neigh_entry->n, NULL); + neigh_event_send(neigh_entry->key.n, NULL); } rtnl_unlock(); } @@ -908,9 +891,9 @@ static void mlxsw_sp_router_probe_unresolved_nexthops(struct work_struct *work) rtnl_lock(); list_for_each_entry(neigh_entry, &mlxsw_sp->router.nexthop_neighs_list, nexthop_neighs_list_node) { - if (!(neigh_entry->n->nud_state & NUD_VALID) && + if (!(neigh_entry->key.n->nud_state & NUD_VALID) && !list_empty(&neigh_entry->nexthop_list)) - neigh_event_send(neigh_entry->n, NULL); + neigh_event_send(neigh_entry->key.n, NULL); } rtnl_unlock(); @@ -927,7 +910,7 @@ static void mlxsw_sp_router_neigh_update_hw(struct work_struct *work) { struct mlxsw_sp_neigh_entry *neigh_entry = container_of(work, struct mlxsw_sp_neigh_entry, dw.work); - struct neighbour *n = neigh_entry->n; + struct neighbour *n = neigh_entry->key.n; struct mlxsw_sp_port *mlxsw_sp_port = neigh_entry->mlxsw_sp_port; struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; char rauht_pl[MLXSW_REG_RAUHT_LEN]; @@ -1030,11 +1013,8 @@ int mlxsw_sp_router_netevent_event(struct notifier_block *unused, mlxsw_sp = mlxsw_sp_port->mlxsw_sp; dip = ntohl(*((__be32 *) n->primary_key)); - neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, - &dip, - sizeof(__be32), - dev); - if (WARN_ON(!neigh_entry) || WARN_ON(neigh_entry->n != n)) { + neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n); + if (WARN_ON(!neigh_entry)) { mlxsw_sp_port_dev_put(mlxsw_sp_port); return NOTIFY_DONE; } @@ -1343,33 +1323,26 @@ static int mlxsw_sp_nexthop_init(struct mlxsw_sp *mlxsw_sp, struct fib_nh *fib_nh) { struct mlxsw_sp_neigh_entry *neigh_entry; - u32 gwip = ntohl(fib_nh->nh_gw); struct net_device *dev = fib_nh->nh_dev; struct neighbour *n; u8 nud_state; - neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, &gwip, - sizeof(gwip), dev); - if (!neigh_entry) { - __be32 gwipn = htonl(gwip); - - n = neigh_create(&arp_tbl, &gwipn, dev); + /* Take a reference of neigh here ensuring that neigh would + * not be detructed before the nexthop entry is finished. + * The reference is taken either in neigh_lookup() or + * in neith_create() in case n is not found. + */ + n = neigh_lookup(&arp_tbl, &fib_nh->nh_gw, dev); + if (!n) { + n = neigh_create(&arp_tbl, &fib_nh->nh_gw, dev); if (IS_ERR(n)) return PTR_ERR(n); neigh_event_send(n, NULL); - neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, &gwip, - sizeof(gwip), dev); - if (!neigh_entry) { - neigh_release(n); - return -EINVAL; - } - } else { - /* Take a reference of neigh here ensuring that neigh would - * not be detructed before the nexthop entry is finished. - * The second branch takes the reference in neith_create() - */ - n = neigh_entry->n; - neigh_clone(n); + } + neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n); + if (!neigh_entry) { + neigh_release(n); + return -EINVAL; } /* If that is the first nexthop connected to that neigh, add to @@ -1403,7 +1376,7 @@ static void mlxsw_sp_nexthop_fini(struct mlxsw_sp *mlxsw_sp, if (list_empty(&nh->neigh_entry->nexthop_list)) list_del(&nh->neigh_entry->nexthop_neighs_list_node); - neigh_release(neigh_entry->n); + neigh_release(neigh_entry->key.n); } static struct mlxsw_sp_nexthop_group * @@ -1463,11 +1436,11 @@ static bool mlxsw_sp_nexthop_match(struct mlxsw_sp_nexthop *nh, for (i = 0; i < fi->fib_nhs; i++) { struct fib_nh *fib_nh = &fi->fib_nh[i]; - u32 gwip = ntohl(fib_nh->nh_gw); + struct neighbour *n = nh->neigh_entry->key.n; - if (memcmp(nh->neigh_entry->key.addr, - &gwip, sizeof(u32)) == 0 && - nh->neigh_entry->key.dev == fib_nh->nh_dev) + if (memcmp(n->primary_key, &fib_nh->nh_gw, + sizeof(fib_nh->nh_gw)) == 0 && + n->dev == fib_nh->nh_dev) return true; } return false; From 0e3715c9c250747280b1757ea267c577e7591e31 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 10 Nov 2016 12:31:05 +0100 Subject: [PATCH 241/319] mlxsw: spectrum_router: Ignore FIB notification events for non-init namespaces Since now, the table with same id in multiple netnamespaces were squashed to a single virtual router. That is not only incorrect, it also causes error messages when trying to use RALUE register to do double remove of FIB entries, like this one: mlxsw_spectrum 0000:03:00.0: EMAD reg access failed (tid=facb831c00007b20,reg_id=8013(ralue),type=write,status=7(bad parameter)) Since we don't allow ports to change namespaces (NETIF_F_NETNS_LOCAL), and the infrastructure is not yet prepared to handle netnamespaces, just ignore FIB notification events for non-init namespaces. That is clear to do since we don't need to offload them. Fixes: b45f64d16d45 ("mlxsw: spectrum_router: Use FIB notifications instead of switchdev calls") Signed-off-by: Jiri Pirko Acked-by: Ido Schimmel Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 28630129065d..040737e14a3f 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -1931,6 +1931,9 @@ static int mlxsw_sp_router_fib_event(struct notifier_block *nb, struct fib_entry_notifier_info *fen_info = ptr; int err; + if (!net_eq(fen_info->info.net, &init_net)) + return NOTIFY_DONE; + switch (event) { case FIB_EVENT_ENTRY_ADD: err = mlxsw_sp_router_fib4_add(mlxsw_sp, fen_info); From 954e6bee03fea509a85aea4cbf45307fe4e3b43e Mon Sep 17 00:00:00 2001 From: Rex Zhu Date: Thu, 10 Nov 2016 14:07:34 +0800 Subject: [PATCH 242/319] drm/amd/powerplay: implement get_clock_by_type for iceland. iceland use pptable v0. bugs: https://bugzilla.kernel.org/show_bug.cgi?id=185681 https://bugs.freedesktop.org/show_bug.cgi?id=98357 Reviewed-by: Alex Deucher Signed-off-by: Rex Zhu Signed-off-by: Alex Deucher --- .../gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c | 51 ++++++++++++------- 1 file changed, 33 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c index f41cddf2a16b..b0c929dd8beb 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c @@ -4233,18 +4233,26 @@ static int smu7_get_sclks(struct pp_hwmgr *hwmgr, struct amd_pp_clocks *clocks) { struct phm_ppt_v1_information *table_info = (struct phm_ppt_v1_information *)hwmgr->pptable; - struct phm_ppt_v1_clock_voltage_dependency_table *dep_sclk_table; + struct phm_ppt_v1_clock_voltage_dependency_table *dep_sclk_table = NULL; + struct phm_clock_voltage_dependency_table *sclk_table; int i; - if (table_info == NULL) - return -EINVAL; - - dep_sclk_table = table_info->vdd_dep_on_sclk; - - for (i = 0; i < dep_sclk_table->count; i++) { - clocks->clock[i] = dep_sclk_table->entries[i].clk; - clocks->count++; + if (hwmgr->pp_table_version == PP_TABLE_V1) { + if (table_info == NULL || table_info->vdd_dep_on_sclk == NULL) + return -EINVAL; + dep_sclk_table = table_info->vdd_dep_on_sclk; + for (i = 0; i < dep_sclk_table->count; i++) { + clocks->clock[i] = dep_sclk_table->entries[i].clk; + clocks->count++; + } + } else if (hwmgr->pp_table_version == PP_TABLE_V0) { + sclk_table = hwmgr->dyn_state.vddc_dependency_on_sclk; + for (i = 0; i < sclk_table->count; i++) { + clocks->clock[i] = sclk_table->entries[i].clk; + clocks->count++; + } } + return 0; } @@ -4266,17 +4274,24 @@ static int smu7_get_mclks(struct pp_hwmgr *hwmgr, struct amd_pp_clocks *clocks) (struct phm_ppt_v1_information *)hwmgr->pptable; struct phm_ppt_v1_clock_voltage_dependency_table *dep_mclk_table; int i; + struct phm_clock_voltage_dependency_table *mclk_table; - if (table_info == NULL) - return -EINVAL; - - dep_mclk_table = table_info->vdd_dep_on_mclk; - - for (i = 0; i < dep_mclk_table->count; i++) { - clocks->clock[i] = dep_mclk_table->entries[i].clk; - clocks->latency[i] = smu7_get_mem_latency(hwmgr, + if (hwmgr->pp_table_version == PP_TABLE_V1) { + if (table_info == NULL) + return -EINVAL; + dep_mclk_table = table_info->vdd_dep_on_mclk; + for (i = 0; i < dep_mclk_table->count; i++) { + clocks->clock[i] = dep_mclk_table->entries[i].clk; + clocks->latency[i] = smu7_get_mem_latency(hwmgr, dep_mclk_table->entries[i].clk); - clocks->count++; + clocks->count++; + } + } else if (hwmgr->pp_table_version == PP_TABLE_V0) { + mclk_table = hwmgr->dyn_state.vddc_dependency_on_mclk; + for (i = 0; i < mclk_table->count; i++) { + clocks->clock[i] = mclk_table->entries[i].clk; + clocks->count++; + } } return 0; } From 8a8d56176635515d607c520580c73d61f300b409 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Wed, 9 Nov 2016 16:47:54 +0800 Subject: [PATCH 243/319] ceph: use default file splice read callback Splice read/write implementation changed recently. When using generic_file_splice_read(), iov_iter with type == ITER_PIPE is passed to filesystem's read_iter callback. But ceph_sync_read() can't serve ITER_PIPE iov_iter correctly (ITER_PIPE iov_iter expects pages from page cache). Fixing ceph_sync_read() requires a big patch. So use default splice read callback for now. Signed-off-by: Yan, Zheng Signed-off-by: Ilya Dryomov --- fs/ceph/file.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 18630e800208..f995e3528a33 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -1770,7 +1770,6 @@ const struct file_operations ceph_file_fops = { .fsync = ceph_fsync, .lock = ceph_lock, .flock = ceph_flock, - .splice_read = generic_file_splice_read, .splice_write = iter_file_splice_write, .unlocked_ioctl = ceph_ioctl, .compat_ioctl = ceph_ioctl, From 3890dce1d3a8b9fe3bc36de99496792e468cd079 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Wed, 9 Nov 2016 16:42:48 +0800 Subject: [PATCH 244/319] libceph: fix legacy layout decode with pool 0 If your data pool was pool 0, ceph_file_layout_from_legacy() transform that to -1 unconditionally, which broke upgrades. We only want do that for a fully zeroed ceph_file_layout, so that it still maps to a file_layout_t. If any fields are set, though, we trust the fl_pgpool to be a valid pool. Fixes: 7627151ea30bc ("libceph: define new ceph_file_layout structure") Link: http://tracker.ceph.com/issues/17825 Signed-off-by: Yan, Zheng Signed-off-by: Ilya Dryomov --- net/ceph/ceph_fs.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ceph/ceph_fs.c b/net/ceph/ceph_fs.c index 7d54e944de5e..dcbe67ff3e2b 100644 --- a/net/ceph/ceph_fs.c +++ b/net/ceph/ceph_fs.c @@ -34,7 +34,8 @@ void ceph_file_layout_from_legacy(struct ceph_file_layout *fl, fl->stripe_count = le32_to_cpu(legacy->fl_stripe_count); fl->object_size = le32_to_cpu(legacy->fl_object_size); fl->pool_id = le32_to_cpu(legacy->fl_pg_pool); - if (fl->pool_id == 0) + if (fl->pool_id == 0 && fl->stripe_unit == 0 && + fl->stripe_count == 0 && fl->object_size == 0) fl->pool_id = -1; } EXPORT_SYMBOL(ceph_file_layout_from_legacy); From 264048afab27d7c27eedf5394714e0b396d787f7 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Tue, 8 Nov 2016 15:15:24 +0100 Subject: [PATCH 245/319] libceph: initialize last_linger_id with a large integer osdc->last_linger_id is a counter for lreq->linger_id, which is used for watch cookies. Starting with a large integer should ease the task of telling apart kernel and userspace clients. Signed-off-by: Ilya Dryomov --- include/linux/ceph/osd_client.h | 2 ++ net/ceph/osd_client.c | 1 + 2 files changed, 3 insertions(+) diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index 96337b15a60d..a8e66344bacc 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -258,6 +258,8 @@ struct ceph_watch_item { struct ceph_entity_addr addr; }; +#define CEPH_LINGER_ID_START 0xffff000000000000ULL + struct ceph_osd_client { struct ceph_client *client; diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index d9bf7a1d0a58..e6ae15bc41b7 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -4094,6 +4094,7 @@ int ceph_osdc_init(struct ceph_osd_client *osdc, struct ceph_client *client) osd_init(&osdc->homeless_osd); osdc->homeless_osd.o_osdc = osdc; osdc->homeless_osd.o_osd = CEPH_HOMELESS_OSD; + osdc->last_linger_id = CEPH_LINGER_ID_START; osdc->linger_requests = RB_ROOT; osdc->map_checks = RB_ROOT; osdc->linger_map_checks = RB_ROOT; From bc79c9851a76d76f269d9ef5150e180825650b67 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Thu, 10 Nov 2016 16:10:44 -0500 Subject: [PATCH 246/319] PCI: VMD: Update filename to reflect move Updating MAINTAINERS to reflect the new location of the VMD driver. Signed-off-by: Keith Busch Signed-off-by: Bjorn Helgaas --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 906e96948520..aefa6bf1782e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9318,7 +9318,7 @@ PCI DRIVER FOR INTEL VOLUME MANAGEMENT DEVICE (VMD) M: Keith Busch L: linux-pci@vger.kernel.org S: Supported -F: arch/x86/pci/vmd.c +F: drivers/pci/host/vmd.c PCIE DRIVER FOR ST SPEAR13XX M: Pratyush Anand From e5581fe2b4f40e54bfea9a1eee1bc487da52e98c Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Tue, 8 Nov 2016 16:38:00 +1000 Subject: [PATCH 247/319] drm/udl: make control msg static const. (v2) Thou shall not send control msg from the stack, does that mean I can send it from the RO memory area? and it looks like the answer is no, so here's v2 which kmemdups. Reported-by: poma Tested-by: poma Signed-off-by: Dave Airlie --- drivers/gpu/drm/udl/udl_main.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/udl/udl_main.c b/drivers/gpu/drm/udl/udl_main.c index 29f0207fa677..873f010d9616 100644 --- a/drivers/gpu/drm/udl/udl_main.c +++ b/drivers/gpu/drm/udl/udl_main.c @@ -98,17 +98,23 @@ static int udl_parse_vendor_descriptor(struct drm_device *dev, static int udl_select_std_channel(struct udl_device *udl) { int ret; - u8 set_def_chn[] = {0x57, 0xCD, 0xDC, 0xA7, - 0x1C, 0x88, 0x5E, 0x15, - 0x60, 0xFE, 0xC6, 0x97, - 0x16, 0x3D, 0x47, 0xF2}; + static const u8 set_def_chn[] = {0x57, 0xCD, 0xDC, 0xA7, + 0x1C, 0x88, 0x5E, 0x15, + 0x60, 0xFE, 0xC6, 0x97, + 0x16, 0x3D, 0x47, 0xF2}; + void *sendbuf; + + sendbuf = kmemdup(set_def_chn, sizeof(set_def_chn), GFP_KERNEL); + if (!sendbuf) + return -ENOMEM; ret = usb_control_msg(udl->udev, usb_sndctrlpipe(udl->udev, 0), NR_USB_REQUEST_CHANNEL, (USB_DIR_OUT | USB_TYPE_VENDOR), 0, 0, - set_def_chn, sizeof(set_def_chn), + sendbuf, sizeof(set_def_chn), USB_CTRL_SET_TIMEOUT); + kfree(sendbuf); return ret < 0 ? ret : 0; } From e519e7774784f3fa7728657d780863805ed1c983 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 10 Nov 2016 18:32:13 -0500 Subject: [PATCH 248/319] splice: remove detritus from generic_file_splice_read() i_size check is a leftover from the horrors that used to play with the page cache in that function. With the switch to ->read_iter(), it's neither needed nor correct - for gfs2 it ends up being buggy, since i_size is not guaranteed to be correct until later (inside ->read_iter()). Spotted-by: Abhi Das Signed-off-by: Al Viro --- fs/splice.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/fs/splice.c b/fs/splice.c index 153d4f3bd441..dcaf185a5731 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -299,13 +299,8 @@ ssize_t generic_file_splice_read(struct file *in, loff_t *ppos, { struct iov_iter to; struct kiocb kiocb; - loff_t isize; int idx, ret; - isize = i_size_read(in->f_mapping->host); - if (unlikely(*ppos >= isize)) - return 0; - iov_iter_pipe(&to, ITER_PIPE | READ, pipe, len); idx = to.idx; init_sync_kiocb(&kiocb, in); From 6f75c3fd56daf547d684127a7f83c283c3c160d1 Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Wed, 9 Nov 2016 17:21:08 -0800 Subject: [PATCH 249/319] PM / sleep: don't suspend parent when async child suspend_{noirq, late} fails Consider two devices, A and B, where B is a child of A, and B utilizes asynchronous suspend (it does not matter whether A is sync or async). If B fails to suspend_noirq() or suspend_late(), or is interrupted by a wakeup (pm_wakeup_pending()), then it aborts and sets the async_error variable. However, device A does not (immediately) check the async_error variable; it may continue to run its own suspend_noirq()/suspend_late() callback. This is bad. We can resolve this problem by doing our error and wakeup checking (particularly, for the async_error flag) after waiting for children to suspend, instead of before. This also helps align the logic for the noirq and late suspend cases with the logic in __device_suspend(). It's easy to observe this erroneous behavior by, for example, forcing a device to sleep a bit in its suspend_noirq() (to ensure the parent is waiting for the child to complete), then return an error, and watch the parent suspend_noirq() still get called. (Or similarly, fake a wakeup event at the right (or is it wrong?) time.) Fixes: de377b397272 (PM / sleep: Asynchronous threads for suspend_late) Fixes: 28b6fd6e3779 (PM / sleep: Asynchronous threads for suspend_noirq) Reported-by: Jeffy Chen Signed-off-by: Brian Norris Signed-off-by: Rafael J. Wysocki --- drivers/base/power/main.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index e44944f4be77..2932a5bd892f 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -1027,6 +1027,8 @@ static int __device_suspend_noirq(struct device *dev, pm_message_t state, bool a TRACE_DEVICE(dev); TRACE_SUSPEND(0); + dpm_wait_for_children(dev, async); + if (async_error) goto Complete; @@ -1038,8 +1040,6 @@ static int __device_suspend_noirq(struct device *dev, pm_message_t state, bool a if (dev->power.syscore || dev->power.direct_complete) goto Complete; - dpm_wait_for_children(dev, async); - if (dev->pm_domain) { info = "noirq power domain "; callback = pm_noirq_op(&dev->pm_domain->ops, state); @@ -1174,6 +1174,8 @@ static int __device_suspend_late(struct device *dev, pm_message_t state, bool as __pm_runtime_disable(dev, false); + dpm_wait_for_children(dev, async); + if (async_error) goto Complete; @@ -1185,8 +1187,6 @@ static int __device_suspend_late(struct device *dev, pm_message_t state, bool as if (dev->power.syscore || dev->power.direct_complete) goto Complete; - dpm_wait_for_children(dev, async); - if (dev->pm_domain) { info = "late power domain "; callback = pm_late_early_op(&dev->pm_domain->ops, state); From d786810b2f896854506e7b698a137f074942e410 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Tue, 8 Nov 2016 13:54:41 -0500 Subject: [PATCH 250/319] perf/x86/intel/uncore: Add more Intel uncore IMC PCI IDs for SkyLake Several uncore IMC PCI IDs are missed for Intel SkyLake. Add the PCI IDs for SkyLake Y, U, H and S platforms. Rename the ID macros for 0x191f and 0x190c. The corresponding bug: https://bugzilla.kernel.org/show_bug.cgi?id=187301 The related datasheets are also attached in the bug entry for permanent reference. Reported-by: Ben Widawsky Tested-by: Ben Widawsky Signed-off-by: Kan Liang Reviewed-by: Ben Widawsky Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Link: http://lkml.kernel.org/r/1478631281-5061-1-git-send-email-kan.liang@intel.com Signed-off-by: Ingo Molnar --- arch/x86/events/intel/uncore_snb.c | 32 ++++++++++++++++++++++++++---- 1 file changed, 28 insertions(+), 4 deletions(-) diff --git a/arch/x86/events/intel/uncore_snb.c b/arch/x86/events/intel/uncore_snb.c index 5f845eef9a4d..81195cca7eae 100644 --- a/arch/x86/events/intel/uncore_snb.c +++ b/arch/x86/events/intel/uncore_snb.c @@ -8,8 +8,12 @@ #define PCI_DEVICE_ID_INTEL_HSW_IMC 0x0c00 #define PCI_DEVICE_ID_INTEL_HSW_U_IMC 0x0a04 #define PCI_DEVICE_ID_INTEL_BDW_IMC 0x1604 -#define PCI_DEVICE_ID_INTEL_SKL_IMC 0x191f -#define PCI_DEVICE_ID_INTEL_SKL_U_IMC 0x190c +#define PCI_DEVICE_ID_INTEL_SKL_U_IMC 0x1904 +#define PCI_DEVICE_ID_INTEL_SKL_Y_IMC 0x190c +#define PCI_DEVICE_ID_INTEL_SKL_HD_IMC 0x1900 +#define PCI_DEVICE_ID_INTEL_SKL_HQ_IMC 0x1910 +#define PCI_DEVICE_ID_INTEL_SKL_SD_IMC 0x190f +#define PCI_DEVICE_ID_INTEL_SKL_SQ_IMC 0x191f /* SNB event control */ #define SNB_UNC_CTL_EV_SEL_MASK 0x000000ff @@ -616,13 +620,29 @@ static const struct pci_device_id bdw_uncore_pci_ids[] = { static const struct pci_device_id skl_uncore_pci_ids[] = { { /* IMC */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_IMC), + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_Y_IMC), .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), }, { /* IMC */ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_U_IMC), .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_HD_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_HQ_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_SD_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_SQ_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, { /* end: all zeroes */ }, }; @@ -666,8 +686,12 @@ static const struct imc_uncore_pci_dev desktop_imc_pci_ids[] = { IMC_DEV(HSW_IMC, &hsw_uncore_pci_driver), /* 4th Gen Core Processor */ IMC_DEV(HSW_U_IMC, &hsw_uncore_pci_driver), /* 4th Gen Core ULT Mobile Processor */ IMC_DEV(BDW_IMC, &bdw_uncore_pci_driver), /* 5th Gen Core U */ - IMC_DEV(SKL_IMC, &skl_uncore_pci_driver), /* 6th Gen Core */ + IMC_DEV(SKL_Y_IMC, &skl_uncore_pci_driver), /* 6th Gen Core Y */ IMC_DEV(SKL_U_IMC, &skl_uncore_pci_driver), /* 6th Gen Core U */ + IMC_DEV(SKL_HD_IMC, &skl_uncore_pci_driver), /* 6th Gen Core H Dual Core */ + IMC_DEV(SKL_HQ_IMC, &skl_uncore_pci_driver), /* 6th Gen Core H Quad Core */ + IMC_DEV(SKL_SD_IMC, &skl_uncore_pci_driver), /* 6th Gen Core S Dual Core */ + IMC_DEV(SKL_SQ_IMC, &skl_uncore_pci_driver), /* 6th Gen Core S Quad Core */ { /* end marker */ } }; From 9e80c719a82be2399bb06eab6cd27cb97aa59742 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Thu, 10 Nov 2016 10:46:04 -0800 Subject: [PATCH 251/319] mm: remove extra newline from allocation stall warning Commit 63f53dea0c98 ("mm: warn about allocations which stall for too long") by error embedded "\n" in the format string, resulting in strange output. [ 722.876655] kworker/0:1: page alloction stalls for 160001ms, order:0 [ 722.876656] , mode:0x2400000(GFP_NOIO) [ 722.876657] CPU: 0 PID: 6966 Comm: kworker/0:1 Not tainted 4.8.0+ #69 Link: http://lkml.kernel.org/r/1476026219-7974-1-git-send-email-penguin-kernel@I-love.SAKURA.ne.jp Signed-off-by: Tetsuo Handa Acked-by: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 072d791dce2d..6de9440e3ae2 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -3658,7 +3658,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, /* Make sure we know about allocations which stall for too long */ if (time_after(jiffies, alloc_start + stall_timeout)) { warn_alloc(gfp_mask, - "page alloction stalls for %ums, order:%u\n", + "page allocation stalls for %ums, order:%u", jiffies_to_msecs(jiffies-alloc_start), order); stall_timeout += 10 * HZ; } From 5e322beefc8699b5747cfb35539a9496034e4296 Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Thu, 10 Nov 2016 10:46:07 -0800 Subject: [PATCH 252/319] mm, frontswap: make sure allocated frontswap map is assigned Christian Borntraeger reports: With commit 8ea1d2a1985a ("mm, frontswap: convert frontswap_enabled to static key") kmemleak complains about a memory leak in swapon unreferenced object 0x3e09ba56000 (size 32112640): comm "swapon", pid 7852, jiffies 4294968787 (age 1490.770s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: __vmalloc_node_range+0x194/0x2d8 vzalloc+0x58/0x68 SyS_swapon+0xd60/0x12f8 system_call+0xd6/0x270 Turns out kmemleak is right. We now allocate the frontswap map depending on the kernel config (and no longer on the enablement) swapfile.c: [...] if (IS_ENABLED(CONFIG_FRONTSWAP)) frontswap_map = vzalloc(BITS_TO_LONGS(maxpages) * sizeof(long)); but later on this is passed along --> enable_swap_info(p, prio, swap_map, cluster_info, frontswap_map); and ignored if frontswap is disabled --> frontswap_init(p->type, frontswap_map); static inline void frontswap_init(unsigned type, unsigned long *map) { if (frontswap_enabled()) __frontswap_init(type, map); } Thing is, that frontswap map is never freed. The leakage is relatively not that bad, because swapon is an infrequent and privileged operation. However, if the first frontswap backend is registered after a swap type has been already enabled, it will WARN_ON in frontswap_register_ops() and frontswap will not be available for the swap type. Fix this by making sure the map is assigned by frontswap_init() as long as CONFIG_FRONTSWAP is enabled. Fixes: 8ea1d2a1985a ("mm, frontswap: convert frontswap_enabled to static key") Link: http://lkml.kernel.org/r/20161026134220.2566-1-vbabka@suse.cz Signed-off-by: Vlastimil Babka Reported-by: Christian Borntraeger Cc: Konrad Rzeszutek Wilk Cc: Boris Ostrovsky Cc: David Vrabel Cc: Juergen Gross Cc: "Kirill A. Shutemov" Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/frontswap.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/include/linux/frontswap.h b/include/linux/frontswap.h index c46d2aa16d81..1d18af034554 100644 --- a/include/linux/frontswap.h +++ b/include/linux/frontswap.h @@ -106,8 +106,9 @@ static inline void frontswap_invalidate_area(unsigned type) static inline void frontswap_init(unsigned type, unsigned long *map) { - if (frontswap_enabled()) - __frontswap_init(type, map); +#ifdef CONFIG_FRONTSWAP + __frontswap_init(type, map); +#endif } #endif /* _LINUX_FRONTSWAP_H */ From 9956edf37e65e93fbb76dcff1236dff2323d306a Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Thu, 10 Nov 2016 10:46:11 -0800 Subject: [PATCH 253/319] shmem: fix pageflags after swapping DMA32 object If shmem_alloc_page() does not set PageLocked and PageSwapBacked, then shmem_replace_page() needs to do so for itself. Without this, it puts newpage on the wrong lru, re-unlocks the unlocked newpage, and system descends into "Bad page" reports and freeze; or if CONFIG_DEBUG_VM=y, it hits an earlier VM_BUG_ON_PAGE(!PageLocked), depending on config. But shmem_replace_page() is not a common path: it's only called when swapin (or swapoff) finds the page was already read into an unsuitable zone: usually all zones are suitable, but gem objects for a few drm devices (gma500, omapdrm, crestline, broadwater) require zone DMA32 if there's more than 4GB of ram. Fixes: 800d8c63b2e9 ("shmem: add huge pages support") Link: http://lkml.kernel.org/r/alpine.LSU.2.11.1611062003510.11253@eggly.anvils Signed-off-by: Hugh Dickins Acked-by: Kirill A. Shutemov Cc: [4.8.x] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/shmem.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mm/shmem.c b/mm/shmem.c index ad7813d73ea7..166ebf5d2bce 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1483,6 +1483,8 @@ static int shmem_replace_page(struct page **pagep, gfp_t gfp, copy_highpage(newpage, oldpage); flush_dcache_page(newpage); + __SetPageLocked(newpage); + __SetPageSwapBacked(newpage); SetPageUptodate(newpage); set_page_private(newpage, swap_index); SetPageSwapCache(newpage); From eef06b82f16c78dc0197e3e9d5b2230647a890ff Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Thu, 10 Nov 2016 10:46:13 -0800 Subject: [PATCH 254/319] scripts/bloat-o-meter: fix SIGPIPE Fix piping output to a program which quickly exits (read: head -n1) $ ./scripts/bloat-o-meter ../vmlinux-000 ../obj/vmlinux | head -n1 add/remove: 0/0 grow/shrink: 9/60 up/down: 124/-305 (-181) close failed in file object destructor: sys.excepthook is missing lost sys.stderr Link: http://lkml.kernel.org/r/20161028204618.GA29923@avx2 Signed-off-by: Alexey Dobriyan Cc: Matt Mackall Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- scripts/bloat-o-meter | 3 +++ 1 file changed, 3 insertions(+) diff --git a/scripts/bloat-o-meter b/scripts/bloat-o-meter index 19f5adfd877d..d9ff038c1b28 100755 --- a/scripts/bloat-o-meter +++ b/scripts/bloat-o-meter @@ -8,6 +8,9 @@ # of the GNU General Public License, incorporated herein by reference. import sys, os, re +from signal import signal, SIGPIPE, SIG_DFL + +signal(SIGPIPE, SIG_DFL) if len(sys.argv) != 3: sys.stderr.write("usage: %s file1 file2\n" % sys.argv[0]) From 6b36ba599d602d8a73920fb5c470fe272fac49c1 Mon Sep 17 00:00:00 2001 From: Shiraz Hashim Date: Thu, 10 Nov 2016 10:46:16 -0800 Subject: [PATCH 255/319] mm/cma.c: check the max limit for cma allocation CMA allocation request size is represented by size_t that gets truncated when same is passed as int to bitmap_find_next_zero_area_off. We observe that during fuzz testing when cma allocation request is too high, bitmap_find_next_zero_area_off still returns success due to the truncation. This leads to kernel crash, as subsequent code assumes that requested memory is available. Fail cma allocation in case the request breaches the corresponding cma region size. Link: http://lkml.kernel.org/r/1478189211-3467-1-git-send-email-shashim@codeaurora.org Signed-off-by: Shiraz Hashim Cc: Catalin Marinas Cc: Stephen Rothwell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/cma.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/mm/cma.c b/mm/cma.c index 384c2cb51b56..c960459eda7e 100644 --- a/mm/cma.c +++ b/mm/cma.c @@ -385,6 +385,9 @@ struct page *cma_alloc(struct cma *cma, size_t count, unsigned int align) bitmap_maxno = cma_bitmap_maxno(cma); bitmap_count = cma_bitmap_pages_to_bits(cma, count); + if (bitmap_count > bitmap_maxno) + return NULL; + for (;;) { mutex_lock(&cma->lock); bitmap_no = bitmap_find_next_zero_area_off(cma->bitmap, From dd111be69114cc867f8e826284559bfbc1c40e37 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Thu, 10 Nov 2016 10:46:19 -0800 Subject: [PATCH 256/319] swapfile: fix memory corruption via malformed swapfile When root activates a swap partition whose header has the wrong endianness, nr_badpages elements of badpages are swabbed before nr_badpages has been checked, leading to a buffer overrun of up to 8GB. This normally is not a security issue because it can only be exploited by root (more specifically, a process with CAP_SYS_ADMIN or the ability to modify a swap file/partition), and such a process can already e.g. modify swapped-out memory of any other userspace process on the system. Link: http://lkml.kernel.org/r/1477949533-2509-1-git-send-email-jann@thejh.net Signed-off-by: Jann Horn Acked-by: Kees Cook Acked-by: Jerome Marchand Acked-by: Johannes Weiner Cc: "Kirill A. Shutemov" Cc: Vlastimil Babka Cc: Hugh Dickins Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/swapfile.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mm/swapfile.c b/mm/swapfile.c index 2210de290b54..f30438970cd1 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -2224,6 +2224,8 @@ static unsigned long read_swap_header(struct swap_info_struct *p, swab32s(&swap_header->info.version); swab32s(&swap_header->info.last_page); swab32s(&swap_header->info.nr_badpages); + if (swap_header->info.nr_badpages > MAX_SWAP_BADPAGES) + return 0; for (i = 0; i < swap_header->info.nr_badpages; i++) swab32s(&swap_header->info.badpages[i]); } From c3901e722b2975666f42748340df798114742d6d Mon Sep 17 00:00:00 2001 From: Naoya Horiguchi Date: Thu, 10 Nov 2016 10:46:23 -0800 Subject: [PATCH 257/319] mm: hwpoison: fix thp split handling in memory_failure() When memory_failure() runs on a thp tail page after pmd is split, we trigger the following VM_BUG_ON_PAGE(): page:ffffd7cd819b0040 count:0 mapcount:0 mapping: (null) index:0x1 flags: 0x1fffc000400000(hwpoison) page dumped because: VM_BUG_ON_PAGE(!page_count(p)) ------------[ cut here ]------------ kernel BUG at /src/linux-dev/mm/memory-failure.c:1132! memory_failure() passed refcount and page lock from tail page to head page, which is not needed because we can pass any subpage to split_huge_page(). Fixes: 61f5d698cc97 ("mm: re-enable THP") Link: http://lkml.kernel.org/r/1477961577-7183-1-git-send-email-n-horiguchi@ah.jp.nec.com Signed-off-by: Naoya Horiguchi Cc: [4.5+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory-failure.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/mm/memory-failure.c b/mm/memory-failure.c index de88f33519c0..19e796d36a62 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -1112,10 +1112,10 @@ int memory_failure(unsigned long pfn, int trapno, int flags) } if (!PageHuge(p) && PageTransHuge(hpage)) { - lock_page(hpage); - if (!PageAnon(hpage) || unlikely(split_huge_page(hpage))) { - unlock_page(hpage); - if (!PageAnon(hpage)) + lock_page(p); + if (!PageAnon(p) || unlikely(split_huge_page(p))) { + unlock_page(p); + if (!PageAnon(p)) pr_err("Memory failure: %#lx: non anonymous thp\n", pfn); else @@ -1126,9 +1126,7 @@ int memory_failure(unsigned long pfn, int trapno, int flags) put_hwpoison_page(p); return -EBUSY; } - unlock_page(hpage); - get_hwpoison_page(p); - put_hwpoison_page(hpage); + unlock_page(p); VM_BUG_ON_PAGE(!page_count(p), p); hpage = compound_head(p); } From c6c7d83b9c9e6a8b3e6d84c820ac61fbffc9e396 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Thu, 10 Nov 2016 10:46:26 -0800 Subject: [PATCH 258/319] Revert "console: don't prefer first registered if DT specifies stdout-path" This reverts commit 05fd007e4629 ("console: don't prefer first registered if DT specifies stdout-path"). The reverted commit changes existing behavior on which many ARM boards rely. Many ARM small-board-computers, like e.g. the Raspberry Pi have both a video output and a serial console. Depending on whether the user is using the device as a more regular computer; or as a headless device we need to have the console on either one or the other. Many users rely on the kernel behavior of the console being present on both outputs, before the reverted commit the console setup with no console= kernel arguments on an ARM board which sets stdout-path in dt would look like this: [root@localhost ~]# cat /proc/consoles ttyS0 -W- (EC p a) 4:64 tty0 -WU (E p ) 4:1 Where as after the reverted commit, it looks like this: [root@localhost ~]# cat /proc/consoles ttyS0 -W- (EC p a) 4:64 This commit reverts commit 05fd007e4629 ("console: don't prefer first registered if DT specifies stdout-path") restoring the original behavior. Fixes: 05fd007e4629 ("console: don't prefer first registered if DT specifies stdout-path") Link: http://lkml.kernel.org/r/20161104121135.4780-2-hdegoede@redhat.com Signed-off-by: Hans de Goede Cc: Paul Burton Cc: Rob Herring Cc: Frank Rowand Cc: Thorsten Leemhuis Cc: Greg Kroah-Hartman Cc: Tejun Heo Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/of/base.c | 2 -- include/linux/console.h | 6 ------ kernel/printk/printk.c | 13 +------------ 3 files changed, 1 insertion(+), 20 deletions(-) diff --git a/drivers/of/base.c b/drivers/of/base.c index d687e6de24a0..a0bccb54a9bd 100644 --- a/drivers/of/base.c +++ b/drivers/of/base.c @@ -2077,8 +2077,6 @@ void of_alias_scan(void * (*dt_alloc)(u64 size, u64 align)) name = of_get_property(of_aliases, "stdout", NULL); if (name) of_stdout = of_find_node_opts_by_path(name, &of_stdout_options); - if (of_stdout) - console_set_by_of(); } if (!of_aliases) diff --git a/include/linux/console.h b/include/linux/console.h index 3672809234a7..d530c4627e54 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -173,12 +173,6 @@ static inline void console_sysfs_notify(void) #endif extern bool console_suspend_enabled; -#ifdef CONFIG_OF -extern void console_set_by_of(void); -#else -static inline void console_set_by_of(void) {} -#endif - /* Suspend and resume console messages over PM events */ extern void suspend_console(void); extern void resume_console(void); diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index de08fc90baaf..5028f4fd504a 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -253,17 +253,6 @@ static int preferred_console = -1; int console_set_on_cmdline; EXPORT_SYMBOL(console_set_on_cmdline); -#ifdef CONFIG_OF -static bool of_specified_console; - -void console_set_by_of(void) -{ - of_specified_console = true; -} -#else -# define of_specified_console false -#endif - /* Flag: console code may call schedule() */ static int console_may_schedule; @@ -2657,7 +2646,7 @@ void register_console(struct console *newcon) * didn't select a console we take the first one * that registers here. */ - if (preferred_console < 0 && !of_specified_console) { + if (preferred_console < 0) { if (newcon->index < 0) newcon->index = 0; if (newcon->setup == NULL || From d006c71f8ad2663dd47f81bf96bf655eeed428e2 Mon Sep 17 00:00:00 2001 From: Junxiao Bi Date: Thu, 10 Nov 2016 10:46:29 -0800 Subject: [PATCH 259/319] ocfs2: fix not enough credit panic The following panic was caught when run ocfs2 disconfig single test (block size 512 and cluster size 8192). ocfs2_journal_dirty() return -ENOSPC, that means credits were used up. The total credit should include 3 times of "num_dx_leaves" from ocfs2_dx_dir_rebalance(), because 2 times will be consumed in ocfs2_dx_dir_transfer_leaf() and 1 time will be consumed in ocfs2_dx_dir_new_cluster() -> __ocfs2_dx_dir_new_cluster() -> ocfs2_dx_dir_format_cluster(). But only two times is included in ocfs2_dx_dir_rebalance_credits(), fix it. This can cause read-only fs(v4.1+) or panic for mainline linux depending on mount option. ------------[ cut here ]------------ kernel BUG at fs/ocfs2/journal.c:775! invalid opcode: 0000 [#1] SMP Modules linked in: ocfs2 nfsd lockd grace nfs_acl auth_rpcgss sunrpc autofs4 ocfs2_dlmfs ocfs2_stack_o2cb ocfs2_dlm ocfs2_nodemanager ocfs2_stackglue configfs sd_mod sg ip6t_REJECT nf_reject_ipv6 nf_conntrack_ipv6 nf_defrag_ipv6 xt_state nf_conntrack ip6table_filter ip6_tables be2iscsi iscsi_boot_sysfs bnx2i cnic uio cxgb4i cxgb4 cxgb3i libcxgbi cxgb3 mdio ib_iser rdma_cm ib_cm iw_cm ib_sa ib_mad ib_core ib_addr ipv6 iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi ppdev xen_kbdfront xen_netfront fb_sys_fops sysimgblt sysfillrect syscopyarea parport_pc parport acpi_cpufreq i2c_piix4 i2c_core pcspkr ext4 jbd2 mbcache xen_blkfront floppy pata_acpi ata_generic ata_piix dm_mirror dm_region_hash dm_log dm_mod CPU: 2 PID: 10601 Comm: dd Not tainted 4.1.12-71.el6uek.bug24939243.x86_64 #2 Hardware name: Xen HVM domU, BIOS 4.4.4OVM 02/11/2016 task: ffff8800b6de6200 ti: ffff8800a7d48000 task.ti: ffff8800a7d48000 RIP: ocfs2_journal_dirty+0xa7/0xb0 [ocfs2] RSP: 0018:ffff8800a7d4b6d8 EFLAGS: 00010286 RAX: 00000000ffffffe4 RBX: 00000000814d0a9c RCX: 00000000000004f9 RDX: ffffffffa008e990 RSI: ffffffffa008f1ee RDI: ffff8800622b6460 RBP: ffff8800a7d4b6f8 R08: ffffffffa008f288 R09: ffff8800622b6460 R10: 0000000000000000 R11: 0000000000000282 R12: 0000000002c8421e R13: ffff88006d0cad00 R14: ffff880092beef60 R15: 0000000000000070 FS: 00007f9b83e92700(0000) GS:ffff8800be880000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fb2c0d1a000 CR3: 0000000008f80000 CR4: 00000000000406e0 Call Trace: ocfs2_dx_dir_transfer_leaf+0x159/0x1a0 [ocfs2] ocfs2_dx_dir_rebalance+0xd9b/0xea0 [ocfs2] ocfs2_find_dir_space_dx+0xd3/0x300 [ocfs2] ocfs2_prepare_dx_dir_for_insert+0x219/0x450 [ocfs2] ocfs2_prepare_dir_for_insert+0x1d6/0x580 [ocfs2] ocfs2_mknod+0x5a2/0x1400 [ocfs2] ocfs2_create+0x73/0x180 [ocfs2] vfs_create+0xd8/0x100 lookup_open+0x185/0x1c0 do_last+0x36d/0x780 path_openat+0x92/0x470 do_filp_open+0x4a/0xa0 do_sys_open+0x11a/0x230 SyS_open+0x1e/0x20 system_call_fastpath+0x12/0x71 Code: 1d 3f 29 09 00 48 85 db 74 1f 48 8b 03 0f 1f 80 00 00 00 00 48 8b 7b 08 48 83 c3 10 4c 89 e6 ff d0 48 8b 03 48 85 c0 75 eb eb 90 <0f> 0b eb fe 0f 1f 44 00 00 55 48 89 e5 41 57 41 56 41 55 41 54 RIP ocfs2_journal_dirty+0xa7/0xb0 [ocfs2] ---[ end trace 91ac5312a6ee1288 ]--- Kernel panic - not syncing: Fatal exception Kernel Offset: disabled Link: http://lkml.kernel.org/r/1478248135-31963-1-git-send-email-junxiao.bi@oracle.com Signed-off-by: Junxiao Bi Cc: Mark Fasheh Cc: Joel Becker Cc: Joseph Qi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ocfs2/dir.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c index e7054e2ac922..3ecb9f337b7d 100644 --- a/fs/ocfs2/dir.c +++ b/fs/ocfs2/dir.c @@ -3699,7 +3699,7 @@ static void ocfs2_dx_dir_transfer_leaf(struct inode *dir, u32 split_hash, static int ocfs2_dx_dir_rebalance_credits(struct ocfs2_super *osb, struct ocfs2_dx_root_block *dx_root) { - int credits = ocfs2_clusters_to_blocks(osb->sb, 2); + int credits = ocfs2_clusters_to_blocks(osb->sb, 3); credits += ocfs2_calc_extend_credits(osb->sb, &dx_root->dr_list); credits += ocfs2_quota_trans_credits(osb->sb); From 96b96a96ddee4ba08ce4aeb8a558a3271fd4a7a7 Mon Sep 17 00:00:00 2001 From: Mike Kravetz Date: Thu, 10 Nov 2016 10:46:32 -0800 Subject: [PATCH 260/319] mm/hugetlb: fix huge page reservation leak in private mapping error paths Error paths in hugetlb_cow() and hugetlb_no_page() may free a newly allocated huge page. If a reservation was associated with the huge page, alloc_huge_page() consumed the reservation while allocating. When the newly allocated page is freed in free_huge_page(), it will increment the global reservation count. However, the reservation entry in the reserve map will remain. This is not an issue for shared mappings as the entry in the reserve map indicates a reservation exists. But, an entry in a private mapping reserve map indicates the reservation was consumed and no longer exists. This results in an inconsistency between the reserve map and the global reservation count. This 'leaks' a reserved huge page. Create a new routine restore_reserve_on_error() to restore the reserve entry in these specific error paths. This routine makes use of a new function vma_add_reservation() which will add a reserve entry for a specific address/page. In general, these error paths were rarely (if ever) taken on most architectures. However, powerpc contained arch specific code that that resulted in an extra fault and execution of these error paths on all private mappings. Fixes: 67961f9db8c4 ("mm/hugetlb: fix huge page reserve accounting for private mappings) Link: http://lkml.kernel.org/r/1476933077-23091-2-git-send-email-mike.kravetz@oracle.com Signed-off-by: Mike Kravetz Reported-by: Jan Stancek Tested-by: Jan Stancek Reviewed-by: Aneesh Kumar K.V Acked-by: Hillf Danton Cc: Naoya Horiguchi Cc: Michal Hocko Cc: Kirill A . Shutemov Cc: Dave Hansen Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/hugetlb.c | 66 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index ec49d9ef1eef..418bf01a50ed 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -1826,11 +1826,17 @@ static void return_unused_surplus_pages(struct hstate *h, * is not the case is if a reserve map was changed between calls. It * is the responsibility of the caller to notice the difference and * take appropriate action. + * + * vma_add_reservation is used in error paths where a reservation must + * be restored when a newly allocated huge page must be freed. It is + * to be called after calling vma_needs_reservation to determine if a + * reservation exists. */ enum vma_resv_mode { VMA_NEEDS_RESV, VMA_COMMIT_RESV, VMA_END_RESV, + VMA_ADD_RESV, }; static long __vma_reservation_common(struct hstate *h, struct vm_area_struct *vma, unsigned long addr, @@ -1856,6 +1862,14 @@ static long __vma_reservation_common(struct hstate *h, region_abort(resv, idx, idx + 1); ret = 0; break; + case VMA_ADD_RESV: + if (vma->vm_flags & VM_MAYSHARE) + ret = region_add(resv, idx, idx + 1); + else { + region_abort(resv, idx, idx + 1); + ret = region_del(resv, idx, idx + 1); + } + break; default: BUG(); } @@ -1903,6 +1917,56 @@ static void vma_end_reservation(struct hstate *h, (void)__vma_reservation_common(h, vma, addr, VMA_END_RESV); } +static long vma_add_reservation(struct hstate *h, + struct vm_area_struct *vma, unsigned long addr) +{ + return __vma_reservation_common(h, vma, addr, VMA_ADD_RESV); +} + +/* + * This routine is called to restore a reservation on error paths. In the + * specific error paths, a huge page was allocated (via alloc_huge_page) + * and is about to be freed. If a reservation for the page existed, + * alloc_huge_page would have consumed the reservation and set PagePrivate + * in the newly allocated page. When the page is freed via free_huge_page, + * the global reservation count will be incremented if PagePrivate is set. + * However, free_huge_page can not adjust the reserve map. Adjust the + * reserve map here to be consistent with global reserve count adjustments + * to be made by free_huge_page. + */ +static void restore_reserve_on_error(struct hstate *h, + struct vm_area_struct *vma, unsigned long address, + struct page *page) +{ + if (unlikely(PagePrivate(page))) { + long rc = vma_needs_reservation(h, vma, address); + + if (unlikely(rc < 0)) { + /* + * Rare out of memory condition in reserve map + * manipulation. Clear PagePrivate so that + * global reserve count will not be incremented + * by free_huge_page. This will make it appear + * as though the reservation for this page was + * consumed. This may prevent the task from + * faulting in the page at a later time. This + * is better than inconsistent global huge page + * accounting of reserve counts. + */ + ClearPagePrivate(page); + } else if (rc) { + rc = vma_add_reservation(h, vma, address); + if (unlikely(rc < 0)) + /* + * See above comment about rare out of + * memory condition. + */ + ClearPagePrivate(page); + } else + vma_end_reservation(h, vma, address); + } +} + struct page *alloc_huge_page(struct vm_area_struct *vma, unsigned long addr, int avoid_reserve) { @@ -3498,6 +3562,7 @@ static int hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma, spin_unlock(ptl); mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end); out_release_all: + restore_reserve_on_error(h, vma, address, new_page); put_page(new_page); out_release_old: put_page(old_page); @@ -3680,6 +3745,7 @@ static int hugetlb_no_page(struct mm_struct *mm, struct vm_area_struct *vma, spin_unlock(ptl); backout_unlocked: unlock_page(page); + restore_reserve_on_error(h, vma, address, page); put_page(page); goto out; } From 60da81ea61201f43387793d1cc3f501609d922ed Mon Sep 17 00:00:00 2001 From: Eryu Guan Date: Thu, 10 Nov 2016 10:46:35 -0800 Subject: [PATCH 261/319] mm/filemap: don't allow partially uptodate page for pipes Starting from 4.9-rc1 kernel, I started noticing some test failures of sendfile(2) and splice(2) (sendfile0N and splice01 from LTP) when testing on sub-page block size filesystems (tested both XFS and ext4), these syscalls start to return EIO in the tests. e.g. sendfile02 1 TFAIL : sendfile02.c:133: sendfile(2) failed to return expected value, expected: 26, got: -1 sendfile02 2 TFAIL : sendfile02.c:133: sendfile(2) failed to return expected value, expected: 24, got: -1 sendfile02 3 TFAIL : sendfile02.c:133: sendfile(2) failed to return expected value, expected: 22, got: -1 sendfile02 4 TFAIL : sendfile02.c:133: sendfile(2) failed to return expected value, expected: 20, got: -1 This is because that in sub-page block size cases, we don't need the whole page to be uptodate, only the part we care about is uptodate is OK (if fs has ->is_partially_uptodate defined). But page_cache_pipe_buf_confirm() doesn't have the ability to check the partially-uptodate case, it needs the whole page to be uptodate. So it returns EIO in this case. This is a regression introduced by commit 82c156f85384 ("switch generic_file_splice_read() to use of ->read_iter()"). Prior to the change, generic_file_splice_read() doesn't allow partially-uptodate page either, so it worked fine. Fix it by skipping the partially-uptodate check if we're working on a pipe in do_generic_file_read(), so we read the whole page from disk as long as the page is not uptodate. I think the other way to fix it is to add the ability to check & allow partially-uptodate page to page_cache_pipe_buf_confirm(), but that is much harder to do and seems gain little. Link: http://lkml.kernel.org/r/1477986187-12717-1-git-send-email-guaneryu@gmail.com Signed-off-by: Eryu Guan Reviewed-by: Jan Kara Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/filemap.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/mm/filemap.c b/mm/filemap.c index c7fe2f16503f..50b52fe51937 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1732,6 +1732,9 @@ static ssize_t do_generic_file_read(struct file *filp, loff_t *ppos, if (inode->i_blkbits == PAGE_SHIFT || !mapping->a_ops->is_partially_uptodate) goto page_not_up_to_date; + /* pipes can't handle partially uptodate pages */ + if (unlikely(iter->type & ITER_PIPE)) + goto page_not_up_to_date; if (!trylock_page(page)) goto page_not_up_to_date; /* Did it get truncated before we got the lock? */ From 70d78fe7c8b640b5acfad56ad341985b3810998a Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Thu, 10 Nov 2016 10:46:38 -0800 Subject: [PATCH 262/319] coredump: fix unfreezable coredumping task It could be not possible to freeze coredumping task when it waits for 'core_state->startup' completion, because threads are frozen in get_signal() before they got a chance to complete 'core_state->startup'. Inability to freeze a task during suspend will cause suspend to fail. Also CRIU uses cgroup freezer during dump operation. So with an unfreezable task the CRIU dump will fail because it waits for a transition from 'FREEZING' to 'FROZEN' state which will never happen. Use freezer_do_not_count() to tell freezer to ignore coredumping task while it waits for core_state->startup completion. Link: http://lkml.kernel.org/r/1475225434-3753-1-git-send-email-aryabinin@virtuozzo.com Signed-off-by: Andrey Ryabinin Acked-by: Pavel Machek Acked-by: Oleg Nesterov Cc: Alexander Viro Cc: Tejun Heo Cc: "Rafael J. Wysocki" Cc: Michal Hocko Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/coredump.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/coredump.c b/fs/coredump.c index 281b768000e6..eb9c92c9b20f 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -1,6 +1,7 @@ #include #include #include +#include #include #include #include @@ -423,7 +424,9 @@ static int coredump_wait(int exit_code, struct core_state *core_state) if (core_waiters > 0) { struct core_thread *ptr; + freezer_do_not_count(); wait_for_completion(&core_state->startup); + freezer_count(); /* * Wait for all the threads to become inactive, so that * all the thread context (extended register state, like From f773e36de3d77c4000ca914c9d146f55f2fd51e8 Mon Sep 17 00:00:00 2001 From: Greg Thelen Date: Thu, 10 Nov 2016 10:46:41 -0800 Subject: [PATCH 263/319] memcg: prevent memcg caches to be both OFF_SLAB & OBJFREELIST_SLAB While testing OBJFREELIST_SLAB integration with pagealloc, we found a bug where kmem_cache(sys) would be created with both CFLGS_OFF_SLAB & CFLGS_OBJFREELIST_SLAB. When it happened, critical allocations needed for loading drivers or creating new caches will fail. The original kmem_cache is created early making OFF_SLAB not possible. When kmem_cache(sys) is created, OFF_SLAB is possible and if pagealloc is enabled it will try to enable it first under certain conditions. Given kmem_cache(sys) reuses the original flag, you can have both flags at the same time resulting in allocation failures and odd behaviors. This fix discards allocator specific flags from memcg before calling create_cache. The bug exists since 4.6-rc1 and affects testing debug pagealloc configurations. Fixes: b03a017bebc4 ("mm/slab: introduce new slab management type, OBJFREELIST_SLAB") Link: http://lkml.kernel.org/r/1478553075-120242-1-git-send-email-thgarnie@google.com Signed-off-by: Greg Thelen Signed-off-by: Thomas Garnier Tested-by: Thomas Garnier Acked-by: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Cc: Vladimir Davydov Cc: Michal Hocko Cc: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/slab_common.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/slab_common.c b/mm/slab_common.c index 71f0b28a1bec..329b03843863 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -533,8 +533,8 @@ void memcg_create_kmem_cache(struct mem_cgroup *memcg, s = create_cache(cache_name, root_cache->object_size, root_cache->size, root_cache->align, - root_cache->flags, root_cache->ctor, - memcg, root_cache); + root_cache->flags & CACHE_CREATE_MASK, + root_cache->ctor, memcg, root_cache); /* * If we could not create a memcg cache, do not complain, because * that's not critical at all as we can always proceed with the root From d7c19b066dcf4bd19c4385e8065558d4e74f9e73 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 10 Nov 2016 10:46:44 -0800 Subject: [PATCH 264/319] mm: kmemleak: scan .data.ro_after_init Limit the number of kmemleak false positives by including .data.ro_after_init in memory scanning. To achieve this we need to add symbols for start and end of the section to the linker scripts. The problem was been uncovered by commit 56989f6d8568 ("genetlink: mark families as __ro_after_init"). Link: http://lkml.kernel.org/r/1478274173-15218-1-git-send-email-jakub.kicinski@netronome.com Reviewed-by: Catalin Marinas Signed-off-by: Jakub Kicinski Cc: Arnd Bergmann Cc: Cong Wang Cc: Johannes Berg Cc: Martin Schwidefsky Cc: Heiko Carstens Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/s390/kernel/vmlinux.lds.S | 2 ++ include/asm-generic/sections.h | 3 +++ include/asm-generic/vmlinux.lds.h | 5 ++++- mm/kmemleak.c | 1 + 4 files changed, 10 insertions(+), 1 deletion(-) diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S index 000e6e91f6a0..3667d20e997f 100644 --- a/arch/s390/kernel/vmlinux.lds.S +++ b/arch/s390/kernel/vmlinux.lds.S @@ -62,9 +62,11 @@ SECTIONS . = ALIGN(PAGE_SIZE); __start_ro_after_init = .; + __start_data_ro_after_init = .; .data..ro_after_init : { *(.data..ro_after_init) } + __end_data_ro_after_init = .; EXCEPTION_TABLE(16) . = ALIGN(PAGE_SIZE); __end_ro_after_init = .; diff --git a/include/asm-generic/sections.h b/include/asm-generic/sections.h index af0254c09424..4df64a1fc09e 100644 --- a/include/asm-generic/sections.h +++ b/include/asm-generic/sections.h @@ -14,6 +14,8 @@ * [_sdata, _edata]: contains .data.* sections, may also contain .rodata.* * and/or .init.* sections. * [__start_rodata, __end_rodata]: contains .rodata.* sections + * [__start_data_ro_after_init, __end_data_ro_after_init]: + * contains data.ro_after_init section * [__init_begin, __init_end]: contains .init.* sections, but .init.text.* * may be out of this range on some architectures. * [_sinittext, _einittext]: contains .init.text.* sections @@ -31,6 +33,7 @@ extern char _data[], _sdata[], _edata[]; extern char __bss_start[], __bss_stop[]; extern char __init_begin[], __init_end[]; extern char _sinittext[], _einittext[]; +extern char __start_data_ro_after_init[], __end_data_ro_after_init[]; extern char _end[]; extern char __per_cpu_load[], __per_cpu_start[], __per_cpu_end[]; extern char __kprobes_text_start[], __kprobes_text_end[]; diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 30747960bc54..31e1d639abed 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -259,7 +259,10 @@ * own by defining an empty RO_AFTER_INIT_DATA. */ #ifndef RO_AFTER_INIT_DATA -#define RO_AFTER_INIT_DATA *(.data..ro_after_init) +#define RO_AFTER_INIT_DATA \ + __start_data_ro_after_init = .; \ + *(.data..ro_after_init) \ + __end_data_ro_after_init = .; #endif /* diff --git a/mm/kmemleak.c b/mm/kmemleak.c index e5355a5b423f..d1380ed93fdf 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c @@ -1414,6 +1414,7 @@ static void kmemleak_scan(void) /* data/bss scanning */ scan_large_block(_sdata, _edata); scan_large_block(__bss_start, __bss_stop); + scan_large_block(__start_data_ro_after_init, __end_data_ro_after_init); #ifdef CONFIG_SMP /* per-cpu sections scanning */ From ae65a21fb851f09bf6341761d884fb86b644b75a Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 10 Nov 2016 10:46:47 -0800 Subject: [PATCH 265/319] lib/stackdepot: export save/fetch stack for drivers Some drivers would like to record stacktraces in order to aide leak tracing. As stackdepot already provides a facility for only storing the unique traces, thereby reducing the memory required, export that functionality for use by drivers. The code was originally created for KASAN and moved under lib in commit cd11016e5f521 ("mm, kasan: stackdepot implementation. Enable stackdepot for SLAB") so that it could be shared with mm/. In turn, we want to share it now with drivers. Link: http://lkml.kernel.org/r/20161108133209.22704-1-chris@chris-wilson.co.uk Signed-off-by: Chris Wilson Cc: Andrey Ryabinin Cc: Alexander Potapenko Cc: Dmitry Vyukov Cc: Joonsoo Kim Cc: "Kirill A. Shutemov" Cc: Daniel Vetter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/stackdepot.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib/stackdepot.c b/lib/stackdepot.c index 4d830e299989..f87d138e9672 100644 --- a/lib/stackdepot.c +++ b/lib/stackdepot.c @@ -192,6 +192,7 @@ void depot_fetch_stack(depot_stack_handle_t handle, struct stack_trace *trace) trace->entries = stack->entries; trace->skip = 0; } +EXPORT_SYMBOL_GPL(depot_fetch_stack); /** * depot_save_stack - save stack in a stack depot. @@ -283,3 +284,4 @@ depot_stack_handle_t depot_save_stack(struct stack_trace *trace, fast_exit: return retval; } +EXPORT_SYMBOL_GPL(depot_save_stack); From a76bcf557ef408b368cf26f52a60865bfc27b632 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 10 Nov 2016 17:44:44 +0100 Subject: [PATCH 266/319] Kbuild: enable -Wmaybe-uninitialized warning for "make W=1" Traditionally, we have always had warnings about uninitialized variables enabled, as this is part of -Wall, and generally a good idea [1], but it also always produced false positives, mainly because this is a variation of the halting problem and provably impossible to get right in all cases [2]. Various people have identified cases that are particularly bad for false positives, and in commit e74fc973b6e5 ("Turn off -Wmaybe-uninitialized when building with -Os"), I turned off the warning for any build that was done with CC_OPTIMIZE_FOR_SIZE. This drastically reduced the number of false positive warnings in the default build but unfortunately had the side effect of turning the warning off completely in 'allmodconfig' builds, which in turn led to a lot of warnings (both actual bugs, and remaining false positives) to go in unnoticed. With commit 877417e6ffb9 ("Kbuild: change CC_OPTIMIZE_FOR_SIZE definition") enabled the warning again for allmodconfig builds in v4.7 and in v4.8-rc1, I had finally managed to address all warnings I get in an ARM allmodconfig build and most other maybe-uninitialized warnings for ARM randconfig builds. However, commit 6e8d666e9253 ("Disable "maybe-uninitialized" warning globally") was merged at the same time and disabled it completely for all configurations, because of false-positive warnings on x86 that I had not addressed until then. This caused a lot of actual bugs to get merged into mainline, and I sent several dozen patches for these during the v4.9 development cycle. Most of these are actual bugs, some are for correct code that is safe because it is only called under external constraints that make it impossible to run into the case that gcc sees, and in a few cases gcc is just stupid and finds something that can obviously never happen. I have now done a few thousand randconfig builds on x86 and collected all patches that I needed to address every single warning I got (I can provide the combined patch for the other warnings if anyone is interested), so I hope we can get the warning back and let people catch the actual bugs earlier. This reverts the change to disable the warning completely and for now brings it back at the "make W=1" level, so we can get it merged into mainline without introducing false positives. A follow-up patch enables it on all levels unless some configuration option turns it off because of false-positives. Link: https://rusty.ozlabs.org/?p=232 [1] Link: https://gcc.gnu.org/wiki/Better_Uninitialized_Warnings [2] Signed-off-by: Arnd Bergmann Signed-off-by: Linus Torvalds --- Makefile | 10 ++++++---- arch/arc/Makefile | 4 +++- scripts/Makefile.extrawarn | 3 +++ scripts/Makefile.ubsan | 4 ++++ 4 files changed, 16 insertions(+), 5 deletions(-) diff --git a/Makefile b/Makefile index f97f786de58d..06e2b73978e8 100644 --- a/Makefile +++ b/Makefile @@ -370,7 +370,7 @@ LDFLAGS_MODULE = CFLAGS_KERNEL = AFLAGS_KERNEL = LDFLAGS_vmlinux = -CFLAGS_GCOV = -fprofile-arcs -ftest-coverage -fno-tree-loop-im +CFLAGS_GCOV = -fprofile-arcs -ftest-coverage -fno-tree-loop-im -Wno-maybe-uninitialized CFLAGS_KCOV := $(call cc-option,-fsanitize-coverage=trace-pc,) @@ -620,7 +620,6 @@ ARCH_CFLAGS := include arch/$(SRCARCH)/Makefile KBUILD_CFLAGS += $(call cc-option,-fno-delete-null-pointer-checks,) -KBUILD_CFLAGS += $(call cc-disable-warning,maybe-uninitialized,) KBUILD_CFLAGS += $(call cc-disable-warning,frame-address,) ifdef CONFIG_LD_DEAD_CODE_DATA_ELIMINATION @@ -629,15 +628,18 @@ KBUILD_CFLAGS += $(call cc-option,-fdata-sections,) endif ifdef CONFIG_CC_OPTIMIZE_FOR_SIZE -KBUILD_CFLAGS += -Os +KBUILD_CFLAGS += -Os $(call cc-disable-warning,maybe-uninitialized,) else ifdef CONFIG_PROFILE_ALL_BRANCHES -KBUILD_CFLAGS += -O2 +KBUILD_CFLAGS += -O2 $(call cc-disable-warning,maybe-uninitialized,) else KBUILD_CFLAGS += -O2 endif endif +KBUILD_CFLAGS += $(call cc-ifversion, -lt, 0409, \ + $(call cc-disable-warning,maybe-uninitialized,)) + # Tell gcc to never replace conditional load with a non-conditional one KBUILD_CFLAGS += $(call cc-option,--param=allow-store-data-races=0) diff --git a/arch/arc/Makefile b/arch/arc/Makefile index 864adad52280..25f81a1db9f9 100644 --- a/arch/arc/Makefile +++ b/arch/arc/Makefile @@ -68,7 +68,9 @@ cflags-$(CONFIG_ARC_DW2_UNWIND) += -fasynchronous-unwind-tables $(cfi) ifndef CONFIG_CC_OPTIMIZE_FOR_SIZE # Generic build system uses -O2, we want -O3 # Note: No need to add to cflags-y as that happens anyways -ARCH_CFLAGS += -O3 +# +# Disable the false maybe-uninitialized warings gcc spits out at -O3 +ARCH_CFLAGS += -O3 $(call cc-disable-warning,maybe-uninitialized,) endif # small data is default for elf32 tool-chain. If not usable, disable it diff --git a/scripts/Makefile.extrawarn b/scripts/Makefile.extrawarn index 53449a6ff6aa..7fc2c5a3dbbe 100644 --- a/scripts/Makefile.extrawarn +++ b/scripts/Makefile.extrawarn @@ -36,6 +36,7 @@ warning-2 += -Wshadow warning-2 += $(call cc-option, -Wlogical-op) warning-2 += $(call cc-option, -Wmissing-field-initializers) warning-2 += $(call cc-option, -Wsign-compare) +warning-2 += $(call cc-option, -Wmaybe-uninitialized) warning-3 := -Wbad-function-cast warning-3 += -Wcast-qual @@ -59,6 +60,8 @@ endif KBUILD_CFLAGS += $(warning) else +KBUILD_CFLAGS += $(call cc-disable-warning, maybe-uninitialized) + ifeq ($(cc-name),clang) KBUILD_CFLAGS += $(call cc-disable-warning, initializer-overrides) KBUILD_CFLAGS += $(call cc-disable-warning, unused-value) diff --git a/scripts/Makefile.ubsan b/scripts/Makefile.ubsan index dd779c40c8e6..3b1b13818d59 100644 --- a/scripts/Makefile.ubsan +++ b/scripts/Makefile.ubsan @@ -17,4 +17,8 @@ endif ifdef CONFIG_UBSAN_NULL CFLAGS_UBSAN += $(call cc-option, -fsanitize=null) endif + + # -fsanitize=* options makes GCC less smart than usual and + # increase number of 'maybe-uninitialized false-positives + CFLAGS_UBSAN += $(call cc-option, -Wno-maybe-uninitialized) endif From e84efa32b9d2763fb92e73c8942100d8f55ad4ed Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 10 Nov 2016 17:44:45 +0100 Subject: [PATCH 267/319] NFSv4.1: work around -Wmaybe-uninitialized warning A bugfix introduced a harmless gcc warning in nfs4_slot_seqid_in_use if we enable -Wmaybe-uninitialized again: fs/nfs/nfs4session.c:203:54: error: 'cur_seq' may be used uninitialized in this function [-Werror=maybe-uninitialized] gcc is not smart enough to conclude that the IS_ERR/PTR_ERR pair results in a nonzero return value here. Using PTR_ERR_OR_ZERO() instead makes this clear to the compiler. Fixes: e09c978aae5b ("NFSv4.1: Fix Oopsable condition in server callback races") Signed-off-by: Arnd Bergmann Signed-off-by: Linus Torvalds --- fs/nfs/nfs4session.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/fs/nfs/nfs4session.c b/fs/nfs/nfs4session.c index b62973045a3e..150c5a1879bf 100644 --- a/fs/nfs/nfs4session.c +++ b/fs/nfs/nfs4session.c @@ -178,12 +178,14 @@ static int nfs4_slot_get_seqid(struct nfs4_slot_table *tbl, u32 slotid, __must_hold(&tbl->slot_tbl_lock) { struct nfs4_slot *slot; + int ret; slot = nfs4_lookup_slot(tbl, slotid); - if (IS_ERR(slot)) - return PTR_ERR(slot); - *seq_nr = slot->seq_nr; - return 0; + ret = PTR_ERR_OR_ZERO(slot); + if (!ret) + *seq_nr = slot->seq_nr; + + return ret; } /* From 3a6d867612dce2035ca50cb02e7871d27c86aa72 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 10 Nov 2016 17:44:46 +0100 Subject: [PATCH 268/319] x86: apm: avoid uninitialized data MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit apm_bios_call() can fail, and return a status in its argument structure. If that status however is zero during a call from apm_get_power_status(), we end up using data that may have never been set, as reported by "gcc -Wmaybe-uninitialized": arch/x86/kernel/apm_32.c: In function ‘apm’: arch/x86/kernel/apm_32.c:1729:17: error: ‘bx’ may be used uninitialized in this function [-Werror=maybe-uninitialized] arch/x86/kernel/apm_32.c:1835:5: error: ‘cx’ may be used uninitialized in this function [-Werror=maybe-uninitialized] arch/x86/kernel/apm_32.c:1730:17: note: ‘cx’ was declared here arch/x86/kernel/apm_32.c:1842:27: error: ‘dx’ may be used uninitialized in this function [-Werror=maybe-uninitialized] arch/x86/kernel/apm_32.c:1731:17: note: ‘dx’ was declared here This changes the function to return "APM_NO_ERROR" here, which makes the code more robust to broken BIOS versions, and avoids the warning. Signed-off-by: Arnd Bergmann Reviewed-by: Jiri Kosina Reviewed-by: Luis R. Rodriguez Signed-off-by: Linus Torvalds --- arch/x86/kernel/apm_32.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index c7364bd633e1..51287cd90bf6 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c @@ -1042,8 +1042,11 @@ static int apm_get_power_status(u_short *status, u_short *bat, u_short *life) if (apm_info.get_power_status_broken) return APM_32_UNSUPPORTED; - if (apm_bios_call(&call)) + if (apm_bios_call(&call)) { + if (!call.err) + return APM_NO_ERROR; return call.err; + } *status = call.ebx; *bat = call.ecx; if (apm_info.get_power_status_swabinminutes) { From 069013a9e2bbf1ace3f796cb664023dcd40730f7 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 10 Nov 2016 17:44:47 +0100 Subject: [PATCH 269/319] nios2: fix timer initcall return value When called more than twice, the nios2_time_init() function return an uninitialized value, as detected by gcc -Wmaybe-uninitialized arch/nios2/kernel/time.c: warning: 'ret' may be used uninitialized in this function This makes it return '0' here, matching the comment above the function. Acked-by: Ley Foon Tan Signed-off-by: Arnd Bergmann Signed-off-by: Linus Torvalds --- arch/nios2/kernel/time.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/nios2/kernel/time.c b/arch/nios2/kernel/time.c index d9563ddb337e..746bf5caaffc 100644 --- a/arch/nios2/kernel/time.c +++ b/arch/nios2/kernel/time.c @@ -324,6 +324,7 @@ static int __init nios2_time_init(struct device_node *timer) ret = nios2_clocksource_init(timer); break; default: + ret = 0; break; } From 92dfffee974f75c28b0c89f31318669091de1ec3 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 10 Nov 2016 17:44:48 +0100 Subject: [PATCH 270/319] s390: pci: don't print uninitialized data for debugging gcc correctly warns about an incorrect use of the 'pa' variable in case we pass an empty scatterlist to __s390_dma_map_sg: arch/s390/pci/pci_dma.c: In function '__s390_dma_map_sg': arch/s390/pci/pci_dma.c:309:13: warning: 'pa' may be used uninitialized in this function [-Wmaybe-uninitialized] This adds a bogus initialization to the function to sanitize the debug output. I would have preferred a solution without the initialization, but I only got the report from the kbuild bot after turning on the warning again, and didn't manage to reproduce it myself. Signed-off-by: Arnd Bergmann Acked-by: Sebastian Ott Acked-by: Martin Schwidefsky Signed-off-by: Linus Torvalds --- arch/s390/pci/pci_dma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c index 7350c8bc13a2..6b2f72f523b9 100644 --- a/arch/s390/pci/pci_dma.c +++ b/arch/s390/pci/pci_dma.c @@ -423,7 +423,7 @@ static int __s390_dma_map_sg(struct device *dev, struct scatterlist *sg, dma_addr_t dma_addr_base, dma_addr; int flags = ZPCI_PTE_VALID; struct scatterlist *s; - unsigned long pa; + unsigned long pa = 0; int ret; size = PAGE_ALIGN(size); From ba13e98f2cebd55a3744c5ffaa08f9dca73bf521 Mon Sep 17 00:00:00 2001 From: Sean Young Date: Thu, 10 Nov 2016 17:44:49 +0100 Subject: [PATCH 271/319] dib0700: fix nec repeat handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When receiving a nec repeat, ensure the correct scancode is repeated rather than a random value from the stack. This removes the need for the bogus uninitialized_var() and also fixes the warnings: drivers/media/usb/dvb-usb/dib0700_core.c: In function ‘dib0700_rc_urb_completion’: drivers/media/usb/dvb-usb/dib0700_core.c:679: warning: ‘protocol’ may be used uninitialized in this function [sean addon: So after writing the patch and submitting it, I've bought the hardware on ebay. Without this patch you get random scancodes on nec repeats, which the patch indeed fixes.] Signed-off-by: Sean Young Tested-by: Sean Young Cc: stable@vger.kernel.org Signed-off-by: Arnd Bergmann Signed-off-by: Linus Torvalds --- drivers/media/usb/dvb-usb/dib0700_core.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/media/usb/dvb-usb/dib0700_core.c b/drivers/media/usb/dvb-usb/dib0700_core.c index 92d5408684ac..47ce9d5de4c6 100644 --- a/drivers/media/usb/dvb-usb/dib0700_core.c +++ b/drivers/media/usb/dvb-usb/dib0700_core.c @@ -704,7 +704,7 @@ static void dib0700_rc_urb_completion(struct urb *purb) struct dvb_usb_device *d = purb->context; struct dib0700_rc_response *poll_reply; enum rc_type protocol; - u32 uninitialized_var(keycode); + u32 keycode; u8 toggle; deb_info("%s()\n", __func__); @@ -745,7 +745,8 @@ static void dib0700_rc_urb_completion(struct urb *purb) poll_reply->nec.data == 0x00 && poll_reply->nec.not_data == 0xff) { poll_reply->data_state = 2; - break; + rc_repeat(d->rc_dev); + goto resubmit; } if ((poll_reply->nec.data ^ poll_reply->nec.not_data) != 0xff) { From 9cdbe14fb468586454d26d7ff878e7b698449727 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 10 Nov 2016 17:44:50 +0100 Subject: [PATCH 272/319] rc: print correct variable for z8f0811 A recent rework accidentally left a debugging printk untouched while changing the meaning of the variables, leading to an uninitialized variable being printed: drivers/media/i2c/ir-kbd-i2c.c: In function 'get_key_haup_common': drivers/media/i2c/ir-kbd-i2c.c:62:2: error: 'toggle' may be used uninitialized in this function [-Werror=maybe-uninitialized] This prints the correct one instead, as we did before the patch. Fixes: 00bb820755ed ("[media] rc: Hauppauge z8f0811 can decode RC6") Signed-off-by: Arnd Bergmann Signed-off-by: Linus Torvalds --- drivers/media/i2c/ir-kbd-i2c.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/i2c/ir-kbd-i2c.c b/drivers/media/i2c/ir-kbd-i2c.c index f95a6bc839d5..cede3975d04b 100644 --- a/drivers/media/i2c/ir-kbd-i2c.c +++ b/drivers/media/i2c/ir-kbd-i2c.c @@ -118,7 +118,7 @@ static int get_key_haup_common(struct IR_i2c *ir, enum rc_type *protocol, *protocol = RC_TYPE_RC6_MCE; dev &= 0x7f; dprintk(1, "ir hauppauge (rc6-mce): t%d vendor=%d dev=%d code=%d\n", - toggle, vendor, dev, code); + *ptoggle, vendor, dev, code); } else { *ptoggle = 0; *protocol = RC_TYPE_RC6_6A_32; From beae2c9eb500d5509feb9fd148d34d97a9b1d276 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 10 Nov 2016 17:44:51 +0100 Subject: [PATCH 273/319] crypto: aesni: shut up -Wmaybe-uninitialized warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The rfc4106 encrypy/decrypt helper functions cause an annoying false-positive warning in allmodconfig if we turn on -Wmaybe-uninitialized warnings again: arch/x86/crypto/aesni-intel_glue.c: In function ‘helper_rfc4106_decrypt’: include/linux/scatterlist.h:67:31: warning: ‘dst_sg_walk.sg’ may be used uninitialized in this function [-Wmaybe-uninitialized] The problem seems to be that the compiler doesn't track the state of the 'one_entry_in_sg' variable across the kernel_fpu_begin/kernel_fpu_end section. This takes the easy way out by adding a bogus initialization, which should be harmless enough to get the patch into v4.9 so we can turn on this warning again by default without producing useless output. A follow-up patch for v4.10 rearranges the code to make the warning go away. Signed-off-by: Arnd Bergmann Signed-off-by: Linus Torvalds --- arch/x86/crypto/aesni-intel_glue.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index 0ab5ee1c26af..aa8b0672f87a 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c @@ -888,7 +888,7 @@ static int helper_rfc4106_encrypt(struct aead_request *req) unsigned long auth_tag_len = crypto_aead_authsize(tfm); u8 iv[16] __attribute__ ((__aligned__(AESNI_ALIGN))); struct scatter_walk src_sg_walk; - struct scatter_walk dst_sg_walk; + struct scatter_walk dst_sg_walk = {}; unsigned int i; /* Assuming we are supporting rfc4106 64-bit extended */ @@ -968,7 +968,7 @@ static int helper_rfc4106_decrypt(struct aead_request *req) u8 iv[16] __attribute__ ((__aligned__(AESNI_ALIGN))); u8 authTag[16]; struct scatter_walk src_sg_walk; - struct scatter_walk dst_sg_walk; + struct scatter_walk dst_sg_walk = {}; unsigned int i; if (unlikely(req->assoclen != 16 && req->assoclen != 20)) From c50e90d0d2bc489901c9adb825609cbbb2f7ffa1 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 10 Nov 2016 17:44:52 +0100 Subject: [PATCH 274/319] infiniband: shut up a maybe-uninitialized warning Some configurations produce this harmless warning when built with gcc -Wmaybe-uninitialized: infiniband/core/cma.c: In function 'cma_get_net_dev': infiniband/core/cma.c:1242:12: warning: 'src_addr_storage.sin_addr.s_addr' may be used uninitialized in this function [-Wmaybe-uninitialized] I previously reported this for the powerpc64 defconfig, but have now reproduced the same thing for x86 as well, using gcc-5 or higher. The code looks correct to me, and this change just rearranges it by making sure we alway initialize the entire address structure to make the warning disappear. My first approach added an initialization at the time of the declaration, which Doug commented may be too costly, so I hope this version doesn't add overhead. Link: http://arm-soc.lixom.net/buildlogs/mainline/v4.7-rc6/buildall.powerpc.ppc64_defconfig.log.passed Link: https://patchwork.kernel.org/patch/9212825/ Acked-by: Haggai Eran Signed-off-by: Arnd Bergmann Signed-off-by: Linus Torvalds --- drivers/infiniband/core/cma.c | 54 ++++++++++++++++++----------------- 1 file changed, 28 insertions(+), 26 deletions(-) diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 36bf50ebb187..89a6b0546804 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -1094,47 +1094,47 @@ static void cma_save_ib_info(struct sockaddr *src_addr, } } -static void cma_save_ip4_info(struct sockaddr *src_addr, - struct sockaddr *dst_addr, +static void cma_save_ip4_info(struct sockaddr_in *src_addr, + struct sockaddr_in *dst_addr, struct cma_hdr *hdr, __be16 local_port) { - struct sockaddr_in *ip4; - if (src_addr) { - ip4 = (struct sockaddr_in *)src_addr; - ip4->sin_family = AF_INET; - ip4->sin_addr.s_addr = hdr->dst_addr.ip4.addr; - ip4->sin_port = local_port; + *src_addr = (struct sockaddr_in) { + .sin_family = AF_INET, + .sin_addr.s_addr = hdr->dst_addr.ip4.addr, + .sin_port = local_port, + }; } if (dst_addr) { - ip4 = (struct sockaddr_in *)dst_addr; - ip4->sin_family = AF_INET; - ip4->sin_addr.s_addr = hdr->src_addr.ip4.addr; - ip4->sin_port = hdr->port; + *dst_addr = (struct sockaddr_in) { + .sin_family = AF_INET, + .sin_addr.s_addr = hdr->src_addr.ip4.addr, + .sin_port = hdr->port, + }; } } -static void cma_save_ip6_info(struct sockaddr *src_addr, - struct sockaddr *dst_addr, +static void cma_save_ip6_info(struct sockaddr_in6 *src_addr, + struct sockaddr_in6 *dst_addr, struct cma_hdr *hdr, __be16 local_port) { - struct sockaddr_in6 *ip6; - if (src_addr) { - ip6 = (struct sockaddr_in6 *)src_addr; - ip6->sin6_family = AF_INET6; - ip6->sin6_addr = hdr->dst_addr.ip6; - ip6->sin6_port = local_port; + *src_addr = (struct sockaddr_in6) { + .sin6_family = AF_INET6, + .sin6_addr = hdr->dst_addr.ip6, + .sin6_port = local_port, + }; } if (dst_addr) { - ip6 = (struct sockaddr_in6 *)dst_addr; - ip6->sin6_family = AF_INET6; - ip6->sin6_addr = hdr->src_addr.ip6; - ip6->sin6_port = hdr->port; + *dst_addr = (struct sockaddr_in6) { + .sin6_family = AF_INET6, + .sin6_addr = hdr->src_addr.ip6, + .sin6_port = hdr->port, + }; } } @@ -1159,10 +1159,12 @@ static int cma_save_ip_info(struct sockaddr *src_addr, switch (cma_get_ip_ver(hdr)) { case 4: - cma_save_ip4_info(src_addr, dst_addr, hdr, port); + cma_save_ip4_info((struct sockaddr_in *)src_addr, + (struct sockaddr_in *)dst_addr, hdr, port); break; case 6: - cma_save_ip6_info(src_addr, dst_addr, hdr, port); + cma_save_ip6_info((struct sockaddr_in6 *)src_addr, + (struct sockaddr_in6 *)dst_addr, hdr, port); break; default: return -EAFNOSUPPORT; From 75ed26878b4cc0ca1c9d8ed5a642b45b60c231d9 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 10 Nov 2016 17:44:53 +0100 Subject: [PATCH 275/319] pcmcia: fix return value of soc_pcmcia_regulator_set The newly introduced soc_pcmcia_regulator_set() function sometimes returns without setting its return code, as shown by this warning: drivers/pcmcia/soc_common.c: In function 'soc_pcmcia_regulator_set': drivers/pcmcia/soc_common.c:112:5: error: 'ret' may be used uninitialized in this function [-Werror=maybe-uninitialized] This changes it to propagate the regulator_disable() result instead. Fixes: ac61b6001a63 ("pcmcia: soc_common: add support for Vcc and Vpp regulators") Signed-off-by: Arnd Bergmann Signed-off-by: Linus Torvalds --- drivers/pcmcia/soc_common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pcmcia/soc_common.c b/drivers/pcmcia/soc_common.c index 153f3122283d..b6b316de055c 100644 --- a/drivers/pcmcia/soc_common.c +++ b/drivers/pcmcia/soc_common.c @@ -107,7 +107,7 @@ int soc_pcmcia_regulator_set(struct soc_pcmcia_socket *skt, ret = regulator_enable(r->reg); } else { - regulator_disable(r->reg); + ret = regulator_disable(r->reg); } if (ret == 0) r->on = on; From 4324cb23f4569edcf76e637cdb3c1dfe8e8a85e4 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 10 Nov 2016 17:44:54 +0100 Subject: [PATCH 276/319] Kbuild: enable -Wmaybe-uninitialized warnings by default Previously the warnings were added back at the W=1 level and above, this now turns them on again by default, assuming that we have addressed all warnings and again have a clean build for v4.10. I found a number of new warnings in linux-next already and submitted bugfixes for those. Hopefully they are caught by the 0day builder in the future as soon as this patch is merged. Signed-off-by: Arnd Bergmann Signed-off-by: Linus Torvalds --- scripts/Makefile.extrawarn | 2 -- 1 file changed, 2 deletions(-) diff --git a/scripts/Makefile.extrawarn b/scripts/Makefile.extrawarn index 7fc2c5a3dbbe..7c321a603b07 100644 --- a/scripts/Makefile.extrawarn +++ b/scripts/Makefile.extrawarn @@ -60,8 +60,6 @@ endif KBUILD_CFLAGS += $(warning) else -KBUILD_CFLAGS += $(call cc-disable-warning, maybe-uninitialized) - ifeq ($(cc-name),clang) KBUILD_CFLAGS += $(call cc-disable-warning, initializer-overrides) KBUILD_CFLAGS += $(call cc-disable-warning, unused-value) From 409ae5a76e0505c8ffe1424f9c00dbf2ec7b5eea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Matias=20Bj=C3=B8rling?= Date: Thu, 10 Nov 2016 12:26:57 +0100 Subject: [PATCH 277/319] lightnvm: invalid offset calculation for lba_shift MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The ns->lba_shift assumes its value to be the logarithmic of the LA size. A previous patch duplicated the lba_shift calculation into lightnvm. It prematurely also subtracted a 512byte shift, which commonly is applied per-command. The 512byte shift being subtracted twice led to data loss when restoring the logical to physical mapping table from device and when issuing I/O commands using rrpc. Fix offset by removing the 512byte shift subtraction when calculating lba_shift. Fixes: b0b4e09c1ae7 "lightnvm: control life of nvm_dev in driver" Reported-by: Javier González Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/nvme/host/lightnvm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c index f5e3011e31fc..5daf2f4be0cd 100644 --- a/drivers/nvme/host/lightnvm.c +++ b/drivers/nvme/host/lightnvm.c @@ -612,7 +612,7 @@ int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, int node, ret = nvm_register(dev); - ns->lba_shift = ilog2(dev->sec_size) - 9; + ns->lba_shift = ilog2(dev->sec_size); if (sysfs_create_group(&dev->dev.kobj, attrs)) pr_warn("%s: failed to create sysfs group for identification\n", From 0cbc72a1781250f373327dd7e306e33859a42154 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 11 Nov 2016 18:28:50 -0700 Subject: [PATCH 278/319] aoe: fix crash in page count manipulation aoeblk contains some mysterious code, that wants to elevate the bio vec page counts while it's under IO. That is not needed, it's fragile, and it's causing kernel oopses for some. Reported-by: Tested-by: Don Koch Tested-by: Tested-by: Don Koch Signed-off-by: Jens Axboe --- drivers/block/aoe/aoecmd.c | 41 -------------------------------------- 1 file changed, 41 deletions(-) diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c index ab19adb07a12..3c606c09fd5a 100644 --- a/drivers/block/aoe/aoecmd.c +++ b/drivers/block/aoe/aoecmd.c @@ -853,45 +853,6 @@ rqbiocnt(struct request *r) return n; } -/* This can be removed if we are certain that no users of the block - * layer will ever use zero-count pages in bios. Otherwise we have to - * protect against the put_page sometimes done by the network layer. - * - * See http://oss.sgi.com/archives/xfs/2007-01/msg00594.html for - * discussion. - * - * We cannot use get_page in the workaround, because it insists on a - * positive page count as a precondition. So we use _refcount directly. - */ -static void -bio_pageinc(struct bio *bio) -{ - struct bio_vec bv; - struct page *page; - struct bvec_iter iter; - - bio_for_each_segment(bv, bio, iter) { - /* Non-zero page count for non-head members of - * compound pages is no longer allowed by the kernel. - */ - page = compound_head(bv.bv_page); - page_ref_inc(page); - } -} - -static void -bio_pagedec(struct bio *bio) -{ - struct page *page; - struct bio_vec bv; - struct bvec_iter iter; - - bio_for_each_segment(bv, bio, iter) { - page = compound_head(bv.bv_page); - page_ref_dec(page); - } -} - static void bufinit(struct buf *buf, struct request *rq, struct bio *bio) { @@ -899,7 +860,6 @@ bufinit(struct buf *buf, struct request *rq, struct bio *bio) buf->rq = rq; buf->bio = bio; buf->iter = bio->bi_iter; - bio_pageinc(bio); } static struct buf * @@ -1127,7 +1087,6 @@ aoe_end_buf(struct aoedev *d, struct buf *buf) if (buf == d->ip.buf) d->ip.buf = NULL; rq = buf->rq; - bio_pagedec(buf->bio); mempool_free(buf, d->bufpool); n = (unsigned long) rq->special; rq->special = (void *) --n; From 4e3264d21b90984c2165e8fe5a7b64cf25bc2c2d Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Wed, 9 Nov 2016 15:36:33 -0800 Subject: [PATCH 279/319] bpf: Fix bpf_redirect to an ipip/ip6tnl dev If the bpf program calls bpf_redirect(dev, 0) and dev is an ipip/ip6tnl, it currently includes the mac header. e.g. If dev is ipip, the end result is IP-EthHdr-IP instead of IP-IP. The fix is to pull the mac header. At ingress, skb_postpull_rcsum() is not needed because the ethhdr should have been pulled once already and then got pushed back just before calling the bpf_prog. At egress, this patch calls skb_postpull_rcsum(). If bpf_redirect(dev, BPF_F_INGRESS) is called, it also fails now because it calls dev_forward_skb() which eventually calls eth_type_trans(skb, dev). The eth_type_trans() will set skb->type = PACKET_OTHERHOST because the mac address does not match the redirecting dev->dev_addr. The PACKET_OTHERHOST will eventually cause the ip_rcv() errors out. To fix this, ____dev_forward_skb() is added. Joint work with Daniel Borkmann. Fixes: cfc7381b3002 ("ip_tunnel: add collect_md mode to IPIP tunnel") Fixes: 8d79266bc48c ("ip6_tunnel: add collect_md mode to IPv6 tunnels") Acked-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: Martin KaFai Lau Signed-off-by: David S. Miller --- include/linux/netdevice.h | 15 +++++++++ net/core/dev.c | 17 ++++------ net/core/filter.c | 68 ++++++++++++++++++++++++++++++++++----- 3 files changed, 81 insertions(+), 19 deletions(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 91ee3643ccc8..bf04a46f6d5b 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3354,6 +3354,21 @@ int dev_forward_skb(struct net_device *dev, struct sk_buff *skb); bool is_skb_forwardable(const struct net_device *dev, const struct sk_buff *skb); +static __always_inline int ____dev_forward_skb(struct net_device *dev, + struct sk_buff *skb) +{ + if (skb_orphan_frags(skb, GFP_ATOMIC) || + unlikely(!is_skb_forwardable(dev, skb))) { + atomic_long_inc(&dev->rx_dropped); + kfree_skb(skb); + return NET_RX_DROP; + } + + skb_scrub_packet(skb, true); + skb->priority = 0; + return 0; +} + void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev); extern int netdev_budget; diff --git a/net/core/dev.c b/net/core/dev.c index eaad4c28069f..6666b28b6815 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1766,19 +1766,14 @@ EXPORT_SYMBOL_GPL(is_skb_forwardable); int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb) { - if (skb_orphan_frags(skb, GFP_ATOMIC) || - unlikely(!is_skb_forwardable(dev, skb))) { - atomic_long_inc(&dev->rx_dropped); - kfree_skb(skb); - return NET_RX_DROP; + int ret = ____dev_forward_skb(dev, skb); + + if (likely(!ret)) { + skb->protocol = eth_type_trans(skb, dev); + skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); } - skb_scrub_packet(skb, true); - skb->priority = 0; - skb->protocol = eth_type_trans(skb, dev); - skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); - - return 0; + return ret; } EXPORT_SYMBOL_GPL(__dev_forward_skb); diff --git a/net/core/filter.c b/net/core/filter.c index 00351cdf7d0c..b391209838ef 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1628,6 +1628,19 @@ static inline int __bpf_rx_skb(struct net_device *dev, struct sk_buff *skb) return dev_forward_skb(dev, skb); } +static inline int __bpf_rx_skb_no_mac(struct net_device *dev, + struct sk_buff *skb) +{ + int ret = ____dev_forward_skb(dev, skb); + + if (likely(!ret)) { + skb->dev = dev; + ret = netif_rx(skb); + } + + return ret; +} + static inline int __bpf_tx_skb(struct net_device *dev, struct sk_buff *skb) { int ret; @@ -1647,6 +1660,51 @@ static inline int __bpf_tx_skb(struct net_device *dev, struct sk_buff *skb) return ret; } +static int __bpf_redirect_no_mac(struct sk_buff *skb, struct net_device *dev, + u32 flags) +{ + /* skb->mac_len is not set on normal egress */ + unsigned int mlen = skb->network_header - skb->mac_header; + + __skb_pull(skb, mlen); + + /* At ingress, the mac header has already been pulled once. + * At egress, skb_pospull_rcsum has to be done in case that + * the skb is originated from ingress (i.e. a forwarded skb) + * to ensure that rcsum starts at net header. + */ + if (!skb_at_tc_ingress(skb)) + skb_postpull_rcsum(skb, skb_mac_header(skb), mlen); + skb_pop_mac_header(skb); + skb_reset_mac_len(skb); + return flags & BPF_F_INGRESS ? + __bpf_rx_skb_no_mac(dev, skb) : __bpf_tx_skb(dev, skb); +} + +static int __bpf_redirect_common(struct sk_buff *skb, struct net_device *dev, + u32 flags) +{ + bpf_push_mac_rcsum(skb); + return flags & BPF_F_INGRESS ? + __bpf_rx_skb(dev, skb) : __bpf_tx_skb(dev, skb); +} + +static int __bpf_redirect(struct sk_buff *skb, struct net_device *dev, + u32 flags) +{ + switch (dev->type) { + case ARPHRD_TUNNEL: + case ARPHRD_TUNNEL6: + case ARPHRD_SIT: + case ARPHRD_IPGRE: + case ARPHRD_VOID: + case ARPHRD_NONE: + return __bpf_redirect_no_mac(skb, dev, flags); + default: + return __bpf_redirect_common(skb, dev, flags); + } +} + BPF_CALL_3(bpf_clone_redirect, struct sk_buff *, skb, u32, ifindex, u64, flags) { struct net_device *dev; @@ -1675,10 +1733,7 @@ BPF_CALL_3(bpf_clone_redirect, struct sk_buff *, skb, u32, ifindex, u64, flags) return -ENOMEM; } - bpf_push_mac_rcsum(clone); - - return flags & BPF_F_INGRESS ? - __bpf_rx_skb(dev, clone) : __bpf_tx_skb(dev, clone); + return __bpf_redirect(clone, dev, flags); } static const struct bpf_func_proto bpf_clone_redirect_proto = { @@ -1722,10 +1777,7 @@ int skb_do_redirect(struct sk_buff *skb) return -EINVAL; } - bpf_push_mac_rcsum(skb); - - return ri->flags & BPF_F_INGRESS ? - __bpf_rx_skb(dev, skb) : __bpf_tx_skb(dev, skb); + return __bpf_redirect(skb, dev, ri->flags); } static const struct bpf_func_proto bpf_redirect_proto = { From 90e02896f1a4627b14624245fbcbc19f8fd916cb Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Wed, 9 Nov 2016 15:36:34 -0800 Subject: [PATCH 280/319] bpf: Add test for bpf_redirect to ipip/ip6tnl The test creates two netns, ns1 and ns2. The host (the default netns) has an ipip or ip6tnl dev configured for tunneling traffic to the ns2. ping VIPS from ns1 <----> host <--tunnel--> ns2 (VIPs at loopback) The test is to have ns1 pinging VIPs configured at the loopback interface in ns2. The VIPs are 10.10.1.102 and 2401:face::66 (which are configured at lo@ns2). [Note: 0x66 => 102]. At ns1, the VIPs are routed _via_ the host. At the host, bpf programs are installed at the veth to redirect packets from a veth to the ipip/ip6tnl. The test is configured in a way so that both ingress and egress can be tested. At ns2, the ipip/ip6tnl dev is configured with the local and remote address specified. The return path is routed to the dev ipip/ip6tnl. During egress test, the host also locally tests pinging the VIPs to ensure that bpf_redirect at egress also works for the direct egress (i.e. not forwarding from dev ve1 to ve2). Acked-by: Alexei Starovoitov Signed-off-by: Martin KaFai Lau Signed-off-by: David S. Miller --- samples/bpf/Makefile | 4 + samples/bpf/tc_l2_redirect.sh | 173 ++++++++++++++++++++++ samples/bpf/tc_l2_redirect_kern.c | 236 ++++++++++++++++++++++++++++++ samples/bpf/tc_l2_redirect_user.c | 73 +++++++++ 4 files changed, 486 insertions(+) create mode 100755 samples/bpf/tc_l2_redirect.sh create mode 100644 samples/bpf/tc_l2_redirect_kern.c create mode 100644 samples/bpf/tc_l2_redirect_user.c diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 12b7304d55dc..72c58675973e 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -27,6 +27,7 @@ hostprogs-y += xdp2 hostprogs-y += test_current_task_under_cgroup hostprogs-y += trace_event hostprogs-y += sampleip +hostprogs-y += tc_l2_redirect test_verifier-objs := test_verifier.o libbpf.o test_maps-objs := test_maps.o libbpf.o @@ -56,6 +57,7 @@ test_current_task_under_cgroup-objs := bpf_load.o libbpf.o \ test_current_task_under_cgroup_user.o trace_event-objs := bpf_load.o libbpf.o trace_event_user.o sampleip-objs := bpf_load.o libbpf.o sampleip_user.o +tc_l2_redirect-objs := bpf_load.o libbpf.o tc_l2_redirect_user.o # Tell kbuild to always build the programs always := $(hostprogs-y) @@ -72,6 +74,7 @@ always += test_probe_write_user_kern.o always += trace_output_kern.o always += tcbpf1_kern.o always += tcbpf2_kern.o +always += tc_l2_redirect_kern.o always += lathist_kern.o always += offwaketime_kern.o always += spintest_kern.o @@ -111,6 +114,7 @@ HOSTLOADLIBES_xdp2 += -lelf HOSTLOADLIBES_test_current_task_under_cgroup += -lelf HOSTLOADLIBES_trace_event += -lelf HOSTLOADLIBES_sampleip += -lelf +HOSTLOADLIBES_tc_l2_redirect += -l elf # Allows pointing LLC/CLANG to a LLVM backend with bpf support, redefine on cmdline: # make samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang diff --git a/samples/bpf/tc_l2_redirect.sh b/samples/bpf/tc_l2_redirect.sh new file mode 100755 index 000000000000..80a05591a140 --- /dev/null +++ b/samples/bpf/tc_l2_redirect.sh @@ -0,0 +1,173 @@ +#!/bin/bash + +[[ -z $TC ]] && TC='tc' +[[ -z $IP ]] && IP='ip' + +REDIRECT_USER='./tc_l2_redirect' +REDIRECT_BPF='./tc_l2_redirect_kern.o' + +RP_FILTER=$(< /proc/sys/net/ipv4/conf/all/rp_filter) +IPV6_FORWARDING=$(< /proc/sys/net/ipv6/conf/all/forwarding) + +function config_common { + local tun_type=$1 + + $IP netns add ns1 + $IP netns add ns2 + $IP link add ve1 type veth peer name vens1 + $IP link add ve2 type veth peer name vens2 + $IP link set dev ve1 up + $IP link set dev ve2 up + $IP link set dev ve1 mtu 1500 + $IP link set dev ve2 mtu 1500 + $IP link set dev vens1 netns ns1 + $IP link set dev vens2 netns ns2 + + $IP -n ns1 link set dev lo up + $IP -n ns1 link set dev vens1 up + $IP -n ns1 addr add 10.1.1.101/24 dev vens1 + $IP -n ns1 addr add 2401:db01::65/64 dev vens1 nodad + $IP -n ns1 route add default via 10.1.1.1 dev vens1 + $IP -n ns1 route add default via 2401:db01::1 dev vens1 + + $IP -n ns2 link set dev lo up + $IP -n ns2 link set dev vens2 up + $IP -n ns2 addr add 10.2.1.102/24 dev vens2 + $IP -n ns2 addr add 2401:db02::66/64 dev vens2 nodad + $IP -n ns2 addr add 10.10.1.102 dev lo + $IP -n ns2 addr add 2401:face::66/64 dev lo nodad + $IP -n ns2 link add ipt2 type ipip local 10.2.1.102 remote 10.2.1.1 + $IP -n ns2 link add ip6t2 type ip6tnl mode any local 2401:db02::66 remote 2401:db02::1 + $IP -n ns2 link set dev ipt2 up + $IP -n ns2 link set dev ip6t2 up + $IP netns exec ns2 $TC qdisc add dev vens2 clsact + $IP netns exec ns2 $TC filter add dev vens2 ingress bpf da obj $REDIRECT_BPF sec drop_non_tun_vip + if [[ $tun_type == "ipip" ]]; then + $IP -n ns2 route add 10.1.1.0/24 dev ipt2 + $IP netns exec ns2 sysctl -q -w net.ipv4.conf.all.rp_filter=0 + $IP netns exec ns2 sysctl -q -w net.ipv4.conf.ipt2.rp_filter=0 + else + $IP -n ns2 route add 10.1.1.0/24 dev ip6t2 + $IP -n ns2 route add 2401:db01::/64 dev ip6t2 + $IP netns exec ns2 sysctl -q -w net.ipv4.conf.all.rp_filter=0 + $IP netns exec ns2 sysctl -q -w net.ipv4.conf.ip6t2.rp_filter=0 + fi + + $IP addr add 10.1.1.1/24 dev ve1 + $IP addr add 2401:db01::1/64 dev ve1 nodad + $IP addr add 10.2.1.1/24 dev ve2 + $IP addr add 2401:db02::1/64 dev ve2 nodad + + $TC qdisc add dev ve2 clsact + $TC filter add dev ve2 ingress bpf da obj $REDIRECT_BPF sec l2_to_iptun_ingress_forward + + sysctl -q -w net.ipv4.conf.all.rp_filter=0 + sysctl -q -w net.ipv6.conf.all.forwarding=1 +} + +function cleanup { + set +e + [[ -z $DEBUG ]] || set +x + $IP netns delete ns1 >& /dev/null + $IP netns delete ns2 >& /dev/null + $IP link del ve1 >& /dev/null + $IP link del ve2 >& /dev/null + $IP link del ipt >& /dev/null + $IP link del ip6t >& /dev/null + sysctl -q -w net.ipv4.conf.all.rp_filter=$RP_FILTER + sysctl -q -w net.ipv6.conf.all.forwarding=$IPV6_FORWARDING + rm -f /sys/fs/bpf/tc/globals/tun_iface + [[ -z $DEBUG ]] || set -x + set -e +} + +function l2_to_ipip { + echo -n "l2_to_ipip $1: " + + local dir=$1 + + config_common ipip + + $IP link add ipt type ipip external + $IP link set dev ipt up + sysctl -q -w net.ipv4.conf.ipt.rp_filter=0 + sysctl -q -w net.ipv4.conf.ipt.forwarding=1 + + if [[ $dir == "egress" ]]; then + $IP route add 10.10.1.0/24 via 10.2.1.102 dev ve2 + $TC filter add dev ve2 egress bpf da obj $REDIRECT_BPF sec l2_to_iptun_ingress_redirect + sysctl -q -w net.ipv4.conf.ve1.forwarding=1 + else + $TC qdisc add dev ve1 clsact + $TC filter add dev ve1 ingress bpf da obj $REDIRECT_BPF sec l2_to_iptun_ingress_redirect + fi + + $REDIRECT_USER -U /sys/fs/bpf/tc/globals/tun_iface -i $(< /sys/class/net/ipt/ifindex) + + $IP netns exec ns1 ping -c1 10.10.1.102 >& /dev/null + + if [[ $dir == "egress" ]]; then + # test direct egress to ve2 (i.e. not forwarding from + # ve1 to ve2). + ping -c1 10.10.1.102 >& /dev/null + fi + + cleanup + + echo "OK" +} + +function l2_to_ip6tnl { + echo -n "l2_to_ip6tnl $1: " + + local dir=$1 + + config_common ip6tnl + + $IP link add ip6t type ip6tnl mode any external + $IP link set dev ip6t up + sysctl -q -w net.ipv4.conf.ip6t.rp_filter=0 + sysctl -q -w net.ipv4.conf.ip6t.forwarding=1 + + if [[ $dir == "egress" ]]; then + $IP route add 10.10.1.0/24 via 10.2.1.102 dev ve2 + $IP route add 2401:face::/64 via 2401:db02::66 dev ve2 + $TC filter add dev ve2 egress bpf da obj $REDIRECT_BPF sec l2_to_ip6tun_ingress_redirect + sysctl -q -w net.ipv4.conf.ve1.forwarding=1 + else + $TC qdisc add dev ve1 clsact + $TC filter add dev ve1 ingress bpf da obj $REDIRECT_BPF sec l2_to_ip6tun_ingress_redirect + fi + + $REDIRECT_USER -U /sys/fs/bpf/tc/globals/tun_iface -i $(< /sys/class/net/ip6t/ifindex) + + $IP netns exec ns1 ping -c1 10.10.1.102 >& /dev/null + $IP netns exec ns1 ping -6 -c1 2401:face::66 >& /dev/null + + if [[ $dir == "egress" ]]; then + # test direct egress to ve2 (i.e. not forwarding from + # ve1 to ve2). + ping -c1 10.10.1.102 >& /dev/null + ping -6 -c1 2401:face::66 >& /dev/null + fi + + cleanup + + echo "OK" +} + +cleanup +test_names="l2_to_ipip l2_to_ip6tnl" +test_dirs="ingress egress" +if [[ $# -ge 2 ]]; then + test_names=$1 + test_dirs=$2 +elif [[ $# -ge 1 ]]; then + test_names=$1 +fi + +for t in $test_names; do + for d in $test_dirs; do + $t $d + done +done diff --git a/samples/bpf/tc_l2_redirect_kern.c b/samples/bpf/tc_l2_redirect_kern.c new file mode 100644 index 000000000000..92a44729dbe4 --- /dev/null +++ b/samples/bpf/tc_l2_redirect_kern.c @@ -0,0 +1,236 @@ +/* Copyright (c) 2016 Facebook + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "bpf_helpers.h" + +#define _htonl __builtin_bswap32 + +#define PIN_GLOBAL_NS 2 +struct bpf_elf_map { + __u32 type; + __u32 size_key; + __u32 size_value; + __u32 max_elem; + __u32 flags; + __u32 id; + __u32 pinning; +}; + +/* copy of 'struct ethhdr' without __packed */ +struct eth_hdr { + unsigned char h_dest[ETH_ALEN]; + unsigned char h_source[ETH_ALEN]; + unsigned short h_proto; +}; + +struct bpf_elf_map SEC("maps") tun_iface = { + .type = BPF_MAP_TYPE_ARRAY, + .size_key = sizeof(int), + .size_value = sizeof(int), + .pinning = PIN_GLOBAL_NS, + .max_elem = 1, +}; + +static __always_inline bool is_vip_addr(__be16 eth_proto, __be32 daddr) +{ + if (eth_proto == htons(ETH_P_IP)) + return (_htonl(0xffffff00) & daddr) == _htonl(0x0a0a0100); + else if (eth_proto == htons(ETH_P_IPV6)) + return (daddr == _htonl(0x2401face)); + + return false; +} + +SEC("l2_to_iptun_ingress_forward") +int _l2_to_iptun_ingress_forward(struct __sk_buff *skb) +{ + struct bpf_tunnel_key tkey = {}; + void *data = (void *)(long)skb->data; + struct eth_hdr *eth = data; + void *data_end = (void *)(long)skb->data_end; + int key = 0, *ifindex; + + int ret; + + if (data + sizeof(*eth) > data_end) + return TC_ACT_OK; + + ifindex = bpf_map_lookup_elem(&tun_iface, &key); + if (!ifindex) + return TC_ACT_OK; + + if (eth->h_proto == htons(ETH_P_IP)) { + char fmt4[] = "ingress forward to ifindex:%d daddr4:%x\n"; + struct iphdr *iph = data + sizeof(*eth); + + if (data + sizeof(*eth) + sizeof(*iph) > data_end) + return TC_ACT_OK; + + if (iph->protocol != IPPROTO_IPIP) + return TC_ACT_OK; + + bpf_trace_printk(fmt4, sizeof(fmt4), *ifindex, + _htonl(iph->daddr)); + return bpf_redirect(*ifindex, BPF_F_INGRESS); + } else if (eth->h_proto == htons(ETH_P_IPV6)) { + char fmt6[] = "ingress forward to ifindex:%d daddr6:%x::%x\n"; + struct ipv6hdr *ip6h = data + sizeof(*eth); + + if (data + sizeof(*eth) + sizeof(*ip6h) > data_end) + return TC_ACT_OK; + + if (ip6h->nexthdr != IPPROTO_IPIP && + ip6h->nexthdr != IPPROTO_IPV6) + return TC_ACT_OK; + + bpf_trace_printk(fmt6, sizeof(fmt6), *ifindex, + _htonl(ip6h->daddr.s6_addr32[0]), + _htonl(ip6h->daddr.s6_addr32[3])); + return bpf_redirect(*ifindex, BPF_F_INGRESS); + } + + return TC_ACT_OK; +} + +SEC("l2_to_iptun_ingress_redirect") +int _l2_to_iptun_ingress_redirect(struct __sk_buff *skb) +{ + struct bpf_tunnel_key tkey = {}; + void *data = (void *)(long)skb->data; + struct eth_hdr *eth = data; + void *data_end = (void *)(long)skb->data_end; + int key = 0, *ifindex; + + int ret; + + if (data + sizeof(*eth) > data_end) + return TC_ACT_OK; + + ifindex = bpf_map_lookup_elem(&tun_iface, &key); + if (!ifindex) + return TC_ACT_OK; + + if (eth->h_proto == htons(ETH_P_IP)) { + char fmt4[] = "e/ingress redirect daddr4:%x to ifindex:%d\n"; + struct iphdr *iph = data + sizeof(*eth); + __be32 daddr = iph->daddr; + + if (data + sizeof(*eth) + sizeof(*iph) > data_end) + return TC_ACT_OK; + + if (!is_vip_addr(eth->h_proto, daddr)) + return TC_ACT_OK; + + bpf_trace_printk(fmt4, sizeof(fmt4), _htonl(daddr), *ifindex); + } else { + return TC_ACT_OK; + } + + tkey.tunnel_id = 10000; + tkey.tunnel_ttl = 64; + tkey.remote_ipv4 = 0x0a020166; /* 10.2.1.102 */ + bpf_skb_set_tunnel_key(skb, &tkey, sizeof(tkey), 0); + return bpf_redirect(*ifindex, 0); +} + +SEC("l2_to_ip6tun_ingress_redirect") +int _l2_to_ip6tun_ingress_redirect(struct __sk_buff *skb) +{ + struct bpf_tunnel_key tkey = {}; + void *data = (void *)(long)skb->data; + struct eth_hdr *eth = data; + void *data_end = (void *)(long)skb->data_end; + int key = 0, *ifindex; + + if (data + sizeof(*eth) > data_end) + return TC_ACT_OK; + + ifindex = bpf_map_lookup_elem(&tun_iface, &key); + if (!ifindex) + return TC_ACT_OK; + + if (eth->h_proto == htons(ETH_P_IP)) { + char fmt4[] = "e/ingress redirect daddr4:%x to ifindex:%d\n"; + struct iphdr *iph = data + sizeof(*eth); + + if (data + sizeof(*eth) + sizeof(*iph) > data_end) + return TC_ACT_OK; + + if (!is_vip_addr(eth->h_proto, iph->daddr)) + return TC_ACT_OK; + + bpf_trace_printk(fmt4, sizeof(fmt4), _htonl(iph->daddr), + *ifindex); + } else if (eth->h_proto == htons(ETH_P_IPV6)) { + char fmt6[] = "e/ingress redirect daddr6:%x to ifindex:%d\n"; + struct ipv6hdr *ip6h = data + sizeof(*eth); + + if (data + sizeof(*eth) + sizeof(*ip6h) > data_end) + return TC_ACT_OK; + + if (!is_vip_addr(eth->h_proto, ip6h->daddr.s6_addr32[0])) + return TC_ACT_OK; + + bpf_trace_printk(fmt6, sizeof(fmt6), + _htonl(ip6h->daddr.s6_addr32[0]), *ifindex); + } else { + return TC_ACT_OK; + } + + tkey.tunnel_id = 10000; + tkey.tunnel_ttl = 64; + /* 2401:db02:0:0:0:0:0:66 */ + tkey.remote_ipv6[0] = _htonl(0x2401db02); + tkey.remote_ipv6[1] = 0; + tkey.remote_ipv6[2] = 0; + tkey.remote_ipv6[3] = _htonl(0x00000066); + bpf_skb_set_tunnel_key(skb, &tkey, sizeof(tkey), BPF_F_TUNINFO_IPV6); + return bpf_redirect(*ifindex, 0); +} + +SEC("drop_non_tun_vip") +int _drop_non_tun_vip(struct __sk_buff *skb) +{ + struct bpf_tunnel_key tkey = {}; + void *data = (void *)(long)skb->data; + struct eth_hdr *eth = data; + void *data_end = (void *)(long)skb->data_end; + + if (data + sizeof(*eth) > data_end) + return TC_ACT_OK; + + if (eth->h_proto == htons(ETH_P_IP)) { + struct iphdr *iph = data + sizeof(*eth); + + if (data + sizeof(*eth) + sizeof(*iph) > data_end) + return TC_ACT_OK; + + if (is_vip_addr(eth->h_proto, iph->daddr)) + return TC_ACT_SHOT; + } else if (eth->h_proto == htons(ETH_P_IPV6)) { + struct ipv6hdr *ip6h = data + sizeof(*eth); + + if (data + sizeof(*eth) + sizeof(*ip6h) > data_end) + return TC_ACT_OK; + + if (is_vip_addr(eth->h_proto, ip6h->daddr.s6_addr32[0])) + return TC_ACT_SHOT; + } + + return TC_ACT_OK; +} + +char _license[] SEC("license") = "GPL"; diff --git a/samples/bpf/tc_l2_redirect_user.c b/samples/bpf/tc_l2_redirect_user.c new file mode 100644 index 000000000000..4013c5337b91 --- /dev/null +++ b/samples/bpf/tc_l2_redirect_user.c @@ -0,0 +1,73 @@ +/* Copyright (c) 2016 Facebook + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ +#include +#include + +#include +#include +#include +#include +#include + +#include "libbpf.h" + +static void usage(void) +{ + printf("Usage: tc_l2_ipip_redirect [...]\n"); + printf(" -U Update an already pinned BPF array\n"); + printf(" -i Interface index\n"); + printf(" -h Display this help\n"); +} + +int main(int argc, char **argv) +{ + const char *pinned_file = NULL; + int ifindex = -1; + int array_key = 0; + int array_fd = -1; + int ret = -1; + int opt; + + while ((opt = getopt(argc, argv, "F:U:i:")) != -1) { + switch (opt) { + /* General args */ + case 'U': + pinned_file = optarg; + break; + case 'i': + ifindex = atoi(optarg); + break; + default: + usage(); + goto out; + } + } + + if (ifindex < 0 || !pinned_file) { + usage(); + goto out; + } + + array_fd = bpf_obj_get(pinned_file); + if (array_fd < 0) { + fprintf(stderr, "bpf_obj_get(%s): %s(%d)\n", + pinned_file, strerror(errno), errno); + goto out; + } + + /* bpf_tunnel_key.remote_ipv4 expects host byte orders */ + ret = bpf_update_elem(array_fd, &array_key, &ifindex, 0); + if (ret) { + perror("bpf_update_elem"); + goto out; + } + +out: + if (array_fd != -1) + close(array_fd); + return ret; +} From 34fad54c2537f7c99d07375e50cb30aa3c23bd83 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 9 Nov 2016 16:04:46 -0800 Subject: [PATCH 281/319] net: __skb_flow_dissect() must cap its return value After Tom patch, thoff field could point past the end of the buffer, this could fool some callers. If an skb was provided, skb->len should be the upper limit. If not, hlen is supposed to be the upper limit. Fixes: a6e544b0a88b ("flow_dissector: Jump to exit code in __skb_flow_dissect") Signed-off-by: Eric Dumazet Reported-by: Yibin Yang Acked-by: Willem de Bruijn Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- net/core/flow_dissector.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index ab193e5def07..69e4463a4b1b 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -122,7 +122,7 @@ bool __skb_flow_dissect(const struct sk_buff *skb, struct flow_dissector_key_keyid *key_keyid; bool skip_vlan = false; u8 ip_proto = 0; - bool ret = false; + bool ret; if (!data) { data = skb->data; @@ -549,12 +549,17 @@ bool __skb_flow_dissect(const struct sk_buff *skb, out_good: ret = true; -out_bad: + key_control->thoff = (u16)nhoff; +out: key_basic->n_proto = proto; key_basic->ip_proto = ip_proto; - key_control->thoff = (u16)nhoff; return ret; + +out_bad: + ret = false; + key_control->thoff = min_t(u16, nhoff, skb ? skb->len : hlen); + goto out; } EXPORT_SYMBOL(__skb_flow_dissect); From 10b217681ddec4fa3ddb375bb188fec504523da4 Mon Sep 17 00:00:00 2001 From: Baruch Siach Date: Thu, 10 Nov 2016 13:21:42 +0200 Subject: [PATCH 282/319] net: bpqether.h: remove if_ether.h guard __LINUX_IF_ETHER_H is not defined anywhere, and if_ether.h can keep itself from double inclusion, though it uses a single underscore prefix. Signed-off-by: Baruch Siach Signed-off-by: David S. Miller --- include/uapi/linux/bpqether.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/include/uapi/linux/bpqether.h b/include/uapi/linux/bpqether.h index a6c35e1a89ad..05865edaefda 100644 --- a/include/uapi/linux/bpqether.h +++ b/include/uapi/linux/bpqether.h @@ -5,9 +5,7 @@ * Defines for the BPQETHER pseudo device driver */ -#ifndef __LINUX_IF_ETHER_H #include -#endif #define SIOCSBPQETHOPT (SIOCDEVPRIVATE+0) /* reserved */ #define SIOCSBPQETHADDR (SIOCDEVPRIVATE+1) From 02e56902e40e4c1ff57590c717e46377b72d5966 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Sat, 12 Nov 2016 21:04:23 +0000 Subject: [PATCH 283/319] x86/efi: Fix EFI memmap pointer size warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix this when building on 32-bit: arch/x86/platform/efi/efi.c: In function ‘__efi_enter_virtual_mode’: arch/x86/platform/efi/efi.c:911:5: warning: cast to pointer from integer of different size [-Wint-to-pointer-cast] (efi_memory_desc_t *)pa); ^ arch/x86/platform/efi/efi.c:918:5: warning: cast to pointer from integer of different size [-Wint-to-pointer-cast] (efi_memory_desc_t *)pa); ^ The @pa local variable is declared as phys_addr_t and that is a u64 when CONFIG_PHYS_ADDR_T_64BIT=y. (The last is enabled on 32-bit on a PAE build.) However, its value comes from __pa() which is basically doing pointer arithmetic and checking, and returns unsigned long as it is the native pointer width. So let's use an unsigned long too. It should be fine to do so because the later users cast it to a pointer too. Signed-off-by: Borislav Petkov Signed-off-by: Matt Fleming Cc: Andy Lutomirski Cc: Ard Biesheuvel Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/20161112210424.5157-2-matt@codeblueprint.co.uk Signed-off-by: Ingo Molnar --- arch/x86/platform/efi/efi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index bf99aa7005eb..936a488d6cf6 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -861,7 +861,7 @@ static void __init __efi_enter_virtual_mode(void) int count = 0, pg_shift = 0; void *new_memmap = NULL; efi_status_t status; - phys_addr_t pa; + unsigned long pa; efi.systab = NULL; From f6697df36bdf0bf7fce984605c2918d4a7b4269f Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Sat, 12 Nov 2016 21:04:24 +0000 Subject: [PATCH 284/319] x86/efi: Prevent mixed mode boot corruption with CONFIG_VMAP_STACK=y Booting an EFI mixed mode kernel has been crashing since commit: e37e43a497d5 ("x86/mm/64: Enable vmapped stacks (CONFIG_HAVE_ARCH_VMAP_STACK=y)") The user-visible effect in my test setup was the kernel being unable to find the root file system ramdisk. This was likely caused by silent memory or page table corruption. Enabling CONFIG_DEBUG_VIRTUAL=y immediately flagged the thunking code as abusing virt_to_phys() because it was passing addresses that were not part of the kernel direct mapping. Use the slow version instead, which correctly handles all memory regions by performing a page table walk. Suggested-by: Andy Lutomirski Signed-off-by: Matt Fleming Cc: Andy Lutomirski Cc: Ard Biesheuvel Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/20161112210424.5157-3-matt@codeblueprint.co.uk Signed-off-by: Ingo Molnar --- arch/x86/platform/efi/efi_64.c | 80 ++++++++++++++++++++++++---------- 1 file changed, 57 insertions(+), 23 deletions(-) diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index 58b0f801f66f..319148bd4b05 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include @@ -211,6 +212,35 @@ void efi_sync_low_kernel_mappings(void) memcpy(pud_efi, pud_k, sizeof(pud_t) * num_entries); } +/* + * Wrapper for slow_virt_to_phys() that handles NULL addresses. + */ +static inline phys_addr_t +virt_to_phys_or_null_size(void *va, unsigned long size) +{ + bool bad_size; + + if (!va) + return 0; + + if (virt_addr_valid(va)) + return virt_to_phys(va); + + /* + * A fully aligned variable on the stack is guaranteed not to + * cross a page bounary. Try to catch strings on the stack by + * checking that 'size' is a power of two. + */ + bad_size = size > PAGE_SIZE || !is_power_of_2(size); + + WARN_ON(!IS_ALIGNED((unsigned long)va, size) || bad_size); + + return slow_virt_to_phys(va); +} + +#define virt_to_phys_or_null(addr) \ + virt_to_phys_or_null_size((addr), sizeof(*(addr))) + int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) { unsigned long pfn, text; @@ -494,8 +524,8 @@ static efi_status_t efi_thunk_get_time(efi_time_t *tm, efi_time_cap_t *tc) spin_lock(&rtc_lock); - phys_tm = virt_to_phys(tm); - phys_tc = virt_to_phys(tc); + phys_tm = virt_to_phys_or_null(tm); + phys_tc = virt_to_phys_or_null(tc); status = efi_thunk(get_time, phys_tm, phys_tc); @@ -511,7 +541,7 @@ static efi_status_t efi_thunk_set_time(efi_time_t *tm) spin_lock(&rtc_lock); - phys_tm = virt_to_phys(tm); + phys_tm = virt_to_phys_or_null(tm); status = efi_thunk(set_time, phys_tm); @@ -529,9 +559,9 @@ efi_thunk_get_wakeup_time(efi_bool_t *enabled, efi_bool_t *pending, spin_lock(&rtc_lock); - phys_enabled = virt_to_phys(enabled); - phys_pending = virt_to_phys(pending); - phys_tm = virt_to_phys(tm); + phys_enabled = virt_to_phys_or_null(enabled); + phys_pending = virt_to_phys_or_null(pending); + phys_tm = virt_to_phys_or_null(tm); status = efi_thunk(get_wakeup_time, phys_enabled, phys_pending, phys_tm); @@ -549,7 +579,7 @@ efi_thunk_set_wakeup_time(efi_bool_t enabled, efi_time_t *tm) spin_lock(&rtc_lock); - phys_tm = virt_to_phys(tm); + phys_tm = virt_to_phys_or_null(tm); status = efi_thunk(set_wakeup_time, enabled, phys_tm); @@ -558,6 +588,10 @@ efi_thunk_set_wakeup_time(efi_bool_t enabled, efi_time_t *tm) return status; } +static unsigned long efi_name_size(efi_char16_t *name) +{ + return ucs2_strsize(name, EFI_VAR_NAME_LEN) + 1; +} static efi_status_t efi_thunk_get_variable(efi_char16_t *name, efi_guid_t *vendor, @@ -567,11 +601,11 @@ efi_thunk_get_variable(efi_char16_t *name, efi_guid_t *vendor, u32 phys_name, phys_vendor, phys_attr; u32 phys_data_size, phys_data; - phys_data_size = virt_to_phys(data_size); - phys_vendor = virt_to_phys(vendor); - phys_name = virt_to_phys(name); - phys_attr = virt_to_phys(attr); - phys_data = virt_to_phys(data); + phys_data_size = virt_to_phys_or_null(data_size); + phys_vendor = virt_to_phys_or_null(vendor); + phys_name = virt_to_phys_or_null_size(name, efi_name_size(name)); + phys_attr = virt_to_phys_or_null(attr); + phys_data = virt_to_phys_or_null_size(data, *data_size); status = efi_thunk(get_variable, phys_name, phys_vendor, phys_attr, phys_data_size, phys_data); @@ -586,9 +620,9 @@ efi_thunk_set_variable(efi_char16_t *name, efi_guid_t *vendor, u32 phys_name, phys_vendor, phys_data; efi_status_t status; - phys_name = virt_to_phys(name); - phys_vendor = virt_to_phys(vendor); - phys_data = virt_to_phys(data); + phys_name = virt_to_phys_or_null_size(name, efi_name_size(name)); + phys_vendor = virt_to_phys_or_null(vendor); + phys_data = virt_to_phys_or_null_size(data, data_size); /* If data_size is > sizeof(u32) we've got problems */ status = efi_thunk(set_variable, phys_name, phys_vendor, @@ -605,9 +639,9 @@ efi_thunk_get_next_variable(unsigned long *name_size, efi_status_t status; u32 phys_name_size, phys_name, phys_vendor; - phys_name_size = virt_to_phys(name_size); - phys_vendor = virt_to_phys(vendor); - phys_name = virt_to_phys(name); + phys_name_size = virt_to_phys_or_null(name_size); + phys_vendor = virt_to_phys_or_null(vendor); + phys_name = virt_to_phys_or_null_size(name, *name_size); status = efi_thunk(get_next_variable, phys_name_size, phys_name, phys_vendor); @@ -621,7 +655,7 @@ efi_thunk_get_next_high_mono_count(u32 *count) efi_status_t status; u32 phys_count; - phys_count = virt_to_phys(count); + phys_count = virt_to_phys_or_null(count); status = efi_thunk(get_next_high_mono_count, phys_count); return status; @@ -633,7 +667,7 @@ efi_thunk_reset_system(int reset_type, efi_status_t status, { u32 phys_data; - phys_data = virt_to_phys(data); + phys_data = virt_to_phys_or_null_size(data, data_size); efi_thunk(reset_system, reset_type, status, data_size, phys_data); } @@ -661,9 +695,9 @@ efi_thunk_query_variable_info(u32 attr, u64 *storage_space, if (efi.runtime_version < EFI_2_00_SYSTEM_TABLE_REVISION) return EFI_UNSUPPORTED; - phys_storage = virt_to_phys(storage_space); - phys_remaining = virt_to_phys(remaining_space); - phys_max = virt_to_phys(max_variable_size); + phys_storage = virt_to_phys_or_null(storage_space); + phys_remaining = virt_to_phys_or_null(remaining_space); + phys_max = virt_to_phys_or_null(max_variable_size); status = efi_thunk(query_variable_info, attr, phys_storage, phys_remaining, phys_max); From d70674eeaa5efdefb99928691161578ae0a80316 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 25 Oct 2016 17:55:04 +0200 Subject: [PATCH 285/319] iio: maxim_thermocouple: detect invalid storage size in read() As found by gcc -Wmaybe-uninitialized, having a storage_bytes value other than 2 or 4 will result in undefined behavior: drivers/iio/temperature/maxim_thermocouple.c: In function 'maxim_thermocouple_read': drivers/iio/temperature/maxim_thermocouple.c:141:5: error: 'ret' may be used uninitialized in this function [-Werror=maybe-uninitialized] This probably cannot happen, but returning -EINVAL here is appropriate and makes gcc happy and the code more robust. Fixes: 231147ee77f3 ("iio: maxim_thermocouple: Align 16 bit big endian value of raw reads") Signed-off-by: Arnd Bergmann Signed-off-by: Jonathan Cameron (cherry picked from commit 32cb7d27e65df9daa7cee8f1fdf7b259f214bee2) Signed-off-by: Greg Kroah-Hartman --- drivers/iio/temperature/maxim_thermocouple.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/iio/temperature/maxim_thermocouple.c b/drivers/iio/temperature/maxim_thermocouple.c index 066161a4bccd..f962f31a5eb2 100644 --- a/drivers/iio/temperature/maxim_thermocouple.c +++ b/drivers/iio/temperature/maxim_thermocouple.c @@ -136,6 +136,8 @@ static int maxim_thermocouple_read(struct maxim_thermocouple_data *data, ret = spi_read(data->spi, (void *)&buf32, storage_bytes); *val = be32_to_cpu(buf32); break; + default: + ret = -EINVAL; } if (ret) From ca0a75316dc62af92943761b1cc049e15c92eb09 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Wed, 9 Nov 2016 19:51:25 -0800 Subject: [PATCH 286/319] r8152: Fix error path in open function If usb_submit_urb() called from the open function fails, the following crash may be observed. r8152 8-1:1.0 eth0: intr_urb submit failed: -19 ... r8152 8-1:1.0 eth0: v1.08.3 Unable to handle kernel paging request at virtual address 6b6b6b6b6b6b6b7b pgd = ffffffc0e7305000 [6b6b6b6b6b6b6b7b] *pgd=0000000000000000, *pud=0000000000000000 Internal error: Oops: 96000004 [#1] PREEMPT SMP ... PC is at notifier_chain_register+0x2c/0x58 LR is at blocking_notifier_chain_register+0x54/0x70 ... Call trace: [] notifier_chain_register+0x2c/0x58 [] blocking_notifier_chain_register+0x54/0x70 [] register_pm_notifier+0x24/0x2c [] rtl8152_open+0x3dc/0x3f8 [r8152] [] __dev_open+0xac/0x104 [] __dev_change_flags+0xb0/0x148 [] dev_change_flags+0x34/0x70 [] do_setlink+0x2c8/0x888 [] rtnl_newlink+0x328/0x644 [] rtnetlink_rcv_msg+0x1a8/0x1d4 [] netlink_rcv_skb+0x68/0xd0 [] rtnetlink_rcv+0x2c/0x3c [] netlink_unicast+0x16c/0x234 [] netlink_sendmsg+0x340/0x364 [] sock_sendmsg+0x48/0x60 [] SyS_sendto+0xe0/0x120 [] SyS_send+0x40/0x4c [] el0_svc_naked+0x24/0x28 Clean up error handling to avoid registering the notifier if the open function is going to fail. Signed-off-by: Guenter Roeck Signed-off-by: David S. Miller --- drivers/net/usb/r8152.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 75c516889645..efb84f092492 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -3266,10 +3266,8 @@ static int rtl8152_open(struct net_device *netdev) goto out; res = usb_autopm_get_interface(tp->intf); - if (res < 0) { - free_all_mem(tp); - goto out; - } + if (res < 0) + goto out_free; mutex_lock(&tp->control); @@ -3285,10 +3283,9 @@ static int rtl8152_open(struct net_device *netdev) netif_device_detach(tp->netdev); netif_warn(tp, ifup, netdev, "intr_urb submit failed: %d\n", res); - free_all_mem(tp); - } else { - napi_enable(&tp->napi); + goto out_unlock; } + napi_enable(&tp->napi); mutex_unlock(&tp->control); @@ -3297,7 +3294,13 @@ static int rtl8152_open(struct net_device *netdev) tp->pm_notifier.notifier_call = rtl_notifier; register_pm_notifier(&tp->pm_notifier); #endif + return 0; +out_unlock: + mutex_unlock(&tp->control); + usb_autopm_put_interface(tp->intf); +out_free: + free_all_mem(tp); out: return res; } From 969447f226b451c453ddc83cac6144eaeac6f2e3 Mon Sep 17 00:00:00 2001 From: Stephen Suryaputra Lin Date: Thu, 10 Nov 2016 11:16:15 -0500 Subject: [PATCH 287/319] ipv4: use new_gw for redirect neigh lookup In v2.6, ip_rt_redirect() calls arp_bind_neighbour() which returns 0 and then the state of the neigh for the new_gw is checked. If the state isn't valid then the redirected route is deleted. This behavior is maintained up to v3.5.7 by check_peer_redirect() because rt->rt_gateway is assigned to peer->redirect_learned.a4 before calling ipv4_neigh_lookup(). After commit 5943634fc559 ("ipv4: Maintain redirect and PMTU info in struct rtable again."), ipv4_neigh_lookup() is performed without the rt_gateway assigned to the new_gw. In the case when rt_gateway (old_gw) isn't zero, the function uses it as the key. The neigh is most likely valid since the old_gw is the one that sends the ICMP redirect message. Then the new_gw is assigned to fib_nh_exception. The problem is: the new_gw ARP may never gets resolved and the traffic is blackholed. So, use the new_gw for neigh lookup. Changes from v1: - use __ipv4_neigh_lookup instead (per Eric Dumazet). Fixes: 5943634fc559 ("ipv4: Maintain redirect and PMTU info in struct rtable again.") Signed-off-by: Stephen Suryaputra Lin Signed-off-by: David S. Miller --- net/ipv4/route.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 62d4d90c1389..2a57566e6e91 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -753,7 +753,9 @@ static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flow goto reject_redirect; } - n = ipv4_neigh_lookup(&rt->dst, NULL, &new_gw); + n = __ipv4_neigh_lookup(rt->dst.dev, new_gw); + if (!n) + n = neigh_create(&arp_tbl, &new_gw, rt->dst.dev); if (!IS_ERR(n)) { if (!(n->nud_state & NUD_VALID)) { neigh_event_send(n, NULL); From ac6e780070e30e4c35bd395acfe9191e6268bdd3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 10 Nov 2016 13:12:35 -0800 Subject: [PATCH 288/319] tcp: take care of truncations done by sk_filter() With syzkaller help, Marco Grassi found a bug in TCP stack, crashing in tcp_collapse() Root cause is that sk_filter() can truncate the incoming skb, but TCP stack was not really expecting this to happen. It probably was expecting a simple DROP or ACCEPT behavior. We first need to make sure no part of TCP header could be removed. Then we need to adjust TCP_SKB_CB(skb)->end_seq Many thanks to syzkaller team and Marco for giving us a reproducer. Signed-off-by: Eric Dumazet Reported-by: Marco Grassi Reported-by: Vladis Dronov Signed-off-by: David S. Miller --- include/net/tcp.h | 1 + net/ipv4/tcp_ipv4.c | 19 ++++++++++++++++++- net/ipv6/tcp_ipv6.c | 6 ++++-- 3 files changed, 23 insertions(+), 3 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index 304a8e17bc87..123979fe12bf 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1220,6 +1220,7 @@ static inline void tcp_prequeue_init(struct tcp_sock *tp) bool tcp_prequeue(struct sock *sk, struct sk_buff *skb); bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb); +int tcp_filter(struct sock *sk, struct sk_buff *skb); #undef STATE_TRACE diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 61b7be303eec..2259114c7242 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1564,6 +1564,21 @@ bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb) } EXPORT_SYMBOL(tcp_add_backlog); +int tcp_filter(struct sock *sk, struct sk_buff *skb) +{ + struct tcphdr *th = (struct tcphdr *)skb->data; + unsigned int eaten = skb->len; + int err; + + err = sk_filter_trim_cap(sk, skb, th->doff * 4); + if (!err) { + eaten -= skb->len; + TCP_SKB_CB(skb)->end_seq -= eaten; + } + return err; +} +EXPORT_SYMBOL(tcp_filter); + /* * From tcp_input.c */ @@ -1676,8 +1691,10 @@ int tcp_v4_rcv(struct sk_buff *skb) nf_reset(skb); - if (sk_filter(sk, skb)) + if (tcp_filter(sk, skb)) goto discard_and_relse; + th = (const struct tcphdr *)skb->data; + iph = ip_hdr(skb); skb->dev = NULL; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 6ca23c2e76f7..b9f1fee9a886 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1229,7 +1229,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) if (skb->protocol == htons(ETH_P_IP)) return tcp_v4_do_rcv(sk, skb); - if (sk_filter(sk, skb)) + if (tcp_filter(sk, skb)) goto discard; /* @@ -1457,8 +1457,10 @@ static int tcp_v6_rcv(struct sk_buff *skb) if (tcp_v6_inbound_md5_hash(sk, skb)) goto discard_and_relse; - if (sk_filter(sk, skb)) + if (tcp_filter(sk, skb)) goto discard_and_relse; + th = (const struct tcphdr *)skb->data; + hdr = ipv6_hdr(skb); skb->dev = NULL; From 7b5b74efcca00f15c2aec1dc7175bfe34b6ec643 Mon Sep 17 00:00:00 2001 From: Mike Frysinger Date: Thu, 10 Nov 2016 19:08:39 -0500 Subject: [PATCH 289/319] Revert "include/uapi/linux/atm_zatm.h: include linux/time.h" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit cf00713a655d ("include/uapi/linux/atm_zatm.h: include linux/time.h"). This attempted to fix userspace breakage that no longer existed when the patch was merged. Almost one year earlier, commit 70ba07b675b5 ("atm: remove 'struct zatm_t_hist'") deleted the struct in question. After this patch was merged, we now have to deal with people being unable to include this header in conjunction with standard C library headers like stdlib.h (which linux-atm does). Example breakage: x86_64-pc-linux-gnu-gcc -DHAVE_CONFIG_H -I. -I../.. -I./../q2931 -I./../saal \ -I. -DCPPFLAGS_TEST -I../../src/include -O2 -march=native -pipe -g \ -frecord-gcc-switches -freport-bug -Wimplicit-function-declaration \ -Wnonnull -Wstrict-aliasing -Wparentheses -Warray-bounds \ -Wfree-nonheap-object -Wreturn-local-addr -fno-strict-aliasing -Wall \ -Wshadow -Wpointer-arith -Wwrite-strings -Wstrict-prototypes -c zntune.c In file included from /usr/include/linux/atm_zatm.h:17:0, from zntune.c:17: /usr/include/linux/time.h:9:8: error: redefinition of ‘struct timespec’ struct timespec { ^ In file included from /usr/include/sys/select.h:43:0, from /usr/include/sys/types.h:219, from /usr/include/stdlib.h:314, from zntune.c:9: /usr/include/time.h:120:8: note: originally defined here struct timespec ^ Signed-off-by: Mike Frysinger Acked-by: Mikko Rapeli Signed-off-by: David S. Miller --- include/uapi/linux/atm_zatm.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/uapi/linux/atm_zatm.h b/include/uapi/linux/atm_zatm.h index 5cd4d4d2dd1d..9c9c6ad55f14 100644 --- a/include/uapi/linux/atm_zatm.h +++ b/include/uapi/linux/atm_zatm.h @@ -14,7 +14,6 @@ #include #include -#include #define ZATM_GETPOOL _IOW('a',ATMIOC_SARPRV+1,struct atmif_sioc) /* get pool statistics */ From 3ffb6a39b751b635a0c50b650064c38b8d371ef2 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Fri, 11 Nov 2016 00:11:42 -0500 Subject: [PATCH 290/319] bnxt_en: Fix ring arithmetic in bnxt_setup_tc(). The logic is missing the check on whether the tx and rx rings are sharing completion rings or not. Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index a9f9f3738022..c6909660e097 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -6309,6 +6309,7 @@ static int bnxt_setup_tc(struct net_device *dev, u32 handle, __be16 proto, struct tc_to_netdev *ntc) { struct bnxt *bp = netdev_priv(dev); + bool sh = false; u8 tc; if (ntc->type != TC_SETUP_MQPRIO) @@ -6325,12 +6326,11 @@ static int bnxt_setup_tc(struct net_device *dev, u32 handle, __be16 proto, if (netdev_get_num_tc(dev) == tc) return 0; + if (bp->flags & BNXT_FLAG_SHARED_RINGS) + sh = true; + if (tc) { int max_rx_rings, max_tx_rings, rc; - bool sh = false; - - if (bp->flags & BNXT_FLAG_SHARED_RINGS) - sh = true; rc = bnxt_get_max_rings(bp, &max_rx_rings, &max_tx_rings, sh); if (rc || bp->tx_nr_rings_per_tc * tc > max_tx_rings) @@ -6348,7 +6348,8 @@ static int bnxt_setup_tc(struct net_device *dev, u32 handle, __be16 proto, bp->tx_nr_rings = bp->tx_nr_rings_per_tc; netdev_reset_tc(dev); } - bp->cp_nr_rings = max_t(int, bp->tx_nr_rings, bp->rx_nr_rings); + bp->cp_nr_rings = sh ? max_t(int, bp->tx_nr_rings, bp->rx_nr_rings) : + bp->tx_nr_rings + bp->rx_nr_rings; bp->num_stat_ctxs = bp->cp_nr_rings; if (netif_running(bp->dev)) From 73b9bad63ae3c902ce64221d10a0d371d059748d Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Fri, 11 Nov 2016 00:11:43 -0500 Subject: [PATCH 291/319] bnxt_en: Fix VF virtual link state. If the physical link is down and the VF virtual link is set to "enable", the current code does not always work. If the link is down but the cable is attached, the firmware returns LINK_SIGNAL instead of NO_LINK. The current code is treating LINK_SIGNAL as link up. The fix is to treat link as down when the link_status != LINK. Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c index ec6cd18842c3..60e2af8678bd 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c @@ -774,8 +774,8 @@ static int bnxt_vf_set_link(struct bnxt *bp, struct bnxt_vf_info *vf) if (vf->flags & BNXT_VF_LINK_UP) { /* if physical link is down, force link up on VF */ - if (phy_qcfg_resp.link == - PORT_PHY_QCFG_RESP_LINK_NO_LINK) { + if (phy_qcfg_resp.link != + PORT_PHY_QCFG_RESP_LINK_LINK) { phy_qcfg_resp.link = PORT_PHY_QCFG_RESP_LINK_LINK; phy_qcfg_resp.link_speed = cpu_to_le16( From 2d644d4c7506646f9c4a2afceb7fd5f030bc0c9f Mon Sep 17 00:00:00 2001 From: Yotam Gigi Date: Fri, 11 Nov 2016 16:34:25 +0100 Subject: [PATCH 292/319] mlxsw: spectrum: Fix refcount bug on span entries When binding port to a newly created span entry, its refcount is initialized to zero even though it has a bound port. That leads to unexpected behaviour when the user tries to delete that port from the span entry. Fix this by initializing the reference count to 1. Also add a warning to put function. Fixes: 763b4b70afcd ("mlxsw: spectrum: Add support in matchall mirror TC offloading") Signed-off-by: Yotam Gigi Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 1ec0a4ce3c46..dda5761e91bc 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -231,7 +231,7 @@ mlxsw_sp_span_entry_create(struct mlxsw_sp_port *port) span_entry->used = true; span_entry->id = index; - span_entry->ref_count = 0; + span_entry->ref_count = 1; span_entry->local_port = local_port; return span_entry; } @@ -270,6 +270,7 @@ static struct mlxsw_sp_span_entry span_entry = mlxsw_sp_span_entry_find(port); if (span_entry) { + /* Already exists, just take a reference */ span_entry->ref_count++; return span_entry; } @@ -280,6 +281,7 @@ static struct mlxsw_sp_span_entry static int mlxsw_sp_span_entry_put(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_span_entry *span_entry) { + WARN_ON(!span_entry->ref_count); if (--span_entry->ref_count == 0) mlxsw_sp_span_entry_destroy(mlxsw_sp, span_entry); return 0; From 42cdb338f40a98e6558bae35456fe86b6e90e1ef Mon Sep 17 00:00:00 2001 From: Arkadi Sharshevsky Date: Fri, 11 Nov 2016 16:34:26 +0100 Subject: [PATCH 293/319] mlxsw: spectrum_router: Correctly dump neighbour activity The device's neighbour table is periodically dumped in order to update the kernel about active neighbours. A single dump session may span multiple queries, until the response carries less records than requested or when a record (can contain up to four neighbour entries) is not full. Current code stops the session when the number of returned records is zero, which can result in infinite loop in case of high packet rate. Fix this by stopping the session according to the above logic. Fixes: c723c735fa6b ("mlxsw: spectrum_router: Periodically update the kernel's neigh table") Signed-off-by: Arkadi Sharshevsky Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- .../ethernet/mellanox/mlxsw/spectrum_router.c | 22 ++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 040737e14a3f..cbeeddd70c5a 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -800,6 +800,26 @@ static void mlxsw_sp_router_neigh_rec_process(struct mlxsw_sp *mlxsw_sp, } } +static bool mlxsw_sp_router_rauhtd_is_full(char *rauhtd_pl) +{ + u8 num_rec, last_rec_index, num_entries; + + num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl); + last_rec_index = num_rec - 1; + + if (num_rec < MLXSW_REG_RAUHTD_REC_MAX_NUM) + return false; + if (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, last_rec_index) == + MLXSW_REG_RAUHTD_TYPE_IPV6) + return true; + + num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl, + last_rec_index); + if (++num_entries == MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC) + return true; + return false; +} + static int mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp) { char *rauhtd_pl; @@ -826,7 +846,7 @@ static int mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp) for (i = 0; i < num_rec; i++) mlxsw_sp_router_neigh_rec_process(mlxsw_sp, rauhtd_pl, i); - } while (num_rec); + } while (mlxsw_sp_router_rauhtd_is_full(rauhtd_pl)); rtnl_unlock(); kfree(rauhtd_pl); From 7724325a19fb0a51d2a69bd2915b33f0ff197f5a Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Sat, 12 Nov 2016 12:46:26 -0200 Subject: [PATCH 294/319] dvb-usb: move data_mutex to struct dvb_usb_device MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The data_mutex is initialized too late, as it is needed for each device driver's power control, causing an OOPS: dvb-usb: found a 'TerraTec/qanu USB2.0 Highspeed DVB-T Receiver' in warm state. BUG: unable to handle kernel NULL pointer dereference at (null) IP: [] __mutex_lock_slowpath+0x6f/0x100 PGD 0 Oops: 0002 [#1] SMP Modules linked in: dvb_usb_cinergyT2(+) dvb_usb CPU: 0 PID: 2029 Comm: modprobe Not tainted 4.9.0-rc4-dvbmod #24 Hardware name: FUJITSU LIFEBOOK A544/FJNBB35 , BIOS Version 1.17 05/09/2014 task: ffff88020e943840 task.stack: ffff8801f36ec000 RIP: 0010:[] [] __mutex_lock_slowpath+0x6f/0x100 RSP: 0018:ffff8801f36efb10 EFLAGS: 00010282 RAX: 0000000000000000 RBX: ffff88021509bdc8 RCX: 00000000c0000100 RDX: 0000000000000001 RSI: 0000000000000000 RDI: ffff88021509bdcc RBP: ffff8801f36efb58 R08: ffff88021f216320 R09: 0000000000100000 R10: ffff88021f216320 R11: 00000023fee6c5a1 R12: ffff88020e943840 R13: ffff88021509bdcc R14: 00000000ffffffff R15: ffff88021509bdd0 FS: 00007f21adb86740(0000) GS:ffff88021f200000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 0000000215bce000 CR4: 00000000001406f0 Call Trace: mutex_lock+0x16/0x25 cinergyt2_power_ctrl+0x1f/0x60 [dvb_usb_cinergyT2] dvb_usb_device_init+0x21e/0x5d0 [dvb_usb] cinergyt2_usb_probe+0x21/0x50 [dvb_usb_cinergyT2] usb_probe_interface+0xf3/0x2a0 driver_probe_device+0x208/0x2b0 __driver_attach+0x87/0x90 driver_probe_device+0x2b0/0x2b0 bus_for_each_dev+0x52/0x80 bus_add_driver+0x1a3/0x220 driver_register+0x56/0xd0 usb_register_driver+0x77/0x130 do_one_initcall+0x46/0x180 free_vmap_area_noflush+0x38/0x70 kmem_cache_alloc+0x84/0xc0 do_init_module+0x50/0x1be load_module+0x1d8b/0x2100 find_symbol_in_section+0xa0/0xa0 SyS_finit_module+0x89/0x90 entry_SYSCALL_64_fastpath+0x13/0x94 Code: e8 a7 1d 00 00 8b 03 83 f8 01 0f 84 97 00 00 00 48 8b 43 10 4c 8d 7b 08 48 89 63 10 4c 89 3c 24 41 be ff ff ff ff 48 89 44 24 08 <48> 89 20 4c 89 64 24 10 eb 1a 49 c7 44 24 08 02 00 00 00 c6 43 RIP [] __mutex_lock_slowpath+0x6f/0x100 RSP CR2: 0000000000000000 So, move it to the struct dvb_usb_device and initialize it before calling the driver's callbacks. Reported-by: Jörg Otte Tested-by: Jörg Otte Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Linus Torvalds --- drivers/media/usb/dvb-usb/af9005.c | 33 ++++++------------ drivers/media/usb/dvb-usb/cinergyT2-core.c | 33 ++++++------------ drivers/media/usb/dvb-usb/cxusb.c | 39 +++++++++------------ drivers/media/usb/dvb-usb/cxusb.h | 1 - drivers/media/usb/dvb-usb/dtt200u.c | 40 +++++++++------------- drivers/media/usb/dvb-usb/dvb-usb-init.c | 1 + drivers/media/usb/dvb-usb/dvb-usb.h | 9 +++-- 7 files changed, 61 insertions(+), 95 deletions(-) diff --git a/drivers/media/usb/dvb-usb/af9005.c b/drivers/media/usb/dvb-usb/af9005.c index b257780fb380..7853261906b1 100644 --- a/drivers/media/usb/dvb-usb/af9005.c +++ b/drivers/media/usb/dvb-usb/af9005.c @@ -53,7 +53,6 @@ struct af9005_device_state { u8 sequence; int led_state; unsigned char data[256]; - struct mutex data_mutex; }; static int af9005_generic_read_write(struct dvb_usb_device *d, u16 reg, @@ -72,7 +71,7 @@ static int af9005_generic_read_write(struct dvb_usb_device *d, u16 reg, return -EINVAL; } - mutex_lock(&st->data_mutex); + mutex_lock(&d->data_mutex); st->data[0] = 14; /* rest of buffer length low */ st->data[1] = 0; /* rest of buffer length high */ @@ -140,7 +139,7 @@ static int af9005_generic_read_write(struct dvb_usb_device *d, u16 reg, values[i] = st->data[8 + i]; ret: - mutex_unlock(&st->data_mutex); + mutex_unlock(&d->data_mutex); return ret; } @@ -481,7 +480,7 @@ int af9005_send_command(struct dvb_usb_device *d, u8 command, u8 * wbuf, } packet_len = wlen + 5; - mutex_lock(&st->data_mutex); + mutex_lock(&d->data_mutex); st->data[0] = (u8) (packet_len & 0xff); st->data[1] = (u8) ((packet_len & 0xff00) >> 8); @@ -512,7 +511,7 @@ int af9005_send_command(struct dvb_usb_device *d, u8 command, u8 * wbuf, rbuf[i] = st->data[i + 7]; } - mutex_unlock(&st->data_mutex); + mutex_unlock(&d->data_mutex); return ret; } @@ -523,7 +522,7 @@ int af9005_read_eeprom(struct dvb_usb_device *d, u8 address, u8 * values, u8 seq; int ret, i; - mutex_lock(&st->data_mutex); + mutex_lock(&d->data_mutex); memset(st->data, 0, sizeof(st->data)); @@ -559,7 +558,7 @@ int af9005_read_eeprom(struct dvb_usb_device *d, u8 address, u8 * values, for (i = 0; i < len; i++) values[i] = st->data[6 + i]; } - mutex_unlock(&st->data_mutex); + mutex_unlock(&d->data_mutex); return ret; } @@ -847,7 +846,7 @@ static int af9005_rc_query(struct dvb_usb_device *d, u32 * event, int *state) return 0; } - mutex_lock(&st->data_mutex); + mutex_lock(&d->data_mutex); /* deb_info("rc_query\n"); */ st->data[0] = 3; /* rest of packet length low */ @@ -890,7 +889,7 @@ static int af9005_rc_query(struct dvb_usb_device *d, u32 * event, int *state) } ret: - mutex_unlock(&st->data_mutex); + mutex_unlock(&d->data_mutex); return ret; } @@ -1004,20 +1003,8 @@ static struct dvb_usb_device_properties af9005_properties; static int af9005_usb_probe(struct usb_interface *intf, const struct usb_device_id *id) { - struct dvb_usb_device *d; - struct af9005_device_state *st; - int ret; - - ret = dvb_usb_device_init(intf, &af9005_properties, - THIS_MODULE, &d, adapter_nr); - - if (ret < 0) - return ret; - - st = d->priv; - mutex_init(&st->data_mutex); - - return 0; + return dvb_usb_device_init(intf, &af9005_properties, + THIS_MODULE, NULL, adapter_nr); } enum af9005_usb_table_entry { diff --git a/drivers/media/usb/dvb-usb/cinergyT2-core.c b/drivers/media/usb/dvb-usb/cinergyT2-core.c index 8ac825413d5a..290275bc7fde 100644 --- a/drivers/media/usb/dvb-usb/cinergyT2-core.c +++ b/drivers/media/usb/dvb-usb/cinergyT2-core.c @@ -42,7 +42,6 @@ DVB_DEFINE_MOD_OPT_ADAPTER_NR(adapter_nr); struct cinergyt2_state { u8 rc_counter; unsigned char data[64]; - struct mutex data_mutex; }; /* We are missing a release hook with usb_device data */ @@ -56,12 +55,12 @@ static int cinergyt2_streaming_ctrl(struct dvb_usb_adapter *adap, int enable) struct cinergyt2_state *st = d->priv; int ret; - mutex_lock(&st->data_mutex); + mutex_lock(&d->data_mutex); st->data[0] = CINERGYT2_EP1_CONTROL_STREAM_TRANSFER; st->data[1] = enable ? 1 : 0; ret = dvb_usb_generic_rw(d, st->data, 2, st->data, 64, 0); - mutex_unlock(&st->data_mutex); + mutex_unlock(&d->data_mutex); return ret; } @@ -71,12 +70,12 @@ static int cinergyt2_power_ctrl(struct dvb_usb_device *d, int enable) struct cinergyt2_state *st = d->priv; int ret; - mutex_lock(&st->data_mutex); + mutex_lock(&d->data_mutex); st->data[0] = CINERGYT2_EP1_SLEEP_MODE; st->data[1] = enable ? 0 : 1; ret = dvb_usb_generic_rw(d, st->data, 2, st->data, 3, 0); - mutex_unlock(&st->data_mutex); + mutex_unlock(&d->data_mutex); return ret; } @@ -89,7 +88,7 @@ static int cinergyt2_frontend_attach(struct dvb_usb_adapter *adap) adap->fe_adap[0].fe = cinergyt2_fe_attach(adap->dev); - mutex_lock(&st->data_mutex); + mutex_lock(&d->data_mutex); st->data[0] = CINERGYT2_EP1_GET_FIRMWARE_VERSION; ret = dvb_usb_generic_rw(d, st->data, 1, st->data, 3, 0); @@ -97,7 +96,7 @@ static int cinergyt2_frontend_attach(struct dvb_usb_adapter *adap) deb_rc("cinergyt2_power_ctrl() Failed to retrieve sleep " "state info\n"); } - mutex_unlock(&st->data_mutex); + mutex_unlock(&d->data_mutex); /* Copy this pointer as we are gonna need it in the release phase */ cinergyt2_usb_device = adap->dev; @@ -166,7 +165,7 @@ static int cinergyt2_rc_query(struct dvb_usb_device *d, u32 *event, int *state) *state = REMOTE_NO_KEY_PRESSED; - mutex_lock(&st->data_mutex); + mutex_lock(&d->data_mutex); st->data[0] = CINERGYT2_EP1_GET_RC_EVENTS; ret = dvb_usb_generic_rw(d, st->data, 1, st->data, 5, 0); @@ -202,29 +201,17 @@ static int cinergyt2_rc_query(struct dvb_usb_device *d, u32 *event, int *state) } ret: - mutex_unlock(&st->data_mutex); + mutex_unlock(&d->data_mutex); return ret; } static int cinergyt2_usb_probe(struct usb_interface *intf, const struct usb_device_id *id) { - struct dvb_usb_device *d; - struct cinergyt2_state *st; - int ret; - - ret = dvb_usb_device_init(intf, &cinergyt2_properties, - THIS_MODULE, &d, adapter_nr); - if (ret < 0) - return ret; - - st = d->priv; - mutex_init(&st->data_mutex); - - return 0; + return dvb_usb_device_init(intf, &cinergyt2_properties, + THIS_MODULE, NULL, adapter_nr); } - static struct usb_device_id cinergyt2_usb_table[] = { { USB_DEVICE(USB_VID_TERRATEC, 0x0038) }, { 0 } diff --git a/drivers/media/usb/dvb-usb/cxusb.c b/drivers/media/usb/dvb-usb/cxusb.c index 39772812269d..243403081fa5 100644 --- a/drivers/media/usb/dvb-usb/cxusb.c +++ b/drivers/media/usb/dvb-usb/cxusb.c @@ -68,7 +68,7 @@ static int cxusb_ctrl_msg(struct dvb_usb_device *d, wo = (rbuf == NULL || rlen == 0); /* write-only */ - mutex_lock(&st->data_mutex); + mutex_lock(&d->data_mutex); st->data[0] = cmd; memcpy(&st->data[1], wbuf, wlen); if (wo) @@ -77,7 +77,7 @@ static int cxusb_ctrl_msg(struct dvb_usb_device *d, ret = dvb_usb_generic_rw(d, st->data, 1 + wlen, rbuf, rlen, 0); - mutex_unlock(&st->data_mutex); + mutex_unlock(&d->data_mutex); return ret; } @@ -1461,43 +1461,36 @@ static struct dvb_usb_device_properties cxusb_mygica_t230_properties; static int cxusb_probe(struct usb_interface *intf, const struct usb_device_id *id) { - struct dvb_usb_device *d; - struct cxusb_state *st; - if (0 == dvb_usb_device_init(intf, &cxusb_medion_properties, - THIS_MODULE, &d, adapter_nr) || + THIS_MODULE, NULL, adapter_nr) || 0 == dvb_usb_device_init(intf, &cxusb_bluebird_lgh064f_properties, - THIS_MODULE, &d, adapter_nr) || + THIS_MODULE, NULL, adapter_nr) || 0 == dvb_usb_device_init(intf, &cxusb_bluebird_dee1601_properties, - THIS_MODULE, &d, adapter_nr) || + THIS_MODULE, NULL, adapter_nr) || 0 == dvb_usb_device_init(intf, &cxusb_bluebird_lgz201_properties, - THIS_MODULE, &d, adapter_nr) || + THIS_MODULE, NULL, adapter_nr) || 0 == dvb_usb_device_init(intf, &cxusb_bluebird_dtt7579_properties, - THIS_MODULE, &d, adapter_nr) || + THIS_MODULE, NULL, adapter_nr) || 0 == dvb_usb_device_init(intf, &cxusb_bluebird_dualdig4_properties, - THIS_MODULE, &d, adapter_nr) || + THIS_MODULE, NULL, adapter_nr) || 0 == dvb_usb_device_init(intf, &cxusb_bluebird_nano2_properties, - THIS_MODULE, &d, adapter_nr) || + THIS_MODULE, NULL, adapter_nr) || 0 == dvb_usb_device_init(intf, &cxusb_bluebird_nano2_needsfirmware_properties, - THIS_MODULE, &d, adapter_nr) || + THIS_MODULE, NULL, adapter_nr) || 0 == dvb_usb_device_init(intf, &cxusb_aver_a868r_properties, - THIS_MODULE, &d, adapter_nr) || + THIS_MODULE, NULL, adapter_nr) || 0 == dvb_usb_device_init(intf, &cxusb_bluebird_dualdig4_rev2_properties, - THIS_MODULE, &d, adapter_nr) || + THIS_MODULE, NULL, adapter_nr) || 0 == dvb_usb_device_init(intf, &cxusb_d680_dmb_properties, - THIS_MODULE, &d, adapter_nr) || + THIS_MODULE, NULL, adapter_nr) || 0 == dvb_usb_device_init(intf, &cxusb_mygica_d689_properties, - THIS_MODULE, &d, adapter_nr) || + THIS_MODULE, NULL, adapter_nr) || 0 == dvb_usb_device_init(intf, &cxusb_mygica_t230_properties, - THIS_MODULE, &d, adapter_nr) || - 0) { - st = d->priv; - mutex_init(&st->data_mutex); - + THIS_MODULE, NULL, adapter_nr) || + 0) return 0; - } return -EINVAL; } diff --git a/drivers/media/usb/dvb-usb/cxusb.h b/drivers/media/usb/dvb-usb/cxusb.h index 9f3ee0e47d5c..18acda19527a 100644 --- a/drivers/media/usb/dvb-usb/cxusb.h +++ b/drivers/media/usb/dvb-usb/cxusb.h @@ -37,7 +37,6 @@ struct cxusb_state { struct i2c_client *i2c_client_tuner; unsigned char data[MAX_XFER_SIZE]; - struct mutex data_mutex; }; #endif diff --git a/drivers/media/usb/dvb-usb/dtt200u.c b/drivers/media/usb/dvb-usb/dtt200u.c index f88572c7ae7c..fcbff7fb0c4e 100644 --- a/drivers/media/usb/dvb-usb/dtt200u.c +++ b/drivers/media/usb/dvb-usb/dtt200u.c @@ -22,7 +22,6 @@ DVB_DEFINE_MOD_OPT_ADAPTER_NR(adapter_nr); struct dtt200u_state { unsigned char data[80]; - struct mutex data_mutex; }; static int dtt200u_power_ctrl(struct dvb_usb_device *d, int onoff) @@ -30,23 +29,24 @@ static int dtt200u_power_ctrl(struct dvb_usb_device *d, int onoff) struct dtt200u_state *st = d->priv; int ret = 0; - mutex_lock(&st->data_mutex); + mutex_lock(&d->data_mutex); st->data[0] = SET_INIT; if (onoff) ret = dvb_usb_generic_write(d, st->data, 2); - mutex_unlock(&st->data_mutex); + mutex_unlock(&d->data_mutex); return ret; } static int dtt200u_streaming_ctrl(struct dvb_usb_adapter *adap, int onoff) { - struct dtt200u_state *st = adap->dev->priv; + struct dvb_usb_device *d = adap->dev; + struct dtt200u_state *st = d->priv; int ret; - mutex_lock(&st->data_mutex); + mutex_lock(&d->data_mutex); st->data[0] = SET_STREAMING; st->data[1] = onoff; @@ -61,26 +61,27 @@ static int dtt200u_streaming_ctrl(struct dvb_usb_adapter *adap, int onoff) ret = dvb_usb_generic_write(adap->dev, st->data, 1); ret: - mutex_unlock(&st->data_mutex); + mutex_unlock(&d->data_mutex); return ret; } static int dtt200u_pid_filter(struct dvb_usb_adapter *adap, int index, u16 pid, int onoff) { - struct dtt200u_state *st = adap->dev->priv; + struct dvb_usb_device *d = adap->dev; + struct dtt200u_state *st = d->priv; int ret; pid = onoff ? pid : 0; - mutex_lock(&st->data_mutex); + mutex_lock(&d->data_mutex); st->data[0] = SET_PID_FILTER; st->data[1] = index; st->data[2] = pid & 0xff; st->data[3] = (pid >> 8) & 0x1f; ret = dvb_usb_generic_write(adap->dev, st->data, 4); - mutex_unlock(&st->data_mutex); + mutex_unlock(&d->data_mutex); return ret; } @@ -91,7 +92,7 @@ static int dtt200u_rc_query(struct dvb_usb_device *d) u32 scancode; int ret; - mutex_lock(&st->data_mutex); + mutex_lock(&d->data_mutex); st->data[0] = GET_RC_CODE; ret = dvb_usb_generic_rw(d, st->data, 1, st->data, 5, 0); @@ -126,7 +127,7 @@ static int dtt200u_rc_query(struct dvb_usb_device *d) deb_info("st->data: %*ph\n", 5, st->data); ret: - mutex_unlock(&st->data_mutex); + mutex_unlock(&d->data_mutex); return ret; } @@ -145,24 +146,17 @@ static struct dvb_usb_device_properties wt220u_miglia_properties; static int dtt200u_usb_probe(struct usb_interface *intf, const struct usb_device_id *id) { - struct dvb_usb_device *d; - struct dtt200u_state *st; - if (0 == dvb_usb_device_init(intf, &dtt200u_properties, - THIS_MODULE, &d, adapter_nr) || + THIS_MODULE, NULL, adapter_nr) || 0 == dvb_usb_device_init(intf, &wt220u_properties, - THIS_MODULE, &d, adapter_nr) || + THIS_MODULE, NULL, adapter_nr) || 0 == dvb_usb_device_init(intf, &wt220u_fc_properties, - THIS_MODULE, &d, adapter_nr) || + THIS_MODULE, NULL, adapter_nr) || 0 == dvb_usb_device_init(intf, &wt220u_zl0353_properties, - THIS_MODULE, &d, adapter_nr) || + THIS_MODULE, NULL, adapter_nr) || 0 == dvb_usb_device_init(intf, &wt220u_miglia_properties, - THIS_MODULE, &d, adapter_nr)) { - st = d->priv; - mutex_init(&st->data_mutex); - + THIS_MODULE, NULL, adapter_nr)) return 0; - } return -ENODEV; } diff --git a/drivers/media/usb/dvb-usb/dvb-usb-init.c b/drivers/media/usb/dvb-usb/dvb-usb-init.c index 3896ba9a4179..84308569e7dc 100644 --- a/drivers/media/usb/dvb-usb/dvb-usb-init.c +++ b/drivers/media/usb/dvb-usb/dvb-usb-init.c @@ -142,6 +142,7 @@ static int dvb_usb_init(struct dvb_usb_device *d, short *adapter_nums) { int ret = 0; + mutex_init(&d->data_mutex); mutex_init(&d->usb_mutex); mutex_init(&d->i2c_mutex); diff --git a/drivers/media/usb/dvb-usb/dvb-usb.h b/drivers/media/usb/dvb-usb/dvb-usb.h index 639c4678c65b..107255b08b2b 100644 --- a/drivers/media/usb/dvb-usb/dvb-usb.h +++ b/drivers/media/usb/dvb-usb/dvb-usb.h @@ -404,8 +404,12 @@ struct dvb_usb_adapter { * Powered is in/decremented for each call to modify the state. * @udev: pointer to the device's struct usb_device. * - * @usb_mutex: semaphore of USB control messages (reading needs two messages) - * @i2c_mutex: semaphore for i2c-transfers + * @data_mutex: mutex to protect the data structure used to store URB data + * @usb_mutex: mutex of USB control messages (reading needs two messages). + * Please notice that this mutex is used internally at the generic + * URB control functions. So, drivers using dvb_usb_generic_rw() and + * derivated functions should not lock it internally. + * @i2c_mutex: mutex for i2c-transfers * * @i2c_adap: device's i2c_adapter if it uses I2CoverUSB * @@ -433,6 +437,7 @@ struct dvb_usb_device { int powered; /* locking */ + struct mutex data_mutex; struct mutex usb_mutex; /* i2c */ From 1596c387e970cb680d4031fbe4d6eb2c2a4ddb63 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Sat, 12 Nov 2016 12:46:27 -0200 Subject: [PATCH 295/319] gp8psk: fix gp8psk_usb_in_op() logic Commit bc29131ecb10 ("[media] gp8psk: don't do DMA on stack") fixed the usage of DMA on stack, but the memcpy was wrong for gp8psk_usb_in_op(). Fix it. From Derek's email: "Fix confirmed using 2 different Skywalker models with HD mpeg4, SD mpeg2." Suggested-by: Johannes Stezenbach Fixes: bc29131ecb10 ("[media] gp8psk: don't do DMA on stack") Tested-by: Derek Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Linus Torvalds --- drivers/media/usb/dvb-usb/gp8psk.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/media/usb/dvb-usb/gp8psk.c b/drivers/media/usb/dvb-usb/gp8psk.c index adfd76491451..2829e3082d15 100644 --- a/drivers/media/usb/dvb-usb/gp8psk.c +++ b/drivers/media/usb/dvb-usb/gp8psk.c @@ -67,7 +67,6 @@ int gp8psk_usb_in_op(struct dvb_usb_device *d, u8 req, u16 value, u16 index, u8 return ret; while (ret >= 0 && ret != blen && try < 3) { - memcpy(st->data, b, blen); ret = usb_control_msg(d->udev, usb_rcvctrlpipe(d->udev,0), req, @@ -81,8 +80,10 @@ int gp8psk_usb_in_op(struct dvb_usb_device *d, u8 req, u16 value, u16 index, u8 if (ret < 0 || ret != blen) { warn("usb in %d operation failed.", req); ret = -EIO; - } else + } else { ret = 0; + memcpy(b, st->data, blen); + } deb_xfer("in: req. %x, val: %x, ind: %x, buffer: ",req,value,index); debug_dump(b,blen,deb_xfer); From 7a0786c19d65bd4502b4a53aec9ef75e18192a00 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Sat, 12 Nov 2016 12:46:28 -0200 Subject: [PATCH 296/319] gp8psk: Fix DVB frontend attach The DVB binding schema at the DVB core assumes that the frontend is a separate driver. Faling to do that causes OOPS when the module is removed, as it tries to do a symbol_put_addr on an internal symbol, causing craches like: WARNING: CPU: 1 PID: 28102 at kernel/module.c:1108 module_put+0x57/0x70 Modules linked in: dvb_usb_gp8psk(-) dvb_usb dvb_core nvidia_drm(PO) nvidia_modeset(PO) snd_hda_codec_hdmi snd_hda_intel snd_hda_codec snd_hwdep snd_hda_core snd_pcm snd_timer snd soundcore nvidia(PO) [last unloaded: rc_core] CPU: 1 PID: 28102 Comm: rmmod Tainted: P WC O 4.8.4-build.1 #1 Hardware name: MSI MS-7309/MS-7309, BIOS V1.12 02/23/2009 Call Trace: dump_stack+0x44/0x64 __warn+0xfa/0x120 module_put+0x57/0x70 module_put+0x57/0x70 warn_slowpath_null+0x23/0x30 module_put+0x57/0x70 gp8psk_fe_set_frontend+0x460/0x460 [dvb_usb_gp8psk] symbol_put_addr+0x27/0x50 dvb_usb_adapter_frontend_exit+0x3a/0x70 [dvb_usb] From Derek's tests: "Attach bug is fixed, tuning works, module unloads without crashing. Everything seems ok!" Reported-by: Derek Tested-by: Derek Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Linus Torvalds --- drivers/media/dvb-frontends/Kconfig | 5 + drivers/media/dvb-frontends/Makefile | 1 + .../dvb-usb => dvb-frontends}/gp8psk-fe.c | 135 +++++++++++------- drivers/media/dvb-frontends/gp8psk-fe.h | 82 +++++++++++ drivers/media/usb/dvb-usb/Makefile | 2 +- drivers/media/usb/dvb-usb/gp8psk.c | 106 ++++++++++---- drivers/media/usb/dvb-usb/gp8psk.h | 63 -------- 7 files changed, 244 insertions(+), 150 deletions(-) rename drivers/media/{usb/dvb-usb => dvb-frontends}/gp8psk-fe.c (72%) create mode 100644 drivers/media/dvb-frontends/gp8psk-fe.h diff --git a/drivers/media/dvb-frontends/Kconfig b/drivers/media/dvb-frontends/Kconfig index 012225587c25..b71b747ee0ba 100644 --- a/drivers/media/dvb-frontends/Kconfig +++ b/drivers/media/dvb-frontends/Kconfig @@ -513,6 +513,11 @@ config DVB_AS102_FE depends on DVB_CORE default DVB_AS102 +config DVB_GP8PSK_FE + tristate + depends on DVB_CORE + default DVB_USB_GP8PSK + comment "DVB-C (cable) frontends" depends on DVB_CORE diff --git a/drivers/media/dvb-frontends/Makefile b/drivers/media/dvb-frontends/Makefile index e90165ad361b..93921a4eaa27 100644 --- a/drivers/media/dvb-frontends/Makefile +++ b/drivers/media/dvb-frontends/Makefile @@ -121,6 +121,7 @@ obj-$(CONFIG_DVB_RTL2832_SDR) += rtl2832_sdr.o obj-$(CONFIG_DVB_M88RS2000) += m88rs2000.o obj-$(CONFIG_DVB_AF9033) += af9033.o obj-$(CONFIG_DVB_AS102_FE) += as102_fe.o +obj-$(CONFIG_DVB_GP8PSK_FE) += gp8psk-fe.o obj-$(CONFIG_DVB_TC90522) += tc90522.o obj-$(CONFIG_DVB_HORUS3A) += horus3a.o obj-$(CONFIG_DVB_ASCOT2E) += ascot2e.o diff --git a/drivers/media/usb/dvb-usb/gp8psk-fe.c b/drivers/media/dvb-frontends/gp8psk-fe.c similarity index 72% rename from drivers/media/usb/dvb-usb/gp8psk-fe.c rename to drivers/media/dvb-frontends/gp8psk-fe.c index db6eb79cde07..be19afeed7a9 100644 --- a/drivers/media/usb/dvb-usb/gp8psk-fe.c +++ b/drivers/media/dvb-frontends/gp8psk-fe.c @@ -14,11 +14,27 @@ * * see Documentation/dvb/README.dvb-usb for more information */ -#include "gp8psk.h" + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include "gp8psk-fe.h" +#include "dvb_frontend.h" + +static int debug; +module_param(debug, int, 0644); +MODULE_PARM_DESC(debug, "Turn on/off debugging (default:off)."); + +#define dprintk(fmt, arg...) do { \ + if (debug) \ + printk(KERN_DEBUG pr_fmt("%s: " fmt), \ + __func__, ##arg); \ +} while (0) struct gp8psk_fe_state { struct dvb_frontend fe; - struct dvb_usb_device *d; + void *priv; + const struct gp8psk_fe_ops *ops; + bool is_rev1; u8 lock; u16 snr; unsigned long next_status_check; @@ -29,22 +45,24 @@ static int gp8psk_tuned_to_DCII(struct dvb_frontend *fe) { struct gp8psk_fe_state *st = fe->demodulator_priv; u8 status; - gp8psk_usb_in_op(st->d, GET_8PSK_CONFIG, 0, 0, &status, 1); + + st->ops->in(st->priv, GET_8PSK_CONFIG, 0, 0, &status, 1); return status & bmDCtuned; } static int gp8psk_set_tuner_mode(struct dvb_frontend *fe, int mode) { - struct gp8psk_fe_state *state = fe->demodulator_priv; - return gp8psk_usb_out_op(state->d, SET_8PSK_CONFIG, mode, 0, NULL, 0); + struct gp8psk_fe_state *st = fe->demodulator_priv; + + return st->ops->out(st->priv, SET_8PSK_CONFIG, mode, 0, NULL, 0); } static int gp8psk_fe_update_status(struct gp8psk_fe_state *st) { u8 buf[6]; if (time_after(jiffies,st->next_status_check)) { - gp8psk_usb_in_op(st->d, GET_SIGNAL_LOCK, 0,0,&st->lock,1); - gp8psk_usb_in_op(st->d, GET_SIGNAL_STRENGTH, 0,0,buf,6); + st->ops->in(st->priv, GET_SIGNAL_LOCK, 0, 0, &st->lock, 1); + st->ops->in(st->priv, GET_SIGNAL_STRENGTH, 0, 0, buf, 6); st->snr = (buf[1]) << 8 | buf[0]; st->next_status_check = jiffies + (st->status_check_interval*HZ)/1000; } @@ -116,13 +134,12 @@ static int gp8psk_fe_get_tune_settings(struct dvb_frontend* fe, struct dvb_front static int gp8psk_fe_set_frontend(struct dvb_frontend *fe) { - struct gp8psk_fe_state *state = fe->demodulator_priv; + struct gp8psk_fe_state *st = fe->demodulator_priv; struct dtv_frontend_properties *c = &fe->dtv_property_cache; u8 cmd[10]; u32 freq = c->frequency * 1000; - int gp_product_id = le16_to_cpu(state->d->udev->descriptor.idProduct); - deb_fe("%s()\n", __func__); + dprintk("%s()\n", __func__); cmd[4] = freq & 0xff; cmd[5] = (freq >> 8) & 0xff; @@ -136,21 +153,21 @@ static int gp8psk_fe_set_frontend(struct dvb_frontend *fe) switch (c->delivery_system) { case SYS_DVBS: if (c->modulation != QPSK) { - deb_fe("%s: unsupported modulation selected (%d)\n", + dprintk("%s: unsupported modulation selected (%d)\n", __func__, c->modulation); return -EOPNOTSUPP; } c->fec_inner = FEC_AUTO; break; case SYS_DVBS2: /* kept for backwards compatibility */ - deb_fe("%s: DVB-S2 delivery system selected\n", __func__); + dprintk("%s: DVB-S2 delivery system selected\n", __func__); break; case SYS_TURBO: - deb_fe("%s: Turbo-FEC delivery system selected\n", __func__); + dprintk("%s: Turbo-FEC delivery system selected\n", __func__); break; default: - deb_fe("%s: unsupported delivery system selected (%d)\n", + dprintk("%s: unsupported delivery system selected (%d)\n", __func__, c->delivery_system); return -EOPNOTSUPP; } @@ -161,9 +178,9 @@ static int gp8psk_fe_set_frontend(struct dvb_frontend *fe) cmd[3] = (c->symbol_rate >> 24) & 0xff; switch (c->modulation) { case QPSK: - if (gp_product_id == USB_PID_GENPIX_8PSK_REV_1_WARM) + if (st->is_rev1) if (gp8psk_tuned_to_DCII(fe)) - gp8psk_bcm4500_reload(state->d); + st->ops->reload(st->priv); switch (c->fec_inner) { case FEC_1_2: cmd[9] = 0; break; @@ -207,18 +224,18 @@ static int gp8psk_fe_set_frontend(struct dvb_frontend *fe) cmd[9] = 0; break; default: /* Unknown modulation */ - deb_fe("%s: unsupported modulation selected (%d)\n", + dprintk("%s: unsupported modulation selected (%d)\n", __func__, c->modulation); return -EOPNOTSUPP; } - if (gp_product_id == USB_PID_GENPIX_8PSK_REV_1_WARM) + if (st->is_rev1) gp8psk_set_tuner_mode(fe, 0); - gp8psk_usb_out_op(state->d, TUNE_8PSK, 0, 0, cmd, 10); + st->ops->out(st->priv, TUNE_8PSK, 0, 0, cmd, 10); - state->lock = 0; - state->next_status_check = jiffies; - state->status_check_interval = 200; + st->lock = 0; + st->next_status_check = jiffies; + st->status_check_interval = 200; return 0; } @@ -228,9 +245,9 @@ static int gp8psk_fe_send_diseqc_msg (struct dvb_frontend* fe, { struct gp8psk_fe_state *st = fe->demodulator_priv; - deb_fe("%s\n",__func__); + dprintk("%s\n", __func__); - if (gp8psk_usb_out_op(st->d,SEND_DISEQC_COMMAND, m->msg[0], 0, + if (st->ops->out(st->priv, SEND_DISEQC_COMMAND, m->msg[0], 0, m->msg, m->msg_len)) { return -EINVAL; } @@ -243,12 +260,12 @@ static int gp8psk_fe_send_diseqc_burst(struct dvb_frontend *fe, struct gp8psk_fe_state *st = fe->demodulator_priv; u8 cmd; - deb_fe("%s\n",__func__); + dprintk("%s\n", __func__); /* These commands are certainly wrong */ cmd = (burst == SEC_MINI_A) ? 0x00 : 0x01; - if (gp8psk_usb_out_op(st->d,SEND_DISEQC_COMMAND, cmd, 0, + if (st->ops->out(st->priv, SEND_DISEQC_COMMAND, cmd, 0, &cmd, 0)) { return -EINVAL; } @@ -258,10 +275,10 @@ static int gp8psk_fe_send_diseqc_burst(struct dvb_frontend *fe, static int gp8psk_fe_set_tone(struct dvb_frontend *fe, enum fe_sec_tone_mode tone) { - struct gp8psk_fe_state* state = fe->demodulator_priv; + struct gp8psk_fe_state *st = fe->demodulator_priv; - if (gp8psk_usb_out_op(state->d,SET_22KHZ_TONE, - (tone == SEC_TONE_ON), 0, NULL, 0)) { + if (st->ops->out(st->priv, SET_22KHZ_TONE, + (tone == SEC_TONE_ON), 0, NULL, 0)) { return -EINVAL; } return 0; @@ -270,9 +287,9 @@ static int gp8psk_fe_set_tone(struct dvb_frontend *fe, static int gp8psk_fe_set_voltage(struct dvb_frontend *fe, enum fe_sec_voltage voltage) { - struct gp8psk_fe_state* state = fe->demodulator_priv; + struct gp8psk_fe_state *st = fe->demodulator_priv; - if (gp8psk_usb_out_op(state->d,SET_LNB_VOLTAGE, + if (st->ops->out(st->priv, SET_LNB_VOLTAGE, voltage == SEC_VOLTAGE_18, 0, NULL, 0)) { return -EINVAL; } @@ -281,52 +298,60 @@ static int gp8psk_fe_set_voltage(struct dvb_frontend *fe, static int gp8psk_fe_enable_high_lnb_voltage(struct dvb_frontend* fe, long onoff) { - struct gp8psk_fe_state* state = fe->demodulator_priv; - return gp8psk_usb_out_op(state->d, USE_EXTRA_VOLT, onoff, 0,NULL,0); + struct gp8psk_fe_state *st = fe->demodulator_priv; + + return st->ops->out(st->priv, USE_EXTRA_VOLT, onoff, 0, NULL, 0); } static int gp8psk_fe_send_legacy_dish_cmd (struct dvb_frontend* fe, unsigned long sw_cmd) { - struct gp8psk_fe_state* state = fe->demodulator_priv; + struct gp8psk_fe_state *st = fe->demodulator_priv; u8 cmd = sw_cmd & 0x7f; - if (gp8psk_usb_out_op(state->d,SET_DN_SWITCH, cmd, 0, - NULL, 0)) { + if (st->ops->out(st->priv, SET_DN_SWITCH, cmd, 0, NULL, 0)) return -EINVAL; - } - if (gp8psk_usb_out_op(state->d,SET_LNB_VOLTAGE, !!(sw_cmd & 0x80), - 0, NULL, 0)) { + + if (st->ops->out(st->priv, SET_LNB_VOLTAGE, !!(sw_cmd & 0x80), + 0, NULL, 0)) return -EINVAL; - } return 0; } static void gp8psk_fe_release(struct dvb_frontend* fe) { - struct gp8psk_fe_state *state = fe->demodulator_priv; - kfree(state); + struct gp8psk_fe_state *st = fe->demodulator_priv; + + kfree(st); } static struct dvb_frontend_ops gp8psk_fe_ops; -struct dvb_frontend * gp8psk_fe_attach(struct dvb_usb_device *d) +struct dvb_frontend *gp8psk_fe_attach(const struct gp8psk_fe_ops *ops, + void *priv, bool is_rev1) { - struct gp8psk_fe_state *s = kzalloc(sizeof(struct gp8psk_fe_state), GFP_KERNEL); - if (s == NULL) - goto error; + struct gp8psk_fe_state *st; - s->d = d; - memcpy(&s->fe.ops, &gp8psk_fe_ops, sizeof(struct dvb_frontend_ops)); - s->fe.demodulator_priv = s; + if (!ops || !ops->in || !ops->out || !ops->reload) { + pr_err("Error! gp8psk-fe ops not defined.\n"); + return NULL; + } - goto success; -error: - return NULL; -success: - return &s->fe; + st = kzalloc(sizeof(struct gp8psk_fe_state), GFP_KERNEL); + if (!st) + return NULL; + + memcpy(&st->fe.ops, &gp8psk_fe_ops, sizeof(struct dvb_frontend_ops)); + st->fe.demodulator_priv = st; + st->ops = ops; + st->priv = priv; + st->is_rev1 = is_rev1; + + pr_info("Frontend %sattached\n", is_rev1 ? "revision 1 " : ""); + + return &st->fe; } - +EXPORT_SYMBOL_GPL(gp8psk_fe_attach); static struct dvb_frontend_ops gp8psk_fe_ops = { .delsys = { SYS_DVBS }, diff --git a/drivers/media/dvb-frontends/gp8psk-fe.h b/drivers/media/dvb-frontends/gp8psk-fe.h new file mode 100644 index 000000000000..6c7944b1ecd6 --- /dev/null +++ b/drivers/media/dvb-frontends/gp8psk-fe.h @@ -0,0 +1,82 @@ +/* + * gp8psk_fe driver + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef GP8PSK_FE_H +#define GP8PSK_FE_H + +#include + +/* gp8psk commands */ + +#define GET_8PSK_CONFIG 0x80 /* in */ +#define SET_8PSK_CONFIG 0x81 +#define I2C_WRITE 0x83 +#define I2C_READ 0x84 +#define ARM_TRANSFER 0x85 +#define TUNE_8PSK 0x86 +#define GET_SIGNAL_STRENGTH 0x87 /* in */ +#define LOAD_BCM4500 0x88 +#define BOOT_8PSK 0x89 /* in */ +#define START_INTERSIL 0x8A /* in */ +#define SET_LNB_VOLTAGE 0x8B +#define SET_22KHZ_TONE 0x8C +#define SEND_DISEQC_COMMAND 0x8D +#define SET_DVB_MODE 0x8E +#define SET_DN_SWITCH 0x8F +#define GET_SIGNAL_LOCK 0x90 /* in */ +#define GET_FW_VERS 0x92 +#define GET_SERIAL_NUMBER 0x93 /* in */ +#define USE_EXTRA_VOLT 0x94 +#define GET_FPGA_VERS 0x95 +#define CW3K_INIT 0x9d + +/* PSK_configuration bits */ +#define bm8pskStarted 0x01 +#define bm8pskFW_Loaded 0x02 +#define bmIntersilOn 0x04 +#define bmDVBmode 0x08 +#define bm22kHz 0x10 +#define bmSEL18V 0x20 +#define bmDCtuned 0x40 +#define bmArmed 0x80 + +/* Satellite modulation modes */ +#define ADV_MOD_DVB_QPSK 0 /* DVB-S QPSK */ +#define ADV_MOD_TURBO_QPSK 1 /* Turbo QPSK */ +#define ADV_MOD_TURBO_8PSK 2 /* Turbo 8PSK (also used for Trellis 8PSK) */ +#define ADV_MOD_TURBO_16QAM 3 /* Turbo 16QAM (also used for Trellis 8PSK) */ + +#define ADV_MOD_DCII_C_QPSK 4 /* Digicipher II Combo */ +#define ADV_MOD_DCII_I_QPSK 5 /* Digicipher II I-stream */ +#define ADV_MOD_DCII_Q_QPSK 6 /* Digicipher II Q-stream */ +#define ADV_MOD_DCII_C_OQPSK 7 /* Digicipher II offset QPSK */ +#define ADV_MOD_DSS_QPSK 8 /* DSS (DIRECTV) QPSK */ +#define ADV_MOD_DVB_BPSK 9 /* DVB-S BPSK */ + +/* firmware revision id's */ +#define GP8PSK_FW_REV1 0x020604 +#define GP8PSK_FW_REV2 0x020704 +#define GP8PSK_FW_VERS(_fw_vers) \ + ((_fw_vers)[2]<<0x10 | (_fw_vers)[1]<<0x08 | (_fw_vers)[0]) + +struct gp8psk_fe_ops { + int (*in)(void *priv, u8 req, u16 value, u16 index, u8 *b, int blen); + int (*out)(void *priv, u8 req, u16 value, u16 index, u8 *b, int blen); + int (*reload)(void *priv); +}; + +struct dvb_frontend *gp8psk_fe_attach(const struct gp8psk_fe_ops *ops, + void *priv, bool is_rev1); + +#endif diff --git a/drivers/media/usb/dvb-usb/Makefile b/drivers/media/usb/dvb-usb/Makefile index 2a7b5a963acf..3b3f32b426d1 100644 --- a/drivers/media/usb/dvb-usb/Makefile +++ b/drivers/media/usb/dvb-usb/Makefile @@ -8,7 +8,7 @@ obj-$(CONFIG_DVB_USB_VP7045) += dvb-usb-vp7045.o dvb-usb-vp702x-objs := vp702x.o vp702x-fe.o obj-$(CONFIG_DVB_USB_VP702X) += dvb-usb-vp702x.o -dvb-usb-gp8psk-objs := gp8psk.o gp8psk-fe.o +dvb-usb-gp8psk-objs := gp8psk.o obj-$(CONFIG_DVB_USB_GP8PSK) += dvb-usb-gp8psk.o dvb-usb-dtt200u-objs := dtt200u.o dtt200u-fe.o diff --git a/drivers/media/usb/dvb-usb/gp8psk.c b/drivers/media/usb/dvb-usb/gp8psk.c index 2829e3082d15..993bb7a72985 100644 --- a/drivers/media/usb/dvb-usb/gp8psk.c +++ b/drivers/media/usb/dvb-usb/gp8psk.c @@ -15,6 +15,7 @@ * see Documentation/dvb/README.dvb-usb for more information */ #include "gp8psk.h" +#include "gp8psk-fe.h" /* debug */ static char bcm4500_firmware[] = "dvb-usb-gp8psk-02.fw"; @@ -28,34 +29,8 @@ struct gp8psk_state { unsigned char data[80]; }; -static int gp8psk_get_fw_version(struct dvb_usb_device *d, u8 *fw_vers) -{ - return (gp8psk_usb_in_op(d, GET_FW_VERS, 0, 0, fw_vers, 6)); -} - -static int gp8psk_get_fpga_version(struct dvb_usb_device *d, u8 *fpga_vers) -{ - return (gp8psk_usb_in_op(d, GET_FPGA_VERS, 0, 0, fpga_vers, 1)); -} - -static void gp8psk_info(struct dvb_usb_device *d) -{ - u8 fpga_vers, fw_vers[6]; - - if (!gp8psk_get_fw_version(d, fw_vers)) - info("FW Version = %i.%02i.%i (0x%x) Build %4i/%02i/%02i", - fw_vers[2], fw_vers[1], fw_vers[0], GP8PSK_FW_VERS(fw_vers), - 2000 + fw_vers[5], fw_vers[4], fw_vers[3]); - else - info("failed to get FW version"); - - if (!gp8psk_get_fpga_version(d, &fpga_vers)) - info("FPGA Version = %i", fpga_vers); - else - info("failed to get FPGA version"); -} - -int gp8psk_usb_in_op(struct dvb_usb_device *d, u8 req, u16 value, u16 index, u8 *b, int blen) +static int gp8psk_usb_in_op(struct dvb_usb_device *d, u8 req, u16 value, + u16 index, u8 *b, int blen) { struct gp8psk_state *st = d->priv; int ret = 0,try = 0; @@ -93,7 +68,7 @@ int gp8psk_usb_in_op(struct dvb_usb_device *d, u8 req, u16 value, u16 index, u8 return ret; } -int gp8psk_usb_out_op(struct dvb_usb_device *d, u8 req, u16 value, +static int gp8psk_usb_out_op(struct dvb_usb_device *d, u8 req, u16 value, u16 index, u8 *b, int blen) { struct gp8psk_state *st = d->priv; @@ -124,6 +99,34 @@ int gp8psk_usb_out_op(struct dvb_usb_device *d, u8 req, u16 value, return ret; } + +static int gp8psk_get_fw_version(struct dvb_usb_device *d, u8 *fw_vers) +{ + return gp8psk_usb_in_op(d, GET_FW_VERS, 0, 0, fw_vers, 6); +} + +static int gp8psk_get_fpga_version(struct dvb_usb_device *d, u8 *fpga_vers) +{ + return gp8psk_usb_in_op(d, GET_FPGA_VERS, 0, 0, fpga_vers, 1); +} + +static void gp8psk_info(struct dvb_usb_device *d) +{ + u8 fpga_vers, fw_vers[6]; + + if (!gp8psk_get_fw_version(d, fw_vers)) + info("FW Version = %i.%02i.%i (0x%x) Build %4i/%02i/%02i", + fw_vers[2], fw_vers[1], fw_vers[0], GP8PSK_FW_VERS(fw_vers), + 2000 + fw_vers[5], fw_vers[4], fw_vers[3]); + else + info("failed to get FW version"); + + if (!gp8psk_get_fpga_version(d, &fpga_vers)) + info("FPGA Version = %i", fpga_vers); + else + info("failed to get FPGA version"); +} + static int gp8psk_load_bcm4500fw(struct dvb_usb_device *d) { int ret; @@ -226,10 +229,13 @@ static int gp8psk_power_ctrl(struct dvb_usb_device *d, int onoff) return 0; } -int gp8psk_bcm4500_reload(struct dvb_usb_device *d) +static int gp8psk_bcm4500_reload(struct dvb_usb_device *d) { u8 buf; int gp_product_id = le16_to_cpu(d->udev->descriptor.idProduct); + + deb_xfer("reloading firmware\n"); + /* Turn off 8psk power */ if (gp8psk_usb_in_op(d, BOOT_8PSK, 0, 0, &buf, 1)) return -EINVAL; @@ -248,9 +254,47 @@ static int gp8psk_streaming_ctrl(struct dvb_usb_adapter *adap, int onoff) return gp8psk_usb_out_op(adap->dev, ARM_TRANSFER, onoff, 0 , NULL, 0); } +/* Callbacks for gp8psk-fe.c */ + +static int gp8psk_fe_in(void *priv, u8 req, u16 value, + u16 index, u8 *b, int blen) +{ + struct dvb_usb_device *d = priv; + + return gp8psk_usb_in_op(d, req, value, index, b, blen); +} + +static int gp8psk_fe_out(void *priv, u8 req, u16 value, + u16 index, u8 *b, int blen) +{ + struct dvb_usb_device *d = priv; + + return gp8psk_usb_out_op(d, req, value, index, b, blen); +} + +static int gp8psk_fe_reload(void *priv) +{ + struct dvb_usb_device *d = priv; + + return gp8psk_bcm4500_reload(d); +} + +const struct gp8psk_fe_ops gp8psk_fe_ops = { + .in = gp8psk_fe_in, + .out = gp8psk_fe_out, + .reload = gp8psk_fe_reload, +}; + static int gp8psk_frontend_attach(struct dvb_usb_adapter *adap) { - adap->fe_adap[0].fe = gp8psk_fe_attach(adap->dev); + struct dvb_usb_device *d = adap->dev; + int id = le16_to_cpu(d->udev->descriptor.idProduct); + int is_rev1; + + is_rev1 = (id == USB_PID_GENPIX_8PSK_REV_1_WARM) ? true : false; + + adap->fe_adap[0].fe = dvb_attach(gp8psk_fe_attach, + &gp8psk_fe_ops, d, is_rev1); return 0; } diff --git a/drivers/media/usb/dvb-usb/gp8psk.h b/drivers/media/usb/dvb-usb/gp8psk.h index ed32b9da4843..d8975b866dee 100644 --- a/drivers/media/usb/dvb-usb/gp8psk.h +++ b/drivers/media/usb/dvb-usb/gp8psk.h @@ -24,58 +24,6 @@ extern int dvb_usb_gp8psk_debug; #define deb_info(args...) dprintk(dvb_usb_gp8psk_debug,0x01,args) #define deb_xfer(args...) dprintk(dvb_usb_gp8psk_debug,0x02,args) #define deb_rc(args...) dprintk(dvb_usb_gp8psk_debug,0x04,args) -#define deb_fe(args...) dprintk(dvb_usb_gp8psk_debug,0x08,args) - -/* Twinhan Vendor requests */ -#define TH_COMMAND_IN 0xC0 -#define TH_COMMAND_OUT 0xC1 - -/* gp8psk commands */ - -#define GET_8PSK_CONFIG 0x80 /* in */ -#define SET_8PSK_CONFIG 0x81 -#define I2C_WRITE 0x83 -#define I2C_READ 0x84 -#define ARM_TRANSFER 0x85 -#define TUNE_8PSK 0x86 -#define GET_SIGNAL_STRENGTH 0x87 /* in */ -#define LOAD_BCM4500 0x88 -#define BOOT_8PSK 0x89 /* in */ -#define START_INTERSIL 0x8A /* in */ -#define SET_LNB_VOLTAGE 0x8B -#define SET_22KHZ_TONE 0x8C -#define SEND_DISEQC_COMMAND 0x8D -#define SET_DVB_MODE 0x8E -#define SET_DN_SWITCH 0x8F -#define GET_SIGNAL_LOCK 0x90 /* in */ -#define GET_FW_VERS 0x92 -#define GET_SERIAL_NUMBER 0x93 /* in */ -#define USE_EXTRA_VOLT 0x94 -#define GET_FPGA_VERS 0x95 -#define CW3K_INIT 0x9d - -/* PSK_configuration bits */ -#define bm8pskStarted 0x01 -#define bm8pskFW_Loaded 0x02 -#define bmIntersilOn 0x04 -#define bmDVBmode 0x08 -#define bm22kHz 0x10 -#define bmSEL18V 0x20 -#define bmDCtuned 0x40 -#define bmArmed 0x80 - -/* Satellite modulation modes */ -#define ADV_MOD_DVB_QPSK 0 /* DVB-S QPSK */ -#define ADV_MOD_TURBO_QPSK 1 /* Turbo QPSK */ -#define ADV_MOD_TURBO_8PSK 2 /* Turbo 8PSK (also used for Trellis 8PSK) */ -#define ADV_MOD_TURBO_16QAM 3 /* Turbo 16QAM (also used for Trellis 8PSK) */ - -#define ADV_MOD_DCII_C_QPSK 4 /* Digicipher II Combo */ -#define ADV_MOD_DCII_I_QPSK 5 /* Digicipher II I-stream */ -#define ADV_MOD_DCII_Q_QPSK 6 /* Digicipher II Q-stream */ -#define ADV_MOD_DCII_C_OQPSK 7 /* Digicipher II offset QPSK */ -#define ADV_MOD_DSS_QPSK 8 /* DSS (DIRECTV) QPSK */ -#define ADV_MOD_DVB_BPSK 9 /* DVB-S BPSK */ #define GET_USB_SPEED 0x07 @@ -86,15 +34,4 @@ extern int dvb_usb_gp8psk_debug; #define PRODUCT_STRING_READ 0x0D #define FW_BCD_VERSION_READ 0x14 -/* firmware revision id's */ -#define GP8PSK_FW_REV1 0x020604 -#define GP8PSK_FW_REV2 0x020704 -#define GP8PSK_FW_VERS(_fw_vers) ((_fw_vers)[2]<<0x10 | (_fw_vers)[1]<<0x08 | (_fw_vers)[0]) - -extern struct dvb_frontend * gp8psk_fe_attach(struct dvb_usb_device *d); -extern int gp8psk_usb_in_op(struct dvb_usb_device *d, u8 req, u16 value, u16 index, u8 *b, int blen); -extern int gp8psk_usb_out_op(struct dvb_usb_device *d, u8 req, u16 value, - u16 index, u8 *b, int blen); -extern int gp8psk_bcm4500_reload(struct dvb_usb_device *d); - #endif From a25f0944ba9b1d8a6813fd6f1a86f1bd59ac25a6 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 13 Nov 2016 10:32:32 -0800 Subject: [PATCH 297/319] Linux 4.9-rc5 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 06e2b73978e8..247430abfc73 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 4 PATCHLEVEL = 9 SUBLEVEL = 0 -EXTRAVERSION = -rc4 +EXTRAVERSION = -rc5 NAME = Psychotic Stoned Sheep # *DOCUMENTATION* From 46d054f8f540612f09987a53154aa39ae15f2e4c Mon Sep 17 00:00:00 2001 From: Bert Kenward Date: Fri, 11 Nov 2016 15:56:51 +0000 Subject: [PATCH 298/319] sfc: clear napi_hash state when copying channels efx_copy_channel() doesn't correctly clear the napi_hash related state. This means that when napi_hash_add is called for that channel nothing is done, and we are left with a copy of the napi_hash_node from the old channel. When we later call napi_hash_del() on this channel we have a stale napi_hash_node. Corruption is only seen when there are multiple entries in one of the napi_hash lists. This is made more likely by having a very large number of channels. Testing was carried out with 512 channels - 32 channels on each of 16 ports. This failure typically appears as protection faults within napi_by_id() or napi_hash_add(). efx_copy_channel() is only used when tx or rx ring sizes are changed (ethtool -G). Fixes: 36763266bbe8 ("sfc: Add support for busy polling") Signed-off-by: Bert Kenward Signed-off-by: David S. Miller --- drivers/net/ethernet/sfc/efx.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c index 3cf3557106c2..6b89e4a7b164 100644 --- a/drivers/net/ethernet/sfc/efx.c +++ b/drivers/net/ethernet/sfc/efx.c @@ -485,6 +485,9 @@ efx_copy_channel(const struct efx_channel *old_channel) *channel = *old_channel; channel->napi_dev = NULL; + INIT_HLIST_NODE(&channel->napi_str.napi_hash_node); + channel->napi_str.napi_id = 0; + channel->napi_str.state = 0; memset(&channel->eventq, 0, sizeof(channel->eventq)); for (j = 0; j < EFX_TXQ_TYPES; j++) { From b7f193da17fb18b752bef77ce52eb49723299bd8 Mon Sep 17 00:00:00 2001 From: Thomas Falcon Date: Fri, 11 Nov 2016 11:00:45 -0600 Subject: [PATCH 299/319] ibmvnic: Unmap ibmvnic_statistics structure This structure was mapped but never subsequently unmapped. Signed-off-by: Thomas Falcon Signed-off-by: David S. Miller --- drivers/net/ethernet/ibm/ibmvnic.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index f6c9b6d38ac7..921c40fad1c3 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -3844,6 +3844,9 @@ static int ibmvnic_remove(struct vio_dev *dev) if (adapter->debugfs_dir && !IS_ERR(adapter->debugfs_dir)) debugfs_remove_recursive(adapter->debugfs_dir); + dma_unmap_single(&dev->dev, adapter->stats_token, + sizeof(struct ibmvnic_statistics), DMA_FROM_DEVICE); + if (adapter->ras_comps) dma_free_coherent(&dev->dev, adapter->ras_comp_num * From e1fac0adf0f9b2c1eb49e658e6ed070a744bbaef Mon Sep 17 00:00:00 2001 From: Thomas Falcon Date: Fri, 11 Nov 2016 11:00:46 -0600 Subject: [PATCH 300/319] ibmvnic: Fix size of debugfs name buffer This mistake was causing debugfs directory creation failures when multiple ibmvnic devices were probed. Signed-off-by: Thomas Falcon Signed-off-by: David S. Miller --- drivers/net/ethernet/ibm/ibmvnic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 921c40fad1c3..4f3281a03e7e 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -3705,7 +3705,7 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id) struct net_device *netdev; unsigned char *mac_addr_p; struct dentry *ent; - char buf[16]; /* debugfs name buf */ + char buf[17]; /* debugfs name buf */ int rc; dev_dbg(&dev->dev, "entering ibmvnic_probe for UA 0x%x\n", From 7774d46b2037b98d3f7e414bffb1d53082dc139b Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Sat, 12 Nov 2016 17:44:06 +0000 Subject: [PATCH 301/319] net: ethernet: ixp4xx_eth: fix spelling mistake in debug message Trivial fix to spelling mistake "successed" to "succeeded" in debug message. Also unwrap multi-line literal string. Signed-off-by: Colin Ian King Signed-off-by: David S. Miller --- drivers/net/ethernet/xscale/ixp4xx_eth.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/xscale/ixp4xx_eth.c b/drivers/net/ethernet/xscale/ixp4xx_eth.c index 7f127dc1b7ba..fa32391720fe 100644 --- a/drivers/net/ethernet/xscale/ixp4xx_eth.c +++ b/drivers/net/ethernet/xscale/ixp4xx_eth.c @@ -708,8 +708,7 @@ static int eth_poll(struct napi_struct *napi, int budget) if (!qmgr_stat_below_low_watermark(rxq) && napi_reschedule(napi)) { /* not empty again */ #if DEBUG_RX - printk(KERN_DEBUG "%s: eth_poll" - " napi_reschedule successed\n", + printk(KERN_DEBUG "%s: eth_poll napi_reschedule succeeded\n", dev->name); #endif qmgr_disable_irq(rxq); From cedecbc5e0f39d2987b8e1004908e90459a82e78 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Mon, 8 Aug 2016 09:48:00 +0000 Subject: [PATCH 302/319] ntb_pingpong: Fix db_init parameter description Fix 'db_init' parameter description. Signed-off-by: Wei Yongjun Acked-by: Allen Hubbe Signed-off-by: Jon Mason --- drivers/ntb/test/ntb_pingpong.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ntb/test/ntb_pingpong.c b/drivers/ntb/test/ntb_pingpong.c index 7d311799fca1..435861189d97 100644 --- a/drivers/ntb/test/ntb_pingpong.c +++ b/drivers/ntb/test/ntb_pingpong.c @@ -88,7 +88,7 @@ MODULE_PARM_DESC(delay_ms, "Milliseconds to delay the response to peer"); static unsigned long db_init = 0x7; module_param(db_init, ulong, 0644); -MODULE_PARM_DESC(delay_ms, "Initial doorbell bits to ring on the peer"); +MODULE_PARM_DESC(db_init, "Initial doorbell bits to ring on the peer"); struct pp_ctx { struct ntb_dev *ntb; From 49b89de41f8d97eb13a60c1865ed61fbebed0d15 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Mon, 8 Aug 2016 09:48:42 +0000 Subject: [PATCH 303/319] NTB: ntb_hw_intel: Fix typo in module parameter descriptions Fix typo in module parameter descriptions. Signed-off-by: Wei Yongjun Acked-by: Allen Hubbe Signed-off-by: Jon Mason --- drivers/ntb/hw/intel/ntb_hw_intel.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/ntb/hw/intel/ntb_hw_intel.c b/drivers/ntb/hw/intel/ntb_hw_intel.c index 0d5c29ae51de..1ee61d92c54b 100644 --- a/drivers/ntb/hw/intel/ntb_hw_intel.c +++ b/drivers/ntb/hw/intel/ntb_hw_intel.c @@ -112,17 +112,17 @@ MODULE_PARM_DESC(xeon_b2b_usd_bar2_addr64, module_param_named(xeon_b2b_usd_bar4_addr64, xeon_b2b_usd_addr.bar4_addr64, ullong, 0644); -MODULE_PARM_DESC(xeon_b2b_usd_bar2_addr64, +MODULE_PARM_DESC(xeon_b2b_usd_bar4_addr64, "XEON B2B USD BAR 4 64-bit address"); module_param_named(xeon_b2b_usd_bar4_addr32, xeon_b2b_usd_addr.bar4_addr32, ullong, 0644); -MODULE_PARM_DESC(xeon_b2b_usd_bar2_addr64, +MODULE_PARM_DESC(xeon_b2b_usd_bar4_addr32, "XEON B2B USD split-BAR 4 32-bit address"); module_param_named(xeon_b2b_usd_bar5_addr32, xeon_b2b_usd_addr.bar5_addr32, ullong, 0644); -MODULE_PARM_DESC(xeon_b2b_usd_bar2_addr64, +MODULE_PARM_DESC(xeon_b2b_usd_bar5_addr32, "XEON B2B USD split-BAR 5 32-bit address"); module_param_named(xeon_b2b_dsd_bar2_addr64, @@ -132,17 +132,17 @@ MODULE_PARM_DESC(xeon_b2b_dsd_bar2_addr64, module_param_named(xeon_b2b_dsd_bar4_addr64, xeon_b2b_dsd_addr.bar4_addr64, ullong, 0644); -MODULE_PARM_DESC(xeon_b2b_dsd_bar2_addr64, +MODULE_PARM_DESC(xeon_b2b_dsd_bar4_addr64, "XEON B2B DSD BAR 4 64-bit address"); module_param_named(xeon_b2b_dsd_bar4_addr32, xeon_b2b_dsd_addr.bar4_addr32, ullong, 0644); -MODULE_PARM_DESC(xeon_b2b_dsd_bar2_addr64, +MODULE_PARM_DESC(xeon_b2b_dsd_bar4_addr32, "XEON B2B DSD split-BAR 4 32-bit address"); module_param_named(xeon_b2b_dsd_bar5_addr32, xeon_b2b_dsd_addr.bar5_addr32, ullong, 0644); -MODULE_PARM_DESC(xeon_b2b_dsd_bar2_addr64, +MODULE_PARM_DESC(xeon_b2b_dsd_bar5_addr32, "XEON B2B DSD split-BAR 5 32-bit address"); #ifndef ioread64 From c0a88032ef8e6814d4dd84551e5f333c1de639b3 Mon Sep 17 00:00:00 2001 From: Nicholas Mc Guire Date: Mon, 22 Aug 2016 18:51:35 +0200 Subject: [PATCH 304/319] ntb_transport: make DMA_OUT_RESOURCE_TO HZ independent schedule_timeout_* takes a timeout in jiffies but the code currently is passing in a constant which makes this timeout HZ dependent, so pass it through msecs_to_jiffies() to fix this up. Signed-off-by: Nicholas Mc Guire Signed-off-by: Jon Mason --- drivers/ntb/ntb_transport.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ntb/ntb_transport.c b/drivers/ntb/ntb_transport.c index 8601c10acf74..4eb8adb34508 100644 --- a/drivers/ntb/ntb_transport.c +++ b/drivers/ntb/ntb_transport.c @@ -257,7 +257,7 @@ enum { #define NTB_QP_DEF_NUM_ENTRIES 100 #define NTB_LINK_DOWN_TIMEOUT 10 #define DMA_RETRIES 20 -#define DMA_OUT_RESOURCE_TO 50 +#define DMA_OUT_RESOURCE_TO msecs_to_jiffies(50) static void ntb_transport_rxc_db(unsigned long data); static const struct ntb_ctx_ops ntb_transport_ops; From cdc08982a5f334cecc15d802464588115512cc36 Mon Sep 17 00:00:00 2001 From: Nicholas Mc Guire Date: Mon, 22 Aug 2016 18:51:36 +0200 Subject: [PATCH 305/319] ntb: make DMA_OUT_RESOURCE_TO HZ independent schedule_timeout_* takes a timeout in jiffies but the code currently is passing in a constant which makes this timeout HZ dependent, so pass it through msecs_to_jiffies() to fix this up. Signed-off-by: Nicholas Mc Guire Acked-by: Dave Jiang Signed-off-by: Jon Mason --- drivers/ntb/test/ntb_perf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ntb/test/ntb_perf.c b/drivers/ntb/test/ntb_perf.c index 6a50f20bf1cd..e065b695200d 100644 --- a/drivers/ntb/test/ntb_perf.c +++ b/drivers/ntb/test/ntb_perf.c @@ -72,7 +72,7 @@ #define MAX_THREADS 32 #define MAX_TEST_SIZE SZ_1M #define MAX_SRCS 32 -#define DMA_OUT_RESOURCE_TO 50 +#define DMA_OUT_RESOURCE_TO msecs_to_jiffies(50) #define DMA_RETRIES 20 #define SZ_4G (1ULL << 32) #define MAX_SEG_ORDER 20 /* no larger than 1M for kmalloc buffer */ From 25ea9f2bf5f76082da919f2a91ea8d920932c1da Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Thu, 27 Oct 2016 11:06:44 -0700 Subject: [PATCH 306/319] ntb: ntb_hw_intel: init peer_addr in struct intel_ntb_dev The peer_addr member of intel_ntb_dev is not set, therefore when acquiring ntb_peer_db and ntb_peer_spad we only get the offset rather than the actual physical address. Adding fix to correct that. Signed-off-by: Dave Jiang Acked-by: Allen Hubbe Signed-off-by: Jon Mason --- drivers/ntb/hw/intel/ntb_hw_intel.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/ntb/hw/intel/ntb_hw_intel.c b/drivers/ntb/hw/intel/ntb_hw_intel.c index 1ee61d92c54b..7310a261c858 100644 --- a/drivers/ntb/hw/intel/ntb_hw_intel.c +++ b/drivers/ntb/hw/intel/ntb_hw_intel.c @@ -1755,6 +1755,8 @@ static int xeon_setup_b2b_mw(struct intel_ntb_dev *ndev, XEON_B2B_MIN_SIZE); if (!ndev->peer_mmio) return -EIO; + + ndev->peer_addr = pci_resource_start(pdev, b2b_bar); } return 0; @@ -2019,6 +2021,7 @@ static int intel_ntb_init_pci(struct intel_ntb_dev *ndev, struct pci_dev *pdev) goto err_mmio; } ndev->peer_mmio = ndev->self_mmio; + ndev->peer_addr = pci_resource_start(pdev, 0); return 0; From 819baf885953b588b63bef28e5598daf9ed4ddf9 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 14 Oct 2016 10:34:18 +0300 Subject: [PATCH 307/319] ntb_perf: potential info leak in debugfs This is a static checker warning, not something I'm desperately concerned about. But snprintf() returns the number of bytes that would have been copied if there were space. We really care about the number of bytes that actually were copied so we should use scnprintf() instead. It probably won't overrun, and in that case we may as well just use sprintf() but these sorts of things make static checkers and code reviewers happier. Signed-off-by: Dan Carpenter Acked-by: Dave Jiang Signed-off-by: Jon Mason --- drivers/ntb/test/ntb_perf.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/ntb/test/ntb_perf.c b/drivers/ntb/test/ntb_perf.c index e065b695200d..e75d4fdc0866 100644 --- a/drivers/ntb/test/ntb_perf.c +++ b/drivers/ntb/test/ntb_perf.c @@ -589,7 +589,7 @@ static ssize_t debugfs_run_read(struct file *filp, char __user *ubuf, return -ENOMEM; if (mutex_is_locked(&perf->run_mutex)) { - out_off = snprintf(buf, 64, "running\n"); + out_off = scnprintf(buf, 64, "running\n"); goto read_from_buf; } @@ -600,14 +600,14 @@ static ssize_t debugfs_run_read(struct file *filp, char __user *ubuf, break; if (pctx->status) { - out_off += snprintf(buf + out_off, 1024 - out_off, + out_off += scnprintf(buf + out_off, 1024 - out_off, "%d: error %d\n", i, pctx->status); continue; } rate = div64_u64(pctx->copied, pctx->diff_us); - out_off += snprintf(buf + out_off, 1024 - out_off, + out_off += scnprintf(buf + out_off, 1024 - out_off, "%d: copied %llu bytes in %llu usecs, %llu MBytes/s\n", i, pctx->copied, pctx->diff_us, rate); } From b15efc38626f20f3fc8b831b826b50740d90dab9 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 14 Nov 2016 11:14:37 -0200 Subject: [PATCH 308/319] gp8psk-fe: add missing MODULE_foo() macros This file was converted to a separate module at commit 7a0786c19d65 ("gp8psk: Fix DVB frontend attach"), because the DVB attach routines require it to work. However, I forgot to copy the MODULE_foo() macros from the original module, causing this warning: WARNING: modpost: missing MODULE_LICENSE() in drivers/media/dvb-frontends/gp8psk-fe.o Reported-by: Stephen Rothwell Fixes: 7a0786c19d65 ("gp8psk: Fix DVB frontend attach") Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Linus Torvalds --- drivers/media/dvb-frontends/gp8psk-fe.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/drivers/media/dvb-frontends/gp8psk-fe.c b/drivers/media/dvb-frontends/gp8psk-fe.c index be19afeed7a9..93f59bfea092 100644 --- a/drivers/media/dvb-frontends/gp8psk-fe.c +++ b/drivers/media/dvb-frontends/gp8psk-fe.c @@ -1,5 +1,5 @@ -/* DVB USB compliant Linux driver for the - * - GENPIX 8pks/qpsk/DCII USB2.0 DVB-S module +/* + * Frontend driver for the GENPIX 8pks/qpsk/DCII USB2.0 DVB-S module * * Copyright (C) 2006,2007 Alan Nisota (alannisota@gmail.com) * Copyright (C) 2006,2007 Genpix Electronics (genpix@genpix-electronics.com) @@ -8,11 +8,9 @@ * * This module is based off the vp7045 and vp702x modules * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the Free - * Software Foundation, version 2. - * - * see Documentation/dvb/README.dvb-usb for more information + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation, version 2. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt @@ -395,3 +393,8 @@ static struct dvb_frontend_ops gp8psk_fe_ops = { .dishnetwork_send_legacy_command = gp8psk_fe_send_legacy_dish_cmd, .enable_high_lnb_voltage = gp8psk_fe_enable_high_lnb_voltage }; + +MODULE_AUTHOR("Alan Nisota "); +MODULE_DESCRIPTION("Frontend Driver for Genpix DVB-S"); +MODULE_VERSION("1.1"); +MODULE_LICENSE("GPL"); From f5c9f9c72395c3291c2e35c905dedae2b98475a4 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 14 Nov 2016 09:31:52 -0800 Subject: [PATCH 309/319] Revert "printk: make reading the kernel log flush pending lines" This reverts commit bfd8d3f23b51018388be0411ccbc2d56277fe294. It turns out that this flushes things much too aggressiverly, and causes lines to break up when the system logger races with new continuation lines being printed. There's a pending patch to make printk() flushing much more straightforward, but it's too invasive for 4.9, so in the meantime let's just not make the system message logging flush continuation lines. They'll be flushed by the final newline anyway. Suggested-by: Petr Mladek Signed-off-by: Linus Torvalds --- kernel/printk/printk.c | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 5028f4fd504a..f7a55e9ff2f7 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -783,8 +783,6 @@ static ssize_t devkmsg_write(struct kiocb *iocb, struct iov_iter *from) return ret; } -static void cont_flush(void); - static ssize_t devkmsg_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { @@ -800,7 +798,6 @@ static ssize_t devkmsg_read(struct file *file, char __user *buf, if (ret) return ret; raw_spin_lock_irq(&logbuf_lock); - cont_flush(); while (user->seq == log_next_seq) { if (file->f_flags & O_NONBLOCK) { ret = -EAGAIN; @@ -863,7 +860,6 @@ static loff_t devkmsg_llseek(struct file *file, loff_t offset, int whence) return -ESPIPE; raw_spin_lock_irq(&logbuf_lock); - cont_flush(); switch (whence) { case SEEK_SET: /* the first record */ @@ -902,7 +898,6 @@ static unsigned int devkmsg_poll(struct file *file, poll_table *wait) poll_wait(file, &log_wait, wait); raw_spin_lock_irq(&logbuf_lock); - cont_flush(); if (user->seq < log_next_seq) { /* return error when data has vanished underneath us */ if (user->seq < log_first_seq) @@ -1289,7 +1284,6 @@ static int syslog_print(char __user *buf, int size) size_t skip; raw_spin_lock_irq(&logbuf_lock); - cont_flush(); if (syslog_seq < log_first_seq) { /* messages are gone, move to first one */ syslog_seq = log_first_seq; @@ -1349,7 +1343,6 @@ static int syslog_print_all(char __user *buf, int size, bool clear) return -ENOMEM; raw_spin_lock_irq(&logbuf_lock); - cont_flush(); if (buf) { u64 next_seq; u64 seq; @@ -1511,7 +1504,6 @@ int do_syslog(int type, char __user *buf, int len, int source) /* Number of chars in the log buffer */ case SYSLOG_ACTION_SIZE_UNREAD: raw_spin_lock_irq(&logbuf_lock); - cont_flush(); if (syslog_seq < log_first_seq) { /* messages are gone, move to first one */ syslog_seq = log_first_seq; @@ -3028,7 +3020,6 @@ void kmsg_dump(enum kmsg_dump_reason reason) dumper->active = true; raw_spin_lock_irqsave(&logbuf_lock, flags); - cont_flush(); dumper->cur_seq = clear_seq; dumper->cur_idx = clear_idx; dumper->next_seq = log_next_seq; @@ -3119,7 +3110,6 @@ bool kmsg_dump_get_line(struct kmsg_dumper *dumper, bool syslog, bool ret; raw_spin_lock_irqsave(&logbuf_lock, flags); - cont_flush(); ret = kmsg_dump_get_line_nolock(dumper, syslog, line, size, len); raw_spin_unlock_irqrestore(&logbuf_lock, flags); @@ -3162,7 +3152,6 @@ bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog, goto out; raw_spin_lock_irqsave(&logbuf_lock, flags); - cont_flush(); if (dumper->cur_seq < log_first_seq) { /* messages are gone, move to first available one */ dumper->cur_seq = log_first_seq; From ee2bd216e1fa9fa980e6ac702e5973d157c40c48 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 14 Nov 2016 09:46:08 -0800 Subject: [PATCH 310/319] ASoC: lpass-platform: fix uninitialized variable In commit 022d00ee0b55 ("ASoC: lpass-platform: Fix broken pcm data usage") the stream specific information initialization was broken, with the dma channel information not being initialized if there was no alloc_dma_channel() helper function. Before that, the DMA channel number was implicitly initialized to zero because the backing store was allocated with devm_kzalloc(). When the init code was rewritten, that implicit initialization was lost, and gcc rightfully complains about an uninitialized variable being used. Cc: Srinivas Kandagatla Cc: Mark Brown Signed-off-by: Linus Torvalds --- sound/soc/qcom/lpass-platform.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/qcom/lpass-platform.c b/sound/soc/qcom/lpass-platform.c index 07000f53db44..b392e51de94d 100644 --- a/sound/soc/qcom/lpass-platform.c +++ b/sound/soc/qcom/lpass-platform.c @@ -75,6 +75,7 @@ static int lpass_platform_pcmops_open(struct snd_pcm_substream *substream) data->i2s_port = cpu_dai->driver->id; runtime->private_data = data; + dma_ch = 0; if (v->alloc_dma_channel) dma_ch = v->alloc_dma_channel(drvdata, dir); if (dma_ch < 0) From 7020637bdf59589a403e01aca128bef643404317 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Sat, 12 Nov 2016 17:20:30 +0000 Subject: [PATCH 311/319] ps3_gelic: fix spelling mistake in debug message Trivial fix to spelling mistake "unmached" to "unmatched" in debug message. Signed-off-by: Colin Ian King Signed-off-by: David S. Miller --- drivers/net/ethernet/toshiba/ps3_gelic_wireless.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/toshiba/ps3_gelic_wireless.c b/drivers/net/ethernet/toshiba/ps3_gelic_wireless.c index 446ea580ad42..928c1dca2673 100644 --- a/drivers/net/ethernet/toshiba/ps3_gelic_wireless.c +++ b/drivers/net/ethernet/toshiba/ps3_gelic_wireless.c @@ -1694,7 +1694,7 @@ struct gelic_wl_scan_info *gelic_wl_find_best_bss(struct gelic_wl_info *wl) pr_debug("%s: bssid matched\n", __func__); break; } else { - pr_debug("%s: bssid unmached\n", __func__); + pr_debug("%s: bssid unmatched\n", __func__); continue; } } From 5d0d4b91bf627f14f95167b738d524156c9d440b Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Sun, 13 Nov 2016 13:01:32 +0800 Subject: [PATCH 312/319] Revert "bnx2: Reset device during driver initialization" This reverts commit 3e1be7ad2d38c6bd6aeef96df9bd0a7822f4e51c. When people build bnx2 driver into kernel, it will fail to detect and load firmware because firmware is contained in initramfs and initramfs has not been uncompressed yet during do_initcalls. So revert commit 3e1be7a and work out a new way in the later patch. Signed-off-by: Baoquan He Acked-by: Paul Menzel Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnx2.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnx2.c b/drivers/net/ethernet/broadcom/bnx2.c index b3791b394715..c55797291b57 100644 --- a/drivers/net/ethernet/broadcom/bnx2.c +++ b/drivers/net/ethernet/broadcom/bnx2.c @@ -6361,6 +6361,10 @@ bnx2_open(struct net_device *dev) struct bnx2 *bp = netdev_priv(dev); int rc; + rc = bnx2_request_firmware(bp); + if (rc < 0) + goto out; + netif_carrier_off(dev); bnx2_disable_int(bp); @@ -6429,6 +6433,7 @@ bnx2_open(struct net_device *dev) bnx2_free_irq(bp); bnx2_free_mem(bp); bnx2_del_napi(bp); + bnx2_release_firmware(bp); goto out; } @@ -8575,12 +8580,6 @@ bnx2_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) pci_set_drvdata(pdev, dev); - rc = bnx2_request_firmware(bp); - if (rc < 0) - goto error; - - - bnx2_reset_chip(bp, BNX2_DRV_MSG_CODE_RESET); memcpy(dev->dev_addr, bp->mac_addr, ETH_ALEN); dev->hw_features = NETIF_F_IP_CSUM | NETIF_F_SG | @@ -8613,7 +8612,6 @@ bnx2_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) return 0; error: - bnx2_release_firmware(bp); pci_iounmap(pdev, bp->regview); pci_release_regions(pdev); pci_disable_device(pdev); From 6df77862f63f389df3b1ad879738e04440d7385d Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Sun, 13 Nov 2016 13:01:33 +0800 Subject: [PATCH 313/319] bnx2: Wait for in-flight DMA to complete at probe stage In-flight DMA from 1st kernel could continue going in kdump kernel. New io-page table has been created before bnx2 does reset at open stage. We have to wait for the in-flight DMA to complete to avoid it look up into the newly created io-page table at probe stage. Suggested-by: Michael Chan Signed-off-by: Baoquan He Acked-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnx2.c | 38 +++++++++++++++++++++++----- 1 file changed, 32 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnx2.c b/drivers/net/ethernet/broadcom/bnx2.c index c55797291b57..1f7034d739b0 100644 --- a/drivers/net/ethernet/broadcom/bnx2.c +++ b/drivers/net/ethernet/broadcom/bnx2.c @@ -49,6 +49,7 @@ #include #include #include +#include #if IS_ENABLED(CONFIG_CNIC) #define BCM_CNIC 1 @@ -4764,15 +4765,16 @@ bnx2_setup_msix_tbl(struct bnx2 *bp) BNX2_WR(bp, BNX2_PCI_GRC_WINDOW3_ADDR, BNX2_MSIX_PBA_ADDR); } -static int -bnx2_reset_chip(struct bnx2 *bp, u32 reset_code) +static void +bnx2_wait_dma_complete(struct bnx2 *bp) { u32 val; - int i, rc = 0; - u8 old_port; + int i; - /* Wait for the current PCI transaction to complete before - * issuing a reset. */ + /* + * Wait for the current PCI transaction to complete before + * issuing a reset. + */ if ((BNX2_CHIP(bp) == BNX2_CHIP_5706) || (BNX2_CHIP(bp) == BNX2_CHIP_5708)) { BNX2_WR(bp, BNX2_MISC_ENABLE_CLR_BITS, @@ -4796,6 +4798,21 @@ bnx2_reset_chip(struct bnx2 *bp, u32 reset_code) } } + return; +} + + +static int +bnx2_reset_chip(struct bnx2 *bp, u32 reset_code) +{ + u32 val; + int i, rc = 0; + u8 old_port; + + /* Wait for the current PCI transaction to complete before + * issuing a reset. */ + bnx2_wait_dma_complete(bp); + /* Wait for the firmware to tell us it is ok to issue a reset. */ bnx2_fw_sync(bp, BNX2_DRV_MSG_DATA_WAIT0 | reset_code, 1, 1); @@ -8580,6 +8597,15 @@ bnx2_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) pci_set_drvdata(pdev, dev); + /* + * In-flight DMA from 1st kernel could continue going in kdump kernel. + * New io-page table has been created before bnx2 does reset at open stage. + * We have to wait for the in-flight DMA to complete to avoid it look up + * into the newly created io-page table. + */ + if (is_kdump_kernel()) + bnx2_wait_dma_complete(bp); + memcpy(dev->dev_addr, bp->mac_addr, ETH_ALEN); dev->hw_features = NETIF_F_IP_CSUM | NETIF_F_SG | From 5bf35ddfee052d44f39ebaa395d87101c8918405 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sun, 13 Nov 2016 21:44:37 +0800 Subject: [PATCH 314/319] sctp: change sk state only when it has assocs in sctp_shutdown Now when users shutdown a sock with SEND_SHUTDOWN in sctp, even if this sock has no connection (assoc), sk state would be changed to SCTP_SS_CLOSING, which is not as we expect. Besides, after that if users try to listen on this sock, kernel could even panic when it dereference sctp_sk(sk)->bind_hash in sctp_inet_listen, as bind_hash is null when sock has no assoc. This patch is to move sk state change after checking sk assocs is not empty, and also merge these two if() conditions and reduce indent level. Fixes: d46e416c11c8 ("sctp: sctp should change socket state when shutdown is received") Reported-by: Andrey Konovalov Tested-by: Andrey Konovalov Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Acked-by: Neil Horman Signed-off-by: David S. Miller --- net/sctp/socket.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index faa48ff5cf4b..f23ad913dc7a 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -4285,19 +4285,18 @@ static void sctp_shutdown(struct sock *sk, int how) { struct net *net = sock_net(sk); struct sctp_endpoint *ep; - struct sctp_association *asoc; if (!sctp_style(sk, TCP)) return; - if (how & SEND_SHUTDOWN) { + ep = sctp_sk(sk)->ep; + if (how & SEND_SHUTDOWN && !list_empty(&ep->asocs)) { + struct sctp_association *asoc; + sk->sk_state = SCTP_SS_CLOSING; - ep = sctp_sk(sk)->ep; - if (!list_empty(&ep->asocs)) { - asoc = list_entry(ep->asocs.next, - struct sctp_association, asocs); - sctp_primitive_SHUTDOWN(net, asoc, NULL); - } + asoc = list_entry(ep->asocs.next, + struct sctp_association, asocs); + sctp_primitive_SHUTDOWN(net, asoc, NULL); } } From 977c1f9c8c022d0173181766b34a0db3705265a4 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Mon, 7 Nov 2016 15:14:20 -0800 Subject: [PATCH 315/319] ftrace: Ignore FTRACE_FL_DISABLED while walking dyn_ftrace records ftrace_shutdown() checks for sanity of ftrace records and if dyn_ftrace->flags is not zero, it will warn. It can happen that 'flags' are set to FTRACE_FL_DISABLED at this point, since some module was loaded, but before ftrace_module_enable() cleared the flags for this module. In other words the module.c is doing: ftrace_module_init(mod); // calls ftrace_update_code() that sets flags=FTRACE_FL_DISABLED ... // here ftrace_shutdown() is called that warns, since err = prepare_coming_module(mod); // didn't have a chance to clear FTRACE_FL_DISABLED Fix it by ignoring disabled records. It's similar to what __ftrace_hash_rec_update() is already doing. Link: http://lkml.kernel.org/r/1478560460-3818619-1-git-send-email-ast@fb.com Cc: stable@vger.kernel.org Fixes: b7ffffbb46f2 "ftrace: Add infrastructure for delayed enabling of module functions" Signed-off-by: Alexei Starovoitov Signed-off-by: Steven Rostedt --- kernel/trace/ftrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 2050a7652a86..326498baab83 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -2763,7 +2763,7 @@ static int ftrace_shutdown(struct ftrace_ops *ops, int command) struct dyn_ftrace *rec; do_for_each_ftrace_rec(pg, rec) { - if (FTRACE_WARN_ON_ONCE(rec->flags)) + if (FTRACE_WARN_ON_ONCE(rec->flags & ~FTRACE_FL_DISABLED)) pr_warn(" %pS flags:%lx\n", (void *)rec->ip, rec->flags); } while_for_each_ftrace_rec(); From 546fece4eae871f033925ccf0ff2b740725ae915 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Mon, 14 Nov 2016 16:31:49 -0500 Subject: [PATCH 316/319] ftrace: Add more checks for FTRACE_FL_DISABLED in processing ip records When a module is first loaded and its function ip records are added to the ftrace list of functions to modify, they are set to DISABLED, as their text is still in a read only state. When the module is fully loaded, and can be updated, the flag is cleared, and if their's any functions that should be tracing them, it is updated at that moment. But there's several locations that do record accounting and should ignore records that are marked as disabled, or they can cause issues. Alexei already fixed one location, but others need to be addressed. Cc: stable@vger.kernel.org Fixes: b7ffffbb46f2 "ftrace: Add infrastructure for delayed enabling of module functions" Reported-by: Alexei Starovoitov Signed-off-by: Steven Rostedt --- kernel/trace/ftrace.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 326498baab83..da87b3cba5b3 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1862,6 +1862,10 @@ static int __ftrace_hash_update_ipmodify(struct ftrace_ops *ops, /* Update rec->flags */ do_for_each_ftrace_rec(pg, rec) { + + if (rec->flags & FTRACE_FL_DISABLED) + continue; + /* We need to update only differences of filter_hash */ in_old = !!ftrace_lookup_ip(old_hash, rec->ip); in_new = !!ftrace_lookup_ip(new_hash, rec->ip); @@ -1884,6 +1888,10 @@ static int __ftrace_hash_update_ipmodify(struct ftrace_ops *ops, /* Roll back what we did above */ do_for_each_ftrace_rec(pg, rec) { + + if (rec->flags & FTRACE_FL_DISABLED) + continue; + if (rec == end) goto err_out; @@ -2397,6 +2405,10 @@ void __weak ftrace_replace_code(int enable) return; do_for_each_ftrace_rec(pg, rec) { + + if (rec->flags & FTRACE_FL_DISABLED) + continue; + failed = __ftrace_replace_code(rec, enable); if (failed) { ftrace_bug(failed, rec); @@ -3598,6 +3610,10 @@ match_records(struct ftrace_hash *hash, char *func, int len, char *mod) goto out_unlock; do_for_each_ftrace_rec(pg, rec) { + + if (rec->flags & FTRACE_FL_DISABLED) + continue; + if (ftrace_match_record(rec, &func_g, mod_match, exclude_mod)) { ret = enter_record(hash, rec, clear_filter); if (ret < 0) { @@ -3793,6 +3809,9 @@ register_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops, do_for_each_ftrace_rec(pg, rec) { + if (rec->flags & FTRACE_FL_DISABLED) + continue; + if (!ftrace_match_record(rec, &func_g, NULL, 0)) continue; @@ -4685,6 +4704,9 @@ ftrace_set_func(unsigned long *array, int *idx, int size, char *buffer) do_for_each_ftrace_rec(pg, rec) { + if (rec->flags & FTRACE_FL_DISABLED) + continue; + if (ftrace_match_record(rec, &func_g, NULL, 0)) { /* if it is in the array */ exists = false; From c51e424dc79e1428afc4d697cdb6a07f7af70cbf Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Sun, 13 Nov 2016 17:50:35 -0800 Subject: [PATCH 317/319] net: stmmac: Fix lack of link transition for fixed PHYs Commit 52f95bbfcf72 ("stmmac: fix adjust link call in case of a switch is attached") added some logic to avoid polling the fixed PHY and therefore invoking the adjust_link callback more than once, since this is a fixed PHY and link events won't be generated. This works fine the first time, because we start with phydev->irq = PHY_POLL, so we call adjust_link, then we set phydev->irq = PHY_IGNORE_INTERRUPT and we stop polling the PHY. Now, if we called ndo_close(), which calls both phy_stop() and does an explicit netif_carrier_off(), we end up with a link down. Upon calling ndo_open() again, despite starting the PHY state machine, we have PHY_IGNORE_INTERRUPT set, and we generate no link event at all, so the link is permanently down. Fixes: 52f95bbfcf72 ("stmmac: fix adjust link call in case of a switch is attached") Signed-off-by: Florian Fainelli Acked-by: Giuseppe Cavallaro Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 48e71fad4210..e2c94ec4edd0 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -880,6 +880,13 @@ static int stmmac_init_phy(struct net_device *dev) return -ENODEV; } + /* stmmac_adjust_link will change this to PHY_IGNORE_INTERRUPT to avoid + * subsequent PHY polling, make sure we force a link transition if + * we have a UP/DOWN/UP transition + */ + if (phydev->is_pseudo_fixed_link) + phydev->irq = PHY_POLL; + pr_debug("stmmac_init_phy: %s: attached to PHY (UID 0x%x)" " Link = %d\n", dev->name, phydev->phy_id, phydev->link); From ac571de999e14b87890cb960ad6f03fbdde6abc8 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 14 Nov 2016 11:26:32 +0100 Subject: [PATCH 318/319] mlxsw: spectrum_router: Flush FIB tables during fini Since commit b45f64d16d45 ("mlxsw: spectrum_router: Use FIB notifications instead of switchdev calls") we reflect to the device the entire FIB table and not only FIBs that point to netdevs created by the driver. During module removal, FIBs of the second type are removed following NETDEV_UNREGISTER events sent. The other FIBs are still present in both the driver's cache and the device's table. Fix this by iterating over all the FIB tables in the device and flush them. There's no need to take locks, as we're the only writer. Fixes: b45f64d16d45 ("mlxsw: spectrum_router: Use FIB notifications instead of switchdev calls") Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlxsw/spectrum_router.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index cbeeddd70c5a..e83072da6272 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -594,8 +594,11 @@ static int mlxsw_sp_vrs_init(struct mlxsw_sp *mlxsw_sp) return 0; } +static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp); + static void mlxsw_sp_vrs_fini(struct mlxsw_sp *mlxsw_sp) { + mlxsw_sp_router_fib_flush(mlxsw_sp); kfree(mlxsw_sp->router.vrs); } @@ -1867,18 +1870,18 @@ static int mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp) return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl); } -static void mlxsw_sp_router_fib4_abort(struct mlxsw_sp *mlxsw_sp) +static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp) { struct mlxsw_resources *resources; struct mlxsw_sp_fib_entry *fib_entry; struct mlxsw_sp_fib_entry *tmp; struct mlxsw_sp_vr *vr; int i; - int err; resources = mlxsw_core_resources_get(mlxsw_sp->core); for (i = 0; i < resources->max_virtual_routers; i++) { vr = &mlxsw_sp->router.vrs[i]; + if (!vr->used) continue; @@ -1894,6 +1897,13 @@ static void mlxsw_sp_router_fib4_abort(struct mlxsw_sp *mlxsw_sp) break; } } +} + +static void mlxsw_sp_router_fib4_abort(struct mlxsw_sp *mlxsw_sp) +{ + int err; + + mlxsw_sp_router_fib_flush(mlxsw_sp); mlxsw_sp->router.aborted = true; err = mlxsw_sp_router_set_abort_trap(mlxsw_sp); if (err) From e123386bc31bbf467dc558f2f919de0b8b4ba58c Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Mon, 7 Nov 2016 14:32:02 -0500 Subject: [PATCH 319/319] tile: handle __ro_after_init like parisc does The tile architecture already marks RO_DATA as read-only in the kernel, so grouping RO_AFTER_INIT_DATA with RO_DATA, as is done by default, means the kernel faults in init when it tries to write to RO_AFTER_INIT_DATA. For now, just arrange that __ro_after_init is handled like __write_once, i.e. __read_mostly. Reviewed-by: Kees Cook Signed-off-by: Chris Metcalf --- arch/tile/include/asm/cache.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/tile/include/asm/cache.h b/arch/tile/include/asm/cache.h index 6160761d5f61..4810e48dbbbf 100644 --- a/arch/tile/include/asm/cache.h +++ b/arch/tile/include/asm/cache.h @@ -61,4 +61,7 @@ */ #define __write_once __read_mostly +/* __ro_after_init is the generic name for the tile arch __write_once. */ +#define __ro_after_init __read_mostly + #endif /* _ASM_TILE_CACHE_H */