From 6cd249cfad68a231336983e2216d75b3ddfde1d6 Mon Sep 17 00:00:00 2001 From: Tzung-Bi Shih Date: Mon, 8 Jul 2019 22:19:01 +0800 Subject: [PATCH 001/482] ASoC: max98357a: use mdelay for sdmode-delay max98357a_daiops_trigger() is possible to be called in atomic context if the .nonatomic flag is equal to 0 in the DAI links. When cancel_delayed_work_sync() in max98357a_daiops_trigger() is called in atomic context, kernel emits the following message: "BUG: sleeping function called from invalid context". According to the DT binding document, value less than or equal to 5ms of sdmod-delay should be sufficient to avoid the pop noise. Use mdelay (i.e. busy loop) for such low delay should be acceptable. Fixes: cec5b01f8f1c ("ASoC: max98357a: avoid speaker pop when playback startup") Signed-off-by: Tzung-Bi Shih Link: https://lore.kernel.org/r/20190708141901.68797-1-tzungbi@google.com Signed-off-by: Mark Brown --- sound/soc/codecs/max98357a.c | 25 ++++--------------------- 1 file changed, 4 insertions(+), 21 deletions(-) diff --git a/sound/soc/codecs/max98357a.c b/sound/soc/codecs/max98357a.c index 6f0e28f903bf..16313b973eaa 100644 --- a/sound/soc/codecs/max98357a.c +++ b/sound/soc/codecs/max98357a.c @@ -20,20 +20,10 @@ #include struct max98357a_priv { - struct delayed_work enable_sdmode_work; struct gpio_desc *sdmode; unsigned int sdmode_delay; }; -static void max98357a_enable_sdmode_work(struct work_struct *work) -{ - struct max98357a_priv *max98357a = - container_of(work, struct max98357a_priv, - enable_sdmode_work.work); - - gpiod_set_value(max98357a->sdmode, 1); -} - static int max98357a_daiops_trigger(struct snd_pcm_substream *substream, int cmd, struct snd_soc_dai *dai) { @@ -46,14 +36,12 @@ static int max98357a_daiops_trigger(struct snd_pcm_substream *substream, case SNDRV_PCM_TRIGGER_START: case SNDRV_PCM_TRIGGER_RESUME: case SNDRV_PCM_TRIGGER_PAUSE_RELEASE: - queue_delayed_work(system_power_efficient_wq, - &max98357a->enable_sdmode_work, - msecs_to_jiffies(max98357a->sdmode_delay)); + mdelay(max98357a->sdmode_delay); + gpiod_set_value(max98357a->sdmode, 1); break; case SNDRV_PCM_TRIGGER_STOP: case SNDRV_PCM_TRIGGER_SUSPEND: case SNDRV_PCM_TRIGGER_PAUSE_PUSH: - cancel_delayed_work_sync(&max98357a->enable_sdmode_work); gpiod_set_value(max98357a->sdmode, 0); break; } @@ -112,30 +100,25 @@ static int max98357a_platform_probe(struct platform_device *pdev) int ret; max98357a = devm_kzalloc(&pdev->dev, sizeof(*max98357a), GFP_KERNEL); - if (!max98357a) return -ENOMEM; max98357a->sdmode = devm_gpiod_get_optional(&pdev->dev, "sdmode", GPIOD_OUT_LOW); - if (IS_ERR(max98357a->sdmode)) return PTR_ERR(max98357a->sdmode); ret = device_property_read_u32(&pdev->dev, "sdmode-delay", &max98357a->sdmode_delay); - if (ret) { max98357a->sdmode_delay = 0; dev_dbg(&pdev->dev, - "no optional property 'sdmode-delay' found, default: no delay\n"); + "no optional property 'sdmode-delay' found, " + "default: no delay\n"); } dev_set_drvdata(&pdev->dev, max98357a); - INIT_DELAYED_WORK(&max98357a->enable_sdmode_work, - max98357a_enable_sdmode_work); - return devm_snd_soc_register_component(&pdev->dev, &max98357a_component_driver, &max98357a_dai_driver, 1); From 72365164cbefe3afa7a146d27d502ed688bf7323 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 9 Jul 2019 10:22:16 -0700 Subject: [PATCH 002/482] ASoC: rt1308: Remove executable attribute from source files These are source files not executable. Signed-off-by: Joe Perches Link: https://lore.kernel.org/r/d198a3e6ed3a0e9070afeb6aca69903c3e985149.camel@perches.com Signed-off-by: Mark Brown --- sound/soc/codecs/rt1308.c | 0 sound/soc/codecs/rt1308.h | 0 2 files changed, 0 insertions(+), 0 deletions(-) mode change 100755 => 100644 sound/soc/codecs/rt1308.c mode change 100755 => 100644 sound/soc/codecs/rt1308.h diff --git a/sound/soc/codecs/rt1308.c b/sound/soc/codecs/rt1308.c old mode 100755 new mode 100644 diff --git a/sound/soc/codecs/rt1308.h b/sound/soc/codecs/rt1308.h old mode 100755 new mode 100644 From 9e944c9be2456159fb8c36b0ba3170b2f01c3887 Mon Sep 17 00:00:00 2001 From: Kirill Marinushkin Date: Wed, 10 Jul 2019 07:51:35 +0200 Subject: [PATCH 003/482] ASoC: Relocate my e-mail to .com domain zone Signed-off-by: Kirill Marinushkin Link: https://lore.kernel.org/r/20190710055135.21377-1-kmarinushkin@birdec.com Signed-off-by: Mark Brown --- MAINTAINERS | 2 +- sound/soc/codecs/pcm3060-i2c.c | 4 ++-- sound/soc/codecs/pcm3060-spi.c | 4 ++-- sound/soc/codecs/pcm3060.c | 4 ++-- sound/soc/codecs/pcm3060.h | 2 +- 5 files changed, 8 insertions(+), 8 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 3e75361f9b3b..11db05b56744 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -15795,7 +15795,7 @@ S: Maintained F: drivers/net/ethernet/ti/netcp* TI PCM3060 ASoC CODEC DRIVER -M: Kirill Marinushkin +M: Kirill Marinushkin L: alsa-devel@alsa-project.org (moderated for non-subscribers) S: Maintained F: Documentation/devicetree/bindings/sound/pcm3060.txt diff --git a/sound/soc/codecs/pcm3060-i2c.c b/sound/soc/codecs/pcm3060-i2c.c index cdc8314882bc..abcdeb922201 100644 --- a/sound/soc/codecs/pcm3060-i2c.c +++ b/sound/soc/codecs/pcm3060-i2c.c @@ -2,7 +2,7 @@ // // PCM3060 I2C driver // -// Copyright (C) 2018 Kirill Marinushkin +// Copyright (C) 2018 Kirill Marinushkin #include #include @@ -56,5 +56,5 @@ static struct i2c_driver pcm3060_i2c_driver = { module_i2c_driver(pcm3060_i2c_driver); MODULE_DESCRIPTION("PCM3060 I2C driver"); -MODULE_AUTHOR("Kirill Marinushkin "); +MODULE_AUTHOR("Kirill Marinushkin "); MODULE_LICENSE("GPL v2"); diff --git a/sound/soc/codecs/pcm3060-spi.c b/sound/soc/codecs/pcm3060-spi.c index f6f19fa80932..3b79734b832b 100644 --- a/sound/soc/codecs/pcm3060-spi.c +++ b/sound/soc/codecs/pcm3060-spi.c @@ -2,7 +2,7 @@ // // PCM3060 SPI driver // -// Copyright (C) 2018 Kirill Marinushkin +// Copyright (C) 2018 Kirill Marinushkin #include #include @@ -55,5 +55,5 @@ static struct spi_driver pcm3060_spi_driver = { module_spi_driver(pcm3060_spi_driver); MODULE_DESCRIPTION("PCM3060 SPI driver"); -MODULE_AUTHOR("Kirill Marinushkin "); +MODULE_AUTHOR("Kirill Marinushkin "); MODULE_LICENSE("GPL v2"); diff --git a/sound/soc/codecs/pcm3060.c b/sound/soc/codecs/pcm3060.c index 32b26f1c2282..b2358069cf9b 100644 --- a/sound/soc/codecs/pcm3060.c +++ b/sound/soc/codecs/pcm3060.c @@ -2,7 +2,7 @@ // // PCM3060 codec driver // -// Copyright (C) 2018 Kirill Marinushkin +// Copyright (C) 2018 Kirill Marinushkin #include #include @@ -342,5 +342,5 @@ int pcm3060_probe(struct device *dev) EXPORT_SYMBOL(pcm3060_probe); MODULE_DESCRIPTION("PCM3060 codec driver"); -MODULE_AUTHOR("Kirill Marinushkin "); +MODULE_AUTHOR("Kirill Marinushkin "); MODULE_LICENSE("GPL v2"); diff --git a/sound/soc/codecs/pcm3060.h b/sound/soc/codecs/pcm3060.h index 75931c9a9d85..18d51e5dac2c 100644 --- a/sound/soc/codecs/pcm3060.h +++ b/sound/soc/codecs/pcm3060.h @@ -2,7 +2,7 @@ /* * PCM3060 codec driver * - * Copyright (C) 2018 Kirill Marinushkin + * Copyright (C) 2018 Kirill Marinushkin */ #ifndef _SND_SOC_PCM3060_H From 52db6685932e326ed607644ab7ebdae8c194adda Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Wed, 10 Jul 2019 16:59:55 +0900 Subject: [PATCH 004/482] ASoC: simple_card_utils.h: care NULL dai at asoc_simple_debug_dai() props->xxx_dai might be NULL when DPCM. This patch cares it for debug. Fixes: commit 0580dde59438 ("ASoC: simple-card-utils: add asoc_simple_debug_info()") Signed-off-by: Kuninori Morimoto Link: https://lore.kernel.org/r/87o922gw4u.wl-kuninori.morimoto.gx@renesas.com Signed-off-by: Mark Brown --- include/sound/simple_card_utils.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/include/sound/simple_card_utils.h b/include/sound/simple_card_utils.h index 954563ee2277..985a5f583de4 100644 --- a/include/sound/simple_card_utils.h +++ b/include/sound/simple_card_utils.h @@ -141,6 +141,10 @@ inline void asoc_simple_debug_dai(struct asoc_simple_priv *priv, { struct device *dev = simple_priv_to_dev(priv); + /* dai might be NULL */ + if (!dai) + return; + if (dai->name) dev_dbg(dev, "%s dai name = %s\n", name, dai->name); From 794fcee8da3c0c8a01b08ecad1c181cb0a622868 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Wed, 10 Jul 2019 17:01:12 +0900 Subject: [PATCH 005/482] ASoC: simple-card-utils: care no Platform for DPCM commit 34614739988ad ("ASoC: soc-core: support dai_link with platforms_num != 1") supports multi Platform, and commit 9f3eb91753451 ("ASoC: simple-card-utils: consider CPU-Platform possibility") removed no Platform from simple-card. Multi Platform is now checking both Platform name/of_node are NULL case. But in normal case, DPCM be doesn't have Platform. asoc_simple_canonicalize_platform() try to use CPU of_node to Platform (This is needed for DMAEngine platform case), but it still might be NULL at DPCM be. This patch try to use no Platform after that if Platform of_node is still NULL. It can't probe without this patch. Signed-off-by: Kuninori Morimoto Link: https://lore.kernel.org/r/87muhmgw2o.wl-kuninori.morimoto.gx@renesas.com Signed-off-by: Mark Brown --- sound/soc/generic/simple-card-utils.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sound/soc/generic/simple-card-utils.c b/sound/soc/generic/simple-card-utils.c index ac8678fe55ff..556b1a789629 100644 --- a/sound/soc/generic/simple-card-utils.c +++ b/sound/soc/generic/simple-card-utils.c @@ -349,6 +349,13 @@ void asoc_simple_canonicalize_platform(struct snd_soc_dai_link *dai_link) /* Assumes platform == cpu */ if (!dai_link->platforms->of_node) dai_link->platforms->of_node = dai_link->cpus->of_node; + + /* + * DPCM BE can be no platform. + * Alloced memory will be waste, but not leak. + */ + if (!dai_link->platforms->of_node) + dai_link->num_platforms = 0; } EXPORT_SYMBOL_GPL(asoc_simple_canonicalize_platform); From 724808ad556c15e9473418d082f8aae81dd267f6 Mon Sep 17 00:00:00 2001 From: Wen Yang Date: Wed, 10 Jul 2019 15:25:06 +0800 Subject: [PATCH 006/482] ASoC: simple-card: fix an use-after-free in simple_dai_link_of_dpcm() The node variable is still being used after the of_node_put() call, which may result in use-after-free. Fixes: cfc652a73331 ("ASoC: simple-card: tidyup prefix for snd_soc_codec_conf") Link: https://lore.kernel.org/r/1562743509-30496-2-git-send-email-wen.yang99@zte.com.cn Signed-off-by: Wen Yang Acked-by: Kuninori Morimoto Signed-off-by: Mark Brown --- sound/soc/generic/simple-card.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/sound/soc/generic/simple-card.c b/sound/soc/generic/simple-card.c index e5cde0d5e63c..4117e54884e5 100644 --- a/sound/soc/generic/simple-card.c +++ b/sound/soc/generic/simple-card.c @@ -124,8 +124,6 @@ static int simple_dai_link_of_dpcm(struct asoc_simple_priv *priv, li->link++; - of_node_put(node); - /* For single DAI link & old style of DT node */ if (is_top) prefix = PREFIX; @@ -147,17 +145,17 @@ static int simple_dai_link_of_dpcm(struct asoc_simple_priv *priv, ret = asoc_simple_parse_cpu(np, dai_link, &is_single_links); if (ret) - return ret; + goto out_put_node; ret = asoc_simple_parse_clk_cpu(dev, np, dai_link, dai); if (ret < 0) - return ret; + goto out_put_node; ret = asoc_simple_set_dailink_name(dev, dai_link, "fe.%s", cpus->dai_name); if (ret < 0) - return ret; + goto out_put_node; asoc_simple_canonicalize_cpu(dai_link, is_single_links); } else { @@ -180,17 +178,17 @@ static int simple_dai_link_of_dpcm(struct asoc_simple_priv *priv, ret = asoc_simple_parse_codec(np, dai_link); if (ret < 0) - return ret; + goto out_put_node; ret = asoc_simple_parse_clk_codec(dev, np, dai_link, dai); if (ret < 0) - return ret; + goto out_put_node; ret = asoc_simple_set_dailink_name(dev, dai_link, "be.%s", codecs->dai_name); if (ret < 0) - return ret; + goto out_put_node; /* check "prefix" from top node */ snd_soc_of_parse_node_prefix(top, cconf, codecs->of_node, @@ -208,19 +206,21 @@ static int simple_dai_link_of_dpcm(struct asoc_simple_priv *priv, ret = asoc_simple_parse_tdm(np, dai); if (ret) - return ret; + goto out_put_node; ret = asoc_simple_parse_daifmt(dev, node, codec, prefix, &dai_link->dai_fmt); if (ret < 0) - return ret; + goto out_put_node; dai_link->dpcm_playback = 1; dai_link->dpcm_capture = 1; dai_link->ops = &simple_ops; dai_link->init = asoc_simple_dai_init; - return 0; +out_put_node: + of_node_put(node); + return ret; } static int simple_dai_link_of(struct asoc_simple_priv *priv, From 27862d5a3325bc531ec15e3c607e44aa0fd57f6f Mon Sep 17 00:00:00 2001 From: Wen Yang Date: Wed, 10 Jul 2019 15:25:07 +0800 Subject: [PATCH 007/482] ASoC: simple-card: fix an use-after-free in simple_for_each_link() The codec variable is still being used after the of_node_put() call, which may result in use-after-free. Fixes: d947cdfd4be2 ("ASoC: simple-card: cleanup DAI link loop method - step1") Link: https://lore.kernel.org/r/1562743509-30496-3-git-send-email-wen.yang99@zte.com.cn Signed-off-by: Wen Yang Acked-by: Kuninori Morimoto Signed-off-by: Mark Brown --- sound/soc/generic/simple-card.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/generic/simple-card.c b/sound/soc/generic/simple-card.c index 4117e54884e5..ef849151ba56 100644 --- a/sound/soc/generic/simple-card.c +++ b/sound/soc/generic/simple-card.c @@ -364,8 +364,6 @@ static int simple_for_each_link(struct asoc_simple_priv *priv, goto error; } - of_node_put(codec); - /* get convert-xxx property */ memset(&adata, 0, sizeof(adata)); for_each_child_of_node(node, np) @@ -387,11 +385,13 @@ static int simple_for_each_link(struct asoc_simple_priv *priv, ret = func_noml(priv, np, codec, li, is_top); if (ret < 0) { + of_node_put(codec); of_node_put(np); goto error; } } + of_node_put(codec); node = of_get_next_child(top, node); } while (!is_top && node); From aa2e362cb6b3f5ca88093ada01e1a0ace8a517b2 Mon Sep 17 00:00:00 2001 From: Wen Yang Date: Wed, 10 Jul 2019 15:25:08 +0800 Subject: [PATCH 008/482] ASoC: audio-graph-card: fix use-after-free in graph_dai_link_of_dpcm() After calling of_node_put() on the ports, port, and node variables, they are still being used, which may result in use-after-free. Fix this issue by calling of_node_put() after the last usage. Fixes: dd98fbc558a0 ("ASoC: audio-graph-card: cleanup DAI link loop method - step1") Link: https://lore.kernel.org/r/1562743509-30496-4-git-send-email-wen.yang99@zte.com.cn Signed-off-by: Wen Yang Acked-by: Kuninori Morimoto Signed-off-by: Mark Brown --- sound/soc/generic/audio-graph-card.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/sound/soc/generic/audio-graph-card.c b/sound/soc/generic/audio-graph-card.c index e438011f5e45..bddfcfd7bedf 100644 --- a/sound/soc/generic/audio-graph-card.c +++ b/sound/soc/generic/audio-graph-card.c @@ -208,10 +208,6 @@ static int graph_dai_link_of_dpcm(struct asoc_simple_priv *priv, dev_dbg(dev, "link_of DPCM (%pOF)\n", ep); - of_node_put(ports); - of_node_put(port); - of_node_put(node); - if (li->cpu) { int is_single_links = 0; @@ -229,17 +225,17 @@ static int graph_dai_link_of_dpcm(struct asoc_simple_priv *priv, ret = asoc_simple_parse_cpu(ep, dai_link, &is_single_links); if (ret) - return ret; + goto out_put_node; ret = asoc_simple_parse_clk_cpu(dev, ep, dai_link, dai); if (ret < 0) - return ret; + goto out_put_node; ret = asoc_simple_set_dailink_name(dev, dai_link, "fe.%s", cpus->dai_name); if (ret < 0) - return ret; + goto out_put_node; /* card->num_links includes Codec */ asoc_simple_canonicalize_cpu(dai_link, is_single_links); @@ -263,17 +259,17 @@ static int graph_dai_link_of_dpcm(struct asoc_simple_priv *priv, ret = asoc_simple_parse_codec(ep, dai_link); if (ret < 0) - return ret; + goto out_put_node; ret = asoc_simple_parse_clk_codec(dev, ep, dai_link, dai); if (ret < 0) - return ret; + goto out_put_node; ret = asoc_simple_set_dailink_name(dev, dai_link, "be.%s", codecs->dai_name); if (ret < 0) - return ret; + goto out_put_node; /* check "prefix" from top node */ snd_soc_of_parse_node_prefix(top, cconf, codecs->of_node, @@ -293,19 +289,23 @@ static int graph_dai_link_of_dpcm(struct asoc_simple_priv *priv, ret = asoc_simple_parse_tdm(ep, dai); if (ret) - return ret; + goto out_put_node; ret = asoc_simple_parse_daifmt(dev, cpu_ep, codec_ep, NULL, &dai_link->dai_fmt); if (ret < 0) - return ret; + goto out_put_node; dai_link->dpcm_playback = 1; dai_link->dpcm_capture = 1; dai_link->ops = &graph_ops; dai_link->init = asoc_simple_dai_init; - return 0; +out_put_node: + of_node_put(ports); + of_node_put(port); + of_node_put(node); + return ret; } static int graph_dai_link_of(struct asoc_simple_priv *priv, From c152f8491a8d9a4b25afd65a86eb5e55e2a8c380 Mon Sep 17 00:00:00 2001 From: Wen Yang Date: Wed, 10 Jul 2019 15:25:09 +0800 Subject: [PATCH 009/482] ASoC: audio-graph-card: fix an use-after-free in graph_get_dai_id() After calling of_node_put() on the node variable, it is still being used, which may result in use-after-free. Fix this issue by calling of_node_put() after the last usage. Fixes: a0c426fe1433 ("ASoC: simple-card-utils: check "reg" property on asoc_simple_card_get_dai_id()") Link: https://lore.kernel.org/r/1562743509-30496-5-git-send-email-wen.yang99@zte.com.cn Signed-off-by: Wen Yang Acked-by: Kuninori Morimoto Signed-off-by: Mark Brown --- sound/soc/generic/audio-graph-card.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sound/soc/generic/audio-graph-card.c b/sound/soc/generic/audio-graph-card.c index bddfcfd7bedf..343ede8042c3 100644 --- a/sound/soc/generic/audio-graph-card.c +++ b/sound/soc/generic/audio-graph-card.c @@ -63,6 +63,7 @@ static int graph_get_dai_id(struct device_node *ep) struct device_node *endpoint; struct of_endpoint info; int i, id; + u32 *reg; int ret; /* use driver specified DAI ID if exist */ @@ -83,8 +84,9 @@ static int graph_get_dai_id(struct device_node *ep) return info.id; node = of_get_parent(ep); + reg = of_get_property(node, "reg", NULL); of_node_put(node); - if (of_get_property(node, "reg", NULL)) + if (reg) return info.port; } node = of_graph_get_port_parent(ep); From 09297c2f7a5428776369ba3b9904718a358e5559 Mon Sep 17 00:00:00 2001 From: Shuming Fan Date: Thu, 11 Jul 2019 16:22:14 +0800 Subject: [PATCH 010/482] ASoC: rt1011: fix DC calibration offset not applying There are two issues to fix: - DC offset calibration data will be reset after stopping playback. - DC offset calibration data should be applied in the initial setting. Signed-off-by: Shuming Fan Link: https://lore.kernel.org/r/20190711082214.8142-1-shumingf@realtek.com Signed-off-by: Mark Brown --- sound/soc/codecs/rt1011.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/codecs/rt1011.c b/sound/soc/codecs/rt1011.c index 5605b660f4bf..0a6ff13d76e1 100644 --- a/sound/soc/codecs/rt1011.c +++ b/sound/soc/codecs/rt1011.c @@ -39,7 +39,7 @@ static const struct reg_sequence init_list[] = { { RT1011_POWER_9, 0xa840 }, { RT1011_ADC_SET_5, 0x0a20 }, - { RT1011_DAC_SET_2, 0xa232 }, + { RT1011_DAC_SET_2, 0xa032 }, { RT1011_ADC_SET_1, 0x2925 }, { RT1011_SPK_PRO_DC_DET_1, 0xb00c }, @@ -1917,7 +1917,7 @@ static int rt1011_set_bias_level(struct snd_soc_component *component, snd_soc_component_write(component, RT1011_SYSTEM_RESET_2, 0x0000); snd_soc_component_write(component, - RT1011_SYSTEM_RESET_3, 0x0000); + RT1011_SYSTEM_RESET_3, 0x0001); snd_soc_component_write(component, RT1011_SYSTEM_RESET_1, 0x003f); snd_soc_component_write(component, From ec3042ad39d4e2ddbc3a3344f90bb10d8feb53bc Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Thu, 11 Jul 2019 13:10:45 +0900 Subject: [PATCH 011/482] ASoC: audio-graph-card: add missing const at graph_get_dai_id() commit c152f8491a8d9 ("ASoC: audio-graph-card: fix an use-after-free in graph_get_dai_id()") fixups use-after-free issue, but, it need to use "const" for reg. This patch adds it. We will have below without this patch LINUX/sound/soc/generic/audio-graph-card.c: In function 'graph_get_dai_id': LINUX/sound/soc/generic/audio-graph-card.c:87:7: warning: assignment discards\ 'const' qualifier from pointer target type [-Wdiscarded-qualifiers] reg = of_get_property(node, "reg", NULL); Fixes: c152f8491a8d9 ("ASoC: audio-graph-card: fix an use-after-free in graph_get_dai_id()") Signed-off-by: Kuninori Morimoto Acked-by: Wen Yang Link: https://lore.kernel.org/r/87sgrd43ja.wl-kuninori.morimoto.gx@renesas.com Signed-off-by: Mark Brown --- sound/soc/generic/audio-graph-card.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/generic/audio-graph-card.c b/sound/soc/generic/audio-graph-card.c index 343ede8042c3..ebf2ca3249cb 100644 --- a/sound/soc/generic/audio-graph-card.c +++ b/sound/soc/generic/audio-graph-card.c @@ -63,7 +63,7 @@ static int graph_get_dai_id(struct device_node *ep) struct device_node *endpoint; struct of_endpoint info; int i, id; - u32 *reg; + const u32 *reg; int ret; /* use driver specified DAI ID if exist */ From ae8cc91a7d85e018c0c267f580820b2bb558cd48 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 9 Jul 2019 22:04:17 -0700 Subject: [PATCH 012/482] iio: adc: max9611: Fix misuse of GENMASK macro Arguments are supposed to be ordered high then low. Signed-off-by: Joe Perches Fixes: 69780a3bbc0b ("iio: adc: Add Maxim max9611 ADC driver") Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/adc/max9611.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/adc/max9611.c b/drivers/iio/adc/max9611.c index 917223d5ff5b..0e3c6529fc4c 100644 --- a/drivers/iio/adc/max9611.c +++ b/drivers/iio/adc/max9611.c @@ -83,7 +83,7 @@ #define MAX9611_TEMP_MAX_POS 0x7f80 #define MAX9611_TEMP_MAX_NEG 0xff80 #define MAX9611_TEMP_MIN_NEG 0xd980 -#define MAX9611_TEMP_MASK GENMASK(7, 15) +#define MAX9611_TEMP_MASK GENMASK(15, 7) #define MAX9611_TEMP_SHIFT 0x07 #define MAX9611_TEMP_RAW(_r) ((_r) >> MAX9611_TEMP_SHIFT) #define MAX9611_TEMP_SCALE_NUM 1000000 From 5a304e1a4ea000177cf25f5ecf26e786dda25b98 Mon Sep 17 00:00:00 2001 From: Maarten ter Huurne Date: Thu, 4 Jul 2019 19:36:56 +0200 Subject: [PATCH 013/482] IIO: Ingenic JZ47xx: Set clock divider on probe The SADC component can run at up to 8 MHz on JZ4725B, but is fed a 12 MHz input clock (EXT). Divide it by two to get 6 MHz, then set up another divider to match, to produce a 10us clock. If the clock dividers are left on their power-on defaults (a divider of 1), the SADC mostly works, but will occasionally produce erroneous readings. This led to button presses being detected out of nowhere on the RS90 every few minutes. With this change, no ghost button presses were logged in almost a day worth of testing. The ADCLK register for configuring clock dividers doesn't exist on JZ4740, so avoid writing it there. A function has been introduced rather than a flag because there is a lot of variation between the ADCLK registers on JZ47xx SoCs, both in the internal layout of the register and in the frequency range supported by the SADC. So this solution should make it easier to add support for other JZ47xx SoCs later. Fixes: 1a78daea107d ("iio: adc: probe should set clock divider") Signed-off-by: Maarten ter Huurne Signed-off-by: Artur Rojek Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ingenic-adc.c | 54 +++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/drivers/iio/adc/ingenic-adc.c b/drivers/iio/adc/ingenic-adc.c index 92b1d5037ac9..e234970b7150 100644 --- a/drivers/iio/adc/ingenic-adc.c +++ b/drivers/iio/adc/ingenic-adc.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -22,8 +23,11 @@ #define JZ_ADC_REG_ADTCH 0x18 #define JZ_ADC_REG_ADBDAT 0x1c #define JZ_ADC_REG_ADSDAT 0x20 +#define JZ_ADC_REG_ADCLK 0x28 #define JZ_ADC_REG_CFG_BAT_MD BIT(4) +#define JZ_ADC_REG_ADCLK_CLKDIV_LSB 0 +#define JZ_ADC_REG_ADCLK_CLKDIV10US_LSB 16 #define JZ_ADC_AUX_VREF 3300 #define JZ_ADC_AUX_VREF_BITS 12 @@ -34,6 +38,8 @@ #define JZ4740_ADC_BATTERY_HIGH_VREF (7500 * 0.986) #define JZ4740_ADC_BATTERY_HIGH_VREF_BITS 12 +struct ingenic_adc; + struct ingenic_adc_soc_data { unsigned int battery_high_vref; unsigned int battery_high_vref_bits; @@ -41,6 +47,7 @@ struct ingenic_adc_soc_data { size_t battery_raw_avail_size; const int *battery_scale_avail; size_t battery_scale_avail_size; + int (*init_clk_div)(struct device *dev, struct ingenic_adc *adc); }; struct ingenic_adc { @@ -151,6 +158,42 @@ static const int jz4740_adc_battery_scale_avail[] = { JZ_ADC_BATTERY_LOW_VREF, JZ_ADC_BATTERY_LOW_VREF_BITS, }; +static int jz4725b_adc_init_clk_div(struct device *dev, struct ingenic_adc *adc) +{ + struct clk *parent_clk; + unsigned long parent_rate, rate; + unsigned int div_main, div_10us; + + parent_clk = clk_get_parent(adc->clk); + if (!parent_clk) { + dev_err(dev, "ADC clock has no parent\n"); + return -ENODEV; + } + parent_rate = clk_get_rate(parent_clk); + + /* + * The JZ4725B ADC works at 500 kHz to 8 MHz. + * We pick the highest rate possible. + * In practice we typically get 6 MHz, half of the 12 MHz EXT clock. + */ + div_main = DIV_ROUND_UP(parent_rate, 8000000); + div_main = clamp(div_main, 1u, 64u); + rate = parent_rate / div_main; + if (rate < 500000 || rate > 8000000) { + dev_err(dev, "No valid divider for ADC main clock\n"); + return -EINVAL; + } + + /* We also need a divider that produces a 10us clock. */ + div_10us = DIV_ROUND_UP(rate, 100000); + + writel(((div_10us - 1) << JZ_ADC_REG_ADCLK_CLKDIV10US_LSB) | + (div_main - 1) << JZ_ADC_REG_ADCLK_CLKDIV_LSB, + adc->base + JZ_ADC_REG_ADCLK); + + return 0; +} + static const struct ingenic_adc_soc_data jz4725b_adc_soc_data = { .battery_high_vref = JZ4725B_ADC_BATTERY_HIGH_VREF, .battery_high_vref_bits = JZ4725B_ADC_BATTERY_HIGH_VREF_BITS, @@ -158,6 +201,7 @@ static const struct ingenic_adc_soc_data jz4725b_adc_soc_data = { .battery_raw_avail_size = ARRAY_SIZE(jz4725b_adc_battery_raw_avail), .battery_scale_avail = jz4725b_adc_battery_scale_avail, .battery_scale_avail_size = ARRAY_SIZE(jz4725b_adc_battery_scale_avail), + .init_clk_div = jz4725b_adc_init_clk_div, }; static const struct ingenic_adc_soc_data jz4740_adc_soc_data = { @@ -167,6 +211,7 @@ static const struct ingenic_adc_soc_data jz4740_adc_soc_data = { .battery_raw_avail_size = ARRAY_SIZE(jz4740_adc_battery_raw_avail), .battery_scale_avail = jz4740_adc_battery_scale_avail, .battery_scale_avail_size = ARRAY_SIZE(jz4740_adc_battery_scale_avail), + .init_clk_div = NULL, /* no ADCLK register on JZ4740 */ }; static int ingenic_adc_read_avail(struct iio_dev *iio_dev, @@ -317,6 +362,15 @@ static int ingenic_adc_probe(struct platform_device *pdev) return ret; } + /* Set clock dividers. */ + if (soc_data->init_clk_div) { + ret = soc_data->init_clk_div(dev, adc); + if (ret) { + clk_disable_unprepare(adc->clk); + return ret; + } + } + /* Put hardware in a known passive state. */ writeb(0x00, adc->base + JZ_ADC_REG_ENABLE); writeb(0xff, adc->base + JZ_ADC_REG_CTRL); From 6cdff99c9f7d7d28b87cf05dd464f7c7736332ae Mon Sep 17 00:00:00 2001 From: Gwendal Grignou Date: Fri, 28 Jun 2019 12:17:09 -0700 Subject: [PATCH 014/482] iio: cros_ec_accel_legacy: Fix incorrect channel setting INFO_SCALE is set both for each channel and all channels. iio is using all channel setting, so the error was not user visible. Signed-off-by: Gwendal Grignou Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/accel/cros_ec_accel_legacy.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/iio/accel/cros_ec_accel_legacy.c b/drivers/iio/accel/cros_ec_accel_legacy.c index 46bb2e421bb9..ad19d9c716f4 100644 --- a/drivers/iio/accel/cros_ec_accel_legacy.c +++ b/drivers/iio/accel/cros_ec_accel_legacy.c @@ -319,7 +319,6 @@ static const struct iio_chan_spec_ext_info cros_ec_accel_legacy_ext_info[] = { .modified = 1, \ .info_mask_separate = \ BIT(IIO_CHAN_INFO_RAW) | \ - BIT(IIO_CHAN_INFO_SCALE) | \ BIT(IIO_CHAN_INFO_CALIBBIAS), \ .info_mask_shared_by_all = BIT(IIO_CHAN_INFO_SCALE), \ .ext_info = cros_ec_accel_legacy_ext_info, \ From 1244a720572fd1680ac8d6b8a4235f2e8557b810 Mon Sep 17 00:00:00 2001 From: Jean-Baptiste Maneyrol Date: Thu, 27 Jun 2019 13:19:53 +0000 Subject: [PATCH 015/482] iio: imu: mpu6050: add missing available scan masks Driver only supports 3-axis gyro and/or 3-axis accel. For icm20602, temp data is mandatory for all configurations. Fix all single and double axis configurations (almost never used) and more importantly fix 3-axis gyro and 6-axis accel+gyro buffer on icm20602 when temp data is not enabled. Signed-off-by: Jean-Baptiste Maneyrol Fixes: 1615fe41a195 ("iio: imu: mpu6050: Fix FIFO layout for ICM20602") Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/imu/inv_mpu6050/inv_mpu_core.c | 43 ++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/drivers/iio/imu/inv_mpu6050/inv_mpu_core.c b/drivers/iio/imu/inv_mpu6050/inv_mpu_core.c index 53a59957cc54..8a704cd5bddb 100644 --- a/drivers/iio/imu/inv_mpu6050/inv_mpu_core.c +++ b/drivers/iio/imu/inv_mpu6050/inv_mpu_core.c @@ -845,6 +845,25 @@ static const struct iio_chan_spec inv_mpu_channels[] = { INV_MPU6050_CHAN(IIO_ACCEL, IIO_MOD_Z, INV_MPU6050_SCAN_ACCL_Z), }; +static const unsigned long inv_mpu_scan_masks[] = { + /* 3-axis accel */ + BIT(INV_MPU6050_SCAN_ACCL_X) + | BIT(INV_MPU6050_SCAN_ACCL_Y) + | BIT(INV_MPU6050_SCAN_ACCL_Z), + /* 3-axis gyro */ + BIT(INV_MPU6050_SCAN_GYRO_X) + | BIT(INV_MPU6050_SCAN_GYRO_Y) + | BIT(INV_MPU6050_SCAN_GYRO_Z), + /* 6-axis accel + gyro */ + BIT(INV_MPU6050_SCAN_ACCL_X) + | BIT(INV_MPU6050_SCAN_ACCL_Y) + | BIT(INV_MPU6050_SCAN_ACCL_Z) + | BIT(INV_MPU6050_SCAN_GYRO_X) + | BIT(INV_MPU6050_SCAN_GYRO_Y) + | BIT(INV_MPU6050_SCAN_GYRO_Z), + 0, +}; + static const struct iio_chan_spec inv_icm20602_channels[] = { IIO_CHAN_SOFT_TIMESTAMP(INV_ICM20602_SCAN_TIMESTAMP), { @@ -871,6 +890,28 @@ static const struct iio_chan_spec inv_icm20602_channels[] = { INV_MPU6050_CHAN(IIO_ACCEL, IIO_MOD_Z, INV_ICM20602_SCAN_ACCL_Z), }; +static const unsigned long inv_icm20602_scan_masks[] = { + /* 3-axis accel + temp (mandatory) */ + BIT(INV_ICM20602_SCAN_ACCL_X) + | BIT(INV_ICM20602_SCAN_ACCL_Y) + | BIT(INV_ICM20602_SCAN_ACCL_Z) + | BIT(INV_ICM20602_SCAN_TEMP), + /* 3-axis gyro + temp (mandatory) */ + BIT(INV_ICM20602_SCAN_GYRO_X) + | BIT(INV_ICM20602_SCAN_GYRO_Y) + | BIT(INV_ICM20602_SCAN_GYRO_Z) + | BIT(INV_ICM20602_SCAN_TEMP), + /* 6-axis accel + gyro + temp (mandatory) */ + BIT(INV_ICM20602_SCAN_ACCL_X) + | BIT(INV_ICM20602_SCAN_ACCL_Y) + | BIT(INV_ICM20602_SCAN_ACCL_Z) + | BIT(INV_ICM20602_SCAN_GYRO_X) + | BIT(INV_ICM20602_SCAN_GYRO_Y) + | BIT(INV_ICM20602_SCAN_GYRO_Z) + | BIT(INV_ICM20602_SCAN_TEMP), + 0, +}; + /* * The user can choose any frequency between INV_MPU6050_MIN_FIFO_RATE and * INV_MPU6050_MAX_FIFO_RATE, but only these frequencies are matched by the @@ -1130,9 +1171,11 @@ int inv_mpu_core_probe(struct regmap *regmap, int irq, const char *name, if (chip_type == INV_ICM20602) { indio_dev->channels = inv_icm20602_channels; indio_dev->num_channels = ARRAY_SIZE(inv_icm20602_channels); + indio_dev->available_scan_masks = inv_icm20602_scan_masks; } else { indio_dev->channels = inv_mpu_channels; indio_dev->num_channels = ARRAY_SIZE(inv_mpu_channels); + indio_dev->available_scan_masks = inv_mpu_scan_masks; } indio_dev->info = &mpu_info; From 9b6d104a6b150bd4d3e5b039340e1f6b20c2e3c1 Mon Sep 17 00:00:00 2001 From: Wen Yang Date: Sat, 13 Jul 2019 11:46:14 +0800 Subject: [PATCH 016/482] ASoC: samsung: odroid: fix an use-after-free issue for codec The codec variable is still being used after the of_node_put() call, which may result in use-after-free. Fixes: bc3cf17b575a ("ASoC: samsung: odroid: Add support for secondary CPU DAI") Signed-off-by: Wen Yang Cc: Krzysztof Kozlowski Cc: Sangbeom Kim Cc: Sylwester Nawrocki Cc: Liam Girdwood Cc: Mark Brown Cc: Jaroslav Kysela Cc: Takashi Iwai Cc: alsa-devel@alsa-project.org Cc: linux-kernel@vger.kernel.org Link: https://lore.kernel.org/r/1562989575-33785-2-git-send-email-wen.yang99@zte.com.cn Signed-off-by: Mark Brown --- sound/soc/samsung/odroid.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/sound/soc/samsung/odroid.c b/sound/soc/samsung/odroid.c index dfb6e460e7eb..64ebe895cdd7 100644 --- a/sound/soc/samsung/odroid.c +++ b/sound/soc/samsung/odroid.c @@ -284,9 +284,8 @@ static int odroid_audio_probe(struct platform_device *pdev) } of_node_put(cpu); - of_node_put(codec); if (ret < 0) - return ret; + goto err_put_node; ret = snd_soc_of_get_dai_link_codecs(dev, codec, codec_link); if (ret < 0) @@ -317,6 +316,7 @@ static int odroid_audio_probe(struct platform_device *pdev) goto err_put_clk_i2s; } + of_node_put(codec); return 0; err_put_clk_i2s: @@ -326,6 +326,8 @@ static int odroid_audio_probe(struct platform_device *pdev) err_put_cpu_dai: of_node_put(cpu_dai); snd_soc_of_put_dai_link_codecs(codec_link); +err_put_node: + of_node_put(codec); return ret; } From 2abee12c0ab1924a69993d2c063a39a952e7d836 Mon Sep 17 00:00:00 2001 From: Wen Yang Date: Sat, 13 Jul 2019 11:46:15 +0800 Subject: [PATCH 017/482] ASoC: samsung: odroid: fix a double-free issue for cpu_dai The cpu_dai variable is still being used after the of_node_put() call, which may result in double-free: of_node_put(cpu_dai); ---> released here ret = devm_snd_soc_register_card(dev, card); if (ret < 0) { ... goto err_put_clk_i2s; --> jump to err_put_clk_i2s ... err_put_clk_i2s: clk_put(priv->clk_i2s_bus); err_put_sclk: clk_put(priv->sclk_i2s); err_put_cpu_dai: of_node_put(cpu_dai); --> double-free here Fixes: d832d2b246c5 ("ASoC: samsung: odroid: Fix of_node refcount unbalance") Signed-off-by: Wen Yang Cc: Krzysztof Kozlowski Cc: Sangbeom Kim Cc: Sylwester Nawrocki Cc: Liam Girdwood Cc: Mark Brown Cc: Jaroslav Kysela Cc: Takashi Iwai Cc: alsa-devel@alsa-project.org Cc: linux-kernel@vger.kernel.org Link: https://lore.kernel.org/r/1562989575-33785-3-git-send-email-wen.yang99@zte.com.cn Signed-off-by: Mark Brown --- sound/soc/samsung/odroid.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/samsung/odroid.c b/sound/soc/samsung/odroid.c index 64ebe895cdd7..f0f5fa9c27d3 100644 --- a/sound/soc/samsung/odroid.c +++ b/sound/soc/samsung/odroid.c @@ -308,7 +308,6 @@ static int odroid_audio_probe(struct platform_device *pdev) ret = PTR_ERR(priv->clk_i2s_bus); goto err_put_sclk; } - of_node_put(cpu_dai); ret = devm_snd_soc_register_card(dev, card); if (ret < 0) { @@ -316,6 +315,7 @@ static int odroid_audio_probe(struct platform_device *pdev) goto err_put_clk_i2s; } + of_node_put(cpu_dai); of_node_put(codec); return 0; From aa2ba991c4206d5b778dcaa7b4997396e79f8e90 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 12 Jul 2019 13:27:08 +0200 Subject: [PATCH 018/482] ASoC: Intel: bytcht_es8316: Add quirk for Irbis NB41 netbook The Irbis NB41 netbook has its internal mic on IN2, inverted jack-detect and stereo speakers, add a quirk for this. Cc: russianneuromancer@ya.ru Reported-and-tested-by: russianneuromancer@ya.ru Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20190712112708.25327-1-hdegoede@redhat.com Signed-off-by: Mark Brown --- sound/soc/intel/boards/bytcht_es8316.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/sound/soc/intel/boards/bytcht_es8316.c b/sound/soc/intel/boards/bytcht_es8316.c index fac09be3cade..46612331f5ea 100644 --- a/sound/soc/intel/boards/bytcht_es8316.c +++ b/sound/soc/intel/boards/bytcht_es8316.c @@ -437,6 +437,14 @@ static const struct acpi_gpio_mapping byt_cht_es8316_gpios[] = { /* Please keep this list alphabetically sorted */ static const struct dmi_system_id byt_cht_es8316_quirk_table[] = { + { /* Irbis NB41 */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "IRBIS"), + DMI_MATCH(DMI_PRODUCT_NAME, "NB41"), + }, + .driver_data = (void *)(BYT_CHT_ES8316_INTMIC_IN2_MAP + | BYT_CHT_ES8316_JD_INVERTED), + }, { /* Teclast X98 Plus II */ .matches = { DMI_MATCH(DMI_SYS_VENDOR, "TECLAST"), From a7cd67e2b76edd46fbd2517078eeba369f933575 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ronald=20Tschal=C3=A4r?= Date: Sun, 21 Jul 2019 15:15:12 +0300 Subject: [PATCH 019/482] Input: applespi - fix warnings detected by sparse MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This makes sure that we convert from on-wire to CPU endianness in applespi_debug_update_dimensions() and also marks as "static" as it is not needed to be visible outside of the driver. Reported-by: kbuild test robot Signed-off-by: Ronald Tschalär Signed-off-by: Dmitry Torokhov --- drivers/input/keyboard/applespi.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/drivers/input/keyboard/applespi.c b/drivers/input/keyboard/applespi.c index 548737e7aeda..52d394f473a2 100644 --- a/drivers/input/keyboard/applespi.c +++ b/drivers/input/keyboard/applespi.c @@ -944,10 +944,14 @@ static inline int le16_to_int(__le16 x) static void applespi_debug_update_dimensions(struct applespi_data *applespi, const struct tp_finger *f) { - applespi->tp_dim_min_x = min_t(int, applespi->tp_dim_min_x, f->abs_x); - applespi->tp_dim_max_x = max_t(int, applespi->tp_dim_max_x, f->abs_x); - applespi->tp_dim_min_y = min_t(int, applespi->tp_dim_min_y, f->abs_y); - applespi->tp_dim_max_y = max_t(int, applespi->tp_dim_max_y, f->abs_y); + applespi->tp_dim_min_x = min(applespi->tp_dim_min_x, + le16_to_int(f->abs_x)); + applespi->tp_dim_max_x = max(applespi->tp_dim_max_x, + le16_to_int(f->abs_x)); + applespi->tp_dim_min_y = min(applespi->tp_dim_min_y, + le16_to_int(f->abs_y)); + applespi->tp_dim_max_y = max(applespi->tp_dim_max_y, + le16_to_int(f->abs_y)); } static int applespi_tp_dim_open(struct inode *inode, struct file *file) @@ -1611,8 +1615,8 @@ static void applespi_save_bl_level(struct applespi_data *applespi, efi_attr = EFI_VARIABLE_NON_VOLATILE | EFI_VARIABLE_BOOTSERVICE_ACCESS | EFI_VARIABLE_RUNTIME_ACCESS; - sts = efivar_entry_set_safe(EFI_BL_LEVEL_NAME, efi_guid, efi_attr, true, - efi_data_len, &efi_data); + sts = efivar_entry_set_safe((efi_char16_t *)EFI_BL_LEVEL_NAME, efi_guid, + efi_attr, true, efi_data_len, &efi_data); if (sts) dev_warn(&applespi->spi->dev, "Error saving backlight level to EFI vars: %d\n", sts); @@ -1953,7 +1957,7 @@ static const struct acpi_device_id applespi_acpi_match[] = { }; MODULE_DEVICE_TABLE(acpi, applespi_acpi_match); -const struct dev_pm_ops applespi_pm_ops = { +static const struct dev_pm_ops applespi_pm_ops = { SET_SYSTEM_SLEEP_PM_OPS(applespi_suspend, applespi_resume) .poweroff_late = applespi_poweroff_late, }; From 8dd26dff00c0636b1d8621acaeef3f6f3a39dd77 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Thu, 18 Jul 2019 09:43:33 +0100 Subject: [PATCH 020/482] ASoC: dapm: Fix handling of custom_stop_condition on DAPM graph walks DPCM uses snd_soc_dapm_dai_get_connected_widgets to build a list of the widgets connected to a specific front end DAI so it can search through this list for available back end DAIs. The custom_stop_condition was added to is_connected_ep to facilitate this list not containing more widgets than is necessary. Doing so both speeds up the DPCM handling as less widgets need to be searched and avoids issues with CODEC to CODEC links as these would be confused with back end DAIs if they appeared in the list of available widgets. custom_stop_condition was implemented by aborting the graph walk when the condition is triggered, however there is an issue with this approach. Whilst walking the graph is_connected_ep should update the endpoints cache on each widget, if the walk is aborted the number of attached end points is unknown for that sub-graph. When the stop condition triggered, the original patch ignored the triggering widget and returned zero connected end points; a later patch updated this to set the triggering widget's cache to 1 and return that. Both of these approaches result in inaccurate values being stored in various end point caches as the values propagate back through the graph, which can result in later issues with widgets powering/not powering unexpectedly. As the original goal was to reduce the size of the widget list passed to the DPCM code, the simplest solution is to limit the functionality of the custom_stop_condition to the widget list. This means the rest of the graph will still be processed resulting in correct end point caches, but only widgets up to the stop condition will be added to the returned widget list. Fixes: 6742064aef7f ("ASoC: dapm: support user-defined stop condition in dai_get_connected_widgets") Fixes: 5fdd022c2026 ("ASoC: dpcm: play nice with CODEC<->CODEC links") Fixes: 09464974eaa8 ("ASoC: dapm: Fix to return correct path list in is_connected_ep.") Signed-off-by: Charles Keepax Link: https://lore.kernel.org/r/20190718084333.15598-1-ckeepax@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/soc-dapm.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c index 6b44b4a78b8e..9cd87e47ee8f 100644 --- a/sound/soc/soc-dapm.c +++ b/sound/soc/soc-dapm.c @@ -1157,8 +1157,8 @@ static __always_inline int is_connected_ep(struct snd_soc_dapm_widget *widget, list_add_tail(&widget->work_list, list); if (custom_stop_condition && custom_stop_condition(widget, dir)) { - widget->endpoints[dir] = 1; - return widget->endpoints[dir]; + list = NULL; + custom_stop_condition = NULL; } if ((widget->is_ep & SND_SOC_DAPM_DIR_TO_EP(dir)) && widget->connected) { @@ -1195,8 +1195,8 @@ static __always_inline int is_connected_ep(struct snd_soc_dapm_widget *widget, * * Optionally, can be supplied with a function acting as a stopping condition. * This function takes the dapm widget currently being examined and the walk - * direction as an arguments, it should return true if the walk should be - * stopped and false otherwise. + * direction as an arguments, it should return true if widgets from that point + * in the graph onwards should not be added to the widget list. */ static int is_connected_output_ep(struct snd_soc_dapm_widget *widget, struct list_head *list, From 48dfd37a0f85400610153101c72222bf01523699 Mon Sep 17 00:00:00 2001 From: Shengjiu Wang Date: Tue, 16 Jul 2019 17:45:47 +0800 Subject: [PATCH 021/482] ASoC: cs42xx8: Fix MFREQ selection issue for async mode When sample rate of TX is different with sample rate of RX in async mode, the MFreq selection will be wrong. For example, sysclk = 24.576MHz, TX rate = 96000Hz, RX rate = 48000Hz. Then ratio of TX = 256, ratio of RX = 512, For MFreq is shared by TX and RX instance, the correct value of MFreq is 2 for both TX and RX. But original method will cause MFreq = 0 for TX, MFreq = 2 for RX. If TX is started after RX, RX will be impacted, RX work abnormal with MFreq = 0. This patch is to select proper MFreq value according to TX rate and RX rate. Fixes: 0c516b4ff85c ("ASoC: cs42xx8: Add codec driver support for CS42448/CS42888") Signed-off-by: Shengjiu Wang Link: https://lore.kernel.org/r/20190716094547.46787-1-shengjiu.wang@nxp.com Signed-off-by: Mark Brown --- sound/soc/codecs/cs42xx8.c | 116 +++++++++++++++++++++++++++++++------ 1 file changed, 97 insertions(+), 19 deletions(-) diff --git a/sound/soc/codecs/cs42xx8.c b/sound/soc/codecs/cs42xx8.c index 6203f54d9f25..5b049fcdba20 100644 --- a/sound/soc/codecs/cs42xx8.c +++ b/sound/soc/codecs/cs42xx8.c @@ -47,6 +47,7 @@ struct cs42xx8_priv { unsigned long sysclk; u32 tx_channels; struct gpio_desc *gpiod_reset; + u32 rate[2]; }; /* -127.5dB to 0dB with step of 0.5dB */ @@ -176,21 +177,27 @@ static const struct snd_soc_dapm_route cs42xx8_adc3_dapm_routes[] = { }; struct cs42xx8_ratios { - unsigned int ratio; - unsigned char speed; - unsigned char mclk; + unsigned int mfreq; + unsigned int min_mclk; + unsigned int max_mclk; + unsigned int ratio[3]; }; +/* + * According to reference mannual, define the cs42xx8_ratio struct + * MFreq2 | MFreq1 | MFreq0 | Description | SSM | DSM | QSM | + * 0 | 0 | 0 |1.029MHz to 12.8MHz | 256 | 128 | 64 | + * 0 | 0 | 1 |1.536MHz to 19.2MHz | 384 | 192 | 96 | + * 0 | 1 | 0 |2.048MHz to 25.6MHz | 512 | 256 | 128 | + * 0 | 1 | 1 |3.072MHz to 38.4MHz | 768 | 384 | 192 | + * 1 | x | x |4.096MHz to 51.2MHz |1024 | 512 | 256 | + */ static const struct cs42xx8_ratios cs42xx8_ratios[] = { - { 64, CS42XX8_FM_QUAD, CS42XX8_FUNCMOD_MFREQ_256(4) }, - { 96, CS42XX8_FM_QUAD, CS42XX8_FUNCMOD_MFREQ_384(4) }, - { 128, CS42XX8_FM_QUAD, CS42XX8_FUNCMOD_MFREQ_512(4) }, - { 192, CS42XX8_FM_QUAD, CS42XX8_FUNCMOD_MFREQ_768(4) }, - { 256, CS42XX8_FM_SINGLE, CS42XX8_FUNCMOD_MFREQ_256(1) }, - { 384, CS42XX8_FM_SINGLE, CS42XX8_FUNCMOD_MFREQ_384(1) }, - { 512, CS42XX8_FM_SINGLE, CS42XX8_FUNCMOD_MFREQ_512(1) }, - { 768, CS42XX8_FM_SINGLE, CS42XX8_FUNCMOD_MFREQ_768(1) }, - { 1024, CS42XX8_FM_SINGLE, CS42XX8_FUNCMOD_MFREQ_1024(1) } + { 0, 1029000, 12800000, {256, 128, 64} }, + { 2, 1536000, 19200000, {384, 192, 96} }, + { 4, 2048000, 25600000, {512, 256, 128} }, + { 6, 3072000, 38400000, {768, 384, 192} }, + { 8, 4096000, 51200000, {1024, 512, 256} }, }; static int cs42xx8_set_dai_sysclk(struct snd_soc_dai *codec_dai, @@ -257,14 +264,68 @@ static int cs42xx8_hw_params(struct snd_pcm_substream *substream, struct snd_soc_component *component = dai->component; struct cs42xx8_priv *cs42xx8 = snd_soc_component_get_drvdata(component); bool tx = substream->stream == SNDRV_PCM_STREAM_PLAYBACK; - u32 ratio = cs42xx8->sysclk / params_rate(params); - u32 i, fm, val, mask; + u32 ratio[2]; + u32 rate[2]; + u32 fm[2]; + u32 i, val, mask; + bool condition1, condition2; if (tx) cs42xx8->tx_channels = params_channels(params); + rate[tx] = params_rate(params); + rate[!tx] = cs42xx8->rate[!tx]; + + ratio[tx] = rate[tx] > 0 ? cs42xx8->sysclk / rate[tx] : 0; + ratio[!tx] = rate[!tx] > 0 ? cs42xx8->sysclk / rate[!tx] : 0; + + /* Get functional mode for tx and rx according to rate */ + for (i = 0; i < 2; i++) { + if (cs42xx8->slave_mode) { + fm[i] = CS42XX8_FM_AUTO; + } else { + if (rate[i] < 50000) { + fm[i] = CS42XX8_FM_SINGLE; + } else if (rate[i] > 50000 && rate[i] < 100000) { + fm[i] = CS42XX8_FM_DOUBLE; + } else if (rate[i] > 100000 && rate[i] < 200000) { + fm[i] = CS42XX8_FM_QUAD; + } else { + dev_err(component->dev, + "unsupported sample rate\n"); + return -EINVAL; + } + } + } + for (i = 0; i < ARRAY_SIZE(cs42xx8_ratios); i++) { - if (cs42xx8_ratios[i].ratio == ratio) + /* Is the ratio[tx] valid ? */ + condition1 = ((fm[tx] == CS42XX8_FM_AUTO) ? + (cs42xx8_ratios[i].ratio[0] == ratio[tx] || + cs42xx8_ratios[i].ratio[1] == ratio[tx] || + cs42xx8_ratios[i].ratio[2] == ratio[tx]) : + (cs42xx8_ratios[i].ratio[fm[tx]] == ratio[tx])) && + cs42xx8->sysclk >= cs42xx8_ratios[i].min_mclk && + cs42xx8->sysclk <= cs42xx8_ratios[i].max_mclk; + + if (!ratio[tx]) + condition1 = true; + + /* Is the ratio[!tx] valid ? */ + condition2 = ((fm[!tx] == CS42XX8_FM_AUTO) ? + (cs42xx8_ratios[i].ratio[0] == ratio[!tx] || + cs42xx8_ratios[i].ratio[1] == ratio[!tx] || + cs42xx8_ratios[i].ratio[2] == ratio[!tx]) : + (cs42xx8_ratios[i].ratio[fm[!tx]] == ratio[!tx])); + + if (!ratio[!tx]) + condition2 = true; + + /* + * Both ratio[tx] and ratio[!tx] is valid, then we get + * a proper MFreq. + */ + if (condition1 && condition2) break; } @@ -273,18 +334,34 @@ static int cs42xx8_hw_params(struct snd_pcm_substream *substream, return -EINVAL; } - mask = CS42XX8_FUNCMOD_MFREQ_MASK; - val = cs42xx8_ratios[i].mclk; + cs42xx8->rate[tx] = params_rate(params); - fm = cs42xx8->slave_mode ? CS42XX8_FM_AUTO : cs42xx8_ratios[i].speed; + mask = CS42XX8_FUNCMOD_MFREQ_MASK; + val = cs42xx8_ratios[i].mfreq; regmap_update_bits(cs42xx8->regmap, CS42XX8_FUNCMOD, CS42XX8_FUNCMOD_xC_FM_MASK(tx) | mask, - CS42XX8_FUNCMOD_xC_FM(tx, fm) | val); + CS42XX8_FUNCMOD_xC_FM(tx, fm[tx]) | val); return 0; } +static int cs42xx8_hw_free(struct snd_pcm_substream *substream, + struct snd_soc_dai *dai) +{ + struct snd_soc_component *component = dai->component; + struct cs42xx8_priv *cs42xx8 = snd_soc_component_get_drvdata(component); + bool tx = substream->stream == SNDRV_PCM_STREAM_PLAYBACK; + + /* Clear stored rate */ + cs42xx8->rate[tx] = 0; + + regmap_update_bits(cs42xx8->regmap, CS42XX8_FUNCMOD, + CS42XX8_FUNCMOD_xC_FM_MASK(tx), + CS42XX8_FUNCMOD_xC_FM(tx, CS42XX8_FM_AUTO)); + return 0; +} + static int cs42xx8_digital_mute(struct snd_soc_dai *dai, int mute) { struct snd_soc_component *component = dai->component; @@ -302,6 +379,7 @@ static const struct snd_soc_dai_ops cs42xx8_dai_ops = { .set_fmt = cs42xx8_set_dai_fmt, .set_sysclk = cs42xx8_set_dai_sysclk, .hw_params = cs42xx8_hw_params, + .hw_free = cs42xx8_hw_free, .digital_mute = cs42xx8_digital_mute, }; From f86621cd6c6f54edfdd62da347b2bbb8d7fddc8d Mon Sep 17 00:00:00 2001 From: Enric Balletbo i Serra Date: Fri, 19 Jul 2019 19:39:29 +0200 Subject: [PATCH 022/482] SoC: rockchip: rockchip_max98090: Enable MICBIAS for headset keypress detection The TS3A227E says that the headset keypress detection needs the MICBIAS power in order to report the key events to ensure proper operation The headset keypress detection needs the MICBIAS power in order to report the key events all the time as long as MIC is present. So MICBIAS pin is forced on when a MICROPHONE is detected. On Veyron Minnie I observed that if the MICBIAS power is not present and the key press detection is activated (just because it is enabled when you insert a headset), it randomly reports a keypress on insert. E.g. (KEY_PLAYPAUSE) Event: (SW_HEADPHONE_INSERT), value 1 Event: (SW_MICROPHONE_INSERT), value 1 Event: -------------- SYN_REPORT ------------ Event: (KEY_PLAYPAUSE), value 1 Userspace thinks that KEY_PLAYPAUSE is pressed and produces the annoying effect that the media player starts a play/pause loop. Note that, although most of the time the key reported is the one associated with BTN_0, not always this is true. On my tests I also saw different keys reported Signed-off-by: Enric Balletbo i Serra Link: https://lore.kernel.org/r/20190719173929.24065-1-enric.balletbo@collabora.com Signed-off-by: Mark Brown --- sound/soc/rockchip/rockchip_max98090.c | 32 ++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/sound/soc/rockchip/rockchip_max98090.c b/sound/soc/rockchip/rockchip_max98090.c index c5fc24675a33..782e534d4c0d 100644 --- a/sound/soc/rockchip/rockchip_max98090.c +++ b/sound/soc/rockchip/rockchip_max98090.c @@ -61,6 +61,37 @@ static const struct snd_kcontrol_new rk_mc_controls[] = { SOC_DAPM_PIN_SWITCH("Speaker"), }; +static int rk_jack_event(struct notifier_block *nb, unsigned long event, + void *data) +{ + struct snd_soc_jack *jack = (struct snd_soc_jack *)data; + struct snd_soc_dapm_context *dapm = &jack->card->dapm; + + if (event & SND_JACK_MICROPHONE) + snd_soc_dapm_force_enable_pin(dapm, "MICBIAS"); + else + snd_soc_dapm_disable_pin(dapm, "MICBIAS"); + + snd_soc_dapm_sync(dapm); + + return 0; +} + +static struct notifier_block rk_jack_nb = { + .notifier_call = rk_jack_event, +}; + +static int rk_init(struct snd_soc_pcm_runtime *runtime) +{ + /* + * The jack has already been created in the rk_98090_headset_init() + * function. + */ + snd_soc_jack_notifier_register(&headset_jack, &rk_jack_nb); + + return 0; +} + static int rk_aif1_hw_params(struct snd_pcm_substream *substream, struct snd_pcm_hw_params *params) { @@ -119,6 +150,7 @@ SND_SOC_DAILINK_DEFS(hifi, static struct snd_soc_dai_link rk_dailink = { .name = "max98090", .stream_name = "Audio", + .init = rk_init, .ops = &rk_aif1_ops, /* set max98090 as slave */ .dai_fmt = SND_SOC_DAIFMT_I2S | SND_SOC_DAIFMT_NB_NF | From 62ec3d13601bd626ca7a0edef6d45dbb753d94e8 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sun, 21 Jul 2019 23:23:08 +0900 Subject: [PATCH 023/482] ASoC: SOF: use __u32 instead of uint32_t in uapi headers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When CONFIG_UAPI_HEADER_TEST=y, exported headers are compile-tested to make sure they can be included from user-space. Currently, header.h and fw.h are excluded from the test coverage. To make them join the compile-test, we need to fix the build errors attached below. For a case like this, we decided to use __u{8,16,32,64} variable types in this discussion: https://lkml.org/lkml/2019/6/5/18 Build log: CC usr/include/sound/sof/header.h.s CC usr/include/sound/sof/fw.h.s In file included from :32:0: ./usr/include/sound/sof/header.h:19:2: error: unknown type name ‘uint32_t’ uint32_t magic; /**< 'S', 'O', 'F', '\0' */ ^~~~~~~~ ./usr/include/sound/sof/header.h:20:2: error: unknown type name ‘uint32_t’ uint32_t type; /**< component specific type */ ^~~~~~~~ ./usr/include/sound/sof/header.h:21:2: error: unknown type name ‘uint32_t’ uint32_t size; /**< size in bytes of data excl. this struct */ ^~~~~~~~ ./usr/include/sound/sof/header.h:22:2: error: unknown type name ‘uint32_t’ uint32_t abi; /**< SOF ABI version */ ^~~~~~~~ ./usr/include/sound/sof/header.h:23:2: error: unknown type name ‘uint32_t’ uint32_t reserved[4]; /**< reserved for future use */ ^~~~~~~~ ./usr/include/sound/sof/header.h:24:2: error: unknown type name ‘uint32_t’ uint32_t data[0]; /**< Component data - opaque to core */ ^~~~~~~~ In file included from :32:0: ./usr/include/sound/sof/fw.h:49:2: error: unknown type name ‘uint32_t’ uint32_t size; /* bytes minus this header */ ^~~~~~~~ ./usr/include/sound/sof/fw.h:50:2: error: unknown type name ‘uint32_t’ uint32_t offset; /* offset from base */ ^~~~~~~~ ./usr/include/sound/sof/fw.h:64:2: error: unknown type name ‘uint32_t’ uint32_t size; /* bytes minus this header */ ^~~~~~~~ ./usr/include/sound/sof/fw.h:65:2: error: unknown type name ‘uint32_t’ uint32_t num_blocks; /* number of blocks */ ^~~~~~~~ ./usr/include/sound/sof/fw.h:73:2: error: unknown type name ‘uint32_t’ uint32_t file_size; /* size of file minus this header */ ^~~~~~~~ ./usr/include/sound/sof/fw.h:74:2: error: unknown type name ‘uint32_t’ uint32_t num_modules; /* number of modules */ ^~~~~~~~ ./usr/include/sound/sof/fw.h:75:2: error: unknown type name ‘uint32_t’ uint32_t abi; /* version of header format */ ^~~~~~~~ Signed-off-by: Masahiro Yamada Link: https://lore.kernel.org/r/20190721142308.30306-1-yamada.masahiro@socionext.com Signed-off-by: Mark Brown --- include/uapi/sound/sof/fw.h | 16 +++++++++------- include/uapi/sound/sof/header.h | 14 ++++++++------ 2 files changed, 17 insertions(+), 13 deletions(-) diff --git a/include/uapi/sound/sof/fw.h b/include/uapi/sound/sof/fw.h index 1afca973eb09..e9f697467a86 100644 --- a/include/uapi/sound/sof/fw.h +++ b/include/uapi/sound/sof/fw.h @@ -13,6 +13,8 @@ #ifndef __INCLUDE_UAPI_SOF_FW_H__ #define __INCLUDE_UAPI_SOF_FW_H__ +#include + #define SND_SOF_FW_SIG_SIZE 4 #define SND_SOF_FW_ABI 1 #define SND_SOF_FW_SIG "Reef" @@ -46,8 +48,8 @@ enum snd_sof_fw_blk_type { struct snd_sof_blk_hdr { enum snd_sof_fw_blk_type type; - uint32_t size; /* bytes minus this header */ - uint32_t offset; /* offset from base */ + __u32 size; /* bytes minus this header */ + __u32 offset; /* offset from base */ } __packed; /* @@ -61,8 +63,8 @@ enum snd_sof_fw_mod_type { struct snd_sof_mod_hdr { enum snd_sof_fw_mod_type type; - uint32_t size; /* bytes minus this header */ - uint32_t num_blocks; /* number of blocks */ + __u32 size; /* bytes minus this header */ + __u32 num_blocks; /* number of blocks */ } __packed; /* @@ -70,9 +72,9 @@ struct snd_sof_mod_hdr { */ struct snd_sof_fw_header { unsigned char sig[SND_SOF_FW_SIG_SIZE]; /* "Reef" */ - uint32_t file_size; /* size of file minus this header */ - uint32_t num_modules; /* number of modules */ - uint32_t abi; /* version of header format */ + __u32 file_size; /* size of file minus this header */ + __u32 num_modules; /* number of modules */ + __u32 abi; /* version of header format */ } __packed; #endif diff --git a/include/uapi/sound/sof/header.h b/include/uapi/sound/sof/header.h index 7868990b0d6f..5f4518e7a972 100644 --- a/include/uapi/sound/sof/header.h +++ b/include/uapi/sound/sof/header.h @@ -9,6 +9,8 @@ #ifndef __INCLUDE_UAPI_SOUND_SOF_USER_HEADER_H__ #define __INCLUDE_UAPI_SOUND_SOF_USER_HEADER_H__ +#include + /* * Header for all non IPC ABI data. * @@ -16,12 +18,12 @@ * Used by any bespoke component data structures or binary blobs. */ struct sof_abi_hdr { - uint32_t magic; /**< 'S', 'O', 'F', '\0' */ - uint32_t type; /**< component specific type */ - uint32_t size; /**< size in bytes of data excl. this struct */ - uint32_t abi; /**< SOF ABI version */ - uint32_t reserved[4]; /**< reserved for future use */ - uint32_t data[0]; /**< Component data - opaque to core */ + __u32 magic; /**< 'S', 'O', 'F', '\0' */ + __u32 type; /**< component specific type */ + __u32 size; /**< size in bytes of data excl. this struct */ + __u32 abi; /**< SOF ABI version */ + __u32 reserved[4]; /**< reserved for future use */ + __u32 data[0]; /**< Component data - opaque to core */ } __packed; #endif From f0414087a1b9933adac40974c3ea50261227b5f0 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 22 Jul 2019 14:44:43 +0200 Subject: [PATCH 024/482] dmaengine: dw-edma: fix unnecessary stack usage Putting large constant data on the stack causes unnecessary overhead and stack usage: drivers/dma/dw-edma/dw-edma-v0-debugfs.c:285:6: error: stack frame size of 1376 bytes in function 'dw_edma_v0_debugfs_on' [-Werror,-Wframe-larger-than=] Mark the variable 'static const' in order for the compiler to move it into the .rodata section where it does no such harm. Fixes: 305aebeff879 ("dmaengine: Add Synopsys eDMA IP version 0 debugfs support") Signed-off-by: Arnd Bergmann Acked-by: Gustavo Pimentel Link: https://lore.kernel.org/r/20190722124457.1093886-1-arnd@arndb.de Signed-off-by: Vinod Koul --- drivers/dma/dw-edma/dw-edma-v0-debugfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/dw-edma/dw-edma-v0-debugfs.c b/drivers/dma/dw-edma/dw-edma-v0-debugfs.c index 3226f528cc11..5728b6fe938c 100644 --- a/drivers/dma/dw-edma/dw-edma-v0-debugfs.c +++ b/drivers/dma/dw-edma/dw-edma-v0-debugfs.c @@ -48,7 +48,7 @@ static struct { } lim[2][EDMA_V0_MAX_NR_CH]; struct debugfs_entries { - char name[24]; + const char *name; dma_addr_t *reg; }; From 756c3ef93492af382c541e039c1417b96a3d335e Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 22 Jul 2019 14:44:44 +0200 Subject: [PATCH 025/482] dmaengine: dw-edma: fix __iomem type confusion The new driver mixes up dma_addr_t and __iomem pointers, which results in warnings on some 32-bit architectures, like: drivers/dma/dw-edma/dw-edma-v0-core.c: In function '__dw_regs': drivers/dma/dw-edma/dw-edma-v0-core.c:28:9: error: cast to pointer from integer of different size [-Werror=int-to-pointer-cast] return (struct dw_edma_v0_regs __iomem *)dw->rg_region.vaddr; Make it use __iomem pointers consistently here, and avoid using dma_addr_t for __iomem tokens altogether. A small complication here is the debugfs code, which passes an __iomem token as the private data for debugfs files, requiring the use of extra __force. Fixes: 7e4b8a4fbe2c ("dmaengine: Add Synopsys eDMA IP version 0 support") Link: https://lore.kernel.org/lkml/20190617131918.2518727-1-arnd@arndb.de/ Signed-off-by: Arnd Bergmann Link: https://lore.kernel.org/r/20190722124457.1093886-2-arnd@arndb.de Signed-off-by: Vinod Koul --- drivers/dma/dw-edma/dw-edma-core.h | 2 +- drivers/dma/dw-edma/dw-edma-pcie.c | 18 ++++++++-------- drivers/dma/dw-edma/dw-edma-v0-core.c | 10 ++++----- drivers/dma/dw-edma/dw-edma-v0-debugfs.c | 27 ++++++++++++------------ 4 files changed, 29 insertions(+), 28 deletions(-) diff --git a/drivers/dma/dw-edma/dw-edma-core.h b/drivers/dma/dw-edma/dw-edma-core.h index b6cc90cbc9dc..4e5f9f6e901b 100644 --- a/drivers/dma/dw-edma/dw-edma-core.h +++ b/drivers/dma/dw-edma/dw-edma-core.h @@ -50,7 +50,7 @@ struct dw_edma_burst { struct dw_edma_region { phys_addr_t paddr; - dma_addr_t vaddr; + void __iomem *vaddr; size_t sz; }; diff --git a/drivers/dma/dw-edma/dw-edma-pcie.c b/drivers/dma/dw-edma/dw-edma-pcie.c index 4c96e1c948f2..dc85f55e1bb8 100644 --- a/drivers/dma/dw-edma/dw-edma-pcie.c +++ b/drivers/dma/dw-edma/dw-edma-pcie.c @@ -130,19 +130,19 @@ static int dw_edma_pcie_probe(struct pci_dev *pdev, chip->id = pdev->devfn; chip->irq = pdev->irq; - dw->rg_region.vaddr = (dma_addr_t)pcim_iomap_table(pdev)[pdata->rg_bar]; + dw->rg_region.vaddr = pcim_iomap_table(pdev)[pdata->rg_bar]; dw->rg_region.vaddr += pdata->rg_off; dw->rg_region.paddr = pdev->resource[pdata->rg_bar].start; dw->rg_region.paddr += pdata->rg_off; dw->rg_region.sz = pdata->rg_sz; - dw->ll_region.vaddr = (dma_addr_t)pcim_iomap_table(pdev)[pdata->ll_bar]; + dw->ll_region.vaddr = pcim_iomap_table(pdev)[pdata->ll_bar]; dw->ll_region.vaddr += pdata->ll_off; dw->ll_region.paddr = pdev->resource[pdata->ll_bar].start; dw->ll_region.paddr += pdata->ll_off; dw->ll_region.sz = pdata->ll_sz; - dw->dt_region.vaddr = (dma_addr_t)pcim_iomap_table(pdev)[pdata->dt_bar]; + dw->dt_region.vaddr = pcim_iomap_table(pdev)[pdata->dt_bar]; dw->dt_region.vaddr += pdata->dt_off; dw->dt_region.paddr = pdev->resource[pdata->dt_bar].start; dw->dt_region.paddr += pdata->dt_off; @@ -158,17 +158,17 @@ static int dw_edma_pcie_probe(struct pci_dev *pdev, pci_dbg(pdev, "Mode:\t%s\n", dw->mode == EDMA_MODE_LEGACY ? "Legacy" : "Unroll"); - pci_dbg(pdev, "Registers:\tBAR=%u, off=0x%.8lx, sz=0x%zx bytes, addr(v=%pa, p=%pa)\n", + pci_dbg(pdev, "Registers:\tBAR=%u, off=0x%.8lx, sz=0x%zx bytes, addr(v=%p, p=%pa)\n", pdata->rg_bar, pdata->rg_off, pdata->rg_sz, - &dw->rg_region.vaddr, &dw->rg_region.paddr); + dw->rg_region.vaddr, &dw->rg_region.paddr); - pci_dbg(pdev, "L. List:\tBAR=%u, off=0x%.8lx, sz=0x%zx bytes, addr(v=%pa, p=%pa)\n", + pci_dbg(pdev, "L. List:\tBAR=%u, off=0x%.8lx, sz=0x%zx bytes, addr(v=%p, p=%pa)\n", pdata->ll_bar, pdata->ll_off, pdata->ll_sz, - &dw->ll_region.vaddr, &dw->ll_region.paddr); + dw->ll_region.vaddr, &dw->ll_region.paddr); - pci_dbg(pdev, "Data:\tBAR=%u, off=0x%.8lx, sz=0x%zx bytes, addr(v=%pa, p=%pa)\n", + pci_dbg(pdev, "Data:\tBAR=%u, off=0x%.8lx, sz=0x%zx bytes, addr(v=%p, p=%pa)\n", pdata->dt_bar, pdata->dt_off, pdata->dt_sz, - &dw->dt_region.vaddr, &dw->dt_region.paddr); + dw->dt_region.vaddr, &dw->dt_region.paddr); pci_dbg(pdev, "Nr. IRQs:\t%u\n", dw->nr_irqs); diff --git a/drivers/dma/dw-edma/dw-edma-v0-core.c b/drivers/dma/dw-edma/dw-edma-v0-core.c index 8a3180ed49a6..97e3fd41c8a8 100644 --- a/drivers/dma/dw-edma/dw-edma-v0-core.c +++ b/drivers/dma/dw-edma/dw-edma-v0-core.c @@ -25,7 +25,7 @@ enum dw_edma_control { static inline struct dw_edma_v0_regs __iomem *__dw_regs(struct dw_edma *dw) { - return (struct dw_edma_v0_regs __iomem *)dw->rg_region.vaddr; + return dw->rg_region.vaddr; } #define SET(dw, name, value) \ @@ -192,13 +192,13 @@ u32 dw_edma_v0_core_status_abort_int(struct dw_edma *dw, enum dw_edma_dir dir) static void dw_edma_v0_core_write_chunk(struct dw_edma_chunk *chunk) { struct dw_edma_burst *child; - struct dw_edma_v0_lli *lli; - struct dw_edma_v0_llp *llp; + struct dw_edma_v0_lli __iomem *lli; + struct dw_edma_v0_llp __iomem *llp; u32 control = 0, i = 0; u64 sar, dar, addr; int j; - lli = (struct dw_edma_v0_lli *)chunk->ll_region.vaddr; + lli = chunk->ll_region.vaddr; if (chunk->cb) control = DW_EDMA_V0_CB; @@ -224,7 +224,7 @@ static void dw_edma_v0_core_write_chunk(struct dw_edma_chunk *chunk) i++; } - llp = (struct dw_edma_v0_llp *)&lli[i]; + llp = (void __iomem *)&lli[i]; control = DW_EDMA_V0_LLP | DW_EDMA_V0_TCB; if (!chunk->cb) control |= DW_EDMA_V0_CB; diff --git a/drivers/dma/dw-edma/dw-edma-v0-debugfs.c b/drivers/dma/dw-edma/dw-edma-v0-debugfs.c index 5728b6fe938c..42739508c0d8 100644 --- a/drivers/dma/dw-edma/dw-edma-v0-debugfs.c +++ b/drivers/dma/dw-edma/dw-edma-v0-debugfs.c @@ -14,7 +14,7 @@ #include "dw-edma-core.h" #define REGS_ADDR(name) \ - ((dma_addr_t *)®s->name) + ((void __force *)®s->name) #define REGISTER(name) \ { #name, REGS_ADDR(name) } @@ -40,11 +40,11 @@ static struct dentry *base_dir; static struct dw_edma *dw; -static struct dw_edma_v0_regs *regs; +static struct dw_edma_v0_regs __iomem *regs; static struct { - void *start; - void *end; + void __iomem *start; + void __iomem *end; } lim[2][EDMA_V0_MAX_NR_CH]; struct debugfs_entries { @@ -54,22 +54,23 @@ struct debugfs_entries { static int dw_edma_debugfs_u32_get(void *data, u64 *val) { + void __iomem *reg = (void __force __iomem *)data; if (dw->mode == EDMA_MODE_LEGACY && - data >= (void *)®s->type.legacy.ch) { - void *ptr = (void *)®s->type.legacy.ch; + reg >= (void __iomem *)®s->type.legacy.ch) { + void __iomem *ptr = ®s->type.legacy.ch; u32 viewport_sel = 0; unsigned long flags; u16 ch; for (ch = 0; ch < dw->wr_ch_cnt; ch++) - if (lim[0][ch].start >= data && data < lim[0][ch].end) { - ptr += (data - lim[0][ch].start); + if (lim[0][ch].start >= reg && reg < lim[0][ch].end) { + ptr += (reg - lim[0][ch].start); goto legacy_sel_wr; } for (ch = 0; ch < dw->rd_ch_cnt; ch++) - if (lim[1][ch].start >= data && data < lim[1][ch].end) { - ptr += (data - lim[1][ch].start); + if (lim[1][ch].start >= reg && reg < lim[1][ch].end) { + ptr += (reg - lim[1][ch].start); goto legacy_sel_rd; } @@ -86,7 +87,7 @@ static int dw_edma_debugfs_u32_get(void *data, u64 *val) raw_spin_unlock_irqrestore(&dw->lock, flags); } else { - *val = readl(data); + *val = readl(reg); } return 0; @@ -105,7 +106,7 @@ static void dw_edma_debugfs_create_x32(const struct debugfs_entries entries[], } } -static void dw_edma_debugfs_regs_ch(struct dw_edma_v0_ch_regs *regs, +static void dw_edma_debugfs_regs_ch(struct dw_edma_v0_ch_regs __iomem *regs, struct dentry *dir) { int nr_entries; @@ -288,7 +289,7 @@ void dw_edma_v0_debugfs_on(struct dw_edma_chip *chip) if (!dw) return; - regs = (struct dw_edma_v0_regs *)dw->rg_region.vaddr; + regs = dw->rg_region.vaddr; if (!regs) return; From 6f4722b1d1ebf274deb0459a36fad57a7d7fab31 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 22 Jul 2019 14:44:45 +0200 Subject: [PATCH 026/482] dmaengine: dw-edma: fix endianess confusion When building with 'make C=1', sparse reports an endianess bug: drivers/dma/dw-edma/dw-edma-v0-debugfs.c:60:30: warning: cast removes address space of expression drivers/dma/dw-edma/dw-edma-v0-debugfs.c:86:24: warning: incorrect type in argument 1 (different address spaces) drivers/dma/dw-edma/dw-edma-v0-debugfs.c:86:24: expected void const volatile [noderef] *addr drivers/dma/dw-edma/dw-edma-v0-debugfs.c:86:24: got void *[assigned] ptr drivers/dma/dw-edma/dw-edma-v0-debugfs.c:86:24: warning: incorrect type in argument 1 (different address spaces) drivers/dma/dw-edma/dw-edma-v0-debugfs.c:86:24: expected void const volatile [noderef] *addr drivers/dma/dw-edma/dw-edma-v0-debugfs.c:86:24: got void *[assigned] ptr drivers/dma/dw-edma/dw-edma-v0-debugfs.c:86:24: warning: incorrect type in argument 1 (different address spaces) drivers/dma/dw-edma/dw-edma-v0-debugfs.c:86:24: expected void const volatile [noderef] *addr drivers/dma/dw-edma/dw-edma-v0-debugfs.c:86:24: got void *[assigned] ptr The current code is clearly wrong, as it passes an endian-swapped word into a register function where it gets swapped again. Just pass the variables directly into lower_32_bits()/upper_32_bits(). Fixes: 7e4b8a4fbe2c ("dmaengine: Add Synopsys eDMA IP version 0 support") Link: https://lore.kernel.org/lkml/20190617131820.2470686-1-arnd@arndb.de/ Signed-off-by: Arnd Bergmann Acked-by: Gustavo Pimentel Link: https://lore.kernel.org/r/20190722124457.1093886-3-arnd@arndb.de Signed-off-by: Vinod Koul --- drivers/dma/dw-edma/dw-edma-v0-core.c | 24 ++++++++++-------------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/drivers/dma/dw-edma/dw-edma-v0-core.c b/drivers/dma/dw-edma/dw-edma-v0-core.c index 97e3fd41c8a8..692de47b1670 100644 --- a/drivers/dma/dw-edma/dw-edma-v0-core.c +++ b/drivers/dma/dw-edma/dw-edma-v0-core.c @@ -195,7 +195,6 @@ static void dw_edma_v0_core_write_chunk(struct dw_edma_chunk *chunk) struct dw_edma_v0_lli __iomem *lli; struct dw_edma_v0_llp __iomem *llp; u32 control = 0, i = 0; - u64 sar, dar, addr; int j; lli = chunk->ll_region.vaddr; @@ -214,13 +213,11 @@ static void dw_edma_v0_core_write_chunk(struct dw_edma_chunk *chunk) /* Transfer size */ SET_LL(&lli[i].transfer_size, child->sz); /* SAR - low, high */ - sar = cpu_to_le64(child->sar); - SET_LL(&lli[i].sar_low, lower_32_bits(sar)); - SET_LL(&lli[i].sar_high, upper_32_bits(sar)); + SET_LL(&lli[i].sar_low, lower_32_bits(child->sar)); + SET_LL(&lli[i].sar_high, upper_32_bits(child->sar)); /* DAR - low, high */ - dar = cpu_to_le64(child->dar); - SET_LL(&lli[i].dar_low, lower_32_bits(dar)); - SET_LL(&lli[i].dar_high, upper_32_bits(dar)); + SET_LL(&lli[i].dar_low, lower_32_bits(child->dar)); + SET_LL(&lli[i].dar_high, upper_32_bits(child->dar)); i++; } @@ -232,9 +229,8 @@ static void dw_edma_v0_core_write_chunk(struct dw_edma_chunk *chunk) /* Channel control */ SET_LL(&llp->control, control); /* Linked list - low, high */ - addr = cpu_to_le64(chunk->ll_region.paddr); - SET_LL(&llp->llp_low, lower_32_bits(addr)); - SET_LL(&llp->llp_high, upper_32_bits(addr)); + SET_LL(&llp->llp_low, lower_32_bits(chunk->ll_region.paddr)); + SET_LL(&llp->llp_high, upper_32_bits(chunk->ll_region.paddr)); } void dw_edma_v0_core_start(struct dw_edma_chunk *chunk, bool first) @@ -242,7 +238,6 @@ void dw_edma_v0_core_start(struct dw_edma_chunk *chunk, bool first) struct dw_edma_chan *chan = chunk->chan; struct dw_edma *dw = chan->chip->dw; u32 tmp; - u64 llp; dw_edma_v0_core_write_chunk(chunk); @@ -262,9 +257,10 @@ void dw_edma_v0_core_start(struct dw_edma_chunk *chunk, bool first) SET_CH(dw, chan->dir, chan->id, ch_control1, (DW_EDMA_V0_CCS | DW_EDMA_V0_LLE)); /* Linked list - low, high */ - llp = cpu_to_le64(chunk->ll_region.paddr); - SET_CH(dw, chan->dir, chan->id, llp_low, lower_32_bits(llp)); - SET_CH(dw, chan->dir, chan->id, llp_high, upper_32_bits(llp)); + SET_CH(dw, chan->dir, chan->id, llp_low, + lower_32_bits(chunk->ll_region.paddr)); + SET_CH(dw, chan->dir, chan->id, llp_high, + upper_32_bits(chunk->ll_region.paddr)); } /* Doorbell */ SET_RW(dw, chan->dir, doorbell, From 5d6fb560729a5d5554e23db8d00eb57cd0021083 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 12 Jul 2019 11:13:30 +0200 Subject: [PATCH 027/482] dmaengine: ste_dma40: fix unneeded variable warning clang-9 points out that there are two variables that depending on the configuration may only be used in an ARRAY_SIZE() expression but not referenced: drivers/dma/ste_dma40.c:145:12: error: variable 'd40_backup_regs' is not needed and will not be emitted [-Werror,-Wunneeded-internal-declaration] static u32 d40_backup_regs[] = { ^ drivers/dma/ste_dma40.c:214:12: error: variable 'd40_backup_regs_chan' is not needed and will not be emitted [-Werror,-Wunneeded-internal-declaration] static u32 d40_backup_regs_chan[] = { Mark these __maybe_unused to shut up the warning. Signed-off-by: Arnd Bergmann Reviewed-by: Nathan Chancellor Reviewed-by: Linus Walleij Link: https://lore.kernel.org/r/20190712091357.744515-1-arnd@arndb.de Signed-off-by: Vinod Koul --- drivers/dma/ste_dma40.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/dma/ste_dma40.c b/drivers/dma/ste_dma40.c index 89d710899010..de8bfd9a76e9 100644 --- a/drivers/dma/ste_dma40.c +++ b/drivers/dma/ste_dma40.c @@ -142,7 +142,7 @@ enum d40_events { * when the DMA hw is powered off. * TODO: Add save/restore of D40_DREG_GCC on dma40 v3 or later, if that works. */ -static u32 d40_backup_regs[] = { +static __maybe_unused u32 d40_backup_regs[] = { D40_DREG_LCPA, D40_DREG_LCLA, D40_DREG_PRMSE, @@ -211,7 +211,7 @@ static u32 d40_backup_regs_v4b[] = { #define BACKUP_REGS_SZ_V4B ARRAY_SIZE(d40_backup_regs_v4b) -static u32 d40_backup_regs_chan[] = { +static __maybe_unused u32 d40_backup_regs_chan[] = { D40_CHAN_REG_SSCFG, D40_CHAN_REG_SSELT, D40_CHAN_REG_SSPTR, From 45004d66f2a28d78f543fb2ffbc133e31dc2d162 Mon Sep 17 00:00:00 2001 From: Wenwen Wang Date: Mon, 22 Jul 2019 08:57:44 -0500 Subject: [PATCH 028/482] ASoC: dapm: fix a memory leak bug In snd_soc_dapm_new_control_unlocked(), a kernel buffer is allocated in dapm_cnew_widget() to hold the new dapm widget. Then, different actions are taken according to the id of the widget, i.e., 'w->id'. If any failure occurs during this process, snd_soc_dapm_new_control_unlocked() should be terminated by going to the 'request_failed' label. However, the allocated kernel buffer is not freed on this code path, leading to a memory leak bug. To fix the above issue, free the buffer before returning from snd_soc_dapm_new_control_unlocked() through the 'request_failed' label. Signed-off-by: Wenwen Wang Link: https://lore.kernel.org/r/1563803864-2809-1-git-send-email-wang6495@umn.edu Signed-off-by: Mark Brown --- sound/soc/soc-dapm.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c index 9cd87e47ee8f..656cb5cd9cd8 100644 --- a/sound/soc/soc-dapm.c +++ b/sound/soc/soc-dapm.c @@ -3704,6 +3704,8 @@ snd_soc_dapm_new_control_unlocked(struct snd_soc_dapm_context *dapm, dev_err(dapm->dev, "ASoC: Failed to request %s: %d\n", w->name, ret); + kfree_const(w->sname); + kfree(w); return ERR_PTR(ret); } From fa3a03da549a889fc9dbc0d3c5908eb7882cac8f Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Sun, 7 Jul 2019 22:15:13 +0200 Subject: [PATCH 029/482] batman-adv: Fix netlink dumping of all mcast_flags buckets The bucket variable is only updated outside the loop over the mcast_flags buckets. It will only be updated during a dumping run when the dumping has to be interrupted and a new message has to be started. This could result in repeated or missing entries when the multicast flags are dumped to userspace. Fixes: d2d489b7d851 ("batman-adv: Add inconsistent multicast netlink dump detection") Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- net/batman-adv/multicast.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c index 67d7f83009ae..a3488cfb3d1e 100644 --- a/net/batman-adv/multicast.c +++ b/net/batman-adv/multicast.c @@ -2303,7 +2303,7 @@ __batadv_mcast_flags_dump(struct sk_buff *msg, u32 portid, while (bucket_tmp < hash->size) { if (batadv_mcast_flags_dump_bucket(msg, portid, cb, hash, - *bucket, &idx_tmp)) + bucket_tmp, &idx_tmp)) break; bucket_tmp++; From f7af86ccf1882084293b11077deec049fd01da63 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Sun, 7 Jul 2019 23:04:57 +0200 Subject: [PATCH 030/482] batman-adv: Fix deletion of RTR(4|6) mcast list entries MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The multicast code uses the lists bat_priv->mcast.want_all_rtr*_list to store all all originator nodes which don't have the flag no-RTR4 or no-RTR6 set. When an originator is purged, it has to be removed from these lists. Since all entries without the BATADV_MCAST_WANT_NO_RTR4/6 are stored in these lists, they have to be handled like entries which have these flags set to force the update routines to remove them from the lists when purging the originator. Not doing so will leave a pointer to a freed memory region inside the list. Trying to operate on these lists will then cause an use-after-free error: BUG: KASAN: use-after-free in batadv_mcast_want_rtr4_update+0x335/0x3a0 [batman_adv] Write of size 8 at addr ffff888007b41a38 by task swapper/0/0 Fixes: 61caf3d109f5 ("batman-adv: mcast: detect, distribute and maintain multicast router presence") Signed-off-by: Sven Eckelmann Acked-by: Linus Lüssing Signed-off-by: Simon Wunderlich --- net/batman-adv/multicast.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c index a3488cfb3d1e..1d5bdf3a4b65 100644 --- a/net/batman-adv/multicast.c +++ b/net/batman-adv/multicast.c @@ -2420,8 +2420,10 @@ void batadv_mcast_purge_orig(struct batadv_orig_node *orig) batadv_mcast_want_unsnoop_update(bat_priv, orig, BATADV_NO_FLAGS); batadv_mcast_want_ipv4_update(bat_priv, orig, BATADV_NO_FLAGS); batadv_mcast_want_ipv6_update(bat_priv, orig, BATADV_NO_FLAGS); - batadv_mcast_want_rtr4_update(bat_priv, orig, BATADV_NO_FLAGS); - batadv_mcast_want_rtr6_update(bat_priv, orig, BATADV_NO_FLAGS); + batadv_mcast_want_rtr4_update(bat_priv, orig, + BATADV_MCAST_WANT_NO_RTR4); + batadv_mcast_want_rtr6_update(bat_priv, orig, + BATADV_MCAST_WANT_NO_RTR6); spin_unlock_bh(&orig->mcast_handler_lock); } From bca031e2c8aa22a978a2452bf959e27e9fa73dc7 Mon Sep 17 00:00:00 2001 From: Zenghui Yu Date: Thu, 18 Jul 2019 08:15:10 +0000 Subject: [PATCH 031/482] KVM: arm/arm64: Introduce kvm_pmu_vcpu_init() to setup PMU counter index We use "pmc->idx" and the "chained" bitmap to determine if the pmc is chained, in kvm_pmu_pmc_is_chained(). But idx might be uninitialized (and random) when we doing this decision, through a KVM_ARM_VCPU_INIT ioctl -> kvm_pmu_vcpu_reset(). And the test_bit() against this random idx will potentially hit a KASAN BUG [1]. In general, idx is the static property of a PMU counter that is not expected to be modified across resets, as suggested by Julien. It looks more reasonable if we can setup the PMU counter idx for a vcpu in its creation time. Introduce a new function - kvm_pmu_vcpu_init() for this basic setup. Oh, and the KASAN BUG will get fixed this way. [1] https://www.spinics.net/lists/kvm-arm/msg36700.html Fixes: 80f393a23be6 ("KVM: arm/arm64: Support chained PMU counters") Suggested-by: Andrew Murray Suggested-by: Julien Thierry Acked-by: Julien Thierry Signed-off-by: Zenghui Yu Signed-off-by: Marc Zyngier --- include/kvm/arm_pmu.h | 2 ++ virt/kvm/arm/arm.c | 2 ++ virt/kvm/arm/pmu.c | 18 +++++++++++++++--- 3 files changed, 19 insertions(+), 3 deletions(-) diff --git a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h index 16c769a7f979..6db030439e29 100644 --- a/include/kvm/arm_pmu.h +++ b/include/kvm/arm_pmu.h @@ -34,6 +34,7 @@ struct kvm_pmu { u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx); void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu, u64 select_idx, u64 val); u64 kvm_pmu_valid_counter_mask(struct kvm_vcpu *vcpu); +void kvm_pmu_vcpu_init(struct kvm_vcpu *vcpu); void kvm_pmu_vcpu_reset(struct kvm_vcpu *vcpu); void kvm_pmu_vcpu_destroy(struct kvm_vcpu *vcpu); void kvm_pmu_disable_counter_mask(struct kvm_vcpu *vcpu, u64 val); @@ -71,6 +72,7 @@ static inline u64 kvm_pmu_valid_counter_mask(struct kvm_vcpu *vcpu) { return 0; } +static inline void kvm_pmu_vcpu_init(struct kvm_vcpu *vcpu) {} static inline void kvm_pmu_vcpu_reset(struct kvm_vcpu *vcpu) {} static inline void kvm_pmu_vcpu_destroy(struct kvm_vcpu *vcpu) {} static inline void kvm_pmu_disable_counter_mask(struct kvm_vcpu *vcpu, u64 val) {} diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index f645c0fbf7ec..c704fa696184 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -340,6 +340,8 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) /* Set up the timer */ kvm_timer_vcpu_init(vcpu); + kvm_pmu_vcpu_init(vcpu); + kvm_arm_reset_debug_ptr(vcpu); return kvm_vgic_vcpu_init(vcpu); diff --git a/virt/kvm/arm/pmu.c b/virt/kvm/arm/pmu.c index 3dd8238ed246..362a01886bab 100644 --- a/virt/kvm/arm/pmu.c +++ b/virt/kvm/arm/pmu.c @@ -214,6 +214,20 @@ static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc) kvm_pmu_release_perf_event(pmc); } +/** + * kvm_pmu_vcpu_init - assign pmu counter idx for cpu + * @vcpu: The vcpu pointer + * + */ +void kvm_pmu_vcpu_init(struct kvm_vcpu *vcpu) +{ + int i; + struct kvm_pmu *pmu = &vcpu->arch.pmu; + + for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) + pmu->pmc[i].idx = i; +} + /** * kvm_pmu_vcpu_reset - reset pmu state for cpu * @vcpu: The vcpu pointer @@ -224,10 +238,8 @@ void kvm_pmu_vcpu_reset(struct kvm_vcpu *vcpu) int i; struct kvm_pmu *pmu = &vcpu->arch.pmu; - for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) { + for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) kvm_pmu_stop_counter(vcpu, &pmu->pmc[i]); - pmu->pmc[i].idx = i; - } bitmap_zero(vcpu->arch.pmu.chained, ARMV8_PMU_MAX_COUNTER_PAIRS); } From 8201f11a1f75e3aa7d5327d0b1d8cb544aeaa62f Mon Sep 17 00:00:00 2001 From: Stephan Gerhold Date: Mon, 22 Jul 2019 15:03:52 +0200 Subject: [PATCH 032/482] ASoC: qcom: apq8016_sbc: Fix oops with multiple DAI links apq8016_sbc_parse_of() sets up multiple DAI links, depending on the number of nodes in the device tree. However, at the moment CPU and platform components are only allocated for the first link. This causes an oops when more than one link is defined: Internal error: Oops: 96000044 [#1] SMP CPU: 0 PID: 1015 Comm: kworker/0:2 Not tainted 5.3.0-rc1 #4 Call trace: apq8016_sbc_platform_probe+0x1a8/0x3f0 platform_drv_probe+0x50/0xa0 ... Move the allocation inside the loop to ensure that each link is properly initialized. Fixes: 98b232ca9e0e ("ASoC: qcom: apq8016_sbc: use modern dai_link style") Signed-off-by: Stephan Gerhold Acked-by: Kuninori Morimoto Link: https://lore.kernel.org/r/20190722130352.95874-1-stephan@gerhold.net Signed-off-by: Mark Brown --- sound/soc/qcom/apq8016_sbc.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/sound/soc/qcom/apq8016_sbc.c b/sound/soc/qcom/apq8016_sbc.c index f60a71990f66..ac75838bbfab 100644 --- a/sound/soc/qcom/apq8016_sbc.c +++ b/sound/soc/qcom/apq8016_sbc.c @@ -150,17 +150,17 @@ static struct apq8016_sbc_data *apq8016_sbc_parse_of(struct snd_soc_card *card) link = data->dai_link; - dlc = devm_kzalloc(dev, 2 * sizeof(*dlc), GFP_KERNEL); - if (!dlc) - return ERR_PTR(-ENOMEM); - - link->cpus = &dlc[0]; - link->platforms = &dlc[1]; - - link->num_cpus = 1; - link->num_platforms = 1; - for_each_child_of_node(node, np) { + dlc = devm_kzalloc(dev, 2 * sizeof(*dlc), GFP_KERNEL); + if (!dlc) + return ERR_PTR(-ENOMEM); + + link->cpus = &dlc[0]; + link->platforms = &dlc[1]; + + link->num_cpus = 1; + link->num_platforms = 1; + cpu = of_get_child_by_name(np, "cpu"); codec = of_get_child_by_name(np, "codec"); From 717dedb1dcee92788b81233aa0a221573c95daff Mon Sep 17 00:00:00 2001 From: Kai Vehmanen Date: Mon, 22 Jul 2019 09:14:01 -0500 Subject: [PATCH 033/482] ASoC: SOF: Intel: hda: remove misleading error trace from IRQ thread Downgrade "nothing to do in IRQ thread" message from error to a debug message in the IPC interrupt handler thread. The spurious wake-up can happen if a HDA stream interrupt is raised while the IPC interrupt thread is running. IPC functionality is not impacted by this condition, so debug is a more appropriate trace level. Signed-off-by: Kai Vehmanen Signed-off-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20190722141402.7194-21-pierre-louis.bossart@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/sof/intel/cnl.c | 4 ++-- sound/soc/sof/intel/hda-ipc.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/sound/soc/sof/intel/cnl.c b/sound/soc/sof/intel/cnl.c index f2b392998f20..ffd8d4394537 100644 --- a/sound/soc/sof/intel/cnl.c +++ b/sound/soc/sof/intel/cnl.c @@ -101,8 +101,8 @@ static irqreturn_t cnl_ipc_irq_thread(int irq, void *context) /* * This interrupt is not shared so no need to return IRQ_NONE. */ - dev_err_ratelimited(sdev->dev, - "error: nothing to do in IRQ thread\n"); + dev_dbg_ratelimited(sdev->dev, + "nothing to do in IPC IRQ thread\n"); } /* re-enable IPC interrupt */ diff --git a/sound/soc/sof/intel/hda-ipc.c b/sound/soc/sof/intel/hda-ipc.c index 50244b82600c..2ecba91f5219 100644 --- a/sound/soc/sof/intel/hda-ipc.c +++ b/sound/soc/sof/intel/hda-ipc.c @@ -224,8 +224,8 @@ irqreturn_t hda_dsp_ipc_irq_thread(int irq, void *context) /* * This interrupt is not shared so no need to return IRQ_NONE. */ - dev_err_ratelimited(sdev->dev, - "error: nothing to do in IRQ thread\n"); + dev_dbg_ratelimited(sdev->dev, + "nothing to do in IPC IRQ thread\n"); } /* re-enable IPC interrupt */ From 020834bebbcb5db4026757556710ea58ccb76a06 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 23 Jul 2019 16:41:35 +0300 Subject: [PATCH 034/482] Input: applespi - select CRC16 module In some rare randconfig builds, CRC16 is disabled, which leads to a link error: drivers/input/keyboard/applespi.o: In function `applespi_send_cmd_msg': applespi.c:(.text+0x449f): undefined reference to `crc16' drivers/input/keyboard/applespi.o: In function `applespi_verify_crc': applespi.c:(.text+0x7538): undefined reference to `crc16' This symbol is meant to be selected for each user in Kconfig, so do that here as well. Fixes: 038b1a05eae6 ("Input: add Apple SPI keyboard and trackpad driver") Signed-off-by: Arnd Bergmann Signed-off-by: Dmitry Torokhov --- drivers/input/keyboard/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/input/keyboard/Kconfig b/drivers/input/keyboard/Kconfig index 8e9c3ea9d5e7..ebb19e21473e 100644 --- a/drivers/input/keyboard/Kconfig +++ b/drivers/input/keyboard/Kconfig @@ -76,6 +76,7 @@ config KEYBOARD_APPLESPI depends on ACPI && EFI depends on SPI depends on X86 || COMPILE_TEST + select CRC16 help Say Y here if you are running Linux on any Apple MacBook8,1 or later, or any MacBookPro13,* or MacBookPro14,*. From b1cbb2f09d7b3f1726215987d5b28f5aea9548da Mon Sep 17 00:00:00 2001 From: Nikolas Nyby Date: Tue, 23 Jul 2019 16:42:57 +0300 Subject: [PATCH 035/482] Input: applespi - fix trivial typo in struct description This fixes a typo in the keyboard_protocol description. coodinate -> coordinate. Signed-off-by: Nikolas Nyby Signed-off-by: Dmitry Torokhov --- drivers/input/keyboard/applespi.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/input/keyboard/applespi.c b/drivers/input/keyboard/applespi.c index 52d394f473a2..acf34a5ff571 100644 --- a/drivers/input/keyboard/applespi.c +++ b/drivers/input/keyboard/applespi.c @@ -134,10 +134,10 @@ struct keyboard_protocol { * struct tp_finger - single trackpad finger structure, le16-aligned * * @origin: zero when switching track finger - * @abs_x: absolute x coodinate - * @abs_y: absolute y coodinate - * @rel_x: relative x coodinate - * @rel_y: relative y coodinate + * @abs_x: absolute x coordinate + * @abs_y: absolute y coordinate + * @rel_x: relative x coordinate + * @rel_y: relative y coordinate * @tool_major: tool area, major axis * @tool_minor: tool area, minor axis * @orientation: 16384 when point, else 15 bit angle From 883a2a80f79ca5c0c105605fafabd1f3df99b34c Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Mon, 22 Jul 2019 10:56:55 +0300 Subject: [PATCH 036/482] Input: elantech - enable SMBus on new (2018+) systems There are some new HP laptops with Elantech touchpad that don't support multitouch. Currently we use ETP_NEW_IC_SMBUS_HOST_NOTIFY() to check if SMBus is supported, but in addition to firmware version, the bus type also informs us whether the IC can support SMBus. To avoid breaking old ICs, we will only enable SMbus support based the bus type on systems manufactured after 2018. Lastly, let's consolidate all checks into elantech_use_host_notify() and use it to determine whether to use PS/2 or SMBus. Signed-off-by: Kai-Heng Feng Acked-by: Benjamin Tissoires Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/elantech.c | 54 ++++++++++++++++------------------ 1 file changed, 25 insertions(+), 29 deletions(-) diff --git a/drivers/input/mouse/elantech.c b/drivers/input/mouse/elantech.c index 2d8434b7b623..73544776a9ed 100644 --- a/drivers/input/mouse/elantech.c +++ b/drivers/input/mouse/elantech.c @@ -1827,6 +1827,30 @@ static int elantech_create_smbus(struct psmouse *psmouse, leave_breadcrumbs); } +static bool elantech_use_host_notify(struct psmouse *psmouse, + struct elantech_device_info *info) +{ + if (ETP_NEW_IC_SMBUS_HOST_NOTIFY(info->fw_version)) + return true; + + switch (info->bus) { + case ETP_BUS_PS2_ONLY: + /* expected case */ + break; + case ETP_BUS_SMB_HST_NTFY_ONLY: + case ETP_BUS_PS2_SMB_HST_NTFY: + /* SMbus implementation is stable since 2018 */ + if (dmi_get_bios_year() >= 2018) + return true; + default: + psmouse_dbg(psmouse, + "Ignoring SMBus bus provider %d\n", info->bus); + break; + } + + return false; +} + /** * elantech_setup_smbus - called once the PS/2 devices are enumerated * and decides to instantiate a SMBus InterTouch device. @@ -1846,7 +1870,7 @@ static int elantech_setup_smbus(struct psmouse *psmouse, * i2c_blacklist_pnp_ids. * Old ICs are up to the user to decide. */ - if (!ETP_NEW_IC_SMBUS_HOST_NOTIFY(info->fw_version) || + if (!elantech_use_host_notify(psmouse, info) || psmouse_matches_pnp_id(psmouse, i2c_blacklist_pnp_ids)) return -ENXIO; } @@ -1866,34 +1890,6 @@ static int elantech_setup_smbus(struct psmouse *psmouse, return 0; } -static bool elantech_use_host_notify(struct psmouse *psmouse, - struct elantech_device_info *info) -{ - if (ETP_NEW_IC_SMBUS_HOST_NOTIFY(info->fw_version)) - return true; - - switch (info->bus) { - case ETP_BUS_PS2_ONLY: - /* expected case */ - break; - case ETP_BUS_SMB_ALERT_ONLY: - /* fall-through */ - case ETP_BUS_PS2_SMB_ALERT: - psmouse_dbg(psmouse, "Ignoring SMBus provider through alert protocol.\n"); - break; - case ETP_BUS_SMB_HST_NTFY_ONLY: - /* fall-through */ - case ETP_BUS_PS2_SMB_HST_NTFY: - return true; - default: - psmouse_dbg(psmouse, - "Ignoring SMBus bus provider %d.\n", - info->bus); - } - - return false; -} - int elantech_init_smbus(struct psmouse *psmouse) { struct elantech_device_info info; From f3b5720cabafe90b8b7cffbc7b8fec1c17d4ff4b Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Wed, 24 Jul 2019 10:29:03 +0300 Subject: [PATCH 037/482] Input: elantech - annotate fall-through case in elantech_use_host_notify() This avoids a warning when building with -Wimplicit-fallthrough. Fixes: 883a2a80f79c ("Input: elantech - enable SMBus on new (2018+) systems") Reported-by: Stephen Rothwell Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/elantech.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/input/mouse/elantech.c b/drivers/input/mouse/elantech.c index 73544776a9ed..04fe43440a3c 100644 --- a/drivers/input/mouse/elantech.c +++ b/drivers/input/mouse/elantech.c @@ -1842,6 +1842,7 @@ static bool elantech_use_host_notify(struct psmouse *psmouse, /* SMbus implementation is stable since 2018 */ if (dmi_get_bios_year() >= 2018) return true; + /* fall through */ default: psmouse_dbg(psmouse, "Ignoring SMBus bus provider %d\n", info->bus); From 40aa5383e393d72f6aa3943a4e7b1aae25a1e43b Mon Sep 17 00:00:00 2001 From: Ricard Wanderlof Date: Wed, 24 Jul 2019 11:38:44 +0200 Subject: [PATCH 038/482] ASoC: Fail card instantiation if DAI format setup fails If the DAI format setup fails, there is no valid communication format between CPU and CODEC, so fail card instantiation, rather than continue with a card that will most likely not function properly. Signed-off-by: Ricard Wanderlof Link: https://lore.kernel.org/r/alpine.DEB.2.20.1907241132350.6338@lnxricardw1.se.axis.com Signed-off-by: Mark Brown --- sound/soc/soc-core.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c index c0a774d0a5ff..1486fb2eb921 100644 --- a/sound/soc/soc-core.c +++ b/sound/soc/soc-core.c @@ -1511,8 +1511,11 @@ static int soc_probe_link_dais(struct snd_soc_card *card, } } - if (dai_link->dai_fmt) - snd_soc_runtime_set_dai_fmt(rtd, dai_link->dai_fmt); + if (dai_link->dai_fmt) { + ret = snd_soc_runtime_set_dai_fmt(rtd, dai_link->dai_fmt); + if (ret) + return ret; + } ret = soc_post_component_init(rtd, dai_link->name); if (ret) From dbc4f989c878fe101fb7920e9609e8ec44e097cd Mon Sep 17 00:00:00 2001 From: Phil Reid Date: Tue, 16 Jul 2019 08:24:36 +0800 Subject: [PATCH 039/482] Staging: fbtft: Fix probing of gpio descriptor MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Conversion to use gpio descriptors broke all gpio lookups as devm_gpiod_get_index was converted to use dev->driver->name for the gpio name lookup. Fix this by using the name param. In addition gpiod_get post-fixes the -gpios to the name so that shouldn't be included in the call. However this then breaks the of_find_property call to see if the gpio entry exists as all fbtft treats all gpios as optional. So use devm_gpiod_get_index_optional instead which achieves the same thing and is simpler. Nishad confirmed the changes where only ever compile tested. Fixes: c440eee1a7a1 ("Staging: fbtft: Switch to the gpio descriptor interface") Reviewed-by: Nicolas Saenz Julienne Tested-by: Nicolas Saenz Julienne Tested-by: Jan Sebastian Götte Signed-off-by: Phil Reid Cc: stable Link: https://lore.kernel.org/r/1563236677-5045-2-git-send-email-preid@electromag.com.au Signed-off-by: Greg Kroah-Hartman --- drivers/staging/fbtft/fbtft-core.c | 39 ++++++++++++++---------------- 1 file changed, 18 insertions(+), 21 deletions(-) diff --git a/drivers/staging/fbtft/fbtft-core.c b/drivers/staging/fbtft/fbtft-core.c index 7cbc1bdd2d8a..b963cccdc3f6 100644 --- a/drivers/staging/fbtft/fbtft-core.c +++ b/drivers/staging/fbtft/fbtft-core.c @@ -76,21 +76,18 @@ static int fbtft_request_one_gpio(struct fbtft_par *par, struct gpio_desc **gpiop) { struct device *dev = par->info->device; - struct device_node *node = dev->of_node; int ret = 0; - if (of_find_property(node, name, NULL)) { - *gpiop = devm_gpiod_get_index(dev, dev->driver->name, index, - GPIOD_OUT_HIGH); - if (IS_ERR(*gpiop)) { - ret = PTR_ERR(*gpiop); - dev_err(dev, - "Failed to request %s GPIO:%d\n", name, ret); - return ret; - } - fbtft_par_dbg(DEBUG_REQUEST_GPIOS, par, "%s: '%s' GPIO\n", - __func__, name); + *gpiop = devm_gpiod_get_index_optional(dev, name, index, + GPIOD_OUT_HIGH); + if (IS_ERR(*gpiop)) { + ret = PTR_ERR(*gpiop); + dev_err(dev, + "Failed to request %s GPIO: %d\n", name, ret); + return ret; } + fbtft_par_dbg(DEBUG_REQUEST_GPIOS, par, "%s: '%s' GPIO\n", + __func__, name); return ret; } @@ -103,34 +100,34 @@ static int fbtft_request_gpios_dt(struct fbtft_par *par) if (!par->info->device->of_node) return -EINVAL; - ret = fbtft_request_one_gpio(par, "reset-gpios", 0, &par->gpio.reset); + ret = fbtft_request_one_gpio(par, "reset", 0, &par->gpio.reset); if (ret) return ret; - ret = fbtft_request_one_gpio(par, "dc-gpios", 0, &par->gpio.dc); + ret = fbtft_request_one_gpio(par, "dc", 0, &par->gpio.dc); if (ret) return ret; - ret = fbtft_request_one_gpio(par, "rd-gpios", 0, &par->gpio.rd); + ret = fbtft_request_one_gpio(par, "rd", 0, &par->gpio.rd); if (ret) return ret; - ret = fbtft_request_one_gpio(par, "wr-gpios", 0, &par->gpio.wr); + ret = fbtft_request_one_gpio(par, "wr", 0, &par->gpio.wr); if (ret) return ret; - ret = fbtft_request_one_gpio(par, "cs-gpios", 0, &par->gpio.cs); + ret = fbtft_request_one_gpio(par, "cs", 0, &par->gpio.cs); if (ret) return ret; - ret = fbtft_request_one_gpio(par, "latch-gpios", 0, &par->gpio.latch); + ret = fbtft_request_one_gpio(par, "latch", 0, &par->gpio.latch); if (ret) return ret; for (i = 0; i < 16; i++) { - ret = fbtft_request_one_gpio(par, "db-gpios", i, + ret = fbtft_request_one_gpio(par, "db", i, &par->gpio.db[i]); if (ret) return ret; - ret = fbtft_request_one_gpio(par, "led-gpios", i, + ret = fbtft_request_one_gpio(par, "led", i, &par->gpio.led[i]); if (ret) return ret; - ret = fbtft_request_one_gpio(par, "aux-gpios", i, + ret = fbtft_request_one_gpio(par, "aux", i, &par->gpio.aux[i]); if (ret) return ret; From b918d1c2706619cb0712a61cc8c05148b68b24b2 Mon Sep 17 00:00:00 2001 From: Phil Reid Date: Tue, 16 Jul 2019 08:24:37 +0800 Subject: [PATCH 040/482] Staging: fbtft: Fix reset assertion when using gpio descriptor MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Typically gpiod_set_value calls would assert the reset line and then release it using the symantics of: gpiod_set_value(par->gpio.reset, 0); ... delay gpiod_set_value(par->gpio.reset, 1); And the gpio binding would specify the polarity. Prior to conversion to gpiod calls the polarity in the DT was ignored and assumed to be active low. Fix it so that DT polarity is respected. Fixes: c440eee1a7a1 ("Staging: fbtft: Switch to the gpio descriptor interface") Reviewed-by: Nicolas Saenz Julienne Tested-by: Nicolas Saenz Julienne Tested-by: Jan Sebastian Götte Signed-off-by: Phil Reid Cc: stable Link: https://lore.kernel.org/r/1563236677-5045-3-git-send-email-preid@electromag.com.au Signed-off-by: Greg Kroah-Hartman --- drivers/staging/fbtft/fbtft-core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/staging/fbtft/fbtft-core.c b/drivers/staging/fbtft/fbtft-core.c index b963cccdc3f6..c3179cc847f8 100644 --- a/drivers/staging/fbtft/fbtft-core.c +++ b/drivers/staging/fbtft/fbtft-core.c @@ -231,9 +231,9 @@ static void fbtft_reset(struct fbtft_par *par) if (!par->gpio.reset) return; fbtft_par_dbg(DEBUG_RESET, par, "%s()\n", __func__); - gpiod_set_value_cansleep(par->gpio.reset, 0); - usleep_range(20, 40); gpiod_set_value_cansleep(par->gpio.reset, 1); + usleep_range(20, 40); + gpiod_set_value_cansleep(par->gpio.reset, 0); msleep(120); } From 66665bb9979246729562a09fcdbb101c83127989 Mon Sep 17 00:00:00 2001 From: Ivan Bornyakov Date: Wed, 10 Jul 2019 23:45:18 +0300 Subject: [PATCH 041/482] staging: gasket: apex: fix copy-paste typo In sysfs_show() case-branches ATTR_KERNEL_HIB_PAGE_TABLE_SIZE and ATTR_KERNEL_HIB_SIMPLE_PAGE_TABLE_SIZE do the same. It looks like copy-paste mistake. Signed-off-by: Ivan Bornyakov Cc: stable Link: https://lore.kernel.org/r/20190710204518.16814-1-brnkv.i1@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/gasket/apex_driver.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/gasket/apex_driver.c b/drivers/staging/gasket/apex_driver.c index 2be45ee9d061..464648ee2036 100644 --- a/drivers/staging/gasket/apex_driver.c +++ b/drivers/staging/gasket/apex_driver.c @@ -532,7 +532,7 @@ static ssize_t sysfs_show(struct device *device, struct device_attribute *attr, break; case ATTR_KERNEL_HIB_SIMPLE_PAGE_TABLE_SIZE: ret = scnprintf(buf, PAGE_SIZE, "%u\n", - gasket_page_table_num_entries( + gasket_page_table_num_simple_entries( gasket_dev->page_table[0])); break; case ATTR_KERNEL_HIB_NUM_ACTIVE_PAGES: From fb2b055b7e6e44efda737c7c92f46c0868bb04e5 Mon Sep 17 00:00:00 2001 From: Adham Abozaeid Date: Mon, 22 Jul 2019 21:38:44 +0000 Subject: [PATCH 042/482] staging: wilc1000: flush the workqueue before deinit the host Before deinitializing the host interface, the workqueue should be flushed to handle any pending deferred work Signed-off-by: Adham Abozaeid Cc: stable Link: https://lore.kernel.org/r/20190722213837.21952-1-adham.abozaeid@microchip.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/wilc1000/wilc_wfi_cfgoperations.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/staging/wilc1000/wilc_wfi_cfgoperations.c b/drivers/staging/wilc1000/wilc_wfi_cfgoperations.c index d72fdd333050..736eedef23b6 100644 --- a/drivers/staging/wilc1000/wilc_wfi_cfgoperations.c +++ b/drivers/staging/wilc1000/wilc_wfi_cfgoperations.c @@ -1969,6 +1969,7 @@ void wilc_deinit_host_int(struct net_device *net) priv->p2p_listen_state = false; + flush_workqueue(vif->wilc->hif_workqueue); mutex_destroy(&priv->scan_req_lock); ret = wilc_deinit(vif); From 610504301235e6d7c5a8505d1d3401bed0403d05 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 21 Jul 2019 19:08:24 +0200 Subject: [PATCH 043/482] staging: unisys: visornic: Update the description of 'poll_for_irq()' Commit e99e88a9d2b06 ("treewide: setup_timer() -> timer_setup()") has updated the parameters of 'poll_for_irq()' but not the comment above the function. Update the comment and fix a typo. s/visronic/visornic/ Signed-off-by: Christophe JAILLET Reviewed-By: Enrico Weigelt Link: https://lore.kernel.org/r/20190721170824.3412-1-christophe.jaillet@wanadoo.fr Signed-off-by: Greg Kroah-Hartman --- drivers/staging/unisys/visornic/visornic_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/staging/unisys/visornic/visornic_main.c b/drivers/staging/unisys/visornic/visornic_main.c index 9d4f1dab0968..40dd573e73c3 100644 --- a/drivers/staging/unisys/visornic/visornic_main.c +++ b/drivers/staging/unisys/visornic/visornic_main.c @@ -1750,7 +1750,8 @@ static int visornic_poll(struct napi_struct *napi, int budget) } /* poll_for_irq - checks the status of the response queue - * @v: Void pointer to the visronic devdata struct. + * @t: pointer to the 'struct timer_list' from which we can retrieve the + * the visornic devdata struct. * * Main function of the vnic_incoming thread. Periodically check the response * queue and drain it if needed. From 92e3e884887c0d278042fbbb6f6c9b41d6addb71 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Sebastian=20G=C3=B6tte?= Date: Wed, 17 Jul 2019 23:41:37 +0900 Subject: [PATCH 044/482] Staging: fbtft: Fix GPIO handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit c440eee1a7a1 ("Staging: fbtft: Switch to the gpio descriptor interface") breaks GPIO handling. In several places, checks to only set a GPIO if it was configured ended up backwards. I have tested this fix. The fixed driver works with a ili9486 display connected to a raspberry pi via SPI. Fixes: c440eee1a7a1d ("Staging: fbtft: Switch to the gpio descriptor interface") Tested-by: Jan Sebastian Götte Reviewed-by: Nicolas Saenz Julienne Signed-off-by: Jan Sebastian Götte Link: https://lore.kernel.org/r/75ada52f-afa1-08bc-d0ce-966fc1110e70@jaseg.net Signed-off-by: Greg Kroah-Hartman --- drivers/staging/fbtft/fb_bd663474.c | 2 +- drivers/staging/fbtft/fb_ili9163.c | 2 +- drivers/staging/fbtft/fb_ili9325.c | 2 +- drivers/staging/fbtft/fb_s6d1121.c | 2 +- drivers/staging/fbtft/fb_ssd1289.c | 2 +- drivers/staging/fbtft/fb_ssd1331.c | 4 ++-- drivers/staging/fbtft/fb_upd161704.c | 2 +- drivers/staging/fbtft/fbtft-bus.c | 2 +- drivers/staging/fbtft/fbtft-core.c | 4 ++-- 9 files changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/staging/fbtft/fb_bd663474.c b/drivers/staging/fbtft/fb_bd663474.c index b6c6d66e4eb1..e2c7646588f8 100644 --- a/drivers/staging/fbtft/fb_bd663474.c +++ b/drivers/staging/fbtft/fb_bd663474.c @@ -24,7 +24,7 @@ static int init_display(struct fbtft_par *par) { - if (!par->gpio.cs) + if (par->gpio.cs) gpiod_set_value(par->gpio.cs, 0); /* Activate chip */ par->fbtftops.reset(par); diff --git a/drivers/staging/fbtft/fb_ili9163.c b/drivers/staging/fbtft/fb_ili9163.c index d609a2b67db9..fd32376700e2 100644 --- a/drivers/staging/fbtft/fb_ili9163.c +++ b/drivers/staging/fbtft/fb_ili9163.c @@ -77,7 +77,7 @@ static int init_display(struct fbtft_par *par) { par->fbtftops.reset(par); - if (!par->gpio.cs) + if (par->gpio.cs) gpiod_set_value(par->gpio.cs, 0); /* Activate chip */ write_reg(par, MIPI_DCS_SOFT_RESET); /* software reset */ diff --git a/drivers/staging/fbtft/fb_ili9325.c b/drivers/staging/fbtft/fb_ili9325.c index b090e7ab6fdd..85e54a10ed72 100644 --- a/drivers/staging/fbtft/fb_ili9325.c +++ b/drivers/staging/fbtft/fb_ili9325.c @@ -85,7 +85,7 @@ static int init_display(struct fbtft_par *par) { par->fbtftops.reset(par); - if (!par->gpio.cs) + if (par->gpio.cs) gpiod_set_value(par->gpio.cs, 0); /* Activate chip */ bt &= 0x07; diff --git a/drivers/staging/fbtft/fb_s6d1121.c b/drivers/staging/fbtft/fb_s6d1121.c index b3d0701880fe..5a129b1352cc 100644 --- a/drivers/staging/fbtft/fb_s6d1121.c +++ b/drivers/staging/fbtft/fb_s6d1121.c @@ -29,7 +29,7 @@ static int init_display(struct fbtft_par *par) { par->fbtftops.reset(par); - if (!par->gpio.cs) + if (par->gpio.cs) gpiod_set_value(par->gpio.cs, 0); /* Activate chip */ /* Initialization sequence from Lib_UTFT */ diff --git a/drivers/staging/fbtft/fb_ssd1289.c b/drivers/staging/fbtft/fb_ssd1289.c index bbf75f795234..88a5b6925901 100644 --- a/drivers/staging/fbtft/fb_ssd1289.c +++ b/drivers/staging/fbtft/fb_ssd1289.c @@ -28,7 +28,7 @@ static int init_display(struct fbtft_par *par) { par->fbtftops.reset(par); - if (!par->gpio.cs) + if (par->gpio.cs) gpiod_set_value(par->gpio.cs, 0); /* Activate chip */ write_reg(par, 0x00, 0x0001); diff --git a/drivers/staging/fbtft/fb_ssd1331.c b/drivers/staging/fbtft/fb_ssd1331.c index 4cfe9f8535d0..37622c9462aa 100644 --- a/drivers/staging/fbtft/fb_ssd1331.c +++ b/drivers/staging/fbtft/fb_ssd1331.c @@ -81,7 +81,7 @@ static void write_reg8_bus8(struct fbtft_par *par, int len, ...) va_start(args, len); *buf = (u8)va_arg(args, unsigned int); - if (!par->gpio.dc) + if (par->gpio.dc) gpiod_set_value(par->gpio.dc, 0); ret = par->fbtftops.write(par, par->buf, sizeof(u8)); if (ret < 0) { @@ -104,7 +104,7 @@ static void write_reg8_bus8(struct fbtft_par *par, int len, ...) return; } } - if (!par->gpio.dc) + if (par->gpio.dc) gpiod_set_value(par->gpio.dc, 1); va_end(args); } diff --git a/drivers/staging/fbtft/fb_upd161704.c b/drivers/staging/fbtft/fb_upd161704.c index 564a38e34440..c77832ae5e5b 100644 --- a/drivers/staging/fbtft/fb_upd161704.c +++ b/drivers/staging/fbtft/fb_upd161704.c @@ -26,7 +26,7 @@ static int init_display(struct fbtft_par *par) { par->fbtftops.reset(par); - if (!par->gpio.cs) + if (par->gpio.cs) gpiod_set_value(par->gpio.cs, 0); /* Activate chip */ /* Initialization sequence from Lib_UTFT */ diff --git a/drivers/staging/fbtft/fbtft-bus.c b/drivers/staging/fbtft/fbtft-bus.c index 2ea814d0dca5..63c65dd67b17 100644 --- a/drivers/staging/fbtft/fbtft-bus.c +++ b/drivers/staging/fbtft/fbtft-bus.c @@ -135,7 +135,7 @@ int fbtft_write_vmem16_bus8(struct fbtft_par *par, size_t offset, size_t len) remain = len / 2; vmem16 = (u16 *)(par->info->screen_buffer + offset); - if (!par->gpio.dc) + if (par->gpio.dc) gpiod_set_value(par->gpio.dc, 1); /* non buffered write */ diff --git a/drivers/staging/fbtft/fbtft-core.c b/drivers/staging/fbtft/fbtft-core.c index c3179cc847f8..cf5700a2ea66 100644 --- a/drivers/staging/fbtft/fbtft-core.c +++ b/drivers/staging/fbtft/fbtft-core.c @@ -918,7 +918,7 @@ static int fbtft_init_display_dt(struct fbtft_par *par) return -EINVAL; par->fbtftops.reset(par); - if (!par->gpio.cs) + if (par->gpio.cs) gpiod_set_value(par->gpio.cs, 0); /* Activate chip */ while (p) { @@ -1009,7 +1009,7 @@ int fbtft_init_display(struct fbtft_par *par) } par->fbtftops.reset(par); - if (!par->gpio.cs) + if (par->gpio.cs) gpiod_set_value(par->gpio.cs, 0); /* Activate chip */ i = 0; From 25f8c834e2a6871920cc1ca113f02fb301d007c3 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Fri, 12 Jul 2019 11:37:17 -0700 Subject: [PATCH 045/482] Input: synaptics - enable RMI mode for HP Spectre X360 The 2016 kabylake HP Spectre X360 (model number 13-w013dx) works much better with psmouse.synaptics_intertouch=1 kernel parameter, so let's enable RMI4 mode automatically. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=204115 Reported-by: Nate Graham Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/synaptics.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/input/mouse/synaptics.c b/drivers/input/mouse/synaptics.c index b1956ed4c0dd..46bbe99d6511 100644 --- a/drivers/input/mouse/synaptics.c +++ b/drivers/input/mouse/synaptics.c @@ -182,6 +182,7 @@ static const char * const smbus_pnp_ids[] = { "LEN2055", /* E580 */ "SYN3052", /* HP EliteBook 840 G4 */ "SYN3221", /* HP 15-ay000 */ + "SYN323d", /* HP Spectre X360 13-w013dx */ NULL }; From 8f9e86ee795971eabbf372e6d804d6b8578287a7 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Mon, 1 Jul 2019 19:55:19 +0900 Subject: [PATCH 046/482] staging: android: ion: Bail out upon SIGKILL when allocating memory. syzbot found that a thread can stall for minutes inside ion_system_heap_allocate() after that thread was killed by SIGKILL [1]. Let's check for SIGKILL before doing memory allocation. [1] https://syzkaller.appspot.com/bug?id=a0e3436829698d5824231251fad9d8e998f94f5e Signed-off-by: Tetsuo Handa Cc: stable Reported-by: syzbot Acked-by: Laura Abbott Acked-by: Sumit Semwal Link: https://lore.kernel.org/r/d088f188-5f32-d8fc-b9a0-0b404f7501cc@I-love.SAKURA.ne.jp Signed-off-by: Greg Kroah-Hartman --- drivers/staging/android/ion/ion_page_pool.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/staging/android/ion/ion_page_pool.c b/drivers/staging/android/ion/ion_page_pool.c index fd4995fb676e..f85ec5b16b65 100644 --- a/drivers/staging/android/ion/ion_page_pool.c +++ b/drivers/staging/android/ion/ion_page_pool.c @@ -8,11 +8,14 @@ #include #include #include +#include #include "ion.h" static inline struct page *ion_page_pool_alloc_pages(struct ion_page_pool *pool) { + if (fatal_signal_pending(current)) + return NULL; return alloc_pages(pool->gfp_mask, pool->order); } From d105ef8120dd522c3233a1395ee0b318ed0929e8 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Tue, 9 Jul 2019 16:32:58 +0800 Subject: [PATCH 047/482] dmaengine: tegra210-adma: Fix unused function warnings If CONFIG_PM is not set, build warnings: drivers/dma/tegra210-adma.c:747:12: warning: tegra_adma_runtime_resume defined but not used [-Wunused-function] static int tegra_adma_runtime_resume(struct device *dev) drivers/dma/tegra210-adma.c:715:12: warning: tegra_adma_runtime_suspend defined but not used [-Wunused-function] static int tegra_adma_runtime_suspend(struct device *dev) Mark the two function as __maybe_unused. Reported-by: Hulk Robot Signed-off-by: YueHaibing Fixes: 3145d73e69ba ("dmaengine: tegra210-adma: remove PM_CLK dependency") Fixes: f46b195799b5 ("dmaengine: tegra-adma: Add support for Tegra210 ADMA") Reported-by: Arnd Bergmann Acked-by: Jon Hunter Link: https://lore.kernel.org/r/20190709083258.57112-1-yuehaibing@huawei.com Signed-off-by: Vinod Koul --- drivers/dma/tegra210-adma.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/dma/tegra210-adma.c b/drivers/dma/tegra210-adma.c index 2805853e963f..b33cf6e8ab8e 100644 --- a/drivers/dma/tegra210-adma.c +++ b/drivers/dma/tegra210-adma.c @@ -712,7 +712,7 @@ static struct dma_chan *tegra_dma_of_xlate(struct of_phandle_args *dma_spec, return chan; } -static int tegra_adma_runtime_suspend(struct device *dev) +static int __maybe_unused tegra_adma_runtime_suspend(struct device *dev) { struct tegra_adma *tdma = dev_get_drvdata(dev); struct tegra_adma_chan_regs *ch_reg; @@ -744,7 +744,7 @@ static int tegra_adma_runtime_suspend(struct device *dev) return 0; } -static int tegra_adma_runtime_resume(struct device *dev) +static int __maybe_unused tegra_adma_runtime_resume(struct device *dev) { struct tegra_adma *tdma = dev_get_drvdata(dev); struct tegra_adma_chan_regs *ch_reg; From 2a6fc3cb5cb68597f1072bfeef28d2ca02310220 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Fri, 5 Jul 2019 18:11:39 +0300 Subject: [PATCH 048/482] drm/tegra: Fix gpiod_get_from_of_node() regression That function now returns ERR_PTR instead of NULL if "hpd-gpio" is not present in device-tree. The offending patch missed to adapt the Tegra's DRM driver for the API change. Fixes: 025bf37725f1 ("gpio: Fix return value mismatch of function gpiod_get_from_of_node()") Signed-off-by: Dmitry Osipenko Acked-by: Jon Hunter Signed-off-by: Thierry Reding --- drivers/gpu/drm/tegra/output.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/tegra/output.c b/drivers/gpu/drm/tegra/output.c index 274cb955e2e1..bdcaa4c7168c 100644 --- a/drivers/gpu/drm/tegra/output.c +++ b/drivers/gpu/drm/tegra/output.c @@ -126,8 +126,12 @@ int tegra_output_probe(struct tegra_output *output) "nvidia,hpd-gpio", 0, GPIOD_IN, "HDMI hotplug detect"); - if (IS_ERR(output->hpd_gpio)) - return PTR_ERR(output->hpd_gpio); + if (IS_ERR(output->hpd_gpio)) { + if (PTR_ERR(output->hpd_gpio) != -ENOENT) + return PTR_ERR(output->hpd_gpio); + + output->hpd_gpio = NULL; + } if (output->hpd_gpio) { err = gpiod_to_irq(output->hpd_gpio); From 34a2a80ff30b5d2330abfa8980c7f0cc15a8158a Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Thu, 25 Jul 2019 11:34:23 +0300 Subject: [PATCH 049/482] ASoC: ti: davinci-mcasp: Fix clk PDIR handling for i2s master mode When running McASP as master capture alone will not record any audio unless a parallel playback stream is running. As soon as the playback stops the captured data is going to be silent again. In McASP master mode we need to set the PDIR for the clock pins and fix the mcasp_set_axr_pdir() to skip the bits in the PDIR registers above AMUTE. This went unnoticed as most of the boards uses McASP as slave and neither of these issues are visible (audible) in those setups. Fixes: ca3d9433349e ("ASoC: davinci-mcasp: Update PDIR (pin direction) register handling") Signed-off-by: Peter Ujfalusi Link: https://lore.kernel.org/r/20190725083423.7321-1-peter.ujfalusi@ti.com Signed-off-by: Mark Brown --- sound/soc/ti/davinci-mcasp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sound/soc/ti/davinci-mcasp.c b/sound/soc/ti/davinci-mcasp.c index 2c518088b64d..4d611565375b 100644 --- a/sound/soc/ti/davinci-mcasp.c +++ b/sound/soc/ti/davinci-mcasp.c @@ -195,7 +195,7 @@ static inline void mcasp_set_axr_pdir(struct davinci_mcasp *mcasp, bool enable) { u32 bit; - for_each_set_bit(bit, &mcasp->pdir, PIN_BIT_AFSR) { + for_each_set_bit(bit, &mcasp->pdir, PIN_BIT_AMUTE) { if (enable) mcasp_set_bits(mcasp, DAVINCI_MCASP_PDIR_REG, BIT(bit)); else @@ -223,6 +223,7 @@ static void mcasp_start_rx(struct davinci_mcasp *mcasp) if (mcasp_is_synchronous(mcasp)) { mcasp_set_ctl_reg(mcasp, DAVINCI_MCASP_GBLCTLX_REG, TXHCLKRST); mcasp_set_ctl_reg(mcasp, DAVINCI_MCASP_GBLCTLX_REG, TXCLKRST); + mcasp_set_clk_pdir(mcasp, true); } /* Activate serializer(s) */ From e51b69808b7ec06fc61f5a332f338d94b64b0537 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Thu, 25 Jul 2019 07:35:23 +0200 Subject: [PATCH 050/482] ASoC: Intel: Fix some acpi vs apci typo in somme comments Fix some typo to have the filaname given in a comment match the real name of the file. Some 'acpi' have erroneously been written 'apci' Signed-off-by: Christophe JAILLET Link: https://lore.kernel.org/r/20190725053523.16542-1-christophe.jaillet@wanadoo.fr Signed-off-by: Mark Brown --- sound/soc/intel/common/soc-acpi-intel-bxt-match.c | 2 +- sound/soc/intel/common/soc-acpi-intel-byt-match.c | 2 +- sound/soc/intel/common/soc-acpi-intel-cht-match.c | 2 +- sound/soc/intel/common/soc-acpi-intel-cnl-match.c | 2 +- sound/soc/intel/common/soc-acpi-intel-glk-match.c | 2 +- sound/soc/intel/common/soc-acpi-intel-hda-match.c | 2 +- sound/soc/intel/common/soc-acpi-intel-hsw-bdw-match.c | 2 +- sound/soc/intel/common/soc-acpi-intel-icl-match.c | 2 +- sound/soc/intel/common/soc-acpi-intel-kbl-match.c | 2 +- sound/soc/intel/common/soc-acpi-intel-skl-match.c | 2 +- 10 files changed, 10 insertions(+), 10 deletions(-) diff --git a/sound/soc/intel/common/soc-acpi-intel-bxt-match.c b/sound/soc/intel/common/soc-acpi-intel-bxt-match.c index 229e39586868..4a5adae1d785 100644 --- a/sound/soc/intel/common/soc-acpi-intel-bxt-match.c +++ b/sound/soc/intel/common/soc-acpi-intel-bxt-match.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* - * soc-apci-intel-bxt-match.c - tables and support for BXT ACPI enumeration. + * soc-acpi-intel-bxt-match.c - tables and support for BXT ACPI enumeration. * * Copyright (c) 2018, Intel Corporation. * diff --git a/sound/soc/intel/common/soc-acpi-intel-byt-match.c b/sound/soc/intel/common/soc-acpi-intel-byt-match.c index b94b482ac34f..1cc801ba92eb 100644 --- a/sound/soc/intel/common/soc-acpi-intel-byt-match.c +++ b/sound/soc/intel/common/soc-acpi-intel-byt-match.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * soc-apci-intel-byt-match.c - tables and support for BYT ACPI enumeration. + * soc-acpi-intel-byt-match.c - tables and support for BYT ACPI enumeration. * * Copyright (c) 2017, Intel Corporation. */ diff --git a/sound/soc/intel/common/soc-acpi-intel-cht-match.c b/sound/soc/intel/common/soc-acpi-intel-cht-match.c index b7f11f6be1cf..d0fb43c2b9f6 100644 --- a/sound/soc/intel/common/soc-acpi-intel-cht-match.c +++ b/sound/soc/intel/common/soc-acpi-intel-cht-match.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * soc-apci-intel-cht-match.c - tables and support for CHT ACPI enumeration. + * soc-acpi-intel-cht-match.c - tables and support for CHT ACPI enumeration. * * Copyright (c) 2017, Intel Corporation. */ diff --git a/sound/soc/intel/common/soc-acpi-intel-cnl-match.c b/sound/soc/intel/common/soc-acpi-intel-cnl-match.c index c36c0aa4f683..771b0ef21051 100644 --- a/sound/soc/intel/common/soc-acpi-intel-cnl-match.c +++ b/sound/soc/intel/common/soc-acpi-intel-cnl-match.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* - * soc-apci-intel-cnl-match.c - tables and support for CNL ACPI enumeration. + * soc-acpi-intel-cnl-match.c - tables and support for CNL ACPI enumeration. * * Copyright (c) 2018, Intel Corporation. * diff --git a/sound/soc/intel/common/soc-acpi-intel-glk-match.c b/sound/soc/intel/common/soc-acpi-intel-glk-match.c index 616eb09e78a0..60dea358fa04 100644 --- a/sound/soc/intel/common/soc-acpi-intel-glk-match.c +++ b/sound/soc/intel/common/soc-acpi-intel-glk-match.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* - * soc-apci-intel-glk-match.c - tables and support for GLK ACPI enumeration. + * soc-acpi-intel-glk-match.c - tables and support for GLK ACPI enumeration. * * Copyright (c) 2018, Intel Corporation. * diff --git a/sound/soc/intel/common/soc-acpi-intel-hda-match.c b/sound/soc/intel/common/soc-acpi-intel-hda-match.c index 68ae43f7b4b2..cc972d2ac691 100644 --- a/sound/soc/intel/common/soc-acpi-intel-hda-match.c +++ b/sound/soc/intel/common/soc-acpi-intel-hda-match.c @@ -2,7 +2,7 @@ // Copyright (c) 2018, Intel Corporation. /* - * soc-apci-intel-hda-match.c - tables and support for HDA+ACPI enumeration. + * soc-acpi-intel-hda-match.c - tables and support for HDA+ACPI enumeration. * */ diff --git a/sound/soc/intel/common/soc-acpi-intel-hsw-bdw-match.c b/sound/soc/intel/common/soc-acpi-intel-hsw-bdw-match.c index d27853e7a369..34eb0baaa951 100644 --- a/sound/soc/intel/common/soc-acpi-intel-hsw-bdw-match.c +++ b/sound/soc/intel/common/soc-acpi-intel-hsw-bdw-match.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * soc-apci-intel-hsw-bdw-match.c - tables and support for ACPI enumeration. + * soc-acpi-intel-hsw-bdw-match.c - tables and support for ACPI enumeration. * * Copyright (c) 2017, Intel Corporation. */ diff --git a/sound/soc/intel/common/soc-acpi-intel-icl-match.c b/sound/soc/intel/common/soc-acpi-intel-icl-match.c index 0b430b9b3673..38977669b576 100644 --- a/sound/soc/intel/common/soc-acpi-intel-icl-match.c +++ b/sound/soc/intel/common/soc-acpi-intel-icl-match.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* - * soc-apci-intel-icl-match.c - tables and support for ICL ACPI enumeration. + * soc-acpi-intel-icl-match.c - tables and support for ICL ACPI enumeration. * * Copyright (c) 2018, Intel Corporation. * diff --git a/sound/soc/intel/common/soc-acpi-intel-kbl-match.c b/sound/soc/intel/common/soc-acpi-intel-kbl-match.c index 4b331058e807..e200baa11011 100644 --- a/sound/soc/intel/common/soc-acpi-intel-kbl-match.c +++ b/sound/soc/intel/common/soc-acpi-intel-kbl-match.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* - * soc-apci-intel-kbl-match.c - tables and support for KBL ACPI enumeration. + * soc-acpi-intel-kbl-match.c - tables and support for KBL ACPI enumeration. * * Copyright (c) 2018, Intel Corporation. * diff --git a/sound/soc/intel/common/soc-acpi-intel-skl-match.c b/sound/soc/intel/common/soc-acpi-intel-skl-match.c index 0c9c0edd35b3..42fa40a8d932 100644 --- a/sound/soc/intel/common/soc-acpi-intel-skl-match.c +++ b/sound/soc/intel/common/soc-acpi-intel-skl-match.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* - * soc-apci-intel-skl-match.c - tables and support for SKL ACPI enumeration. + * soc-acpi-intel-skl-match.c - tables and support for SKL ACPI enumeration. * * Copyright (c) 2018, Intel Corporation. * From 789e162a6255325325bd321ab0cd51dc7e285054 Mon Sep 17 00:00:00 2001 From: Cheng-Yi Chiang Date: Fri, 26 Jul 2019 12:42:02 +0800 Subject: [PATCH 051/482] ASoC: rockchip: Fix mono capture This reverts commit db51707b9c9aeedd310ebce60f15d5bb006567e0. Revert "ASoC: rockchip: i2s: Support mono capture" Previous discussion in https://patchwork.kernel.org/patch/10147153/ explains the issue of the patch. While device is configured as 1-ch, hardware is still generating a 2-ch stream. When user space reads the data and assumes it is a 1-ch stream, the rate will be slower by 2x. Revert the change so 1-ch is not supported. User space can selectively take one channel data out of two channel if 1-ch is preferred. Currently, both channels record identical data. Signed-off-by: Cheng-Yi Chiang Link: https://lore.kernel.org/r/20190726044202.26866-1-cychiang@chromium.org Signed-off-by: Mark Brown --- sound/soc/rockchip/rockchip_i2s.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/sound/soc/rockchip/rockchip_i2s.c b/sound/soc/rockchip/rockchip_i2s.c index 0a34d0eb8dba..88ebaf6e1880 100644 --- a/sound/soc/rockchip/rockchip_i2s.c +++ b/sound/soc/rockchip/rockchip_i2s.c @@ -326,7 +326,6 @@ static int rockchip_i2s_hw_params(struct snd_pcm_substream *substream, val |= I2S_CHN_4; break; case 2: - case 1: val |= I2S_CHN_2; break; default: @@ -459,7 +458,7 @@ static struct snd_soc_dai_driver rockchip_i2s_dai = { }, .capture = { .stream_name = "Capture", - .channels_min = 1, + .channels_min = 2, .channels_max = 2, .rates = SNDRV_PCM_RATE_8000_192000, .formats = (SNDRV_PCM_FMTBIT_S8 | @@ -659,7 +658,7 @@ static int rockchip_i2s_probe(struct platform_device *pdev) } if (!of_property_read_u32(node, "rockchip,capture-channels", &val)) { - if (val >= 1 && val <= 8) + if (val >= 2 && val <= 8) soc_dai->capture.channels_max = val; } From 1e112c35e3c96db7c8ca6ddaa96574f00c06e7db Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Fri, 26 Jul 2019 09:42:43 +0300 Subject: [PATCH 052/482] ASoC: ti: davinci-mcasp: Correct slot_width posed constraint The slot_width is a property for the bus while the constraint for SNDRV_PCM_HW_PARAM_SAMPLE_BITS is for the in memory format. Applying slot_width constraint to sample_bits works most of the time, but it will blacklist valid formats in some cases. With slot_width 24 we can support S24_3LE and S24_LE formats as they both look the same on the bus, but a a 24 constraint on sample_bits would not allow S24_LE as it is stored in 32bits in memory. Implement a simple hw_rule function to allow all formats which require less or equal number of bits on the bus as slot_width (if configured). Signed-off-by: Peter Ujfalusi Link: https://lore.kernel.org/r/20190726064244.3762-2-peter.ujfalusi@ti.com Signed-off-by: Mark Brown --- sound/soc/ti/davinci-mcasp.c | 43 ++++++++++++++++++++++++++++-------- 1 file changed, 34 insertions(+), 9 deletions(-) diff --git a/sound/soc/ti/davinci-mcasp.c b/sound/soc/ti/davinci-mcasp.c index 4d611565375b..44708c8f90d6 100644 --- a/sound/soc/ti/davinci-mcasp.c +++ b/sound/soc/ti/davinci-mcasp.c @@ -1257,6 +1257,28 @@ static int davinci_mcasp_trigger(struct snd_pcm_substream *substream, return ret; } +static int davinci_mcasp_hw_rule_slot_width(struct snd_pcm_hw_params *params, + struct snd_pcm_hw_rule *rule) +{ + struct davinci_mcasp_ruledata *rd = rule->private; + struct snd_mask *fmt = hw_param_mask(params, SNDRV_PCM_HW_PARAM_FORMAT); + struct snd_mask nfmt; + int i, slot_width; + + snd_mask_none(&nfmt); + slot_width = rd->mcasp->slot_width; + + for (i = 0; i <= SNDRV_PCM_FORMAT_LAST; i++) { + if (snd_mask_test(fmt, i)) { + if (snd_pcm_format_width(i) <= slot_width) { + snd_mask_set(&nfmt, i); + } + } + } + + return snd_mask_refine(fmt, &nfmt); +} + static const unsigned int davinci_mcasp_dai_rates[] = { 8000, 11025, 16000, 22050, 32000, 44100, 48000, 64000, 88200, 96000, 176400, 192000, @@ -1378,7 +1400,7 @@ static int davinci_mcasp_startup(struct snd_pcm_substream *substream, struct davinci_mcasp_ruledata *ruledata = &mcasp->ruledata[substream->stream]; u32 max_channels = 0; - int i, dir; + int i, dir, ret; int tdm_slots = mcasp->tdm_slots; /* Do not allow more then one stream per direction */ @@ -1407,6 +1429,7 @@ static int davinci_mcasp_startup(struct snd_pcm_substream *substream, max_channels++; } ruledata->serializers = max_channels; + ruledata->mcasp = mcasp; max_channels *= tdm_slots; /* * If the already active stream has less channels than the calculated @@ -1432,20 +1455,22 @@ static int davinci_mcasp_startup(struct snd_pcm_substream *substream, 0, SNDRV_PCM_HW_PARAM_CHANNELS, &mcasp->chconstr[substream->stream]); - if (mcasp->slot_width) - snd_pcm_hw_constraint_minmax(substream->runtime, - SNDRV_PCM_HW_PARAM_SAMPLE_BITS, - 8, mcasp->slot_width); + if (mcasp->slot_width) { + /* Only allow formats require <= slot_width bits on the bus */ + ret = snd_pcm_hw_rule_add(substream->runtime, 0, + SNDRV_PCM_HW_PARAM_FORMAT, + davinci_mcasp_hw_rule_slot_width, + ruledata, + SNDRV_PCM_HW_PARAM_FORMAT, -1); + if (ret) + return ret; + } /* * If we rely on implicit BCLK divider setting we should * set constraints based on what we can provide. */ if (mcasp->bclk_master && mcasp->bclk_div == 0 && mcasp->sysclk_freq) { - int ret; - - ruledata->mcasp = mcasp; - ret = snd_pcm_hw_rule_add(substream->runtime, 0, SNDRV_PCM_HW_PARAM_RATE, davinci_mcasp_hw_rule_rate, From 3d584a3c85d6fe2cf878f220d4ad7145e7f89218 Mon Sep 17 00:00:00 2001 From: Anders Roxell Date: Fri, 26 Jul 2019 13:27:05 +0200 Subject: [PATCH 053/482] arm64: KVM: regmap: Fix unexpected switch fall-through MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When fall-through warnings was enabled by default, commit d93512ef0f0e ("Makefile: Globally enable fall-through warning"), the following warnings was starting to show up: In file included from ../arch/arm64/include/asm/kvm_emulate.h:19, from ../arch/arm64/kvm/regmap.c:13: ../arch/arm64/kvm/regmap.c: In function ‘vcpu_write_spsr32’: ../arch/arm64/include/asm/kvm_hyp.h:31:3: warning: this statement may fall through [-Wimplicit-fallthrough=] asm volatile(ALTERNATIVE(__msr_s(r##nvh, "%x0"), \ ^~~ ../arch/arm64/include/asm/kvm_hyp.h:46:31: note: in expansion of macro ‘write_sysreg_elx’ #define write_sysreg_el1(v,r) write_sysreg_elx(v, r, _EL1, _EL12) ^~~~~~~~~~~~~~~~ ../arch/arm64/kvm/regmap.c:180:3: note: in expansion of macro ‘write_sysreg_el1’ write_sysreg_el1(v, SYS_SPSR); ^~~~~~~~~~~~~~~~ ../arch/arm64/kvm/regmap.c:181:2: note: here case KVM_SPSR_ABT: ^~~~ In file included from ../arch/arm64/include/asm/cputype.h:132, from ../arch/arm64/include/asm/cache.h:8, from ../include/linux/cache.h:6, from ../include/linux/printk.h:9, from ../include/linux/kernel.h:15, from ../include/asm-generic/bug.h:18, from ../arch/arm64/include/asm/bug.h:26, from ../include/linux/bug.h:5, from ../include/linux/mmdebug.h:5, from ../include/linux/mm.h:9, from ../arch/arm64/kvm/regmap.c:11: ../arch/arm64/include/asm/sysreg.h:837:2: warning: this statement may fall through [-Wimplicit-fallthrough=] asm volatile("msr " __stringify(r) ", %x0" \ ^~~ ../arch/arm64/kvm/regmap.c:182:3: note: in expansion of macro ‘write_sysreg’ write_sysreg(v, spsr_abt); ^~~~~~~~~~~~ ../arch/arm64/kvm/regmap.c:183:2: note: here case KVM_SPSR_UND: ^~~~ Rework to add a 'break;' in the swich-case since it didn't have that, leading to an interresting set of bugs. Cc: stable@vger.kernel.org # v4.17+ Fixes: a892819560c4 ("KVM: arm64: Prepare to handle deferred save/restore of 32-bit registers") Signed-off-by: Anders Roxell [maz: reworked commit message, fixed stable range] Signed-off-by: Marc Zyngier --- arch/arm64/kvm/regmap.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/arm64/kvm/regmap.c b/arch/arm64/kvm/regmap.c index 0d60e4f0af66..a900181e3867 100644 --- a/arch/arm64/kvm/regmap.c +++ b/arch/arm64/kvm/regmap.c @@ -178,13 +178,18 @@ void vcpu_write_spsr32(struct kvm_vcpu *vcpu, unsigned long v) switch (spsr_idx) { case KVM_SPSR_SVC: write_sysreg_el1(v, SYS_SPSR); + break; case KVM_SPSR_ABT: write_sysreg(v, spsr_abt); + break; case KVM_SPSR_UND: write_sysreg(v, spsr_und); + break; case KVM_SPSR_IRQ: write_sysreg(v, spsr_irq); + break; case KVM_SPSR_FIQ: write_sysreg(v, spsr_fiq); + break; } } From 1a8248c74c81a15a32dc3344fb5c622e19072791 Mon Sep 17 00:00:00 2001 From: Anders Roxell Date: Fri, 26 Jul 2019 13:28:31 +0200 Subject: [PATCH 054/482] KVM: arm: vgic-v3: Mark expected switch fall-through MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When fall-through warnings was enabled by default the following warnings was starting to show up: ../virt/kvm/arm/hyp/vgic-v3-sr.c: In function ‘__vgic_v3_save_aprs’: ../virt/kvm/arm/hyp/vgic-v3-sr.c:351:24: warning: this statement may fall through [-Wimplicit-fallthrough=] cpu_if->vgic_ap0r[2] = __vgic_v3_read_ap0rn(2); ~~~~~~~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~~ ../virt/kvm/arm/hyp/vgic-v3-sr.c:352:2: note: here case 6: ^~~~ ../virt/kvm/arm/hyp/vgic-v3-sr.c:353:24: warning: this statement may fall through [-Wimplicit-fallthrough=] cpu_if->vgic_ap0r[1] = __vgic_v3_read_ap0rn(1); ~~~~~~~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~~ ../virt/kvm/arm/hyp/vgic-v3-sr.c:354:2: note: here default: ^~~~~~~ Rework so that the compiler doesn't warn about fall-through. Fixes: d93512ef0f0e ("Makefile: Globally enable fall-through warning") Signed-off-by: Anders Roxell Signed-off-by: Marc Zyngier --- virt/kvm/arm/hyp/vgic-v3-sr.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/virt/kvm/arm/hyp/vgic-v3-sr.c b/virt/kvm/arm/hyp/vgic-v3-sr.c index 254c5f190a3d..ccf1fde9836c 100644 --- a/virt/kvm/arm/hyp/vgic-v3-sr.c +++ b/virt/kvm/arm/hyp/vgic-v3-sr.c @@ -349,8 +349,10 @@ void __hyp_text __vgic_v3_save_aprs(struct kvm_vcpu *vcpu) case 7: cpu_if->vgic_ap0r[3] = __vgic_v3_read_ap0rn(3); cpu_if->vgic_ap0r[2] = __vgic_v3_read_ap0rn(2); + /* Fall through */ case 6: cpu_if->vgic_ap0r[1] = __vgic_v3_read_ap0rn(1); + /* Fall through */ default: cpu_if->vgic_ap0r[0] = __vgic_v3_read_ap0rn(0); } @@ -359,8 +361,10 @@ void __hyp_text __vgic_v3_save_aprs(struct kvm_vcpu *vcpu) case 7: cpu_if->vgic_ap1r[3] = __vgic_v3_read_ap1rn(3); cpu_if->vgic_ap1r[2] = __vgic_v3_read_ap1rn(2); + /* Fall through */ case 6: cpu_if->vgic_ap1r[1] = __vgic_v3_read_ap1rn(1); + /* Fall through */ default: cpu_if->vgic_ap1r[0] = __vgic_v3_read_ap1rn(0); } @@ -382,8 +386,10 @@ void __hyp_text __vgic_v3_restore_aprs(struct kvm_vcpu *vcpu) case 7: __vgic_v3_write_ap0rn(cpu_if->vgic_ap0r[3], 3); __vgic_v3_write_ap0rn(cpu_if->vgic_ap0r[2], 2); + /* Fall through */ case 6: __vgic_v3_write_ap0rn(cpu_if->vgic_ap0r[1], 1); + /* Fall through */ default: __vgic_v3_write_ap0rn(cpu_if->vgic_ap0r[0], 0); } @@ -392,8 +398,10 @@ void __hyp_text __vgic_v3_restore_aprs(struct kvm_vcpu *vcpu) case 7: __vgic_v3_write_ap1rn(cpu_if->vgic_ap1r[3], 3); __vgic_v3_write_ap1rn(cpu_if->vgic_ap1r[2], 2); + /* Fall through */ case 6: __vgic_v3_write_ap1rn(cpu_if->vgic_ap1r[1], 1); + /* Fall through */ default: __vgic_v3_write_ap1rn(cpu_if->vgic_ap1r[0], 0); } From 6701c619fa082e6660ecd7573fbad2177380c7cc Mon Sep 17 00:00:00 2001 From: Zenghui Yu Date: Sat, 13 Jul 2019 04:40:54 +0000 Subject: [PATCH 055/482] KVM: arm64: Update kvm_arm_exception_class and esr_class_str for new EC We've added two ESR exception classes for new ARM hardware extensions: ESR_ELx_EC_PAC and ESR_ELx_EC_SVE, but failed to update the strings used in tracing and other debug. Let's update "kvm_arm_exception_class" for these two EC, which the new EC will be visible to user-space via kvm_exit trace events Also update to "esr_class_str" for ESR_ELx_EC_PAC, by which we can get more readable debug info. Cc: Marc Zyngier Cc: Catalin Marinas Cc: Will Deacon Cc: Mark Rutland Cc: Dave Martin Reviewed-by: James Morse Signed-off-by: Zenghui Yu Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_arm.h | 7 ++++--- arch/arm64/kernel/traps.c | 1 + 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index a8b205e5c4a8..ddf9d762ac62 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -316,9 +316,10 @@ #define kvm_arm_exception_class \ ECN(UNKNOWN), ECN(WFx), ECN(CP15_32), ECN(CP15_64), ECN(CP14_MR), \ - ECN(CP14_LS), ECN(FP_ASIMD), ECN(CP10_ID), ECN(CP14_64), ECN(SVC64), \ - ECN(HVC64), ECN(SMC64), ECN(SYS64), ECN(IMP_DEF), ECN(IABT_LOW), \ - ECN(IABT_CUR), ECN(PC_ALIGN), ECN(DABT_LOW), ECN(DABT_CUR), \ + ECN(CP14_LS), ECN(FP_ASIMD), ECN(CP10_ID), ECN(PAC), ECN(CP14_64), \ + ECN(SVC64), ECN(HVC64), ECN(SMC64), ECN(SYS64), ECN(SVE), \ + ECN(IMP_DEF), ECN(IABT_LOW), ECN(IABT_CUR), \ + ECN(PC_ALIGN), ECN(DABT_LOW), ECN(DABT_CUR), \ ECN(SP_ALIGN), ECN(FP_EXC32), ECN(FP_EXC64), ECN(SERROR), \ ECN(BREAKPT_LOW), ECN(BREAKPT_CUR), ECN(SOFTSTP_LOW), \ ECN(SOFTSTP_CUR), ECN(WATCHPT_LOW), ECN(WATCHPT_CUR), \ diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 8c03456dade6..969e1565152b 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -734,6 +734,7 @@ static const char *esr_class_str[] = { [ESR_ELx_EC_CP14_LS] = "CP14 LDC/STC", [ESR_ELx_EC_FP_ASIMD] = "ASIMD", [ESR_ELx_EC_CP10_ID] = "CP10 MRC/VMRS", + [ESR_ELx_EC_PAC] = "PAC", [ESR_ELx_EC_CP14_64] = "CP14 MCRR/MRRC", [ESR_ELx_EC_ILL] = "PSTATE.IL", [ESR_ELx_EC_SVC32] = "SVC (AArch32)", From 5d01ab7bac467edfc530e6ccf953921def935c62 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Fri, 26 Jul 2019 14:24:38 -0700 Subject: [PATCH 056/482] libbpf: fix erroneous multi-closing of BTF FD Libbpf stores associated BTF FD per each instance of bpf_program. When program is unloaded, that FD is closed. This is wrong, because leads to a race and possibly closing of unrelated files, if application simultaneously opens new files while bpf_programs are unloaded. It's also unnecessary, because struct btf "owns" that FD, and btf__free(), called from bpf_object__close() will close it. Thus the fix is to never have per-program BTF FD and fetch it from obj->btf, when necessary. Fixes: 2993e0515bb4 ("tools/bpf: add support to read .BTF.ext sections") Reported-by: Andrey Ignatov Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/libbpf.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 2586b6cb8f34..6718d0b90130 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -182,7 +182,6 @@ struct bpf_program { bpf_program_clear_priv_t clear_priv; enum bpf_attach_type expected_attach_type; - int btf_fd; void *func_info; __u32 func_info_rec_size; __u32 func_info_cnt; @@ -313,7 +312,6 @@ void bpf_program__unload(struct bpf_program *prog) prog->instances.nr = -1; zfree(&prog->instances.fds); - zclose(prog->btf_fd); zfree(&prog->func_info); zfree(&prog->line_info); } @@ -392,7 +390,6 @@ bpf_program__init(void *data, size_t size, char *section_name, int idx, prog->instances.fds = NULL; prog->instances.nr = -1; prog->type = BPF_PROG_TYPE_UNSPEC; - prog->btf_fd = -1; return 0; errout: @@ -2288,9 +2285,6 @@ bpf_program_reloc_btf_ext(struct bpf_program *prog, struct bpf_object *obj, prog->line_info_rec_size = btf_ext__line_info_rec_size(obj->btf_ext); } - if (!insn_offset) - prog->btf_fd = btf__fd(obj->btf); - return 0; } @@ -2463,7 +2457,7 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt, char *cp, errmsg[STRERR_BUFSIZE]; int log_buf_size = BPF_LOG_BUF_SIZE; char *log_buf; - int ret; + int btf_fd, ret; if (!insns || !insns_cnt) return -EINVAL; @@ -2478,7 +2472,8 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt, load_attr.license = license; load_attr.kern_version = kern_version; load_attr.prog_ifindex = prog->prog_ifindex; - load_attr.prog_btf_fd = prog->btf_fd >= 0 ? prog->btf_fd : 0; + btf_fd = bpf_object__btf_fd(prog->obj); + load_attr.prog_btf_fd = btf_fd >= 0 ? btf_fd : 0; load_attr.func_info = prog->func_info; load_attr.func_info_rec_size = prog->func_info_rec_size; load_attr.func_info_cnt = prog->func_info_cnt; From 41e6ada93356400ec8bc8dba04818d9e30ee6e9d Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 26 Jul 2019 08:47:22 -0300 Subject: [PATCH 057/482] docs: generic-counter.rst: fix broken references for ABI file There are two references to the generic counter ABI, with was added on a separate patch. Both point to a non-existing file. Fix them. Fixes: ea2b23b89579 ("counter: Documentation: Add Generic Counter sysfs documentation") Fixes: 09e7d4ed8991 ("docs: Add Generic Counter interface documentation") Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Cameron --- Documentation/driver-api/generic-counter.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/driver-api/generic-counter.rst b/Documentation/driver-api/generic-counter.rst index 0c161b1a3be6..8382f01a53e3 100644 --- a/Documentation/driver-api/generic-counter.rst +++ b/Documentation/driver-api/generic-counter.rst @@ -233,7 +233,7 @@ Userspace Interface Several sysfs attributes are generated by the Generic Counter interface, and reside under the /sys/bus/counter/devices/counterX directory, where counterX refers to the respective counter device. Please see -Documentation/ABI/testing/sys-bus-counter-generic-sysfs for detailed +Documentation/ABI/testing/sysfs-bus-counter for detailed information on each Generic Counter interface sysfs attribute. Through these sysfs attributes, programs and scripts may interact with @@ -325,7 +325,7 @@ sysfs attributes, where Y is the unique ID of the respective Count: For a more detailed breakdown of the available Generic Counter interface sysfs attributes, please refer to the -Documentation/ABI/testing/sys-bus-counter file. +Documentation/ABI/testing/sysfs-bus-counter file. The Signals and Counts associated with the Counter device are registered to the system as well by the counter_register function. The From 90c6260c1905a68fb596844087f2223bd4657fee Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 18 Jul 2019 15:57:49 +0200 Subject: [PATCH 058/482] iio: adc: gyroadc: fix uninitialized return code gcc-9 complains about a blatant uninitialized variable use that all earlier compiler versions missed: drivers/iio/adc/rcar-gyroadc.c:510:5: warning: 'ret' may be used uninitialized in this function [-Wmaybe-uninitialized] Return -EINVAL instead here and a few lines above it where we accidentally return 0 on failure. Cc: stable@vger.kernel.org Fixes: 059c53b32329 ("iio: adc: Add Renesas GyroADC driver") Signed-off-by: Arnd Bergmann Reviewed-by: Wolfram Sang Signed-off-by: Jonathan Cameron --- drivers/iio/adc/rcar-gyroadc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iio/adc/rcar-gyroadc.c b/drivers/iio/adc/rcar-gyroadc.c index 2d685730f867..c37f201294b2 100644 --- a/drivers/iio/adc/rcar-gyroadc.c +++ b/drivers/iio/adc/rcar-gyroadc.c @@ -382,7 +382,7 @@ static int rcar_gyroadc_parse_subdevs(struct iio_dev *indio_dev) dev_err(dev, "Only %i channels supported with %pOFn, but reg = <%i>.\n", num_channels, child, reg); - return ret; + return -EINVAL; } } @@ -391,7 +391,7 @@ static int rcar_gyroadc_parse_subdevs(struct iio_dev *indio_dev) dev_err(dev, "Channel %i uses different ADC mode than the rest.\n", reg); - return ret; + return -EINVAL; } /* Channel is valid, grab the regulator. */ From 674fa8daa8c922c42a84b6aa85bf5ddc616accf5 Mon Sep 17 00:00:00 2001 From: Andrew Jeffery Date: Wed, 24 Jul 2019 17:31:55 +0930 Subject: [PATCH 059/482] pinctrl: aspeed-g5: Delay acquisition of regmaps While sorting out some devicetree issues I found that the pinctrl driver was failing to acquire its GFX regmap even though the phandle was present in the devicetree: [ 0.124190] aspeed-g5-pinctrl 1e6e2000.syscon:pinctrl: No GFX phandle found, some mux configurations may fail Without access to the GFX regmap we fail to configure the mux for the VPO function: [ 1.548866] pinctrl core: add 1 pinctrl maps [ 1.549826] aspeed-g5-pinctrl 1e6e2000.syscon:pinctrl: found group selector 164 for VPO [ 1.550638] aspeed-g5-pinctrl 1e6e2000.syscon:pinctrl: request pin 144 (V20) for 1e6e6000.display [ 1.551346] aspeed-g5-pinctrl 1e6e2000.syscon:pinctrl: request pin 145 (U19) for 1e6e6000.display ... [ 1.562057] aspeed-g5-pinctrl 1e6e2000.syscon:pinctrl: request pin 218 (T22) for 1e6e6000.display [ 1.562541] aspeed-g5-pinctrl 1e6e2000.syscon:pinctrl: request pin 219 (R20) for 1e6e6000.display [ 1.563113] Muxing pin 144 for VPO [ 1.563456] Want SCU8C[0x00000001]=0x1, got 0x0 from 0x00000000 [ 1.564624] aspeed_gfx 1e6e6000.display: Error applying setting, reverse things back This turned out to be a simple problem of timing: The ASPEED pinctrl driver is probed during arch_initcall(), while GFX is processed much later. As such the GFX syscon is not yet registered during the pinctrl probe() and we get an -EPROBE_DEFER when we try to look it up, however we must not defer probing the pinctrl driver for the inability to mux some GFX-related functions. Switch to lazily grabbing the regmaps when they're first required by the mux configuration. This generates a bit of noise in the patch as we have to drop the `const` qualifier on arguments for several function prototypes, but has the benefit of working. I've smoke tested this for the ast2500-evb under qemu with a dummy graphics device. We now succeed in our attempts to configure the SoC's VPO pinmux function. Fixes: 7d29ed88acbb ("pinctrl: aspeed: Read and write bits in LPC and GFX controllers") Signed-off-by: Andrew Jeffery Link: https://lore.kernel.org/r/20190724080155.12209-1-andrew@aj.id.au Signed-off-by: Linus Walleij --- drivers/pinctrl/aspeed/pinctrl-aspeed-g4.c | 2 +- drivers/pinctrl/aspeed/pinctrl-aspeed-g5.c | 92 +++++++++++++++------- drivers/pinctrl/aspeed/pinctrl-aspeed.c | 12 ++- drivers/pinctrl/aspeed/pinmux-aspeed.h | 5 +- 4 files changed, 74 insertions(+), 37 deletions(-) diff --git a/drivers/pinctrl/aspeed/pinctrl-aspeed-g4.c b/drivers/pinctrl/aspeed/pinctrl-aspeed-g4.c index 384396cbb22d..22256576b69a 100644 --- a/drivers/pinctrl/aspeed/pinctrl-aspeed-g4.c +++ b/drivers/pinctrl/aspeed/pinctrl-aspeed-g4.c @@ -2412,7 +2412,7 @@ static const struct aspeed_pin_config aspeed_g4_configs[] = { { PIN_CONFIG_INPUT_DEBOUNCE, { C14, B14 }, SCUA8, 27 }, }; -static int aspeed_g4_sig_expr_set(const struct aspeed_pinmux_data *ctx, +static int aspeed_g4_sig_expr_set(struct aspeed_pinmux_data *ctx, const struct aspeed_sig_expr *expr, bool enable) { diff --git a/drivers/pinctrl/aspeed/pinctrl-aspeed-g5.c b/drivers/pinctrl/aspeed/pinctrl-aspeed-g5.c index 053101f795a2..ba6438ac4d72 100644 --- a/drivers/pinctrl/aspeed/pinctrl-aspeed-g5.c +++ b/drivers/pinctrl/aspeed/pinctrl-aspeed-g5.c @@ -2507,6 +2507,61 @@ static struct aspeed_pin_config aspeed_g5_configs[] = { { PIN_CONFIG_INPUT_DEBOUNCE, { A20, B19 }, SCUA8, 27 }, }; +static struct regmap *aspeed_g5_acquire_regmap(struct aspeed_pinmux_data *ctx, + int ip) +{ + if (ip == ASPEED_IP_SCU) { + WARN(!ctx->maps[ip], "Missing SCU syscon!"); + return ctx->maps[ip]; + } + + if (ip >= ASPEED_NR_PINMUX_IPS) + return ERR_PTR(-EINVAL); + + if (likely(ctx->maps[ip])) + return ctx->maps[ip]; + + if (ip == ASPEED_IP_GFX) { + struct device_node *node; + struct regmap *map; + + node = of_parse_phandle(ctx->dev->of_node, + "aspeed,external-nodes", 0); + if (node) { + map = syscon_node_to_regmap(node); + of_node_put(node); + if (IS_ERR(map)) + return map; + } else + return ERR_PTR(-ENODEV); + + ctx->maps[ASPEED_IP_GFX] = map; + dev_dbg(ctx->dev, "Acquired GFX regmap"); + return map; + } + + if (ip == ASPEED_IP_LPC) { + struct device_node *node; + struct regmap *map; + + node = of_parse_phandle(ctx->dev->of_node, + "aspeed,external-nodes", 1); + if (node) { + map = syscon_node_to_regmap(node->parent); + of_node_put(node); + if (IS_ERR(map)) + return map; + } else + map = ERR_PTR(-ENODEV); + + ctx->maps[ASPEED_IP_LPC] = map; + dev_dbg(ctx->dev, "Acquired LPC regmap"); + return map; + } + + return ERR_PTR(-EINVAL); +} + /** * Configure a pin's signal by applying an expression's descriptor state for * all descriptors in the expression. @@ -2520,7 +2575,7 @@ static struct aspeed_pin_config aspeed_g5_configs[] = { * Return: 0 if the expression is configured as requested and a negative error * code otherwise */ -static int aspeed_g5_sig_expr_set(const struct aspeed_pinmux_data *ctx, +static int aspeed_g5_sig_expr_set(struct aspeed_pinmux_data *ctx, const struct aspeed_sig_expr *expr, bool enable) { @@ -2531,9 +2586,15 @@ static int aspeed_g5_sig_expr_set(const struct aspeed_pinmux_data *ctx, const struct aspeed_sig_desc *desc = &expr->descs[i]; u32 pattern = enable ? desc->enable : desc->disable; u32 val = (pattern << __ffs(desc->mask)); + struct regmap *map; - if (!ctx->maps[desc->ip]) - return -ENODEV; + map = aspeed_g5_acquire_regmap(ctx, desc->ip); + if (IS_ERR(map)) { + dev_err(ctx->dev, + "Failed to acquire regmap for IP block %d\n", + desc->ip); + return PTR_ERR(map); + } /* * Strap registers are configured in hardware or by early-boot @@ -2641,34 +2702,11 @@ static struct pinctrl_desc aspeed_g5_pinctrl_desc = { static int aspeed_g5_pinctrl_probe(struct platform_device *pdev) { int i; - struct regmap *map; - struct device_node *node; for (i = 0; i < ARRAY_SIZE(aspeed_g5_pins); i++) aspeed_g5_pins[i].number = i; - node = of_parse_phandle(pdev->dev.of_node, "aspeed,external-nodes", 0); - map = syscon_node_to_regmap(node); - of_node_put(node); - if (IS_ERR(map)) { - dev_warn(&pdev->dev, "No GFX phandle found, some mux configurations may fail\n"); - map = NULL; - } - aspeed_g5_pinctrl_data.pinmux.maps[ASPEED_IP_GFX] = map; - - node = of_parse_phandle(pdev->dev.of_node, "aspeed,external-nodes", 1); - if (node) { - map = syscon_node_to_regmap(node->parent); - if (IS_ERR(map)) { - dev_warn(&pdev->dev, "LHC parent is not a syscon, some mux configurations may fail\n"); - map = NULL; - } - } else { - dev_warn(&pdev->dev, "No LHC phandle found, some mux configurations may fail\n"); - map = NULL; - } - of_node_put(node); - aspeed_g5_pinctrl_data.pinmux.maps[ASPEED_IP_LPC] = map; + aspeed_g5_pinctrl_data.pinmux.dev = &pdev->dev; return aspeed_pinctrl_probe(pdev, &aspeed_g5_pinctrl_desc, &aspeed_g5_pinctrl_data); diff --git a/drivers/pinctrl/aspeed/pinctrl-aspeed.c b/drivers/pinctrl/aspeed/pinctrl-aspeed.c index 535db3de490b..54933665b5f8 100644 --- a/drivers/pinctrl/aspeed/pinctrl-aspeed.c +++ b/drivers/pinctrl/aspeed/pinctrl-aspeed.c @@ -71,7 +71,7 @@ int aspeed_pinmux_get_fn_groups(struct pinctrl_dev *pctldev, return 0; } -static int aspeed_sig_expr_enable(const struct aspeed_pinmux_data *ctx, +static int aspeed_sig_expr_enable(struct aspeed_pinmux_data *ctx, const struct aspeed_sig_expr *expr) { int ret; @@ -86,7 +86,7 @@ static int aspeed_sig_expr_enable(const struct aspeed_pinmux_data *ctx, return 0; } -static int aspeed_sig_expr_disable(const struct aspeed_pinmux_data *ctx, +static int aspeed_sig_expr_disable(struct aspeed_pinmux_data *ctx, const struct aspeed_sig_expr *expr) { int ret; @@ -109,7 +109,7 @@ static int aspeed_sig_expr_disable(const struct aspeed_pinmux_data *ctx, * * Return: 0 if all expressions are disabled, otherwise a negative error code */ -static int aspeed_disable_sig(const struct aspeed_pinmux_data *ctx, +static int aspeed_disable_sig(struct aspeed_pinmux_data *ctx, const struct aspeed_sig_expr **exprs) { int ret = 0; @@ -217,8 +217,7 @@ int aspeed_pinmux_set_mux(struct pinctrl_dev *pctldev, unsigned int function, { int i; int ret; - const struct aspeed_pinctrl_data *pdata = - pinctrl_dev_get_drvdata(pctldev); + struct aspeed_pinctrl_data *pdata = pinctrl_dev_get_drvdata(pctldev); const struct aspeed_pin_group *pgroup = &pdata->pinmux.groups[group]; const struct aspeed_pin_function *pfunc = &pdata->pinmux.functions[function]; @@ -306,8 +305,7 @@ int aspeed_gpio_request_enable(struct pinctrl_dev *pctldev, unsigned int offset) { int ret; - const struct aspeed_pinctrl_data *pdata = - pinctrl_dev_get_drvdata(pctldev); + struct aspeed_pinctrl_data *pdata = pinctrl_dev_get_drvdata(pctldev); const struct aspeed_pin_desc *pdesc = pdata->pins[offset].drv_data; const struct aspeed_sig_expr ***prios, **funcs, *expr; diff --git a/drivers/pinctrl/aspeed/pinmux-aspeed.h b/drivers/pinctrl/aspeed/pinmux-aspeed.h index 329d54d48667..52d299b59ce2 100644 --- a/drivers/pinctrl/aspeed/pinmux-aspeed.h +++ b/drivers/pinctrl/aspeed/pinmux-aspeed.h @@ -702,11 +702,12 @@ struct aspeed_pin_function { struct aspeed_pinmux_data; struct aspeed_pinmux_ops { - int (*set)(const struct aspeed_pinmux_data *ctx, + int (*set)(struct aspeed_pinmux_data *ctx, const struct aspeed_sig_expr *expr, bool enabled); }; struct aspeed_pinmux_data { + struct device *dev; struct regmap *maps[ASPEED_NR_PINMUX_IPS]; const struct aspeed_pinmux_ops *ops; @@ -725,7 +726,7 @@ int aspeed_sig_expr_eval(const struct aspeed_pinmux_data *ctx, const struct aspeed_sig_expr *expr, bool enabled); -static inline int aspeed_sig_expr_set(const struct aspeed_pinmux_data *ctx, +static inline int aspeed_sig_expr_set(struct aspeed_pinmux_data *ctx, const struct aspeed_sig_expr *expr, bool enabled) { From 75035fe22b808a520e1d712ebe913684ba406e01 Mon Sep 17 00:00:00 2001 From: Ben Segal Date: Tue, 23 Jul 2019 11:22:42 +0300 Subject: [PATCH 060/482] habanalabs: fix F/W download in BE architecture writeX macros might perform byte-swapping in BE architectures. As our F/W is in LE format, we need to make sure no byte-swapping will occur. There is a standard kernel function (called memcpy_toio) for copying data to I/O area which is used in a lot of drivers to download F/W to PCIe adapters. That function also makes sure the data is copied "as-is", without byte-swapping. This patch use that function to copy the F/W to the GOYA ASIC instead of writeX macros. Signed-off-by: Ben Segal Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/firmware_if.c | 19 ++----------------- 1 file changed, 2 insertions(+), 17 deletions(-) diff --git a/drivers/misc/habanalabs/firmware_if.c b/drivers/misc/habanalabs/firmware_if.c index cc8168bacb24..61112eda4dd2 100644 --- a/drivers/misc/habanalabs/firmware_if.c +++ b/drivers/misc/habanalabs/firmware_if.c @@ -24,7 +24,7 @@ int hl_fw_push_fw_to_device(struct hl_device *hdev, const char *fw_name, { const struct firmware *fw; const u64 *fw_data; - size_t fw_size, i; + size_t fw_size; int rc; rc = request_firmware(&fw, fw_name, hdev->dev); @@ -45,22 +45,7 @@ int hl_fw_push_fw_to_device(struct hl_device *hdev, const char *fw_name, fw_data = (const u64 *) fw->data; - if ((fw->size % 8) != 0) - fw_size -= 8; - - for (i = 0 ; i < fw_size ; i += 8, fw_data++, dst += 8) { - if (!(i & (0x80000 - 1))) { - dev_dbg(hdev->dev, - "copied so far %zu out of %zu for %s firmware", - i, fw_size, fw_name); - usleep_range(20, 100); - } - - writeq(*fw_data, dst); - } - - if ((fw->size % 8) != 0) - writel(*(const u32 *) fw_data, dst); + memcpy_toio(dst, fw_data, fw_size); out: release_firmware(fw); From 2aa4e410795cb94b6577fe0e251b5f5226499310 Mon Sep 17 00:00:00 2001 From: Ben Segal Date: Thu, 18 Jul 2019 12:27:00 +0000 Subject: [PATCH 061/482] habanalabs: fix host memory polling in BE architecture This patch fix a bug in the host memory polling macro. The bug is that the memory being polled can be written by the device, which always writes it in LE. However, if the host is running Linux in BE mode, we need to convert the value that was written by the device before matching it to the required value that the caller has given to the macro. Signed-off-by: Ben Segal Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/command_submission.c | 2 +- drivers/misc/habanalabs/firmware_if.c | 3 ++- drivers/misc/habanalabs/goya/goya.c | 5 +++-- drivers/misc/habanalabs/habanalabs.h | 16 ++++++++++++++-- 4 files changed, 20 insertions(+), 6 deletions(-) diff --git a/drivers/misc/habanalabs/command_submission.c b/drivers/misc/habanalabs/command_submission.c index 6ad83d5ef4b0..f00d1c32f6d6 100644 --- a/drivers/misc/habanalabs/command_submission.c +++ b/drivers/misc/habanalabs/command_submission.c @@ -683,7 +683,7 @@ int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data) rc = hl_poll_timeout_memory(hdev, &ctx->thread_ctx_switch_wait_token, tmp, (tmp == 1), - 100, jiffies_to_usecs(hdev->timeout_jiffies)); + 100, jiffies_to_usecs(hdev->timeout_jiffies), false); if (rc == -ETIMEDOUT) { dev_err(hdev->dev, diff --git a/drivers/misc/habanalabs/firmware_if.c b/drivers/misc/habanalabs/firmware_if.c index 61112eda4dd2..ea2ca67fbfbf 100644 --- a/drivers/misc/habanalabs/firmware_if.c +++ b/drivers/misc/habanalabs/firmware_if.c @@ -97,7 +97,8 @@ int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg, } rc = hl_poll_timeout_memory(hdev, &pkt->fence, tmp, - (tmp == ARMCP_PACKET_FENCE_VAL), 1000, timeout); + (tmp == ARMCP_PACKET_FENCE_VAL), 1000, + timeout, true); hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id); diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c index 1a2c062a57d4..a0e181714891 100644 --- a/drivers/misc/habanalabs/goya/goya.c +++ b/drivers/misc/habanalabs/goya/goya.c @@ -2864,7 +2864,8 @@ static int goya_send_job_on_qman0(struct hl_device *hdev, struct hl_cs_job *job) } rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, - (tmp == GOYA_QMAN0_FENCE_VAL), 1000, timeout); + (tmp == GOYA_QMAN0_FENCE_VAL), 1000, + timeout, true); hl_hw_queue_inc_ci_kernel(hdev, GOYA_QUEUE_ID_DMA_0); @@ -2945,7 +2946,7 @@ int goya_test_queue(struct hl_device *hdev, u32 hw_queue_id) } rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, (tmp == fence_val), - 1000, GOYA_TEST_QUEUE_WAIT_USEC); + 1000, GOYA_TEST_QUEUE_WAIT_USEC, true); hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id); diff --git a/drivers/misc/habanalabs/habanalabs.h b/drivers/misc/habanalabs/habanalabs.h index 10da9940ee0d..6a4c64b97f38 100644 --- a/drivers/misc/habanalabs/habanalabs.h +++ b/drivers/misc/habanalabs/habanalabs.h @@ -1062,9 +1062,17 @@ void hl_wreg(struct hl_device *hdev, u32 reg, u32 val); /* * address in this macro points always to a memory location in the * host's (server's) memory. That location is updated asynchronously - * either by the direct access of the device or by another core + * either by the direct access of the device or by another core. + * + * To work both in LE and BE architectures, we need to distinguish between the + * two states (device or another core updates the memory location). Therefore, + * if mem_written_by_device is true, the host memory being polled will be + * updated directly by the device. If false, the host memory being polled will + * be updated by host CPU. Required so host knows whether or not the memory + * might need to be byte-swapped before returning value to caller. */ -#define hl_poll_timeout_memory(hdev, addr, val, cond, sleep_us, timeout_us) \ +#define hl_poll_timeout_memory(hdev, addr, val, cond, sleep_us, timeout_us, \ + mem_written_by_device) \ ({ \ ktime_t __timeout; \ /* timeout should be longer when working with simulator */ \ @@ -1077,10 +1085,14 @@ void hl_wreg(struct hl_device *hdev, u32 reg, u32 val); /* Verify we read updates done by other cores or by device */ \ mb(); \ (val) = *((u32 *) (uintptr_t) (addr)); \ + if (mem_written_by_device) \ + (val) = le32_to_cpu(val); \ if (cond) \ break; \ if (timeout_us && ktime_compare(ktime_get(), __timeout) > 0) { \ (val) = *((u32 *) (uintptr_t) (addr)); \ + if (mem_written_by_device) \ + (val) = le32_to_cpu(val); \ break; \ } \ if (sleep_us) \ From cdb2d3ee0436d74fa9092f2df46aaa6f9e03c969 Mon Sep 17 00:00:00 2001 From: Anders Roxell Date: Fri, 26 Jul 2019 13:27:10 +0200 Subject: [PATCH 062/482] arm64: KVM: hyp: debug-sr: Mark expected switch fall-through MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When fall-through warnings was enabled by default the following warnings was starting to show up: ../arch/arm64/kvm/hyp/debug-sr.c: In function ‘__debug_save_state’: ../arch/arm64/kvm/hyp/debug-sr.c:20:19: warning: this statement may fall through [-Wimplicit-fallthrough=] case 15: ptr[15] = read_debug(reg, 15); \ ../arch/arm64/kvm/hyp/debug-sr.c:113:2: note: in expansion of macro ‘save_debug’ save_debug(dbg->dbg_bcr, dbgbcr, brps); ^~~~~~~~~~ ../arch/arm64/kvm/hyp/debug-sr.c:21:2: note: here case 14: ptr[14] = read_debug(reg, 14); \ ^~~~ ../arch/arm64/kvm/hyp/debug-sr.c:113:2: note: in expansion of macro ‘save_debug’ save_debug(dbg->dbg_bcr, dbgbcr, brps); ^~~~~~~~~~ ../arch/arm64/kvm/hyp/debug-sr.c:21:19: warning: this statement may fall through [-Wimplicit-fallthrough=] case 14: ptr[14] = read_debug(reg, 14); \ ../arch/arm64/kvm/hyp/debug-sr.c:113:2: note: in expansion of macro ‘save_debug’ save_debug(dbg->dbg_bcr, dbgbcr, brps); ^~~~~~~~~~ ../arch/arm64/kvm/hyp/debug-sr.c:22:2: note: here case 13: ptr[13] = read_debug(reg, 13); \ ^~~~ ../arch/arm64/kvm/hyp/debug-sr.c:113:2: note: in expansion of macro ‘save_debug’ save_debug(dbg->dbg_bcr, dbgbcr, brps); ^~~~~~~~~~ Rework to add a 'Fall through' comment where the compiler warned about fall-through, hence silencing the warning. Fixes: d93512ef0f0e ("Makefile: Globally enable fall-through warning") Signed-off-by: Anders Roxell [maz: fixed commit message] Signed-off-by: Marc Zyngier --- arch/arm64/kvm/hyp/debug-sr.c | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/arch/arm64/kvm/hyp/debug-sr.c b/arch/arm64/kvm/hyp/debug-sr.c index 26781da3ad3e..0fc9872a1467 100644 --- a/arch/arm64/kvm/hyp/debug-sr.c +++ b/arch/arm64/kvm/hyp/debug-sr.c @@ -18,40 +18,70 @@ #define save_debug(ptr,reg,nr) \ switch (nr) { \ case 15: ptr[15] = read_debug(reg, 15); \ + /* Fall through */ \ case 14: ptr[14] = read_debug(reg, 14); \ + /* Fall through */ \ case 13: ptr[13] = read_debug(reg, 13); \ + /* Fall through */ \ case 12: ptr[12] = read_debug(reg, 12); \ + /* Fall through */ \ case 11: ptr[11] = read_debug(reg, 11); \ + /* Fall through */ \ case 10: ptr[10] = read_debug(reg, 10); \ + /* Fall through */ \ case 9: ptr[9] = read_debug(reg, 9); \ + /* Fall through */ \ case 8: ptr[8] = read_debug(reg, 8); \ + /* Fall through */ \ case 7: ptr[7] = read_debug(reg, 7); \ + /* Fall through */ \ case 6: ptr[6] = read_debug(reg, 6); \ + /* Fall through */ \ case 5: ptr[5] = read_debug(reg, 5); \ + /* Fall through */ \ case 4: ptr[4] = read_debug(reg, 4); \ + /* Fall through */ \ case 3: ptr[3] = read_debug(reg, 3); \ + /* Fall through */ \ case 2: ptr[2] = read_debug(reg, 2); \ + /* Fall through */ \ case 1: ptr[1] = read_debug(reg, 1); \ + /* Fall through */ \ default: ptr[0] = read_debug(reg, 0); \ } #define restore_debug(ptr,reg,nr) \ switch (nr) { \ case 15: write_debug(ptr[15], reg, 15); \ + /* Fall through */ \ case 14: write_debug(ptr[14], reg, 14); \ + /* Fall through */ \ case 13: write_debug(ptr[13], reg, 13); \ + /* Fall through */ \ case 12: write_debug(ptr[12], reg, 12); \ + /* Fall through */ \ case 11: write_debug(ptr[11], reg, 11); \ + /* Fall through */ \ case 10: write_debug(ptr[10], reg, 10); \ + /* Fall through */ \ case 9: write_debug(ptr[9], reg, 9); \ + /* Fall through */ \ case 8: write_debug(ptr[8], reg, 8); \ + /* Fall through */ \ case 7: write_debug(ptr[7], reg, 7); \ + /* Fall through */ \ case 6: write_debug(ptr[6], reg, 6); \ + /* Fall through */ \ case 5: write_debug(ptr[5], reg, 5); \ + /* Fall through */ \ case 4: write_debug(ptr[4], reg, 4); \ + /* Fall through */ \ case 3: write_debug(ptr[3], reg, 3); \ + /* Fall through */ \ case 2: write_debug(ptr[2], reg, 2); \ + /* Fall through */ \ case 1: write_debug(ptr[1], reg, 1); \ + /* Fall through */ \ default: write_debug(ptr[0], reg, 0); \ } From 8c4407de3be44c2a0ec3e316cd3e4a711bc2aaba Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Thu, 11 Jul 2019 22:24:57 +0800 Subject: [PATCH 063/482] pinctrl: aspeed: Make aspeed_pinmux_ips static Fix sparse warning: drivers/pinctrl/aspeed/pinmux-aspeed.c:8:12: warning: symbol 'aspeed_pinmux_ips' was not declared. Should it be static? Reported-by: Hulk Robot Signed-off-by: YueHaibing Link: https://lore.kernel.org/r/20190711142457.37028-1-yuehaibing@huawei.com Reviewed-by: Andrew Jeffery Signed-off-by: Linus Walleij --- drivers/pinctrl/aspeed/pinmux-aspeed.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/aspeed/pinmux-aspeed.c b/drivers/pinctrl/aspeed/pinmux-aspeed.c index 5b0fe178ccf2..839c01b7953f 100644 --- a/drivers/pinctrl/aspeed/pinmux-aspeed.c +++ b/drivers/pinctrl/aspeed/pinmux-aspeed.c @@ -5,7 +5,7 @@ #include "pinmux-aspeed.h" -const char *const aspeed_pinmux_ips[] = { +static const char *const aspeed_pinmux_ips[] = { [ASPEED_IP_SCU] = "SCU", [ASPEED_IP_GFX] = "GFX", [ASPEED_IP_LPC] = "LPC", From 1957de95d425d1c06560069dc7277a73a8b28683 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Wed, 24 Jul 2019 07:38:32 -0700 Subject: [PATCH 064/482] usb: typec: tcpm: Add NULL check before dereferencing config When instantiating tcpm on an NXP OM 13588 board with NXP PTN5110, the following crash is seen when writing into the 'preferred_role' sysfs attribute. Unable to handle kernel NULL pointer dereference at virtual address 00000028 pgd = f69149ad [00000028] *pgd=00000000 Internal error: Oops: 5 [#1] THUMB2 Modules linked in: tcpci tcpm CPU: 0 PID: 1882 Comm: bash Not tainted 5.1.18-sama5-armv7-r2 #4 Hardware name: Atmel SAMA5 PC is at tcpm_try_role+0x3a/0x4c [tcpm] LR is at tcpm_try_role+0x15/0x4c [tcpm] pc : [] lr : [] psr: 60030033 sp : dc1a1e88 ip : c03fb47d fp : 00000000 r10: dc216190 r9 : dc1a1f78 r8 : 00000001 r7 : df4ae044 r6 : dd032e90 r5 : dd1ce340 r4 : df4ae054 r3 : 00000000 r2 : 00000000 r1 : 00000000 r0 : df4ae044 Flags: nZCv IRQs on FIQs on Mode SVC_32 ISA Thumb Segment none Control: 50c53c7d Table: 3efec059 DAC: 00000051 Process bash (pid: 1882, stack limit = 0x6a6d4aa5) Stack: (0xdc1a1e88 to 0xdc1a2000) 1e80: dd05d808 dd1ce340 00000001 00000007 dd1ce340 c03fb4a7 1ea0: 00000007 00000007 dc216180 00000000 00000000 c01e1e03 00000000 00000000 1ec0: c0907008 dee98b40 c01e1d5d c06106c4 00000000 00000000 00000007 c0194e8b 1ee0: 0000000a 00000400 00000000 c01a97db dc22bf00 ffffe000 df4b6a00 df745900 1f00: 00000001 00000001 000000dd c01a9c2f 7aeab3be c0907008 00000000 dc22bf00 1f20: c0907008 00000000 00000000 00000000 00000000 7aeab3be 00000007 dee98b40 1f40: 005dc318 dc1a1f78 00000000 00000000 00000007 c01969f7 0000000a c01a20cb 1f60: dee98b40 c0907008 dee98b40 005dc318 00000000 c0196b9b 00000000 00000000 1f80: dee98b40 7aeab3be 00000074 005dc318 b6f3bdb0 00000004 c0101224 dc1a0000 1fa0: 00000004 c0101001 00000074 005dc318 00000001 005dc318 00000007 00000000 1fc0: 00000074 005dc318 b6f3bdb0 00000004 00000007 00000007 00000000 00000000 1fe0: 00000004 be800880 b6ed35b3 b6e5c746 60030030 00000001 00000000 00000000 [] (tcpm_try_role [tcpm]) from [] (preferred_role_store+0x2b/0x5c) [] (preferred_role_store) from [] (kernfs_fop_write+0xa7/0x150) [] (kernfs_fop_write) from [] (__vfs_write+0x1f/0x104) [] (__vfs_write) from [] (vfs_write+0x6b/0x104) [] (vfs_write) from [] (ksys_write+0x43/0x94) [] (ksys_write) from [] (ret_fast_syscall+0x1/0x62) Since commit 96232cbc6c994 ("usb: typec: tcpm: support get typec and pd config from device properties"), the 'config' pointer in struct tcpc_dev is optional when registering a Type-C port. Since it is optional, we have to check if it is NULL before dereferencing it. Reported-by: Douglas Gilbert Cc: Douglas Gilbert Fixes: 96232cbc6c994 ("usb: typec: tcpm: support get typec and pd config from device properties") Signed-off-by: Guenter Roeck Cc: stable Reviewed-by: Jun Li Reviewed-by: Heikki Krogerus Link: https://lore.kernel.org/r/1563979112-22483-1-git-send-email-linux@roeck-us.net Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/tcpm/tcpm.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c index fba32d84e578..77f71f602f73 100644 --- a/drivers/usb/typec/tcpm/tcpm.c +++ b/drivers/usb/typec/tcpm/tcpm.c @@ -379,7 +379,8 @@ static enum tcpm_state tcpm_default_state(struct tcpm_port *port) return SNK_UNATTACHED; else if (port->try_role == TYPEC_SOURCE) return SRC_UNATTACHED; - else if (port->tcpc->config->default_role == TYPEC_SINK) + else if (port->tcpc->config && + port->tcpc->config->default_role == TYPEC_SINK) return SNK_UNATTACHED; /* Fall through to return SRC_UNATTACHED */ } else if (port->port_type == TYPEC_PORT_SNK) { @@ -4114,7 +4115,7 @@ static int tcpm_try_role(const struct typec_capability *cap, int role) mutex_lock(&port->lock); if (tcpc->try_role) ret = tcpc->try_role(tcpc, role); - if (!ret && !tcpc->config->try_role_hw) + if (!ret && (!tcpc->config || !tcpc->config->try_role_hw)) port->try_role = role; port->try_src_count = 0; port->try_snk_count = 0; @@ -4701,7 +4702,7 @@ static int tcpm_copy_caps(struct tcpm_port *port, port->typec_caps.prefer_role = tcfg->default_role; port->typec_caps.type = tcfg->type; port->typec_caps.data = tcfg->data; - port->self_powered = port->tcpc->config->self_powered; + port->self_powered = tcfg->self_powered; return 0; } From fd5da3e2cc61b4a7c877172fdc9348c82cf6ccfc Mon Sep 17 00:00:00 2001 From: Li Jun Date: Wed, 17 Jul 2019 16:06:45 +0800 Subject: [PATCH 065/482] usb: typec: tcpm: free log buf memory when remove debug file The logbuffer memory should be freed when remove debug file. Cc: stable@vger.kernel.org # v4.15+ Fixes: 4b4e02c83167 ("typec: tcpm: Move out of staging") Signed-off-by: Li Jun Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20190717080646.30421-1-jun.li@nxp.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/tcpm/tcpm.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c index 77f71f602f73..7b6497c1031e 100644 --- a/drivers/usb/typec/tcpm/tcpm.c +++ b/drivers/usb/typec/tcpm/tcpm.c @@ -587,6 +587,15 @@ static void tcpm_debugfs_init(struct tcpm_port *port) static void tcpm_debugfs_exit(struct tcpm_port *port) { + int i; + + mutex_lock(&port->logbuffer_lock); + for (i = 0; i < LOG_BUFFER_ENTRIES; i++) { + kfree(port->logbuffer[i]); + port->logbuffer[i] = NULL; + } + mutex_unlock(&port->logbuffer_lock); + debugfs_remove(port->dentry); } From 12ca7297b8855c0af1848503d37196159b24e6b9 Mon Sep 17 00:00:00 2001 From: Li Jun Date: Wed, 17 Jul 2019 16:06:46 +0800 Subject: [PATCH 066/482] usb: typec: tcpm: remove tcpm dir if no children If config tcpm as module, module unload will not remove tcpm dir, then the next module load will have problem: the rootdir is NULL but tcpm dir is still there, so tcpm_debugfs_init() will create tcpm dir again with failure, fix it by remove the tcpm dir if no children. Cc: stable@vger.kernel.org # v4.15+ Fixes: 4b4e02c83167 ("typec: tcpm: Move out of staging") Signed-off-by: Li Jun Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20190717080646.30421-2-jun.li@nxp.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/tcpm/tcpm.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c index 7b6497c1031e..ab6456622120 100644 --- a/drivers/usb/typec/tcpm/tcpm.c +++ b/drivers/usb/typec/tcpm/tcpm.c @@ -597,6 +597,10 @@ static void tcpm_debugfs_exit(struct tcpm_port *port) mutex_unlock(&port->logbuffer_lock); debugfs_remove(port->dentry); + if (list_empty(&rootdir->d_subdirs)) { + debugfs_remove(rootdir); + rootdir = NULL; + } } #else From fab7772bfbcfe8fb8e3e352a6a8fcaf044cded17 Mon Sep 17 00:00:00 2001 From: Anthony Iliopoulos Date: Mon, 29 Jul 2019 14:40:40 +0200 Subject: [PATCH 067/482] nvme-multipath: revalidate nvme_ns_head gendisk in nvme_validate_ns When CONFIG_NVME_MULTIPATH is set, only the hidden gendisk associated with the per-controller ns is run through revalidate_disk when a rescan is triggered, while the visible blockdev never gets its size (bdev->bd_inode->i_size) updated to reflect any capacity changes that may have occurred. This prevents online resizing of nvme block devices and in extension of any filesystems atop that will are unable to expand while mounted, as userspace relies on the blockdev size for obtaining the disk capacity (via BLKGETSIZE/64 ioctls). Fix this by explicitly revalidating the actual namespace gendisk in addition to the per-controller gendisk, when multipath is enabled. Signed-off-by: Anthony Iliopoulos Reviewed-by: Sagi Grimberg Reviewed-by: Johannes Thumshirn Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 8f3fbe5ca937..80c7a7ee240b 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1715,6 +1715,7 @@ static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id) if (ns->head->disk) { nvme_update_disk_info(ns->head->disk, ns, id); blk_queue_stack_limits(ns->head->disk->queue, ns->queue); + revalidate_disk(ns->head->disk); } #endif } From 46c42d844211ef5902e32aa507beac0817c585e9 Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Mon, 29 Jul 2019 13:49:54 -0700 Subject: [PATCH 068/482] driver core: platform: return -ENXIO for missing GpioInt Commit daaef255dc96 ("driver: platform: Support parsing GpioInt 0 in platform_get_irq()") broke the Embedded Controller driver on most LPC Chromebooks (i.e., most x86 Chromebooks), because cros_ec_lpc expects platform_get_irq() to return -ENXIO for non-existent IRQs. Unfortunately, acpi_dev_gpio_irq_get() doesn't follow this convention and returns -ENOENT instead. So we get this error from cros_ec_lpc: couldn't retrieve IRQ number (-2) I see a variety of drivers that treat -ENXIO specially, so rather than fix all of them, let's fix up the API to restore its previous behavior. I reported this on v2 of this patch: https://lore.kernel.org/lkml/20190220180538.GA42642@google.com/ but apparently the patch had already been merged before v3 got sent out: https://lore.kernel.org/lkml/20190221193429.161300-1-egranata@chromium.org/ and the result is that the bug landed and remains unfixed. I differ from the v3 patch by: * allowing for ret==0, even though acpi_dev_gpio_irq_get() specifically documents (and enforces) that 0 is not a valid return value (noted on the v3 review) * adding a small comment Reported-by: Brian Norris Reported-by: Salvatore Bellizzi Cc: Enrico Granata Cc: Fixes: daaef255dc96 ("driver: platform: Support parsing GpioInt 0 in platform_get_irq()") Signed-off-by: Brian Norris Reviewed-by: Andy Shevchenko Acked-by: Enrico Granata Link: https://lore.kernel.org/r/20190729204954.25510-1-briannorris@chromium.org Signed-off-by: Greg Kroah-Hartman --- drivers/base/platform.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/base/platform.c b/drivers/base/platform.c index 506a0175a5a7..ec974ba9c0c4 100644 --- a/drivers/base/platform.c +++ b/drivers/base/platform.c @@ -157,8 +157,13 @@ int platform_get_irq(struct platform_device *dev, unsigned int num) * the device will only expose one IRQ, and this fallback * allows a common code path across either kind of resource. */ - if (num == 0 && has_acpi_companion(&dev->dev)) - return acpi_dev_gpio_irq_get(ACPI_COMPANION(&dev->dev), num); + if (num == 0 && has_acpi_companion(&dev->dev)) { + int ret = acpi_dev_gpio_irq_get(ACPI_COMPANION(&dev->dev), num); + + /* Our callers expect -ENXIO for missing IRQs. */ + if (ret >= 0 || ret == -EPROBE_DEFER) + return ret; + } return -ENXIO; #endif From 2067b2b3f4846402a040286135f98f46f8919939 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 30 Jul 2019 14:38:51 +0100 Subject: [PATCH 069/482] afs: Fix the CB.ProbeUuid service handler to reply correctly Fix the service handler function for the CB.ProbeUuid RPC call so that it replies in the correct manner - that is an empty reply for success and an abort of 1 for failure. Putting 0 or 1 in an integer in the body of the reply should result in the fileserver throwing an RX_PROTOCOL_ERROR abort and discarding its record of the client; older servers, however, don't necessarily check that all the data got consumed, and so might incorrectly think that they got a positive response and associate the client with the wrong host record. If the client is incorrectly associated, this will result in callbacks intended for a different client being delivered to this one and then, when the other client connects and responds positively, all of the callback promises meant for the client that issued the improper response will be lost and it won't receive any further change notifications. Fixes: 9396d496d745 ("afs: support the CB.ProbeUuid RPC op") Signed-off-by: David Howells Reviewed-by: Jeffrey Altman --- fs/afs/cmservice.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c index 4f1b6f466ff5..b86195e4dc6c 100644 --- a/fs/afs/cmservice.c +++ b/fs/afs/cmservice.c @@ -505,18 +505,14 @@ static void SRXAFSCB_ProbeUuid(struct work_struct *work) struct afs_call *call = container_of(work, struct afs_call, work); struct afs_uuid *r = call->request; - struct { - __be32 match; - } reply; - _enter(""); if (memcmp(r, &call->net->uuid, sizeof(call->net->uuid)) == 0) - reply.match = htonl(0); + afs_send_empty_reply(call); else - reply.match = htonl(1); + rxrpc_kernel_abort_call(call->net->socket, call->rxcall, + 1, 1, "K-1"); - afs_send_simple_reply(call, &reply, sizeof(reply)); afs_put_call(call); _leave(""); } From 4a46fdba449a5cd890271df5a9e23927d519ed00 Mon Sep 17 00:00:00 2001 From: Marc Dionne Date: Tue, 30 Jul 2019 14:38:51 +0100 Subject: [PATCH 070/482] afs: Fix loop index mixup in afs_deliver_vl_get_entry_by_name_u() afs_deliver_vl_get_entry_by_name_u() scans through the vl entry received from the volume location server and builds a return list containing the sites that are currently valid. When assigning values for the return list, the index into the vl entry (i) is used rather than the one for the new list (entry->nr_server). If all sites are usable, this works out fine as the indices will match. If some sites are not valid, for example if AFS_VLSF_DONTUSE is set, fs_mask and the uuid will be set for the wrong return site. Fix this by using entry->nr_server as the index into the arrays being filled in rather than i. This can lead to EDESTADDRREQ errors if none of the returned sites have a valid fs_mask. Fixes: d2ddc776a458 ("afs: Overhaul volume and server record caching and fileserver rotation") Signed-off-by: Marc Dionne Signed-off-by: David Howells Reviewed-by: Jeffrey Altman --- fs/afs/vlclient.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/fs/afs/vlclient.c b/fs/afs/vlclient.c index d7e0fd3c00df..cfb0ac4bd039 100644 --- a/fs/afs/vlclient.c +++ b/fs/afs/vlclient.c @@ -56,23 +56,24 @@ static int afs_deliver_vl_get_entry_by_name_u(struct afs_call *call) struct afs_uuid__xdr *xdr; struct afs_uuid *uuid; int j; + int n = entry->nr_servers; tmp = ntohl(uvldb->serverFlags[i]); if (tmp & AFS_VLSF_DONTUSE || (new_only && !(tmp & AFS_VLSF_NEWREPSITE))) continue; if (tmp & AFS_VLSF_RWVOL) { - entry->fs_mask[i] |= AFS_VOL_VTM_RW; + entry->fs_mask[n] |= AFS_VOL_VTM_RW; if (vlflags & AFS_VLF_BACKEXISTS) - entry->fs_mask[i] |= AFS_VOL_VTM_BAK; + entry->fs_mask[n] |= AFS_VOL_VTM_BAK; } if (tmp & AFS_VLSF_ROVOL) - entry->fs_mask[i] |= AFS_VOL_VTM_RO; - if (!entry->fs_mask[i]) + entry->fs_mask[n] |= AFS_VOL_VTM_RO; + if (!entry->fs_mask[n]) continue; xdr = &uvldb->serverNumber[i]; - uuid = (struct afs_uuid *)&entry->fs_server[i]; + uuid = (struct afs_uuid *)&entry->fs_server[n]; uuid->time_low = xdr->time_low; uuid->time_mid = htons(ntohl(xdr->time_mid)); uuid->time_hi_and_version = htons(ntohl(xdr->time_hi_and_version)); From a6eed4ab5dd4bfb696c1a3f49742b8d1846a66a0 Mon Sep 17 00:00:00 2001 From: Jia-Ju Bai Date: Tue, 30 Jul 2019 14:38:51 +0100 Subject: [PATCH 071/482] fs: afs: Fix a possible null-pointer dereference in afs_put_read() In afs_read_dir(), there is an if statement on line 255 to check whether req->pages is NULL: if (!req->pages) goto error; If req->pages is NULL, afs_put_read() on line 337 is executed. In afs_put_read(), req->pages[i] is used on line 195. Thus, a possible null-pointer dereference may occur in this case. To fix this possible bug, an if statement is added in afs_put_read() to check req->pages. This bug is found by a static analysis tool STCheck written by us. Fixes: f3ddee8dc4e2 ("afs: Fix directory handling") Signed-off-by: Jia-Ju Bai Signed-off-by: David Howells --- fs/afs/file.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/fs/afs/file.c b/fs/afs/file.c index 56b69576274d..dd3c55c9101c 100644 --- a/fs/afs/file.c +++ b/fs/afs/file.c @@ -191,11 +191,13 @@ void afs_put_read(struct afs_read *req) int i; if (refcount_dec_and_test(&req->usage)) { - for (i = 0; i < req->nr_pages; i++) - if (req->pages[i]) - put_page(req->pages[i]); - if (req->pages != req->array) - kfree(req->pages); + if (req->pages) { + for (i = 0; i < req->nr_pages; i++) + if (req->pages[i]) + put_page(req->pages[i]); + if (req->pages != req->array) + kfree(req->pages); + } kfree(req); } } From 37c0bbb3326674940e657118306ac52364314523 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 30 Jul 2019 14:38:51 +0100 Subject: [PATCH 072/482] afs: Fix off-by-one in afs_rename() expected data version calculation When afs_rename() calculates the expected data version of the target directory in a cross-directory rename, it doesn't increment it as it should, so it always thinks that the target inode is unexpectedly modified on the server. Fixes: a58823ac4589 ("afs: Fix application of status and callback to be under same lock") Signed-off-by: David Howells --- fs/afs/dir.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/afs/dir.c b/fs/afs/dir.c index e640d67274be..20aa18b38a49 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -1804,7 +1804,7 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry, afs_end_vnode_operation(&fc); goto error_rehash; } - new_data_version = new_dvnode->status.data_version; + new_data_version = new_dvnode->status.data_version + 1; } else { new_data_version = orig_data_version; new_scb = &scb[0]; From 5dc84855b0fc7e1db182b55c5564fd539d6eff92 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 30 Jul 2019 14:38:51 +0100 Subject: [PATCH 073/482] afs: Only update d_fsdata if different in afs_d_revalidate() In the in-kernel afs filesystem, d_fsdata is set with the data version of the parent directory. afs_d_revalidate() will update this to the current directory version, but it shouldn't do this if it the value it read from d_fsdata is the same as no lock is held and cmpxchg() is not used. Fix the code to only change the value if it is different from the current directory version. Fixes: 260a980317da ("[AFS]: Add "directory write" support.") Signed-off-by: David Howells --- fs/afs/dir.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/afs/dir.c b/fs/afs/dir.c index 20aa18b38a49..618e26cea887 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -1017,7 +1017,7 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags) dir_version = (long)dir->status.data_version; de_version = (long)dentry->d_fsdata; if (de_version == dir_version) - goto out_valid; + goto out_valid_noupdate; dir_version = (long)dir->invalid_before; if (de_version - dir_version >= 0) @@ -1081,6 +1081,7 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags) out_valid: dentry->d_fsdata = (void *)dir_version; +out_valid_noupdate: dput(parent); key_put(key); _leave(" = 1 [valid]"); From 9dd0b82ef530cdfe805c9f7079c99e104be59a14 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 30 Jul 2019 14:38:52 +0100 Subject: [PATCH 074/482] afs: Fix missing dentry data version updating In the in-kernel afs filesystem, the d_fsdata dentry field is used to hold the data version of the parent directory when it was created or when d_revalidate() last caused it to be updated. This is compared to the ->invalid_before field in the directory inode, rather than the actual data version number, thereby allowing changes due to local edits to be ignored. Only if the server data version gets bumped unexpectedly (eg. by a competing client), do we need to revalidate stuff. However, the d_fsdata field should also be updated if an rpc op is performed that modifies that particular dentry. Such ops return the revised data version of the directory(ies) involved, so we should use that. This is particularly problematic for rename, since a dentry from one directory may be moved directly into another directory (ie. mv a/x b/x). It would then be sporting the wrong data version - and if this is in the future, for the destination directory, revalidations would be missed, leading to foreign renames and hard-link deletion being missed. Fix this by the following means: (1) Return the data version number from operations that read the directory contents - if they issue the read. This starts in afs_dir_iterate() and is used, ignored or passed back by its callers. (2) In afs_lookup*(), set the dentry version to the version returned by (1) before d_splice_alias() is called and the dentry published. (3) In afs_d_revalidate(), set the dentry version to that returned from (1) if an rpc call was issued. This means that if a parallel procedure, such as mkdir(), modifies the directory, we won't accidentally use the data version from that. (4) In afs_{mkdir,create,link,symlink}(), set the new dentry's version to the directory data version before d_instantiate() is called. (5) In afs_{rmdir,unlink}, update the target dentry's version to the directory data version as soon as we've updated the directory inode. (6) In afs_rename(), we need to unhash the old dentry before we start so that we don't get afs_d_revalidate() reverting the version change in cross-directory renames. We then need to set both the old and the new dentry versions the data version of the new directory before we call d_move() as d_move() will rehash them. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: David Howells --- fs/afs/dir.c | 84 +++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 70 insertions(+), 14 deletions(-) diff --git a/fs/afs/dir.c b/fs/afs/dir.c index 618e26cea887..81207dc3c997 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -440,7 +440,7 @@ static int afs_dir_iterate_block(struct afs_vnode *dvnode, * iterate through the data blob that lists the contents of an AFS directory */ static int afs_dir_iterate(struct inode *dir, struct dir_context *ctx, - struct key *key) + struct key *key, afs_dataversion_t *_dir_version) { struct afs_vnode *dvnode = AFS_FS_I(dir); struct afs_xdr_dir_page *dbuf; @@ -460,6 +460,7 @@ static int afs_dir_iterate(struct inode *dir, struct dir_context *ctx, req = afs_read_dir(dvnode, key); if (IS_ERR(req)) return PTR_ERR(req); + *_dir_version = req->data_version; /* round the file position up to the next entry boundary */ ctx->pos += sizeof(union afs_xdr_dirent) - 1; @@ -514,7 +515,10 @@ static int afs_dir_iterate(struct inode *dir, struct dir_context *ctx, */ static int afs_readdir(struct file *file, struct dir_context *ctx) { - return afs_dir_iterate(file_inode(file), ctx, afs_file_key(file)); + afs_dataversion_t dir_version; + + return afs_dir_iterate(file_inode(file), ctx, afs_file_key(file), + &dir_version); } /* @@ -555,7 +559,8 @@ static int afs_lookup_one_filldir(struct dir_context *ctx, const char *name, * - just returns the FID the dentry name maps to if found */ static int afs_do_lookup_one(struct inode *dir, struct dentry *dentry, - struct afs_fid *fid, struct key *key) + struct afs_fid *fid, struct key *key, + afs_dataversion_t *_dir_version) { struct afs_super_info *as = dir->i_sb->s_fs_info; struct afs_lookup_one_cookie cookie = { @@ -568,7 +573,7 @@ static int afs_do_lookup_one(struct inode *dir, struct dentry *dentry, _enter("{%lu},%p{%pd},", dir->i_ino, dentry, dentry); /* search the directory */ - ret = afs_dir_iterate(dir, &cookie.ctx, key); + ret = afs_dir_iterate(dir, &cookie.ctx, key, _dir_version); if (ret < 0) { _leave(" = %d [iter]", ret); return ret; @@ -642,6 +647,7 @@ static struct inode *afs_do_lookup(struct inode *dir, struct dentry *dentry, struct afs_server *server; struct afs_vnode *dvnode = AFS_FS_I(dir), *vnode; struct inode *inode = NULL, *ti; + afs_dataversion_t data_version = READ_ONCE(dvnode->status.data_version); int ret, i; _enter("{%lu},%p{%pd},", dir->i_ino, dentry, dentry); @@ -669,12 +675,14 @@ static struct inode *afs_do_lookup(struct inode *dir, struct dentry *dentry, cookie->fids[i].vid = as->volume->vid; /* search the directory */ - ret = afs_dir_iterate(dir, &cookie->ctx, key); + ret = afs_dir_iterate(dir, &cookie->ctx, key, &data_version); if (ret < 0) { inode = ERR_PTR(ret); goto out; } + dentry->d_fsdata = (void *)(unsigned long)data_version; + inode = ERR_PTR(-ENOENT); if (!cookie->found) goto out; @@ -968,7 +976,8 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags) struct dentry *parent; struct inode *inode; struct key *key; - long dir_version, de_version; + afs_dataversion_t dir_version; + long de_version; int ret; if (flags & LOOKUP_RCU) @@ -1014,20 +1023,20 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags) * on a 32-bit system, we only have 32 bits in the dentry to store the * version. */ - dir_version = (long)dir->status.data_version; + dir_version = dir->status.data_version; de_version = (long)dentry->d_fsdata; - if (de_version == dir_version) + if (de_version == (long)dir_version) goto out_valid_noupdate; - dir_version = (long)dir->invalid_before; - if (de_version - dir_version >= 0) + dir_version = dir->invalid_before; + if (de_version - (long)dir_version >= 0) goto out_valid; _debug("dir modified"); afs_stat_v(dir, n_reval); /* search the directory for this vnode */ - ret = afs_do_lookup_one(&dir->vfs_inode, dentry, &fid, key); + ret = afs_do_lookup_one(&dir->vfs_inode, dentry, &fid, key, &dir_version); switch (ret) { case 0: /* the filename maps to something */ @@ -1080,7 +1089,7 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags) } out_valid: - dentry->d_fsdata = (void *)dir_version; + dentry->d_fsdata = (void *)(unsigned long)dir_version; out_valid_noupdate: dput(parent); key_put(key); @@ -1186,6 +1195,20 @@ static void afs_prep_for_new_inode(struct afs_fs_cursor *fc, iget_data->cb_s_break = fc->cbi->server->cb_s_break; } +/* + * Note that a dentry got changed. We need to set d_fsdata to the data version + * number derived from the result of the operation. It doesn't matter if + * d_fsdata goes backwards as we'll just revalidate. + */ +static void afs_update_dentry_version(struct afs_fs_cursor *fc, + struct dentry *dentry, + struct afs_status_cb *scb) +{ + if (fc->ac.error == 0) + dentry->d_fsdata = + (void *)(unsigned long)scb->status.data_version; +} + /* * create a directory on an AFS filesystem */ @@ -1228,6 +1251,7 @@ static int afs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) afs_check_for_remote_deletion(&fc, dvnode); afs_vnode_commit_status(&fc, dvnode, fc.cb_break, &data_version, &scb[0]); + afs_update_dentry_version(&fc, dentry, &scb[0]); afs_vnode_new_inode(&fc, dentry, &iget_data, &scb[1]); ret = afs_end_vnode_operation(&fc); if (ret < 0) @@ -1320,6 +1344,7 @@ static int afs_rmdir(struct inode *dir, struct dentry *dentry) afs_vnode_commit_status(&fc, dvnode, fc.cb_break, &data_version, scb); + afs_update_dentry_version(&fc, dentry, scb); ret = afs_end_vnode_operation(&fc); if (ret == 0) { afs_dir_remove_subdir(dentry); @@ -1459,6 +1484,7 @@ static int afs_unlink(struct inode *dir, struct dentry *dentry) &data_version, &scb[0]); afs_vnode_commit_status(&fc, vnode, fc.cb_break_2, &data_version_2, &scb[1]); + afs_update_dentry_version(&fc, dentry, &scb[0]); ret = afs_end_vnode_operation(&fc); if (ret == 0 && !(scb[1].have_status || scb[1].have_error)) ret = afs_dir_remove_link(dvnode, dentry, key); @@ -1527,6 +1553,7 @@ static int afs_create(struct inode *dir, struct dentry *dentry, umode_t mode, afs_check_for_remote_deletion(&fc, dvnode); afs_vnode_commit_status(&fc, dvnode, fc.cb_break, &data_version, &scb[0]); + afs_update_dentry_version(&fc, dentry, &scb[0]); afs_vnode_new_inode(&fc, dentry, &iget_data, &scb[1]); ret = afs_end_vnode_operation(&fc); if (ret < 0) @@ -1608,6 +1635,7 @@ static int afs_link(struct dentry *from, struct inode *dir, afs_vnode_commit_status(&fc, vnode, fc.cb_break_2, NULL, &scb[1]); ihold(&vnode->vfs_inode); + afs_update_dentry_version(&fc, dentry, &scb[0]); d_instantiate(dentry, &vnode->vfs_inode); mutex_unlock(&vnode->io_lock); @@ -1687,6 +1715,7 @@ static int afs_symlink(struct inode *dir, struct dentry *dentry, afs_check_for_remote_deletion(&fc, dvnode); afs_vnode_commit_status(&fc, dvnode, fc.cb_break, &data_version, &scb[0]); + afs_update_dentry_version(&fc, dentry, &scb[0]); afs_vnode_new_inode(&fc, dentry, &iget_data, &scb[1]); ret = afs_end_vnode_operation(&fc); if (ret < 0) @@ -1792,6 +1821,17 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry, } } + /* This bit is potentially nasty as there's a potential race with + * afs_d_revalidate{,_rcu}(). We have to change d_fsdata on the dentry + * to reflect it's new parent's new data_version after the op, but + * d_revalidate may see old_dentry between the op having taken place + * and the version being updated. + * + * So drop the old_dentry for now to make other threads go through + * lookup instead - which we hold a lock against. + */ + d_drop(old_dentry); + ret = -ERESTARTSYS; if (afs_begin_vnode_operation(&fc, orig_dvnode, key, true)) { afs_dataversion_t orig_data_version; @@ -1803,7 +1843,7 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry, if (orig_dvnode != new_dvnode) { if (mutex_lock_interruptible_nested(&new_dvnode->io_lock, 1) < 0) { afs_end_vnode_operation(&fc); - goto error_rehash; + goto error_rehash_old; } new_data_version = new_dvnode->status.data_version + 1; } else { @@ -1828,7 +1868,7 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry, } ret = afs_end_vnode_operation(&fc); if (ret < 0) - goto error_rehash; + goto error_rehash_old; } if (ret == 0) { @@ -1854,10 +1894,26 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry, drop_nlink(new_inode); spin_unlock(&new_inode->i_lock); } + + /* Now we can update d_fsdata on the dentries to reflect their + * new parent's data_version. + * + * Note that if we ever implement RENAME_EXCHANGE, we'll have + * to update both dentries with opposing dir versions. + */ + if (new_dvnode != orig_dvnode) { + afs_update_dentry_version(&fc, old_dentry, &scb[1]); + afs_update_dentry_version(&fc, new_dentry, &scb[1]); + } else { + afs_update_dentry_version(&fc, old_dentry, &scb[0]); + afs_update_dentry_version(&fc, new_dentry, &scb[0]); + } d_move(old_dentry, new_dentry); goto error_tmp; } +error_rehash_old: + d_rehash(new_dentry); error_rehash: if (rehash) d_rehash(rehash); From 81eaadcae81b4c1bf01649a3053d1f54e2d81cf1 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Thu, 25 Jul 2019 11:35:51 -0700 Subject: [PATCH 075/482] kgdboc: disable the console lock when in kgdb After commit ddde3c18b700 ("vt: More locking checks") kdb / kgdb has become useless because my console is filled with spews of: WARNING: CPU: 0 PID: 0 at .../drivers/tty/vt/vt.c:3846 con_is_visible+0x50/0x74 CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.3.0-rc1+ #48 Hardware name: Rockchip (Device Tree) Backtrace: [] (dump_backtrace) from [] (show_stack+0x20/0x24) [] (show_stack) from [] (dump_stack+0xb0/0xd0) [] (dump_stack) from [] (__warn+0xec/0x11c) [] (__warn) from [] (warn_slowpath_null+0x4c/0x58) [] (warn_slowpath_null) from [] (con_is_visible+0x50/0x74) [] (con_is_visible) from [] (con_scroll+0x108/0x1ac) [] (con_scroll) from [] (lf+0x44/0x88) [] (lf) from [] (vt_console_print+0x1a4/0x2bc) [] (vt_console_print) from [] (vkdb_printf+0x420/0x8a4) [] (vkdb_printf) from [] (kdb_printf+0x44/0x60) [] (kdb_printf) from [] (kdb_main_loop+0xf4/0x6e0) [] (kdb_main_loop) from [] (kdb_stub+0x268/0x398) [] (kdb_stub) from [] (kgdb_cpu_enter+0x1f8/0x674) [] (kgdb_cpu_enter) from [] (kgdb_handle_exception+0x1c4/0x1fc) [] (kgdb_handle_exception) from [] (kgdb_compiled_brk_fn+0x30/0x3c) [] (kgdb_compiled_brk_fn) from [] (do_undefinstr+0x180/0x1a0) [] (do_undefinstr) from [] (__und_svc_finish+0x0/0x3c) ... [] (kgdb_breakpoint) from [] (sysrq_handle_dbg+0x58/0x6c) [] (sysrq_handle_dbg) from [] (__handle_sysrq+0xac/0x154) Let's disable this warning when we're in kgdb to avoid the spew. The whole system is stopped when we're in kgdb so we can't exactly wait for someone else to drop the lock. Presumably the best we can do is to disable the warning and hope for the best. Fixes: ddde3c18b700 ("vt: More locking checks") Cc: Daniel Vetter Signed-off-by: Douglas Anderson Link: https://lore.kernel.org/r/20190725183551.169208-1-dianders@chromium.org Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/kgdboc.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/tty/serial/kgdboc.c b/drivers/tty/serial/kgdboc.c index bfe5e9e034ec..c7d51b51898f 100644 --- a/drivers/tty/serial/kgdboc.c +++ b/drivers/tty/serial/kgdboc.c @@ -277,10 +277,14 @@ static void kgdboc_pre_exp_handler(void) /* Increment the module count when the debugger is active */ if (!kgdb_connected) try_module_get(THIS_MODULE); + + atomic_inc(&ignore_console_lock_warning); } static void kgdboc_post_exp_handler(void) { + atomic_dec(&ignore_console_lock_warning); + /* decrement the module count when the debugger detaches */ if (!kgdb_connected) module_put(THIS_MODULE); From 92f5b0313e37e2b37aaf8f0bb75b6c50eafb5808 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Fri, 26 Jul 2019 10:01:07 -0400 Subject: [PATCH 076/482] media: vivid: fix missing cec adapter name Commit "vivid: reorder CEC allocation and control set-up" missed that the CEC adapter needs a valid vfd->name, and that was now filled in after the CEC adapter was created, leading to an empty adapter name. Fill in the name earlier. Signed-off-by: Hans Verkuil Fixes: 4ee895e71abb ("media: vivid: reorder CEC allocation and control set-up") Signed-off-by: Mauro Carvalho Chehab --- drivers/media/platform/vivid/vivid-core.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/media/platform/vivid/vivid-core.c b/drivers/media/platform/vivid/vivid-core.c index bc2a176937a4..d535aac68ce1 100644 --- a/drivers/media/platform/vivid/vivid-core.c +++ b/drivers/media/platform/vivid/vivid-core.c @@ -1099,6 +1099,8 @@ static int vivid_create_instance(struct platform_device *pdev, int inst) /* start creating the vb2 queues */ if (dev->has_vid_cap) { + snprintf(dev->vid_cap_dev.name, sizeof(dev->vid_cap_dev.name), + "vivid-%03d-vid-cap", inst); /* initialize vid_cap queue */ q = &dev->vb_vid_cap_q; q->type = dev->multiplanar ? V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE : @@ -1122,6 +1124,8 @@ static int vivid_create_instance(struct platform_device *pdev, int inst) } if (dev->has_vid_out) { + snprintf(dev->vid_out_dev.name, sizeof(dev->vid_out_dev.name), + "vivid-%03d-vid-out", inst); /* initialize vid_out queue */ q = &dev->vb_vid_out_q; q->type = dev->multiplanar ? V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE : @@ -1265,8 +1269,6 @@ static int vivid_create_instance(struct platform_device *pdev, int inst) /* finally start creating the device nodes */ if (dev->has_vid_cap) { vfd = &dev->vid_cap_dev; - snprintf(vfd->name, sizeof(vfd->name), - "vivid-%03d-vid-cap", inst); vfd->fops = &vivid_fops; vfd->ioctl_ops = &vivid_ioctl_ops; vfd->device_caps = dev->vid_cap_caps; @@ -1312,8 +1314,6 @@ static int vivid_create_instance(struct platform_device *pdev, int inst) if (dev->has_vid_out) { vfd = &dev->vid_out_dev; - snprintf(vfd->name, sizeof(vfd->name), - "vivid-%03d-vid-out", inst); vfd->vfl_dir = VFL_DIR_TX; vfd->fops = &vivid_fops; vfd->ioctl_ops = &vivid_ioctl_ops; From e70d8b287301eb6d7c7761c6171c56af62110ea3 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Sun, 28 Jul 2019 18:42:55 +0200 Subject: [PATCH 077/482] nvmem: Use the same permissions for eeprom as for nvmem The compatibility "eeprom" attribute is currently root-only no matter what the configuration says. The "nvmem" attribute does respect the setting of the root_only configuration bit, so do the same for "eeprom". Signed-off-by: Jean Delvare Fixes: b6c217ab9be6 ("nvmem: Add backwards compatibility support for older EEPROM drivers.") Reviewed-by: Bartosz Golaszewski Cc: Andrew Lunn Cc: Srinivas Kandagatla Cc: Arnd Bergmann Link: https://lore.kernel.org/r/20190728184255.563332e6@endymion Signed-off-by: Greg Kroah-Hartman --- drivers/nvmem/nvmem-sysfs.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/nvmem/nvmem-sysfs.c b/drivers/nvmem/nvmem-sysfs.c index 6f303b91f6e7..9e0c429cd08a 100644 --- a/drivers/nvmem/nvmem-sysfs.c +++ b/drivers/nvmem/nvmem-sysfs.c @@ -224,10 +224,17 @@ int nvmem_sysfs_setup_compat(struct nvmem_device *nvmem, if (!config->base_dev) return -EINVAL; - if (nvmem->read_only) - nvmem->eeprom = bin_attr_ro_root_nvmem; - else - nvmem->eeprom = bin_attr_rw_root_nvmem; + if (nvmem->read_only) { + if (config->root_only) + nvmem->eeprom = bin_attr_ro_root_nvmem; + else + nvmem->eeprom = bin_attr_ro_nvmem; + } else { + if (config->root_only) + nvmem->eeprom = bin_attr_rw_root_nvmem; + else + nvmem->eeprom = bin_attr_rw_nvmem; + } nvmem->eeprom.attr.name = "eeprom"; nvmem->eeprom.size = nvmem->size; #ifdef CONFIG_DEBUG_LOCK_ALLOC From 1dd38ae96973006c45a8010a7fc3c313f1c229a9 Mon Sep 17 00:00:00 2001 From: Moritz Fischer Date: Thu, 25 Jul 2019 10:45:17 -0700 Subject: [PATCH 078/482] MAINTAINERS: Move linux-fpga tree to new location Move the linux-fpga tree to new location at: git://git.kernel.org/pub/scm/linux/kernel/git/mdf/linux-fpga.git Signed-off-by: Moritz Fischer Link: https://lore.kernel.org/r/20190725174517.10516-1-mdf@kernel.org Signed-off-by: Greg Kroah-Hartman --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 6426db5198f0..8af274c743de 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6339,7 +6339,7 @@ FPGA MANAGER FRAMEWORK M: Moritz Fischer L: linux-fpga@vger.kernel.org S: Maintained -T: git git://git.kernel.org/pub/scm/linux/kernel/git/atull/linux-fpga.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/mdf/linux-fpga.git Q: http://patchwork.kernel.org/project/linux-fpga/list/ F: Documentation/fpga/ F: Documentation/driver-api/fpga/ From 2b089bf8d19c66f70ae3b2d2d101be1ae49bfe24 Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Mon, 22 Jul 2019 11:20:08 +0200 Subject: [PATCH 079/482] kernel/configs: Replace GPL boilerplate code with SPDX identifier The FSF does not reside in "675 Mass Ave, Cambridge" anymore... let's replace the old GPL boilerplate code with a proper SPDX identifier instead. Signed-off-by: Thomas Huth Signed-off-by: Greg Kroah-Hartman --- kernel/configs.c | 16 +--------------- 1 file changed, 1 insertion(+), 15 deletions(-) diff --git a/kernel/configs.c b/kernel/configs.c index b062425ccf8d..c09ea4c995e1 100644 --- a/kernel/configs.c +++ b/kernel/configs.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * kernel/configs.c * Echo the kernel .config file used to build the kernel @@ -6,21 +7,6 @@ * Copyright (C) 2002 Randy Dunlap * Copyright (C) 2002 Al Stone * Copyright (C) 2002 Hewlett-Packard Company - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or (at - * your option) any later version. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or - * NON INFRINGEMENT. See the GNU General Public License for more - * details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #include From ef349abd91c1b37f87639170bcd9d11fe3ac58ff Mon Sep 17 00:00:00 2001 From: Matthias Maennich Date: Thu, 25 Jul 2019 11:17:04 +0100 Subject: [PATCH 080/482] coccinelle: api/atomic_as_refcounter: add SPDX License Identifier Add the missing GPLv2 SPDX license identifier. It appears this single file was missing from 7f904d7e1f3e ("treewide: Replace GPLv2 boilerplate/reference with SPDX - rule 505"), which addressed all other files in scripts/coccinelle. Hence I added GPL-2.0-only consitently with the mentioned patch. Cc: linux-spdx@vger.kernel.org Cc: Elena Reshetova Signed-off-by: Matthias Maennich Acked-by: Julia Lawall Signed-off-by: Greg Kroah-Hartman --- scripts/coccinelle/api/atomic_as_refcounter.cocci | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/coccinelle/api/atomic_as_refcounter.cocci b/scripts/coccinelle/api/atomic_as_refcounter.cocci index 988120e0fd67..0f78d94abc35 100644 --- a/scripts/coccinelle/api/atomic_as_refcounter.cocci +++ b/scripts/coccinelle/api/atomic_as_refcounter.cocci @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only // Check if refcount_t type and API should be used // instead of atomic_t type when dealing with refcounters // From ac43432cb1f5c2950408534987e57c2071e24d8f Mon Sep 17 00:00:00 2001 From: Muchun Song Date: Sat, 27 Jul 2019 11:21:22 +0800 Subject: [PATCH 081/482] driver core: Fix use-after-free and double free on glue directory There is a race condition between removing glue directory and adding a new device under the glue dir. It can be reproduced in following test: CPU1: CPU2: device_add() get_device_parent() class_dir_create_and_add() kobject_add_internal() create_dir() // create glue_dir device_add() get_device_parent() kobject_get() // get glue_dir device_del() cleanup_glue_dir() kobject_del(glue_dir) kobject_add() kobject_add_internal() create_dir() // in glue_dir sysfs_create_dir_ns() kernfs_create_dir_ns(sd) sysfs_remove_dir() // glue_dir->sd=NULL sysfs_put() // free glue_dir->sd // sd is freed kernfs_new_node(sd) kernfs_get(glue_dir) kernfs_add_one() kernfs_put() Before CPU1 remove last child device under glue dir, if CPU2 add a new device under glue dir, the glue_dir kobject reference count will be increase to 2 via kobject_get() in get_device_parent(). And CPU2 has been called kernfs_create_dir_ns(), but not call kernfs_new_node(). Meanwhile, CPU1 call sysfs_remove_dir() and sysfs_put(). This result in glue_dir->sd is freed and it's reference count will be 0. Then CPU2 call kernfs_get(glue_dir) will trigger a warning in kernfs_get() and increase it's reference count to 1. Because glue_dir->sd is freed by CPU1, the next call kernfs_add_one() by CPU2 will fail(This is also use-after-free) and call kernfs_put() to decrease reference count. Because the reference count is decremented to 0, it will also call kmem_cache_free() to free the glue_dir->sd again. This will result in double free. In order to avoid this happening, we also should make sure that kernfs_node for glue_dir is released in CPU1 only when refcount for glue_dir kobj is 1 to fix this race. The following calltrace is captured in kernel 4.14 with the following patch applied: commit 726e41097920 ("drivers: core: Remove glue dirs from sysfs earlier") -------------------------------------------------------------------------- [ 3.633703] WARNING: CPU: 4 PID: 513 at .../fs/kernfs/dir.c:494 Here is WARN_ON(!atomic_read(&kn->count) in kernfs_get(). .... [ 3.633986] Call trace: [ 3.633991] kernfs_create_dir_ns+0xa8/0xb0 [ 3.633994] sysfs_create_dir_ns+0x54/0xe8 [ 3.634001] kobject_add_internal+0x22c/0x3f0 [ 3.634005] kobject_add+0xe4/0x118 [ 3.634011] device_add+0x200/0x870 [ 3.634017] _request_firmware+0x958/0xc38 [ 3.634020] request_firmware_into_buf+0x4c/0x70 .... [ 3.634064] kernel BUG at .../mm/slub.c:294! Here is BUG_ON(object == fp) in set_freepointer(). .... [ 3.634346] Call trace: [ 3.634351] kmem_cache_free+0x504/0x6b8 [ 3.634355] kernfs_put+0x14c/0x1d8 [ 3.634359] kernfs_create_dir_ns+0x88/0xb0 [ 3.634362] sysfs_create_dir_ns+0x54/0xe8 [ 3.634366] kobject_add_internal+0x22c/0x3f0 [ 3.634370] kobject_add+0xe4/0x118 [ 3.634374] device_add+0x200/0x870 [ 3.634378] _request_firmware+0x958/0xc38 [ 3.634381] request_firmware_into_buf+0x4c/0x70 -------------------------------------------------------------------------- Fixes: 726e41097920 ("drivers: core: Remove glue dirs from sysfs earlier") Signed-off-by: Muchun Song Reviewed-by: Mukesh Ojha Signed-off-by: Prateek Sood Link: https://lore.kernel.org/r/20190727032122.24639-1-smuchun@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/base/core.c | 53 ++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 52 insertions(+), 1 deletion(-) diff --git a/drivers/base/core.c b/drivers/base/core.c index da84a73f2ba6..c83d7e87764a 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -1820,12 +1820,63 @@ static inline struct kobject *get_glue_dir(struct device *dev) */ static void cleanup_glue_dir(struct device *dev, struct kobject *glue_dir) { + unsigned int ref; + /* see if we live in a "glue" directory */ if (!live_in_glue_dir(glue_dir, dev)) return; mutex_lock(&gdp_mutex); - if (!kobject_has_children(glue_dir)) + /** + * There is a race condition between removing glue directory + * and adding a new device under the glue directory. + * + * CPU1: CPU2: + * + * device_add() + * get_device_parent() + * class_dir_create_and_add() + * kobject_add_internal() + * create_dir() // create glue_dir + * + * device_add() + * get_device_parent() + * kobject_get() // get glue_dir + * + * device_del() + * cleanup_glue_dir() + * kobject_del(glue_dir) + * + * kobject_add() + * kobject_add_internal() + * create_dir() // in glue_dir + * sysfs_create_dir_ns() + * kernfs_create_dir_ns(sd) + * + * sysfs_remove_dir() // glue_dir->sd=NULL + * sysfs_put() // free glue_dir->sd + * + * // sd is freed + * kernfs_new_node(sd) + * kernfs_get(glue_dir) + * kernfs_add_one() + * kernfs_put() + * + * Before CPU1 remove last child device under glue dir, if CPU2 add + * a new device under glue dir, the glue_dir kobject reference count + * will be increase to 2 in kobject_get(k). And CPU2 has been called + * kernfs_create_dir_ns(). Meanwhile, CPU1 call sysfs_remove_dir() + * and sysfs_put(). This result in glue_dir->sd is freed. + * + * Then the CPU2 will see a stale "empty" but still potentially used + * glue dir around in kernfs_new_node(). + * + * In order to avoid this happening, we also should make sure that + * kernfs_node for glue_dir is released in CPU1 only when refcount + * for glue_dir kobj is 1. + */ + ref = kref_read(&glue_dir->kref); + if (!kobject_has_children(glue_dir) && !--ref) kobject_del(glue_dir); kobject_put(glue_dir); mutex_unlock(&gdp_mutex); From fac7b714c514fcc555541e1d6450c694b0a5f8d3 Mon Sep 17 00:00:00 2001 From: Nishad Kamdar Date: Fri, 26 Jul 2019 19:58:45 +0530 Subject: [PATCH 082/482] intel_th: Use the correct style for SPDX License Identifier This patch corrects the SPDX License Identifier style in header files related to Drivers for Intel(R) Trace Hub controller. For C header files Documentation/process/license-rules.rst mandates C-like comments (opposed to C source files where C++ style should be used) Changes made by using a script provided by Joe Perches here: https://lkml.org/lkml/2019/2/7/46 Suggested-by: Joe Perches Signed-off-by: Nishad Kamdar Signed-off-by: Greg Kroah-Hartman --- drivers/hwtracing/intel_th/msu.h | 2 +- drivers/hwtracing/intel_th/pti.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/hwtracing/intel_th/msu.h b/drivers/hwtracing/intel_th/msu.h index 574c16004cb2..13d9b141daaa 100644 --- a/drivers/hwtracing/intel_th/msu.h +++ b/drivers/hwtracing/intel_th/msu.h @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +/* SPDX-License-Identifier: GPL-2.0 */ /* * Intel(R) Trace Hub Memory Storage Unit (MSU) data structures * diff --git a/drivers/hwtracing/intel_th/pti.h b/drivers/hwtracing/intel_th/pti.h index e9381babc84c..7dfc0431333b 100644 --- a/drivers/hwtracing/intel_th/pti.h +++ b/drivers/hwtracing/intel_th/pti.h @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +/* SPDX-License-Identifier: GPL-2.0 */ /* * Intel(R) Trace Hub PTI output data structures * From 740ce365a4dccc1df4d05219cd5bf68f01359196 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Tue, 30 Jul 2019 08:49:34 -0600 Subject: [PATCH 083/482] dt-bindings: Fix generated example files getting added to schemas MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 837158b847a4 ("dt-bindings: Check the examples against the schemas") started generating YAML encoded DT files to validate the examples against the schema. When running 'make dt_binding_check' in tree after the 1st time, the generated example .dt.yaml files are mistakenly added to the list of schema files. Exclude *.example.dt.yaml files from the search for schema files. Fixes: 837158b847a4 ("dt-bindings: Check the examples against the schemas") Reported-by: Guido Günther Tested-by: Guido Günther Signed-off-by: Rob Herring --- Documentation/devicetree/bindings/Makefile | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/Makefile b/Documentation/devicetree/bindings/Makefile index 6b0dfd5c17ba..5138a2f6232a 100644 --- a/Documentation/devicetree/bindings/Makefile +++ b/Documentation/devicetree/bindings/Makefile @@ -19,7 +19,9 @@ quiet_cmd_mk_schema = SCHEMA $@ DT_DOCS = $(shell \ cd $(srctree)/$(src) && \ - find * \( -name '*.yaml' ! -name $(DT_TMP_SCHEMA) \) \ + find * \( -name '*.yaml' ! \ + -name $(DT_TMP_SCHEMA) ! \ + -name '*.example.dt.yaml' \) \ ) DT_SCHEMA_FILES ?= $(addprefix $(src)/,$(DT_DOCS)) From 6963d00bb6e42e91882d2eb36639701aca9fe032 Mon Sep 17 00:00:00 2001 From: Cezary Rojewski Date: Fri, 26 Jul 2019 20:15:17 +0200 Subject: [PATCH 084/482] MAINTAINERS: Update Intel ASoC drivers maintainers Adding myself to Intel ASoC drivers maintainers list. Signed-off-by: Cezary Rojewski Acked-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20190726181517.27655-1-cezary.rojewski@intel.com Signed-off-by: Mark Brown --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 11db05b56744..92829ca37459 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7861,6 +7861,7 @@ S: Maintained F: drivers/video/fbdev/i810/ INTEL ASoC DRIVERS +M: Cezary Rojewski M: Pierre-Louis Bossart M: Liam Girdwood M: Jie Yang From 52f87f3ca251f5e43b42e78ab9816b2b07718bfe Mon Sep 17 00:00:00 2001 From: Marcus Cooper Date: Mon, 29 Jul 2019 17:21:30 +0200 Subject: [PATCH 085/482] ASoC: sun4i-i2s: Incorrect SR and WSS computation The A64 audio codec uses the original I2S block but the SR and WSS computation currently assigned is for the newer block. Fixes: 619c15f7fac9 (ASoC: sun4i-i2s: Change SR and WSS computation) Signed-off-by: Marcus Cooper Link: https://lore.kernel.org/r/20190729152130.27955-1-codekipper@gmail.com Signed-off-by: Mark Brown --- sound/soc/sunxi/sun4i-i2s.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/sunxi/sun4i-i2s.c b/sound/soc/sunxi/sun4i-i2s.c index 9b2232908b65..7fa5c61169db 100644 --- a/sound/soc/sunxi/sun4i-i2s.c +++ b/sound/soc/sunxi/sun4i-i2s.c @@ -1002,8 +1002,8 @@ static const struct sun4i_i2s_quirks sun50i_a64_codec_i2s_quirks = { .field_rxchanmap = REG_FIELD(SUN4I_I2S_RX_CHAN_MAP_REG, 0, 31), .field_txchansel = REG_FIELD(SUN4I_I2S_TX_CHAN_SEL_REG, 0, 2), .field_rxchansel = REG_FIELD(SUN4I_I2S_RX_CHAN_SEL_REG, 0, 2), - .get_sr = sun8i_i2s_get_sr_wss, - .get_wss = sun8i_i2s_get_sr_wss, + .get_sr = sun4i_i2s_get_sr, + .get_wss = sun4i_i2s_get_wss, }; static int sun4i_i2s_init_regmap_fields(struct device *dev, From b9da500bde81ad820b5d95c6bf52fc33e1f490ee Mon Sep 17 00:00:00 2001 From: fengchunguo Date: Wed, 31 Jul 2019 15:41:56 +0800 Subject: [PATCH 086/482] ASoC: max98373: add 88200 and 96000 sampling rate support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 88200 and 96000 sampling rate was not enabled on driver, so can't be played. The error information: max98373 3-0031:rate 96000 not supported max98373 3-0031:ASoC: can't set max98373-aif1 hw params: -22 Signed-off-by: fengchunguo Link: https://lore.kernel.org/r/20190731074156.5620-1-chunguo.feng@amlogic.com Signed-off-by: Mark Brown --- sound/soc/codecs/max98373.c | 6 ++++++ sound/soc/codecs/max98373.h | 2 ++ 2 files changed, 8 insertions(+) mode change 100644 => 100755 sound/soc/codecs/max98373.c mode change 100644 => 100755 sound/soc/codecs/max98373.h diff --git a/sound/soc/codecs/max98373.c b/sound/soc/codecs/max98373.c old mode 100644 new mode 100755 index 528695cd6a1c..8c601a3ebc27 --- a/sound/soc/codecs/max98373.c +++ b/sound/soc/codecs/max98373.c @@ -267,6 +267,12 @@ static int max98373_dai_hw_params(struct snd_pcm_substream *substream, case 48000: sampling_rate = MAX98373_PCM_SR_SET1_SR_48000; break; + case 88200: + sampling_rate = MAX98373_PCM_SR_SET1_SR_88200; + break; + case 96000: + sampling_rate = MAX98373_PCM_SR_SET1_SR_96000; + break; default: dev_err(component->dev, "rate %d not supported\n", params_rate(params)); diff --git a/sound/soc/codecs/max98373.h b/sound/soc/codecs/max98373.h old mode 100644 new mode 100755 index f6a37aa02f26..a59e51355a84 --- a/sound/soc/codecs/max98373.h +++ b/sound/soc/codecs/max98373.h @@ -130,6 +130,8 @@ #define MAX98373_PCM_SR_SET1_SR_32000 (0x6 << 0) #define MAX98373_PCM_SR_SET1_SR_44100 (0x7 << 0) #define MAX98373_PCM_SR_SET1_SR_48000 (0x8 << 0) +#define MAX98373_PCM_SR_SET1_SR_88200 (0x9 << 0) +#define MAX98373_PCM_SR_SET1_SR_96000 (0xA << 0) /* MAX98373_R2028_PCM_SR_SETUP_2 */ #define MAX98373_PCM_SR_SET2_SR_MASK (0xF << 4) From ed4289e8b48845888ee46377bd2b55884a55e60b Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Mon, 29 Jul 2019 22:28:54 +1000 Subject: [PATCH 087/482] Revert "powerpc: slightly improve cache helpers" This reverts commit 6c5875843b87c3adea2beade9d1b8b3d4523900a. It triggers a probable compiler bug on clang which leads to crashes. With GCC it allows the compiler to use a more efficient register allocation but current GCC versions never do that at any of the current call sites, so there's no benefit. Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/cache.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/include/asm/cache.h b/arch/powerpc/include/asm/cache.h index b3388d95f451..45e3137ccd71 100644 --- a/arch/powerpc/include/asm/cache.h +++ b/arch/powerpc/include/asm/cache.h @@ -107,22 +107,22 @@ extern void _set_L3CR(unsigned long); static inline void dcbz(void *addr) { - __asm__ __volatile__ ("dcbz %y0" : : "Z"(*(u8 *)addr) : "memory"); + __asm__ __volatile__ ("dcbz 0, %0" : : "r"(addr) : "memory"); } static inline void dcbi(void *addr) { - __asm__ __volatile__ ("dcbi %y0" : : "Z"(*(u8 *)addr) : "memory"); + __asm__ __volatile__ ("dcbi 0, %0" : : "r"(addr) : "memory"); } static inline void dcbf(void *addr) { - __asm__ __volatile__ ("dcbf %y0" : : "Z"(*(u8 *)addr) : "memory"); + __asm__ __volatile__ ("dcbf 0, %0" : : "r"(addr) : "memory"); } static inline void dcbst(void *addr) { - __asm__ __volatile__ ("dcbst %y0" : : "Z"(*(u8 *)addr) : "memory"); + __asm__ __volatile__ ("dcbst 0, %0" : : "r"(addr) : "memory"); } #endif /* !__ASSEMBLY__ */ #endif /* __KERNEL__ */ From e77ff779a0c6666b697c56a6e41972bbf30b977f Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Wed, 31 Jul 2019 06:48:09 -0700 Subject: [PATCH 088/482] Input: applespi - add dependency on LEDS_CLASS If applespi is enabled, but LEDs class support is not, the build fails: drivers/input/keyboard/applespi.o: In function `applespi_probe': applespi.c:(.text+0x1fcd): undefined reference to `devm_led_classdev_register_ext' Add "depends on LEDS_CLASS" to the Konfig Reported-by: Hulk Robot Fixes: 038b1a05eae6 ("Input: add Apple SPI keyboard and trackpad driver") Signed-off-by: YueHaibing Signed-off-by: Dmitry Torokhov --- drivers/input/keyboard/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/input/keyboard/Kconfig b/drivers/input/keyboard/Kconfig index ebb19e21473e..90e8a7f2f07c 100644 --- a/drivers/input/keyboard/Kconfig +++ b/drivers/input/keyboard/Kconfig @@ -76,6 +76,7 @@ config KEYBOARD_APPLESPI depends on ACPI && EFI depends on SPI depends on X86 || COMPILE_TEST + depends on LEDS_CLASS select CRC16 help Say Y here if you are running Linux on any Apple MacBook8,1 or later, From 3aed86731ee2b23e4dc4d2c6d943d33992cd551b Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Wed, 31 Jul 2019 17:35:31 -0600 Subject: [PATCH 089/482] nvmet: Fix use-after-free bug when a port is removed When a port is removed through configfs, any connected controllers are still active and can still send commands. This causes a use-after-free bug which is detected by KASAN for any admin command that dereferences req->port (like in nvmet_execute_identify_ctrl). To fix this, disconnect all active controllers when a subsystem is removed from a port. This ensures there are no active controllers when the port is eventually removed. Signed-off-by: Logan Gunthorpe Reviewed-by: Sagi Grimberg Reviewed-by: Max Gurtovoy Reviewed-by : Chaitanya Kulkarni Signed-off-by: Sagi Grimberg --- drivers/nvme/target/configfs.c | 1 + drivers/nvme/target/core.c | 12 ++++++++++++ drivers/nvme/target/nvmet.h | 3 +++ 3 files changed, 16 insertions(+) diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c index cd52b9f15376..98613a45bd3b 100644 --- a/drivers/nvme/target/configfs.c +++ b/drivers/nvme/target/configfs.c @@ -675,6 +675,7 @@ static void nvmet_port_subsys_drop_link(struct config_item *parent, found: list_del(&p->entry); + nvmet_port_del_ctrls(port, subsys); nvmet_port_disc_changed(port, subsys); if (list_empty(&port->subsystems)) diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index dad0243c7c96..b86a23aa9020 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -280,6 +280,18 @@ void nvmet_unregister_transport(const struct nvmet_fabrics_ops *ops) } EXPORT_SYMBOL_GPL(nvmet_unregister_transport); +void nvmet_port_del_ctrls(struct nvmet_port *port, struct nvmet_subsys *subsys) +{ + struct nvmet_ctrl *ctrl; + + mutex_lock(&subsys->lock); + list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) { + if (ctrl->port == port) + ctrl->ops->delete_ctrl(ctrl); + } + mutex_unlock(&subsys->lock); +} + int nvmet_enable_port(struct nvmet_port *port) { const struct nvmet_fabrics_ops *ops; diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index 6ee66c610739..c51f8dd01dc4 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -418,6 +418,9 @@ void nvmet_port_send_ana_event(struct nvmet_port *port); int nvmet_register_transport(const struct nvmet_fabrics_ops *ops); void nvmet_unregister_transport(const struct nvmet_fabrics_ops *ops); +void nvmet_port_del_ctrls(struct nvmet_port *port, + struct nvmet_subsys *subsys); + int nvmet_enable_port(struct nvmet_port *port); void nvmet_disable_port(struct nvmet_port *port); From 86b9a63e595ff03f9d0a7b92b6acc231fecefc29 Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Wed, 31 Jul 2019 17:35:32 -0600 Subject: [PATCH 090/482] nvmet-loop: Flush nvme_delete_wq when removing the port After calling nvme_loop_delete_ctrl(), the controllers will not yet be deleted because nvme_delete_ctrl() only schedules work to do the delete. This means a race can occur if a port is removed but there are still active controllers trying to access that memory. To fix this, flush the nvme_delete_wq before returning from nvme_loop_remove_port() so that any controllers that might be in the process of being deleted won't access a freed port. Signed-off-by: Logan Gunthorpe Reviewed-by: Sagi Grimberg Reviewed-by: Max Gurtovoy Reviewed-by : Chaitanya Kulkarni Signed-off-by: Sagi Grimberg --- drivers/nvme/target/loop.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c index b16dc3981c69..0940c5024a34 100644 --- a/drivers/nvme/target/loop.c +++ b/drivers/nvme/target/loop.c @@ -654,6 +654,14 @@ static void nvme_loop_remove_port(struct nvmet_port *port) mutex_lock(&nvme_loop_ports_mutex); list_del_init(&port->entry); mutex_unlock(&nvme_loop_ports_mutex); + + /* + * Ensure any ctrls that are in the process of being + * deleted are in fact deleted before we return + * and free the port. This is to prevent active + * ctrls from using a port after it's freed. + */ + flush_workqueue(nvme_delete_wq); } static const struct nvmet_fabrics_ops nvme_loop_ops = { From cfc1a1af56200362d1508b82b9a3cc3acb2eae0c Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Wed, 31 Jul 2019 17:35:33 -0600 Subject: [PATCH 091/482] nvmet-file: fix nvmet_file_flush() always returning an error Presently, nvmet_file_flush() always returns a call to errno_to_nvme_status() but that helper doesn't take into account the case when errno=0. So nvmet_file_flush() always returns an error code. All other callers of errno_to_nvme_status() check for success before calling it. To fix this, ensure errno_to_nvme_status() returns success if the errno is zero. This should prevent future mistakes like this from happening. Fixes: c6aa3542e010 ("nvmet: add error log support for file backend") Signed-off-by: Logan Gunthorpe Reviewed-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni Signed-off-by: Sagi Grimberg --- drivers/nvme/target/core.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index b86a23aa9020..3a67e244e568 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -46,6 +46,9 @@ inline u16 errno_to_nvme_status(struct nvmet_req *req, int errno) u16 status; switch (errno) { + case 0: + status = NVME_SC_SUCCESS; + break; case -ENOSPC: req->error_loc = offsetof(struct nvme_rw_command, length); status = NVME_SC_CAP_EXCEEDED | NVME_SC_DNR; From 8c36e66fb407ce076535a7db98ab9f6d720b866a Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Wed, 31 Jul 2019 17:35:34 -0600 Subject: [PATCH 092/482] nvme-core: Fix extra device_put() call on error path In the error path for nvme_init_subsystem(), nvme_put_subsystem() will call device_put(), but it will get called again after the mutex_unlock(). The device_put() only needs to be called if device_add() fails. This bug caused a KASAN use-after-free error when adding and removing subsytems in a loop: BUG: KASAN: use-after-free in device_del+0x8d9/0x9a0 Read of size 8 at addr ffff8883cdaf7120 by task multipathd/329 CPU: 0 PID: 329 Comm: multipathd Not tainted 5.2.0-rc6-vmlocalyes-00019-g70a2b39005fd-dirty #314 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1 04/01/2014 Call Trace: dump_stack+0x7b/0xb5 print_address_description+0x6f/0x280 ? device_del+0x8d9/0x9a0 __kasan_report+0x148/0x199 ? device_del+0x8d9/0x9a0 ? class_release+0x100/0x130 ? device_del+0x8d9/0x9a0 kasan_report+0x12/0x20 __asan_report_load8_noabort+0x14/0x20 device_del+0x8d9/0x9a0 ? device_platform_notify+0x70/0x70 nvme_destroy_subsystem+0xf9/0x150 nvme_free_ctrl+0x280/0x3a0 device_release+0x72/0x1d0 kobject_put+0x144/0x410 put_device+0x13/0x20 nvme_free_ns+0xc4/0x100 nvme_release+0xb3/0xe0 __blkdev_put+0x549/0x6e0 ? kasan_check_write+0x14/0x20 ? bd_set_size+0xb0/0xb0 ? kasan_check_write+0x14/0x20 ? mutex_lock+0x8f/0xe0 ? __mutex_lock_slowpath+0x20/0x20 ? locks_remove_file+0x239/0x370 blkdev_put+0x72/0x2c0 blkdev_close+0x8d/0xd0 __fput+0x256/0x770 ? _raw_read_lock_irq+0x40/0x40 ____fput+0xe/0x10 task_work_run+0x10c/0x180 ? filp_close+0xf7/0x140 exit_to_usermode_loop+0x151/0x170 do_syscall_64+0x240/0x2e0 ? prepare_exit_to_usermode+0xd5/0x190 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x7f5a79af05d7 Code: 00 00 0f 05 48 3d 00 f0 ff ff 77 3f c3 66 0f 1f 44 00 00 53 89 fb 48 83 ec 10 e8 c4 fb ff ff 89 df 89 c2 b8 03 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 2b 89 d7 89 44 24 0c e8 06 fc ff ff 8b 44 24 RSP: 002b:00007f5a7799c810 EFLAGS: 00000293 ORIG_RAX: 0000000000000003 RAX: 0000000000000000 RBX: 0000000000000008 RCX: 00007f5a79af05d7 RDX: 0000000000000000 RSI: 0000000000000001 RDI: 0000000000000008 RBP: 00007f5a58000f98 R08: 0000000000000002 R09: 00007f5a7935ee80 R10: 0000000000000000 R11: 0000000000000293 R12: 000055e432447240 R13: 0000000000000000 R14: 0000000000000001 R15: 000055e4324a9cf0 Allocated by task 1236: save_stack+0x21/0x80 __kasan_kmalloc.constprop.6+0xab/0xe0 kasan_kmalloc+0x9/0x10 kmem_cache_alloc_trace+0x102/0x210 nvme_init_identify+0x13c3/0x3820 nvme_loop_configure_admin_queue+0x4fa/0x5e0 nvme_loop_create_ctrl+0x469/0xf40 nvmf_dev_write+0x19a3/0x21ab __vfs_write+0x66/0x120 vfs_write+0x154/0x490 ksys_write+0x104/0x240 __x64_sys_write+0x73/0xb0 do_syscall_64+0xa5/0x2e0 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Freed by task 329: save_stack+0x21/0x80 __kasan_slab_free+0x129/0x190 kasan_slab_free+0xe/0x10 kfree+0xa7/0x200 nvme_release_subsystem+0x49/0x60 device_release+0x72/0x1d0 kobject_put+0x144/0x410 put_device+0x13/0x20 klist_class_dev_put+0x31/0x40 klist_put+0x8f/0xf0 klist_del+0xe/0x10 device_del+0x3a7/0x9a0 nvme_destroy_subsystem+0xf9/0x150 nvme_free_ctrl+0x280/0x3a0 device_release+0x72/0x1d0 kobject_put+0x144/0x410 put_device+0x13/0x20 nvme_free_ns+0xc4/0x100 nvme_release+0xb3/0xe0 __blkdev_put+0x549/0x6e0 blkdev_put+0x72/0x2c0 blkdev_close+0x8d/0xd0 __fput+0x256/0x770 ____fput+0xe/0x10 task_work_run+0x10c/0x180 exit_to_usermode_loop+0x151/0x170 do_syscall_64+0x240/0x2e0 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Fixes: 32fd90c40768 ("nvme: change locking for the per-subsystem controller list") Signed-off-by: Logan Gunthorpe Reviewed-by: Sagi Grimberg Reviewed-by : Chaitanya Kulkarni Signed-off-by: Sagi Grimberg --- drivers/nvme/host/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 80c7a7ee240b..e35f16b60fc9 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2488,6 +2488,7 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id) if (ret) { dev_err(ctrl->device, "failed to register subsystem device.\n"); + put_device(&subsys->dev); goto out_unlock; } ida_init(&subsys->ns_ida); @@ -2510,7 +2511,6 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id) nvme_put_subsystem(subsys); out_unlock: mutex_unlock(&nvme_subsystems_lock); - put_device(&subsys->dev); return ret; } From b9156daeb1601d69007b7e50efcf89d69d72ec1d Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Wed, 31 Jul 2019 11:00:26 -0700 Subject: [PATCH 093/482] nvme: fix a possible deadlock when passthru commands sent to a multipath device When the user issues a command with side effects, we will end up freezing the namespace request queue when updating disk info (and the same for the corresponding mpath disk node). However, we are not freezing the mpath node request queue, which means that mpath I/O can still come in and block on blk_queue_enter (called from nvme_ns_head_make_request -> direct_make_request). This is a deadlock, because blk_queue_enter will block until the inner namespace request queue is unfroze, but that process is blocked because the namespace revalidation is trying to update the mpath disk info and freeze its request queue (which will never complete because of the I/O that is blocked on blk_queue_enter). Fix this by freezing all the subsystem nsheads request queues before executing the passthru command. Given that these commands are infrequent we should not worry about this temporary I/O freeze to keep things sane. Here is the matching hang traces: -- [ 374.465002] INFO: task systemd-udevd:17994 blocked for more than 122 seconds. [ 374.472975] Not tainted 5.2.0-rc3-mpdebug+ #42 [ 374.478522] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [ 374.487274] systemd-udevd D 0 17994 1 0x00000000 [ 374.493407] Call Trace: [ 374.496145] __schedule+0x2ef/0x620 [ 374.500047] schedule+0x38/0xa0 [ 374.503569] blk_queue_enter+0x139/0x220 [ 374.507959] ? remove_wait_queue+0x60/0x60 [ 374.512540] direct_make_request+0x60/0x130 [ 374.517219] nvme_ns_head_make_request+0x11d/0x420 [nvme_core] [ 374.523740] ? generic_make_request_checks+0x307/0x6f0 [ 374.529484] generic_make_request+0x10d/0x2e0 [ 374.534356] submit_bio+0x75/0x140 [ 374.538163] ? guard_bio_eod+0x32/0xe0 [ 374.542361] submit_bh_wbc+0x171/0x1b0 [ 374.546553] block_read_full_page+0x1ed/0x330 [ 374.551426] ? check_disk_change+0x70/0x70 [ 374.556008] ? scan_shadow_nodes+0x30/0x30 [ 374.560588] blkdev_readpage+0x18/0x20 [ 374.564783] do_read_cache_page+0x301/0x860 [ 374.569463] ? blkdev_writepages+0x10/0x10 [ 374.574037] ? prep_new_page+0x88/0x130 [ 374.578329] ? get_page_from_freelist+0xa2f/0x1280 [ 374.583688] ? __alloc_pages_nodemask+0x179/0x320 [ 374.588947] read_cache_page+0x12/0x20 [ 374.593142] read_dev_sector+0x2d/0xd0 [ 374.597337] read_lba+0x104/0x1f0 [ 374.601046] find_valid_gpt+0xfa/0x720 [ 374.605243] ? string_nocheck+0x58/0x70 [ 374.609534] ? find_valid_gpt+0x720/0x720 [ 374.614016] efi_partition+0x89/0x430 [ 374.618113] ? string+0x48/0x60 [ 374.621632] ? snprintf+0x49/0x70 [ 374.625339] ? find_valid_gpt+0x720/0x720 [ 374.629828] check_partition+0x116/0x210 [ 374.634214] rescan_partitions+0xb6/0x360 [ 374.638699] __blkdev_reread_part+0x64/0x70 [ 374.643377] blkdev_reread_part+0x23/0x40 [ 374.647860] blkdev_ioctl+0x48c/0x990 [ 374.651956] block_ioctl+0x41/0x50 [ 374.655766] do_vfs_ioctl+0xa7/0x600 [ 374.659766] ? locks_lock_inode_wait+0xb1/0x150 [ 374.664832] ksys_ioctl+0x67/0x90 [ 374.668539] __x64_sys_ioctl+0x1a/0x20 [ 374.672732] do_syscall_64+0x5a/0x1c0 [ 374.676828] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 374.738474] INFO: task nvmeadm:49141 blocked for more than 123 seconds. [ 374.745871] Not tainted 5.2.0-rc3-mpdebug+ #42 [ 374.751419] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [ 374.760170] nvmeadm D 0 49141 36333 0x00004080 [ 374.766301] Call Trace: [ 374.769038] __schedule+0x2ef/0x620 [ 374.772939] schedule+0x38/0xa0 [ 374.776452] blk_mq_freeze_queue_wait+0x59/0x100 [ 374.781614] ? remove_wait_queue+0x60/0x60 [ 374.786192] blk_mq_freeze_queue+0x1a/0x20 [ 374.790773] nvme_update_disk_info.isra.57+0x5f/0x350 [nvme_core] [ 374.797582] ? nvme_identify_ns.isra.50+0x71/0xc0 [nvme_core] [ 374.804006] __nvme_revalidate_disk+0xe5/0x110 [nvme_core] [ 374.810139] nvme_revalidate_disk+0xa6/0x120 [nvme_core] [ 374.816078] ? nvme_submit_user_cmd+0x11e/0x320 [nvme_core] [ 374.822299] nvme_user_cmd+0x264/0x370 [nvme_core] [ 374.827661] nvme_dev_ioctl+0x112/0x1d0 [nvme_core] [ 374.833114] do_vfs_ioctl+0xa7/0x600 [ 374.837117] ? __audit_syscall_entry+0xdd/0x130 [ 374.842184] ksys_ioctl+0x67/0x90 [ 374.845891] __x64_sys_ioctl+0x1a/0x20 [ 374.850082] do_syscall_64+0x5a/0x1c0 [ 374.854178] entry_SYSCALL_64_after_hwframe+0x44/0xa9 -- Reported-by: James Puthukattukaran Tested-by: James Puthukattukaran Reviewed-by: Keith Busch Signed-off-by: Sagi Grimberg --- drivers/nvme/host/core.c | 5 +++++ drivers/nvme/host/multipath.c | 30 ++++++++++++++++++++++++++++++ drivers/nvme/host/nvme.h | 12 ++++++++++++ 3 files changed, 47 insertions(+) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index e35f16b60fc9..88b8dfd7928a 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1286,6 +1286,9 @@ static u32 nvme_passthru_start(struct nvme_ctrl *ctrl, struct nvme_ns *ns, */ if (effects & (NVME_CMD_EFFECTS_LBCC | NVME_CMD_EFFECTS_CSE_MASK)) { mutex_lock(&ctrl->scan_lock); + mutex_lock(&ctrl->subsys->lock); + nvme_mpath_start_freeze(ctrl->subsys); + nvme_mpath_wait_freeze(ctrl->subsys); nvme_start_freeze(ctrl); nvme_wait_freeze(ctrl); } @@ -1316,6 +1319,8 @@ static void nvme_passthru_end(struct nvme_ctrl *ctrl, u32 effects) nvme_update_formats(ctrl); if (effects & (NVME_CMD_EFFECTS_LBCC | NVME_CMD_EFFECTS_CSE_MASK)) { nvme_unfreeze(ctrl); + nvme_mpath_unfreeze(ctrl->subsys); + mutex_unlock(&ctrl->subsys->lock); mutex_unlock(&ctrl->scan_lock); } if (effects & NVME_CMD_EFFECTS_CCC) diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 4f0d0d12744e..b34dcb2288e7 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -12,6 +12,36 @@ module_param(multipath, bool, 0444); MODULE_PARM_DESC(multipath, "turn on native support for multiple controllers per subsystem"); +void nvme_mpath_unfreeze(struct nvme_subsystem *subsys) +{ + struct nvme_ns_head *h; + + lockdep_assert_held(&subsys->lock); + list_for_each_entry(h, &subsys->nsheads, entry) + if (h->disk) + blk_mq_unfreeze_queue(h->disk->queue); +} + +void nvme_mpath_wait_freeze(struct nvme_subsystem *subsys) +{ + struct nvme_ns_head *h; + + lockdep_assert_held(&subsys->lock); + list_for_each_entry(h, &subsys->nsheads, entry) + if (h->disk) + blk_mq_freeze_queue_wait(h->disk->queue); +} + +void nvme_mpath_start_freeze(struct nvme_subsystem *subsys) +{ + struct nvme_ns_head *h; + + lockdep_assert_held(&subsys->lock); + list_for_each_entry(h, &subsys->nsheads, entry) + if (h->disk) + blk_freeze_queue_start(h->disk->queue); +} + /* * If multipathing is enabled we need to always use the subsystem instance * number for numbering our devices to avoid conflicts between subsystems that diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 26b563f9985b..6b4fb67124c6 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -490,6 +490,9 @@ static inline bool nvme_ctrl_use_ana(struct nvme_ctrl *ctrl) return ctrl->ana_log_buf != NULL; } +void nvme_mpath_unfreeze(struct nvme_subsystem *subsys); +void nvme_mpath_wait_freeze(struct nvme_subsystem *subsys); +void nvme_mpath_start_freeze(struct nvme_subsystem *subsys); void nvme_set_disk_name(char *disk_name, struct nvme_ns *ns, struct nvme_ctrl *ctrl, int *flags); void nvme_failover_req(struct request *req); @@ -568,6 +571,15 @@ static inline void nvme_mpath_uninit(struct nvme_ctrl *ctrl) static inline void nvme_mpath_stop(struct nvme_ctrl *ctrl) { } +static inline void nvme_mpath_unfreeze(struct nvme_subsystem *subsys) +{ +} +static inline void nvme_mpath_wait_freeze(struct nvme_subsystem *subsys) +{ +} +static inline void nvme_mpath_start_freeze(struct nvme_subsystem *subsys) +{ +} #endif /* CONFIG_NVME_MULTIPATH */ #ifdef CONFIG_NVM From d94211b8bad3787e0655a67284105f57db728cb1 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Fri, 26 Jul 2019 10:29:49 -0700 Subject: [PATCH 094/482] nvme-rdma: fix possible use-after-free in connect error flow When start_queue fails, we need to make sure to drain the queue cq before freeing the rdma resources because we might still race with the completion path. Have start_queue() error path safely stop the queue. -- [30371.808111] nvme nvme1: Failed reconnect attempt 11 [30371.808113] nvme nvme1: Reconnecting in 10 seconds... [...] [30382.069315] nvme nvme1: creating 4 I/O queues. [30382.257058] nvme nvme1: Connect Invalid SQE Parameter, qid 4 [30382.257061] nvme nvme1: failed to connect queue: 4 ret=386 [30382.305001] BUG: unable to handle kernel NULL pointer dereference at 0000000000000018 [30382.305022] IP: qedr_poll_cq+0x8a3/0x1170 [qedr] [30382.305028] PGD 0 P4D 0 [30382.305037] Oops: 0000 [#1] SMP PTI [...] [30382.305153] Call Trace: [30382.305166] ? __switch_to_asm+0x34/0x70 [30382.305187] __ib_process_cq+0x56/0xd0 [ib_core] [30382.305201] ib_poll_handler+0x26/0x70 [ib_core] [30382.305213] irq_poll_softirq+0x88/0x110 [30382.305223] ? sort_range+0x20/0x20 [30382.305232] __do_softirq+0xde/0x2c6 [30382.305241] ? sort_range+0x20/0x20 [30382.305249] run_ksoftirqd+0x1c/0x60 [30382.305258] smpboot_thread_fn+0xef/0x160 [30382.305265] kthread+0x113/0x130 [30382.305273] ? kthread_create_worker_on_cpu+0x50/0x50 [30382.305281] ret_from_fork+0x35/0x40 -- Reported-by: Nicolas Morey-Chaisemartin Reviewed-by: Max Gurtovoy Reviewed-by: Hannes Reinecke Signed-off-by: Sagi Grimberg --- drivers/nvme/host/rdma.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index a249db528d54..1a6449bc547b 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -562,13 +562,17 @@ static int nvme_rdma_alloc_queue(struct nvme_rdma_ctrl *ctrl, return ret; } +static void __nvme_rdma_stop_queue(struct nvme_rdma_queue *queue) +{ + rdma_disconnect(queue->cm_id); + ib_drain_qp(queue->qp); +} + static void nvme_rdma_stop_queue(struct nvme_rdma_queue *queue) { if (!test_and_clear_bit(NVME_RDMA_Q_LIVE, &queue->flags)) return; - - rdma_disconnect(queue->cm_id); - ib_drain_qp(queue->qp); + __nvme_rdma_stop_queue(queue); } static void nvme_rdma_free_queue(struct nvme_rdma_queue *queue) @@ -607,11 +611,13 @@ static int nvme_rdma_start_queue(struct nvme_rdma_ctrl *ctrl, int idx) else ret = nvmf_connect_admin_queue(&ctrl->ctrl); - if (!ret) + if (!ret) { set_bit(NVME_RDMA_Q_LIVE, &queue->flags); - else + } else { + __nvme_rdma_stop_queue(queue); dev_info(ctrl->ctrl.device, "failed to connect queue: %d ret=%d\n", idx, ret); + } return ret; } From 0157ec8dad3c8fc9bc9790f76e0831ffdaf2e7f0 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Thu, 25 Jul 2019 11:56:57 -0700 Subject: [PATCH 095/482] nvme: fix controller removal race with scan work With multipath enabled, nvme_scan_work() can read from the device (through nvme_mpath_add_disk()) and hang [1]. However, with fabrics, once ctrl->state is set to NVME_CTRL_DELETING, the reads will hang (see nvmf_check_ready()) and the mpath stack device make_request will block if head->list is not empty. However, when the head->list consistst of only DELETING/DEAD controllers, we should actually not block, but rather fail immediately. In addition, before we go ahead and remove the namespaces, make sure to clear the current path and kick the requeue list so that the request will fast fail upon requeuing. [1]: -- INFO: task kworker/u4:3:166 blocked for more than 120 seconds. Not tainted 5.2.0-rc6-vmlocalyes-00005-g808c8c2dc0cf #316 "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. kworker/u4:3 D 0 166 2 0x80004000 Workqueue: nvme-wq nvme_scan_work Call Trace: __schedule+0x851/0x1400 schedule+0x99/0x210 io_schedule+0x21/0x70 do_read_cache_page+0xa57/0x1330 read_cache_page+0x4a/0x70 read_dev_sector+0xbf/0x380 amiga_partition+0xc4/0x1230 check_partition+0x30f/0x630 rescan_partitions+0x19a/0x980 __blkdev_get+0x85a/0x12f0 blkdev_get+0x2a5/0x790 __device_add_disk+0xe25/0x1250 device_add_disk+0x13/0x20 nvme_mpath_set_live+0x172/0x2b0 nvme_update_ns_ana_state+0x130/0x180 nvme_set_ns_ana_state+0x9a/0xb0 nvme_parse_ana_log+0x1c3/0x4a0 nvme_mpath_add_disk+0x157/0x290 nvme_validate_ns+0x1017/0x1bd0 nvme_scan_work+0x44d/0x6a0 process_one_work+0x7d7/0x1240 worker_thread+0x8e/0xff0 kthread+0x2c3/0x3b0 ret_from_fork+0x35/0x40 INFO: task kworker/u4:1:1034 blocked for more than 120 seconds. Not tainted 5.2.0-rc6-vmlocalyes-00005-g808c8c2dc0cf #316 "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. kworker/u4:1 D 0 1034 2 0x80004000 Workqueue: nvme-delete-wq nvme_delete_ctrl_work Call Trace: __schedule+0x851/0x1400 schedule+0x99/0x210 schedule_timeout+0x390/0x830 wait_for_completion+0x1a7/0x310 __flush_work+0x241/0x5d0 flush_work+0x10/0x20 nvme_remove_namespaces+0x85/0x3d0 nvme_do_delete_ctrl+0xb4/0x1e0 nvme_delete_ctrl_work+0x15/0x20 process_one_work+0x7d7/0x1240 worker_thread+0x8e/0xff0 kthread+0x2c3/0x3b0 ret_from_fork+0x35/0x40 -- Reported-by: Logan Gunthorpe Tested-by: Logan Gunthorpe Reviewed-by: Logan Gunthorpe Reviewed-by: Ming Lei Signed-off-by: Sagi Grimberg --- drivers/nvme/host/core.c | 7 ++++++ drivers/nvme/host/multipath.c | 46 ++++++++++++++++++++++++++++++----- drivers/nvme/host/nvme.h | 9 +++++-- 3 files changed, 54 insertions(+), 8 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 88b8dfd7928a..c258a1ce4b28 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3577,6 +3577,13 @@ void nvme_remove_namespaces(struct nvme_ctrl *ctrl) struct nvme_ns *ns, *next; LIST_HEAD(ns_list); + /* + * make sure to requeue I/O to all namespaces as these + * might result from the scan itself and must complete + * for the scan_work to make progress + */ + nvme_mpath_clear_ctrl_paths(ctrl); + /* prevent racing with ns scanning */ flush_work(&ctrl->scan_work); diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index b34dcb2288e7..888d4543894e 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -134,18 +134,34 @@ static const char *nvme_ana_state_names[] = { [NVME_ANA_CHANGE] = "change", }; -void nvme_mpath_clear_current_path(struct nvme_ns *ns) +bool nvme_mpath_clear_current_path(struct nvme_ns *ns) { struct nvme_ns_head *head = ns->head; + bool changed = false; int node; if (!head) - return; + goto out; for_each_node(node) { - if (ns == rcu_access_pointer(head->current_path[node])) + if (ns == rcu_access_pointer(head->current_path[node])) { rcu_assign_pointer(head->current_path[node], NULL); + changed = true; + } } +out: + return changed; +} + +void nvme_mpath_clear_ctrl_paths(struct nvme_ctrl *ctrl) +{ + struct nvme_ns *ns; + + mutex_lock(&ctrl->scan_lock); + list_for_each_entry(ns, &ctrl->namespaces, list) + if (nvme_mpath_clear_current_path(ns)) + kblockd_schedule_work(&ns->head->requeue_work); + mutex_unlock(&ctrl->scan_lock); } static bool nvme_path_is_disabled(struct nvme_ns *ns) @@ -256,6 +272,24 @@ inline struct nvme_ns *nvme_find_path(struct nvme_ns_head *head) return ns; } +static bool nvme_available_path(struct nvme_ns_head *head) +{ + struct nvme_ns *ns; + + list_for_each_entry_rcu(ns, &head->list, siblings) { + switch (ns->ctrl->state) { + case NVME_CTRL_LIVE: + case NVME_CTRL_RESETTING: + case NVME_CTRL_CONNECTING: + /* fallthru */ + return true; + default: + break; + } + } + return false; +} + static blk_qc_t nvme_ns_head_make_request(struct request_queue *q, struct bio *bio) { @@ -282,14 +316,14 @@ static blk_qc_t nvme_ns_head_make_request(struct request_queue *q, disk_devt(ns->head->disk), bio->bi_iter.bi_sector); ret = direct_make_request(bio); - } else if (!list_empty_careful(&head->list)) { - dev_warn_ratelimited(dev, "no path available - requeuing I/O\n"); + } else if (nvme_available_path(head)) { + dev_warn_ratelimited(dev, "no usable path - requeuing I/O\n"); spin_lock_irq(&head->requeue_lock); bio_list_add(&head->requeue_list, bio); spin_unlock_irq(&head->requeue_lock); } else { - dev_warn_ratelimited(dev, "no path - failing I/O\n"); + dev_warn_ratelimited(dev, "no available path - failing I/O\n"); bio->bi_status = BLK_STS_IOERR; bio_endio(bio); diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 6b4fb67124c6..778b3a0b6adb 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -503,7 +503,8 @@ void nvme_mpath_remove_disk(struct nvme_ns_head *head); int nvme_mpath_init(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id); void nvme_mpath_uninit(struct nvme_ctrl *ctrl); void nvme_mpath_stop(struct nvme_ctrl *ctrl); -void nvme_mpath_clear_current_path(struct nvme_ns *ns); +bool nvme_mpath_clear_current_path(struct nvme_ns *ns); +void nvme_mpath_clear_ctrl_paths(struct nvme_ctrl *ctrl); struct nvme_ns *nvme_find_path(struct nvme_ns_head *head); static inline void nvme_mpath_check_last_path(struct nvme_ns *ns) @@ -551,7 +552,11 @@ static inline void nvme_mpath_add_disk(struct nvme_ns *ns, static inline void nvme_mpath_remove_disk(struct nvme_ns_head *head) { } -static inline void nvme_mpath_clear_current_path(struct nvme_ns *ns) +static inline bool nvme_mpath_clear_current_path(struct nvme_ns *ns) +{ + return false; +} +static inline void nvme_mpath_clear_ctrl_paths(struct nvme_ctrl *ctrl) { } static inline void nvme_mpath_check_last_path(struct nvme_ns *ns) From bd46a90634302bfe791e93ad5496f98f165f7ae0 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Mon, 29 Jul 2019 16:34:52 -0600 Subject: [PATCH 096/482] nvme-pci: Fix async probe remove race Ensure the controller is not in the NEW state when nvme_probe() exits. This will always allow a subsequent nvme_remove() to set the state to DELETING, fixing a potential race between the initial asynchronous probe and device removal. Reported-by: Li Zhong Reviewed-by: Sagi Grimberg Signed-off-by: Keith Busch Signed-off-by: Sagi Grimberg --- drivers/nvme/host/pci.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index db160cee42ad..0c2c4b0c6655 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2695,7 +2695,7 @@ static void nvme_async_probe(void *data, async_cookie_t cookie) { struct nvme_dev *dev = data; - nvme_reset_ctrl_sync(&dev->ctrl); + flush_work(&dev->ctrl.reset_work); flush_work(&dev->ctrl.scan_work); nvme_put_ctrl(&dev->ctrl); } @@ -2761,6 +2761,7 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) dev_info(dev->ctrl.device, "pci function %s\n", dev_name(&pdev->dev)); + nvme_reset_ctrl(&dev->ctrl); nvme_get_ctrl(&dev->ctrl); async_schedule(nvme_async_probe, dev); From 56fbc24116f458a0ea48f9f37fe770fd791042d9 Mon Sep 17 00:00:00 2001 From: Takshak Chahande Date: Wed, 31 Jul 2019 15:10:55 -0700 Subject: [PATCH 097/482] libbpf : make libbpf_num_possible_cpus function thread safe Having static variable `cpus` in libbpf_num_possible_cpus function without guarding it with mutex makes this function thread-unsafe. If multiple threads accessing this function, in the current form; it leads to incrementing the static variable value `cpus` in the multiple of total available CPUs. Used local stack variable to calculate the number of possible CPUs and then updated the static variable using WRITE_ONCE(). Changes since v1: * added stack variable to calculate cpus * serialized static variable update using WRITE_ONCE() * fixed Fixes tag Fixes: 6446b3155521 ("bpf: add a new API libbpf_num_possible_cpus()") Signed-off-by: Takshak Chahande Acked-by: Andrey Ignatov Reviewed-by: Jakub Kicinski Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/libbpf.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 6718d0b90130..2e84fa5b8479 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -4995,13 +4995,15 @@ int libbpf_num_possible_cpus(void) static const char *fcpu = "/sys/devices/system/cpu/possible"; int len = 0, n = 0, il = 0, ir = 0; unsigned int start = 0, end = 0; + int tmp_cpus = 0; static int cpus; char buf[128]; int error = 0; int fd = -1; - if (cpus > 0) - return cpus; + tmp_cpus = READ_ONCE(cpus); + if (tmp_cpus > 0) + return tmp_cpus; fd = open(fcpu, O_RDONLY); if (fd < 0) { @@ -5024,7 +5026,7 @@ int libbpf_num_possible_cpus(void) } buf[len] = '\0'; - for (ir = 0, cpus = 0; ir <= len; ir++) { + for (ir = 0, tmp_cpus = 0; ir <= len; ir++) { /* Each sub string separated by ',' has format \d+-\d+ or \d+ */ if (buf[ir] == ',' || buf[ir] == '\0') { buf[ir] = '\0'; @@ -5036,13 +5038,15 @@ int libbpf_num_possible_cpus(void) } else if (n == 1) { end = start; } - cpus += end - start + 1; + tmp_cpus += end - start + 1; il = ir + 1; } } - if (cpus <= 0) { - pr_warning("Invalid #CPUs %d from %s\n", cpus, fcpu); + if (tmp_cpus <= 0) { + pr_warning("Invalid #CPUs %d from %s\n", tmp_cpus, fcpu); return -EINVAL; } - return cpus; + + WRITE_ONCE(cpus, tmp_cpus); + return tmp_cpus; } From 6bbfe4e602691b90ac866712bd4c43c51e546a60 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 1 Aug 2019 16:26:42 +0200 Subject: [PATCH 098/482] perf bench numa: Fix cpu0 binding Michael reported an issue with perf bench numa failing with binding to cpu0 with '-0' option. # perf bench numa mem -p 3 -t 1 -P 512 -s 100 -zZcm0 --thp 1 -M 1 -ddd # Running 'numa/mem' benchmark: # Running main, "perf bench numa numa-mem -p 3 -t 1 -P 512 -s 100 -zZcm0 --thp 1 -M 1 -ddd" binding to node 0, mask: 0000000000000001 => -1 perf: bench/numa.c:356: bind_to_memnode: Assertion `!(ret)' failed. Aborted (core dumped) This happens when the cpu0 is not part of node0, which is the benchmark assumption and we can see that's not the case for some powerpc servers. Using correct node for cpu0 binding. Reported-by: Michael Petlan Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Satheesh Rajendran Link: http://lkml.kernel.org/r/20190801142642.28004-1-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/bench/numa.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c index a640ca7aaada..513cb2f2fa32 100644 --- a/tools/perf/bench/numa.c +++ b/tools/perf/bench/numa.c @@ -379,8 +379,10 @@ static u8 *alloc_data(ssize_t bytes0, int map_flags, /* Allocate and initialize all memory on CPU#0: */ if (init_cpu0) { - orig_mask = bind_to_node(0); - bind_to_memnode(0); + int node = numa_node_of_cpu(0); + + orig_mask = bind_to_node(node); + bind_to_memnode(node); } bytes = bytes0 + HPSIZE; From a29d56c2ed24ad33062bfdafdec9e34149715320 Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Thu, 1 Aug 2019 10:55:12 +0300 Subject: [PATCH 099/482] usb: typec: ucsi: ccg: Fix uninitilized symbol error Fix smatch error: drivers/usb/typec/ucsi/ucsi_ccg.c:975 ccg_fw_update() error: uninitialized symbol 'err'. Fixes: 5c9ae5a87573 ("usb: typec: ucsi: ccg: add firmware flashing support") Cc: stable@vger.kernel.org Reported-by: kbuild test robot Signed-off-by: Heikki Krogerus Link: https://lore.kernel.org/r/20190801075512.24354-1-heikki.krogerus@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/ucsi/ucsi_ccg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/typec/ucsi/ucsi_ccg.c b/drivers/usb/typec/ucsi/ucsi_ccg.c index f7a79a23ebed..8e9f8fba55af 100644 --- a/drivers/usb/typec/ucsi/ucsi_ccg.c +++ b/drivers/usb/typec/ucsi/ucsi_ccg.c @@ -1018,7 +1018,7 @@ static int do_flash(struct ucsi_ccg *uc, enum enum_flash_mode mode) ******************************************************************************/ static int ccg_fw_update(struct ucsi_ccg *uc, enum enum_flash_mode flash_mode) { - int err; + int err = 0; while (flash_mode != FLASH_NOT_NEEDED) { err = do_flash(uc, flash_mode); From b55d996f057bf2e7ba9422a80b5e17e99860cb0b Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Thu, 1 Aug 2019 09:40:26 -0700 Subject: [PATCH 100/482] Input: usbtouchscreen - initialize PM mutex before using it Mutexes shall be initialized before they are used. Fixes: 12e510dbc57b2 ("Input: usbtouchscreen - fix deadlock in autosuspend") Reported-by: syzbot+199ea16c7f26418b4365@syzkaller.appspotmail.com Signed-off-by: Oliver Neukum Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/usbtouchscreen.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/input/touchscreen/usbtouchscreen.c b/drivers/input/touchscreen/usbtouchscreen.c index a2cec6cacf57..16d70201de4a 100644 --- a/drivers/input/touchscreen/usbtouchscreen.c +++ b/drivers/input/touchscreen/usbtouchscreen.c @@ -1659,6 +1659,8 @@ static int usbtouch_probe(struct usb_interface *intf, if (!usbtouch || !input_dev) goto out_free; + mutex_init(&usbtouch->pm_mutex); + type = &usbtouch_dev_info[id->driver_info]; usbtouch->type = type; if (!type->process_pkt) From c88090dfc84254fa149174eb3e6a8458de1912c4 Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Thu, 1 Aug 2019 09:44:25 -0700 Subject: [PATCH 101/482] Input: kbtab - sanity check for endpoint type The driver should check whether the endpoint it uses has the correct type. Reported-by: syzbot+c7df50363aaff50aa363@syzkaller.appspotmail.com Signed-off-by: Oliver Neukum Signed-off-by: Dmitry Torokhov --- drivers/input/tablet/kbtab.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/input/tablet/kbtab.c b/drivers/input/tablet/kbtab.c index 04b85571f41e..aa577898e952 100644 --- a/drivers/input/tablet/kbtab.c +++ b/drivers/input/tablet/kbtab.c @@ -117,6 +117,10 @@ static int kbtab_probe(struct usb_interface *intf, const struct usb_device_id *i if (intf->cur_altsetting->desc.bNumEndpoints < 1) return -ENODEV; + endpoint = &intf->cur_altsetting->endpoint[0].desc; + if (!usb_endpoint_is_int_in(endpoint)) + return -ENODEV; + kbtab = kzalloc(sizeof(struct kbtab), GFP_KERNEL); input_dev = input_allocate_device(); if (!kbtab || !input_dev) @@ -155,8 +159,6 @@ static int kbtab_probe(struct usb_interface *intf, const struct usb_device_id *i input_set_abs_params(input_dev, ABS_Y, 0, 0x1750, 4, 0); input_set_abs_params(input_dev, ABS_PRESSURE, 0, 0xff, 0, 0); - endpoint = &intf->cur_altsetting->endpoint[0].desc; - usb_fill_int_urb(kbtab->irq, dev, usb_rcvintpipe(dev, endpoint->bEndpointAddress), kbtab->data, 8, From 5511c0c309db4c526a6e9f8b2b8a1483771574bc Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Thu, 1 Aug 2019 11:23:23 -0600 Subject: [PATCH 102/482] coresight: Fix DEBUG_LOCKS_WARN_ON for uninitialized attribute While running the linux-next with CONFIG_DEBUG_LOCKS_ALLOC enabled, I get the following splat. BUG: key ffffcb5636929298 has not been registered! ------------[ cut here ]------------ DEBUG_LOCKS_WARN_ON(1) WARNING: CPU: 1 PID: 53 at kernel/locking/lockdep.c:3669 lockdep_init_map+0x164/0x1f0 CPU: 1 PID: 53 Comm: kworker/1:1 Tainted: G W 5.2.0-next-20190712-00015-g00ad4634222e-dirty #603 Workqueue: events amba_deferred_retry_func pstate: 60c00005 (nZCv daif +PAN +UAO) pc : lockdep_init_map+0x164/0x1f0 lr : lockdep_init_map+0x164/0x1f0 [ trimmed ] Call trace: lockdep_init_map+0x164/0x1f0 __kernfs_create_file+0x9c/0x158 sysfs_add_file_mode_ns+0xa8/0x1d0 sysfs_add_file_to_group+0x88/0xd8 etm_perf_add_symlink_sink+0xcc/0x138 coresight_register+0x110/0x280 tmc_probe+0x160/0x420 [ trimmed ] ---[ end trace ab4cc669615ba1b0 ]--- Fix this by initialising the dynamically allocated attribute properly. Cc: Mathieu Poirier Fixes: bb8e370bdc14 ("coresight: perf: Add "sinks" group to PMU directory") Cc: stable Signed-off-by: Suzuki K Poulose [Fixed a typograhic error in the changelog] Signed-off-by: Mathieu Poirier Link: https://lore.kernel.org/r/20190801172323.18359-2-mathieu.poirier@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/hwtracing/coresight/coresight-etm-perf.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/hwtracing/coresight/coresight-etm-perf.c b/drivers/hwtracing/coresight/coresight-etm-perf.c index 5c1ca0df5cb0..84f1dcb69827 100644 --- a/drivers/hwtracing/coresight/coresight-etm-perf.c +++ b/drivers/hwtracing/coresight/coresight-etm-perf.c @@ -544,6 +544,7 @@ int etm_perf_add_symlink_sink(struct coresight_device *csdev) /* See function coresight_get_sink_by_id() to know where this is used */ hash = hashlen_hash(hashlen_string(NULL, name)); + sysfs_attr_init(&ea->attr.attr); ea->attr.attr.name = devm_kstrdup(dev, name, GFP_KERNEL); if (!ea->attr.attr.name) return -ENOMEM; From 3415ec643e7bd644b03026efbe2f2b36cbe9b34b Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 1 Aug 2019 00:24:05 -0700 Subject: [PATCH 103/482] libbpf: set BTF FD for prog only when there is supported .BTF.ext data 5d01ab7bac46 ("libbpf: fix erroneous multi-closing of BTF FD") introduced backwards-compatibility issue, manifesting itself as -E2BIG error returned on program load due to unknown non-zero btf_fd attribute value for BPF_PROG_LOAD sys_bpf() sub-command. This patch fixes bug by ensuring that we only ever associate BTF FD with program if there is a BTF.ext data that was successfully loaded into kernel, which automatically means kernel supports func_info/line_info and associated BTF FD for progs (checked and ensured also by BTF sanitization code). Fixes: 5d01ab7bac46 ("libbpf: fix erroneous multi-closing of BTF FD") Reported-by: Andrey Ignatov Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/libbpf.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 2e84fa5b8479..2b57d7ea7836 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -2472,7 +2472,11 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt, load_attr.license = license; load_attr.kern_version = kern_version; load_attr.prog_ifindex = prog->prog_ifindex; - btf_fd = bpf_object__btf_fd(prog->obj); + /* if .BTF.ext was loaded, kernel supports associated BTF for prog */ + if (prog->obj->btf_ext) + btf_fd = bpf_object__btf_fd(prog->obj); + else + btf_fd = -1; load_attr.prog_btf_fd = btf_fd >= 0 ? btf_fd : 0; load_attr.func_info = prog->func_info; load_attr.func_info_rec_size = prog->func_info_rec_size; From 7c2e988f400e83501e0a3568250780609b7c8263 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Tue, 30 Jul 2019 18:38:26 -0700 Subject: [PATCH 104/482] bpf: fix x64 JIT code generation for jmp to 1st insn Introduction of bounded loops exposed old bug in x64 JIT. JIT maintains the array of offsets to the end of all instructions to compute jmp offsets. addrs[0] - offset of the end of the 1st insn (that includes prologue). addrs[1] - offset of the end of the 2nd insn. JIT didn't keep the offset of the beginning of the 1st insn, since classic BPF didn't have backward jumps and valid extended BPF couldn't have a branch to 1st insn, because it didn't allow loops. With bounded loops it's possible to construct a valid program that jumps backwards to the 1st insn. Fix JIT by computing: addrs[0] - offset of the end of prologue == start of the 1st insn. addrs[1] - offset of the end of 1st insn. v1->v2: - Yonghong noticed a bug in jit linfo. Fix it by passing 'addrs + 1' to bpf_prog_fill_jited_linfo(), since it expects insn_to_jit_off array to be offsets to last byte. Reported-by: syzbot+35101610ff3e83119b1b@syzkaller.appspotmail.com Fixes: 2589726d12a1 ("bpf: introduce bounded loops") Fixes: 0a14842f5a3c ("net: filter: Just In Time compiler for x86-64") Signed-off-by: Alexei Starovoitov Acked-by: Song Liu --- arch/x86/net/bpf_jit_comp.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index eaaed5bfc4a4..991549a1c5f3 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -390,8 +390,9 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, emit_prologue(&prog, bpf_prog->aux->stack_depth, bpf_prog_was_classic(bpf_prog)); + addrs[0] = prog - temp; - for (i = 0; i < insn_cnt; i++, insn++) { + for (i = 1; i <= insn_cnt; i++, insn++) { const s32 imm32 = insn->imm; u32 dst_reg = insn->dst_reg; u32 src_reg = insn->src_reg; @@ -1105,7 +1106,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) extra_pass = true; goto skip_init_addrs; } - addrs = kmalloc_array(prog->len, sizeof(*addrs), GFP_KERNEL); + addrs = kmalloc_array(prog->len + 1, sizeof(*addrs), GFP_KERNEL); if (!addrs) { prog = orig_prog; goto out_addrs; @@ -1115,7 +1116,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) * Before first pass, make a rough estimation of addrs[] * each BPF instruction is translated to less than 64 bytes */ - for (proglen = 0, i = 0; i < prog->len; i++) { + for (proglen = 0, i = 0; i <= prog->len; i++) { proglen += 64; addrs[i] = proglen; } @@ -1180,7 +1181,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) if (!image || !prog->is_func || extra_pass) { if (image) - bpf_prog_fill_jited_linfo(prog, addrs); + bpf_prog_fill_jited_linfo(prog, addrs + 1); out_addrs: kfree(addrs); kfree(jit_data); From f1fc7249dddc0e52d9e805e2e661caa118649509 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Tue, 30 Jul 2019 18:38:27 -0700 Subject: [PATCH 105/482] selftests/bpf: tests for jmp to 1st insn Add 2 tests that check JIT code generation to jumps to 1st insn. 1st test is similar to syzbot reproducer. The backwards branch is never taken at runtime. 2nd test has branch to 1st insn that executes. The test is written as two bpf functions, since it's not possible to construct valid single bpf program that jumps to 1st insn. Signed-off-by: Alexei Starovoitov Acked-by: Song Liu --- tools/testing/selftests/bpf/verifier/loops1.c | 28 +++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/tools/testing/selftests/bpf/verifier/loops1.c b/tools/testing/selftests/bpf/verifier/loops1.c index 5e980a5ab69d..1fc4e61e9f9f 100644 --- a/tools/testing/selftests/bpf/verifier/loops1.c +++ b/tools/testing/selftests/bpf/verifier/loops1.c @@ -159,3 +159,31 @@ .errstr = "loop detected", .prog_type = BPF_PROG_TYPE_TRACEPOINT, }, +{ + "not-taken loop with back jump to 1st insn", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 123), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 4, -2), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .retval = 123, +}, +{ + "taken loop with back jump to 1st insn", + .insns = { + BPF_MOV64_IMM(BPF_REG_1, 10), + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_1), + BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 1), + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, -3), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .retval = 55, +}, From 25e44338321af545ab34243a6081c3f0fc6107d0 Mon Sep 17 00:00:00 2001 From: Gary R Hook Date: Tue, 30 Jul 2019 16:05:22 +0000 Subject: [PATCH 106/482] crypto: ccp - Fix oops by properly managing allocated structures A plaintext or ciphertext length of 0 is allowed in AES, in which case no encryption occurs. Ensure that we don't clean up data structures that were never allocated. Fixes: 36cf515b9bbe2 ("crypto: ccp - Enable support for AES GCM on v5 CCPs") Cc: Signed-off-by: Gary R Hook Signed-off-by: Herbert Xu --- drivers/crypto/ccp/ccp-ops.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/ccp/ccp-ops.c b/drivers/crypto/ccp/ccp-ops.c index 3ebe031773d5..59f9849c3662 100644 --- a/drivers/crypto/ccp/ccp-ops.c +++ b/drivers/crypto/ccp/ccp-ops.c @@ -859,11 +859,11 @@ static int ccp_run_aes_gcm_cmd(struct ccp_cmd_queue *cmd_q, ccp_dm_free(&final_wa); e_dst: - if (aes->src_len && !in_place) + if (ilen > 0 && !in_place) ccp_free_data(&dst, cmd_q); e_src: - if (aes->src_len) + if (ilen > 0) ccp_free_data(&src, cmd_q); e_aad: From 9f00baf74e4b6f79a3a3dfab44fb7bb2e797b551 Mon Sep 17 00:00:00 2001 From: Gary R Hook Date: Tue, 30 Jul 2019 16:05:24 +0000 Subject: [PATCH 107/482] crypto: ccp - Add support for valid authsize values less than 16 AES GCM encryption allows for authsize values of 4, 8, and 12-16 bytes. Validate the requested authsize, and retain it to save in the request context. Fixes: 36cf515b9bbe2 ("crypto: ccp - Enable support for AES GCM on v5 CCPs") Cc: Signed-off-by: Gary R Hook Signed-off-by: Herbert Xu --- drivers/crypto/ccp/ccp-crypto-aes-galois.c | 14 ++++++++++++ drivers/crypto/ccp/ccp-ops.c | 26 +++++++++++++++++----- include/linux/ccp.h | 2 ++ 3 files changed, 37 insertions(+), 5 deletions(-) diff --git a/drivers/crypto/ccp/ccp-crypto-aes-galois.c b/drivers/crypto/ccp/ccp-crypto-aes-galois.c index d22631cb2bb3..02eba84028b3 100644 --- a/drivers/crypto/ccp/ccp-crypto-aes-galois.c +++ b/drivers/crypto/ccp/ccp-crypto-aes-galois.c @@ -58,6 +58,19 @@ static int ccp_aes_gcm_setkey(struct crypto_aead *tfm, const u8 *key, static int ccp_aes_gcm_setauthsize(struct crypto_aead *tfm, unsigned int authsize) { + switch (authsize) { + case 16: + case 15: + case 14: + case 13: + case 12: + case 8: + case 4: + break; + default: + return -EINVAL; + } + return 0; } @@ -104,6 +117,7 @@ static int ccp_aes_gcm_crypt(struct aead_request *req, bool encrypt) memset(&rctx->cmd, 0, sizeof(rctx->cmd)); INIT_LIST_HEAD(&rctx->cmd.entry); rctx->cmd.engine = CCP_ENGINE_AES; + rctx->cmd.u.aes.authsize = crypto_aead_authsize(tfm); rctx->cmd.u.aes.type = ctx->u.aes.type; rctx->cmd.u.aes.mode = ctx->u.aes.mode; rctx->cmd.u.aes.action = encrypt; diff --git a/drivers/crypto/ccp/ccp-ops.c b/drivers/crypto/ccp/ccp-ops.c index 59f9849c3662..ef723e2722a8 100644 --- a/drivers/crypto/ccp/ccp-ops.c +++ b/drivers/crypto/ccp/ccp-ops.c @@ -622,6 +622,7 @@ static int ccp_run_aes_gcm_cmd(struct ccp_cmd_queue *cmd_q, unsigned long long *final; unsigned int dm_offset; + unsigned int authsize; unsigned int jobid; unsigned int ilen; bool in_place = true; /* Default value */ @@ -643,6 +644,21 @@ static int ccp_run_aes_gcm_cmd(struct ccp_cmd_queue *cmd_q, if (!aes->key) /* Gotta have a key SGL */ return -EINVAL; + /* Zero defaults to 16 bytes, the maximum size */ + authsize = aes->authsize ? aes->authsize : AES_BLOCK_SIZE; + switch (authsize) { + case 16: + case 15: + case 14: + case 13: + case 12: + case 8: + case 4: + break; + default: + return -EINVAL; + } + /* First, decompose the source buffer into AAD & PT, * and the destination buffer into AAD, CT & tag, or * the input into CT & tag. @@ -657,7 +673,7 @@ static int ccp_run_aes_gcm_cmd(struct ccp_cmd_queue *cmd_q, p_tag = scatterwalk_ffwd(sg_tag, p_outp, ilen); } else { /* Input length for decryption includes tag */ - ilen = aes->src_len - AES_BLOCK_SIZE; + ilen = aes->src_len - authsize; p_tag = scatterwalk_ffwd(sg_tag, p_inp, ilen); } @@ -839,19 +855,19 @@ static int ccp_run_aes_gcm_cmd(struct ccp_cmd_queue *cmd_q, if (aes->action == CCP_AES_ACTION_ENCRYPT) { /* Put the ciphered tag after the ciphertext. */ - ccp_get_dm_area(&final_wa, 0, p_tag, 0, AES_BLOCK_SIZE); + ccp_get_dm_area(&final_wa, 0, p_tag, 0, authsize); } else { /* Does this ciphered tag match the input? */ - ret = ccp_init_dm_workarea(&tag, cmd_q, AES_BLOCK_SIZE, + ret = ccp_init_dm_workarea(&tag, cmd_q, authsize, DMA_BIDIRECTIONAL); if (ret) goto e_tag; - ret = ccp_set_dm_area(&tag, 0, p_tag, 0, AES_BLOCK_SIZE); + ret = ccp_set_dm_area(&tag, 0, p_tag, 0, authsize); if (ret) goto e_tag; ret = crypto_memneq(tag.address, final_wa.address, - AES_BLOCK_SIZE) ? -EBADMSG : 0; + authsize) ? -EBADMSG : 0; ccp_dm_free(&tag); } diff --git a/include/linux/ccp.h b/include/linux/ccp.h index 7e9c991c95e0..43ed9e77cf81 100644 --- a/include/linux/ccp.h +++ b/include/linux/ccp.h @@ -173,6 +173,8 @@ struct ccp_aes_engine { enum ccp_aes_mode mode; enum ccp_aes_action action; + u32 authsize; + struct scatterlist *key; u32 key_len; /* In bytes */ From e2664ecbb2f26225ac6646876f2899558ffb2604 Mon Sep 17 00:00:00 2001 From: Gary R Hook Date: Tue, 30 Jul 2019 16:05:26 +0000 Subject: [PATCH 108/482] crypto: ccp - Ignore tag length when decrypting GCM ciphertext AES GCM input buffers for decryption contain AAD+CTEXT+TAG. Only decrypt the ciphertext, and use the tag for comparison. Fixes: 36cf515b9bbe2 ("crypto: ccp - Enable support for AES GCM on v5 CCPs") Cc: Signed-off-by: Gary R Hook Signed-off-by: Herbert Xu --- drivers/crypto/ccp/ccp-ops.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/crypto/ccp/ccp-ops.c b/drivers/crypto/ccp/ccp-ops.c index ef723e2722a8..76e1b4dcd193 100644 --- a/drivers/crypto/ccp/ccp-ops.c +++ b/drivers/crypto/ccp/ccp-ops.c @@ -782,8 +782,7 @@ static int ccp_run_aes_gcm_cmd(struct ccp_cmd_queue *cmd_q, while (src.sg_wa.bytes_left) { ccp_prepare_data(&src, &dst, &op, AES_BLOCK_SIZE, true); if (!src.sg_wa.bytes_left) { - unsigned int nbytes = aes->src_len - % AES_BLOCK_SIZE; + unsigned int nbytes = ilen % AES_BLOCK_SIZE; if (nbytes) { op.eom = 1; From b07dd9b400981f487940a4d84292d3a0e7cd9362 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Thu, 25 Jul 2019 18:40:05 -0500 Subject: [PATCH 109/482] soundwire: cadence_master: fix register definition for SLAVE_STATE wrong prefix and wrong macro. Signed-off-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20190725234032.21152-14-pierre-louis.bossart@linux.intel.com Signed-off-by: Vinod Koul --- drivers/soundwire/cadence_master.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/soundwire/cadence_master.c b/drivers/soundwire/cadence_master.c index ff4badc9b3de..c2e3d93c4ddc 100644 --- a/drivers/soundwire/cadence_master.c +++ b/drivers/soundwire/cadence_master.c @@ -81,8 +81,8 @@ #define CDNS_MCP_INTSET 0x4C -#define CDNS_SDW_SLAVE_STAT 0x50 -#define CDNS_MCP_SLAVE_STAT_MASK BIT(1, 0) +#define CDNS_MCP_SLAVE_STAT 0x50 +#define CDNS_MCP_SLAVE_STAT_MASK GENMASK(1, 0) #define CDNS_MCP_SLAVE_INTSTAT0 0x54 #define CDNS_MCP_SLAVE_INTSTAT1 0x58 From 664b16589f882202b8fa8149d0074f3159bade76 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Thu, 25 Jul 2019 18:40:06 -0500 Subject: [PATCH 110/482] soundwire: cadence_master: fix definitions for INTSTAT0/1 Two off-by-one errors: INTSTAT0 missed BIT(31) and INTSTAT1 is only defined on first 16 bits. Signed-off-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20190725234032.21152-15-pierre-louis.bossart@linux.intel.com Signed-off-by: Vinod Koul --- drivers/soundwire/cadence_master.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/soundwire/cadence_master.c b/drivers/soundwire/cadence_master.c index c2e3d93c4ddc..60e8bdee5c75 100644 --- a/drivers/soundwire/cadence_master.c +++ b/drivers/soundwire/cadence_master.c @@ -96,8 +96,8 @@ #define CDNS_MCP_SLAVE_INTMASK0 0x5C #define CDNS_MCP_SLAVE_INTMASK1 0x60 -#define CDNS_MCP_SLAVE_INTMASK0_MASK GENMASK(30, 0) -#define CDNS_MCP_SLAVE_INTMASK1_MASK GENMASK(16, 0) +#define CDNS_MCP_SLAVE_INTMASK0_MASK GENMASK(31, 0) +#define CDNS_MCP_SLAVE_INTMASK1_MASK GENMASK(15, 0) #define CDNS_MCP_PORT_INTSTAT 0x64 #define CDNS_MCP_PDI_STAT 0x6C From af655cc5aae800f30cd71c5d72d36bd3ecb1e7df Mon Sep 17 00:00:00 2001 From: Thong Thai Date: Thu, 25 Jul 2019 11:21:58 -0400 Subject: [PATCH 111/482] drm/amd/amdgpu/vcn_v2_0: Mark RB commands as KMD commands MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Sets the CMD_SOURCE bit for VCN 2.0 Decoder Ring Buffer commands. This bit was previously set by the RBC HW on older firmware. Newer firmware uses a SW RBC and this bit has to be set by the driver. Signed-off-by: Thong Thai Reviewed-by: Leo Liu Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h | 1 + drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c | 12 ++++++------ 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h index 99f14fcc1460..19661c645703 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h @@ -30,6 +30,7 @@ #define AMDGPU_VCN_FIRMWARE_OFFSET 256 #define AMDGPU_VCN_MAX_ENC_RINGS 3 +#define VCN_DEC_KMD_CMD 0x80000000 #define VCN_DEC_CMD_FENCE 0x00000000 #define VCN_DEC_CMD_TRAP 0x00000001 #define VCN_DEC_CMD_WRITE_REG 0x00000004 diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c index 1cfc2620b2dd..46593e323e77 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c @@ -1485,7 +1485,7 @@ static void vcn_v2_0_dec_ring_insert_start(struct amdgpu_ring *ring) amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET, 0)); amdgpu_ring_write(ring, 0); amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET, 0)); - amdgpu_ring_write(ring, VCN_DEC_CMD_PACKET_START << 1); + amdgpu_ring_write(ring, VCN_DEC_KMD_CMD | (VCN_DEC_CMD_PACKET_START << 1)); } /** @@ -1498,7 +1498,7 @@ static void vcn_v2_0_dec_ring_insert_start(struct amdgpu_ring *ring) static void vcn_v2_0_dec_ring_insert_end(struct amdgpu_ring *ring) { amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET, 0)); - amdgpu_ring_write(ring, VCN_DEC_CMD_PACKET_END << 1); + amdgpu_ring_write(ring, VCN_DEC_KMD_CMD | (VCN_DEC_CMD_PACKET_END << 1)); } /** @@ -1543,7 +1543,7 @@ static void vcn_v2_0_dec_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 amdgpu_ring_write(ring, upper_32_bits(addr) & 0xff); amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET, 0)); - amdgpu_ring_write(ring, VCN_DEC_CMD_FENCE << 1); + amdgpu_ring_write(ring, VCN_DEC_KMD_CMD | (VCN_DEC_CMD_FENCE << 1)); amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET, 0)); amdgpu_ring_write(ring, 0); @@ -1553,7 +1553,7 @@ static void vcn_v2_0_dec_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET, 0)); - amdgpu_ring_write(ring, VCN_DEC_CMD_TRAP << 1); + amdgpu_ring_write(ring, VCN_DEC_KMD_CMD | (VCN_DEC_CMD_TRAP << 1)); } /** @@ -1597,7 +1597,7 @@ static void vcn_v2_0_dec_ring_emit_reg_wait(struct amdgpu_ring *ring, amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET, 0)); - amdgpu_ring_write(ring, VCN_DEC_CMD_REG_READ_COND_WAIT << 1); + amdgpu_ring_write(ring, VCN_DEC_KMD_CMD | (VCN_DEC_CMD_REG_READ_COND_WAIT << 1)); } static void vcn_v2_0_dec_ring_emit_vm_flush(struct amdgpu_ring *ring, @@ -1626,7 +1626,7 @@ static void vcn_v2_0_dec_ring_emit_wreg(struct amdgpu_ring *ring, amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET, 0)); - amdgpu_ring_write(ring, VCN_DEC_CMD_WRITE_REG << 1); + amdgpu_ring_write(ring, VCN_DEC_KMD_CMD | (VCN_DEC_CMD_WRITE_REG << 1)); } /** From d1836f3813ee0742a2067d5f4d78e811d2b76d9d Mon Sep 17 00:00:00 2001 From: Thong Thai Date: Thu, 25 Jul 2019 11:26:56 -0400 Subject: [PATCH 112/482] drm/amd/amdgpu/vcn_v2_0: Move VCN 2.0 specific dec ring test to vcn_v2_0 VCN 2.0 firmware now requires a packet start command to be sent before any other decode ring buffer command. Signed-off-by: Thong Thai Reviewed-by: Leo Liu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c | 32 ++++++++++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c index 46593e323e77..dfde886cc6bd 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c @@ -2079,6 +2079,36 @@ static int vcn_v2_0_process_interrupt(struct amdgpu_device *adev, return 0; } +static int vcn_v2_0_dec_ring_test_ring(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + uint32_t tmp = 0; + unsigned i; + int r; + + WREG32(adev->vcn.external.scratch9, 0xCAFEDEAD); + r = amdgpu_ring_alloc(ring, 4); + if (r) + return r; + amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET, 0)); + amdgpu_ring_write(ring, VCN_DEC_KMD_CMD | (VCN_DEC_CMD_PACKET_START << 1)); + amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.scratch9, 0)); + amdgpu_ring_write(ring, 0xDEADBEEF); + amdgpu_ring_commit(ring); + for (i = 0; i < adev->usec_timeout; i++) { + tmp = RREG32(adev->vcn.external.scratch9); + if (tmp == 0xDEADBEEF) + break; + DRM_UDELAY(1); + } + + if (i >= adev->usec_timeout) + r = -ETIMEDOUT; + + return r; +} + + static int vcn_v2_0_set_powergating_state(void *handle, enum amd_powergating_state state) { @@ -2142,7 +2172,7 @@ static const struct amdgpu_ring_funcs vcn_v2_0_dec_ring_vm_funcs = { .emit_ib = vcn_v2_0_dec_ring_emit_ib, .emit_fence = vcn_v2_0_dec_ring_emit_fence, .emit_vm_flush = vcn_v2_0_dec_ring_emit_vm_flush, - .test_ring = amdgpu_vcn_dec_ring_test_ring, + .test_ring = vcn_v2_0_dec_ring_test_ring, .test_ib = amdgpu_vcn_dec_ring_test_ib, .insert_nop = vcn_v2_0_dec_ring_insert_nop, .insert_start = vcn_v2_0_dec_ring_insert_start, From 783bda5e41acc71f98336e1a402c180f9748e5dc Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Fri, 2 Aug 2019 17:33:35 +0900 Subject: [PATCH 113/482] usb: host: xhci-rcar: Fix timeout in xhci_suspend() When a USB device is connected to the host controller and the system enters suspend, the following error happens in xhci_suspend(): xhci-hcd ee000000.usb: WARN: xHC CMD_RUN timeout Since the firmware/internal CPU control the USBSTS.STS_HALT and the process speed is down when the roothub port enters U3, long delay for the handshake of STS_HALT is neeed in xhci_suspend(). So, this patch adds to set the XHCI_SLOW_SUSPEND. Fixes: 435cc1138ec9 ("usb: host: xhci-plat: set resume_quirk() for R-Car controllers") Cc: # v4.12+ Signed-off-by: Yoshihiro Shimoda Link: https://lore.kernel.org/r/1564734815-17964-1-git-send-email-yoshihiro.shimoda.uh@renesas.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-rcar.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/usb/host/xhci-rcar.c b/drivers/usb/host/xhci-rcar.c index 671bce18782c..8616c52849c6 100644 --- a/drivers/usb/host/xhci-rcar.c +++ b/drivers/usb/host/xhci-rcar.c @@ -238,10 +238,15 @@ int xhci_rcar_init_quirk(struct usb_hcd *hcd) * pointers. So, this driver clears the AC64 bit of xhci->hcc_params * to call dma_set_coherent_mask(dev, DMA_BIT_MASK(32)) in * xhci_gen_setup(). + * + * And, since the firmware/internal CPU control the USBSTS.STS_HALT + * and the process speed is down when the roothub port enters U3, + * long delay for the handshake of STS_HALT is neeed in xhci_suspend(). */ if (xhci_rcar_is_gen2(hcd->self.controller) || - xhci_rcar_is_gen3(hcd->self.controller)) - xhci->quirks |= XHCI_NO_64BIT_SUPPORT; + xhci_rcar_is_gen3(hcd->self.controller)) { + xhci->quirks |= XHCI_NO_64BIT_SUPPORT | XHCI_SLOW_SUSPEND; + } if (!xhci_rcar_wait_for_pll_active(hcd)) return -ETIMEDOUT; From cb53c517285f8d2548d11422173ca8ec7b9c8f44 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Fri, 2 Aug 2019 18:00:44 +0300 Subject: [PATCH 114/482] xhci: Fix NULL pointer dereference at endpoint zero reset. Usb core will reset the default control endpoint "ep0" before resetting a device. if the endpoint has a valid pointer back to the usb device then the xhci driver reset callback will try to clear the toggle for the endpoint. ep0 didn't use to have this pointer set as ep0 was always allocated by default together with a xhci slot for the usb device. Other endpoints got their usb device pointer set in xhci_add_endpoint() This changed with commit ef513be0a905 ("usb: xhci: Add Clear_TT_Buffer") which sets the pointer for any endpoint on a FS/LS device behind a HS hub that halts, including ep0. If xHC controller needs to be reset at resume, then all the xhci slots will be lost. Slots will be reenabled and reallocated at device reset, but unlike other endpoints the ep0 is reset before device reset, while the xhci slot may still be invalid, causing NULL pointer dereference. Fix it by checking that the endpoint has both a usb device pointer and valid xhci slot before trying to clear the toggle. This issue was not seen earlier as ep0 didn't use to have a valid usb device pointer, and other endpoints were only reset after device reset when xhci slots were properly reenabled. Reported-by: Bob Gleitsmann Reported-by: Enric Balletbo Serra Fixes: ef513be0a905 ("usb: xhci: Add Clear_TT_Buffer") Signed-off-by: Mathias Nyman Tested-by: Enric Balletbo i Serra Link: https://lore.kernel.org/r/1564758044-24748-1-git-send-email-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 248cd7a8b163..03d1e552769b 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -3089,8 +3089,18 @@ static void xhci_endpoint_reset(struct usb_hcd *hcd, return; udev = (struct usb_device *) host_ep->hcpriv; vdev = xhci->devs[udev->slot_id]; + + /* + * vdev may be lost due to xHC restore error and re-initialization + * during S3/S4 resume. A new vdev will be allocated later by + * xhci_discover_or_reset_device() + */ + if (!udev->slot_id || !vdev) + return; ep_index = xhci_get_endpoint_index(&host_ep->desc); ep = &vdev->eps[ep_index]; + if (!ep) + return; /* Bail out if toggle is already being cleared by a endpoint reset */ if (ep->ep_state & EP_HARD_CLEAR_TOGGLE) { From 88d02c9ba2e83fc22d37ccb1f11c62ea6fc9ae50 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Fri, 2 Aug 2019 09:03:42 -0700 Subject: [PATCH 115/482] usb: typec: tcpm: Ignore unsupported/unknown alternate mode requests TCPM may receive PD messages associated with unknown or unsupported alternate modes. If that happens, calls to typec_match_altmode() will return NULL. The tcpm code does not currently take this into account. This results in crashes. Unable to handle kernel NULL pointer dereference at virtual address 000001f0 pgd = 41dad9a1 [000001f0] *pgd=00000000 Internal error: Oops: 5 [#1] THUMB2 Modules linked in: tcpci tcpm CPU: 0 PID: 2338 Comm: kworker/u2:0 Not tainted 5.1.18-sama5-armv7-r2 #6 Hardware name: Atmel SAMA5 Workqueue: 2-0050 tcpm_pd_rx_handler [tcpm] PC is at typec_altmode_attention+0x0/0x14 LR is at tcpm_pd_rx_handler+0xa3b/0xda0 [tcpm] ... [] (typec_altmode_attention) from [] (tcpm_pd_rx_handler+0xa3b/0xda0 [tcpm]) [] (tcpm_pd_rx_handler [tcpm]) from [] (process_one_work+0x123/0x2a8) [] (process_one_work) from [] (worker_thread+0xbd/0x3b0) [] (worker_thread) from [] (kthread+0xcf/0xf4) [] (kthread) from [] (ret_from_fork+0x11/0x38) Ignore PD messages if the associated alternate mode is not supported. Fixes: e9576fe8e605c ("usb: typec: tcpm: Support for Alternate Modes") Cc: stable Reported-by: Douglas Gilbert Cc: Douglas Gilbert Acked-by: Heikki Krogerus Tested-by: Douglas Gilbert Signed-off-by: Guenter Roeck Link: https://lore.kernel.org/r/1564761822-13984-1-git-send-email-linux@roeck-us.net Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/tcpm/tcpm.c | 36 ++++++++++++++++++++++------------- 1 file changed, 23 insertions(+), 13 deletions(-) diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c index ab6456622120..15abe1d9958f 100644 --- a/drivers/usb/typec/tcpm/tcpm.c +++ b/drivers/usb/typec/tcpm/tcpm.c @@ -1109,7 +1109,8 @@ static int tcpm_pd_svdm(struct tcpm_port *port, const __le32 *payload, int cnt, break; case CMD_ATTENTION: /* Attention command does not have response */ - typec_altmode_attention(adev, p[1]); + if (adev) + typec_altmode_attention(adev, p[1]); return 0; default: break; @@ -1161,20 +1162,26 @@ static int tcpm_pd_svdm(struct tcpm_port *port, const __le32 *payload, int cnt, } break; case CMD_ENTER_MODE: - typec_altmode_update_active(pdev, true); + if (adev && pdev) { + typec_altmode_update_active(pdev, true); - if (typec_altmode_vdm(adev, p[0], &p[1], cnt)) { - response[0] = VDO(adev->svid, 1, CMD_EXIT_MODE); - response[0] |= VDO_OPOS(adev->mode); - return 1; + if (typec_altmode_vdm(adev, p[0], &p[1], cnt)) { + response[0] = VDO(adev->svid, 1, + CMD_EXIT_MODE); + response[0] |= VDO_OPOS(adev->mode); + return 1; + } } return 0; case CMD_EXIT_MODE: - typec_altmode_update_active(pdev, false); + if (adev && pdev) { + typec_altmode_update_active(pdev, false); - /* Back to USB Operation */ - WARN_ON(typec_altmode_notify(adev, TYPEC_STATE_USB, - NULL)); + /* Back to USB Operation */ + WARN_ON(typec_altmode_notify(adev, + TYPEC_STATE_USB, + NULL)); + } break; default: break; @@ -1184,8 +1191,10 @@ static int tcpm_pd_svdm(struct tcpm_port *port, const __le32 *payload, int cnt, switch (cmd) { case CMD_ENTER_MODE: /* Back to USB Operation */ - WARN_ON(typec_altmode_notify(adev, TYPEC_STATE_USB, - NULL)); + if (adev) + WARN_ON(typec_altmode_notify(adev, + TYPEC_STATE_USB, + NULL)); break; default: break; @@ -1196,7 +1205,8 @@ static int tcpm_pd_svdm(struct tcpm_port *port, const __le32 *payload, int cnt, } /* Informing the alternate mode drivers about everything */ - typec_altmode_vdm(adev, p[0], &p[1], cnt); + if (adev) + typec_altmode_vdm(adev, p[0], &p[1], cnt); return rlen; } From 88639051017fb61a414b636dd0fc490da2b62b64 Mon Sep 17 00:00:00 2001 From: Vijendar Mukunda Date: Fri, 2 Aug 2019 19:21:23 +0530 Subject: [PATCH 116/482] ASoC: amd: acp3x: use dma_ops of parent device for acp3x dma driver AMD platform device acp3x_rv_i2s created by parent PCI device driver. Pass struct device of the parent to snd_pcm_lib_preallocate_pages() so dma_alloc_coherent() can use correct dma_ops. Otherwise, it will use default dma_ops which is nommu_dma_ops on x86_64 even when IOMMU is enabled and set to non passthrough mode. Signed-off-by: Vijendar Mukunda Link: https://lore.kernel.org/r/1564753899-17124-1-git-send-email-Vijendar.Mukunda@amd.com Signed-off-by: Mark Brown --- sound/soc/amd/raven/acp3x-pcm-dma.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/sound/soc/amd/raven/acp3x-pcm-dma.c b/sound/soc/amd/raven/acp3x-pcm-dma.c index a4ade6bb5beb..905ed2f1861b 100644 --- a/sound/soc/amd/raven/acp3x-pcm-dma.c +++ b/sound/soc/amd/raven/acp3x-pcm-dma.c @@ -385,9 +385,11 @@ static snd_pcm_uframes_t acp3x_dma_pointer(struct snd_pcm_substream *substream) static int acp3x_dma_new(struct snd_soc_pcm_runtime *rtd) { + struct snd_soc_component *component = snd_soc_rtdcom_lookup(rtd, + DRV_NAME); + struct device *parent = component->dev->parent; snd_pcm_lib_preallocate_pages_for_all(rtd->pcm, SNDRV_DMA_TYPE_DEV, - rtd->pcm->card->dev, - MIN_BUFFER, MAX_BUFFER); + parent, MIN_BUFFER, MAX_BUFFER); return 0; } From 30c21734d853dae99d05a5295a59b7e26ccd5135 Mon Sep 17 00:00:00 2001 From: Vijendar Mukunda Date: Fri, 2 Aug 2019 19:21:24 +0530 Subject: [PATCH 117/482] ASoC: amd: acp3x: use dma address for acp3x dma driver We shouldn't assume CPU physical address we get from page_to_phys() is same as DMA address we get from dma_alloc_coherent(). On x86_64, we won't run into any problem with the assumption when dma_ops is nommu_dma_ops. However, DMA address is IOVA when IOMMU is enabled. And it's most likely different from CPU physical address when AMD IOMMU is not in passthrough mode. This patch fixes page faults when IOMMU is enabled. Signed-off-by: Vijendar Mukunda Link: https://lore.kernel.org/r/1564753899-17124-2-git-send-email-Vijendar.Mukunda@amd.com Signed-off-by: Mark Brown --- sound/soc/amd/raven/acp3x-pcm-dma.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/sound/soc/amd/raven/acp3x-pcm-dma.c b/sound/soc/amd/raven/acp3x-pcm-dma.c index 905ed2f1861b..bc4dfafdfcd1 100644 --- a/sound/soc/amd/raven/acp3x-pcm-dma.c +++ b/sound/soc/amd/raven/acp3x-pcm-dma.c @@ -31,8 +31,8 @@ struct i2s_stream_instance { u16 num_pages; u16 channels; u32 xfer_resolution; - struct page *pg; u64 bytescount; + dma_addr_t dma_addr; void __iomem *acp3x_base; }; @@ -211,9 +211,8 @@ static irqreturn_t i2s_irq_handler(int irq, void *dev_id) static void config_acp3x_dma(struct i2s_stream_instance *rtd, int direction) { u16 page_idx; - u64 addr; u32 low, high, val, acp_fifo_addr; - struct page *pg = rtd->pg; + dma_addr_t addr = rtd->dma_addr; /* 8 scratch registers used to map one 64 bit address */ if (direction == SNDRV_PCM_STREAM_PLAYBACK) @@ -229,7 +228,6 @@ static void config_acp3x_dma(struct i2s_stream_instance *rtd, int direction) for (page_idx = 0; page_idx < rtd->num_pages; page_idx++) { /* Load the low address of page int ACP SRAM through SRBM */ - addr = page_to_phys(pg); low = lower_32_bits(addr); high = upper_32_bits(addr); @@ -239,7 +237,7 @@ static void config_acp3x_dma(struct i2s_stream_instance *rtd, int direction) + 4); /* Move to next physically contiguos page */ val += 8; - pg++; + addr += PAGE_SIZE; } if (direction == SNDRV_PCM_STREAM_PLAYBACK) { @@ -341,7 +339,6 @@ static int acp3x_dma_hw_params(struct snd_pcm_substream *substream, { int status; u64 size; - struct page *pg; struct snd_pcm_runtime *runtime = substream->runtime; struct i2s_stream_instance *rtd = runtime->private_data; @@ -354,9 +351,8 @@ static int acp3x_dma_hw_params(struct snd_pcm_substream *substream, return status; memset(substream->runtime->dma_area, 0, params_buffer_bytes(params)); - pg = virt_to_page(substream->dma_buffer.area); - if (pg) { - rtd->pg = pg; + if (substream->dma_buffer.area) { + rtd->dma_addr = substream->dma_buffer.addr; rtd->num_pages = (PAGE_ALIGN(size) >> PAGE_SHIFT); config_acp3x_dma(rtd, substream->stream); status = 0; From 4853fc32306faaef03d409c38921c401379da2ea Mon Sep 17 00:00:00 2001 From: Matt Coffin Date: Wed, 31 Jul 2019 14:14:35 -0600 Subject: [PATCH 118/482] drm/amd/powerplay: Allow changing of fan_control in smu_v11_0 [Why] Before this change, the fan control state on smu_v11 was not able to be changed because the capability check for checking if the fan control capability existed was inverted. [How] The capability check for fan control in smu_v11_0_auto_fan_control was inverted, to correctly check for the absence, instead of presence of fan control capabilities. Reviewed-by: Evan Quan Signed-off-by: Matt Coffin Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/smu_v11_0.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c index ac5b26228e75..5fde5cf65b42 100644 --- a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c +++ b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c @@ -1391,7 +1391,7 @@ smu_v11_0_smc_fan_control(struct smu_context *smu, bool start) { int ret = 0; - if (smu_feature_is_supported(smu, SMU_FEATURE_FAN_CONTROL_BIT)) + if (!smu_feature_is_supported(smu, SMU_FEATURE_FAN_CONTROL_BIT)) return 0; ret = smu_feature_set_enabled(smu, SMU_FEATURE_FAN_CONTROL_BIT, start); From 8ecd998debe1280ef8aab45049e69ac3d2116398 Mon Sep 17 00:00:00 2001 From: Kevin Wang Date: Fri, 2 Aug 2019 12:01:00 +0800 Subject: [PATCH 119/482] drm/amd/powerplay: honor hw limit on fetching metrics data for navi10 too frequently to update mertrics table will cause smu internal error. Signed-off-by: Kevin Wang Reviewed-by: Evan Quan Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/navi10_ppt.c | 56 +++++++++++++++------- 1 file changed, 38 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/amd/powerplay/navi10_ppt.c b/drivers/gpu/drm/amd/powerplay/navi10_ppt.c index cc0a3b2256af..f63405b1a009 100644 --- a/drivers/gpu/drm/amd/powerplay/navi10_ppt.c +++ b/drivers/gpu/drm/amd/powerplay/navi10_ppt.c @@ -502,6 +502,8 @@ static int navi10_store_powerplay_table(struct smu_context *smu) static int navi10_tables_init(struct smu_context *smu, struct smu_table *tables) { + struct smu_table_context *smu_table = &smu->smu_table; + SMU_TABLE_INIT(tables, SMU_TABLE_PPTABLE, sizeof(PPTable_t), PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM); SMU_TABLE_INIT(tables, SMU_TABLE_WATERMARKS, sizeof(Watermarks_t), @@ -516,9 +518,35 @@ static int navi10_tables_init(struct smu_context *smu, struct smu_table *tables) sizeof(DpmActivityMonitorCoeffInt_t), PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM); + smu_table->metrics_table = kzalloc(sizeof(SmuMetrics_t), GFP_KERNEL); + if (!smu_table->metrics_table) + return -ENOMEM; + smu_table->metrics_time = 0; + return 0; } +static int navi10_get_metrics_table(struct smu_context *smu, + SmuMetrics_t *metrics_table) +{ + struct smu_table_context *smu_table= &smu->smu_table; + int ret = 0; + + if (!smu_table->metrics_time || time_after(jiffies, smu_table->metrics_time + HZ / 1000)) { + ret = smu_update_table(smu, SMU_TABLE_SMU_METRICS, 0, + (void *)smu_table->metrics_table, false); + if (ret) { + pr_info("Failed to export SMU metrics table!\n"); + return ret; + } + smu_table->metrics_time = jiffies; + } + + memcpy(metrics_table, smu_table->metrics_table, sizeof(SmuMetrics_t)); + + return ret; +} + static int navi10_allocate_dpm_context(struct smu_context *smu) { struct smu_dpm_context *smu_dpm = &smu->smu_dpm; @@ -598,15 +626,10 @@ static int navi10_get_current_clk_freq_by_table(struct smu_context *smu, enum smu_clk_type clk_type, uint32_t *value) { - static SmuMetrics_t metrics; int ret = 0, clk_id = 0; + SmuMetrics_t metrics; - if (!value) - return -EINVAL; - - memset(&metrics, 0, sizeof(metrics)); - - ret = smu_update_table(smu, SMU_TABLE_SMU_METRICS, 0, (void *)&metrics, false); + ret = navi10_get_metrics_table(smu, &metrics); if (ret) return ret; @@ -894,8 +917,9 @@ static int navi10_get_gpu_power(struct smu_context *smu, uint32_t *value) if (!value) return -EINVAL; - ret = smu_update_table(smu, SMU_TABLE_SMU_METRICS, 0, (void *)&metrics, - false); + ret = navi10_get_metrics_table(smu, &metrics); + if (ret) + return ret; if (ret) return ret; @@ -914,10 +938,7 @@ static int navi10_get_current_activity_percent(struct smu_context *smu, if (!value) return -EINVAL; - msleep(1); - - ret = smu_update_table(smu, SMU_TABLE_SMU_METRICS, 0, - (void *)&metrics, false); + ret = navi10_get_metrics_table(smu, &metrics); if (ret) return ret; @@ -956,10 +977,9 @@ static int navi10_get_fan_speed_rpm(struct smu_context *smu, if (!speed) return -EINVAL; - memset(&metrics, 0, sizeof(metrics)); - - ret = smu_update_table(smu, SMU_TABLE_SMU_METRICS, 0, - (void *)&metrics, false); + ret = navi10_get_metrics_table(smu, &metrics); + if (ret) + return ret; if (ret) return ret; @@ -1307,7 +1327,7 @@ static int navi10_thermal_get_temperature(struct smu_context *smu, if (!value) return -EINVAL; - ret = smu_update_table(smu, SMU_TABLE_SMU_METRICS, 0, (void *)&metrics, false); + ret = navi10_get_metrics_table(smu, &metrics); if (ret) return ret; From b887011803582c8539cd9f963035eef91373a169 Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Fri, 2 Aug 2019 16:38:32 +0800 Subject: [PATCH 120/482] drm/amd/powerplay: correct navi10 vcn powergate vcn dpm on is a prerequisite for vcn power gate control. Signed-off-by: Evan Quan Reviewed-by: Kenneth Feng Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/amdgpu_smu.c | 4 +++- .../gpu/drm/amd/powerplay/inc/amdgpu_smu.h | 1 + drivers/gpu/drm/amd/powerplay/navi10_ppt.c | 23 ++++++++++++------- 3 files changed, 19 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c index 0685a3388e38..8a3eadeebdcb 100644 --- a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c @@ -315,6 +315,8 @@ int smu_get_power_num_states(struct smu_context *smu, int smu_common_read_sensor(struct smu_context *smu, enum amd_pp_sensors sensor, void *data, uint32_t *size) { + struct smu_power_context *smu_power = &smu->smu_power; + struct smu_power_gate *power_gate = &smu_power->power_gate; int ret = 0; switch (sensor) { @@ -339,7 +341,7 @@ int smu_common_read_sensor(struct smu_context *smu, enum amd_pp_sensors sensor, *size = 4; break; case AMDGPU_PP_SENSOR_VCN_POWER_STATE: - *(uint32_t *)data = smu_feature_is_enabled(smu, SMU_FEATURE_VCN_PG_BIT) ? 1 : 0; + *(uint32_t *)data = power_gate->vcn_gated ? 0 : 1; *size = 4; break; default: diff --git a/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h index 208e6711d506..a0f52c86d8c7 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h +++ b/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h @@ -451,6 +451,7 @@ struct smu_dpm_context { struct smu_power_gate { bool uvd_gated; bool vce_gated; + bool vcn_gated; }; struct smu_power_context { diff --git a/drivers/gpu/drm/amd/powerplay/navi10_ppt.c b/drivers/gpu/drm/amd/powerplay/navi10_ppt.c index f63405b1a009..b81c7e715dc9 100644 --- a/drivers/gpu/drm/amd/powerplay/navi10_ppt.c +++ b/drivers/gpu/drm/amd/powerplay/navi10_ppt.c @@ -605,20 +605,27 @@ static int navi10_set_default_dpm_table(struct smu_context *smu) static int navi10_dpm_set_uvd_enable(struct smu_context *smu, bool enable) { + struct smu_power_context *smu_power = &smu->smu_power; + struct smu_power_gate *power_gate = &smu_power->power_gate; int ret = 0; if (enable) { - ret = smu_send_smc_msg_with_param(smu, SMU_MSG_PowerUpVcn, 1); - if (ret) - return ret; + /* vcn dpm on is a prerequisite for vcn power gate messages */ + if (smu_feature_is_enabled(smu, SMU_FEATURE_VCN_PG_BIT)) { + ret = smu_send_smc_msg_with_param(smu, SMU_MSG_PowerUpVcn, 1); + if (ret) + return ret; + } + power_gate->vcn_gated = false; } else { - ret = smu_send_smc_msg(smu, SMU_MSG_PowerDownVcn); - if (ret) - return ret; + if (smu_feature_is_enabled(smu, SMU_FEATURE_VCN_PG_BIT)) { + ret = smu_send_smc_msg(smu, SMU_MSG_PowerDownVcn); + if (ret) + return ret; + } + power_gate->vcn_gated = true; } - ret = smu_feature_set_enabled(smu, SMU_FEATURE_VCN_PG_BIT, enable); - return ret; } From 9b562437d388582f5463a4c3f60ee97a2d4d8d58 Mon Sep 17 00:00:00 2001 From: Chuhong Yuan Date: Wed, 31 Jul 2019 10:01:40 +0800 Subject: [PATCH 121/482] drm/modes: Fix unterminated strncpy strncpy(dest, src, strlen(src)) leads to unterminated dest, which is dangerous. Fix it by using strscpy. Fixes: 3aeeb13d8996 ("drm/modes: Support modes names on the command line") Signed-off-by: Chuhong Yuan Signed-off-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/20190731020140.3529-1-hslester96@gmail.com --- drivers/gpu/drm/drm_modes.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_modes.c b/drivers/gpu/drm/drm_modes.c index 80fcd5dc1558..b0369e690f36 100644 --- a/drivers/gpu/drm/drm_modes.c +++ b/drivers/gpu/drm/drm_modes.c @@ -1770,7 +1770,9 @@ bool drm_mode_parse_command_line_for_connector(const char *mode_option, } if (named_mode) { - strncpy(mode->name, name, mode_end); + if (mode_end + 1 > DRM_DISPLAY_MODE_LEN) + return false; + strscpy(mode->name, name, mode_end + 1); } else { ret = drm_mode_parse_cmdline_res_mode(name, mode_end, parse_extras, From 294fc7a4c8ec42b3053b1d2e87b0dafef80a76b8 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Thu, 1 Aug 2019 13:47:07 -0700 Subject: [PATCH 122/482] fs: xfs: xfs_log: Don't use KM_MAYFAIL at xfs_log_reserve(). When the system is close-to-OOM, fsync() may fail due to -ENOMEM because xfs_log_reserve() is using KM_MAYFAIL. It is a bad thing to fail writeback operation due to user-triggerable OOM condition. Since we are not using KM_MAYFAIL at xfs_trans_alloc() before calling xfs_log_reserve(), let's use the same flags at xfs_log_reserve(). oom-torture: page allocation failure: order:0, mode:0x46c40(GFP_NOFS|__GFP_NOWARN|__GFP_RETRY_MAYFAIL|__GFP_COMP), nodemask=(null) CPU: 7 PID: 1662 Comm: oom-torture Kdump: loaded Not tainted 5.3.0-rc2+ #925 Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 Call Trace: dump_stack+0x67/0x95 warn_alloc+0xa9/0x140 __alloc_pages_slowpath+0x9a8/0xbce __alloc_pages_nodemask+0x372/0x3b0 alloc_slab_page+0x3a/0x8d0 new_slab+0x330/0x420 ___slab_alloc.constprop.94+0x879/0xb00 __slab_alloc.isra.89.constprop.93+0x43/0x6f kmem_cache_alloc+0x331/0x390 kmem_zone_alloc+0x9f/0x110 [xfs] kmem_zone_alloc+0x9f/0x110 [xfs] xlog_ticket_alloc+0x33/0xd0 [xfs] xfs_log_reserve+0xb4/0x410 [xfs] xfs_trans_reserve+0x1d1/0x2b0 [xfs] xfs_trans_alloc+0xc9/0x250 [xfs] xfs_setfilesize_trans_alloc.isra.27+0x44/0xc0 [xfs] xfs_submit_ioend.isra.28+0xa5/0x180 [xfs] xfs_vm_writepages+0x76/0xa0 [xfs] do_writepages+0x17/0x80 __filemap_fdatawrite_range+0xc1/0xf0 file_write_and_wait_range+0x53/0xa0 xfs_file_fsync+0x87/0x290 [xfs] vfs_fsync_range+0x37/0x80 do_fsync+0x38/0x60 __x64_sys_fsync+0xf/0x20 do_syscall_64+0x4a/0x1c0 entry_SYSCALL_64_after_hwframe+0x49/0xbe Fixes: eb01c9cd87 ("[XFS] Remove the xlog_ticket allocator") Signed-off-by: Tetsuo Handa Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_log.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 00e9f5c388d3..7fc3c1ad36bc 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -429,10 +429,7 @@ xfs_log_reserve( ASSERT(*ticp == NULL); tic = xlog_ticket_alloc(log, unit_bytes, cnt, client, permanent, - KM_SLEEP | KM_MAYFAIL); - if (!tic) - return -ENOMEM; - + KM_SLEEP); *ticp = tic; xlog_grant_push_ail(log, tic->t_cnt ? tic->t_unit_res * tic->t_cnt From 8c39a39e28b86a4021d9be314ce01019bafa5fdc Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 19 Jul 2019 13:48:44 -0400 Subject: [PATCH 123/482] NFSv4: Fix a credential refcount leak in nfs41_check_delegation_stateid It is unsafe to dereference delegation outside the rcu lock, and in any case, the refcount is guaranteed held if cred is non-zero. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 39896afc6edf..a6d73609b163 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2778,8 +2778,7 @@ static void nfs41_check_delegation_stateid(struct nfs4_state *state) if (status == -NFS4ERR_EXPIRED || status == -NFS4ERR_BAD_STATEID) nfs_finish_clear_delegation_stateid(state, &stateid); - if (delegation->cred) - put_cred(cred); + put_cred(cred); } /** From 5eb8d18ca0e001c6055da2b7f30d8f6dca23a44f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 19 Jul 2019 14:08:37 -0400 Subject: [PATCH 124/482] NFSv4: Fix delegation state recovery Once we clear the NFS_DELEGATED_STATE flag, we're telling nfs_delegation_claim_opens() that we're done recovering all open state for that stateid, so we really need to ensure that we test for all open modes that are currently cached and recover them before exiting nfs4_open_delegation_recall(). Fixes: 24311f884189d ("NFSv4: Recovery of recalled read delegations...") Signed-off-by: Trond Myklebust Cc: stable@vger.kernel.org # v4.3+ --- fs/nfs/delegation.c | 2 +- fs/nfs/delegation.h | 2 +- fs/nfs/nfs4proc.c | 27 +++++++++++++-------------- 3 files changed, 15 insertions(+), 16 deletions(-) diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 0ff3facf81da..0af854cce8ff 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -153,7 +153,7 @@ static int nfs_delegation_claim_opens(struct inode *inode, /* Block nfs4_proc_unlck */ mutex_lock(&sp->so_delegreturn_mutex); seq = raw_seqcount_begin(&sp->so_reclaim_seqcount); - err = nfs4_open_delegation_recall(ctx, state, stateid, type); + err = nfs4_open_delegation_recall(ctx, state, stateid); if (!err) err = nfs_delegation_claim_locks(state, stateid); if (!err && read_seqcount_retry(&sp->so_reclaim_seqcount, seq)) diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h index 5799777df5ec..9eb87ae4c982 100644 --- a/fs/nfs/delegation.h +++ b/fs/nfs/delegation.h @@ -63,7 +63,7 @@ void nfs_reap_expired_delegations(struct nfs_client *clp); /* NFSv4 delegation-related procedures */ int nfs4_proc_delegreturn(struct inode *inode, const struct cred *cred, const nfs4_stateid *stateid, int issync); -int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid, fmode_t type); +int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid); int nfs4_lock_delegation_recall(struct file_lock *fl, struct nfs4_state *state, const nfs4_stateid *stateid); bool nfs4_copy_delegation_stateid(struct inode *inode, fmode_t flags, nfs4_stateid *dst, const struct cred **cred); bool nfs4_refresh_delegation_stateid(nfs4_stateid *dst, struct inode *inode); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index a6d73609b163..21e3c159bc69 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2177,12 +2177,10 @@ static int nfs4_handle_delegation_recall_error(struct nfs_server *server, struct case -NFS4ERR_BAD_HIGH_SLOT: case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: case -NFS4ERR_DEADSESSION: - set_bit(NFS_DELEGATED_STATE, &state->flags); nfs4_schedule_session_recovery(server->nfs_client->cl_session, err); return -EAGAIN; case -NFS4ERR_STALE_CLIENTID: case -NFS4ERR_STALE_STATEID: - set_bit(NFS_DELEGATED_STATE, &state->flags); /* Don't recall a delegation if it was lost */ nfs4_schedule_lease_recovery(server->nfs_client); return -EAGAIN; @@ -2203,7 +2201,6 @@ static int nfs4_handle_delegation_recall_error(struct nfs_server *server, struct return -EAGAIN; case -NFS4ERR_DELAY: case -NFS4ERR_GRACE: - set_bit(NFS_DELEGATED_STATE, &state->flags); ssleep(1); return -EAGAIN; case -ENOMEM: @@ -2219,8 +2216,7 @@ static int nfs4_handle_delegation_recall_error(struct nfs_server *server, struct } int nfs4_open_delegation_recall(struct nfs_open_context *ctx, - struct nfs4_state *state, const nfs4_stateid *stateid, - fmode_t type) + struct nfs4_state *state, const nfs4_stateid *stateid) { struct nfs_server *server = NFS_SERVER(state->inode); struct nfs4_opendata *opendata; @@ -2231,20 +2227,23 @@ int nfs4_open_delegation_recall(struct nfs_open_context *ctx, if (IS_ERR(opendata)) return PTR_ERR(opendata); nfs4_stateid_copy(&opendata->o_arg.u.delegation, stateid); - nfs_state_clear_delegation(state); - switch (type & (FMODE_READ|FMODE_WRITE)) { - case FMODE_READ|FMODE_WRITE: - case FMODE_WRITE: + if (!test_bit(NFS_O_RDWR_STATE, &state->flags)) { err = nfs4_open_recover_helper(opendata, FMODE_READ|FMODE_WRITE); if (err) - break; + goto out; + } + if (!test_bit(NFS_O_WRONLY_STATE, &state->flags)) { err = nfs4_open_recover_helper(opendata, FMODE_WRITE); if (err) - break; - /* Fall through */ - case FMODE_READ: - err = nfs4_open_recover_helper(opendata, FMODE_READ); + goto out; } + if (!test_bit(NFS_O_RDONLY_STATE, &state->flags)) { + err = nfs4_open_recover_helper(opendata, FMODE_READ); + if (err) + goto out; + } + nfs_state_clear_delegation(state); +out: nfs4_opendata_put(opendata); return nfs4_handle_delegation_recall_error(server, state, stateid, NULL, err); } From 86dbd08b32838dc8940ccab2eebd47cf80224494 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 22 Jul 2019 09:44:04 +0100 Subject: [PATCH 125/482] NFSv4: Print an error in the syslog when state is marked as irrecoverable When error recovery fails due to a fatal error on the server, ensure we log it in the syslog. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4state.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 9afd051a4876..a71a61e5fe2c 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1463,7 +1463,7 @@ void nfs_inode_find_state_and_recover(struct inode *inode, nfs4_schedule_state_manager(clp); } -static void nfs4_state_mark_open_context_bad(struct nfs4_state *state) +static void nfs4_state_mark_open_context_bad(struct nfs4_state *state, int err) { struct inode *inode = state->inode; struct nfs_inode *nfsi = NFS_I(inode); @@ -1474,6 +1474,8 @@ static void nfs4_state_mark_open_context_bad(struct nfs4_state *state) if (ctx->state != state) continue; set_bit(NFS_CONTEXT_BAD, &ctx->flags); + pr_warn("NFSv4: state recovery failed for open file %pd2, " + "error = %d\n", ctx->dentry, err); } rcu_read_unlock(); } @@ -1481,7 +1483,7 @@ static void nfs4_state_mark_open_context_bad(struct nfs4_state *state) static void nfs4_state_mark_recovery_failed(struct nfs4_state *state, int error) { set_bit(NFS_STATE_RECOVERY_FAILED, &state->flags); - nfs4_state_mark_open_context_bad(state); + nfs4_state_mark_open_context_bad(state, error); } From c34fae003c79570b6c930b425fea3f0b7b1e7056 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 22 Jul 2019 09:54:29 +0100 Subject: [PATCH 126/482] NFSv4: When recovering state fails with EAGAIN, retry the same recovery If the server returns with EAGAIN when we're trying to recover from a server reboot, we currently delay for 1 second, but then mark the stateid as needing recovery after the grace period has expired. Instead, we should just retry the same recovery process immediately after the 1 second delay. Break out of the loop after 10 retries. Fixes: 35a61606a612 ("NFS: Reduce indentation of the switch statement...") Signed-off-by: Trond Myklebust --- fs/nfs/nfs4state.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index a71a61e5fe2c..d03b9cf42bd0 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1607,6 +1607,7 @@ static int __nfs4_reclaim_open_state(struct nfs4_state_owner *sp, struct nfs4_st static int nfs4_reclaim_open_state(struct nfs4_state_owner *sp, const struct nfs4_state_recovery_ops *ops) { struct nfs4_state *state; + unsigned int loop = 0; int status = 0; /* Note: we rely on the sp->so_states list being ordered @@ -1633,8 +1634,10 @@ static int nfs4_reclaim_open_state(struct nfs4_state_owner *sp, const struct nfs switch (status) { default: - if (status >= 0) + if (status >= 0) { + loop = 0; break; + } printk(KERN_ERR "NFS: %s: unhandled error %d\n", __func__, status); /* Fall through */ case -ENOENT: @@ -1648,6 +1651,10 @@ static int nfs4_reclaim_open_state(struct nfs4_state_owner *sp, const struct nfs break; case -EAGAIN: ssleep(1); + if (loop++ < 10) { + set_bit(ops->state_flag_bit, &state->flags); + break; + } /* Fall through */ case -NFS4ERR_ADMIN_REVOKED: case -NFS4ERR_STALE_STATEID: From 731c74dd987e4f1f3179223314c039ce38dd4f19 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 22 Jul 2019 18:06:17 +0100 Subject: [PATCH 127/482] NFSv4: Report the error from nfs4_select_rw_stateid() In pnfs_update_layout() ensure that we do report any fatal errors from nfs4_select_rw_stateid(). Fixes: d9aba2b40de6 ("NFSv4: Don't use the zero stateid with layoutget") Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 75bd5b552ba4..4525d5acae38 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1903,12 +1903,6 @@ pnfs_update_layout(struct inode *ino, goto out_unlock; } - if (!nfs4_valid_open_stateid(ctx->state)) { - trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg, - PNFS_UPDATE_LAYOUT_INVALID_OPEN); - goto out_unlock; - } - /* * Choose a stateid for the LAYOUTGET. If we don't have a layout * stateid, or it has been invalidated, then we must use the open @@ -1939,6 +1933,7 @@ pnfs_update_layout(struct inode *ino, iomode == IOMODE_RW ? FMODE_WRITE : FMODE_READ, NULL, &stateid, NULL); if (status != 0) { + lseg = ERR_PTR(status); trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg, PNFS_UPDATE_LAYOUT_INVALID_OPEN); From 27a30cf64a5cbe2105e4ff9613246b32d584766a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 22 Jul 2019 18:32:59 +0100 Subject: [PATCH 128/482] NFSv4.1: Fix open stateid recovery The logic for checking in nfs41_check_open_stateid() whether the state is supported by a delegation is inverted. In addition, it makes more sense to perform that check before we check for expired locks. Fixes: 8a64c4ef106d1 ("NFSv4.1: Even if the stateid is OK,...") Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 65 +++++++++++++++++++++++++++-------------------- 1 file changed, 38 insertions(+), 27 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 21e3c159bc69..c9e14ce0b7b2 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1683,6 +1683,14 @@ static void nfs_state_set_open_stateid(struct nfs4_state *state, write_sequnlock(&state->seqlock); } +static void nfs_state_clear_open_state_flags(struct nfs4_state *state) +{ + clear_bit(NFS_O_RDWR_STATE, &state->flags); + clear_bit(NFS_O_WRONLY_STATE, &state->flags); + clear_bit(NFS_O_RDONLY_STATE, &state->flags); + clear_bit(NFS_OPEN_STATE, &state->flags); +} + static void nfs_state_set_delegation(struct nfs4_state *state, const nfs4_stateid *deleg_stateid, fmode_t fmode) @@ -2074,13 +2082,7 @@ static int nfs4_open_recover(struct nfs4_opendata *opendata, struct nfs4_state * { int ret; - /* Don't trigger recovery in nfs_test_and_clear_all_open_stateid */ - clear_bit(NFS_O_RDWR_STATE, &state->flags); - clear_bit(NFS_O_WRONLY_STATE, &state->flags); - clear_bit(NFS_O_RDONLY_STATE, &state->flags); /* memory barrier prior to reading state->n_* */ - clear_bit(NFS_DELEGATED_STATE, &state->flags); - clear_bit(NFS_OPEN_STATE, &state->flags); smp_rmb(); ret = nfs4_open_recover_helper(opendata, FMODE_READ|FMODE_WRITE); if (ret != 0) @@ -2156,6 +2158,8 @@ static int nfs4_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *sta ctx = nfs4_state_find_open_context(state); if (IS_ERR(ctx)) return -EAGAIN; + clear_bit(NFS_DELEGATED_STATE, &state->flags); + nfs_state_clear_open_state_flags(state); ret = nfs4_do_open_reclaim(ctx, state); put_nfs_open_context(ctx); return ret; @@ -2697,6 +2701,7 @@ static int nfs40_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *st { /* NFSv4.0 doesn't allow for delegation recovery on open expire */ nfs40_clear_delegation_stateid(state); + nfs_state_clear_open_state_flags(state); return nfs4_open_expired(sp, state); } @@ -2739,13 +2744,13 @@ static int nfs41_test_and_free_expired_stateid(struct nfs_server *server, return -NFS4ERR_EXPIRED; } -static void nfs41_check_delegation_stateid(struct nfs4_state *state) +static int nfs41_check_delegation_stateid(struct nfs4_state *state) { struct nfs_server *server = NFS_SERVER(state->inode); nfs4_stateid stateid; struct nfs_delegation *delegation; const struct cred *cred = NULL; - int status; + int status, ret = NFS_OK; /* Get the delegation credential for use by test/free_stateid */ rcu_read_lock(); @@ -2753,20 +2758,15 @@ static void nfs41_check_delegation_stateid(struct nfs4_state *state) if (delegation == NULL) { rcu_read_unlock(); nfs_state_clear_delegation(state); - return; + return NFS_OK; } nfs4_stateid_copy(&stateid, &delegation->stateid); - if (test_bit(NFS_DELEGATION_REVOKED, &delegation->flags)) { - rcu_read_unlock(); - nfs_state_clear_delegation(state); - return; - } if (!test_and_clear_bit(NFS_DELEGATION_TEST_EXPIRED, &delegation->flags)) { rcu_read_unlock(); - return; + return NFS_OK; } if (delegation->cred) @@ -2776,8 +2776,24 @@ static void nfs41_check_delegation_stateid(struct nfs4_state *state) trace_nfs4_test_delegation_stateid(state, NULL, status); if (status == -NFS4ERR_EXPIRED || status == -NFS4ERR_BAD_STATEID) nfs_finish_clear_delegation_stateid(state, &stateid); + else + ret = status; put_cred(cred); + return ret; +} + +static void nfs41_delegation_recover_stateid(struct nfs4_state *state) +{ + nfs4_stateid tmp; + + if (test_bit(NFS_DELEGATED_STATE, &state->flags) && + nfs4_copy_delegation_stateid(state->inode, state->state, + &tmp, NULL) && + nfs4_stateid_match_other(&state->stateid, &tmp)) + nfs_state_set_delegation(state, &tmp, state->state); + else + nfs_state_clear_delegation(state); } /** @@ -2847,21 +2863,12 @@ static int nfs41_check_open_stateid(struct nfs4_state *state) const struct cred *cred = state->owner->so_cred; int status; - if (test_bit(NFS_OPEN_STATE, &state->flags) == 0) { - if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0) { - if (nfs4_have_delegation(state->inode, state->state)) - return NFS_OK; - return -NFS4ERR_OPENMODE; - } + if (test_bit(NFS_OPEN_STATE, &state->flags) == 0) return -NFS4ERR_BAD_STATEID; - } status = nfs41_test_and_free_expired_stateid(server, stateid, cred); trace_nfs4_test_open_stateid(state, NULL, status); if (status == -NFS4ERR_EXPIRED || status == -NFS4ERR_BAD_STATEID) { - clear_bit(NFS_O_RDONLY_STATE, &state->flags); - clear_bit(NFS_O_WRONLY_STATE, &state->flags); - clear_bit(NFS_O_RDWR_STATE, &state->flags); - clear_bit(NFS_OPEN_STATE, &state->flags); + nfs_state_clear_open_state_flags(state); stateid->type = NFS4_INVALID_STATEID_TYPE; return status; } @@ -2874,7 +2881,11 @@ static int nfs41_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *st { int status; - nfs41_check_delegation_stateid(state); + status = nfs41_check_delegation_stateid(state); + if (status != NFS_OK) + return status; + nfs41_delegation_recover_stateid(state); + status = nfs41_check_expired_locks(state); if (status != NFS_OK) return status; From ad11408970df79d5f481aa9964e91f183133424c Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 26 Jul 2019 14:40:53 +0100 Subject: [PATCH 129/482] NFSv4.1: Only reap expired delegations Fix nfs_reap_expired_delegations() to ensure that we only reap delegations that are actually expired, rather than triggering on random errors. Fixes: 45870d6909d5a ("NFSv4.1: Test delegation stateids when server...") Signed-off-by: Trond Myklebust --- fs/nfs/delegation.c | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 0af854cce8ff..071b90a45933 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -1046,6 +1046,22 @@ void nfs_test_expired_all_delegations(struct nfs_client *clp) nfs4_schedule_state_manager(clp); } +static void +nfs_delegation_test_free_expired(struct inode *inode, + nfs4_stateid *stateid, + const struct cred *cred) +{ + struct nfs_server *server = NFS_SERVER(inode); + const struct nfs4_minor_version_ops *ops = server->nfs_client->cl_mvops; + int status; + + if (!cred) + return; + status = ops->test_and_free_expired(server, stateid, cred); + if (status == -NFS4ERR_EXPIRED || status == -NFS4ERR_BAD_STATEID) + nfs_remove_bad_delegation(inode, stateid); +} + /** * nfs_reap_expired_delegations - reap expired delegations * @clp: nfs_client to process @@ -1057,7 +1073,6 @@ void nfs_test_expired_all_delegations(struct nfs_client *clp) */ void nfs_reap_expired_delegations(struct nfs_client *clp) { - const struct nfs4_minor_version_ops *ops = clp->cl_mvops; struct nfs_delegation *delegation; struct nfs_server *server; struct inode *inode; @@ -1088,11 +1103,7 @@ void nfs_reap_expired_delegations(struct nfs_client *clp) nfs4_stateid_copy(&stateid, &delegation->stateid); clear_bit(NFS_DELEGATION_TEST_EXPIRED, &delegation->flags); rcu_read_unlock(); - if (cred != NULL && - ops->test_and_free_expired(server, &stateid, cred) < 0) { - nfs_revoke_delegation(inode, &stateid); - nfs_inode_find_state_and_recover(inode, &stateid); - } + nfs_delegation_test_free_expired(inode, &stateid, cred); put_cred(cred); if (nfs4_server_rebooted(clp)) { nfs_inode_mark_test_expired_delegation(server,inode); From e3c8dc761ead061da2220ee8f8132f729ac3ddfe Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 29 Jul 2019 18:25:00 +0100 Subject: [PATCH 130/482] NFSv4: Check the return value of update_open_stateid() Ensure that we always check the return value of update_open_stateid() so that we can retry if the update of local state failed. This fixes infinite looping on state recovery. Fixes: e23008ec81ef3 ("NFSv4 reduce attribute requests for open reclaim") Signed-off-by: Trond Myklebust Cc: stable@vger.kernel.org # v3.7+ --- fs/nfs/nfs4proc.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index c9e14ce0b7b2..3e0b93f2b61a 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1915,8 +1915,9 @@ _nfs4_opendata_reclaim_to_nfs4_state(struct nfs4_opendata *data) if (data->o_res.delegation_type != 0) nfs4_opendata_check_deleg(data, state); update: - update_open_stateid(state, &data->o_res.stateid, NULL, - data->o_arg.fmode); + if (!update_open_stateid(state, &data->o_res.stateid, + NULL, data->o_arg.fmode)) + return ERR_PTR(-EAGAIN); refcount_inc(&state->count); return state; @@ -1981,8 +1982,11 @@ _nfs4_opendata_to_nfs4_state(struct nfs4_opendata *data) if (data->o_res.delegation_type != 0) nfs4_opendata_check_deleg(data, state); - update_open_stateid(state, &data->o_res.stateid, NULL, - data->o_arg.fmode); + if (!update_open_stateid(state, &data->o_res.stateid, + NULL, data->o_arg.fmode)) { + nfs4_put_open_state(state); + state = ERR_PTR(-EAGAIN); + } out: nfs_release_seqid(data->o_arg.seqid); return state; From c77e22834ae9a11891cb613bd9a551be1b94f2bc Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 3 Aug 2019 10:11:27 -0400 Subject: [PATCH 131/482] NFSv4: Fix a potential sleep while atomic in nfs4_do_reclaim() John Hubbard reports seeing the following stack trace: nfs4_do_reclaim rcu_read_lock /* we are now in_atomic() and must not sleep */ nfs4_purge_state_owners nfs4_free_state_owner nfs4_destroy_seqid_counter rpc_destroy_wait_queue cancel_delayed_work_sync __cancel_work_timer __flush_work start_flush_work might_sleep: (kernel/workqueue.c:2975: BUG) The solution is to separate out the freeing of the state owners from nfs4_purge_state_owners(), and perform that outside the atomic context. Reported-by: John Hubbard Fixes: 0aaaf5c424c7f ("NFS: Cache state owners after files are closed") Signed-off-by: Trond Myklebust --- fs/nfs/nfs4_fs.h | 3 ++- fs/nfs/nfs4client.c | 5 ++++- fs/nfs/nfs4state.c | 27 ++++++++++++++++++++++----- 3 files changed, 28 insertions(+), 7 deletions(-) diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index d778dad9a75e..3564da1ba8a1 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -465,7 +465,8 @@ static inline void nfs4_schedule_session_recovery(struct nfs4_session *session, extern struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *, const struct cred *, gfp_t); extern void nfs4_put_state_owner(struct nfs4_state_owner *); -extern void nfs4_purge_state_owners(struct nfs_server *); +extern void nfs4_purge_state_owners(struct nfs_server *, struct list_head *); +extern void nfs4_free_state_owners(struct list_head *head); extern struct nfs4_state * nfs4_get_open_state(struct inode *, struct nfs4_state_owner *); extern void nfs4_put_open_state(struct nfs4_state *); extern void nfs4_close_state(struct nfs4_state *, fmode_t); diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index 616393a01c06..da6204025a2d 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -758,9 +758,12 @@ int nfs41_walk_client_list(struct nfs_client *new, static void nfs4_destroy_server(struct nfs_server *server) { + LIST_HEAD(freeme); + nfs_server_return_all_delegations(server); unset_pnfs_layoutdriver(server); - nfs4_purge_state_owners(server); + nfs4_purge_state_owners(server, &freeme); + nfs4_free_state_owners(&freeme); } /* diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index d03b9cf42bd0..a4e866b2b43b 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -624,24 +624,39 @@ void nfs4_put_state_owner(struct nfs4_state_owner *sp) /** * nfs4_purge_state_owners - Release all cached state owners * @server: nfs_server with cached state owners to release + * @head: resulting list of state owners * * Called at umount time. Remaining state owners will be on * the LRU with ref count of zero. + * Note that the state owners are not freed, but are added + * to the list @head, which can later be used as an argument + * to nfs4_free_state_owners. */ -void nfs4_purge_state_owners(struct nfs_server *server) +void nfs4_purge_state_owners(struct nfs_server *server, struct list_head *head) { struct nfs_client *clp = server->nfs_client; struct nfs4_state_owner *sp, *tmp; - LIST_HEAD(doomed); spin_lock(&clp->cl_lock); list_for_each_entry_safe(sp, tmp, &server->state_owners_lru, so_lru) { - list_move(&sp->so_lru, &doomed); + list_move(&sp->so_lru, head); nfs4_remove_state_owner_locked(sp); } spin_unlock(&clp->cl_lock); +} - list_for_each_entry_safe(sp, tmp, &doomed, so_lru) { +/** + * nfs4_purge_state_owners - Release all cached state owners + * @head: resulting list of state owners + * + * Frees a list of state owners that was generated by + * nfs4_purge_state_owners + */ +void nfs4_free_state_owners(struct list_head *head) +{ + struct nfs4_state_owner *sp, *tmp; + + list_for_each_entry_safe(sp, tmp, head, so_lru) { list_del(&sp->so_lru); nfs4_free_state_owner(sp); } @@ -1865,12 +1880,13 @@ static int nfs4_do_reclaim(struct nfs_client *clp, const struct nfs4_state_recov struct nfs4_state_owner *sp; struct nfs_server *server; struct rb_node *pos; + LIST_HEAD(freeme); int status = 0; restart: rcu_read_lock(); list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { - nfs4_purge_state_owners(server); + nfs4_purge_state_owners(server, &freeme); spin_lock(&clp->cl_lock); for (pos = rb_first(&server->state_owners); pos != NULL; @@ -1899,6 +1915,7 @@ static int nfs4_do_reclaim(struct nfs_client *clp, const struct nfs4_state_recov spin_unlock(&clp->cl_lock); } rcu_read_unlock(); + nfs4_free_state_owners(&freeme); return 0; } From 09a54f0ebfe263bc27c90bbd80187b9a93283887 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 3 Aug 2019 10:28:18 -0400 Subject: [PATCH 132/482] NFSv4: Fix an Oops in nfs4_do_setattr If the user specifies an open mode of 3, then we don't have a NFSv4 state attached to the context, and so we Oops when we try to dereference it. Reported-by: Olga Kornievskaia Fixes: 29b59f9416937 ("NFSv4: change nfs4_do_setattr to take...") Signed-off-by: Trond Myklebust Cc: stable@vger.kernel.org # v4.10: 991eedb1371dc: NFSv4: Only pass the... Cc: stable@vger.kernel.org # v4.10+ --- fs/nfs/nfs4proc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 3e0b93f2b61a..12b2b65ad8a8 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3214,7 +3214,7 @@ static int _nfs4_do_setattr(struct inode *inode, if (nfs4_copy_delegation_stateid(inode, FMODE_WRITE, &arg->stateid, &delegation_cred)) { /* Use that stateid */ - } else if (ctx != NULL) { + } else if (ctx != NULL && ctx->state) { struct nfs_lock_context *l_ctx; if (!nfs4_valid_open_stateid(ctx->state)) return -EBADF; From dea1bb35c5f35e0577cfc61f79261d80b8715221 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 3 Aug 2019 13:39:24 -0400 Subject: [PATCH 133/482] NFS: Fix regression whereby fscache errors are appearing on 'nofsc' mounts People are reporing seeing fscache errors being reported concerning duplicate cookies even in cases where they are not setting up fscache at all. The rule needs to be that if fscache is not enabled, then it should have no side effects at all. To ensure this is the case, we disable fscache completely on all superblocks for which the 'fsc' mount option was not set. In order to avoid issues with '-oremount', we also disable the ability to turn fscache on via remount. Fixes: f1fe29b4a02d ("NFS: Use i_writecount to control whether...") Link: https://bugzilla.kernel.org/show_bug.cgi?id=200145 Signed-off-by: Trond Myklebust Cc: Steve Dickson Cc: David Howells --- fs/nfs/fscache.c | 7 ++++++- fs/nfs/fscache.h | 2 +- fs/nfs/super.c | 1 + 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/fs/nfs/fscache.c b/fs/nfs/fscache.c index 53507aa96b0b..3800ab6f08fa 100644 --- a/fs/nfs/fscache.c +++ b/fs/nfs/fscache.c @@ -114,6 +114,10 @@ void nfs_fscache_get_super_cookie(struct super_block *sb, const char *uniq, int struct rb_node **p, *parent; int diff; + nfss->fscache_key = NULL; + nfss->fscache = NULL; + if (!(nfss->options & NFS_OPTION_FSCACHE)) + return; if (!uniq) { uniq = ""; ulen = 1; @@ -226,10 +230,11 @@ void nfs_fscache_release_super_cookie(struct super_block *sb) void nfs_fscache_init_inode(struct inode *inode) { struct nfs_fscache_inode_auxdata auxdata; + struct nfs_server *nfss = NFS_SERVER(inode); struct nfs_inode *nfsi = NFS_I(inode); nfsi->fscache = NULL; - if (!S_ISREG(inode->i_mode)) + if (!(nfss->fscache && S_ISREG(inode->i_mode))) return; memset(&auxdata, 0, sizeof(auxdata)); diff --git a/fs/nfs/fscache.h b/fs/nfs/fscache.h index 25a75e40d91d..ad041cfbf9ec 100644 --- a/fs/nfs/fscache.h +++ b/fs/nfs/fscache.h @@ -182,7 +182,7 @@ static inline void nfs_fscache_wait_on_invalidate(struct inode *inode) */ static inline const char *nfs_server_fscache_state(struct nfs_server *server) { - if (server->fscache && (server->options & NFS_OPTION_FSCACHE)) + if (server->fscache) return "yes"; return "no "; } diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 628631e2e34f..703f595dce90 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -2260,6 +2260,7 @@ nfs_compare_remount_data(struct nfs_server *nfss, data->acdirmin != nfss->acdirmin / HZ || data->acdirmax != nfss->acdirmax / HZ || data->timeo != (10U * nfss->client->cl_timeout->to_initval / HZ) || + (data->options & NFS_OPTION_FSCACHE) != (nfss->options & NFS_OPTION_FSCACHE) || data->nfs_server.port != nfss->port || data->nfs_server.addrlen != nfss->nfs_client->cl_addrlen || !rpc_cmp_addr((struct sockaddr *)&data->nfs_server.address, From 247bc9470b1eeefc7b58cdf2c39f2866ba651509 Mon Sep 17 00:00:00 2001 From: Steve French Date: Tue, 23 Jul 2019 22:14:29 -0500 Subject: [PATCH 134/482] cifs: fix rmmod regression in cifs.ko caused by force_sig changes Fixes: 72abe3bcf091 ("signal/cifs: Fix cifs_put_tcp_session to call send_sig instead of force_sig") The global change from force_sig caused module unloading of cifs.ko to fail (since the cifsd process could not be killed, "rmmod cifs" now would always fail) Signed-off-by: Steve French Reviewed-by: Ronnie Sahlberg CC: Eric W. Biederman --- fs/cifs/connect.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index a4830ced0f98..a15a6e738eb5 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -1113,6 +1113,7 @@ cifs_demultiplex_thread(void *p) mempool_resize(cifs_req_poolp, length + cifs_min_rcv); set_freezable(); + allow_signal(SIGKILL); while (server->tcpStatus != CifsExiting) { if (try_to_freeze()) continue; From 0584674d7fe407c2db91a009294d5ec19f7cb1d5 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Thu, 4 Jul 2019 10:45:34 +0000 Subject: [PATCH 135/482] drm/i915: fix possible memory leak in intel_hdcp_auth_downstream() 'ksv_fifo' is malloced in intel_hdcp_auth_downstream() and should be freed before leaving from the error handling cases, otherwise it will cause memory leak. Fixes: f26ae6a652f2 ("drm/i915: SRM revocation check for HDCP1.4 and 2.2") Signed-off-by: Wei Yongjun Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20190704104534.12508-1-weiyongjun1@huawei.com (cherry picked from commit de70fdd7d24cd07e51fbec420f8704d956a47949) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_hdcp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_hdcp.c b/drivers/gpu/drm/i915/display/intel_hdcp.c index bc3a94d491c4..27bd7276a82d 100644 --- a/drivers/gpu/drm/i915/display/intel_hdcp.c +++ b/drivers/gpu/drm/i915/display/intel_hdcp.c @@ -536,7 +536,8 @@ int intel_hdcp_auth_downstream(struct intel_connector *connector) if (drm_hdcp_check_ksvs_revoked(dev, ksv_fifo, num_downstream)) { DRM_ERROR("Revoked Ksv(s) in ksv_fifo\n"); - return -EPERM; + ret = -EPERM; + goto err; } /* From 73a0ff0b30af79bf0303d557eb82f1d1945bb6ee Mon Sep 17 00:00:00 2001 From: Stanislav Lisovskiy Date: Fri, 12 Jul 2019 11:19:38 +0300 Subject: [PATCH 136/482] drm/i915: Fix wrong escape clock divisor init for GLK According to Bspec clock divisor registers in GeminiLake should be initialized by shifting 1(<<) to amount of correspondent divisor. While i915 was writing all this time that value as is. Surprisingly that it by accident worked, until we met some issues with Microtech Etab. v2: Added Fixes tag and cc v3: Added stable to cc as well. Signed-off-by: Stanislav Lisovskiy Reviewed-by: Vandita Kulkarni Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=108826 Fixes: bcc657004841 ("drm/i915/glk: Program txesc clock divider for GLK") Cc: Deepak M Cc: Madhav Chauhan Cc: Jani Nikula Cc: Jani Nikula Cc: Joonas Lahtinen Cc: Rodrigo Vivi Cc: intel-gfx@lists.freedesktop.org Cc: stable@vger.kernel.org Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20190712081938.14185-1-stanislav.lisovskiy@intel.com (cherry picked from commit ce52ad5dd52cfaf3398058384e0ff94134bbd89c) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/vlv_dsi_pll.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/display/vlv_dsi_pll.c b/drivers/gpu/drm/i915/display/vlv_dsi_pll.c index 99cc3e2e9c2c..f016a776a39e 100644 --- a/drivers/gpu/drm/i915/display/vlv_dsi_pll.c +++ b/drivers/gpu/drm/i915/display/vlv_dsi_pll.c @@ -396,8 +396,8 @@ static void glk_dsi_program_esc_clock(struct drm_device *dev, else txesc2_div = 10; - I915_WRITE(MIPIO_TXESC_CLK_DIV1, txesc1_div & GLK_TX_ESC_CLK_DIV1_MASK); - I915_WRITE(MIPIO_TXESC_CLK_DIV2, txesc2_div & GLK_TX_ESC_CLK_DIV2_MASK); + I915_WRITE(MIPIO_TXESC_CLK_DIV1, (1 << (txesc1_div - 1)) & GLK_TX_ESC_CLK_DIV1_MASK); + I915_WRITE(MIPIO_TXESC_CLK_DIV2, (1 << (txesc2_div - 1)) & GLK_TX_ESC_CLK_DIV2_MASK); } /* Program BXT Mipi clocks and dividers */ From 0ca1bbb7f4212aeef83a67a8aed9da1d84567fcc Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 30 Jul 2019 14:57:18 +0200 Subject: [PATCH 137/482] selftests: netfilter: extend flowtable test script for ipsec 'flow offload' expression should not offload flows that will be subject to ipsec, but it does. This results in a connectivity blackhole for the affected flows -- first packets will go through (offload happens after established state is reached), but all remaining ones bypass ipsec encryption and are thus discarded by the peer. This can be worked around by adding "rt ipsec exists accept" before the 'flow offload' rule matches. This test case will fail, support for such flows is added in next patch. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- .../selftests/netfilter/nft_flowtable.sh | 48 +++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/tools/testing/selftests/netfilter/nft_flowtable.sh b/tools/testing/selftests/netfilter/nft_flowtable.sh index fe52488a6f72..16571ac1dab4 100755 --- a/tools/testing/selftests/netfilter/nft_flowtable.sh +++ b/tools/testing/selftests/netfilter/nft_flowtable.sh @@ -321,4 +321,52 @@ else ip netns exec nsr1 nft list ruleset fi +KEY_SHA="0x"$(ps -xaf | sha1sum | cut -d " " -f 1) +KEY_AES="0x"$(ps -xaf | md5sum | cut -d " " -f 1) +SPI1=$RANDOM +SPI2=$RANDOM + +if [ $SPI1 -eq $SPI2 ]; then + SPI2=$((SPI2+1)) +fi + +do_esp() { + local ns=$1 + local me=$2 + local remote=$3 + local lnet=$4 + local rnet=$5 + local spi_out=$6 + local spi_in=$7 + + ip -net $ns xfrm state add src $remote dst $me proto esp spi $spi_in enc aes $KEY_AES auth sha1 $KEY_SHA mode tunnel sel src $rnet dst $lnet + ip -net $ns xfrm state add src $me dst $remote proto esp spi $spi_out enc aes $KEY_AES auth sha1 $KEY_SHA mode tunnel sel src $lnet dst $rnet + + # to encrypt packets as they go out (includes forwarded packets that need encapsulation) + ip -net $ns xfrm policy add src $lnet dst $rnet dir out tmpl src $me dst $remote proto esp mode tunnel priority 1 action allow + # to fwd decrypted packets after esp processing: + ip -net $ns xfrm policy add src $rnet dst $lnet dir fwd tmpl src $remote dst $me proto esp mode tunnel priority 1 action allow + +} + +do_esp nsr1 192.168.10.1 192.168.10.2 10.0.1.0/24 10.0.2.0/24 $SPI1 $SPI2 + +do_esp nsr2 192.168.10.2 192.168.10.1 10.0.2.0/24 10.0.1.0/24 $SPI2 $SPI1 + +ip netns exec nsr1 nft delete table ip nat + +# restore default routes +ip -net ns2 route del 192.168.10.1 via 10.0.2.1 +ip -net ns2 route add default via 10.0.2.1 +ip -net ns2 route add default via dead:2::1 + +test_tcp_forwarding ns1 ns2 +if [ $? -eq 0 ] ;then + echo "PASS: ipsec tunnel mode for ns1/ns2" +else + echo "FAIL: ipsec tunnel mode for ns1/ns2" + ip netns exec nsr1 nft list ruleset 1>&2 + ip netns exec nsr1 cat /proc/net/xfrm_stat 1>&2 +fi + exit $ret From 589b474a4b7ce409d6821ef17234a995841bd131 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 30 Jul 2019 14:57:19 +0200 Subject: [PATCH 138/482] netfilter: nf_flow_table: fix offload for flows that are subject to xfrm This makes the previously added 'encap test' pass. Because its possible that the xfrm dst entry becomes stale while such a flow is offloaded, we need to call dst_check() -- the notifier that handles this for non-tunneled traffic isn't sufficient, because SA or or policies might have changed. If dst becomes stale the flow offload entry will be tagged for teardown and packets will be passed to 'classic' forwarding path. Removing the entry right away is problematic, as this would introduce a race condition with the gc worker. In case flow is long-lived, it could eventually be offloaded again once the gc worker removes the entry from the flow table. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_flow_table_ip.c | 43 ++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c index cdfc33517e85..d68c801dd614 100644 --- a/net/netfilter/nf_flow_table_ip.c +++ b/net/netfilter/nf_flow_table_ip.c @@ -214,6 +214,25 @@ static bool nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu) return true; } +static int nf_flow_offload_dst_check(struct dst_entry *dst) +{ + if (unlikely(dst_xfrm(dst))) + return dst_check(dst, 0) ? 0 : -1; + + return 0; +} + +static unsigned int nf_flow_xmit_xfrm(struct sk_buff *skb, + const struct nf_hook_state *state, + struct dst_entry *dst) +{ + skb_orphan(skb); + skb_dst_set_noref(skb, dst); + skb->tstamp = 0; + dst_output(state->net, state->sk, skb); + return NF_STOLEN; +} + unsigned int nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) @@ -254,6 +273,11 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb, if (nf_flow_state_check(flow, ip_hdr(skb)->protocol, skb, thoff)) return NF_ACCEPT; + if (nf_flow_offload_dst_check(&rt->dst)) { + flow_offload_teardown(flow); + return NF_ACCEPT; + } + if (nf_flow_nat_ip(flow, skb, thoff, dir) < 0) return NF_DROP; @@ -261,6 +285,13 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb, iph = ip_hdr(skb); ip_decrease_ttl(iph); + if (unlikely(dst_xfrm(&rt->dst))) { + memset(skb->cb, 0, sizeof(struct inet_skb_parm)); + IPCB(skb)->iif = skb->dev->ifindex; + IPCB(skb)->flags = IPSKB_FORWARDED; + return nf_flow_xmit_xfrm(skb, state, &rt->dst); + } + skb->dev = outdev; nexthop = rt_nexthop(rt, flow->tuplehash[!dir].tuple.src_v4.s_addr); skb_dst_set_noref(skb, &rt->dst); @@ -467,6 +498,11 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb, sizeof(*ip6h))) return NF_ACCEPT; + if (nf_flow_offload_dst_check(&rt->dst)) { + flow_offload_teardown(flow); + return NF_ACCEPT; + } + if (skb_try_make_writable(skb, sizeof(*ip6h))) return NF_DROP; @@ -477,6 +513,13 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb, ip6h = ipv6_hdr(skb); ip6h->hop_limit--; + if (unlikely(dst_xfrm(&rt->dst))) { + memset(skb->cb, 0, sizeof(struct inet6_skb_parm)); + IP6CB(skb)->iif = skb->dev->ifindex; + IP6CB(skb)->flags = IP6SKB_FORWARDED; + return nf_flow_xmit_xfrm(skb, state, &rt->dst); + } + skb->dev = outdev; nexthop = rt6_nexthop(rt, &flow->tuplehash[!dir].tuple.src_v6); skb_dst_set_noref(skb, &rt->dst); From a48d06f9b7cedbb8ad7804d1720168b7ee6a34e7 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Thu, 1 Aug 2019 11:30:13 +0800 Subject: [PATCH 139/482] KVM: LAPIC: Don't need to wakeup vCPU twice afer timer fire MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit kvm_set_pending_timer() will take care to wake up the sleeping vCPU which has pending timer, don't need to check this in apic_timer_expired() again. Cc: Paolo Bonzini Cc: Radim Krčmář Signed-off-by: Wanpeng Li Signed-off-by: Paolo Bonzini --- arch/x86/kvm/lapic.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 0aa158657f20..685d17c11461 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -1548,7 +1548,6 @@ static void kvm_apic_inject_pending_timer_irqs(struct kvm_lapic *apic) static void apic_timer_expired(struct kvm_lapic *apic) { struct kvm_vcpu *vcpu = apic->vcpu; - struct swait_queue_head *q = &vcpu->wq; struct kvm_timer *ktimer = &apic->lapic_timer; if (atomic_read(&apic->lapic_timer.pending)) @@ -1566,13 +1565,6 @@ static void apic_timer_expired(struct kvm_lapic *apic) atomic_inc(&apic->lapic_timer.pending); kvm_set_pending_timer(vcpu); - - /* - * For x86, the atomic_inc() is serialized, thus - * using swait_active() is safe. - */ - if (swait_active(q)) - swake_up_one(q); } static void start_sw_tscdeadline(struct kvm_lapic *apic) From 046ddeed0461b5d270470c253cbb321103d048b6 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Thu, 1 Aug 2019 11:30:14 +0800 Subject: [PATCH 140/482] KVM: Check preempted_in_kernel for involuntary preemption MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit preempted_in_kernel is updated in preempt_notifier when involuntary preemption ocurrs, it can be stale when the voluntarily preempted vCPUs are taken into account by kvm_vcpu_on_spin() loop. This patch lets it just check preempted_in_kernel for involuntary preemption. Cc: Paolo Bonzini Cc: Radim Krčmář Signed-off-by: Wanpeng Li Signed-off-by: Paolo Bonzini --- virt/kvm/kvm_main.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 887f3b0c2b60..ed061d8a457c 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2508,7 +2508,8 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me, bool yield_to_kernel_mode) continue; if (swait_active(&vcpu->wq) && !kvm_arch_vcpu_runnable(vcpu)) continue; - if (yield_to_kernel_mode && !kvm_arch_vcpu_in_kernel(vcpu)) + if (READ_ONCE(vcpu->preempted) && yield_to_kernel_mode && + !kvm_arch_vcpu_in_kernel(vcpu)) continue; if (!kvm_vcpu_eligible_for_directed_yield(vcpu)) continue; @@ -4205,7 +4206,7 @@ static void kvm_sched_in(struct preempt_notifier *pn, int cpu) { struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn); - vcpu->preempted = false; + WRITE_ONCE(vcpu->preempted, false); WRITE_ONCE(vcpu->ready, false); kvm_arch_sched_in(vcpu, cpu); @@ -4219,7 +4220,7 @@ static void kvm_sched_out(struct preempt_notifier *pn, struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn); if (current->state == TASK_RUNNING) { - vcpu->preempted = true; + WRITE_ONCE(vcpu->preempted, true); WRITE_ONCE(vcpu->ready, true); } kvm_arch_vcpu_put(vcpu); From 17e433b54393a6269acbcb792da97791fe1592d8 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Mon, 5 Aug 2019 10:03:19 +0800 Subject: [PATCH 141/482] KVM: Fix leak vCPU's VMCS value into other pCPU MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After commit d73eb57b80b (KVM: Boost vCPUs that are delivering interrupts), a five years old bug is exposed. Running ebizzy benchmark in three 80 vCPUs VMs on one 80 pCPUs Skylake server, a lot of rcu_sched stall warning splatting in the VMs after stress testing: INFO: rcu_sched detected stalls on CPUs/tasks: { 4 41 57 62 77} (detected by 15, t=60004 jiffies, g=899, c=898, q=15073) Call Trace: flush_tlb_mm_range+0x68/0x140 tlb_flush_mmu.part.75+0x37/0xe0 tlb_finish_mmu+0x55/0x60 zap_page_range+0x142/0x190 SyS_madvise+0x3cd/0x9c0 system_call_fastpath+0x1c/0x21 swait_active() sustains to be true before finish_swait() is called in kvm_vcpu_block(), voluntarily preempted vCPUs are taken into account by kvm_vcpu_on_spin() loop greatly increases the probability condition kvm_arch_vcpu_runnable(vcpu) is checked and can be true, when APICv is enabled the yield-candidate vCPU's VMCS RVI field leaks(by vmx_sync_pir_to_irr()) into spinning-on-a-taken-lock vCPU's current VMCS. This patch fixes it by checking conservatively a subset of events. Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Christian Borntraeger Cc: Marc Zyngier Cc: stable@vger.kernel.org Fixes: 98f4a1467 (KVM: add kvm_arch_vcpu_runnable() test to kvm_vcpu_on_spin() loop) Signed-off-by: Wanpeng Li Signed-off-by: Paolo Bonzini --- arch/powerpc/kvm/powerpc.c | 5 +++++ arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/svm.c | 6 ++++++ arch/x86/kvm/vmx/vmx.c | 6 ++++++ arch/x86/kvm/x86.c | 16 ++++++++++++++++ include/linux/kvm_host.h | 1 + virt/kvm/kvm_main.c | 25 ++++++++++++++++++++++++- 7 files changed, 59 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 0dba7eb24f92..3e34d5fa6708 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -50,6 +50,11 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *v) return !!(v->arch.pending_exceptions) || kvm_request_pending(v); } +bool kvm_arch_dy_runnable(struct kvm_vcpu *vcpu) +{ + return kvm_arch_vcpu_runnable(vcpu); +} + bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu) { return false; diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index e74f0711eaaf..fc046ca89d32 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1175,6 +1175,7 @@ struct kvm_x86_ops { int (*update_pi_irte)(struct kvm *kvm, unsigned int host_irq, uint32_t guest_irq, bool set); void (*apicv_post_state_restore)(struct kvm_vcpu *vcpu); + bool (*dy_apicv_has_pending_interrupt)(struct kvm_vcpu *vcpu); int (*set_hv_timer)(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc, bool *expired); diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 7eafc6907861..d685491fce4d 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -5190,6 +5190,11 @@ static void svm_deliver_avic_intr(struct kvm_vcpu *vcpu, int vec) kvm_vcpu_wake_up(vcpu); } +static bool svm_dy_apicv_has_pending_interrupt(struct kvm_vcpu *vcpu) +{ + return false; +} + static void svm_ir_list_del(struct vcpu_svm *svm, struct amd_iommu_pi_data *pi) { unsigned long flags; @@ -7314,6 +7319,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = { .pmu_ops = &amd_pmu_ops, .deliver_posted_interrupt = svm_deliver_avic_intr, + .dy_apicv_has_pending_interrupt = svm_dy_apicv_has_pending_interrupt, .update_pi_irte = svm_update_pi_irte, .setup_mce = svm_setup_mce, diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 074385c86c09..42ed3faa6af8 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -6117,6 +6117,11 @@ static int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu) return max_irr; } +static bool vmx_dy_apicv_has_pending_interrupt(struct kvm_vcpu *vcpu) +{ + return pi_test_on(vcpu_to_pi_desc(vcpu)); +} + static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) { if (!kvm_vcpu_apicv_active(vcpu)) @@ -7726,6 +7731,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = { .guest_apic_has_interrupt = vmx_guest_apic_has_interrupt, .sync_pir_to_irr = vmx_sync_pir_to_irr, .deliver_posted_interrupt = vmx_deliver_posted_interrupt, + .dy_apicv_has_pending_interrupt = vmx_dy_apicv_has_pending_interrupt, .set_tss_addr = vmx_set_tss_addr, .set_identity_map_addr = vmx_set_identity_map_addr, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index c6d951cbd76c..93b0bd45ac73 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -9698,6 +9698,22 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) return kvm_vcpu_running(vcpu) || kvm_vcpu_has_events(vcpu); } +bool kvm_arch_dy_runnable(struct kvm_vcpu *vcpu) +{ + if (READ_ONCE(vcpu->arch.pv.pv_unhalted)) + return true; + + if (kvm_test_request(KVM_REQ_NMI, vcpu) || + kvm_test_request(KVM_REQ_SMI, vcpu) || + kvm_test_request(KVM_REQ_EVENT, vcpu)) + return true; + + if (vcpu->arch.apicv_active && kvm_x86_ops->dy_apicv_has_pending_interrupt(vcpu)) + return true; + + return false; +} + bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu) { return vcpu->arch.preempted_in_kernel; diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 5c5b5867024c..9e4c2bb90297 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -872,6 +872,7 @@ int kvm_arch_check_processor_compat(void); int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu); bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu); int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu); +bool kvm_arch_dy_runnable(struct kvm_vcpu *vcpu); #ifndef __KVM_HAVE_ARCH_VM_ALLOC /* diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index ed061d8a457c..1f05aeb9da27 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2477,6 +2477,29 @@ static bool kvm_vcpu_eligible_for_directed_yield(struct kvm_vcpu *vcpu) #endif } +/* + * Unlike kvm_arch_vcpu_runnable, this function is called outside + * a vcpu_load/vcpu_put pair. However, for most architectures + * kvm_arch_vcpu_runnable does not require vcpu_load. + */ +bool __weak kvm_arch_dy_runnable(struct kvm_vcpu *vcpu) +{ + return kvm_arch_vcpu_runnable(vcpu); +} + +static bool vcpu_dy_runnable(struct kvm_vcpu *vcpu) +{ + if (kvm_arch_dy_runnable(vcpu)) + return true; + +#ifdef CONFIG_KVM_ASYNC_PF + if (!list_empty_careful(&vcpu->async_pf.done)) + return true; +#endif + + return false; +} + void kvm_vcpu_on_spin(struct kvm_vcpu *me, bool yield_to_kernel_mode) { struct kvm *kvm = me->kvm; @@ -2506,7 +2529,7 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me, bool yield_to_kernel_mode) continue; if (vcpu == me) continue; - if (swait_active(&vcpu->wq) && !kvm_arch_vcpu_runnable(vcpu)) + if (swait_active(&vcpu->wq) && !vcpu_dy_runnable(vcpu)) continue; if (READ_ONCE(vcpu->preempted) && yield_to_kernel_mode && !kvm_arch_vcpu_in_kernel(vcpu)) From 741cbbae0768b828be2d48331eb371a4f08bbea8 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Sat, 3 Aug 2019 08:14:25 +0200 Subject: [PATCH 142/482] KVM: remove kvm_arch_has_vcpu_debugfs() There is no need for this function as all arches have to implement kvm_arch_create_vcpu_debugfs() no matter what. A #define symbol let us actually simplify the code. Signed-off-by: Paolo Bonzini --- arch/mips/kvm/mips.c | 10 ---------- arch/powerpc/kvm/powerpc.c | 10 ---------- arch/s390/kvm/kvm-s390.c | 10 ---------- arch/x86/include/asm/kvm_host.h | 2 ++ arch/x86/kvm/debugfs.c | 5 ----- include/linux/kvm_host.h | 3 ++- virt/kvm/arm/arm.c | 5 ----- virt/kvm/kvm_main.c | 5 ++--- 8 files changed, 6 insertions(+), 44 deletions(-) diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index 2cfe839f0b3a..1109924560d8 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -150,16 +150,6 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) return 0; } -bool kvm_arch_has_vcpu_debugfs(void) -{ - return false; -} - -int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu) -{ - return 0; -} - void kvm_mips_free_vcpus(struct kvm *kvm) { unsigned int i; diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 3e34d5fa6708..3e566c2e6066 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -457,16 +457,6 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) return -EINVAL; } -bool kvm_arch_has_vcpu_debugfs(void) -{ - return false; -} - -int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu) -{ - return 0; -} - void kvm_arch_destroy_vm(struct kvm *kvm) { unsigned int i; diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 3f520cd837fb..f329dcb3f44c 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -2516,16 +2516,6 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) return rc; } -bool kvm_arch_has_vcpu_debugfs(void) -{ - return false; -} - -int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu) -{ - return 0; -} - void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) { VCPU_EVENT(vcpu, 3, "%s", "free cpu"); diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index fc046ca89d32..e92725b2a46f 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -35,6 +35,8 @@ #include #include +#define __KVM_HAVE_ARCH_VCPU_DEBUGFS + #define KVM_MAX_VCPUS 288 #define KVM_SOFT_MAX_VCPUS 240 #define KVM_MAX_VCPU_ID 1023 diff --git a/arch/x86/kvm/debugfs.c b/arch/x86/kvm/debugfs.c index 329361b69d5e..9bd93e0d5f63 100644 --- a/arch/x86/kvm/debugfs.c +++ b/arch/x86/kvm/debugfs.c @@ -8,11 +8,6 @@ #include #include "lapic.h" -bool kvm_arch_has_vcpu_debugfs(void) -{ - return true; -} - static int vcpu_get_timer_advance_ns(void *data, u64 *val) { struct kvm_vcpu *vcpu = (struct kvm_vcpu *) data; diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 9e4c2bb90297..8d34db3c8bc6 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -861,8 +861,9 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu); void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu); void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu); -bool kvm_arch_has_vcpu_debugfs(void); +#ifdef __KVM_HAVE_ARCH_VCPU_DEBUGFS int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu); +#endif int kvm_arch_hardware_enable(void); void kvm_arch_hardware_disable(void); diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index acc43242a310..13f5a1aa6d79 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -144,11 +144,6 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) return ret; } -bool kvm_arch_has_vcpu_debugfs(void) -{ - return false; -} - int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu) { return 0; diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 1f05aeb9da27..4afb1a234018 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2617,12 +2617,10 @@ static int create_vcpu_fd(struct kvm_vcpu *vcpu) static int kvm_create_vcpu_debugfs(struct kvm_vcpu *vcpu) { +#ifdef __KVM_HAVE_ARCH_VCPU_DEBUGFS char dir_name[ITOA_MAX_LEN * 2]; int ret; - if (!kvm_arch_has_vcpu_debugfs()) - return 0; - if (!debugfs_initialized()) return 0; @@ -2637,6 +2635,7 @@ static int kvm_create_vcpu_debugfs(struct kvm_vcpu *vcpu) debugfs_remove_recursive(vcpu->debugfs_dentry); return ret; } +#endif return 0; } From 3e7093d045196b1016517631645e874fe903db7e Mon Sep 17 00:00:00 2001 From: Greg KH Date: Wed, 31 Jul 2019 20:56:20 +0200 Subject: [PATCH 143/482] KVM: no need to check return value of debugfs_create functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When calling debugfs functions, there is no need to ever check the return value. The function can work or not, but the code logic should never do something different based on this. Also, when doing this, change kvm_arch_create_vcpu_debugfs() to return void instead of an integer, as we should not care at all about if this function actually does anything or not. Cc: Paolo Bonzini Cc: "Radim Krčmář" Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Borislav Petkov Cc: "H. Peter Anvin" Cc: Cc: Signed-off-by: Greg Kroah-Hartman Signed-off-by: Paolo Bonzini --- arch/x86/kvm/debugfs.c | 41 +++++++++++++--------------------------- include/linux/kvm_host.h | 2 +- virt/kvm/kvm_main.c | 21 +++++--------------- 3 files changed, 19 insertions(+), 45 deletions(-) diff --git a/arch/x86/kvm/debugfs.c b/arch/x86/kvm/debugfs.c index 9bd93e0d5f63..018aebce33ff 100644 --- a/arch/x86/kvm/debugfs.c +++ b/arch/x86/kvm/debugfs.c @@ -43,37 +43,22 @@ static int vcpu_get_tsc_scaling_frac_bits(void *data, u64 *val) DEFINE_SIMPLE_ATTRIBUTE(vcpu_tsc_scaling_frac_fops, vcpu_get_tsc_scaling_frac_bits, NULL, "%llu\n"); -int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu) +void kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu) { - struct dentry *ret; + debugfs_create_file("tsc-offset", 0444, vcpu->debugfs_dentry, vcpu, + &vcpu_tsc_offset_fops); - ret = debugfs_create_file("tsc-offset", 0444, - vcpu->debugfs_dentry, - vcpu, &vcpu_tsc_offset_fops); - if (!ret) - return -ENOMEM; - - if (lapic_in_kernel(vcpu)) { - ret = debugfs_create_file("lapic_timer_advance_ns", 0444, - vcpu->debugfs_dentry, - vcpu, &vcpu_timer_advance_ns_fops); - if (!ret) - return -ENOMEM; - } + if (lapic_in_kernel(vcpu)) + debugfs_create_file("lapic_timer_advance_ns", 0444, + vcpu->debugfs_dentry, vcpu, + &vcpu_timer_advance_ns_fops); if (kvm_has_tsc_control) { - ret = debugfs_create_file("tsc-scaling-ratio", 0444, - vcpu->debugfs_dentry, - vcpu, &vcpu_tsc_scaling_fops); - if (!ret) - return -ENOMEM; - ret = debugfs_create_file("tsc-scaling-ratio-frac-bits", 0444, - vcpu->debugfs_dentry, - vcpu, &vcpu_tsc_scaling_frac_fops); - if (!ret) - return -ENOMEM; - + debugfs_create_file("tsc-scaling-ratio", 0444, + vcpu->debugfs_dentry, vcpu, + &vcpu_tsc_scaling_fops); + debugfs_create_file("tsc-scaling-ratio-frac-bits", 0444, + vcpu->debugfs_dentry, vcpu, + &vcpu_tsc_scaling_frac_fops); } - - return 0; } diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 8d34db3c8bc6..fcb46b3374c6 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -862,7 +862,7 @@ void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu); void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu); #ifdef __KVM_HAVE_ARCH_VCPU_DEBUGFS -int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu); +void kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu); #endif int kvm_arch_hardware_enable(void); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 4afb1a234018..4feceaa03fb1 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2615,29 +2615,20 @@ static int create_vcpu_fd(struct kvm_vcpu *vcpu) return anon_inode_getfd(name, &kvm_vcpu_fops, vcpu, O_RDWR | O_CLOEXEC); } -static int kvm_create_vcpu_debugfs(struct kvm_vcpu *vcpu) +static void kvm_create_vcpu_debugfs(struct kvm_vcpu *vcpu) { #ifdef __KVM_HAVE_ARCH_VCPU_DEBUGFS char dir_name[ITOA_MAX_LEN * 2]; - int ret; if (!debugfs_initialized()) - return 0; + return; snprintf(dir_name, sizeof(dir_name), "vcpu%d", vcpu->vcpu_id); vcpu->debugfs_dentry = debugfs_create_dir(dir_name, - vcpu->kvm->debugfs_dentry); - if (!vcpu->debugfs_dentry) - return -ENOMEM; + vcpu->kvm->debugfs_dentry); - ret = kvm_arch_create_vcpu_debugfs(vcpu); - if (ret < 0) { - debugfs_remove_recursive(vcpu->debugfs_dentry); - return ret; - } + kvm_arch_create_vcpu_debugfs(vcpu); #endif - - return 0; } /* @@ -2672,9 +2663,7 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id) if (r) goto vcpu_destroy; - r = kvm_create_vcpu_debugfs(vcpu); - if (r) - goto vcpu_destroy; + kvm_create_vcpu_debugfs(vcpu); mutex_lock(&kvm->lock); if (kvm_get_vcpu_by_id(kvm, id)) { From 57b76bdb20ecb05c87f4e12b7ced66bc03a976c3 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 25 Jul 2019 12:59:40 +0200 Subject: [PATCH 144/482] x86: kvm: remove useless calls to kvm_para_available Most code in arch/x86/kernel/kvm.c is called through x86_hyper_kvm, and thus only runs if KVM has been detected. There is no need to check again for the CPUID base. Cc: Sergio Lopez Cc: Jan Kiszka Signed-off-by: Paolo Bonzini --- arch/x86/kernel/kvm.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 82caf01b63dd..edd2179ad2da 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -308,9 +308,6 @@ static notrace void kvm_guest_apic_eoi_write(u32 reg, u32 val) static void kvm_guest_cpu_init(void) { - if (!kvm_para_available()) - return; - if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF) && kvmapf) { u64 pa = slow_virt_to_phys(this_cpu_ptr(&apf_reason)); @@ -625,9 +622,6 @@ static void __init kvm_guest_init(void) { int i; - if (!kvm_para_available()) - return; - paravirt_ops_setup(); register_reboot_notifier(&kvm_pv_reboot_nb); for (i = 0; i < KVM_TASK_SLEEP_HASHSIZE; i++) @@ -847,8 +841,6 @@ asm( */ void __init kvm_spinlock_init(void) { - if (!kvm_para_available()) - return; /* Does host kernel support KVM_FEATURE_PV_UNHALT? */ if (!kvm_para_has_feature(KVM_FEATURE_PV_UNHALT)) return; From 7e7ae38bf928c5cfa6dd6e9a2cf8b42c84a27c92 Mon Sep 17 00:00:00 2001 From: Yoshiaki Okamoto Date: Sat, 20 Jul 2019 22:23:18 +0900 Subject: [PATCH 145/482] USB: serial: option: Add support for ZTE MF871A This patch adds support for MF871A USB modem (aka Speed USB STICK U03) to option driver. This modem is manufactured by ZTE corporation, and sold by KDDI. Interface layout: 0: AT 1: MODEM usb-devices output: T: Bus=01 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 9 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=19d2 ProdID=1481 Rev=52.87 S: Manufacturer=ZTE,Incorporated S: Product=ZTE Technologies MSM S: SerialNumber=1234567890ABCDEF C: #Ifs= 2 Cfg#= 1 Atr=80 MxPwr=500mA I: If#= 0 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option I: If#= 1 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option Co-developed-by: Hiroyuki Yamamoto Signed-off-by: Hiroyuki Yamamoto Signed-off-by: Yoshiaki Okamoto Cc: stable Signed-off-by: Johan Hovold --- drivers/usb/serial/option.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index c1582fbd1150..23d4a5f543af 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -1549,6 +1549,7 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1428, 0xff, 0xff, 0xff), /* Telewell TW-LTE 4G v2 */ .driver_info = RSVD(2) }, { USB_DEVICE_INTERFACE_CLASS(ZTE_VENDOR_ID, 0x1476, 0xff) }, /* GosunCn ZTE WeLink ME3630 (ECM/NCM mode) */ + { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1481, 0xff, 0x00, 0x00) }, /* ZTE MF871A */ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1533, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1534, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1535, 0xff, 0xff, 0xff) }, From 552573e42aab5f75aff9bab855a9677979d9a7d5 Mon Sep 17 00:00:00 2001 From: Rogan Dawes Date: Wed, 17 Jul 2019 11:11:34 +0200 Subject: [PATCH 146/482] USB: serial: option: add D-Link DWM-222 device ID Add device id for D-Link DWM-222 A2. MI_00 D-Link HS-USB Diagnostics MI_01 D-Link HS-USB Modem MI_02 D-Link HS-USB AT Port MI_03 D-Link HS-USB NMEA MI_04 D-Link HS-USB WWAN Adapter (qmi_wwan) MI_05 USB Mass Storage Device Cc: stable@vger.kernel.org Signed-off-by: Rogan Dawes Signed-off-by: Johan Hovold --- drivers/usb/serial/option.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 23d4a5f543af..f2c19660ed16 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -1953,6 +1953,8 @@ static const struct usb_device_id option_ids[] = { .driver_info = RSVD(4) }, { USB_DEVICE_INTERFACE_CLASS(0x2001, 0x7e35, 0xff), /* D-Link DWM-222 */ .driver_info = RSVD(4) }, + { USB_DEVICE_INTERFACE_CLASS(0x2001, 0x7e3d, 0xff), /* D-Link DWM-222 A2 */ + .driver_info = RSVD(4) }, { USB_DEVICE_AND_INTERFACE_INFO(0x07d1, 0x3e01, 0xff, 0xff, 0xff) }, /* D-Link DWM-152/C1 */ { USB_DEVICE_AND_INTERFACE_INFO(0x07d1, 0x3e02, 0xff, 0xff, 0xff) }, /* D-Link DWM-156/C1 */ { USB_DEVICE_AND_INTERFACE_INFO(0x07d1, 0x7e11, 0xff, 0xff, 0xff) }, /* D-Link DWM-156/A3 */ From f7ccbed656f78212593ca965d9a8f34bf24e0aab Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Fri, 2 Aug 2019 11:46:16 -0700 Subject: [PATCH 147/482] drm/rockchip: Suspend DP late In commit fe64ba5c6323 ("drm/rockchip: Resume DP early") we moved resume to be early but left suspend at its normal time. This seems like it could be OK, but casues problems if a suspend gets interrupted partway through. The OS only balances matching suspend/resume levels. ...so if suspend was called then resume will be called. If suspend late was called then resume early will be called. ...but if suspend was called resume early might not get called. This leads to an unbalance in the clock enables / disables. Lets take the simple fix and just move suspend to be late to match. This makes the PM core take proper care in keeping things balanced. Fixes: fe64ba5c6323 ("drm/rockchip: Resume DP early") Signed-off-by: Douglas Anderson Signed-off-by: Sean Paul Link: https://patchwork.freedesktop.org/patch/msgid/20190802184616.44822-1-dianders@chromium.org --- drivers/gpu/drm/rockchip/analogix_dp-rockchip.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/rockchip/analogix_dp-rockchip.c b/drivers/gpu/drm/rockchip/analogix_dp-rockchip.c index 95e5c517a15f..9aae3d8e99ef 100644 --- a/drivers/gpu/drm/rockchip/analogix_dp-rockchip.c +++ b/drivers/gpu/drm/rockchip/analogix_dp-rockchip.c @@ -432,7 +432,7 @@ static int rockchip_dp_resume(struct device *dev) static const struct dev_pm_ops rockchip_dp_pm_ops = { #ifdef CONFIG_PM_SLEEP - .suspend = rockchip_dp_suspend, + .suspend_late = rockchip_dp_suspend, .resume_early = rockchip_dp_resume, #endif }; From 5eeaf10eec394b28fad2c58f1f5c3a5da0e87d1c Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 2 Aug 2019 10:28:32 +0100 Subject: [PATCH 148/482] KVM: arm/arm64: Sync ICH_VMCR_EL2 back when about to block Since commit commit 328e56647944 ("KVM: arm/arm64: vgic: Defer touching GICH_VMCR to vcpu_load/put"), we leave ICH_VMCR_EL2 (or its GICv2 equivalent) loaded as long as we can, only syncing it back when we're scheduled out. There is a small snag with that though: kvm_vgic_vcpu_pending_irq(), which is indirectly called from kvm_vcpu_check_block(), needs to evaluate the guest's view of ICC_PMR_EL1. At the point were we call kvm_vcpu_check_block(), the vcpu is still loaded, and whatever changes to PMR is not visible in memory until we do a vcpu_put(). Things go really south if the guest does the following: mov x0, #0 // or any small value masking interrupts msr ICC_PMR_EL1, x0 [vcpu preempted, then rescheduled, VMCR sampled] mov x0, #ff // allow all interrupts msr ICC_PMR_EL1, x0 wfi // traps to EL2, so samping of VMCR [interrupt arrives just after WFI] Here, the hypervisor's view of PMR is zero, while the guest has enabled its interrupts. kvm_vgic_vcpu_pending_irq() will then say that no interrupts are pending (despite an interrupt being received) and we'll block for no reason. If the guest doesn't have a periodic interrupt firing once it has blocked, it will stay there forever. To avoid this unfortuante situation, let's resync VMCR from kvm_arch_vcpu_blocking(), ensuring that a following kvm_vcpu_check_block() will observe the latest value of PMR. This has been found by booting an arm64 Linux guest with the pseudo NMI feature, and thus using interrupt priorities to mask interrupts instead of the usual PSTATE masking. Cc: stable@vger.kernel.org # 4.12 Fixes: 328e56647944 ("KVM: arm/arm64: vgic: Defer touching GICH_VMCR to vcpu_load/put") Signed-off-by: Marc Zyngier --- include/kvm/arm_vgic.h | 1 + virt/kvm/arm/arm.c | 11 +++++++++++ virt/kvm/arm/vgic/vgic-v2.c | 9 ++++++++- virt/kvm/arm/vgic/vgic-v3.c | 7 ++++++- virt/kvm/arm/vgic/vgic.c | 11 +++++++++++ virt/kvm/arm/vgic/vgic.h | 2 ++ 6 files changed, 39 insertions(+), 2 deletions(-) diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index 46bbc949c20a..7a30524a80ee 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -350,6 +350,7 @@ int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu); void kvm_vgic_load(struct kvm_vcpu *vcpu); void kvm_vgic_put(struct kvm_vcpu *vcpu); +void kvm_vgic_vmcr_sync(struct kvm_vcpu *vcpu); #define irqchip_in_kernel(k) (!!((k)->arch.vgic.in_kernel)) #define vgic_initialized(k) ((k)->arch.vgic.initialized) diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index c704fa696184..482b20256fa8 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -323,6 +323,17 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) { + /* + * If we're about to block (most likely because we've just hit a + * WFI), we need to sync back the state of the GIC CPU interface + * so that we have the lastest PMR and group enables. This ensures + * that kvm_arch_vcpu_runnable has up-to-date data to decide + * whether we have pending interrupts. + */ + preempt_disable(); + kvm_vgic_vmcr_sync(vcpu); + preempt_enable(); + kvm_vgic_v4_enable_doorbell(vcpu); } diff --git a/virt/kvm/arm/vgic/vgic-v2.c b/virt/kvm/arm/vgic/vgic-v2.c index 6dd5ad706c92..96aab77d0471 100644 --- a/virt/kvm/arm/vgic/vgic-v2.c +++ b/virt/kvm/arm/vgic/vgic-v2.c @@ -484,10 +484,17 @@ void vgic_v2_load(struct kvm_vcpu *vcpu) kvm_vgic_global_state.vctrl_base + GICH_APR); } -void vgic_v2_put(struct kvm_vcpu *vcpu) +void vgic_v2_vmcr_sync(struct kvm_vcpu *vcpu) { struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; cpu_if->vgic_vmcr = readl_relaxed(kvm_vgic_global_state.vctrl_base + GICH_VMCR); +} + +void vgic_v2_put(struct kvm_vcpu *vcpu) +{ + struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; + + vgic_v2_vmcr_sync(vcpu); cpu_if->vgic_apr = readl_relaxed(kvm_vgic_global_state.vctrl_base + GICH_APR); } diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c index c2c9ce009f63..0c653a1e5215 100644 --- a/virt/kvm/arm/vgic/vgic-v3.c +++ b/virt/kvm/arm/vgic/vgic-v3.c @@ -662,12 +662,17 @@ void vgic_v3_load(struct kvm_vcpu *vcpu) __vgic_v3_activate_traps(vcpu); } -void vgic_v3_put(struct kvm_vcpu *vcpu) +void vgic_v3_vmcr_sync(struct kvm_vcpu *vcpu) { struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; if (likely(cpu_if->vgic_sre)) cpu_if->vgic_vmcr = kvm_call_hyp_ret(__vgic_v3_read_vmcr); +} + +void vgic_v3_put(struct kvm_vcpu *vcpu) +{ + vgic_v3_vmcr_sync(vcpu); kvm_call_hyp(__vgic_v3_save_aprs, vcpu); diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c index 04786c8ec77e..13d4b38a94ec 100644 --- a/virt/kvm/arm/vgic/vgic.c +++ b/virt/kvm/arm/vgic/vgic.c @@ -919,6 +919,17 @@ void kvm_vgic_put(struct kvm_vcpu *vcpu) vgic_v3_put(vcpu); } +void kvm_vgic_vmcr_sync(struct kvm_vcpu *vcpu) +{ + if (unlikely(!irqchip_in_kernel(vcpu->kvm))) + return; + + if (kvm_vgic_global_state.type == VGIC_V2) + vgic_v2_vmcr_sync(vcpu); + else + vgic_v3_vmcr_sync(vcpu); +} + int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu) { struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; diff --git a/virt/kvm/arm/vgic/vgic.h b/virt/kvm/arm/vgic/vgic.h index 57205beaa981..11adbdac1d56 100644 --- a/virt/kvm/arm/vgic/vgic.h +++ b/virt/kvm/arm/vgic/vgic.h @@ -193,6 +193,7 @@ int vgic_register_dist_iodev(struct kvm *kvm, gpa_t dist_base_address, void vgic_v2_init_lrs(void); void vgic_v2_load(struct kvm_vcpu *vcpu); void vgic_v2_put(struct kvm_vcpu *vcpu); +void vgic_v2_vmcr_sync(struct kvm_vcpu *vcpu); void vgic_v2_save_state(struct kvm_vcpu *vcpu); void vgic_v2_restore_state(struct kvm_vcpu *vcpu); @@ -223,6 +224,7 @@ bool vgic_v3_check_base(struct kvm *kvm); void vgic_v3_load(struct kvm_vcpu *vcpu); void vgic_v3_put(struct kvm_vcpu *vcpu); +void vgic_v3_vmcr_sync(struct kvm_vcpu *vcpu); bool vgic_has_its(struct kvm *kvm); int kvm_vgic_register_its_device(void); From fc05481b2fcabaaeccf63e32ac1baab54e5b6963 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Mon, 5 Aug 2019 12:15:28 +0100 Subject: [PATCH 149/482] usb: yurex: Fix use-after-free in yurex_delete syzbot reported the following crash [0]: BUG: KASAN: use-after-free in usb_free_coherent+0x79/0x80 drivers/usb/core/usb.c:928 Read of size 8 at addr ffff8881b18599c8 by task syz-executor.4/16007 CPU: 0 PID: 16007 Comm: syz-executor.4 Not tainted 5.3.0-rc2+ #23 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0xca/0x13e lib/dump_stack.c:113 print_address_description+0x6a/0x32c mm/kasan/report.c:351 __kasan_report.cold+0x1a/0x33 mm/kasan/report.c:482 kasan_report+0xe/0x12 mm/kasan/common.c:612 usb_free_coherent+0x79/0x80 drivers/usb/core/usb.c:928 yurex_delete+0x138/0x330 drivers/usb/misc/yurex.c:100 kref_put include/linux/kref.h:65 [inline] yurex_release+0x66/0x90 drivers/usb/misc/yurex.c:392 __fput+0x2d7/0x840 fs/file_table.c:280 task_work_run+0x13f/0x1c0 kernel/task_work.c:113 tracehook_notify_resume include/linux/tracehook.h:188 [inline] exit_to_usermode_loop+0x1d2/0x200 arch/x86/entry/common.c:163 prepare_exit_to_usermode arch/x86/entry/common.c:194 [inline] syscall_return_slowpath arch/x86/entry/common.c:274 [inline] do_syscall_64+0x45f/0x580 arch/x86/entry/common.c:299 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x413511 Code: 75 14 b8 03 00 00 00 0f 05 48 3d 01 f0 ff ff 0f 83 04 1b 00 00 c3 48 83 ec 08 e8 0a fc ff ff 48 89 04 24 b8 03 00 00 00 0f 05 <48> 8b 3c 24 48 89 c2 e8 53 fc ff ff 48 89 d0 48 83 c4 08 48 3d 01 RSP: 002b:00007ffc424ea2e0 EFLAGS: 00000293 ORIG_RAX: 0000000000000003 RAX: 0000000000000000 RBX: 0000000000000007 RCX: 0000000000413511 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000006 RBP: 0000000000000001 R08: 0000000029a2fc22 R09: 0000000029a2fc26 R10: 00007ffc424ea3c0 R11: 0000000000000293 R12: 000000000075c9a0 R13: 000000000075c9a0 R14: 0000000000761938 R15: ffffffffffffffff Allocated by task 2776: save_stack+0x1b/0x80 mm/kasan/common.c:69 set_track mm/kasan/common.c:77 [inline] __kasan_kmalloc mm/kasan/common.c:487 [inline] __kasan_kmalloc.constprop.0+0xbf/0xd0 mm/kasan/common.c:460 kmalloc include/linux/slab.h:552 [inline] kzalloc include/linux/slab.h:748 [inline] usb_alloc_dev+0x51/0xf95 drivers/usb/core/usb.c:583 hub_port_connect drivers/usb/core/hub.c:5004 [inline] hub_port_connect_change drivers/usb/core/hub.c:5213 [inline] port_event drivers/usb/core/hub.c:5359 [inline] hub_event+0x15c0/0x3640 drivers/usb/core/hub.c:5441 process_one_work+0x92b/0x1530 kernel/workqueue.c:2269 worker_thread+0x96/0xe20 kernel/workqueue.c:2415 kthread+0x318/0x420 kernel/kthread.c:255 ret_from_fork+0x24/0x30 arch/x86/entry/entry_64.S:352 Freed by task 16007: save_stack+0x1b/0x80 mm/kasan/common.c:69 set_track mm/kasan/common.c:77 [inline] __kasan_slab_free+0x130/0x180 mm/kasan/common.c:449 slab_free_hook mm/slub.c:1423 [inline] slab_free_freelist_hook mm/slub.c:1470 [inline] slab_free mm/slub.c:3012 [inline] kfree+0xe4/0x2f0 mm/slub.c:3953 device_release+0x71/0x200 drivers/base/core.c:1064 kobject_cleanup lib/kobject.c:693 [inline] kobject_release lib/kobject.c:722 [inline] kref_put include/linux/kref.h:65 [inline] kobject_put+0x171/0x280 lib/kobject.c:739 put_device+0x1b/0x30 drivers/base/core.c:2213 usb_put_dev+0x1f/0x30 drivers/usb/core/usb.c:725 yurex_delete+0x40/0x330 drivers/usb/misc/yurex.c:95 kref_put include/linux/kref.h:65 [inline] yurex_release+0x66/0x90 drivers/usb/misc/yurex.c:392 __fput+0x2d7/0x840 fs/file_table.c:280 task_work_run+0x13f/0x1c0 kernel/task_work.c:113 tracehook_notify_resume include/linux/tracehook.h:188 [inline] exit_to_usermode_loop+0x1d2/0x200 arch/x86/entry/common.c:163 prepare_exit_to_usermode arch/x86/entry/common.c:194 [inline] syscall_return_slowpath arch/x86/entry/common.c:274 [inline] do_syscall_64+0x45f/0x580 arch/x86/entry/common.c:299 entry_SYSCALL_64_after_hwframe+0x49/0xbe The buggy address belongs to the object at ffff8881b1859980 which belongs to the cache kmalloc-2k of size 2048 The buggy address is located 72 bytes inside of 2048-byte region [ffff8881b1859980, ffff8881b185a180) The buggy address belongs to the page: page:ffffea0006c61600 refcount:1 mapcount:0 mapping:ffff8881da00c000 index:0x0 compound_mapcount: 0 flags: 0x200000000010200(slab|head) raw: 0200000000010200 0000000000000000 0000000100000001 ffff8881da00c000 raw: 0000000000000000 00000000000f000f 00000001ffffffff 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff8881b1859880: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ffff8881b1859900: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc > ffff8881b1859980: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ ffff8881b1859a00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff8881b1859a80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ================================================================== A quick look at the yurex_delete() shows that we drop the reference to the usb_device before releasing any buffers associated with the device. Delay the reference drop until we have finished the cleanup. [0] https://lore.kernel.org/lkml/0000000000003f86d8058f0bd671@google.com/ Fixes: 6bc235a2e24a5e ("USB: add driver for Meywa-Denki & Kayac YUREX") Cc: Jiri Kosina Cc: Tomoki Sekiyama Cc: Oliver Neukum Cc: andreyknvl@google.com Cc: gregkh@linuxfoundation.org Cc: Alan Stern Cc: syzkaller-bugs@googlegroups.com Cc: dtor@chromium.org Reported-by: syzbot+d1fedb1c1fdb07fca507@syzkaller.appspotmail.com Signed-off-by: Suzuki K Poulose Cc: stable Link: https://lore.kernel.org/r/20190805111528.6758-1-suzuki.poulose@arm.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/misc/yurex.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/misc/yurex.c b/drivers/usb/misc/yurex.c index 7b306aa22d25..6715a128e6c8 100644 --- a/drivers/usb/misc/yurex.c +++ b/drivers/usb/misc/yurex.c @@ -92,7 +92,6 @@ static void yurex_delete(struct kref *kref) dev_dbg(&dev->interface->dev, "%s\n", __func__); - usb_put_dev(dev->udev); if (dev->cntl_urb) { usb_kill_urb(dev->cntl_urb); kfree(dev->cntl_req); @@ -108,6 +107,7 @@ static void yurex_delete(struct kref *kref) dev->int_buffer, dev->urb->transfer_dma); usb_free_urb(dev->urb); } + usb_put_dev(dev->udev); kfree(dev); } From c43f28dfdc4654e738aa6d3fd08a105b2bee758d Mon Sep 17 00:00:00 2001 From: Gavin Li Date: Sun, 4 Aug 2019 16:50:44 -0700 Subject: [PATCH 150/482] usb: usbfs: fix double-free of usb memory upon submiturb error Upon an error within proc_do_submiturb(), dec_usb_memory_use_count() gets called once by the error handling tail and again by free_async(). Remove the first call. Signed-off-by: Gavin Li Acked-by: Alan Stern Cc: stable Link: https://lore.kernel.org/r/20190804235044.22327-1-gavinli@thegavinli.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/devio.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c index b265ab5405f9..9063ede411ae 100644 --- a/drivers/usb/core/devio.c +++ b/drivers/usb/core/devio.c @@ -1812,8 +1812,6 @@ static int proc_do_submiturb(struct usb_dev_state *ps, struct usbdevfs_urb *uurb return 0; error: - if (as && as->usbm) - dec_usb_memory_use_count(as->usbm, &as->usbm->urb_use_count); kfree(isopkt); kfree(dr); if (as) From 0dda5907b0fc60f72f67f479f224e02c95d06e21 Mon Sep 17 00:00:00 2001 From: Nishad Kamdar Date: Sat, 3 Aug 2019 19:43:35 +0530 Subject: [PATCH 151/482] i2c: stm32: Use the correct style for SPDX License Identifier This patch corrects the SPDX License Identifier style in header file related to STM32 Driver for I2C hardware bus support. For C header files Documentation/process/license-rules.rst mandates C-like comments (opposed to C source files where C++ style should be used) Changes made by using a script provided by Joe Perches here: https://lkml.org/lkml/2019/2/7/46 Suggested-by: Joe Perches Signed-off-by: Nishad Kamdar Signed-off-by: Greg Kroah-Hartman --- drivers/i2c/busses/i2c-stm32.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-stm32.h b/drivers/i2c/busses/i2c-stm32.h index 868755f82f88..2c21893905a3 100644 --- a/drivers/i2c/busses/i2c-stm32.h +++ b/drivers/i2c/busses/i2c-stm32.h @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +/* SPDX-License-Identifier: GPL-2.0 */ /* * i2c-stm32.h * From 82a5008a341d301da3ab529ca888c64f529bd075 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nuno=20S=C3=A1?= Date: Mon, 5 Aug 2019 15:37:16 +0200 Subject: [PATCH 152/482] iio: frequency: adf4371: Fix output frequency setting MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The fract1 word was not being properly programmed on the device leading to wrong output frequencies. Fixes: 7f699bd14913 (iio: frequency: adf4371: Add support for ADF4371 PLL) Signed-off-by: Nuno Sá Reviewed-by: Stefan Popa Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/frequency/adf4371.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/iio/frequency/adf4371.c b/drivers/iio/frequency/adf4371.c index e48f15cc9ab5..ff82863cbf42 100644 --- a/drivers/iio/frequency/adf4371.c +++ b/drivers/iio/frequency/adf4371.c @@ -276,11 +276,11 @@ static int adf4371_set_freq(struct adf4371_state *st, unsigned long long freq, st->buf[0] = st->integer >> 8; st->buf[1] = 0x40; /* REG12 default */ st->buf[2] = 0x00; - st->buf[3] = st->fract2 & 0xFF; - st->buf[4] = st->fract2 >> 7; - st->buf[5] = st->fract2 >> 15; + st->buf[3] = st->fract1 & 0xFF; + st->buf[4] = st->fract1 >> 8; + st->buf[5] = st->fract1 >> 16; st->buf[6] = ADF4371_FRAC2WORD_L(st->fract2 & 0x7F) | - ADF4371_FRAC1WORD(st->fract1 >> 23); + ADF4371_FRAC1WORD(st->fract1 >> 24); st->buf[7] = ADF4371_FRAC2WORD_H(st->fract2 >> 7); st->buf[8] = st->mod2 & 0xFF; st->buf[9] = ADF4371_MOD2WORD(st->mod2 >> 8); From b9ddd5091160793ee9fac10da765cf3f53d2aaf0 Mon Sep 17 00:00:00 2001 From: Jacopo Mondi Date: Mon, 5 Aug 2019 17:55:15 +0200 Subject: [PATCH 153/482] iio: adc: max9611: Fix temperature reading in probe The max9611 driver reads the die temperature at probe time to validate the communication channel. Use the actual read value to perform the test instead of the read function return value, which was mistakenly used so far. The temperature reading test was only successful because the 0 return value is in the range of supported temperatures. Fixes: 69780a3bbc0b ("iio: adc: Add Maxim max9611 ADC driver") Signed-off-by: Jacopo Mondi Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/adc/max9611.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/adc/max9611.c b/drivers/iio/adc/max9611.c index 0e3c6529fc4c..da073d72f649 100644 --- a/drivers/iio/adc/max9611.c +++ b/drivers/iio/adc/max9611.c @@ -480,7 +480,7 @@ static int max9611_init(struct max9611_dev *max9611) if (ret) return ret; - regval = ret & MAX9611_TEMP_MASK; + regval &= MAX9611_TEMP_MASK; if ((regval > MAX9611_TEMP_MAX_POS && regval < MAX9611_TEMP_MIN_NEG) || From cc798c83898ea0a77fcaa1a92afda35c3c3ded74 Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Mon, 5 Aug 2019 10:34:04 -0700 Subject: [PATCH 154/482] kernfs: fix memleak in kernel_ops_readdir() If getdents64 is killed or hits on segfault, it'll leave cgroups directories in sysfs pinned leaking memory because the kernfs node won't be freed on rmdir and the parent neither. Repro: # for i in `seq 1000`; do mkdir $i; done # rmdir * # for i in `seq 1000`; do mkdir $i; done # rmdir * # for i in `seq 1000`; do while :; do ls $i/ >/dev/null; done & done # while :; do killall ls; done kernfs_node_cache in /proc/slabinfo keeps going up as expected. Signed-off-by: Andrea Arcangeli Signed-off-by: Tejun Heo Cc: stable@vger.kernel.org # goes way back to original sysfs days Link: https://lore.kernel.org/r/20190805173404.GF136335@devbig004.ftw2.facebook.com Signed-off-by: Greg Kroah-Hartman --- fs/kernfs/dir.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c index a387534c9577..1e98efc2bf6d 100644 --- a/fs/kernfs/dir.c +++ b/fs/kernfs/dir.c @@ -1684,11 +1684,14 @@ static int kernfs_fop_readdir(struct file *file, struct dir_context *ctx) kernfs_get(pos); mutex_unlock(&kernfs_mutex); - if (!dir_emit(ctx, name, len, ino, type)) - return 0; + if (unlikely(!dir_emit(ctx, name, len, ino, type))) { + kernfs_put(pos); + goto out; + } mutex_lock(&kernfs_mutex); } mutex_unlock(&kernfs_mutex); +out: file->private_data = NULL; ctx->pos = INT_MAX; return 0; From 49da065f7b1f27be625de65d6d55bdd22ac6b5c2 Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Fri, 26 Jul 2019 12:31:30 -0600 Subject: [PATCH 155/482] NTB/msi: remove incorrect MODULE defines msi.c is not a module on its own right and should not have the MODULE_[LICENSE|VERSION|AUTHOR|DESCRIPTION] definitions. This caused a regression noticed by lkp with the following back trace: WARNING: CPU: 0 PID: 1 at kernel/params.c:861 param_sysfs_init+0xb1/0x20a Modules linked in: CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.2.0-rc1-00018-g26b3a37b928457 #2 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1 04/01/2014 RIP: 0010:param_sysfs_init+0xb1/0x20a Code: 24 38 e8 ec 17 2e fd 49 8b 7c 24 38 e8 76 fe ff ff 48 85 c0 48 89 c5 74 25 31 d2 4c 89 e6 48 89 c7 e8 6d 6f 3c fd 85 c0 74 02 <0f> 0b 48 89 ef 31 f6 e8 5d 70 a7 fe 48 89 ef e8 95 52 a7 fe 48 83 RSP: 0000:ffff88806b0ffe30 EFLAGS: 00010282 RAX: 00000000ffffffef RBX: ffffffff83774220 RCX: ffff88806a85e880 RDX: 00000000ffffffef RSI: ffff88806b000400 RDI: ffff88806a8608c0 RBP: ffff88806b392000 R08: ffffed100d61ff59 R09: ffffed100d61ff59 R10: 0000000000000001 R11: ffffed100d61ff58 R12: ffffffff83974bc0 R13: 0000000000000004 R14: 0000000000000028 R15: 00000000000003b9 FS: 0000000000000000(0000) GS:ffff88806b800000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 000000000380e000 CR4: 00000000000406b0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: ? file_caps_disable+0x10/0x10 ? locate_module_kobject+0xf2/0xf2 do_one_initcall+0x47/0x1f0 kernel_init_freeable+0x1b1/0x243 ? rest_init+0xd0/0xd0 kernel_init+0xa/0x130 ? calculate_sigpending+0x63/0x80 ? rest_init+0xd0/0xd0 ret_from_fork+0x1f/0x30 ---[ end trace 78201497ae74cc91 ]--- Reported-by: kernel test robot Fixes: 26b3a37b9284 ("NTB: Introduce MSI library") Signed-off-by: Logan Gunthorpe Signed-off-by: Jon Mason --- drivers/ntb/msi.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/ntb/msi.c b/drivers/ntb/msi.c index 9dddf133658f..0a5e884a920c 100644 --- a/drivers/ntb/msi.c +++ b/drivers/ntb/msi.c @@ -6,11 +6,6 @@ #include #include -MODULE_LICENSE("Dual BSD/GPL"); -MODULE_VERSION("0.1"); -MODULE_AUTHOR("Logan Gunthorpe "); -MODULE_DESCRIPTION("NTB MSI Interrupt Library"); - struct ntb_msi { u64 base_addr; u64 end_addr; From b33d567560c1aadf3033290d74d4fd67af47aa61 Mon Sep 17 00:00:00 2001 From: zhengbin Date: Mon, 8 Jul 2019 20:42:18 +0800 Subject: [PATCH 156/482] auxdisplay: panel: need to delete scan_timer when misc_register fails in panel_attach In panel_attach, if misc_register fails, we need to delete scan_timer, which was setup in keypad_init->init_scan_timer. Reported-by: Hulk Robot Signed-off-by: zhengbin Signed-off-by: Miguel Ojeda --- drivers/auxdisplay/panel.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/auxdisplay/panel.c b/drivers/auxdisplay/panel.c index e06de63497cf..e6bd727da503 100644 --- a/drivers/auxdisplay/panel.c +++ b/drivers/auxdisplay/panel.c @@ -1617,6 +1617,8 @@ static void panel_attach(struct parport *port) return; err_lcd_unreg: + if (scan_timer.function) + del_timer_sync(&scan_timer); if (lcd.enabled) charlcd_unregister(lcd.charlcd); err_unreg_device: From ba2c1340d7c8e1ba5957d814ec18592b315fd4de Mon Sep 17 00:00:00 2001 From: Mans Rullgard Date: Thu, 4 Jul 2019 20:33:54 +0100 Subject: [PATCH 157/482] auxdisplay: charlcd: add help text for backlight initial state While the individual CHARLCD_BL_xxx options have help texts, the menu itself does not. Fix this. Suggested-by: Linus Torvalds Signed-off-by: Mans Rullgard [Added a bit more text to address Linus' suggestion] Signed-off-by: Miguel Ojeda --- drivers/auxdisplay/Kconfig | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/auxdisplay/Kconfig b/drivers/auxdisplay/Kconfig index dd61fdd400f0..68489d1f00bb 100644 --- a/drivers/auxdisplay/Kconfig +++ b/drivers/auxdisplay/Kconfig @@ -448,6 +448,11 @@ config PANEL_BOOT_MESSAGE choice prompt "Backlight initial state" default CHARLCD_BL_FLASH + ---help--- + Select the initial backlight state on boot or module load. + + Previously, there was no option for this: the backlight flashed + briefly on init. Now you can also turn it off/on. config CHARLCD_BL_OFF bool "Off" From d75996dd022b6d83bd14af59b2775b1aa639e4b9 Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Fri, 2 Aug 2019 15:29:56 -0400 Subject: [PATCH 158/482] dax: dax_layout_busy_page() should not unmap cow pages Vivek: "As of now dax_layout_busy_page() calls unmap_mapping_range() with last argument as 1, which says even unmap cow pages. I am wondering who needs to get rid of cow pages as well. I noticed one interesting side affect of this. I mount xfs with -o dax and mmaped a file with MAP_PRIVATE and wrote some data to a page which created cow page. Then I called fallocate() on that file to zero a page of file. fallocate() called dax_layout_busy_page() which unmapped cow pages as well and then I tried to read back the data I wrote and what I get is old data from persistent memory. I lost the data I had written. This read basically resulted in new fault and read back the data from persistent memory. This sounds wrong. Are there any users which need to unmap cow pages as well? If not, I am proposing changing it to not unmap cow pages. I noticed this while while writing virtio_fs code where when I tried to reclaim a memory range and that corrupted the executable and I was running from virtio-fs and program got segment violation." Dan: "In fact the unmap_mapping_range() in this path is only to synchronize against get_user_pages_fast() and force it to call back into the filesystem to re-establish the mapping. COW pages should be left untouched by dax_layout_busy_page()." Cc: Fixes: 5fac7408d828 ("mm, fs, dax: handle layout changes to pinned dax mappings") Signed-off-by: Vivek Goyal Link: https://lore.kernel.org/r/20190802192956.GA3032@redhat.com Signed-off-by: Dan Williams --- fs/dax.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/dax.c b/fs/dax.c index b64964ef44f6..6bf81f931de3 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -600,7 +600,7 @@ struct page *dax_layout_busy_page(struct address_space *mapping) * guaranteed to either see new references or prevent new * references from being established. */ - unmap_mapping_range(mapping, 0, 0, 1); + unmap_mapping_range(mapping, 0, 0, 0); xas_lock_irq(&xas); xas_for_each(&xas, entry, ULONG_MAX) { From e99c63e4d86d3a94818693147b469fa70de6f945 Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Mon, 22 Jul 2019 11:34:59 -0700 Subject: [PATCH 159/482] SMB3: Fix deadlock in validate negotiate hits reconnect Currently we skip SMB2_TREE_CONNECT command when checking during reconnect because Tree Connect happens when establishing an SMB session. For SMB 3.0 protocol version the code also calls validate negotiate which results in SMB2_IOCL command being sent over the wire. This may deadlock on trying to acquire a mutex when checking for reconnect. Fix this by skipping SMB2_IOCL command when doing the reconnect check. Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French Reviewed-by: Ronnie Sahlberg CC: Stable --- fs/cifs/smb2pdu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index c8cd7b6cdda2..33efc5fb293c 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -252,7 +252,7 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon) if (tcon == NULL) return 0; - if (smb2_command == SMB2_TREE_CONNECT) + if (smb2_command == SMB2_TREE_CONNECT || smb2_command == SMB2_IOCTL) return 0; if (tcon->tidStatus == CifsExiting) { From 3edeb4a4146dc3b54d6fa71b7ee0585cb52ebfdf Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Mon, 22 Jul 2019 11:38:22 -0700 Subject: [PATCH 160/482] SMB3: Fix potential memory leak when processing compound chain When a reconnect happens in the middle of processing a compound chain the code leaks a buffer from the memory pool. Fix this by properly checking for a return code and freeing buffers in case of error. Also maintain a buf variable to be equal to either smallbuf or bigbuf depending on a response buffer size while parsing a chain and when returning to the caller. Signed-off-by: Pavel Shilovsky Reviewed-by: Ronnie Sahlberg Signed-off-by: Steve French --- fs/cifs/smb2ops.c | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index a5bc1b671c12..19589922ef2b 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -4070,7 +4070,6 @@ receive_encrypted_standard(struct TCP_Server_Info *server, { int ret, length; char *buf = server->smallbuf; - char *tmpbuf; struct smb2_sync_hdr *shdr; unsigned int pdu_length = server->pdu_size; unsigned int buf_size; @@ -4100,18 +4099,15 @@ receive_encrypted_standard(struct TCP_Server_Info *server, return length; next_is_large = server->large_buf; - one_more: +one_more: shdr = (struct smb2_sync_hdr *)buf; if (shdr->NextCommand) { - if (next_is_large) { - tmpbuf = server->bigbuf; + if (next_is_large) next_buffer = (char *)cifs_buf_get(); - } else { - tmpbuf = server->smallbuf; + else next_buffer = (char *)cifs_small_buf_get(); - } memcpy(next_buffer, - tmpbuf + le32_to_cpu(shdr->NextCommand), + buf + le32_to_cpu(shdr->NextCommand), pdu_length - le32_to_cpu(shdr->NextCommand)); } @@ -4140,12 +4136,21 @@ receive_encrypted_standard(struct TCP_Server_Info *server, pdu_length -= le32_to_cpu(shdr->NextCommand); server->large_buf = next_is_large; if (next_is_large) - server->bigbuf = next_buffer; + server->bigbuf = buf = next_buffer; else - server->smallbuf = next_buffer; - - buf += le32_to_cpu(shdr->NextCommand); + server->smallbuf = buf = next_buffer; goto one_more; + } else if (ret != 0) { + /* + * ret != 0 here means that we didn't get to handle_mid() thus + * server->smallbuf and server->bigbuf are still valid. We need + * to free next_buffer because it is not going to be used + * anywhere. + */ + if (next_is_large) + free_rsp_buf(CIFS_LARGE_BUFFER, next_buffer); + else + free_rsp_buf(CIFS_SMALL_BUFFER, next_buffer); } return ret; From 8d33096a460d5b9bd13300f01615df5bb454db10 Mon Sep 17 00:00:00 2001 From: Steve French Date: Thu, 25 Jul 2019 18:13:10 -0500 Subject: [PATCH 161/482] smb3: send CAP_DFS capability during session setup We had a report of a server which did not do a DFS referral because the session setup Capabilities field was set to 0 (unlike negotiate protocol where we set CAP_DFS). Better to send it session setup in the capabilities as well (this also more closely matches Windows client behavior). Signed-off-by: Steve French Reviewed-off-by: Ronnie Sahlberg Reviewed-by: Pavel Shilovsky CC: Stable --- fs/cifs/smb2pdu.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 33efc5fb293c..31e4a1b0b170 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -1196,7 +1196,12 @@ SMB2_sess_alloc_buffer(struct SMB2_sess_data *sess_data) else req->SecurityMode = 0; +#ifdef CONFIG_CIFS_DFS_UPCALL + req->Capabilities = cpu_to_le32(SMB2_GLOBAL_CAP_DFS); +#else req->Capabilities = 0; +#endif /* DFS_UPCALL */ + req->Channel = 0; /* MBZ */ sess_data->iov[0].iov_base = (char *)req; From 108624eaff6f17f1966e4a94dc7bc5df2c5115ee Mon Sep 17 00:00:00 2001 From: Steve French Date: Wed, 31 Jul 2019 16:41:03 -0500 Subject: [PATCH 162/482] smb3: update TODO list of missing features minor cleanup of documentation, updating to more current status. Signed-off-by: Steve French --- Documentation/filesystems/cifs/TODO | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/Documentation/filesystems/cifs/TODO b/Documentation/filesystems/cifs/TODO index 9267f3fb131f..edbbccda1942 100644 --- a/Documentation/filesystems/cifs/TODO +++ b/Documentation/filesystems/cifs/TODO @@ -13,7 +13,8 @@ a) SMB3 (and SMB3.1.1) missing optional features: - T10 copy offload ie "ODX" (copy chunk, and "Duplicate Extents" ioctl currently the only two server side copy mechanisms supported) -b) improved sparse file support +b) improved sparse file support (fiemap and SEEK_HOLE are implemented +but additional features would be supportable by the protocol). c) Directory entry caching relies on a 1 second timer, rather than using Directory Leases, currently only the root file handle is cached longer @@ -21,9 +22,13 @@ using Directory Leases, currently only the root file handle is cached longer d) quota support (needs minor kernel change since quota calls to make it to network filesystems or deviceless filesystems) -e) Additional use cases where we use "compoounding" (e.g. open/query/close -and open/setinfo/close) to reduce the number of roundtrips, and also -open to reduce redundant opens (using deferred close and reference counts more). +e) Additional use cases can be optimized to use "compounding" +(e.g. open/query/close and open/setinfo/close) to reduce the number +of roundtrips to the server and improve performance. Various cases +(stat, statfs, create, unlink, mkdir) already have been improved by +using compounding but more can be done. In addition we could significantly +reduce redundant opens by using deferred close (with handle caching leases) +and better using reference counters on file handles. f) Finish inotify support so kde and gnome file list windows will autorefresh (partially complete by Asser). Needs minor kernel @@ -43,18 +48,17 @@ mount or a per server basis to client UIDs or nobody if no mapping exists. Also better integration with winbind for resolving SID owners k) Add tools to take advantage of more smb3 specific ioctls and features -(passthrough ioctl/fsctl for sending various SMB3 fsctls to the server -is in progress, and a passthrough query_info call is already implemented -in cifs.ko to allow smb3 info levels queries to be sent from userspace) +(passthrough ioctl/fsctl is now implemented in cifs.ko to allow sending +various SMB3 fsctls and query info and set info calls directly from user space) +Add tools to make setting various non-POSIX metadata attributes easier +from tools (e.g. extending what was done in smb-info tool). l) encrypted file support m) improved stats gathering tools (perhaps integration with nfsometer?) to extend and make easier to use what is currently in /proc/fs/cifs/Stats -n) allow setting more NTFS/SMB3 file attributes remotely (currently limited to compressed -file attribute via chflags) and improve user space tools for managing and -viewing them. +n) Add support for claims based ACLs ("DAC") o) mount helper GUI (to simplify the various configuration options on mount) @@ -82,6 +86,8 @@ so far). w) Add support for additional strong encryption types, and additional spnego authentication mechanisms (see MS-SMB2) +x) Finish support for SMB3.1.1 compression + KNOWN BUGS ==================================== See http://bugzilla.samba.org - search on product "CifsVFS" for From ee9d66182392695535cc9fccfcb40c16f72de2a9 Mon Sep 17 00:00:00 2001 From: Sebastien Tisserant Date: Thu, 1 Aug 2019 12:06:08 -0500 Subject: [PATCH 163/482] SMB3: Kernel oops mounting a encryptData share with CONFIG_DEBUG_VIRTUAL Fix kernel oops when mounting a encryptData CIFS share with CONFIG_DEBUG_VIRTUAL Signed-off-by: Sebastien Tisserant Reviewed-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/smb2ops.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 19589922ef2b..64a5864127be 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -3489,7 +3489,15 @@ fill_transform_hdr(struct smb2_transform_hdr *tr_hdr, unsigned int orig_len, static inline void smb2_sg_set_buf(struct scatterlist *sg, const void *buf, unsigned int buflen) { - sg_set_page(sg, virt_to_page(buf), buflen, offset_in_page(buf)); + void *addr; + /* + * VMAP_STACK (at least) puts stack into the vmalloc address space + */ + if (is_vmalloc_addr(buf)) + addr = vmalloc_to_page(buf); + else + addr = virt_to_page(buf); + sg_set_page(sg, addr, buflen, offset_in_page(buf)); } /* Assumes the first rqst has a transform header as the first iov. From 7c0767643f3b6b0dd2cda923ae37a18590d431cf Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 6 Aug 2019 11:15:06 +0100 Subject: [PATCH 164/482] ASoC: max98373: Remove executable bits Signed-off-by: Mark Brown --- sound/soc/codecs/max98373.c | 0 sound/soc/codecs/max98373.h | 0 2 files changed, 0 insertions(+), 0 deletions(-) mode change 100755 => 100644 sound/soc/codecs/max98373.c mode change 100755 => 100644 sound/soc/codecs/max98373.h diff --git a/sound/soc/codecs/max98373.c b/sound/soc/codecs/max98373.c old mode 100755 new mode 100644 diff --git a/sound/soc/codecs/max98373.h b/sound/soc/codecs/max98373.h old mode 100755 new mode 100644 From a67060201b746a308b1674f66bf289c9faef6d09 Mon Sep 17 00:00:00 2001 From: Wenwen Wang Date: Tue, 6 Aug 2019 03:00:27 -0400 Subject: [PATCH 165/482] ALSA: usb-audio: fix a memory leak bug In snd_usb_get_audioformat_uac3(), a structure for channel maps 'chmap' is allocated through kzalloc() before the execution goto 'found_clock'. However, this structure is not deallocated if the memory allocation for 'pd' fails, leading to a memory leak bug. To fix the above issue, free 'fp->chmap' before returning NULL. Fixes: 7edf3b5e6a45 ("ALSA: usb-audio: AudioStreaming Power Domain parsing") Signed-off-by: Wenwen Wang Cc: Signed-off-by: Takashi Iwai --- sound/usb/stream.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/usb/stream.c b/sound/usb/stream.c index 7ee9d17d0143..e852c7fd6109 100644 --- a/sound/usb/stream.c +++ b/sound/usb/stream.c @@ -1043,6 +1043,7 @@ snd_usb_get_audioformat_uac3(struct snd_usb_audio *chip, pd = kzalloc(sizeof(*pd), GFP_KERNEL); if (!pd) { + kfree(fp->chmap); kfree(fp->rate_table); kfree(fp); return NULL; From f4904815f97a934258445a8f763f6b6c48f007e7 Mon Sep 17 00:00:00 2001 From: Dietmar Eggemann Date: Fri, 2 Aug 2019 15:59:43 +0100 Subject: [PATCH 166/482] sched/deadline: Fix double accounting of rq/running bw in push & pull {push,pull}_dl_task() always calls {de,}activate_task() with .flags=0 which sets p->on_rq=TASK_ON_RQ_MIGRATING. {push,pull}_dl_task()->{de,}activate_task()->{de,en}queue_task()-> {de,en}queue_task_dl() calls {sub,add}_{running,rq}_bw() since p->on_rq==TASK_ON_RQ_MIGRATING. So {sub,add}_{running,rq}_bw() in {push,pull}_dl_task() is double-accounting for that task. Fix it by removing rq/running bw accounting in [push/pull]_dl_task(). Fixes: 7dd778841164 ("sched/core: Unify p->on_rq updates") Signed-off-by: Dietmar Eggemann Signed-off-by: Peter Zijlstra (Intel) Cc: Valentin Schneider Cc: Ingo Molnar Cc: Luca Abeni Cc: Daniel Bristot de Oliveira Cc: Juri Lelli Cc: Qais Yousef Link: https://lkml.kernel.org/r/20190802145945.18702-2-dietmar.eggemann@arm.com --- kernel/sched/deadline.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index ef5b9f6b1d42..46122edd8552 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -2088,17 +2088,13 @@ static int push_dl_task(struct rq *rq) } deactivate_task(rq, next_task, 0); - sub_running_bw(&next_task->dl, &rq->dl); - sub_rq_bw(&next_task->dl, &rq->dl); set_task_cpu(next_task, later_rq->cpu); - add_rq_bw(&next_task->dl, &later_rq->dl); /* * Update the later_rq clock here, because the clock is used * by the cpufreq_update_util() inside __add_running_bw(). */ update_rq_clock(later_rq); - add_running_bw(&next_task->dl, &later_rq->dl); activate_task(later_rq, next_task, ENQUEUE_NOCLOCK); ret = 1; @@ -2186,11 +2182,7 @@ static void pull_dl_task(struct rq *this_rq) resched = true; deactivate_task(src_rq, p, 0); - sub_running_bw(&p->dl, &src_rq->dl); - sub_rq_bw(&p->dl, &src_rq->dl); set_task_cpu(p, this_cpu); - add_rq_bw(&p->dl, &this_rq->dl); - add_running_bw(&p->dl, &this_rq->dl); activate_task(this_rq, p, 0); dmin = p->dl.deadline; From 14f5c7b46a41a595fc61db37f55721714729e59e Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 1 Aug 2019 12:41:31 +0200 Subject: [PATCH 167/482] sched/psi: Reduce psimon FIFO priority PSI defaults to a FIFO-99 thread, reduce this to FIFO-1. FIFO-99 is the very highest priority available to SCHED_FIFO and it not a suitable default; it would indicate the psi work is the most important work on the machine. Since Real-Time tasks will have pre-allocated memory and locked it in place, Real-Time tasks do not care about PSI. All it needs is to be above OTHER. Signed-off-by: Peter Zijlstra (Intel) Acked-by: Johannes Weiner Tested-by: Suren Baghdasaryan Cc: Thomas Gleixner --- kernel/sched/psi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c index 7acc632c3b82..7fe2c5fd26b5 100644 --- a/kernel/sched/psi.c +++ b/kernel/sched/psi.c @@ -1051,7 +1051,7 @@ struct psi_trigger *psi_trigger_create(struct psi_group *group, if (!rcu_access_pointer(group->poll_kworker)) { struct sched_param param = { - .sched_priority = MAX_RT_PRIO - 1, + .sched_priority = 1, }; struct kthread_worker *kworker; From 04e048cf09d7b5fc995817cdc5ae1acd4482429c Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Mon, 29 Jul 2019 18:33:10 -0700 Subject: [PATCH 168/482] sched/psi: Do not require setsched permission from the trigger creator When a process creates a new trigger by writing into /proc/pressure/* files, permissions to write such a file should be used to determine whether the process is allowed to do so or not. Current implementation would also require such a process to have setsched capability. Setting of psi trigger thread's scheduling policy is an implementation detail and should not be exposed to the user level. Remove the permission check by using _nocheck version of the function. Suggested-by: Nick Kralevich Signed-off-by: Suren Baghdasaryan Signed-off-by: Peter Zijlstra (Intel) Cc: lizefan@huawei.com Cc: mingo@redhat.com Cc: akpm@linux-foundation.org Cc: kernel-team@android.com Cc: dennisszhou@gmail.com Cc: dennis@kernel.org Cc: hannes@cmpxchg.org Cc: axboe@kernel.dk Link: https://lkml.kernel.org/r/20190730013310.162367-1-surenb@google.com --- kernel/sched/psi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c index 7fe2c5fd26b5..23fbbcc414d5 100644 --- a/kernel/sched/psi.c +++ b/kernel/sched/psi.c @@ -1061,7 +1061,7 @@ struct psi_trigger *psi_trigger_create(struct psi_group *group, mutex_unlock(&group->trigger_lock); return ERR_CAST(kworker); } - sched_setscheduler(kworker->task, SCHED_FIFO, ¶m); + sched_setscheduler_nocheck(kworker->task, SCHED_FIFO, ¶m); kthread_init_delayed_work(&group->poll_work, psi_poll_work); rcu_assign_pointer(group->poll_kworker, kworker); From a287a49e672d9762bb85de117b477bdf3ef20bd5 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Fri, 2 Aug 2019 13:27:22 +0200 Subject: [PATCH 169/482] s390/protvirt: avoid memory sharing for diag 308 set/store This reverts commit db9492cef45e ("s390/protvirt: add memory sharing for diag 308 set/store") which due to ultravisor implementation change is not needed after all. Fixes: db9492cef45e ("s390/protvirt: add memory sharing for diag 308 set/store") Reviewed-by: Janosch Frank Signed-off-by: Vasily Gorbik --- arch/s390/boot/ipl_parm.c | 2 -- arch/s390/kernel/ipl.c | 9 --------- 2 files changed, 11 deletions(-) diff --git a/arch/s390/boot/ipl_parm.c b/arch/s390/boot/ipl_parm.c index 3c49bde8aa5e..b8aa6a9f937b 100644 --- a/arch/s390/boot/ipl_parm.c +++ b/arch/s390/boot/ipl_parm.c @@ -48,9 +48,7 @@ void store_ipl_parmblock(void) { int rc; - uv_set_shared(__pa(&ipl_block)); rc = __diag308(DIAG308_STORE, &ipl_block); - uv_remove_shared(__pa(&ipl_block)); if (rc == DIAG308_RC_OK && ipl_block.hdr.version <= IPL_MAX_SUPPORTED_VERSION) ipl_block_valid = 1; diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index 2c0a515428d6..6837affc19e8 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -31,7 +31,6 @@ #include #include #include -#include #include "entry.h" #define IPL_PARM_BLOCK_VERSION 0 @@ -892,21 +891,15 @@ static void __reipl_run(void *unused) { switch (reipl_type) { case IPL_TYPE_CCW: - uv_set_shared(__pa(reipl_block_ccw)); diag308(DIAG308_SET, reipl_block_ccw); - uv_remove_shared(__pa(reipl_block_ccw)); diag308(DIAG308_LOAD_CLEAR, NULL); break; case IPL_TYPE_FCP: - uv_set_shared(__pa(reipl_block_fcp)); diag308(DIAG308_SET, reipl_block_fcp); - uv_remove_shared(__pa(reipl_block_fcp)); diag308(DIAG308_LOAD_CLEAR, NULL); break; case IPL_TYPE_NSS: - uv_set_shared(__pa(reipl_block_nss)); diag308(DIAG308_SET, reipl_block_nss); - uv_remove_shared(__pa(reipl_block_nss)); diag308(DIAG308_LOAD_CLEAR, NULL); break; case IPL_TYPE_UNKNOWN: @@ -1176,9 +1169,7 @@ static struct kset *dump_kset; static void diag308_dump(void *dump_block) { - uv_set_shared(__pa(dump_block)); diag308(DIAG308_SET, dump_block); - uv_remove_shared(__pa(dump_block)); while (1) { if (diag308(DIAG308_LOAD_NORMAL_DUMP, NULL) != 0x302) break; From 8024b5a9fc2bed9a00f0bdba60b443fa3cc4bb5d Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Fri, 26 Jul 2019 08:23:20 +0200 Subject: [PATCH 170/482] s390/mm: fix dump_pagetables top level page table walking Since commit d1874a0c2805 ("s390/mm: make the pxd_offset functions more robust") behaviour of p4d_offset, pud_offset and pmd_offset has been changed so that they cannot be used to iterate through top level page table, because the index for the top level page table is now calculated in pgd_offset. To avoid dumping the very first region/segment top level table entry 2048 times simply iterate entry pointer like it is already done in other page walking cases. Fixes: d1874a0c2805 ("s390/mm: make the pxd_offset functions more robust") Reported-by: Ilya Leoshkevich Reviewed-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/mm/dump_pagetables.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/s390/mm/dump_pagetables.c b/arch/s390/mm/dump_pagetables.c index 3b93ba0b5d8d..5d67b81c704a 100644 --- a/arch/s390/mm/dump_pagetables.c +++ b/arch/s390/mm/dump_pagetables.c @@ -161,9 +161,9 @@ static void walk_pmd_level(struct seq_file *m, struct pg_state *st, } #endif - for (i = 0; i < PTRS_PER_PMD && addr < max_addr; i++) { + pmd = pmd_offset(pud, addr); + for (i = 0; i < PTRS_PER_PMD && addr < max_addr; i++, pmd++) { st->current_address = addr; - pmd = pmd_offset(pud, addr); if (!pmd_none(*pmd)) { if (pmd_large(*pmd)) { prot = pmd_val(*pmd) & @@ -192,9 +192,9 @@ static void walk_pud_level(struct seq_file *m, struct pg_state *st, } #endif - for (i = 0; i < PTRS_PER_PUD && addr < max_addr; i++) { + pud = pud_offset(p4d, addr); + for (i = 0; i < PTRS_PER_PUD && addr < max_addr; i++, pud++) { st->current_address = addr; - pud = pud_offset(p4d, addr); if (!pud_none(*pud)) if (pud_large(*pud)) { prot = pud_val(*pud) & @@ -222,9 +222,9 @@ static void walk_p4d_level(struct seq_file *m, struct pg_state *st, } #endif - for (i = 0; i < PTRS_PER_P4D && addr < max_addr; i++) { + p4d = p4d_offset(pgd, addr); + for (i = 0; i < PTRS_PER_P4D && addr < max_addr; i++, p4d++) { st->current_address = addr; - p4d = p4d_offset(pgd, addr); if (!p4d_none(*p4d)) walk_pud_level(m, st, p4d, addr); else From 218ddd5acfeae9e19a2fa97c284bf3c2e9de8ebd Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Fri, 2 Aug 2019 17:44:29 +0200 Subject: [PATCH 171/482] s390/setup: adjust start_code of init_mm to _text After some investigation it doesn't look like init_mm fields start_code/end_code are used anywhere besides potentially in dump_mm for debugging purposes. Originally the value of 0 for start_code reflected the presence of lowcore and early boot code. But with kaslr in place start_code/end_code range should not span over unoccupied by the code segment memory. So, adjust init_mm start_code to point at the beginning of the code segment like other architectures do it. Acked-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/kernel/setup.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 2b94b0ad3588..253177900950 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -1114,8 +1114,7 @@ void __init setup_arch(char **cmdline_p) ROOT_DEV = Root_RAM0; - /* Is init_mm really needed? */ - init_mm.start_code = PAGE_OFFSET; + init_mm.start_code = (unsigned long) _text; init_mm.end_code = (unsigned long) _etext; init_mm.end_data = (unsigned long) _edata; init_mm.brk = (unsigned long) _end; From fd0c7435d7bfd46161ef43b185f55d759022238c Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Mon, 5 Aug 2019 15:07:19 +0200 Subject: [PATCH 172/482] s390/unwind: remove stack recursion warning Remove pointless stack recursion on stack type ... warning, which only confuses people. There is no way to make backchain unwinder 100% reliable. When a task is interrupted in-between stack frame allocation and backchain write instructions new stack frame backchain pointer is left uninitialized (there are also sometimes additional instruction in-between stack frame allocation and backchain write instructions due to gcc shrink-wrapping). In attempt to unwind such stack the unwinder would still try to use that invalid backchain value and perform all kind of sanity checks on it to make sure we are not pointed out of stack. In some cases that invalid backchain value would be 0 and we would falsely treat next stackframe as pt_regs and again gprs[15] in those pt_regs might happen to point at some address within the task's stack. Acked-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/kernel/dumpstack.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c index ac06c3949ab3..34bdc60c0b11 100644 --- a/arch/s390/kernel/dumpstack.c +++ b/arch/s390/kernel/dumpstack.c @@ -114,12 +114,8 @@ int get_stack_info(unsigned long sp, struct task_struct *task, * If it comes up a second time then there's something wrong going on: * just break out and report an unknown stack type. */ - if (*visit_mask & (1UL << info->type)) { - printk_deferred_once(KERN_WARNING - "WARNING: stack recursion on stack type %d\n", - info->type); + if (*visit_mask & (1UL << info->type)) goto unknown; - } *visit_mask |= 1UL << info->type; return 0; unknown: From b9f23b7376c21f5bf7f5e2b7dfcb82cc9ce92dfc Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Mon, 5 Aug 2019 16:36:56 +0200 Subject: [PATCH 173/482] s390/head64: cleanup unused labels Cleanup labels in head64 some of which are not being used since git recorded history. Acked-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/kernel/head64.S | 7 ------- 1 file changed, 7 deletions(-) diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S index 5aea1a527443..f384a18e6c26 100644 --- a/arch/s390/kernel/head64.S +++ b/arch/s390/kernel/head64.S @@ -60,12 +60,5 @@ ENTRY(startup_continue) .align 16 .LPG1: -.Lpcmsk:.quad 0x0000000180000000 -.L4malign:.quad 0xffffffffffc00000 -.Lscan2g:.quad 0x80000000 + 0x20000 - 8 # 2GB + 128K - 8 -.Lnop: .long 0x07000700 -.Lparmaddr: - .quad PARMAREA - .align 64 .Ldw: .quad 0x0002000180000000,0x0000000000000000 .Laregs:.long 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 From 24350fdadbdec780406a1ef988e6cd3875e374a8 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Mon, 5 Aug 2019 14:25:16 +0200 Subject: [PATCH 174/482] s390: put _stext and _etext into .text section Perf relies on _etext and _stext symbols being one of 't', 'T', 'v' or 'V'. Put them into .text section to guarantee that. Also moves padding to page boundary inside .text which has an effect that .text section is now padded with nops rather than 0's, which apparently has been the initial intention for specifying 0x0700 fill expression. Reported-by: Thomas Richter Tested-by: Thomas Richter Suggested-by: Andreas Krebbel Signed-off-by: Vasily Gorbik --- arch/s390/kernel/vmlinux.lds.S | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S index 49d55327de0b..7e0eb4020917 100644 --- a/arch/s390/kernel/vmlinux.lds.S +++ b/arch/s390/kernel/vmlinux.lds.S @@ -32,10 +32,9 @@ PHDRS { SECTIONS { . = 0x100000; - _stext = .; /* Start of text section */ .text : { - /* Text and read-only data */ - _text = .; + _stext = .; /* Start of text section */ + _text = .; /* Text and read-only data */ HEAD_TEXT TEXT_TEXT SCHED_TEXT @@ -47,11 +46,10 @@ SECTIONS *(.text.*_indirect_*) *(.fixup) *(.gnu.warning) + . = ALIGN(PAGE_SIZE); + _etext = .; /* End of text section */ } :text = 0x0700 - . = ALIGN(PAGE_SIZE); - _etext = .; /* End of text section */ - NOTES :text :note .dummy : { *(.dummy) } :data From c1c6c877b0c79fd7e05c931435aa42211eaeebaf Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 6 Aug 2019 14:03:56 +0200 Subject: [PATCH 175/482] ALSA: hda - Don't override global PCM hw info flag The commit bfcba288b97f ("ALSA - hda: Add support for link audio time reporting") introduced the conditional PCM hw info setup, but it overwrites the global azx_pcm_hw object. This will cause a problem if any other HD-audio controller, as it'll inherit the same bit flag although another controller doesn't support that feature. Fix the bug by setting the PCM hw info flag locally. Fixes: bfcba288b97f ("ALSA - hda: Add support for link audio time reporting") Cc: Signed-off-by: Takashi Iwai --- sound/pci/hda/hda_controller.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/sound/pci/hda/hda_controller.c b/sound/pci/hda/hda_controller.c index c8d1b4316245..2fbdde239936 100644 --- a/sound/pci/hda/hda_controller.c +++ b/sound/pci/hda/hda_controller.c @@ -598,11 +598,9 @@ static int azx_pcm_open(struct snd_pcm_substream *substream) } runtime->private_data = azx_dev; - if (chip->gts_present) - azx_pcm_hw.info = azx_pcm_hw.info | - SNDRV_PCM_INFO_HAS_LINK_SYNCHRONIZED_ATIME; - runtime->hw = azx_pcm_hw; + if (chip->gts_present) + runtime->hw.info |= SNDRV_PCM_INFO_HAS_LINK_SYNCHRONIZED_ATIME; runtime->hw.channels_min = hinfo->channels_min; runtime->hw.channels_max = hinfo->channels_max; runtime->hw.formats = hinfo->formats; From db341a049ec7e87053c91008cb452d0bfa6dde72 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 6 Aug 2019 03:08:08 -0500 Subject: [PATCH 176/482] ata: rb532_cf: Fix unused variable warning in rb532_pata_driver_probe MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix the following warning (Building: rb532_defconfig mips): drivers/ata/pata_rb532_cf.c: In function ‘rb532_pata_driver_remove’: drivers/ata/pata_rb532_cf.c:161:24: warning: unused variable ‘info’ [-Wunused-variable] struct rb532_cf_info *info = ah->private_data; ^~~~ Fixes: cd56f35e52d9 ("ata: rb532_cf: Convert to use GPIO descriptors") Signed-off-by: Gustavo A. R. Silva Signed-off-by: Jens Axboe --- drivers/ata/pata_rb532_cf.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/ata/pata_rb532_cf.c b/drivers/ata/pata_rb532_cf.c index 7c37f2ff09e4..deae466395de 100644 --- a/drivers/ata/pata_rb532_cf.c +++ b/drivers/ata/pata_rb532_cf.c @@ -158,7 +158,6 @@ static int rb532_pata_driver_probe(struct platform_device *pdev) static int rb532_pata_driver_remove(struct platform_device *pdev) { struct ata_host *ah = platform_get_drvdata(pdev); - struct rb532_cf_info *info = ah->private_data; ata_host_detach(ah); From 8af23fad626173eed7cc02733874d4124049bd5e Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Mon, 29 Jul 2019 16:32:38 +0100 Subject: [PATCH 177/482] iommu/dma: Handle MSI mappings separately MSI pages must always be mapped into a device's *current* domain, which *might* be the default DMA domain, but might instead be a VFIO domain with its own MSI cookie. This subtlety got accidentally lost in the streamlining of __iommu_dma_map(), but rather than reintroduce more complexity and/or special-casing, it turns out neater to just split this path out entirely. Since iommu_dma_get_msi_page() already duplicates much of what __iommu_dma_map() does, it can easily just make the allocation and mapping calls directly as well. That way we can further streamline the helper back to exclusively operating on DMA domains. Fixes: b61d271e59d7 ("iommu/dma: Move domain lookup into __iommu_dma_{map,unmap}") Reported-by: Shameer Kolothum Reported-by: Andre Przywara Signed-off-by: Robin Murphy Tested-by: Marc Zyngier Tested-by: Andre Przywara Tested-by: Shameer Kolothum Signed-off-by: Joerg Roedel --- drivers/iommu/dma-iommu.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index a7f9c3edbcb2..6441197a75ea 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -459,13 +459,11 @@ static dma_addr_t __iommu_dma_map(struct device *dev, phys_addr_t phys, { struct iommu_domain *domain = iommu_get_dma_domain(dev); struct iommu_dma_cookie *cookie = domain->iova_cookie; - size_t iova_off = 0; + struct iova_domain *iovad = &cookie->iovad; + size_t iova_off = iova_offset(iovad, phys); dma_addr_t iova; - if (cookie->type == IOMMU_DMA_IOVA_COOKIE) { - iova_off = iova_offset(&cookie->iovad, phys); - size = iova_align(&cookie->iovad, size + iova_off); - } + size = iova_align(iovad, size + iova_off); iova = iommu_dma_alloc_iova(domain, size, dma_get_mask(dev), dev); if (!iova) @@ -1147,16 +1145,21 @@ static struct iommu_dma_msi_page *iommu_dma_get_msi_page(struct device *dev, if (!msi_page) return NULL; - iova = __iommu_dma_map(dev, msi_addr, size, prot); - if (iova == DMA_MAPPING_ERROR) + iova = iommu_dma_alloc_iova(domain, size, dma_get_mask(dev), dev); + if (!iova) goto out_free_page; + if (iommu_map(domain, iova, msi_addr, size, prot)) + goto out_free_iova; + INIT_LIST_HEAD(&msi_page->list); msi_page->phys = msi_addr; msi_page->iova = iova; list_add(&msi_page->list, &cookie->msi_page_list); return msi_page; +out_free_iova: + iommu_dma_free_iova(cookie, iova, size); out_free_page: kfree(msi_page); return NULL; From 11f4fe9ba3c85a4efac7ec25e75056a9b612d9da Mon Sep 17 00:00:00 2001 From: Anders Roxell Date: Tue, 30 Jul 2019 17:20:11 +0200 Subject: [PATCH 178/482] iommu/arm-smmu: Mark expected switch fall-through MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that -Wimplicit-fallthrough is passed to GCC by default, the following warning shows up: ../drivers/iommu/arm-smmu-v3.c: In function ‘arm_smmu_write_strtab_ent’: ../drivers/iommu/arm-smmu-v3.c:1189:7: warning: this statement may fall through [-Wimplicit-fallthrough=] if (disable_bypass) ^ ../drivers/iommu/arm-smmu-v3.c:1191:3: note: here default: ^~~~~~~ Rework so that the compiler doesn't warn about fall-through. Make it clearer by calling 'BUG_ON()' when disable_bypass is set, and always 'break;' Signed-off-by: Anders Roxell Acked-by: Will Deacon Signed-off-by: Joerg Roedel --- drivers/iommu/arm-smmu-v3.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index a9a9fabd3968..c5c93e48b4db 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -1186,8 +1186,8 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid, ste_live = true; break; case STRTAB_STE_0_CFG_ABORT: - if (disable_bypass) - break; + BUG_ON(!disable_bypass); + break; default: BUG(); /* STE corruption */ } From 458b7c8e0dde12d140e3472b80919cbb9ae793f4 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Thu, 1 Aug 2019 11:14:58 +0800 Subject: [PATCH 179/482] iommu/vt-d: Detach domain when move device out of group When removing a device from an iommu group, the domain should be detached from the device. Otherwise, the stale domain info will still be cached by the driver and the driver will refuse to attach any domain to the device again. Cc: Ashok Raj Cc: Jacob Pan Cc: Kevin Tian Fixes: b7297783c2bb6 ("iommu/vt-d: Remove duplicated code for device hotplug") Reported-and-tested-by: Vlad Buslov Suggested-by: Robin Murphy Link: https://lkml.org/lkml/2019/7/26/1133 Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index bdaed2da8a55..3e22fa6ae8c8 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -5316,6 +5316,8 @@ static void intel_iommu_remove_device(struct device *dev) if (!iommu) return; + dmar_remove_one_dev_info(dev); + iommu_group_remove_device(dev); iommu_device_unlink(&iommu->iommu, dev); From 37ad2e343449ced9e41505eae5494bf40f433181 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 6 Aug 2019 09:03:16 -0700 Subject: [PATCH 180/482] Input: applespi - use struct_size() helper One of the more common cases of allocation size calculations is finding the size of a structure that has a zero-sized array at the end, along with memory for some number of elements for that array. For example: struct touchpad_protocol { ... struct tp_finger fingers[0]; }; Make use of the struct_size() helper instead of an open-coded version in order to avoid any potential type mistakes. So, replace the following form: sizeof(*tp) + tp->number_of_fingers * sizeof(tp->fingers[0]); with: struct_size(tp, fingers, tp->number_of_fingers) This code was detected with the help of Coccinelle. Signed-off-by: Gustavo A. R. Silva Signed-off-by: Dmitry Torokhov --- drivers/input/keyboard/applespi.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/input/keyboard/applespi.c b/drivers/input/keyboard/applespi.c index acf34a5ff571..584289b67fb3 100644 --- a/drivers/input/keyboard/applespi.c +++ b/drivers/input/keyboard/applespi.c @@ -1494,8 +1494,7 @@ static void applespi_got_data(struct applespi_data *applespi) size_t tp_len; tp = &message->touchpad; - tp_len = sizeof(*tp) + - tp->number_of_fingers * sizeof(tp->fingers[0]); + tp_len = struct_size(tp, fingers, tp->number_of_fingers); if (le16_to_cpu(message->length) + 2 != tp_len) { dev_warn_ratelimited(&applespi->spi->dev, From 849f5ae3a513c550cad741c68dd3d7eb2bcc2a2c Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Tue, 6 Aug 2019 09:05:55 -0700 Subject: [PATCH 181/482] Input: iforce - add sanity checks The endpoint type should also be checked before a device is accepted. Reported-by: syzbot+5efc10c005014d061a74@syzkaller.appspotmail.com Signed-off-by: Oliver Neukum Signed-off-by: Dmitry Torokhov --- drivers/input/joystick/iforce/iforce-usb.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/input/joystick/iforce/iforce-usb.c b/drivers/input/joystick/iforce/iforce-usb.c index 29abfeeef9a5..6c554c11a7ac 100644 --- a/drivers/input/joystick/iforce/iforce-usb.c +++ b/drivers/input/joystick/iforce/iforce-usb.c @@ -201,7 +201,12 @@ static int iforce_usb_probe(struct usb_interface *intf, return -ENODEV; epirq = &interface->endpoint[0].desc; + if (!usb_endpoint_is_int_in(epirq)) + return -ENODEV; + epout = &interface->endpoint[1].desc; + if (!usb_endpoint_is_int_out(epout)) + return -ENODEV; iforce_usb = kzalloc(sizeof(*iforce_usb), GFP_KERNEL); if (!iforce_usb) From c6303c5d52d5ec3e5bce2e6a5480fa2a1baa45e6 Mon Sep 17 00:00:00 2001 From: Baolin Wang Date: Thu, 25 Jul 2019 11:14:22 +0800 Subject: [PATCH 182/482] mmc: sdhci-sprd: Fix the incorrect soft reset operation when runtime resuming The SD host controller specification defines 3 types software reset: software reset for data line, software reset for command line and software reset for all. Software reset for all means this reset affects the entire Host controller except for the card detection circuit. In sdhci_runtime_resume_host() we always do a software "reset for all", which causes the Spreadtrum variant controller to work abnormally after resuming. To fix the problem, let's do a software reset for the data and the command part, rather than "for all". However, as sdhci_runtime_resume() is a common sdhci function and we don't want to change the behaviour for other variants, let's introduce a new in-parameter for it. This enables the caller to decide if a "reset for all" shall be done or not. Signed-off-by: Baolin Wang Fixes: fb8bd90f83c4 ("mmc: sdhci-sprd: Add Spreadtrum's initial host controller") Cc: stable@vger.kernel.org Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-acpi.c | 2 +- drivers/mmc/host/sdhci-esdhc-imx.c | 2 +- drivers/mmc/host/sdhci-of-at91.c | 2 +- drivers/mmc/host/sdhci-pci-core.c | 4 ++-- drivers/mmc/host/sdhci-pxav3.c | 2 +- drivers/mmc/host/sdhci-s3c.c | 2 +- drivers/mmc/host/sdhci-sprd.c | 2 +- drivers/mmc/host/sdhci-xenon.c | 2 +- drivers/mmc/host/sdhci.c | 4 ++-- drivers/mmc/host/sdhci.h | 2 +- 10 files changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/mmc/host/sdhci-acpi.c b/drivers/mmc/host/sdhci-acpi.c index b3a130a9ee23..1604f512c7bd 100644 --- a/drivers/mmc/host/sdhci-acpi.c +++ b/drivers/mmc/host/sdhci-acpi.c @@ -883,7 +883,7 @@ static int sdhci_acpi_runtime_resume(struct device *dev) sdhci_acpi_byt_setting(&c->pdev->dev); - return sdhci_runtime_resume_host(c->host); + return sdhci_runtime_resume_host(c->host, 0); } #endif diff --git a/drivers/mmc/host/sdhci-esdhc-imx.c b/drivers/mmc/host/sdhci-esdhc-imx.c index c391510e9ef4..776a94216248 100644 --- a/drivers/mmc/host/sdhci-esdhc-imx.c +++ b/drivers/mmc/host/sdhci-esdhc-imx.c @@ -1705,7 +1705,7 @@ static int sdhci_esdhc_runtime_resume(struct device *dev) esdhc_pltfm_set_clock(host, imx_data->actual_clock); } - err = sdhci_runtime_resume_host(host); + err = sdhci_runtime_resume_host(host, 0); if (err) goto disable_ipg_clk; diff --git a/drivers/mmc/host/sdhci-of-at91.c b/drivers/mmc/host/sdhci-of-at91.c index e377b9bc55a4..d4e7e8b7be77 100644 --- a/drivers/mmc/host/sdhci-of-at91.c +++ b/drivers/mmc/host/sdhci-of-at91.c @@ -289,7 +289,7 @@ static int sdhci_at91_runtime_resume(struct device *dev) } out: - return sdhci_runtime_resume_host(host); + return sdhci_runtime_resume_host(host, 0); } #endif /* CONFIG_PM */ diff --git a/drivers/mmc/host/sdhci-pci-core.c b/drivers/mmc/host/sdhci-pci-core.c index 4041878eb0f3..7d06e2860c36 100644 --- a/drivers/mmc/host/sdhci-pci-core.c +++ b/drivers/mmc/host/sdhci-pci-core.c @@ -167,7 +167,7 @@ static int sdhci_pci_runtime_suspend_host(struct sdhci_pci_chip *chip) err_pci_runtime_suspend: while (--i >= 0) - sdhci_runtime_resume_host(chip->slots[i]->host); + sdhci_runtime_resume_host(chip->slots[i]->host, 0); return ret; } @@ -181,7 +181,7 @@ static int sdhci_pci_runtime_resume_host(struct sdhci_pci_chip *chip) if (!slot) continue; - ret = sdhci_runtime_resume_host(slot->host); + ret = sdhci_runtime_resume_host(slot->host, 0); if (ret) return ret; } diff --git a/drivers/mmc/host/sdhci-pxav3.c b/drivers/mmc/host/sdhci-pxav3.c index 3ddecf479295..e55037ceda73 100644 --- a/drivers/mmc/host/sdhci-pxav3.c +++ b/drivers/mmc/host/sdhci-pxav3.c @@ -554,7 +554,7 @@ static int sdhci_pxav3_runtime_resume(struct device *dev) if (!IS_ERR(pxa->clk_core)) clk_prepare_enable(pxa->clk_core); - return sdhci_runtime_resume_host(host); + return sdhci_runtime_resume_host(host, 0); } #endif diff --git a/drivers/mmc/host/sdhci-s3c.c b/drivers/mmc/host/sdhci-s3c.c index 8e4a8ba33f05..f5753aef7151 100644 --- a/drivers/mmc/host/sdhci-s3c.c +++ b/drivers/mmc/host/sdhci-s3c.c @@ -745,7 +745,7 @@ static int sdhci_s3c_runtime_resume(struct device *dev) clk_prepare_enable(busclk); if (ourhost->cur_clk >= 0) clk_prepare_enable(ourhost->clk_bus[ourhost->cur_clk]); - ret = sdhci_runtime_resume_host(host); + ret = sdhci_runtime_resume_host(host, 0); return ret; } #endif diff --git a/drivers/mmc/host/sdhci-sprd.c b/drivers/mmc/host/sdhci-sprd.c index 603a5d9f045a..83a4767ca680 100644 --- a/drivers/mmc/host/sdhci-sprd.c +++ b/drivers/mmc/host/sdhci-sprd.c @@ -696,7 +696,7 @@ static int sdhci_sprd_runtime_resume(struct device *dev) if (ret) goto clk_disable; - sdhci_runtime_resume_host(host); + sdhci_runtime_resume_host(host, 1); return 0; clk_disable: diff --git a/drivers/mmc/host/sdhci-xenon.c b/drivers/mmc/host/sdhci-xenon.c index 8a18f14cf842..1dea1ba66f7b 100644 --- a/drivers/mmc/host/sdhci-xenon.c +++ b/drivers/mmc/host/sdhci-xenon.c @@ -638,7 +638,7 @@ static int xenon_runtime_resume(struct device *dev) priv->restore_needed = false; } - ret = sdhci_runtime_resume_host(host); + ret = sdhci_runtime_resume_host(host, 0); if (ret) goto out; return 0; diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index 59acf8e3331e..a5dc5aae973e 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -3320,7 +3320,7 @@ int sdhci_runtime_suspend_host(struct sdhci_host *host) } EXPORT_SYMBOL_GPL(sdhci_runtime_suspend_host); -int sdhci_runtime_resume_host(struct sdhci_host *host) +int sdhci_runtime_resume_host(struct sdhci_host *host, int soft_reset) { struct mmc_host *mmc = host->mmc; unsigned long flags; @@ -3331,7 +3331,7 @@ int sdhci_runtime_resume_host(struct sdhci_host *host) host->ops->enable_dma(host); } - sdhci_init(host, 0); + sdhci_init(host, soft_reset); if (mmc->ios.power_mode != MMC_POWER_UNDEFINED && mmc->ios.power_mode != MMC_POWER_OFF) { diff --git a/drivers/mmc/host/sdhci.h b/drivers/mmc/host/sdhci.h index 89fd96596a1f..902f855efe8f 100644 --- a/drivers/mmc/host/sdhci.h +++ b/drivers/mmc/host/sdhci.h @@ -781,7 +781,7 @@ void sdhci_adma_write_desc(struct sdhci_host *host, void **desc, int sdhci_suspend_host(struct sdhci_host *host); int sdhci_resume_host(struct sdhci_host *host); int sdhci_runtime_suspend_host(struct sdhci_host *host); -int sdhci_runtime_resume_host(struct sdhci_host *host); +int sdhci_runtime_resume_host(struct sdhci_host *host, int soft_reset); #endif void sdhci_cqe_enable(struct mmc_host *mmc); From fa25eba6993b3750f417baabba169afaba076178 Mon Sep 17 00:00:00 2001 From: Kevin Hao Date: Fri, 26 Jul 2019 10:30:48 +0800 Subject: [PATCH 183/482] mmc: cavium: Set the correct dma max segment size for mmc_host We have set the mmc_host.max_seg_size to 8M, but the dma max segment size of PCI device is set to 64K by default in function pci_device_add(). The mmc_host.max_seg_size is used to set the max segment size of the blk queue. Then this mismatch will trigger a calltrace like below when a bigger than 64K segment request arrives at mmc dev. So we should consider the limitation of the cvm_mmc_host when setting the mmc_host.max_seg_size. DMA-API: thunderx_mmc 0000:01:01.4: mapping sg segment longer than device claims to support [len=131072] [max=65536] WARNING: CPU: 6 PID: 238 at kernel/dma/debug.c:1221 debug_dma_map_sg+0x2b8/0x350 Modules linked in: CPU: 6 PID: 238 Comm: kworker/6:1H Not tainted 5.3.0-rc1-next-20190724-yocto-standard+ #62 Hardware name: Marvell OcteonTX CN96XX board (DT) Workqueue: kblockd blk_mq_run_work_fn pstate: 80c00009 (Nzcv daif +PAN +UAO) pc : debug_dma_map_sg+0x2b8/0x350 lr : debug_dma_map_sg+0x2b8/0x350 sp : ffff00001770f9e0 x29: ffff00001770f9e0 x28: ffffffff00000000 x27: 00000000ffffffff x26: ffff800bc2c73180 x25: ffff000010e83700 x24: 0000000000000002 x23: 0000000000000001 x22: 0000000000000001 x21: 0000000000000000 x20: ffff800bc48ba0b0 x19: ffff800bc97e8c00 x18: ffffffffffffffff x17: 0000000000000000 x16: 0000000000000000 x15: ffff000010e835c8 x14: 6874207265676e6f x13: 6c20746e656d6765 x12: 7320677320676e69 x11: 7070616d203a342e x10: 31303a31303a3030 x9 : 303020636d6d5f78 x8 : 35363d78616d5b20 x7 : 00000000000002fd x6 : ffff000010fd57dc x5 : 0000000000000000 x4 : ffff0000106c61f0 x3 : 00000000ffffffff x2 : 0000800bee060000 x1 : 7010678df3041a00 x0 : 0000000000000000 Call trace: debug_dma_map_sg+0x2b8/0x350 cvm_mmc_request+0x3c4/0x988 __mmc_start_request+0x9c/0x1f8 mmc_start_request+0x7c/0xb0 mmc_blk_mq_issue_rq+0x5c4/0x7b8 mmc_mq_queue_rq+0x11c/0x278 blk_mq_dispatch_rq_list+0xb0/0x568 blk_mq_do_dispatch_sched+0x6c/0x108 blk_mq_sched_dispatch_requests+0x110/0x1b8 __blk_mq_run_hw_queue+0xb0/0x118 blk_mq_run_work_fn+0x28/0x38 process_one_work+0x210/0x490 worker_thread+0x48/0x458 kthread+0x130/0x138 ret_from_fork+0x10/0x1c Signed-off-by: Kevin Hao Fixes: ba3869ff32e4 ("mmc: cavium: Add core MMC driver for Cavium SOCs") Cc: stable@vger.kernel.org Signed-off-by: Ulf Hansson --- drivers/mmc/host/cavium.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/mmc/host/cavium.c b/drivers/mmc/host/cavium.c index ed5cefb83768..c956813bc6bd 100644 --- a/drivers/mmc/host/cavium.c +++ b/drivers/mmc/host/cavium.c @@ -1046,7 +1046,8 @@ int cvm_mmc_of_slot_probe(struct device *dev, struct cvm_mmc_host *host) mmc->max_segs = 1; /* DMA size field can address up to 8 MB */ - mmc->max_seg_size = 8 * 1024 * 1024; + mmc->max_seg_size = min_t(unsigned int, 8 * 1024 * 1024, + dma_get_max_seg_size(host->dev)); mmc->max_req_size = mmc->max_seg_size; /* External DMA is in 512 byte blocks */ mmc->max_blk_size = 512; From b803974a86039913d5280add083d730b2b9ed8ec Mon Sep 17 00:00:00 2001 From: Kevin Hao Date: Fri, 26 Jul 2019 10:30:49 +0800 Subject: [PATCH 184/482] mmc: cavium: Add the missing dma unmap when the dma has finished. This fixes the below calltrace when the CONFIG_DMA_API_DEBUG is enabled. DMA-API: thunderx_mmc 0000:01:01.4: cpu touching an active dma mapped cacheline [cln=0x000000002fdf9800] WARNING: CPU: 21 PID: 1 at kernel/dma/debug.c:596 debug_dma_assert_idle+0x1f8/0x270 Modules linked in: CPU: 21 PID: 1 Comm: init Not tainted 5.3.0-rc1-next-20190725-yocto-standard+ #64 Hardware name: Marvell OcteonTX CN96XX board (DT) pstate: 80400009 (Nzcv daif +PAN -UAO) pc : debug_dma_assert_idle+0x1f8/0x270 lr : debug_dma_assert_idle+0x1f8/0x270 sp : ffff0000113cfc10 x29: ffff0000113cfc10 x28: 0000ffff8c880000 x27: ffff800bc72a0000 x26: ffff000010ff8000 x25: ffff000010ff8940 x24: ffff000010ff8968 x23: 0000000000000000 x22: ffff000010e83700 x21: ffff000010ea2000 x20: ffff000010e835c8 x19: ffff800bc2c73300 x18: ffffffffffffffff x17: 0000000000000000 x16: 0000000000000000 x15: ffff000010e835c8 x14: 6d20616d64206576 x13: 69746361206e6120 x12: 676e696863756f74 x11: 20757063203a342e x10: 31303a31303a3030 x9 : 303020636d6d5f78 x8 : 3230303030303030 x7 : 00000000000002fd x6 : ffff000010fd57d0 x5 : 0000000000000000 x4 : ffff0000106c5210 x3 : 00000000ffffffff x2 : 0000800bee9c0000 x1 : 57d5843f4aa62800 x0 : 0000000000000000 Call trace: debug_dma_assert_idle+0x1f8/0x270 wp_page_copy+0xb0/0x688 do_wp_page+0xa8/0x5b8 __handle_mm_fault+0x600/0xd00 handle_mm_fault+0x118/0x1e8 do_page_fault+0x200/0x500 do_mem_abort+0x50/0xb0 el0_da+0x20/0x24 ---[ end trace a005534bd23e109f ]--- DMA-API: Mapped at: debug_dma_map_sg+0x94/0x350 cvm_mmc_request+0x3c4/0x988 __mmc_start_request+0x9c/0x1f8 mmc_start_request+0x7c/0xb0 mmc_blk_mq_issue_rq+0x5c4/0x7b8 Signed-off-by: Kevin Hao Fixes: ba3869ff32e4 ("mmc: cavium: Add core MMC driver for Cavium SOCs") Cc: stable@vger.kernel.org Signed-off-by: Ulf Hansson --- drivers/mmc/host/cavium.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/mmc/host/cavium.c b/drivers/mmc/host/cavium.c index c956813bc6bd..89deb451e0ac 100644 --- a/drivers/mmc/host/cavium.c +++ b/drivers/mmc/host/cavium.c @@ -374,6 +374,7 @@ static int finish_dma_single(struct cvm_mmc_host *host, struct mmc_data *data) { data->bytes_xfered = data->blocks * data->blksz; data->error = 0; + dma_unmap_sg(host->dev, data->sg, data->sg_len, get_dma_dir(data)); return 1; } From 72cda9bb5e219aea0f2f62f56ae05198c59022a7 Mon Sep 17 00:00:00 2001 From: Likun Gao Date: Fri, 2 Aug 2019 15:18:57 +0800 Subject: [PATCH 185/482] drm/amdgpu: pin the csb buffer on hw init for gfx v8 Without this pin, the csb buffer will be filled with inconsistent data after S3 resume. And that will causes gfx hang on gfxoff exit since this csb will be executed then. Signed-off-by: Likun Gao Tested-by: Paul Gover Reviewed-by: Feifei Xu Reviewed-by: Xiaojie Yuan Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 40 +++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 751567f78567..ee1ccdcf2d30 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -1321,6 +1321,39 @@ static int gfx_v8_0_rlc_init(struct amdgpu_device *adev) return 0; } +static int gfx_v8_0_csb_vram_pin(struct amdgpu_device *adev) +{ + int r; + + r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false); + if (unlikely(r != 0)) + return r; + + r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj, + AMDGPU_GEM_DOMAIN_VRAM); + if (!r) + adev->gfx.rlc.clear_state_gpu_addr = + amdgpu_bo_gpu_offset(adev->gfx.rlc.clear_state_obj); + + amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj); + + return r; +} + +static void gfx_v8_0_csb_vram_unpin(struct amdgpu_device *adev) +{ + int r; + + if (!adev->gfx.rlc.clear_state_obj) + return; + + r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, true); + if (likely(r == 0)) { + amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj); + amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj); + } +} + static void gfx_v8_0_mec_fini(struct amdgpu_device *adev) { amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL); @@ -4785,6 +4818,10 @@ static int gfx_v8_0_hw_init(void *handle) gfx_v8_0_init_golden_registers(adev); gfx_v8_0_constants_init(adev); + r = gfx_v8_0_csb_vram_pin(adev); + if (r) + return r; + r = adev->gfx.rlc.funcs->resume(adev); if (r) return r; @@ -4901,6 +4938,9 @@ static int gfx_v8_0_hw_fini(void *handle) else pr_err("rlc is busy, skip halt rlc\n"); amdgpu_gfx_rlc_exit_safe_mode(adev); + + gfx_v8_0_csb_vram_unpin(adev); + return 0; } From 713203e303ca9f75be8c729b533bf1559e442f6e Mon Sep 17 00:00:00 2001 From: Atish Patra Date: Fri, 2 Aug 2019 21:27:20 -0700 Subject: [PATCH 186/482] RISC-V: Remove per cpu clocksource There is only one clocksource in RISC-V. The boot cpu initializes that clocksource. No need to keep a percpu data structure. Signed-off-by: Atish Patra Signed-off-by: Paul Walmsley Acked-by: Daniel Lezcano --- drivers/clocksource/timer-riscv.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/clocksource/timer-riscv.c b/drivers/clocksource/timer-riscv.c index 5e6038fbf115..09e031176bc6 100644 --- a/drivers/clocksource/timer-riscv.c +++ b/drivers/clocksource/timer-riscv.c @@ -55,7 +55,7 @@ static u64 riscv_sched_clock(void) return get_cycles64(); } -static DEFINE_PER_CPU(struct clocksource, riscv_clocksource) = { +static struct clocksource riscv_clocksource = { .name = "riscv_clocksource", .rating = 300, .mask = CLOCKSOURCE_MASK(64), @@ -92,7 +92,6 @@ void riscv_timer_interrupt(void) static int __init riscv_timer_init_dt(struct device_node *n) { int cpuid, hartid, error; - struct clocksource *cs; hartid = riscv_of_processor_hartid(n); if (hartid < 0) { @@ -112,8 +111,7 @@ static int __init riscv_timer_init_dt(struct device_node *n) pr_info("%s: Registering clocksource cpuid [%d] hartid [%d]\n", __func__, cpuid, hartid); - cs = per_cpu_ptr(&riscv_clocksource, cpuid); - error = clocksource_register_hz(cs, riscv_timebase); + error = clocksource_register_hz(&riscv_clocksource, riscv_timebase); if (error) { pr_err("RISCV timer register failed [%d] for cpu = [%d]\n", error, cpuid); From d9dfe768b3f30faa8340cbf34196668714780c3c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Fri, 2 Aug 2019 17:44:06 -0400 Subject: [PATCH 187/482] Revert "drm/amdgpu: fix transform feedback GDS hang on gfx10 (v2)" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 9ed2c993d723129f85101e51b2ccc36ef5400a67. SET_CONFIG_REG writes to memory if register shadowing is enabled, causing a VM fault. NGG streamout is unstable anyway, so all UMDs should use legacy streamout. I think Mesa is the only driver using NGG streamout. Signed-off-by: Marek Olšák Reviewed-by: Le Ma Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h | 1 - drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 12 +----------- 2 files changed, 1 insertion(+), 12 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h index df8a23554831..f6ac1e9548f2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h @@ -32,7 +32,6 @@ struct amdgpu_gds { uint32_t gws_size; uint32_t oa_size; uint32_t gds_compute_max_wave_id; - uint32_t vgt_gs_max_wave_id; }; struct amdgpu_gds_reg_offset { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 32773b7523d2..f41287f9000d 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -4206,15 +4206,6 @@ static void gfx_v10_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, unsigned vmid = AMDGPU_JOB_GET_VMID(job); u32 header, control = 0; - /* Prevent a hw deadlock due to a wave ID mismatch between ME and GDS. - * This resets the wave ID counters. (needed by transform feedback) - * TODO: This might only be needed on a VMID switch when we change - * the GDS OA mapping, not sure. - */ - amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1)); - amdgpu_ring_write(ring, mmVGT_GS_MAX_WAVE_ID); - amdgpu_ring_write(ring, ring->adev->gds.vgt_gs_max_wave_id); - if (ib->flags & AMDGPU_IB_FLAG_CE) header = PACKET3(PACKET3_INDIRECT_BUFFER_CNST, 2); else @@ -4961,7 +4952,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = { 5 + /* HDP_INVL */ 8 + 8 + /* FENCE x2 */ 2, /* SWITCH_BUFFER */ - .emit_ib_size = 7, /* gfx_v10_0_ring_emit_ib_gfx */ + .emit_ib_size = 4, /* gfx_v10_0_ring_emit_ib_gfx */ .emit_ib = gfx_v10_0_ring_emit_ib_gfx, .emit_fence = gfx_v10_0_ring_emit_fence, .emit_pipeline_sync = gfx_v10_0_ring_emit_pipeline_sync, @@ -5112,7 +5103,6 @@ static void gfx_v10_0_set_gds_init(struct amdgpu_device *adev) default: adev->gds.gds_size = 0x10000; adev->gds.gds_compute_max_wave_id = 0x4ff; - adev->gds.vgt_gs_max_wave_id = 0x3ff; break; } From 3d92aa45fbfd7319e3a19f4ec59fd32b3862b723 Mon Sep 17 00:00:00 2001 From: Wenwen Wang Date: Wed, 7 Aug 2019 04:08:51 -0500 Subject: [PATCH 188/482] ALSA: hiface: fix multiple memory leak bugs In hiface_pcm_init(), 'rt' is firstly allocated through kzalloc(). Later on, hiface_pcm_init_urb() is invoked to initialize 'rt->out_urbs[i]'. In hiface_pcm_init_urb(), 'rt->out_urbs[i].buffer' is allocated through kzalloc(). However, if hiface_pcm_init_urb() fails, both 'rt' and 'rt->out_urbs[i].buffer' are not deallocated, leading to memory leak bugs. Also, 'rt->out_urbs[i].buffer' is not deallocated if snd_pcm_new() fails. To fix the above issues, free 'rt' and 'rt->out_urbs[i].buffer'. Fixes: a91c3fb2f842 ("Add M2Tech hiFace USB-SPDIF driver") Signed-off-by: Wenwen Wang Cc: Signed-off-by: Takashi Iwai --- sound/usb/hiface/pcm.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/sound/usb/hiface/pcm.c b/sound/usb/hiface/pcm.c index 14fc1e1d5d13..c406497c5919 100644 --- a/sound/usb/hiface/pcm.c +++ b/sound/usb/hiface/pcm.c @@ -600,14 +600,13 @@ int hiface_pcm_init(struct hiface_chip *chip, u8 extra_freq) ret = hiface_pcm_init_urb(&rt->out_urbs[i], chip, OUT_EP, hiface_pcm_out_urb_handler); if (ret < 0) - return ret; + goto error; } ret = snd_pcm_new(chip->card, "USB-SPDIF Audio", 0, 1, 0, &pcm); if (ret < 0) { - kfree(rt); dev_err(&chip->dev->dev, "Cannot create pcm instance\n"); - return ret; + goto error; } pcm->private_data = rt; @@ -620,4 +619,10 @@ int hiface_pcm_init(struct hiface_chip *chip, u8 extra_freq) chip->pcm = rt; return 0; + +error: + for (i = 0; i < PCM_N_URBS; i++) + kfree(rt->out_urbs[i].buffer); + kfree(rt); + return ret; } From 0617bdede5114a0002298b12cd0ca2b0cfd0395d Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Wed, 7 Aug 2019 13:57:18 +0300 Subject: [PATCH 189/482] Revert "PCI: Add missing link delays required by the PCIe spec" Commit c2bf1fc212f7 ("PCI: Add missing link delays required by the PCIe spec") turned out causing issues with some systems either by making them unresponsive or slowing down runtime and system wide resume of PCIe devices. While root cause for the unresponsiveness is still under investigation given the amount of issues reported better to revert it for now. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=204413 Link: https://lore.kernel.org/linux-pci/SL2P216MB01878BBCD75F21D882AEEA2880C60@SL2P216MB0187.KORP216.PROD.OUTLOOK.COM/ Link: https://lore.kernel.org/linux-pci/2857501d-c167-547d-c57d-d5d24ea1f1dc@molgen.mpg.de/ Reported-by: Matthias Andree Reported-by: Paul Menzel Reported-by: Nicholas Johnson Signed-off-by: Mika Westerberg Signed-off-by: Rafael J. Wysocki --- drivers/pci/pci.c | 29 +++++---------- drivers/pci/pci.h | 1 - drivers/pci/pcie/portdrv_core.c | 66 --------------------------------- 3 files changed, 10 insertions(+), 86 deletions(-) diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 29ed5ec1ac27..1b27b5af3d55 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -1025,10 +1025,15 @@ static void __pci_start_power_transition(struct pci_dev *dev, pci_power_t state) if (state == PCI_D0) { pci_platform_power_transition(dev, PCI_D0); /* - * Mandatory power management transition delays are - * handled in the PCIe portdrv resume hooks. + * Mandatory power management transition delays, see + * PCI Express Base Specification Revision 2.0 Section + * 6.6.1: Conventional Reset. Do not delay for + * devices powered on/off by corresponding bridge, + * because have already delayed for the bridge. */ if (dev->runtime_d3cold) { + if (dev->d3cold_delay && !dev->imm_ready) + msleep(dev->d3cold_delay); /* * When powering on a bridge from D3cold, the * whole hierarchy may be powered on into @@ -4602,16 +4607,14 @@ static int pci_pm_reset(struct pci_dev *dev, int probe) return pci_dev_wait(dev, "PM D3->D0", PCIE_RESET_READY_POLL_MS); } - /** - * pcie_wait_for_link_delay - Wait until link is active or inactive + * pcie_wait_for_link - Wait until link is active or inactive * @pdev: Bridge device * @active: waiting for active or inactive? - * @delay: Delay to wait after link has become active (in ms) * * Use this to wait till link becomes active or inactive. */ -bool pcie_wait_for_link_delay(struct pci_dev *pdev, bool active, int delay) +bool pcie_wait_for_link(struct pci_dev *pdev, bool active) { int timeout = 1000; bool ret; @@ -4648,25 +4651,13 @@ bool pcie_wait_for_link_delay(struct pci_dev *pdev, bool active, int delay) timeout -= 10; } if (active && ret) - msleep(delay); + msleep(100); else if (ret != active) pci_info(pdev, "Data Link Layer Link Active not %s in 1000 msec\n", active ? "set" : "cleared"); return ret == active; } -/** - * pcie_wait_for_link - Wait until link is active or inactive - * @pdev: Bridge device - * @active: waiting for active or inactive? - * - * Use this to wait till link becomes active or inactive. - */ -bool pcie_wait_for_link(struct pci_dev *pdev, bool active) -{ - return pcie_wait_for_link_delay(pdev, active, 100); -} - void pci_reset_secondary_bus(struct pci_dev *dev) { u16 ctrl; diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index 1be03a97cb92..d22d1b807701 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -497,7 +497,6 @@ static inline int pci_dev_specific_disable_acs_redir(struct pci_dev *dev) void pcie_do_recovery(struct pci_dev *dev, enum pci_channel_state state, u32 service); -bool pcie_wait_for_link_delay(struct pci_dev *pdev, bool active, int delay); bool pcie_wait_for_link(struct pci_dev *pdev, bool active); #ifdef CONFIG_PCIEASPM void pcie_aspm_init_link_state(struct pci_dev *pdev); diff --git a/drivers/pci/pcie/portdrv_core.c b/drivers/pci/pcie/portdrv_core.c index 308c3e0c4a34..1b330129089f 100644 --- a/drivers/pci/pcie/portdrv_core.c +++ b/drivers/pci/pcie/portdrv_core.c @@ -9,7 +9,6 @@ #include #include #include -#include #include #include #include @@ -379,67 +378,6 @@ static int pm_iter(struct device *dev, void *data) return 0; } -static int get_downstream_delay(struct pci_bus *bus) -{ - struct pci_dev *pdev; - int min_delay = 100; - int max_delay = 0; - - list_for_each_entry(pdev, &bus->devices, bus_list) { - if (!pdev->imm_ready) - min_delay = 0; - else if (pdev->d3cold_delay < min_delay) - min_delay = pdev->d3cold_delay; - if (pdev->d3cold_delay > max_delay) - max_delay = pdev->d3cold_delay; - } - - return max(min_delay, max_delay); -} - -/* - * wait_for_downstream_link - Wait for downstream link to establish - * @pdev: PCIe port whose downstream link is waited - * - * Handle delays according to PCIe 4.0 section 6.6.1 before configuration - * access to the downstream component is permitted. - * - * This blocks PCI core resume of the hierarchy below this port until the - * link is trained. Should be called before resuming port services to - * prevent pciehp from starting to tear-down the hierarchy too soon. - */ -static void wait_for_downstream_link(struct pci_dev *pdev) -{ - int delay; - - if (pci_pcie_type(pdev) != PCI_EXP_TYPE_ROOT_PORT && - pci_pcie_type(pdev) != PCI_EXP_TYPE_DOWNSTREAM) - return; - - if (pci_dev_is_disconnected(pdev)) - return; - - if (!pdev->subordinate || list_empty(&pdev->subordinate->devices) || - !pdev->bridge_d3) - return; - - delay = get_downstream_delay(pdev->subordinate); - if (!delay) - return; - - dev_dbg(&pdev->dev, "waiting downstream link for %d ms\n", delay); - - /* - * If downstream port does not support speeds greater than 5 GT/s - * need to wait 100ms. For higher speeds (gen3) we need to wait - * first for the data link layer to become active. - */ - if (pcie_get_speed_cap(pdev) <= PCIE_SPEED_5_0GT) - msleep(delay); - else - pcie_wait_for_link_delay(pdev, true, delay); -} - /** * pcie_port_device_suspend - suspend port services associated with a PCIe port * @dev: PCI Express port to handle @@ -453,8 +391,6 @@ int pcie_port_device_suspend(struct device *dev) int pcie_port_device_resume_noirq(struct device *dev) { size_t off = offsetof(struct pcie_port_service_driver, resume_noirq); - - wait_for_downstream_link(to_pci_dev(dev)); return device_for_each_child(dev, &off, pm_iter); } @@ -485,8 +421,6 @@ int pcie_port_device_runtime_suspend(struct device *dev) int pcie_port_device_runtime_resume(struct device *dev) { size_t off = offsetof(struct pcie_port_service_driver, runtime_resume); - - wait_for_downstream_link(to_pci_dev(dev)); return device_for_each_child(dev, &off, pm_iter); } #endif /* PM */ From c02f77d32d2c45cfb1b2bb99eabd8a78f5ecc7db Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 6 Aug 2019 17:31:48 +0200 Subject: [PATCH 190/482] ALSA: hda - Workaround for crackled sound on AMD controller (1022:1457) A long-time problem on the recent AMD chip (X370, X470, B450, etc with PCI ID 1022:1457) with Realtek codecs is the crackled or distorted sound for capture streams, as well as occasional playback hiccups. After lengthy debugging sessions, the workarounds we've found are like the following: - Set up the proper driver caps for this controller, similar as the other AMD controller. - Correct the DMA position reporting with the fixed FIFO size, which is similar like as workaround used for VIA chip set. - Even after the position correction, PulseAudio still shows mysterious stalls of playback streams when a capture is triggered in timer-scheduled mode. Since we have no clear way to eliminate the stall, pass the BATCH PCM flag for PA to suppress the tsched mode as a temporary workaround. This patch implements the workarounds. For the driver caps, it defines a new preset, AXZ_DCAPS_PRESET_AMD_SB. It enables the FIFO- corrected position reporting (corresponding to the new position_fix=6) and enforces the SNDRV_PCM_INFO_BATCH flag. Note that the current implementation is merely a workaround. Hopefully we'll find a better alternative in future, especially about removing the BATCH flag hack again. BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=195303 Cc: Signed-off-by: Takashi Iwai --- sound/pci/hda/hda_controller.c | 7 ++++ sound/pci/hda/hda_controller.h | 2 +- sound/pci/hda/hda_intel.c | 63 +++++++++++++++++++++++++++++++++- 3 files changed, 70 insertions(+), 2 deletions(-) diff --git a/sound/pci/hda/hda_controller.c b/sound/pci/hda/hda_controller.c index 2fbdde239936..48d863736b3c 100644 --- a/sound/pci/hda/hda_controller.c +++ b/sound/pci/hda/hda_controller.c @@ -613,6 +613,13 @@ static int azx_pcm_open(struct snd_pcm_substream *substream) 20, 178000000); + /* by some reason, the playback stream stalls on PulseAudio with + * tsched=1 when a capture stream triggers. Until we figure out the + * real cause, disable tsched mode by telling the PCM info flag. + */ + if (chip->driver_caps & AZX_DCAPS_AMD_WORKAROUND) + runtime->hw.info |= SNDRV_PCM_INFO_BATCH; + if (chip->align_buffer_size) /* constrain buffer sizes to be multiple of 128 bytes. This is more efficient in terms of memory diff --git a/sound/pci/hda/hda_controller.h b/sound/pci/hda/hda_controller.h index baa15374fbcb..f2a6df5e6bcb 100644 --- a/sound/pci/hda/hda_controller.h +++ b/sound/pci/hda/hda_controller.h @@ -31,7 +31,7 @@ /* 14 unused */ #define AZX_DCAPS_CTX_WORKAROUND (1 << 15) /* X-Fi workaround */ #define AZX_DCAPS_POSFIX_LPIB (1 << 16) /* Use LPIB as default */ -/* 17 unused */ +#define AZX_DCAPS_AMD_WORKAROUND (1 << 17) /* AMD-specific workaround */ #define AZX_DCAPS_NO_64BIT (1 << 18) /* No 64bit address */ #define AZX_DCAPS_SYNC_WRITE (1 << 19) /* sync each cmd write */ #define AZX_DCAPS_OLD_SSYNC (1 << 20) /* Old SSYNC reg for ICH */ diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index 1e14d7270adf..a6d8c0d77b84 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -64,6 +64,7 @@ enum { POS_FIX_VIACOMBO, POS_FIX_COMBO, POS_FIX_SKL, + POS_FIX_FIFO, }; /* Defines for ATI HD Audio support in SB450 south bridge */ @@ -135,7 +136,7 @@ module_param_array(model, charp, NULL, 0444); MODULE_PARM_DESC(model, "Use the given board model."); module_param_array(position_fix, int, NULL, 0444); MODULE_PARM_DESC(position_fix, "DMA pointer read method." - "(-1 = system default, 0 = auto, 1 = LPIB, 2 = POSBUF, 3 = VIACOMBO, 4 = COMBO, 5 = SKL+)."); + "(-1 = system default, 0 = auto, 1 = LPIB, 2 = POSBUF, 3 = VIACOMBO, 4 = COMBO, 5 = SKL+, 6 = FIFO)."); module_param_array(bdl_pos_adj, int, NULL, 0644); MODULE_PARM_DESC(bdl_pos_adj, "BDL position adjustment offset."); module_param_array(probe_mask, int, NULL, 0444); @@ -332,6 +333,11 @@ enum { #define AZX_DCAPS_PRESET_ATI_HDMI_NS \ (AZX_DCAPS_PRESET_ATI_HDMI | AZX_DCAPS_SNOOP_OFF) +/* quirks for AMD SB */ +#define AZX_DCAPS_PRESET_AMD_SB \ + (AZX_DCAPS_NO_TCSEL | AZX_DCAPS_SYNC_WRITE | AZX_DCAPS_AMD_WORKAROUND |\ + AZX_DCAPS_SNOOP_TYPE(ATI) | AZX_DCAPS_PM_RUNTIME) + /* quirks for Nvidia */ #define AZX_DCAPS_PRESET_NVIDIA \ (AZX_DCAPS_NO_MSI | AZX_DCAPS_CORBRP_SELF_CLEAR |\ @@ -841,6 +847,49 @@ static unsigned int azx_via_get_position(struct azx *chip, return bound_pos + mod_dma_pos; } +#define AMD_FIFO_SIZE 32 + +/* get the current DMA position with FIFO size correction */ +static unsigned int azx_get_pos_fifo(struct azx *chip, struct azx_dev *azx_dev) +{ + struct snd_pcm_substream *substream = azx_dev->core.substream; + struct snd_pcm_runtime *runtime = substream->runtime; + unsigned int pos, delay; + + pos = snd_hdac_stream_get_pos_lpib(azx_stream(azx_dev)); + if (!runtime) + return pos; + + runtime->delay = AMD_FIFO_SIZE; + delay = frames_to_bytes(runtime, AMD_FIFO_SIZE); + if (azx_dev->insufficient) { + if (pos < delay) { + delay = pos; + runtime->delay = bytes_to_frames(runtime, pos); + } else { + azx_dev->insufficient = 0; + } + } + + /* correct the DMA position for capture stream */ + if (substream->stream == SNDRV_PCM_STREAM_CAPTURE) { + if (pos < delay) + pos += azx_dev->core.bufsize; + pos -= delay; + } + + return pos; +} + +static int azx_get_delay_from_fifo(struct azx *chip, struct azx_dev *azx_dev, + unsigned int pos) +{ + struct snd_pcm_substream *substream = azx_dev->core.substream; + + /* just read back the calculated value in the above */ + return substream->runtime->delay; +} + static unsigned int azx_skl_get_dpib_pos(struct azx *chip, struct azx_dev *azx_dev) { @@ -1417,6 +1466,7 @@ static int check_position_fix(struct azx *chip, int fix) case POS_FIX_VIACOMBO: case POS_FIX_COMBO: case POS_FIX_SKL: + case POS_FIX_FIFO: return fix; } @@ -1433,6 +1483,10 @@ static int check_position_fix(struct azx *chip, int fix) dev_dbg(chip->card->dev, "Using VIACOMBO position fix\n"); return POS_FIX_VIACOMBO; } + if (chip->driver_caps & AZX_DCAPS_AMD_WORKAROUND) { + dev_dbg(chip->card->dev, "Using FIFO position fix\n"); + return POS_FIX_FIFO; + } if (chip->driver_caps & AZX_DCAPS_POSFIX_LPIB) { dev_dbg(chip->card->dev, "Using LPIB position fix\n"); return POS_FIX_LPIB; @@ -1453,6 +1507,7 @@ static void assign_position_fix(struct azx *chip, int fix) [POS_FIX_VIACOMBO] = azx_via_get_position, [POS_FIX_COMBO] = azx_get_pos_lpib, [POS_FIX_SKL] = azx_get_pos_skl, + [POS_FIX_FIFO] = azx_get_pos_fifo, }; chip->get_position[0] = chip->get_position[1] = callbacks[fix]; @@ -1467,6 +1522,9 @@ static void assign_position_fix(struct azx *chip, int fix) azx_get_delay_from_lpib; } + if (fix == POS_FIX_FIFO) + chip->get_delay[0] = chip->get_delay[1] = + azx_get_delay_from_fifo; } /* @@ -2447,6 +2505,9 @@ static const struct pci_device_id azx_ids[] = { /* AMD Hudson */ { PCI_DEVICE(0x1022, 0x780d), .driver_data = AZX_DRIVER_GENERIC | AZX_DCAPS_PRESET_ATI_SB }, + /* AMD, X370 & co */ + { PCI_DEVICE(0x1022, 0x1457), + .driver_data = AZX_DRIVER_GENERIC | AZX_DCAPS_PRESET_AMD_SB }, /* AMD Stoney */ { PCI_DEVICE(0x1022, 0x157a), .driver_data = AZX_DRIVER_GENERIC | AZX_DCAPS_PRESET_ATI_SB | From 4ab9ab656a6cea5257bfa31f00c922d68f7a5c2f Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 5 Aug 2019 14:56:54 -0500 Subject: [PATCH 191/482] x86/ptrace: Mark expected switch fall-through Mark switch cases where we are expecting to fall through. Fix the following warning (Building: allnoconfig i386): arch/x86/kernel/ptrace.c:202:6: warning: this statement may fall through [-Wimplicit-fallthrough=] if (unlikely(value == 0)) ^ arch/x86/kernel/ptrace.c:206:2: note: here default: ^~~~~~~ Signed-off-by: Gustavo A. R. Silva Signed-off-by: Thomas Gleixner Reviewed-by: Kees Cook Link: https://lkml.kernel.org/r/20190805195654.GA17831@embeddedor --- arch/x86/kernel/ptrace.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 0fdbe89d0754..3c5bbe8e4120 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -201,6 +201,7 @@ static int set_segment_reg(struct task_struct *task, case offsetof(struct user_regs_struct, ss): if (unlikely(value == 0)) return -EIO; + /* Else, fall through */ default: *pt_regs_access(task_pt_regs(task), offset) = value; From 7468a4eae541ce5aff65595aa502aa0a4def6615 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 5 Aug 2019 15:17:12 -0500 Subject: [PATCH 192/482] x86: mtrr: cyrix: Mark expected switch fall-through Mark switch cases where we are expecting to fall through. Fix the following warning (Building: i386_defconfig i386): arch/x86/kernel/cpu/mtrr/cyrix.c:99:6: warning: this statement may fall through [-Wimplicit-fallthrough=] Signed-off-by: Gustavo A. R. Silva Signed-off-by: Thomas Gleixner Reviewed-by: Kees Cook Link: https://lkml.kernel.org/r/20190805201712.GA19927@embeddedor --- arch/x86/kernel/cpu/mtrr/cyrix.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/kernel/cpu/mtrr/cyrix.c b/arch/x86/kernel/cpu/mtrr/cyrix.c index 4296c702a3f7..72182809b333 100644 --- a/arch/x86/kernel/cpu/mtrr/cyrix.c +++ b/arch/x86/kernel/cpu/mtrr/cyrix.c @@ -98,6 +98,7 @@ cyrix_get_free_region(unsigned long base, unsigned long size, int replace_reg) case 7: if (size < 0x40) break; + /* Else, fall through */ case 6: case 5: case 4: From d7cd4dd907c19c0295829c947d79afa290b6fc24 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Wed, 7 Aug 2019 11:21:46 +0100 Subject: [PATCH 193/482] Btrfs: fix sysfs warning and missing raid sysfs directories In the 5.3 merge window, commit 7c7e301406d0a9 ("btrfs: sysfs: Replace default_attrs in ktypes with groups"), we started using the member "defaults_groups" for the kobject type "btrfs_raid_ktype". That leads to a series of warnings when running some test cases of fstests, such as btrfs/027, btrfs/124 and btrfs/176. The traces produced by those warnings are like the following: [116648.059212] kernfs: can not remove 'total_bytes', no directory [116648.060112] WARNING: CPU: 3 PID: 28500 at fs/kernfs/dir.c:1504 kernfs_remove_by_name_ns+0x75/0x80 (...) [116648.066482] CPU: 3 PID: 28500 Comm: umount Tainted: G W 5.3.0-rc3-btrfs-next-54 #1 (...) [116648.069376] RIP: 0010:kernfs_remove_by_name_ns+0x75/0x80 (...) [116648.072385] RSP: 0018:ffffabfd0090bd08 EFLAGS: 00010282 [116648.073437] RAX: 0000000000000000 RBX: ffffffffc0c11998 RCX: 0000000000000000 [116648.074201] RDX: ffff9fff603a7a00 RSI: ffff9fff603978a8 RDI: ffff9fff603978a8 [116648.074956] RBP: ffffffffc0b9ca2f R08: 0000000000000000 R09: 0000000000000001 [116648.075708] R10: ffff9ffe1f72e1c0 R11: 0000000000000000 R12: ffffffffc0b94120 [116648.076434] R13: ffffffffb3d9b4e0 R14: 0000000000000000 R15: dead000000000100 [116648.077143] FS: 00007f9cdc78a2c0(0000) GS:ffff9fff60380000(0000) knlGS:0000000000000000 [116648.077852] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [116648.078546] CR2: 00007f9fc4747ab4 CR3: 00000005c7832003 CR4: 00000000003606e0 [116648.079235] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [116648.079907] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [116648.080585] Call Trace: [116648.081262] remove_files+0x31/0x70 [116648.081929] sysfs_remove_group+0x38/0x80 [116648.082596] sysfs_remove_groups+0x34/0x70 [116648.083258] kobject_del+0x20/0x60 [116648.083933] btrfs_free_block_groups+0x405/0x430 [btrfs] [116648.084608] close_ctree+0x19a/0x380 [btrfs] [116648.085278] generic_shutdown_super+0x6c/0x110 [116648.085951] kill_anon_super+0xe/0x30 [116648.086621] btrfs_kill_super+0x12/0xa0 [btrfs] [116648.087289] deactivate_locked_super+0x3a/0x70 [116648.087956] cleanup_mnt+0xb4/0x160 [116648.088620] task_work_run+0x7e/0xc0 [116648.089285] exit_to_usermode_loop+0xfa/0x100 [116648.089933] do_syscall_64+0x1cb/0x220 [116648.090567] entry_SYSCALL_64_after_hwframe+0x49/0xbe [116648.091197] RIP: 0033:0x7f9cdc073b37 (...) [116648.100046] ---[ end trace 22e24db328ccadf8 ]--- [116648.100618] ------------[ cut here ]------------ [116648.101175] kernfs: can not remove 'used_bytes', no directory [116648.101731] WARNING: CPU: 3 PID: 28500 at fs/kernfs/dir.c:1504 kernfs_remove_by_name_ns+0x75/0x80 (...) [116648.105649] CPU: 3 PID: 28500 Comm: umount Tainted: G W 5.3.0-rc3-btrfs-next-54 #1 (...) [116648.107461] RIP: 0010:kernfs_remove_by_name_ns+0x75/0x80 (...) [116648.109336] RSP: 0018:ffffabfd0090bd08 EFLAGS: 00010282 [116648.109979] RAX: 0000000000000000 RBX: ffffffffc0c119a0 RCX: 0000000000000000 [116648.110625] RDX: ffff9fff603a7a00 RSI: ffff9fff603978a8 RDI: ffff9fff603978a8 [116648.111283] RBP: ffffffffc0b9ca41 R08: 0000000000000000 R09: 0000000000000001 [116648.111940] R10: ffff9ffe1f72e1c0 R11: 0000000000000000 R12: ffffffffc0b94120 [116648.112603] R13: ffffffffb3d9b4e0 R14: 0000000000000000 R15: dead000000000100 [116648.113268] FS: 00007f9cdc78a2c0(0000) GS:ffff9fff60380000(0000) knlGS:0000000000000000 [116648.113939] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [116648.114607] CR2: 00007f9fc4747ab4 CR3: 00000005c7832003 CR4: 00000000003606e0 [116648.115286] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [116648.115966] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [116648.116649] Call Trace: [116648.117326] remove_files+0x31/0x70 [116648.117997] sysfs_remove_group+0x38/0x80 [116648.118671] sysfs_remove_groups+0x34/0x70 [116648.119342] kobject_del+0x20/0x60 [116648.120022] btrfs_free_block_groups+0x405/0x430 [btrfs] [116648.120707] close_ctree+0x19a/0x380 [btrfs] [116648.121396] generic_shutdown_super+0x6c/0x110 [116648.122057] kill_anon_super+0xe/0x30 [116648.122702] btrfs_kill_super+0x12/0xa0 [btrfs] [116648.123335] deactivate_locked_super+0x3a/0x70 [116648.123961] cleanup_mnt+0xb4/0x160 [116648.124586] task_work_run+0x7e/0xc0 [116648.125210] exit_to_usermode_loop+0xfa/0x100 [116648.125830] do_syscall_64+0x1cb/0x220 [116648.126463] entry_SYSCALL_64_after_hwframe+0x49/0xbe [116648.127080] RIP: 0033:0x7f9cdc073b37 (...) [116648.135923] ---[ end trace 22e24db328ccadf9 ]--- These happen because, during the unmount path, we call kobject_del() for raid kobjects that are not fully initialized, meaning that we set their ktype (as btrfs_raid_ktype) through link_block_group() but we didn't set their parent kobject, which is done through btrfs_add_raid_kobjects(). We have this split raid kobject setup since commit 75cb379d263521 ("btrfs: defer adding raid type kobject until after chunk relocation") in order to avoid triggering reclaim during contextes where we can not (either we are holding a transaction handle or some lock required by the transaction commit path), so that we do the calls to kobject_add(), which triggers GFP_KERNEL allocations, through btrfs_add_raid_kobjects() in contextes where it is safe to trigger reclaim. That change expected that a new raid kobject can only be created either when mounting the filesystem or after raid profile conversion through the relocation path. However, we can have new raid kobject created in other two cases at least: 1) During device replace (or scrub) after adding a device a to the filesystem. The replace procedure (and scrub) do calls to btrfs_inc_block_group_ro() which can allocate a new block group with a new raid profile (because we now have more devices). This can be triggered by test cases btrfs/027 and btrfs/176. 2) During a degraded mount trough any write path. This can be triggered by test case btrfs/124. Fixing this by adding extra calls to btrfs_add_raid_kobjects(), not only makes things more complex and fragile, can also introduce deadlocks with reclaim the following way: 1) Calling btrfs_add_raid_kobjects() at btrfs_inc_block_group_ro() or anywhere in the replace/scrub path will cause a deadlock with reclaim because if reclaim happens and a transaction commit is triggered, the transaction commit path will block at btrfs_scrub_pause(). 2) During degraded mounts it is essentially impossible to figure out where to add extra calls to btrfs_add_raid_kobjects(), because allocation of a block group with a new raid profile can happen anywhere, which means we can't safely figure out which contextes are safe for reclaim, as we can either hold a transaction handle or some lock needed by the transaction commit path. So it is too complex and error prone to have this split setup of raid kobjects. So fix the issue by consolidating the setup of the kobjects in a single place, at link_block_group(), and setup a nofs context there in order to prevent reclaim being triggered by the memory allocations done through the call chain of kobject_add(). Besides fixing the sysfs warnings during kobject_del(), this also ensures the sysfs directories for the new raid profiles end up created and visible to users (a bug that existed before the 5.3 commit 7c7e301406d0a9 ("btrfs: sysfs: Replace default_attrs in ktypes with groups")). Fixes: 75cb379d263521 ("btrfs: defer adding raid type kobject until after chunk relocation") Fixes: 7c7e301406d0a9 ("btrfs: sysfs: Replace default_attrs in ktypes with groups") Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/ctree.h | 4 --- fs/btrfs/disk-io.c | 2 -- fs/btrfs/extent-tree.c | 57 ++++++++++++++++++------------------------ fs/btrfs/volumes.c | 13 ---------- 4 files changed, 24 insertions(+), 52 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 299e11e6c554..94660063a162 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -401,7 +401,6 @@ struct btrfs_dev_replace { struct raid_kobject { u64 flags; struct kobject kobj; - struct list_head list; }; /* @@ -915,8 +914,6 @@ struct btrfs_fs_info { u32 thread_pool_size; struct kobject *space_info_kobj; - struct list_head pending_raid_kobjs; - spinlock_t pending_raid_kobjs_lock; /* uncontended */ u64 total_pinned; @@ -2698,7 +2695,6 @@ int btrfs_can_relocate(struct btrfs_fs_info *fs_info, u64 bytenr); int btrfs_make_block_group(struct btrfs_trans_handle *trans, u64 bytes_used, u64 type, u64 chunk_offset, u64 size); -void btrfs_add_raid_kobjects(struct btrfs_fs_info *fs_info); struct btrfs_trans_handle *btrfs_start_trans_remove_block_group( struct btrfs_fs_info *fs_info, const u64 chunk_offset); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 5f7ee70b3d1a..97beb351a10c 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2683,8 +2683,6 @@ int open_ctree(struct super_block *sb, INIT_LIST_HEAD(&fs_info->delayed_iputs); INIT_LIST_HEAD(&fs_info->delalloc_roots); INIT_LIST_HEAD(&fs_info->caching_block_groups); - INIT_LIST_HEAD(&fs_info->pending_raid_kobjs); - spin_lock_init(&fs_info->pending_raid_kobjs_lock); spin_lock_init(&fs_info->delalloc_root_lock); spin_lock_init(&fs_info->trans_lock); spin_lock_init(&fs_info->fs_roots_radix_lock); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index d3b58e388535..25b2c9d7fa7f 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -4,6 +4,7 @@ */ #include +#include #include #include #include @@ -7888,33 +7889,6 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) return 0; } -/* link_block_group will queue up kobjects to add when we're reclaim-safe */ -void btrfs_add_raid_kobjects(struct btrfs_fs_info *fs_info) -{ - struct btrfs_space_info *space_info; - struct raid_kobject *rkobj; - LIST_HEAD(list); - int ret = 0; - - spin_lock(&fs_info->pending_raid_kobjs_lock); - list_splice_init(&fs_info->pending_raid_kobjs, &list); - spin_unlock(&fs_info->pending_raid_kobjs_lock); - - list_for_each_entry(rkobj, &list, list) { - space_info = btrfs_find_space_info(fs_info, rkobj->flags); - - ret = kobject_add(&rkobj->kobj, &space_info->kobj, - "%s", btrfs_bg_type_to_raid_name(rkobj->flags)); - if (ret) { - kobject_put(&rkobj->kobj); - break; - } - } - if (ret) - btrfs_warn(fs_info, - "failed to add kobject for block cache, ignoring"); -} - static void link_block_group(struct btrfs_block_group_cache *cache) { struct btrfs_space_info *space_info = cache->space_info; @@ -7929,18 +7903,36 @@ static void link_block_group(struct btrfs_block_group_cache *cache) up_write(&space_info->groups_sem); if (first) { - struct raid_kobject *rkobj = kzalloc(sizeof(*rkobj), GFP_NOFS); + struct raid_kobject *rkobj; + unsigned int nofs_flag; + int ret; + + /* + * Setup a NOFS context because kobject_add(), deep in its call + * chain, does GFP_KERNEL allocations, and we are often called + * in a context where if reclaim is triggered we can deadlock + * (we are either holding a transaction handle or some lock + * required for a transaction commit). + */ + nofs_flag = memalloc_nofs_save(); + rkobj = kzalloc(sizeof(*rkobj), GFP_KERNEL); if (!rkobj) { + memalloc_nofs_restore(nofs_flag); btrfs_warn(cache->fs_info, "couldn't alloc memory for raid level kobject"); return; } rkobj->flags = cache->flags; kobject_init(&rkobj->kobj, &btrfs_raid_ktype); - - spin_lock(&fs_info->pending_raid_kobjs_lock); - list_add_tail(&rkobj->list, &fs_info->pending_raid_kobjs); - spin_unlock(&fs_info->pending_raid_kobjs_lock); + ret = kobject_add(&rkobj->kobj, &space_info->kobj, "%s", + btrfs_bg_type_to_raid_name(rkobj->flags)); + memalloc_nofs_restore(nofs_flag); + if (ret) { + kobject_put(&rkobj->kobj); + btrfs_warn(fs_info, + "failed to add kobject for block cache, ignoring"); + return; + } space_info->block_group_kobjs[index] = &rkobj->kobj; } } @@ -8206,7 +8198,6 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info) inc_block_group_ro(cache, 1); } - btrfs_add_raid_kobjects(info); btrfs_init_global_block_rsv(info); ret = check_chunk_block_group_mappings(info); error: diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index d74b74ca07af..a447d3ec48d5 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -3087,16 +3087,6 @@ static int btrfs_relocate_chunk(struct btrfs_fs_info *fs_info, u64 chunk_offset) if (ret) return ret; - /* - * We add the kobjects here (and after forcing data chunk creation) - * since relocation is the only place we'll create chunks of a new - * type at runtime. The only place where we'll remove the last - * chunk of a type is the call immediately below this one. Even - * so, we're protected against races with the cleaner thread since - * we're covered by the delete_unused_bgs_mutex. - */ - btrfs_add_raid_kobjects(fs_info); - trans = btrfs_start_trans_remove_block_group(root->fs_info, chunk_offset); if (IS_ERR(trans)) { @@ -3223,9 +3213,6 @@ static int btrfs_may_alloc_data_chunk(struct btrfs_fs_info *fs_info, btrfs_end_transaction(trans); if (ret < 0) return ret; - - btrfs_add_raid_kobjects(fs_info); - return 1; } } From 07301df7d2fc220d3de5f7ad804dcb941400cb00 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Tue, 28 May 2019 16:21:54 +0800 Subject: [PATCH 194/482] btrfs: trim: Check the range passed into to prevent overflow Normally the range->len is set to default value (U64_MAX), but when it's not default value, we should check if the range overflows. And if it overflows, return -EINVAL before doing anything. Reviewed-by: Nikolay Borisov Reviewed-by: Anand Jain Signed-off-by: Qu Wenruo Signed-off-by: David Sterba --- fs/btrfs/extent-tree.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 25b2c9d7fa7f..8b7eb22d508a 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -8966,6 +8966,7 @@ int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range) struct btrfs_device *device; struct list_head *devices; u64 group_trimmed; + u64 range_end = U64_MAX; u64 start; u64 end; u64 trimmed = 0; @@ -8975,16 +8976,23 @@ int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range) int dev_ret = 0; int ret = 0; + /* + * Check range overflow if range->len is set. + * The default range->len is U64_MAX. + */ + if (range->len != U64_MAX && + check_add_overflow(range->start, range->len, &range_end)) + return -EINVAL; + cache = btrfs_lookup_first_block_group(fs_info, range->start); for (; cache; cache = next_block_group(cache)) { - if (cache->key.objectid >= (range->start + range->len)) { + if (cache->key.objectid >= range_end) { btrfs_put_block_group(cache); break; } start = max(range->start, cache->key.objectid); - end = min(range->start + range->len, - cache->key.objectid + cache->key.offset); + end = min(range_end, cache->key.objectid + cache->key.offset); if (end - start >= range->minlen) { if (!block_group_cache_done(cache)) { From 4b3e30ed3ec7864e798403a63ff2e96bd0c19ab0 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 7 Aug 2019 00:23:07 -0500 Subject: [PATCH 195/482] Revert "drm/amdkfd: New IOCTL to allocate queue GWS" This reverts commit 1a058c3376765ee31d65e28cbbb9d4ff15120056. This interface is still in too much flux. Revert until it's sorted out. Acked-by: Oak Zeng Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 28 ------------------------ include/uapi/linux/kfd_ioctl.h | 20 +---------------- 2 files changed, 1 insertion(+), 47 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 26b15cc56c31..1d3cd5c50d5f 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -1567,32 +1567,6 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep, return err; } -static int kfd_ioctl_alloc_queue_gws(struct file *filep, - struct kfd_process *p, void *data) -{ - int retval; - struct kfd_ioctl_alloc_queue_gws_args *args = data; - struct kfd_dev *dev; - - if (!hws_gws_support) - return -ENODEV; - - dev = kfd_device_by_id(args->gpu_id); - if (!dev) { - pr_debug("Could not find gpu id 0x%x\n", args->gpu_id); - return -ENODEV; - } - if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) - return -ENODEV; - - mutex_lock(&p->mutex); - retval = pqm_set_gws(&p->pqm, args->queue_id, args->num_gws ? dev->gws : NULL); - mutex_unlock(&p->mutex); - - args->first_gws = 0; - return retval; -} - static int kfd_ioctl_get_dmabuf_info(struct file *filep, struct kfd_process *p, void *data) { @@ -1795,8 +1769,6 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = { AMDKFD_IOCTL_DEF(AMDKFD_IOC_IMPORT_DMABUF, kfd_ioctl_import_dmabuf, 0), - AMDKFD_IOCTL_DEF(AMDKFD_IOC_ALLOC_QUEUE_GWS, - kfd_ioctl_alloc_queue_gws, 0), }; #define AMDKFD_CORE_IOCTL_COUNT ARRAY_SIZE(amdkfd_ioctls) diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index 070d1bc7e725..20917c59f39c 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -410,21 +410,6 @@ struct kfd_ioctl_unmap_memory_from_gpu_args { __u32 n_success; /* to/from KFD */ }; -/* Allocate GWS for specific queue - * - * @gpu_id: device identifier - * @queue_id: queue's id that GWS is allocated for - * @num_gws: how many GWS to allocate - * @first_gws: index of the first GWS allocated. - * only support contiguous GWS allocation - */ -struct kfd_ioctl_alloc_queue_gws_args { - __u32 gpu_id; /* to KFD */ - __u32 queue_id; /* to KFD */ - __u32 num_gws; /* to KFD */ - __u32 first_gws; /* from KFD */ -}; - struct kfd_ioctl_get_dmabuf_info_args { __u64 size; /* from KFD */ __u64 metadata_ptr; /* to KFD */ @@ -544,10 +529,7 @@ enum kfd_mmio_remap { #define AMDKFD_IOC_IMPORT_DMABUF \ AMDKFD_IOWR(0x1D, struct kfd_ioctl_import_dmabuf_args) -#define AMDKFD_IOC_ALLOC_QUEUE_GWS \ - AMDKFD_IOWR(0x1E, struct kfd_ioctl_alloc_queue_gws_args) - #define AMDKFD_COMMAND_START 0x01 -#define AMDKFD_COMMAND_END 0x1F +#define AMDKFD_COMMAND_END 0x1E #endif From 67e7b52d44e3d539dfbfcd866c3d3d69da23a909 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 7 Aug 2019 07:31:27 -0400 Subject: [PATCH 196/482] NFSv4: Ensure state recovery handles ETIMEDOUT correctly Ensure that the state recovery code handles ETIMEDOUT correctly, and also that we set RPC_TASK_TIMEOUT when recovering open state. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 2 ++ fs/nfs/nfs4state.c | 7 +++++-- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 12b2b65ad8a8..1406858bae6c 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2179,6 +2179,7 @@ static int nfs4_handle_delegation_recall_error(struct nfs_server *server, struct case -ENOENT: case -EAGAIN: case -ESTALE: + case -ETIMEDOUT: break; case -NFS4ERR_BADSESSION: case -NFS4ERR_BADSLOT: @@ -2499,6 +2500,7 @@ static int nfs4_run_open_task(struct nfs4_opendata *data, if (!ctx) { nfs4_init_sequence(&o_arg->seq_args, &o_res->seq_res, 1, 1); data->is_recover = true; + task_setup_data.flags |= RPC_TASK_TIMEOUT; } else { nfs4_init_sequence(&o_arg->seq_args, &o_res->seq_res, 1, 0); pnfs_lgopen_prepare(data, ctx); diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index a4e866b2b43b..cad4e064b328 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1529,6 +1529,7 @@ static int nfs4_reclaim_locks(struct nfs4_state *state, const struct nfs4_state_ switch (status) { case 0: break; + case -ETIMEDOUT: case -ESTALE: case -NFS4ERR_ADMIN_REVOKED: case -NFS4ERR_STALE_STATEID: @@ -1682,11 +1683,13 @@ static int nfs4_reclaim_open_state(struct nfs4_state_owner *sp, const struct nfs case -NFS4ERR_EXPIRED: case -NFS4ERR_NO_GRACE: nfs4_state_mark_reclaim_nograce(sp->so_server->nfs_client, state); + /* Fall through */ case -NFS4ERR_STALE_CLIENTID: case -NFS4ERR_BADSESSION: case -NFS4ERR_BADSLOT: case -NFS4ERR_BAD_HIGH_SLOT: case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: + case -ETIMEDOUT: goto out_err; } nfs4_put_open_state(state); @@ -1971,7 +1974,6 @@ static int nfs4_handle_reclaim_lease_error(struct nfs_client *clp, int status) return -EPERM; case -EACCES: case -NFS4ERR_DELAY: - case -ETIMEDOUT: case -EAGAIN: ssleep(1); break; @@ -2600,7 +2602,7 @@ static void nfs4_state_manager(struct nfs_client *clp) } /* Now recover expired state... */ - if (test_and_clear_bit(NFS4CLNT_RECLAIM_NOGRACE, &clp->cl_state)) { + if (test_bit(NFS4CLNT_RECLAIM_NOGRACE, &clp->cl_state)) { section = "reclaim nograce"; status = nfs4_do_reclaim(clp, clp->cl_mvops->nograce_recovery_ops); @@ -2608,6 +2610,7 @@ static void nfs4_state_manager(struct nfs_client *clp) continue; if (status < 0) goto out_error; + clear_bit(NFS4CLNT_RECLAIM_NOGRACE, &clp->cl_state); } nfs4_end_drain_session(clp); From e15c2ffa1091c4f72370f01af4de8f9dddeb17a6 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 6 Aug 2019 13:34:31 -0600 Subject: [PATCH 197/482] block: fix O_DIRECT error handling for bio fragments 0eb6ddfb865c tried to fix this up, but introduced a use-after-free of dio. Additionally, we still had an issue with error handling, as reported by Darrick: "I noticed a regression in xfs/747 (an unreleased xfstest for the xfs_scrub media scanning feature) on 5.3-rc3. I'll condense that down to a simpler reproducer: error-test: 0 209 linear 8:48 0 error-test: 209 1 error error-test: 210 6446894 linear 8:48 210 Basically we have a ~3G /dev/sdd and we set up device mapper to fail IO for sector 209 and to pass the io to the scsi device everywhere else. On 5.3-rc3, performing a directio pread of this range with a < 1M buffer (in other words, a request for fewer than MAX_BIO_PAGES bytes) yields EIO like you'd expect: pread64(3, 0x7f880e1c7000, 1048576, 0) = -1 EIO (Input/output error) pread: Input/output error +++ exited with 0 +++ But doing it with a larger buffer succeeds(!): pread64(3, "XFSB\0\0\20\0\0\0\0\0\0\fL\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 1146880, 0) = 1146880 read 1146880/1146880 bytes at offset 0 1 MiB, 1 ops; 0.0009 sec (1.124 GiB/sec and 1052.6316 ops/sec) +++ exited with 0 +++ (Note that the part of the buffer corresponding to the dm-error area is uninitialized) On 5.3-rc2, both commands would fail with EIO like you'd expect. The only change between rc2 and rc3 is commit 0eb6ddfb865c ("block: Fix __blkdev_direct_IO() for bio fragments"). AFAICT we end up in __blkdev_direct_IO with a 1120K buffer, which gets split into two bios: one for the first BIO_MAX_PAGES worth of data (1MB) and a second one for the 96k after that." Fix this by noting that it's always safe to dereference dio if we get BLK_QC_T_EAGAIN returned, as end_io hasn't been run for that case. So we can safely increment the dio size before calling submit_bio(), and then decrement it on failure (not that it really matters, as the bio and dio are going away). For error handling, return to the original method of just using 'ret' for tracking the error, and the size tracking in dio->size. Fixes: 0eb6ddfb865c ("block: Fix __blkdev_direct_IO() for bio fragments") Fixes: 6a43074e2f46 ("block: properly handle IOCB_NOWAIT for async O_DIRECT IO") Reported-by: Darrick J. Wong Signed-off-by: Jens Axboe --- fs/block_dev.c | 28 ++++++++++++---------------- 1 file changed, 12 insertions(+), 16 deletions(-) diff --git a/fs/block_dev.c b/fs/block_dev.c index a6f7c892cb4a..131e2e0582a6 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -349,7 +349,7 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages) loff_t pos = iocb->ki_pos; blk_qc_t qc = BLK_QC_T_NONE; gfp_t gfp; - ssize_t ret; + int ret; if ((pos | iov_iter_alignment(iter)) & (bdev_logical_block_size(bdev) - 1)) @@ -386,8 +386,6 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages) ret = 0; for (;;) { - int err; - bio_set_dev(bio, bdev); bio->bi_iter.bi_sector = pos >> 9; bio->bi_write_hint = iocb->ki_hint; @@ -395,10 +393,8 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages) bio->bi_end_io = blkdev_bio_end_io; bio->bi_ioprio = iocb->ki_ioprio; - err = bio_iov_iter_get_pages(bio, iter); - if (unlikely(err)) { - if (!ret) - ret = err; + ret = bio_iov_iter_get_pages(bio, iter); + if (unlikely(ret)) { bio->bi_status = BLK_STS_IOERR; bio_endio(bio); break; @@ -421,7 +417,6 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages) if (nowait) bio->bi_opf |= (REQ_NOWAIT | REQ_NOWAIT_INLINE); - dio->size += bio->bi_iter.bi_size; pos += bio->bi_iter.bi_size; nr_pages = iov_iter_npages(iter, BIO_MAX_PAGES); @@ -433,13 +428,13 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages) polled = true; } + dio->size += bio->bi_iter.bi_size; qc = submit_bio(bio); if (qc == BLK_QC_T_EAGAIN) { - if (!ret) - ret = -EAGAIN; + dio->size -= bio->bi_iter.bi_size; + ret = -EAGAIN; goto error; } - ret = dio->size; if (polled) WRITE_ONCE(iocb->ki_cookie, qc); @@ -460,18 +455,17 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages) atomic_inc(&dio->ref); } + dio->size += bio->bi_iter.bi_size; qc = submit_bio(bio); if (qc == BLK_QC_T_EAGAIN) { - if (!ret) - ret = -EAGAIN; + dio->size -= bio->bi_iter.bi_size; + ret = -EAGAIN; goto error; } - ret = dio->size; bio = bio_alloc(gfp, nr_pages); if (!bio) { - if (!ret) - ret = -EAGAIN; + ret = -EAGAIN; goto error; } } @@ -496,6 +490,8 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages) out: if (!ret) ret = blk_status_to_errno(dio->bio.bi_status); + if (likely(!ret)) + ret = dio->size; bio_put(&dio->bio); return ret; From 2d7271501720038381d45fb3dcbe4831228fc8cc Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 7 Aug 2019 12:20:52 -0600 Subject: [PATCH 198/482] libata: have ata_scsi_rw_xlat() fail invalid passthrough requests For passthrough requests, libata-scsi takes what the user passes in as gospel. This can be problematic if the user fills in the CDB incorrectly. One example of that is in request sizes. For read/write commands, the CDB contains fields describing the transfer length of the request. These should match with the SG_IO header fields, but libata-scsi currently does no validation of that. Check that the number of blocks in the CDB for passthrough requests matches what was mapped into the request. If the CDB asks for more data then the validated SG_IO header fields, error it. Reported-by: Krishna Ram Prakash R Reviewed-by: Kees Cook Signed-off-by: Jens Axboe --- drivers/ata/libata-scsi.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index 391ac0503dc0..76d0f9de767b 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -1786,6 +1786,21 @@ static unsigned int ata_scsi_verify_xlat(struct ata_queued_cmd *qc) return 1; } +static bool ata_check_nblocks(struct scsi_cmnd *scmd, u32 n_blocks) +{ + struct request *rq = scmd->request; + u32 req_blocks; + + if (!blk_rq_is_passthrough(rq)) + return true; + + req_blocks = blk_rq_bytes(rq) / scmd->device->sector_size; + if (n_blocks > req_blocks) + return false; + + return true; +} + /** * ata_scsi_rw_xlat - Translate SCSI r/w command into an ATA one * @qc: Storage for translated ATA taskfile @@ -1830,6 +1845,8 @@ static unsigned int ata_scsi_rw_xlat(struct ata_queued_cmd *qc) scsi_10_lba_len(cdb, &block, &n_block); if (cdb[1] & (1 << 3)) tf_flags |= ATA_TFLAG_FUA; + if (!ata_check_nblocks(scmd, n_block)) + goto invalid_fld; break; case READ_6: case WRITE_6: @@ -1844,6 +1861,8 @@ static unsigned int ata_scsi_rw_xlat(struct ata_queued_cmd *qc) */ if (!n_block) n_block = 256; + if (!ata_check_nblocks(scmd, n_block)) + goto invalid_fld; break; case READ_16: case WRITE_16: @@ -1854,6 +1873,8 @@ static unsigned int ata_scsi_rw_xlat(struct ata_queued_cmd *qc) scsi_16_lba_len(cdb, &block, &n_block); if (cdb[1] & (1 << 3)) tf_flags |= ATA_TFLAG_FUA; + if (!ata_check_nblocks(scmd, n_block)) + goto invalid_fld; break; default: DPRINTK("no-byte command\n"); From 752ead44491e8c91e14d7079625c5916b30921c5 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 7 Aug 2019 12:23:57 -0600 Subject: [PATCH 199/482] libata: add SG safety checks in SFF pio transfers Abort processing of a command if we run out of mapped data in the SG list. This should never happen, but a previous bug caused it to be possible. Play it safe and attempt to abort nicely if we don't have more SG segments left. Reviewed-by: Kees Cook Signed-off-by: Jens Axboe --- drivers/ata/libata-sff.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/ata/libata-sff.c b/drivers/ata/libata-sff.c index 10aa27882142..4f115adb4ee8 100644 --- a/drivers/ata/libata-sff.c +++ b/drivers/ata/libata-sff.c @@ -658,6 +658,10 @@ static void ata_pio_sector(struct ata_queued_cmd *qc) unsigned int offset; unsigned char *buf; + if (!qc->cursg) { + qc->curbytes = qc->nbytes; + return; + } if (qc->curbytes == qc->nbytes - qc->sect_size) ap->hsm_task_state = HSM_ST_LAST; @@ -683,6 +687,8 @@ static void ata_pio_sector(struct ata_queued_cmd *qc) if (qc->cursg_ofs == qc->cursg->length) { qc->cursg = sg_next(qc->cursg); + if (!qc->cursg) + ap->hsm_task_state = HSM_ST_LAST; qc->cursg_ofs = 0; } } From f591822c3cf314442819486f45ff7dc1f690e0c0 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Mon, 5 Aug 2019 11:30:10 +0300 Subject: [PATCH 200/482] IB/mlx5: Fix implicit MR release flow Once implicit MR is being called to be released by ib_umem_notifier_release() its leaves were marked as "dying". However, when dereg_mr()->mlx5_ib_free_implicit_mr()->mr_leaf_free() is called, it skips running the mr_leaf_free_action (i.e. umem_odp->work) when those leaves were marked as "dying". As such ib_umem_release() for the leaves won't be called and their MRs will be leaked as well. When an application exits/killed without calling dereg_mr we might hit the above flow. This fatal scenario is reported by WARN_ON() upon mlx5_ib_dealloc_ucontext() as ibcontext->per_mm_list is not empty, the call trace can be seen below. Originally the "dying" mark as part of ib_umem_notifier_release() was introduced to prevent pagefault_mr() from returning a success response once this happened. However, we already have today the completion mechanism so no need for that in those flows any more. Even in case a success response will be returned the firmware will not find the pages and an error will be returned in the following call as a released mm will cause ib_umem_odp_map_dma_pages() to permanently fail mmget_not_zero(). Fix the above issue by dropping the "dying" from the above flows. The other flows that are using "dying" are still needed it for their synchronization purposes. WARNING: CPU: 1 PID: 7218 at drivers/infiniband/hw/mlx5/main.c:2004 mlx5_ib_dealloc_ucontext+0x84/0x90 [mlx5_ib] CPU: 1 PID: 7218 Comm: ibv_rc_pingpong Tainted: G E 5.2.0-rc6+ #13 Call Trace: uverbs_destroy_ufile_hw+0xb5/0x120 [ib_uverbs] ib_uverbs_close+0x1f/0x80 [ib_uverbs] __fput+0xbe/0x250 task_work_run+0x88/0xa0 do_exit+0x2cb/0xc30 ? __fput+0x14b/0x250 do_group_exit+0x39/0xb0 get_signal+0x191/0x920 ? _raw_spin_unlock_bh+0xa/0x20 ? inet_csk_accept+0x229/0x2f0 do_signal+0x36/0x5e0 ? put_unused_fd+0x5b/0x70 ? __sys_accept4+0x1a6/0x1e0 ? inet_hash+0x35/0x40 ? release_sock+0x43/0x90 ? _raw_spin_unlock_bh+0xa/0x20 ? inet_listen+0x9f/0x120 exit_to_usermode_loop+0x5c/0xc6 do_syscall_64+0x182/0x1b0 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Fixes: 81713d3788d2 ("IB/mlx5: Add implicit MR support") Link: https://lore.kernel.org/r/20190805083010.21777-1-leon@kernel.org Signed-off-by: Yishai Hadas Reviewed-by: Artemy Kovalyov Signed-off-by: Leon Romanovsky Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/umem_odp.c | 4 ---- drivers/infiniband/hw/mlx5/odp.c | 22 ++++++++-------------- 2 files changed, 8 insertions(+), 18 deletions(-) diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c index 2a75c6f8d827..c0e15db34680 100644 --- a/drivers/infiniband/core/umem_odp.c +++ b/drivers/infiniband/core/umem_odp.c @@ -112,10 +112,6 @@ static int ib_umem_notifier_release_trampoline(struct ib_umem_odp *umem_odp, * prevent any further fault handling on this MR. */ ib_umem_notifier_start_account(umem_odp); - umem_odp->dying = 1; - /* Make sure that the fact the umem is dying is out before we release - * all pending page faults. */ - smp_wmb(); complete_all(&umem_odp->notifier_completion); umem_odp->umem.context->invalidate_range( umem_odp, ib_umem_start(umem_odp), ib_umem_end(umem_odp)); diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index 81da82050d05..1d257d1b3b0d 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -579,7 +579,6 @@ static int pagefault_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr, u32 flags) { int npages = 0, current_seq, page_shift, ret, np; - bool implicit = false; struct ib_umem_odp *odp_mr = to_ib_umem_odp(mr->umem); bool downgrade = flags & MLX5_PF_FLAGS_DOWNGRADE; bool prefetch = flags & MLX5_PF_FLAGS_PREFETCH; @@ -594,7 +593,6 @@ static int pagefault_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr, if (IS_ERR(odp)) return PTR_ERR(odp); mr = odp->private; - implicit = true; } else { odp = odp_mr; } @@ -682,19 +680,15 @@ static int pagefault_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr, out: if (ret == -EAGAIN) { - if (implicit || !odp->dying) { - unsigned long timeout = - msecs_to_jiffies(MMU_NOTIFIER_TIMEOUT); + unsigned long timeout = msecs_to_jiffies(MMU_NOTIFIER_TIMEOUT); - if (!wait_for_completion_timeout( - &odp->notifier_completion, - timeout)) { - mlx5_ib_warn(dev, "timeout waiting for mmu notifier. seq %d against %d. notifiers_count=%d\n", - current_seq, odp->notifiers_seq, odp->notifiers_count); - } - } else { - /* The MR is being killed, kill the QP as well. */ - ret = -EFAULT; + if (!wait_for_completion_timeout(&odp->notifier_completion, + timeout)) { + mlx5_ib_warn( + dev, + "timeout waiting for mmu notifier. seq %d against %d. notifiers_count=%d\n", + current_seq, odp->notifiers_seq, + odp->notifiers_count); } } From d97de8887a12c598abc4d2e4e57a54c1f030e112 Mon Sep 17 00:00:00 2001 From: Mark Zhang Date: Wed, 7 Aug 2019 13:18:19 +0300 Subject: [PATCH 201/482] RDMA/counter: Prevent QP counter binding if counters unsupported In case of rdma_counter_init() fails, counter allocation and QP bind should not be allowed. Fixes: 413d3347503b ("RDMA/counter: Add set/clear per-port auto mode support") Fixes: 1bd8e0a9d0fd ("RDMA/counter: Allow manual mode configuration support") Signed-off-by: Mark Zhang Reviewed-by: Parav Pandit Signed-off-by: Leon Romanovsky Link: https://lore.kernel.org/r/20190807101819.7581-1-leon@kernel.org Signed-off-by: Doug Ledford --- drivers/infiniband/core/counters.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/infiniband/core/counters.c b/drivers/infiniband/core/counters.c index 45d5164e9574..b79890739a2c 100644 --- a/drivers/infiniband/core/counters.c +++ b/drivers/infiniband/core/counters.c @@ -38,6 +38,9 @@ int rdma_counter_set_auto_mode(struct ib_device *dev, u8 port, int ret; port_counter = &dev->port_data[port].port_counter; + if (!port_counter->hstats) + return -EOPNOTSUPP; + mutex_lock(&port_counter->lock); if (on) { ret = __counter_set_mode(&port_counter->mode, @@ -509,6 +512,9 @@ int rdma_counter_bind_qpn_alloc(struct ib_device *dev, u8 port, if (!rdma_is_port_valid(dev, port)) return -EINVAL; + if (!dev->port_data[port].port_counter.hstats) + return -EOPNOTSUPP; + qp = rdma_counter_get_qp(dev, qp_num); if (!qp) return -ENOENT; From e7e6c6320c8c9ed923250cd019e5f9ca0f59b4b8 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 7 Aug 2019 15:32:36 +0300 Subject: [PATCH 202/482] IB/mlx5: Check the correct variable in error handling code The code accidentally checks "event_sub" instead of "event_sub->eventfd". Fixes: 759738537142 ("IB/mlx5: Enable subscription for device events over DEVX") Signed-off-by: Dan Carpenter Reviewed-by: Jason Gunthorpe Acked-by: Leon Romanovsky Link: https://lore.kernel.org/r/20190807123236.GA11452@mwanda Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/devx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index ec4370f99381..2d1b3d9609d9 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -2026,7 +2026,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_SUBSCRIBE_EVENT)( event_sub->eventfd = eventfd_ctx_fdget(redirect_fd); - if (IS_ERR(event_sub)) { + if (IS_ERR(event_sub->eventfd)) { err = PTR_ERR(event_sub->eventfd); event_sub->eventfd = NULL; goto err; From 38ada2f406a9b81fb1249c5c9227fa657e7d5671 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Fri, 26 Jul 2019 08:00:49 -0700 Subject: [PATCH 203/482] hwmon: (nct7802) Fix wrong detection of in4 presence The code to detect if in4 is present is wrong; if in4 is not present, the in4_input sysfs attribute is still present. In detail: - Ihen RTD3_MD=11 (VSEN3 present), everything is as expected (no bug). - If we have RTD3_MD!=11 (no VSEN3), we unexpectedly have a in4_input file under /sys and the "sensors" command displays in4_input. But as expected, we have no in4_min, in4_max, in4_alarm, in4_beep. Fix is_visible function to detect and report in4_input visibility as expected. Reported-by: Gilles Buloz Cc: Gilles Buloz Cc: stable@vger.kernel.org Fixes: 3434f37835804 ("hwmon: Driver for Nuvoton NCT7802Y") Signed-off-by: Guenter Roeck --- drivers/hwmon/nct7802.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/hwmon/nct7802.c b/drivers/hwmon/nct7802.c index ec7bcf8d7cd6..f3dd2a17bd42 100644 --- a/drivers/hwmon/nct7802.c +++ b/drivers/hwmon/nct7802.c @@ -704,7 +704,7 @@ static struct attribute *nct7802_in_attrs[] = { &sensor_dev_attr_in3_alarm.dev_attr.attr, &sensor_dev_attr_in3_beep.dev_attr.attr, - &sensor_dev_attr_in4_input.dev_attr.attr, /* 17 */ + &sensor_dev_attr_in4_input.dev_attr.attr, /* 16 */ &sensor_dev_attr_in4_min.dev_attr.attr, &sensor_dev_attr_in4_max.dev_attr.attr, &sensor_dev_attr_in4_alarm.dev_attr.attr, @@ -730,9 +730,9 @@ static umode_t nct7802_in_is_visible(struct kobject *kobj, if (index >= 6 && index < 11 && (reg & 0x03) != 0x03) /* VSEN1 */ return 0; - if (index >= 11 && index < 17 && (reg & 0x0c) != 0x0c) /* VSEN2 */ + if (index >= 11 && index < 16 && (reg & 0x0c) != 0x0c) /* VSEN2 */ return 0; - if (index >= 17 && (reg & 0x30) != 0x30) /* VSEN3 */ + if (index >= 16 && (reg & 0x30) != 0x30) /* VSEN3 */ return 0; return attr->mode; From a95a4f3f2702b55a89393bf0f1b2b3d79e0f7da2 Mon Sep 17 00:00:00 2001 From: Iker Perez del Palomar Sustatxa Date: Thu, 1 Aug 2019 08:53:24 +0100 Subject: [PATCH 204/482] hwmon: (lm75) Fixup tmp75b clr_mask The configuration register of the tmp75b sensor is 16bit long, however the first byte is reserved, so there is not no need to take care of it. Because the order of the bytes is little endian and it is only necessary to write one byte, the desired bits must be shifted into a 8 bit range. Fixes: 39abe9d88b30 ("hwmon: (lm75) Add support for TMP75B") Cc: stable@vger.kernel.org Signed-off-by: Iker Perez del Palomar Sustatxa Link: https://lore.kernel.org/r/20190801075324.4638-1-iker.perez@codethink.co.uk Signed-off-by: Guenter Roeck --- drivers/hwmon/lm75.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hwmon/lm75.c b/drivers/hwmon/lm75.c index 3fb9c0a2d6d0..ce5ec403ec73 100644 --- a/drivers/hwmon/lm75.c +++ b/drivers/hwmon/lm75.c @@ -343,7 +343,7 @@ lm75_probe(struct i2c_client *client, const struct i2c_device_id *id) data->sample_time = MSEC_PER_SEC / 2; break; case tmp75b: /* not one-shot mode, Conversion rate 37Hz */ - clr_mask |= 1 << 15 | 0x3 << 13; + clr_mask |= 1 << 7 | 0x3 << 5; data->resolution = 12; data->sample_time = MSEC_PER_SEC / 37; break; From a86c71ba3022331f79662d7f12d1b25188c7e377 Mon Sep 17 00:00:00 2001 From: James Smart Date: Fri, 2 Aug 2019 13:26:12 -0700 Subject: [PATCH 205/482] scsi: lpfc: Fix crash when cpu count is 1 and null irq affinity mask When a configurations runs with a single cpu (such as a kdump kernel), which causes the driver to request a single vector, when the driver subsequently requests an irq affinity mask, the mask comes back null. The driver currently does nothing in this scenario, which leaves mappings to hardware queues incomplete and crashes the system. Fix by recognizing the null mask and assigning the vector to the first cpu in the system. Signed-off-by: Dick Kennedy Signed-off-by: James Smart Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_init.c | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index faf43b1d3dbe..a7549ae32542 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -10776,12 +10776,31 @@ lpfc_cpu_affinity_check(struct lpfc_hba *phba, int vectors) /* This loop sets up all CPUs that are affinitized with a * irq vector assigned to the driver. All affinitized CPUs * will get a link to that vectors IRQ and EQ. + * + * NULL affinity mask handling: + * If irq count is greater than one, log an error message. + * If the null mask is received for the first irq, find the + * first present cpu, and assign the eq index to ensure at + * least one EQ is assigned. */ for (idx = 0; idx < phba->cfg_irq_chann; idx++) { /* Get a CPU mask for all CPUs affinitized to this vector */ maskp = pci_irq_get_affinity(phba->pcidev, idx); - if (!maskp) - continue; + if (!maskp) { + if (phba->cfg_irq_chann > 1) + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "3329 No affinity mask found " + "for vector %d (%d)\n", + idx, phba->cfg_irq_chann); + if (!idx) { + cpu = cpumask_first(cpu_present_mask); + cpup = &phba->sli4_hba.cpu_map[cpu]; + cpup->eq = idx; + cpup->irq = pci_irq_vector(phba->pcidev, idx); + cpup->flag |= LPFC_CPU_FIRST_IRQ; + } + break; + } i = 0; /* Loop through all CPUs associated with vector idx */ From c7cd7c748a3250ca33509f9235efab9c803aca09 Mon Sep 17 00:00:00 2001 From: Wenwen Wang Date: Thu, 8 Aug 2019 00:15:21 -0500 Subject: [PATCH 206/482] sound: fix a memory leak bug In sound_insert_unit(), the controlling structure 's' is allocated through kmalloc(). Then it is added to the sound driver list by invoking __sound_insert_unit(). Later on, if __register_chrdev() fails, 's' is removed from the list through __sound_remove_unit(). If 'index' is not less than 0, -EBUSY is returned to indicate the error. However, 's' is not deallocated on this execution path, leading to a memory leak bug. To fix the above issue, free 's' before -EBUSY is returned. Signed-off-by: Wenwen Wang Cc: Signed-off-by: Takashi Iwai --- sound/sound_core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sound/sound_core.c b/sound/sound_core.c index b730d97c4de6..90d118cd9164 100644 --- a/sound/sound_core.c +++ b/sound/sound_core.c @@ -275,7 +275,8 @@ static int sound_insert_unit(struct sound_unit **list, const struct file_operati goto retry; } spin_unlock(&sound_loader_lock); - return -EBUSY; + r = -EBUSY; + goto fail; } } From 4ce97317f41d38584fb93578e922fcd19e535f5b Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Wed, 7 Aug 2019 15:15:32 -0700 Subject: [PATCH 207/482] x86/purgatory: Do not use __builtin_memcpy and __builtin_memset Implementing memcpy and memset in terms of __builtin_memcpy and __builtin_memset is problematic. GCC at -O2 will replace calls to the builtins with calls to memcpy and memset (but will generate an inline implementation at -Os). Clang will replace the builtins with these calls regardless of optimization level. $ llvm-objdump -dr arch/x86/purgatory/string.o | tail 0000000000000339 memcpy: 339: 48 b8 00 00 00 00 00 00 00 00 movabsq $0, %rax 000000000000033b: R_X86_64_64 memcpy 343: ff e0 jmpq *%rax 0000000000000345 memset: 345: 48 b8 00 00 00 00 00 00 00 00 movabsq $0, %rax 0000000000000347: R_X86_64_64 memset 34f: ff e0 Such code results in infinite recursion at runtime. This is observed when doing kexec. Instead, reuse an implementation from arch/x86/boot/compressed/string.c. This requires to implement a stub function for warn(). Also, Clang may lower memcmp's that compare against 0 to bcmp's, so add a small definition, too. See also: commit 5f074f3e192f ("lib/string.c: implement a basic bcmp") Fixes: 8fc5b4d4121c ("purgatory: core purgatory functionality") Reported-by: Vaibhav Rustagi Debugged-by: Vaibhav Rustagi Debugged-by: Manoj Gupta Suggested-by: Alistair Delva Signed-off-by: Nick Desaulniers Signed-off-by: Thomas Gleixner Tested-by: Vaibhav Rustagi Cc: stable@vger.kernel.org Link: https://bugs.chromium.org/p/chromium/issues/detail?id=984056 Link: https://lkml.kernel.org/r/20190807221539.94583-1-ndesaulniers@google.com --- arch/x86/boot/string.c | 8 ++++++++ arch/x86/purgatory/Makefile | 3 +++ arch/x86/purgatory/purgatory.c | 6 ++++++ arch/x86/purgatory/string.c | 23 ----------------------- 4 files changed, 17 insertions(+), 23 deletions(-) delete mode 100644 arch/x86/purgatory/string.c diff --git a/arch/x86/boot/string.c b/arch/x86/boot/string.c index 401e30ca0a75..8272a4492844 100644 --- a/arch/x86/boot/string.c +++ b/arch/x86/boot/string.c @@ -37,6 +37,14 @@ int memcmp(const void *s1, const void *s2, size_t len) return diff; } +/* + * Clang may lower `memcmp == 0` to `bcmp == 0`. + */ +int bcmp(const void *s1, const void *s2, size_t len) +{ + return memcmp(s1, s2, len); +} + int strcmp(const char *str1, const char *str2) { const unsigned char *s1 = (const unsigned char *)str1; diff --git a/arch/x86/purgatory/Makefile b/arch/x86/purgatory/Makefile index 3cf302b26332..91ef244026d2 100644 --- a/arch/x86/purgatory/Makefile +++ b/arch/x86/purgatory/Makefile @@ -6,6 +6,9 @@ purgatory-y := purgatory.o stack.o setup-x86_$(BITS).o sha256.o entry64.o string targets += $(purgatory-y) PURGATORY_OBJS = $(addprefix $(obj)/,$(purgatory-y)) +$(obj)/string.o: $(srctree)/arch/x86/boot/compressed/string.c FORCE + $(call if_changed_rule,cc_o_c) + $(obj)/sha256.o: $(srctree)/lib/sha256.c FORCE $(call if_changed_rule,cc_o_c) diff --git a/arch/x86/purgatory/purgatory.c b/arch/x86/purgatory/purgatory.c index 6d8d5a34c377..b607bda786f6 100644 --- a/arch/x86/purgatory/purgatory.c +++ b/arch/x86/purgatory/purgatory.c @@ -68,3 +68,9 @@ void purgatory(void) } copy_backup_region(); } + +/* + * Defined in order to reuse memcpy() and memset() from + * arch/x86/boot/compressed/string.c + */ +void warn(const char *msg) {} diff --git a/arch/x86/purgatory/string.c b/arch/x86/purgatory/string.c deleted file mode 100644 index 01ad43873ad9..000000000000 --- a/arch/x86/purgatory/string.c +++ /dev/null @@ -1,23 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Simple string functions. - * - * Copyright (C) 2014 Red Hat Inc. - * - * Author: - * Vivek Goyal - */ - -#include - -#include "../boot/string.c" - -void *memcpy(void *dst, const void *src, size_t len) -{ - return __builtin_memcpy(dst, src, len); -} - -void *memset(void *dst, int c, size_t len) -{ - return __builtin_memset(dst, c, len); -} From b059f801a937d164e03b33c1848bb3dca67c0b04 Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Wed, 7 Aug 2019 15:15:33 -0700 Subject: [PATCH 208/482] x86/purgatory: Use CFLAGS_REMOVE rather than reset KBUILD_CFLAGS KBUILD_CFLAGS is very carefully built up in the top level Makefile, particularly when cross compiling or using different build tools. Resetting KBUILD_CFLAGS via := assignment is an antipattern. The comment above the reset mentions that -pg is problematic. Other Makefiles use `CFLAGS_REMOVE_file.o = $(CC_FLAGS_FTRACE)` when CONFIG_FUNCTION_TRACER is set. Prefer that pattern to wiping out all of the important KBUILD_CFLAGS then manually having to re-add them. Seems also that __stack_chk_fail references are generated when using CONFIG_STACKPROTECTOR or CONFIG_STACKPROTECTOR_STRONG. Fixes: 8fc5b4d4121c ("purgatory: core purgatory functionality") Reported-by: Vaibhav Rustagi Suggested-by: Peter Zijlstra Suggested-by: Thomas Gleixner Signed-off-by: Nick Desaulniers Signed-off-by: Thomas Gleixner Tested-by: Vaibhav Rustagi Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20190807221539.94583-2-ndesaulniers@google.com --- arch/x86/purgatory/Makefile | 31 +++++++++++++++++++++++++++---- 1 file changed, 27 insertions(+), 4 deletions(-) diff --git a/arch/x86/purgatory/Makefile b/arch/x86/purgatory/Makefile index 91ef244026d2..8901a1f89cf5 100644 --- a/arch/x86/purgatory/Makefile +++ b/arch/x86/purgatory/Makefile @@ -20,11 +20,34 @@ KCOV_INSTRUMENT := n # Default KBUILD_CFLAGS can have -pg option set when FTRACE is enabled. That # in turn leaves some undefined symbols like __fentry__ in purgatory and not -# sure how to relocate those. Like kexec-tools, use custom flags. +# sure how to relocate those. +ifdef CONFIG_FUNCTION_TRACER +CFLAGS_REMOVE_sha256.o += $(CC_FLAGS_FTRACE) +CFLAGS_REMOVE_purgatory.o += $(CC_FLAGS_FTRACE) +CFLAGS_REMOVE_string.o += $(CC_FLAGS_FTRACE) +CFLAGS_REMOVE_kexec-purgatory.o += $(CC_FLAGS_FTRACE) +endif -KBUILD_CFLAGS := -fno-strict-aliasing -Wall -Wstrict-prototypes -fno-zero-initialized-in-bss -fno-builtin -ffreestanding -c -Os -mcmodel=large -KBUILD_CFLAGS += -m$(BITS) -KBUILD_CFLAGS += $(call cc-option,-fno-PIE) +ifdef CONFIG_STACKPROTECTOR +CFLAGS_REMOVE_sha256.o += -fstack-protector +CFLAGS_REMOVE_purgatory.o += -fstack-protector +CFLAGS_REMOVE_string.o += -fstack-protector +CFLAGS_REMOVE_kexec-purgatory.o += -fstack-protector +endif + +ifdef CONFIG_STACKPROTECTOR_STRONG +CFLAGS_REMOVE_sha256.o += -fstack-protector-strong +CFLAGS_REMOVE_purgatory.o += -fstack-protector-strong +CFLAGS_REMOVE_string.o += -fstack-protector-strong +CFLAGS_REMOVE_kexec-purgatory.o += -fstack-protector-strong +endif + +ifdef CONFIG_RETPOLINE +CFLAGS_REMOVE_sha256.o += $(RETPOLINE_CFLAGS) +CFLAGS_REMOVE_purgatory.o += $(RETPOLINE_CFLAGS) +CFLAGS_REMOVE_string.o += $(RETPOLINE_CFLAGS) +CFLAGS_REMOVE_kexec-purgatory.o += $(RETPOLINE_CFLAGS) +endif $(obj)/purgatory.ro: $(PURGATORY_OBJS) FORCE $(call if_changed,ld) From 04f5bda84b0712d6f172556a7e8dca9ded5e73b9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Valdis=20Kl=C4=93tnieks?= Date: Wed, 7 Aug 2019 23:27:17 -0400 Subject: [PATCH 209/482] x86/lib/cpu: Address missing prototypes warning When building with W=1, warnings about missing prototypes are emitted: CC arch/x86/lib/cpu.o arch/x86/lib/cpu.c:5:14: warning: no previous prototype for 'x86_family' [-Wmissing-prototypes] 5 | unsigned int x86_family(unsigned int sig) | ^~~~~~~~~~ arch/x86/lib/cpu.c:18:14: warning: no previous prototype for 'x86_model' [-Wmissing-prototypes] 18 | unsigned int x86_model(unsigned int sig) | ^~~~~~~~~ arch/x86/lib/cpu.c:33:14: warning: no previous prototype for 'x86_stepping' [-Wmissing-prototypes] 33 | unsigned int x86_stepping(unsigned int sig) | ^~~~~~~~~~~~ Add the proper include file so the prototypes are there. Signed-off-by: Valdis Kletnieks Signed-off-by: Thomas Gleixner Link: https://lkml.kernel.org/r/42513.1565234837@turing-police --- arch/x86/lib/cpu.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/lib/cpu.c b/arch/x86/lib/cpu.c index 04967cdce5d1..7ad68917a51e 100644 --- a/arch/x86/lib/cpu.c +++ b/arch/x86/lib/cpu.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only #include #include +#include unsigned int x86_family(unsigned int sig) { From 8097c43bcbec56fbd0788d99e1e236c0e0d4013f Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 8 Aug 2019 08:39:35 +0200 Subject: [PATCH 210/482] Revert "kernfs: fix memleak in kernel_ops_readdir()" This reverts commit cc798c83898ea0a77fcaa1a92afda35c3c3ded74. Tony writes: Somehow this causes a regression in Linux next for me where I'm seeing lots of sysfs entries now missing under /sys/bus/platform/devices. For example, I now only see one .serial entry show up in sysfs. Things work again if I revert commit cc798c83898e ("kernfs: fix memleak inkernel_ops_readdir()"). Any ideas why that would be? Tejun says: Ugh, you're right. It can get double-put cuz ctx->pos is put by release too. So reverting it for now. Reported-by: Tony Lindgren Cc: Andrea Arcangeli Cc: Tejun Heo Fixes: cc798c83898e ("kernfs: fix memleak in kernel_ops_readdir()") Cc: stable Signed-off-by: Greg Kroah-Hartman --- fs/kernfs/dir.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c index 1e98efc2bf6d..a387534c9577 100644 --- a/fs/kernfs/dir.c +++ b/fs/kernfs/dir.c @@ -1684,14 +1684,11 @@ static int kernfs_fop_readdir(struct file *file, struct dir_context *ctx) kernfs_get(pos); mutex_unlock(&kernfs_mutex); - if (unlikely(!dir_emit(ctx, name, len, ino, type))) { - kernfs_put(pos); - goto out; - } + if (!dir_emit(ctx, name, len, ino, type)) + return 0; mutex_lock(&kernfs_mutex); } mutex_unlock(&kernfs_mutex); -out: file->private_data = NULL; ctx->pos = INT_MAX; return 0; From 491beed3b102b6e6c0e7734200661242226e3933 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Mon, 5 Aug 2019 09:19:06 +0800 Subject: [PATCH 211/482] genirq/affinity: Create affinity mask for single vector Since commit c66d4bd110a1f8 ("genirq/affinity: Add new callback for (re)calculating interrupt sets"), irq_create_affinity_masks() returns NULL in case of single vector. This change has caused regression on some drivers, such as lpfc. The problem is that single vector requests can happen in some generic cases: 1) kdump kernel 2) irq vectors resource is close to exhaustion. If in that situation the affinity mask for a single vector is not created, every caller has to handle the special case. There is no reason why the mask cannot be created, so remove the check for a single vector and create the mask. Fixes: c66d4bd110a1f8 ("genirq/affinity: Add new callback for (re)calculating interrupt sets") Signed-off-by: Ming Lei Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20190805011906.5020-1-ming.lei@redhat.com --- kernel/irq/affinity.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/kernel/irq/affinity.c b/kernel/irq/affinity.c index 4352b08ae48d..6fef48033f96 100644 --- a/kernel/irq/affinity.c +++ b/kernel/irq/affinity.c @@ -251,11 +251,9 @@ irq_create_affinity_masks(unsigned int nvecs, struct irq_affinity *affd) * Determine the number of vectors which need interrupt affinities * assigned. If the pre/post request exhausts the available vectors * then nothing to do here except for invoking the calc_sets() - * callback so the device driver can adjust to the situation. If there - * is only a single vector, then managing the queue is pointless as - * well. + * callback so the device driver can adjust to the situation. */ - if (nvecs > 1 && nvecs > affd->pre_vectors + affd->post_vectors) + if (nvecs > affd->pre_vectors + affd->post_vectors) affvecs = nvecs - affd->pre_vectors - affd->post_vectors; else affvecs = 0; From 1be3c1fae6c1e1f5bb982b255d2034034454527a Mon Sep 17 00:00:00 2001 From: Wenwen Wang Date: Thu, 8 Aug 2019 00:50:58 -0500 Subject: [PATCH 212/482] ALSA: firewire: fix a memory leak bug In iso_packets_buffer_init(), 'b->packets' is allocated through kmalloc_array(). Then, the aligned packet size is checked. If it is larger than PAGE_SIZE, -EINVAL will be returned to indicate the error. However, the allocated 'b->packets' is not deallocated on this path, leading to a memory leak. To fix the above issue, free 'b->packets' before returning the error code. Fixes: 31ef9134eb52 ("ALSA: add LaCie FireWire Speakers/Griffin FireWave Surround driver") Signed-off-by: Wenwen Wang Reviewed-by: Takashi Sakamoto Cc: # v2.6.39+ Signed-off-by: Takashi Iwai --- sound/firewire/packets-buffer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/firewire/packets-buffer.c b/sound/firewire/packets-buffer.c index 0d35359d25cd..0ecafd0c6722 100644 --- a/sound/firewire/packets-buffer.c +++ b/sound/firewire/packets-buffer.c @@ -37,7 +37,7 @@ int iso_packets_buffer_init(struct iso_packets_buffer *b, struct fw_unit *unit, packets_per_page = PAGE_SIZE / packet_size; if (WARN_ON(!packets_per_page)) { err = -EINVAL; - goto error; + goto err_packets; } pages = DIV_ROUND_UP(count, packets_per_page); From 6b7c3b86f0b63134b2ab56508921a0853ffa687a Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 24 Jun 2019 09:39:59 -0700 Subject: [PATCH 213/482] drm/vmwgfx: fix memory leak when too many retries have occurred Currently when too many retries have occurred there is a memory leak on the allocation for reply on the error return path. Fix this by kfree'ing reply before returning. Addresses-Coverity: ("Resource leak") Fixes: a9cd9c044aa9 ("drm/vmwgfx: Add a check to handle host message failure") Signed-off-by: Colin Ian King Reviewed-by: Deepak Rawat Signed-off-by: Deepak Rawat Signed-off-by: Thomas Hellstrom --- drivers/gpu/drm/vmwgfx/vmwgfx_msg.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c b/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c index e4e09d47c5c0..59e9d05ab928 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c @@ -389,8 +389,10 @@ static int vmw_recv_msg(struct rpc_channel *channel, void **msg, break; } - if (retries == RETRIES) + if (retries == RETRIES) { + kfree(reply); return -EINVAL; + } *msg_len = reply_len; *msg = reply; From 2ca359f4f8b954b3a9d15a89f22a8b7283e7669f Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Thu, 8 Aug 2019 11:28:54 +0200 Subject: [PATCH 214/482] Revert "USB: rio500: simplify locking" This reverts commit d710734b06770814de2bfa2819420fb5df7f3a81. This simplification causes a deadlock. Reported-by: syzbot+7bbcbe9c9ff0cd49592a@syzkaller.appspotmail.com Fixes: d710734b0677 ("USB: rio500: simplify locking") Cc: stable Signed-off-by: Oliver Neukum Link: https://lore.kernel.org/r/20190808092854.23519-1-oneukum@suse.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/misc/rio500.c | 43 ++++++++++++++++++++++++--------------- 1 file changed, 27 insertions(+), 16 deletions(-) diff --git a/drivers/usb/misc/rio500.c b/drivers/usb/misc/rio500.c index 27e9c78a791e..a32d61a79ab8 100644 --- a/drivers/usb/misc/rio500.c +++ b/drivers/usb/misc/rio500.c @@ -51,6 +51,7 @@ struct rio_usb_data { char *obuf, *ibuf; /* transfer buffers */ char bulk_in_ep, bulk_out_ep; /* Endpoint assignments */ wait_queue_head_t wait_q; /* for timeouts */ + struct mutex lock; /* general race avoidance */ }; static DEFINE_MUTEX(rio500_mutex); @@ -62,8 +63,10 @@ static int open_rio(struct inode *inode, struct file *file) /* against disconnect() */ mutex_lock(&rio500_mutex); + mutex_lock(&(rio->lock)); if (rio->isopen || !rio->present) { + mutex_unlock(&(rio->lock)); mutex_unlock(&rio500_mutex); return -EBUSY; } @@ -71,6 +74,7 @@ static int open_rio(struct inode *inode, struct file *file) init_waitqueue_head(&rio->wait_q); + mutex_unlock(&(rio->lock)); dev_info(&rio->rio_dev->dev, "Rio opened.\n"); mutex_unlock(&rio500_mutex); @@ -84,6 +88,7 @@ static int close_rio(struct inode *inode, struct file *file) /* against disconnect() */ mutex_lock(&rio500_mutex); + mutex_lock(&(rio->lock)); rio->isopen = 0; if (!rio->present) { @@ -95,6 +100,7 @@ static int close_rio(struct inode *inode, struct file *file) } else { dev_info(&rio->rio_dev->dev, "Rio closed.\n"); } + mutex_unlock(&(rio->lock)); mutex_unlock(&rio500_mutex); return 0; } @@ -109,7 +115,7 @@ static long ioctl_rio(struct file *file, unsigned int cmd, unsigned long arg) int retries; int retval=0; - mutex_lock(&rio500_mutex); + mutex_lock(&(rio->lock)); /* Sanity check to make sure rio is connected, powered, etc */ if (rio->present == 0 || rio->rio_dev == NULL) { retval = -ENODEV; @@ -253,7 +259,7 @@ static long ioctl_rio(struct file *file, unsigned int cmd, unsigned long arg) err_out: - mutex_unlock(&rio500_mutex); + mutex_unlock(&(rio->lock)); return retval; } @@ -273,12 +279,12 @@ write_rio(struct file *file, const char __user *buffer, int errn = 0; int intr; - intr = mutex_lock_interruptible(&rio500_mutex); + intr = mutex_lock_interruptible(&(rio->lock)); if (intr) return -EINTR; /* Sanity check to make sure rio is connected, powered, etc */ if (rio->present == 0 || rio->rio_dev == NULL) { - mutex_unlock(&rio500_mutex); + mutex_unlock(&(rio->lock)); return -ENODEV; } @@ -301,7 +307,7 @@ write_rio(struct file *file, const char __user *buffer, goto error; } if (signal_pending(current)) { - mutex_unlock(&rio500_mutex); + mutex_unlock(&(rio->lock)); return bytes_written ? bytes_written : -EINTR; } @@ -339,12 +345,12 @@ write_rio(struct file *file, const char __user *buffer, buffer += copy_size; } while (count > 0); - mutex_unlock(&rio500_mutex); + mutex_unlock(&(rio->lock)); return bytes_written ? bytes_written : -EIO; error: - mutex_unlock(&rio500_mutex); + mutex_unlock(&(rio->lock)); return errn; } @@ -361,12 +367,12 @@ read_rio(struct file *file, char __user *buffer, size_t count, loff_t * ppos) char *ibuf; int intr; - intr = mutex_lock_interruptible(&rio500_mutex); + intr = mutex_lock_interruptible(&(rio->lock)); if (intr) return -EINTR; /* Sanity check to make sure rio is connected, powered, etc */ if (rio->present == 0 || rio->rio_dev == NULL) { - mutex_unlock(&rio500_mutex); + mutex_unlock(&(rio->lock)); return -ENODEV; } @@ -377,11 +383,11 @@ read_rio(struct file *file, char __user *buffer, size_t count, loff_t * ppos) while (count > 0) { if (signal_pending(current)) { - mutex_unlock(&rio500_mutex); + mutex_unlock(&(rio->lock)); return read_count ? read_count : -EINTR; } if (!rio->rio_dev) { - mutex_unlock(&rio500_mutex); + mutex_unlock(&(rio->lock)); return -ENODEV; } this_read = (count >= IBUF_SIZE) ? IBUF_SIZE : count; @@ -399,7 +405,7 @@ read_rio(struct file *file, char __user *buffer, size_t count, loff_t * ppos) count = this_read = partial; } else if (result == -ETIMEDOUT || result == 15) { /* FIXME: 15 ??? */ if (!maxretry--) { - mutex_unlock(&rio500_mutex); + mutex_unlock(&(rio->lock)); dev_err(&rio->rio_dev->dev, "read_rio: maxretry timeout\n"); return -ETIME; @@ -409,19 +415,19 @@ read_rio(struct file *file, char __user *buffer, size_t count, loff_t * ppos) finish_wait(&rio->wait_q, &wait); continue; } else if (result != -EREMOTEIO) { - mutex_unlock(&rio500_mutex); + mutex_unlock(&(rio->lock)); dev_err(&rio->rio_dev->dev, "Read Whoops - result:%d partial:%u this_read:%u\n", result, partial, this_read); return -EIO; } else { - mutex_unlock(&rio500_mutex); + mutex_unlock(&(rio->lock)); return (0); } if (this_read) { if (copy_to_user(buffer, ibuf, this_read)) { - mutex_unlock(&rio500_mutex); + mutex_unlock(&(rio->lock)); return -EFAULT; } count -= this_read; @@ -429,7 +435,7 @@ read_rio(struct file *file, char __user *buffer, size_t count, loff_t * ppos) buffer += this_read; } } - mutex_unlock(&rio500_mutex); + mutex_unlock(&(rio->lock)); return read_count; } @@ -494,6 +500,8 @@ static int probe_rio(struct usb_interface *intf, } dev_dbg(&intf->dev, "ibuf address:%p\n", rio->ibuf); + mutex_init(&(rio->lock)); + usb_set_intfdata (intf, rio); rio->present = 1; bail_out: @@ -511,10 +519,12 @@ static void disconnect_rio(struct usb_interface *intf) if (rio) { usb_deregister_dev(intf, &usb_rio_class); + mutex_lock(&(rio->lock)); if (rio->isopen) { rio->isopen = 0; /* better let it finish - the release will do whats needed */ rio->rio_dev = NULL; + mutex_unlock(&(rio->lock)); mutex_unlock(&rio500_mutex); return; } @@ -524,6 +534,7 @@ static void disconnect_rio(struct usb_interface *intf) dev_info(&intf->dev, "USB Rio disconnected.\n"); rio->present = 0; + mutex_unlock(&(rio->lock)); } mutex_unlock(&rio500_mutex); } From c468a8aa790e0dfe0a7f8a39db282d39c2c00b46 Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Thu, 8 Aug 2019 11:27:28 +0200 Subject: [PATCH 215/482] usb: iowarrior: fix deadlock on disconnect We have to drop the mutex before we close() upon disconnect() as close() needs the lock. This is safe to do by dropping the mutex as intfdata is already set to NULL, so open() will fail. Fixes: 03f36e885fc26 ("USB: open disconnect race in iowarrior") Reported-by: syzbot+a64a382964bf6c71a9c0@syzkaller.appspotmail.com Cc: stable Signed-off-by: Oliver Neukum Link: https://lore.kernel.org/r/20190808092728.23417-1-oneukum@suse.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/misc/iowarrior.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/usb/misc/iowarrior.c b/drivers/usb/misc/iowarrior.c index ba05dd80a020..f5bed9f29e56 100644 --- a/drivers/usb/misc/iowarrior.c +++ b/drivers/usb/misc/iowarrior.c @@ -866,19 +866,20 @@ static void iowarrior_disconnect(struct usb_interface *interface) dev = usb_get_intfdata(interface); mutex_lock(&iowarrior_open_disc_lock); usb_set_intfdata(interface, NULL); + /* prevent device read, write and ioctl */ + dev->present = 0; minor = dev->minor; + mutex_unlock(&iowarrior_open_disc_lock); + /* give back our minor - this will call close() locks need to be dropped at this point*/ - /* give back our minor */ usb_deregister_dev(interface, &iowarrior_class); mutex_lock(&dev->mutex); /* prevent device read, write and ioctl */ - dev->present = 0; mutex_unlock(&dev->mutex); - mutex_unlock(&iowarrior_open_disc_lock); if (dev->opened) { /* There is a process that holds a filedescriptor to the device , From 6cf9481b440da6d6d86bd8e4c99a8b553b9d1271 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 30 Jul 2019 17:48:48 +0200 Subject: [PATCH 216/482] pwm: Fallback to the static lookup-list when acpi_pwm_get fails Commit 4a6ef8e37c4d ("pwm: Add support referencing PWMs from ACPI") made pwm_get unconditionally return the acpi_pwm_get return value if the device passed to pwm_get has an ACPI fwnode. But even if the passed in device has an ACPI fwnode, it does not necessarily have the necessary ACPI package defining its pwm bindings, especially since the binding / API of this ACPI package has only been introduced very recently. Up until now X86/ACPI devices which use a separate pwm controller for controlling their LCD screen's backlight brightness have been relying on the static lookup-list to get their pwm. pwm_get unconditionally returning the acpi_pwm_get return value breaks this, breaking backlight control on these devices. This commit fixes this by making pwm_get fall back to the static lookup-list if acpi_pwm_get returns -ENOENT. BugLink: https://bugs.freedesktop.org/show_bug.cgi?id=96571 Reported-by: youling257@gmail.com Fixes: 4a6ef8e37c4d ("pwm: Add support referencing PWMs from ACPI") Cc: Nikolaus Voss Signed-off-by: Hans de Goede Reviewed-by: Andy Shevchenko Acked-by: Nikolaus Voss Signed-off-by: Thierry Reding --- drivers/pwm/core.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/pwm/core.c b/drivers/pwm/core.c index c3ab07ab31a9..8edfac17364e 100644 --- a/drivers/pwm/core.c +++ b/drivers/pwm/core.c @@ -882,8 +882,11 @@ struct pwm_device *pwm_get(struct device *dev, const char *con_id) return of_pwm_get(dev, dev->of_node, con_id); /* then lookup via ACPI */ - if (dev && is_acpi_node(dev->fwnode)) - return acpi_pwm_get(dev->fwnode); + if (dev && is_acpi_node(dev->fwnode)) { + pwm = acpi_pwm_get(dev->fwnode); + if (!IS_ERR(pwm) || PTR_ERR(pwm) != -ENOENT) + return pwm; + } /* * We look up the provider in the static table typically provided by From 7bac98707f65b93bf994ef4e99b1eb9e7dbb9c32 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Mon, 21 Jan 2019 13:54:39 +0100 Subject: [PATCH 217/482] kbuild: add OBJSIZE variable for the size tool Define and export OBJSIZE variable for "size" tool from binutils to be used in architecture specific Makefiles (naming the variable just "SIZE" would be too risky). In particular this tool is useful to perform checks that early boot code is not using bss section (which might have not been zeroed yet or intersects with initrd or other files boot loader might have put right after the linux kernel). Link: http://lkml.kernel.org/r/patch-1.thread-2257a1.git-188f5a3d81d5.your-ad-here.call-01565088755-ext-5120@work.hours Acked-by: Masahiro Yamada Signed-off-by: Vasily Gorbik --- Makefile | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index 23cdf1f41364..01683ba84d4c 100644 --- a/Makefile +++ b/Makefile @@ -419,6 +419,7 @@ NM = $(CROSS_COMPILE)nm STRIP = $(CROSS_COMPILE)strip OBJCOPY = $(CROSS_COMPILE)objcopy OBJDUMP = $(CROSS_COMPILE)objdump +OBJSIZE = $(CROSS_COMPILE)size PAHOLE = pahole LEX = flex YACC = bison @@ -475,9 +476,9 @@ GCC_PLUGINS_CFLAGS := CLANG_FLAGS := export ARCH SRCARCH CONFIG_SHELL HOSTCC KBUILD_HOSTCFLAGS CROSS_COMPILE AS LD CC -export CPP AR NM STRIP OBJCOPY OBJDUMP PAHOLE KBUILD_HOSTLDFLAGS KBUILD_HOSTLDLIBS -export MAKE LEX YACC AWK INSTALLKERNEL PERL PYTHON PYTHON2 PYTHON3 UTS_MACHINE -export HOSTCXX KBUILD_HOSTCXXFLAGS LDFLAGS_MODULE CHECK CHECKFLAGS +export CPP AR NM STRIP OBJCOPY OBJDUMP OBJSIZE PAHOLE LEX YACC AWK INSTALLKERNEL +export PERL PYTHON PYTHON2 PYTHON3 CHECK CHECKFLAGS MAKE UTS_MACHINE HOSTCXX +export KBUILD_HOSTCXXFLAGS KBUILD_HOSTLDFLAGS KBUILD_HOSTLDLIBS LDFLAGS_MODULE export KBUILD_CPPFLAGS NOSTDINC_FLAGS LINUXINCLUDE OBJCOPYFLAGS KBUILD_LDFLAGS export KBUILD_CFLAGS CFLAGS_KERNEL CFLAGS_MODULE From 739bacbf7aa2c44bb25d9ad5f7d5b256082c5e66 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Mon, 21 Jan 2019 13:54:53 +0100 Subject: [PATCH 218/482] s390/build: use size command to perform empty .bss check Currently empty .bss checks performed do not pay attention to "common objects" in object files which end up in .bss section eventually. The "size" tool is a part of binutils and since version 2.18 provides "--common" command line option, which allows to account "common objects" sizes in .bss section size. Utilize "size --common" to perform accurate check that .bss section is unused. Besides that the size tool handles object files without .bss section gracefully and doesn't require additional objdump run. The linux kernel requires binutils 2.20 since 4.13. Kbuild exports OBJSIZE to reference the right size tool. Link: http://lkml.kernel.org/r/patch-2.thread-2257a1.git-2257a1c53d4a.your-ad-here.call-01565088755-ext-5120@work.hours Reported-and-tested-by: Heiko Carstens Acked-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/scripts/Makefile.chkbss | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/s390/scripts/Makefile.chkbss b/arch/s390/scripts/Makefile.chkbss index 884a9caff5fb..f4f4c2c6dee9 100644 --- a/arch/s390/scripts/Makefile.chkbss +++ b/arch/s390/scripts/Makefile.chkbss @@ -11,8 +11,7 @@ chkbss: $(addprefix $(obj)/, $(chkbss-files)) quiet_cmd_chkbss = CHKBSS $< cmd_chkbss = \ - if $(OBJDUMP) -h $< | grep -q "\.bss" && \ - ! $(OBJDUMP) -j .bss -w -h $< | awk 'END { if ($$3) exit 1 }'; then \ + if ! $(OBJSIZE) --common $< | $(AWK) 'END { if ($$3) exit 1 }'; then \ echo "error: $< .bss section is not empty" >&2; exit 1; \ fi; \ touch $@; From 430380b4637aec646996b4aef67ad417593923b2 Mon Sep 17 00:00:00 2001 From: He Zhe Date: Thu, 8 Aug 2019 11:09:54 +0800 Subject: [PATCH 219/482] block: aoe: Fix kernel crash due to atomic sleep when exiting Since commit 3582dd291788 ("aoe: convert aoeblk to blk-mq"), aoedev_downdev has had the possibility of sleeping and causing the following crash. BUG: scheduling while atomic: rmmod/2242/0x00000003 Modules linked in: aoe Preemption disabled at: [] flush+0x95/0x4a0 [aoe] CPU: 7 PID: 2242 Comm: rmmod Tainted: G I 5.2.3 #1 Hardware name: Intel Corporation S5520HC/S5520HC, BIOS S5500.86B.01.10.0025.030220091519 03/02/2009 Call Trace: dump_stack+0x4f/0x6a ? flush+0x95/0x4a0 [aoe] __schedule_bug.cold+0x44/0x54 __schedule+0x44f/0x680 schedule+0x44/0xd0 blk_mq_freeze_queue_wait+0x46/0xb0 ? wait_woken+0x80/0x80 blk_mq_freeze_queue+0x1b/0x20 aoedev_downdev+0x111/0x160 [aoe] flush+0xff/0x4a0 [aoe] aoedev_exit+0x23/0x30 [aoe] aoe_exit+0x35/0x948 [aoe] __se_sys_delete_module+0x183/0x210 __x64_sys_delete_module+0x16/0x20 do_syscall_64+0x4d/0x130 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x7f24e0043b07 Code: 73 01 c3 48 8b 0d 89 73 0b 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 b8 b0 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 59 73 0b 00 f7 d8 64 89 01 48 RSP: 002b:00007ffe18f7f1e8 EFLAGS: 00000206 ORIG_RAX: 00000000000000b0 RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f24e0043b07 RDX: 000000000000000a RSI: 0000000000000800 RDI: 0000555c3ecf87c8 RBP: 00007ffe18f7f1f0 R08: 0000000000000000 R09: 0000000000000000 R10: 00007f24e00b4ac0 R11: 0000000000000206 R12: 00007ffe18f7f238 R13: 00007ffe18f7f410 R14: 00007ffe18f80e73 R15: 0000555c3ecf8760 This patch, handling in the same way of pass two, unlocks the locks and restart pass one after aoedev_downdev is done. Fixes: 3582dd291788 ("aoe: convert aoeblk to blk-mq") Signed-off-by: He Zhe Signed-off-by: Jens Axboe --- drivers/block/aoe/aoedev.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/block/aoe/aoedev.c b/drivers/block/aoe/aoedev.c index 5b49f1b33ebe..e2ea2356da06 100644 --- a/drivers/block/aoe/aoedev.c +++ b/drivers/block/aoe/aoedev.c @@ -323,10 +323,14 @@ flush(const char __user *str, size_t cnt, int exiting) } flush_scheduled_work(); - /* pass one: without sleeping, do aoedev_downdev */ + /* pass one: do aoedev_downdev, which might sleep */ +restart1: spin_lock_irqsave(&devlist_lock, flags); for (d = devlist; d; d = d->next) { spin_lock(&d->lock); + if (d->flags & DEVFL_TKILL) + goto cont; + if (exiting) { /* unconditionally take each device down */ } else if (specified) { @@ -338,8 +342,11 @@ flush(const char __user *str, size_t cnt, int exiting) || d->ref) goto cont; + spin_unlock(&d->lock); + spin_unlock_irqrestore(&devlist_lock, flags); aoedev_downdev(d); d->flags |= DEVFL_TKILL; + goto restart1; cont: spin_unlock(&d->lock); } @@ -348,7 +355,7 @@ flush(const char __user *str, size_t cnt, int exiting) /* pass two: call freedev, which might sleep, * for aoedevs marked with DEVFL_TKILL */ -restart: +restart2: spin_lock_irqsave(&devlist_lock, flags); for (d = devlist; d; d = d->next) { spin_lock(&d->lock); @@ -357,7 +364,7 @@ flush(const char __user *str, size_t cnt, int exiting) spin_unlock(&d->lock); spin_unlock_irqrestore(&devlist_lock, flags); freedev(d); - goto restart; + goto restart2; } spin_unlock(&d->lock); } From 08d383a74948b43eb6e96c86153e63cbf276f1fa Mon Sep 17 00:00:00 2001 From: Paolo Valente Date: Wed, 7 Aug 2019 16:17:53 +0200 Subject: [PATCH 220/482] block, bfq: reset last_completed_rq_bfqq if the pointed queue is freed Since commit 13a857a4c4e8 ("block, bfq: detect wakers and unconditionally inject their I/O"), BFQ stores, in a per-device pointer last_completed_rq_bfqq, the last bfq_queue that had an I/O request completed. If some bfq_queue receives new I/O right after the last request of last_completed_rq_bfqq has been completed, then last_completed_rq_bfqq may be a waker bfq_queue. But if the bfq_queue last_completed_rq_bfqq points to is freed, then last_completed_rq_bfqq becomes a dangling reference. This commit resets last_completed_rq_bfqq if the pointed bfq_queue is freed. Fixes: 13a857a4c4e8 ("block, bfq: detect wakers and unconditionally inject their I/O") Reported-by: Douglas Anderson Tested-by: Douglas Anderson Signed-off-by: Paolo Valente Signed-off-by: Jens Axboe --- block/bfq-iosched.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index 586fcfe227ea..b2009650afc2 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -1924,12 +1924,13 @@ static void bfq_add_request(struct request *rq) * confirmed no later than during the next * I/O-plugging interval for bfqq. */ - if (!bfq_bfqq_has_short_ttime(bfqq) && + if (bfqd->last_completed_rq_bfqq && + !bfq_bfqq_has_short_ttime(bfqq) && ktime_get_ns() - bfqd->last_completion < 200 * NSEC_PER_USEC) { if (bfqd->last_completed_rq_bfqq != bfqq && - bfqd->last_completed_rq_bfqq != - bfqq->waker_bfqq) { + bfqd->last_completed_rq_bfqq != + bfqq->waker_bfqq) { /* * First synchronization detected with * a candidate waker queue, or with a @@ -4808,6 +4809,9 @@ void bfq_put_queue(struct bfq_queue *bfqq) bfqq->bfqd->burst_size--; } + if (bfqq->bfqd && bfqq->bfqd->last_completed_rq_bfqq == bfqq) + bfqq->bfqd->last_completed_rq_bfqq = NULL; + kmem_cache_free(bfq_pool, bfqq); #ifdef CONFIG_BFQ_GROUP_IOSCHED bfqg_and_blkg_put(bfqg); From 3f758e844aa9800eb660d60ee10226fa802594d4 Mon Sep 17 00:00:00 2001 From: Paolo Valente Date: Wed, 7 Aug 2019 16:17:54 +0200 Subject: [PATCH 221/482] block, bfq: move update of waker and woken list to queue freeing Since commit 13a857a4c4e8 ("block, bfq: detect wakers and unconditionally inject their I/O"), every bfq_queue has a pointer to a waker bfq_queue and a list of the bfq_queues it may wake. In this respect, when a bfq_queue, say Q, remains with no I/O source attached to it, Q cannot be woken by any other bfq_queue, and cannot wake any other bfq_queue. Then Q must be removed from the woken list of its possible waker bfq_queue, and all bfq_queues in the woken list of Q must stop having a waker bfq_queue. Q remains with no I/O source in two cases: when the last process associated with Q exits or when such a process gets associated with a different bfq_queue. Unfortunately, commit 13a857a4c4e8 ("block, bfq: detect wakers and unconditionally inject their I/O") performed the above updates only in the first case. This commit fixes this bug by moving these updates to when Q gets freed. This is a simple and safe way to handle all cases, as both the above events, process exit and re-association, lead to Q being freed soon, and because dangling references would come out only after Q gets freed (if no update were performed). Fixes: 13a857a4c4e8 ("block, bfq: detect wakers and unconditionally inject their I/O") Reported-by: Douglas Anderson Tested-by: Douglas Anderson Signed-off-by: Paolo Valente Signed-off-by: Jens Axboe --- block/bfq-iosched.c | 44 +++++++++++++++++++++++++++++--------------- 1 file changed, 29 insertions(+), 15 deletions(-) diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index b2009650afc2..5f477501bb3d 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -4765,6 +4765,8 @@ static struct request *bfq_dispatch_request(struct blk_mq_hw_ctx *hctx) */ void bfq_put_queue(struct bfq_queue *bfqq) { + struct bfq_queue *item; + struct hlist_node *n; #ifdef CONFIG_BFQ_GROUP_IOSCHED struct bfq_group *bfqg = bfqq_group(bfqq); #endif @@ -4809,6 +4811,33 @@ void bfq_put_queue(struct bfq_queue *bfqq) bfqq->bfqd->burst_size--; } + /* + * bfqq does not exist any longer, so it cannot be woken by + * any other queue, and cannot wake any other queue. Then bfqq + * must be removed from the woken list of its possible waker + * queue, and all queues in the woken list of bfqq must stop + * having a waker queue. Strictly speaking, these updates + * should be performed when bfqq remains with no I/O source + * attached to it, which happens before bfqq gets freed. In + * particular, this happens when the last process associated + * with bfqq exits or gets associated with a different + * queue. However, both events lead to bfqq being freed soon, + * and dangling references would come out only after bfqq gets + * freed. So these updates are done here, as a simple and safe + * way to handle all cases. + */ + /* remove bfqq from woken list */ + if (!hlist_unhashed(&bfqq->woken_list_node)) + hlist_del_init(&bfqq->woken_list_node); + + /* reset waker for all queues in woken list */ + hlist_for_each_entry_safe(item, n, &bfqq->woken_list, + woken_list_node) { + item->waker_bfqq = NULL; + bfq_clear_bfqq_has_waker(item); + hlist_del_init(&item->woken_list_node); + } + if (bfqq->bfqd && bfqq->bfqd->last_completed_rq_bfqq == bfqq) bfqq->bfqd->last_completed_rq_bfqq = NULL; @@ -4839,9 +4868,6 @@ static void bfq_put_cooperator(struct bfq_queue *bfqq) static void bfq_exit_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq) { - struct bfq_queue *item; - struct hlist_node *n; - if (bfqq == bfqd->in_service_queue) { __bfq_bfqq_expire(bfqd, bfqq, BFQQE_BUDGET_TIMEOUT); bfq_schedule_dispatch(bfqd); @@ -4851,18 +4877,6 @@ static void bfq_exit_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq) bfq_put_cooperator(bfqq); - /* remove bfqq from woken list */ - if (!hlist_unhashed(&bfqq->woken_list_node)) - hlist_del_init(&bfqq->woken_list_node); - - /* reset waker for all queues in woken list */ - hlist_for_each_entry_safe(item, n, &bfqq->woken_list, - woken_list_node) { - item->waker_bfqq = NULL; - bfq_clear_bfqq_has_waker(item); - hlist_del_init(&item->woken_list_node); - } - bfq_put_queue(bfqq); /* release process reference */ } From fd03177c33b287c6541f4048f1d67b7b45a1abc9 Mon Sep 17 00:00:00 2001 From: Paolo Valente Date: Wed, 7 Aug 2019 19:21:11 +0200 Subject: [PATCH 222/482] block, bfq: handle NULL return value by bfq_init_rq() As reported in [1], the call bfq_init_rq(rq) may return NULL in case of OOM (in particular, if rq->elv.icq is NULL because memory allocation failed in failed in ioc_create_icq()). This commit handles this circumstance. [1] https://lkml.org/lkml/2019/7/22/824 Cc: Hsin-Yi Wang Cc: Nicolas Boichat Cc: Doug Anderson Reported-by: Guenter Roeck Reported-by: Hsin-Yi Wang Reviewed-by: Guenter Roeck Signed-off-by: Paolo Valente Signed-off-by: Jens Axboe --- block/bfq-iosched.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index 5f477501bb3d..b33be928d164 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -2251,9 +2251,14 @@ static void bfq_request_merged(struct request_queue *q, struct request *req, blk_rq_pos(container_of(rb_prev(&req->rb_node), struct request, rb_node))) { struct bfq_queue *bfqq = bfq_init_rq(req); - struct bfq_data *bfqd = bfqq->bfqd; + struct bfq_data *bfqd; struct request *prev, *next_rq; + if (!bfqq) + return; + + bfqd = bfqq->bfqd; + /* Reposition request in its sort_list */ elv_rb_del(&bfqq->sort_list, req); elv_rb_add(&bfqq->sort_list, req); @@ -2300,6 +2305,9 @@ static void bfq_requests_merged(struct request_queue *q, struct request *rq, struct bfq_queue *bfqq = bfq_init_rq(rq), *next_bfqq = bfq_init_rq(next); + if (!bfqq) + return; + /* * If next and rq belong to the same bfq_queue and next is older * than rq, then reposition rq in the fifo (by substituting next @@ -5454,12 +5462,12 @@ static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq, spin_lock_irq(&bfqd->lock); bfqq = bfq_init_rq(rq); - if (at_head || blk_rq_is_passthrough(rq)) { + if (!bfqq || at_head || blk_rq_is_passthrough(rq)) { if (at_head) list_add(&rq->queuelist, &bfqd->dispatch); else list_add_tail(&rq->queuelist, &bfqd->dispatch); - } else { /* bfqq is assumed to be non null here */ + } else { idle_timer_disabled = __bfq_insert_request(bfqd, rq); /* * Update bfqq, because, if a queue merge has occurred From 39c71a5b8212f4b502d9a630c6706ac723abd422 Mon Sep 17 00:00:00 2001 From: Jia-Ju Bai Date: Mon, 29 Jul 2019 10:08:49 +0800 Subject: [PATCH 223/482] dmaengine: stm32-mdma: Fix a possible null-pointer dereference in stm32_mdma_irq_handler() In stm32_mdma_irq_handler(), chan is checked on line 1368. When chan is NULL, it is still used on line 1369: dev_err(chan2dev(chan), "MDMA channel not initialized\n"); Thus, a possible null-pointer dereference may occur. To fix this bug, "dev_dbg(mdma2dev(dmadev), ...)" is used instead. Signed-off-by: Jia-Ju Bai Fixes: a4ffb13c8946 ("dmaengine: Add STM32 MDMA driver") Link: https://lore.kernel.org/r/20190729020849.17971-1-baijiaju1990@gmail.com Signed-off-by: Vinod Koul --- drivers/dma/stm32-mdma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/stm32-mdma.c b/drivers/dma/stm32-mdma.c index d6e919d3936a..1311de74bfdd 100644 --- a/drivers/dma/stm32-mdma.c +++ b/drivers/dma/stm32-mdma.c @@ -1366,7 +1366,7 @@ static irqreturn_t stm32_mdma_irq_handler(int irq, void *devid) chan = &dmadev->chan[id]; if (!chan) { - dev_err(chan2dev(chan), "MDMA channel not initialized\n"); + dev_dbg(mdma2dev(dmadev), "MDMA channel not initialized\n"); goto exit; } From e91455bad5cff40a8c232f2204a5104127e3fec2 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 7 Aug 2019 11:36:47 +0200 Subject: [PATCH 224/482] bdev: Fixup error handling in blkdev_get() Commit 89e524c04fa9 ("loop: Fix mount(2) failure due to race with LOOP_SET_FD") converted blkdev_get() to use the new helpers for finishing claiming of a block device. However the conversion botched the error handling in blkdev_get() and thus the bdev has been marked as held even in case __blkdev_get() returned error. This led to occasional warnings with block/001 test from blktests like: kernel: WARNING: CPU: 5 PID: 907 at fs/block_dev.c:1899 __blkdev_put+0x396/0x3a0 Correct the error handling. CC: stable@vger.kernel.org Fixes: 89e524c04fa9 ("loop: Fix mount(2) failure due to race with LOOP_SET_FD") Signed-off-by: Jan Kara Signed-off-by: Jens Axboe --- fs/block_dev.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/block_dev.c b/fs/block_dev.c index 131e2e0582a6..eb657ab94060 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -1750,7 +1750,10 @@ int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder) /* finish claiming */ mutex_lock(&bdev->bd_mutex); - bd_finish_claiming(bdev, whole, holder); + if (!res) + bd_finish_claiming(bdev, whole, holder); + else + bd_abort_claiming(bdev, whole, holder); /* * Block event polling for write claims if requested. Any * write holder makes the write_holder state stick until From 27709ae4e2fe6cf7da2ae45e718e190c5433342b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thi=C3=A9baud=20Weksteen?= Date: Tue, 6 Aug 2019 13:00:50 +0200 Subject: [PATCH 225/482] usb: setup authorized_default attributes using usb_bus_notify MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, the authorized_default and interface_authorized_default attributes for HCD are set up after the uevent has been sent to userland. This creates a race condition where userland may fail to access this file when processing the event. Move the appending of these attributes earlier relying on the usb_bus_notify dispatcher. Signed-off-by: Thiébaud Weksteen Cc: stable Link: https://lore.kernel.org/r/20190806110050.38918-1-tweek@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hcd.c | 123 --------------------------------------- drivers/usb/core/sysfs.c | 121 ++++++++++++++++++++++++++++++++++++++ drivers/usb/core/usb.h | 5 ++ 3 files changed, 126 insertions(+), 123 deletions(-) diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c index 9320787ac2e6..2ccbc2f83570 100644 --- a/drivers/usb/core/hcd.c +++ b/drivers/usb/core/hcd.c @@ -103,11 +103,6 @@ static DEFINE_SPINLOCK(hcd_urb_unlink_lock); /* wait queue for synchronous unlinks */ DECLARE_WAIT_QUEUE_HEAD(usb_kill_urb_queue); -static inline int is_root_hub(struct usb_device *udev) -{ - return (udev->parent == NULL); -} - /*-------------------------------------------------------------------------*/ /* @@ -880,101 +875,6 @@ static int usb_rh_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, int status) } - -/* - * Show & store the current value of authorized_default - */ -static ssize_t authorized_default_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct usb_device *rh_usb_dev = to_usb_device(dev); - struct usb_bus *usb_bus = rh_usb_dev->bus; - struct usb_hcd *hcd; - - hcd = bus_to_hcd(usb_bus); - return snprintf(buf, PAGE_SIZE, "%u\n", hcd->dev_policy); -} - -static ssize_t authorized_default_store(struct device *dev, - struct device_attribute *attr, - const char *buf, size_t size) -{ - ssize_t result; - unsigned val; - struct usb_device *rh_usb_dev = to_usb_device(dev); - struct usb_bus *usb_bus = rh_usb_dev->bus; - struct usb_hcd *hcd; - - hcd = bus_to_hcd(usb_bus); - result = sscanf(buf, "%u\n", &val); - if (result == 1) { - hcd->dev_policy = val <= USB_DEVICE_AUTHORIZE_INTERNAL ? - val : USB_DEVICE_AUTHORIZE_ALL; - result = size; - } else { - result = -EINVAL; - } - return result; -} -static DEVICE_ATTR_RW(authorized_default); - -/* - * interface_authorized_default_show - show default authorization status - * for USB interfaces - * - * note: interface_authorized_default is the default value - * for initializing the authorized attribute of interfaces - */ -static ssize_t interface_authorized_default_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct usb_device *usb_dev = to_usb_device(dev); - struct usb_hcd *hcd = bus_to_hcd(usb_dev->bus); - - return sprintf(buf, "%u\n", !!HCD_INTF_AUTHORIZED(hcd)); -} - -/* - * interface_authorized_default_store - store default authorization status - * for USB interfaces - * - * note: interface_authorized_default is the default value - * for initializing the authorized attribute of interfaces - */ -static ssize_t interface_authorized_default_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) -{ - struct usb_device *usb_dev = to_usb_device(dev); - struct usb_hcd *hcd = bus_to_hcd(usb_dev->bus); - int rc = count; - bool val; - - if (strtobool(buf, &val) != 0) - return -EINVAL; - - if (val) - set_bit(HCD_FLAG_INTF_AUTHORIZED, &hcd->flags); - else - clear_bit(HCD_FLAG_INTF_AUTHORIZED, &hcd->flags); - - return rc; -} -static DEVICE_ATTR_RW(interface_authorized_default); - -/* Group all the USB bus attributes */ -static struct attribute *usb_bus_attrs[] = { - &dev_attr_authorized_default.attr, - &dev_attr_interface_authorized_default.attr, - NULL, -}; - -static const struct attribute_group usb_bus_attr_group = { - .name = NULL, /* we want them in the same directory */ - .attrs = usb_bus_attrs, -}; - - - /*-------------------------------------------------------------------------*/ /** @@ -2894,32 +2794,11 @@ int usb_add_hcd(struct usb_hcd *hcd, if (retval != 0) goto err_register_root_hub; - retval = sysfs_create_group(&rhdev->dev.kobj, &usb_bus_attr_group); - if (retval < 0) { - printk(KERN_ERR "Cannot register USB bus sysfs attributes: %d\n", - retval); - goto error_create_attr_group; - } if (hcd->uses_new_polling && HCD_POLL_RH(hcd)) usb_hcd_poll_rh_status(hcd); return retval; -error_create_attr_group: - clear_bit(HCD_FLAG_RH_RUNNING, &hcd->flags); - if (HC_IS_RUNNING(hcd->state)) - hcd->state = HC_STATE_QUIESCING; - spin_lock_irq(&hcd_root_hub_lock); - hcd->rh_registered = 0; - spin_unlock_irq(&hcd_root_hub_lock); - -#ifdef CONFIG_PM - cancel_work_sync(&hcd->wakeup_work); -#endif - cancel_work_sync(&hcd->died_work); - mutex_lock(&usb_bus_idr_lock); - usb_disconnect(&rhdev); /* Sets rhdev to NULL */ - mutex_unlock(&usb_bus_idr_lock); err_register_root_hub: hcd->rh_pollable = 0; clear_bit(HCD_FLAG_POLL_RH, &hcd->flags); @@ -2963,8 +2842,6 @@ void usb_remove_hcd(struct usb_hcd *hcd) dev_info(hcd->self.controller, "remove, state %x\n", hcd->state); usb_get_dev(rhdev); - sysfs_remove_group(&rhdev->dev.kobj, &usb_bus_attr_group); - clear_bit(HCD_FLAG_RH_RUNNING, &hcd->flags); if (HC_IS_RUNNING (hcd->state)) hcd->state = HC_STATE_QUIESCING; diff --git a/drivers/usb/core/sysfs.c b/drivers/usb/core/sysfs.c index 7e88fdfe3cf5..f19694e69f5c 100644 --- a/drivers/usb/core/sysfs.c +++ b/drivers/usb/core/sysfs.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include "usb.h" @@ -922,6 +923,116 @@ static struct bin_attribute dev_bin_attr_descriptors = { .size = 18 + 65535, /* dev descr + max-size raw descriptor */ }; +/* + * Show & store the current value of authorized_default + */ +static ssize_t authorized_default_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct usb_device *rh_usb_dev = to_usb_device(dev); + struct usb_bus *usb_bus = rh_usb_dev->bus; + struct usb_hcd *hcd; + + hcd = bus_to_hcd(usb_bus); + return snprintf(buf, PAGE_SIZE, "%u\n", hcd->dev_policy); +} + +static ssize_t authorized_default_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t size) +{ + ssize_t result; + unsigned int val; + struct usb_device *rh_usb_dev = to_usb_device(dev); + struct usb_bus *usb_bus = rh_usb_dev->bus; + struct usb_hcd *hcd; + + hcd = bus_to_hcd(usb_bus); + result = sscanf(buf, "%u\n", &val); + if (result == 1) { + hcd->dev_policy = val <= USB_DEVICE_AUTHORIZE_INTERNAL ? + val : USB_DEVICE_AUTHORIZE_ALL; + result = size; + } else { + result = -EINVAL; + } + return result; +} +static DEVICE_ATTR_RW(authorized_default); + +/* + * interface_authorized_default_show - show default authorization status + * for USB interfaces + * + * note: interface_authorized_default is the default value + * for initializing the authorized attribute of interfaces + */ +static ssize_t interface_authorized_default_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct usb_device *usb_dev = to_usb_device(dev); + struct usb_hcd *hcd = bus_to_hcd(usb_dev->bus); + + return sprintf(buf, "%u\n", !!HCD_INTF_AUTHORIZED(hcd)); +} + +/* + * interface_authorized_default_store - store default authorization status + * for USB interfaces + * + * note: interface_authorized_default is the default value + * for initializing the authorized attribute of interfaces + */ +static ssize_t interface_authorized_default_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct usb_device *usb_dev = to_usb_device(dev); + struct usb_hcd *hcd = bus_to_hcd(usb_dev->bus); + int rc = count; + bool val; + + if (strtobool(buf, &val) != 0) + return -EINVAL; + + if (val) + set_bit(HCD_FLAG_INTF_AUTHORIZED, &hcd->flags); + else + clear_bit(HCD_FLAG_INTF_AUTHORIZED, &hcd->flags); + + return rc; +} +static DEVICE_ATTR_RW(interface_authorized_default); + +/* Group all the USB bus attributes */ +static struct attribute *usb_bus_attrs[] = { + &dev_attr_authorized_default.attr, + &dev_attr_interface_authorized_default.attr, + NULL, +}; + +static const struct attribute_group usb_bus_attr_group = { + .name = NULL, /* we want them in the same directory */ + .attrs = usb_bus_attrs, +}; + + +static int add_default_authorized_attributes(struct device *dev) +{ + int rc = 0; + + if (is_usb_device(dev)) + rc = sysfs_create_group(&dev->kobj, &usb_bus_attr_group); + + return rc; +} + +static void remove_default_authorized_attributes(struct device *dev) +{ + if (is_usb_device(dev)) { + sysfs_remove_group(&dev->kobj, &usb_bus_attr_group); + } +} + int usb_create_sysfs_dev_files(struct usb_device *udev) { struct device *dev = &udev->dev; @@ -938,7 +1049,14 @@ int usb_create_sysfs_dev_files(struct usb_device *udev) retval = add_power_attributes(dev); if (retval) goto error; + + if (is_root_hub(udev)) { + retval = add_default_authorized_attributes(dev); + if (retval) + goto error; + } return retval; + error: usb_remove_sysfs_dev_files(udev); return retval; @@ -948,6 +1066,9 @@ void usb_remove_sysfs_dev_files(struct usb_device *udev) { struct device *dev = &udev->dev; + if (is_root_hub(udev)) + remove_default_authorized_attributes(dev); + remove_power_attributes(dev); remove_persist_attributes(dev); device_remove_bin_file(dev, &dev_bin_attr_descriptors); diff --git a/drivers/usb/core/usb.h b/drivers/usb/core/usb.h index bd8d01f85a13..0c9fde5ad052 100644 --- a/drivers/usb/core/usb.h +++ b/drivers/usb/core/usb.h @@ -153,6 +153,11 @@ static inline int is_usb_port(const struct device *dev) return dev->type == &usb_port_device_type; } +static inline int is_root_hub(struct usb_device *udev) +{ + return (udev->parent == NULL); +} + /* Do the same for device drivers and interface drivers. */ static inline int is_usb_device_driver(struct device_driver *drv) From d0a255e795ab976481565f6ac178314b34fbf891 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Thu, 8 Aug 2019 11:17:01 -0400 Subject: [PATCH 226/482] loop: set PF_MEMALLOC_NOIO for the worker thread A deadlock with this stacktrace was observed. The loop thread does a GFP_KERNEL allocation, it calls into dm-bufio shrinker and the shrinker depends on I/O completion in the dm-bufio subsystem. In order to fix the deadlock (and other similar ones), we set the flag PF_MEMALLOC_NOIO at loop thread entry. PID: 474 TASK: ffff8813e11f4600 CPU: 10 COMMAND: "kswapd0" #0 [ffff8813dedfb938] __schedule at ffffffff8173f405 #1 [ffff8813dedfb990] schedule at ffffffff8173fa27 #2 [ffff8813dedfb9b0] schedule_timeout at ffffffff81742fec #3 [ffff8813dedfba60] io_schedule_timeout at ffffffff8173f186 #4 [ffff8813dedfbaa0] bit_wait_io at ffffffff8174034f #5 [ffff8813dedfbac0] __wait_on_bit at ffffffff8173fec8 #6 [ffff8813dedfbb10] out_of_line_wait_on_bit at ffffffff8173ff81 #7 [ffff8813dedfbb90] __make_buffer_clean at ffffffffa038736f [dm_bufio] #8 [ffff8813dedfbbb0] __try_evict_buffer at ffffffffa0387bb8 [dm_bufio] #9 [ffff8813dedfbbd0] dm_bufio_shrink_scan at ffffffffa0387cc3 [dm_bufio] #10 [ffff8813dedfbc40] shrink_slab at ffffffff811a87ce #11 [ffff8813dedfbd30] shrink_zone at ffffffff811ad778 #12 [ffff8813dedfbdc0] kswapd at ffffffff811ae92f #13 [ffff8813dedfbec0] kthread at ffffffff810a8428 #14 [ffff8813dedfbf50] ret_from_fork at ffffffff81745242 PID: 14127 TASK: ffff881455749c00 CPU: 11 COMMAND: "loop1" #0 [ffff88272f5af228] __schedule at ffffffff8173f405 #1 [ffff88272f5af280] schedule at ffffffff8173fa27 #2 [ffff88272f5af2a0] schedule_preempt_disabled at ffffffff8173fd5e #3 [ffff88272f5af2b0] __mutex_lock_slowpath at ffffffff81741fb5 #4 [ffff88272f5af330] mutex_lock at ffffffff81742133 #5 [ffff88272f5af350] dm_bufio_shrink_count at ffffffffa03865f9 [dm_bufio] #6 [ffff88272f5af380] shrink_slab at ffffffff811a86bd #7 [ffff88272f5af470] shrink_zone at ffffffff811ad778 #8 [ffff88272f5af500] do_try_to_free_pages at ffffffff811adb34 #9 [ffff88272f5af590] try_to_free_pages at ffffffff811adef8 #10 [ffff88272f5af610] __alloc_pages_nodemask at ffffffff811a09c3 #11 [ffff88272f5af710] alloc_pages_current at ffffffff811e8b71 #12 [ffff88272f5af760] new_slab at ffffffff811f4523 #13 [ffff88272f5af7b0] __slab_alloc at ffffffff8173a1b5 #14 [ffff88272f5af880] kmem_cache_alloc at ffffffff811f484b #15 [ffff88272f5af8d0] do_blockdev_direct_IO at ffffffff812535b3 #16 [ffff88272f5afb00] __blockdev_direct_IO at ffffffff81255dc3 #17 [ffff88272f5afb30] xfs_vm_direct_IO at ffffffffa01fe3fc [xfs] #18 [ffff88272f5afb90] generic_file_read_iter at ffffffff81198994 #19 [ffff88272f5afc50] __dta_xfs_file_read_iter_2398 at ffffffffa020c970 [xfs] #20 [ffff88272f5afcc0] lo_rw_aio at ffffffffa0377042 [loop] #21 [ffff88272f5afd70] loop_queue_work at ffffffffa0377c3b [loop] #22 [ffff88272f5afe60] kthread_worker_fn at ffffffff810a8a0c #23 [ffff88272f5afec0] kthread at ffffffff810a8428 #24 [ffff88272f5aff50] ret_from_fork at ffffffff81745242 Signed-off-by: Mikulas Patocka Cc: stable@vger.kernel.org Signed-off-by: Jens Axboe --- drivers/block/loop.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 3036883fc9f8..ab7ca5989097 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -885,7 +885,7 @@ static void loop_unprepare_queue(struct loop_device *lo) static int loop_kthread_worker_fn(void *worker_ptr) { - current->flags |= PF_LESS_THROTTLE; + current->flags |= PF_LESS_THROTTLE | PF_MEMALLOC_NOIO; return kthread_worker_fn(worker_ptr); } From 30e235389faadb9e3d918887b1f126155d7d761d Mon Sep 17 00:00:00 2001 From: Jia He Date: Wed, 7 Aug 2019 12:58:51 +0800 Subject: [PATCH 227/482] arm64: mm: add missing PTE_SPECIAL in pte_mkdevmap on arm64 Without this patch, the MAP_SYNC test case will cause a print_bad_pte warning on arm64 as follows: [ 25.542693] BUG: Bad page map in process mapdax333 pte:2e8000448800f53 pmd:41ff5f003 [ 25.546360] page:ffff7e0010220000 refcount:1 mapcount:-1 mapping:ffff8003e29c7440 index:0x0 [ 25.550281] ext4_dax_aops [ 25.550282] name:"__aaabbbcccddd__" [ 25.551553] flags: 0x3ffff0000001002(referenced|reserved) [ 25.555802] raw: 03ffff0000001002 ffff8003dfffa908 0000000000000000 ffff8003e29c7440 [ 25.559446] raw: 0000000000000000 0000000000000000 00000001fffffffe 0000000000000000 [ 25.563075] page dumped because: bad pte [ 25.564938] addr:0000ffffbe05b000 vm_flags:208000fb anon_vma:0000000000000000 mapping:ffff8003e29c7440 index:0 [ 25.574272] file:__aaabbbcccddd__ fault:ext4_dax_fault mmmmap:ext4_file_mmap readpage:0x0 [ 25.578799] CPU: 1 PID: 1180 Comm: mapdax333 Not tainted 5.2.0+ #21 [ 25.581702] Hardware name: QEMU KVM Virtual Machine, BIOS 0.0.0 02/06/2015 [ 25.585624] Call trace: [ 25.587008] dump_backtrace+0x0/0x178 [ 25.588799] show_stack+0x24/0x30 [ 25.590328] dump_stack+0xa8/0xcc [ 25.591901] print_bad_pte+0x18c/0x218 [ 25.593628] unmap_page_range+0x778/0xc00 [ 25.595506] unmap_single_vma+0x94/0xe8 [ 25.597304] unmap_vmas+0x90/0x108 [ 25.598901] unmap_region+0xc0/0x128 [ 25.600566] __do_munmap+0x284/0x3f0 [ 25.602245] __vm_munmap+0x78/0xe0 [ 25.603820] __arm64_sys_munmap+0x34/0x48 [ 25.605709] el0_svc_common.constprop.0+0x78/0x168 [ 25.607956] el0_svc_handler+0x34/0x90 [ 25.609698] el0_svc+0x8/0xc [...] The root cause is in _vm_normal_page, without the PTE_SPECIAL bit, the return value will be incorrectly set to pfn_to_page(pfn) instead of NULL. Besides, this patch also rewrite the pmd_mkdevmap to avoid setting PTE_SPECIAL for pmd The MAP_SYNC test case is as follows(Provided by Yibo Cai) $#include $#include $#include $#include $#include $#ifndef MAP_SYNC $#define MAP_SYNC 0x80000 $#endif /* mount -o dax /dev/pmem0 /mnt */ $#define F "/mnt/__aaabbbcccddd__" int main(void) { int fd; char buf[4096]; void *addr; if ((fd = open(F, O_CREAT|O_TRUNC|O_RDWR, 0644)) < 0) { perror("open1"); return 1; } if (write(fd, buf, 4096) != 4096) { perror("lseek"); return 1; } addr = mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_SHARED|MAP_SYNC, fd, 0); if (addr == MAP_FAILED) { perror("mmap"); printf("did you mount with '-o dax'?\n"); return 1; } memset(addr, 0x55, 4096); if (munmap(addr, 4096) == -1) { perror("munmap"); return 1; } close(fd); return 0; } Fixes: 73b20c84d42d ("arm64: mm: implement pte_devmap support") Reported-by: Yibo Cai Acked-by: Will Deacon Acked-by: Robin Murphy Signed-off-by: Jia He Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/pgtable.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 5fdcfe237338..e09760ece844 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -209,7 +209,7 @@ static inline pmd_t pmd_mkcont(pmd_t pmd) static inline pte_t pte_mkdevmap(pte_t pte) { - return set_pte_bit(pte, __pgprot(PTE_DEVMAP)); + return set_pte_bit(pte, __pgprot(PTE_DEVMAP | PTE_SPECIAL)); } static inline void set_pte(pte_t *ptep, pte_t pte) @@ -396,7 +396,10 @@ static inline int pmd_protnone(pmd_t pmd) #ifdef CONFIG_TRANSPARENT_HUGEPAGE #define pmd_devmap(pmd) pte_devmap(pmd_pte(pmd)) #endif -#define pmd_mkdevmap(pmd) pte_pmd(pte_mkdevmap(pmd_pte(pmd))) +static inline pmd_t pmd_mkdevmap(pmd_t pmd) +{ + return pte_pmd(set_pte_bit(pmd_pte(pmd), __pgprot(PTE_DEVMAP))); +} #define __pmd_to_phys(pmd) __pte_to_phys(pmd_pte(pmd)) #define __phys_to_pmd_val(phys) __phys_to_pte_val(phys) From 75354284cc3aa58f7e54d479d9bee69bd2ca828f Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Tue, 6 Aug 2019 16:14:44 +0900 Subject: [PATCH 228/482] auxdisplay: charlcd: move charlcd.h to drivers/auxdisplay This header is included in drivers/auxdisplay/. Make it a local header. Reviewed-by: Geert Uytterhoeven Signed-off-by: Masahiro Yamada Signed-off-by: Miguel Ojeda --- drivers/auxdisplay/charlcd.c | 2 +- {include/misc => drivers/auxdisplay}/charlcd.h | 0 drivers/auxdisplay/hd44780.c | 3 +-- drivers/auxdisplay/panel.c | 2 +- 4 files changed, 3 insertions(+), 4 deletions(-) rename {include/misc => drivers/auxdisplay}/charlcd.h (100%) diff --git a/drivers/auxdisplay/charlcd.c b/drivers/auxdisplay/charlcd.c index 92745efefb54..bef6b85778b6 100644 --- a/drivers/auxdisplay/charlcd.c +++ b/drivers/auxdisplay/charlcd.c @@ -20,7 +20,7 @@ #include -#include +#include "charlcd.h" #define LCD_MINOR 156 diff --git a/include/misc/charlcd.h b/drivers/auxdisplay/charlcd.h similarity index 100% rename from include/misc/charlcd.h rename to drivers/auxdisplay/charlcd.h diff --git a/drivers/auxdisplay/hd44780.c b/drivers/auxdisplay/hd44780.c index ab15b64707ad..bcbe13092327 100644 --- a/drivers/auxdisplay/hd44780.c +++ b/drivers/auxdisplay/hd44780.c @@ -14,8 +14,7 @@ #include #include -#include - +#include "charlcd.h" enum hd44780_pin { /* Order does matter due to writing to GPIO array subsets! */ diff --git a/drivers/auxdisplay/panel.c b/drivers/auxdisplay/panel.c index e6bd727da503..85965953683e 100644 --- a/drivers/auxdisplay/panel.c +++ b/drivers/auxdisplay/panel.c @@ -55,7 +55,7 @@ #include #include -#include +#include "charlcd.h" #define KEYPAD_MINOR 185 From 390235c3e66036351e2a89b925843a741c8afd6c Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Tue, 6 Aug 2019 16:14:45 +0900 Subject: [PATCH 229/482] auxdisplay: charlcd: add include guard to charlcd.h Add a header include guard just in case. Signed-off-by: Masahiro Yamada Signed-off-by: Miguel Ojeda --- drivers/auxdisplay/charlcd.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/auxdisplay/charlcd.h b/drivers/auxdisplay/charlcd.h index 8cf6c18b0adb..00911ad0f3de 100644 --- a/drivers/auxdisplay/charlcd.h +++ b/drivers/auxdisplay/charlcd.h @@ -6,6 +6,9 @@ * Copyright (C) 2016-2017 Glider bvba */ +#ifndef _CHARLCD_H +#define _CHARLCD_H + struct charlcd { const struct charlcd_ops *ops; const unsigned char *char_conv; /* Optional */ @@ -37,3 +40,5 @@ int charlcd_register(struct charlcd *lcd); int charlcd_unregister(struct charlcd *lcd); void charlcd_poke(struct charlcd *lcd); + +#endif /* CHARLCD_H */ From 6c4d6bc5486466e3a67cc47270001d0b4a26eed4 Mon Sep 17 00:00:00 2001 From: Masanari Iida Date: Tue, 6 Aug 2019 23:23:28 +0900 Subject: [PATCH 230/482] auxdisplay: Fix a typo in cfag12864b-example.c This patch fix a spelling typo in cfag12864b-example.c Signed-off-by: Masanari Iida Signed-off-by: Miguel Ojeda --- samples/auxdisplay/cfag12864b-example.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/auxdisplay/cfag12864b-example.c b/samples/auxdisplay/cfag12864b-example.c index 85571e90191f..bfeab44f81d0 100644 --- a/samples/auxdisplay/cfag12864b-example.c +++ b/samples/auxdisplay/cfag12864b-example.c @@ -245,7 +245,7 @@ int main(int argc, char *argv[]) if (argc != 2) { printf( - "Sintax: %s fbdev\n" + "Syntax: %s fbdev\n" "Usually: /dev/fb0, /dev/fb1...\n", argv[0]); return -1; } From 85127775a65fc58e69af0c44513937d471ccbe7b Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 6 Aug 2019 11:24:09 -0300 Subject: [PATCH 231/482] perf annotate: Fix printing of unaugmented disassembled instructions from BPF The code to disassemble BPF programs uses binutil's disassembling routines, and those use in turn fprintf to print to a memstream FILE, adding a newline at the end of each line, which ends up confusing the TUI routines called from: annotate_browser__write() annotate_line__write() annotate_browser__printf() ui_browser__vprintf() SLsmg_vprintf() The SLsmg_vprintf() function in the slang library gets confused with the terminating newline, so make the disasm_line__parse() function that parses the lines produced by the BPF specific disassembler (that uses binutil's libopcodes) and the lines produced by the objdump based disassembler used for everything else (and that doesn't adds this terminating newline) trim the end of the line in addition of the beginning. This way when disasm_line->ops.raw, i.e. for instructions without a special scnprintf() method, we'll not have that \n getting in the way of filling the screen right after the instruction with spaces to avoid leaving what was on the screen before and thus garbling the annotation screen, breaking scrolling, etc. Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Cc: Song Liu Fixes: 6987561c9e86 ("perf annotate: Enable annotation of BPF programs") Link: https://lkml.kernel.org/n/tip-unbr5a5efakobfr6rhxq99ta@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/annotate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index ac9ad2330f93..163536720149 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -1122,7 +1122,7 @@ static int disasm_line__parse(char *line, const char **namep, char **rawp) goto out; (*rawp)[0] = tmp; - *rawp = skip_spaces(*rawp); + *rawp = strim(*rawp); return 0; From 3de7ae0b2a1d86dbb23d0cb135150534fdb2e836 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Thu, 8 Aug 2019 09:48:23 +0300 Subject: [PATCH 232/482] perf db-export: Fix thread__exec_comm() Threads synthesized from /proc have comms with a start time of zero, and not marked as "exec". Currently, there can be 2 such comms. The first is created by processing a synthesized fork event and is set to the parent's comm string, and the second by processing a synthesized comm event set to the thread's current comm string. In the absence of an "exec" comm, thread__exec_comm() picks the last (oldest) comm, which, in the case above, is the parent's comm string. For a main thread, that is very probably wrong. Use the second-to-last in that case. This affects only db-export because it is the only user of thread__exec_comm(). Example: $ sudo perf record -a -o pt-a-sleep-1 -e intel_pt//u -- sleep 1 $ sudo chown ahunter pt-a-sleep-1 Before: $ perf script -i pt-a-sleep-1 --itrace=bep -s tools/perf/scripts/python/export-to-sqlite.py pt-a-sleep-1.db branches calls $ sqlite3 -header -column pt-a-sleep-1.db 'select * from comm_threads_view' comm_id command thread_id pid tid ---------- ---------- ---------- ---------- ---------- 1 swapper 1 0 0 2 rcu_sched 2 10 10 3 kthreadd 3 78 78 5 sudo 4 15180 15180 5 sudo 5 15180 15182 7 kworker/4: 6 10335 10335 8 kthreadd 7 55 55 10 systemd 8 865 865 10 systemd 9 865 875 13 perf 10 15181 15181 15 sleep 10 15181 15181 16 kworker/3: 11 14179 14179 17 kthreadd 12 29376 29376 19 systemd 13 746 746 21 systemd 14 401 401 23 systemd 15 879 879 23 systemd 16 879 945 25 kthreadd 17 556 556 27 kworker/u1 18 14136 14136 28 kworker/u1 19 15021 15021 29 kthreadd 20 509 509 31 systemd 21 836 836 31 systemd 22 836 967 33 systemd 23 1148 1148 33 systemd 24 1148 1163 35 kworker/2: 25 17988 17988 36 kworker/0: 26 13478 13478 After: $ perf script -i pt-a-sleep-1 --itrace=bep -s tools/perf/scripts/python/export-to-sqlite.py pt-a-sleep-1b.db branches calls $ sqlite3 -header -column pt-a-sleep-1b.db 'select * from comm_threads_view' comm_id command thread_id pid tid ---------- ---------- ---------- ---------- ---------- 1 swapper 1 0 0 2 rcu_sched 2 10 10 3 kswapd0 3 78 78 4 perf 4 15180 15180 4 perf 5 15180 15182 6 kworker/4: 6 10335 10335 7 kcompactd0 7 55 55 8 accounts-d 8 865 865 8 accounts-d 9 865 875 10 perf 10 15181 15181 12 sleep 10 15181 15181 13 kworker/3: 11 14179 14179 14 kworker/1: 12 29376 29376 15 haveged 13 746 746 16 systemd-jo 14 401 401 17 NetworkMan 15 879 879 17 NetworkMan 16 879 945 19 irq/131-iw 17 556 556 20 kworker/u1 18 14136 14136 21 kworker/u1 19 15021 15021 22 kworker/u1 20 509 509 23 thermald 21 836 836 23 thermald 22 836 967 25 unity-sett 23 1148 1148 25 unity-sett 24 1148 1163 27 kworker/2: 25 17988 17988 28 kworker/0: 26 13478 13478 Signed-off-by: Adrian Hunter Cc: Jiri Olsa Cc: stable@vger.kernel.org Fixes: 65de51f93ebf ("perf tools: Identify which comms are from exec") Link: http://lkml.kernel.org/r/20190808064823.14846-1-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/thread.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c index 873ab505ca80..590793cc5142 100644 --- a/tools/perf/util/thread.c +++ b/tools/perf/util/thread.c @@ -214,14 +214,24 @@ struct comm *thread__comm(const struct thread *thread) struct comm *thread__exec_comm(const struct thread *thread) { - struct comm *comm, *last = NULL; + struct comm *comm, *last = NULL, *second_last = NULL; list_for_each_entry(comm, &thread->comm_list, list) { if (comm->exec) return comm; + second_last = last; last = comm; } + /* + * 'last' with no start time might be the parent's comm of a synthesized + * thread (created by processing a synthesized fork event). For a main + * thread, that is very probably wrong. Prefer a later comm to avoid + * that case. + */ + if (second_last && !last->start && thread->pid_ == thread->tid) + return second_last; + return last; } From cf30ae726c011e0372fd4c2d588466c8b50a8907 Mon Sep 17 00:00:00 2001 From: He Zhe Date: Fri, 2 Aug 2019 16:29:51 +0800 Subject: [PATCH 233/482] perf ftrace: Fix failure to set cpumask when only one cpu is present The buffer containing the string used to set cpumask is overwritten at the end of the string later in cpu_map__snprint_mask due to not enough memory space, when there is only one cpu. And thus causes the following failure: $ perf ftrace ls failed to reset ftrace $ This patch fixes the calculation of the cpumask string size. Signed-off-by: He Zhe Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Alexey Budankov Cc: Jiri Olsa Cc: Kan Liang Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Fixes: dc23103278c5 ("perf ftrace: Add support for -a and -C option") Link: http://lkml.kernel.org/r/1564734592-15624-1-git-send-email-zhe.he@windriver.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-ftrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c index 66d5a6658daf..019312810405 100644 --- a/tools/perf/builtin-ftrace.c +++ b/tools/perf/builtin-ftrace.c @@ -173,7 +173,7 @@ static int set_tracing_cpumask(struct cpu_map *cpumap) int last_cpu; last_cpu = cpu_map__cpu(cpumap, cpumap->nr - 1); - mask_size = (last_cpu + 3) / 4 + 1; + mask_size = last_cpu / 4 + 2; /* one more byte for EOS */ mask_size += last_cpu / 32; /* ',' is needed for every 32th cpus */ cpumask = malloc(mask_size); From 5f5e25f1c7933a6e1673515c0b1d5acd82fea1ed Mon Sep 17 00:00:00 2001 From: He Zhe Date: Fri, 2 Aug 2019 16:29:52 +0800 Subject: [PATCH 234/482] perf cpumap: Fix writing to illegal memory in handling cpumap mask cpu_map__snprint_mask() would write to illegal memory pointed by zalloc(0) when there is only one cpu. This patch fixes the calculation and adds sanity check against the input parameters. Signed-off-by: He Zhe Cc: Alexander Shishkin Cc: Alexey Budankov Cc: Jiri Olsa Cc: Kan Liang Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Fixes: 4400ac8a9a90 ("perf cpumap: Introduce cpu_map__snprint_mask()") Link: http://lkml.kernel.org/r/1564734592-15624-2-git-send-email-zhe.he@windriver.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/cpumap.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c index 3acfbe34ebaf..39cce66b4ebc 100644 --- a/tools/perf/util/cpumap.c +++ b/tools/perf/util/cpumap.c @@ -751,7 +751,10 @@ size_t cpu_map__snprint_mask(struct cpu_map *map, char *buf, size_t size) unsigned char *bitmap; int last_cpu = cpu_map__cpu(map, map->nr - 1); - bitmap = zalloc((last_cpu + 7) / 8); + if (buf == NULL) + return 0; + + bitmap = zalloc(last_cpu / 8 + 1); if (bitmap == NULL) { buf[0] = '\0'; return 0; From 89b66500f739e0033ef59011e3df694f2053679d Mon Sep 17 00:00:00 2001 From: Masanari Iida Date: Thu, 1 Aug 2019 12:28:12 +0900 Subject: [PATCH 235/482] perf tools: Fix a typo in a variable name in the Documentation Makefile This patch fix a spelling typo in a variable name in the Documentation Makefile. Signed-off-by: Masanari Iida Reviewed-by: Mukesh Ojha Cc: Alexander Shishkin Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20190801032812.25018-1-standby24x7@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/Documentation/Makefile b/tools/perf/Documentation/Makefile index 6d148a40551c..adc5a7e44b98 100644 --- a/tools/perf/Documentation/Makefile +++ b/tools/perf/Documentation/Makefile @@ -242,7 +242,7 @@ $(OUTPUT)doc.dep : $(wildcard *.txt) build-docdep.perl $(PERL_PATH) ./build-docdep.perl >$@+ $(QUIET_STDERR) && \ mv $@+ $@ --include $(OUPTUT)doc.dep +-include $(OUTPUT)doc.dep _cmds_txt = cmds-ancillaryinterrogators.txt \ cmds-ancillarymanipulators.txt \ From fa37bab6d7154658d8a35920513f9396587754cc Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 31 Jul 2019 15:54:41 -0700 Subject: [PATCH 236/482] perf tools: Fix include paths in ui directory These paths point to the wrong location but still work because they get picked up by a -I flag that happens to direct to the correct file. Fix paths to point to the correct location without -I flags. Signed-off-by: Ian Rogers Cc: Alexander Shishkin Cc: Andi Kleen Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/20190731225441.233800-1-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browser.c | 9 +++++---- tools/perf/ui/tui/progress.c | 2 +- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/tools/perf/ui/browser.c b/tools/perf/ui/browser.c index f80c51d53565..d227d74b28f8 100644 --- a/tools/perf/ui/browser.c +++ b/tools/perf/ui/browser.c @@ -1,7 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 -#include "../string2.h" -#include "../config.h" -#include "../../perf.h" +#include "../util/util.h" +#include "../util/string2.h" +#include "../util/config.h" +#include "../perf.h" #include "libslang.h" #include "ui.h" #include "util.h" @@ -14,7 +15,7 @@ #include "browser.h" #include "helpline.h" #include "keysyms.h" -#include "../color.h" +#include "../util/color.h" #include #include diff --git a/tools/perf/ui/tui/progress.c b/tools/perf/ui/tui/progress.c index bc134b82829d..5a24dd3ce4db 100644 --- a/tools/perf/ui/tui/progress.c +++ b/tools/perf/ui/tui/progress.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include -#include "../cache.h" +#include "../../util/cache.h" #include "../progress.h" #include "../libslang.h" #include "../ui.h" From 12a6d2940b5f02b4b9f71ce098e3bb02bc24a9ea Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Wed, 24 Jul 2019 14:27:02 +0200 Subject: [PATCH 237/482] perf record: Fix module size on s390 On s390 the modules loaded in memory have the text segment located after the GOT and Relocation table. This can be seen with this output: [root@m35lp76 perf]# fgrep qeth /proc/modules qeth 151552 1 qeth_l2, Live 0x000003ff800b2000 ... [root@m35lp76 perf]# cat /sys/module/qeth/sections/.text 0x000003ff800b3990 [root@m35lp76 perf]# There is an offset of 0x1990 bytes. The size of the qeth module is 151552 bytes (0x25000 in hex). The location of the GOT/relocation table at the beginning of a module is unique to s390. commit 203d8a4aa6ed ("perf s390: Fix 'start' address of module's map") adjusts the start address of a module in the map structures, but does not adjust the size of the modules. This leads to overlapping of module maps as this example shows: [root@m35lp76 perf] # ./perf report -D 0 0 0xfb0 [0xa0]: PERF_RECORD_MMAP -1/0: [0x3ff800b3990(0x25000) @ 0]: x /lib/modules/.../qeth.ko.xz 0 0 0x1050 [0xb0]: PERF_RECORD_MMAP -1/0: [0x3ff800d85a0(0x8000) @ 0]: x /lib/modules/.../ip6_tables.ko.xz The module qeth.ko has an adjusted start address modified to b3990, but its size is unchanged and the module ends at 0x3ff800d8990. This end address overlaps with the next modules start address of 0x3ff800d85a0. When the size of the leading GOT/Relocation table stored in the beginning of the text segment (0x1990 bytes) is subtracted from module qeth end address, there are no overlaps anymore: 0x3ff800d8990 - 0x1990 = 0x0x3ff800d7000 which is the same as 0x3ff800b2000 + 0x25000 = 0x0x3ff800d7000. To fix this issue, also adjust the modules size in function arch__fix_module_text_start(). Add another function parameter named size and reduce the size of the module when the text segment start address is changed. Output after: 0 0 0xfb0 [0xa0]: PERF_RECORD_MMAP -1/0: [0x3ff800b3990(0x23670) @ 0]: x /lib/modules/.../qeth.ko.xz 0 0 0x1050 [0xb0]: PERF_RECORD_MMAP -1/0: [0x3ff800d85a0(0x7a60) @ 0]: x /lib/modules/.../ip6_tables.ko.xz Reported-by: Stefan Liebler Signed-off-by: Thomas Richter Acked-by: Heiko Carstens Cc: Hendrik Brueckner Cc: Vasily Gorbik Cc: stable@vger.kernel.org Fixes: 203d8a4aa6ed ("perf s390: Fix 'start' address of module's map") Link: http://lkml.kernel.org/r/20190724122703.3996-1-tmricht@linux.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/s390/util/machine.c | 14 +++++++++++++- tools/perf/util/machine.c | 3 ++- tools/perf/util/machine.h | 2 +- 3 files changed, 16 insertions(+), 3 deletions(-) diff --git a/tools/perf/arch/s390/util/machine.c b/tools/perf/arch/s390/util/machine.c index a19690a17291..de26b1441a48 100644 --- a/tools/perf/arch/s390/util/machine.c +++ b/tools/perf/arch/s390/util/machine.c @@ -7,7 +7,7 @@ #include "api/fs/fs.h" #include "debug.h" -int arch__fix_module_text_start(u64 *start, const char *name) +int arch__fix_module_text_start(u64 *start, u64 *size, const char *name) { u64 m_start = *start; char path[PATH_MAX]; @@ -17,6 +17,18 @@ int arch__fix_module_text_start(u64 *start, const char *name) if (sysfs__read_ull(path, (unsigned long long *)start) < 0) { pr_debug2("Using module %s start:%#lx\n", path, m_start); *start = m_start; + } else { + /* Successful read of the modules segment text start address. + * Calculate difference between module start address + * in memory and module text segment start address. + * For example module load address is 0x3ff8011b000 + * (from /proc/modules) and module text segment start + * address is 0x3ff8011b870 (from file above). + * + * Adjust the module size and subtract the GOT table + * size located at the beginning of the module. + */ + *size -= (*start - m_start); } return 0; diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index cf826eca3aaf..83b2fbbeeb90 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -1378,6 +1378,7 @@ static int machine__set_modules_path(struct machine *machine) return map_groups__set_modules_path_dir(&machine->kmaps, modules_path, 0); } int __weak arch__fix_module_text_start(u64 *start __maybe_unused, + u64 *size __maybe_unused, const char *name __maybe_unused) { return 0; @@ -1389,7 +1390,7 @@ static int machine__create_module(void *arg, const char *name, u64 start, struct machine *machine = arg; struct map *map; - if (arch__fix_module_text_start(&start, name) < 0) + if (arch__fix_module_text_start(&start, &size, name) < 0) return -1; map = machine__findnew_module_map(machine, start, name); diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h index f70ab98a7bde..7aa38da26427 100644 --- a/tools/perf/util/machine.h +++ b/tools/perf/util/machine.h @@ -222,7 +222,7 @@ struct symbol *machine__find_kernel_symbol_by_name(struct machine *machine, struct map *machine__findnew_module_map(struct machine *machine, u64 start, const char *filename); -int arch__fix_module_text_start(u64 *start, const char *name); +int arch__fix_module_text_start(u64 *start, u64 *size, const char *name); int machine__load_kallsyms(struct machine *machine, const char *filename); From b9c0a64901d5bdec6eafd38d1dc8fa0e2974fccb Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Wed, 24 Jul 2019 14:27:03 +0200 Subject: [PATCH 238/482] perf annotate: Fix s390 gap between kernel end and module start During execution of command 'perf top' the error message: Not enough memory for annotating '__irf_end' symbol!) is emitted from this call sequence: __cmd_top perf_top__mmap_read perf_top__mmap_read_idx perf_event__process_sample hist_entry_iter__add hist_iter__top_callback perf_top__record_precise_ip hist_entry__inc_addr_samples symbol__inc_addr_samples symbol__get_annotation symbol__alloc_hist In this function the size of symbol __irf_end is calculated. The size of a symbol is the difference between its start and end address. When the symbol was read the first time, its start and end was set to: symbol__new: __irf_end 0xe954d0-0xe954d0 which is correct and maps with /proc/kallsyms: root@s8360046:~/linux-4.15.0/tools/perf# fgrep _irf_end /proc/kallsyms 0000000000e954d0 t __irf_end root@s8360046:~/linux-4.15.0/tools/perf# In function symbol__alloc_hist() the end of symbol __irf_end is symbol__alloc_hist sym:__irf_end start:0xe954d0 end:0x3ff80045a8 which is identical with the first module entry in /proc/kallsyms This results in a symbol size of __irf_req for histogram analyses of 70334140059072 bytes and a malloc() for this requested size fails. The root cause of this is function __dso__load_kallsyms() +-> symbols__fixup_end() Function symbols__fixup_end() enlarges the last symbol in the kallsyms map: # fgrep __irf_end /proc/kallsyms 0000000000e954d0 t __irf_end # to the start address of the first module: # cat /proc/kallsyms | sort | egrep ' [tT] ' .... 0000000000e952d0 T __security_initcall_end 0000000000e954d0 T __initramfs_size 0000000000e954d0 t __irf_end 000003ff800045a8 T fc_get_event_number [scsi_transport_fc] 000003ff800045d0 t store_fc_vport_disable [scsi_transport_fc] 000003ff800046a8 T scsi_is_fc_rport [scsi_transport_fc] 000003ff800046d0 t fc_target_setup [scsi_transport_fc] On s390 the kernel is located around memory address 0x200, 0x10000 or 0x100000, depending on linux version. Modules however start some- where around 0x3ff xxxx xxxx. This is different than x86 and produces a large gap for which histogram allocation fails. Fix this by detecting the kernel's last symbol and do no adjustment for it. Introduce a weak function and handle s390 specifics. Reported-by: Klaus Theurich Signed-off-by: Thomas Richter Acked-by: Heiko Carstens Cc: Hendrik Brueckner Cc: Vasily Gorbik Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/20190724122703.3996-2-tmricht@linux.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/s390/util/machine.c | 17 +++++++++++++++++ tools/perf/util/symbol.c | 7 ++++++- tools/perf/util/symbol.h | 1 + 3 files changed, 24 insertions(+), 1 deletion(-) diff --git a/tools/perf/arch/s390/util/machine.c b/tools/perf/arch/s390/util/machine.c index de26b1441a48..c8c86a0c9b79 100644 --- a/tools/perf/arch/s390/util/machine.c +++ b/tools/perf/arch/s390/util/machine.c @@ -6,6 +6,7 @@ #include "machine.h" #include "api/fs/fs.h" #include "debug.h" +#include "symbol.h" int arch__fix_module_text_start(u64 *start, u64 *size, const char *name) { @@ -33,3 +34,19 @@ int arch__fix_module_text_start(u64 *start, u64 *size, const char *name) return 0; } + +/* On s390 kernel text segment start is located at very low memory addresses, + * for example 0x10000. Modules are located at very high memory addresses, + * for example 0x3ff xxxx xxxx. The gap between end of kernel text segment + * and beginning of first module's text segment is very big. + * Therefore do not fill this gap and do not assign it to the kernel dso map. + */ +void arch__symbols__fixup_end(struct symbol *p, struct symbol *c) +{ + if (strchr(p->name, '[') == NULL && strchr(c->name, '[')) + /* Last kernel symbol mapped to end of page */ + p->end = roundup(p->end, page_size); + else + p->end = c->start; + pr_debug4("%s sym:%s end:%#lx\n", __func__, p->name, p->end); +} diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 173f3378aaa0..4efde7879474 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -92,6 +92,11 @@ static int prefix_underscores_count(const char *str) return tail - str; } +void __weak arch__symbols__fixup_end(struct symbol *p, struct symbol *c) +{ + p->end = c->start; +} + const char * __weak arch__normalize_symbol_name(const char *name) { return name; @@ -218,7 +223,7 @@ void symbols__fixup_end(struct rb_root_cached *symbols) curr = rb_entry(nd, struct symbol, rb_node); if (prev->end == prev->start && prev->end != curr->start) - prev->end = curr->start; + arch__symbols__fixup_end(prev, curr); } /* Last entry */ diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 12755b42ea93..183f630cb5f1 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -288,6 +288,7 @@ const char *arch__normalize_symbol_name(const char *name); #define SYMBOL_A 0 #define SYMBOL_B 1 +void arch__symbols__fixup_end(struct symbol *p, struct symbol *c); int arch__compare_symbol_names(const char *namea, const char *nameb); int arch__compare_symbol_names_n(const char *namea, const char *nameb, unsigned int n); From 8e6e5bea2e34c61291d00cb3f47560341aa84bc3 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Mon, 29 Jul 2019 15:27:55 +0800 Subject: [PATCH 239/482] perf pmu-events: Fix missing "cpu_clk_unhalted.core" event The events defined in pmu-events JSON are parsed and added into perf tool. For fixed counters, we handle the encodings between JSON and perf by using a static array fixed[]. But the fixed[] has missed an important event "cpu_clk_unhalted.core". For example, on the Tremont platform, [root@localhost ~]# perf stat -e cpu_clk_unhalted.core -a event syntax error: 'cpu_clk_unhalted.core' \___ parser error With this patch, the event cpu_clk_unhalted.core can be parsed. [root@localhost perf]# ./perf stat -e cpu_clk_unhalted.core -a -vvv ------------------------------------------------------------ perf_event_attr: type 4 size 112 config 0x3c sample_type IDENTIFIER read_format TOTAL_TIME_ENABLED|TOTAL_TIME_RUNNING disabled 1 inherit 1 exclude_guest 1 ------------------------------------------------------------ ... Signed-off-by: Jin Yao Cc: Alexander Shishkin Cc: Andi Kleen Cc: Jin Yao Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20190729072755.2166-1-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/pmu-events/jevents.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c index 1a91a197cafb..d413761621b0 100644 --- a/tools/perf/pmu-events/jevents.c +++ b/tools/perf/pmu-events/jevents.c @@ -453,6 +453,7 @@ static struct fixed { { "inst_retired.any_p", "event=0xc0" }, { "cpu_clk_unhalted.ref", "event=0x0,umask=0x03" }, { "cpu_clk_unhalted.thread", "event=0x3c" }, + { "cpu_clk_unhalted.core", "event=0x3c" }, { "cpu_clk_unhalted.thread_any", "event=0x3c,any=1" }, { NULL, NULL}, }; From 405b93eb764367a670e729da18e54dc42db32620 Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Fri, 5 Jul 2019 17:59:28 +0300 Subject: [PATCH 240/482] net/mlx5e: Use flow keys dissector to parse packets for ARFS The current ARFS code relies on certain fields to be set in the SKB (e.g. transport_header) and extracts IP addresses and ports by custom code that parses the packet. The necessary SKB fields, however, are not always set at that point, which leads to an out-of-bounds access. Use skb_flow_dissect_flow_keys() to get the necessary information reliably, fix the out-of-bounds access and reuse the code. Fixes: 18c908e477dc ("net/mlx5e: Add accelerated RFS support") Signed-off-by: Maxim Mikityanskiy Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/en_arfs.c | 97 +++++++------------ 1 file changed, 34 insertions(+), 63 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c index 8657e0f26995..2c75b2752f58 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c @@ -437,12 +437,6 @@ arfs_hash_bucket(struct arfs_table *arfs_t, __be16 src_port, return &arfs_t->rules_hash[bucket_idx]; } -static u8 arfs_get_ip_proto(const struct sk_buff *skb) -{ - return (skb->protocol == htons(ETH_P_IP)) ? - ip_hdr(skb)->protocol : ipv6_hdr(skb)->nexthdr; -} - static struct arfs_table *arfs_get_table(struct mlx5e_arfs_tables *arfs, u8 ip_proto, __be16 etype) { @@ -602,31 +596,9 @@ static void arfs_handle_work(struct work_struct *work) arfs_may_expire_flow(priv); } -/* return L4 destination port from ip4/6 packets */ -static __be16 arfs_get_dst_port(const struct sk_buff *skb) -{ - char *transport_header; - - transport_header = skb_transport_header(skb); - if (arfs_get_ip_proto(skb) == IPPROTO_TCP) - return ((struct tcphdr *)transport_header)->dest; - return ((struct udphdr *)transport_header)->dest; -} - -/* return L4 source port from ip4/6 packets */ -static __be16 arfs_get_src_port(const struct sk_buff *skb) -{ - char *transport_header; - - transport_header = skb_transport_header(skb); - if (arfs_get_ip_proto(skb) == IPPROTO_TCP) - return ((struct tcphdr *)transport_header)->source; - return ((struct udphdr *)transport_header)->source; -} - static struct arfs_rule *arfs_alloc_rule(struct mlx5e_priv *priv, struct arfs_table *arfs_t, - const struct sk_buff *skb, + const struct flow_keys *fk, u16 rxq, u32 flow_id) { struct arfs_rule *rule; @@ -641,19 +613,19 @@ static struct arfs_rule *arfs_alloc_rule(struct mlx5e_priv *priv, INIT_WORK(&rule->arfs_work, arfs_handle_work); tuple = &rule->tuple; - tuple->etype = skb->protocol; + tuple->etype = fk->basic.n_proto; + tuple->ip_proto = fk->basic.ip_proto; if (tuple->etype == htons(ETH_P_IP)) { - tuple->src_ipv4 = ip_hdr(skb)->saddr; - tuple->dst_ipv4 = ip_hdr(skb)->daddr; + tuple->src_ipv4 = fk->addrs.v4addrs.src; + tuple->dst_ipv4 = fk->addrs.v4addrs.dst; } else { - memcpy(&tuple->src_ipv6, &ipv6_hdr(skb)->saddr, + memcpy(&tuple->src_ipv6, &fk->addrs.v6addrs.src, sizeof(struct in6_addr)); - memcpy(&tuple->dst_ipv6, &ipv6_hdr(skb)->daddr, + memcpy(&tuple->dst_ipv6, &fk->addrs.v6addrs.dst, sizeof(struct in6_addr)); } - tuple->ip_proto = arfs_get_ip_proto(skb); - tuple->src_port = arfs_get_src_port(skb); - tuple->dst_port = arfs_get_dst_port(skb); + tuple->src_port = fk->ports.src; + tuple->dst_port = fk->ports.dst; rule->flow_id = flow_id; rule->filter_id = priv->fs.arfs.last_filter_id++ % RPS_NO_FILTER; @@ -664,37 +636,33 @@ static struct arfs_rule *arfs_alloc_rule(struct mlx5e_priv *priv, return rule; } -static bool arfs_cmp_ips(struct arfs_tuple *tuple, - const struct sk_buff *skb) +static bool arfs_cmp(const struct arfs_tuple *tuple, const struct flow_keys *fk) { - if (tuple->etype == htons(ETH_P_IP) && - tuple->src_ipv4 == ip_hdr(skb)->saddr && - tuple->dst_ipv4 == ip_hdr(skb)->daddr) - return true; - if (tuple->etype == htons(ETH_P_IPV6) && - (!memcmp(&tuple->src_ipv6, &ipv6_hdr(skb)->saddr, - sizeof(struct in6_addr))) && - (!memcmp(&tuple->dst_ipv6, &ipv6_hdr(skb)->daddr, - sizeof(struct in6_addr)))) - return true; + if (tuple->src_port != fk->ports.src || tuple->dst_port != fk->ports.dst) + return false; + if (tuple->etype != fk->basic.n_proto) + return false; + if (tuple->etype == htons(ETH_P_IP)) + return tuple->src_ipv4 == fk->addrs.v4addrs.src && + tuple->dst_ipv4 == fk->addrs.v4addrs.dst; + if (tuple->etype == htons(ETH_P_IPV6)) + return !memcmp(&tuple->src_ipv6, &fk->addrs.v6addrs.src, + sizeof(struct in6_addr)) && + !memcmp(&tuple->dst_ipv6, &fk->addrs.v6addrs.dst, + sizeof(struct in6_addr)); return false; } static struct arfs_rule *arfs_find_rule(struct arfs_table *arfs_t, - const struct sk_buff *skb) + const struct flow_keys *fk) { struct arfs_rule *arfs_rule; struct hlist_head *head; - __be16 src_port = arfs_get_src_port(skb); - __be16 dst_port = arfs_get_dst_port(skb); - head = arfs_hash_bucket(arfs_t, src_port, dst_port); + head = arfs_hash_bucket(arfs_t, fk->ports.src, fk->ports.dst); hlist_for_each_entry(arfs_rule, head, hlist) { - if (arfs_rule->tuple.src_port == src_port && - arfs_rule->tuple.dst_port == dst_port && - arfs_cmp_ips(&arfs_rule->tuple, skb)) { + if (arfs_cmp(&arfs_rule->tuple, fk)) return arfs_rule; - } } return NULL; @@ -707,20 +675,24 @@ int mlx5e_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb, struct mlx5e_arfs_tables *arfs = &priv->fs.arfs; struct arfs_table *arfs_t; struct arfs_rule *arfs_rule; + struct flow_keys fk; - if (skb->protocol != htons(ETH_P_IP) && - skb->protocol != htons(ETH_P_IPV6)) + if (!skb_flow_dissect_flow_keys(skb, &fk, 0)) + return -EPROTONOSUPPORT; + + if (fk.basic.n_proto != htons(ETH_P_IP) && + fk.basic.n_proto != htons(ETH_P_IPV6)) return -EPROTONOSUPPORT; if (skb->encapsulation) return -EPROTONOSUPPORT; - arfs_t = arfs_get_table(arfs, arfs_get_ip_proto(skb), skb->protocol); + arfs_t = arfs_get_table(arfs, fk.basic.ip_proto, fk.basic.n_proto); if (!arfs_t) return -EPROTONOSUPPORT; spin_lock_bh(&arfs->arfs_lock); - arfs_rule = arfs_find_rule(arfs_t, skb); + arfs_rule = arfs_find_rule(arfs_t, &fk); if (arfs_rule) { if (arfs_rule->rxq == rxq_index) { spin_unlock_bh(&arfs->arfs_lock); @@ -728,8 +700,7 @@ int mlx5e_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb, } arfs_rule->rxq = rxq_index; } else { - arfs_rule = arfs_alloc_rule(priv, arfs_t, skb, - rxq_index, flow_id); + arfs_rule = arfs_alloc_rule(priv, arfs_t, &fk, rxq_index, flow_id); if (!arfs_rule) { spin_unlock_bh(&arfs->arfs_lock); return -ENOMEM; From 93b3586e070b14704dd7bff81fbcefd64663f3c2 Mon Sep 17 00:00:00 2001 From: Huy Nguyen Date: Wed, 17 Jul 2019 14:04:31 -0500 Subject: [PATCH 241/482] net/mlx5: Support inner header match criteria for non decap flow action We have an issue that OVS application creates an offloaded drop rule that drops VXLAN traffic with both inner and outer header match criteria. mlx5_core driver detects correctly the inner and outer header match criteria but does not enable the inner header match criteria due to an incorrect assumption in mlx5_eswitch_add_offloaded_rule that only decap rule needs inner header criteria. Solution: Remove mlx5_esw_flow_attr's match_level and tunnel_match_level and add two new members: inner_match_level and outer_match_level. inner/outer_match_level is set to NONE if the inner/outer match criteria is not specified in the tc rule creation request. The decap assumption is removed and the code just needs to check for inner/outer_match_level to enable the corresponding bit in firmware's match_criteria_enable value. Fixes: 6363651d6dd7 ("net/mlx5e: Properly set steering match levels for offloaded TC decap rules") Signed-off-by: Huy Nguyen Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/en_tc.c | 31 ++++++++++++------- .../net/ethernet/mellanox/mlx5/core/eswitch.h | 4 +-- .../mellanox/mlx5/core/eswitch_offloads.c | 12 +++---- 3 files changed, 26 insertions(+), 21 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 7ecfc53cf5f6..deeb65da99f3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -1480,7 +1480,7 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec, struct flow_cls_offload *f, struct net_device *filter_dev, - u8 *match_level, u8 *tunnel_match_level) + u8 *inner_match_level, u8 *outer_match_level) { struct netlink_ext_ack *extack = f->common.extack; void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, @@ -1495,8 +1495,9 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, struct flow_dissector *dissector = rule->match.dissector; u16 addr_type = 0; u8 ip_proto = 0; + u8 *match_level; - *match_level = MLX5_MATCH_NONE; + match_level = outer_match_level; if (dissector->used_keys & ~(BIT(FLOW_DISSECTOR_KEY_META) | @@ -1524,12 +1525,14 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, } if (mlx5e_get_tc_tun(filter_dev)) { - if (parse_tunnel_attr(priv, spec, f, filter_dev, tunnel_match_level)) + if (parse_tunnel_attr(priv, spec, f, filter_dev, + outer_match_level)) return -EOPNOTSUPP; - /* In decap flow, header pointers should point to the inner + /* At this point, header pointers should point to the inner * headers, outer header were already set by parse_tunnel_attr */ + match_level = inner_match_level; headers_c = get_match_headers_criteria(MLX5_FLOW_CONTEXT_ACTION_DECAP, spec); headers_v = get_match_headers_value(MLX5_FLOW_CONTEXT_ACTION_DECAP, @@ -1831,35 +1834,41 @@ static int parse_cls_flower(struct mlx5e_priv *priv, struct flow_cls_offload *f, struct net_device *filter_dev) { + u8 inner_match_level, outer_match_level, non_tunnel_match_level; struct netlink_ext_ack *extack = f->common.extack; struct mlx5_core_dev *dev = priv->mdev; struct mlx5_eswitch *esw = dev->priv.eswitch; struct mlx5e_rep_priv *rpriv = priv->ppriv; - u8 match_level, tunnel_match_level = MLX5_MATCH_NONE; struct mlx5_eswitch_rep *rep; int err; - err = __parse_cls_flower(priv, spec, f, filter_dev, &match_level, &tunnel_match_level); + inner_match_level = MLX5_MATCH_NONE; + outer_match_level = MLX5_MATCH_NONE; + + err = __parse_cls_flower(priv, spec, f, filter_dev, &inner_match_level, + &outer_match_level); + non_tunnel_match_level = (inner_match_level == MLX5_MATCH_NONE) ? + outer_match_level : inner_match_level; if (!err && (flow->flags & MLX5E_TC_FLOW_ESWITCH)) { rep = rpriv->rep; if (rep->vport != MLX5_VPORT_UPLINK && (esw->offloads.inline_mode != MLX5_INLINE_MODE_NONE && - esw->offloads.inline_mode < match_level)) { + esw->offloads.inline_mode < non_tunnel_match_level)) { NL_SET_ERR_MSG_MOD(extack, "Flow is not offloaded due to min inline setting"); netdev_warn(priv->netdev, "Flow is not offloaded due to min inline setting, required %d actual %d\n", - match_level, esw->offloads.inline_mode); + non_tunnel_match_level, esw->offloads.inline_mode); return -EOPNOTSUPP; } } if (flow->flags & MLX5E_TC_FLOW_ESWITCH) { - flow->esw_attr->match_level = match_level; - flow->esw_attr->tunnel_match_level = tunnel_match_level; + flow->esw_attr->inner_match_level = inner_match_level; + flow->esw_attr->outer_match_level = outer_match_level; } else { - flow->nic_attr->match_level = match_level; + flow->nic_attr->match_level = non_tunnel_match_level; } return err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index a38e8a3c7c9a..04685dbb280c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -377,8 +377,8 @@ struct mlx5_esw_flow_attr { struct mlx5_termtbl_handle *termtbl; } dests[MLX5_MAX_FLOW_FWD_VPORTS]; u32 mod_hdr_id; - u8 match_level; - u8 tunnel_match_level; + u8 inner_match_level; + u8 outer_match_level; struct mlx5_fc *counter; u32 chain; u16 prio; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 089ae4d48a82..0323fd078271 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -207,14 +207,10 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, mlx5_eswitch_set_rule_source_port(esw, spec, attr); - if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DECAP) { - if (attr->tunnel_match_level != MLX5_MATCH_NONE) - spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS; - if (attr->match_level != MLX5_MATCH_NONE) - spec->match_criteria_enable |= MLX5_MATCH_INNER_HEADERS; - } else if (attr->match_level != MLX5_MATCH_NONE) { + if (attr->outer_match_level != MLX5_MATCH_NONE) spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS; - } + if (attr->inner_match_level != MLX5_MATCH_NONE) + spec->match_criteria_enable |= MLX5_MATCH_INNER_HEADERS; if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) flow_act.modify_id = attr->mod_hdr_id; @@ -290,7 +286,7 @@ mlx5_eswitch_add_fwd_rule(struct mlx5_eswitch *esw, mlx5_eswitch_set_rule_source_port(esw, spec, attr); spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS; - if (attr->match_level != MLX5_MATCH_NONE) + if (attr->outer_match_level != MLX5_MATCH_NONE) spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS; rule = mlx5_add_flow_rules(fast_fdb, spec, &flow_act, dest, i); From 466df6eb4a9e813b3cfc674363316450c57a89c5 Mon Sep 17 00:00:00 2001 From: Huy Nguyen Date: Thu, 1 Aug 2019 11:10:19 -0500 Subject: [PATCH 242/482] net/mlx5e: Only support tx/rx pause setting for port owner Only support changing tx/rx pause frame setting if the net device is the vport group manager. Fixes: 3c2d18ef22df ("net/mlx5e: Support ethtool get/set_pauseparam") Signed-off-by: Huy Nguyen Reviewed-by: Parav Pandit Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 03bed714bac3..ee9fa0c2c8b9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -1338,6 +1338,9 @@ int mlx5e_ethtool_set_pauseparam(struct mlx5e_priv *priv, struct mlx5_core_dev *mdev = priv->mdev; int err; + if (!MLX5_CAP_GEN(mdev, vport_group_manager)) + return -EOPNOTSUPP; + if (pauseparam->autoneg) return -EINVAL; From 5faf5b70c51dd9c9905bf8209e33cbd867486607 Mon Sep 17 00:00:00 2001 From: Mohamad Heib Date: Tue, 23 Apr 2019 21:13:48 +0300 Subject: [PATCH 243/482] net/mlx5e: ethtool, Avoid setting speed to 56GBASE when autoneg off Setting speed to 56GBASE is allowed only with auto-negotiation enabled. This patch prevent setting speed to 56GBASE when auto-negotiation disabled. Fixes: f62b8bb8f2d3 ("net/mlx5: Extend mlx5_core to support ConnectX-4 Ethernet functionality") Signed-off-by: Mohamad Heib Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index ee9fa0c2c8b9..e89dba790a2d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -1081,6 +1081,14 @@ int mlx5e_ethtool_set_link_ksettings(struct mlx5e_priv *priv, link_modes = autoneg == AUTONEG_ENABLE ? ethtool2ptys_adver_func(adver) : mlx5e_port_speed2linkmodes(mdev, speed, !ext); + if ((link_modes & MLX5E_PROT_MASK(MLX5E_56GBASE_R4)) && + autoneg != AUTONEG_ENABLE) { + netdev_err(priv->netdev, "%s: 56G link speed requires autoneg enabled\n", + __func__); + err = -EINVAL; + goto out; + } + link_modes = link_modes & eproto.cap; if (!link_modes) { netdev_err(priv->netdev, "%s: Not supported link mode(s) requested", From 55c9bd37ef5a0bd79c80c8eb418ce162bbc65590 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Sun, 21 Jul 2019 14:13:15 +0300 Subject: [PATCH 244/482] net/mlx5: crypto, Fix wrong offset in encryption key command Fix the 128b key offset in key encryption key creation command, per the HW specification. Fixes: 45d3b55dc665 ("net/mlx5: Add crypto library to support create/destroy encryption key") Signed-off-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/lib/crypto.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/crypto.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/crypto.c index ea9ee88491e5..ea1d4d26ece0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/crypto.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/crypto.c @@ -27,6 +27,7 @@ int mlx5_create_encryption_key(struct mlx5_core_dev *mdev, case 128: general_obj_key_size = MLX5_GENERAL_OBJECT_TYPE_ENCRYPTION_KEY_KEY_SIZE_128; + key_p += sz_bytes; break; case 256: general_obj_key_size = From 26149e3e1f44d27897d0af9ca4bcd723674bad44 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Sun, 21 Jul 2019 14:18:42 +0300 Subject: [PATCH 245/482] net/mlx5: kTLS, Fix wrong TIS opmod constants Fix the used constants for TLS TIS opmods, per the HW specification. Fixes: a12ff35e0fb7 ("net/mlx5: Introduce TLS TX offload hardware bits and structures") Signed-off-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- include/linux/mlx5/device.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index ce9839c8bc1a..c2f056b5766d 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -446,11 +446,11 @@ enum { }; enum { - MLX5_OPC_MOD_TLS_TIS_STATIC_PARAMS = 0x20, + MLX5_OPC_MOD_TLS_TIS_STATIC_PARAMS = 0x1, }; enum { - MLX5_OPC_MOD_TLS_TIS_PROGRESS_PARAMS = 0x20, + MLX5_OPC_MOD_TLS_TIS_PROGRESS_PARAMS = 0x1, }; enum { From a9bc3390327317345dd4683b70970c83ab400ea3 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Tue, 30 Jul 2019 11:55:25 +0300 Subject: [PATCH 246/482] net/mlx5e: kTLS, Fix progress params context WQE layout The TLS progress params context WQE should not include an Eth segment, drop it. In addition, align the tls_progress_params layout with the HW specification document: - fix the tisn field name. - remove the valid bit. Fixes: a12ff35e0fb7 ("net/mlx5: Introduce TLS TX offload hardware bits and structures") Fixes: d2ead1f360e8 ("net/mlx5e: Add kTLS TX HW offload support") Signed-off-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 9 +++++++-- drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h | 6 ++++-- .../net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c | 4 ++-- include/linux/mlx5/mlx5_ifc.h | 5 ++--- 4 files changed, 15 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index ce1be2a84231..f6b64a03cd06 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -184,8 +184,13 @@ static inline int mlx5e_get_max_num_channels(struct mlx5_core_dev *mdev) struct mlx5e_tx_wqe { struct mlx5_wqe_ctrl_seg ctrl; - struct mlx5_wqe_eth_seg eth; - struct mlx5_wqe_data_seg data[0]; + union { + struct { + struct mlx5_wqe_eth_seg eth; + struct mlx5_wqe_data_seg data[0]; + }; + u8 tls_progress_params_ctx[0]; + }; }; struct mlx5e_rx_wqe_ll { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h index 407da83474ef..b7298f9ee3d3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h @@ -11,12 +11,14 @@ #include "accel/tls.h" #define MLX5E_KTLS_STATIC_UMR_WQE_SZ \ - (sizeof(struct mlx5e_umr_wqe) + MLX5_ST_SZ_BYTES(tls_static_params)) + (offsetof(struct mlx5e_umr_wqe, tls_static_params_ctx) + \ + MLX5_ST_SZ_BYTES(tls_static_params)) #define MLX5E_KTLS_STATIC_WQEBBS \ (DIV_ROUND_UP(MLX5E_KTLS_STATIC_UMR_WQE_SZ, MLX5_SEND_WQE_BB)) #define MLX5E_KTLS_PROGRESS_WQE_SZ \ - (sizeof(struct mlx5e_tx_wqe) + MLX5_ST_SZ_BYTES(tls_progress_params)) + (offsetof(struct mlx5e_tx_wqe, tls_progress_params_ctx) + \ + MLX5_ST_SZ_BYTES(tls_progress_params)) #define MLX5E_KTLS_PROGRESS_WQEBBS \ (DIV_ROUND_UP(MLX5E_KTLS_PROGRESS_WQE_SZ, MLX5_SEND_WQE_BB)) #define MLX5E_KTLS_MAX_DUMP_WQEBBS 2 diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c index 3766545ce259..9f67bfb559f1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c @@ -80,7 +80,7 @@ build_static_params(struct mlx5e_umr_wqe *wqe, u16 pc, u32 sqn, static void fill_progress_params_ctx(void *ctx, struct mlx5e_ktls_offload_context_tx *priv_tx) { - MLX5_SET(tls_progress_params, ctx, pd, priv_tx->tisn); + MLX5_SET(tls_progress_params, ctx, tisn, priv_tx->tisn); MLX5_SET(tls_progress_params, ctx, record_tracker_state, MLX5E_TLS_PROGRESS_PARAMS_RECORD_TRACKER_STATE_START); MLX5_SET(tls_progress_params, ctx, auth_state, @@ -104,7 +104,7 @@ build_progress_params(struct mlx5e_tx_wqe *wqe, u16 pc, u32 sqn, PROGRESS_PARAMS_DS_CNT); cseg->fm_ce_se = fence ? MLX5_FENCE_MODE_INITIATOR_SMALL : 0; - fill_progress_params_ctx(wqe->data, priv_tx); + fill_progress_params_ctx(wqe->tls_progress_params_ctx, priv_tx); } static void tx_fill_wi(struct mlx5e_txqsq *sq, diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index ec571fd7fcf8..b8b570c30b5e 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -10054,9 +10054,8 @@ struct mlx5_ifc_tls_static_params_bits { }; struct mlx5_ifc_tls_progress_params_bits { - u8 valid[0x1]; - u8 reserved_at_1[0x7]; - u8 pd[0x18]; + u8 reserved_at_0[0x8]; + u8 tisn[0x18]; u8 next_record_tcp_sn[0x20]; From f1897b3cd1af1dce8d6c06b06f02551c71112003 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Thu, 8 Aug 2019 12:26:57 +0300 Subject: [PATCH 247/482] net/mlx5e: kTLS, Fix tisn field name Use the proper tisn field name from the union in struct mlx5_wqe_ctrl_seg. Fixes: d2ead1f360e8 ("net/mlx5e: Add kTLS TX HW offload support") Signed-off-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c index 9f67bfb559f1..cfc9e7d457e3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c @@ -69,7 +69,7 @@ build_static_params(struct mlx5e_umr_wqe *wqe, u16 pc, u32 sqn, cseg->qpn_ds = cpu_to_be32((sqn << MLX5_WQE_CTRL_QPN_SHIFT) | STATIC_PARAMS_DS_CNT); cseg->fm_ce_se = fence ? MLX5_FENCE_MODE_INITIATOR_SMALL : 0; - cseg->imm = cpu_to_be32(priv_tx->tisn); + cseg->tisn = cpu_to_be32(priv_tx->tisn); ucseg->flags = MLX5_UMR_INLINE; ucseg->bsf_octowords = cpu_to_be16(MLX5_ST_SZ_BYTES(tls_static_params) / 16); @@ -278,7 +278,7 @@ tx_post_resync_dump(struct mlx5e_txqsq *sq, struct sk_buff *skb, cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_DUMP); cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt); - cseg->imm = cpu_to_be32(tisn); + cseg->tisn = cpu_to_be32(tisn); cseg->fm_ce_se = first ? MLX5_FENCE_MODE_INITIATOR_SMALL : 0; eseg->inline_hdr.sz = cpu_to_be16(ihs); @@ -434,7 +434,7 @@ struct sk_buff *mlx5e_ktls_handle_tx_skb(struct net_device *netdev, priv_tx->expected_seq = seq + datalen; cseg = &(*wqe)->ctrl; - cseg->imm = cpu_to_be32(priv_tx->tisn); + cseg->tisn = cpu_to_be32(priv_tx->tisn); stats->tls_encrypted_packets += skb_is_gso(skb) ? skb_shinfo(skb)->gso_segs : 1; stats->tls_encrypted_bytes += datalen; From b86f1abe2c275e6f9abf7e2669ccc318b7ef7bb9 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Tue, 30 Jul 2019 13:45:42 +0300 Subject: [PATCH 248/482] net/mlx5e: kTLS, Fix tisn field placement Shift the tisn field in the WQE control segment, per the HW specification. Fixes: d2ead1f360e8 ("net/mlx5e: Add kTLS TX HW offload support") Signed-off-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c index cfc9e7d457e3..8b93101e1a09 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c @@ -69,7 +69,7 @@ build_static_params(struct mlx5e_umr_wqe *wqe, u16 pc, u32 sqn, cseg->qpn_ds = cpu_to_be32((sqn << MLX5_WQE_CTRL_QPN_SHIFT) | STATIC_PARAMS_DS_CNT); cseg->fm_ce_se = fence ? MLX5_FENCE_MODE_INITIATOR_SMALL : 0; - cseg->tisn = cpu_to_be32(priv_tx->tisn); + cseg->tisn = cpu_to_be32(priv_tx->tisn << 8); ucseg->flags = MLX5_UMR_INLINE; ucseg->bsf_octowords = cpu_to_be16(MLX5_ST_SZ_BYTES(tls_static_params) / 16); @@ -278,7 +278,7 @@ tx_post_resync_dump(struct mlx5e_txqsq *sq, struct sk_buff *skb, cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_DUMP); cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt); - cseg->tisn = cpu_to_be32(tisn); + cseg->tisn = cpu_to_be32(tisn << 8); cseg->fm_ce_se = first ? MLX5_FENCE_MODE_INITIATOR_SMALL : 0; eseg->inline_hdr.sz = cpu_to_be16(ihs); @@ -434,7 +434,7 @@ struct sk_buff *mlx5e_ktls_handle_tx_skb(struct net_device *netdev, priv_tx->expected_seq = seq + datalen; cseg = &(*wqe)->ctrl; - cseg->tisn = cpu_to_be32(priv_tx->tisn); + cseg->tisn = cpu_to_be32(priv_tx->tisn << 8); stats->tls_encrypted_packets += skb_is_gso(skb) ? skb_shinfo(skb)->gso_segs : 1; stats->tls_encrypted_bytes += datalen; From d9a2fcf53c76a7edb2bcf99e94507935561a83d5 Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Wed, 7 Aug 2019 15:59:06 +0300 Subject: [PATCH 249/482] net/mlx5e: Fix false negative indication on tx reporter CQE recovery Remove wrong error return value when SQ is not in error state. CQE recovery on TX reporter queries the sq state. If the sq is not in error state, the sq is either in ready or reset state. Ready state is good state which doesn't require recovery and reset state is a temporal state which ends in ready state. With this patch, CQE recovery in this scenario is successful. Fixes: de8650a82071 ("net/mlx5e: Add tx reporter support") Signed-off-by: Aya Levin Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c index f3d98748b211..b307234b4e05 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c @@ -86,10 +86,8 @@ static int mlx5e_tx_reporter_err_cqe_recover(struct mlx5e_txqsq *sq) return err; } - if (state != MLX5_SQC_STATE_ERR) { - netdev_err(dev, "SQ 0x%x not in ERROR state\n", sq->sqn); - return -EINVAL; - } + if (state != MLX5_SQC_STATE_ERR) + return 0; mlx5e_tx_disable_queue(sq->txq); From 276d197e70bcc47153592f4384675b51c7d83aba Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Tue, 6 Aug 2019 15:19:19 +0300 Subject: [PATCH 250/482] net/mlx5e: Fix error flow of CQE recovery on tx reporter CQE recovery function begins with test and set of recovery bit. Add an error flow which ensures clearing of this bit when leaving the recovery function, to allow further recoveries to take place. This allows removal of clearing recovery bit on sq activate. Fixes: de8650a82071 ("net/mlx5e: Add tx reporter support") Signed-off-by: Aya Levin Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/en/reporter_tx.c | 12 ++++++++---- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 1 - 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c index b307234b4e05..b91814ecfbc9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c @@ -83,17 +83,17 @@ static int mlx5e_tx_reporter_err_cqe_recover(struct mlx5e_txqsq *sq) if (err) { netdev_err(dev, "Failed to query SQ 0x%x state. err = %d\n", sq->sqn, err); - return err; + goto out; } if (state != MLX5_SQC_STATE_ERR) - return 0; + goto out; mlx5e_tx_disable_queue(sq->txq); err = mlx5e_wait_for_sq_flush(sq); if (err) - return err; + goto out; /* At this point, no new packets will arrive from the stack as TXQ is * marked with QUEUE_STATE_DRV_XOFF. In addition, NAPI cleared all @@ -102,13 +102,17 @@ static int mlx5e_tx_reporter_err_cqe_recover(struct mlx5e_txqsq *sq) err = mlx5e_sq_to_ready(sq, state); if (err) - return err; + goto out; mlx5e_reset_txqsq_cc_pc(sq); sq->stats->recover++; + clear_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state); mlx5e_activate_txqsq(sq); return 0; +out: + clear_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state); + return err; } static int mlx5_tx_health_report(struct devlink_health_reporter *tx_reporter, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 6c712c5be4d8..9d5f6e56188f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -1321,7 +1321,6 @@ static int mlx5e_open_txqsq(struct mlx5e_channel *c, void mlx5e_activate_txqsq(struct mlx5e_txqsq *sq) { sq->txq = netdev_get_tx_queue(sq->channel->netdev, sq->txq_ix); - clear_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state); set_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); netdev_tx_reset_queue(sq->txq); netif_tx_start_queue(sq->txq); From a4e508cab623951dc4754f346e5673714f3bbade Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Thu, 8 Aug 2019 15:55:48 +0300 Subject: [PATCH 251/482] net/mlx5e: Remove redundant check in CQE recovery flow of tx reporter Remove check of recovery bit, in the beginning of the CQE recovery function. This test is already performed right before the reporter is invoked, when CQE error is detected. Fixes: de8650a82071 ("net/mlx5e: Add tx reporter support") Signed-off-by: Aya Levin Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c index b91814ecfbc9..c7f86453c638 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c @@ -76,9 +76,6 @@ static int mlx5e_tx_reporter_err_cqe_recover(struct mlx5e_txqsq *sq) u8 state; int err; - if (!test_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state)) - return 0; - err = mlx5_core_query_sq_state(mdev, sq->sqn, &state); if (err) { netdev_err(dev, "Failed to query SQ 0x%x state. err = %d\n", From 891584f48a9084ba462f10da4c6bb28b6181b543 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Fri, 2 Aug 2019 17:15:03 +0200 Subject: [PATCH 252/482] inet: frags: re-introduce skb coalescing for local delivery Before commit d4289fcc9b16 ("net: IP6 defrag: use rbtrees for IPv6 defrag"), a netperf UDP_STREAM test[0] using big IPv6 datagrams (thus generating many fragments) and running over an IPsec tunnel, reported more than 6Gbps throughput. After that patch, the same test gets only 9Mbps when receiving on a be2net nic (driver can make a big difference here, for example, ixgbe doesn't seem to be affected). By reusing the IPv4 defragmentation code, IPv6 lost fragment coalescing (IPv4 fragment coalescing was dropped by commit 14fe22e33462 ("Revert "ipv4: use skb coalescing in defragmentation"")). Without fragment coalescing, be2net runs out of Rx ring entries and starts to drop frames (ethtool reports rx_drops_no_frags errors). Since the netperf traffic is only composed of UDP fragments, any lost packet prevents reassembly of the full datagram. Therefore, fragments which have no possibility to ever get reassembled pile up in the reassembly queue, until the memory accounting exeeds the threshold. At that point no fragment is accepted anymore, which effectively discards all netperf traffic. When reassembly timeout expires, some stale fragments are removed from the reassembly queue, so a few packets can be received, reassembled and delivered to the netperf receiver. But the nic still drops frames and soon the reassembly queue gets filled again with stale fragments. These long time frames where no datagram can be received explain why the performance drop is so significant. Re-introducing fragment coalescing is enough to get the initial performances again (6.6Gbps with be2net): driver doesn't drop frames anymore (no more rx_drops_no_frags errors) and the reassembly engine works at full speed. This patch is quite conservative and only coalesces skbs for local IPv4 and IPv6 delivery (in order to avoid changing skb geometry when forwarding). Coalescing could be extended in the future if need be, as more scenarios would probably benefit from it. [0]: Test configuration Sender: ip xfrm policy flush ip xfrm state flush ip xfrm state add src fc00:1::1 dst fc00:2::1 proto esp spi 0x1000 aead 'rfc4106(gcm(aes))' 0x0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b 96 mode transport sel src fc00:1::1 dst fc00:2::1 ip xfrm policy add src fc00:1::1 dst fc00:2::1 dir in tmpl src fc00:1::1 dst fc00:2::1 proto esp mode transport action allow ip xfrm state add src fc00:2::1 dst fc00:1::1 proto esp spi 0x1001 aead 'rfc4106(gcm(aes))' 0x0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b 96 mode transport sel src fc00:2::1 dst fc00:1::1 ip xfrm policy add src fc00:2::1 dst fc00:1::1 dir out tmpl src fc00:2::1 dst fc00:1::1 proto esp mode transport action allow netserver -D -L fc00:2::1 Receiver: ip xfrm policy flush ip xfrm state flush ip xfrm state add src fc00:2::1 dst fc00:1::1 proto esp spi 0x1001 aead 'rfc4106(gcm(aes))' 0x0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b 96 mode transport sel src fc00:2::1 dst fc00:1::1 ip xfrm policy add src fc00:2::1 dst fc00:1::1 dir in tmpl src fc00:2::1 dst fc00:1::1 proto esp mode transport action allow ip xfrm state add src fc00:1::1 dst fc00:2::1 proto esp spi 0x1000 aead 'rfc4106(gcm(aes))' 0x0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b 96 mode transport sel src fc00:1::1 dst fc00:2::1 ip xfrm policy add src fc00:1::1 dst fc00:2::1 dir out tmpl src fc00:1::1 dst fc00:2::1 proto esp mode transport action allow netperf -H fc00:2::1 -f k -P 0 -L fc00:1::1 -l 60 -t UDP_STREAM -I 99,5 -i 5,5 -T5,5 -6 Signed-off-by: Guillaume Nault Acked-by: Florian Westphal Signed-off-by: David S. Miller --- include/net/inet_frag.h | 2 +- net/ieee802154/6lowpan/reassembly.c | 2 +- net/ipv4/inet_fragment.c | 39 ++++++++++++++++++------- net/ipv4/ip_fragment.c | 8 ++++- net/ipv6/netfilter/nf_conntrack_reasm.c | 2 +- net/ipv6/reassembly.c | 2 +- 6 files changed, 39 insertions(+), 16 deletions(-) diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h index 010f26b31c89..bac79e817776 100644 --- a/include/net/inet_frag.h +++ b/include/net/inet_frag.h @@ -171,7 +171,7 @@ int inet_frag_queue_insert(struct inet_frag_queue *q, struct sk_buff *skb, void *inet_frag_reasm_prepare(struct inet_frag_queue *q, struct sk_buff *skb, struct sk_buff *parent); void inet_frag_reasm_finish(struct inet_frag_queue *q, struct sk_buff *head, - void *reasm_data); + void *reasm_data, bool try_coalesce); struct sk_buff *inet_frag_pull_head(struct inet_frag_queue *q); #endif diff --git a/net/ieee802154/6lowpan/reassembly.c b/net/ieee802154/6lowpan/reassembly.c index e4aba5d485be..bbe9b3b2d395 100644 --- a/net/ieee802154/6lowpan/reassembly.c +++ b/net/ieee802154/6lowpan/reassembly.c @@ -170,7 +170,7 @@ static int lowpan_frag_reasm(struct lowpan_frag_queue *fq, struct sk_buff *skb, reasm_data = inet_frag_reasm_prepare(&fq->q, skb, prev_tail); if (!reasm_data) goto out_oom; - inet_frag_reasm_finish(&fq->q, skb, reasm_data); + inet_frag_reasm_finish(&fq->q, skb, reasm_data, false); skb->dev = ldev; skb->tstamp = fq->q.stamp; diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index a999451345f9..10d31733297d 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -475,11 +475,12 @@ void *inet_frag_reasm_prepare(struct inet_frag_queue *q, struct sk_buff *skb, EXPORT_SYMBOL(inet_frag_reasm_prepare); void inet_frag_reasm_finish(struct inet_frag_queue *q, struct sk_buff *head, - void *reasm_data) + void *reasm_data, bool try_coalesce) { struct sk_buff **nextp = (struct sk_buff **)reasm_data; struct rb_node *rbn; struct sk_buff *fp; + int sum_truesize; skb_push(head, head->data - skb_network_header(head)); @@ -487,25 +488,41 @@ void inet_frag_reasm_finish(struct inet_frag_queue *q, struct sk_buff *head, fp = FRAG_CB(head)->next_frag; rbn = rb_next(&head->rbnode); rb_erase(&head->rbnode, &q->rb_fragments); + + sum_truesize = head->truesize; while (rbn || fp) { /* fp points to the next sk_buff in the current run; * rbn points to the next run. */ /* Go through the current run. */ while (fp) { - *nextp = fp; - nextp = &fp->next; - fp->prev = NULL; - memset(&fp->rbnode, 0, sizeof(fp->rbnode)); - fp->sk = NULL; - head->data_len += fp->len; - head->len += fp->len; + struct sk_buff *next_frag = FRAG_CB(fp)->next_frag; + bool stolen; + int delta; + + sum_truesize += fp->truesize; if (head->ip_summed != fp->ip_summed) head->ip_summed = CHECKSUM_NONE; else if (head->ip_summed == CHECKSUM_COMPLETE) head->csum = csum_add(head->csum, fp->csum); - head->truesize += fp->truesize; - fp = FRAG_CB(fp)->next_frag; + + if (try_coalesce && skb_try_coalesce(head, fp, &stolen, + &delta)) { + kfree_skb_partial(fp, stolen); + } else { + fp->prev = NULL; + memset(&fp->rbnode, 0, sizeof(fp->rbnode)); + fp->sk = NULL; + + head->data_len += fp->len; + head->len += fp->len; + head->truesize += fp->truesize; + + *nextp = fp; + nextp = &fp->next; + } + + fp = next_frag; } /* Move to the next run. */ if (rbn) { @@ -516,7 +533,7 @@ void inet_frag_reasm_finish(struct inet_frag_queue *q, struct sk_buff *head, rbn = rbnext; } } - sub_frag_mem_limit(q->fqdir, head->truesize); + sub_frag_mem_limit(q->fqdir, sum_truesize); *nextp = NULL; skb_mark_not_on_list(head); diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 4385eb9e781f..cfeb8890f94e 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -393,6 +393,11 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) return err; } +static bool ip_frag_coalesce_ok(const struct ipq *qp) +{ + return qp->q.key.v4.user == IP_DEFRAG_LOCAL_DELIVER; +} + /* Build a new IP datagram from all its fragments. */ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb, struct sk_buff *prev_tail, struct net_device *dev) @@ -421,7 +426,8 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb, if (len > 65535) goto out_oversize; - inet_frag_reasm_finish(&qp->q, skb, reasm_data); + inet_frag_reasm_finish(&qp->q, skb, reasm_data, + ip_frag_coalesce_ok(qp)); skb->dev = dev; IPCB(skb)->frag_max_size = max(qp->max_df_size, qp->q.max_size); diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 0f82c150543b..fed9666a2f7d 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -348,7 +348,7 @@ static int nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *skb, skb_reset_transport_header(skb); - inet_frag_reasm_finish(&fq->q, skb, reasm_data); + inet_frag_reasm_finish(&fq->q, skb, reasm_data, false); skb->ignore_df = 1; skb->dev = dev; diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index ca05b16f1bb9..1f5d4d196dcc 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -282,7 +282,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *skb, skb_reset_transport_header(skb); - inet_frag_reasm_finish(&fq->q, skb, reasm_data); + inet_frag_reasm_finish(&fq->q, skb, reasm_data, true); skb->dev = dev; ipv6_hdr(skb)->payload_len = htons(payload_len); From 94ed3fde38c7c1347cd82b945553905cfd992ab9 Mon Sep 17 00:00:00 2001 From: Atish Patra Date: Fri, 2 Aug 2019 21:27:23 -0700 Subject: [PATCH 253/482] dt-bindings: Update the riscv,isa string description Since the RISC-V specification states that ISA description strings are case-insensitive, there's no functional difference between mixed-case, upper-case, and lower-case ISA strings. Thus, to simplify parsing, specify that the letters present in "riscv,isa" must be all lowercase. Suggested-by: Paul Walmsley Signed-off-by: Atish Patra Signed-off-by: Paul Walmsley --- Documentation/devicetree/bindings/riscv/cpus.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Documentation/devicetree/bindings/riscv/cpus.yaml b/Documentation/devicetree/bindings/riscv/cpus.yaml index c899111aa5e3..9d3fe6aada2b 100644 --- a/Documentation/devicetree/bindings/riscv/cpus.yaml +++ b/Documentation/devicetree/bindings/riscv/cpus.yaml @@ -50,6 +50,10 @@ properties: User-Level ISA document, available from https://riscv.org/specifications/ + While the isa strings in ISA specification are case + insensitive, letters in the riscv,isa string must be all + lowercase to simplify parsing. + timebase-frequency: type: integer minimum: 1 From 66cc016ab7c780d53450fd1648010da02ddf2770 Mon Sep 17 00:00:00 2001 From: Paul Walmsley Date: Tue, 6 Aug 2019 18:28:33 -0700 Subject: [PATCH 254/482] riscv: delay: use do_div() instead of __udivdi3() In preparation for removing __udivdi3() from the RISC-V architecture-specific files, convert its one user to use do_div(). This avoids breaking the RV32 build after __udivdi3() is removed. This second version removes the assignment of the remainder to an unused temporary variable. Thanks to Nicolas Pitre for the suggestion. Signed-off-by: Paul Walmsley Cc: Nicolas Pitre --- arch/riscv/lib/delay.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/riscv/lib/delay.c b/arch/riscv/lib/delay.c index 87ff89e88f2c..f51c9a03bca1 100644 --- a/arch/riscv/lib/delay.c +++ b/arch/riscv/lib/delay.c @@ -81,9 +81,13 @@ EXPORT_SYMBOL(__delay); void udelay(unsigned long usecs) { u64 ucycles = (u64)usecs * lpj_fine * UDELAY_MULT; + u64 n; if (unlikely(usecs > MAX_UDELAY_US)) { - __delay((u64)usecs * riscv_timebase / 1000000ULL); + n = (u64)usecs * riscv_timebase; + do_div(n, 1000000); + + __delay(n); return; } From 81a48ee417387bef9cb720ca75980ee11ae9c901 Mon Sep 17 00:00:00 2001 From: Palmer Dabbelt Date: Tue, 6 Aug 2019 17:05:08 -0700 Subject: [PATCH 255/482] RISC-V: Remove udivdi3 This should never have landed in the first place: it was added as part of 64-bit divide support for 32-bit systems, but the kernel doesn't allow this sort of division. I must have forgotten to remove it. This patch removes the support. Since this routine only worked on 64-bit platforms but was only built on 32-bit platforms, it's essentially just nonsense anyway. Signed-off-by: Palmer Dabbelt Acked-by: Nicolas Pitre Link: https://lore.kernel.org/linux-riscv/nycvar.YSQ.7.76.1908061413360.19480@knanqh.ubzr/T/#t Reported-by: Eric Lin Signed-off-by: Paul Walmsley --- arch/riscv/lib/Makefile | 2 -- arch/riscv/lib/udivdi3.S | 32 -------------------------------- 2 files changed, 34 deletions(-) delete mode 100644 arch/riscv/lib/udivdi3.S diff --git a/arch/riscv/lib/Makefile b/arch/riscv/lib/Makefile index 8e364ebf37de..267feaa10f6a 100644 --- a/arch/riscv/lib/Makefile +++ b/arch/riscv/lib/Makefile @@ -5,5 +5,3 @@ lib-y += memset.o lib-y += uaccess.o lib-$(CONFIG_64BIT) += tishift.o - -lib-$(CONFIG_32BIT) += udivdi3.o diff --git a/arch/riscv/lib/udivdi3.S b/arch/riscv/lib/udivdi3.S deleted file mode 100644 index 3f07476a91a9..000000000000 --- a/arch/riscv/lib/udivdi3.S +++ /dev/null @@ -1,32 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2016-2017 Free Software Foundation, Inc. - */ - -#include - -ENTRY(__udivdi3) - mv a2, a1 - mv a1, a0 - li a0, -1 - beqz a2, .L5 - li a3, 1 - bgeu a2, a1, .L2 -.L1: - blez a2, .L2 - slli a2, a2, 1 - slli a3, a3, 1 - bgtu a1, a2, .L1 -.L2: - li a0, 0 -.L3: - bltu a1, a2, .L4 - sub a1, a1, a2 - or a0, a0, a3 -.L4: - srli a3, a3, 1 - srli a2, a2, 1 - bnez a3, .L3 -.L5: - ret -ENDPROC(__udivdi3) From 8e5e72e3314021a3d166c1d19a991a0870568856 Mon Sep 17 00:00:00 2001 From: Paul Walmsley Date: Thu, 8 Aug 2019 14:46:58 -0700 Subject: [PATCH 256/482] dt-bindings: riscv: remove obsolete cpus.txt Remove the now-obsolete riscv/cpus.txt DT binding document, since we are using YAML binding documentation instead. While doing so, transfer the explanatory text about 'harts' (with some edits) into the YAML file, at Rob's request. Link: https://lore.kernel.org/linux-riscv/CAL_JsqJs6MtvmuyAknsUxQymbmoV=G+=JfS1PQj9kNHV7fjC9g@mail.gmail.com/ Signed-off-by: Paul Walmsley Cc: Rob Herring Reviewed-by: Rob Herring --- .../devicetree/bindings/riscv/cpus.txt | 162 ------------------ .../devicetree/bindings/riscv/cpus.yaml | 12 ++ 2 files changed, 12 insertions(+), 162 deletions(-) delete mode 100644 Documentation/devicetree/bindings/riscv/cpus.txt diff --git a/Documentation/devicetree/bindings/riscv/cpus.txt b/Documentation/devicetree/bindings/riscv/cpus.txt deleted file mode 100644 index adf7b7af5dc3..000000000000 --- a/Documentation/devicetree/bindings/riscv/cpus.txt +++ /dev/null @@ -1,162 +0,0 @@ -=================== -RISC-V CPU Bindings -=================== - -The device tree allows to describe the layout of CPUs in a system through -the "cpus" node, which in turn contains a number of subnodes (ie "cpu") -defining properties for every cpu. - -Bindings for CPU nodes follow the Devicetree Specification, available from: - -https://www.devicetree.org/specifications/ - -with updates for 32-bit and 64-bit RISC-V systems provided in this document. - -=========== -Terminology -=========== - -This document uses some terminology common to the RISC-V community that is not -widely used, the definitions of which are listed here: - -* hart: A hardware execution context, which contains all the state mandated by - the RISC-V ISA: a PC and some registers. This terminology is designed to - disambiguate software's view of execution contexts from any particular - microarchitectural implementation strategy. For example, my Intel laptop is - described as having one socket with two cores, each of which has two hyper - threads. Therefore this system has four harts. - -===================================== -cpus and cpu node bindings definition -===================================== - -The RISC-V architecture, in accordance with the Devicetree Specification, -requires the cpus and cpu nodes to be present and contain the properties -described below. - -- cpus node - - Description: Container of cpu nodes - - The node name must be "cpus". - - A cpus node must define the following properties: - - - #address-cells - Usage: required - Value type: - Definition: must be set to 1 - - #size-cells - Usage: required - Value type: - Definition: must be set to 0 - -- cpu node - - Description: Describes a hart context - - PROPERTIES - - - device_type - Usage: required - Value type: - Definition: must be "cpu" - - reg - Usage: required - Value type: - Definition: The hart ID of this CPU node - - compatible: - Usage: required - Value type: - Definition: must contain "riscv", may contain one of - "sifive,rocket0" - - mmu-type: - Usage: optional - Value type: - Definition: Specifies the CPU's MMU type. Possible values are - "riscv,sv32" - "riscv,sv39" - "riscv,sv48" - - riscv,isa: - Usage: required - Value type: - Definition: Contains the RISC-V ISA string of this hart. These - ISA strings are defined by the RISC-V ISA manual. - -Example: SiFive Freedom U540G Development Kit ---------------------------------------------- - -This system contains two harts: a hart marked as disabled that's used for -low-level system tasks and should be ignored by Linux, and a second hart that -Linux is allowed to run on. - - cpus { - #address-cells = <1>; - #size-cells = <0>; - timebase-frequency = <1000000>; - cpu@0 { - clock-frequency = <1600000000>; - compatible = "sifive,rocket0", "riscv"; - device_type = "cpu"; - i-cache-block-size = <64>; - i-cache-sets = <128>; - i-cache-size = <16384>; - next-level-cache = <&L15 &L0>; - reg = <0>; - riscv,isa = "rv64imac"; - status = "disabled"; - L10: interrupt-controller { - #interrupt-cells = <1>; - compatible = "riscv,cpu-intc"; - interrupt-controller; - }; - }; - cpu@1 { - clock-frequency = <1600000000>; - compatible = "sifive,rocket0", "riscv"; - d-cache-block-size = <64>; - d-cache-sets = <64>; - d-cache-size = <32768>; - d-tlb-sets = <1>; - d-tlb-size = <32>; - device_type = "cpu"; - i-cache-block-size = <64>; - i-cache-sets = <64>; - i-cache-size = <32768>; - i-tlb-sets = <1>; - i-tlb-size = <32>; - mmu-type = "riscv,sv39"; - next-level-cache = <&L15 &L0>; - reg = <1>; - riscv,isa = "rv64imafdc"; - status = "okay"; - tlb-split; - L13: interrupt-controller { - #interrupt-cells = <1>; - compatible = "riscv,cpu-intc"; - interrupt-controller; - }; - }; - }; - -Example: Spike ISA Simulator with 1 Hart ----------------------------------------- - -This device tree matches the Spike ISA golden model as run with `spike -p1`. - - cpus { - cpu@0 { - device_type = "cpu"; - reg = <0x00000000>; - status = "okay"; - compatible = "riscv"; - riscv,isa = "rv64imafdc"; - mmu-type = "riscv,sv48"; - clock-frequency = <0x3b9aca00>; - interrupt-controller { - #interrupt-cells = <0x00000001>; - interrupt-controller; - compatible = "riscv,cpu-intc"; - } - } - } diff --git a/Documentation/devicetree/bindings/riscv/cpus.yaml b/Documentation/devicetree/bindings/riscv/cpus.yaml index 9d3fe6aada2b..b261a3015f84 100644 --- a/Documentation/devicetree/bindings/riscv/cpus.yaml +++ b/Documentation/devicetree/bindings/riscv/cpus.yaml @@ -10,6 +10,18 @@ maintainers: - Paul Walmsley - Palmer Dabbelt +description: | + This document uses some terminology common to the RISC-V community + that is not widely used, the definitions of which are listed here: + + hart: A hardware execution context, which contains all the state + mandated by the RISC-V ISA: a PC and some registers. This + terminology is designed to disambiguate software's view of execution + contexts from any particular microarchitectural implementation + strategy. For example, an Intel laptop containing one socket with + two cores, each of which has two hyperthreads, could be described as + having four harts. + properties: compatible: items: From b390e0bfd2996f1215231395f4e25a4c011eeaf9 Mon Sep 17 00:00:00 2001 From: Paul Walmsley Date: Thu, 8 Aug 2019 15:36:44 -0700 Subject: [PATCH 257/482] dt-bindings: riscv: fix the schema compatible string for the HiFive Unleashed board The YAML binding document for SiFive boards has an incorrect compatible string for the HiFive Unleashed board. Change it to match the name of the board on the SiFive web site: https://www.sifive.com/boards/hifive-unleashed which also matches the contents of the board DT data file: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts#n13 Signed-off-by: Paul Walmsley Acked-by: Rob Herring --- Documentation/devicetree/bindings/riscv/sifive.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/riscv/sifive.yaml b/Documentation/devicetree/bindings/riscv/sifive.yaml index 9d17dc2f3f84..3ab532713dc1 100644 --- a/Documentation/devicetree/bindings/riscv/sifive.yaml +++ b/Documentation/devicetree/bindings/riscv/sifive.yaml @@ -19,7 +19,7 @@ properties: compatible: items: - enum: - - sifive,freedom-unleashed-a00 + - sifive,hifive-unleashed-a00 - const: sifive,fu540-c000 - const: sifive,fu540 ... From 3a0233ddec554b886298de2428edb5c50a20e694 Mon Sep 17 00:00:00 2001 From: Ross Lagerwall Date: Mon, 5 Aug 2019 16:34:34 +0100 Subject: [PATCH 258/482] xen/netback: Reset nr_frags before freeing skb At this point nr_frags has been incremented but the frag does not yet have a page assigned so freeing the skb results in a crash. Reset nr_frags before freeing the skb to prevent this. Signed-off-by: Ross Lagerwall Signed-off-by: David S. Miller --- drivers/net/xen-netback/netback.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index 1d9940d4e8c7..c9262ffeefe4 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -925,6 +925,7 @@ static void xenvif_tx_build_gops(struct xenvif_queue *queue, skb_shinfo(skb)->nr_frags = MAX_SKB_FRAGS; nskb = xenvif_alloc_skb(0); if (unlikely(nskb == NULL)) { + skb_shinfo(skb)->nr_frags = 0; kfree_skb(skb); xenvif_tx_err(queue, &txreq, extra_count, idx); if (net_ratelimit()) @@ -940,6 +941,7 @@ static void xenvif_tx_build_gops(struct xenvif_queue *queue, if (xenvif_set_skb_gso(queue->vif, skb, gso)) { /* Failure in xenvif_set_skb_gso is fatal. */ + skb_shinfo(skb)->nr_frags = 0; kfree_skb(skb); kfree_skb(nskb); break; From 7e7c076e123ae8c4faa1966fc0da64f6e24eb57e Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Mon, 5 Aug 2019 15:30:02 -0700 Subject: [PATCH 259/482] docs: admin-guide: remove references to IPX and token-ring Both IPX and TR have not been supported for a while now. Remove them from the /proc/sys/net documentation. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- Documentation/admin-guide/sysctl/net.rst | 29 +----------------------- 1 file changed, 1 insertion(+), 28 deletions(-) diff --git a/Documentation/admin-guide/sysctl/net.rst b/Documentation/admin-guide/sysctl/net.rst index a7d44e71019d..287b98708a40 100644 --- a/Documentation/admin-guide/sysctl/net.rst +++ b/Documentation/admin-guide/sysctl/net.rst @@ -39,7 +39,6 @@ Table : Subdirectories in /proc/sys/net 802 E802 protocol ax25 AX25 ethernet Ethernet protocol rose X.25 PLP layer ipv4 IP version 4 x25 X.25 protocol - ipx IPX token-ring IBM token ring bridge Bridging decnet DEC net ipv6 IP version 6 tipc TIPC ========= =================== = ========== ================== @@ -401,33 +400,7 @@ interface. (network) that the route leads to, the router (may be directly connected), the route flags, and the device the route is using. - -5. IPX ------- - -The IPX protocol has no tunable values in proc/sys/net. - -The IPX protocol does, however, provide proc/net/ipx. This lists each IPX -socket giving the local and remote addresses in Novell format (that is -network:node:port). In accordance with the strange Novell tradition, -everything but the port is in hex. Not_Connected is displayed for sockets that -are not tied to a specific remote address. The Tx and Rx queue sizes indicate -the number of bytes pending for transmission and reception. The state -indicates the state the socket is in and the uid is the owning uid of the -socket. - -The /proc/net/ipx_interface file lists all IPX interfaces. For each interface -it gives the network number, the node number, and indicates if the network is -the primary network. It also indicates which device it is bound to (or -Internal for internal networks) and the Frame Type if appropriate. Linux -supports 802.3, 802.2, 802.2 SNAP and DIX (Blue Book) ethernet framing for -IPX. - -The /proc/net/ipx_route table holds a list of IPX routes. For each route it -gives the destination network, the router node (or Directly) and the network -address of the router (or Connected) for internal networks. - -6. TIPC +5. TIPC ------- tipc_rmem From fe90689fed119cb55ff04e6a1df0817f0a3e9d32 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Mon, 5 Aug 2019 15:30:03 -0700 Subject: [PATCH 260/482] net: docs: replace IPX in tuntap documentation IPX is no longer supported, but the example in the documentation might useful. Replace it with IPv6. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- Documentation/networking/tuntap.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/networking/tuntap.txt b/Documentation/networking/tuntap.txt index 949d5dcdd9a3..0104830d5075 100644 --- a/Documentation/networking/tuntap.txt +++ b/Documentation/networking/tuntap.txt @@ -204,8 +204,8 @@ Ethernet device, which instead of receiving packets from a physical media, receives them from user space program and instead of sending packets via physical media sends them to the user space program. -Let's say that you configured IPX on the tap0, then whenever -the kernel sends an IPX packet to tap0, it is passed to the application +Let's say that you configured IPv6 on the tap0, then whenever +the kernel sends an IPv6 packet to tap0, it is passed to the application (VTun for example). The application encrypts, compresses and sends it to the other side over TCP or UDP. The application on the other side decompresses and decrypts the data received and writes the packet to the TAP device, From 8676b3ca4673517650fd509d7fa586aff87b3c28 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Thu, 18 Jul 2019 18:02:15 -0500 Subject: [PATCH 261/482] soundwire: fix regmap dependencies and align with other serial links The existing code has a mixed select/depend usage which makes no sense. config SOUNDWIRE_BUS tristate select REGMAP_SOUNDWIRE config REGMAP_SOUNDWIRE tristate depends on SOUNDWIRE_BUS Let's remove one layer of Kconfig definitions and align with the solutions used by all other serial links. Signed-off-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20190718230215.18675-1-pierre-louis.bossart@linux.intel.com Signed-off-by: Vinod Koul --- drivers/base/regmap/Kconfig | 2 +- drivers/soundwire/Kconfig | 7 +------ drivers/soundwire/Makefile | 2 +- 3 files changed, 3 insertions(+), 8 deletions(-) diff --git a/drivers/base/regmap/Kconfig b/drivers/base/regmap/Kconfig index a4984136c19d..0fd6f97ee523 100644 --- a/drivers/base/regmap/Kconfig +++ b/drivers/base/regmap/Kconfig @@ -44,7 +44,7 @@ config REGMAP_IRQ config REGMAP_SOUNDWIRE tristate - depends on SOUNDWIRE_BUS + depends on SOUNDWIRE config REGMAP_SCCB tristate diff --git a/drivers/soundwire/Kconfig b/drivers/soundwire/Kconfig index 3a01cfd70fdc..f518273cfbe3 100644 --- a/drivers/soundwire/Kconfig +++ b/drivers/soundwire/Kconfig @@ -4,7 +4,7 @@ # menuconfig SOUNDWIRE - bool "SoundWire support" + tristate "SoundWire support" help SoundWire is a 2-Pin interface with data and clock line ratified by the MIPI Alliance. SoundWire is used for transporting data @@ -17,17 +17,12 @@ if SOUNDWIRE comment "SoundWire Devices" -config SOUNDWIRE_BUS - tristate - select REGMAP_SOUNDWIRE - config SOUNDWIRE_CADENCE tristate config SOUNDWIRE_INTEL tristate "Intel SoundWire Master driver" select SOUNDWIRE_CADENCE - select SOUNDWIRE_BUS depends on X86 && ACPI && SND_SOC help SoundWire Intel Master driver. diff --git a/drivers/soundwire/Makefile b/drivers/soundwire/Makefile index fd99a831b92a..45b7e5001653 100644 --- a/drivers/soundwire/Makefile +++ b/drivers/soundwire/Makefile @@ -5,7 +5,7 @@ #Bus Objs soundwire-bus-objs := bus_type.o bus.o slave.o mipi_disco.o stream.o -obj-$(CONFIG_SOUNDWIRE_BUS) += soundwire-bus.o +obj-$(CONFIG_SOUNDWIRE) += soundwire-bus.o #Cadence Objs soundwire-cadence-objs := cadence_master.o From 51650d33b2771acd505068da669cf85cffac369a Mon Sep 17 00:00:00 2001 From: Ivan Khoronzhuk Date: Wed, 7 Aug 2019 01:45:40 +0300 Subject: [PATCH 262/482] net: sched: sch_taprio: fix memleak in error path for sched list parse In error case, all entries should be freed from the sched list before deleting it. For simplicity use rcu way. Fixes: 5a781ccbd19e46 ("tc: Add support for configuring the taprio scheduler") Acked-by: Vinicius Costa Gomes Signed-off-by: Ivan Khoronzhuk Signed-off-by: David S. Miller --- net/sched/sch_taprio.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c index c39db507ba3f..e25d414ae12f 100644 --- a/net/sched/sch_taprio.c +++ b/net/sched/sch_taprio.c @@ -1195,7 +1195,8 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt, spin_unlock_bh(qdisc_lock(sch)); free_sched: - kfree(new_admin); + if (new_admin) + call_rcu(&new_admin->rcu, taprio_free_sched_cb); return err; } From d595b03de2cb0bdf9bcdf35ff27840cc3a37158f Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Wed, 7 Aug 2019 10:19:59 +0800 Subject: [PATCH 263/482] bonding: Add vlan tx offload to hw_enc_features As commit 30d8177e8ac7 ("bonding: Always enable vlan tx offload") said, we should always enable bonding's vlan tx offload, pass the vlan packets to the slave devices with vlan tci, let them to handle vlan implementation. Now if encapsulation protocols like VXLAN is used, skb->encapsulation may be set, then the packet is passed to vlan device which based on bonding device. However in netif_skb_features(), the check of hw_enc_features: if (skb->encapsulation) features &= dev->hw_enc_features; clears NETIF_F_HW_VLAN_CTAG_TX/NETIF_F_HW_VLAN_STAG_TX. This results in same issue in commit 30d8177e8ac7 like this: vlan_dev_hard_start_xmit -->dev_queue_xmit -->validate_xmit_skb -->netif_skb_features //NETIF_F_HW_VLAN_CTAG_TX is cleared -->validate_xmit_vlan -->__vlan_hwaccel_push_inside //skb->tci is cleared ... --> bond_start_xmit --> bond_xmit_hash //BOND_XMIT_POLICY_ENCAP34 --> __skb_flow_dissect // nhoff point to IP header --> case htons(ETH_P_8021Q) // skb_vlan_tag_present is false, so vlan = __skb_header_pointer(skb, nhoff, sizeof(_vlan), //vlan point to ip header wrongly Fixes: b2a103e6d0af ("bonding: convert to ndo_fix_features") Signed-off-by: YueHaibing Acked-by: Jay Vosburgh Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 02fd7822c14a..931d9d935686 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1126,6 +1126,8 @@ static void bond_compute_features(struct bonding *bond) done: bond_dev->vlan_features = vlan_features; bond_dev->hw_enc_features = enc_features | NETIF_F_GSO_ENCAP_ALL | + NETIF_F_HW_VLAN_CTAG_TX | + NETIF_F_HW_VLAN_STAG_TX | NETIF_F_GSO_UDP_L4; bond_dev->mpls_features = mpls_features; bond_dev->gso_max_segs = gso_max_segs; From e3e3af9aa29a2ada43d5c27b47ea320415cd5bb3 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Wed, 7 Aug 2019 21:08:56 +0800 Subject: [PATCH 264/482] net: dsa: sja1105: remove set but not used variables 'tx_vid' and 'rx_vid' Fixes gcc '-Wunused-but-set-variable' warning: drivers/net/dsa/sja1105/sja1105_main.c: In function sja1105_fdb_dump: drivers/net/dsa/sja1105/sja1105_main.c:1226:14: warning: variable tx_vid set but not used [-Wunused-but-set-variable] drivers/net/dsa/sja1105/sja1105_main.c:1226:6: warning: variable rx_vid set but not used [-Wunused-but-set-variable] They are not used since commit 6d7c7d948a2e ("net: dsa: sja1105: Fix broken learning with vlan_filtering disabled") Reported-by: Hulk Robot Signed-off-by: YueHaibing Reviewed-by: Vladimir Oltean Reviewed-by: Vivien Didelot Signed-off-by: David S. Miller --- drivers/net/dsa/sja1105/sja1105_main.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c index d073baffc20b..df976b259e43 100644 --- a/drivers/net/dsa/sja1105/sja1105_main.c +++ b/drivers/net/dsa/sja1105/sja1105_main.c @@ -1223,12 +1223,8 @@ static int sja1105_fdb_dump(struct dsa_switch *ds, int port, { struct sja1105_private *priv = ds->priv; struct device *dev = ds->dev; - u16 rx_vid, tx_vid; int i; - rx_vid = dsa_8021q_rx_vid(ds, port); - tx_vid = dsa_8021q_tx_vid(ds, port); - for (i = 0; i < SJA1105_MAX_L2_LOOKUP_COUNT; i++) { struct sja1105_l2_lookup_entry l2_lookup = {0}; u8 macaddr[ETH_ALEN]; From e1fea322fc6d4075254ca9c5f2afdace0281da2a Mon Sep 17 00:00:00 2001 From: Roman Mashak Date: Wed, 7 Aug 2019 15:57:28 -0400 Subject: [PATCH 265/482] net sched: update skbedit action for batched events operations Add get_fill_size() routine used to calculate the action size when building a batch of events. Fixes: ca9b0e27e ("pkt_action: add new action skbedit") Signed-off-by: Roman Mashak Signed-off-by: David S. Miller --- net/sched/act_skbedit.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c index b100870f02a6..37dced00b63d 100644 --- a/net/sched/act_skbedit.c +++ b/net/sched/act_skbedit.c @@ -307,6 +307,17 @@ static int tcf_skbedit_search(struct net *net, struct tc_action **a, u32 index) return tcf_idr_search(tn, a, index); } +static size_t tcf_skbedit_get_fill_size(const struct tc_action *act) +{ + return nla_total_size(sizeof(struct tc_skbedit)) + + nla_total_size(sizeof(u32)) /* TCA_SKBEDIT_PRIORITY */ + + nla_total_size(sizeof(u16)) /* TCA_SKBEDIT_QUEUE_MAPPING */ + + nla_total_size(sizeof(u32)) /* TCA_SKBEDIT_MARK */ + + nla_total_size(sizeof(u16)) /* TCA_SKBEDIT_PTYPE */ + + nla_total_size(sizeof(u32)) /* TCA_SKBEDIT_MASK */ + + nla_total_size_64bit(sizeof(u64)); /* TCA_SKBEDIT_FLAGS */ +} + static struct tc_action_ops act_skbedit_ops = { .kind = "skbedit", .id = TCA_ID_SKBEDIT, @@ -316,6 +327,7 @@ static struct tc_action_ops act_skbedit_ops = { .init = tcf_skbedit_init, .cleanup = tcf_skbedit_cleanup, .walk = tcf_skbedit_walker, + .get_fill_size = tcf_skbedit_get_fill_size, .lookup = tcf_skbedit_search, .size = sizeof(struct tcf_skbedit), }; From 7bc161846dcf4af0485f260930d17fdd892a4980 Mon Sep 17 00:00:00 2001 From: Roman Mashak Date: Wed, 7 Aug 2019 15:57:29 -0400 Subject: [PATCH 266/482] tc-testing: updated skbedit action tests with batch create/delete Update TDC tests with cases varifying ability of TC to install or delete batches of skbedit actions. Signed-off-by: Roman Mashak Signed-off-by: David S. Miller --- .../tc-testing/tc-tests/actions/skbedit.json | 47 +++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/skbedit.json b/tools/testing/selftests/tc-testing/tc-tests/actions/skbedit.json index bf5ebf59c2d4..9cdd2e31ac2c 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/actions/skbedit.json +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/skbedit.json @@ -670,5 +670,52 @@ "teardown": [ "$TC actions flush action skbedit" ] + }, + { + "id": "630c", + "name": "Add batch of 32 skbedit actions with all parameters and cookie", + "category": [ + "actions", + "skbedit" + ], + "setup": [ + [ + "$TC actions flush action skbedit", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "bash -c \"for i in \\`seq 1 32\\`; do cmd=\\\"action skbedit queue_mapping 2 priority 10 mark 7/0xaabbccdd ptype host inheritdsfield index \\$i cookie aabbccddeeff112233445566778800a1 \\\"; args=\"\\$args\\$cmd\"; done && $TC actions add \\$args\"", + "expExitCode": "0", + "verifyCmd": "$TC actions list action skbedit", + "matchPattern": "^[ \t]+index [0-9]+ ref", + "matchCount": "32", + "teardown": [ + "$TC actions flush action skbedit" + ] + }, + { + "id": "706d", + "name": "Delete batch of 32 skbedit actions with all parameters", + "category": [ + "actions", + "skbedit" + ], + "setup": [ + [ + "$TC actions flush action skbedit", + 0, + 1, + 255 + ], + "bash -c \"for i in \\`seq 1 32\\`; do cmd=\\\"action skbedit queue_mapping 2 priority 10 mark 7/0xaabbccdd ptype host inheritdsfield index \\$i \\\"; args=\\\"\\$args\\$cmd\\\"; done && $TC actions add \\$args\"" + ], + "cmdUnderTest": "bash -c \"for i in \\`seq 1 32\\`; do cmd=\\\"action skbedit index \\$i \\\"; args=\"\\$args\\$cmd\"; done && $TC actions del \\$args\"", + "expExitCode": "0", + "verifyCmd": "$TC actions list action skbedit", + "matchPattern": "^[ \t]+index [0-9]+ ref", + "matchCount": "0", + "teardown": [] } ] From 414776621d1006e57e80e6db7fdc3837897aaa64 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 7 Aug 2019 17:03:59 -0700 Subject: [PATCH 267/482] net/tls: prevent skb_orphan() from leaking TLS plain text with offload sk_validate_xmit_skb() and drivers depend on the sk member of struct sk_buff to identify segments requiring encryption. Any operation which removes or does not preserve the original TLS socket such as skb_orphan() or skb_clone() will cause clear text leaks. Make the TCP socket underlying an offloaded TLS connection mark all skbs as decrypted, if TLS TX is in offload mode. Then in sk_validate_xmit_skb() catch skbs which have no socket (or a socket with no validation) and decrypted flag set. Note that CONFIG_SOCK_VALIDATE_XMIT, CONFIG_TLS_DEVICE and sk->sk_validate_xmit_skb are slightly interchangeable right now, they all imply TLS offload. The new checks are guarded by CONFIG_TLS_DEVICE because that's the option guarding the sk_buff->decrypted member. Second, smaller issue with orphaning is that it breaks the guarantee that packets will be delivered to device queues in-order. All TLS offload drivers depend on that scheduling property. This means skb_orphan_partial()'s trick of preserving partial socket references will cause issues in the drivers. We need a full orphan, and as a result netem delay/throttling will cause all TLS offload skbs to be dropped. Reusing the sk_buff->decrypted flag also protects from leaking clear text when incoming, decrypted skb is redirected (e.g. by TC). See commit 0608c69c9a80 ("bpf: sk_msg, sock{map|hash} redirect through ULP") for justification why the internal flag is safe. The only location which could leak the flag in is tcp_bpf_sendmsg(), which is taken care of by clearing the previously unused bit. v2: - remove superfluous decrypted mark copy (Willem); - remove the stale doc entry (Boris); - rely entirely on EOR marking to prevent coalescing (Boris); - use an internal sendpages flag instead of marking the socket (Boris). v3 (Willem): - reorganize the can_skb_orphan_partial() condition; - fix the flag leak-in through tcp_bpf_sendmsg. Signed-off-by: Jakub Kicinski Acked-by: Willem de Bruijn Reviewed-by: Boris Pismenny Signed-off-by: David S. Miller --- Documentation/networking/tls-offload.rst | 18 ------------------ include/linux/skbuff.h | 8 ++++++++ include/linux/socket.h | 3 +++ include/net/sock.h | 10 +++++++++- net/core/sock.c | 19 ++++++++++++++----- net/ipv4/tcp.c | 3 +++ net/ipv4/tcp_bpf.c | 6 +++++- net/ipv4/tcp_output.c | 3 +++ net/tls/tls_device.c | 9 +++++++-- 9 files changed, 52 insertions(+), 27 deletions(-) diff --git a/Documentation/networking/tls-offload.rst b/Documentation/networking/tls-offload.rst index b70b70dc4524..0dd3f748239f 100644 --- a/Documentation/networking/tls-offload.rst +++ b/Documentation/networking/tls-offload.rst @@ -506,21 +506,3 @@ Drivers should ignore the changes to TLS the device feature flags. These flags will be acted upon accordingly by the core ``ktls`` code. TLS device feature flags only control adding of new TLS connection offloads, old connections will remain active after flags are cleared. - -Known bugs -========== - -skb_orphan() leaks clear text ------------------------------ - -Currently drivers depend on the :c:member:`sk` member of -:c:type:`struct sk_buff ` to identify segments requiring -encryption. Any operation which removes or does not preserve the socket -association such as :c:func:`skb_orphan` or :c:func:`skb_clone` -will cause the driver to miss the packets and lead to clear text leaks. - -Redirects leak clear text -------------------------- - -In the RX direction, if segment has already been decrypted by the device -and it gets redirected or mirrored - clear text will be transmitted out. diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index d8af86d995d6..ba5583522d24 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1374,6 +1374,14 @@ static inline void skb_copy_hash(struct sk_buff *to, const struct sk_buff *from) to->l4_hash = from->l4_hash; }; +static inline void skb_copy_decrypted(struct sk_buff *to, + const struct sk_buff *from) +{ +#ifdef CONFIG_TLS_DEVICE + to->decrypted = from->decrypted; +#endif +} + #ifdef NET_SKBUFF_DATA_USES_OFFSET static inline unsigned char *skb_end_pointer(const struct sk_buff *skb) { diff --git a/include/linux/socket.h b/include/linux/socket.h index 97523818cb14..fc0bed59fc84 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -292,6 +292,9 @@ struct ucred { #define MSG_BATCH 0x40000 /* sendmmsg(): more messages coming */ #define MSG_EOF MSG_FIN #define MSG_NO_SHARED_FRAGS 0x80000 /* sendpage() internal : page frags are not shared */ +#define MSG_SENDPAGE_DECRYPTED 0x100000 /* sendpage() internal : page may carry + * plain text and require encryption + */ #define MSG_ZEROCOPY 0x4000000 /* Use user data in kernel path */ #define MSG_FASTOPEN 0x20000000 /* Send data in TCP SYN */ diff --git a/include/net/sock.h b/include/net/sock.h index 228db3998e46..2c53f1a1d905 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2482,6 +2482,7 @@ static inline bool sk_fullsock(const struct sock *sk) /* Checks if this SKB belongs to an HW offloaded socket * and whether any SW fallbacks are required based on dev. + * Check decrypted mark in case skb_orphan() cleared socket. */ static inline struct sk_buff *sk_validate_xmit_skb(struct sk_buff *skb, struct net_device *dev) @@ -2489,8 +2490,15 @@ static inline struct sk_buff *sk_validate_xmit_skb(struct sk_buff *skb, #ifdef CONFIG_SOCK_VALIDATE_XMIT struct sock *sk = skb->sk; - if (sk && sk_fullsock(sk) && sk->sk_validate_xmit_skb) + if (sk && sk_fullsock(sk) && sk->sk_validate_xmit_skb) { skb = sk->sk_validate_xmit_skb(sk, dev, skb); +#ifdef CONFIG_TLS_DEVICE + } else if (unlikely(skb->decrypted)) { + pr_warn_ratelimited("unencrypted skb with no associated socket - dropping\n"); + kfree_skb(skb); + skb = NULL; +#endif + } #endif return skb; diff --git a/net/core/sock.c b/net/core/sock.c index d57b0cc995a0..6d08553f885c 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1992,6 +1992,19 @@ void skb_set_owner_w(struct sk_buff *skb, struct sock *sk) } EXPORT_SYMBOL(skb_set_owner_w); +static bool can_skb_orphan_partial(const struct sk_buff *skb) +{ +#ifdef CONFIG_TLS_DEVICE + /* Drivers depend on in-order delivery for crypto offload, + * partial orphan breaks out-of-order-OK logic. + */ + if (skb->decrypted) + return false; +#endif + return (skb->destructor == sock_wfree || + (IS_ENABLED(CONFIG_INET) && skb->destructor == tcp_wfree)); +} + /* This helper is used by netem, as it can hold packets in its * delay queue. We want to allow the owner socket to send more * packets, as if they were already TX completed by a typical driver. @@ -2003,11 +2016,7 @@ void skb_orphan_partial(struct sk_buff *skb) if (skb_is_tcp_pure_ack(skb)) return; - if (skb->destructor == sock_wfree -#ifdef CONFIG_INET - || skb->destructor == tcp_wfree -#endif - ) { + if (can_skb_orphan_partial(skb)) { struct sock *sk = skb->sk; if (refcount_inc_not_zero(&sk->sk_refcnt)) { diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 776905899ac0..77b485d60b9d 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -984,6 +984,9 @@ ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset, if (!skb) goto wait_for_memory; +#ifdef CONFIG_TLS_DEVICE + skb->decrypted = !!(flags & MSG_SENDPAGE_DECRYPTED); +#endif skb_entail(sk, skb); copy = size_goal; } diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c index 3d1e15401384..8a56e09cfb0e 100644 --- a/net/ipv4/tcp_bpf.c +++ b/net/ipv4/tcp_bpf.c @@ -398,10 +398,14 @@ static int tcp_bpf_send_verdict(struct sock *sk, struct sk_psock *psock, static int tcp_bpf_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) { struct sk_msg tmp, *msg_tx = NULL; - int flags = msg->msg_flags | MSG_NO_SHARED_FRAGS; int copied = 0, err = 0; struct sk_psock *psock; long timeo; + int flags; + + /* Don't let internal do_tcp_sendpages() flags through */ + flags = (msg->msg_flags & ~MSG_SENDPAGE_DECRYPTED); + flags |= MSG_NO_SHARED_FRAGS; psock = sk_psock_get(sk); if (unlikely(!psock)) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 6e4afc48d7bb..979520e46e33 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1320,6 +1320,7 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue, buff = sk_stream_alloc_skb(sk, nsize, gfp, true); if (!buff) return -ENOMEM; /* We'll just try again later. */ + skb_copy_decrypted(buff, skb); sk->sk_wmem_queued += buff->truesize; sk_mem_charge(sk, buff->truesize); @@ -1874,6 +1875,7 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len, buff = sk_stream_alloc_skb(sk, 0, gfp, true); if (unlikely(!buff)) return -ENOMEM; + skb_copy_decrypted(buff, skb); sk->sk_wmem_queued += buff->truesize; sk_mem_charge(sk, buff->truesize); @@ -2143,6 +2145,7 @@ static int tcp_mtu_probe(struct sock *sk) sk_mem_charge(sk, nskb->truesize); skb = tcp_send_head(sk); + skb_copy_decrypted(nskb, skb); TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(skb)->seq; TCP_SKB_CB(nskb)->end_seq = TCP_SKB_CB(skb)->seq + probe_size; diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c index 7c0b2b778703..43922d86e510 100644 --- a/net/tls/tls_device.c +++ b/net/tls/tls_device.c @@ -373,9 +373,9 @@ static int tls_push_data(struct sock *sk, struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_prot_info *prot = &tls_ctx->prot_info; struct tls_offload_context_tx *ctx = tls_offload_ctx_tx(tls_ctx); - int tls_push_record_flags = flags | MSG_SENDPAGE_NOTLAST; int more = flags & (MSG_SENDPAGE_NOTLAST | MSG_MORE); struct tls_record_info *record = ctx->open_record; + int tls_push_record_flags; struct page_frag *pfrag; size_t orig_size = size; u32 max_open_record_len; @@ -390,6 +390,9 @@ static int tls_push_data(struct sock *sk, if (sk->sk_err) return -sk->sk_err; + flags |= MSG_SENDPAGE_DECRYPTED; + tls_push_record_flags = flags | MSG_SENDPAGE_NOTLAST; + timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT); if (tls_is_partially_sent_record(tls_ctx)) { rc = tls_push_partial_record(sk, tls_ctx, flags); @@ -576,7 +579,9 @@ void tls_device_write_space(struct sock *sk, struct tls_context *ctx) gfp_t sk_allocation = sk->sk_allocation; sk->sk_allocation = GFP_ATOMIC; - tls_push_partial_record(sk, ctx, MSG_DONTWAIT | MSG_NOSIGNAL); + tls_push_partial_record(sk, ctx, + MSG_DONTWAIT | MSG_NOSIGNAL | + MSG_SENDPAGE_DECRYPTED); sk->sk_allocation = sk_allocation; } } From 227f2f030e28d8783c3d10ce70ff4ba79cad653f Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Thu, 8 Aug 2019 14:22:47 +0800 Subject: [PATCH 268/482] team: Add vlan tx offload to hw_enc_features We should also enable team's vlan tx offload in hw_enc_features, pass the vlan packets to the slave devices with vlan tci, let the slave handle vlan tunneling offload implementation. Fixes: 3268e5cb494d ("team: Advertise tunneling offload features") Signed-off-by: YueHaibing Signed-off-by: David S. Miller --- drivers/net/team/team.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index abfa0da9bbd2..e8089def5a46 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -1004,6 +1004,8 @@ static void __team_compute_features(struct team *team) team->dev->vlan_features = vlan_features; team->dev->hw_enc_features = enc_features | NETIF_F_GSO_ENCAP_ALL | + NETIF_F_HW_VLAN_CTAG_TX | + NETIF_F_HW_VLAN_STAG_TX | NETIF_F_GSO_UDP_L4; team->dev->hard_header_len = max_hard_header_len; From 8c25d0887a8bd0e1ca2074ac0c6dff173787a83b Mon Sep 17 00:00:00 2001 From: Fuqian Huang Date: Fri, 9 Aug 2019 13:35:39 +0800 Subject: [PATCH 269/482] net: tundra: tsi108: use spin_lock_irqsave instead of spin_lock_irq in IRQ context As spin_unlock_irq will enable interrupts. Function tsi108_stat_carry is called from interrupt handler tsi108_irq. Interrupts are enabled in interrupt handler. Use spin_lock_irqsave/spin_unlock_irqrestore instead of spin_(un)lock_irq in IRQ context to avoid this. Signed-off-by: Fuqian Huang Signed-off-by: David S. Miller --- drivers/net/ethernet/tundra/tsi108_eth.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/tundra/tsi108_eth.c b/drivers/net/ethernet/tundra/tsi108_eth.c index 78a7de3fb622..c62f474b6d08 100644 --- a/drivers/net/ethernet/tundra/tsi108_eth.c +++ b/drivers/net/ethernet/tundra/tsi108_eth.c @@ -371,9 +371,10 @@ tsi108_stat_carry_one(int carry, int carry_bit, int carry_shift, static void tsi108_stat_carry(struct net_device *dev) { struct tsi108_prv_data *data = netdev_priv(dev); + unsigned long flags; u32 carry1, carry2; - spin_lock_irq(&data->misclock); + spin_lock_irqsave(&data->misclock, flags); carry1 = TSI_READ(TSI108_STAT_CARRY1); carry2 = TSI_READ(TSI108_STAT_CARRY2); @@ -441,7 +442,7 @@ static void tsi108_stat_carry(struct net_device *dev) TSI108_STAT_TXPAUSEDROP_CARRY, &data->tx_pause_drop); - spin_unlock_irq(&data->misclock); + spin_unlock_irqrestore(&data->misclock, flags); } /* Read a stat counter atomically with respect to carries. From 03fdfb2690099c19160a3f2c5b77db60b3afeded Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 5 Aug 2019 10:34:51 +0100 Subject: [PATCH 270/482] KVM: arm64: Don't write junk to sysregs on reset At the moment, the way we reset system registers is mildly insane: We write junk to them, call the reset functions, and then check that we have something else in them. The "fun" thing is that this can happen while the guest is running (PSCI, for example). If anything in KVM has to evaluate the state of a system register while junk is in there, bad thing may happen. Let's stop doing that. Instead, we track that we have called a reset function for that register, and assume that the reset function has done something. This requires fixing a couple of sysreg refinition in the trap table. In the end, the very need of this reset check is pretty dubious, as it doesn't check everything (a lot of the sysregs leave outside of the sys_regs[] array). It may well be axed in the near future. Tested-by: Zenghui Yu Signed-off-by: Marc Zyngier --- arch/arm64/kvm/sys_regs.c | 32 ++++++++++++++++++-------------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index f26e181d881c..2071260a275b 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -632,7 +632,7 @@ static void reset_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) */ val = ((pmcr & ~ARMV8_PMU_PMCR_MASK) | (ARMV8_PMU_PMCR_MASK & 0xdecafbad)) & (~ARMV8_PMU_PMCR_E); - __vcpu_sys_reg(vcpu, PMCR_EL0) = val; + __vcpu_sys_reg(vcpu, r->reg) = val; } static bool check_pmu_access_disabled(struct kvm_vcpu *vcpu, u64 flags) @@ -981,13 +981,13 @@ static bool access_pmuserenr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, /* Silly macro to expand the DBG{BCR,BVR,WVR,WCR}n_EL1 registers in one go */ #define DBG_BCR_BVR_WCR_WVR_EL1(n) \ { SYS_DESC(SYS_DBGBVRn_EL1(n)), \ - trap_bvr, reset_bvr, n, 0, get_bvr, set_bvr }, \ + trap_bvr, reset_bvr, 0, 0, get_bvr, set_bvr }, \ { SYS_DESC(SYS_DBGBCRn_EL1(n)), \ - trap_bcr, reset_bcr, n, 0, get_bcr, set_bcr }, \ + trap_bcr, reset_bcr, 0, 0, get_bcr, set_bcr }, \ { SYS_DESC(SYS_DBGWVRn_EL1(n)), \ - trap_wvr, reset_wvr, n, 0, get_wvr, set_wvr }, \ + trap_wvr, reset_wvr, 0, 0, get_wvr, set_wvr }, \ { SYS_DESC(SYS_DBGWCRn_EL1(n)), \ - trap_wcr, reset_wcr, n, 0, get_wcr, set_wcr } + trap_wcr, reset_wcr, 0, 0, get_wcr, set_wcr } /* Macro to expand the PMEVCNTRn_EL0 register */ #define PMU_PMEVCNTR_EL0(n) \ @@ -1540,7 +1540,7 @@ static const struct sys_reg_desc sys_reg_descs[] = { { SYS_DESC(SYS_CSSELR_EL1), access_csselr, reset_unknown, CSSELR_EL1 }, { SYS_DESC(SYS_CTR_EL0), access_ctr }, - { SYS_DESC(SYS_PMCR_EL0), access_pmcr, reset_pmcr, }, + { SYS_DESC(SYS_PMCR_EL0), access_pmcr, reset_pmcr, PMCR_EL0 }, { SYS_DESC(SYS_PMCNTENSET_EL0), access_pmcnten, reset_unknown, PMCNTENSET_EL0 }, { SYS_DESC(SYS_PMCNTENCLR_EL0), access_pmcnten, NULL, PMCNTENSET_EL0 }, { SYS_DESC(SYS_PMOVSCLR_EL0), access_pmovs, NULL, PMOVSSET_EL0 }, @@ -2254,13 +2254,19 @@ static int emulate_sys_reg(struct kvm_vcpu *vcpu, } static void reset_sys_reg_descs(struct kvm_vcpu *vcpu, - const struct sys_reg_desc *table, size_t num) + const struct sys_reg_desc *table, size_t num, + unsigned long *bmap) { unsigned long i; for (i = 0; i < num; i++) - if (table[i].reset) + if (table[i].reset) { + int reg = table[i].reg; + table[i].reset(vcpu, &table[i]); + if (reg > 0 && reg < NR_SYS_REGS) + set_bit(reg, bmap); + } } /** @@ -2774,18 +2780,16 @@ void kvm_reset_sys_regs(struct kvm_vcpu *vcpu) { size_t num; const struct sys_reg_desc *table; - - /* Catch someone adding a register without putting in reset entry. */ - memset(&vcpu->arch.ctxt.sys_regs, 0x42, sizeof(vcpu->arch.ctxt.sys_regs)); + DECLARE_BITMAP(bmap, NR_SYS_REGS) = { 0, }; /* Generic chip reset first (so target could override). */ - reset_sys_reg_descs(vcpu, sys_reg_descs, ARRAY_SIZE(sys_reg_descs)); + reset_sys_reg_descs(vcpu, sys_reg_descs, ARRAY_SIZE(sys_reg_descs), bmap); table = get_target_table(vcpu->arch.target, true, &num); - reset_sys_reg_descs(vcpu, table, num); + reset_sys_reg_descs(vcpu, table, num, bmap); for (num = 1; num < NR_SYS_REGS; num++) { - if (WARN(__vcpu_sys_reg(vcpu, num) == 0x4242424242424242, + if (WARN(!test_bit(num, bmap), "Didn't reset __vcpu_sys_reg(%zi)\n", num)) break; } From c69509c70aa45a8c4954c88c629a64acf4ee4a36 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 5 Aug 2019 10:34:51 +0100 Subject: [PATCH 271/482] KVM: arm: Don't write junk to CP15 registers on reset At the moment, the way we reset CP15 registers is mildly insane: We write junk to them, call the reset functions, and then check that we have something else in them. The "fun" thing is that this can happen while the guest is running (PSCI, for example). If anything in KVM has to evaluate the state of a CP15 register while junk is in there, bad thing may happen. Let's stop doing that. Instead, we track that we have called a reset function for that register, and assume that the reset function has done something. In the end, the very need of this reset check is pretty dubious, as it doesn't check everything (a lot of the CP15 reg leave outside of the cp15_regs[] array). It may well be axed in the near future. Signed-off-by: Marc Zyngier --- arch/arm/kvm/coproc.c | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c index d2806bcff8bb..07745ee022a1 100644 --- a/arch/arm/kvm/coproc.c +++ b/arch/arm/kvm/coproc.c @@ -651,13 +651,22 @@ int kvm_handle_cp14_64(struct kvm_vcpu *vcpu, struct kvm_run *run) } static void reset_coproc_regs(struct kvm_vcpu *vcpu, - const struct coproc_reg *table, size_t num) + const struct coproc_reg *table, size_t num, + unsigned long *bmap) { unsigned long i; for (i = 0; i < num; i++) - if (table[i].reset) + if (table[i].reset) { + int reg = table[i].reg; + table[i].reset(vcpu, &table[i]); + if (reg > 0 && reg < NR_CP15_REGS) { + set_bit(reg, bmap); + if (table[i].is_64bit) + set_bit(reg + 1, bmap); + } + } } static struct coproc_params decode_32bit_hsr(struct kvm_vcpu *vcpu) @@ -1432,17 +1441,15 @@ void kvm_reset_coprocs(struct kvm_vcpu *vcpu) { size_t num; const struct coproc_reg *table; - - /* Catch someone adding a register without putting in reset entry. */ - memset(vcpu->arch.ctxt.cp15, 0x42, sizeof(vcpu->arch.ctxt.cp15)); + DECLARE_BITMAP(bmap, NR_CP15_REGS) = { 0, }; /* Generic chip reset first (so target could override). */ - reset_coproc_regs(vcpu, cp15_regs, ARRAY_SIZE(cp15_regs)); + reset_coproc_regs(vcpu, cp15_regs, ARRAY_SIZE(cp15_regs), bmap); table = get_target_table(vcpu->arch.target, &num); - reset_coproc_regs(vcpu, table, num); + reset_coproc_regs(vcpu, table, num, bmap); for (num = 1; num < NR_CP15_REGS; num++) - WARN(vcpu_cp15(vcpu, num) == 0x42424242, + WARN(!test_bit(num, bmap), "Didn't reset vcpu_cp15(vcpu, %zi)", num); } From 16e604a437c89751dc626c9e90cf88ba93c5be64 Mon Sep 17 00:00:00 2001 From: Alexandru Elisei Date: Wed, 7 Aug 2019 10:53:20 +0100 Subject: [PATCH 272/482] KVM: arm/arm64: vgic: Reevaluate level sensitive interrupts on enable A HW mapped level sensitive interrupt asserted by a device will not be put into the ap_list if it is disabled at the VGIC level. When it is enabled again, it will be inserted into the ap_list and written to a list register on guest entry regardless of the state of the device. We could argue that this can also happen on real hardware, when the command to enable the interrupt reached the GIC before the device had the chance to de-assert the interrupt signal; however, we emulate the distributor and redistributors in software and we can do better than that. Signed-off-by: Alexandru Elisei Signed-off-by: Marc Zyngier --- virt/kvm/arm/vgic/vgic-mmio.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/virt/kvm/arm/vgic/vgic-mmio.c b/virt/kvm/arm/vgic/vgic-mmio.c index 3ba7278fb533..44efc2ff863f 100644 --- a/virt/kvm/arm/vgic/vgic-mmio.c +++ b/virt/kvm/arm/vgic/vgic-mmio.c @@ -113,6 +113,22 @@ void vgic_mmio_write_senable(struct kvm_vcpu *vcpu, struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); raw_spin_lock_irqsave(&irq->irq_lock, flags); + if (vgic_irq_is_mapped_level(irq)) { + bool was_high = irq->line_level; + + /* + * We need to update the state of the interrupt because + * the guest might have changed the state of the device + * while the interrupt was disabled at the VGIC level. + */ + irq->line_level = vgic_get_phys_line_level(irq); + /* + * Deactivate the physical interrupt so the GIC will let + * us know when it is asserted again. + */ + if (!irq->active && was_high && !irq->line_level) + vgic_irq_set_phys_active(irq, false); + } irq->enabled = true; vgic_queue_irq_unlock(vcpu->kvm, irq, flags); From eac4471d0882da14be652ef0a55b914145fab15a Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 8 Aug 2019 13:32:36 +0300 Subject: [PATCH 273/482] drm/i915: Use after free in error path in intel_vgpu_create_workload() We can't free "workload" until after the printk or it's a use after free. Fixes: 2089a76ade90 ("drm/i915/gvt: Checking workload's gma earlier") Signed-off-by: Dan Carpenter Reviewed-by: Chris Wilson Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/scheduler.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index 9f3fd7d96a69..75baff657e43 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -1528,9 +1528,9 @@ intel_vgpu_create_workload(struct intel_vgpu *vgpu, int ring_id, if (!intel_gvt_ggtt_validate_range(vgpu, workload->wa_ctx.indirect_ctx.guest_gma, workload->wa_ctx.indirect_ctx.size)) { - kmem_cache_free(s->workloads, workload); gvt_vgpu_err("invalid wa_ctx at: 0x%lx\n", workload->wa_ctx.indirect_ctx.guest_gma); + kmem_cache_free(s->workloads, workload); return ERR_PTR(-EINVAL); } } @@ -1542,9 +1542,9 @@ intel_vgpu_create_workload(struct intel_vgpu *vgpu, int ring_id, if (!intel_gvt_ggtt_validate_range(vgpu, workload->wa_ctx.per_ctx.guest_gma, CACHELINE_BYTES)) { - kmem_cache_free(s->workloads, workload); gvt_vgpu_err("invalid per_ctx at: 0x%lx\n", workload->wa_ctx.per_ctx.guest_gma); + kmem_cache_free(s->workloads, workload); return ERR_PTR(-EINVAL); } } From 404861e15b5fa7edbab22400f9174c1a21fde731 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 7 Aug 2019 14:31:59 +0200 Subject: [PATCH 274/482] s390/vdso: map vdso also for statically linked binaries s390 does not map the vdso for statically linked binaries, assuming that this doesn't make sense. See commit fc5243d98ac2 ("[S390] arch_setup_additional_pages arguments"). However with glibc commit d665367f596d ("linux: Enable vDSO for static linking as default (BZ#19767)") and commit 5e855c895401 ("s390: Enable VDSO for static linking") the vdso is also used for statically linked binaries - if the kernel would make it available. Therefore map the vdso always, just like all other architectures. Reported-by: Stefan Liebler Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/kernel/vdso.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c index 243d8b1185bf..c6bc190f3c28 100644 --- a/arch/s390/kernel/vdso.c +++ b/arch/s390/kernel/vdso.c @@ -216,11 +216,6 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) if (!vdso_enabled) return 0; - /* - * Only map the vdso for dynamically linked elf binaries. - */ - if (!uses_interp) - return 0; vdso_pages = vdso64_pages; #ifdef CONFIG_COMPAT_VDSO From de768ce45466f3009809719eb7b1f6f5277d9373 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 9 Aug 2019 11:23:00 +0200 Subject: [PATCH 275/482] ALSA: hda - Apply workaround for another AMD chip 1022:1487 MSI MPG X570 board is with another AMD HD-audio controller (PCI ID 1022:1487) and it requires the same workaround applied for X370, etc (PCI ID 1022:1457). BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=195303 Cc: Signed-off-by: Takashi Iwai --- sound/pci/hda/hda_intel.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index a6d8c0d77b84..99fc0917339b 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -2508,6 +2508,9 @@ static const struct pci_device_id azx_ids[] = { /* AMD, X370 & co */ { PCI_DEVICE(0x1022, 0x1457), .driver_data = AZX_DRIVER_GENERIC | AZX_DCAPS_PRESET_AMD_SB }, + /* AMD, X570 & co */ + { PCI_DEVICE(0x1022, 0x1487), + .driver_data = AZX_DRIVER_GENERIC | AZX_DCAPS_PRESET_AMD_SB }, /* AMD Stoney */ { PCI_DEVICE(0x1022, 0x157a), .driver_data = AZX_DRIVER_GENERIC | AZX_DCAPS_PRESET_ATI_SB | From d555c34338cae844b207564c482e5a3fb089d25e Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Fri, 9 Aug 2019 10:32:40 +0200 Subject: [PATCH 276/482] omap-dma/omap_vout_vrfb: fix off-by-one fi value The OMAP 4 TRM specifies that when using double-index addressing the address increases by the ES plus the EI value minus 1 within a frame. When a full frame is transferred, the address increases by the ES plus the frame index (FI) value minus 1. The omap-dma code didn't account for the 'minus 1' in the FI register. To get correct addressing, add 1 to the src_icg value. This was found when testing a hacked version of the media m2m-deinterlace.c driver on a Pandaboard. The only other source that uses this feature is omap_vout_vrfb.c, and that adds a + 1 when setting the dst_icg. This is a workaround for the broken omap-dma.c behavior. So remove the workaround at the same time that we fix omap-dma.c. I tested the omap_vout driver with a Beagle XM board to check that the '+ 1' in omap_vout_vrfb.c was indeed a workaround for the omap-dma bug. Signed-off-by: Hans Verkuil Reviewed-by: Laurent Pinchart Acked-by: Peter Ujfalusi Acked-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/952e7f51-f208-9333-6f58-b7ed20d2ea0b@xs4all.nl Signed-off-by: Vinod Koul --- drivers/dma/ti/omap-dma.c | 4 ++-- drivers/media/platform/omap/omap_vout_vrfb.c | 3 +-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/dma/ti/omap-dma.c b/drivers/dma/ti/omap-dma.c index ba2489d4ea24..ba27802efcd0 100644 --- a/drivers/dma/ti/omap-dma.c +++ b/drivers/dma/ti/omap-dma.c @@ -1234,7 +1234,7 @@ static struct dma_async_tx_descriptor *omap_dma_prep_dma_interleaved( if (src_icg) { d->ccr |= CCR_SRC_AMODE_DBLIDX; d->ei = 1; - d->fi = src_icg; + d->fi = src_icg + 1; } else if (xt->src_inc) { d->ccr |= CCR_SRC_AMODE_POSTINC; d->fi = 0; @@ -1249,7 +1249,7 @@ static struct dma_async_tx_descriptor *omap_dma_prep_dma_interleaved( if (dst_icg) { d->ccr |= CCR_DST_AMODE_DBLIDX; sg->ei = 1; - sg->fi = dst_icg; + sg->fi = dst_icg + 1; } else if (xt->dst_inc) { d->ccr |= CCR_DST_AMODE_POSTINC; sg->fi = 0; diff --git a/drivers/media/platform/omap/omap_vout_vrfb.c b/drivers/media/platform/omap/omap_vout_vrfb.c index 29e3f5da59c1..11ec048929e8 100644 --- a/drivers/media/platform/omap/omap_vout_vrfb.c +++ b/drivers/media/platform/omap/omap_vout_vrfb.c @@ -253,8 +253,7 @@ int omap_vout_prepare_vrfb(struct omap_vout_device *vout, */ pixsize = vout->bpp * vout->vrfb_bpp; - dst_icg = ((MAX_PIXELS_PER_LINE * pixsize) - - (vout->pix.width * vout->bpp)) + 1; + dst_icg = MAX_PIXELS_PER_LINE * pixsize - vout->pix.width * vout->bpp; xt->src_start = vout->buf_phy_addr[vb->i]; xt->dst_start = vout->vrfb_context[vb->i].paddr[0]; From 6a0a8d10a3661a036b55af695542a714c429ab7c Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 9 Aug 2019 11:01:27 +0200 Subject: [PATCH 277/482] netfilter: nf_tables: use-after-free in failing rule with bound set If a rule that has already a bound anonymous set fails to be added, the preparation phase releases the rule and the bound set. However, the transaction object from the abort path still has a reference to the set object that is stale, leading to a use-after-free when checking for the set->bound field. Add a new field to the transaction that specifies if the set is bound, so the abort path can skip releasing it since the rule command owns it and it takes care of releasing it. After this update, the set->bound field is removed. [ 24.649883] Unable to handle kernel paging request at virtual address 0000000000040434 [ 24.657858] Mem abort info: [ 24.660686] ESR = 0x96000004 [ 24.663769] Exception class = DABT (current EL), IL = 32 bits [ 24.669725] SET = 0, FnV = 0 [ 24.672804] EA = 0, S1PTW = 0 [ 24.675975] Data abort info: [ 24.678880] ISV = 0, ISS = 0x00000004 [ 24.682743] CM = 0, WnR = 0 [ 24.685723] user pgtable: 4k pages, 48-bit VAs, pgdp=0000000428952000 [ 24.692207] [0000000000040434] pgd=0000000000000000 [ 24.697119] Internal error: Oops: 96000004 [#1] SMP [...] [ 24.889414] Call trace: [ 24.891870] __nf_tables_abort+0x3f0/0x7a0 [ 24.895984] nf_tables_abort+0x20/0x40 [ 24.899750] nfnetlink_rcv_batch+0x17c/0x588 [ 24.904037] nfnetlink_rcv+0x13c/0x190 [ 24.907803] netlink_unicast+0x18c/0x208 [ 24.911742] netlink_sendmsg+0x1b0/0x350 [ 24.915682] sock_sendmsg+0x4c/0x68 [ 24.919185] ___sys_sendmsg+0x288/0x2c8 [ 24.923037] __sys_sendmsg+0x7c/0xd0 [ 24.926628] __arm64_sys_sendmsg+0x2c/0x38 [ 24.930744] el0_svc_common.constprop.0+0x94/0x158 [ 24.935556] el0_svc_handler+0x34/0x90 [ 24.939322] el0_svc+0x8/0xc [ 24.942216] Code: 37280300 f9404023 91014262 aa1703e0 (f9401863) [ 24.948336] ---[ end trace cebbb9dcbed3b56f ]--- Fixes: f6ac85858976 ("netfilter: nf_tables: unbind set in rule from commit path") Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 9 +++++++-- net/netfilter/nf_tables_api.c | 15 ++++++++++----- 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 9b624566b82d..475d6f28ca67 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -421,8 +421,7 @@ struct nft_set { unsigned char *udata; /* runtime data below here */ const struct nft_set_ops *ops ____cacheline_aligned; - u16 flags:13, - bound:1, + u16 flags:14, genmask:2; u8 klen; u8 dlen; @@ -1348,12 +1347,15 @@ struct nft_trans_rule { struct nft_trans_set { struct nft_set *set; u32 set_id; + bool bound; }; #define nft_trans_set(trans) \ (((struct nft_trans_set *)trans->data)->set) #define nft_trans_set_id(trans) \ (((struct nft_trans_set *)trans->data)->set_id) +#define nft_trans_set_bound(trans) \ + (((struct nft_trans_set *)trans->data)->bound) struct nft_trans_chain { bool update; @@ -1384,12 +1386,15 @@ struct nft_trans_table { struct nft_trans_elem { struct nft_set *set; struct nft_set_elem elem; + bool bound; }; #define nft_trans_elem_set(trans) \ (((struct nft_trans_elem *)trans->data)->set) #define nft_trans_elem(trans) \ (((struct nft_trans_elem *)trans->data)->elem) +#define nft_trans_elem_set_bound(trans) \ + (((struct nft_trans_elem *)trans->data)->bound) struct nft_trans_obj { struct nft_object *obj; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 605a7cfe7ca7..88abbddf8967 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -138,9 +138,14 @@ static void nft_set_trans_bind(const struct nft_ctx *ctx, struct nft_set *set) return; list_for_each_entry_reverse(trans, &net->nft.commit_list, list) { - if (trans->msg_type == NFT_MSG_NEWSET && - nft_trans_set(trans) == set) { - set->bound = true; + switch (trans->msg_type) { + case NFT_MSG_NEWSET: + if (nft_trans_set(trans) == set) + nft_trans_set_bound(trans) = true; + break; + case NFT_MSG_NEWSETELEM: + if (nft_trans_elem_set(trans) == set) + nft_trans_elem_set_bound(trans) = true; break; } } @@ -6906,7 +6911,7 @@ static int __nf_tables_abort(struct net *net) break; case NFT_MSG_NEWSET: trans->ctx.table->use--; - if (nft_trans_set(trans)->bound) { + if (nft_trans_set_bound(trans)) { nft_trans_destroy(trans); break; } @@ -6918,7 +6923,7 @@ static int __nf_tables_abort(struct net *net) nft_trans_destroy(trans); break; case NFT_MSG_NEWSETELEM: - if (nft_trans_elem_set(trans)->bound) { + if (nft_trans_elem_set_bound(trans)) { nft_trans_destroy(trans); break; } From 3e68db2f6422d711550a32cbc87abd97bb6efab3 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 9 Aug 2019 11:01:33 +0200 Subject: [PATCH 278/482] netfilter: nf_flow_table: conntrack picks up expired flows Update conntrack entry to pick up expired flows, otherwise the conntrack entry gets stuck with the internal offload timeout (one day). The TCP state also needs to be adjusted to ESTABLISHED state and tracking is set to liberal mode in order to give conntrack a chance to pick up the expired flow. Fixes: ac2a66665e23 ("netfilter: add generic flow table infrastructure") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_flow_table_core.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c index e3d797252a98..68a24471ffee 100644 --- a/net/netfilter/nf_flow_table_core.c +++ b/net/netfilter/nf_flow_table_core.c @@ -111,7 +111,7 @@ static void flow_offload_fixup_tcp(struct ip_ct_tcp *tcp) #define NF_FLOWTABLE_TCP_PICKUP_TIMEOUT (120 * HZ) #define NF_FLOWTABLE_UDP_PICKUP_TIMEOUT (30 * HZ) -static void flow_offload_fixup_ct_state(struct nf_conn *ct) +static void flow_offload_fixup_ct(struct nf_conn *ct) { const struct nf_conntrack_l4proto *l4proto; unsigned int timeout; @@ -208,6 +208,11 @@ int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow) } EXPORT_SYMBOL_GPL(flow_offload_add); +static inline bool nf_flow_has_expired(const struct flow_offload *flow) +{ + return (__s32)(flow->timeout - (u32)jiffies) <= 0; +} + static void flow_offload_del(struct nf_flowtable *flow_table, struct flow_offload *flow) { @@ -223,6 +228,9 @@ static void flow_offload_del(struct nf_flowtable *flow_table, e = container_of(flow, struct flow_offload_entry, flow); clear_bit(IPS_OFFLOAD_BIT, &e->ct->status); + if (nf_flow_has_expired(flow)) + flow_offload_fixup_ct(e->ct); + flow_offload_free(flow); } @@ -233,7 +241,7 @@ void flow_offload_teardown(struct flow_offload *flow) flow->flags |= FLOW_OFFLOAD_TEARDOWN; e = container_of(flow, struct flow_offload_entry, flow); - flow_offload_fixup_ct_state(e->ct); + flow_offload_fixup_ct(e->ct); } EXPORT_SYMBOL_GPL(flow_offload_teardown); @@ -298,11 +306,6 @@ nf_flow_table_iterate(struct nf_flowtable *flow_table, return err; } -static inline bool nf_flow_has_expired(const struct flow_offload *flow) -{ - return (__s32)(flow->timeout - (u32)jiffies) <= 0; -} - static void nf_flow_offload_gc_step(struct flow_offload *flow, void *data) { struct nf_flowtable *flow_table = data; From 1e5b2471bcc4838df298080ae1ec042c2cbc9ce9 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 9 Aug 2019 11:01:35 +0200 Subject: [PATCH 279/482] netfilter: nf_flow_table: teardown flow timeout race Flows that are in teardown state (due to RST / FIN TCP packet) still have their offload flag set on. Hence, the conntrack garbage collector may race to undo the timeout adjustment that the fixup routine performs, leaving the conntrack entry in place with the internal offload timeout (one day). Update teardown flow state to ESTABLISHED and set tracking to liberal, then once the offload bit is cleared, adjust timeout if it is more than the default fixup timeout (conntrack might already have set a lower timeout from the packet path). Fixes: da5984e51063 ("netfilter: nf_flow_table: add support for sending flows back to the slow path") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_flow_table_core.c | 34 ++++++++++++++++++++++-------- 1 file changed, 25 insertions(+), 9 deletions(-) diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c index 68a24471ffee..80a8f9ae4c93 100644 --- a/net/netfilter/nf_flow_table_core.c +++ b/net/netfilter/nf_flow_table_core.c @@ -111,15 +111,16 @@ static void flow_offload_fixup_tcp(struct ip_ct_tcp *tcp) #define NF_FLOWTABLE_TCP_PICKUP_TIMEOUT (120 * HZ) #define NF_FLOWTABLE_UDP_PICKUP_TIMEOUT (30 * HZ) -static void flow_offload_fixup_ct(struct nf_conn *ct) +static inline __s32 nf_flow_timeout_delta(unsigned int timeout) +{ + return (__s32)(timeout - (u32)jiffies); +} + +static void flow_offload_fixup_ct_timeout(struct nf_conn *ct) { const struct nf_conntrack_l4proto *l4proto; + int l4num = nf_ct_protonum(ct); unsigned int timeout; - int l4num; - - l4num = nf_ct_protonum(ct); - if (l4num == IPPROTO_TCP) - flow_offload_fixup_tcp(&ct->proto.tcp); l4proto = nf_ct_l4proto_find(l4num); if (!l4proto) @@ -132,7 +133,20 @@ static void flow_offload_fixup_ct(struct nf_conn *ct) else return; - ct->timeout = nfct_time_stamp + timeout; + if (nf_flow_timeout_delta(ct->timeout) > (__s32)timeout) + ct->timeout = nfct_time_stamp + timeout; +} + +static void flow_offload_fixup_ct_state(struct nf_conn *ct) +{ + if (nf_ct_protonum(ct) == IPPROTO_TCP) + flow_offload_fixup_tcp(&ct->proto.tcp); +} + +static void flow_offload_fixup_ct(struct nf_conn *ct) +{ + flow_offload_fixup_ct_state(ct); + flow_offload_fixup_ct_timeout(ct); } void flow_offload_free(struct flow_offload *flow) @@ -210,7 +224,7 @@ EXPORT_SYMBOL_GPL(flow_offload_add); static inline bool nf_flow_has_expired(const struct flow_offload *flow) { - return (__s32)(flow->timeout - (u32)jiffies) <= 0; + return nf_flow_timeout_delta(flow->timeout) <= 0; } static void flow_offload_del(struct nf_flowtable *flow_table, @@ -230,6 +244,8 @@ static void flow_offload_del(struct nf_flowtable *flow_table, if (nf_flow_has_expired(flow)) flow_offload_fixup_ct(e->ct); + else if (flow->flags & FLOW_OFFLOAD_TEARDOWN) + flow_offload_fixup_ct_timeout(e->ct); flow_offload_free(flow); } @@ -241,7 +257,7 @@ void flow_offload_teardown(struct flow_offload *flow) flow->flags |= FLOW_OFFLOAD_TEARDOWN; e = container_of(flow, struct flow_offload_entry, flow); - flow_offload_fixup_ct(e->ct); + flow_offload_fixup_ct_state(e->ct); } EXPORT_SYMBOL_GPL(flow_offload_teardown); From 20621fedb2a696e4dc60bc1c5de37cf21976abcb Mon Sep 17 00:00:00 2001 From: Coly Li Date: Fri, 9 Aug 2019 14:14:05 +0800 Subject: [PATCH 280/482] bcache: Revert "bcache: use sysfs_match_string() instead of __sysfs_match_string()" This reverts commit 89e0341af082dbc170019f908846f4a424efc86b. In drivers/md/bcache/sysfs.c:bch_snprint_string_list(), NULL pointer at the end of list is necessary. Remove the NULL from last element of each lists will cause the following panic, [ 4340.455652] bcache: register_cache() registered cache device nvme0n1 [ 4340.464603] bcache: register_bdev() registered backing device sdk [ 4421.587335] bcache: bch_cached_dev_run() cached dev sdk is running already [ 4421.587348] bcache: bch_cached_dev_attach() Caching sdk as bcache0 on set 354e1d46-d99f-4d8b-870b-078b80dc88a6 [ 5139.247950] general protection fault: 0000 [#1] SMP NOPTI [ 5139.247970] CPU: 9 PID: 5896 Comm: cat Not tainted 4.12.14-95.29-default #1 SLE12-SP4 [ 5139.247988] Hardware name: HPE ProLiant DL380 Gen10/ProLiant DL380 Gen10, BIOS U30 04/18/2019 [ 5139.248006] task: ffff888fb25c0b00 task.stack: ffff9bbacc704000 [ 5139.248021] RIP: 0010:string+0x21/0x70 [ 5139.248030] RSP: 0018:ffff9bbacc707bf0 EFLAGS: 00010286 [ 5139.248043] RAX: ffffffffa7e432e3 RBX: ffff8881c20da02a RCX: ffff0a00ffffff04 [ 5139.248058] RDX: 3f00656863616362 RSI: ffff8881c20db000 RDI: ffffffffffffffff [ 5139.248075] RBP: ffff8881c20db000 R08: 0000000000000000 R09: ffff8881c20da02a [ 5139.248090] R10: 0000000000000004 R11: 0000000000000000 R12: ffff9bbacc707c48 [ 5139.248104] R13: 0000000000000fd6 R14: ffffffffc0665855 R15: ffffffffc0665855 [ 5139.248119] FS: 00007faf253b8700(0000) GS:ffff88903f840000(0000) knlGS:0000000000000000 [ 5139.248137] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 5139.248149] CR2: 00007faf25395008 CR3: 0000000f72150006 CR4: 00000000007606e0 [ 5139.248164] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 5139.248179] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 5139.248193] PKRU: 55555554 [ 5139.248200] Call Trace: [ 5139.248210] vsnprintf+0x1fb/0x510 [ 5139.248221] snprintf+0x39/0x40 [ 5139.248238] bch_snprint_string_list.constprop.15+0x5b/0x90 [bcache] [ 5139.248256] __bch_cached_dev_show+0x44d/0x5f0 [bcache] [ 5139.248270] ? __alloc_pages_nodemask+0xb2/0x210 [ 5139.248284] bch_cached_dev_show+0x2c/0x50 [bcache] [ 5139.248297] sysfs_kf_seq_show+0xbb/0x190 [ 5139.248308] seq_read+0xfc/0x3c0 [ 5139.248317] __vfs_read+0x26/0x140 [ 5139.248327] vfs_read+0x87/0x130 [ 5139.248336] SyS_read+0x42/0x90 [ 5139.248346] do_syscall_64+0x74/0x160 [ 5139.248358] entry_SYSCALL_64_after_hwframe+0x3d/0xa2 [ 5139.248370] RIP: 0033:0x7faf24eea370 [ 5139.248379] RSP: 002b:00007fff82d03f38 EFLAGS: 00000246 ORIG_RAX: 0000000000000000 [ 5139.248395] RAX: ffffffffffffffda RBX: 0000000000020000 RCX: 00007faf24eea370 [ 5139.248411] RDX: 0000000000020000 RSI: 00007faf25396000 RDI: 0000000000000003 [ 5139.248426] RBP: 00007faf25396000 R08: 00000000ffffffff R09: 0000000000000000 [ 5139.248441] R10: 000000007c9d4d41 R11: 0000000000000246 R12: 00007faf25396000 [ 5139.248456] R13: 0000000000000003 R14: 0000000000000000 R15: 0000000000000fff [ 5139.248892] Code: ff ff ff 0f 1f 80 00 00 00 00 49 89 f9 48 89 cf 48 c7 c0 e3 32 e4 a7 48 c1 ff 30 48 81 fa ff 0f 00 00 48 0f 46 d0 48 85 ff 74 45 <44> 0f b6 02 48 8d 42 01 45 84 c0 74 38 48 01 fa 4c 89 cf eb 0e The simplest way to fix is to revert commit 89e0341af082 ("bcache: use sysfs_match_string() instead of __sysfs_match_string()"). This bug was introduced in Linux v5.2, so this fix only applies to Linux v5.2 is enough for stable tree maintainer. Fixes: 89e0341af082 ("bcache: use sysfs_match_string() instead of __sysfs_match_string()") Cc: stable@vger.kernel.org Cc: Alexandru Ardelean Reported-by: Peifeng Lin Acked-by: Alexandru Ardelean Signed-off-by: Coly Li Signed-off-by: Jens Axboe --- drivers/md/bcache/sysfs.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c index 9f0826712845..e2059af90791 100644 --- a/drivers/md/bcache/sysfs.c +++ b/drivers/md/bcache/sysfs.c @@ -23,24 +23,28 @@ static const char * const bch_cache_modes[] = { "writethrough", "writeback", "writearound", - "none" + "none", + NULL }; /* Default is 0 ("auto") */ static const char * const bch_stop_on_failure_modes[] = { "auto", - "always" + "always", + NULL }; static const char * const cache_replacement_policies[] = { "lru", "fifo", - "random" + "random", + NULL }; static const char * const error_actions[] = { "unregister", - "panic" + "panic", + NULL }; write_attribute(attach); @@ -338,7 +342,7 @@ STORE(__cached_dev) } if (attr == &sysfs_cache_mode) { - v = sysfs_match_string(bch_cache_modes, buf); + v = __sysfs_match_string(bch_cache_modes, -1, buf); if (v < 0) return v; @@ -349,7 +353,7 @@ STORE(__cached_dev) } if (attr == &sysfs_stop_when_cache_set_failed) { - v = sysfs_match_string(bch_stop_on_failure_modes, buf); + v = __sysfs_match_string(bch_stop_on_failure_modes, -1, buf); if (v < 0) return v; @@ -816,7 +820,7 @@ STORE(__bch_cache_set) 0, UINT_MAX); if (attr == &sysfs_errors) { - v = sysfs_match_string(error_actions, buf); + v = __sysfs_match_string(error_actions, -1, buf); if (v < 0) return v; @@ -1088,7 +1092,7 @@ STORE(__bch_cache) } if (attr == &sysfs_cache_replacement_policy) { - v = sysfs_match_string(cache_replacement_policies, buf); + v = __sysfs_match_string(cache_replacement_policies, -1, buf); if (v < 0) return v; From 6a7553e8d84d5322d883cb83bb9888c49a0f04e0 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Fri, 9 Aug 2019 15:46:40 +0200 Subject: [PATCH 281/482] MAINTAINERS: handle fbdev changes through drm-misc tree fbdev patches will now go to upstream through drm-misc tree (IOW starting with v5.4 merge window fbdev changes will be included in DRM pull request) for improved maintainership and better integration testing. Update MAINTAINERS file accordingly. Acked-by: Daniel Vetter Signed-off-by: Bartlomiej Zolnierkiewicz --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index a2c343ee3b2c..38963aead319 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6377,7 +6377,7 @@ FRAMEBUFFER LAYER M: Bartlomiej Zolnierkiewicz L: dri-devel@lists.freedesktop.org L: linux-fbdev@vger.kernel.org -T: git git://github.com/bzolnier/linux.git +T: git git://anongit.freedesktop.org/drm/drm-misc Q: http://patchwork.kernel.org/project/linux-fbdev/list/ S: Maintained F: Documentation/fb/ From 730c5fd42c1e3652a065448fd235cb9fafb2bd10 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 9 Aug 2019 15:20:41 +0100 Subject: [PATCH 282/482] rxrpc: Fix local endpoint refcounting The object lifetime management on the rxrpc_local struct is broken in that the rxrpc_local_processor() function is expected to clean up and remove an object - but it may get requeued by packets coming in on the backing UDP socket once it starts running. This may result in the assertion in rxrpc_local_rcu() firing because the memory has been scheduled for RCU destruction whilst still queued: rxrpc: Assertion failed ------------[ cut here ]------------ kernel BUG at net/rxrpc/local_object.c:468! Note that if the processor comes around before the RCU free function, it will just do nothing because ->dead is true. Fix this by adding a separate refcount to count active users of the endpoint that causes the endpoint to be destroyed when it reaches 0. The original refcount can then be used to refcount objects through the work processor and cause the memory to be rcu freed when that reaches 0. Fixes: 4f95dd78a77e ("rxrpc: Rework local endpoint management") Reported-by: syzbot+1e0edc4b8b7494c28450@syzkaller.appspotmail.com Signed-off-by: David Howells --- net/rxrpc/af_rxrpc.c | 4 +- net/rxrpc/ar-internal.h | 5 ++- net/rxrpc/input.c | 16 ++++++-- net/rxrpc/local_object.c | 86 +++++++++++++++++++++++++--------------- 4 files changed, 72 insertions(+), 39 deletions(-) diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index d09eaf153544..8c9bd3ae9edf 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -193,7 +193,7 @@ static int rxrpc_bind(struct socket *sock, struct sockaddr *saddr, int len) service_in_use: write_unlock(&local->services_lock); - rxrpc_put_local(local); + rxrpc_unuse_local(local); ret = -EADDRINUSE; error_unlock: release_sock(&rx->sk); @@ -901,7 +901,7 @@ static int rxrpc_release_sock(struct sock *sk) rxrpc_queue_work(&rxnet->service_conn_reaper); rxrpc_queue_work(&rxnet->client_conn_reaper); - rxrpc_put_local(rx->local); + rxrpc_unuse_local(rx->local); rx->local = NULL; key_put(rx->key); rx->key = NULL; diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 822f45386e31..9796c45d2f6a 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -254,7 +254,8 @@ struct rxrpc_security { */ struct rxrpc_local { struct rcu_head rcu; - atomic_t usage; + atomic_t active_users; /* Number of users of the local endpoint */ + atomic_t usage; /* Number of references to the structure */ struct rxrpc_net *rxnet; /* The network ns in which this resides */ struct list_head link; struct socket *socket; /* my UDP socket */ @@ -1002,6 +1003,8 @@ struct rxrpc_local *rxrpc_lookup_local(struct net *, const struct sockaddr_rxrpc struct rxrpc_local *rxrpc_get_local(struct rxrpc_local *); struct rxrpc_local *rxrpc_get_local_maybe(struct rxrpc_local *); void rxrpc_put_local(struct rxrpc_local *); +struct rxrpc_local *rxrpc_use_local(struct rxrpc_local *); +void rxrpc_unuse_local(struct rxrpc_local *); void rxrpc_queue_local(struct rxrpc_local *); void rxrpc_destroy_all_locals(struct rxrpc_net *); diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 5bd6f1546e5c..ee95d1cd1cdf 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -1108,8 +1108,12 @@ static void rxrpc_post_packet_to_local(struct rxrpc_local *local, { _enter("%p,%p", local, skb); - skb_queue_tail(&local->event_queue, skb); - rxrpc_queue_local(local); + if (rxrpc_get_local_maybe(local)) { + skb_queue_tail(&local->event_queue, skb); + rxrpc_queue_local(local); + } else { + rxrpc_free_skb(skb, rxrpc_skb_rx_freed); + } } /* @@ -1119,8 +1123,12 @@ static void rxrpc_reject_packet(struct rxrpc_local *local, struct sk_buff *skb) { CHECK_SLAB_OKAY(&local->usage); - skb_queue_tail(&local->reject_queue, skb); - rxrpc_queue_local(local); + if (rxrpc_get_local_maybe(local)) { + skb_queue_tail(&local->reject_queue, skb); + rxrpc_queue_local(local); + } else { + rxrpc_free_skb(skb, rxrpc_skb_rx_freed); + } } /* diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c index b1c71bad510b..9798159ee65f 100644 --- a/net/rxrpc/local_object.c +++ b/net/rxrpc/local_object.c @@ -79,6 +79,7 @@ static struct rxrpc_local *rxrpc_alloc_local(struct rxrpc_net *rxnet, local = kzalloc(sizeof(struct rxrpc_local), GFP_KERNEL); if (local) { atomic_set(&local->usage, 1); + atomic_set(&local->active_users, 1); local->rxnet = rxnet; INIT_LIST_HEAD(&local->link); INIT_WORK(&local->processor, rxrpc_local_processor); @@ -266,11 +267,8 @@ struct rxrpc_local *rxrpc_lookup_local(struct net *net, * bind the transport socket may still fail if we're attempting * to use a local address that the dying object is still using. */ - if (!rxrpc_get_local_maybe(local)) { - cursor = cursor->next; - list_del_init(&local->link); + if (!rxrpc_use_local(local)) break; - } age = "old"; goto found; @@ -284,7 +282,10 @@ struct rxrpc_local *rxrpc_lookup_local(struct net *net, if (ret < 0) goto sock_error; - list_add_tail(&local->link, cursor); + if (cursor != &rxnet->local_endpoints) + list_replace(cursor, &local->link); + else + list_add_tail(&local->link, cursor); age = "new"; found: @@ -342,7 +343,8 @@ struct rxrpc_local *rxrpc_get_local_maybe(struct rxrpc_local *local) } /* - * Queue a local endpoint. + * Queue a local endpoint unless it has become unreferenced and pass the + * caller's reference to the work item. */ void rxrpc_queue_local(struct rxrpc_local *local) { @@ -351,15 +353,8 @@ void rxrpc_queue_local(struct rxrpc_local *local) if (rxrpc_queue_work(&local->processor)) trace_rxrpc_local(local, rxrpc_local_queued, atomic_read(&local->usage), here); -} - -/* - * A local endpoint reached its end of life. - */ -static void __rxrpc_put_local(struct rxrpc_local *local) -{ - _enter("%d", local->debug_id); - rxrpc_queue_work(&local->processor); + else + rxrpc_put_local(local); } /* @@ -375,10 +370,45 @@ void rxrpc_put_local(struct rxrpc_local *local) trace_rxrpc_local(local, rxrpc_local_put, n, here); if (n == 0) - __rxrpc_put_local(local); + call_rcu(&local->rcu, rxrpc_local_rcu); } } +/* + * Start using a local endpoint. + */ +struct rxrpc_local *rxrpc_use_local(struct rxrpc_local *local) +{ + unsigned int au; + + local = rxrpc_get_local_maybe(local); + if (!local) + return NULL; + + au = atomic_fetch_add_unless(&local->active_users, 1, 0); + if (au == 0) { + rxrpc_put_local(local); + return NULL; + } + + return local; +} + +/* + * Cease using a local endpoint. Once the number of active users reaches 0, we + * start the closure of the transport in the work processor. + */ +void rxrpc_unuse_local(struct rxrpc_local *local) +{ + unsigned int au; + + au = atomic_dec_return(&local->active_users); + if (au == 0) + rxrpc_queue_local(local); + else + rxrpc_put_local(local); +} + /* * Destroy a local endpoint's socket and then hand the record to RCU to dispose * of. @@ -393,16 +423,6 @@ static void rxrpc_local_destroyer(struct rxrpc_local *local) _enter("%d", local->debug_id); - /* We can get a race between an incoming call packet queueing the - * processor again and the work processor starting the destruction - * process which will shut down the UDP socket. - */ - if (local->dead) { - _leave(" [already dead]"); - return; - } - local->dead = true; - mutex_lock(&rxnet->local_mutex); list_del_init(&local->link); mutex_unlock(&rxnet->local_mutex); @@ -422,13 +442,11 @@ static void rxrpc_local_destroyer(struct rxrpc_local *local) */ rxrpc_purge_queue(&local->reject_queue); rxrpc_purge_queue(&local->event_queue); - - _debug("rcu local %d", local->debug_id); - call_rcu(&local->rcu, rxrpc_local_rcu); } /* - * Process events on an endpoint + * Process events on an endpoint. The work item carries a ref which + * we must release. */ static void rxrpc_local_processor(struct work_struct *work) { @@ -441,8 +459,10 @@ static void rxrpc_local_processor(struct work_struct *work) do { again = false; - if (atomic_read(&local->usage) == 0) - return rxrpc_local_destroyer(local); + if (atomic_read(&local->active_users) == 0) { + rxrpc_local_destroyer(local); + break; + } if (!skb_queue_empty(&local->reject_queue)) { rxrpc_reject_packets(local); @@ -454,6 +474,8 @@ static void rxrpc_local_processor(struct work_struct *work) again = true; } } while (again); + + rxrpc_put_local(local); } /* From e8c3af6bb33a9e4b56920ee00aef92eb5e4cf485 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 9 Aug 2019 15:20:41 +0100 Subject: [PATCH 283/482] rxrpc: Don't bother generating maxSkew in the ACK packet Don't bother generating maxSkew in the ACK packet as it has been obsolete since AFS 3.1. Signed-off-by: David Howells Reviewed-by: Jeffrey Altman --- net/rxrpc/af_rxrpc.c | 2 +- net/rxrpc/ar-internal.h | 3 +-- net/rxrpc/call_event.c | 15 ++++++-------- net/rxrpc/input.c | 43 +++++++++++++++-------------------------- net/rxrpc/output.c | 3 +-- net/rxrpc/recvmsg.c | 6 +++--- 6 files changed, 28 insertions(+), 44 deletions(-) diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index 8c9bd3ae9edf..0dbbfd1b6487 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -402,7 +402,7 @@ EXPORT_SYMBOL(rxrpc_kernel_check_life); */ void rxrpc_kernel_probe_life(struct socket *sock, struct rxrpc_call *call) { - rxrpc_propose_ACK(call, RXRPC_ACK_PING, 0, 0, true, false, + rxrpc_propose_ACK(call, RXRPC_ACK_PING, 0, true, false, rxrpc_propose_ack_ping_for_check_life); rxrpc_send_ack_packet(call, true, NULL); } diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 9796c45d2f6a..145335611af6 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -650,7 +650,6 @@ struct rxrpc_call { /* receive-phase ACK management */ u8 ackr_reason; /* reason to ACK */ - u16 ackr_skew; /* skew on packet being ACK'd */ rxrpc_serial_t ackr_serial; /* serial of packet being ACK'd */ rxrpc_serial_t ackr_first_seq; /* first sequence number received */ rxrpc_seq_t ackr_prev_seq; /* previous sequence number received */ @@ -744,7 +743,7 @@ int rxrpc_reject_call(struct rxrpc_sock *); /* * call_event.c */ -void rxrpc_propose_ACK(struct rxrpc_call *, u8, u16, u32, bool, bool, +void rxrpc_propose_ACK(struct rxrpc_call *, u8, u32, bool, bool, enum rxrpc_propose_ack_trace); void rxrpc_process_call(struct work_struct *); diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index bc2adeb3acb9..c767679bfa5d 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -43,8 +43,7 @@ static void rxrpc_propose_ping(struct rxrpc_call *call, * propose an ACK be sent */ static void __rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason, - u16 skew, u32 serial, bool immediate, - bool background, + u32 serial, bool immediate, bool background, enum rxrpc_propose_ack_trace why) { enum rxrpc_propose_ack_outcome outcome = rxrpc_propose_ack_use; @@ -69,14 +68,12 @@ static void __rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason, if (RXRPC_ACK_UPDATEABLE & (1 << ack_reason)) { outcome = rxrpc_propose_ack_update; call->ackr_serial = serial; - call->ackr_skew = skew; } if (!immediate) goto trace; } else if (prior > rxrpc_ack_priority[call->ackr_reason]) { call->ackr_reason = ack_reason; call->ackr_serial = serial; - call->ackr_skew = skew; } else { outcome = rxrpc_propose_ack_subsume; } @@ -137,11 +134,11 @@ static void __rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason, * propose an ACK be sent, locking the call structure */ void rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason, - u16 skew, u32 serial, bool immediate, bool background, + u32 serial, bool immediate, bool background, enum rxrpc_propose_ack_trace why) { spin_lock_bh(&call->lock); - __rxrpc_propose_ACK(call, ack_reason, skew, serial, + __rxrpc_propose_ACK(call, ack_reason, serial, immediate, background, why); spin_unlock_bh(&call->lock); } @@ -239,7 +236,7 @@ static void rxrpc_resend(struct rxrpc_call *call, unsigned long now_j) ack_ts = ktime_sub(now, call->acks_latest_ts); if (ktime_to_ns(ack_ts) < call->peer->rtt) goto out; - rxrpc_propose_ACK(call, RXRPC_ACK_PING, 0, 0, true, false, + rxrpc_propose_ACK(call, RXRPC_ACK_PING, 0, true, false, rxrpc_propose_ack_ping_for_lost_ack); rxrpc_send_ack_packet(call, true, NULL); goto out; @@ -372,7 +369,7 @@ void rxrpc_process_call(struct work_struct *work) if (time_after_eq(now, t)) { trace_rxrpc_timer(call, rxrpc_timer_exp_keepalive, now); cmpxchg(&call->keepalive_at, t, now + MAX_JIFFY_OFFSET); - rxrpc_propose_ACK(call, RXRPC_ACK_PING, 0, 0, true, true, + rxrpc_propose_ACK(call, RXRPC_ACK_PING, 0, true, true, rxrpc_propose_ack_ping_for_keepalive); set_bit(RXRPC_CALL_EV_PING, &call->events); } @@ -407,7 +404,7 @@ void rxrpc_process_call(struct work_struct *work) send_ack = NULL; if (test_and_clear_bit(RXRPC_CALL_EV_ACK_LOST, &call->events)) { call->acks_lost_top = call->tx_top; - rxrpc_propose_ACK(call, RXRPC_ACK_PING, 0, 0, true, false, + rxrpc_propose_ACK(call, RXRPC_ACK_PING, 0, true, false, rxrpc_propose_ack_ping_for_lost_ack); send_ack = &call->acks_lost_ping; } diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index ee95d1cd1cdf..dd47d465d1d3 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -196,15 +196,14 @@ static void rxrpc_congestion_management(struct rxrpc_call *call, * Ping the other end to fill our RTT cache and to retrieve the rwind * and MTU parameters. */ -static void rxrpc_send_ping(struct rxrpc_call *call, struct sk_buff *skb, - int skew) +static void rxrpc_send_ping(struct rxrpc_call *call, struct sk_buff *skb) { struct rxrpc_skb_priv *sp = rxrpc_skb(skb); ktime_t now = skb->tstamp; if (call->peer->rtt_usage < 3 || ktime_before(ktime_add_ms(call->peer->rtt_last_req, 1000), now)) - rxrpc_propose_ACK(call, RXRPC_ACK_PING, skew, sp->hdr.serial, + rxrpc_propose_ACK(call, RXRPC_ACK_PING, sp->hdr.serial, true, true, rxrpc_propose_ack_ping_for_params); } @@ -419,8 +418,7 @@ static void rxrpc_input_dup_data(struct rxrpc_call *call, rxrpc_seq_t seq, /* * Process a DATA packet, adding the packet to the Rx ring. */ -static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb, - u16 skew) +static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb) { struct rxrpc_skb_priv *sp = rxrpc_skb(skb); enum rxrpc_call_state state; @@ -600,11 +598,11 @@ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb, ack: if (ack) - rxrpc_propose_ACK(call, ack, skew, ack_serial, + rxrpc_propose_ACK(call, ack, ack_serial, immediate_ack, true, rxrpc_propose_ack_input_data); else - rxrpc_propose_ACK(call, RXRPC_ACK_DELAY, skew, serial, + rxrpc_propose_ACK(call, RXRPC_ACK_DELAY, serial, false, true, rxrpc_propose_ack_input_data); @@ -822,8 +820,7 @@ static void rxrpc_input_soft_acks(struct rxrpc_call *call, u8 *acks, * soft-ACK means that the packet may be discarded and retransmission * requested. A phase is complete when all packets are hard-ACK'd. */ -static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb, - u16 skew) +static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) { struct rxrpc_ack_summary summary = { 0 }; struct rxrpc_skb_priv *sp = rxrpc_skb(skb); @@ -867,11 +864,11 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb, if (buf.ack.reason == RXRPC_ACK_PING) { _proto("Rx ACK %%%u PING Request", sp->hdr.serial); rxrpc_propose_ACK(call, RXRPC_ACK_PING_RESPONSE, - skew, sp->hdr.serial, true, true, + sp->hdr.serial, true, true, rxrpc_propose_ack_respond_to_ping); } else if (sp->hdr.flags & RXRPC_REQUEST_ACK) { rxrpc_propose_ACK(call, RXRPC_ACK_REQUESTED, - skew, sp->hdr.serial, true, true, + sp->hdr.serial, true, true, rxrpc_propose_ack_respond_to_ack); } @@ -948,7 +945,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb, RXRPC_TX_ANNO_LAST && summary.nr_acks == call->tx_top - hard_ack && rxrpc_is_client_call(call)) - rxrpc_propose_ACK(call, RXRPC_ACK_PING, skew, sp->hdr.serial, + rxrpc_propose_ACK(call, RXRPC_ACK_PING, sp->hdr.serial, false, true, rxrpc_propose_ack_ping_for_lost_reply); @@ -1004,7 +1001,7 @@ static void rxrpc_input_abort(struct rxrpc_call *call, struct sk_buff *skb) * Process an incoming call packet. */ static void rxrpc_input_call_packet(struct rxrpc_call *call, - struct sk_buff *skb, u16 skew) + struct sk_buff *skb) { struct rxrpc_skb_priv *sp = rxrpc_skb(skb); unsigned long timo; @@ -1023,11 +1020,11 @@ static void rxrpc_input_call_packet(struct rxrpc_call *call, switch (sp->hdr.type) { case RXRPC_PACKET_TYPE_DATA: - rxrpc_input_data(call, skb, skew); + rxrpc_input_data(call, skb); break; case RXRPC_PACKET_TYPE_ACK: - rxrpc_input_ack(call, skb, skew); + rxrpc_input_ack(call, skb); break; case RXRPC_PACKET_TYPE_BUSY: @@ -1181,7 +1178,6 @@ int rxrpc_input_packet(struct sock *udp_sk, struct sk_buff *skb) struct rxrpc_peer *peer = NULL; struct rxrpc_sock *rx = NULL; unsigned int channel; - int skew = 0; _enter("%p", udp_sk); @@ -1309,15 +1305,8 @@ int rxrpc_input_packet(struct sock *udp_sk, struct sk_buff *skb) goto out; } - /* Note the serial number skew here */ - skew = (int)sp->hdr.serial - (int)conn->hi_serial; - if (skew >= 0) { - if (skew > 0) - conn->hi_serial = sp->hdr.serial; - } else { - skew = -skew; - skew = min(skew, 65535); - } + if ((int)sp->hdr.serial - (int)conn->hi_serial > 0) + conn->hi_serial = sp->hdr.serial; /* Call-bound packets are routed by connection channel. */ channel = sp->hdr.cid & RXRPC_CHANNELMASK; @@ -1380,11 +1369,11 @@ int rxrpc_input_packet(struct sock *udp_sk, struct sk_buff *skb) call = rxrpc_new_incoming_call(local, rx, skb); if (!call) goto reject_packet; - rxrpc_send_ping(call, skb, skew); + rxrpc_send_ping(call, skb); mutex_unlock(&call->user_mutex); } - rxrpc_input_call_packet(call, skb, skew); + rxrpc_input_call_packet(call, skb); goto discard; discard: diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index 948e3fe249ec..369e516c4bdf 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -87,7 +87,7 @@ static size_t rxrpc_fill_out_ack(struct rxrpc_connection *conn, *_top = top; pkt->ack.bufferSpace = htons(8); - pkt->ack.maxSkew = htons(call->ackr_skew); + pkt->ack.maxSkew = htons(0); pkt->ack.firstPacket = htonl(hard_ack + 1); pkt->ack.previousPacket = htonl(call->ackr_prev_seq); pkt->ack.serial = htonl(serial); @@ -228,7 +228,6 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping, if (ping) clear_bit(RXRPC_CALL_PINGING, &call->flags); rxrpc_propose_ACK(call, pkt->ack.reason, - ntohs(pkt->ack.maxSkew), ntohl(pkt->ack.serial), false, true, rxrpc_propose_ack_retry_tx); diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index 5abf46cf9e6c..9a7e1bc9791d 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -141,7 +141,7 @@ static void rxrpc_end_rx_phase(struct rxrpc_call *call, rxrpc_serial_t serial) ASSERTCMP(call->rx_hard_ack, ==, call->rx_top); if (call->state == RXRPC_CALL_CLIENT_RECV_REPLY) { - rxrpc_propose_ACK(call, RXRPC_ACK_IDLE, 0, serial, false, true, + rxrpc_propose_ACK(call, RXRPC_ACK_IDLE, serial, false, true, rxrpc_propose_ack_terminal_ack); //rxrpc_send_ack_packet(call, false, NULL); } @@ -159,7 +159,7 @@ static void rxrpc_end_rx_phase(struct rxrpc_call *call, rxrpc_serial_t serial) call->state = RXRPC_CALL_SERVER_ACK_REQUEST; call->expect_req_by = jiffies + MAX_JIFFY_OFFSET; write_unlock_bh(&call->state_lock); - rxrpc_propose_ACK(call, RXRPC_ACK_DELAY, 0, serial, false, true, + rxrpc_propose_ACK(call, RXRPC_ACK_DELAY, serial, false, true, rxrpc_propose_ack_processing_op); break; default: @@ -212,7 +212,7 @@ static void rxrpc_rotate_rx_window(struct rxrpc_call *call) if (after_eq(hard_ack, call->ackr_consumed + 2) || after_eq(top, call->ackr_seen + 2) || (hard_ack == top && after(hard_ack, call->ackr_consumed))) - rxrpc_propose_ACK(call, RXRPC_ACK_DELAY, 0, serial, + rxrpc_propose_ACK(call, RXRPC_ACK_DELAY, serial, true, true, rxrpc_propose_ack_rotate_rx); if (call->ackr_reason && call->ackr_reason != RXRPC_ACK_DELAY) From 8f946da73aaa1c3f609bd14d1193f39afa6830c7 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 5 Aug 2019 11:11:08 +0200 Subject: [PATCH 284/482] kvm: remove unnecessary PageReserved check The same check is already done in kvm_is_reserved_pfn. Reviewed-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- virt/kvm/kvm_main.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 4feceaa03fb1..c6a91b044d8d 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1855,8 +1855,7 @@ void kvm_set_pfn_dirty(kvm_pfn_t pfn) if (!kvm_is_reserved_pfn(pfn)) { struct page *page = pfn_to_page(pfn); - if (!PageReserved(page)) - SetPageDirty(page); + SetPageDirty(page); } } EXPORT_SYMBOL_GPL(kvm_set_pfn_dirty); From e2c26537ea7652d123ddb01ceb1aa0e2a5edb7ad Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Wed, 31 Jul 2019 16:28:51 +0200 Subject: [PATCH 285/482] KVM: selftests: Update gitignore file for latest changes The kvm_create_max_vcpus test has been moved to the main directory, and sync_regs_test is now available on s390x, too. Signed-off-by: Thomas Huth Acked-by: Shuah Khan Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/.gitignore | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore index 41266af0d3dc..b35da375530a 100644 --- a/tools/testing/selftests/kvm/.gitignore +++ b/tools/testing/selftests/kvm/.gitignore @@ -1,7 +1,7 @@ +/s390x/sync_regs_test /x86_64/cr4_cpuid_sync_test /x86_64/evmcs_test /x86_64/hyperv_cpuid -/x86_64/kvm_create_max_vcpus /x86_64/mmio_warning_test /x86_64/platform_info_test /x86_64/set_sregs_test @@ -13,3 +13,4 @@ /x86_64/vmx_tsc_adjust_test /clear_dirty_log_test /dirty_log_test +/kvm_create_max_vcpus From c096397c78f766db972f923433031f2dec01cae0 Mon Sep 17 00:00:00 2001 From: Naresh Kamboju Date: Wed, 7 Aug 2019 13:58:14 +0000 Subject: [PATCH 286/482] selftests: kvm: Adding config fragments selftests kvm test cases need pre-required kernel configs for the test to get pass. Signed-off-by: Naresh Kamboju Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/config | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 tools/testing/selftests/kvm/config diff --git a/tools/testing/selftests/kvm/config b/tools/testing/selftests/kvm/config new file mode 100644 index 000000000000..63ed533f73d6 --- /dev/null +++ b/tools/testing/selftests/kvm/config @@ -0,0 +1,3 @@ +CONFIG_KVM=y +CONFIG_KVM_INTEL=y +CONFIG_KVM_AMD=y From bfeaec7f7d2f6b09764a1b48e88e9ca3d5076419 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Sat, 20 Jul 2019 10:01:26 +0800 Subject: [PATCH 287/482] iommu/vt-d: Correctly check format of page table in debugfs PASID support and enable bit in the context entry isn't the right indicator for the type of tables (legacy or scalable mode). Check the DMA_RTADDR_SMT bit in the root context pointer instead. Cc: Ashok Raj Cc: Jacob Pan Cc: Kevin Tian Cc: Sai Praneeth Fixes: dd5142ca5d24b ("iommu/vt-d: Add debugfs support to show scalable mode DMAR table internals") Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu-debugfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/intel-iommu-debugfs.c b/drivers/iommu/intel-iommu-debugfs.c index 2b25d9c59336..471f05d452e0 100644 --- a/drivers/iommu/intel-iommu-debugfs.c +++ b/drivers/iommu/intel-iommu-debugfs.c @@ -235,7 +235,7 @@ static void ctx_tbl_walk(struct seq_file *m, struct intel_iommu *iommu, u16 bus) tbl_wlk.ctx_entry = context; m->private = &tbl_wlk; - if (pasid_supported(iommu) && is_pasid_enabled(context)) { + if (dmar_readq(iommu->reg + DMAR_RTADDR_REG) & DMA_RTADDR_SMT) { pasid_dir_ptr = context->lo & VTD_PAGE_MASK; pasid_dir_size = get_pasid_dir_size(context); pasid_dir_walk(m, pasid_dir_ptr, pasid_dir_size); From ab2cbeb0ed301a9f0460078e91b09f39958212ef Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Mon, 29 Jul 2019 17:46:00 +0100 Subject: [PATCH 288/482] iommu/dma: Handle SG length overflow better Since scatterlist dimensions are all unsigned ints, in the relatively rare cases where a device's max_segment_size is set to UINT_MAX, then the "cur_len + s_length <= max_len" check in __finalise_sg() will always return true. As a result, the corner case of such a device mapping an excessively large scatterlist which is mergeable to or beyond a total length of 4GB can lead to overflow and a bogus truncated dma_length in the resulting segment. As we already assume that any single segment must be no longer than max_len to begin with, this can easily be addressed by reshuffling the comparison. Fixes: 809eac54cdd6 ("iommu/dma: Implement scatterlist segment merging") Reported-by: Nicolin Chen Tested-by: Nicolin Chen Signed-off-by: Robin Murphy Signed-off-by: Joerg Roedel --- drivers/iommu/dma-iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index 6441197a75ea..4ea9cf02ba2d 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -762,7 +762,7 @@ static int __finalise_sg(struct device *dev, struct scatterlist *sg, int nents, * - and wouldn't make the resulting output segment too long */ if (cur_len && !s_iova_off && (dma_addr & seg_mask) && - (cur_len + s_length <= max_len)) { + (max_len - cur_len >= s_length)) { /* ...then concatenate it with the previous one */ cur_len += s_length; } else { From ae23bfb68f2896835e54a137688906713cb607e7 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 6 Aug 2019 08:14:08 +0800 Subject: [PATCH 289/482] iommu/vt-d: Detach domain before using a private one When the default domain of a group doesn't work for a device, the iommu driver will try to use a private domain. The domain which was previously attached to the device must be detached. Cc: Ashok Raj Cc: Jacob Pan Cc: Kevin Tian Cc: Alex Williamson Fixes: 942067f1b6b97 ("iommu/vt-d: Identify default domains replaced with private") Reported-by: Alex Williamson Link: https://lkml.org/lkml/2019/8/2/1379 Signed-off-by: Lu Baolu Tested-by: Alex Williamson Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 3e22fa6ae8c8..37259b7f95a7 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -3449,6 +3449,7 @@ static bool iommu_need_mapping(struct device *dev) dmar_domain = to_dmar_domain(domain); dmar_domain->flags |= DOMAIN_FLAG_LOSE_CHILDREN; } + dmar_remove_one_dev_info(dev); get_private_domain_for_dev(dev); } @@ -4803,7 +4804,8 @@ static void dmar_remove_one_dev_info(struct device *dev) spin_lock_irqsave(&device_domain_lock, flags); info = dev->archdata.iommu; - __dmar_remove_one_dev_info(info); + if (info) + __dmar_remove_one_dev_info(info); spin_unlock_irqrestore(&device_domain_lock, flags); } @@ -5281,6 +5283,7 @@ static int intel_iommu_add_device(struct device *dev) if (device_def_domain_type(dev) == IOMMU_DOMAIN_IDENTITY) { ret = iommu_request_dm_for_dev(dev); if (ret) { + dmar_remove_one_dev_info(dev); dmar_domain->flags |= DOMAIN_FLAG_LOSE_CHILDREN; domain_add_dev_info(si_domain, dev); dev_info(dev, @@ -5291,6 +5294,7 @@ static int intel_iommu_add_device(struct device *dev) if (device_def_domain_type(dev) == IOMMU_DOMAIN_DMA) { ret = iommu_request_dma_domain_for_dev(dev); if (ret) { + dmar_remove_one_dev_info(dev); dmar_domain->flags |= DOMAIN_FLAG_LOSE_CHILDREN; if (!get_private_domain_for_dev(dev)) { dev_warn(dev, From 3a18844dcf89e636b2d0cbf577e3963b0bcb6d23 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 6 Aug 2019 08:14:09 +0800 Subject: [PATCH 290/482] iommu/vt-d: Fix possible use-after-free of private domain Multiple devices might share a private domain. One real example is a pci bridge and all devices behind it. When remove a private domain, make sure that it has been detached from all devices to avoid use-after-free case. Cc: Ashok Raj Cc: Jacob Pan Cc: Kevin Tian Cc: Alex Williamson Fixes: 942067f1b6b97 ("iommu/vt-d: Identify default domains replaced with private") Signed-off-by: Lu Baolu Tested-by: Alex Williamson Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 37259b7f95a7..12d094d08c0a 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -4791,7 +4791,8 @@ static void __dmar_remove_one_dev_info(struct device_domain_info *info) /* free the private domain */ if (domain->flags & DOMAIN_FLAG_LOSE_CHILDREN && - !(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY)) + !(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY) && + list_empty(&domain->devices)) domain_exit(info->domain); free_devinfo_mem(info); From b3e78adcbf991a4e8b2ebb23c9889e968ec76c5f Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 6 Aug 2019 17:19:22 -0700 Subject: [PATCH 291/482] tools: bpftool: fix error message (prog -> object) Change an error message to work for any object being pinned not just programs. Fixes: 71bb428fe2c1 ("tools: bpf: add bpftool") Signed-off-by: Jakub Kicinski Reviewed-by: Quentin Monnet Signed-off-by: Daniel Borkmann --- tools/bpf/bpftool/common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c index 5215e0870bcb..c52a6ffb8949 100644 --- a/tools/bpf/bpftool/common.c +++ b/tools/bpf/bpftool/common.c @@ -237,7 +237,7 @@ int do_pin_any(int argc, char **argv, int (*get_fd_by_id)(__u32)) fd = get_fd_by_id(id); if (fd < 0) { - p_err("can't get prog by id (%u): %s", id, strerror(errno)); + p_err("can't open object by id (%u): %s", id, strerror(errno)); return -1; } From 3c7be384fe6da0d7b1d6fc0ad6b4a33edb73aad5 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 6 Aug 2019 17:19:23 -0700 Subject: [PATCH 292/482] tools: bpftool: add error message on pin failure No error message is currently printed if the pin syscall itself fails. It got lost in the loadall refactoring. Fixes: 77380998d91d ("bpftool: add loadall command") Reported-by: Andy Lutomirski Signed-off-by: Jakub Kicinski Reviewed-by: Quentin Monnet Acked-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann --- tools/bpf/bpftool/common.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c index c52a6ffb8949..6a71324be628 100644 --- a/tools/bpf/bpftool/common.c +++ b/tools/bpf/bpftool/common.c @@ -204,7 +204,11 @@ int do_pin_fd(int fd, const char *name) if (err) return err; - return bpf_obj_pin(fd, name); + err = bpf_obj_pin(fd, name); + if (err) + p_err("can't pin the object (%s): %s", name, strerror(errno)); + + return err; } int do_pin_any(int argc, char **argv, int (*get_fd_by_id)(__u32)) From a27a0c9b6a208722016c8ec5ad31ec96082b91ec Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Mon, 5 Aug 2019 12:22:03 +0100 Subject: [PATCH 293/482] gfs2: gfs2_walk_metadata fix It turns out that the current version of gfs2_metadata_walker suffers from multiple problems that can cause gfs2_hole_size to report an incorrect size. This will confuse fiemap as well as lseek with the SEEK_DATA flag. Fix that by changing gfs2_hole_walker to compute the metapath to the first data block after the hole (if any), and compute the hole size based on that. Fixes xfstest generic/490. Signed-off-by: Andreas Gruenbacher Reviewed-by: Bob Peterson Cc: stable@vger.kernel.org # v4.18+ --- fs/gfs2/bmap.c | 164 ++++++++++++++++++++++++++++++------------------- 1 file changed, 101 insertions(+), 63 deletions(-) diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index 4df26ef2b2b1..4f8b5fd6c81f 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -390,6 +390,19 @@ static int fillup_metapath(struct gfs2_inode *ip, struct metapath *mp, int h) return mp->mp_aheight - x - 1; } +static sector_t metapath_to_block(struct gfs2_sbd *sdp, struct metapath *mp) +{ + sector_t factor = 1, block = 0; + int hgt; + + for (hgt = mp->mp_fheight - 1; hgt >= 0; hgt--) { + if (hgt < mp->mp_aheight) + block += mp->mp_list[hgt] * factor; + factor *= sdp->sd_inptrs; + } + return block; +} + static void release_metapath(struct metapath *mp) { int i; @@ -430,60 +443,84 @@ static inline unsigned int gfs2_extent_length(struct buffer_head *bh, __be64 *pt return ptr - first; } -typedef const __be64 *(*gfs2_metadata_walker)( - struct metapath *mp, - const __be64 *start, const __be64 *end, - u64 factor, void *data); +enum walker_status { WALK_STOP, WALK_FOLLOW, WALK_CONTINUE }; -#define WALK_STOP ((__be64 *)0) -#define WALK_NEXT ((__be64 *)1) +/* + * gfs2_metadata_walker - walk an indirect block + * @mp: Metapath to indirect block + * @ptrs: Number of pointers to look at + * + * When returning WALK_FOLLOW, the walker must update @mp to point at the right + * indirect block to follow. + */ +typedef enum walker_status (*gfs2_metadata_walker)(struct metapath *mp, + unsigned int ptrs); -static int gfs2_walk_metadata(struct inode *inode, sector_t lblock, - u64 len, struct metapath *mp, gfs2_metadata_walker walker, - void *data) +/* + * gfs2_walk_metadata - walk a tree of indirect blocks + * @inode: The inode + * @mp: Starting point of walk + * @max_len: Maximum number of blocks to walk + * @walker: Called during the walk + * + * Returns 1 if the walk was stopped by @walker, 0 if we went past @max_len or + * past the end of metadata, and a negative error code otherwise. + */ + +static int gfs2_walk_metadata(struct inode *inode, struct metapath *mp, + u64 max_len, gfs2_metadata_walker walker) { - struct metapath clone; struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_sbd *sdp = GFS2_SB(inode); - const __be64 *start, *end, *ptr; u64 factor = 1; unsigned int hgt; - int ret = 0; + int ret; - for (hgt = ip->i_height - 1; hgt >= mp->mp_aheight; hgt--) + /* + * The walk starts in the lowest allocated indirect block, which may be + * before the position indicated by @mp. Adjust @max_len accordingly + * to avoid a short walk. + */ + for (hgt = mp->mp_fheight - 1; hgt >= mp->mp_aheight; hgt--) { + max_len += mp->mp_list[hgt] * factor; + mp->mp_list[hgt] = 0; factor *= sdp->sd_inptrs; + } for (;;) { - u64 step; + u16 start = mp->mp_list[hgt]; + enum walker_status status; + unsigned int ptrs; + u64 len; /* Walk indirect block. */ - start = metapointer(hgt, mp); - end = metaend(hgt, mp); - - step = (end - start) * factor; - if (step > len) - end = start + DIV_ROUND_UP_ULL(len, factor); - - ptr = walker(mp, start, end, factor, data); - if (ptr == WALK_STOP) + ptrs = (hgt >= 1 ? sdp->sd_inptrs : sdp->sd_diptrs) - start; + len = ptrs * factor; + if (len > max_len) + ptrs = DIV_ROUND_UP_ULL(max_len, factor); + status = walker(mp, ptrs); + switch (status) { + case WALK_STOP: + return 1; + case WALK_FOLLOW: + BUG_ON(mp->mp_aheight == mp->mp_fheight); + ptrs = mp->mp_list[hgt] - start; + len = ptrs * factor; break; - if (step >= len) + case WALK_CONTINUE: break; - len -= step; - if (ptr != WALK_NEXT) { - BUG_ON(!*ptr); - mp->mp_list[hgt] += ptr - start; - goto fill_up_metapath; } + if (len >= max_len) + break; + max_len -= len; + if (status == WALK_FOLLOW) + goto fill_up_metapath; lower_metapath: /* Decrease height of metapath. */ - if (mp != &clone) { - clone_metapath(&clone, mp); - mp = &clone; - } brelse(mp->mp_bh[hgt]); mp->mp_bh[hgt] = NULL; + mp->mp_list[hgt] = 0; if (!hgt) break; hgt--; @@ -491,10 +528,7 @@ static int gfs2_walk_metadata(struct inode *inode, sector_t lblock, /* Advance in metadata tree. */ (mp->mp_list[hgt])++; - start = metapointer(hgt, mp); - end = metaend(hgt, mp); - if (start >= end) { - mp->mp_list[hgt] = 0; + if (mp->mp_list[hgt] >= sdp->sd_inptrs) { if (!hgt) break; goto lower_metapath; @@ -502,44 +536,36 @@ static int gfs2_walk_metadata(struct inode *inode, sector_t lblock, fill_up_metapath: /* Increase height of metapath. */ - if (mp != &clone) { - clone_metapath(&clone, mp); - mp = &clone; - } ret = fillup_metapath(ip, mp, ip->i_height - 1); if (ret < 0) - break; + return ret; hgt += ret; for (; ret; ret--) do_div(factor, sdp->sd_inptrs); mp->mp_aheight = hgt + 1; } - if (mp == &clone) - release_metapath(mp); - return ret; + return 0; } -struct gfs2_hole_walker_args { - u64 blocks; -}; - -static const __be64 *gfs2_hole_walker(struct metapath *mp, - const __be64 *start, const __be64 *end, - u64 factor, void *data) +static enum walker_status gfs2_hole_walker(struct metapath *mp, + unsigned int ptrs) { - struct gfs2_hole_walker_args *args = data; - const __be64 *ptr; + const __be64 *start, *ptr, *end; + unsigned int hgt; + + hgt = mp->mp_aheight - 1; + start = metapointer(hgt, mp); + end = start + ptrs; for (ptr = start; ptr < end; ptr++) { if (*ptr) { - args->blocks += (ptr - start) * factor; + mp->mp_list[hgt] += ptr - start; if (mp->mp_aheight == mp->mp_fheight) return WALK_STOP; - return ptr; /* increase height */ + return WALK_FOLLOW; } } - args->blocks += (end - start) * factor; - return WALK_NEXT; + return WALK_CONTINUE; } /** @@ -557,12 +583,24 @@ static const __be64 *gfs2_hole_walker(struct metapath *mp, static int gfs2_hole_size(struct inode *inode, sector_t lblock, u64 len, struct metapath *mp, struct iomap *iomap) { - struct gfs2_hole_walker_args args = { }; - int ret = 0; + struct metapath clone; + u64 hole_size; + int ret; - ret = gfs2_walk_metadata(inode, lblock, len, mp, gfs2_hole_walker, &args); - if (!ret) - iomap->length = args.blocks << inode->i_blkbits; + clone_metapath(&clone, mp); + ret = gfs2_walk_metadata(inode, &clone, len, gfs2_hole_walker); + if (ret < 0) + goto out; + + if (ret == 1) + hole_size = metapath_to_block(GFS2_SB(inode), &clone) - lblock; + else + hole_size = len; + iomap->length = hole_size << inode->i_blkbits; + ret = 0; + +out: + release_metapath(&clone); return ret; } From 47801c97deb71b9e279c15a02a44cf00aa11e7d9 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Fri, 2 Aug 2019 19:23:58 +0900 Subject: [PATCH 294/482] kbuild: revive single target %.ko I removed the single target %.ko in commit ff9b45c55b26 ("kbuild: modpost: read modules.order instead of $(MODVERDIR)/*.mod") because the modpost stage does not work reliably. For instance, the module dependency, modversion, etc. do not work if we lack symbol information from the other modules. Yet, some people still want to build only one module in their interest, and it may be still useful if it is used within those limitations. Fixes: ff9b45c55b26 ("kbuild: modpost: read modules.order instead of $(MODVERDIR)/*.mod") Reported-by: Don Brace Reported-by: Arend Van Spriel Signed-off-by: Masahiro Yamada --- Makefile | 11 +++++++++++ scripts/Makefile.modpost | 6 ++---- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index 23cdf1f41364..d1f35e32edc7 100644 --- a/Makefile +++ b/Makefile @@ -1002,6 +1002,8 @@ endif PHONY += prepare0 +export MODORDER := $(if $(KBUILD_EXTMOD),$(KBUILD_EXTMOD)/)modules.order + ifeq ($(KBUILD_EXTMOD),) core-y += kernel/ certs/ mm/ fs/ ipc/ security/ crypto/ block/ @@ -1771,6 +1773,15 @@ build-dir = $(patsubst %/,%,$(dir $(build-target))) $(Q)$(MAKE) $(build)=$(build-dir) $(build-target) %.symtypes: prepare FORCE $(Q)$(MAKE) $(build)=$(build-dir) $(build-target) +ifeq ($(KBUILD_EXTMOD),) +# For the single build of an in-tree module, use a temporary file to avoid +# the situation of modules_install installing an invalid modules.order. +%.ko: MODORDER := .modules.tmp +endif +%.ko: prepare FORCE + $(Q)$(MAKE) $(build)=$(build-dir) $(build-target:.ko=.mod) + $(Q)echo $(build-target) > $(MODORDER) + $(Q)$(MAKE) -f $(srctree)/scripts/Makefile.modpost # Modules PHONY += / diff --git a/scripts/Makefile.modpost b/scripts/Makefile.modpost index 92ed02d7cd5e..26e6574ecd08 100644 --- a/scripts/Makefile.modpost +++ b/scripts/Makefile.modpost @@ -85,10 +85,8 @@ endif include scripts/Makefile.lib -modorder := $(if $(KBUILD_EXTMOD),$(KBUILD_EXTMOD)/)modules.order - # find all modules listed in modules.order -modules := $(sort $(shell cat $(modorder))) +modules := $(sort $(shell cat $(MODORDER))) # Stop after building .o files if NOFINAL is set. Makes compile tests quicker __modpost: $(if $(KBUILD_MODPOST_NOFINAL), $(modules:.ko:.o),$(modules)) @@ -98,7 +96,7 @@ MODPOST += $(subst -i,-n,$(filter -i,$(MAKEFLAGS))) -s -T - $(wildcard vmlinux) # We can go over command line length here, so be careful. quiet_cmd_modpost = MODPOST $(words $(modules)) modules - cmd_modpost = sed 's/ko$$/o/' $(modorder) | $(MODPOST) + cmd_modpost = sed 's/ko$$/o/' $(MODORDER) | $(MODPOST) PHONY += modules-modpost modules-modpost: From d9f78edfd81b9e484423534360350ef7253cc888 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Tue, 6 Aug 2019 19:03:21 +0900 Subject: [PATCH 295/482] kbuild: fix false-positive need-builtin calculation The current implementation of need-builtin is false-positive, for example, in the following Makefile: obj-m := foo/ obj-y := foo/bar/ ..., where foo/built-in.a is not required. Signed-off-by: Masahiro Yamada --- scripts/Makefile.build | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scripts/Makefile.build b/scripts/Makefile.build index 0d434d0afc0b..3fe0c73e002c 100644 --- a/scripts/Makefile.build +++ b/scripts/Makefile.build @@ -487,7 +487,8 @@ targets += $(call intermediate_targets, .asn1.o, .asn1.c .asn1.h) \ PHONY += $(subdir-ym) $(subdir-ym): - $(Q)$(MAKE) $(build)=$@ need-builtin=$(if $(findstring $@,$(subdir-obj-y)),1) + $(Q)$(MAKE) $(build)=$@ \ + need-builtin=$(if $(filter $@/built-in.a, $(subdir-obj-y)),1) # Add FORCE to the prequisites of a target to force it to be always rebuilt. # --------------------------------------------------------------------------- From 4f2c8f3089f538f556c86f26603a062865e4aa94 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Tue, 6 Aug 2019 19:03:22 +0900 Subject: [PATCH 296/482] kbuild: generate modules.order only in directories visited by obj-y/m The modules.order files in directories visited by the chain of obj-y or obj-m are merged to the upper-level ones, and become parts of the top-level modules.order. On the other hand, there is no need to generate modules.order in directories visited by subdir-y or subdir-m since they would become orphan anyway. Signed-off-by: Masahiro Yamada --- scripts/Makefile.build | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scripts/Makefile.build b/scripts/Makefile.build index 3fe0c73e002c..37a1d2cd49d4 100644 --- a/scripts/Makefile.build +++ b/scripts/Makefile.build @@ -488,7 +488,8 @@ targets += $(call intermediate_targets, .asn1.o, .asn1.c .asn1.h) \ PHONY += $(subdir-ym) $(subdir-ym): $(Q)$(MAKE) $(build)=$@ \ - need-builtin=$(if $(filter $@/built-in.a, $(subdir-obj-y)),1) + need-builtin=$(if $(filter $@/built-in.a, $(subdir-obj-y)),1) \ + need-modorder=$(if $(need-modorder),$(if $(filter $@/modules.order, $(modorder)),1)) # Add FORCE to the prequisites of a target to force it to be always rebuilt. # --------------------------------------------------------------------------- From c07d8d47bca1b325102fa2be3a463075f7b051d9 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 8 Aug 2019 20:21:11 +0900 Subject: [PATCH 297/482] kbuild: show hint if subdir-y/m is used to visit module Makefile Since commit ff9b45c55b26 ("kbuild: modpost: read modules.order instead of $(MODVERDIR)/*.mod"), a module is no longer built in the following pattern: [Makefile] subdir-y := some-module [some-module/Makefile] obj-m := some-module.o You cannot write Makefile this way in upstream because modules.order is not correctly generated. subdir-y is used to descend to a sub-directory that builds tools, device trees, etc. For external modules, the modules order does not matter. So, the Makefile above was known to work. I believe the Makefile should be re-written as follows: [Makefile] obj-m := some-module/ [some-module/Makefile] obj-m := some-module.o However, people will have no idea if their Makefile suddenly stops working. In fact, I received questions from multiple people. Show a warning for a while if obj-m is specified in a Makefile visited by subdir-y or subdir-m. I touched the %/ rule to avoid false-positive warnings for the single target. Cc: Jan Kiszka Cc: Tom Stonecypher Signed-off-by: Masahiro Yamada Tested-by: Jan Kiszka --- Makefile | 2 +- scripts/Makefile.build | 7 +++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index d1f35e32edc7..fd219e1452a0 100644 --- a/Makefile +++ b/Makefile @@ -1788,7 +1788,7 @@ PHONY += / /: ./ %/: prepare FORCE - $(Q)$(MAKE) KBUILD_MODULES=1 $(build)=$(build-dir) + $(Q)$(MAKE) KBUILD_MODULES=1 $(build)=$(build-dir) need-modorder=1 # FIXME Should go into a make.lib or something # =========================================================================== diff --git a/scripts/Makefile.build b/scripts/Makefile.build index 37a1d2cd49d4..2f66ed388d1c 100644 --- a/scripts/Makefile.build +++ b/scripts/Makefile.build @@ -52,6 +52,13 @@ ifndef obj $(warning kbuild: Makefile.build is included improperly) endif +ifeq ($(MAKECMDGOALS)$(need-modorder),) +ifneq ($(obj-m),) +$(warning $(patsubst %.o,'%.ko',$(obj-m)) will not be built even though obj-m is specified.) +$(warning You cannot use subdir-y/m to visit a module Makefile. Use obj-y/m instead.) +endif +endif + # =========================================================================== ifneq ($(strip $(lib-y) $(lib-m) $(lib-)),) From cd48bdda4fb82c2fe569d97af4217c530168c99c Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 8 Aug 2019 13:57:25 +0200 Subject: [PATCH 298/482] sock: make cookie generation global instead of per netns Generating and retrieving socket cookies are a useful feature that is exposed to BPF for various program types through bpf_get_socket_cookie() helper. The fact that the cookie counter is per netns is quite a limitation for BPF in practice in particular for programs in host namespace that use socket cookies as part of a map lookup key since they will be causing socket cookie collisions e.g. when attached to BPF cgroup hooks or cls_bpf on tc egress in host namespace handling container traffic from veth or ipvlan devices with peer in different netns. Change the counter to be global instead. Socket cookie consumers must assume the value as opqaue in any case. Not every socket must have a cookie generated and knowledge of the counter value itself does not provide much value either way hence conversion to global is fine. Signed-off-by: Daniel Borkmann Cc: Eric Dumazet Cc: Alexei Starovoitov Cc: Willem de Bruijn Cc: Martynas Pumputis Signed-off-by: David S. Miller --- include/net/net_namespace.h | 1 - include/uapi/linux/bpf.h | 4 ++-- net/core/sock_diag.c | 3 ++- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 4a9da951a794..cb668bc2692d 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -61,7 +61,6 @@ struct net { spinlock_t rules_mod_lock; u32 hash_mix; - atomic64_t cookie_gen; struct list_head list; /* list of network namespaces */ struct list_head exit_list; /* To linked to call pernet exit diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index fa1c753dcdbc..a5aa7d3ac6a1 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1466,8 +1466,8 @@ union bpf_attr { * If no cookie has been set yet, generate a new cookie. Once * generated, the socket cookie remains stable for the life of the * socket. This helper can be useful for monitoring per socket - * networking traffic statistics as it provides a unique socket - * identifier per namespace. + * networking traffic statistics as it provides a global socket + * identifier that can be assumed unique. * Return * A 8-byte long non-decreasing number on success, or 0 if the * socket field is missing inside *skb*. diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c index 3312a5849a97..c13ffbd33d8d 100644 --- a/net/core/sock_diag.c +++ b/net/core/sock_diag.c @@ -19,6 +19,7 @@ static const struct sock_diag_handler *sock_diag_handlers[AF_MAX]; static int (*inet_rcv_compat)(struct sk_buff *skb, struct nlmsghdr *nlh); static DEFINE_MUTEX(sock_diag_table_mutex); static struct workqueue_struct *broadcast_wq; +static atomic64_t cookie_gen; u64 sock_gen_cookie(struct sock *sk) { @@ -27,7 +28,7 @@ u64 sock_gen_cookie(struct sock *sk) if (res) return res; - res = atomic64_inc_return(&sock_net(sk)->cookie_gen); + res = atomic64_inc_return(&cookie_gen); atomic64_cmpxchg(&sk->sk_cookie, 0, res); } } From 609a2ca57afc467fbc46b7f3453de4e1811456c5 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 8 Aug 2019 13:57:26 +0200 Subject: [PATCH 299/482] bpf: sync bpf.h to tools infrastructure Pull in updates in BPF helper function description. Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller --- tools/include/uapi/linux/bpf.h | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 4e455018da65..a5aa7d3ac6a1 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -1466,8 +1466,8 @@ union bpf_attr { * If no cookie has been set yet, generate a new cookie. Once * generated, the socket cookie remains stable for the life of the * socket. This helper can be useful for monitoring per socket - * networking traffic statistics as it provides a unique socket - * identifier per namespace. + * networking traffic statistics as it provides a global socket + * identifier that can be assumed unique. * Return * A 8-byte long non-decreasing number on success, or 0 if the * socket field is missing inside *skb*. @@ -1571,8 +1571,11 @@ union bpf_attr { * but this is only implemented for native XDP (with driver * support) as of this writing). * - * All values for *flags* are reserved for future usage, and must - * be left at zero. + * The lower two bits of *flags* are used as the return code if + * the map lookup fails. This is so that the return value can be + * one of the XDP program return codes up to XDP_TX, as chosen by + * the caller. Any higher bits in the *flags* argument must be + * unset. * * When used to redirect packets to net devices, this helper * provides a high performance increase over **bpf_redirect**\ (). From 8b6381600d59871fbe44d36522272f961ab42410 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Thu, 8 Aug 2019 09:37:56 -0700 Subject: [PATCH 300/482] ixgbe: fix possible deadlock in ixgbe_service_task() ixgbe_service_task() calls unregister_netdev() under rtnl_lock(). But unregister_netdev() internally calls rtnl_lock(). So deadlock would occur. Fixes: 59dd45d550c5 ("ixgbe: firmware recovery mode") Signed-off-by: Taehee Yoo Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller --- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index cbaf712d6529..7882148abb43 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -7897,11 +7897,8 @@ static void ixgbe_service_task(struct work_struct *work) return; } if (ixgbe_check_fw_error(adapter)) { - if (!test_bit(__IXGBE_DOWN, &adapter->state)) { - rtnl_lock(); + if (!test_bit(__IXGBE_DOWN, &adapter->state)) unregister_netdev(adapter->netdev); - rtnl_unlock(); - } ixgbe_service_event_complete(adapter); return; } From 6d0d779dca73cd5acb649c54f81401f93098b298 Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Fri, 9 Aug 2019 01:58:08 +0000 Subject: [PATCH 301/482] hv_netvsc: Fix a warning of suspicious RCU usage This fixes a warning of "suspicious rcu_dereference_check() usage" when nload runs. Fixes: 776e726bfb34 ("netvsc: fix RCU warning in get_stats") Signed-off-by: Dexuan Cui Signed-off-by: David S. Miller --- drivers/net/hyperv/netvsc_drv.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 3544e1991579..e8fce6d715ef 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -1239,12 +1239,15 @@ static void netvsc_get_stats64(struct net_device *net, struct rtnl_link_stats64 *t) { struct net_device_context *ndev_ctx = netdev_priv(net); - struct netvsc_device *nvdev = rcu_dereference_rtnl(ndev_ctx->nvdev); + struct netvsc_device *nvdev; struct netvsc_vf_pcpu_stats vf_tot; int i; + rcu_read_lock(); + + nvdev = rcu_dereference(ndev_ctx->nvdev); if (!nvdev) - return; + goto out; netdev_stats_to_stats64(t, &net->stats); @@ -1283,6 +1286,8 @@ static void netvsc_get_stats64(struct net_device *net, t->rx_packets += packets; t->multicast += multicast; } +out: + rcu_read_unlock(); } static int netvsc_set_mac_addr(struct net_device *ndev, void *p) From e6a9522ac3ff59980ea00e070b6b8573aface36a Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Thu, 25 Jul 2019 15:29:57 -0500 Subject: [PATCH 302/482] drm/i915: Remove redundant user_access_end() from __copy_from_user() error path Objtool reports: drivers/gpu/drm/i915/gem/i915_gem_execbuffer.o: warning: objtool: .altinstr_replacement+0x36: redundant UACCESS disable __copy_from_user() already does both STAC and CLAC, so the user_access_end() in its error path adds an extra unnecessary CLAC. Fixes: 0b2c8f8b6b0c ("i915: fix missing user_access_end() in page fault exception case") Reported-by: Thomas Gleixner Reported-by: Sedat Dilek Signed-off-by: Josh Poimboeuf Signed-off-by: Thomas Gleixner Tested-by: Nick Desaulniers Tested-by: Sedat Dilek Acked-by: Peter Zijlstra (Intel) Acked-by: Chris Wilson Link: https://github.com/ClangBuiltLinux/linux/issues/617 Link: https://lkml.kernel.org/r/51a4155c5bc2ca847a9cbe85c1c11918bb193141.1564086017.git.jpoimboe@redhat.com --- .../gpu/drm/i915/gem/i915_gem_execbuffer.c | 20 +++++++++---------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 5fae0e50aad0..41dab9ea33cd 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -1628,6 +1628,7 @@ static int check_relocations(const struct drm_i915_gem_exec_object2 *entry) static int eb_copy_relocations(const struct i915_execbuffer *eb) { + struct drm_i915_gem_relocation_entry *relocs; const unsigned int count = eb->buffer_count; unsigned int i; int err; @@ -1635,7 +1636,6 @@ static int eb_copy_relocations(const struct i915_execbuffer *eb) for (i = 0; i < count; i++) { const unsigned int nreloc = eb->exec[i].relocation_count; struct drm_i915_gem_relocation_entry __user *urelocs; - struct drm_i915_gem_relocation_entry *relocs; unsigned long size; unsigned long copied; @@ -1663,14 +1663,8 @@ static int eb_copy_relocations(const struct i915_execbuffer *eb) if (__copy_from_user((char *)relocs + copied, (char __user *)urelocs + copied, - len)) { -end_user: - user_access_end(); -end: - kvfree(relocs); - err = -EFAULT; - goto err; - } + len)) + goto end; copied += len; } while (copied < size); @@ -1699,10 +1693,14 @@ static int eb_copy_relocations(const struct i915_execbuffer *eb) return 0; +end_user: + user_access_end(); +end: + kvfree(relocs); + err = -EFAULT; err: while (i--) { - struct drm_i915_gem_relocation_entry *relocs = - u64_to_ptr(typeof(*relocs), eb->exec[i].relocs_ptr); + relocs = u64_to_ptr(typeof(*relocs), eb->exec[i].relocs_ptr); if (eb->exec[i].relocation_count) kvfree(relocs); } From 06282373ff57a2b82621be4f84f981e1b0a4eb28 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 8 Aug 2019 14:43:49 -0700 Subject: [PATCH 303/482] mm/memremap: Fix reuse of pgmap instances with internal references Currently, attempts to shutdown and re-enable a device-dax instance trigger: Missing reference count teardown definition WARNING: CPU: 37 PID: 1608 at mm/memremap.c:211 devm_memremap_pages+0x234/0x850 [..] RIP: 0010:devm_memremap_pages+0x234/0x850 [..] Call Trace: dev_dax_probe+0x66/0x190 [device_dax] really_probe+0xef/0x390 driver_probe_device+0xb4/0x100 device_driver_attach+0x4f/0x60 Given that the setup path initializes pgmap->ref, arrange for it to be also torn down so devm_memremap_pages() is ready to be called again and not be mistaken for the 3rd-party per-cpu-ref case. Fixes: 24917f6b1041 ("memremap: provide an optional internal refcount in struct dev_pagemap") Reported-by: Fan Du Tested-by: Vishal Verma Cc: Andrew Morton Cc: Christoph Hellwig Cc: Ira Weiny Cc: Jason Gunthorpe Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/156530042781.2068700.8733813683117819799.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams --- mm/memremap.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/mm/memremap.c b/mm/memremap.c index 6ee03a816d67..86432650f829 100644 --- a/mm/memremap.c +++ b/mm/memremap.c @@ -91,6 +91,12 @@ static void dev_pagemap_cleanup(struct dev_pagemap *pgmap) wait_for_completion(&pgmap->done); percpu_ref_exit(pgmap->ref); } + /* + * Undo the pgmap ref assignment for the internal case as the + * caller may re-enable the same pgmap. + */ + if (pgmap->ref == &pgmap->internal_ref) + pgmap->ref = NULL; } static void devm_memremap_pages_release(void *data) From 2d0e988d8427b7b9c6d5c7835b6944405c038011 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Sun, 28 Jul 2019 18:02:41 -0500 Subject: [PATCH 304/482] ARM/hw_breakpoint: Mark expected switch fall-throughs Mark switch cases where we are expecting to fall through. This patch fixes the following warnings: arch/arm/kernel/hw_breakpoint.c: In function 'hw_breakpoint_arch_parse': arch/arm/kernel/hw_breakpoint.c:609:6: warning: this statement may fall through [-Wimplicit-fallthrough=] if (hw->ctrl.len == ARM_BREAKPOINT_LEN_2) ^ arch/arm/kernel/hw_breakpoint.c:611:2: note: here case 3: ^~~~ arch/arm/kernel/hw_breakpoint.c:613:6: warning: this statement may fall through [-Wimplicit-fallthrough=] if (hw->ctrl.len == ARM_BREAKPOINT_LEN_1) ^ arch/arm/kernel/hw_breakpoint.c:615:2: note: here default: ^~~~~~~ arch/arm/kernel/hw_breakpoint.c: In function 'arch_build_bp_info': arch/arm/kernel/hw_breakpoint.c:544:6: warning: this statement may fall through [-Wimplicit-fallthrough=] if ((hw->ctrl.type != ARM_BREAKPOINT_EXECUTE) ^ arch/arm/kernel/hw_breakpoint.c:547:2: note: here default: ^~~~~~~ In file included from include/linux/kernel.h:11, from include/linux/list.h:9, from include/linux/preempt.h:11, from include/linux/hardirq.h:5, from arch/arm/kernel/hw_breakpoint.c:16: arch/arm/kernel/hw_breakpoint.c: In function 'hw_breakpoint_pending': include/linux/compiler.h:78:22: warning: this statement may fall through [-Wimplicit-fallthrough=] # define unlikely(x) __builtin_expect(!!(x), 0) ^~~~~~~~~~~~~~~~~~~~~~~~~~ include/asm-generic/bug.h:136:2: note: in expansion of macro 'unlikely' unlikely(__ret_warn_on); \ ^~~~~~~~ arch/arm/kernel/hw_breakpoint.c:863:3: note: in expansion of macro 'WARN' WARN(1, "Asynchronous watchpoint exception taken. Debugging results may be unreliable\n"); ^~~~ arch/arm/kernel/hw_breakpoint.c:864:2: note: here case ARM_ENTRY_SYNC_WATCHPOINT: ^~~~ arch/arm/kernel/hw_breakpoint.c: In function 'core_has_os_save_restore': arch/arm/kernel/hw_breakpoint.c:910:6: warning: this statement may fall through [-Wimplicit-fallthrough=] if (oslsr & ARM_OSLSR_OSLM0) ^ arch/arm/kernel/hw_breakpoint.c:912:2: note: here default: ^~~~~~~ Reported-by: Stephen Rothwell Reviewed-by: Kees Cook Signed-off-by: Gustavo A. R. Silva --- arch/arm/kernel/hw_breakpoint.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c index af8b8e15f589..b0c195e3a06d 100644 --- a/arch/arm/kernel/hw_breakpoint.c +++ b/arch/arm/kernel/hw_breakpoint.c @@ -544,6 +544,7 @@ static int arch_build_bp_info(struct perf_event *bp, if ((hw->ctrl.type != ARM_BREAKPOINT_EXECUTE) && max_watchpoint_len >= 8) break; + /* Else, fall through */ default: return -EINVAL; } @@ -608,10 +609,12 @@ int hw_breakpoint_arch_parse(struct perf_event *bp, /* Allow halfword watchpoints and breakpoints. */ if (hw->ctrl.len == ARM_BREAKPOINT_LEN_2) break; + /* Else, fall through */ case 3: /* Allow single byte watchpoint. */ if (hw->ctrl.len == ARM_BREAKPOINT_LEN_1) break; + /* Else, fall through */ default: ret = -EINVAL; goto out; @@ -861,6 +864,7 @@ static int hw_breakpoint_pending(unsigned long addr, unsigned int fsr, break; case ARM_ENTRY_ASYNC_WATCHPOINT: WARN(1, "Asynchronous watchpoint exception taken. Debugging results may be unreliable\n"); + /* Fall through */ case ARM_ENTRY_SYNC_WATCHPOINT: watchpoint_handler(addr, fsr, regs); break; @@ -909,6 +913,7 @@ static bool core_has_os_save_restore(void) ARM_DBG_READ(c1, c1, 4, oslsr); if (oslsr & ARM_OSLSR_OSLM0) return true; + /* Else, fall through */ default: return false; } From 9b76ad3a9cc5ebb1dde650c8a9937f045e2707a2 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Sun, 28 Jul 2019 18:10:21 -0500 Subject: [PATCH 305/482] ARM: tegra: Mark expected switch fall-through Mark switch cases where we are expecting to fall through. This patch fixes the following warning: arch/arm/mach-tegra/reset.c: In function 'tegra_cpu_reset_handler_enable': arch/arm/mach-tegra/reset.c:72:3: warning: this statement may fall through [-Wimplicit-fallthrough=] tegra_cpu_reset_handler_set(reset_address); ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ arch/arm/mach-tegra/reset.c:74:2: note: here case 0: ^~~~ Reported-by: Stephen Rothwell Reviewed-by: Kees Cook Signed-off-by: Gustavo A. R. Silva --- arch/arm/mach-tegra/reset.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mach-tegra/reset.c b/arch/arm/mach-tegra/reset.c index 5a67a71f80cc..76a65df42d10 100644 --- a/arch/arm/mach-tegra/reset.c +++ b/arch/arm/mach-tegra/reset.c @@ -70,7 +70,7 @@ static void __init tegra_cpu_reset_handler_enable(void) switch (err) { case -ENOSYS: tegra_cpu_reset_handler_set(reset_address); - /* pass-through */ + /* fall through */ case 0: is_enabled = true; break; From e7c0c9f6028dfa09ea9f20c5f1d387e5858afede Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Sun, 28 Jul 2019 18:17:18 -0500 Subject: [PATCH 306/482] ARM: alignment: Mark expected switch fall-throughs Mark switch cases where we are expecting to fall through. This patch fixes the following warnings: arch/arm/mm/alignment.c: In function 'thumb2arm': arch/arm/mm/alignment.c:688:6: warning: this statement may fall through [-Wimplicit-fallthrough=] if ((tinstr & (3 << 9)) == 0x0400) { ^ arch/arm/mm/alignment.c:700:2: note: here default: ^~~~~~~ arch/arm/mm/alignment.c: In function 'do_alignment_t32_to_handler': arch/arm/mm/alignment.c:753:15: warning: this statement may fall through [-Wimplicit-fallthrough=] poffset->un = (tinst2 & 0xff) << 2; ~~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~ arch/arm/mm/alignment.c:754:2: note: here case 0xe940: ^~~~ Reported-by: Stephen Rothwell Reviewed-by: Kees Cook Signed-off-by: Gustavo A. R. Silva --- arch/arm/mm/alignment.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/arm/mm/alignment.c b/arch/arm/mm/alignment.c index 8cdb78642e93..04b36436cbc0 100644 --- a/arch/arm/mm/alignment.c +++ b/arch/arm/mm/alignment.c @@ -695,7 +695,7 @@ thumb2arm(u16 tinstr) return subset[(L<<1) | ((tinstr & (1<<8)) >> 8)] | (tinstr & 255); /* register_list */ } - /* Else fall through for illegal instruction case */ + /* Else, fall through - for illegal instruction case */ default: return BAD_INSTR; @@ -751,6 +751,8 @@ do_alignment_t32_to_handler(unsigned long *pinstr, struct pt_regs *regs, case 0xe8e0: case 0xe9e0: poffset->un = (tinst2 & 0xff) << 2; + /* Fall through */ + case 0xe940: case 0xe9c0: return do_alignment_ldrdstrd; From 3da6bd945b71c5fdd5db7b19224b8fc0b4d78ed8 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Sun, 28 Jul 2019 18:19:41 -0500 Subject: [PATCH 307/482] ARM: OMAP: dma: Mark expected switch fall-throughs Mark switch cases where we are expecting to fall through. This patch fixes the following warnings: arch/arm/plat-omap/dma.c: In function 'omap_set_dma_src_burst_mode': arch/arm/plat-omap/dma.c:384:6: warning: this statement may fall through [-Wimplicit-fallthrough=] if (dma_omap2plus()) { ^ arch/arm/plat-omap/dma.c:393:2: note: here case OMAP_DMA_DATA_BURST_16: ^~~~ arch/arm/plat-omap/dma.c:394:6: warning: this statement may fall through [-Wimplicit-fallthrough=] if (dma_omap2plus()) { ^ arch/arm/plat-omap/dma.c:402:2: note: here default: ^~~~~~~ arch/arm/plat-omap/dma.c: In function 'omap_set_dma_dest_burst_mode': arch/arm/plat-omap/dma.c:473:6: warning: this statement may fall through [-Wimplicit-fallthrough=] if (dma_omap2plus()) { ^ arch/arm/plat-omap/dma.c:481:2: note: here default: ^~~~~~~ Notice that, in this particular case, the code comment is modified in accordance with what GCC is expecting to find. Reported-by: Stephen Rothwell Reviewed-by: Kees Cook Signed-off-by: Gustavo A. R. Silva --- arch/arm/plat-omap/dma.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/arch/arm/plat-omap/dma.c b/arch/arm/plat-omap/dma.c index 79f43acf9acb..08c99413d02c 100644 --- a/arch/arm/plat-omap/dma.c +++ b/arch/arm/plat-omap/dma.c @@ -388,17 +388,15 @@ void omap_set_dma_src_burst_mode(int lch, enum omap_dma_burst_mode burst_mode) /* * not supported by current hardware on OMAP1 * w |= (0x03 << 7); - * fall through */ + /* fall through */ case OMAP_DMA_DATA_BURST_16: if (dma_omap2plus()) { burst = 0x3; break; } - /* - * OMAP1 don't support burst 16 - * fall through - */ + /* OMAP1 don't support burst 16 */ + /* fall through */ default: BUG(); } @@ -474,10 +472,8 @@ void omap_set_dma_dest_burst_mode(int lch, enum omap_dma_burst_mode burst_mode) burst = 0x3; break; } - /* - * OMAP1 don't support burst 16 - * fall through - */ + /* OMAP1 don't support burst 16 */ + /* fall through */ default: printk(KERN_ERR "Invalid DMA burst mode\n"); BUG(); From 795952d9f40c451a8b22c6610df1d59f57ce2046 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Sun, 28 Jul 2019 18:53:15 -0500 Subject: [PATCH 308/482] mfd: db8500-prcmu: Mark expected switch fall-throughs Mark switch cases where we are expecting to fall through. This patch fixes the following warnings: drivers/mfd/db8500-prcmu.c: In function 'dsiclk_rate': drivers/mfd/db8500-prcmu.c:1592:7: warning: this statement may fall through [-Wimplicit-fallthrough=] div *= 2; ~~~~^~~~ drivers/mfd/db8500-prcmu.c:1593:2: note: here case PRCM_DSI_PLLOUT_SEL_PHI_2: ^~~~ drivers/mfd/db8500-prcmu.c:1594:7: warning: this statement may fall through [-Wimplicit-fallthrough=] div *= 2; ~~~~^~~~ drivers/mfd/db8500-prcmu.c:1595:2: note: here case PRCM_DSI_PLLOUT_SEL_PHI: ^~~~ Reported-by: Stephen Rothwell Reviewed-by: Kees Cook Reviewed-by: Linus Walleij Signed-off-by: Gustavo A. R. Silva --- drivers/mfd/db8500-prcmu.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/mfd/db8500-prcmu.c b/drivers/mfd/db8500-prcmu.c index 3f21e26b8d36..90e0f21bc49c 100644 --- a/drivers/mfd/db8500-prcmu.c +++ b/drivers/mfd/db8500-prcmu.c @@ -1590,8 +1590,10 @@ static unsigned long dsiclk_rate(u8 n) switch (divsel) { case PRCM_DSI_PLLOUT_SEL_PHI_4: div *= 2; + /* Fall through */ case PRCM_DSI_PLLOUT_SEL_PHI_2: div *= 2; + /* Fall through */ case PRCM_DSI_PLLOUT_SEL_PHI: return pll_rate(PRCM_PLLDSI_FREQ, clock_rate(PRCMU_HDMICLK), PLL_RAW) / div; From 9039782047e769bd446667a9c14c0d3b8948ae51 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Sun, 28 Jul 2019 18:57:05 -0500 Subject: [PATCH 309/482] mfd: omap-usb-host: Mark expected switch fall-throughs Mark switch cases where we are expecting to fall through. This patch fixes the following warnings: drivers/mfd/omap-usb-host.c: In function 'usbhs_runtime_resume': drivers/mfd/omap-usb-host.c:303:7: warning: this statement may fall through [-Wimplicit-fallthrough=] if (!IS_ERR(omap->hsic480m_clk[i])) { ^ drivers/mfd/omap-usb-host.c:313:3: note: here case OMAP_EHCI_PORT_MODE_TLL: ^~~~ drivers/mfd/omap-usb-host.c: In function 'usbhs_runtime_suspend': drivers/mfd/omap-usb-host.c:345:7: warning: this statement may fall through [-Wimplicit-fallthrough=] if (!IS_ERR(omap->hsic480m_clk[i])) ^ drivers/mfd/omap-usb-host.c:349:3: note: here case OMAP_EHCI_PORT_MODE_TLL: ^~~~ Reported-by: Stephen Rothwell Reviewed-by: Kees Cook Signed-off-by: Gustavo A. R. Silva --- drivers/mfd/omap-usb-host.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/mfd/omap-usb-host.c b/drivers/mfd/omap-usb-host.c index 792b855a9104..4798d9f3f9d5 100644 --- a/drivers/mfd/omap-usb-host.c +++ b/drivers/mfd/omap-usb-host.c @@ -308,7 +308,7 @@ static int usbhs_runtime_resume(struct device *dev) i, r); } } - /* Fall through as HSIC mode needs utmi_clk */ + /* Fall through - as HSIC mode needs utmi_clk */ case OMAP_EHCI_PORT_MODE_TLL: if (!IS_ERR(omap->utmi_clk[i])) { @@ -344,7 +344,7 @@ static int usbhs_runtime_suspend(struct device *dev) if (!IS_ERR(omap->hsic480m_clk[i])) clk_disable_unprepare(omap->hsic480m_clk[i]); - /* Fall through as utmi_clks were used in HSIC mode */ + /* Fall through - as utmi_clks were used in HSIC mode */ case OMAP_EHCI_PORT_MODE_TLL: if (!IS_ERR(omap->utmi_clk[i])) From e9d81fc5b2014eb17d45ba4940bfb603d57bbcb3 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Sun, 28 Jul 2019 19:11:11 -0500 Subject: [PATCH 310/482] ARM: signal: Mark expected switch fall-through Mark switch cases where we are expecting to fall through. This patch fixes the following warning: arch/arm/kernel/signal.c: In function 'do_signal': arch/arm/kernel/signal.c:598:12: warning: this statement may fall through [-Wimplicit-fallthrough=] restart -= 2; ~~~~~~~~^~~~ arch/arm/kernel/signal.c:599:3: note: here case -ERESTARTNOHAND: ^~~~ Reported-by: Stephen Rothwell Reviewed-by: Kees Cook Signed-off-by: Gustavo A. R. Silva --- arch/arm/kernel/signal.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c index 09f6fdd41974..ab2568996ddb 100644 --- a/arch/arm/kernel/signal.c +++ b/arch/arm/kernel/signal.c @@ -596,6 +596,7 @@ static int do_signal(struct pt_regs *regs, int syscall) switch (retval) { case -ERESTART_RESTARTBLOCK: restart -= 2; + /* Fall through */ case -ERESTARTNOHAND: case -ERESTARTSYS: case -ERESTARTNOINTR: From d259f94f7265065b5c2b2bd5bbe4fa9b76504440 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 29 Jul 2019 10:08:05 -0500 Subject: [PATCH 311/482] watchdog: Mark expected switch fall-throughs Mark switch cases where we are expecting to fall through. This patch fixes the following warnings: drivers/watchdog/ar7_wdt.c: warning: this statement may fall through [-Wimplicit-fallthrough=]: => 237:3 drivers/watchdog/pcwd.c: warning: this statement may fall through [-Wimplicit-fallthrough=]: => 653:3 drivers/watchdog/sb_wdog.c: warning: this statement may fall through [-Wimplicit-fallthrough=]: => 204:3 drivers/watchdog/wdt.c: warning: this statement may fall through [-Wimplicit-fallthrough=]: => 391:3 Reported-by: Geert Uytterhoeven Reviewed-by: Kees Cook Reviewed-by: Guenter Roeck Signed-off-by: Gustavo A. R. Silva --- drivers/watchdog/ar7_wdt.c | 1 + drivers/watchdog/pcwd.c | 2 +- drivers/watchdog/sb_wdog.c | 1 + drivers/watchdog/wdt.c | 2 +- 4 files changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/watchdog/ar7_wdt.c b/drivers/watchdog/ar7_wdt.c index b9b2d06b3879..668a1c704f28 100644 --- a/drivers/watchdog/ar7_wdt.c +++ b/drivers/watchdog/ar7_wdt.c @@ -235,6 +235,7 @@ static long ar7_wdt_ioctl(struct file *file, ar7_wdt_update_margin(new_margin); ar7_wdt_kick(1); spin_unlock(&wdt_lock); + /* Fall through */ case WDIOC_GETTIMEOUT: if (put_user(margin, (int *)arg)) diff --git a/drivers/watchdog/pcwd.c b/drivers/watchdog/pcwd.c index 1b2cf5b95a89..c3c93e00b320 100644 --- a/drivers/watchdog/pcwd.c +++ b/drivers/watchdog/pcwd.c @@ -651,7 +651,7 @@ static long pcwd_ioctl(struct file *file, unsigned int cmd, unsigned long arg) return -EINVAL; pcwd_keepalive(); - /* Fall */ + /* Fall through */ case WDIOC_GETTIMEOUT: return put_user(heartbeat, argp); diff --git a/drivers/watchdog/sb_wdog.c b/drivers/watchdog/sb_wdog.c index 5a6ced7a7e8f..202fc8d8ca5f 100644 --- a/drivers/watchdog/sb_wdog.c +++ b/drivers/watchdog/sb_wdog.c @@ -202,6 +202,7 @@ static long sbwdog_ioctl(struct file *file, unsigned int cmd, timeout = time; sbwdog_set(user_dog, timeout); sbwdog_pet(user_dog); + /* Fall through */ case WDIOC_GETTIMEOUT: /* diff --git a/drivers/watchdog/wdt.c b/drivers/watchdog/wdt.c index 0650100fad00..7d278b37e083 100644 --- a/drivers/watchdog/wdt.c +++ b/drivers/watchdog/wdt.c @@ -389,7 +389,7 @@ static long wdt_ioctl(struct file *file, unsigned int cmd, unsigned long arg) if (wdt_set_heartbeat(new_heartbeat)) return -EINVAL; wdt_ping(); - /* Fall */ + /* Fall through */ case WDIOC_GETTIMEOUT: return put_user(heartbeat, p); default: From 24f2161887c90ca3791f0224933642b6da7f9573 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 29 Jul 2019 15:03:49 -0500 Subject: [PATCH 312/482] watchdog: scx200_wdt: Mark expected switch fall-through MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mark switch cases where we are expecting to fall through. This patch fixes the following warning (Building: i386): drivers/watchdog/scx200_wdt.c: In function ‘scx200_wdt_ioctl’: drivers/watchdog/scx200_wdt.c:188:3: warning: this statement may fall through [-Wimplicit-fallthrough=] scx200_wdt_ping(); ^~~~~~~~~~~~~~~~~ drivers/watchdog/scx200_wdt.c:189:2: note: here case WDIOC_GETTIMEOUT: ^~~~ Reviewed-by: Kees Cook Reviewed-by: Guenter Roeck Signed-off-by: Gustavo A. R. Silva --- drivers/watchdog/scx200_wdt.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/watchdog/scx200_wdt.c b/drivers/watchdog/scx200_wdt.c index efd7996694de..46268309ee9b 100644 --- a/drivers/watchdog/scx200_wdt.c +++ b/drivers/watchdog/scx200_wdt.c @@ -186,6 +186,7 @@ static long scx200_wdt_ioctl(struct file *file, unsigned int cmd, margin = new_margin; scx200_wdt_update_margin(); scx200_wdt_ping(); + /* Fall through */ case WDIOC_GETTIMEOUT: if (put_user(margin, p)) return -EFAULT; From d51c61637b4aca6c06015a34063e41279c32b8e8 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 29 Jul 2019 17:30:57 -0500 Subject: [PATCH 313/482] watchdog: wdt977: Mark expected switch fall-through MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mark switch cases where we are expecting to fall through. This patch fixes the following warning (Building: arm): drivers/watchdog/wdt977.c: In function ‘wdt977_ioctl’: LD [M] drivers/media/platform/vicodec/vicodec.o drivers/watchdog/wdt977.c:400:3: warning: this statement may fall through [-Wimplicit-fallthrough=] wdt977_keepalive(); ^~~~~~~~~~~~~~~~~~ drivers/watchdog/wdt977.c:403:2: note: here case WDIOC_GETTIMEOUT: ^~~~ Reviewed-by: Kees Cook Reviewed-by: Guenter Roeck Signed-off-by: Gustavo A. R. Silva --- drivers/watchdog/wdt977.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/watchdog/wdt977.c b/drivers/watchdog/wdt977.c index 567005d7598e..5c52c73e1839 100644 --- a/drivers/watchdog/wdt977.c +++ b/drivers/watchdog/wdt977.c @@ -398,7 +398,7 @@ static long wdt977_ioctl(struct file *file, unsigned int cmd, return -EINVAL; wdt977_keepalive(); - /* Fall */ + /* Fall through */ case WDIOC_GETTIMEOUT: return put_user(timeout, uarg.i); From 3d86c7ad56a9b1109a705a100990f1e7be1d3d43 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 29 Jul 2019 17:35:45 -0500 Subject: [PATCH 314/482] crypto: ux500/crypt: Mark expected switch fall-throughs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mark switch cases where we are expecting to fall through. This patch fixes the following warning (Building: arm): drivers/crypto/ux500/cryp/cryp.c: In function ‘cryp_save_device_context’: drivers/crypto/ux500/cryp/cryp.c:316:16: warning: this statement may fall through [-Wimplicit-fallthrough=] ctx->key_4_r = readl_relaxed(&src_reg->key_4_r); drivers/crypto/ux500/cryp/cryp.c:318:2: note: here case CRYP_KEY_SIZE_192: ^~~~ drivers/crypto/ux500/cryp/cryp.c:320:16: warning: this statement may fall through [-Wimplicit-fallthrough=] ctx->key_3_r = readl_relaxed(&src_reg->key_3_r); drivers/crypto/ux500/cryp/cryp.c:322:2: note: here case CRYP_KEY_SIZE_128: ^~~~ drivers/crypto/ux500/cryp/cryp.c:324:16: warning: this statement may fall through [-Wimplicit-fallthrough=] ctx->key_2_r = readl_relaxed(&src_reg->key_2_r); drivers/crypto/ux500/cryp/cryp.c:326:2: note: here default: ^~~~~~~ In file included from ./include/linux/io.h:13:0, from drivers/crypto/ux500/cryp/cryp_p.h:14, from drivers/crypto/ux500/cryp/cryp.c:15: drivers/crypto/ux500/cryp/cryp.c: In function ‘cryp_restore_device_context’: ./arch/arm/include/asm/io.h:92:22: warning: this statement may fall through [-Wimplicit-fallthrough=] #define __raw_writel __raw_writel ^ ./arch/arm/include/asm/io.h:299:29: note: in expansion of macro ‘__raw_writel’ #define writel_relaxed(v,c) __raw_writel((__force u32) cpu_to_le32(v),c) ^~~~~~~~~~~~ drivers/crypto/ux500/cryp/cryp.c:363:3: note: in expansion of macro ‘writel_relaxed’ writel_relaxed(ctx->key_4_r, ®->key_4_r); ^~~~~~~~~~~~~~ drivers/crypto/ux500/cryp/cryp.c:365:2: note: here case CRYP_KEY_SIZE_192: ^~~~ In file included from ./include/linux/io.h:13:0, from drivers/crypto/ux500/cryp/cryp_p.h:14, from drivers/crypto/ux500/cryp/cryp.c:15: ./arch/arm/include/asm/io.h:92:22: warning: this statement may fall through [-Wimplicit-fallthrough=] #define __raw_writel __raw_writel ^ ./arch/arm/include/asm/io.h:299:29: note: in expansion of macro ‘__raw_writel’ #define writel_relaxed(v,c) __raw_writel((__force u32) cpu_to_le32(v),c) ^~~~~~~~~~~~ drivers/crypto/ux500/cryp/cryp.c:367:3: note: in expansion of macro ‘writel_relaxed’ writel_relaxed(ctx->key_3_r, ®->key_3_r); ^~~~~~~~~~~~~~ drivers/crypto/ux500/cryp/cryp.c:369:2: note: here case CRYP_KEY_SIZE_128: ^~~~ In file included from ./include/linux/io.h:13:0, from drivers/crypto/ux500/cryp/cryp_p.h:14, from drivers/crypto/ux500/cryp/cryp.c:15: ./arch/arm/include/asm/io.h:92:22: warning: this statement may fall through [-Wimplicit-fallthrough=] #define __raw_writel __raw_writel ^ ./arch/arm/include/asm/io.h:299:29: note: in expansion of macro ‘__raw_writel’ #define writel_relaxed(v,c) __raw_writel((__force u32) cpu_to_le32(v),c) ^~~~~~~~~~~~ drivers/crypto/ux500/cryp/cryp.c:371:3: note: in expansion of macro ‘writel_relaxed’ writel_relaxed(ctx->key_2_r, ®->key_2_r); ^~~~~~~~~~~~~~ drivers/crypto/ux500/cryp/cryp.c:373:2: note: here default: ^~~~~~~ Signed-off-by: Gustavo A. R. Silva --- drivers/crypto/ux500/cryp/cryp.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/crypto/ux500/cryp/cryp.c b/drivers/crypto/ux500/cryp/cryp.c index ece83a363e11..f22f6fa612b3 100644 --- a/drivers/crypto/ux500/cryp/cryp.c +++ b/drivers/crypto/ux500/cryp/cryp.c @@ -314,14 +314,17 @@ void cryp_save_device_context(struct cryp_device_data *device_data, case CRYP_KEY_SIZE_256: ctx->key_4_l = readl_relaxed(&src_reg->key_4_l); ctx->key_4_r = readl_relaxed(&src_reg->key_4_r); + /* Fall through */ case CRYP_KEY_SIZE_192: ctx->key_3_l = readl_relaxed(&src_reg->key_3_l); ctx->key_3_r = readl_relaxed(&src_reg->key_3_r); + /* Fall through */ case CRYP_KEY_SIZE_128: ctx->key_2_l = readl_relaxed(&src_reg->key_2_l); ctx->key_2_r = readl_relaxed(&src_reg->key_2_r); + /* Fall through */ default: ctx->key_1_l = readl_relaxed(&src_reg->key_1_l); @@ -361,14 +364,17 @@ void cryp_restore_device_context(struct cryp_device_data *device_data, case CRYP_KEY_SIZE_256: writel_relaxed(ctx->key_4_l, ®->key_4_l); writel_relaxed(ctx->key_4_r, ®->key_4_r); + /* Fall through */ case CRYP_KEY_SIZE_192: writel_relaxed(ctx->key_3_l, ®->key_3_l); writel_relaxed(ctx->key_3_r, ®->key_3_r); + /* Fall through */ case CRYP_KEY_SIZE_128: writel_relaxed(ctx->key_2_l, ®->key_2_l); writel_relaxed(ctx->key_2_r, ®->key_2_r); + /* Fall through */ default: writel_relaxed(ctx->key_1_l, ®->key_1_l); From 7b7331511e66cfe671e88daa7133f6597a86aced Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 29 Jul 2019 19:13:54 -0500 Subject: [PATCH 315/482] s390/net: Mark expected switch fall-throughs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mark switch cases where we are expecting to fall through. This patch fixes the following warnings (Building: s390): drivers/s390/net/ctcm_fsms.c: In function ‘ctcmpc_chx_attnbusy’: drivers/s390/net/ctcm_fsms.c:1703:6: warning: this statement may fall through [-Wimplicit-fallthrough=] if (grp->changed_side == 1) { ^ drivers/s390/net/ctcm_fsms.c:1707:2: note: here case MPCG_STATE_XID0IOWAIX: ^~~~ drivers/s390/net/ctcm_mpc.c: In function ‘ctc_mpc_alloc_channel’: drivers/s390/net/ctcm_mpc.c:358:6: warning: this statement may fall through [-Wimplicit-fallthrough=] if (callback) ^ drivers/s390/net/ctcm_mpc.c:360:2: note: here case MPCG_STATE_XID0IOWAIT: ^~~~ drivers/s390/net/ctcm_mpc.c: In function ‘mpc_action_timeout’: drivers/s390/net/ctcm_mpc.c:1469:6: warning: this statement may fall through [-Wimplicit-fallthrough=] if ((fsm_getstate(rch->fsm) == CH_XID0_PENDING) && ^ drivers/s390/net/ctcm_mpc.c:1472:2: note: here default: ^~~~~~~ drivers/s390/net/ctcm_mpc.c: In function ‘mpc_send_qllc_discontact’: drivers/s390/net/ctcm_mpc.c:2087:6: warning: this statement may fall through [-Wimplicit-fallthrough=] if (grp->estconnfunc) { ^ drivers/s390/net/ctcm_mpc.c:2092:2: note: here case MPCG_STATE_FLOWC: ^~~~ drivers/s390/net/qeth_l2_main.c: In function ‘qeth_l2_process_inbound_buffer’: drivers/s390/net/qeth_l2_main.c:328:7: warning: this statement may fall through [-Wimplicit-fallthrough=] if (IS_OSN(card)) { ^ drivers/s390/net/qeth_l2_main.c:337:3: note: here default: ^~~~~~~ Reviewed-by: Kees Cook Signed-off-by: Gustavo A. R. Silva --- drivers/s390/net/ctcm_fsms.c | 1 + drivers/s390/net/ctcm_mpc.c | 3 +++ drivers/s390/net/qeth_l2_main.c | 2 +- 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/s390/net/ctcm_fsms.c b/drivers/s390/net/ctcm_fsms.c index 1b4ee570b712..4a8a5373cb35 100644 --- a/drivers/s390/net/ctcm_fsms.c +++ b/drivers/s390/net/ctcm_fsms.c @@ -1704,6 +1704,7 @@ static void ctcmpc_chx_attnbusy(fsm_instance *fsm, int event, void *arg) grp->changed_side = 2; break; } + /* Else, fall through */ case MPCG_STATE_XID0IOWAIX: case MPCG_STATE_XID7INITW: case MPCG_STATE_XID7INITX: diff --git a/drivers/s390/net/ctcm_mpc.c b/drivers/s390/net/ctcm_mpc.c index e02f295d38a9..1534420a0243 100644 --- a/drivers/s390/net/ctcm_mpc.c +++ b/drivers/s390/net/ctcm_mpc.c @@ -357,6 +357,7 @@ int ctc_mpc_alloc_channel(int port_num, void (*callback)(int, int)) /*fsm_newstate(grp->fsm, MPCG_STATE_XID2INITW);*/ if (callback) grp->send_qllc_disc = 1; + /* Else, fall through */ case MPCG_STATE_XID0IOWAIT: fsm_deltimer(&grp->timer); grp->outstanding_xid2 = 0; @@ -1469,6 +1470,7 @@ static void mpc_action_timeout(fsm_instance *fi, int event, void *arg) if ((fsm_getstate(rch->fsm) == CH_XID0_PENDING) && (fsm_getstate(wch->fsm) == CH_XID0_PENDING)) break; + /* Else, fall through */ default: fsm_event(grp->fsm, MPCG_EVENT_INOP, dev); } @@ -2089,6 +2091,7 @@ static int mpc_send_qllc_discontact(struct net_device *dev) grp->estconnfunc = NULL; break; } + /* Else, fall through */ case MPCG_STATE_FLOWC: case MPCG_STATE_READY: grp->send_qllc_disc = 2; diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index fd64bc3f4062..cbead3d1b2fd 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -333,7 +333,7 @@ static int qeth_l2_process_inbound_buffer(struct qeth_card *card, card->osn_info.data_cb(skb); break; } - /* else unknown */ + /* Else, fall through */ default: dev_kfree_skb_any(skb); QETH_CARD_TEXT(card, 3, "inbunkno"); From 40ad2de37f2a6be5726eb40ed085dff60df3099b Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 29 Jul 2019 20:45:29 -0500 Subject: [PATCH 316/482] watchdog: riowd: Mark expected switch fall-through MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mark switch cases where we are expecting to fall through. This patch fixes the following warnings (Building: sparc64): drivers/watchdog/riowd.c: In function ‘riowd_ioctl’: drivers/watchdog/riowd.c:136:3: warning: this statement may fall through [-Wimplicit-fallthrough=] riowd_writereg(p, riowd_timeout, WDTO_INDEX); ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ drivers/watchdog/riowd.c:139:2: note: here case WDIOC_GETTIMEOUT: ^~~~ Reviewed-by: Kees Cook Reviewed-by: Guenter Roeck Signed-off-by: Gustavo A. R. Silva --- drivers/watchdog/riowd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/watchdog/riowd.c b/drivers/watchdog/riowd.c index 41a2a11535a6..b35f7be20c00 100644 --- a/drivers/watchdog/riowd.c +++ b/drivers/watchdog/riowd.c @@ -134,7 +134,7 @@ static long riowd_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) return -EINVAL; riowd_timeout = (new_margin + 59) / 60; riowd_writereg(p, riowd_timeout, WDTO_INDEX); - /* Fall */ + /* Fall through */ case WDIOC_GETTIMEOUT: return put_user(riowd_timeout * 60, (int __user *)argp); From 70a2783c1893a7e485b3be1c2ef23286fa252493 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Fri, 2 Aug 2019 14:10:29 -0500 Subject: [PATCH 317/482] video: fbdev: omapfb_main: Mark expected switch fall-throughs Mark switch cases where we are expecting to fall through. This patch fixes the following warning (Building: omap1_defconfig arm): drivers/watchdog/wdt285.c:170:3: warning: this statement may fall through [-Wimplicit-fallthrough=] drivers/watchdog/ar7_wdt.c:237:3: warning: this statement may fall through [-Wimplicit-fallthrough=] drivers/video/fbdev/omap/omapfb_main.c:449:23: warning: this statement may fall through [-Wimplicit-fallthrough=] drivers/video/fbdev/omap/omapfb_main.c:1549:6: warning: this statement may fall through [-Wimplicit-fallthrough=] drivers/video/fbdev/omap/omapfb_main.c:1547:3: warning: this statement may fall through [-Wimplicit-fallthrough=] drivers/video/fbdev/omap/omapfb_main.c:1545:3: warning: this statement may fall through [-Wimplicit-fallthrough=] drivers/video/fbdev/omap/omapfb_main.c:1543:3: warning: this statement may fall through [-Wimplicit-fallthrough=] drivers/video/fbdev/omap/omapfb_main.c:1540:6: warning: this statement may fall through [-Wimplicit-fallthrough=] drivers/video/fbdev/omap/omapfb_main.c:1538:3: warning: this statement may fall through [-Wimplicit-fallthrough=] drivers/video/fbdev/omap/omapfb_main.c:1535:3: warning: this statement may fall through [-Wimplicit-fallthrough=] Reviewed-by: Kees Cook Signed-off-by: Gustavo A. R. Silva --- drivers/video/fbdev/omap/omapfb_main.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/video/fbdev/omap/omapfb_main.c b/drivers/video/fbdev/omap/omapfb_main.c index 90eca64e3144..702cca59bda1 100644 --- a/drivers/video/fbdev/omap/omapfb_main.c +++ b/drivers/video/fbdev/omap/omapfb_main.c @@ -447,6 +447,7 @@ static int set_color_mode(struct omapfb_plane_struct *plane, return 0; case 12: var->bits_per_pixel = 16; + /* fall through */ case 16: if (plane->fbdev->panel->bpp == 12) plane->color_mode = OMAPFB_COLOR_RGB444; @@ -1534,20 +1535,27 @@ static void omapfb_free_resources(struct omapfb_device *fbdev, int state) case OMAPFB_ACTIVE: for (i = 0; i < fbdev->mem_desc.region_cnt; i++) unregister_framebuffer(fbdev->fb_info[i]); + /* fall through */ case 7: omapfb_unregister_sysfs(fbdev); + /* fall through */ case 6: if (fbdev->panel->disable) fbdev->panel->disable(fbdev->panel); + /* fall through */ case 5: omapfb_set_update_mode(fbdev, OMAPFB_UPDATE_DISABLED); + /* fall through */ case 4: planes_cleanup(fbdev); + /* fall through */ case 3: ctrl_cleanup(fbdev); + /* fall through */ case 2: if (fbdev->panel->cleanup) fbdev->panel->cleanup(fbdev->panel); + /* fall through */ case 1: dev_set_drvdata(fbdev->dev, NULL); kfree(fbdev); From 5f163f331b645106d548f3fcddfa1f92c1af60d0 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 5 Aug 2019 14:47:35 -0500 Subject: [PATCH 318/482] pcmcia: db1xxx_ss: Mark expected switch fall-throughs Mark switch cases where we are expecting to fall through. This patch fixes the following warnings (Building: db1xxx_defconfig mips): drivers/pcmcia/db1xxx_ss.c:257:3: warning: this statement may fall through [-Wimplicit-fallthrough=] drivers/pcmcia/db1xxx_ss.c:269:3: warning: this statement may fall through [-Wimplicit-fallthrough=] Reviewed-by: Kees Cook Signed-off-by: Gustavo A. R. Silva --- drivers/pcmcia/db1xxx_ss.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/pcmcia/db1xxx_ss.c b/drivers/pcmcia/db1xxx_ss.c index eb6168e6ac43..590e594092f2 100644 --- a/drivers/pcmcia/db1xxx_ss.c +++ b/drivers/pcmcia/db1xxx_ss.c @@ -255,8 +255,10 @@ static int db1x_pcmcia_configure(struct pcmcia_socket *skt, switch (state->Vcc) { case 50: ++v; + /* fall through */ case 33: ++v; + /* fall through */ case 0: break; default: @@ -267,9 +269,11 @@ static int db1x_pcmcia_configure(struct pcmcia_socket *skt, switch (state->Vpp) { case 12: ++p; + /* fall through */ case 33: case 50: ++p; + /* fall through */ case 0: break; default: From fccf01b6473c68692579bc008c1d38d6c3fcb276 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 6 Aug 2019 03:26:15 -0500 Subject: [PATCH 319/482] scsi: fas216: Mark expected switch fall-throughs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mark switch cases where we are expecting to fall through. Fix the following warnings (Building: rpc_defconfig arm): drivers/scsi/arm/fas216.c: In function ‘fas216_disconnect_intr’: drivers/scsi/arm/fas216.c:913:6: warning: this statement may fall through [-Wimplicit-fallthrough=] if (fas216_get_last_msg(info, info->scsi.msgin_fifo) == ABORT) { ^ drivers/scsi/arm/fas216.c:919:2: note: here default: /* huh? */ ^~~~~~~ drivers/scsi/arm/fas216.c: In function ‘fas216_kick’: drivers/scsi/arm/fas216.c:1959:3: warning: this statement may fall through [-Wimplicit-fallthrough=] fas216_allocate_tag(info, SCpnt); ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ drivers/scsi/arm/fas216.c:1960:2: note: here case TYPE_OTHER: ^~~~ drivers/scsi/arm/fas216.c: In function ‘fas216_busservice_intr’: drivers/scsi/arm/fas216.c:1413:3: warning: this statement may fall through [-Wimplicit-fallthrough=] fas216_stoptransfer(info); ^~~~~~~~~~~~~~~~~~~~~~~~~ drivers/scsi/arm/fas216.c:1414:2: note: here case STATE(STAT_STATUS, PHASE_SELSTEPS):/* Sel w/ steps -> Status */ ^~~~ drivers/scsi/arm/fas216.c:1424:3: warning: this statement may fall through [-Wimplicit-fallthrough=] fas216_stoptransfer(info); ^~~~~~~~~~~~~~~~~~~~~~~~~ drivers/scsi/arm/fas216.c:1425:2: note: here case STATE(STAT_MESGIN, PHASE_COMMAND): /* Command -> Message In */ ^~~~ drivers/scsi/arm/fas216.c: In function ‘fas216_funcdone_intr’: drivers/scsi/arm/fas216.c:1573:6: warning: this statement may fall through [-Wimplicit-fallthrough=] if ((stat & STAT_BUSMASK) == STAT_MESGIN) { ^ drivers/scsi/arm/fas216.c:1579:2: note: here default: ^~~~~~~ drivers/scsi/arm/fas216.c: In function ‘fas216_handlesync’: drivers/scsi/arm/fas216.c:605:20: warning: this statement may fall through [-Wimplicit-fallthrough=] info->scsi.phase = PHASE_MSGOUT_EXPECT; ~~~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~ drivers/scsi/arm/fas216.c:607:2: note: here case async: ^~~~ Signed-off-by: Gustavo A. R. Silva --- drivers/scsi/arm/fas216.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/scsi/arm/fas216.c b/drivers/scsi/arm/fas216.c index aea4fd73c862..6c68c2303638 100644 --- a/drivers/scsi/arm/fas216.c +++ b/drivers/scsi/arm/fas216.c @@ -603,6 +603,7 @@ static void fas216_handlesync(FAS216_Info *info, char *msg) msgqueue_flush(&info->scsi.msgs); msgqueue_addmsg(&info->scsi.msgs, 1, MESSAGE_REJECT); info->scsi.phase = PHASE_MSGOUT_EXPECT; + /* fall through */ case async: dev->period = info->ifcfg.asyncperiod / 4; @@ -915,6 +916,7 @@ static void fas216_disconnect_intr(FAS216_Info *info) fas216_done(info, DID_ABORT); break; } + /* else, fall through */ default: /* huh? */ printk(KERN_ERR "scsi%d.%c: unexpected disconnect in phase %s\n", @@ -1411,6 +1413,8 @@ static void fas216_busservice_intr(FAS216_Info *info, unsigned int stat, unsigne case STATE(STAT_STATUS, PHASE_DATAOUT): /* Data Out -> Status */ case STATE(STAT_STATUS, PHASE_DATAIN): /* Data In -> Status */ fas216_stoptransfer(info); + /* fall through */ + case STATE(STAT_STATUS, PHASE_SELSTEPS):/* Sel w/ steps -> Status */ case STATE(STAT_STATUS, PHASE_MSGOUT): /* Message Out -> Status */ case STATE(STAT_STATUS, PHASE_COMMAND): /* Command -> Status */ @@ -1422,6 +1426,8 @@ static void fas216_busservice_intr(FAS216_Info *info, unsigned int stat, unsigne case STATE(STAT_MESGIN, PHASE_DATAOUT): /* Data Out -> Message In */ case STATE(STAT_MESGIN, PHASE_DATAIN): /* Data In -> Message In */ fas216_stoptransfer(info); + /* fall through */ + case STATE(STAT_MESGIN, PHASE_COMMAND): /* Command -> Message In */ case STATE(STAT_MESGIN, PHASE_SELSTEPS):/* Sel w/ steps -> Message In */ case STATE(STAT_MESGIN, PHASE_MSGOUT): /* Message Out -> Message In */ @@ -1575,6 +1581,7 @@ static void fas216_funcdone_intr(FAS216_Info *info, unsigned int stat, unsigned fas216_message(info); break; } + /* else, fall through */ default: fas216_log(info, 0, "internal phase %s for function done?" @@ -1957,6 +1964,7 @@ static void fas216_kick(FAS216_Info *info) switch (where_from) { case TYPE_QUEUE: fas216_allocate_tag(info, SCpnt); + /* fall through */ case TYPE_OTHER: fas216_start_command(info, SCpnt); break; From 1f7585f30a3af595ac07f610b807c738c9e3baab Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Wed, 7 Aug 2019 21:34:48 -0500 Subject: [PATCH 320/482] ARM: ep93xx: Mark expected switch fall-through Mark switch cases where we are expecting to fall through. Fix the following warnings (Building: arm-ep93xx_defconfig arm): arch/arm/mach-ep93xx/crunch.c: In function 'crunch_do': arch/arm/mach-ep93xx/crunch.c:46:3: warning: this statement may fall through [-Wimplicit-fallthrough=] memset(crunch_state, 0, sizeof(*crunch_state)); ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ arch/arm/mach-ep93xx/crunch.c:53:2: note: here case THREAD_NOTIFY_EXIT: ^~~~ Notice that, in this particular case, the code comment is modified in accordance with what GCC is expecting to find. Reported-by: kbuild test robot Signed-off-by: Gustavo A. R. Silva --- arch/arm/mach-ep93xx/crunch.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/mach-ep93xx/crunch.c b/arch/arm/mach-ep93xx/crunch.c index 1c9a4be8b503..1c05c5bf7e5c 100644 --- a/arch/arm/mach-ep93xx/crunch.c +++ b/arch/arm/mach-ep93xx/crunch.c @@ -49,6 +49,7 @@ static int crunch_do(struct notifier_block *self, unsigned long cmd, void *t) * FALLTHROUGH: Ensure we don't try to overwrite our newly * initialised state information on the first fault. */ + /* Fall through */ case THREAD_NOTIFY_EXIT: crunch_task_release(thread); From 57c722e932cfb82e9820bbaae1b1f7222ea97b52 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 9 Aug 2019 18:36:23 -0700 Subject: [PATCH 321/482] net/tls: swap sk_write_space on close Now that we swap the original proto and clear the ULP pointer on close we have to make sure no callback will try to access the freed state. sk_write_space is not part of sk_prot, remember to swap it. Reported-by: syzbot+dcdc9deefaec44785f32@syzkaller.appspotmail.com Fixes: 95fa145479fb ("bpf: sockmap/tls, close can race with map free") Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- net/tls/tls_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index 9cbbae606ced..ce6ef56a65ef 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -308,6 +308,7 @@ static void tls_sk_proto_close(struct sock *sk, long timeout) if (free_ctx) icsk->icsk_ulp_data = NULL; sk->sk_prot = ctx->sk_proto; + sk->sk_write_space = ctx->sk_write_space; write_unlock_bh(&sk->sk_callback_lock); release_sock(sk); if (ctx->tx_conf == TLS_SW) From cfef67f016e4c00a2f423256fc678a6967a9fc09 Mon Sep 17 00:00:00 2001 From: Wenwen Wang Date: Fri, 9 Aug 2019 23:29:48 -0500 Subject: [PATCH 322/482] ALSA: hda - Fix a memory leak bug In snd_hda_parse_generic_codec(), 'spec' is allocated through kzalloc(). Then, the pin widgets in 'codec' are parsed. However, if the parsing process fails, 'spec' is not deallocated, leading to a memory leak. To fix the above issue, free 'spec' before returning the error. Fixes: 352f7f914ebb ("ALSA: hda - Merge Realtek parser code to generic parser") Signed-off-by: Wenwen Wang Cc: Signed-off-by: Takashi Iwai --- sound/pci/hda/hda_generic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/pci/hda/hda_generic.c b/sound/pci/hda/hda_generic.c index 485edaba0037..8f2beb1f3ae4 100644 --- a/sound/pci/hda/hda_generic.c +++ b/sound/pci/hda/hda_generic.c @@ -6100,7 +6100,7 @@ static int snd_hda_parse_generic_codec(struct hda_codec *codec) err = snd_hda_parse_pin_defcfg(codec, &spec->autocfg, NULL, 0); if (err < 0) - return err; + goto error; err = snd_hda_gen_parse_auto_config(codec, &spec->autocfg); if (err < 0) From e61a41256edf9f425039129757af4a80b5ed8162 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 9 Aug 2019 08:01:17 +0530 Subject: [PATCH 323/482] cpufreq: dev_pm_qos_update_request() can return 1 on success dev_pm_qos_update_request() can return 1 on success, so don't treat it as an error. Fixes: 18c49926c4bf ("cpufreq: Add QoS requests for userspace constraints") Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 8dda62367816..c28ebf2810f1 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -2528,7 +2528,7 @@ static int cpufreq_boost_set_sw(int state) } ret = dev_pm_qos_update_request(policy->max_freq_req, policy->max); - if (ret) + if (ret < 0) break; } From 600f5badb78c316146d062cfd7af4a2cfb655baa Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Wed, 7 Aug 2019 12:36:01 +0530 Subject: [PATCH 324/482] cpufreq: schedutil: Don't skip freq update when limits change To avoid reducing the frequency of a CPU prematurely, we skip reducing the frequency if the CPU had been busy recently. This should not be done when the limits of the policy are changed, for example due to thermal throttling. We should always get the frequency within the new limits as soon as possible. Trying to fix this by using only one flag, i.e. need_freq_update, can lead to a race condition where the flag gets cleared without forcing us to change the frequency at least once. And so this patch introduces another flag to avoid that race condition. Fixes: ecd288429126 ("cpufreq: schedutil: Don't set next_freq to UINT_MAX") Cc: v4.18+ # v4.18+ Reported-by: Doug Smythies Tested-by: Doug Smythies Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- kernel/sched/cpufreq_schedutil.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index 636ca6f88c8e..867b4bb6d4be 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -40,6 +40,7 @@ struct sugov_policy { struct task_struct *thread; bool work_in_progress; + bool limits_changed; bool need_freq_update; }; @@ -89,8 +90,11 @@ static bool sugov_should_update_freq(struct sugov_policy *sg_policy, u64 time) !cpufreq_this_cpu_can_update(sg_policy->policy)) return false; - if (unlikely(sg_policy->need_freq_update)) + if (unlikely(sg_policy->limits_changed)) { + sg_policy->limits_changed = false; + sg_policy->need_freq_update = true; return true; + } delta_ns = time - sg_policy->last_freq_update_time; @@ -437,7 +441,7 @@ static inline bool sugov_cpu_is_busy(struct sugov_cpu *sg_cpu) { return false; } static inline void ignore_dl_rate_limit(struct sugov_cpu *sg_cpu, struct sugov_policy *sg_policy) { if (cpu_bw_dl(cpu_rq(sg_cpu->cpu)) > sg_cpu->bw_dl) - sg_policy->need_freq_update = true; + sg_policy->limits_changed = true; } static void sugov_update_single(struct update_util_data *hook, u64 time, @@ -457,7 +461,8 @@ static void sugov_update_single(struct update_util_data *hook, u64 time, if (!sugov_should_update_freq(sg_policy, time)) return; - busy = sugov_cpu_is_busy(sg_cpu); + /* Limits may have changed, don't skip frequency update */ + busy = !sg_policy->need_freq_update && sugov_cpu_is_busy(sg_cpu); util = sugov_get_util(sg_cpu); max = sg_cpu->max; @@ -831,6 +836,7 @@ static int sugov_start(struct cpufreq_policy *policy) sg_policy->last_freq_update_time = 0; sg_policy->next_freq = 0; sg_policy->work_in_progress = false; + sg_policy->limits_changed = false; sg_policy->need_freq_update = false; sg_policy->cached_raw_freq = 0; @@ -879,7 +885,7 @@ static void sugov_limits(struct cpufreq_policy *policy) mutex_unlock(&sg_policy->work_lock); } - sg_policy->need_freq_update = true; + sg_policy->limits_changed = true; } struct cpufreq_governor schedutil_gov = { From cf14be0b41c659ede89abef3f7ec0e98e6cfea5b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 6 Aug 2019 14:33:23 +0300 Subject: [PATCH 325/482] dma-direct: fix DMA_ATTR_NO_KERNEL_MAPPING The new DMA_ATTR_NO_KERNEL_MAPPING needs to actually assign a dma_addr to work. Also skip it if the architecture needs forced decryption handling, as that needs a kernel virtual address. Fixes: d98849aff879 (dma-direct: handle DMA_ATTR_NO_KERNEL_MAPPING in common code) Signed-off-by: Christoph Hellwig Reviewed-by: Lucas Stach --- kernel/dma/direct.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c index 59bdceea3737..974e96a1de44 100644 --- a/kernel/dma/direct.c +++ b/kernel/dma/direct.c @@ -130,10 +130,12 @@ void *dma_direct_alloc_pages(struct device *dev, size_t size, if (!page) return NULL; - if (attrs & DMA_ATTR_NO_KERNEL_MAPPING) { + if ((attrs & DMA_ATTR_NO_KERNEL_MAPPING) && + !force_dma_unencrypted(dev)) { /* remove any dirty cache lines on the kernel alias */ if (!PageHighMem(page)) arch_dma_prep_coherent(page, size); + *dma_handle = phys_to_dma(dev, page_to_phys(page)); /* return the page pointer as the opaque cookie */ return page; } @@ -178,7 +180,8 @@ void dma_direct_free_pages(struct device *dev, size_t size, void *cpu_addr, { unsigned int page_order = get_order(size); - if (attrs & DMA_ATTR_NO_KERNEL_MAPPING) { + if ((attrs & DMA_ATTR_NO_KERNEL_MAPPING) && + !force_dma_unencrypted(dev)) { /* cpu_addr is a struct page cookie, not a kernel address */ __dma_direct_free_pages(dev, size, cpu_addr); return; From d8ad55538abe443919e20e0bb996561bca9cad84 Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Mon, 5 Aug 2019 17:51:53 +0200 Subject: [PATCH 326/482] dma-direct: don't truncate dma_required_mask to bus addressing capabilities The dma required_mask needs to reflect the actual addressing capabilities needed to handle the whole system RAM. When truncated down to the bus addressing capabilities dma_addressing_limited() will incorrectly signal no limitations for devices which are restricted by the bus_dma_mask. Fixes: b4ebe6063204 (dma-direct: implement complete bus_dma_mask handling) Signed-off-by: Lucas Stach Tested-by: Atish Patra Signed-off-by: Christoph Hellwig --- kernel/dma/direct.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c index 974e96a1de44..795c9b095d75 100644 --- a/kernel/dma/direct.c +++ b/kernel/dma/direct.c @@ -47,9 +47,6 @@ u64 dma_direct_get_required_mask(struct device *dev) { u64 max_dma = phys_to_dma_direct(dev, (max_pfn - 1) << PAGE_SHIFT); - if (dev->bus_dma_mask && dev->bus_dma_mask < max_dma) - max_dma = dev->bus_dma_mask; - return (1ULL << (fls64(max_dma) - 1)) * 2 - 1; } From 33dcb37cef741294b481f4d889a465b8091f11bf Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 26 Jul 2019 09:26:40 +0200 Subject: [PATCH 327/482] dma-mapping: fix page attributes for dma_mmap_* All the way back to introducing dma_common_mmap we've defaulted to mark the pages as uncached. But this is wrong for DMA coherent devices. Later on DMA_ATTR_WRITE_COMBINE also got incorrect treatment as that flag is only treated special on the alloc side for non-coherent devices. Introduce a new dma_pgprot helper that deals with the check for coherent devices so that only the remapping cases ever reach arch_dma_mmap_pgprot and we thus ensure no aliasing of page attributes happens, which makes the powerpc version of arch_dma_mmap_pgprot obsolete and simplifies the remaining ones. Note that this means arch_dma_mmap_pgprot is a bit misnamed now, but we'll phase it out soon. Fixes: 64ccc9c033c6 ("common: dma-mapping: add support for generic dma_mmap_* calls") Reported-by: Shawn Anastasio Reported-by: Gavin Li Signed-off-by: Christoph Hellwig Acked-by: Catalin Marinas # arm64 --- arch/arm/mm/dma-mapping.c | 4 +--- arch/arm64/mm/dma-mapping.c | 4 +--- arch/powerpc/Kconfig | 1 - arch/powerpc/kernel/Makefile | 3 +-- arch/powerpc/kernel/dma-common.c | 17 ----------------- drivers/iommu/dma-iommu.c | 6 +++--- include/linux/dma-noncoherent.h | 13 +++++++++---- kernel/dma/mapping.c | 19 ++++++++++++++++++- kernel/dma/remap.c | 2 +- 9 files changed, 34 insertions(+), 35 deletions(-) delete mode 100644 arch/powerpc/kernel/dma-common.c diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index 6774b03aa405..d42557ee69c2 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -2405,9 +2405,7 @@ long arch_dma_coherent_to_pfn(struct device *dev, void *cpu_addr, pgprot_t arch_dma_mmap_pgprot(struct device *dev, pgprot_t prot, unsigned long attrs) { - if (!dev_is_dma_coherent(dev)) - return __get_dma_pgprot(attrs, prot); - return prot; + return __get_dma_pgprot(attrs, prot); } void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index 1d3f0b5a9940..bd2b039f43a6 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -14,9 +14,7 @@ pgprot_t arch_dma_mmap_pgprot(struct device *dev, pgprot_t prot, unsigned long attrs) { - if (!dev_is_dma_coherent(dev) || (attrs & DMA_ATTR_WRITE_COMBINE)) - return pgprot_writecombine(prot); - return prot; + return pgprot_writecombine(prot); } void arch_sync_dma_for_device(struct device *dev, phys_addr_t paddr, diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 77f6ebf97113..d8dcd8820369 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -121,7 +121,6 @@ config PPC select ARCH_32BIT_OFF_T if PPC32 select ARCH_HAS_DEBUG_VIRTUAL select ARCH_HAS_DEVMEM_IS_ALLOWED - select ARCH_HAS_DMA_MMAP_PGPROT select ARCH_HAS_ELF_RANDOMIZE select ARCH_HAS_FORTIFY_SOURCE select ARCH_HAS_GCOV_PROFILE_ALL diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index ea0c69236789..56dfa7a2a6f2 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -49,8 +49,7 @@ obj-y := cputable.o ptrace.o syscalls.o \ signal.o sysfs.o cacheinfo.o time.o \ prom.o traps.o setup-common.o \ udbg.o misc.o io.o misc_$(BITS).o \ - of_platform.o prom_parse.o \ - dma-common.o + of_platform.o prom_parse.o obj-$(CONFIG_PPC64) += setup_64.o sys_ppc32.o \ signal_64.o ptrace32.o \ paca.o nvram_64.o firmware.o diff --git a/arch/powerpc/kernel/dma-common.c b/arch/powerpc/kernel/dma-common.c deleted file mode 100644 index dc7ef6b17b69..000000000000 --- a/arch/powerpc/kernel/dma-common.c +++ /dev/null @@ -1,17 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Contains common dma routines for all powerpc platforms. - * - * Copyright (C) 2019 Shawn Anastasio. - */ - -#include -#include - -pgprot_t arch_dma_mmap_pgprot(struct device *dev, pgprot_t prot, - unsigned long attrs) -{ - if (!dev_is_dma_coherent(dev)) - return pgprot_noncached(prot); - return prot; -} diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index a7f9c3edbcb2..0015fe610b23 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -574,7 +574,7 @@ static void *iommu_dma_alloc_remap(struct device *dev, size_t size, struct iova_domain *iovad = &cookie->iovad; bool coherent = dev_is_dma_coherent(dev); int ioprot = dma_info_to_prot(DMA_BIDIRECTIONAL, coherent, attrs); - pgprot_t prot = arch_dma_mmap_pgprot(dev, PAGE_KERNEL, attrs); + pgprot_t prot = dma_pgprot(dev, PAGE_KERNEL, attrs); unsigned int count, min_size, alloc_sizes = domain->pgsize_bitmap; struct page **pages; struct sg_table sgt; @@ -975,7 +975,7 @@ static void *iommu_dma_alloc_pages(struct device *dev, size_t size, return NULL; if (IS_ENABLED(CONFIG_DMA_REMAP) && (!coherent || PageHighMem(page))) { - pgprot_t prot = arch_dma_mmap_pgprot(dev, PAGE_KERNEL, attrs); + pgprot_t prot = dma_pgprot(dev, PAGE_KERNEL, attrs); cpu_addr = dma_common_contiguous_remap(page, alloc_size, VM_USERMAP, prot, __builtin_return_address(0)); @@ -1035,7 +1035,7 @@ static int iommu_dma_mmap(struct device *dev, struct vm_area_struct *vma, unsigned long pfn, off = vma->vm_pgoff; int ret; - vma->vm_page_prot = arch_dma_mmap_pgprot(dev, vma->vm_page_prot, attrs); + vma->vm_page_prot = dma_pgprot(dev, vma->vm_page_prot, attrs); if (dma_mmap_from_dev_coherent(dev, vma, cpu_addr, size, &ret)) return ret; diff --git a/include/linux/dma-noncoherent.h b/include/linux/dma-noncoherent.h index 3813211a9aad..0bff3d7fac92 100644 --- a/include/linux/dma-noncoherent.h +++ b/include/linux/dma-noncoherent.h @@ -42,13 +42,18 @@ void arch_dma_free(struct device *dev, size_t size, void *cpu_addr, dma_addr_t dma_addr, unsigned long attrs); long arch_dma_coherent_to_pfn(struct device *dev, void *cpu_addr, dma_addr_t dma_addr); - -#ifdef CONFIG_ARCH_HAS_DMA_MMAP_PGPROT pgprot_t arch_dma_mmap_pgprot(struct device *dev, pgprot_t prot, unsigned long attrs); + +#ifdef CONFIG_MMU +pgprot_t dma_pgprot(struct device *dev, pgprot_t prot, unsigned long attrs); #else -# define arch_dma_mmap_pgprot(dev, prot, attrs) pgprot_noncached(prot) -#endif +static inline pgprot_t dma_pgprot(struct device *dev, pgprot_t prot, + unsigned long attrs) +{ + return prot; /* no protection bits supported without page tables */ +} +#endif /* CONFIG_MMU */ #ifdef CONFIG_DMA_NONCOHERENT_CACHE_SYNC void arch_dma_cache_sync(struct device *dev, void *vaddr, size_t size, diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c index b945239621d8..b0038ca3aa92 100644 --- a/kernel/dma/mapping.c +++ b/kernel/dma/mapping.c @@ -150,6 +150,23 @@ int dma_get_sgtable_attrs(struct device *dev, struct sg_table *sgt, } EXPORT_SYMBOL(dma_get_sgtable_attrs); +#ifdef CONFIG_MMU +/* + * Return the page attributes used for mapping dma_alloc_* memory, either in + * kernel space if remapping is needed, or to userspace through dma_mmap_*. + */ +pgprot_t dma_pgprot(struct device *dev, pgprot_t prot, unsigned long attrs) +{ + if (dev_is_dma_coherent(dev) || + (IS_ENABLED(CONFIG_DMA_NONCOHERENT_CACHE_SYNC) && + (attrs & DMA_ATTR_NON_CONSISTENT))) + return prot; + if (IS_ENABLED(CONFIG_ARCH_HAS_DMA_MMAP_PGPROT)) + return arch_dma_mmap_pgprot(dev, prot, attrs); + return pgprot_noncached(prot); +} +#endif /* CONFIG_MMU */ + /* * Create userspace mapping for the DMA-coherent memory. */ @@ -164,7 +181,7 @@ int dma_common_mmap(struct device *dev, struct vm_area_struct *vma, unsigned long pfn; int ret = -ENXIO; - vma->vm_page_prot = arch_dma_mmap_pgprot(dev, vma->vm_page_prot, attrs); + vma->vm_page_prot = dma_pgprot(dev, vma->vm_page_prot, attrs); if (dma_mmap_from_dev_coherent(dev, vma, cpu_addr, size, &ret)) return ret; diff --git a/kernel/dma/remap.c b/kernel/dma/remap.c index a594aec07882..ffe78f0b2fe4 100644 --- a/kernel/dma/remap.c +++ b/kernel/dma/remap.c @@ -218,7 +218,7 @@ void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, /* create a coherent mapping */ ret = dma_common_contiguous_remap(page, size, VM_USERMAP, - arch_dma_mmap_pgprot(dev, PAGE_KERNEL, attrs), + dma_pgprot(dev, PAGE_KERNEL, attrs), __builtin_return_address(0)); if (!ret) { __dma_direct_free_pages(dev, size, page); From bfd77145f35c3deafe57e9eb67fff4ccffdaef6e Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Mon, 5 Aug 2019 15:11:15 -0700 Subject: [PATCH 328/482] Makefile: Convert -Wimplicit-fallthrough=3 to just -Wimplicit-fallthrough for clang A compilation -Wimplicit-fallthrough warning was enabled by commit a035d552a93b ("Makefile: Globally enable fall-through warning") Even though clang 10.0.0 does not currently support this warning without a patch, clang currently does not support a value for this option. Link: https://bugs.llvm.org/show_bug.cgi?id=39382 The gcc default for this warning is 3 so removing the =3 has no effect for gcc and enables the warning for patched versions of clang. Also remove the =3 from an existing use in a parisc Makefile: arch/parisc/math-emu/Makefile Signed-off-by: Joe Perches Reviewed-and-tested-by: Nathan Chancellor Cc: Gustavo A. R. Silva Signed-off-by: Linus Torvalds --- Makefile | 2 +- arch/parisc/math-emu/Makefile | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 02aff718a11d..18a237743a02 100644 --- a/Makefile +++ b/Makefile @@ -846,7 +846,7 @@ NOSTDINC_FLAGS += -nostdinc -isystem $(shell $(CC) -print-file-name=include) KBUILD_CFLAGS += -Wdeclaration-after-statement # Warn about unmarked fall-throughs in switch statement. -KBUILD_CFLAGS += $(call cc-option,-Wimplicit-fallthrough=3,) +KBUILD_CFLAGS += $(call cc-option,-Wimplicit-fallthrough,) # Variable Length Arrays (VLAs) should not be used anywhere in the kernel KBUILD_CFLAGS += -Wvla diff --git a/arch/parisc/math-emu/Makefile b/arch/parisc/math-emu/Makefile index 55c1396580a4..3747a0cbd3b8 100644 --- a/arch/parisc/math-emu/Makefile +++ b/arch/parisc/math-emu/Makefile @@ -18,4 +18,4 @@ obj-y := frnd.o driver.o decode_exc.o fpudispatch.o denormal.o \ # other very old or stripped-down PA-RISC CPUs -- not currently supported obj-$(CONFIG_MATH_EMULATION) += unimplemented-math-emulation.o -CFLAGS_REMOVE_fpudispatch.o = -Wimplicit-fallthrough=3 +CFLAGS_REMOVE_fpudispatch.o = -Wimplicit-fallthrough From d45331b00ddb179e291766617259261c112db872 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 11 Aug 2019 13:26:41 -0700 Subject: [PATCH 329/482] Linux 5.3-rc4 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 18a237743a02..1b23f95db176 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 5 PATCHLEVEL = 3 SUBLEVEL = 0 -EXTRAVERSION = -rc3 +EXTRAVERSION = -rc4 NAME = Bobtail Squid # *DOCUMENTATION* From 54b13723f765122f3f5f49d07f3e48fa9fcf8c7c Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Sat, 10 Aug 2019 00:19:11 -0500 Subject: [PATCH 330/482] sh: kernel: disassemble: Mark expected switch fall-throughs Remove logically dead code and mark switch cases where we are expecting to fall through. Fix the following warnings (Building: defconfig sh): arch/sh/kernel/disassemble.c:478:8: warning: this statement may fall through [-Wimplicit-fallthrough=] arch/sh/kernel/disassemble.c:487:8: warning: this statement may fall through [-Wimplicit-fallthrough=] arch/sh/kernel/disassemble.c:496:8: warning: this statement may fall through [-Wimplicit-fallthrough=] Reviewed-by: Geert Uytterhoeven Reviewed-by: Guenter Roeck Tested-by: Guenter Roeck Signed-off-by: Gustavo A. R. Silva --- arch/sh/kernel/disassemble.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/sh/kernel/disassemble.c b/arch/sh/kernel/disassemble.c index defebf1a9c8a..845543780cc5 100644 --- a/arch/sh/kernel/disassemble.c +++ b/arch/sh/kernel/disassemble.c @@ -475,8 +475,6 @@ static void print_sh_insn(u32 memaddr, u16 insn) printk("dbr"); break; case FD_REG_N: - if (0) - goto d_reg_n; case F_REG_N: printk("fr%d", rn); break; @@ -488,7 +486,7 @@ static void print_sh_insn(u32 memaddr, u16 insn) printk("xd%d", rn & ~1); break; } - d_reg_n: + /* else, fall through */ case D_REG_N: printk("dr%d", rn); break; @@ -497,6 +495,7 @@ static void print_sh_insn(u32 memaddr, u16 insn) printk("xd%d", rm & ~1); break; } + /* else, fall through */ case D_REG_M: printk("dr%d", rm); break; From 1ee1119d184bb06af921b48c3021d921bbd85bac Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Fri, 9 Aug 2019 23:43:56 -0500 Subject: [PATCH 331/482] sh: kernel: hw_breakpoint: Fix missing break in switch statement Add missing break statement in order to prevent the code from falling through to case SH_BREAKPOINT_WRITE. Fixes: 09a072947791 ("sh: hw-breakpoints: Add preliminary support for SH-4A UBC.") Cc: stable@vger.kernel.org Reviewed-by: Geert Uytterhoeven Reviewed-by: Guenter Roeck Tested-by: Guenter Roeck Signed-off-by: Gustavo A. R. Silva --- arch/sh/kernel/hw_breakpoint.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/sh/kernel/hw_breakpoint.c b/arch/sh/kernel/hw_breakpoint.c index 3bd010b4c55f..f10d64311127 100644 --- a/arch/sh/kernel/hw_breakpoint.c +++ b/arch/sh/kernel/hw_breakpoint.c @@ -157,6 +157,7 @@ int arch_bp_generic_fields(int sh_len, int sh_type, switch (sh_type) { case SH_BREAKPOINT_READ: *gen_type = HW_BREAKPOINT_R; + break; case SH_BREAKPOINT_WRITE: *gen_type = HW_BREAKPOINT_W; break; From 59c84b9fcf42c99a945d5fdc49220d854e539690 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Tue, 6 Aug 2019 12:15:17 -0700 Subject: [PATCH 332/482] netdevsim: Restore per-network namespace accounting for fib entries Prior to the commit in the fixes tag, the resource controller in netdevsim tracked fib entries and rules per network namespace. Restore that behavior. Fixes: 5fc494225c1e ("netdevsim: create devlink instance per netdevsim instance") Signed-off-by: David Ahern Signed-off-by: David S. Miller --- drivers/net/netdevsim/dev.c | 63 ++++++++---------- drivers/net/netdevsim/fib.c | 102 ++++++++++++++++++------------ drivers/net/netdevsim/netdev.c | 9 ++- drivers/net/netdevsim/netdevsim.h | 10 ++- 4 files changed, 98 insertions(+), 86 deletions(-) diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c index c5c417a3c0ce..bcc40a236624 100644 --- a/drivers/net/netdevsim/dev.c +++ b/drivers/net/netdevsim/dev.c @@ -73,46 +73,47 @@ static void nsim_dev_port_debugfs_exit(struct nsim_dev_port *nsim_dev_port) debugfs_remove_recursive(nsim_dev_port->ddir); } +static struct net *nsim_devlink_net(struct devlink *devlink) +{ + return &init_net; +} + static u64 nsim_dev_ipv4_fib_resource_occ_get(void *priv) { - struct nsim_dev *nsim_dev = priv; + struct net *net = priv; - return nsim_fib_get_val(nsim_dev->fib_data, - NSIM_RESOURCE_IPV4_FIB, false); + return nsim_fib_get_val(net, NSIM_RESOURCE_IPV4_FIB, false); } static u64 nsim_dev_ipv4_fib_rules_res_occ_get(void *priv) { - struct nsim_dev *nsim_dev = priv; + struct net *net = priv; - return nsim_fib_get_val(nsim_dev->fib_data, - NSIM_RESOURCE_IPV4_FIB_RULES, false); + return nsim_fib_get_val(net, NSIM_RESOURCE_IPV4_FIB_RULES, false); } static u64 nsim_dev_ipv6_fib_resource_occ_get(void *priv) { - struct nsim_dev *nsim_dev = priv; + struct net *net = priv; - return nsim_fib_get_val(nsim_dev->fib_data, - NSIM_RESOURCE_IPV6_FIB, false); + return nsim_fib_get_val(net, NSIM_RESOURCE_IPV6_FIB, false); } static u64 nsim_dev_ipv6_fib_rules_res_occ_get(void *priv) { - struct nsim_dev *nsim_dev = priv; + struct net *net = priv; - return nsim_fib_get_val(nsim_dev->fib_data, - NSIM_RESOURCE_IPV6_FIB_RULES, false); + return nsim_fib_get_val(net, NSIM_RESOURCE_IPV6_FIB_RULES, false); } static int nsim_dev_resources_register(struct devlink *devlink) { - struct nsim_dev *nsim_dev = devlink_priv(devlink); struct devlink_resource_size_params params = { .size_max = (u64)-1, .size_granularity = 1, .unit = DEVLINK_RESOURCE_UNIT_ENTRY }; + struct net *net = nsim_devlink_net(devlink); int err; u64 n; @@ -126,8 +127,7 @@ static int nsim_dev_resources_register(struct devlink *devlink) goto out; } - n = nsim_fib_get_val(nsim_dev->fib_data, - NSIM_RESOURCE_IPV4_FIB, true); + n = nsim_fib_get_val(net, NSIM_RESOURCE_IPV4_FIB, true); err = devlink_resource_register(devlink, "fib", n, NSIM_RESOURCE_IPV4_FIB, NSIM_RESOURCE_IPV4, ¶ms); @@ -136,8 +136,7 @@ static int nsim_dev_resources_register(struct devlink *devlink) return err; } - n = nsim_fib_get_val(nsim_dev->fib_data, - NSIM_RESOURCE_IPV4_FIB_RULES, true); + n = nsim_fib_get_val(net, NSIM_RESOURCE_IPV4_FIB_RULES, true); err = devlink_resource_register(devlink, "fib-rules", n, NSIM_RESOURCE_IPV4_FIB_RULES, NSIM_RESOURCE_IPV4, ¶ms); @@ -156,8 +155,7 @@ static int nsim_dev_resources_register(struct devlink *devlink) goto out; } - n = nsim_fib_get_val(nsim_dev->fib_data, - NSIM_RESOURCE_IPV6_FIB, true); + n = nsim_fib_get_val(net, NSIM_RESOURCE_IPV6_FIB, true); err = devlink_resource_register(devlink, "fib", n, NSIM_RESOURCE_IPV6_FIB, NSIM_RESOURCE_IPV6, ¶ms); @@ -166,8 +164,7 @@ static int nsim_dev_resources_register(struct devlink *devlink) return err; } - n = nsim_fib_get_val(nsim_dev->fib_data, - NSIM_RESOURCE_IPV6_FIB_RULES, true); + n = nsim_fib_get_val(net, NSIM_RESOURCE_IPV6_FIB_RULES, true); err = devlink_resource_register(devlink, "fib-rules", n, NSIM_RESOURCE_IPV6_FIB_RULES, NSIM_RESOURCE_IPV6, ¶ms); @@ -179,19 +176,19 @@ static int nsim_dev_resources_register(struct devlink *devlink) devlink_resource_occ_get_register(devlink, NSIM_RESOURCE_IPV4_FIB, nsim_dev_ipv4_fib_resource_occ_get, - nsim_dev); + net); devlink_resource_occ_get_register(devlink, NSIM_RESOURCE_IPV4_FIB_RULES, nsim_dev_ipv4_fib_rules_res_occ_get, - nsim_dev); + net); devlink_resource_occ_get_register(devlink, NSIM_RESOURCE_IPV6_FIB, nsim_dev_ipv6_fib_resource_occ_get, - nsim_dev); + net); devlink_resource_occ_get_register(devlink, NSIM_RESOURCE_IPV6_FIB_RULES, nsim_dev_ipv6_fib_rules_res_occ_get, - nsim_dev); + net); out: return err; } @@ -199,11 +196,11 @@ static int nsim_dev_resources_register(struct devlink *devlink) static int nsim_dev_reload(struct devlink *devlink, struct netlink_ext_ack *extack) { - struct nsim_dev *nsim_dev = devlink_priv(devlink); enum nsim_resource_id res_ids[] = { NSIM_RESOURCE_IPV4_FIB, NSIM_RESOURCE_IPV4_FIB_RULES, NSIM_RESOURCE_IPV6_FIB, NSIM_RESOURCE_IPV6_FIB_RULES }; + struct net *net = nsim_devlink_net(devlink); int i; for (i = 0; i < ARRAY_SIZE(res_ids); ++i) { @@ -212,8 +209,7 @@ static int nsim_dev_reload(struct devlink *devlink, err = devlink_resource_size_get(devlink, res_ids[i], &val); if (!err) { - err = nsim_fib_set_max(nsim_dev->fib_data, - res_ids[i], val, extack); + err = nsim_fib_set_max(net, res_ids[i], val, extack); if (err) return err; } @@ -285,15 +281,9 @@ nsim_dev_create(struct nsim_bus_dev *nsim_bus_dev, unsigned int port_count) mutex_init(&nsim_dev->port_list_lock); nsim_dev->fw_update_status = true; - nsim_dev->fib_data = nsim_fib_create(); - if (IS_ERR(nsim_dev->fib_data)) { - err = PTR_ERR(nsim_dev->fib_data); - goto err_devlink_free; - } - err = nsim_dev_resources_register(devlink); if (err) - goto err_fib_destroy; + goto err_devlink_free; err = devlink_register(devlink, &nsim_bus_dev->dev); if (err) @@ -315,8 +305,6 @@ nsim_dev_create(struct nsim_bus_dev *nsim_bus_dev, unsigned int port_count) devlink_unregister(devlink); err_resources_unregister: devlink_resources_unregister(devlink, NULL); -err_fib_destroy: - nsim_fib_destroy(nsim_dev->fib_data); err_devlink_free: devlink_free(devlink); return ERR_PTR(err); @@ -330,7 +318,6 @@ static void nsim_dev_destroy(struct nsim_dev *nsim_dev) nsim_dev_debugfs_exit(nsim_dev); devlink_unregister(devlink); devlink_resources_unregister(devlink, NULL); - nsim_fib_destroy(nsim_dev->fib_data); mutex_destroy(&nsim_dev->port_list_lock); devlink_free(devlink); } diff --git a/drivers/net/netdevsim/fib.c b/drivers/net/netdevsim/fib.c index 8c57ba747772..f61d094746c0 100644 --- a/drivers/net/netdevsim/fib.c +++ b/drivers/net/netdevsim/fib.c @@ -18,6 +18,7 @@ #include #include #include +#include #include "netdevsim.h" @@ -32,14 +33,15 @@ struct nsim_per_fib_data { }; struct nsim_fib_data { - struct notifier_block fib_nb; struct nsim_per_fib_data ipv4; struct nsim_per_fib_data ipv6; }; -u64 nsim_fib_get_val(struct nsim_fib_data *fib_data, - enum nsim_resource_id res_id, bool max) +static unsigned int nsim_fib_net_id; + +u64 nsim_fib_get_val(struct net *net, enum nsim_resource_id res_id, bool max) { + struct nsim_fib_data *fib_data = net_generic(net, nsim_fib_net_id); struct nsim_fib_entry *entry; switch (res_id) { @@ -62,10 +64,10 @@ u64 nsim_fib_get_val(struct nsim_fib_data *fib_data, return max ? entry->max : entry->num; } -int nsim_fib_set_max(struct nsim_fib_data *fib_data, - enum nsim_resource_id res_id, u64 val, +int nsim_fib_set_max(struct net *net, enum nsim_resource_id res_id, u64 val, struct netlink_ext_ack *extack) { + struct nsim_fib_data *fib_data = net_generic(net, nsim_fib_net_id); struct nsim_fib_entry *entry; int err = 0; @@ -118,9 +120,9 @@ static int nsim_fib_rule_account(struct nsim_fib_entry *entry, bool add, return err; } -static int nsim_fib_rule_event(struct nsim_fib_data *data, - struct fib_notifier_info *info, bool add) +static int nsim_fib_rule_event(struct fib_notifier_info *info, bool add) { + struct nsim_fib_data *data = net_generic(info->net, nsim_fib_net_id); struct netlink_ext_ack *extack = info->extack; int err = 0; @@ -155,9 +157,9 @@ static int nsim_fib_account(struct nsim_fib_entry *entry, bool add, return err; } -static int nsim_fib_event(struct nsim_fib_data *data, - struct fib_notifier_info *info, bool add) +static int nsim_fib_event(struct fib_notifier_info *info, bool add) { + struct nsim_fib_data *data = net_generic(info->net, nsim_fib_net_id); struct netlink_ext_ack *extack = info->extack; int err = 0; @@ -176,22 +178,18 @@ static int nsim_fib_event(struct nsim_fib_data *data, static int nsim_fib_event_nb(struct notifier_block *nb, unsigned long event, void *ptr) { - struct nsim_fib_data *data = container_of(nb, struct nsim_fib_data, - fib_nb); struct fib_notifier_info *info = ptr; int err = 0; switch (event) { case FIB_EVENT_RULE_ADD: /* fall through */ case FIB_EVENT_RULE_DEL: - err = nsim_fib_rule_event(data, info, - event == FIB_EVENT_RULE_ADD); + err = nsim_fib_rule_event(info, event == FIB_EVENT_RULE_ADD); break; case FIB_EVENT_ENTRY_ADD: /* fall through */ case FIB_EVENT_ENTRY_DEL: - err = nsim_fib_event(data, info, - event == FIB_EVENT_ENTRY_ADD); + err = nsim_fib_event(info, event == FIB_EVENT_ENTRY_ADD); break; } @@ -201,23 +199,30 @@ static int nsim_fib_event_nb(struct notifier_block *nb, unsigned long event, /* inconsistent dump, trying again */ static void nsim_fib_dump_inconsistent(struct notifier_block *nb) { - struct nsim_fib_data *data = container_of(nb, struct nsim_fib_data, - fib_nb); + struct nsim_fib_data *data; + struct net *net; - data->ipv4.fib.num = 0ULL; - data->ipv4.rules.num = 0ULL; - data->ipv6.fib.num = 0ULL; - data->ipv6.rules.num = 0ULL; + rcu_read_lock(); + for_each_net_rcu(net) { + data = net_generic(net, nsim_fib_net_id); + + data->ipv4.fib.num = 0ULL; + data->ipv4.rules.num = 0ULL; + + data->ipv6.fib.num = 0ULL; + data->ipv6.rules.num = 0ULL; + } + rcu_read_unlock(); } -struct nsim_fib_data *nsim_fib_create(void) -{ - struct nsim_fib_data *data; - int err; +static struct notifier_block nsim_fib_nb = { + .notifier_call = nsim_fib_event_nb, +}; - data = kzalloc(sizeof(*data), GFP_KERNEL); - if (!data) - return ERR_PTR(-ENOMEM); +/* Initialize per network namespace state */ +static int __net_init nsim_fib_netns_init(struct net *net) +{ + struct nsim_fib_data *data = net_generic(net, nsim_fib_net_id); data->ipv4.fib.max = (u64)-1; data->ipv4.rules.max = (u64)-1; @@ -225,22 +230,37 @@ struct nsim_fib_data *nsim_fib_create(void) data->ipv6.fib.max = (u64)-1; data->ipv6.rules.max = (u64)-1; - data->fib_nb.notifier_call = nsim_fib_event_nb; - err = register_fib_notifier(&data->fib_nb, nsim_fib_dump_inconsistent); - if (err) { + return 0; +} + +static struct pernet_operations nsim_fib_net_ops = { + .init = nsim_fib_netns_init, + .id = &nsim_fib_net_id, + .size = sizeof(struct nsim_fib_data), +}; + +void nsim_fib_exit(void) +{ + unregister_pernet_subsys(&nsim_fib_net_ops); + unregister_fib_notifier(&nsim_fib_nb); +} + +int nsim_fib_init(void) +{ + int err; + + err = register_pernet_subsys(&nsim_fib_net_ops); + if (err < 0) { + pr_err("Failed to register pernet subsystem\n"); + goto err_out; + } + + err = register_fib_notifier(&nsim_fib_nb, nsim_fib_dump_inconsistent); + if (err < 0) { pr_err("Failed to register fib notifier\n"); goto err_out; } - return data; - err_out: - kfree(data); - return ERR_PTR(err); -} - -void nsim_fib_destroy(struct nsim_fib_data *data) -{ - unregister_fib_notifier(&data->fib_nb); - kfree(data); + return err; } diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c index 0740940f41b1..55f57f76d01b 100644 --- a/drivers/net/netdevsim/netdev.c +++ b/drivers/net/netdevsim/netdev.c @@ -357,12 +357,18 @@ static int __init nsim_module_init(void) if (err) goto err_dev_exit; - err = rtnl_link_register(&nsim_link_ops); + err = nsim_fib_init(); if (err) goto err_bus_exit; + err = rtnl_link_register(&nsim_link_ops); + if (err) + goto err_fib_exit; + return 0; +err_fib_exit: + nsim_fib_exit(); err_bus_exit: nsim_bus_exit(); err_dev_exit: @@ -373,6 +379,7 @@ static int __init nsim_module_init(void) static void __exit nsim_module_exit(void) { rtnl_link_unregister(&nsim_link_ops); + nsim_fib_exit(); nsim_bus_exit(); nsim_dev_exit(); } diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h index 79c05af2a7c0..9404637d34b7 100644 --- a/drivers/net/netdevsim/netdevsim.h +++ b/drivers/net/netdevsim/netdevsim.h @@ -169,12 +169,10 @@ int nsim_dev_port_add(struct nsim_bus_dev *nsim_bus_dev, int nsim_dev_port_del(struct nsim_bus_dev *nsim_bus_dev, unsigned int port_index); -struct nsim_fib_data *nsim_fib_create(void); -void nsim_fib_destroy(struct nsim_fib_data *fib_data); -u64 nsim_fib_get_val(struct nsim_fib_data *fib_data, - enum nsim_resource_id res_id, bool max); -int nsim_fib_set_max(struct nsim_fib_data *fib_data, - enum nsim_resource_id res_id, u64 val, +int nsim_fib_init(void); +void nsim_fib_exit(void); +u64 nsim_fib_get_val(struct net *net, enum nsim_resource_id res_id, bool max); +int nsim_fib_set_max(struct net *net, enum nsim_resource_id res_id, u64 val, struct netlink_ext_ack *extack); #if IS_ENABLED(CONFIG_XFRM_OFFLOAD) From 68553f1a6f746bf860bce3eb42d78c26a717d9c0 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 9 Aug 2019 22:47:47 +0100 Subject: [PATCH 333/482] rxrpc: Fix local refcounting Fix rxrpc_unuse_local() to handle a NULL local pointer as it can be called on an unbound socket on which rx->local is not yet set. The following reproduced (includes omitted): int main(void) { socket(AF_RXRPC, SOCK_DGRAM, AF_INET); return 0; } causes the following oops to occur: BUG: kernel NULL pointer dereference, address: 0000000000000010 ... RIP: 0010:rxrpc_unuse_local+0x8/0x1b ... Call Trace: rxrpc_release+0x2b5/0x338 __sock_release+0x37/0xa1 sock_close+0x14/0x17 __fput+0x115/0x1e9 task_work_run+0x72/0x98 do_exit+0x51b/0xa7a ? __context_tracking_exit+0x4e/0x10e do_group_exit+0xab/0xab __x64_sys_exit_group+0x14/0x17 do_syscall_64+0x89/0x1d4 entry_SYSCALL_64_after_hwframe+0x49/0xbe Reported-by: syzbot+20dee719a2e090427b5f@syzkaller.appspotmail.com Fixes: 730c5fd42c1e ("rxrpc: Fix local endpoint refcounting") Signed-off-by: David Howells cc: Jeffrey Altman Signed-off-by: David S. Miller --- net/rxrpc/local_object.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c index 9798159ee65f..c9db3e762d8d 100644 --- a/net/rxrpc/local_object.c +++ b/net/rxrpc/local_object.c @@ -402,11 +402,13 @@ void rxrpc_unuse_local(struct rxrpc_local *local) { unsigned int au; - au = atomic_dec_return(&local->active_users); - if (au == 0) - rxrpc_queue_local(local); - else - rxrpc_put_local(local); + if (local) { + au = atomic_dec_return(&local->active_users); + if (au == 0) + rxrpc_queue_local(local); + else + rxrpc_put_local(local); + } } /* From d81f41411c2549b0ae42f23140d9589172096759 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonathan=20Neusch=C3=A4fer?= Date: Sat, 10 Aug 2019 13:11:56 +0200 Subject: [PATCH 334/482] net: nps_enet: Fix function names in doc comments MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adjust the function names in two doc comments to match the corresponding functions. Signed-off-by: Jonathan Neuschäfer Signed-off-by: David S. Miller --- drivers/net/ethernet/ezchip/nps_enet.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/ezchip/nps_enet.h b/drivers/net/ethernet/ezchip/nps_enet.h index 133acca0bf31..092da2d90026 100644 --- a/drivers/net/ethernet/ezchip/nps_enet.h +++ b/drivers/net/ethernet/ezchip/nps_enet.h @@ -167,7 +167,7 @@ struct nps_enet_priv { }; /** - * nps_reg_set - Sets ENET register with provided value. + * nps_enet_reg_set - Sets ENET register with provided value. * @priv: Pointer to EZchip ENET private data structure. * @reg: Register offset from base address. * @value: Value to set in register. @@ -179,7 +179,7 @@ static inline void nps_enet_reg_set(struct nps_enet_priv *priv, } /** - * nps_reg_get - Gets value of specified ENET register. + * nps_enet_reg_get - Gets value of specified ENET register. * @priv: Pointer to EZchip ENET private data structure. * @reg: Register offset from base address. * From 8028ccda39bb440d86aee6948405c8337afbed8b Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Sun, 11 Aug 2019 10:48:37 +0300 Subject: [PATCH 335/482] mlxsw: spectrum_ptp: Keep unmatched entries in a linked list To identify timestamps for matching with their packets, Spectrum-1 uses a five-tuple of (port, direction, domain number, message type, sequence ID). If there are several clients from the same domain behind a single port sending Delay_Req's, the only thing differentiating these packets, as far as Spectrum-1 is concerned, is the sequence ID. Should sequence IDs between individual clients be similar, conflicts may arise. That is not a problem to hardware, which will simply deliver timestamps on a first comes, first served basis. However the driver uses a simple hash table to store the unmatched pieces. When a new conflicting piece arrives, it pushes out the previously stored one, which if it is a packet, is delivered without timestamp. Later on as the corresponding timestamps arrive, the first one is mismatched to the second packet, and the second one is never matched and eventually is GCd. To correct this issue, instead of using a simple rhashtable, use rhltable to keep the unmatched entries. Previously, a found unmatched entry would always be removed from the hash table. That is not the case anymore--an incompatible entry is left in the hash table. Therefore removal from the hash table cannot be used to confirm the validity of the looked-up pointer, instead the lookup would simply need to be redone. Therefore move it inside the critical section. This simplifies a lot of the code. Fixes: 8748642751ed ("mlxsw: spectrum: PTP: Support SIOCGHWTSTAMP, SIOCSHWTSTAMP ioctls") Reported-by: Alex Veber Signed-off-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- .../ethernet/mellanox/mlxsw/spectrum_ptp.c | 138 +++++++----------- 1 file changed, 55 insertions(+), 83 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c index 63b07edd9d81..38bb1cfe4e8c 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c @@ -29,7 +29,7 @@ struct mlxsw_sp_ptp_state { struct mlxsw_sp *mlxsw_sp; - struct rhashtable unmatched_ht; + struct rhltable unmatched_ht; spinlock_t unmatched_lock; /* protects the HT */ struct delayed_work ht_gc_dw; u32 gc_cycle; @@ -45,7 +45,7 @@ struct mlxsw_sp1_ptp_key { struct mlxsw_sp1_ptp_unmatched { struct mlxsw_sp1_ptp_key key; - struct rhash_head ht_node; + struct rhlist_head ht_node; struct rcu_head rcu; struct sk_buff *skb; u64 timestamp; @@ -359,7 +359,7 @@ static int mlxsw_sp_ptp_parse(struct sk_buff *skb, /* Returns NULL on successful insertion, a pointer on conflict, or an ERR_PTR on * error. */ -static struct mlxsw_sp1_ptp_unmatched * +static int mlxsw_sp1_ptp_unmatched_save(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp1_ptp_key key, struct sk_buff *skb, @@ -368,41 +368,51 @@ mlxsw_sp1_ptp_unmatched_save(struct mlxsw_sp *mlxsw_sp, int cycles = MLXSW_SP1_PTP_HT_GC_TIMEOUT / MLXSW_SP1_PTP_HT_GC_INTERVAL; struct mlxsw_sp_ptp_state *ptp_state = mlxsw_sp->ptp_state; struct mlxsw_sp1_ptp_unmatched *unmatched; - struct mlxsw_sp1_ptp_unmatched *conflict; + int err; unmatched = kzalloc(sizeof(*unmatched), GFP_ATOMIC); if (!unmatched) - return ERR_PTR(-ENOMEM); + return -ENOMEM; unmatched->key = key; unmatched->skb = skb; unmatched->timestamp = timestamp; unmatched->gc_cycle = mlxsw_sp->ptp_state->gc_cycle + cycles; - conflict = rhashtable_lookup_get_insert_fast(&ptp_state->unmatched_ht, - &unmatched->ht_node, - mlxsw_sp1_ptp_unmatched_ht_params); - if (conflict) + err = rhltable_insert(&ptp_state->unmatched_ht, &unmatched->ht_node, + mlxsw_sp1_ptp_unmatched_ht_params); + if (err) kfree(unmatched); - return conflict; + return err; } static struct mlxsw_sp1_ptp_unmatched * mlxsw_sp1_ptp_unmatched_lookup(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp1_ptp_key key) + struct mlxsw_sp1_ptp_key key, int *p_length) { - return rhashtable_lookup(&mlxsw_sp->ptp_state->unmatched_ht, &key, - mlxsw_sp1_ptp_unmatched_ht_params); + struct mlxsw_sp1_ptp_unmatched *unmatched, *last = NULL; + struct rhlist_head *tmp, *list; + int length = 0; + + list = rhltable_lookup(&mlxsw_sp->ptp_state->unmatched_ht, &key, + mlxsw_sp1_ptp_unmatched_ht_params); + rhl_for_each_entry_rcu(unmatched, tmp, list, ht_node) { + last = unmatched; + length++; + } + + *p_length = length; + return last; } static int mlxsw_sp1_ptp_unmatched_remove(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp1_ptp_unmatched *unmatched) { - return rhashtable_remove_fast(&mlxsw_sp->ptp_state->unmatched_ht, - &unmatched->ht_node, - mlxsw_sp1_ptp_unmatched_ht_params); + return rhltable_remove(&mlxsw_sp->ptp_state->unmatched_ht, + &unmatched->ht_node, + mlxsw_sp1_ptp_unmatched_ht_params); } /* This function is called in the following scenarios: @@ -489,75 +499,38 @@ static void mlxsw_sp1_ptp_got_piece(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp1_ptp_key key, struct sk_buff *skb, u64 timestamp) { - struct mlxsw_sp1_ptp_unmatched *unmatched, *conflict; + struct mlxsw_sp1_ptp_unmatched *unmatched; + int length; int err; rcu_read_lock(); - unmatched = mlxsw_sp1_ptp_unmatched_lookup(mlxsw_sp, key); - spin_lock(&mlxsw_sp->ptp_state->unmatched_lock); - if (unmatched) { - /* There was an unmatched entry when we looked, but it may have - * been removed before we took the lock. - */ - err = mlxsw_sp1_ptp_unmatched_remove(mlxsw_sp, unmatched); - if (err) - unmatched = NULL; - } - - if (!unmatched) { - /* We have no unmatched entry, but one may have been added after - * we looked, but before we took the lock. - */ - unmatched = mlxsw_sp1_ptp_unmatched_save(mlxsw_sp, key, - skb, timestamp); - if (IS_ERR(unmatched)) { - if (skb) - mlxsw_sp1_ptp_packet_finish(mlxsw_sp, skb, - key.local_port, - key.ingress, NULL); - unmatched = NULL; - } else if (unmatched) { - /* Save just told us, under lock, that the entry is - * there, so this has to work. - */ - err = mlxsw_sp1_ptp_unmatched_remove(mlxsw_sp, - unmatched); - WARN_ON_ONCE(err); - } - } - - /* If unmatched is non-NULL here, it comes either from the lookup, or - * from the save attempt above. In either case the entry was removed - * from the hash table. If unmatched is NULL, a new unmatched entry was - * added to the hash table, and there was no conflict. - */ - + unmatched = mlxsw_sp1_ptp_unmatched_lookup(mlxsw_sp, key, &length); if (skb && unmatched && unmatched->timestamp) { unmatched->skb = skb; } else if (timestamp && unmatched && unmatched->skb) { unmatched->timestamp = timestamp; - } else if (unmatched) { - /* unmatched holds an older entry of the same type: either an - * skb if we are handling skb, or a timestamp if we are handling - * timestamp. We can't match that up, so save what we have. + } else { + /* Either there is no entry to match, or one that is there is + * incompatible. */ - conflict = mlxsw_sp1_ptp_unmatched_save(mlxsw_sp, key, - skb, timestamp); - if (IS_ERR(conflict)) { - if (skb) - mlxsw_sp1_ptp_packet_finish(mlxsw_sp, skb, - key.local_port, - key.ingress, NULL); - } else { - /* Above, we removed an object with this key from the - * hash table, under lock, so conflict can not be a - * valid pointer. - */ - WARN_ON_ONCE(conflict); - } + if (length < 100) + err = mlxsw_sp1_ptp_unmatched_save(mlxsw_sp, key, + skb, timestamp); + else + err = -E2BIG; + if (err && skb) + mlxsw_sp1_ptp_packet_finish(mlxsw_sp, skb, + key.local_port, + key.ingress, NULL); + unmatched = NULL; + } + + if (unmatched) { + err = mlxsw_sp1_ptp_unmatched_remove(mlxsw_sp, unmatched); + WARN_ON_ONCE(err); } spin_unlock(&mlxsw_sp->ptp_state->unmatched_lock); @@ -669,9 +642,8 @@ mlxsw_sp1_ptp_ht_gc_collect(struct mlxsw_sp_ptp_state *ptp_state, local_bh_disable(); spin_lock(&ptp_state->unmatched_lock); - err = rhashtable_remove_fast(&ptp_state->unmatched_ht, - &unmatched->ht_node, - mlxsw_sp1_ptp_unmatched_ht_params); + err = rhltable_remove(&ptp_state->unmatched_ht, &unmatched->ht_node, + mlxsw_sp1_ptp_unmatched_ht_params); spin_unlock(&ptp_state->unmatched_lock); if (err) @@ -702,7 +674,7 @@ static void mlxsw_sp1_ptp_ht_gc(struct work_struct *work) ptp_state = container_of(dwork, struct mlxsw_sp_ptp_state, ht_gc_dw); gc_cycle = ptp_state->gc_cycle++; - rhashtable_walk_enter(&ptp_state->unmatched_ht, &iter); + rhltable_walk_enter(&ptp_state->unmatched_ht, &iter); rhashtable_walk_start(&iter); while ((obj = rhashtable_walk_next(&iter))) { if (IS_ERR(obj)) @@ -855,8 +827,8 @@ struct mlxsw_sp_ptp_state *mlxsw_sp1_ptp_init(struct mlxsw_sp *mlxsw_sp) spin_lock_init(&ptp_state->unmatched_lock); - err = rhashtable_init(&ptp_state->unmatched_ht, - &mlxsw_sp1_ptp_unmatched_ht_params); + err = rhltable_init(&ptp_state->unmatched_ht, + &mlxsw_sp1_ptp_unmatched_ht_params); if (err) goto err_hashtable_init; @@ -891,7 +863,7 @@ struct mlxsw_sp_ptp_state *mlxsw_sp1_ptp_init(struct mlxsw_sp *mlxsw_sp) err_mtptpt1_set: mlxsw_sp_ptp_mtptpt_set(mlxsw_sp, MLXSW_REG_MTPTPT_TRAP_ID_PTP0, 0); err_mtptpt_set: - rhashtable_destroy(&ptp_state->unmatched_ht); + rhltable_destroy(&ptp_state->unmatched_ht); err_hashtable_init: kfree(ptp_state); return ERR_PTR(err); @@ -906,8 +878,8 @@ void mlxsw_sp1_ptp_fini(struct mlxsw_sp_ptp_state *ptp_state) mlxsw_sp1_ptp_set_fifo_clr_on_trap(mlxsw_sp, false); mlxsw_sp_ptp_mtptpt_set(mlxsw_sp, MLXSW_REG_MTPTPT_TRAP_ID_PTP1, 0); mlxsw_sp_ptp_mtptpt_set(mlxsw_sp, MLXSW_REG_MTPTPT_TRAP_ID_PTP0, 0); - rhashtable_free_and_destroy(&ptp_state->unmatched_ht, - &mlxsw_sp1_ptp_unmatched_free_fn, NULL); + rhltable_free_and_destroy(&ptp_state->unmatched_ht, + &mlxsw_sp1_ptp_unmatched_free_fn, NULL); kfree(ptp_state); } From 58799865be84e2a895dab72de0e1b996ed943f22 Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Sun, 11 Aug 2019 22:18:25 +0800 Subject: [PATCH 336/482] net: dsa: Check existence of .port_mdb_add callback before calling it The dsa framework has optional .port_mdb_{prepare,add,del} callback fields for drivers to handle multicast database entries. When adding an entry, the framework goes through a prepare phase, then a commit phase. Drivers not providing these callbacks should be detected in the prepare phase. DSA core may still bypass the bridge layer and call the dsa_port_mdb_add function directly with no prepare phase or no switchdev trans object, and the framework ends up calling an undefined .port_mdb_add callback. This results in a NULL pointer dereference, as shown in the log below. The other functions seem to be properly guarded. Do the same for .port_mdb_add in dsa_switch_mdb_add_bitmap() as well. 8<--- cut here --- Unable to handle kernel NULL pointer dereference at virtual address 00000000 pgd = (ptrval) [00000000] *pgd=00000000 Internal error: Oops: 80000005 [#1] SMP ARM Modules linked in: rtl8xxxu rtl8192cu rtl_usb rtl8192c_common rtlwifi mac80211 cfg80211 CPU: 1 PID: 134 Comm: kworker/1:2 Not tainted 5.3.0-rc1-00247-gd3519030752a #1 Hardware name: Allwinner sun7i (A20) Family Workqueue: events switchdev_deferred_process_work PC is at 0x0 LR is at dsa_switch_event+0x570/0x620 pc : [<00000000>] lr : [] psr: 80070013 sp : ee871db8 ip : 00000000 fp : ee98d0a4 r10: 0000000c r9 : 00000008 r8 : ee89f710 r7 : ee98d040 r6 : ee98d088 r5 : c0f04c48 r4 : ee98d04c r3 : 00000000 r2 : ee89f710 r1 : 00000008 r0 : ee98d040 Flags: Nzcv IRQs on FIQs on Mode SVC_32 ISA ARM Segment none Control: 10c5387d Table: 6deb406a DAC: 00000051 Process kworker/1:2 (pid: 134, stack limit = 0x(ptrval)) Stack: (0xee871db8 to 0xee872000) 1da0: ee871e14 103ace2d 1dc0: 00000000 ffffffff 00000000 ee871e14 00000005 00000000 c08524a0 00000000 1de0: ffffe000 c014bdfc c0f04c48 ee871e98 c0f04c48 ee9e5000 c0851120 c014bef0 1e00: 00000000 b643aea2 ee9b4068 c08509a8 ee2bf940 ee89f710 ee871ecb 00000000 1e20: 00000008 103ace2d 00000000 c087e248 ee29c868 103ace2d 00000001 ffffffff 1e40: 00000000 ee871e98 00000006 00000000 c0fb2a50 c087e2d0 ffffffff c08523c4 1e60: ffffffff c014bdfc 00000006 c0fad2d0 ee871e98 ee89f710 00000000 c014c500 1e80: 00000000 ee89f3c0 c0f04c48 00000000 ee9e5000 c087dfb4 ee9e5000 00000000 1ea0: ee89f710 ee871ecb 00000001 103ace2d 00000000 c0f04c48 00000000 c087e0a8 1ec0: 00000000 efd9a3e0 0089f3c0 103ace2d ee89f700 ee89f710 ee9e5000 00000122 1ee0: 00000100 c087e130 ee89f700 c0fad2c8 c1003ef0 c087de4c 2e928000 c0fad2ec 1f00: c0fad2ec ee839580 ef7a62c0 ef7a9400 00000000 c087def8 c0fad2ec c01447dc 1f20: ef315640 ef7a62c0 00000008 ee839580 ee839594 ef7a62c0 00000008 c0f03d00 1f40: ef7a62d8 ef7a62c0 ffffe000 c0145b84 ffffe000 c0fb2420 c0bfaa8c 00000000 1f60: ffffe000 ee84b600 ee84b5c0 00000000 ee870000 ee839580 c0145b40 ef0e5ea4 1f80: ee84b61c c014a6f8 00000001 ee84b5c0 c014a5b0 00000000 00000000 00000000 1fa0: 00000000 00000000 00000000 c01010e8 00000000 00000000 00000000 00000000 1fc0: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 1fe0: 00000000 00000000 00000000 00000000 00000013 00000000 00000000 00000000 [] (dsa_switch_event) from [] (notifier_call_chain+0x48/0x84) [] (notifier_call_chain) from [] (raw_notifier_call_chain+0x18/0x20) [] (raw_notifier_call_chain) from [] (dsa_port_mdb_add+0x48/0x74) [] (dsa_port_mdb_add) from [] (__switchdev_handle_port_obj_add+0x54/0xd4) [] (__switchdev_handle_port_obj_add) from [] (switchdev_handle_port_obj_add+0x8/0x14) [] (switchdev_handle_port_obj_add) from [] (dsa_slave_switchdev_blocking_event+0x94/0xa4) [] (dsa_slave_switchdev_blocking_event) from [] (notifier_call_chain+0x48/0x84) [] (notifier_call_chain) from [] (blocking_notifier_call_chain+0x50/0x68) [] (blocking_notifier_call_chain) from [] (switchdev_port_obj_notify+0x44/0xa8) [] (switchdev_port_obj_notify) from [] (switchdev_port_obj_add_now+0x90/0x104) [] (switchdev_port_obj_add_now) from [] (switchdev_port_obj_add_deferred+0x14/0x5c) [] (switchdev_port_obj_add_deferred) from [] (switchdev_deferred_process+0x64/0x104) [] (switchdev_deferred_process) from [] (switchdev_deferred_process_work+0xc/0x14) [] (switchdev_deferred_process_work) from [] (process_one_work+0x218/0x50c) [] (process_one_work) from [] (worker_thread+0x44/0x5bc) [] (worker_thread) from [] (kthread+0x148/0x150) [] (kthread) from [] (ret_from_fork+0x14/0x2c) Exception stack(0xee871fb0 to 0xee871ff8) 1fa0: 00000000 00000000 00000000 00000000 1fc0: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 1fe0: 00000000 00000000 00000000 00000000 00000013 00000000 Code: bad PC value ---[ end trace 1292c61abd17b130 ]--- [] (dsa_switch_event) from [] (notifier_call_chain+0x48/0x84) corresponds to $ arm-linux-gnueabihf-addr2line -C -i -e vmlinux c08533ec linux/net/dsa/switch.c:156 linux/net/dsa/switch.c:178 linux/net/dsa/switch.c:328 Fixes: e6db98db8a95 ("net: dsa: add switch mdb bitmap functions") Signed-off-by: Chen-Yu Tsai Reviewed-by: Vivien Didelot Signed-off-by: David S. Miller --- net/dsa/switch.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/dsa/switch.c b/net/dsa/switch.c index 4ec5b7f85d51..09d9286b27cc 100644 --- a/net/dsa/switch.c +++ b/net/dsa/switch.c @@ -153,6 +153,9 @@ static void dsa_switch_mdb_add_bitmap(struct dsa_switch *ds, { int port; + if (!ds->ops->port_mdb_add) + return; + for_each_set_bit(port, bitmap, ds->num_ports) ds->ops->port_mdb_add(ds, port, mdb); } From 8874ecae2977e5a2d4f0ba301364435b81c05938 Mon Sep 17 00:00:00 2001 From: Chris Packham Date: Mon, 12 Aug 2019 08:18:25 +1200 Subject: [PATCH 337/482] tipc: initialise addr_trail_end when setting node addresses We set the field 'addr_trial_end' to 'jiffies', instead of the current value 0, at the moment the node address is initialized. This guarantees we don't inadvertently enter an address trial period when the node address is explicitly set by the user. Signed-off-by: Chris Packham Acked-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/addr.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/tipc/addr.c b/net/tipc/addr.c index b88d48d00913..0f1eaed1bd1b 100644 --- a/net/tipc/addr.c +++ b/net/tipc/addr.c @@ -75,6 +75,7 @@ void tipc_set_node_addr(struct net *net, u32 addr) tipc_set_node_id(net, node_id); } tn->trial_addr = addr; + tn->addr_trial_end = jiffies; pr_info("32-bit node address hash set to %x\n", addr); } From 125b7e0949d4e72b15c2b1a1590f8cece985a918 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Sun, 11 Aug 2019 20:13:45 -0700 Subject: [PATCH 338/482] net: tc35815: Explicitly check NET_IP_ALIGN is not zero in tc35815_rx clang warns: drivers/net/ethernet/toshiba/tc35815.c:1507:30: warning: use of logical '&&' with constant operand [-Wconstant-logical-operand] if (!HAVE_DMA_RXALIGN(lp) && NET_IP_ALIGN) ^ ~~~~~~~~~~~~ drivers/net/ethernet/toshiba/tc35815.c:1507:30: note: use '&' for a bitwise operation if (!HAVE_DMA_RXALIGN(lp) && NET_IP_ALIGN) ^~ & drivers/net/ethernet/toshiba/tc35815.c:1507:30: note: remove constant to silence this warning if (!HAVE_DMA_RXALIGN(lp) && NET_IP_ALIGN) ~^~~~~~~~~~~~~~~ 1 warning generated. Explicitly check that NET_IP_ALIGN is not zero, which matches how this is checked in other parts of the tree. Because NET_IP_ALIGN is a build time constant, this check will be constant folded away during optimization. Fixes: 82a9928db560 ("tc35815: Enable StripCRC feature") Link: https://github.com/ClangBuiltLinux/linux/issues/608 Signed-off-by: Nathan Chancellor Signed-off-by: David S. Miller --- drivers/net/ethernet/toshiba/tc35815.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/toshiba/tc35815.c b/drivers/net/ethernet/toshiba/tc35815.c index 8479a440527b..12466a72cefc 100644 --- a/drivers/net/ethernet/toshiba/tc35815.c +++ b/drivers/net/ethernet/toshiba/tc35815.c @@ -1504,7 +1504,7 @@ tc35815_rx(struct net_device *dev, int limit) pci_unmap_single(lp->pci_dev, lp->rx_skbs[cur_bd].skb_dma, RX_BUF_SIZE, PCI_DMA_FROMDEVICE); - if (!HAVE_DMA_RXALIGN(lp) && NET_IP_ALIGN) + if (!HAVE_DMA_RXALIGN(lp) && NET_IP_ALIGN != 0) memmove(skb->data, skb->data - NET_IP_ALIGN, pkt_len); data = skb_put(skb, pkt_len); From 5dac665cf403967bb79a7aeb8c182a621fe617ff Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Wed, 31 Jul 2019 19:15:43 +0900 Subject: [PATCH 339/482] usb: gadget: udc: renesas_usb3: Fix sysfs interface of "role" Since the role_store() uses strncmp(), it's possible to refer out-of-memory if the sysfs data size is smaller than strlen("host"). This patch fixes it by using sysfs_streq() instead of strncmp(). Fixes: cc995c9ec118 ("usb: gadget: udc: renesas_usb3: add support for usb role swap") Cc: # v4.12+ Reviewed-by: Geert Uytterhoeven Signed-off-by: Yoshihiro Shimoda Signed-off-by: Felipe Balbi --- drivers/usb/gadget/udc/renesas_usb3.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/usb/gadget/udc/renesas_usb3.c b/drivers/usb/gadget/udc/renesas_usb3.c index 87062d22134d..1f4c3fbd1df8 100644 --- a/drivers/usb/gadget/udc/renesas_usb3.c +++ b/drivers/usb/gadget/udc/renesas_usb3.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -2450,9 +2451,9 @@ static ssize_t role_store(struct device *dev, struct device_attribute *attr, if (usb3->forced_b_device) return -EBUSY; - if (!strncmp(buf, "host", strlen("host"))) + if (sysfs_streq(buf, "host")) new_mode_is_host = true; - else if (!strncmp(buf, "peripheral", strlen("peripheral"))) + else if (sysfs_streq(buf, "peripheral")) new_mode_is_host = false; else return -EINVAL; From 602fda17c7356bb7ae98467d93549057481d11dd Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Fri, 26 Jul 2019 14:59:03 +1000 Subject: [PATCH 340/482] usb: gadget: composite: Clear "suspended" on reset/disconnect In some cases, one can get out of suspend with a reset or a disconnect followed by a reconnect. Previously we would leave a stale suspended flag set. Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Felipe Balbi --- drivers/usb/gadget/composite.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c index 9118b42c70b6..76883ff4f5bb 100644 --- a/drivers/usb/gadget/composite.c +++ b/drivers/usb/gadget/composite.c @@ -1976,6 +1976,7 @@ void composite_disconnect(struct usb_gadget *gadget) * disconnect callbacks? */ spin_lock_irqsave(&cdev->lock, flags); + cdev->suspended = 0; if (cdev->config) reset_config(cdev); if (cdev->driver->disconnect) From 4a56a478a525d6427be90753451c40e1327caa1a Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Fri, 26 Jul 2019 14:59:04 +1000 Subject: [PATCH 341/482] usb: gadget: mass_storage: Fix races between fsg_disable and fsg_set_alt If fsg_disable() and fsg_set_alt() are called too closely to each other (for example due to a quick reset/reconnect), what can happen is that fsg_set_alt sets common->new_fsg from an interrupt while handle_exception is trying to process the config change caused by fsg_disable(): fsg_disable() ... handle_exception() sets state back to FSG_STATE_NORMAL hasn't yet called do_set_interface() or is inside it. ---> interrupt fsg_set_alt sets common->new_fsg queues a new FSG_STATE_CONFIG_CHANGE <--- Now, the first handle_exception can "see" the updated new_fsg, treats it as if it was a fsg_set_alt() response, call usb_composite_setup_continue() etc... But then, the thread sees the second FSG_STATE_CONFIG_CHANGE, and goes back down the same path, wipes and reattaches a now active fsg, and .. calls usb_composite_setup_continue() which at this point is wrong. Not only we get a backtrace, but I suspect the second set_interface wrecks some state causing the host to get upset in my case. This fixes it by replacing "new_fsg" by a "state argument" (same principle) which is set in the same lock section as the state update, and retrieved similarly. That way, there is never any discrepancy between the dequeued state and the observed value of it. We keep the ability to have the latest reconfig operation take precedence, but we guarantee that once "dequeued" the argument (new_fsg) will not be clobbered by any new event. Signed-off-by: Benjamin Herrenschmidt Acked-by: Alan Stern Signed-off-by: Felipe Balbi --- drivers/usb/gadget/function/f_mass_storage.c | 28 +++++++++++++------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/drivers/usb/gadget/function/f_mass_storage.c b/drivers/usb/gadget/function/f_mass_storage.c index 29cc5693e05c..7c96c4665178 100644 --- a/drivers/usb/gadget/function/f_mass_storage.c +++ b/drivers/usb/gadget/function/f_mass_storage.c @@ -261,7 +261,7 @@ struct fsg_common; struct fsg_common { struct usb_gadget *gadget; struct usb_composite_dev *cdev; - struct fsg_dev *fsg, *new_fsg; + struct fsg_dev *fsg; wait_queue_head_t io_wait; wait_queue_head_t fsg_wait; @@ -290,6 +290,7 @@ struct fsg_common { unsigned int bulk_out_maxpacket; enum fsg_state state; /* For exception handling */ unsigned int exception_req_tag; + void *exception_arg; enum data_direction data_dir; u32 data_size; @@ -391,7 +392,8 @@ static int fsg_set_halt(struct fsg_dev *fsg, struct usb_ep *ep) /* These routines may be called in process context or in_irq */ -static void raise_exception(struct fsg_common *common, enum fsg_state new_state) +static void __raise_exception(struct fsg_common *common, enum fsg_state new_state, + void *arg) { unsigned long flags; @@ -404,6 +406,7 @@ static void raise_exception(struct fsg_common *common, enum fsg_state new_state) if (common->state <= new_state) { common->exception_req_tag = common->ep0_req_tag; common->state = new_state; + common->exception_arg = arg; if (common->thread_task) send_sig_info(SIGUSR1, SEND_SIG_PRIV, common->thread_task); @@ -411,6 +414,10 @@ static void raise_exception(struct fsg_common *common, enum fsg_state new_state) spin_unlock_irqrestore(&common->lock, flags); } +static void raise_exception(struct fsg_common *common, enum fsg_state new_state) +{ + __raise_exception(common, new_state, NULL); +} /*-------------------------------------------------------------------------*/ @@ -2285,16 +2292,16 @@ static int do_set_interface(struct fsg_common *common, struct fsg_dev *new_fsg) static int fsg_set_alt(struct usb_function *f, unsigned intf, unsigned alt) { struct fsg_dev *fsg = fsg_from_func(f); - fsg->common->new_fsg = fsg; - raise_exception(fsg->common, FSG_STATE_CONFIG_CHANGE); + + __raise_exception(fsg->common, FSG_STATE_CONFIG_CHANGE, fsg); return USB_GADGET_DELAYED_STATUS; } static void fsg_disable(struct usb_function *f) { struct fsg_dev *fsg = fsg_from_func(f); - fsg->common->new_fsg = NULL; - raise_exception(fsg->common, FSG_STATE_CONFIG_CHANGE); + + __raise_exception(fsg->common, FSG_STATE_CONFIG_CHANGE, NULL); } @@ -2307,6 +2314,7 @@ static void handle_exception(struct fsg_common *common) enum fsg_state old_state; struct fsg_lun *curlun; unsigned int exception_req_tag; + struct fsg_dev *new_fsg; /* * Clear the existing signals. Anything but SIGUSR1 is converted @@ -2360,6 +2368,7 @@ static void handle_exception(struct fsg_common *common) common->next_buffhd_to_fill = &common->buffhds[0]; common->next_buffhd_to_drain = &common->buffhds[0]; exception_req_tag = common->exception_req_tag; + new_fsg = common->exception_arg; old_state = common->state; common->state = FSG_STATE_NORMAL; @@ -2413,8 +2422,8 @@ static void handle_exception(struct fsg_common *common) break; case FSG_STATE_CONFIG_CHANGE: - do_set_interface(common, common->new_fsg); - if (common->new_fsg) + do_set_interface(common, new_fsg); + if (new_fsg) usb_composite_setup_continue(common->cdev); break; @@ -2989,8 +2998,7 @@ static void fsg_unbind(struct usb_configuration *c, struct usb_function *f) DBG(fsg, "unbind\n"); if (fsg->common->fsg == fsg) { - fsg->common->new_fsg = NULL; - raise_exception(fsg->common, FSG_STATE_CONFIG_CHANGE); + __raise_exception(fsg->common, FSG_STATE_CONFIG_CHANGE, NULL); /* FIXME: make interruptible or killable somehow? */ wait_event(common->fsg_wait, common->fsg != fsg); } From 508c5849c62d009e03f37ad0f556071fac5112f0 Mon Sep 17 00:00:00 2001 From: Tomer Tayar Date: Thu, 1 Aug 2019 13:57:36 +0000 Subject: [PATCH 342/482] habanalabs: Avoid double free in error flow In case kernel context init fails during device initialization, both hl_ctx_put() and kfree() are called, ending with a double free of the kernel context. Calling kfree() is needed only when a failure happens between the allocation of the kernel context and its initialization, so move it to there and remove it from the error flow. Signed-off-by: Tomer Tayar Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/device.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/misc/habanalabs/device.c b/drivers/misc/habanalabs/device.c index 0c4894dd9c02..7a8f9d0b71b5 100644 --- a/drivers/misc/habanalabs/device.c +++ b/drivers/misc/habanalabs/device.c @@ -970,7 +970,8 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass) rc = hl_ctx_init(hdev, hdev->kernel_ctx, true); if (rc) { dev_err(hdev->dev, "failed to initialize kernel context\n"); - goto free_ctx; + kfree(hdev->kernel_ctx); + goto mmu_fini; } rc = hl_cb_pool_init(hdev); @@ -1053,8 +1054,6 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass) if (hl_ctx_put(hdev->kernel_ctx) != 1) dev_err(hdev->dev, "kernel ctx is still alive on initialization failure\n"); -free_ctx: - kfree(hdev->kernel_ctx); mmu_fini: hl_mmu_fini(hdev); eq_fini: From c8113756ba27298d6e95403c087dc5881b419a99 Mon Sep 17 00:00:00 2001 From: Tomer Tayar Date: Sun, 4 Aug 2019 07:03:41 +0000 Subject: [PATCH 343/482] habanalabs: fix DRAM usage accounting on context tear down The patch fix the DRAM usage accounting by adding a missing update of the DRAM memory consumption, when a context is being torn down without an organized release of the allocated memory. Signed-off-by: Tomer Tayar Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/memory.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/misc/habanalabs/memory.c b/drivers/misc/habanalabs/memory.c index 42d237cae1dc..365fb0cb8dff 100644 --- a/drivers/misc/habanalabs/memory.c +++ b/drivers/misc/habanalabs/memory.c @@ -1629,6 +1629,8 @@ void hl_vm_ctx_fini(struct hl_ctx *ctx) dev_dbg(hdev->dev, "page list 0x%p of asid %d is still alive\n", phys_pg_list, ctx->asid); + atomic64_sub(phys_pg_list->total_size, + &hdev->dram_used_mem); free_phys_pg_pack(hdev, phys_pg_list); idr_remove(&vm->phys_pg_pack_handles, i); } From 213ad5ad016a0da975b35f54e8cd236c3b04724b Mon Sep 17 00:00:00 2001 From: Ben Segal Date: Thu, 1 Aug 2019 23:20:32 +0000 Subject: [PATCH 344/482] habanalabs: fix endianness handling for packets from user Packets that arrive from the user and need to be parsed by the driver are assumed to be in LE format. This patch fix all the places where the code handles these packets and use the correct endianness macros to handle them, as the driver handles the packets in CPU format (LE or BE depending on the arch). Signed-off-by: Ben Segal Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/goya/goya.c | 32 +++++++++++-------- .../habanalabs/include/goya/goya_packets.h | 13 ++++++++ 2 files changed, 32 insertions(+), 13 deletions(-) diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c index a0e181714891..e8b1142910e0 100644 --- a/drivers/misc/habanalabs/goya/goya.c +++ b/drivers/misc/habanalabs/goya/goya.c @@ -3428,12 +3428,13 @@ static int goya_validate_cb(struct hl_device *hdev, while (cb_parsed_length < parser->user_cb_size) { enum packet_id pkt_id; u16 pkt_size; - void *user_pkt; + struct goya_packet *user_pkt; - user_pkt = (void *) (uintptr_t) + user_pkt = (struct goya_packet *) (uintptr_t) (parser->user_cb->kernel_address + cb_parsed_length); - pkt_id = (enum packet_id) (((*(u64 *) user_pkt) & + pkt_id = (enum packet_id) ( + (le64_to_cpu(user_pkt->header) & PACKET_HEADER_PACKET_ID_MASK) >> PACKET_HEADER_PACKET_ID_SHIFT); @@ -3453,7 +3454,8 @@ static int goya_validate_cb(struct hl_device *hdev, * need to validate here as well because patch_cb() is * not called in MMU path while this function is called */ - rc = goya_validate_wreg32(hdev, parser, user_pkt); + rc = goya_validate_wreg32(hdev, + parser, (struct packet_wreg32 *) user_pkt); break; case PACKET_WREG_BULK: @@ -3481,10 +3483,10 @@ static int goya_validate_cb(struct hl_device *hdev, case PACKET_LIN_DMA: if (is_mmu) rc = goya_validate_dma_pkt_mmu(hdev, parser, - user_pkt); + (struct packet_lin_dma *) user_pkt); else rc = goya_validate_dma_pkt_no_mmu(hdev, parser, - user_pkt); + (struct packet_lin_dma *) user_pkt); break; case PACKET_MSG_LONG: @@ -3657,15 +3659,16 @@ static int goya_patch_cb(struct hl_device *hdev, enum packet_id pkt_id; u16 pkt_size; u32 new_pkt_size = 0; - void *user_pkt, *kernel_pkt; + struct goya_packet *user_pkt, *kernel_pkt; - user_pkt = (void *) (uintptr_t) + user_pkt = (struct goya_packet *) (uintptr_t) (parser->user_cb->kernel_address + cb_parsed_length); - kernel_pkt = (void *) (uintptr_t) + kernel_pkt = (struct goya_packet *) (uintptr_t) (parser->patched_cb->kernel_address + cb_patched_cur_length); - pkt_id = (enum packet_id) (((*(u64 *) user_pkt) & + pkt_id = (enum packet_id) ( + (le64_to_cpu(user_pkt->header) & PACKET_HEADER_PACKET_ID_MASK) >> PACKET_HEADER_PACKET_ID_SHIFT); @@ -3680,15 +3683,18 @@ static int goya_patch_cb(struct hl_device *hdev, switch (pkt_id) { case PACKET_LIN_DMA: - rc = goya_patch_dma_packet(hdev, parser, user_pkt, - kernel_pkt, &new_pkt_size); + rc = goya_patch_dma_packet(hdev, parser, + (struct packet_lin_dma *) user_pkt, + (struct packet_lin_dma *) kernel_pkt, + &new_pkt_size); cb_patched_cur_length += new_pkt_size; break; case PACKET_WREG_32: memcpy(kernel_pkt, user_pkt, pkt_size); cb_patched_cur_length += pkt_size; - rc = goya_validate_wreg32(hdev, parser, kernel_pkt); + rc = goya_validate_wreg32(hdev, parser, + (struct packet_wreg32 *) kernel_pkt); break; case PACKET_WREG_BULK: diff --git a/drivers/misc/habanalabs/include/goya/goya_packets.h b/drivers/misc/habanalabs/include/goya/goya_packets.h index a14407b975e4..ef54bad20509 100644 --- a/drivers/misc/habanalabs/include/goya/goya_packets.h +++ b/drivers/misc/habanalabs/include/goya/goya_packets.h @@ -52,6 +52,19 @@ enum goya_dma_direction { #define GOYA_PKT_CTL_MB_SHIFT 31 #define GOYA_PKT_CTL_MB_MASK 0x80000000 +/* All packets have, at least, an 8-byte header, which contains + * the packet type. The kernel driver uses the packet header for packet + * validation and to perform any necessary required preparation before + * sending them off to the hardware. + */ +struct goya_packet { + __le64 header; + /* The rest of the packet data follows. Use the corresponding + * packet_XXX struct to deference the data, based on packet type + */ + u8 contents[0]; +}; + struct packet_nop { __le32 reserved; __le32 ctl; From 4e87334a0ef43663019dbaf3638ad10fd8c3320c Mon Sep 17 00:00:00 2001 From: Ben Segal Date: Thu, 1 Aug 2019 23:22:20 +0000 Subject: [PATCH 345/482] habanalabs: fix completion queue handling when host is BE This patch fix the CQ irq handler to work in hosts with BE architecture. It adds the correct endian-swapping macros around the relevant memory accesses. Signed-off-by: Ben Segal Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/irq.c | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/drivers/misc/habanalabs/irq.c b/drivers/misc/habanalabs/irq.c index ea9f72ff456c..199791b57caf 100644 --- a/drivers/misc/habanalabs/irq.c +++ b/drivers/misc/habanalabs/irq.c @@ -80,8 +80,7 @@ irqreturn_t hl_irq_handler_cq(int irq, void *arg) struct hl_cs_job *job; bool shadow_index_valid; u16 shadow_index; - u32 *cq_entry; - u32 *cq_base; + struct hl_cq_entry *cq_entry, *cq_base; if (hdev->disabled) { dev_dbg(hdev->dev, @@ -90,29 +89,29 @@ irqreturn_t hl_irq_handler_cq(int irq, void *arg) return IRQ_HANDLED; } - cq_base = (u32 *) (uintptr_t) cq->kernel_address; + cq_base = (struct hl_cq_entry *) (uintptr_t) cq->kernel_address; while (1) { - bool entry_ready = ((cq_base[cq->ci] & CQ_ENTRY_READY_MASK) + bool entry_ready = ((le32_to_cpu(cq_base[cq->ci].data) & + CQ_ENTRY_READY_MASK) >> CQ_ENTRY_READY_SHIFT); if (!entry_ready) break; - cq_entry = (u32 *) &cq_base[cq->ci]; + cq_entry = (struct hl_cq_entry *) &cq_base[cq->ci]; - /* - * Make sure we read CQ entry contents after we've + /* Make sure we read CQ entry contents after we've * checked the ownership bit. */ dma_rmb(); - shadow_index_valid = - ((*cq_entry & CQ_ENTRY_SHADOW_INDEX_VALID_MASK) + shadow_index_valid = ((le32_to_cpu(cq_entry->data) & + CQ_ENTRY_SHADOW_INDEX_VALID_MASK) >> CQ_ENTRY_SHADOW_INDEX_VALID_SHIFT); - shadow_index = (u16) - ((*cq_entry & CQ_ENTRY_SHADOW_INDEX_MASK) + shadow_index = (u16) ((le32_to_cpu(cq_entry->data) & + CQ_ENTRY_SHADOW_INDEX_MASK) >> CQ_ENTRY_SHADOW_INDEX_SHIFT); queue = &hdev->kernel_queues[cq->hw_queue_id]; @@ -122,8 +121,7 @@ irqreturn_t hl_irq_handler_cq(int irq, void *arg) queue_work(hdev->cq_wq, &job->finish_work); } - /* - * Update ci of the context's queue. There is no + /* Update ci of the context's queue. There is no * need to protect it with spinlock because this update is * done only inside IRQ and there is a different IRQ per * queue @@ -131,7 +129,8 @@ irqreturn_t hl_irq_handler_cq(int irq, void *arg) queue->ci = hl_queue_inc_ptr(queue->ci); /* Clear CQ entry ready bit */ - cq_base[cq->ci] &= ~CQ_ENTRY_READY_MASK; + cq_entry->data = cpu_to_le32(le32_to_cpu(cq_entry->data) & + ~CQ_ENTRY_READY_MASK); cq->ci = hl_cq_inc_ptr(cq->ci); From b9040c99414ba5b85090595a61abc686a5dbb388 Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Thu, 8 Aug 2019 15:45:58 +0300 Subject: [PATCH 346/482] habanalabs: fix endianness handling for internal QMAN submission The PQs of internal H/W queues (QMANs) can be located in different memory areas for different ASICs. Therefore, when writing PQEs, we need to use the correct function according to the location of the PQ. e.g. if the PQ is located in the device's memory (SRAM or DRAM), we need to use memcpy_toio() so it would work in architectures that have separate address ranges for IO memory. This patch makes the code that writes the PQE to be ASIC-specific so we can handle this properly per ASIC. Signed-off-by: Oded Gabbay Tested-by: Ben Segal --- drivers/misc/habanalabs/goya/goya.c | 7 ++++--- drivers/misc/habanalabs/goya/goyaP.h | 2 +- drivers/misc/habanalabs/habanalabs.h | 9 +++++++-- drivers/misc/habanalabs/hw_queue.c | 14 +++++--------- 4 files changed, 17 insertions(+), 15 deletions(-) diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c index e8b1142910e0..b39b9c98fe1d 100644 --- a/drivers/misc/habanalabs/goya/goya.c +++ b/drivers/misc/habanalabs/goya/goya.c @@ -2729,9 +2729,10 @@ void goya_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi) GOYA_ASYNC_EVENT_ID_PI_UPDATE); } -void goya_flush_pq_write(struct hl_device *hdev, u64 *pq, u64 exp_val) +void goya_pqe_write(struct hl_device *hdev, __le64 *pqe, struct hl_bd *bd) { - /* Not needed in Goya */ + /* The QMANs are on the SRAM so need to copy to IO space */ + memcpy_toio((void __iomem *) pqe, bd, sizeof(struct hl_bd)); } static void *goya_dma_alloc_coherent(struct hl_device *hdev, size_t size, @@ -5048,7 +5049,7 @@ static const struct hl_asic_funcs goya_funcs = { .resume = goya_resume, .cb_mmap = goya_cb_mmap, .ring_doorbell = goya_ring_doorbell, - .flush_pq_write = goya_flush_pq_write, + .pqe_write = goya_pqe_write, .asic_dma_alloc_coherent = goya_dma_alloc_coherent, .asic_dma_free_coherent = goya_dma_free_coherent, .get_int_queue_base = goya_get_int_queue_base, diff --git a/drivers/misc/habanalabs/goya/goyaP.h b/drivers/misc/habanalabs/goya/goyaP.h index f8c611883dc1..d7f48c9c41cd 100644 --- a/drivers/misc/habanalabs/goya/goyaP.h +++ b/drivers/misc/habanalabs/goya/goyaP.h @@ -177,7 +177,7 @@ int goya_late_init(struct hl_device *hdev); void goya_late_fini(struct hl_device *hdev); void goya_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi); -void goya_flush_pq_write(struct hl_device *hdev, u64 *pq, u64 exp_val); +void goya_pqe_write(struct hl_device *hdev, __le64 *pqe, struct hl_bd *bd); void goya_update_eq_ci(struct hl_device *hdev, u32 val); void goya_restore_phase_topology(struct hl_device *hdev); int goya_context_switch(struct hl_device *hdev, u32 asid); diff --git a/drivers/misc/habanalabs/habanalabs.h b/drivers/misc/habanalabs/habanalabs.h index 6a4c64b97f38..ce83adafcf2d 100644 --- a/drivers/misc/habanalabs/habanalabs.h +++ b/drivers/misc/habanalabs/habanalabs.h @@ -441,7 +441,11 @@ enum hl_pll_frequency { * @resume: handles IP specific H/W or SW changes for resume. * @cb_mmap: maps a CB. * @ring_doorbell: increment PI on a given QMAN. - * @flush_pq_write: flush PQ entry write if necessary, WARN if flushing failed. + * @pqe_write: Write the PQ entry to the PQ. This is ASIC-specific + * function because the PQs are located in different memory areas + * per ASIC (SRAM, DRAM, Host memory) and therefore, the method of + * writing the PQE must match the destination memory area + * properties. * @asic_dma_alloc_coherent: Allocate coherent DMA memory by calling * dma_alloc_coherent(). This is ASIC function because * its implementation is not trivial when the driver @@ -510,7 +514,8 @@ struct hl_asic_funcs { int (*cb_mmap)(struct hl_device *hdev, struct vm_area_struct *vma, u64 kaddress, phys_addr_t paddress, u32 size); void (*ring_doorbell)(struct hl_device *hdev, u32 hw_queue_id, u32 pi); - void (*flush_pq_write)(struct hl_device *hdev, u64 *pq, u64 exp_val); + void (*pqe_write)(struct hl_device *hdev, __le64 *pqe, + struct hl_bd *bd); void* (*asic_dma_alloc_coherent)(struct hl_device *hdev, size_t size, dma_addr_t *dma_handle, gfp_t flag); void (*asic_dma_free_coherent)(struct hl_device *hdev, size_t size, diff --git a/drivers/misc/habanalabs/hw_queue.c b/drivers/misc/habanalabs/hw_queue.c index e3b5517897ea..5f5673b74985 100644 --- a/drivers/misc/habanalabs/hw_queue.c +++ b/drivers/misc/habanalabs/hw_queue.c @@ -290,23 +290,19 @@ static void int_hw_queue_schedule_job(struct hl_cs_job *job) struct hl_device *hdev = job->cs->ctx->hdev; struct hl_hw_queue *q = &hdev->kernel_queues[job->hw_queue_id]; struct hl_bd bd; - u64 *pi, *pbd = (u64 *) &bd; + __le64 *pi; bd.ctl = 0; - bd.len = __cpu_to_le32(job->job_cb_size); - bd.ptr = __cpu_to_le64((u64) (uintptr_t) job->user_cb); + bd.len = cpu_to_le32(job->job_cb_size); + bd.ptr = cpu_to_le64((u64) (uintptr_t) job->user_cb); - pi = (u64 *) (uintptr_t) (q->kernel_address + + pi = (__le64 *) (uintptr_t) (q->kernel_address + ((q->pi & (q->int_queue_len - 1)) * sizeof(bd))); - pi[0] = pbd[0]; - pi[1] = pbd[1]; - q->pi++; q->pi &= ((q->int_queue_len << 1) - 1); - /* Flush PQ entry write. Relevant only for specific ASICs */ - hdev->asic_funcs->flush_pq_write(hdev, pi, pbd[0]); + hdev->asic_funcs->pqe_write(hdev, pi, &bd); hdev->asic_funcs->ring_doorbell(hdev, q->hw_queue_id, q->pi); } From b421d83a3947369fd5718824aecfaebe1efbf7ed Mon Sep 17 00:00:00 2001 From: Ben Segal Date: Wed, 7 Aug 2019 13:54:54 +0000 Subject: [PATCH 347/482] habanalabs: fix device IRQ unmasking for BE host When unmasking IRQs inside the ASIC, the driver passes an array of all the IRQ to unmask. The ASIC's CPU is working in LE so when running in a BE host, the driver needs to do the proper endianness swapping when preparing this array. In addition, this patch also fixes the endianness of a couple of kernel log debug messages that print values of packets Signed-off-by: Ben Segal Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/goya/goya.c | 33 +++++++++++++++++++++-------- 1 file changed, 24 insertions(+), 9 deletions(-) diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c index b39b9c98fe1d..271c5c8f53b4 100644 --- a/drivers/misc/habanalabs/goya/goya.c +++ b/drivers/misc/habanalabs/goya/goya.c @@ -3314,9 +3314,11 @@ static int goya_validate_dma_pkt_no_mmu(struct hl_device *hdev, int rc; dev_dbg(hdev->dev, "DMA packet details:\n"); - dev_dbg(hdev->dev, "source == 0x%llx\n", user_dma_pkt->src_addr); - dev_dbg(hdev->dev, "destination == 0x%llx\n", user_dma_pkt->dst_addr); - dev_dbg(hdev->dev, "size == %u\n", user_dma_pkt->tsize); + dev_dbg(hdev->dev, "source == 0x%llx\n", + le64_to_cpu(user_dma_pkt->src_addr)); + dev_dbg(hdev->dev, "destination == 0x%llx\n", + le64_to_cpu(user_dma_pkt->dst_addr)); + dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize)); ctl = le32_to_cpu(user_dma_pkt->ctl); user_dir = (ctl & GOYA_PKT_LIN_DMA_CTL_DMA_DIR_MASK) >> @@ -3345,9 +3347,11 @@ static int goya_validate_dma_pkt_mmu(struct hl_device *hdev, struct packet_lin_dma *user_dma_pkt) { dev_dbg(hdev->dev, "DMA packet details:\n"); - dev_dbg(hdev->dev, "source == 0x%llx\n", user_dma_pkt->src_addr); - dev_dbg(hdev->dev, "destination == 0x%llx\n", user_dma_pkt->dst_addr); - dev_dbg(hdev->dev, "size == %u\n", user_dma_pkt->tsize); + dev_dbg(hdev->dev, "source == 0x%llx\n", + le64_to_cpu(user_dma_pkt->src_addr)); + dev_dbg(hdev->dev, "destination == 0x%llx\n", + le64_to_cpu(user_dma_pkt->dst_addr)); + dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize)); /* * WA for HW-23. @@ -3387,7 +3391,8 @@ static int goya_validate_wreg32(struct hl_device *hdev, dev_dbg(hdev->dev, "WREG32 packet details:\n"); dev_dbg(hdev->dev, "reg_offset == 0x%x\n", reg_offset); - dev_dbg(hdev->dev, "value == 0x%x\n", wreg_pkt->value); + dev_dbg(hdev->dev, "value == 0x%x\n", + le32_to_cpu(wreg_pkt->value)); if (reg_offset != (mmDMA_CH_0_WR_COMP_ADDR_LO & 0x1FFF)) { dev_err(hdev->dev, "WREG32 packet with illegal address 0x%x\n", @@ -4359,6 +4364,8 @@ static int goya_unmask_irq_arr(struct hl_device *hdev, u32 *irq_arr, size_t total_pkt_size; long result; int rc; + int irq_num_entries, irq_arr_index; + __le32 *goya_irq_arr; total_pkt_size = sizeof(struct armcp_unmask_irq_arr_packet) + irq_arr_size; @@ -4376,8 +4383,16 @@ static int goya_unmask_irq_arr(struct hl_device *hdev, u32 *irq_arr, if (!pkt) return -ENOMEM; - pkt->length = cpu_to_le32(irq_arr_size / sizeof(irq_arr[0])); - memcpy(&pkt->irqs, irq_arr, irq_arr_size); + irq_num_entries = irq_arr_size / sizeof(irq_arr[0]); + pkt->length = cpu_to_le32(irq_num_entries); + + /* We must perform any necessary endianness conversation on the irq + * array being passed to the goya hardware + */ + for (irq_arr_index = 0, goya_irq_arr = (__le32 *) &pkt->irqs; + irq_arr_index < irq_num_entries ; irq_arr_index++) + goya_irq_arr[irq_arr_index] = + cpu_to_le32(irq_arr[irq_arr_index]); pkt->armcp_pkt.ctl = cpu_to_le32(ARMCP_PACKET_UNMASK_RAZWI_IRQ_ARRAY << ARMCP_PKT_CTL_OPCODE_SHIFT); From accd2dd72c8f087441d725dd916688171519e4e6 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 9 Aug 2019 10:23:57 +0200 Subject: [PATCH 348/482] PCI/ASPM: Add pcie_aspm_enabled() Add a function checking whether or not PCIe ASPM has been enabled for a given device. It will be used by the NVMe driver to decide how to handle the device during system suspend. Signed-off-by: Rafael J. Wysocki Reviewed-by: Keith Busch Acked-by: Bjorn Helgaas --- drivers/pci/pcie/aspm.c | 20 ++++++++++++++++++++ include/linux/pci.h | 2 ++ 2 files changed, 22 insertions(+) diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c index e44af7f4d37f..464f8f92653f 100644 --- a/drivers/pci/pcie/aspm.c +++ b/drivers/pci/pcie/aspm.c @@ -1170,6 +1170,26 @@ static int pcie_aspm_get_policy(char *buffer, const struct kernel_param *kp) module_param_call(policy, pcie_aspm_set_policy, pcie_aspm_get_policy, NULL, 0644); +/** + * pcie_aspm_enabled - Check if PCIe ASPM has been enabled for a device. + * @pdev: Target device. + */ +bool pcie_aspm_enabled(struct pci_dev *pdev) +{ + struct pci_dev *bridge = pci_upstream_bridge(pdev); + bool ret; + + if (!bridge) + return false; + + mutex_lock(&aspm_lock); + ret = bridge->link_state ? !!bridge->link_state->aspm_enabled : false; + mutex_unlock(&aspm_lock); + + return ret; +} +EXPORT_SYMBOL_GPL(pcie_aspm_enabled); + #ifdef CONFIG_PCIEASPM_DEBUG static ssize_t link_state_show(struct device *dev, struct device_attribute *attr, diff --git a/include/linux/pci.h b/include/linux/pci.h index 9e700d9f9f28..82e4cd1b7ac3 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1567,8 +1567,10 @@ extern bool pcie_ports_native; #ifdef CONFIG_PCIEASPM bool pcie_aspm_support_enabled(void); +bool pcie_aspm_enabled(struct pci_dev *pdev); #else static inline bool pcie_aspm_support_enabled(void) { return false; } +static inline bool pcie_aspm_enabled(struct pci_dev *pdev) { return false; } #endif #ifdef CONFIG_PCIEAER From 4eaefe8c621c6195c91044396ed8060c179f7aae Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 8 Aug 2019 23:58:38 +0200 Subject: [PATCH 349/482] nvme-pci: Allow PCI bus-level PM to be used if ASPM is disabled One of the modifications made by commit d916b1be94b6 ("nvme-pci: use host managed power state for suspend") was adding a pci_save_state() call to nvme_suspend() so as to instruct the PCI bus type to leave devices handled by the nvme driver in D0 during suspend-to-idle. That was done with the assumption that ASPM would transition the device's PCIe link into a low-power state when the device became inactive. However, if ASPM is disabled for the device, its PCIe link will stay in L0 and in that case commit d916b1be94b6 is likely to cause the energy used by the system while suspended to increase. Namely, if the device in question works in accordance with the PCIe specification, putting it into D3hot causes its PCIe link to go to L1 or L2/L3 Ready, which is lower-power than L0. Since the energy used by the system while suspended depends on the state of its PCIe link (as a general rule, the lower-power the state of the link, the less energy the system will use), putting the device into D3hot during suspend-to-idle should be more energy-efficient that leaving it in D0 with disabled ASPM. For this reason, avoid leaving NVMe devices with disabled ASPM in D0 during suspend-to-idle. Instead, shut them down entirely and let the PCI bus type put them into D3. Fixes: d916b1be94b6 ("nvme-pci: use host managed power state for suspend") Link: https://lore.kernel.org/linux-pm/2763495.NmdaWeg79L@kreacher/T/#t Signed-off-by: Rafael J. Wysocki Reviewed-by: Keith Busch --- drivers/nvme/host/pci.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index db160cee42ad..108e109e99f1 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2846,7 +2846,7 @@ static int nvme_resume(struct device *dev) struct nvme_dev *ndev = pci_get_drvdata(to_pci_dev(dev)); struct nvme_ctrl *ctrl = &ndev->ctrl; - if (pm_resume_via_firmware() || !ctrl->npss || + if (ndev->last_ps == U32_MAX || nvme_set_power_state(ctrl, ndev->last_ps) != 0) nvme_reset_ctrl(ctrl); return 0; @@ -2859,6 +2859,8 @@ static int nvme_suspend(struct device *dev) struct nvme_ctrl *ctrl = &ndev->ctrl; int ret = -EBUSY; + ndev->last_ps = U32_MAX; + /* * The platform does not remove power for a kernel managed suspend so * use host managed nvme power settings for lowest idle power if @@ -2866,8 +2868,14 @@ static int nvme_suspend(struct device *dev) * shutdown. But if the firmware is involved after the suspend or the * device does not support any non-default power states, shut down the * device fully. + * + * If ASPM is not enabled for the device, shut down the device and allow + * the PCI bus layer to put it into D3 in order to take the PCIe link + * down, so as to allow the platform to achieve its minimum low-power + * state (which may not be possible if the link is up). */ - if (pm_suspend_via_firmware() || !ctrl->npss) { + if (pm_suspend_via_firmware() || !ctrl->npss || + !pcie_aspm_enabled(pdev)) { nvme_dev_disable(ndev, true); return 0; } @@ -2880,7 +2888,6 @@ static int nvme_suspend(struct device *dev) ctrl->state != NVME_CTRL_ADMIN_ONLY) goto unfreeze; - ndev->last_ps = 0; ret = nvme_get_power_state(ctrl, &ndev->last_ps); if (ret < 0) goto unfreeze; From b61fbc887af7a13a1c90c84c1feaeb4c9780e1e2 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 7 Aug 2019 23:59:03 +0200 Subject: [PATCH 350/482] efi-stub: Fix get_efi_config_table on mixed-mode setups Fix get_efi_config_table using the wrong structs when booting a 64 bit kernel on 32 bit firmware. Fixes: 82d736ac56d7 ("Abstract out support for locating an EFI config table") Signed-off-by: Hans de Goede Acked-By: Matthew Garrett Reviewed-by: Ard Biesheuvel Acked-by: Jarkko Sakkinen Signed-off-by: Ard Biesheuvel --- .../firmware/efi/libstub/efi-stub-helper.c | 38 +++++++++++++------ 1 file changed, 27 insertions(+), 11 deletions(-) diff --git a/drivers/firmware/efi/libstub/efi-stub-helper.c b/drivers/firmware/efi/libstub/efi-stub-helper.c index 1db780c0f07b..3caae7f2cf56 100644 --- a/drivers/firmware/efi/libstub/efi-stub-helper.c +++ b/drivers/firmware/efi/libstub/efi-stub-helper.c @@ -927,17 +927,33 @@ efi_status_t efi_exit_boot_services(efi_system_table_t *sys_table_arg, return status; } +#define GET_EFI_CONFIG_TABLE(bits) \ +static void *get_efi_config_table##bits(efi_system_table_t *_sys_table, \ + efi_guid_t guid) \ +{ \ + efi_system_table_##bits##_t *sys_table; \ + efi_config_table_##bits##_t *tables; \ + int i; \ + \ + sys_table = (typeof(sys_table))_sys_table; \ + tables = (typeof(tables))(unsigned long)sys_table->tables; \ + \ + for (i = 0; i < sys_table->nr_tables; i++) { \ + if (efi_guidcmp(tables[i].guid, guid) != 0) \ + continue; \ + \ + return (void *)(unsigned long)tables[i].table; \ + } \ + \ + return NULL; \ +} +GET_EFI_CONFIG_TABLE(32) +GET_EFI_CONFIG_TABLE(64) + void *get_efi_config_table(efi_system_table_t *sys_table, efi_guid_t guid) { - efi_config_table_t *tables = (efi_config_table_t *)sys_table->tables; - int i; - - for (i = 0; i < sys_table->nr_tables; i++) { - if (efi_guidcmp(tables[i].guid, guid) != 0) - continue; - - return (void *)tables[i].table; - } - - return NULL; + if (efi_is_64bit()) + return get_efi_config_table64(sys_table, guid); + else + return get_efi_config_table32(sys_table, guid); } From e7409258845a0f64967f8377e99294d438137537 Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Fri, 9 Aug 2019 18:40:37 -0700 Subject: [PATCH 351/482] x86/umwait: Fix error handling in umwait_init() Currently, failure of cpuhp_setup_state() is ignored and the syscore ops and the control interfaces can still be added even after the failure. But, this error handling will cause a few issues: 1. The CPUs may have different values in the IA32_UMWAIT_CONTROL MSR because there is no way to roll back the control MSR on the CPUs which already set the MSR before the failure. 2. If the sysfs interface is added successfully, there will be a mismatch between the global control value and the control MSR: - The interface shows the default global control value. But, the control MSR is not set to the value because the CPU online function, which is supposed to set the MSR to the value, is not installed. - If the sysadmin changes the global control value through the interface, the control MSR on all current online CPUs is set to the new value. But, the control MSR on newly onlined CPUs after the value change will not be set to the new value due to lack of the CPU online function. 3. On resume from suspend/hibernation, the boot CPU restores the control MSR to the global control value through the syscore ops. But, the control MSR on all APs is not set due to lake of the CPU online function. To solve the issues and enforce consistent behavior on the failure of the CPU hotplug setup, make the following changes: 1. Cache the original control MSR value which is configured by hardware or BIOS before kernel boot. This value is likely to be 0. But it could be a different number as well. Cache the control MSR only once before the MSR is changed. 2. Add the CPU offline function so that the MSR is restored to the original control value on all CPUs on the failure. 3. On the failure, exit from cpumait_init() so that the syscore ops and the control interfaces are not added. Reported-by: Valdis Kletnieks Suggested-by: Thomas Gleixner Signed-off-by: Fenghua Yu Signed-off-by: Thomas Gleixner Link: https://lkml.kernel.org/r/1565401237-60936-1-git-send-email-fenghua.yu@intel.com --- arch/x86/kernel/cpu/umwait.c | 39 +++++++++++++++++++++++++++++++++++- 1 file changed, 38 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/umwait.c b/arch/x86/kernel/cpu/umwait.c index 6a204e7336c1..32b4dc9030aa 100644 --- a/arch/x86/kernel/cpu/umwait.c +++ b/arch/x86/kernel/cpu/umwait.c @@ -17,6 +17,12 @@ */ static u32 umwait_control_cached = UMWAIT_CTRL_VAL(100000, UMWAIT_C02_ENABLE); +/* + * Cache the original IA32_UMWAIT_CONTROL MSR value which is configured by + * hardware or BIOS before kernel boot. + */ +static u32 orig_umwait_control_cached __ro_after_init; + /* * Serialize access to umwait_control_cached and IA32_UMWAIT_CONTROL MSR in * the sysfs write functions. @@ -52,6 +58,23 @@ static int umwait_cpu_online(unsigned int cpu) return 0; } +/* + * The CPU hotplug callback sets the control MSR to the original control + * value. + */ +static int umwait_cpu_offline(unsigned int cpu) +{ + /* + * This code is protected by the CPU hotplug already and + * orig_umwait_control_cached is never changed after it caches + * the original control MSR value in umwait_init(). So there + * is no race condition here. + */ + wrmsr(MSR_IA32_UMWAIT_CONTROL, orig_umwait_control_cached, 0); + + return 0; +} + /* * On resume, restore IA32_UMWAIT_CONTROL MSR on the boot processor which * is the only active CPU at this time. The MSR is set up on the APs via the @@ -185,8 +208,22 @@ static int __init umwait_init(void) if (!boot_cpu_has(X86_FEATURE_WAITPKG)) return -ENODEV; + /* + * Cache the original control MSR value before the control MSR is + * changed. This is the only place where orig_umwait_control_cached + * is modified. + */ + rdmsrl(MSR_IA32_UMWAIT_CONTROL, orig_umwait_control_cached); + ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "umwait:online", - umwait_cpu_online, NULL); + umwait_cpu_online, umwait_cpu_offline); + if (ret < 0) { + /* + * On failure, the control MSR on all CPUs has the + * original control value. + */ + return ret; + } register_syscore_ops(&umwait_syscore_ops); From e26cc08265dda37d2acc8394604f220ef412299d Mon Sep 17 00:00:00 2001 From: zhengbin Date: Mon, 12 Aug 2019 20:36:55 +0800 Subject: [PATCH 352/482] blk-mq: move cancel of requeue_work to the front of blk_exit_queue blk_exit_queue will free elevator_data, while blk_mq_requeue_work will access it. Move cancel of requeue_work to the front of blk_exit_queue to avoid use-after-free. blk_exit_queue blk_mq_requeue_work __elevator_exit blk_mq_run_hw_queues blk_mq_exit_sched blk_mq_run_hw_queue dd_exit_queue blk_mq_hctx_has_pending kfree(elevator_data) blk_mq_sched_has_work dd_has_work Fixes: fbc2a15e3433 ("blk-mq: move cancel of requeue_work into blk_mq_release") Cc: stable@vger.kernel.org Reviewed-by: Ming Lei Signed-off-by: zhengbin Signed-off-by: Jens Axboe --- block/blk-mq.c | 2 -- block/blk-sysfs.c | 3 +++ 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index f78d3287dd82..a8e6a58f5f28 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -2666,8 +2666,6 @@ void blk_mq_release(struct request_queue *q) struct blk_mq_hw_ctx *hctx, *next; int i; - cancel_delayed_work_sync(&q->requeue_work); - queue_for_each_hw_ctx(q, hctx, i) WARN_ON_ONCE(hctx && list_empty(&hctx->hctx_list)); diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 977c659dcd18..9bfa3ea4ed63 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -892,6 +892,9 @@ static void __blk_release_queue(struct work_struct *work) blk_free_queue_stats(q->stats); + if (queue_is_mq(q)) + cancel_delayed_work_sync(&q->requeue_work); + blk_exit_queue(q); blk_queue_free_zone_bitmaps(q); From ae78ca3cf3d9e9f914bfcd0bc5c389ff18b9c2e0 Mon Sep 17 00:00:00 2001 From: Wenwen Wang Date: Sun, 11 Aug 2019 12:23:22 -0500 Subject: [PATCH 353/482] xen/blkback: fix memory leaks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In read_per_ring_refs(), after 'req' and related memory regions are allocated, xen_blkif_map() is invoked to map the shared frame, irq, and etc. However, if this mapping process fails, no cleanup is performed, leading to memory leaks. To fix this issue, invoke the cleanup before returning the error. Acked-by: Roger Pau Monné Reviewed-by: Boris Ostrovsky Signed-off-by: Wenwen Wang Signed-off-by: Jens Axboe --- drivers/block/xen-blkback/xenbus.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c index 3ac6a5d18071..b90dbcd99c03 100644 --- a/drivers/block/xen-blkback/xenbus.c +++ b/drivers/block/xen-blkback/xenbus.c @@ -965,6 +965,7 @@ static int read_per_ring_refs(struct xen_blkif_ring *ring, const char *dir) } } + err = -ENOMEM; for (i = 0; i < nr_grefs * XEN_BLKIF_REQS_PER_PAGE; i++) { req = kzalloc(sizeof(*req), GFP_KERNEL); if (!req) @@ -987,7 +988,7 @@ static int read_per_ring_refs(struct xen_blkif_ring *ring, const char *dir) err = xen_blkif_map(ring, ring_ref, nr_grefs, evtchn); if (err) { xenbus_dev_fatal(dev, err, "mapping ring-ref port %u", evtchn); - return err; + goto fail; } return 0; @@ -1007,8 +1008,7 @@ static int read_per_ring_refs(struct xen_blkif_ring *ring, const char *dir) } kfree(req); } - return -ENOMEM; - + return err; } static int connect_ring(struct backend_info *be) From b4d98bc3fc93ec3a58459948a2c0e0c9b501cd88 Mon Sep 17 00:00:00 2001 From: Ian Abbott Date: Mon, 12 Aug 2019 12:15:17 +0100 Subject: [PATCH 354/482] staging: comedi: dt3000: Fix signed integer overflow 'divider * base' In `dt3k_ns_to_timer()` the following lines near the end of the function result in a signed integer overflow: prescale = 15; base = timer_base * (1 << prescale); divider = 65535; *nanosec = divider * base; (`divider`, `base` and `prescale` are type `int`, `timer_base` and `*nanosec` are type `unsigned int`. The value of `timer_base` will be either 50 or 100.) The main reason for the overflow is that the calculation for `base` is completely wrong. It should be: base = timer_base * (prescale + 1); which matches an earlier instance of this calculation in the same function. Reported-by: David Binderman Cc: Signed-off-by: Ian Abbott Link: https://lore.kernel.org/r/20190812111517.26803-1-abbotti@mev.co.uk Signed-off-by: Greg Kroah-Hartman --- drivers/staging/comedi/drivers/dt3000.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/comedi/drivers/dt3000.c b/drivers/staging/comedi/drivers/dt3000.c index 2edf3ee91300..4ad176fc14ad 100644 --- a/drivers/staging/comedi/drivers/dt3000.c +++ b/drivers/staging/comedi/drivers/dt3000.c @@ -368,7 +368,7 @@ static int dt3k_ns_to_timer(unsigned int timer_base, unsigned int *nanosec, } prescale = 15; - base = timer_base * (1 << prescale); + base = timer_base * (prescale + 1); divider = 65535; *nanosec = divider * base; return (prescale << 16) | (divider); From 8e2a589a3fc36ce858d42e767c3bcd8fc62a512b Mon Sep 17 00:00:00 2001 From: Ian Abbott Date: Mon, 12 Aug 2019 13:08:14 +0100 Subject: [PATCH 355/482] staging: comedi: dt3000: Fix rounding up of timer divisor `dt3k_ns_to_timer()` determines the prescaler and divisor to use to produce a desired timing period. It is influenced by a rounding mode and can round the divisor up, down, or to the nearest value. However, the code for rounding up currently does the same as rounding down! Fix ir by using the `DIV_ROUND_UP()` macro to calculate the divisor when rounding up. Also, change the types of the `divider`, `base` and `prescale` variables from `int` to `unsigned int` to avoid mixing signed and unsigned types in the calculations. Also fix a typo in a nearby comment: "improvment" => "improvement". Signed-off-by: Ian Abbott Cc: stable Link: https://lore.kernel.org/r/20190812120814.21188-1-abbotti@mev.co.uk Signed-off-by: Greg Kroah-Hartman --- drivers/staging/comedi/drivers/dt3000.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/staging/comedi/drivers/dt3000.c b/drivers/staging/comedi/drivers/dt3000.c index 4ad176fc14ad..caf4d4df4bd3 100644 --- a/drivers/staging/comedi/drivers/dt3000.c +++ b/drivers/staging/comedi/drivers/dt3000.c @@ -342,9 +342,9 @@ static irqreturn_t dt3k_interrupt(int irq, void *d) static int dt3k_ns_to_timer(unsigned int timer_base, unsigned int *nanosec, unsigned int flags) { - int divider, base, prescale; + unsigned int divider, base, prescale; - /* This function needs improvment */ + /* This function needs improvement */ /* Don't know if divider==0 works. */ for (prescale = 0; prescale < 16; prescale++) { @@ -358,7 +358,7 @@ static int dt3k_ns_to_timer(unsigned int timer_base, unsigned int *nanosec, divider = (*nanosec) / base; break; case CMDF_ROUND_UP: - divider = (*nanosec) / base; + divider = DIV_ROUND_UP(*nanosec, base); break; } if (divider < 65536) { From e9eec6a55c95fb918036bfe29c26a535dca1ad49 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Thu, 8 Aug 2019 11:15:38 +0300 Subject: [PATCH 356/482] IB/mlx5: Fix use-after-free error while accessing ev_file pointer Call to uverbs_close_fd() releases file pointer to 'ev_file' and mlx5_ib_dev is going to be inaccessible. Cache pointer prior cleaning resources to solve the KASAN warning below. BUG: KASAN: use-after-free in devx_async_event_close+0x391/0x480 [mlx5_ib] Read of size 8 at addr ffff888301e3cec0 by task devx_direct_tes/4631 CPU: 1 PID: 4631 Comm: devx_direct_tes Tainted: G OE 5.3.0-rc1-for-upstream-dbg-2019-07-26_01-19-56-93 #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu2 04/01/2014 Call Trace: dump_stack+0x9a/0xeb print_address_description+0x1e2/0x400 ? devx_async_event_close+0x391/0x480 [mlx5_ib] __kasan_report+0x15c/0x1df ? devx_async_event_close+0x391/0x480 [mlx5_ib] kasan_report+0xe/0x20 devx_async_event_close+0x391/0x480 [mlx5_ib] __fput+0x26a/0x7b0 task_work_run+0x10d/0x180 exit_to_usermode_loop+0x137/0x160 do_syscall_64+0x3c7/0x490 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x7f5df907d664 Code: 00 f7 d8 64 89 02 48 c7 c0 ff ff ff ff eb b7 0f 1f 80 00 00 00 00 8b 05 6a cd 20 00 48 63 ff 85 c0 75 13 b8 03 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 44 f3 c3 66 90 48 83 ec 18 48 89 7c 24 08 e8 RSP: 002b:00007ffd353cb958 EFLAGS: 00000246 ORIG_RAX: 0000000000000003 RAX: 0000000000000000 RBX: 000056017a88c348 RCX: 00007f5df907d664 RDX: 00007f5df969d400 RSI: 00007f5de8f1ec90 RDI: 0000000000000006 RBP: 00007f5df9681dc0 R08: 00007f5de8736410 R09: 000056017a9d2dd0 R10: 000000000000000b R11: 0000000000000246 R12: 00007f5de899d7d0 R13: 00007f5df96c4248 R14: 00007f5de8f1ecb0 R15: 000056017ae41308 Allocated by task 4631: save_stack+0x19/0x80 kasan_kmalloc.constprop.3+0xa0/0xd0 alloc_uobj+0x71/0x230 [ib_uverbs] alloc_begin_fd_uobject+0x2e/0xc0 [ib_uverbs] rdma_alloc_begin_uobject+0x96/0x140 [ib_uverbs] ib_uverbs_run_method+0xdf0/0x1940 [ib_uverbs] ib_uverbs_cmd_verbs+0x57e/0xdb0 [ib_uverbs] ib_uverbs_ioctl+0x177/0x260 [ib_uverbs] do_vfs_ioctl+0x18f/0x1010 ksys_ioctl+0x70/0x80 __x64_sys_ioctl+0x6f/0xb0 do_syscall_64+0x95/0x490 entry_SYSCALL_64_after_hwframe+0x49/0xbe Freed by task 4631: save_stack+0x19/0x80 __kasan_slab_free+0x11d/0x160 slab_free_freelist_hook+0x67/0x1a0 kfree+0xb9/0x2a0 uverbs_close_fd+0x118/0x1c0 [ib_uverbs] devx_async_event_close+0x28a/0x480 [mlx5_ib] __fput+0x26a/0x7b0 task_work_run+0x10d/0x180 exit_to_usermode_loop+0x137/0x160 do_syscall_64+0x3c7/0x490 entry_SYSCALL_64_after_hwframe+0x49/0xbe The buggy address belongs to the object at ffff888301e3cda8 which belongs to the cache kmalloc-512 of size 512 The buggy address is located 280 bytes inside of 512-byte region [ffff888301e3cda8, ffff888301e3cfa8) The buggy address belongs to the page: page:ffffea000c078e00 refcount:1 mapcount:0 mapping:ffff888352811300 index:0x0 compound_mapcount: 0 flags: 0x2fffff80010200(slab|head) raw: 002fffff80010200 ffffea000d152608 ffffea000c077808 ffff888352811300 raw: 0000000000000000 0000000000250025 00000001ffffffff 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff888301e3cd80: fc fc fc fc fc fb fb fb fb fb fb fb fb fb fb fb ffff888301e3ce00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff888301e3ce80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff888301e3cf00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff888301e3cf80: fb fb fb fb fb fc fc fc fc fc fc fc fc fc fc fc Disabling lock debugging due to kernel taint Cc: # 5.2 Fixes: 759738537142 ("IB/mlx5: Enable subscription for device events over DEVX") Signed-off-by: Yishai Hadas Signed-off-by: Leon Romanovsky Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20190808081538.28772-1-leon@kernel.org Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/devx.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index 2d1b3d9609d9..af5bbb35c058 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -2644,12 +2644,13 @@ static int devx_async_event_close(struct inode *inode, struct file *filp) struct devx_async_event_file *ev_file = filp->private_data; struct devx_event_subscription *event_sub, *event_sub_tmp; struct devx_async_event_data *entry, *tmp; + struct mlx5_ib_dev *dev = ev_file->dev; - mutex_lock(&ev_file->dev->devx_event_table.event_xa_lock); + mutex_lock(&dev->devx_event_table.event_xa_lock); /* delete the subscriptions which are related to this FD */ list_for_each_entry_safe(event_sub, event_sub_tmp, &ev_file->subscribed_events_list, file_list) { - devx_cleanup_subscription(ev_file->dev, event_sub); + devx_cleanup_subscription(dev, event_sub); if (event_sub->eventfd) eventfd_ctx_put(event_sub->eventfd); @@ -2658,7 +2659,7 @@ static int devx_async_event_close(struct inode *inode, struct file *filp) kfree_rcu(event_sub, rcu); } - mutex_unlock(&ev_file->dev->devx_event_table.event_xa_lock); + mutex_unlock(&dev->devx_event_table.event_xa_lock); /* free the pending events allocation */ if (!ev_file->omit_data) { @@ -2670,7 +2671,7 @@ static int devx_async_event_close(struct inode *inode, struct file *filp) } uverbs_close_fd(filp); - put_device(&ev_file->dev->ib_dev.dev); + put_device(&dev->ib_dev.dev); return 0; } From 17c19287ecf54fb55f155902dcd39c62a9547c4e Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 9 Aug 2019 17:09:04 +0300 Subject: [PATCH 357/482] RDMA/siw: Fix a memory leak in siw_init_cpulist() The error handling code doesn't free siw_cpu_info.tx_valid_cpus[0]. The first iteration through the loop is a no-op so this is sort of an off by one bug. Also Bernard pointed out that we can remove the NULL assignment and simplify the code a bit. Fixes: bdcf26bf9b3a ("rdma/siw: network and RDMA core interface") Signed-off-by: Dan Carpenter Reviewed-by: Bernard Metzler Reviewed-by: Bernard Metzler Link: https://lore.kernel.org/r/20190809140904.GB3552@mwanda Signed-off-by: Doug Ledford --- drivers/infiniband/sw/siw/siw_main.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/infiniband/sw/siw/siw_main.c b/drivers/infiniband/sw/siw/siw_main.c index d0f140daf659..05a92f997f60 100644 --- a/drivers/infiniband/sw/siw/siw_main.c +++ b/drivers/infiniband/sw/siw/siw_main.c @@ -160,10 +160,8 @@ static int siw_init_cpulist(void) out_err: siw_cpu_info.num_nodes = 0; - while (i) { + while (--i >= 0) kfree(siw_cpu_info.tx_valid_cpus[i]); - siw_cpu_info.tx_valid_cpus[i--] = NULL; - } kfree(siw_cpu_info.tx_valid_cpus); siw_cpu_info.tx_valid_cpus = NULL; From 932727c55653c1d7838d0ecd0cdce4393be156e0 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 9 Aug 2019 13:13:19 +0300 Subject: [PATCH 358/482] RDMA/core: Fix error code in stat_get_doit_qp() We need to set the error codes on these paths. Currently the only possible error code is -EMSGSIZE so that's what the patch uses. Fixes: 83c2c1fcbd08 ("RDMA/nldev: Allow get counter mode through RDMA netlink") Signed-off-by: Dan Carpenter Reviewed-by: Leon Romanovsky Link: https://lore.kernel.org/r/20190809101311.GA17867@mwanda Signed-off-by: Doug Ledford --- drivers/infiniband/core/nldev.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index 783e465e7c41..87d40d1ecdde 100644 --- a/drivers/infiniband/core/nldev.c +++ b/drivers/infiniband/core/nldev.c @@ -1952,12 +1952,16 @@ static int stat_get_doit_qp(struct sk_buff *skb, struct nlmsghdr *nlh, if (fill_nldev_handle(msg, device) || nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port) || - nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_MODE, mode)) + nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_MODE, mode)) { + ret = -EMSGSIZE; goto err_msg; + } if ((mode == RDMA_COUNTER_MODE_AUTO) && - nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK, mask)) + nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK, mask)) { + ret = -EMSGSIZE; goto err_msg; + } nlmsg_end(msg, nlh); ib_device_put(device); From 5ee6310fb163ba7c66718905d4a19f1e71e641e0 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Wed, 10 Jul 2019 06:12:22 +0000 Subject: [PATCH 359/482] Bluetooth: btusb: Fix error return code in btusb_mtk_setup_firmware() Fix to return error code -EINVAL from the error handling case instead of 0, as done elsewhere in this function. Fixes: a1c49c434e15 ("Bluetooth: btusb: Add protocol support for MediaTek MT7668U USB devices") Signed-off-by: Wei Yongjun Signed-off-by: Marcel Holtmann --- drivers/bluetooth/btusb.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 3876fee6ad13..5cf0734eb31b 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -2762,8 +2762,10 @@ static int btusb_mtk_setup_firmware(struct hci_dev *hdev, const char *fwname) fw_size = fw->size; /* The size of patch header is 30 bytes, should be skip */ - if (fw_size < 30) + if (fw_size < 30) { + err = -EINVAL; goto err_release_fw; + } fw_size -= 30; fw_ptr += 30; From 8059ba0bd0e4694e51c2ee6438a77b325f06c0d5 Mon Sep 17 00:00:00 2001 From: Matthias Kaehlcke Date: Tue, 9 Jul 2019 15:44:50 -0700 Subject: [PATCH 360/482] Bluetooth: btqca: Add a short delay before downloading the NVM On WCN3990 downloading the NVM sometimes fails with a "TLV response size mismatch" error: [ 174.949955] Bluetooth: btqca.c:qca_download_firmware() hci0: QCA Downloading qca/crnv21.bin [ 174.958718] Bluetooth: btqca.c:qca_tlv_send_segment() hci0: QCA TLV response size mismatch It seems the controller needs a short time after downloading the firmware before it is ready for the NVM. A delay as short as 1 ms seems sufficient, make it 10 ms just in case. No event is received during the delay, hence we don't just silently drop an extra event. Signed-off-by: Matthias Kaehlcke Signed-off-by: Marcel Holtmann --- drivers/bluetooth/btqca.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/bluetooth/btqca.c b/drivers/bluetooth/btqca.c index 8b33128dccee..c59ca5782b63 100644 --- a/drivers/bluetooth/btqca.c +++ b/drivers/bluetooth/btqca.c @@ -388,6 +388,9 @@ int qca_uart_setup(struct hci_dev *hdev, uint8_t baudrate, return err; } + /* Give the controller some time to get ready to receive the NVM */ + msleep(10); + /* Download NVM configuration */ config.type = TLV_TYPE_NVM; if (firmware_name) From 4974c839d45e2ac89ce0e82b49d548cc12e02a9c Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Tue, 9 Jul 2019 01:35:30 +0000 Subject: [PATCH 361/482] Bluetooth: hci_qca: Use kfree_skb() instead of kfree() Use kfree_skb() instead of kfree() to free sk_buff. Fixes: 2faa3f15fa2f ("Bluetooth: hci_qca: wcn3990: Drop baudrate change vendor event") Signed-off-by: Wei Yongjun Reviewed-by: Matthias Kaehlcke Signed-off-by: Marcel Holtmann --- drivers/bluetooth/hci_qca.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c index 82a0a3691a63..3c9fd165fda6 100644 --- a/drivers/bluetooth/hci_qca.c +++ b/drivers/bluetooth/hci_qca.c @@ -912,7 +912,7 @@ static int qca_recv_event(struct hci_dev *hdev, struct sk_buff *skb) if (hdr->evt == HCI_EV_VENDOR) complete(&qca->drop_ev_comp); - kfree(skb); + kfree_skb(skb); return 0; } From 2fde6afb8c7fce8e679c1072891cd31d54af5b83 Mon Sep 17 00:00:00 2001 From: Matthias Kaehlcke Date: Mon, 8 Jul 2019 14:57:42 -0700 Subject: [PATCH 362/482] Bluetooth: btqca: Use correct byte format for opcode of injected command The opcode of the command injected by commit 32646db8cc28 ("Bluetooth: btqca: inject command complete event during fw download") uses the CPU byte format, however it should always be little endian. In practice it shouldn't really matter, since all we need is an opcode != 0, but still let's do things correctly and keep sparse happy. Fixes: 32646db8cc28 ("Bluetooth: btqca: inject command complete event during fw download") Reported-by: kbuild test robot Signed-off-by: Matthias Kaehlcke Signed-off-by: Marcel Holtmann --- drivers/bluetooth/btqca.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/bluetooth/btqca.c b/drivers/bluetooth/btqca.c index c59ca5782b63..81a5c45bdcd9 100644 --- a/drivers/bluetooth/btqca.c +++ b/drivers/bluetooth/btqca.c @@ -268,7 +268,7 @@ static int qca_inject_cmd_complete_event(struct hci_dev *hdev) evt = skb_put(skb, sizeof(*evt)); evt->ncmd = 1; - evt->opcode = QCA_HCI_CC_OPCODE; + evt->opcode = cpu_to_le16(QCA_HCI_CC_OPCODE); skb_put_u8(skb, QCA_HCI_CC_SUCCESS); From a2780889e247561744dd8efbd3478a1999b72ae3 Mon Sep 17 00:00:00 2001 From: Harish Bandi Date: Fri, 12 Jul 2019 10:39:40 +0530 Subject: [PATCH 363/482] Bluetooth: hci_qca: Send VS pre shutdown command. WCN399x chips are coex chips, it needs a VS pre shutdown command while turning off the BT. So that chip can inform BT is OFF to other active clients. Signed-off-by: Harish Bandi Signed-off-by: Marcel Holtmann --- drivers/bluetooth/btqca.c | 21 +++++++++++++++++++++ drivers/bluetooth/btqca.h | 7 +++++++ drivers/bluetooth/hci_qca.c | 3 +++ 3 files changed, 31 insertions(+) diff --git a/drivers/bluetooth/btqca.c b/drivers/bluetooth/btqca.c index 81a5c45bdcd9..2221935fac7e 100644 --- a/drivers/bluetooth/btqca.c +++ b/drivers/bluetooth/btqca.c @@ -99,6 +99,27 @@ static int qca_send_reset(struct hci_dev *hdev) return 0; } +int qca_send_pre_shutdown_cmd(struct hci_dev *hdev) +{ + struct sk_buff *skb; + int err; + + bt_dev_dbg(hdev, "QCA pre shutdown cmd"); + + skb = __hci_cmd_sync(hdev, QCA_PRE_SHUTDOWN_CMD, 0, + NULL, HCI_INIT_TIMEOUT); + if (IS_ERR(skb)) { + err = PTR_ERR(skb); + bt_dev_err(hdev, "QCA preshutdown_cmd failed (%d)", err); + return err; + } + + kfree_skb(skb); + + return 0; +} +EXPORT_SYMBOL_GPL(qca_send_pre_shutdown_cmd); + static void qca_tlv_check_data(struct rome_config *config, const struct firmware *fw) { diff --git a/drivers/bluetooth/btqca.h b/drivers/bluetooth/btqca.h index 6a291a7a5d96..69c5315a65fd 100644 --- a/drivers/bluetooth/btqca.h +++ b/drivers/bluetooth/btqca.h @@ -13,6 +13,7 @@ #define EDL_PATCH_TLV_REQ_CMD (0x1E) #define EDL_NVM_ACCESS_SET_REQ_CMD (0x01) #define MAX_SIZE_PER_TLV_SEGMENT (243) +#define QCA_PRE_SHUTDOWN_CMD (0xFC08) #define EDL_CMD_REQ_RES_EVT (0x00) #define EDL_PATCH_VER_RES_EVT (0x19) @@ -135,6 +136,7 @@ int qca_uart_setup(struct hci_dev *hdev, uint8_t baudrate, const char *firmware_name); int qca_read_soc_version(struct hci_dev *hdev, u32 *soc_version); int qca_set_bdaddr(struct hci_dev *hdev, const bdaddr_t *bdaddr); +int qca_send_pre_shutdown_cmd(struct hci_dev *hdev); static inline bool qca_is_wcn399x(enum qca_btsoc_type soc_type) { return soc_type == QCA_WCN3990 || soc_type == QCA_WCN3998; @@ -167,4 +169,9 @@ static inline bool qca_is_wcn399x(enum qca_btsoc_type soc_type) { return false; } + +static inline int qca_send_pre_shutdown_cmd(struct hci_dev *hdev) +{ + return -EOPNOTSUPP; +} #endif diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c index 3c9fd165fda6..0cfa5b831d39 100644 --- a/drivers/bluetooth/hci_qca.c +++ b/drivers/bluetooth/hci_qca.c @@ -1386,6 +1386,9 @@ static int qca_power_off(struct hci_dev *hdev) { struct hci_uart *hu = hci_get_drvdata(hdev); + /* Perform pre shutdown command */ + qca_send_pre_shutdown_cmd(hdev); + qca_power_shutdown(hu); return 0; } From 48d9cc9d85dde37c87abb7ac9bbec6598ba44b56 Mon Sep 17 00:00:00 2001 From: Fabian Henneke Date: Mon, 15 Jul 2019 19:40:56 +0200 Subject: [PATCH 364/482] Bluetooth: hidp: Let hidp_send_message return number of queued bytes Let hidp_send_message return the number of successfully queued bytes instead of an unconditional 0. With the return value fixed to 0, other drivers relying on hidp, such as hidraw, can not return meaningful values from their respective implementations of write(). In particular, with the current behavior, a hidraw device's write() will have different return values depending on whether the device is connected via USB or Bluetooth, which makes it harder to abstract away the transport layer. Signed-off-by: Fabian Henneke Signed-off-by: Marcel Holtmann --- net/bluetooth/hidp/core.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c index 5abd423b55fa..8d889969ae7e 100644 --- a/net/bluetooth/hidp/core.c +++ b/net/bluetooth/hidp/core.c @@ -101,6 +101,7 @@ static int hidp_send_message(struct hidp_session *session, struct socket *sock, { struct sk_buff *skb; struct sock *sk = sock->sk; + int ret; BT_DBG("session %p data %p size %d", session, data, size); @@ -114,13 +115,17 @@ static int hidp_send_message(struct hidp_session *session, struct socket *sock, } skb_put_u8(skb, hdr); - if (data && size > 0) + if (data && size > 0) { skb_put_data(skb, data, size); + ret = size; + } else { + ret = 0; + } skb_queue_tail(transmit, skb); wake_up_interruptible(sk_sleep(sk)); - return 0; + return ret; } static int hidp_send_ctrl_message(struct hidp_session *session, From 858b44dc62a1018312fd9cbd15db24237a897e0e Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Sun, 11 Aug 2019 15:52:26 -0700 Subject: [PATCH 365/482] xfs: remove more ondisk directory corruption asserts Continue our game of replacing ASSERTs for corrupt ondisk metadata with EFSCORRUPTED returns. Signed-off-by: Darrick J. Wong Reviewed-by: Bill O'Donnell --- fs/xfs/libxfs/xfs_da_btree.c | 19 ++++++++++++------- fs/xfs/libxfs/xfs_dir2_node.c | 3 ++- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c index d1c77fd0815d..0bf56e94bfe9 100644 --- a/fs/xfs/libxfs/xfs_da_btree.c +++ b/fs/xfs/libxfs/xfs_da_btree.c @@ -487,10 +487,8 @@ xfs_da3_split( ASSERT(state->path.active == 0); oldblk = &state->path.blk[0]; error = xfs_da3_root_split(state, oldblk, addblk); - if (error) { - addblk->bp = NULL; - return error; /* GROT: dir is inconsistent */ - } + if (error) + goto out; /* * Update pointers to the node which used to be block 0 and just got @@ -505,7 +503,10 @@ xfs_da3_split( */ node = oldblk->bp->b_addr; if (node->hdr.info.forw) { - ASSERT(be32_to_cpu(node->hdr.info.forw) == addblk->blkno); + if (be32_to_cpu(node->hdr.info.forw) != addblk->blkno) { + error = -EFSCORRUPTED; + goto out; + } node = addblk->bp->b_addr; node->hdr.info.back = cpu_to_be32(oldblk->blkno); xfs_trans_log_buf(state->args->trans, addblk->bp, @@ -514,15 +515,19 @@ xfs_da3_split( } node = oldblk->bp->b_addr; if (node->hdr.info.back) { - ASSERT(be32_to_cpu(node->hdr.info.back) == addblk->blkno); + if (be32_to_cpu(node->hdr.info.back) != addblk->blkno) { + error = -EFSCORRUPTED; + goto out; + } node = addblk->bp->b_addr; node->hdr.info.forw = cpu_to_be32(oldblk->blkno); xfs_trans_log_buf(state->args->trans, addblk->bp, XFS_DA_LOGRANGE(node, &node->hdr.info, sizeof(node->hdr.info))); } +out: addblk->bp = NULL; - return 0; + return error; } /* diff --git a/fs/xfs/libxfs/xfs_dir2_node.c b/fs/xfs/libxfs/xfs_dir2_node.c index afcc6642690a..1fc44efc344d 100644 --- a/fs/xfs/libxfs/xfs_dir2_node.c +++ b/fs/xfs/libxfs/xfs_dir2_node.c @@ -741,7 +741,8 @@ xfs_dir2_leafn_lookup_for_entry( ents = dp->d_ops->leaf_ents_p(leaf); xfs_dir3_leaf_check(dp, bp); - ASSERT(leafhdr.count > 0); + if (leafhdr.count <= 0) + return -EFSCORRUPTED; /* * Look up the hash value in the leaf entries. From 8612de3f7ba6e900465e340516b8313806d27b2d Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Sun, 11 Aug 2019 15:52:27 -0700 Subject: [PATCH 366/482] xfs: don't crash on null attr fork xfs_bmapi_read Zorro Lang reported a crash in generic/475 if we try to inactivate a corrupt inode with a NULL attr fork (stack trace shortened somewhat): RIP: 0010:xfs_bmapi_read+0x311/0xb00 [xfs] RSP: 0018:ffff888047f9ed68 EFLAGS: 00010202 RAX: dffffc0000000000 RBX: ffff888047f9f038 RCX: 1ffffffff5f99f51 RDX: 0000000000000002 RSI: 0000000000000008 RDI: 0000000000000012 RBP: ffff888002a41f00 R08: ffffed10005483f0 R09: ffffed10005483ef R10: ffffed10005483ef R11: ffff888002a41f7f R12: 0000000000000004 R13: ffffe8fff53b5768 R14: 0000000000000005 R15: 0000000000000001 FS: 00007f11d44b5b80(0000) GS:ffff888114200000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000ef6000 CR3: 000000002e176003 CR4: 00000000001606e0 Call Trace: xfs_dabuf_map.constprop.18+0x696/0xe50 [xfs] xfs_da_read_buf+0xf5/0x2c0 [xfs] xfs_da3_node_read+0x1d/0x230 [xfs] xfs_attr_inactive+0x3cc/0x5e0 [xfs] xfs_inactive+0x4c8/0x5b0 [xfs] xfs_fs_destroy_inode+0x31b/0x8e0 [xfs] destroy_inode+0xbc/0x190 xfs_bulkstat_one_int+0xa8c/0x1200 [xfs] xfs_bulkstat_one+0x16/0x20 [xfs] xfs_bulkstat+0x6fa/0xf20 [xfs] xfs_ioc_bulkstat+0x182/0x2b0 [xfs] xfs_file_ioctl+0xee0/0x12a0 [xfs] do_vfs_ioctl+0x193/0x1000 ksys_ioctl+0x60/0x90 __x64_sys_ioctl+0x6f/0xb0 do_syscall_64+0x9f/0x4d0 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x7f11d39a3e5b The "obvious" cause is that the attr ifork is null despite the inode claiming an attr fork having at least one extent, but it's not so obvious why we ended up with an inode in that state. Reported-by: Zorro Lang Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=204031 Signed-off-by: Darrick J. Wong Reviewed-by: Bill O'Donnell --- fs/xfs/libxfs/xfs_bmap.c | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index baf0b72c0a37..07aad70f3931 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -3835,15 +3835,28 @@ xfs_bmapi_read( XFS_STATS_INC(mp, xs_blk_mapr); ifp = XFS_IFORK_PTR(ip, whichfork); + if (!ifp) { + /* No CoW fork? Return a hole. */ + if (whichfork == XFS_COW_FORK) { + mval->br_startoff = bno; + mval->br_startblock = HOLESTARTBLOCK; + mval->br_blockcount = len; + mval->br_state = XFS_EXT_NORM; + *nmap = 1; + return 0; + } - /* No CoW fork? Return a hole. */ - if (whichfork == XFS_COW_FORK && !ifp) { - mval->br_startoff = bno; - mval->br_startblock = HOLESTARTBLOCK; - mval->br_blockcount = len; - mval->br_state = XFS_EXT_NORM; - *nmap = 1; - return 0; + /* + * A missing attr ifork implies that the inode says we're in + * extents or btree format but failed to pass the inode fork + * verifier while trying to load it. Treat that as a file + * corruption too. + */ +#ifdef DEBUG + xfs_alert(mp, "%s: inode %llu missing fork %d", + __func__, ip->i_ino, whichfork); +#endif /* DEBUG */ + return -EFSCORRUPTED; } if (!(ifp->if_flags & XFS_IFEXTENTS)) { From c7c5ae2902bf8fe9acc75f798c0de75ac9295ccf Mon Sep 17 00:00:00 2001 From: Claire Chang Date: Tue, 6 Aug 2019 17:56:29 +0800 Subject: [PATCH 367/482] Bluetooth: btqca: release_firmware after qca_inject_cmd_complete_event commit 32646db8cc28 ("Bluetooth: btqca: inject command complete event during fw download") added qca_inject_cmd_complete_event() for certain qualcomm chips. However, qca_download_firmware() will return without calling release_firmware() in this case. This leads to a memory leak like the following found by kmemleak: unreferenced object 0xfffffff3868a5880 (size 128): comm "kworker/u17:5", pid 347, jiffies 4294676481 (age 312.157s) hex dump (first 32 bytes): ac fd 00 00 00 00 00 00 00 d0 7e 17 80 ff ff ff ..........~..... 00 00 00 00 00 00 00 00 00 59 8a 86 f3 ff ff ff .........Y...... backtrace: [<00000000978ce31d>] kmem_cache_alloc_trace+0x194/0x298 [<000000006ea0398c>] _request_firmware+0x74/0x4e4 [<000000004da31ca0>] request_firmware+0x44/0x64 [<0000000094572996>] qca_download_firmware+0x74/0x6e4 [btqca] [<00000000b24d615a>] qca_uart_setup+0xc0/0x2b0 [btqca] [<00000000364a6d5a>] qca_setup+0x204/0x570 [hci_uart] [<000000006be1a544>] hci_uart_setup+0xa8/0x148 [hci_uart] [<00000000d64c0f4f>] hci_dev_do_open+0x144/0x530 [bluetooth] [<00000000f69f5110>] hci_power_on+0x84/0x288 [bluetooth] [<00000000d4151583>] process_one_work+0x210/0x420 [<000000003cf3dcfb>] worker_thread+0x2c4/0x3e4 [<000000007ccaf055>] kthread+0x124/0x134 [<00000000bef1f723>] ret_from_fork+0x10/0x18 [<00000000c36ee3dd>] 0xffffffffffffffff unreferenced object 0xfffffff37b16de00 (size 128): comm "kworker/u17:5", pid 347, jiffies 4294676873 (age 311.766s) hex dump (first 32 bytes): da 07 00 00 00 00 00 00 00 50 ff 0b 80 ff ff ff .........P...... 00 00 00 00 00 00 00 00 00 dd 16 7b f3 ff ff ff ...........{.... backtrace: [<00000000978ce31d>] kmem_cache_alloc_trace+0x194/0x298 [<000000006ea0398c>] _request_firmware+0x74/0x4e4 [<000000004da31ca0>] request_firmware+0x44/0x64 [<0000000094572996>] qca_download_firmware+0x74/0x6e4 [btqca] [<000000000cde20a9>] qca_uart_setup+0x144/0x2b0 [btqca] [<00000000364a6d5a>] qca_setup+0x204/0x570 [hci_uart] [<000000006be1a544>] hci_uart_setup+0xa8/0x148 [hci_uart] [<00000000d64c0f4f>] hci_dev_do_open+0x144/0x530 [bluetooth] [<00000000f69f5110>] hci_power_on+0x84/0x288 [bluetooth] [<00000000d4151583>] process_one_work+0x210/0x420 [<000000003cf3dcfb>] worker_thread+0x2c4/0x3e4 [<000000007ccaf055>] kthread+0x124/0x134 [<00000000bef1f723>] ret_from_fork+0x10/0x18 [<00000000c36ee3dd>] 0xffffffffffffffff Make sure release_firmware() is called aftre qca_inject_cmd_complete_event() to avoid the memory leak. Fixes: 32646db8cc28 ("Bluetooth: btqca: inject command complete event during fw download") Signed-off-by: Claire Chang Reviewed-by: Balakrishna Godavarthi Signed-off-by: Marcel Holtmann --- drivers/bluetooth/btqca.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/bluetooth/btqca.c b/drivers/bluetooth/btqca.c index 2221935fac7e..8f0fec5acade 100644 --- a/drivers/bluetooth/btqca.c +++ b/drivers/bluetooth/btqca.c @@ -344,7 +344,7 @@ static int qca_download_firmware(struct hci_dev *hdev, */ if (config->dnld_type == ROME_SKIP_EVT_VSE_CC || config->dnld_type == ROME_SKIP_EVT_VSE) - return qca_inject_cmd_complete_event(hdev); + ret = qca_inject_cmd_complete_event(hdev); out: release_firmware(fw); From 12072a68961af20e84ddb4aba2387ba5f70e8c14 Mon Sep 17 00:00:00 2001 From: Balakrishna Godavarthi Date: Thu, 8 Aug 2019 14:26:08 +0530 Subject: [PATCH 368/482] Bluetooth: btqca: Reset download type to default This patch will reset the download flag to default value before retrieving the download mode type. Fixes: 32646db8cc28 ("Bluetooth: btqca: inject command complete event during fw download") Signed-off-by: Balakrishna Godavarthi Tested-by: Claire Chang Reviewed-by: Claire Chang Signed-off-by: Marcel Holtmann --- drivers/bluetooth/btqca.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/bluetooth/btqca.c b/drivers/bluetooth/btqca.c index 8f0fec5acade..0875470a7806 100644 --- a/drivers/bluetooth/btqca.c +++ b/drivers/bluetooth/btqca.c @@ -140,6 +140,7 @@ static void qca_tlv_check_data(struct rome_config *config, BT_DBG("Length\t\t : %d bytes", length); config->dnld_mode = ROME_SKIP_EVT_NONE; + config->dnld_type = ROME_SKIP_EVT_NONE; switch (config->type) { case TLV_TYPE_PATCH: From 5785675dfef4f9edcee66edef7b3af21618d2707 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Sun, 11 Aug 2019 17:40:36 +0200 Subject: [PATCH 369/482] x86/apic/32: Fix yet another implicit fallthrough warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix arch/x86/kernel/apic/probe_32.c: In function ‘default_setup_apic_routing’: arch/x86/kernel/apic/probe_32.c:146:7: warning: this statement may fall through [-Wimplicit-fallthrough=] if (!APIC_XAPIC(version)) { ^ arch/x86/kernel/apic/probe_32.c:151:3: note: here case X86_VENDOR_HYGON: ^~~~ for 32-bit builds. Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner Link: https://lkml.kernel.org/r/20190811154036.29805-1-bp@alien8.de --- arch/x86/kernel/apic/probe_32.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c index 1492799b8f43..ee2d91e382f1 100644 --- a/arch/x86/kernel/apic/probe_32.c +++ b/arch/x86/kernel/apic/probe_32.c @@ -184,7 +184,8 @@ void __init default_setup_apic_routing(void) def_to_bigsmp = 0; break; } - /* If P4 and above fall through */ + /* P4 and above */ + /* fall through */ case X86_VENDOR_HYGON: case X86_VENDOR_AMD: def_to_bigsmp = 1; From 91be2587e82a0f16348fd8f12a57e4c328baffc7 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 12 Aug 2019 20:16:17 +0200 Subject: [PATCH 370/482] x86/fpu/math-emu: Address fallthrough warnings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit /home/tglx/work/kernel/linus/linux/arch/x86/math-emu/errors.c: In function ‘FPU_printall’: /home/tglx/work/kernel/linus/linux/arch/x86/math-emu/errors.c:187:9: warning: this statement may fall through [-Wimplicit-fallthrough=] tagi = FPU_Special(r); ~~~~~^~~~~~~~~~~~~~~~ /home/tglx/work/kernel/linus/linux/arch/x86/math-emu/errors.c:188:3: note: here case TAG_Valid: ^~~~ /home/tglx/work/kernel/linus/linux/arch/x86/math-emu/fpu_trig.c: In function ‘fyl2xp1’: /home/tglx/work/kernel/linus/linux/arch/x86/math-emu/fpu_trig.c:1353:7: warning: this statement may fall through [-Wimplicit-fallthrough=] if (denormal_operand() < 0) ^ /home/tglx/work/kernel/linus/linux/arch/x86/math-emu/fpu_trig.c:1356:3: note: here case TAG_Zero: Remove the pointless 'break;' after 'continue;' while at it. Signed-off-by: Thomas Gleixner --- arch/x86/math-emu/errors.c | 5 +++-- arch/x86/math-emu/fpu_trig.c | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/x86/math-emu/errors.c b/arch/x86/math-emu/errors.c index 6b468517ab71..73dc66d887f3 100644 --- a/arch/x86/math-emu/errors.c +++ b/arch/x86/math-emu/errors.c @@ -178,13 +178,15 @@ void FPU_printall(void) for (i = 0; i < 8; i++) { FPU_REG *r = &st(i); u_char tagi = FPU_gettagi(i); + switch (tagi) { case TAG_Empty: continue; - break; case TAG_Zero: case TAG_Special: + /* Update tagi for the printk below */ tagi = FPU_Special(r); + /* fall through */ case TAG_Valid: printk("st(%d) %c .%04lx %04lx %04lx %04lx e%+-6d ", i, getsign(r) ? '-' : '+', @@ -198,7 +200,6 @@ void FPU_printall(void) printk("Whoops! Error in errors.c: tag%d is %d ", i, tagi); continue; - break; } printk("%s\n", tag_desc[(int)(unsigned)tagi]); } diff --git a/arch/x86/math-emu/fpu_trig.c b/arch/x86/math-emu/fpu_trig.c index 783c509f957a..127ea54122d7 100644 --- a/arch/x86/math-emu/fpu_trig.c +++ b/arch/x86/math-emu/fpu_trig.c @@ -1352,7 +1352,7 @@ static void fyl2xp1(FPU_REG *st0_ptr, u_char st0_tag) case TW_Denormal: if (denormal_operand() < 0) return; - + /* fall through */ case TAG_Zero: case TAG_Valid: setsign(st0_ptr, getsign(st0_ptr) ^ getsign(st1_ptr)); From 303911cfc5b95d33687d9046133ff184cf5043ff Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Mon, 12 Aug 2019 16:11:07 -0400 Subject: [PATCH 371/482] USB: core: Fix races in character device registration and deregistraion The syzbot fuzzer has found two (!) races in the USB character device registration and deregistration routines. This patch fixes the races. The first race results from the fact that usb_deregister_dev() sets usb_minors[intf->minor] to NULL before calling device_destroy() on the class device. This leaves a window during which another thread can allocate the same minor number but will encounter a duplicate name error when it tries to register its own class device. A typical error message in the system log would look like: sysfs: cannot create duplicate filename '/class/usbmisc/ldusb0' The patch fixes this race by destroying the class device first. The second race is in usb_register_dev(). When that routine runs, it first allocates a minor number, then drops minor_rwsem, and then creates the class device. If the device creation fails, the minor number is deallocated and the whole routine returns an error. But during the time while minor_rwsem was dropped, there is a window in which the minor number is allocated and so another thread can successfully open the device file. Typically this results in use-after-free errors or invalid accesses when the other thread closes its open file reference, because the kernel then tries to release resources that were already deallocated when usb_register_dev() failed. The patch fixes this race by keeping minor_rwsem locked throughout the entire routine. Reported-and-tested-by: syzbot+30cf45ebfe0b0c4847a1@syzkaller.appspotmail.com Signed-off-by: Alan Stern CC: Link: https://lore.kernel.org/r/Pine.LNX.4.44L0.1908121607590.1659-100000@iolanthe.rowland.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/file.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/usb/core/file.c b/drivers/usb/core/file.c index 65de6f73b672..558890ada0e5 100644 --- a/drivers/usb/core/file.c +++ b/drivers/usb/core/file.c @@ -193,9 +193,10 @@ int usb_register_dev(struct usb_interface *intf, intf->minor = minor; break; } - up_write(&minor_rwsem); - if (intf->minor < 0) + if (intf->minor < 0) { + up_write(&minor_rwsem); return -EXFULL; + } /* create a usb class device for this usb interface */ snprintf(name, sizeof(name), class_driver->name, minor - minor_base); @@ -203,12 +204,11 @@ int usb_register_dev(struct usb_interface *intf, MKDEV(USB_MAJOR, minor), class_driver, "%s", kbasename(name)); if (IS_ERR(intf->usb_dev)) { - down_write(&minor_rwsem); usb_minors[minor] = NULL; intf->minor = -1; - up_write(&minor_rwsem); retval = PTR_ERR(intf->usb_dev); } + up_write(&minor_rwsem); return retval; } EXPORT_SYMBOL_GPL(usb_register_dev); @@ -234,12 +234,12 @@ void usb_deregister_dev(struct usb_interface *intf, return; dev_dbg(&intf->dev, "removing %d minor\n", intf->minor); + device_destroy(usb_class->class, MKDEV(USB_MAJOR, intf->minor)); down_write(&minor_rwsem); usb_minors[intf->minor] = NULL; up_write(&minor_rwsem); - device_destroy(usb_class->class, MKDEV(USB_MAJOR, intf->minor)); intf->usb_dev = NULL; intf->minor = -1; destroy_usb_class(); From bb0ce4c1517d299d1a38075ecded62a5a5342c6b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Draszik?= Date: Fri, 9 Aug 2019 12:20:25 +0100 Subject: [PATCH 372/482] net: phy: at803x: stop switching phy delay config needlessly MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This driver does a funny dance disabling and re-enabling RX and/or TX delays. In any of the RGMII-ID modes, it first disables the delays, just to re-enable them again right away. This looks like a needless exercise. Just enable the respective delays when in any of the relevant 'id' modes, and disable them otherwise. Also, remove comments which don't add anything that can't be seen by looking at the code. Signed-off-by: André Draszik CC: Andrew Lunn CC: Florian Fainelli CC: Heiner Kallweit CC: "David S. Miller" CC: netdev@vger.kernel.org Signed-off-by: David S. Miller --- drivers/net/phy/at803x.c | 32 ++++++++------------------------ 1 file changed, 8 insertions(+), 24 deletions(-) diff --git a/drivers/net/phy/at803x.c b/drivers/net/phy/at803x.c index 222ccd9ecfce..6ad8b1c63c34 100644 --- a/drivers/net/phy/at803x.c +++ b/drivers/net/phy/at803x.c @@ -257,36 +257,20 @@ static int at803x_config_init(struct phy_device *phydev) * after HW reset: RX delay enabled and TX delay disabled * after SW reset: RX delay enabled, while TX delay retains the * value before reset. - * - * So let's first disable the RX and TX delays in PHY and enable - * them based on the mode selected (this also takes care of RGMII - * mode where we expect delays to be disabled) */ - - ret = at803x_disable_rx_delay(phydev); - if (ret < 0) - return ret; - ret = at803x_disable_tx_delay(phydev); - if (ret < 0) - return ret; - if (phydev->interface == PHY_INTERFACE_MODE_RGMII_ID || - phydev->interface == PHY_INTERFACE_MODE_RGMII_RXID) { - /* If RGMII_ID or RGMII_RXID are specified enable RX delay, - * otherwise keep it disabled - */ + phydev->interface == PHY_INTERFACE_MODE_RGMII_RXID) ret = at803x_enable_rx_delay(phydev); - if (ret < 0) - return ret; - } + else + ret = at803x_disable_rx_delay(phydev); + if (ret < 0) + return ret; if (phydev->interface == PHY_INTERFACE_MODE_RGMII_ID || - phydev->interface == PHY_INTERFACE_MODE_RGMII_TXID) { - /* If RGMII_ID or RGMII_TXID are specified enable TX delay, - * otherwise keep it disabled - */ + phydev->interface == PHY_INTERFACE_MODE_RGMII_TXID) ret = at803x_enable_tx_delay(phydev); - } + else + ret = at803x_disable_tx_delay(phydev); return ret; } From cd8869f4cb257f22b89495ca40f5281e58ba359c Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Mon, 12 Aug 2019 15:01:30 -0700 Subject: [PATCH 373/482] xtensa: add missing isync to the cpu_reset TLB code ITLB entry modifications must be followed by the isync instruction before the new entries are possibly used. cpu_reset lacks one isync between ITLB way 6 initialization and jump to the identity mapping. Add missing isync to xtensa cpu_reset. Cc: stable@vger.kernel.org Signed-off-by: Max Filippov --- arch/xtensa/kernel/setup.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/xtensa/kernel/setup.c b/arch/xtensa/kernel/setup.c index 5cb8a62e091c..7c3106093c75 100644 --- a/arch/xtensa/kernel/setup.c +++ b/arch/xtensa/kernel/setup.c @@ -511,6 +511,7 @@ void cpu_reset(void) "add %2, %2, %7\n\t" "addi %0, %0, -1\n\t" "bnez %0, 1b\n\t" + "isync\n\t" /* Jump to identity mapping */ "jx %3\n" "2:\n\t" From 60d437bbff358748fcfc3bce5f08da9a6b3761da Mon Sep 17 00:00:00 2001 From: Nishka Dasgupta Date: Tue, 16 Jul 2019 11:13:30 +0530 Subject: [PATCH 374/482] of: resolver: Add of_node_put() before return and break Each iteration of for_each_child_of_node puts the previous node, but in the case of a return or break from the middle of the loop, there is no put, thus causing a memory leak. Hence add an of_node_put before the return or break in three places. Issue found with Coccinelle. Signed-off-by: Nishka Dasgupta Signed-off-by: Rob Herring --- drivers/of/resolver.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/of/resolver.c b/drivers/of/resolver.c index c1b67dd7cd6e..83c766233181 100644 --- a/drivers/of/resolver.c +++ b/drivers/of/resolver.c @@ -206,16 +206,22 @@ static int adjust_local_phandle_references(struct device_node *local_fixups, for_each_child_of_node(local_fixups, child) { for_each_child_of_node(overlay, overlay_child) - if (!node_name_cmp(child, overlay_child)) + if (!node_name_cmp(child, overlay_child)) { + of_node_put(overlay_child); break; + } - if (!overlay_child) + if (!overlay_child) { + of_node_put(child); return -EINVAL; + } err = adjust_local_phandle_references(child, overlay_child, phandle_delta); - if (err) + if (err) { + of_node_put(child); return err; + } } return 0; From fd295733e702a63c189fea8294c12e04b5e5ecc0 Mon Sep 17 00:00:00 2001 From: Sven Van Asbroeck Date: Thu, 18 Jul 2019 16:14:53 -0400 Subject: [PATCH 375/482] dt-bindings: fec: explicitly mark deprecated properties fec's gpio phy reset properties have been deprecated. Update the dt-bindings documentation to explicitly mark them as such, and provide a short description of the recommended alternative. Signed-off-by: Sven Van Asbroeck Signed-off-by: Rob Herring --- .../devicetree/bindings/net/fsl-fec.txt | 30 +++++++++++-------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/Documentation/devicetree/bindings/net/fsl-fec.txt b/Documentation/devicetree/bindings/net/fsl-fec.txt index 2d41fb96ce0a..5b88fae0307d 100644 --- a/Documentation/devicetree/bindings/net/fsl-fec.txt +++ b/Documentation/devicetree/bindings/net/fsl-fec.txt @@ -7,18 +7,6 @@ Required properties: - phy-mode : See ethernet.txt file in the same directory Optional properties: -- phy-reset-gpios : Should specify the gpio for phy reset -- phy-reset-duration : Reset duration in milliseconds. Should present - only if property "phy-reset-gpios" is available. Missing the property - will have the duration be 1 millisecond. Numbers greater than 1000 are - invalid and 1 millisecond will be used instead. -- phy-reset-active-high : If present then the reset sequence using the GPIO - specified in the "phy-reset-gpios" property is reversed (H=reset state, - L=operation state). -- phy-reset-post-delay : Post reset delay in milliseconds. If present then - a delay of phy-reset-post-delay milliseconds will be observed after the - phy-reset-gpios has been toggled. Can be omitted thus no delay is - observed. Delay is in range of 1ms to 1000ms. Other delays are invalid. - phy-supply : regulator that powers the Ethernet PHY. - phy-handle : phandle to the PHY device connected to this device. - fixed-link : Assume a fixed link. See fixed-link.txt in the same directory. @@ -47,11 +35,27 @@ Optional properties: For imx6sx, "int0" handles all 3 queues and ENET_MII. "pps" is for the pulse per second interrupt associated with 1588 precision time protocol(PTP). - Optional subnodes: - mdio : specifies the mdio bus in the FEC, used as a container for phy nodes according to phy.txt in the same directory +Deprecated optional properties: + To avoid these, create a phy node according to phy.txt in the same + directory, and point the fec's "phy-handle" property to it. Then use + the phy's reset binding, again described by phy.txt. +- phy-reset-gpios : Should specify the gpio for phy reset +- phy-reset-duration : Reset duration in milliseconds. Should present + only if property "phy-reset-gpios" is available. Missing the property + will have the duration be 1 millisecond. Numbers greater than 1000 are + invalid and 1 millisecond will be used instead. +- phy-reset-active-high : If present then the reset sequence using the GPIO + specified in the "phy-reset-gpios" property is reversed (H=reset state, + L=operation state). +- phy-reset-post-delay : Post reset delay in milliseconds. If present then + a delay of phy-reset-post-delay milliseconds will be observed after the + phy-reset-gpios has been toggled. Can be omitted thus no delay is + observed. Delay is in range of 1ms to 1000ms. Other delays are invalid. + Example: ethernet@83fec000 { From 17b6d2d528542bc60ad400add35728b2259b3cc1 Mon Sep 17 00:00:00 2001 From: Pierre-Eric Pelloux-Prayer Date: Tue, 6 Aug 2019 18:27:26 +0200 Subject: [PATCH 376/482] drm/amdgpu: fix gfx9 soft recovery MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The SOC15_REG_OFFSET() macro wasn't used, making the soft recovery fail. v2: use WREG32_SOC15 instead of WREG32 + SOC15_REG_OFFSET Signed-off-by: Pierre-Eric Pelloux-Prayer Reviewed-by: Alex Deucher Reviewed-by: Christian König Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 1cf639a51178..04b8ac4432c7 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -4869,7 +4869,7 @@ static void gfx_v9_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid) value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01); value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1); value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid); - WREG32(mmSQ_CMD, value); + WREG32_SOC15(GC, 0, mmSQ_CMD, value); } static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev, From 31ec0dd38085aaeb75ab04211bf3b51ad917c9b4 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 8 Aug 2019 00:29:23 -0500 Subject: [PATCH 377/482] drm/amd/display: use kvmalloc for dc_state (v2) It's large and doesn't need contiguous memory. Fixes allocation failures in some cases. v2: kvfree the memory. Reviewed-by: Andrey Grodzovsky Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index fa20201eef3a..cbc480a33376 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -23,6 +23,7 @@ */ #include +#include #include "dm_services.h" @@ -1171,8 +1172,8 @@ bool dc_post_update_surfaces_to_stream(struct dc *dc) struct dc_state *dc_create_state(struct dc *dc) { - struct dc_state *context = kzalloc(sizeof(struct dc_state), - GFP_KERNEL); + struct dc_state *context = kvzalloc(sizeof(struct dc_state), + GFP_KERNEL); if (!context) return NULL; @@ -1192,11 +1193,11 @@ struct dc_state *dc_create_state(struct dc *dc) struct dc_state *dc_copy_state(struct dc_state *src_ctx) { int i, j; - struct dc_state *new_ctx = kmemdup(src_ctx, - sizeof(struct dc_state), GFP_KERNEL); + struct dc_state *new_ctx = kvmalloc(sizeof(struct dc_state), GFP_KERNEL); if (!new_ctx) return NULL; + memcpy(new_ctx, src_ctx, sizeof(struct dc_state)); for (i = 0; i < MAX_PIPES; i++) { struct pipe_ctx *cur_pipe = &new_ctx->res_ctx.pipe_ctx[i]; @@ -1230,7 +1231,7 @@ static void dc_state_free(struct kref *kref) { struct dc_state *context = container_of(kref, struct dc_state, refcount); dc_resource_state_destruct(context); - kfree(context); + kvfree(context); } void dc_release_state(struct dc_state *context) From 5717fe5ab38f9ccb32718bcb03bea68409c9cce4 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 12 Aug 2019 16:02:25 +0100 Subject: [PATCH 378/482] arm64: cpufeature: Don't treat granule sizes as strict If a CPU doesn't support the page size for which the kernel is configured, then we will complain and refuse to bring it online. For secondary CPUs (and the boot CPU on a system booting with EFI), we will also print an error identifying the mismatch. Consequently, the only time that the cpufeature code can detect a granule size mismatch is for a granule other than the one that is currently being used. Although we would rather such systems didn't exist, we've unfortunately lost that battle and Kevin reports that on his amlogic S922X (odroid-n2 board) we end up warning and taining with defconfig because 16k pages are not supported by all of the CPUs. In such a situation, we don't actually care about the feature mismatch, particularly now that KVM only exposes the sanitised view of the CPU registers (commit 93390c0a1b20 - "arm64: KVM: Hide unsupported AArch64 CPU features from guests"). Treat the granule fields as non-strict and let Kevin run without a tainted kernel. Cc: Marc Zyngier Reported-by: Kevin Hilman Tested-by: Kevin Hilman Acked-by: Mark Rutland Acked-by: Suzuki K Poulose Signed-off-by: Will Deacon [catalin.marinas@arm.com: changelog updated with KVM sanitised regs commit] Signed-off-by: Catalin Marinas --- arch/arm64/kernel/cpufeature.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index d19d14ba9ae4..b1fdc486aed8 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -184,9 +184,17 @@ static const struct arm64_ftr_bits ftr_id_aa64zfr0[] = { }; static const struct arm64_ftr_bits ftr_id_aa64mmfr0[] = { - S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_TGRAN4_SHIFT, 4, ID_AA64MMFR0_TGRAN4_NI), - S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_TGRAN64_SHIFT, 4, ID_AA64MMFR0_TGRAN64_NI), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_TGRAN16_SHIFT, 4, ID_AA64MMFR0_TGRAN16_NI), + /* + * We already refuse to boot CPUs that don't support our configured + * page size, so we can only detect mismatches for a page size other + * than the one we're currently using. Unfortunately, SoCs like this + * exist in the wild so, even though we don't like it, we'll have to go + * along with it and treat them as non-strict. + */ + S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_TGRAN4_SHIFT, 4, ID_AA64MMFR0_TGRAN4_NI), + S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_TGRAN64_SHIFT, 4, ID_AA64MMFR0_TGRAN64_NI), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_TGRAN16_SHIFT, 4, ID_AA64MMFR0_TGRAN16_NI), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_BIGENDEL0_SHIFT, 4, 0), /* Linux shouldn't care about secure memory */ ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_SNSMEM_SHIFT, 4, 0), From 834de5c1aa768eb3d233d6544ea7153826c4b206 Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Wed, 31 Jul 2019 08:46:16 +0000 Subject: [PATCH 379/482] mtd: spi-nor: Fix the disabling of write protection at init spi_nor_spansion_clear_sr_bp() depends on spansion_quad_enable(). While spansion_quad_enable() is selected as default when initializing the flash parameters, the nor->quad_enable() method can be overwritten later on when parsing BFPT. Select the write protection disable mechanism at spi_nor_init() time, when the nor->quad_enable() method is already known. Fixes: 191f5c2ed4b6faba ("mtd: spi-nor: use 16-bit WRR command when QE is set on spansion flashes") Signed-off-by: Tudor Ambarus Reviewed-by: Vignesh Raghavendra Signed-off-by: Richard Weinberger --- drivers/mtd/spi-nor/spi-nor.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c index 03cc788511d5..654bdc41fc99 100644 --- a/drivers/mtd/spi-nor/spi-nor.c +++ b/drivers/mtd/spi-nor/spi-nor.c @@ -3780,8 +3780,6 @@ static int spi_nor_init_params(struct spi_nor *nor, default: /* Kept only for backward compatibility purpose. */ params->quad_enable = spansion_quad_enable; - if (nor->clear_sr_bp) - nor->clear_sr_bp = spi_nor_spansion_clear_sr_bp; break; } @@ -4035,6 +4033,9 @@ static int spi_nor_init(struct spi_nor *nor) int err; if (nor->clear_sr_bp) { + if (nor->quad_enable == spansion_quad_enable) + nor->clear_sr_bp = spi_nor_spansion_clear_sr_bp; + err = nor->clear_sr_bp(nor); if (err) { dev_err(nor->dev, From 9a67b72552f8d019948453e56ca7db8c7e5a94ba Mon Sep 17 00:00:00 2001 From: Denis Efremov Date: Tue, 13 Aug 2019 08:15:07 -0700 Subject: [PATCH 380/482] MAINTAINERS: iomap: Remove fs/iomap.c record Update MAINTAINERS to reflect that fs/iomap.c file was splitted into separate files in fs/iomap/ Cc: Darrick J. Wong Cc: Christoph Hellwig Cc: linux-fsdevel@vger.kernel.org Fixes: cb7181ff4b1c ("iomap: move the main iteration code into a separate file") Signed-off-by: Denis Efremov Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- MAINTAINERS | 1 - 1 file changed, 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 6426db5198f0..5a35c0dd1d35 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8416,7 +8416,6 @@ L: linux-xfs@vger.kernel.org L: linux-fsdevel@vger.kernel.org T: git git://git.kernel.org/pub/scm/fs/xfs/xfs-linux.git S: Supported -F: fs/iomap.c F: fs/iomap/ F: include/linux/iomap.h From 190d03814eb3b49d4f87ff38fef26d36f3568a60 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 13 Aug 2019 17:39:56 +0200 Subject: [PATCH 381/482] ALSA: hda/realtek - Add quirk for HP Envy x360 HP Envy x360 (AMD Ryzen-based model) with 103c:8497 needs the same quirk like HP Spectre x360 for enabling the mute LED over Mic3 pin. BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=204373 Cc: Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index de224cbea7a0..8aaf1d9c55cf 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -6987,6 +6987,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x82bf, "HP G3 mini", ALC221_FIXUP_HP_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x103c, 0x82c0, "HP G3 mini premium", ALC221_FIXUP_HP_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x103c, 0x83b9, "HP Spectre x360", ALC269_FIXUP_HP_MUTE_LED_MIC3), + SND_PCI_QUIRK(0x103c, 0x8497, "HP Envy x360", ALC269_FIXUP_HP_MUTE_LED_MIC3), SND_PCI_QUIRK(0x1043, 0x103e, "ASUS X540SA", ALC256_FIXUP_ASUS_MIC), SND_PCI_QUIRK(0x1043, 0x103f, "ASUS TX300", ALC282_FIXUP_ASUS_TX300), SND_PCI_QUIRK(0x1043, 0x106d, "Asus K53BE", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), From 656c8e9cc1badbc18eefe6ba01d33ebbcae61b9a Mon Sep 17 00:00:00 2001 From: Dirk Morris Date: Thu, 8 Aug 2019 13:57:51 -0700 Subject: [PATCH 382/482] netfilter: conntrack: Use consistent ct id hash calculation Change ct id hash calculation to only use invariants. Currently the ct id hash calculation is based on some fields that can change in the lifetime on a conntrack entry in some corner cases. The current hash uses the whole tuple which contains an hlist pointer which will change when the conntrack is placed on the dying list resulting in a ct id change. This patch also removes the reply-side tuple and extension pointer from the hash calculation so that the ct id will will not change from initialization until confirmation. Fixes: 3c79107631db1f7 ("netfilter: ctnetlink: don't use conntrack/expect object addresses as id") Signed-off-by: Dirk Morris Acked-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_core.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index a542761e90d1..81a8ef42b88d 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -453,13 +453,12 @@ EXPORT_SYMBOL_GPL(nf_ct_invert_tuple); * table location, we assume id gets exposed to userspace. * * Following nf_conn items do not change throughout lifetime - * of the nf_conn after it has been committed to main hash table: + * of the nf_conn: * * 1. nf_conn address - * 2. nf_conn->ext address - * 3. nf_conn->master address (normally NULL) - * 4. tuple - * 5. the associated net namespace + * 2. nf_conn->master address (normally NULL) + * 3. the associated net namespace + * 4. the original direction tuple */ u32 nf_ct_get_id(const struct nf_conn *ct) { @@ -469,9 +468,10 @@ u32 nf_ct_get_id(const struct nf_conn *ct) net_get_random_once(&ct_id_seed, sizeof(ct_id_seed)); a = (unsigned long)ct; - b = (unsigned long)ct->master ^ net_hash_mix(nf_ct_net(ct)); - c = (unsigned long)ct->ext; - d = (unsigned long)siphash(&ct->tuplehash, sizeof(ct->tuplehash), + b = (unsigned long)ct->master; + c = (unsigned long)nf_ct_net(ct); + d = (unsigned long)siphash(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, + sizeof(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple), &ct_id_seed); #ifdef CONFIG_64BIT return siphash_4u64((u64)a, (u64)b, (u64)c, (u64)d, &ct_id_seed); From 2c8ccb37b08fe364f02a9914daca474d43151453 Mon Sep 17 00:00:00 2001 From: Bernard Metzler Date: Fri, 9 Aug 2019 17:18:16 +0200 Subject: [PATCH 383/482] RDMA/siw: Change CQ flags from 64->32 bits This patch changes the driver/user shared (mmapped) CQ notification flags field from unsigned 64-bits size to unsigned 32-bits size. This enables building siw on 32-bit architectures. This patch changes the siw-abi, but as siw was only just merged in this merge window cycle, there are no released kernels with the prior abi. We are making no attempt to be binary compatible with siw user space libraries prior to the merge of siw into the upstream kernel, only moving forward with upstream kernels and upstream rdma-core provided siw libraries are we guaranteeing compatibility. Signed-off-by: Bernard Metzler Link: https://lore.kernel.org/r/20190809151816.13018-1-bmt@zurich.ibm.com Signed-off-by: Doug Ledford --- drivers/infiniband/sw/siw/Kconfig | 2 +- drivers/infiniband/sw/siw/siw.h | 2 +- drivers/infiniband/sw/siw/siw_qp.c | 14 ++++++++++---- drivers/infiniband/sw/siw/siw_verbs.c | 16 +++++++++++----- include/uapi/rdma/siw-abi.h | 3 ++- 5 files changed, 25 insertions(+), 12 deletions(-) diff --git a/drivers/infiniband/sw/siw/Kconfig b/drivers/infiniband/sw/siw/Kconfig index dace276aea14..b622fc62f2cd 100644 --- a/drivers/infiniband/sw/siw/Kconfig +++ b/drivers/infiniband/sw/siw/Kconfig @@ -1,6 +1,6 @@ config RDMA_SIW tristate "Software RDMA over TCP/IP (iWARP) driver" - depends on INET && INFINIBAND && LIBCRC32C && 64BIT + depends on INET && INFINIBAND && LIBCRC32C select DMA_VIRT_OPS help This driver implements the iWARP RDMA transport over diff --git a/drivers/infiniband/sw/siw/siw.h b/drivers/infiniband/sw/siw/siw.h index 03fd7b2f595f..77b1aabf6ff3 100644 --- a/drivers/infiniband/sw/siw/siw.h +++ b/drivers/infiniband/sw/siw/siw.h @@ -214,7 +214,7 @@ struct siw_wqe { struct siw_cq { struct ib_cq base_cq; spinlock_t lock; - u64 *notify; + struct siw_cq_ctrl *notify; struct siw_cqe *queue; u32 cq_put; u32 cq_get; diff --git a/drivers/infiniband/sw/siw/siw_qp.c b/drivers/infiniband/sw/siw/siw_qp.c index e27bd5b35b96..0990307c5d2c 100644 --- a/drivers/infiniband/sw/siw/siw_qp.c +++ b/drivers/infiniband/sw/siw/siw_qp.c @@ -1013,18 +1013,24 @@ int siw_activate_tx(struct siw_qp *qp) */ static bool siw_cq_notify_now(struct siw_cq *cq, u32 flags) { - u64 cq_notify; + u32 cq_notify; if (!cq->base_cq.comp_handler) return false; - cq_notify = READ_ONCE(*cq->notify); + /* Read application shared notification state */ + cq_notify = READ_ONCE(cq->notify->flags); if ((cq_notify & SIW_NOTIFY_NEXT_COMPLETION) || ((cq_notify & SIW_NOTIFY_SOLICITED) && (flags & SIW_WQE_SOLICITED))) { - /* dis-arm CQ */ - smp_store_mb(*cq->notify, SIW_NOTIFY_NOT); + /* + * CQ notification is one-shot: Since the + * current CQE causes user notification, + * the CQ gets dis-aremd and must be re-aremd + * by the user for a new notification. + */ + WRITE_ONCE(cq->notify->flags, SIW_NOTIFY_NOT); return true; } diff --git a/drivers/infiniband/sw/siw/siw_verbs.c b/drivers/infiniband/sw/siw/siw_verbs.c index 32dc79d0e898..e7f3a2379d9d 100644 --- a/drivers/infiniband/sw/siw/siw_verbs.c +++ b/drivers/infiniband/sw/siw/siw_verbs.c @@ -1049,7 +1049,7 @@ int siw_create_cq(struct ib_cq *base_cq, const struct ib_cq_init_attr *attr, spin_lock_init(&cq->lock); - cq->notify = &((struct siw_cq_ctrl *)&cq->queue[size])->notify; + cq->notify = (struct siw_cq_ctrl *)&cq->queue[size]; if (udata) { struct siw_uresp_create_cq uresp = {}; @@ -1141,11 +1141,17 @@ int siw_req_notify_cq(struct ib_cq *base_cq, enum ib_cq_notify_flags flags) siw_dbg_cq(cq, "flags: 0x%02x\n", flags); if ((flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED) - /* CQ event for next solicited completion */ - smp_store_mb(*cq->notify, SIW_NOTIFY_SOLICITED); + /* + * Enable CQ event for next solicited completion. + * and make it visible to all associated producers. + */ + smp_store_mb(cq->notify->flags, SIW_NOTIFY_SOLICITED); else - /* CQ event for any signalled completion */ - smp_store_mb(*cq->notify, SIW_NOTIFY_ALL); + /* + * Enable CQ event for any signalled completion. + * and make it visible to all associated producers. + */ + smp_store_mb(cq->notify->flags, SIW_NOTIFY_ALL); if (flags & IB_CQ_REPORT_MISSED_EVENTS) return cq->cq_put - cq->cq_get; diff --git a/include/uapi/rdma/siw-abi.h b/include/uapi/rdma/siw-abi.h index 7de68f1dc707..af735f55b291 100644 --- a/include/uapi/rdma/siw-abi.h +++ b/include/uapi/rdma/siw-abi.h @@ -180,6 +180,7 @@ struct siw_cqe { * to control CQ arming. */ struct siw_cq_ctrl { - __aligned_u64 notify; + __u32 flags; + __u32 pad; }; #endif From 2d6c25215ab26bb009de3575faab7b685f138e92 Mon Sep 17 00:00:00 2001 From: Roberto Sassu Date: Mon, 5 Aug 2019 18:44:27 +0200 Subject: [PATCH 384/482] KEYS: trusted: allow module init if TPM is inactive or deactivated Commit c78719203fc6 ("KEYS: trusted: allow trusted.ko to initialize w/o a TPM") allows the trusted module to be loaded even if a TPM is not found, to avoid module dependency problems. However, trusted module initialization can still fail if the TPM is inactive or deactivated. tpm_get_random() returns an error. This patch removes the call to tpm_get_random() and instead extends the PCR specified by the user with zeros. The security of this alternative is equivalent to the previous one, as either option prevents with a PCR update unsealing and misuse of sealed data by a user space process. Even if a PCR is extended with zeros, instead of random data, it is still computationally infeasible to find a value as input for a new PCR extend operation, to obtain again the PCR value that would allow unsealing. Cc: stable@vger.kernel.org Fixes: 240730437deb ("KEYS: trusted: explicitly use tpm_chip structure...") Signed-off-by: Roberto Sassu Reviewed-by: Tyler Hicks Suggested-by: Mimi Zohar Reviewed-by: Jarkko Sakkinen Signed-off-by: Jarkko Sakkinen --- security/keys/trusted.c | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/security/keys/trusted.c b/security/keys/trusted.c index 9a94672e7adc..ade699131065 100644 --- a/security/keys/trusted.c +++ b/security/keys/trusted.c @@ -1228,24 +1228,11 @@ static int __init trusted_shash_alloc(void) static int __init init_digests(void) { - u8 digest[TPM_MAX_DIGEST_SIZE]; - int ret; - int i; - - ret = tpm_get_random(chip, digest, TPM_MAX_DIGEST_SIZE); - if (ret < 0) - return ret; - if (ret < TPM_MAX_DIGEST_SIZE) - return -EFAULT; - digests = kcalloc(chip->nr_allocated_banks, sizeof(*digests), GFP_KERNEL); if (!digests) return -ENOMEM; - for (i = 0; i < chip->nr_allocated_banks; i++) - memcpy(digests[i].digest, digest, TPM_MAX_DIGEST_SIZE); - return 0; } From eb93685847a9055283d05951c1b205e737f38533 Mon Sep 17 00:00:00 2001 From: Paul Walmsley Date: Wed, 7 Aug 2019 19:07:34 -0700 Subject: [PATCH 385/482] riscv: fix flush_tlb_range() end address for flush_tlb_page() The RISC-V kernel implementation of flush_tlb_page() when CONFIG_SMP is set is wrong. It passes zero to flush_tlb_range() as the final address to flush, but it should be at least 'addr'. Some other Linux architecture ports use the beginning address to flush, plus PAGE_SIZE, as the final address to flush. This might flush slightly more than what's needed, but it seems unlikely that being more clever would improve anything. So let's just take that implementation for now. While here, convert the macro into a static inline function, primarily to avoid unintentional multiple evaluations of 'addr'. This second version of the patch fixes a coding style issue found by Christoph Hellwig . Reported-by: Andreas Schwab Signed-off-by: Paul Walmsley Reviewed-by: Christoph Hellwig --- arch/riscv/include/asm/tlbflush.h | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/arch/riscv/include/asm/tlbflush.h b/arch/riscv/include/asm/tlbflush.h index 687dd19735a7..4d9bbe8438bf 100644 --- a/arch/riscv/include/asm/tlbflush.h +++ b/arch/riscv/include/asm/tlbflush.h @@ -53,10 +53,17 @@ static inline void remote_sfence_vma(struct cpumask *cmask, unsigned long start, } #define flush_tlb_all() sbi_remote_sfence_vma(NULL, 0, -1) -#define flush_tlb_page(vma, addr) flush_tlb_range(vma, addr, 0) + #define flush_tlb_range(vma, start, end) \ remote_sfence_vma(mm_cpumask((vma)->vm_mm), start, (end) - (start)) -#define flush_tlb_mm(mm) \ + +static inline void flush_tlb_page(struct vm_area_struct *vma, + unsigned long addr) +{ + flush_tlb_range(vma, addr, addr + PAGE_SIZE); +} + +#define flush_tlb_mm(mm) \ remote_sfence_vma(mm_cpumask(mm), 0, -1) #endif /* CONFIG_SMP */ From 76470ccd62f18bfa0954bec10f2329339f793914 Mon Sep 17 00:00:00 2001 From: Ralph Campbell Date: Tue, 13 Aug 2019 15:37:04 -0700 Subject: [PATCH 386/482] mm: document zone device struct page field usage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Patch series "mm/hmm: fixes for device private page migration", v3. Testing the latest linux git tree turned up a few bugs with page migration to and from ZONE_DEVICE private and anonymous pages. Hopefully it clarifies how ZONE_DEVICE private struct page uses the same mapping and index fields from the source anonymous page mapping. This patch (of 3): Struct page for ZONE_DEVICE private pages uses the page->mapping and and page->index fields while the source anonymous pages are migrated to device private memory. This is so rmap_walk() can find the page when migrating the ZONE_DEVICE private page back to system memory. ZONE_DEVICE pmem backed fsdax pages also use the page->mapping and page->index fields when files are mapped into a process address space. Add comments to struct page and remove the unused "_zd_pad_1" field to make this more clear. Link: http://lkml.kernel.org/r/20190724232700.23327-2-rcampbell@nvidia.com Signed-off-by: Ralph Campbell Reviewed-by: John Hubbard Cc: Matthew Wilcox Cc: Vlastimil Babka Cc: Christoph Lameter Cc: Dave Hansen Cc: Jérôme Glisse Cc: "Kirill A . Shutemov" Cc: Lai Jiangshan Cc: Martin Schwidefsky Cc: Pekka Enberg Cc: Randy Dunlap Cc: Andrey Ryabinin Cc: Christoph Hellwig Cc: Jason Gunthorpe Cc: Andrew Morton Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm_types.h | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 3a37a89eb7a7..6a7a1083b6fb 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -159,7 +159,16 @@ struct page { /** @pgmap: Points to the hosting device page map. */ struct dev_pagemap *pgmap; void *zone_device_data; - unsigned long _zd_pad_1; /* uses mapping */ + /* + * ZONE_DEVICE private pages are counted as being + * mapped so the next 3 words hold the mapping, index, + * and private fields from the source anonymous or + * page cache page while the page is migrated to device + * private memory. + * ZONE_DEVICE MEMORY_DEVICE_FS_DAX pages also + * use the mapping, index, and private fields when + * pmem backed DAX files are mapped. + */ }; /** @rcu_head: You can use this to free a page by RCU. */ From 7ab0ad0e74f82db1a7e0810828785e44305cff20 Mon Sep 17 00:00:00 2001 From: Ralph Campbell Date: Tue, 13 Aug 2019 15:37:07 -0700 Subject: [PATCH 387/482] mm/hmm: fix ZONE_DEVICE anon page mapping reuse MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a ZONE_DEVICE private page is freed, the page->mapping field can be set. If this page is reused as an anonymous page, the previous value can prevent the page from being inserted into the CPU's anon rmap table. For example, when migrating a pte_none() page to device memory: migrate_vma(ops, vma, start, end, src, dst, private) migrate_vma_collect() src[] = MIGRATE_PFN_MIGRATE migrate_vma_prepare() /* no page to lock or isolate so OK */ migrate_vma_unmap() /* no page to unmap so OK */ ops->alloc_and_copy() /* driver allocates ZONE_DEVICE page for dst[] */ migrate_vma_pages() migrate_vma_insert_page() page_add_new_anon_rmap() __page_set_anon_rmap() /* This check sees the page's stale mapping field */ if (PageAnon(page)) return /* page->mapping is not updated */ The result is that the migration appears to succeed but a subsequent CPU fault will be unable to migrate the page back to system memory or worse. Clear the page->mapping field when freeing the ZONE_DEVICE page so stale pointer data doesn't affect future page use. Link: http://lkml.kernel.org/r/20190719192955.30462-3-rcampbell@nvidia.com Fixes: b7a523109fb5c9d2d6dd ("mm: don't clear ->mapping in hmm_devmem_free") Signed-off-by: Ralph Campbell Reviewed-by: John Hubbard Reviewed-by: Christoph Hellwig Cc: Dan Williams Cc: Jason Gunthorpe Cc: Logan Gunthorpe Cc: Ira Weiny Cc: Matthew Wilcox Cc: Mel Gorman Cc: Jan Kara Cc: "Kirill A. Shutemov" Cc: Michal Hocko Cc: Andrea Arcangeli Cc: Mike Kravetz Cc: "Jérôme Glisse" Cc: Andrey Ryabinin Cc: Christoph Lameter Cc: Dave Hansen Cc: Lai Jiangshan Cc: Martin Schwidefsky Cc: Pekka Enberg Cc: Randy Dunlap Cc: Vlastimil Babka Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memremap.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/mm/memremap.c b/mm/memremap.c index 86432650f829..ed70c4e8e52a 100644 --- a/mm/memremap.c +++ b/mm/memremap.c @@ -403,6 +403,30 @@ void __put_devmap_managed_page(struct page *page) mem_cgroup_uncharge(page); + /* + * When a device_private page is freed, the page->mapping field + * may still contain a (stale) mapping value. For example, the + * lower bits of page->mapping may still identify the page as + * an anonymous page. Ultimately, this entire field is just + * stale and wrong, and it will cause errors if not cleared. + * One example is: + * + * migrate_vma_pages() + * migrate_vma_insert_page() + * page_add_new_anon_rmap() + * __page_set_anon_rmap() + * ...checks page->mapping, via PageAnon(page) call, + * and incorrectly concludes that the page is an + * anonymous page. Therefore, it incorrectly, + * silently fails to set up the new anon rmap. + * + * For other types of ZONE_DEVICE pages, migration is either + * handled differently or not done at all, so there is no need + * to clear page->mapping. + */ + if (is_device_private_page(page)) + page->mapping = NULL; + page->pgmap->ops->page_free(page); } else if (!count) __put_page(page); From 1de13ee59225dfc98d483f8cce7d83f97c0b31de Mon Sep 17 00:00:00 2001 From: Ralph Campbell Date: Tue, 13 Aug 2019 15:37:11 -0700 Subject: [PATCH 388/482] mm/hmm: fix bad subpage pointer in try_to_unmap_one MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When migrating an anonymous private page to a ZONE_DEVICE private page, the source page->mapping and page->index fields are copied to the destination ZONE_DEVICE struct page and the page_mapcount() is increased. This is so rmap_walk() can be used to unmap and migrate the page back to system memory. However, try_to_unmap_one() computes the subpage pointer from a swap pte which computes an invalid page pointer and a kernel panic results such as: BUG: unable to handle page fault for address: ffffea1fffffffc8 Currently, only single pages can be migrated to device private memory so no subpage computation is needed and it can be set to "page". [rcampbell@nvidia.com: add comment] Link: http://lkml.kernel.org/r/20190724232700.23327-4-rcampbell@nvidia.com Link: http://lkml.kernel.org/r/20190719192955.30462-4-rcampbell@nvidia.com Fixes: a5430dda8a3a1c ("mm/migrate: support un-addressable ZONE_DEVICE page in migration") Signed-off-by: Ralph Campbell Cc: "Jérôme Glisse" Cc: "Kirill A. Shutemov" Cc: Mike Kravetz Cc: Christoph Hellwig Cc: Jason Gunthorpe Cc: John Hubbard Cc: Andrea Arcangeli Cc: Andrey Ryabinin Cc: Christoph Lameter Cc: Dan Williams Cc: Dave Hansen Cc: Ira Weiny Cc: Jan Kara Cc: Lai Jiangshan Cc: Logan Gunthorpe Cc: Martin Schwidefsky Cc: Matthew Wilcox Cc: Mel Gorman Cc: Michal Hocko Cc: Pekka Enberg Cc: Randy Dunlap Cc: Vlastimil Babka Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/rmap.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/mm/rmap.c b/mm/rmap.c index e5dfe2ae6b0d..003377e24232 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -1475,7 +1475,15 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma, /* * No need to invalidate here it will synchronize on * against the special swap migration pte. + * + * The assignment to subpage above was computed from a + * swap PTE which results in an invalid pointer. + * Since only PAGE_SIZE pages can currently be + * migrated, just set it to page. This will need to be + * changed when hugepage migrations to device private + * memory are supported. */ + subpage = page; goto discard; } From d883544515aae54842c21730b880172e7894fde9 Mon Sep 17 00:00:00 2001 From: Yang Shi Date: Tue, 13 Aug 2019 15:37:15 -0700 Subject: [PATCH 389/482] mm: mempolicy: make the behavior consistent when MPOL_MF_MOVE* and MPOL_MF_STRICT were specified When both MPOL_MF_MOVE* and MPOL_MF_STRICT was specified, mbind() should try best to migrate misplaced pages, if some of the pages could not be migrated, then return -EIO. There are three different sub-cases: 1. vma is not migratable 2. vma is migratable, but there are unmovable pages 3. vma is migratable, pages are movable, but migrate_pages() fails If #1 happens, kernel would just abort immediately, then return -EIO, after a7f40cfe3b7a ("mm: mempolicy: make mbind() return -EIO when MPOL_MF_STRICT is specified"). If #3 happens, kernel would set policy and migrate pages with best-effort, but won't rollback the migrated pages and reset the policy back. Before that commit, they behaves in the same way. It'd better to keep their behavior consistent. But, rolling back the migrated pages and resetting the policy back sounds not feasible, so just make #1 behave as same as #3. Userspace will know that not everything was successfully migrated (via -EIO), and can take whatever steps it deems necessary - attempt rollback, determine which exact page(s) are violating the policy, etc. Make queue_pages_range() return 1 to indicate there are unmovable pages or vma is not migratable. The #2 is not handled correctly in the current kernel, the following patch will fix it. [yang.shi@linux.alibaba.com: fix review comments from Vlastimil] Link: http://lkml.kernel.org/r/1563556862-54056-2-git-send-email-yang.shi@linux.alibaba.com Link: http://lkml.kernel.org/r/1561162809-59140-2-git-send-email-yang.shi@linux.alibaba.com Signed-off-by: Yang Shi Reviewed-by: Vlastimil Babka Cc: Michal Hocko Cc: Mel Gorman Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/mempolicy.c | 68 +++++++++++++++++++++++++++++++++++--------------- 1 file changed, 48 insertions(+), 20 deletions(-) diff --git a/mm/mempolicy.c b/mm/mempolicy.c index f48693f75b37..932c26845e3e 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -429,11 +429,14 @@ static inline bool queue_pages_required(struct page *page, } /* - * queue_pages_pmd() has three possible return values: - * 1 - pages are placed on the right node or queued successfully. - * 0 - THP was split. - * -EIO - is migration entry or MPOL_MF_STRICT was specified and an existing - * page was already on a node that does not follow the policy. + * queue_pages_pmd() has four possible return values: + * 0 - pages are placed on the right node or queued successfully. + * 1 - there is unmovable page, and MPOL_MF_MOVE* & MPOL_MF_STRICT were + * specified. + * 2 - THP was split. + * -EIO - is migration entry or only MPOL_MF_STRICT was specified and an + * existing page was already on a node that does not follow the + * policy. */ static int queue_pages_pmd(pmd_t *pmd, spinlock_t *ptl, unsigned long addr, unsigned long end, struct mm_walk *walk) @@ -451,19 +454,17 @@ static int queue_pages_pmd(pmd_t *pmd, spinlock_t *ptl, unsigned long addr, if (is_huge_zero_page(page)) { spin_unlock(ptl); __split_huge_pmd(walk->vma, pmd, addr, false, NULL); + ret = 2; goto out; } - if (!queue_pages_required(page, qp)) { - ret = 1; + if (!queue_pages_required(page, qp)) goto unlock; - } - ret = 1; flags = qp->flags; /* go to thp migration */ if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) { if (!vma_migratable(walk->vma)) { - ret = -EIO; + ret = 1; goto unlock; } @@ -479,6 +480,13 @@ static int queue_pages_pmd(pmd_t *pmd, spinlock_t *ptl, unsigned long addr, /* * Scan through pages checking if pages follow certain conditions, * and move them to the pagelist if they do. + * + * queue_pages_pte_range() has three possible return values: + * 0 - pages are placed on the right node or queued successfully. + * 1 - there is unmovable page, and MPOL_MF_MOVE* & MPOL_MF_STRICT were + * specified. + * -EIO - only MPOL_MF_STRICT was specified and an existing page was already + * on a node that does not follow the policy. */ static int queue_pages_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, struct mm_walk *walk) @@ -488,17 +496,17 @@ static int queue_pages_pte_range(pmd_t *pmd, unsigned long addr, struct queue_pages *qp = walk->private; unsigned long flags = qp->flags; int ret; + bool has_unmovable = false; pte_t *pte; spinlock_t *ptl; ptl = pmd_trans_huge_lock(pmd, vma); if (ptl) { ret = queue_pages_pmd(pmd, ptl, addr, end, walk); - if (ret > 0) - return 0; - else if (ret < 0) + if (ret != 2) return ret; } + /* THP was split, fall through to pte walk */ if (pmd_trans_unstable(pmd)) return 0; @@ -519,14 +527,21 @@ static int queue_pages_pte_range(pmd_t *pmd, unsigned long addr, if (!queue_pages_required(page, qp)) continue; if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) { - if (!vma_migratable(vma)) + /* MPOL_MF_STRICT must be specified if we get here */ + if (!vma_migratable(vma)) { + has_unmovable = true; break; + } migrate_page_add(page, qp->pagelist, flags); } else break; } pte_unmap_unlock(pte - 1, ptl); cond_resched(); + + if (has_unmovable) + return 1; + return addr != end ? -EIO : 0; } @@ -639,7 +654,13 @@ static int queue_pages_test_walk(unsigned long start, unsigned long end, * * If pages found in a given range are on a set of nodes (determined by * @nodes and @flags,) it's isolated and queued to the pagelist which is - * passed via @private.) + * passed via @private. + * + * queue_pages_range() has three possible return values: + * 1 - there is unmovable page, but MPOL_MF_MOVE* & MPOL_MF_STRICT were + * specified. + * 0 - queue pages successfully or no misplaced page. + * -EIO - there is misplaced page and only MPOL_MF_STRICT was specified. */ static int queue_pages_range(struct mm_struct *mm, unsigned long start, unsigned long end, @@ -1182,6 +1203,7 @@ static long do_mbind(unsigned long start, unsigned long len, struct mempolicy *new; unsigned long end; int err; + int ret; LIST_HEAD(pagelist); if (flags & ~(unsigned long)MPOL_MF_VALID) @@ -1243,10 +1265,15 @@ static long do_mbind(unsigned long start, unsigned long len, if (err) goto mpol_out; - err = queue_pages_range(mm, start, end, nmask, + ret = queue_pages_range(mm, start, end, nmask, flags | MPOL_MF_INVERT, &pagelist); - if (!err) - err = mbind_range(mm, start, end, new); + + if (ret < 0) { + err = -EIO; + goto up_out; + } + + err = mbind_range(mm, start, end, new); if (!err) { int nr_failed = 0; @@ -1259,13 +1286,14 @@ static long do_mbind(unsigned long start, unsigned long len, putback_movable_pages(&pagelist); } - if (nr_failed && (flags & MPOL_MF_STRICT)) + if ((ret > 0) || (nr_failed && (flags & MPOL_MF_STRICT))) err = -EIO; } else putback_movable_pages(&pagelist); +up_out: up_write(&mm->mmap_sem); - mpol_out: +mpol_out: mpol_put(new); return err; } From a53190a4aaa36494f4d7209fd1fcc6f2ee08e0e0 Mon Sep 17 00:00:00 2001 From: Yang Shi Date: Tue, 13 Aug 2019 15:37:18 -0700 Subject: [PATCH 390/482] mm: mempolicy: handle vma with unmovable pages mapped correctly in mbind When running syzkaller internally, we ran into the below bug on 4.9.x kernel: kernel BUG at mm/huge_memory.c:2124! invalid opcode: 0000 [#1] SMP KASAN CPU: 0 PID: 1518 Comm: syz-executor107 Not tainted 4.9.168+ #2 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 0.5.1 01/01/2011 task: ffff880067b34900 task.stack: ffff880068998000 RIP: split_huge_page_to_list+0x8fb/0x1030 mm/huge_memory.c:2124 Call Trace: split_huge_page include/linux/huge_mm.h:100 [inline] queue_pages_pte_range+0x7e1/0x1480 mm/mempolicy.c:538 walk_pmd_range mm/pagewalk.c:50 [inline] walk_pud_range mm/pagewalk.c:90 [inline] walk_pgd_range mm/pagewalk.c:116 [inline] __walk_page_range+0x44a/0xdb0 mm/pagewalk.c:208 walk_page_range+0x154/0x370 mm/pagewalk.c:285 queue_pages_range+0x115/0x150 mm/mempolicy.c:694 do_mbind mm/mempolicy.c:1241 [inline] SYSC_mbind+0x3c3/0x1030 mm/mempolicy.c:1370 SyS_mbind+0x46/0x60 mm/mempolicy.c:1352 do_syscall_64+0x1d2/0x600 arch/x86/entry/common.c:282 entry_SYSCALL_64_after_swapgs+0x5d/0xdb Code: c7 80 1c 02 00 e8 26 0a 76 01 <0f> 0b 48 c7 c7 40 46 45 84 e8 4c RIP [] split_huge_page_to_list+0x8fb/0x1030 mm/huge_memory.c:2124 RSP with the below test: uint64_t r[1] = {0xffffffffffffffff}; int main(void) { syscall(__NR_mmap, 0x20000000, 0x1000000, 3, 0x32, -1, 0); intptr_t res = 0; res = syscall(__NR_socket, 0x11, 3, 0x300); if (res != -1) r[0] = res; *(uint32_t*)0x20000040 = 0x10000; *(uint32_t*)0x20000044 = 1; *(uint32_t*)0x20000048 = 0xc520; *(uint32_t*)0x2000004c = 1; syscall(__NR_setsockopt, r[0], 0x107, 0xd, 0x20000040, 0x10); syscall(__NR_mmap, 0x20fed000, 0x10000, 0, 0x8811, r[0], 0); *(uint64_t*)0x20000340 = 2; syscall(__NR_mbind, 0x20ff9000, 0x4000, 0x4002, 0x20000340, 0x45d4, 3); return 0; } Actually the test does: mmap(0x20000000, 16777216, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x20000000 socket(AF_PACKET, SOCK_RAW, 768) = 3 setsockopt(3, SOL_PACKET, PACKET_TX_RING, {block_size=65536, block_nr=1, frame_size=50464, frame_nr=1}, 16) = 0 mmap(0x20fed000, 65536, PROT_NONE, MAP_SHARED|MAP_FIXED|MAP_POPULATE|MAP_DENYWRITE, 3, 0) = 0x20fed000 mbind(..., MPOL_MF_STRICT|MPOL_MF_MOVE) = 0 The setsockopt() would allocate compound pages (16 pages in this test) for packet tx ring, then the mmap() would call packet_mmap() to map the pages into the user address space specified by the mmap() call. When calling mbind(), it would scan the vma to queue the pages for migration to the new node. It would split any huge page since 4.9 doesn't support THP migration, however, the packet tx ring compound pages are not THP and even not movable. So, the above bug is triggered. However, the later kernel is not hit by this issue due to commit d44d363f6578 ("mm: don't assume anonymous pages have SwapBacked flag"), which just removes the PageSwapBacked check for a different reason. But, there is a deeper issue. According to the semantic of mbind(), it should return -EIO if MPOL_MF_MOVE or MPOL_MF_MOVE_ALL was specified and MPOL_MF_STRICT was also specified, but the kernel was unable to move all existing pages in the range. The tx ring of the packet socket is definitely not movable, however, mbind() returns success for this case. Although the most socket file associates with non-movable pages, but XDP may have movable pages from gup. So, it sounds not fine to just check the underlying file type of vma in vma_migratable(). Change migrate_page_add() to check if the page is movable or not, if it is unmovable, just return -EIO. But do not abort pte walk immediately, since there may be pages off LRU temporarily. We should migrate other pages if MPOL_MF_MOVE* is specified. Set has_unmovable flag if some paged could not be not moved, then return -EIO for mbind() eventually. With this change the above test would return -EIO as expected. [yang.shi@linux.alibaba.com: fix review comments from Vlastimil] Link: http://lkml.kernel.org/r/1563556862-54056-3-git-send-email-yang.shi@linux.alibaba.com Link: http://lkml.kernel.org/r/1561162809-59140-3-git-send-email-yang.shi@linux.alibaba.com Signed-off-by: Yang Shi Reviewed-by: Vlastimil Babka Cc: Michal Hocko Cc: Mel Gorman Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/mempolicy.c | 32 +++++++++++++++++++++++++------- 1 file changed, 25 insertions(+), 7 deletions(-) diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 932c26845e3e..547cd403ed02 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -403,7 +403,7 @@ static const struct mempolicy_operations mpol_ops[MPOL_MAX] = { }, }; -static void migrate_page_add(struct page *page, struct list_head *pagelist, +static int migrate_page_add(struct page *page, struct list_head *pagelist, unsigned long flags); struct queue_pages { @@ -463,12 +463,11 @@ static int queue_pages_pmd(pmd_t *pmd, spinlock_t *ptl, unsigned long addr, flags = qp->flags; /* go to thp migration */ if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) { - if (!vma_migratable(walk->vma)) { + if (!vma_migratable(walk->vma) || + migrate_page_add(page, qp->pagelist, flags)) { ret = 1; goto unlock; } - - migrate_page_add(page, qp->pagelist, flags); } else ret = -EIO; unlock: @@ -532,7 +531,14 @@ static int queue_pages_pte_range(pmd_t *pmd, unsigned long addr, has_unmovable = true; break; } - migrate_page_add(page, qp->pagelist, flags); + + /* + * Do not abort immediately since there may be + * temporary off LRU pages in the range. Still + * need migrate other LRU pages. + */ + if (migrate_page_add(page, qp->pagelist, flags)) + has_unmovable = true; } else break; } @@ -961,7 +967,7 @@ static long do_get_mempolicy(int *policy, nodemask_t *nmask, /* * page migration, thp tail pages can be passed. */ -static void migrate_page_add(struct page *page, struct list_head *pagelist, +static int migrate_page_add(struct page *page, struct list_head *pagelist, unsigned long flags) { struct page *head = compound_head(page); @@ -974,8 +980,19 @@ static void migrate_page_add(struct page *page, struct list_head *pagelist, mod_node_page_state(page_pgdat(head), NR_ISOLATED_ANON + page_is_file_cache(head), hpage_nr_pages(head)); + } else if (flags & MPOL_MF_STRICT) { + /* + * Non-movable page may reach here. And, there may be + * temporary off LRU pages or non-LRU movable pages. + * Treat them as unmovable pages since they can't be + * isolated, so they can't be moved at the moment. It + * should return -EIO for this case too. + */ + return -EIO; } } + + return 0; } /* page allocation callback for NUMA node migration */ @@ -1178,9 +1195,10 @@ static struct page *new_page(struct page *page, unsigned long start) } #else -static void migrate_page_add(struct page *page, struct list_head *pagelist, +static int migrate_page_add(struct page *page, struct list_head *pagelist, unsigned long flags) { + return -EIO; } int do_migrate_pages(struct mm_struct *mm, const nodemask_t *from, From 6051d3bd3b91e96c59e62b8be2dba1cc2b19ee40 Mon Sep 17 00:00:00 2001 From: Henry Burns Date: Tue, 13 Aug 2019 15:37:21 -0700 Subject: [PATCH 391/482] mm/z3fold.c: fix z3fold_destroy_pool() ordering The constraint from the zpool use of z3fold_destroy_pool() is there are no outstanding handles to memory (so no active allocations), but it is possible for there to be outstanding work on either of the two wqs in the pool. If there is work queued on pool->compact_workqueue when it is called, z3fold_destroy_pool() will do: z3fold_destroy_pool() destroy_workqueue(pool->release_wq) destroy_workqueue(pool->compact_wq) drain_workqueue(pool->compact_wq) do_compact_page(zhdr) kref_put(&zhdr->refcount) __release_z3fold_page(zhdr, ...) queue_work_on(pool->release_wq, &pool->work) *BOOM* So compact_wq needs to be destroyed before release_wq. Link: http://lkml.kernel.org/r/20190726224810.79660-1-henryburns@google.com Fixes: 5d03a6613957 ("mm/z3fold.c: use kref to prevent page free/compact race") Signed-off-by: Henry Burns Reviewed-by: Shakeel Butt Reviewed-by: Jonathan Adams Cc: Vitaly Vul Cc: Vitaly Wool Cc: David Howells Cc: Thomas Gleixner Cc: Al Viro Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/z3fold.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/mm/z3fold.c b/mm/z3fold.c index 1a029a7432ee..43de92f52961 100644 --- a/mm/z3fold.c +++ b/mm/z3fold.c @@ -818,8 +818,15 @@ static void z3fold_destroy_pool(struct z3fold_pool *pool) { kmem_cache_destroy(pool->c_handle); z3fold_unregister_migration(pool); - destroy_workqueue(pool->release_wq); + + /* + * We need to destroy pool->compact_wq before pool->release_wq, + * as any pending work on pool->compact_wq will call + * queue_work(pool->release_wq, &pool->work). + */ + destroy_workqueue(pool->compact_wq); + destroy_workqueue(pool->release_wq); kfree(pool); } From b997052bc3ac444a0bceab1093aff7ae71ed419e Mon Sep 17 00:00:00 2001 From: Henry Burns Date: Tue, 13 Aug 2019 15:37:25 -0700 Subject: [PATCH 392/482] mm/z3fold.c: fix z3fold_destroy_pool() race condition The constraint from the zpool use of z3fold_destroy_pool() is there are no outstanding handles to memory (so no active allocations), but it is possible for there to be outstanding work on either of the two wqs in the pool. Calling z3fold_deregister_migration() before the workqueues are drained means that there can be allocated pages referencing a freed inode, causing any thread in compaction to be able to trip over the bad pointer in PageMovable(). Link: http://lkml.kernel.org/r/20190726224810.79660-2-henryburns@google.com Fixes: 1f862989b04a ("mm/z3fold.c: support page migration") Signed-off-by: Henry Burns Reviewed-by: Shakeel Butt Reviewed-by: Jonathan Adams Cc: Vitaly Vul Cc: Vitaly Wool Cc: David Howells Cc: Thomas Gleixner Cc: Al Viro Cc: Henry Burns Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/z3fold.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/mm/z3fold.c b/mm/z3fold.c index 43de92f52961..ed19d98c9dcd 100644 --- a/mm/z3fold.c +++ b/mm/z3fold.c @@ -817,16 +817,19 @@ static struct z3fold_pool *z3fold_create_pool(const char *name, gfp_t gfp, static void z3fold_destroy_pool(struct z3fold_pool *pool) { kmem_cache_destroy(pool->c_handle); - z3fold_unregister_migration(pool); /* * We need to destroy pool->compact_wq before pool->release_wq, * as any pending work on pool->compact_wq will call * queue_work(pool->release_wq, &pool->work). + * + * There are still outstanding pages until both workqueues are drained, + * so we cannot unregister migration until then. */ destroy_workqueue(pool->compact_wq); destroy_workqueue(pool->release_wq); + z3fold_unregister_migration(pool); kfree(pool); } From 54a83d6bcbf8f4700013766b974bf9190d40b689 Mon Sep 17 00:00:00 2001 From: Miles Chen Date: Tue, 13 Aug 2019 15:37:28 -0700 Subject: [PATCH 393/482] mm/memcontrol.c: fix use after free in mem_cgroup_iter() This patch is sent to report an use after free in mem_cgroup_iter() after merging commit be2657752e9e ("mm: memcg: fix use after free in mem_cgroup_iter()"). I work with android kernel tree (4.9 & 4.14), and commit be2657752e9e ("mm: memcg: fix use after free in mem_cgroup_iter()") has been merged to the trees. However, I can still observe use after free issues addressed in the commit be2657752e9e. (on low-end devices, a few times this month) backtrace: css_tryget <- crash here mem_cgroup_iter shrink_node shrink_zones do_try_to_free_pages try_to_free_pages __perform_reclaim __alloc_pages_direct_reclaim __alloc_pages_slowpath __alloc_pages_nodemask To debug, I poisoned mem_cgroup before freeing it: static void __mem_cgroup_free(struct mem_cgroup *memcg) for_each_node(node) free_mem_cgroup_per_node_info(memcg, node); free_percpu(memcg->stat); + /* poison memcg before freeing it */ + memset(memcg, 0x78, sizeof(struct mem_cgroup)); kfree(memcg); } The coredump shows the position=0xdbbc2a00 is freed. (gdb) p/x ((struct mem_cgroup_per_node *)0xe5009e00)->iter[8] $13 = {position = 0xdbbc2a00, generation = 0x2efd} 0xdbbc2a00: 0xdbbc2e00 0x00000000 0xdbbc2800 0x00000100 0xdbbc2a10: 0x00000200 0x78787878 0x00026218 0x00000000 0xdbbc2a20: 0xdcad6000 0x00000001 0x78787800 0x00000000 0xdbbc2a30: 0x78780000 0x00000000 0x0068fb84 0x78787878 0xdbbc2a40: 0x78787878 0x78787878 0x78787878 0xe3fa5cc0 0xdbbc2a50: 0x78787878 0x78787878 0x00000000 0x00000000 0xdbbc2a60: 0x00000000 0x00000000 0x00000000 0x00000000 0xdbbc2a70: 0x00000000 0x00000000 0x00000000 0x00000000 0xdbbc2a80: 0x00000000 0x00000000 0x00000000 0x00000000 0xdbbc2a90: 0x00000001 0x00000000 0x00000000 0x00100000 0xdbbc2aa0: 0x00000001 0xdbbc2ac8 0x00000000 0x00000000 0xdbbc2ab0: 0x00000000 0x00000000 0x00000000 0x00000000 0xdbbc2ac0: 0x00000000 0x00000000 0xe5b02618 0x00001000 0xdbbc2ad0: 0x00000000 0x78787878 0x78787878 0x78787878 0xdbbc2ae0: 0x78787878 0x78787878 0x78787878 0x78787878 0xdbbc2af0: 0x78787878 0x78787878 0x78787878 0x78787878 0xdbbc2b00: 0x78787878 0x78787878 0x78787878 0x78787878 0xdbbc2b10: 0x78787878 0x78787878 0x78787878 0x78787878 0xdbbc2b20: 0x78787878 0x78787878 0x78787878 0x78787878 0xdbbc2b30: 0x78787878 0x78787878 0x78787878 0x78787878 0xdbbc2b40: 0x78787878 0x78787878 0x78787878 0x78787878 0xdbbc2b50: 0x78787878 0x78787878 0x78787878 0x78787878 0xdbbc2b60: 0x78787878 0x78787878 0x78787878 0x78787878 0xdbbc2b70: 0x78787878 0x78787878 0x78787878 0x78787878 0xdbbc2b80: 0x78787878 0x78787878 0x00000000 0x78787878 0xdbbc2b90: 0x78787878 0x78787878 0x78787878 0x78787878 0xdbbc2ba0: 0x78787878 0x78787878 0x78787878 0x78787878 In the reclaim path, try_to_free_pages() does not setup sc.target_mem_cgroup and sc is passed to do_try_to_free_pages(), ..., shrink_node(). In mem_cgroup_iter(), root is set to root_mem_cgroup because sc->target_mem_cgroup is NULL. It is possible to assign a memcg to root_mem_cgroup.nodeinfo.iter in mem_cgroup_iter(). try_to_free_pages struct scan_control sc = {...}, target_mem_cgroup is 0x0; do_try_to_free_pages shrink_zones shrink_node mem_cgroup *root = sc->target_mem_cgroup; memcg = mem_cgroup_iter(root, NULL, &reclaim); mem_cgroup_iter() if (!root) root = root_mem_cgroup; ... css = css_next_descendant_pre(css, &root->css); memcg = mem_cgroup_from_css(css); cmpxchg(&iter->position, pos, memcg); My device uses memcg non-hierarchical mode. When we release a memcg: invalidate_reclaim_iterators() reaches only dead_memcg and its parents. If non-hierarchical mode is used, invalidate_reclaim_iterators() never reaches root_mem_cgroup. static void invalidate_reclaim_iterators(struct mem_cgroup *dead_memcg) { struct mem_cgroup *memcg = dead_memcg; for (; memcg; memcg = parent_mem_cgroup(memcg) ... } So the use after free scenario looks like: CPU1 CPU2 try_to_free_pages do_try_to_free_pages shrink_zones shrink_node mem_cgroup_iter() if (!root) root = root_mem_cgroup; ... css = css_next_descendant_pre(css, &root->css); memcg = mem_cgroup_from_css(css); cmpxchg(&iter->position, pos, memcg); invalidate_reclaim_iterators(memcg); ... __mem_cgroup_free() kfree(memcg); try_to_free_pages do_try_to_free_pages shrink_zones shrink_node mem_cgroup_iter() if (!root) root = root_mem_cgroup; ... mz = mem_cgroup_nodeinfo(root, reclaim->pgdat->node_id); iter = &mz->iter[reclaim->priority]; pos = READ_ONCE(iter->position); css_tryget(&pos->css) <- use after free To avoid this, we should also invalidate root_mem_cgroup.nodeinfo.iter in invalidate_reclaim_iterators(). [cai@lca.pw: fix -Wparentheses compilation warning] Link: http://lkml.kernel.org/r/1564580753-17531-1-git-send-email-cai@lca.pw Link: http://lkml.kernel.org/r/20190730015729.4406-1-miles.chen@mediatek.com Fixes: 5ac8fb31ad2e ("mm: memcontrol: convert reclaim iterator to simple css refcounting") Signed-off-by: Miles Chen Signed-off-by: Qian Cai Acked-by: Michal Hocko Cc: Johannes Weiner Cc: Vladimir Davydov Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memcontrol.c | 39 +++++++++++++++++++++++++++++---------- 1 file changed, 29 insertions(+), 10 deletions(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index cdbb7a84cb6e..2e405e058eda 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -1130,26 +1130,45 @@ void mem_cgroup_iter_break(struct mem_cgroup *root, css_put(&prev->css); } -static void invalidate_reclaim_iterators(struct mem_cgroup *dead_memcg) +static void __invalidate_reclaim_iterators(struct mem_cgroup *from, + struct mem_cgroup *dead_memcg) { - struct mem_cgroup *memcg = dead_memcg; struct mem_cgroup_reclaim_iter *iter; struct mem_cgroup_per_node *mz; int nid; int i; - for (; memcg; memcg = parent_mem_cgroup(memcg)) { - for_each_node(nid) { - mz = mem_cgroup_nodeinfo(memcg, nid); - for (i = 0; i <= DEF_PRIORITY; i++) { - iter = &mz->iter[i]; - cmpxchg(&iter->position, - dead_memcg, NULL); - } + for_each_node(nid) { + mz = mem_cgroup_nodeinfo(from, nid); + for (i = 0; i <= DEF_PRIORITY; i++) { + iter = &mz->iter[i]; + cmpxchg(&iter->position, + dead_memcg, NULL); } } } +static void invalidate_reclaim_iterators(struct mem_cgroup *dead_memcg) +{ + struct mem_cgroup *memcg = dead_memcg; + struct mem_cgroup *last; + + do { + __invalidate_reclaim_iterators(memcg, dead_memcg); + last = memcg; + } while ((memcg = parent_mem_cgroup(memcg))); + + /* + * When cgruop1 non-hierarchy mode is used, + * parent_mem_cgroup() does not walk all the way up to the + * cgroup root (root_mem_cgroup). So we have to handle + * dead_memcg from cgroup root separately. + */ + if (last != root_mem_cgroup) + __invalidate_reclaim_iterators(root_mem_cgroup, + dead_memcg); +} + /** * mem_cgroup_scan_tasks - iterate over tasks of a memory cgroup hierarchy * @memcg: hierarchy root From 5336e52c9e46afa69b7a85a0a091f0e4daa23d6e Mon Sep 17 00:00:00 2001 From: Kuppuswamy Sathyanarayanan Date: Tue, 13 Aug 2019 15:37:31 -0700 Subject: [PATCH 394/482] mm/vmalloc.c: fix percpu free VM area search criteria Recent changes to the vmalloc code by commit 68ad4a330433 ("mm/vmalloc.c: keep track of free blocks for vmap allocation") can cause spurious percpu allocation failures. These, in turn, can result in panic()s in the slub code. One such possible panic was reported by Dave Hansen in following link https://lkml.org/lkml/2019/6/19/939. Another related panic observed is, RIP: 0033:0x7f46f7441b9b Call Trace: dump_stack+0x61/0x80 pcpu_alloc.cold.30+0x22/0x4f mem_cgroup_css_alloc+0x110/0x650 cgroup_apply_control_enable+0x133/0x330 cgroup_mkdir+0x41b/0x500 kernfs_iop_mkdir+0x5a/0x90 vfs_mkdir+0x102/0x1b0 do_mkdirat+0x7d/0xf0 do_syscall_64+0x5b/0x180 entry_SYSCALL_64_after_hwframe+0x44/0xa9 VMALLOC memory manager divides the entire VMALLOC space (VMALLOC_START to VMALLOC_END) into multiple VM areas (struct vm_areas), and it mainly uses two lists (vmap_area_list & free_vmap_area_list) to track the used and free VM areas in VMALLOC space. And pcpu_get_vm_areas(offsets[], sizes[], nr_vms, align) function is used for allocating congruent VM areas for percpu memory allocator. In order to not conflict with VMALLOC users, pcpu_get_vm_areas allocates VM areas near the end of the VMALLOC space. So the search for free vm_area for the given requirement starts near VMALLOC_END and moves upwards towards VMALLOC_START. Prior to commit 68ad4a330433, the search for free vm_area in pcpu_get_vm_areas() involves following two main steps. Step 1: Find a aligned "base" adress near VMALLOC_END. va = free vm area near VMALLOC_END Step 2: Loop through number of requested vm_areas and check, Step 2.1: if (base < VMALLOC_START) 1. fail with error Step 2.2: // end is offsets[area] + sizes[area] if (base + end > va->vm_end) 1. Move the base downwards and repeat Step 2 Step 2.3: if (base + start < va->vm_start) 1. Move to previous free vm_area node, find aligned base address and repeat Step 2 But Commit 68ad4a330433 removed Step 2.2 and modified Step 2.3 as below: Step 2.3: if (base + start < va->vm_start || base + end > va->vm_end) 1. Move to previous free vm_area node, find aligned base address and repeat Step 2 Above change is the root cause of spurious percpu memory allocation failures. For example, consider a case where a relatively large vm_area (~ 30 TB) was ignored in free vm_area search because it did not pass the base + end < vm->vm_end boundary check. Ignoring such large free vm_area's would lead to not finding free vm_area within boundary of VMALLOC_start to VMALLOC_END which in turn leads to allocation failures. So modify the search algorithm to include Step 2.2. Link: http://lkml.kernel.org/r/20190729232139.91131-1-sathyanarayanan.kuppuswamy@linux.intel.com Fixes: 68ad4a330433 ("mm/vmalloc.c: keep track of free blocks for vmap allocation") Signed-off-by: Kuppuswamy Sathyanarayanan Reported-by: Dave Hansen Acked-by: Dennis Zhou Reviewed-by: Uladzislau Rezki (Sony) Cc: Roman Gushchin Cc: sathyanarayanan kuppuswamy Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/vmalloc.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/mm/vmalloc.c b/mm/vmalloc.c index e0fc963acc41..7ba11e12a11f 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -3278,10 +3278,20 @@ struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets, if (va == NULL) goto overflow; + /* + * If required width exeeds current VA block, move + * base downwards and then recheck. + */ + if (base + end > va->va_end) { + base = pvm_determine_end_from_reverse(&va, align) - end; + term_area = area; + continue; + } + /* * If this VA does not fit, move base downwards and recheck. */ - if (base + start < va->va_start || base + end > va->va_end) { + if (base + start < va->va_start) { va = node_to_va(rb_prev(&va->rb_node)); base = pvm_determine_end_from_reverse(&va, align) - end; term_area = area; From fcf3a5b62f431ce9feeac73afbe128b7b6395dbc Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Tue, 13 Aug 2019 15:37:34 -0700 Subject: [PATCH 395/482] mm: kmemleak: disable early logging in case of error If an error occurs during kmemleak_init() (e.g. kmem cache cannot be created), kmemleak is disabled but kmemleak_early_log remains enabled. Subsequently, when the .init.text section is freed, the log_early() function no longer exists. To avoid a page fault in such scenario, ensure that kmemleak_disable() also disables early logging. Link: http://lkml.kernel.org/r/20190731152302.42073-1-catalin.marinas@arm.com Signed-off-by: Catalin Marinas Reported-by: Qian Cai Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/kmemleak.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/kmemleak.c b/mm/kmemleak.c index 6e9e8cca663e..f6e602918dac 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c @@ -1966,6 +1966,7 @@ static void kmemleak_disable(void) /* stop any memory operation tracing */ kmemleak_enabled = 0; + kmemleak_early_log = 0; /* check whether it is too early for a kernel thread */ if (kmemleak_initialized) @@ -2009,7 +2010,6 @@ void __init kmemleak_init(void) #ifdef CONFIG_DEBUG_KMEMLEAK_DEFAULT_OFF if (!kmemleak_skip_disable) { - kmemleak_early_log = 0; kmemleak_disable(); return; } From 951531691c4bcaa59f56a316e018bc2ff1ddf855 Mon Sep 17 00:00:00 2001 From: "Isaac J. Manjarres" Date: Tue, 13 Aug 2019 15:37:37 -0700 Subject: [PATCH 396/482] mm/usercopy: use memory range to be accessed for wraparound check Currently, when checking to see if accessing n bytes starting at address "ptr" will cause a wraparound in the memory addresses, the check in check_bogus_address() adds an extra byte, which is incorrect, as the range of addresses that will be accessed is [ptr, ptr + (n - 1)]. This can lead to incorrectly detecting a wraparound in the memory address, when trying to read 4 KB from memory that is mapped to the the last possible page in the virtual address space, when in fact, accessing that range of memory would not cause a wraparound to occur. Use the memory range that will actually be accessed when considering if accessing a certain amount of bytes will cause the memory address to wrap around. Link: http://lkml.kernel.org/r/1564509253-23287-1-git-send-email-isaacm@codeaurora.org Fixes: f5509cc18daa ("mm: Hardened usercopy") Signed-off-by: Prasad Sodagudi Signed-off-by: Isaac J. Manjarres Co-developed-by: Prasad Sodagudi Reviewed-by: William Kucharski Acked-by: Kees Cook Cc: Greg Kroah-Hartman Cc: Trilok Soni Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/usercopy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/usercopy.c b/mm/usercopy.c index 2a09796edef8..98e924864554 100644 --- a/mm/usercopy.c +++ b/mm/usercopy.c @@ -147,7 +147,7 @@ static inline void check_bogus_address(const unsigned long ptr, unsigned long n, bool to_user) { /* Reject if object wraps past end of memory. */ - if (ptr + n < ptr) + if (ptr + (n - 1) < ptr) usercopy_abort("wrapped address", NULL, to_user, 0, ptr + n); /* Reject if NULL or ZERO-allocation. */ From ec9f02384f6053f2a5417e82b65078edc5364a8d Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Tue, 13 Aug 2019 15:37:41 -0700 Subject: [PATCH 397/482] mm: workingset: fix vmstat counters for shadow nodes Memcg counters for shadow nodes are broken because the memcg pointer is obtained in a wrong way. The following approach is used: virt_to_page(xa_node)->mem_cgroup Since commit 4d96ba353075 ("mm: memcg/slab: stop setting page->mem_cgroup pointer for slab pages") page->mem_cgroup pointer isn't set for slab pages, so memcg_from_slab_page() should be used instead. Also I doubt that it ever worked correctly: virt_to_head_page() should be used instead of virt_to_page(). Otherwise objects residing on tail pages are not accounted, because only the head page contains a valid mem_cgroup pointer. That was a case since the introduction of these counters by the commit 68d48e6a2df5 ("mm: workingset: add vmstat counter for shadow nodes"). Link: http://lkml.kernel.org/r/20190801233532.138743-1-guro@fb.com Fixes: 4d96ba353075 ("mm: memcg/slab: stop setting page->mem_cgroup pointer for slab pages") Signed-off-by: Roman Gushchin Acked-by: Johannes Weiner Cc: Vladimir Davydov Cc: Shakeel Butt Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 19 +++++++++++++++++++ mm/memcontrol.c | 20 ++++++++++++++++++++ mm/workingset.c | 10 ++++------ 3 files changed, 43 insertions(+), 6 deletions(-) diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 44c41462be33..2cd4359cb38c 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -668,6 +668,7 @@ static inline unsigned long lruvec_page_state_local(struct lruvec *lruvec, void __mod_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx, int val); +void __mod_lruvec_slab_state(void *p, enum node_stat_item idx, int val); static inline void mod_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx, int val) @@ -1072,6 +1073,14 @@ static inline void mod_lruvec_page_state(struct page *page, mod_node_page_state(page_pgdat(page), idx, val); } +static inline void __mod_lruvec_slab_state(void *p, enum node_stat_item idx, + int val) +{ + struct page *page = virt_to_head_page(p); + + __mod_node_page_state(page_pgdat(page), idx, val); +} + static inline unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order, gfp_t gfp_mask, @@ -1159,6 +1168,16 @@ static inline void __dec_lruvec_page_state(struct page *page, __mod_lruvec_page_state(page, idx, -1); } +static inline void __inc_lruvec_slab_state(void *p, enum node_stat_item idx) +{ + __mod_lruvec_slab_state(p, idx, 1); +} + +static inline void __dec_lruvec_slab_state(void *p, enum node_stat_item idx) +{ + __mod_lruvec_slab_state(p, idx, -1); +} + /* idx can be of type enum memcg_stat_item or node_stat_item */ static inline void inc_memcg_state(struct mem_cgroup *memcg, int idx) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 2e405e058eda..6f5c0c517c49 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -768,6 +768,26 @@ void __mod_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx, __this_cpu_write(pn->lruvec_stat_cpu->count[idx], x); } +void __mod_lruvec_slab_state(void *p, enum node_stat_item idx, int val) +{ + struct page *page = virt_to_head_page(p); + pg_data_t *pgdat = page_pgdat(page); + struct mem_cgroup *memcg; + struct lruvec *lruvec; + + rcu_read_lock(); + memcg = memcg_from_slab_page(page); + + /* Untracked pages have no memcg, no lruvec. Update only the node */ + if (!memcg || memcg == root_mem_cgroup) { + __mod_node_page_state(pgdat, idx, val); + } else { + lruvec = mem_cgroup_lruvec(pgdat, memcg); + __mod_lruvec_state(lruvec, idx, val); + } + rcu_read_unlock(); +} + /** * __count_memcg_events - account VM events in a cgroup * @memcg: the memory cgroup diff --git a/mm/workingset.c b/mm/workingset.c index e0b4edcb88c8..c963831d354f 100644 --- a/mm/workingset.c +++ b/mm/workingset.c @@ -380,14 +380,12 @@ void workingset_update_node(struct xa_node *node) if (node->count && node->count == node->nr_values) { if (list_empty(&node->private_list)) { list_lru_add(&shadow_nodes, &node->private_list); - __inc_lruvec_page_state(virt_to_page(node), - WORKINGSET_NODES); + __inc_lruvec_slab_state(node, WORKINGSET_NODES); } } else { if (!list_empty(&node->private_list)) { list_lru_del(&shadow_nodes, &node->private_list); - __dec_lruvec_page_state(virt_to_page(node), - WORKINGSET_NODES); + __dec_lruvec_slab_state(node, WORKINGSET_NODES); } } } @@ -480,7 +478,7 @@ static enum lru_status shadow_lru_isolate(struct list_head *item, } list_lru_isolate(lru, item); - __dec_lruvec_page_state(virt_to_page(node), WORKINGSET_NODES); + __dec_lruvec_slab_state(node, WORKINGSET_NODES); spin_unlock(lru_lock); @@ -503,7 +501,7 @@ static enum lru_status shadow_lru_isolate(struct list_head *item, * shadow entries we were tracking ... */ xas_store(&xas, NULL); - __inc_lruvec_page_state(virt_to_page(node), WORKINGSET_NODERECLAIM); + __inc_lruvec_slab_state(node, WORKINGSET_NODERECLAIM); out_invalid: xa_unlock_irq(&mapping->i_pages); From 6a2aeab59e97101b4001bac84388fc49a992f87e Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 13 Aug 2019 15:37:44 -0700 Subject: [PATCH 398/482] seq_file: fix problem when seeking mid-record If you use lseek or similar (e.g. pread) to access a location in a seq_file file that is within a record, rather than at a record boundary, then the first read will return the remainder of the record, and the second read will return the whole of that same record (instead of the next record). When seeking to a record boundary, the next record is correctly returned. This bug was introduced by a recent patch (identified below). Before that patch, seq_read() would increment m->index when the last of the buffer was returned (m->count == 0). After that patch, we rely on ->next to increment m->index after filling the buffer - but there was one place where that didn't happen. Link: https://lkml.kernel.org/lkml/877e7xl029.fsf@notabene.neil.brown.name/ Fixes: 1f4aace60b0e ("fs/seq_file.c: simplify seq_file iteration code and interface") Signed-off-by: NeilBrown Reported-by: Sergei Turchanov Tested-by: Sergei Turchanov Cc: Alexander Viro Cc: Markus Elfring Cc: [4.19+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/seq_file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/seq_file.c b/fs/seq_file.c index 04f09689cd6d..1600034a929b 100644 --- a/fs/seq_file.c +++ b/fs/seq_file.c @@ -119,6 +119,7 @@ static int traverse(struct seq_file *m, loff_t offset) } if (seq_has_overflowed(m)) goto Eoverflow; + p = m->op->next(m, p, &m->index); if (pos + m->count > offset) { m->from = offset - pos; m->count -= m->from; @@ -126,7 +127,6 @@ static int traverse(struct seq_file *m, loff_t offset) } pos += m->count; m->count = 0; - p = m->op->next(m, p, &m->index); if (pos == offset) break; } From 0cfaee2af3a04c0be5f056cebe5f804dedc59a43 Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Tue, 13 Aug 2019 15:37:47 -0700 Subject: [PATCH 399/482] include/asm-generic/5level-fixup.h: fix variable 'p4d' set but not used A compiler throws a warning on an arm64 system since commit 9849a5697d3d ("arch, mm: convert all architectures to use 5level-fixup.h"), mm/kasan/init.c: In function 'kasan_free_p4d': mm/kasan/init.c:344:9: warning: variable 'p4d' set but not used [-Wunused-but-set-variable] p4d_t *p4d; ^~~ because p4d_none() in "5level-fixup.h" is compiled away while it is a static inline function in "pgtable-nopud.h". However, if converted p4d_none() to a static inline there, powerpc would be unhappy as it reads those in assembler language in "arch/powerpc/include/asm/book3s/64/pgtable.h", so it needs to skip assembly include for the static inline C function. While at it, converted a few similar functions to be consistent with the ones in "pgtable-nopud.h". Link: http://lkml.kernel.org/r/20190806232917.881-1-cai@lca.pw Signed-off-by: Qian Cai Acked-by: Arnd Bergmann Cc: Kirill A. Shutemov Cc: Michal Hocko Cc: Jason Gunthorpe Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-generic/5level-fixup.h | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/include/asm-generic/5level-fixup.h b/include/asm-generic/5level-fixup.h index bb6cb347018c..f6947da70d71 100644 --- a/include/asm-generic/5level-fixup.h +++ b/include/asm-generic/5level-fixup.h @@ -19,9 +19,24 @@ #define p4d_alloc(mm, pgd, address) (pgd) #define p4d_offset(pgd, start) (pgd) -#define p4d_none(p4d) 0 -#define p4d_bad(p4d) 0 -#define p4d_present(p4d) 1 + +#ifndef __ASSEMBLY__ +static inline int p4d_none(p4d_t p4d) +{ + return 0; +} + +static inline int p4d_bad(p4d_t p4d) +{ + return 0; +} + +static inline int p4d_present(p4d_t p4d) +{ + return 1; +} +#endif + #define p4d_ERROR(p4d) do { } while (0) #define p4d_clear(p4d) pgd_clear(p4d) #define p4d_val(p4d) pgd_val(p4d) From 92717d429b38e4f9f934eed7e605cc42858f1839 Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Tue, 13 Aug 2019 15:37:50 -0700 Subject: [PATCH 400/482] Revert "Revert "mm, thp: consolidate THP gfp handling into alloc_hugepage_direct_gfpmask"" Patch series "reapply: relax __GFP_THISNODE for MADV_HUGEPAGE mappings". The fixes for what was originally reported as "pathological THP behavior" we rightfully reverted to be sure not to introduced regressions at end of a merge window after a severe regression report from the kernel bot. We can safely re-apply them now that we had time to analyze the problem. The mm process worked fine, because the good fixes were eventually committed upstream without excessive delay. The regression reported by the kernel bot however forced us to revert the good fixes to be sure not to introduce regressions and to give us the time to analyze the issue further. The silver lining is that this extra time allowed to think more at this issue and also plan for a future direction to improve things further in terms of THP NUMA locality. This patch (of 2): This reverts commit 356ff8a9a78fb35d ("Revert "mm, thp: consolidate THP gfp handling into alloc_hugepage_direct_gfpmask"). So it reapplies 89c83fb539f954 ("mm, thp: consolidate THP gfp handling into alloc_hugepage_direct_gfpmask"). Consolidation of the THP allocation flags at the same place was meant to be a clean up to easier handle otherwise scattered code which is imposing a maintenance burden. There were no real problems observed with the gfp mask consolidation but the reversion was rushed through without a larger consensus regardless. This patch brings the consolidation back because this should make the long term maintainability easier as well as it should allow future changes to be less error prone. [mhocko@kernel.org: changelog additions] Link: http://lkml.kernel.org/r/20190503223146.2312-2-aarcange@redhat.com Signed-off-by: Andrea Arcangeli Acked-by: Michal Hocko Cc: Mel Gorman Cc: Vlastimil Babka Cc: David Rientjes Cc: Zi Yan Cc: Stefan Priebe - Profihost AG Cc: "Kirill A. Shutemov" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/gfp.h | 12 ++++-------- mm/huge_memory.c | 27 ++++++++++++++------------- mm/mempolicy.c | 32 +++----------------------------- mm/shmem.c | 2 +- 4 files changed, 22 insertions(+), 51 deletions(-) diff --git a/include/linux/gfp.h b/include/linux/gfp.h index fb07b503dc45..f33881688f42 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -510,22 +510,18 @@ alloc_pages(gfp_t gfp_mask, unsigned int order) } extern struct page *alloc_pages_vma(gfp_t gfp_mask, int order, struct vm_area_struct *vma, unsigned long addr, - int node, bool hugepage); -#define alloc_hugepage_vma(gfp_mask, vma, addr, order) \ - alloc_pages_vma(gfp_mask, order, vma, addr, numa_node_id(), true) + int node); #else #define alloc_pages(gfp_mask, order) \ alloc_pages_node(numa_node_id(), gfp_mask, order) -#define alloc_pages_vma(gfp_mask, order, vma, addr, node, false)\ - alloc_pages(gfp_mask, order) -#define alloc_hugepage_vma(gfp_mask, vma, addr, order) \ +#define alloc_pages_vma(gfp_mask, order, vma, addr, node)\ alloc_pages(gfp_mask, order) #endif #define alloc_page(gfp_mask) alloc_pages(gfp_mask, 0) #define alloc_page_vma(gfp_mask, vma, addr) \ - alloc_pages_vma(gfp_mask, 0, vma, addr, numa_node_id(), false) + alloc_pages_vma(gfp_mask, 0, vma, addr, numa_node_id()) #define alloc_page_vma_node(gfp_mask, vma, addr, node) \ - alloc_pages_vma(gfp_mask, 0, vma, addr, node, false) + alloc_pages_vma(gfp_mask, 0, vma, addr, node) extern unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order); extern unsigned long get_zeroed_page(gfp_t gfp_mask); diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 1334ede667a8..f7e388b8662d 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -644,30 +644,30 @@ static vm_fault_t __do_huge_pmd_anonymous_page(struct vm_fault *vmf, * available * never: never stall for any thp allocation */ -static inline gfp_t alloc_hugepage_direct_gfpmask(struct vm_area_struct *vma) +static inline gfp_t alloc_hugepage_direct_gfpmask(struct vm_area_struct *vma, unsigned long addr) { const bool vma_madvised = !!(vma->vm_flags & VM_HUGEPAGE); + const gfp_t gfp_mask = GFP_TRANSHUGE_LIGHT | __GFP_THISNODE; /* Always do synchronous compaction */ if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags)) - return GFP_TRANSHUGE | (vma_madvised ? 0 : __GFP_NORETRY); + return GFP_TRANSHUGE | __GFP_THISNODE | + (vma_madvised ? 0 : __GFP_NORETRY); /* Kick kcompactd and fail quickly */ if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags)) - return GFP_TRANSHUGE_LIGHT | __GFP_KSWAPD_RECLAIM; + return gfp_mask | __GFP_KSWAPD_RECLAIM; /* Synchronous compaction if madvised, otherwise kick kcompactd */ if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags)) - return GFP_TRANSHUGE_LIGHT | - (vma_madvised ? __GFP_DIRECT_RECLAIM : - __GFP_KSWAPD_RECLAIM); + return gfp_mask | (vma_madvised ? __GFP_DIRECT_RECLAIM : + __GFP_KSWAPD_RECLAIM); /* Only do synchronous compaction if madvised */ if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags)) - return GFP_TRANSHUGE_LIGHT | - (vma_madvised ? __GFP_DIRECT_RECLAIM : 0); + return gfp_mask | (vma_madvised ? __GFP_DIRECT_RECLAIM : 0); - return GFP_TRANSHUGE_LIGHT; + return gfp_mask; } /* Caller must hold page table lock. */ @@ -739,8 +739,8 @@ vm_fault_t do_huge_pmd_anonymous_page(struct vm_fault *vmf) pte_free(vma->vm_mm, pgtable); return ret; } - gfp = alloc_hugepage_direct_gfpmask(vma); - page = alloc_hugepage_vma(gfp, vma, haddr, HPAGE_PMD_ORDER); + gfp = alloc_hugepage_direct_gfpmask(vma, haddr); + page = alloc_pages_vma(gfp, HPAGE_PMD_ORDER, vma, haddr, numa_node_id()); if (unlikely(!page)) { count_vm_event(THP_FAULT_FALLBACK); return VM_FAULT_FALLBACK; @@ -1347,8 +1347,9 @@ vm_fault_t do_huge_pmd_wp_page(struct vm_fault *vmf, pmd_t orig_pmd) alloc: if (__transparent_hugepage_enabled(vma) && !transparent_hugepage_debug_cow()) { - huge_gfp = alloc_hugepage_direct_gfpmask(vma); - new_page = alloc_hugepage_vma(huge_gfp, vma, haddr, HPAGE_PMD_ORDER); + huge_gfp = alloc_hugepage_direct_gfpmask(vma, haddr); + new_page = alloc_pages_vma(huge_gfp, HPAGE_PMD_ORDER, vma, + haddr, numa_node_id()); } else new_page = NULL; diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 547cd403ed02..9c9877a43d58 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -1180,8 +1180,8 @@ static struct page *new_page(struct page *page, unsigned long start) } else if (PageTransHuge(page)) { struct page *thp; - thp = alloc_hugepage_vma(GFP_TRANSHUGE, vma, address, - HPAGE_PMD_ORDER); + thp = alloc_pages_vma(GFP_TRANSHUGE, HPAGE_PMD_ORDER, vma, + address, numa_node_id()); if (!thp) return NULL; prep_transhuge_page(thp); @@ -2083,7 +2083,6 @@ static struct page *alloc_page_interleave(gfp_t gfp, unsigned order, * @vma: Pointer to VMA or NULL if not available. * @addr: Virtual Address of the allocation. Must be inside the VMA. * @node: Which node to prefer for allocation (modulo policy). - * @hugepage: for hugepages try only the preferred node if possible * * This function allocates a page from the kernel page pool and applies * a NUMA policy associated with the VMA or the current process. @@ -2094,7 +2093,7 @@ static struct page *alloc_page_interleave(gfp_t gfp, unsigned order, */ struct page * alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma, - unsigned long addr, int node, bool hugepage) + unsigned long addr, int node) { struct mempolicy *pol; struct page *page; @@ -2112,31 +2111,6 @@ alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma, goto out; } - if (unlikely(IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && hugepage)) { - int hpage_node = node; - - /* - * For hugepage allocation and non-interleave policy which - * allows the current node (or other explicitly preferred - * node) we only try to allocate from the current/preferred - * node and don't fall back to other nodes, as the cost of - * remote accesses would likely offset THP benefits. - * - * If the policy is interleave, or does not allow the current - * node in its nodemask, we allocate the standard way. - */ - if (pol->mode == MPOL_PREFERRED && !(pol->flags & MPOL_F_LOCAL)) - hpage_node = pol->v.preferred_node; - - nmask = policy_nodemask(gfp, pol); - if (!nmask || node_isset(hpage_node, *nmask)) { - mpol_cond_put(pol); - page = __alloc_pages_node(hpage_node, - gfp | __GFP_THISNODE, order); - goto out; - } - } - nmask = policy_nodemask(gfp, pol); preferred_nid = policy_node(gfp, pol, node); page = __alloc_pages_nodemask(gfp, order, preferred_nid, nmask); diff --git a/mm/shmem.c b/mm/shmem.c index 626d8c74b973..2bed4761f279 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1466,7 +1466,7 @@ static struct page *shmem_alloc_hugepage(gfp_t gfp, shmem_pseudo_vma_init(&pvma, info, hindex); page = alloc_pages_vma(gfp | __GFP_COMP | __GFP_NORETRY | __GFP_NOWARN, - HPAGE_PMD_ORDER, &pvma, 0, numa_node_id(), true); + HPAGE_PMD_ORDER, &pvma, 0, numa_node_id()); shmem_pseudo_vma_destroy(&pvma); if (page) prep_transhuge_page(page); From a8282608c88e08b1782141026eab61204c1e533f Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Tue, 13 Aug 2019 15:37:53 -0700 Subject: [PATCH 401/482] Revert "mm, thp: restore node-local hugepage allocations" This reverts commit 2f0799a0ffc033b ("mm, thp: restore node-local hugepage allocations"). commit 2f0799a0ffc033b was rightfully applied to avoid the risk of a severe regression that was reported by the kernel test robot at the end of the merge window. Now we understood the regression was a false positive and was caused by a significant increase in fairness during a swap trashing benchmark. So it's safe to re-apply the fix and continue improving the code from there. The benchmark that reported the regression is very useful, but it provides a meaningful result only when there is no significant alteration in fairness during the workload. The removal of __GFP_THISNODE increased fairness. __GFP_THISNODE cannot be used in the generic page faults path for new memory allocations under the MPOL_DEFAULT mempolicy, or the allocation behavior significantly deviates from what the MPOL_DEFAULT semantics are supposed to be for THP and 4k allocations alike. Setting THP defrag to "always" or using MADV_HUGEPAGE (with THP defrag set to "madvise") has never meant to provide an implicit MPOL_BIND on the "current" node the task is running on, causing swap storms and providing a much more aggressive behavior than even zone_reclaim_node = 3. Any workload who could have benefited from __GFP_THISNODE has now to enable zone_reclaim_mode=1||2||3. __GFP_THISNODE implicitly provided the zone_reclaim_mode behavior, but it only did so if THP was enabled: if THP was disabled, there would have been no chance to get any 4k page from the current node if the current node was full of pagecache, which further shows how this __GFP_THISNODE was misplaced in MADV_HUGEPAGE. MADV_HUGEPAGE has never been intended to provide any zone_reclaim_mode semantics, in fact the two are orthogonal, zone_reclaim_mode = 1|2|3 must work exactly the same with MADV_HUGEPAGE set or not. The performance characteristic of memory depends on the hardware details. The numbers below are obtained on Naples/EPYC architecture and the N/A projection extends them to show what we should aim for in the future as a good THP NUMA locality default. The benchmark used exercises random memory seeks (note: the cost of the page faults is not part of the measurement). D0 THP | D0 4k | D1 THP | D1 4k | D2 THP | D2 4k | D3 THP | D3 4k | ... 0% | +43% | +45% | +106% | +131% | +224% | N/A | N/A D0 means distance zero (i.e. local memory), D1 means distance one (i.e. intra socket memory), D2 means distance two (i.e. inter socket memory), etc... For the guest physical memory allocated by qemu and for guest mode kernel the performance characteristic of RAM is more complex and an ideal default could be: D0 THP | D1 THP | D0 4k | D2 THP | D1 4k | D3 THP | D2 4k | D3 4k | ... 0% | +58% | +101% | N/A | +222% | N/A | N/A | N/A NOTE: the N/A are projections and haven't been measured yet, the measurement in this case is done on a 1950x with only two NUMA nodes. The THP case here means THP was used both in the host and in the guest. After applying this commit the THP NUMA locality order that we'll get out of MADV_HUGEPAGE is this: D0 THP | D1 THP | D2 THP | D3 THP | ... | D0 4k | D1 4k | D2 4k | D3 4k | ... Before this commit it was: D0 THP | D0 4k | D1 4k | D2 4k | D3 4k | ... Even if we ignore the breakage of large workloads that can't fit in a single node that the __GFP_THISNODE implicit "current node" mbind caused, the THP NUMA locality order provided by __GFP_THISNODE was still not the one we shall aim for in the long term (i.e. the first one at the top). After this commit is applied, we can introduce a new allocator multi order API and to replace those two alloc_pages_vmas calls in the page fault path, with a single multi order call: unsigned int order = (1 << HPAGE_PMD_ORDER) | (1 << 0); page = alloc_pages_multi_order(..., &order); if (!page) goto out; if (!(order & (1 << 0))) { VM_WARN_ON(order != 1 << HPAGE_PMD_ORDER); /* THP fault */ } else { VM_WARN_ON(order != 1 << 0); /* 4k fallback */ } The page allocator logic has to be altered so that when it fails on any zone with order 9, it has to try again with a order 0 before falling back to the next zone in the zonelist. After that we need to do more measurements and evaluate if adding an opt-in feature for guest mode is worth it, to swap "DN 4k | DN+1 THP" with "DN+1 THP | DN 4k" at every NUMA distance crossing. Link: http://lkml.kernel.org/r/20190503223146.2312-3-aarcange@redhat.com Signed-off-by: Andrea Arcangeli Acked-by: Michal Hocko Acked-by: Mel Gorman Cc: Vlastimil Babka Cc: David Rientjes Cc: Zi Yan Cc: Stefan Priebe - Profihost AG Cc: "Kirill A. Shutemov" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mempolicy.h | 2 ++ mm/huge_memory.c | 42 ++++++++++++++++++++++++--------------- mm/mempolicy.c | 2 +- 3 files changed, 29 insertions(+), 17 deletions(-) diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h index 5228c62af416..bac395f1d00a 100644 --- a/include/linux/mempolicy.h +++ b/include/linux/mempolicy.h @@ -139,6 +139,8 @@ struct mempolicy *mpol_shared_policy_lookup(struct shared_policy *sp, struct mempolicy *get_task_policy(struct task_struct *p); struct mempolicy *__get_vma_policy(struct vm_area_struct *vma, unsigned long addr); +struct mempolicy *get_vma_policy(struct vm_area_struct *vma, + unsigned long addr); bool vma_policy_mof(struct vm_area_struct *vma); extern void numa_default_policy(void); diff --git a/mm/huge_memory.c b/mm/huge_memory.c index f7e388b8662d..738065f765ab 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -647,27 +647,37 @@ static vm_fault_t __do_huge_pmd_anonymous_page(struct vm_fault *vmf, static inline gfp_t alloc_hugepage_direct_gfpmask(struct vm_area_struct *vma, unsigned long addr) { const bool vma_madvised = !!(vma->vm_flags & VM_HUGEPAGE); - const gfp_t gfp_mask = GFP_TRANSHUGE_LIGHT | __GFP_THISNODE; + gfp_t this_node = 0; + +#ifdef CONFIG_NUMA + struct mempolicy *pol; + /* + * __GFP_THISNODE is used only when __GFP_DIRECT_RECLAIM is not + * specified, to express a general desire to stay on the current + * node for optimistic allocation attempts. If the defrag mode + * and/or madvise hint requires the direct reclaim then we prefer + * to fallback to other node rather than node reclaim because that + * can lead to excessive reclaim even though there is free memory + * on other nodes. We expect that NUMA preferences are specified + * by memory policies. + */ + pol = get_vma_policy(vma, addr); + if (pol->mode != MPOL_BIND) + this_node = __GFP_THISNODE; + mpol_cond_put(pol); +#endif - /* Always do synchronous compaction */ if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags)) - return GFP_TRANSHUGE | __GFP_THISNODE | - (vma_madvised ? 0 : __GFP_NORETRY); - - /* Kick kcompactd and fail quickly */ + return GFP_TRANSHUGE | (vma_madvised ? 0 : __GFP_NORETRY); if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags)) - return gfp_mask | __GFP_KSWAPD_RECLAIM; - - /* Synchronous compaction if madvised, otherwise kick kcompactd */ + return GFP_TRANSHUGE_LIGHT | __GFP_KSWAPD_RECLAIM | this_node; if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags)) - return gfp_mask | (vma_madvised ? __GFP_DIRECT_RECLAIM : - __GFP_KSWAPD_RECLAIM); - - /* Only do synchronous compaction if madvised */ + return GFP_TRANSHUGE_LIGHT | (vma_madvised ? __GFP_DIRECT_RECLAIM : + __GFP_KSWAPD_RECLAIM | this_node); if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags)) - return gfp_mask | (vma_madvised ? __GFP_DIRECT_RECLAIM : 0); - - return gfp_mask; + return GFP_TRANSHUGE_LIGHT | (vma_madvised ? __GFP_DIRECT_RECLAIM : + this_node); + return GFP_TRANSHUGE_LIGHT | this_node; } /* Caller must hold page table lock. */ diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 9c9877a43d58..65e0874fce17 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -1734,7 +1734,7 @@ struct mempolicy *__get_vma_policy(struct vm_area_struct *vma, * freeing by another task. It is the caller's responsibility to free the * extra reference for shared policies. */ -static struct mempolicy *get_vma_policy(struct vm_area_struct *vma, +struct mempolicy *get_vma_policy(struct vm_area_struct *vma, unsigned long addr) { struct mempolicy *pol = __get_vma_policy(vma, addr); From 28360f398778d7623a5ff8a8e90958c0d925e120 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Tue, 13 Aug 2019 15:37:57 -0700 Subject: [PATCH 402/482] mm, vmscan: do not special-case slab reclaim when watermarks are boosted Dave Chinner reported a problem pointing a finger at commit 1c30844d2dfe ("mm: reclaim small amounts of memory when an external fragmentation event occurs"). The report is extensive: https://lore.kernel.org/linux-mm/20190807091858.2857-1-david@fromorbit.com/ and it's worth recording the most relevant parts (colorful language and typos included). When running a simple, steady state 4kB file creation test to simulate extracting tarballs larger than memory full of small files into the filesystem, I noticed that once memory fills up the cache balance goes to hell. The workload is creating one dirty cached inode for every dirty page, both of which should require a single IO each to clean and reclaim, and creation of inodes is throttled by the rate at which dirty writeback runs at (via balance dirty pages). Hence the ingest rate of new cached inodes and page cache pages is identical and steady. As a result, memory reclaim should quickly find a steady balance between page cache and inode caches. The moment memory fills, the page cache is reclaimed at a much faster rate than the inode cache, and evidence suggests that the inode cache shrinker is not being called when large batches of pages are being reclaimed. In roughly the same time period that it takes to fill memory with 50% pages and 50% slab caches, memory reclaim reduces the page cache down to just dirty pages and slab caches fill the entirety of memory. The LRU is largely full of dirty pages, and we're getting spikes of random writeback from memory reclaim so it's all going to shit. Behaviour never recovers, the page cache remains pinned at just dirty pages, and nothing I could tune would make any difference. vfs_cache_pressure makes no difference - I would set it so high it should trim the entire inode caches in a single pass, yet it didn't do anything. It was clear from tracing and live telemetry that the shrinkers were pretty much not running except when there was absolutely no memory free at all, and then they did the minimum necessary to free memory to make progress. So I went looking at the code, trying to find places where pages got reclaimed and the shrinkers weren't called. There's only one - kswapd doing boosted reclaim as per commit 1c30844d2dfe ("mm: reclaim small amounts of memory when an external fragmentation event occurs"). The watermark boosting introduced by the commit is triggered in response to an allocation "fragmentation event". The boosting was not intended to target THP specifically and triggers even if THP is disabled. However, with Dave's perfectly reasonable workload, fragmentation events can be very common given the ratio of slab to page cache allocations so boosting remains active for long periods of time. As high-order allocations might use compaction and compaction cannot move slab pages the decision was made in the commit to special-case kswapd when watermarks are boosted -- kswapd avoids reclaiming slab as reclaiming slab does not directly help compaction. As Dave notes, this decision means that slab can be artificially protected for long periods of time and messes up the balance with slab and page caches. Removing the special casing can still indirectly help avoid fragmentation by avoiding fragmentation-causing events due to slab allocation as pages from a slab pageblock will have some slab objects freed. Furthermore, with the special casing, reclaim behaviour is unpredictable as kswapd sometimes examines slab and sometimes does not in a manner that is tricky to tune or analyse. This patch removes the special casing. The downside is that this is not a universal performance win. Some benchmarks that depend on the residency of data when rereading metadata may see a regression when slab reclaim is restored to its original behaviour. Similarly, some benchmarks that only read-once or write-once may perform better when page reclaim is too aggressive. The primary upside is that slab shrinker is less surprising (arguably more sane but that's a matter of opinion), behaves consistently regardless of the fragmentation state of the system and properly obeys VM sysctls. A fsmark benchmark configuration was constructed similar to what Dave reported and is codified by the mmtest configuration config-io-fsmark-small-file-stream. It was evaluated on a 1-socket machine to avoid dealing with NUMA-related issues and the timing of reclaim. The storage was an SSD Samsung Evo and a fresh trimmed XFS filesystem was used for the test data. This is not an exact replication of Dave's setup. The configuration scales its parameters depending on the memory size of the SUT to behave similarly across machines. The parameters mean the first sample reported by fs_mark is using 50% of RAM which will barely be throttled and look like a big outlier. Dave used fake NUMA to have multiple kswapd instances which I didn't replicate. Finally, the number of iterations differ from Dave's test as the target disk was not large enough. While not identical, it should be representative. fsmark 5.3.0-rc3 5.3.0-rc3 vanilla shrinker-v1r1 Min 1-files/sec 4444.80 ( 0.00%) 4765.60 ( 7.22%) 1st-qrtle 1-files/sec 5005.10 ( 0.00%) 5091.70 ( 1.73%) 2nd-qrtle 1-files/sec 4917.80 ( 0.00%) 4855.60 ( -1.26%) 3rd-qrtle 1-files/sec 4667.40 ( 0.00%) 4831.20 ( 3.51%) Max-1 1-files/sec 11421.50 ( 0.00%) 9999.30 ( -12.45%) Max-5 1-files/sec 11421.50 ( 0.00%) 9999.30 ( -12.45%) Max-10 1-files/sec 11421.50 ( 0.00%) 9999.30 ( -12.45%) Max-90 1-files/sec 4649.60 ( 0.00%) 4780.70 ( 2.82%) Max-95 1-files/sec 4491.00 ( 0.00%) 4768.20 ( 6.17%) Max-99 1-files/sec 4491.00 ( 0.00%) 4768.20 ( 6.17%) Max 1-files/sec 11421.50 ( 0.00%) 9999.30 ( -12.45%) Hmean 1-files/sec 5004.75 ( 0.00%) 5075.96 ( 1.42%) Stddev 1-files/sec 1778.70 ( 0.00%) 1369.66 ( 23.00%) CoeffVar 1-files/sec 33.70 ( 0.00%) 26.05 ( 22.71%) BHmean-99 1-files/sec 5053.72 ( 0.00%) 5101.52 ( 0.95%) BHmean-95 1-files/sec 5053.72 ( 0.00%) 5101.52 ( 0.95%) BHmean-90 1-files/sec 5107.05 ( 0.00%) 5131.41 ( 0.48%) BHmean-75 1-files/sec 5208.45 ( 0.00%) 5206.68 ( -0.03%) BHmean-50 1-files/sec 5405.53 ( 0.00%) 5381.62 ( -0.44%) BHmean-25 1-files/sec 6179.75 ( 0.00%) 6095.14 ( -1.37%) 5.3.0-rc3 5.3.0-rc3 vanillashrinker-v1r1 Duration User 501.82 497.29 Duration System 4401.44 4424.08 Duration Elapsed 8124.76 8358.05 This is showing a slight skew for the max result representing a large outlier for the 1st, 2nd and 3rd quartile are similar indicating that the bulk of the results show little difference. Note that an earlier version of the fsmark configuration showed a regression but that included more samples taken while memory was still filling. Note that the elapsed time is higher. Part of this is that the configuration included time to delete all the test files when the test completes -- the test automation handles the possibility of testing fsmark with multiple thread counts. Without the patch, many of these objects would be memory resident which is part of what the patch is addressing. There are other important observations that justify the patch. 1. With the vanilla kernel, the number of dirty pages in the system is very low for much of the test. With this patch, dirty pages is generally kept at 10% which matches vm.dirty_background_ratio which is normal expected historical behaviour. 2. With the vanilla kernel, the ratio of Slab/Pagecache is close to 0.95 for much of the test i.e. Slab is being left alone and dominating memory consumption. With the patch applied, the ratio varies between 0.35 and 0.45 with the bulk of the measured ratios roughly half way between those values. This is a different balance to what Dave reported but it was at least consistent. 3. Slabs are scanned throughout the entire test with the patch applied. The vanille kernel has periods with no scan activity and then relatively massive spikes. 4. Without the patch, kswapd scan rates are very variable. With the patch, the scan rates remain quite steady. 4. Overall vmstats are closer to normal expectations 5.3.0-rc3 5.3.0-rc3 vanilla shrinker-v1r1 Ops Direct pages scanned 99388.00 328410.00 Ops Kswapd pages scanned 45382917.00 33451026.00 Ops Kswapd pages reclaimed 30869570.00 25239655.00 Ops Direct pages reclaimed 74131.00 5830.00 Ops Kswapd efficiency % 68.02 75.45 Ops Kswapd velocity 5585.75 4002.25 Ops Page reclaim immediate 1179721.00 430927.00 Ops Slabs scanned 62367361.00 73581394.00 Ops Direct inode steals 2103.00 1002.00 Ops Kswapd inode steals 570180.00 5183206.00 o Vanilla kernel is hitting direct reclaim more frequently, not very much in absolute terms but the fact the patch reduces it is interesting o "Page reclaim immediate" in the vanilla kernel indicates dirty pages are being encountered at the tail of the LRU. This is generally bad and means in this case that the LRU is not long enough for dirty pages to be cleaned by the background flush in time. This is much reduced by the patch. o With the patch, kswapd is reclaiming 10 times more slab pages than with the vanilla kernel. This is indicative of the watermark boosting over-protecting slab A more complete set of tests were run that were part of the basis for introducing boosting and while there are some differences, they are well within tolerances. Bottom line, the special casing kswapd to avoid slab behaviour is unpredictable and can lead to abnormal results for normal workloads. This patch restores the expected behaviour that slab and page cache is balanced consistently for a workload with a steady allocation ratio of slab/pagecache pages. It also means that if there are workloads that favour the preservation of slab over pagecache that it can be tuned via vm.vfs_cache_pressure where as the vanilla kernel effectively ignores the parameter when boosting is active. Link: http://lkml.kernel.org/r/20190808182946.GM2739@techsingularity.net Fixes: 1c30844d2dfe ("mm: reclaim small amounts of memory when an external fragmentation event occurs") Signed-off-by: Mel Gorman Reviewed-by: Dave Chinner Acked-by: Vlastimil Babka Cc: Michal Hocko Cc: [5.0+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/vmscan.c | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/mm/vmscan.c b/mm/vmscan.c index dbdc46a84f63..c77d1e3761a7 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -88,9 +88,6 @@ struct scan_control { /* Can pages be swapped as part of reclaim? */ unsigned int may_swap:1; - /* e.g. boosted watermark reclaim leaves slabs alone */ - unsigned int may_shrinkslab:1; - /* * Cgroups are not reclaimed below their configured memory.low, * unless we threaten to OOM. If any cgroups are skipped due to @@ -2714,10 +2711,8 @@ static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc) shrink_node_memcg(pgdat, memcg, sc, &lru_pages); node_lru_pages += lru_pages; - if (sc->may_shrinkslab) { - shrink_slab(sc->gfp_mask, pgdat->node_id, - memcg, sc->priority); - } + shrink_slab(sc->gfp_mask, pgdat->node_id, memcg, + sc->priority); /* Record the group's reclaim efficiency */ vmpressure(sc->gfp_mask, memcg, false, @@ -3194,7 +3189,6 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order, .may_writepage = !laptop_mode, .may_unmap = 1, .may_swap = 1, - .may_shrinkslab = 1, }; /* @@ -3238,7 +3232,6 @@ unsigned long mem_cgroup_shrink_node(struct mem_cgroup *memcg, .may_unmap = 1, .reclaim_idx = MAX_NR_ZONES - 1, .may_swap = !noswap, - .may_shrinkslab = 1, }; unsigned long lru_pages; @@ -3286,7 +3279,6 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg, .may_writepage = !laptop_mode, .may_unmap = 1, .may_swap = may_swap, - .may_shrinkslab = 1, }; set_task_reclaim_state(current, &sc.reclaim_state); @@ -3598,7 +3590,6 @@ static int balance_pgdat(pg_data_t *pgdat, int order, int classzone_idx) */ sc.may_writepage = !laptop_mode && !nr_boost_reclaim; sc.may_swap = !nr_boost_reclaim; - sc.may_shrinkslab = !nr_boost_reclaim; /* * Do some background aging of the anon list, to give From 4643d67e8cb0b3536ef0ab5cddd1cedc73fa14ad Mon Sep 17 00:00:00 2001 From: Mike Kravetz Date: Tue, 13 Aug 2019 15:38:00 -0700 Subject: [PATCH 403/482] hugetlbfs: fix hugetlb page migration/fault race causing SIGBUS Li Wang discovered that LTP/move_page12 V2 sometimes triggers SIGBUS in the kernel-v5.2.3 testing. This is caused by a race between hugetlb page migration and page fault. If a hugetlb page can not be allocated to satisfy a page fault, the task is sent SIGBUS. This is normal hugetlbfs behavior. A hugetlb fault mutex exists to prevent two tasks from trying to instantiate the same page. This protects against the situation where there is only one hugetlb page, and both tasks would try to allocate. Without the mutex, one would fail and SIGBUS even though the other fault would be successful. There is a similar race between hugetlb page migration and fault. Migration code will allocate a page for the target of the migration. It will then unmap the original page from all page tables. It does this unmap by first clearing the pte and then writing a migration entry. The page table lock is held for the duration of this clear and write operation. However, the beginnings of the hugetlb page fault code optimistically checks the pte without taking the page table lock. If clear (as it can be during the migration unmap operation), a hugetlb page allocation is attempted to satisfy the fault. Note that the page which will eventually satisfy this fault was already allocated by the migration code. However, the allocation within the fault path could fail which would result in the task incorrectly being sent SIGBUS. Ideally, we could take the hugetlb fault mutex in the migration code when modifying the page tables. However, locks must be taken in the order of hugetlb fault mutex, page lock, page table lock. This would require significant rework of the migration code. Instead, the issue is addressed in the hugetlb fault code. After failing to allocate a huge page, take the page table lock and check for huge_pte_none before returning an error. This is the same check that must be made further in the code even if page allocation is successful. Link: http://lkml.kernel.org/r/20190808000533.7701-1-mike.kravetz@oracle.com Fixes: 290408d4a250 ("hugetlb: hugepage migration core") Signed-off-by: Mike Kravetz Reported-by: Li Wang Tested-by: Li Wang Reviewed-by: Naoya Horiguchi Acked-by: Michal Hocko Cc: Cyril Hrubis Cc: Xishi Qiu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/hugetlb.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index ede7e7f5d1ab..6d7296dd11b8 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -3856,6 +3856,25 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm, page = alloc_huge_page(vma, haddr, 0); if (IS_ERR(page)) { + /* + * Returning error will result in faulting task being + * sent SIGBUS. The hugetlb fault mutex prevents two + * tasks from racing to fault in the same page which + * could result in false unable to allocate errors. + * Page migration does not take the fault mutex, but + * does a clear then write of pte's under page table + * lock. Page fault code could race with migration, + * notice the clear pte and try to allocate a page + * here. Before returning error, get ptl and make + * sure there really is no pte entry. + */ + ptl = huge_pte_lock(h, mm, ptep); + if (!huge_pte_none(huge_ptep_get(ptep))) { + ret = 0; + spin_unlock(ptl); + goto out; + } + spin_unlock(ptl); ret = vmf_error(PTR_ERR(page)); goto out; } From a1794de8b92ea6bc2037f445b296814ac826693e Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 12 Aug 2019 20:49:12 +0800 Subject: [PATCH 404/482] sctp: fix the transport error_count check As the annotation says in sctp_do_8_2_transport_strike(): "If the transport error count is greater than the pf_retrans threshold, and less than pathmaxrtx ..." It should be transport->error_count checked with pathmaxrxt, instead of asoc->pf_retrans. Fixes: 5aa93bcf66f4 ("sctp: Implement quick failover draft from tsvwg") Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: Jakub Kicinski --- net/sctp/sm_sideeffect.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index a554d6d15d1b..1cf5bb5b73c4 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -546,7 +546,7 @@ static void sctp_do_8_2_transport_strike(struct sctp_cmd_seq *commands, */ if (net->sctp.pf_enable && (transport->state == SCTP_ACTIVE) && - (asoc->pf_retrans < transport->pathmaxrxt) && + (transport->error_count < transport->pathmaxrxt) && (transport->error_count > asoc->pf_retrans)) { sctp_assoc_control_transport(asoc, transport, From 500bc2c1f48aa6efad9474fdbe0c7cd8d600484c Mon Sep 17 00:00:00 2001 From: Alistair Francis Date: Tue, 13 Aug 2019 16:32:29 -0700 Subject: [PATCH 405/482] riscv: rv32_defconfig: Update the defconfig Update the rv32_defconfig: - Add 'CONFIG_DEVTMPFS_MOUNT=y' to match the RISC-V defconfig - Add CONFIG_HW_RANDOM=y and CONFIG_HW_RANDOM_VIRTIO=y to enable VirtIORNG when running on QEMU Signed-off-by: Alistair Francis Signed-off-by: Paul Walmsley --- arch/riscv/configs/rv32_defconfig | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/riscv/configs/rv32_defconfig b/arch/riscv/configs/rv32_defconfig index d5449ef805a3..7da93e494445 100644 --- a/arch/riscv/configs/rv32_defconfig +++ b/arch/riscv/configs/rv32_defconfig @@ -34,6 +34,7 @@ CONFIG_PCIEPORTBUS=y CONFIG_PCI_HOST_GENERIC=y CONFIG_PCIE_XILINX=y CONFIG_DEVTMPFS=y +CONFIG_DEVTMPFS_MOUNT=y CONFIG_BLK_DEV_LOOP=y CONFIG_VIRTIO_BLK=y CONFIG_BLK_DEV_SD=y @@ -53,6 +54,8 @@ CONFIG_SERIAL_8250_CONSOLE=y CONFIG_SERIAL_OF_PLATFORM=y CONFIG_SERIAL_EARLYCON_RISCV_SBI=y CONFIG_HVC_RISCV_SBI=y +CONFIG_HW_RANDOM=y +CONFIG_HW_RANDOM_VIRTIO=y # CONFIG_PTP_1588_CLOCK is not set CONFIG_DRM=y CONFIG_DRM_RADEON=y From d568cb3f935123b4fb42b9029ebcfebc4e4533a2 Mon Sep 17 00:00:00 2001 From: Alistair Francis Date: Tue, 13 Aug 2019 16:32:30 -0700 Subject: [PATCH 406/482] riscv: defconfig: Update the defconfig Update the defconfig: - Add CONFIG_HW_RANDOM=y and CONFIG_HW_RANDOM_VIRTIO=y to enable VirtIORNG when running on QEMU Signed-off-by: Alistair Francis Signed-off-by: Paul Walmsley --- arch/riscv/configs/defconfig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/riscv/configs/defconfig b/arch/riscv/configs/defconfig index 93205c0bf71d..3efff552a261 100644 --- a/arch/riscv/configs/defconfig +++ b/arch/riscv/configs/defconfig @@ -54,6 +54,8 @@ CONFIG_SERIAL_8250_CONSOLE=y CONFIG_SERIAL_OF_PLATFORM=y CONFIG_SERIAL_EARLYCON_RISCV_SBI=y CONFIG_HVC_RISCV_SBI=y +CONFIG_HW_RANDOM=y +CONFIG_HW_RANDOM_VIRTIO=y CONFIG_SPI=y CONFIG_SPI_SIFIVE=y # CONFIG_PTP_1588_CLOCK is not set From 072f79400032f74917726cf76f4248367ea2b5b8 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Mon, 12 Aug 2019 16:44:35 +0200 Subject: [PATCH 407/482] s390/qeth: serialize cmd reply with concurrent timeout Callbacks for a cmd reply run outside the protection of card->lock, to allow for additional cmds to be issued & enqueued in parallel. When qeth_send_control_data() bails out for a cmd without having received a reply (eg. due to timeout), its callback may concurrently be processing a reply that just arrived. In this case, the callback potentially accesses a stale reply->reply_param area that eg. was on-stack and has already been released. To avoid this race, add some locking so that qeth_send_control_data() can (1) wait for a concurrently running callback, and (2) zap any pending callback that still wants to run. Signed-off-by: Julian Wiedmann Signed-off-by: Jakub Kicinski --- drivers/s390/net/qeth_core.h | 1 + drivers/s390/net/qeth_core_main.c | 20 ++++++++++++++++++++ 2 files changed, 21 insertions(+) diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h index c7ee07ce3615..28db887d38ed 100644 --- a/drivers/s390/net/qeth_core.h +++ b/drivers/s390/net/qeth_core.h @@ -629,6 +629,7 @@ struct qeth_seqno { struct qeth_reply { struct list_head list; struct completion received; + spinlock_t lock; int (*callback)(struct qeth_card *, struct qeth_reply *, unsigned long); u32 seqno; diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index 4d0caeebc802..9c3310c4d61d 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -544,6 +544,7 @@ static struct qeth_reply *qeth_alloc_reply(struct qeth_card *card) if (reply) { refcount_set(&reply->refcnt, 1); init_completion(&reply->received); + spin_lock_init(&reply->lock); } return reply; } @@ -799,6 +800,13 @@ static void qeth_issue_next_read_cb(struct qeth_card *card, if (!reply->callback) { rc = 0; + goto no_callback; + } + + spin_lock_irqsave(&reply->lock, flags); + if (reply->rc) { + /* Bail out when the requestor has already left: */ + rc = reply->rc; } else { if (cmd) { reply->offset = (u16)((char *)cmd - (char *)iob->data); @@ -807,7 +815,9 @@ static void qeth_issue_next_read_cb(struct qeth_card *card, rc = reply->callback(card, reply, (unsigned long)iob); } } + spin_unlock_irqrestore(&reply->lock, flags); +no_callback: if (rc <= 0) qeth_notify_reply(reply, rc); qeth_put_reply(reply); @@ -1749,6 +1759,16 @@ static int qeth_send_control_data(struct qeth_card *card, rc = (timeout == -ERESTARTSYS) ? -EINTR : -ETIME; qeth_dequeue_reply(card, reply); + + if (reply_cb) { + /* Wait until the callback for a late reply has completed: */ + spin_lock_irq(&reply->lock); + if (rc) + /* Zap any callback that's still pending: */ + reply->rc = rc; + spin_unlock_irq(&reply->lock); + } + if (!rc) rc = reply->rc; qeth_put_reply(reply); From 66cf4710b23ab2adda11155684a2c8826f4fe732 Mon Sep 17 00:00:00 2001 From: Thomas Falcon Date: Mon, 12 Aug 2019 16:13:06 -0500 Subject: [PATCH 408/482] ibmveth: Convert multicast list size for little-endian system The ibm,mac-address-filters property defines the maximum number of addresses the hypervisor's multicast filter list can support. It is encoded as a big-endian integer in the OF device tree, but the virtual ethernet driver does not convert it for use by little-endian systems. As a result, the driver is not behaving as it should on affected systems when a large number of multicast addresses are assigned to the device. Reported-by: Hangbin Liu Signed-off-by: Thomas Falcon Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ibm/ibmveth.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/ibm/ibmveth.c b/drivers/net/ethernet/ibm/ibmveth.c index d654c234aaf7..c5be4ebd8437 100644 --- a/drivers/net/ethernet/ibm/ibmveth.c +++ b/drivers/net/ethernet/ibm/ibmveth.c @@ -1605,7 +1605,7 @@ static int ibmveth_probe(struct vio_dev *dev, const struct vio_device_id *id) struct net_device *netdev; struct ibmveth_adapter *adapter; unsigned char *mac_addr_p; - unsigned int *mcastFilterSize_p; + __be32 *mcastFilterSize_p; long ret; unsigned long ret_attr; @@ -1627,8 +1627,9 @@ static int ibmveth_probe(struct vio_dev *dev, const struct vio_device_id *id) return -EINVAL; } - mcastFilterSize_p = (unsigned int *)vio_get_attribute(dev, - VETH_MCAST_FILTER_SIZE, NULL); + mcastFilterSize_p = (__be32 *)vio_get_attribute(dev, + VETH_MCAST_FILTER_SIZE, + NULL); if (!mcastFilterSize_p) { dev_err(&dev->dev, "Can't find VETH_MCAST_FILTER_SIZE " "attribute\n"); @@ -1645,7 +1646,7 @@ static int ibmveth_probe(struct vio_dev *dev, const struct vio_device_id *id) adapter->vdev = dev; adapter->netdev = netdev; - adapter->mcastFilterSize = *mcastFilterSize_p; + adapter->mcastFilterSize = be32_to_cpu(*mcastFilterSize_p); adapter->pool_config = 0; netif_napi_add(netdev, &adapter->napi, ibmveth_poll, 16); From 48ec7014c56e5eb2fbf6f479896143622d834f3b Mon Sep 17 00:00:00 2001 From: Wenwen Wang Date: Mon, 12 Aug 2019 14:11:35 -0500 Subject: [PATCH 409/482] net/mlx4_en: fix a memory leak bug In mlx4_en_config_rss_steer(), 'rss_map->indir_qp' is allocated through kzalloc(). After that, mlx4_qp_alloc() is invoked to configure RSS indirection. However, if mlx4_qp_alloc() fails, the allocated 'rss_map->indir_qp' is not deallocated, leading to a memory leak bug. To fix the above issue, add the 'qp_alloc_err' label to free 'rss_map->indir_qp'. Fixes: 4931c6ef04b4 ("net/mlx4_en: Optimized single ring steering") Signed-off-by: Wenwen Wang Reviewed-by: Tariq Toukan Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx4/en_rx.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index 6c01314e87b0..db3552f2d087 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -1187,7 +1187,7 @@ int mlx4_en_config_rss_steer(struct mlx4_en_priv *priv) err = mlx4_qp_alloc(mdev->dev, priv->base_qpn, rss_map->indir_qp); if (err) { en_err(priv, "Failed to allocate RSS indirection QP\n"); - goto rss_err; + goto qp_alloc_err; } rss_map->indir_qp->event = mlx4_en_sqp_event; @@ -1241,6 +1241,7 @@ int mlx4_en_config_rss_steer(struct mlx4_en_priv *priv) MLX4_QP_STATE_RST, NULL, 0, 0, rss_map->indir_qp); mlx4_qp_remove(mdev->dev, rss_map->indir_qp); mlx4_qp_free(mdev->dev, rss_map->indir_qp); +qp_alloc_err: kfree(rss_map->indir_qp); rss_map->indir_qp = NULL; rss_err: From c36757eb9dee13681227ad3676d37f14b3a2b2af Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Mon, 12 Aug 2019 21:20:02 +0200 Subject: [PATCH 410/482] net: phy: consider AN_RESTART status when reading link status After configuring and restarting aneg we immediately try to read the link status. On some systems the PHY may not yet have cleared the "aneg complete" and "link up" bits, resulting in a false link-up signal. See [0] for a report. Clause 22 and 45 both require the PHY to keep the AN_RESTART bit set until the PHY actually starts auto-negotiation. Let's consider this in the generic functions for reading link status. The commit marked as fixed is the first one where the patch applies cleanly. [0] https://marc.info/?t=156518400300003&r=1&w=2 Fixes: c1164bb1a631 ("net: phy: check PMAPMD link status only in genphy_c45_read_link") Tested-by: Yonglong Liu Signed-off-by: Heiner Kallweit Reviewed-by: Andrew Lunn Signed-off-by: Jakub Kicinski --- drivers/net/phy/phy-c45.c | 14 ++++++++++++++ drivers/net/phy/phy_device.c | 12 +++++++++++- 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/drivers/net/phy/phy-c45.c b/drivers/net/phy/phy-c45.c index b9d4145781ca..58bb25e4af10 100644 --- a/drivers/net/phy/phy-c45.c +++ b/drivers/net/phy/phy-c45.c @@ -219,6 +219,20 @@ int genphy_c45_read_link(struct phy_device *phydev) int val, devad; bool link = true; + if (phydev->c45_ids.devices_in_package & MDIO_DEVS_AN) { + val = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_CTRL1); + if (val < 0) + return val; + + /* Autoneg is being started, therefore disregard current + * link status and report link as down. + */ + if (val & MDIO_AN_CTRL1_RESTART) { + phydev->link = 0; + return 0; + } + } + while (mmd_mask && link) { devad = __ffs(mmd_mask); mmd_mask &= ~BIT(devad); diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 7ddd91df99e3..27ebc2c6c2d0 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -1752,7 +1752,17 @@ EXPORT_SYMBOL(genphy_aneg_done); */ int genphy_update_link(struct phy_device *phydev) { - int status; + int status = 0, bmcr; + + bmcr = phy_read(phydev, MII_BMCR); + if (bmcr < 0) + return bmcr; + + /* Autoneg is being started, therefore disregard BMSR value and + * report link as down. + */ + if (bmcr & BMCR_ANRESTART) + goto done; /* The link state is latched low so that momentary link * drops can be detected. Do not double-read the status From d00ee64e1dcf09b3afefd1340f3e9eb637272714 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 12 Aug 2019 13:07:07 -0700 Subject: [PATCH 411/482] netlink: Fix nlmsg_parse as a wrapper for strict message parsing Eric reported a syzbot warning: BUG: KMSAN: uninit-value in nh_valid_get_del_req+0x6f1/0x8c0 net/ipv4/nexthop.c:1510 CPU: 0 PID: 11812 Comm: syz-executor444 Not tainted 5.3.0-rc3+ #17 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x191/0x1f0 lib/dump_stack.c:113 kmsan_report+0x162/0x2d0 mm/kmsan/kmsan_report.c:109 __msan_warning+0x75/0xe0 mm/kmsan/kmsan_instr.c:294 nh_valid_get_del_req+0x6f1/0x8c0 net/ipv4/nexthop.c:1510 rtm_del_nexthop+0x1b1/0x610 net/ipv4/nexthop.c:1543 rtnetlink_rcv_msg+0x115a/0x1580 net/core/rtnetlink.c:5223 netlink_rcv_skb+0x431/0x620 net/netlink/af_netlink.c:2477 rtnetlink_rcv+0x50/0x60 net/core/rtnetlink.c:5241 netlink_unicast_kernel net/netlink/af_netlink.c:1302 [inline] netlink_unicast+0xf6c/0x1050 net/netlink/af_netlink.c:1328 netlink_sendmsg+0x110f/0x1330 net/netlink/af_netlink.c:1917 sock_sendmsg_nosec net/socket.c:637 [inline] sock_sendmsg net/socket.c:657 [inline] ___sys_sendmsg+0x14ff/0x1590 net/socket.c:2311 __sys_sendmmsg+0x53a/0xae0 net/socket.c:2413 __do_sys_sendmmsg net/socket.c:2442 [inline] __se_sys_sendmmsg+0xbd/0xe0 net/socket.c:2439 __x64_sys_sendmmsg+0x56/0x70 net/socket.c:2439 do_syscall_64+0xbc/0xf0 arch/x86/entry/common.c:297 entry_SYSCALL_64_after_hwframe+0x63/0xe7 The root cause is nlmsg_parse calling __nla_parse which means the header struct size is not checked. nlmsg_parse should be a wrapper around __nlmsg_parse with NL_VALIDATE_STRICT for the validate argument very much like nlmsg_parse_deprecated is for NL_VALIDATE_LIBERAL. Fixes: 3de6440354465 ("netlink: re-add parse/validate functions in strict mode") Reported-by: Eric Dumazet Reported-by: syzbot Signed-off-by: David Ahern Reviewed-by: Eric Dumazet Signed-off-by: Jakub Kicinski --- include/net/netlink.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/include/net/netlink.h b/include/net/netlink.h index e4650e5b64a1..b140c8f1be22 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -684,9 +684,8 @@ static inline int nlmsg_parse(const struct nlmsghdr *nlh, int hdrlen, const struct nla_policy *policy, struct netlink_ext_ack *extack) { - return __nla_parse(tb, maxtype, nlmsg_attrdata(nlh, hdrlen), - nlmsg_attrlen(nlh, hdrlen), policy, - NL_VALIDATE_STRICT, extack); + return __nlmsg_parse(nlh, hdrlen, tb, maxtype, policy, + NL_VALIDATE_STRICT, extack); } /** From 6d5afe20397b478192ed8c38ec0ee10fa3aec649 Mon Sep 17 00:00:00 2001 From: zhengbin Date: Tue, 13 Aug 2019 22:05:50 +0800 Subject: [PATCH 412/482] sctp: fix memleak in sctp_send_reset_streams If the stream outq is not empty, need to kfree nstr_list. Fixes: d570a59c5b5f ("sctp: only allow the out stream reset when the stream outq is empty") Reported-by: Hulk Robot Signed-off-by: zhengbin Acked-by: Marcelo Ricardo Leitner Acked-by: Neil Horman Signed-off-by: Jakub Kicinski --- net/sctp/stream.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/sctp/stream.c b/net/sctp/stream.c index 25946604af85..e83cdaa2ab76 100644 --- a/net/sctp/stream.c +++ b/net/sctp/stream.c @@ -316,6 +316,7 @@ int sctp_send_reset_streams(struct sctp_association *asoc, nstr_list[i] = htons(str_list[i]); if (out && !sctp_stream_outq_is_empty(stream, str_nums, nstr_list)) { + kfree(nstr_list); retval = -EAGAIN; goto out; } From 401714d9534aad8c24196b32600da683116bbe09 Mon Sep 17 00:00:00 2001 From: Hui Wang Date: Wed, 14 Aug 2019 12:09:07 +0800 Subject: [PATCH 413/482] ALSA: hda - Let all conexant codec enter D3 when rebooting We have 3 new lenovo laptops which have conexant codec 0x14f11f86, these 3 laptops also have the noise issue when rebooting, after letting the codec enter D3 before rebooting or poweroff, the noise disappers. Instead of adding a new ID again in the reboot_notify(), let us make this function apply to all conexant codec. In theory make codec enter D3 before rebooting or poweroff is harmless, and I tested this change on a couple of other Lenovo laptops which have different conexant codecs, there is no side effect so far. Cc: stable@vger.kernel.org Signed-off-by: Hui Wang Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_conexant.c | 9 --------- 1 file changed, 9 deletions(-) diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c index f299f137eaea..93a303676aea 100644 --- a/sound/pci/hda/patch_conexant.c +++ b/sound/pci/hda/patch_conexant.c @@ -163,15 +163,6 @@ static void cx_auto_reboot_notify(struct hda_codec *codec) { struct conexant_spec *spec = codec->spec; - switch (codec->core.vendor_id) { - case 0x14f12008: /* CX8200 */ - case 0x14f150f2: /* CX20722 */ - case 0x14f150f4: /* CX20724 */ - break; - default: - return; - } - /* Turn the problematic codec into D3 to avoid spurious noises from the internal speaker during (and after) reboot */ cx_auto_turn_eapd(codec, spec->num_eapds, spec->eapds, false); From 871b9066027702e6e6589da0e1edd3b7dede7205 Mon Sep 17 00:00:00 2001 From: Hui Wang Date: Wed, 14 Aug 2019 12:09:08 +0800 Subject: [PATCH 414/482] ALSA: hda - Add a generic reboot_notify Make codec enter D3 before rebooting or poweroff can fix the noise issue on some laptops. And in theory it is harmless for all codecs to enter D3 before rebooting or poweroff, let us add a generic reboot_notify, then realtek and conexant drivers can call this function. Cc: stable@vger.kernel.org Signed-off-by: Hui Wang Signed-off-by: Takashi Iwai --- sound/pci/hda/hda_generic.c | 19 +++++++++++++++++++ sound/pci/hda/hda_generic.h | 1 + sound/pci/hda/patch_conexant.c | 6 +----- sound/pci/hda/patch_realtek.c | 11 +---------- 4 files changed, 22 insertions(+), 15 deletions(-) diff --git a/sound/pci/hda/hda_generic.c b/sound/pci/hda/hda_generic.c index 8f2beb1f3ae4..5bf24fb819d2 100644 --- a/sound/pci/hda/hda_generic.c +++ b/sound/pci/hda/hda_generic.c @@ -6051,6 +6051,24 @@ void snd_hda_gen_free(struct hda_codec *codec) } EXPORT_SYMBOL_GPL(snd_hda_gen_free); +/** + * snd_hda_gen_reboot_notify - Make codec enter D3 before rebooting + * @codec: the HDA codec + * + * This can be put as patch_ops reboot_notify function. + */ +void snd_hda_gen_reboot_notify(struct hda_codec *codec) +{ + /* Make the codec enter D3 to avoid spurious noises from the internal + * speaker during (and after) reboot + */ + snd_hda_codec_set_power_to_all(codec, codec->core.afg, AC_PWRST_D3); + snd_hda_codec_write(codec, codec->core.afg, 0, + AC_VERB_SET_POWER_STATE, AC_PWRST_D3); + msleep(10); +} +EXPORT_SYMBOL_GPL(snd_hda_gen_reboot_notify); + #ifdef CONFIG_PM /** * snd_hda_gen_check_power_status - check the loopback power save state @@ -6078,6 +6096,7 @@ static const struct hda_codec_ops generic_patch_ops = { .init = snd_hda_gen_init, .free = snd_hda_gen_free, .unsol_event = snd_hda_jack_unsol_event, + .reboot_notify = snd_hda_gen_reboot_notify, #ifdef CONFIG_PM .check_power_status = snd_hda_gen_check_power_status, #endif diff --git a/sound/pci/hda/hda_generic.h b/sound/pci/hda/hda_generic.h index 35a670a71c42..5f199dcb0d18 100644 --- a/sound/pci/hda/hda_generic.h +++ b/sound/pci/hda/hda_generic.h @@ -332,6 +332,7 @@ int snd_hda_gen_parse_auto_config(struct hda_codec *codec, struct auto_pin_cfg *cfg); int snd_hda_gen_build_controls(struct hda_codec *codec); int snd_hda_gen_build_pcms(struct hda_codec *codec); +void snd_hda_gen_reboot_notify(struct hda_codec *codec); /* standard jack event callbacks */ void snd_hda_gen_hp_automute(struct hda_codec *codec, diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c index 93a303676aea..14298ef45b21 100644 --- a/sound/pci/hda/patch_conexant.c +++ b/sound/pci/hda/patch_conexant.c @@ -166,11 +166,7 @@ static void cx_auto_reboot_notify(struct hda_codec *codec) /* Turn the problematic codec into D3 to avoid spurious noises from the internal speaker during (and after) reboot */ cx_auto_turn_eapd(codec, spec->num_eapds, spec->eapds, false); - - snd_hda_codec_set_power_to_all(codec, codec->core.afg, AC_PWRST_D3); - snd_hda_codec_write(codec, codec->core.afg, 0, - AC_VERB_SET_POWER_STATE, AC_PWRST_D3); - msleep(10); + snd_hda_gen_reboot_notify(codec); } static void cx_auto_free(struct hda_codec *codec) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 8aaf1d9c55cf..e333b3e30e31 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -869,15 +869,6 @@ static void alc_reboot_notify(struct hda_codec *codec) alc_shutup(codec); } -/* power down codec to D3 at reboot/shutdown; set as reboot_notify ops */ -static void alc_d3_at_reboot(struct hda_codec *codec) -{ - snd_hda_codec_set_power_to_all(codec, codec->core.afg, AC_PWRST_D3); - snd_hda_codec_write(codec, codec->core.afg, 0, - AC_VERB_SET_POWER_STATE, AC_PWRST_D3); - msleep(10); -} - #define alc_free snd_hda_gen_free #ifdef CONFIG_PM @@ -5152,7 +5143,7 @@ static void alc_fixup_tpt440_dock(struct hda_codec *codec, struct alc_spec *spec = codec->spec; if (action == HDA_FIXUP_ACT_PRE_PROBE) { - spec->reboot_notify = alc_d3_at_reboot; /* reduce noise */ + spec->reboot_notify = snd_hda_gen_reboot_notify; /* reduce noise */ spec->parse_flags = HDA_PINCFG_NO_HP_FIXUP; codec->power_save_node = 0; /* avoid click noises */ snd_hda_apply_pincfgs(codec, pincfgs); From dfe42be15fde16232340b8b2a57c359f51cc10d9 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 13 Aug 2019 17:41:13 +0200 Subject: [PATCH 415/482] netfilter: nft_flow_offload: skip tcp rst and fin packets TCP rst and fin packets do not qualify to place a flow into the flowtable. Most likely there will be no more packets after connection closure. Without this patch, this flow entry expires and connection tracking picks up the entry in ESTABLISHED state using the fixup timeout, which makes this look inconsistent to the user for a connection that is actually already closed. Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_flow_offload.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c index aa5f571d4361..060a4ed46d5e 100644 --- a/net/netfilter/nft_flow_offload.c +++ b/net/netfilter/nft_flow_offload.c @@ -72,11 +72,11 @@ static void nft_flow_offload_eval(const struct nft_expr *expr, { struct nft_flow_offload *priv = nft_expr_priv(expr); struct nf_flowtable *flowtable = &priv->flowtable->data; + struct tcphdr _tcph, *tcph = NULL; enum ip_conntrack_info ctinfo; struct nf_flow_route route; struct flow_offload *flow; enum ip_conntrack_dir dir; - bool is_tcp = false; struct nf_conn *ct; int ret; @@ -89,7 +89,10 @@ static void nft_flow_offload_eval(const struct nft_expr *expr, switch (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum) { case IPPROTO_TCP: - is_tcp = true; + tcph = skb_header_pointer(pkt->skb, pkt->xt.thoff, + sizeof(_tcph), &_tcph); + if (unlikely(!tcph || tcph->fin || tcph->rst)) + goto out; break; case IPPROTO_UDP: break; @@ -115,7 +118,7 @@ static void nft_flow_offload_eval(const struct nft_expr *expr, if (!flow) goto err_flow_alloc; - if (is_tcp) { + if (tcph) { ct->proto.tcp.seen[0].flags |= IP_CT_TCP_FLAG_BE_LIBERAL; ct->proto.tcp.seen[1].flags |= IP_CT_TCP_FLAG_BE_LIBERAL; } From e8c220fac415d9f4a994b0c2871b835feac1eb4e Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Thu, 8 Aug 2019 18:01:36 -0300 Subject: [PATCH 416/482] Revert "i2c: imx: improve the error handling in i2c_imx_dma_request()" Since commit e1ab9a468e3b ("i2c: imx: improve the error handling in i2c_imx_dma_request()") when booting with the DMA driver as module (such as CONFIG_FSL_EDMA=m) the following endless clk warnings are seen: [ 153.077831] ------------[ cut here ]------------ [ 153.082528] WARNING: CPU: 0 PID: 15 at drivers/clk/clk.c:924 clk_core_disable_lock+0x18/0x24 [ 153.093077] i2c0 already disabled [ 153.096416] Modules linked in: [ 153.099521] CPU: 0 PID: 15 Comm: kworker/0:1 Tainted: G W 5.2.0+ #321 [ 153.107290] Hardware name: Freescale Vybrid VF5xx/VF6xx (Device Tree) [ 153.113772] Workqueue: events deferred_probe_work_func [ 153.118979] [] (unwind_backtrace) from [] (show_stack+0x10/0x14) [ 153.126778] [] (show_stack) from [] (dump_stack+0x9c/0xd4) [ 153.134051] [] (dump_stack) from [] (__warn+0xf8/0x124) [ 153.141056] [] (__warn) from [] (warn_slowpath_fmt+0x38/0x48) [ 153.148580] [] (warn_slowpath_fmt) from [] (clk_core_disable_lock+0x18/0x24) [ 153.157413] [] (clk_core_disable_lock) from [] (i2c_imx_probe+0x554/0x6ec) [ 153.166076] [] (i2c_imx_probe) from [] (platform_drv_probe+0x48/0x98) [ 153.174297] [] (platform_drv_probe) from [] (really_probe+0x1d8/0x2c0) [ 153.182605] [] (really_probe) from [] (driver_probe_device+0x5c/0x174) [ 153.190909] [] (driver_probe_device) from [] (bus_for_each_drv+0x44/0x8c) [ 153.199480] [] (bus_for_each_drv) from [] (__device_attach+0xa0/0x108) [ 153.207782] [] (__device_attach) from [] (bus_probe_device+0x88/0x90) [ 153.215999] [] (bus_probe_device) from [] (deferred_probe_work_func+0x60/0x90) [ 153.225003] [] (deferred_probe_work_func) from [] (process_one_work+0x204/0x634) [ 153.234178] [] (process_one_work) from [] (worker_thread+0x20/0x484) [ 153.242315] [] (worker_thread) from [] (kthread+0x118/0x150) [ 153.249758] [] (kthread) from [] (ret_from_fork+0x14/0x20) [ 153.257006] Exception stack(0xdde43fb0 to 0xdde43ff8) [ 153.262095] 3fa0: 00000000 00000000 00000000 00000000 [ 153.270306] 3fc0: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 [ 153.278520] 3fe0: 00000000 00000000 00000000 00000000 00000013 00000000 [ 153.285159] irq event stamp: 3323022 [ 153.288787] hardirqs last enabled at (3323021): [] _raw_spin_unlock_irq+0x24/0x2c [ 153.297261] hardirqs last disabled at (3323022): [] clk_enable_lock+0x10/0x124 [ 153.305392] softirqs last enabled at (3322092): [] __do_softirq+0x344/0x540 [ 153.313352] softirqs last disabled at (3322081): [] irq_exit+0x10c/0x128 [ 153.320946] ---[ end trace a506731ccd9bd703 ]--- This endless clk warnings behaviour is well explained by Andrey Smirnov: "Allocating DMA after registering I2C adapter can lead to infinite probing loop, for example, consider the following scenario: 1. i2c_imx_probe() is called and successfully registers an I2C adapter via i2c_add_numbered_adapter() 2. As a part of i2c_add_numbered_adapter() new I2C slave devices are added from DT which results in a call to driver_deferred_probe_trigger() 3. i2c_imx_probe() continues and calls i2c_imx_dma_request() which due to lack of proper DMA driver returns -EPROBE_DEFER 4. i2c_imx_probe() fails, removes I2C adapter and returns -EPROBE_DEFER, which places it into deferred probe list 5. Deferred probe work triggered in #2 above kicks in and calls i2c_imx_probe() again thus bringing us to step #1" So revert commit e1ab9a468e3b ("i2c: imx: improve the error handling in i2c_imx_dma_request()") and restore the old behaviour, in order to avoid regressions on existing setups. Cc: Reported-by: Andrey Smirnov Reported-by: Russell King Fixes: e1ab9a468e3b ("i2c: imx: improve the error handling in i2c_imx_dma_request()") Signed-off-by: Fabio Estevam Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-imx.c | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/drivers/i2c/busses/i2c-imx.c b/drivers/i2c/busses/i2c-imx.c index b1b8b938d7f4..15f6cde6452f 100644 --- a/drivers/i2c/busses/i2c-imx.c +++ b/drivers/i2c/busses/i2c-imx.c @@ -273,8 +273,8 @@ static inline unsigned char imx_i2c_read_reg(struct imx_i2c_struct *i2c_imx, } /* Functions for DMA support */ -static int i2c_imx_dma_request(struct imx_i2c_struct *i2c_imx, - dma_addr_t phy_addr) +static void i2c_imx_dma_request(struct imx_i2c_struct *i2c_imx, + dma_addr_t phy_addr) { struct imx_i2c_dma *dma; struct dma_slave_config dma_sconfig; @@ -283,7 +283,7 @@ static int i2c_imx_dma_request(struct imx_i2c_struct *i2c_imx, dma = devm_kzalloc(dev, sizeof(*dma), GFP_KERNEL); if (!dma) - return -ENOMEM; + return; dma->chan_tx = dma_request_chan(dev, "tx"); if (IS_ERR(dma->chan_tx)) { @@ -328,7 +328,7 @@ static int i2c_imx_dma_request(struct imx_i2c_struct *i2c_imx, dev_info(dev, "using %s (tx) and %s (rx) for DMA transfers\n", dma_chan_name(dma->chan_tx), dma_chan_name(dma->chan_rx)); - return 0; + return; fail_rx: dma_release_channel(dma->chan_rx); @@ -336,8 +336,6 @@ static int i2c_imx_dma_request(struct imx_i2c_struct *i2c_imx, dma_release_channel(dma->chan_tx); fail_al: devm_kfree(dev, dma); - /* return successfully if there is no dma support */ - return ret == -ENODEV ? 0 : ret; } static void i2c_imx_dma_callback(void *arg) @@ -1165,17 +1163,13 @@ static int i2c_imx_probe(struct platform_device *pdev) dev_dbg(&i2c_imx->adapter.dev, "device resources: %pR\n", res); dev_dbg(&i2c_imx->adapter.dev, "adapter name: \"%s\"\n", i2c_imx->adapter.name); + dev_info(&i2c_imx->adapter.dev, "IMX I2C adapter registered\n"); /* Init DMA config if supported */ - ret = i2c_imx_dma_request(i2c_imx, phy_addr); - if (ret < 0) - goto del_adapter; + i2c_imx_dma_request(i2c_imx, phy_addr); - dev_info(&i2c_imx->adapter.dev, "IMX I2C adapter registered\n"); return 0; /* Return OK */ -del_adapter: - i2c_del_adapter(&i2c_imx->adapter); clk_notifier_unregister: clk_notifier_unregister(i2c_imx->clk, &i2c_imx->clk_change_nb); rpm_disable: From 8fc3ae3b109d17157102f0abf9c8fb1e16bf810a Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Mon, 12 Aug 2019 07:08:17 +0200 Subject: [PATCH 417/482] MAINTAINERS: i2c-imx: take over maintainership I would like to maintain the i2c-imx driver. Since I work with different i.MX variants and have access to the hardware, I can spend some time on the reviewing of this driver. Signed-off-by: Oleksij Rempel Signed-off-by: Wolfram Sang --- MAINTAINERS | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index e81e60bd7c26..ddc1cf5cb0b9 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6441,6 +6441,14 @@ S: Maintained F: drivers/perf/fsl_imx8_ddr_perf.c F: Documentation/devicetree/bindings/perf/fsl-imx-ddr.txt +FREESCALE IMX I2C DRIVER +M: Oleksij Rempel +R: Pengutronix Kernel Team +L: linux-i2c@vger.kernel.org +S: Maintained +F: drivers/i2c/busses/i2c-imx.c +F: Documentation/devicetree/bindings/i2c/i2c-imx.txt + FREESCALE IMX LPI2C DRIVER M: Dong Aisheng L: linux-i2c@vger.kernel.org From b00df840fb4004b7087940ac5f68801562d0d2de Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 12 Aug 2019 23:30:06 +0100 Subject: [PATCH 418/482] rxrpc: Fix local endpoint replacement When a local endpoint (struct rxrpc_local) ceases to be in use by any AF_RXRPC sockets, it starts the process of being destroyed, but this doesn't cause it to be removed from the namespace endpoint list immediately as tearing it down isn't trivial and can't be done in softirq context, so it gets deferred. If a new socket comes along that wants to bind to the same endpoint, a new rxrpc_local object will be allocated and rxrpc_lookup_local() will use list_replace() to substitute the new one for the old. Then, when the dying object gets to rxrpc_local_destroyer(), it is removed unconditionally from whatever list it is on by calling list_del_init(). However, list_replace() doesn't reset the pointers in the replaced list_head and so the list_del_init() will likely corrupt the local endpoints list. Fix this by using list_replace_init() instead. Fixes: 730c5fd42c1e ("rxrpc: Fix local endpoint refcounting") Reported-by: syzbot+193e29e9387ea5837f1d@syzkaller.appspotmail.com Signed-off-by: David Howells --- net/rxrpc/local_object.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c index c9db3e762d8d..c45765b7263e 100644 --- a/net/rxrpc/local_object.c +++ b/net/rxrpc/local_object.c @@ -283,7 +283,7 @@ struct rxrpc_local *rxrpc_lookup_local(struct net *net, goto sock_error; if (cursor != &rxnet->local_endpoints) - list_replace(cursor, &local->link); + list_replace_init(cursor, &local->link); else list_add_tail(&local->link, cursor); age = "new"; From 06d9532fa6b34f12a6d75711162d47c17c1add72 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 13 Aug 2019 22:26:36 +0100 Subject: [PATCH 419/482] rxrpc: Fix read-after-free in rxrpc_queue_local() rxrpc_queue_local() attempts to queue the local endpoint it is given and then, if successful, prints a trace line. The trace line includes the current usage count - but we're not allowed to look at the local endpoint at this point as we passed our ref on it to the workqueue. Fix this by reading the usage count before queuing the work item. Also fix the reading of local->debug_id for trace lines, which must be done with the same consideration as reading the usage count. Fixes: 09d2bf595db4 ("rxrpc: Add a tracepoint to track rxrpc_local refcounting") Reported-by: syzbot+78e71c5bab4f76a6a719@syzkaller.appspotmail.com Signed-off-by: David Howells --- include/trace/events/rxrpc.h | 6 +++--- net/rxrpc/local_object.c | 19 ++++++++++--------- 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index cc1d060cbf13..fa06b528c73c 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -498,10 +498,10 @@ rxrpc_tx_points; #define E_(a, b) { a, b } TRACE_EVENT(rxrpc_local, - TP_PROTO(struct rxrpc_local *local, enum rxrpc_local_trace op, + TP_PROTO(unsigned int local_debug_id, enum rxrpc_local_trace op, int usage, const void *where), - TP_ARGS(local, op, usage, where), + TP_ARGS(local_debug_id, op, usage, where), TP_STRUCT__entry( __field(unsigned int, local ) @@ -511,7 +511,7 @@ TRACE_EVENT(rxrpc_local, ), TP_fast_assign( - __entry->local = local->debug_id; + __entry->local = local_debug_id; __entry->op = op; __entry->usage = usage; __entry->where = where; diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c index c45765b7263e..72a6e12a9304 100644 --- a/net/rxrpc/local_object.c +++ b/net/rxrpc/local_object.c @@ -93,7 +93,7 @@ static struct rxrpc_local *rxrpc_alloc_local(struct rxrpc_net *rxnet, local->debug_id = atomic_inc_return(&rxrpc_debug_id); memcpy(&local->srx, srx, sizeof(*srx)); local->srx.srx_service = 0; - trace_rxrpc_local(local, rxrpc_local_new, 1, NULL); + trace_rxrpc_local(local->debug_id, rxrpc_local_new, 1, NULL); } _leave(" = %p", local); @@ -321,7 +321,7 @@ struct rxrpc_local *rxrpc_get_local(struct rxrpc_local *local) int n; n = atomic_inc_return(&local->usage); - trace_rxrpc_local(local, rxrpc_local_got, n, here); + trace_rxrpc_local(local->debug_id, rxrpc_local_got, n, here); return local; } @@ -335,7 +335,8 @@ struct rxrpc_local *rxrpc_get_local_maybe(struct rxrpc_local *local) if (local) { int n = atomic_fetch_add_unless(&local->usage, 1, 0); if (n > 0) - trace_rxrpc_local(local, rxrpc_local_got, n + 1, here); + trace_rxrpc_local(local->debug_id, rxrpc_local_got, + n + 1, here); else local = NULL; } @@ -343,16 +344,16 @@ struct rxrpc_local *rxrpc_get_local_maybe(struct rxrpc_local *local) } /* - * Queue a local endpoint unless it has become unreferenced and pass the - * caller's reference to the work item. + * Queue a local endpoint and pass the caller's reference to the work item. */ void rxrpc_queue_local(struct rxrpc_local *local) { const void *here = __builtin_return_address(0); + unsigned int debug_id = local->debug_id; + int n = atomic_read(&local->usage); if (rxrpc_queue_work(&local->processor)) - trace_rxrpc_local(local, rxrpc_local_queued, - atomic_read(&local->usage), here); + trace_rxrpc_local(debug_id, rxrpc_local_queued, n, here); else rxrpc_put_local(local); } @@ -367,7 +368,7 @@ void rxrpc_put_local(struct rxrpc_local *local) if (local) { n = atomic_dec_return(&local->usage); - trace_rxrpc_local(local, rxrpc_local_put, n, here); + trace_rxrpc_local(local->debug_id, rxrpc_local_put, n, here); if (n == 0) call_rcu(&local->rcu, rxrpc_local_rcu); @@ -456,7 +457,7 @@ static void rxrpc_local_processor(struct work_struct *work) container_of(work, struct rxrpc_local, processor); bool again; - trace_rxrpc_local(local, rxrpc_local_processing, + trace_rxrpc_local(local->debug_id, rxrpc_local_processing, atomic_read(&local->usage), NULL); do { From 7b814d852af6944657c2961039f404c4490771c0 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Thu, 8 Aug 2019 21:39:10 +0200 Subject: [PATCH 420/482] i2c: rcar: avoid race when unregistering slave client After we disabled interrupts, there might still be an active one running. Sync before clearing the pointer to the slave device. Fixes: de20d1857dd6 ("i2c: rcar: add slave support") Reported-by: Krzysztof Adamski Signed-off-by: Wolfram Sang Reviewed-by: Krzysztof Adamski Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-rcar.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/i2c/busses/i2c-rcar.c b/drivers/i2c/busses/i2c-rcar.c index d39a4606f72d..531c01100b56 100644 --- a/drivers/i2c/busses/i2c-rcar.c +++ b/drivers/i2c/busses/i2c-rcar.c @@ -139,6 +139,7 @@ struct rcar_i2c_priv { enum dma_data_direction dma_direction; struct reset_control *rstc; + int irq; }; #define rcar_i2c_priv_to_dev(p) ((p)->adap.dev.parent) @@ -861,9 +862,11 @@ static int rcar_unreg_slave(struct i2c_client *slave) WARN_ON(!priv->slave); + /* disable irqs and ensure none is running before clearing ptr */ rcar_i2c_write(priv, ICSIER, 0); rcar_i2c_write(priv, ICSCR, 0); + synchronize_irq(priv->irq); priv->slave = NULL; pm_runtime_put(rcar_i2c_priv_to_dev(priv)); @@ -918,7 +921,7 @@ static int rcar_i2c_probe(struct platform_device *pdev) struct i2c_adapter *adap; struct device *dev = &pdev->dev; struct i2c_timings i2c_t; - int irq, ret; + int ret; /* Otherwise logic will break because some bytes must always use PIO */ BUILD_BUG_ON_MSG(RCAR_MIN_DMA_LEN < 3, "Invalid min DMA length"); @@ -984,10 +987,10 @@ static int rcar_i2c_probe(struct platform_device *pdev) pm_runtime_put(dev); - irq = platform_get_irq(pdev, 0); - ret = devm_request_irq(dev, irq, rcar_i2c_irq, 0, dev_name(dev), priv); + priv->irq = platform_get_irq(pdev, 0); + ret = devm_request_irq(dev, priv->irq, rcar_i2c_irq, 0, dev_name(dev), priv); if (ret < 0) { - dev_err(dev, "cannot get irq %d\n", irq); + dev_err(dev, "cannot get irq %d\n", priv->irq); goto out_pm_disable; } From d7437fc0d8291181debe032671a289b6bd93f46f Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Thu, 8 Aug 2019 21:54:17 +0200 Subject: [PATCH 421/482] i2c: emev2: avoid race when unregistering slave client After we disabled interrupts, there might still be an active one running. Sync before clearing the pointer to the slave device. Fixes: c31d0a00021d ("i2c: emev2: add slave support") Reported-by: Krzysztof Adamski Signed-off-by: Wolfram Sang Reviewed-by: Krzysztof Adamski Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-emev2.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/i2c/busses/i2c-emev2.c b/drivers/i2c/busses/i2c-emev2.c index 35b302d983e0..959d4912ec0d 100644 --- a/drivers/i2c/busses/i2c-emev2.c +++ b/drivers/i2c/busses/i2c-emev2.c @@ -69,6 +69,7 @@ struct em_i2c_device { struct completion msg_done; struct clk *sclk; struct i2c_client *slave; + int irq; }; static inline void em_clear_set_bit(struct em_i2c_device *priv, u8 clear, u8 set, u8 reg) @@ -339,6 +340,12 @@ static int em_i2c_unreg_slave(struct i2c_client *slave) writeb(0, priv->base + I2C_OFS_SVA0); + /* + * Wait for interrupt to finish. New slave irqs cannot happen because we + * cleared the slave address and, thus, only extension codes will be + * detected which do not use the slave ptr. + */ + synchronize_irq(priv->irq); priv->slave = NULL; return 0; @@ -355,7 +362,7 @@ static int em_i2c_probe(struct platform_device *pdev) { struct em_i2c_device *priv; struct resource *r; - int irq, ret; + int ret; priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL); if (!priv) @@ -390,8 +397,8 @@ static int em_i2c_probe(struct platform_device *pdev) em_i2c_reset(&priv->adap); - irq = platform_get_irq(pdev, 0); - ret = devm_request_irq(&pdev->dev, irq, em_i2c_irq_handler, 0, + priv->irq = platform_get_irq(pdev, 0); + ret = devm_request_irq(&pdev->dev, priv->irq, em_i2c_irq_handler, 0, "em_i2c", priv); if (ret) goto err_clk; @@ -401,7 +408,8 @@ static int em_i2c_probe(struct platform_device *pdev) if (ret) goto err_clk; - dev_info(&pdev->dev, "Added i2c controller %d, irq %d\n", priv->adap.nr, irq); + dev_info(&pdev->dev, "Added i2c controller %d, irq %d\n", priv->adap.nr, + priv->irq); return 0; From 90865a3dc597bd8463efacb749561095ba70b0aa Mon Sep 17 00:00:00 2001 From: Nishad Kamdar Date: Sat, 3 Aug 2019 19:43:35 +0530 Subject: [PATCH 422/482] i2c: stm32: Use the correct style for SPDX License Identifier This patch corrects the SPDX License Identifier style in header file related to STM32 Driver for I2C hardware bus support. For C header files Documentation/process/license-rules.rst mandates C-like comments (opposed to C source files where C++ style should be used) Changes made by using a script provided by Joe Perches here: https://lkml.org/lkml/2019/2/7/46 Suggested-by: Joe Perches Signed-off-by: Nishad Kamdar Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-stm32.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-stm32.h b/drivers/i2c/busses/i2c-stm32.h index 868755f82f88..2c21893905a3 100644 --- a/drivers/i2c/busses/i2c-stm32.h +++ b/drivers/i2c/busses/i2c-stm32.h @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +/* SPDX-License-Identifier: GPL-2.0 */ /* * i2c-stm32.h * From 6600c0808e2ea2939009e53983f066fe38bd308a Mon Sep 17 00:00:00 2001 From: Rocky Liao Date: Wed, 14 Aug 2019 15:42:39 +0800 Subject: [PATCH 423/482] Bluetooth: hci_qca: Skip 1 error print in device_want_to_sleep() Don't fall through to print error message when receive sleep indication in HCI_IBS_RX_ASLEEP state, this is allowed behavior. Signed-off-by: Rocky Liao Signed-off-by: Marcel Holtmann --- drivers/bluetooth/hci_qca.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c index 0cfa5b831d39..9a970fd1975a 100644 --- a/drivers/bluetooth/hci_qca.c +++ b/drivers/bluetooth/hci_qca.c @@ -705,7 +705,7 @@ static void device_want_to_sleep(struct hci_uart *hu) unsigned long flags; struct qca_data *qca = hu->priv; - BT_DBG("hu %p want to sleep", hu); + BT_DBG("hu %p want to sleep in %d state", hu, qca->rx_ibs_state); spin_lock_irqsave(&qca->hci_ibs_lock, flags); @@ -720,7 +720,7 @@ static void device_want_to_sleep(struct hci_uart *hu) break; case HCI_IBS_RX_ASLEEP: - /* Fall through */ + break; default: /* Any other state is illegal */ From daac07156b330b18eb5071aec4b3ddca1c377f2c Mon Sep 17 00:00:00 2001 From: Hui Peng Date: Tue, 13 Aug 2019 22:34:04 -0400 Subject: [PATCH 424/482] ALSA: usb-audio: Fix an OOB bug in parse_audio_mixer_unit The `uac_mixer_unit_descriptor` shown as below is read from the device side. In `parse_audio_mixer_unit`, `baSourceID` field is accessed from index 0 to `bNrInPins` - 1, the current implementation assumes that descriptor is always valid (the length of descriptor is no shorter than 5 + `bNrInPins`). If a descriptor read from the device side is invalid, it may trigger out-of-bound memory access. ``` struct uac_mixer_unit_descriptor { __u8 bLength; __u8 bDescriptorType; __u8 bDescriptorSubtype; __u8 bUnitID; __u8 bNrInPins; __u8 baSourceID[]; } ``` This patch fixes the bug by add a sanity check on the length of the descriptor. Reported-by: Hui Peng Reported-by: Mathias Payer Cc: Signed-off-by: Hui Peng Signed-off-by: Takashi Iwai --- sound/usb/mixer.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/usb/mixer.c b/sound/usb/mixer.c index 7498b5191b68..ea487378be17 100644 --- a/sound/usb/mixer.c +++ b/sound/usb/mixer.c @@ -744,6 +744,8 @@ static int uac_mixer_unit_get_channels(struct mixer_build *state, return -EINVAL; if (!desc->bNrInPins) return -EINVAL; + if (desc->bLength < sizeof(*desc) + desc->bNrInPins) + return -EINVAL; switch (state->mixer->protocol) { case UAC_VERSION_1: From 8ac71d7e46b94a4fc8ffc6f1c88004cdf24459e8 Mon Sep 17 00:00:00 2001 From: Vincent Chen Date: Wed, 14 Aug 2019 16:23:52 +0800 Subject: [PATCH 425/482] riscv: Correct the initialized flow of FP register The following two reasons cause FP registers are sometimes not initialized before starting the user program. 1. Currently, the FP context is initialized in flush_thread() function and we expect these initial values to be restored to FP register when doing FP context switch. However, the FP context switch only occurs in switch_to function. Hence, if this process does not be scheduled out and scheduled in before entering the user space, the FP registers have no chance to initialize. 2. In flush_thread(), the state of reg->sstatus.FS inherits from the parent. Hence, the state of reg->sstatus.FS may be dirty. If this process is scheduled out during flush_thread() and initializing the FP register, the fstate_save() in switch_to will corrupt the FP context which has been initialized until flush_thread(). To solve the 1st case, the initialization of the FP register will be completed in start_thread(). It makes sure all FP registers are initialized before starting the user program. For the 2nd case, the state of reg->sstatus.FS in start_thread will be set to SR_FS_OFF to prevent this process from corrupting FP context in doing context save. The FP state is set to SR_FS_INITIAL in start_trhead(). Signed-off-by: Vincent Chen Reviewed-by: Anup Patel Reviewed-by: Christoph Hellwig Fixes: 7db91e57a0acd ("RISC-V: Task implementation") Cc: stable@vger.kernel.org [paul.walmsley@sifive.com: fixed brace alignment issue reported by checkpatch] Signed-off-by: Paul Walmsley --- arch/riscv/include/asm/switch_to.h | 6 ++++++ arch/riscv/kernel/process.c | 11 +++++++++-- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/arch/riscv/include/asm/switch_to.h b/arch/riscv/include/asm/switch_to.h index 853b65ef656d..949d9cd91dec 100644 --- a/arch/riscv/include/asm/switch_to.h +++ b/arch/riscv/include/asm/switch_to.h @@ -19,6 +19,12 @@ static inline void __fstate_clean(struct pt_regs *regs) regs->sstatus |= (regs->sstatus & ~(SR_FS)) | SR_FS_CLEAN; } +static inline void fstate_off(struct task_struct *task, + struct pt_regs *regs) +{ + regs->sstatus = (regs->sstatus & ~SR_FS) | SR_FS_OFF; +} + static inline void fstate_save(struct task_struct *task, struct pt_regs *regs) { diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c index f23794bd1e90..fb3a082362eb 100644 --- a/arch/riscv/kernel/process.c +++ b/arch/riscv/kernel/process.c @@ -64,8 +64,14 @@ void start_thread(struct pt_regs *regs, unsigned long pc, unsigned long sp) { regs->sstatus = SR_SPIE; - if (has_fpu) + if (has_fpu) { regs->sstatus |= SR_FS_INITIAL; + /* + * Restore the initial value to the FP register + * before starting the user program. + */ + fstate_restore(current, regs); + } regs->sepc = pc; regs->sp = sp; set_fs(USER_DS); @@ -75,10 +81,11 @@ void flush_thread(void) { #ifdef CONFIG_FPU /* - * Reset FPU context + * Reset FPU state and context * frm: round to nearest, ties to even (IEEE default) * fflags: accrued exceptions cleared */ + fstate_off(current, task_pt_regs(current)); memset(¤t->thread.fstate, 0, sizeof(current->thread.fstate)); #endif } From 69703eb9a8ae28a46cd5bce7d69ceeef6273a104 Mon Sep 17 00:00:00 2001 From: Vincent Chen Date: Wed, 14 Aug 2019 16:23:53 +0800 Subject: [PATCH 426/482] riscv: Make __fstate_clean() work correctly. Make the __fstate_clean() function correctly set the state of sstatus.FS in pt_regs to SR_FS_CLEAN. Fixes: 7db91e57a0acd ("RISC-V: Task implementation") Cc: linux-stable Signed-off-by: Vincent Chen Reviewed-by: Anup Patel Reviewed-by: Christoph Hellwig [paul.walmsley@sifive.com: expanded "Fixes" commit ID] Signed-off-by: Paul Walmsley --- arch/riscv/include/asm/switch_to.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/include/asm/switch_to.h b/arch/riscv/include/asm/switch_to.h index 949d9cd91dec..f0227bdce0f0 100644 --- a/arch/riscv/include/asm/switch_to.h +++ b/arch/riscv/include/asm/switch_to.h @@ -16,7 +16,7 @@ extern void __fstate_restore(struct task_struct *restore_from); static inline void __fstate_clean(struct pt_regs *regs) { - regs->sstatus |= (regs->sstatus & ~(SR_FS)) | SR_FS_CLEAN; + regs->sstatus = (regs->sstatus & ~SR_FS) | SR_FS_CLEAN; } static inline void fstate_off(struct task_struct *task, From e1b4ce25dbc93ab0cb8ed0f236a3b9ff7b03802c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Fri, 9 Aug 2019 17:27:21 +0200 Subject: [PATCH 427/482] drm/scheduler: use job count instead of peek MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The spsc_queue_peek function is accessing queue->head which belongs to the consumer thread and shouldn't be accessed by the producer This is fixing a rare race condition when destroying entities. Signed-off-by: Christian König Acked-by: Andrey Grodzovsky Reviewed-by: Monk.liu@amd.com Signed-off-by: Alex Deucher --- drivers/gpu/drm/scheduler/sched_entity.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c index 35ddbec1375a..671c90f34ede 100644 --- a/drivers/gpu/drm/scheduler/sched_entity.c +++ b/drivers/gpu/drm/scheduler/sched_entity.c @@ -95,7 +95,7 @@ static bool drm_sched_entity_is_idle(struct drm_sched_entity *entity) rmb(); /* for list_empty to work without lock */ if (list_empty(&entity->list) || - spsc_queue_peek(&entity->job_queue) == NULL) + spsc_queue_count(&entity->job_queue) == 0) return true; return false; @@ -281,7 +281,7 @@ void drm_sched_entity_fini(struct drm_sched_entity *entity) /* Consumption of existing IBs wasn't completed. Forcefully * remove them here. */ - if (spsc_queue_peek(&entity->job_queue)) { + if (spsc_queue_count(&entity->job_queue)) { if (sched) { /* Park the kernel for a moment to make sure it isn't processing * our enity. From 626633425c079742c04f4f2d81c16962b084605c Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Tue, 13 Aug 2019 14:47:54 -0600 Subject: [PATCH 428/482] dt-bindings: pinctrl: stm32: Fix 'st,syscfg' schema The proper way to add additional contraints to an existing json-schema is using 'allOf' to reference the base schema. Using just '$ref' doesn't work. Fix this for the 'st,syscfg' property. Cc: Mark Rutland Cc: Maxime Coquelin Cc: Alexandre Torgue Cc: linux-gpio@vger.kernel.org Cc: linux-stm32@st-md-mailman.stormreply.com Cc: linux-arm-kernel@lists.infradead.org Reviewed-by: Linus Walleij Signed-off-by: Rob Herring --- .../devicetree/bindings/pinctrl/st,stm32-pinctrl.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/pinctrl/st,stm32-pinctrl.yaml b/Documentation/devicetree/bindings/pinctrl/st,stm32-pinctrl.yaml index 91d3e78b3395..400df2da018a 100644 --- a/Documentation/devicetree/bindings/pinctrl/st,stm32-pinctrl.yaml +++ b/Documentation/devicetree/bindings/pinctrl/st,stm32-pinctrl.yaml @@ -37,7 +37,8 @@ properties: hwlocks: true st,syscfg: - $ref: "/schemas/types.yaml#/definitions/phandle-array" + allOf: + - $ref: "/schemas/types.yaml#/definitions/phandle-array" description: Should be phandle/offset/mask items: - description: Phandle to the syscon node which includes IRQ mux selection. From 83f82d7a42583e93d0f0dde3d61ed10f75c0f4d8 Mon Sep 17 00:00:00 2001 From: Lubomir Rintel Date: Wed, 7 Aug 2019 15:22:31 +0200 Subject: [PATCH 429/482] of: irq: fix a trivial typo in a doc comment Diverged from what the code does with commit 530210c7814e ("of/irq: Replace of_irq with of_phandle_args"). Signed-off-by: Lubomir Rintel Signed-off-by: Rob Herring --- drivers/of/irq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/of/irq.c b/drivers/of/irq.c index 7f84bb4903ca..a296eaf52a5b 100644 --- a/drivers/of/irq.c +++ b/drivers/of/irq.c @@ -277,7 +277,7 @@ EXPORT_SYMBOL_GPL(of_irq_parse_raw); * of_irq_parse_one - Resolve an interrupt for a device * @device: the device whose interrupt is to be resolved * @index: index of the interrupt to resolve - * @out_irq: structure of_irq filled by this function + * @out_irq: structure of_phandle_args filled by this function * * This function resolves an interrupt for a node by walking the interrupt tree, * finding which interrupt controller node it is attached to, and returning the From c554336efa9bbc28d6ec14efbee3c7d63c61a34f Mon Sep 17 00:00:00 2001 From: Wenwen Wang Date: Tue, 13 Aug 2019 04:18:52 -0500 Subject: [PATCH 430/482] cxgb4: fix a memory leak bug In blocked_fl_write(), 't' is not deallocated if bitmap_parse_user() fails, leading to a memory leak bug. To fix this issue, free t before returning the error. Signed-off-by: Wenwen Wang Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c index 02959035ed3f..d692251ee252 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c @@ -3236,8 +3236,10 @@ static ssize_t blocked_fl_write(struct file *filp, const char __user *ubuf, return -ENOMEM; err = bitmap_parse_user(ubuf, count, t, adap->sge.egr_sz); - if (err) + if (err) { + kvfree(t); return err; + } bitmap_copy(adap->sge.blocked_fl, t, adap->sge.egr_sz); kvfree(t); From 05b439711f6ff8700e8660f97a1179650778b9cb Mon Sep 17 00:00:00 2001 From: "Y.C. Chen" Date: Wed, 11 Apr 2018 09:27:39 +0800 Subject: [PATCH 431/482] drm/ast: Fixed reboot test may cause system hanged There is another thread still access standard VGA I/O while loading drm driver. Disable standard VGA I/O decode to avoid this issue. Signed-off-by: Y.C. Chen Reviewed-by: Benjamin Herrenschmidt Signed-off-by: Dave Airlie Link: https://patchwork.freedesktop.org/patch/msgid/1523410059-18415-1-git-send-email-yc_chen@aspeedtech.com --- drivers/gpu/drm/ast/ast_main.c | 5 ++++- drivers/gpu/drm/ast/ast_mode.c | 2 +- drivers/gpu/drm/ast/ast_post.c | 2 +- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/ast/ast_main.c b/drivers/gpu/drm/ast/ast_main.c index 4c7e31cb45ff..a5d1494a3dc4 100644 --- a/drivers/gpu/drm/ast/ast_main.c +++ b/drivers/gpu/drm/ast/ast_main.c @@ -131,8 +131,8 @@ static int ast_detect_chip(struct drm_device *dev, bool *need_post) /* Enable extended register access */ - ast_enable_mmio(dev); ast_open_key(ast); + ast_enable_mmio(dev); /* Find out whether P2A works or whether to use device-tree */ ast_detect_config_mode(dev, &scu_rev); @@ -576,6 +576,9 @@ void ast_driver_unload(struct drm_device *dev) { struct ast_private *ast = dev->dev_private; + /* enable standard VGA decode */ + ast_set_index_reg(ast, AST_IO_CRTC_PORT, 0xa1, 0x04); + ast_release_firmware(dev); kfree(ast->dp501_fw_addr); ast_mode_fini(dev); diff --git a/drivers/gpu/drm/ast/ast_mode.c b/drivers/gpu/drm/ast/ast_mode.c index ffccbef962a4..a1cb020e07e5 100644 --- a/drivers/gpu/drm/ast/ast_mode.c +++ b/drivers/gpu/drm/ast/ast_mode.c @@ -604,7 +604,7 @@ static int ast_crtc_mode_set(struct drm_crtc *crtc, return -EINVAL; ast_open_key(ast); - ast_set_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xa1, 0xff, 0x04); + ast_set_index_reg(ast, AST_IO_CRTC_PORT, 0xa1, 0x06); ast_set_std_reg(crtc, adjusted_mode, &vbios_mode); ast_set_crtc_reg(crtc, adjusted_mode, &vbios_mode); diff --git a/drivers/gpu/drm/ast/ast_post.c b/drivers/gpu/drm/ast/ast_post.c index f7d421359d56..c1d1ac51d1c2 100644 --- a/drivers/gpu/drm/ast/ast_post.c +++ b/drivers/gpu/drm/ast/ast_post.c @@ -46,7 +46,7 @@ void ast_enable_mmio(struct drm_device *dev) { struct ast_private *ast = dev->dev_private; - ast_set_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xa1, 0xff, 0x04); + ast_set_index_reg(ast, AST_IO_CRTC_PORT, 0xa1, 0x06); } From db1231ddc04682f60d56ff42447f13099c6c4a4c Mon Sep 17 00:00:00 2001 From: Lyude Paul Date: Thu, 8 Aug 2019 20:53:05 -0400 Subject: [PATCH 432/482] drm/nouveau: Only recalculate PBN/VCPI on mode/connector changes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I -thought- I had fixed this entirely, but it looks like that I didn't test this thoroughly enough as we apparently still make one big mistake with nv50_msto_atomic_check() - we don't handle the following scenario: * CRTC #1 has n VCPI allocated to it, is attached to connector DP-4 which is attached to encoder #1. enabled=y active=n * CRTC #1 is changed from DP-4 to DP-5, causing: * DP-4 crtc=#1→NULL (VCPI n→0) * DP-5 crtc=NULL→#1 * CRTC #1 steals encoder #1 back from DP-4 and gives it to DP-5 * CRTC #1 maintains the same mode as before, just with a different connector * mode_changed=n connectors_changed=y (we _SHOULD_ do VCPI 0→n here, but don't) Once the above scenario is repeated once, we'll attempt freeing VCPI from the connector that we didn't allocate due to the connectors changing, but the mode staying the same. Sigh. Since nv50_msto_atomic_check() has broken a few times now, let's rethink things a bit to be more careful: limit both VCPI/PBN allocations to mode_changed || connectors_changed, since neither VCPI or PBN should ever need to change outside of routing and mode changes. Changes since v1: * Fix accidental reversal of clock and bpp arguments in drm_dp_calc_pbn_mode() - William Lewis Signed-off-by: Lyude Paul Reported-by: Bohdan Milar Tested-by: Bohdan Milar Fixes: 232c9eec417a ("drm/nouveau: Use atomic VCPI helpers for MST") References: 412e85b60531 ("drm/nouveau: Only release VCPI slots on mode changes") Cc: Lyude Paul Cc: Ben Skeggs Cc: Daniel Vetter Cc: David Airlie Cc: Jerry Zuo Cc: Harry Wentland Cc: Juston Li Cc: Laurent Pinchart Cc: Karol Herbst Cc: Ilia Mirkin Cc: # v5.1+ Acked-by: Ben Skeggs Signed-off-by: Dave Airlie Link: https://patchwork.freedesktop.org/patch/msgid/20190809005307.18391-1-lyude@redhat.com --- drivers/gpu/drm/nouveau/dispnv50/disp.c | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/nouveau/dispnv50/disp.c b/drivers/gpu/drm/nouveau/dispnv50/disp.c index 126703816794..5c36c75232e6 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/disp.c +++ b/drivers/gpu/drm/nouveau/dispnv50/disp.c @@ -771,16 +771,20 @@ nv50_msto_atomic_check(struct drm_encoder *encoder, struct nv50_head_atom *asyh = nv50_head_atom(crtc_state); int slots; - /* When restoring duplicated states, we need to make sure that the - * bw remains the same and avoid recalculating it, as the connector's - * bpc may have changed after the state was duplicated - */ - if (!state->duplicated) - asyh->dp.pbn = - drm_dp_calc_pbn_mode(crtc_state->adjusted_mode.clock, - connector->display_info.bpc * 3); + if (crtc_state->mode_changed || crtc_state->connectors_changed) { + /* + * When restoring duplicated states, we need to make sure that + * the bw remains the same and avoid recalculating it, as the + * connector's bpc may have changed after the state was + * duplicated + */ + if (!state->duplicated) { + const int bpp = connector->display_info.bpc * 3; + const int clock = crtc_state->adjusted_mode.clock; + + asyh->dp.pbn = drm_dp_calc_pbn_mode(clock, bpp); + } - if (crtc_state->mode_changed) { slots = drm_dp_atomic_find_vcpi_slots(state, &mstm->mgr, mstc->port, asyh->dp.pbn); From 5ed1c835ed8b522ce25071cc2d56a9a09bd5b59e Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Wed, 14 Aug 2019 16:40:30 -0700 Subject: [PATCH 433/482] MAINTAINERS, x86/CPU: Tony Luck will maintain asm/intel-family.h There are a few different subsystems in the kernel that depend on model specific behaviour (perf, EDAC, power, ...). Easier for just one person to have the task to get new model numbers included instead of having these groups trip over each other to do it. [ bp: s/Cpu/CPU/ and add x86@kernel.org so that it gets CCed too as FYI. ] Signed-off-by: Tony Luck Signed-off-by: Borislav Petkov Acked-by: Thomas Gleixner Cc: Dave Hansen Cc: x86-ml Link: https://lkml.kernel.org/r/20190814234030.30817-1-tony.luck@intel.com --- MAINTAINERS | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index e81e60bd7c26..f3a78403b47f 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8064,6 +8064,13 @@ T: git git://git.code.sf.net/p/intel-sas/isci S: Supported F: drivers/scsi/isci/ +INTEL CPU family model numbers +M: Tony Luck +M: x86@kernel.org +L: linux-kernel@vger.kernel.org +S: Supported +F: arch/x86/include/asm/intel-family.h + INTEL DRM DRIVERS (excluding Poulsbo, Moorestown and derivative chipsets) M: Jani Nikula M: Joonas Lahtinen From 6caf0be40a707689e8ff8824fdb96ef77685b1ba Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Thu, 15 Aug 2019 01:26:02 -0700 Subject: [PATCH 434/482] USB: serial: option: Add Motorola modem UARTs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On Motorola Mapphone devices such as Droid 4 there are five USB ports that do not use the same layout as Gobi 1K/2K/etc devices listed in qcserial.c. So we should use qcaux.c or option.c as noted by Dan Williams . As the Motorola USB serial ports have an interrupt endpoint as shown with lsusb -v, we should use option.c instead of qcaux.c as pointed out by Johan Hovold . The ff/ff/ff interfaces seem to always be UARTs on Motorola devices. For the other interfaces, class 0x0a (CDC Data) should not in general be added as they are typically part of a multi-interface function as noted earlier by Bjørn Mork . However, looking at the Motorola mapphone kernel code, the mdm6600 0x0a class is only used for flashing the modem firmware, and there are no other interfaces. So I've added that too with more details below as it works just fine. The ttyUSB ports on Droid 4 are: ttyUSB0 DIAG, CQDM-capable ttyUSB1 MUX or NMEA, no response ttyUSB2 MUX or NMEA, no response ttyUSB3 TCMD ttyUSB4 AT-capable The ttyUSB0 is detected as QCDM capable by ModemManager. I think it's only used for debugging with ModemManager --debug for sending custom AT commands though. ModemManager already can manage data connection using the USB QMI ports that are already handled by the qmi_wwan.c driver. To enable the MUX or NMEA ports, it seems that something needs to be done additionally to enable them, maybe via the DIAG or TCMD port. It might be just a NVRAM setting somewhere, but I have no idea what NVRAM settings may need changing for that. The TCMD port seems to be a Motorola custom protocol for testing the modem and to configure it's NVRAM and seems to work just fine based on a quick test with a minimal tcmdrw tool I wrote. The voice modem AT-capable port seems to provide only partial support, and no PM support compared to the TS 27.010 based UART wired directly to the modem. The UARTs added with this change are the same product IDs as the Motorola Mapphone Android Linux kernel mdm6600_id_table. I don't have any mdm9600 based devices, so I have only tested these on mdm6600 based droid 4. Then for the class 0x0a (CDC Data) mode, the Motorola Mapphone Android Linux kernel driver moto_flashqsc.c just seems to change the port->bulk_out_size to 8K from the default. And is only used for flashing the modem firmware it seems. I've verified that flashing the modem with signed firmware works just fine with the option driver after manually toggling the GPIO pins, so I've added droid 4 modem flashing mode to the option driver. I've not added the other devices listed in moto_flashqsc.c in case they really need different port->bulk_out_size. Those can be added as they get tested to work for flashing the modem. After this patch the output of /sys/kernel/debug/usb/devices has the following for normal 22b8:2a70 mode including the related qmi_wwan interfaces: T: Bus=01 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 2 Spd=12 MxCh= 0 D: Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=22b8 ProdID=2a70 Rev= 0.00 S: Manufacturer=Motorola, Incorporated S: Product=Flash MZ600 C:* #Ifs= 9 Cfg#= 1 Atr=e0 MxPwr=500mA I:* If#= 0 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=81(I) Atr=02(Bulk) MxPS= 64 Ivl=0ms E: Ad=01(O) Atr=02(Bulk) MxPS= 64 Ivl=0ms I:* If#= 1 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=82(I) Atr=02(Bulk) MxPS= 64 Ivl=0ms E: Ad=02(O) Atr=02(Bulk) MxPS= 64 Ivl=0ms I:* If#= 2 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=83(I) Atr=02(Bulk) MxPS= 64 Ivl=0ms E: Ad=03(O) Atr=02(Bulk) MxPS= 64 Ivl=0ms I:* If#= 3 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=84(I) Atr=02(Bulk) MxPS= 64 Ivl=0ms E: Ad=04(O) Atr=02(Bulk) MxPS= 64 Ivl=0ms I:* If#= 4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=85(I) Atr=03(Int.) MxPS= 64 Ivl=5ms E: Ad=86(I) Atr=02(Bulk) MxPS= 64 Ivl=0ms E: Ad=05(O) Atr=02(Bulk) MxPS= 64 Ivl=0ms I:* If#= 5 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=fb Prot=ff Driver=qmi_wwan E: Ad=87(I) Atr=03(Int.) MxPS= 64 Ivl=5ms E: Ad=88(I) Atr=02(Bulk) MxPS= 64 Ivl=0ms E: Ad=06(O) Atr=02(Bulk) MxPS= 64 Ivl=0ms I:* If#= 6 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=fb Prot=ff Driver=qmi_wwan E: Ad=89(I) Atr=03(Int.) MxPS= 64 Ivl=5ms E: Ad=8a(I) Atr=02(Bulk) MxPS= 64 Ivl=0ms E: Ad=07(O) Atr=02(Bulk) MxPS= 64 Ivl=0ms I:* If#= 7 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=fb Prot=ff Driver=qmi_wwan E: Ad=8b(I) Atr=03(Int.) MxPS= 64 Ivl=5ms E: Ad=8c(I) Atr=02(Bulk) MxPS= 64 Ivl=0ms E: Ad=08(O) Atr=02(Bulk) MxPS= 64 Ivl=0ms I:* If#= 8 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=fb Prot=ff Driver=qmi_wwan E: Ad=8d(I) Atr=03(Int.) MxPS= 64 Ivl=5ms E: Ad=8e(I) Atr=02(Bulk) MxPS= 64 Ivl=0ms E: Ad=09(O) Atr=02(Bulk) MxPS= 64 Ivl=0ms In 22b8:900e "qc_dload" mode the device shows up as: T: Bus=01 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 2 Spd=12 MxCh= 0 D: Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=22b8 ProdID=900e Rev= 0.00 S: Manufacturer=Motorola, Incorporated S: Product=Flash MZ600 C:* #Ifs= 1 Cfg#= 1 Atr=e0 MxPwr=500mA I:* If#= 0 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=81(I) Atr=02(Bulk) MxPS= 64 Ivl=0ms E: Ad=01(O) Atr=02(Bulk) MxPS= 64 Ivl=0ms And in 22b8:4281 "ram_downloader" mode the device shows up as: T: Bus=01 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 2 Spd=12 MxCh= 0 D: Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=22b8 ProdID=4281 Rev= 0.00 S: Manufacturer=Motorola, Incorporated S: Product=Flash MZ600 C:* #Ifs= 1 Cfg#= 1 Atr=e0 MxPwr=500mA I:* If#= 0 Alt= 0 #EPs= 2 Cls=0a(data ) Sub=00 Prot=fc Driver=option E: Ad=81(I) Atr=02(Bulk) MxPS= 64 Ivl=0ms E: Ad=01(O) Atr=02(Bulk) MxPS= 64 Ivl=0ms Cc: Bjørn Mork Cc: Dan Williams Cc: Lars Melin Cc: Marcel Partap Cc: Merlijn Wajer Cc: Michael Scott Cc: NeKit Cc: Pavel Machek Cc: Sebastian Reichel Tested-by: Pavel Machek Signed-off-by: Tony Lindgren Cc: stable Signed-off-by: Johan Hovold --- drivers/usb/serial/option.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index f2c19660ed16..546d7f610a24 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -968,6 +968,11 @@ static const struct usb_device_id option_ids[] = { { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x06, 0x7B) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x06, 0x7C) }, + /* Motorola devices */ + { USB_DEVICE_AND_INTERFACE_INFO(0x22b8, 0x2a70, 0xff, 0xff, 0xff) }, /* mdm6600 */ + { USB_DEVICE_AND_INTERFACE_INFO(0x22b8, 0x2e0a, 0xff, 0xff, 0xff) }, /* mdm9600 */ + { USB_DEVICE_AND_INTERFACE_INFO(0x22b8, 0x4281, 0x0a, 0x00, 0xfc) }, /* mdm ram dl */ + { USB_DEVICE_AND_INTERFACE_INFO(0x22b8, 0x900e, 0xff, 0xff, 0xff) }, /* mdm qc dl */ { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_V640) }, { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_V620) }, From e5d8badf37e6b547842f2fcde10361b29e08bd36 Mon Sep 17 00:00:00 2001 From: Bob Ham Date: Wed, 24 Jul 2019 07:52:26 -0700 Subject: [PATCH 435/482] USB: serial: option: add the BroadMobi BM818 card Add a VID:PID for the BroadMobi BM818 M.2 card T: Bus=01 Lev=03 Prnt=40 Port=03 Cnt=01 Dev#= 44 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=2020 ProdID=2060 Rev=00.00 S: Manufacturer=Qualcomm, Incorporated S: Product=Qualcomm CDMA Technologies MSM C: #Ifs= 5 Cfg#= 1 Atr=e0 MxPwr=500mA I: If#=0x0 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=(none) I: If#=0x1 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=(none) I: If#=0x2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=(none) I: If#=0x3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=fe Prot=ff Driver=(none) I: If#=0x4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=(none) Signed-off-by: Bob Ham Signed-off-by: Angus Ainslie (Purism) Cc: stable [ johan: use USB_DEVICE_INTERFACE_CLASS() ] Signed-off-by: Johan Hovold --- drivers/usb/serial/option.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 546d7f610a24..38e920ac7f82 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -1965,6 +1965,8 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(0x07d1, 0x7e11, 0xff, 0xff, 0xff) }, /* D-Link DWM-156/A3 */ { USB_DEVICE_INTERFACE_CLASS(0x2020, 0x2031, 0xff), /* Olicard 600 */ .driver_info = RSVD(4) }, + { USB_DEVICE_INTERFACE_CLASS(0x2020, 0x2060, 0xff), /* BroadMobi BM818 */ + .driver_info = RSVD(4) }, { USB_DEVICE_INTERFACE_CLASS(0x2020, 0x4000, 0xff) }, /* OLICARD300 - MT6225 */ { USB_DEVICE(INOVIA_VENDOR_ID, INOVIA_SEW858) }, { USB_DEVICE(VIATELECOM_VENDOR_ID, VIATELECOM_PRODUCT_CDS7) }, From c52873e5a1ef72f845526d9f6a50704433f9c625 Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Thu, 8 Aug 2019 16:21:19 +0200 Subject: [PATCH 436/482] usb: cdc-acm: make sure a refcount is taken early enough destroy() will decrement the refcount on the interface, so that it needs to be taken so early that it never undercounts. Fixes: 7fb57a019f94e ("USB: cdc-acm: Fix potential deadlock (lockdep warning)") Cc: stable Reported-and-tested-by: syzbot+1b2449b7b5dc240d107a@syzkaller.appspotmail.com Signed-off-by: Oliver Neukum Link: https://lore.kernel.org/r/20190808142119.7998-1-oneukum@suse.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/cdc-acm.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c index 183b41753c98..62f4fb9b362f 100644 --- a/drivers/usb/class/cdc-acm.c +++ b/drivers/usb/class/cdc-acm.c @@ -1301,10 +1301,6 @@ static int acm_probe(struct usb_interface *intf, tty_port_init(&acm->port); acm->port.ops = &acm_port_ops; - minor = acm_alloc_minor(acm); - if (minor < 0) - goto alloc_fail1; - ctrlsize = usb_endpoint_maxp(epctrl); readsize = usb_endpoint_maxp(epread) * (quirks == SINGLE_RX_URB ? 1 : 2); @@ -1312,6 +1308,13 @@ static int acm_probe(struct usb_interface *intf, acm->writesize = usb_endpoint_maxp(epwrite) * 20; acm->control = control_interface; acm->data = data_interface; + + usb_get_intf(acm->control); /* undone in destruct() */ + + minor = acm_alloc_minor(acm); + if (minor < 0) + goto alloc_fail1; + acm->minor = minor; acm->dev = usb_dev; if (h.usb_cdc_acm_descriptor) @@ -1458,7 +1461,6 @@ static int acm_probe(struct usb_interface *intf, usb_driver_claim_interface(&acm_driver, data_interface, acm); usb_set_intfdata(data_interface, acm); - usb_get_intf(control_interface); tty_dev = tty_port_register_device(&acm->port, acm_tty_driver, minor, &control_interface->dev); if (IS_ERR(tty_dev)) { From 54364278fb3cabdea51d6398b07c87415065b3fc Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Tue, 13 Aug 2019 11:35:41 +0200 Subject: [PATCH 437/482] USB: CDC: fix sanity checks in CDC union parser A few checks checked for the size of the pointer to a structure instead of the structure itself. Copy & paste issue presumably. Fixes: e4c6fb7794982 ("usbnet: move the CDC parser into USB core") Cc: stable Reported-by: syzbot+45a53506b65321c1fe91@syzkaller.appspotmail.com Signed-off-by: Oliver Neukum Link: https://lore.kernel.org/r/20190813093541.18889-1-oneukum@suse.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/message.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/core/message.c b/drivers/usb/core/message.c index e844bb7b5676..5adf489428aa 100644 --- a/drivers/usb/core/message.c +++ b/drivers/usb/core/message.c @@ -2218,14 +2218,14 @@ int cdc_parse_cdc_header(struct usb_cdc_parsed_header *hdr, (struct usb_cdc_dmm_desc *)buffer; break; case USB_CDC_MDLM_TYPE: - if (elength < sizeof(struct usb_cdc_mdlm_desc *)) + if (elength < sizeof(struct usb_cdc_mdlm_desc)) goto next_desc; if (desc) return -EINVAL; desc = (struct usb_cdc_mdlm_desc *)buffer; break; case USB_CDC_MDLM_DETAIL_TYPE: - if (elength < sizeof(struct usb_cdc_mdlm_detail_desc *)) + if (elength < sizeof(struct usb_cdc_mdlm_detail_desc)) goto next_desc; if (detail) return -EINVAL; From 777758888ffe59ef754cc39ab2f275dc277732f4 Mon Sep 17 00:00:00 2001 From: Hans Ulli Kroll Date: Sat, 10 Aug 2019 17:04:58 +0200 Subject: [PATCH 438/482] usb: host: fotg2: restart hcd after port reset On the Gemini SoC the FOTG2 stalls after port reset so restart the HCD after each port reset. Signed-off-by: Hans Ulli Kroll Signed-off-by: Linus Walleij Link: https://lore.kernel.org/r/20190810150458.817-1-linus.walleij@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/fotg210-hcd.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/usb/host/fotg210-hcd.c b/drivers/usb/host/fotg210-hcd.c index 77cc36efae95..0dbfa5c10703 100644 --- a/drivers/usb/host/fotg210-hcd.c +++ b/drivers/usb/host/fotg210-hcd.c @@ -1629,6 +1629,10 @@ static int fotg210_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue, /* see what we found out */ temp = check_reset_complete(fotg210, wIndex, status_reg, fotg210_readl(fotg210, status_reg)); + + /* restart schedule */ + fotg210->command |= CMD_RUN; + fotg210_writel(fotg210, fotg210->command, &fotg210->regs->command); } if (!(temp & (PORT_RESUME|PORT_RESET))) { From 141822aa3f79efc8a2ec3ed464f2fd2c93ccd803 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Draszik?= Date: Sat, 10 Aug 2019 16:07:58 +0100 Subject: [PATCH 439/482] usb: chipidea: imx: fix EPROBE_DEFER support during driver probe MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If driver probe needs to be deferred, e.g. because ci_hdrc_add_device() isn't ready yet, this driver currently misbehaves badly: a) success is still reported to the driver core (meaning a 2nd probe attempt will never be done), leaving the driver in a dysfunctional state and the hardware unusable b) driver remove / shutdown OOPSes: [ 206.786916] Unable to handle kernel paging request at virtual address fffffdff [ 206.794148] pgd = 880b9f82 [ 206.796890] [fffffdff] *pgd=abf5e861, *pte=00000000, *ppte=00000000 [ 206.803179] Internal error: Oops: 37 [#1] PREEMPT SMP ARM [ 206.808581] Modules linked in: wl18xx evbug [ 206.813308] CPU: 1 PID: 1 Comm: systemd-shutdow Not tainted 4.19.35+gf345c93b4195 #1 [ 206.821053] Hardware name: Freescale i.MX7 Dual (Device Tree) [ 206.826813] PC is at ci_hdrc_remove_device+0x4/0x20 [ 206.831699] LR is at ci_hdrc_imx_remove+0x20/0xe8 [ 206.836407] pc : [<805cd4b0>] lr : [<805d62cc>] psr: 20000013 [ 206.842678] sp : a806be40 ip : 00000001 fp : 80adbd3c [ 206.847906] r10: 80b1b794 r9 : 80d5dfe0 r8 : a8192c44 [ 206.853136] r7 : 80db93a0 r6 : a8192c10 r5 : a8192c00 r4 : a93a4a00 [ 206.859668] r3 : 00000000 r2 : a8192ce4 r1 : ffffffff r0 : fffffdfb [ 206.866201] Flags: nzCv IRQs on FIQs on Mode SVC_32 ISA ARM Segment none [ 206.873341] Control: 10c5387d Table: a9e0c06a DAC: 00000051 [ 206.879092] Process systemd-shutdow (pid: 1, stack limit = 0xb271353c) [ 206.885624] Stack: (0xa806be40 to 0xa806c000) [ 206.889992] be40: a93a4a00 805d62cc a8192c1c a8170e10 a8192c10 8049a490 80d04d08 00000000 [ 206.898179] be60: 00000000 80d0da2c fee1dead 00000000 a806a000 00000058 00000000 80148b08 [ 206.906366] be80: 01234567 80148d8c a9858600 00000000 00000000 00000000 00000000 80d04d08 [ 206.914553] bea0: 00000000 00000000 a82741e0 a9858600 00000024 00000002 a9858608 00000005 [ 206.922740] bec0: 0000001e 8022c058 00000000 00000000 a806bf14 a9858600 00000000 a806befc [ 206.930927] bee0: a806bf78 00000000 7ee12c30 8022c18c a806bef8 a806befc 00000000 00000001 [ 206.939115] bf00: 00000000 00000024 a806bf14 00000005 7ee13b34 7ee12c68 00000004 7ee13f20 [ 206.947302] bf20: 00000010 7ee12c7c 00000005 7ee12d04 0000000a 76e7dc00 00000001 80d0f140 [ 206.955490] bf40: ab637880 a974de40 60000013 80d0f140 ab6378a0 80d04d08 a8080470 a9858600 [ 206.963677] bf60: a9858600 00000000 00000000 8022c24c 00000000 80144310 00000000 00000000 [ 206.971864] bf80: 80101204 80d04d08 00000000 80d04d08 00000000 00000000 00000003 00000058 [ 206.980051] bfa0: 80101204 80101000 00000000 00000000 fee1dead 28121969 01234567 00000000 [ 206.988237] bfc0: 00000000 00000000 00000003 00000058 00000000 00000000 00000000 00000000 [ 206.996425] bfe0: 0049ffb0 7ee13d58 0048a84b 76f245a6 60000030 fee1dead 00000000 00000000 [ 207.004622] [<805cd4b0>] (ci_hdrc_remove_device) from [<805d62cc>] (ci_hdrc_imx_remove+0x20/0xe8) [ 207.013509] [<805d62cc>] (ci_hdrc_imx_remove) from [<8049a490>] (device_shutdown+0x16c/0x218) [ 207.022050] [<8049a490>] (device_shutdown) from [<80148b08>] (kernel_restart+0xc/0x50) [ 207.029980] [<80148b08>] (kernel_restart) from [<80148d8c>] (sys_reboot+0xf4/0x1f0) [ 207.037648] [<80148d8c>] (sys_reboot) from [<80101000>] (ret_fast_syscall+0x0/0x54) [ 207.045308] Exception stack(0xa806bfa8 to 0xa806bff0) [ 207.050368] bfa0: 00000000 00000000 fee1dead 28121969 01234567 00000000 [ 207.058554] bfc0: 00000000 00000000 00000003 00000058 00000000 00000000 00000000 00000000 [ 207.066737] bfe0: 0049ffb0 7ee13d58 0048a84b 76f245a6 [ 207.071799] Code: ebffffa8 e3a00000 e8bd8010 e92d4010 (e5904004) [ 207.078021] ---[ end trace be47424e3fd46e9f ]--- [ 207.082647] Kernel panic - not syncing: Fatal exception [ 207.087894] ---[ end Kernel panic - not syncing: Fatal exception ]--- c) the error path in combination with driver removal causes imbalanced calls to the clk_*() and pm_()* APIs a) happens because the original intended return value is overwritten (with 0) by the return code of regulator_disable() in ci_hdrc_imx_probe()'s error path b) happens because ci_pdev is -EPROBE_DEFER, which causes ci_hdrc_remove_device() to OOPS Fix a) by being more careful in ci_hdrc_imx_probe()'s error path and not overwriting the real error code Fix b) by calling the respective cleanup functions during remove only when needed (when ci_pdev != NULL, i.e. when everything was initialised correctly). This also has the side effect of not causing imbalanced clk_*() and pm_*() API calls as part of the error code path. Fixes: 7c8e8909417e ("usb: chipidea: imx: add HSIC support") Signed-off-by: André Draszik Cc: stable CC: Peter Chen CC: Greg Kroah-Hartman CC: Shawn Guo CC: Sascha Hauer CC: Pengutronix Kernel Team CC: Fabio Estevam CC: NXP Linux Team CC: linux-usb@vger.kernel.org CC: linux-arm-kernel@lists.infradead.org CC: linux-kernel@vger.kernel.org Link: https://lore.kernel.org/r/20190810150758.17694-1-git@andred.net Signed-off-by: Greg Kroah-Hartman --- drivers/usb/chipidea/ci_hdrc_imx.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/drivers/usb/chipidea/ci_hdrc_imx.c b/drivers/usb/chipidea/ci_hdrc_imx.c index b5abfe89190c..df8812c30640 100644 --- a/drivers/usb/chipidea/ci_hdrc_imx.c +++ b/drivers/usb/chipidea/ci_hdrc_imx.c @@ -454,9 +454,11 @@ static int ci_hdrc_imx_probe(struct platform_device *pdev) imx_disable_unprepare_clks(dev); disable_hsic_regulator: if (data->hsic_pad_regulator) - ret = regulator_disable(data->hsic_pad_regulator); + /* don't overwrite original ret (cf. EPROBE_DEFER) */ + regulator_disable(data->hsic_pad_regulator); if (pdata.flags & CI_HDRC_PMQOS) pm_qos_remove_request(&data->pm_qos_req); + data->ci_pdev = NULL; return ret; } @@ -469,14 +471,17 @@ static int ci_hdrc_imx_remove(struct platform_device *pdev) pm_runtime_disable(&pdev->dev); pm_runtime_put_noidle(&pdev->dev); } - ci_hdrc_remove_device(data->ci_pdev); + if (data->ci_pdev) + ci_hdrc_remove_device(data->ci_pdev); if (data->override_phy_control) usb_phy_shutdown(data->phy); - imx_disable_unprepare_clks(&pdev->dev); - if (data->plat_data->flags & CI_HDRC_PMQOS) - pm_qos_remove_request(&data->pm_qos_req); - if (data->hsic_pad_regulator) - regulator_disable(data->hsic_pad_regulator); + if (data->ci_pdev) { + imx_disable_unprepare_clks(&pdev->dev); + if (data->plat_data->flags & CI_HDRC_PMQOS) + pm_qos_remove_request(&data->pm_qos_req); + if (data->hsic_pad_regulator) + regulator_disable(data->hsic_pad_regulator); + } return 0; } From dd3ecf17ba70a70d2c9ef9ba725281b84f8eef12 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 11 Aug 2019 10:05:15 +0200 Subject: [PATCH 440/482] usb: don't create dma pools for HCDs with a localmem_pool If the HCD provides a localmem pool we will never use the DMA pools, so don't create them. Fixes: b0310c2f09bb ("USB: use genalloc for USB HCs with local memory") Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20190811080520.21712-2-hch@lst.de Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/buffer.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/usb/core/buffer.c b/drivers/usb/core/buffer.c index 1359b78a624e..1a5b3dcae930 100644 --- a/drivers/usb/core/buffer.c +++ b/drivers/usb/core/buffer.c @@ -66,9 +66,9 @@ int hcd_buffer_create(struct usb_hcd *hcd) char name[16]; int i, size; - if (!IS_ENABLED(CONFIG_HAS_DMA) || - (!is_device_dma_capable(hcd->self.sysdev) && - !hcd->localmem_pool)) + if (hcd->localmem_pool || + !IS_ENABLED(CONFIG_HAS_DMA) || + !is_device_dma_capable(hcd->self.sysdev)) return 0; for (i = 0; i < HCD_BUFFER_POOLS; i++) { From edfbcb321faf07ca970e4191abe061deeb7d3788 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 11 Aug 2019 10:05:16 +0200 Subject: [PATCH 441/482] usb: add a hcd_uses_dma helper The USB buffer allocation code is the only place in the usb core (and in fact the whole kernel) that uses is_device_dma_capable, while the URB mapping code uses the uses_dma flag in struct usb_bus. Switch the buffer allocation to use the uses_dma flag used by the rest of the USB code, and create a helper in hcd.h that checks this flag as well as the CONFIG_HAS_DMA to simplify the caller a bit. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20190811080520.21712-3-hch@lst.de Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/buffer.c | 10 +++------- drivers/usb/core/hcd.c | 4 ++-- drivers/usb/dwc2/hcd.c | 2 +- include/linux/usb.h | 2 +- include/linux/usb/hcd.h | 3 +++ 5 files changed, 10 insertions(+), 11 deletions(-) diff --git a/drivers/usb/core/buffer.c b/drivers/usb/core/buffer.c index 1a5b3dcae930..6cf22c27f2d2 100644 --- a/drivers/usb/core/buffer.c +++ b/drivers/usb/core/buffer.c @@ -66,9 +66,7 @@ int hcd_buffer_create(struct usb_hcd *hcd) char name[16]; int i, size; - if (hcd->localmem_pool || - !IS_ENABLED(CONFIG_HAS_DMA) || - !is_device_dma_capable(hcd->self.sysdev)) + if (hcd->localmem_pool || !hcd_uses_dma(hcd)) return 0; for (i = 0; i < HCD_BUFFER_POOLS; i++) { @@ -129,8 +127,7 @@ void *hcd_buffer_alloc( return gen_pool_dma_alloc(hcd->localmem_pool, size, dma); /* some USB hosts just use PIO */ - if (!IS_ENABLED(CONFIG_HAS_DMA) || - !is_device_dma_capable(bus->sysdev)) { + if (!hcd_uses_dma(hcd)) { *dma = ~(dma_addr_t) 0; return kmalloc(size, mem_flags); } @@ -160,8 +157,7 @@ void hcd_buffer_free( return; } - if (!IS_ENABLED(CONFIG_HAS_DMA) || - !is_device_dma_capable(bus->sysdev)) { + if (!hcd_uses_dma(hcd)) { kfree(addr); return; } diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c index 2ccbc2f83570..8592c0344fe8 100644 --- a/drivers/usb/core/hcd.c +++ b/drivers/usb/core/hcd.c @@ -1412,7 +1412,7 @@ int usb_hcd_map_urb_for_dma(struct usb_hcd *hcd, struct urb *urb, if (usb_endpoint_xfer_control(&urb->ep->desc)) { if (hcd->self.uses_pio_for_control) return ret; - if (IS_ENABLED(CONFIG_HAS_DMA) && hcd->self.uses_dma) { + if (hcd_uses_dma(hcd)) { if (is_vmalloc_addr(urb->setup_packet)) { WARN_ONCE(1, "setup packet is not dma capable\n"); return -EAGAIN; @@ -1446,7 +1446,7 @@ int usb_hcd_map_urb_for_dma(struct usb_hcd *hcd, struct urb *urb, dir = usb_urb_dir_in(urb) ? DMA_FROM_DEVICE : DMA_TO_DEVICE; if (urb->transfer_buffer_length != 0 && !(urb->transfer_flags & URB_NO_TRANSFER_DMA_MAP)) { - if (IS_ENABLED(CONFIG_HAS_DMA) && hcd->self.uses_dma) { + if (hcd_uses_dma(hcd)) { if (urb->num_sgs) { int n; diff --git a/drivers/usb/dwc2/hcd.c b/drivers/usb/dwc2/hcd.c index ee144ff8af5b..111787a137ee 100644 --- a/drivers/usb/dwc2/hcd.c +++ b/drivers/usb/dwc2/hcd.c @@ -4608,7 +4608,7 @@ static int _dwc2_hcd_urb_enqueue(struct usb_hcd *hcd, struct urb *urb, buf = urb->transfer_buffer; - if (hcd->self.uses_dma) { + if (hcd_uses_dma(hcd)) { if (!buf && (urb->transfer_dma & 3)) { dev_err(hsotg->dev, "%s: unaligned transfer with no transfer_buffer", diff --git a/include/linux/usb.h b/include/linux/usb.h index 83d35d993e8c..e87826e23d59 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -1457,7 +1457,7 @@ typedef void (*usb_complete_t)(struct urb *); * field rather than determining a dma address themselves. * * Note that transfer_buffer must still be set if the controller - * does not support DMA (as indicated by bus.uses_dma) and when talking + * does not support DMA (as indicated by hcd_uses_dma()) and when talking * to root hub. If you have to trasfer between highmem zone and the device * on such controller, create a bounce buffer or bail out with an error. * If transfer_buffer cannot be set (is in highmem) and the controller is DMA diff --git a/include/linux/usb/hcd.h b/include/linux/usb/hcd.h index bab27ccc8ff5..a20e7815d814 100644 --- a/include/linux/usb/hcd.h +++ b/include/linux/usb/hcd.h @@ -422,6 +422,9 @@ static inline bool hcd_periodic_completion_in_progress(struct usb_hcd *hcd, return hcd->high_prio_bh.completing_ep == ep; } +#define hcd_uses_dma(hcd) \ + (IS_ENABLED(CONFIG_HAS_DMA) && (hcd)->self.uses_dma) + extern int usb_hcd_link_urb_to_ep(struct usb_hcd *hcd, struct urb *urb); extern int usb_hcd_check_unlink_urb(struct usb_hcd *hcd, struct urb *urb, int status); From 01fd150f4aff5bee03db5bda3b94c1bdbe9c6375 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 13 Aug 2019 16:01:20 -0700 Subject: [PATCH 442/482] misc: xilinx-sdfec: fix dependency and build error lib/devres.c, which implements devm_ioremap_resource(), is only built when CONFIG_HAS_IOMEM is set/enabled, so XILINX_SDFEC should depend on HAS_IOMEM. Fixes this build error (as seen on UML builds): ERROR: "devm_ioremap_resource" [drivers/misc/xilinx_sdfec.ko] undefined! Fixes: 76d83e1c3233 ("misc: xilinx-sdfec: add core driver") Signed-off-by: Randy Dunlap Cc: Derek Kiernan Cc: Dragan Cvetic Cc: Greg Kroah-Hartman Cc: Geert Uytterhoeven Cc: Arnd Bergmann Link: https://lore.kernel.org/r/f9004be5-9925-327b-3ec2-6506e46fe565@infradead.org Signed-off-by: Greg Kroah-Hartman --- drivers/misc/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig index 6abfc8e92fcc..16900357afc2 100644 --- a/drivers/misc/Kconfig +++ b/drivers/misc/Kconfig @@ -465,6 +465,7 @@ config PCI_ENDPOINT_TEST config XILINX_SDFEC tristate "Xilinx SDFEC 16" + depends on HAS_IOMEM help This option enables support for the Xilinx SDFEC (Soft Decision Forward Error Correction) driver. This enables a char driver From 99c79f6692ccdc42e04deea8a36e22bb48168a62 Mon Sep 17 00:00:00 2001 From: Aleix Roca Nonell Date: Thu, 15 Aug 2019 14:03:22 +0200 Subject: [PATCH 443/482] io_uring: fix manual setup of iov_iter for fixed buffers Commit bd11b3a391e3 ("io_uring: don't use iov_iter_advance() for fixed buffers") introduced an optimization to avoid using the slow iov_iter_advance by manually populating the iov_iter iterator in some cases. However, the computation of the iterator count field was erroneous: The first bvec was always accounted for an extent of page size even if the bvec length was smaller. In consequence, some I/O operations on fixed buffers were unable to operate on the full extent of the buffer, consistently skipping some bytes at the end of it. Fixes: bd11b3a391e3 ("io_uring: don't use iov_iter_advance() for fixed buffers") Cc: stable@vger.kernel.org Signed-off-by: Aleix Roca Nonell Signed-off-by: Jens Axboe --- fs/io_uring.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index d542f1cf4428..aa25b5bbd4ae 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -1097,10 +1097,8 @@ static int io_import_fixed(struct io_ring_ctx *ctx, int rw, iter->bvec = bvec + seg_skip; iter->nr_segs -= seg_skip; - iter->count -= (seg_skip << PAGE_SHIFT); + iter->count -= bvec->bv_len + offset; iter->iov_offset = offset & ~PAGE_MASK; - if (iter->iov_offset) - iter->count -= iter->iov_offset; } } From 7b6620d7db566a46f49b4b9deab9fa061fd4b59b Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 15 Aug 2019 11:09:16 -0600 Subject: [PATCH 444/482] block: remove REQ_NOWAIT_INLINE We had a few issues with this code, and there's still a problem around how we deal with error handling for chained/split bios. For now, just revert the code and we'll try again with a thoroug solution. This reverts commits: e15c2ffa1091 ("block: fix O_DIRECT error handling for bio fragments") 0eb6ddfb865c ("block: Fix __blkdev_direct_IO() for bio fragments") 6a43074e2f46 ("block: properly handle IOCB_NOWAIT for async O_DIRECT IO") 893a1c97205a ("blk-mq: allow REQ_NOWAIT to return an error inline") Signed-off-by: Jens Axboe --- block/blk-mq.c | 8 ++----- fs/block_dev.c | 49 ++++----------------------------------- include/linux/blk_types.h | 5 +--- 3 files changed, 8 insertions(+), 54 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index a8e6a58f5f28..0835f4d8d42e 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1958,13 +1958,9 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio) rq = blk_mq_get_request(q, bio, &data); if (unlikely(!rq)) { rq_qos_cleanup(q, bio); - - cookie = BLK_QC_T_NONE; - if (bio->bi_opf & REQ_NOWAIT_INLINE) - cookie = BLK_QC_T_EAGAIN; - else if (bio->bi_opf & REQ_NOWAIT) + if (bio->bi_opf & REQ_NOWAIT) bio_wouldblock_error(bio); - return cookie; + return BLK_QC_T_NONE; } trace_block_getrq(q, bio, bio->bi_opf); diff --git a/fs/block_dev.c b/fs/block_dev.c index eb657ab94060..677cb364d33f 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -345,24 +345,15 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages) struct bio *bio; bool is_poll = (iocb->ki_flags & IOCB_HIPRI) != 0; bool is_read = (iov_iter_rw(iter) == READ), is_sync; - bool nowait = (iocb->ki_flags & IOCB_NOWAIT) != 0; loff_t pos = iocb->ki_pos; blk_qc_t qc = BLK_QC_T_NONE; - gfp_t gfp; - int ret; + int ret = 0; if ((pos | iov_iter_alignment(iter)) & (bdev_logical_block_size(bdev) - 1)) return -EINVAL; - if (nowait) - gfp = GFP_NOWAIT; - else - gfp = GFP_KERNEL; - - bio = bio_alloc_bioset(gfp, nr_pages, &blkdev_dio_pool); - if (!bio) - return -EAGAIN; + bio = bio_alloc_bioset(GFP_KERNEL, nr_pages, &blkdev_dio_pool); dio = container_of(bio, struct blkdev_dio, bio); dio->is_sync = is_sync = is_sync_kiocb(iocb); @@ -384,7 +375,6 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages) if (!is_poll) blk_start_plug(&plug); - ret = 0; for (;;) { bio_set_dev(bio, bdev); bio->bi_iter.bi_sector = pos >> 9; @@ -409,14 +399,7 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages) task_io_account_write(bio->bi_iter.bi_size); } - /* - * Tell underlying layer to not block for resource shortage. - * And if we would have blocked, return error inline instead - * of through the bio->bi_end_io() callback. - */ - if (nowait) - bio->bi_opf |= (REQ_NOWAIT | REQ_NOWAIT_INLINE); - + dio->size += bio->bi_iter.bi_size; pos += bio->bi_iter.bi_size; nr_pages = iov_iter_npages(iter, BIO_MAX_PAGES); @@ -428,13 +411,7 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages) polled = true; } - dio->size += bio->bi_iter.bi_size; qc = submit_bio(bio); - if (qc == BLK_QC_T_EAGAIN) { - dio->size -= bio->bi_iter.bi_size; - ret = -EAGAIN; - goto error; - } if (polled) WRITE_ONCE(iocb->ki_cookie, qc); @@ -455,19 +432,8 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages) atomic_inc(&dio->ref); } - dio->size += bio->bi_iter.bi_size; - qc = submit_bio(bio); - if (qc == BLK_QC_T_EAGAIN) { - dio->size -= bio->bi_iter.bi_size; - ret = -EAGAIN; - goto error; - } - - bio = bio_alloc(gfp, nr_pages); - if (!bio) { - ret = -EAGAIN; - goto error; - } + submit_bio(bio); + bio = bio_alloc(GFP_KERNEL, nr_pages); } if (!is_poll) @@ -487,7 +453,6 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages) } __set_current_state(TASK_RUNNING); -out: if (!ret) ret = blk_status_to_errno(dio->bio.bi_status); if (likely(!ret)) @@ -495,10 +460,6 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages) bio_put(&dio->bio); return ret; -error: - if (!is_poll) - blk_finish_plug(&plug); - goto out; } static ssize_t diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 1b1fa1557e68..feff3fe4467e 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -311,7 +311,6 @@ enum req_flag_bits { __REQ_RAHEAD, /* read ahead, can fail anytime */ __REQ_BACKGROUND, /* background IO */ __REQ_NOWAIT, /* Don't wait if request will block */ - __REQ_NOWAIT_INLINE, /* Return would-block error inline */ /* * When a shared kthread needs to issue a bio for a cgroup, doing * so synchronously can lead to priority inversions as the kthread @@ -346,7 +345,6 @@ enum req_flag_bits { #define REQ_RAHEAD (1ULL << __REQ_RAHEAD) #define REQ_BACKGROUND (1ULL << __REQ_BACKGROUND) #define REQ_NOWAIT (1ULL << __REQ_NOWAIT) -#define REQ_NOWAIT_INLINE (1ULL << __REQ_NOWAIT_INLINE) #define REQ_CGROUP_PUNT (1ULL << __REQ_CGROUP_PUNT) #define REQ_NOUNMAP (1ULL << __REQ_NOUNMAP) @@ -420,13 +418,12 @@ static inline int op_stat_group(unsigned int op) typedef unsigned int blk_qc_t; #define BLK_QC_T_NONE -1U -#define BLK_QC_T_EAGAIN -2U #define BLK_QC_T_SHIFT 16 #define BLK_QC_T_INTERNAL (1U << 31) static inline bool blk_qc_t_valid(blk_qc_t cookie) { - return cookie != BLK_QC_T_NONE && cookie != BLK_QC_T_EAGAIN; + return cookie != BLK_QC_T_NONE; } static inline unsigned int blk_qc_t_to_queue_num(blk_qc_t cookie) From a982eeb09b6030e567b8b815277c8c9197168040 Mon Sep 17 00:00:00 2001 From: Jackie Liu Date: Wed, 14 Aug 2019 17:35:22 +0800 Subject: [PATCH 445/482] io_uring: fix an issue when IOSQE_IO_LINK is inserted into defer list This patch may fix two issues: First, when IOSQE_IO_DRAIN set, the next IOs need to be inserted into defer list to delay execution, but link io will be actively scheduled to run by calling io_queue_sqe. Second, when multiple LINK_IOs are inserted together with defer_list, the LINK_IO is no longer keep order. |-------------| | LINK_IO | ----> insert to defer_list ----------- |-------------| | | LINK_IO | ----> insert to defer_list ----------| |-------------| | | LINK_IO | ----> insert to defer_list ----------| |-------------| | | NORMAL_IO | ----> insert to defer_list ----------| |-------------| | | queue_work at same time <-----| Fixes: 9e645e1105c ("io_uring: add support for sqe links") Signed-off-by: Jackie Liu Signed-off-by: Jens Axboe --- fs/io_uring.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index aa25b5bbd4ae..24bbe3cb7ad4 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -2023,6 +2023,15 @@ static int io_queue_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req, { int ret; + ret = io_req_defer(ctx, req, s->sqe); + if (ret) { + if (ret != -EIOCBQUEUED) { + io_free_req(req); + io_cqring_add_event(ctx, s->sqe->user_data, ret); + } + return 0; + } + ret = __io_submit_sqe(ctx, req, s, true); if (ret == -EAGAIN && !(req->flags & REQ_F_NOWAIT)) { struct io_uring_sqe *sqe_copy; @@ -2095,13 +2104,6 @@ static void io_submit_sqe(struct io_ring_ctx *ctx, struct sqe_submit *s, return; } - ret = io_req_defer(ctx, req, s->sqe); - if (ret) { - if (ret != -EIOCBQUEUED) - goto err_req; - return; - } - /* * If we already have a head request, queue this one for async * submittal once the head completes. If we don't have a head but From 2aafdf5a5786ebbd8ccfe132ed6267c6962c5c3c Mon Sep 17 00:00:00 2001 From: Anders Roxell Date: Thu, 15 Aug 2019 09:58:26 +0200 Subject: [PATCH 446/482] selftests: net: tcp_fastopen_backup_key.sh: fix shellcheck issue When running tcp_fastopen_backup_key.sh the following issue was seen in a busybox environment. ./tcp_fastopen_backup_key.sh: line 33: [: -ne: unary operator expected Shellcheck showed the following issue. $ shellcheck tools/testing/selftests/net/tcp_fastopen_backup_key.sh In tools/testing/selftests/net/tcp_fastopen_backup_key.sh line 33: if [ $val -ne 0 ]; then ^-- SC2086: Double quote to prevent globbing and word splitting. Rework to do a string comparison instead. Signed-off-by: Anders Roxell Signed-off-by: David S. Miller --- tools/testing/selftests/net/tcp_fastopen_backup_key.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/tcp_fastopen_backup_key.sh b/tools/testing/selftests/net/tcp_fastopen_backup_key.sh index 41476399e184..f6e65674b83c 100755 --- a/tools/testing/selftests/net/tcp_fastopen_backup_key.sh +++ b/tools/testing/selftests/net/tcp_fastopen_backup_key.sh @@ -30,7 +30,7 @@ do_test() { ip netns exec "${NETNS}" ./tcp_fastopen_backup_key "$1" val=$(ip netns exec "${NETNS}" nstat -az | \ grep TcpExtTCPFastOpenPassiveFail | awk '{print $2}') - if [ $val -ne 0 ]; then + if [ "$val" != 0 ]; then echo "FAIL: TcpExtTCPFastOpenPassiveFail non-zero" return 1 fi From e0d57d9c7e7a223f3c2ff8b7b63ec1bf63f11ed4 Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Fri, 2 Aug 2019 14:42:14 +0300 Subject: [PATCH 447/482] net/mlx5e: Fix a race with XSKICOSQ in XSK wakeup flow Add a missing spinlock around XSKICOSQ usage at the activation stage, because there is a race between a configuration change and the application calling sendto(). Fixes: db05815b36cb ("net/mlx5e: Add XSK zero-copy support") Signed-off-by: Maxim Mikityanskiy Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c index aaffa6f68dc0..7f78c004d12f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c @@ -143,7 +143,10 @@ void mlx5e_activate_xsk(struct mlx5e_channel *c) { set_bit(MLX5E_RQ_STATE_ENABLED, &c->xskrq.state); /* TX queue is created active. */ + + spin_lock(&c->xskicosq_lock); mlx5e_trigger_irq(&c->xskicosq); + spin_unlock(&c->xskicosq_lock); } void mlx5e_deactivate_xsk(struct mlx5e_channel *c) From f43d48d10a42787c1de1d3facd7db210c91db1da Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Thu, 1 Aug 2019 14:27:30 +0300 Subject: [PATCH 448/482] net/mlx5e: Fix compatibility issue with ethtool flash device Cited patch deleted ethtool flash device support, as ethtool core can fallback into devlink flash callback. However, this is supported only if there is a devlink port registered over the corresponding netdevice. As mlx5e do not have devlink port support over native netdevice, it broke the ability to flash device via ethtool. This patch re-add the ethtool callback to avoid user functionality breakage when trying to flash device via ethtool. Fixes: 9c8bca2637b8 ("mlx5: Move firmware flash implementation to devlink") Signed-off-by: Eran Ben Elisha Acked-by: Jiri Pirko Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 2 ++ .../ethernet/mellanox/mlx5/core/en_ethtool.c | 35 +++++++++++++++++++ .../mellanox/mlx5/core/ipoib/ethtool.c | 9 +++++ 3 files changed, 46 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index f6b64a03cd06..65bec19a438f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -1105,6 +1105,8 @@ u32 mlx5e_ethtool_get_rxfh_key_size(struct mlx5e_priv *priv); u32 mlx5e_ethtool_get_rxfh_indir_size(struct mlx5e_priv *priv); int mlx5e_ethtool_get_ts_info(struct mlx5e_priv *priv, struct ethtool_ts_info *info); +int mlx5e_ethtool_flash_device(struct mlx5e_priv *priv, + struct ethtool_flash *flash); void mlx5e_ethtool_get_pauseparam(struct mlx5e_priv *priv, struct ethtool_pauseparam *pauseparam); int mlx5e_ethtool_set_pauseparam(struct mlx5e_priv *priv, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index e89dba790a2d..20e628c907e5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -1690,6 +1690,40 @@ static int mlx5e_get_module_eeprom(struct net_device *netdev, return 0; } +int mlx5e_ethtool_flash_device(struct mlx5e_priv *priv, + struct ethtool_flash *flash) +{ + struct mlx5_core_dev *mdev = priv->mdev; + struct net_device *dev = priv->netdev; + const struct firmware *fw; + int err; + + if (flash->region != ETHTOOL_FLASH_ALL_REGIONS) + return -EOPNOTSUPP; + + err = request_firmware_direct(&fw, flash->data, &dev->dev); + if (err) + return err; + + dev_hold(dev); + rtnl_unlock(); + + err = mlx5_firmware_flash(mdev, fw, NULL); + release_firmware(fw); + + rtnl_lock(); + dev_put(dev); + return err; +} + +static int mlx5e_flash_device(struct net_device *dev, + struct ethtool_flash *flash) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + return mlx5e_ethtool_flash_device(priv, flash); +} + static int set_pflag_cqe_based_moder(struct net_device *netdev, bool enable, bool is_rx_cq) { @@ -1972,6 +2006,7 @@ const struct ethtool_ops mlx5e_ethtool_ops = { .set_wol = mlx5e_set_wol, .get_module_info = mlx5e_get_module_info, .get_module_eeprom = mlx5e_get_module_eeprom, + .flash_device = mlx5e_flash_device, .get_priv_flags = mlx5e_get_priv_flags, .set_priv_flags = mlx5e_set_priv_flags, .self_test = mlx5e_self_test, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c index ebd81f6b556e..90cb50fe17fd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c @@ -122,6 +122,14 @@ static int mlx5i_get_ts_info(struct net_device *netdev, return mlx5e_ethtool_get_ts_info(priv, info); } +static int mlx5i_flash_device(struct net_device *netdev, + struct ethtool_flash *flash) +{ + struct mlx5e_priv *priv = mlx5i_epriv(netdev); + + return mlx5e_ethtool_flash_device(priv, flash); +} + enum mlx5_ptys_width { MLX5_PTYS_WIDTH_1X = 1 << 0, MLX5_PTYS_WIDTH_2X = 1 << 1, @@ -233,6 +241,7 @@ const struct ethtool_ops mlx5i_ethtool_ops = { .get_ethtool_stats = mlx5i_get_ethtool_stats, .get_ringparam = mlx5i_get_ringparam, .set_ringparam = mlx5i_set_ringparam, + .flash_device = mlx5i_flash_device, .get_channels = mlx5i_get_channels, .set_channels = mlx5i_set_channels, .get_coalesce = mlx5i_get_coalesce, From 6f967f8b1be7001b31c46429f2ee7d275af2190f Mon Sep 17 00:00:00 2001 From: Wenwen Wang Date: Wed, 14 Aug 2019 00:14:49 -0500 Subject: [PATCH 449/482] liquidio: add cleanup in octeon_setup_iq() If oct->fn_list.enable_io_queues() fails, no cleanup is executed, leading to memory/resource leaks. To fix this issue, invoke octeon_delete_instr_queue() before returning from the function. Signed-off-by: Wenwen Wang Signed-off-by: David S. Miller --- drivers/net/ethernet/cavium/liquidio/request_manager.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/cavium/liquidio/request_manager.c b/drivers/net/ethernet/cavium/liquidio/request_manager.c index 032224178b64..6dd65f9b347c 100644 --- a/drivers/net/ethernet/cavium/liquidio/request_manager.c +++ b/drivers/net/ethernet/cavium/liquidio/request_manager.c @@ -237,8 +237,10 @@ int octeon_setup_iq(struct octeon_device *oct, } oct->num_iqs++; - if (oct->fn_list.enable_io_queues(oct)) + if (oct->fn_list.enable_io_queues(oct)) { + octeon_delete_instr_queue(oct, iq_no); return 1; + } return 0; } From d85f01775850a35eae47a0090839baf510c1ef12 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Wed, 14 Aug 2019 05:31:54 +0000 Subject: [PATCH 450/482] net: tls, fix sk_write_space NULL write when tx disabled The ctx->sk_write_space pointer is only set when TLS tx mode is enabled. When running without TX mode its a null pointer but we still set the sk sk_write_space pointer on close(). Fix the close path to only overwrite sk->sk_write_space when the current pointer is to the tls_write_space function indicating the tls module should clean it up properly as well. Reported-by: Hillf Danton Cc: Ying Xue Cc: Andrey Konovalov Fixes: 57c722e932cfb ("net/tls: swap sk_write_space on close") Signed-off-by: John Fastabend Reviewed-by: Jakub Kicinski Signed-off-by: David S. Miller --- net/tls/tls_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index ce6ef56a65ef..43252a801c3f 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -308,7 +308,8 @@ static void tls_sk_proto_close(struct sock *sk, long timeout) if (free_ctx) icsk->icsk_ulp_data = NULL; sk->sk_prot = ctx->sk_proto; - sk->sk_write_space = ctx->sk_write_space; + if (sk->sk_write_space == tls_write_space) + sk->sk_write_space = ctx->sk_write_space; write_unlock_bh(&sk->sk_callback_lock); release_sock(sk); if (ctx->tx_conf == TLS_SW) From 20fb7c7a39b5c719e2e619673b5f5729ee7d2306 Mon Sep 17 00:00:00 2001 From: Wenwen Wang Date: Wed, 14 Aug 2019 01:38:39 -0500 Subject: [PATCH 451/482] net: myri10ge: fix memory leaks In myri10ge_probe(), myri10ge_alloc_slices() is invoked to allocate slices related structures. Later on, myri10ge_request_irq() is used to get an irq. However, if this process fails, the allocated slices related structures are not deallocated, leading to memory leaks. To fix this issue, revise the target label of the goto statement to 'abort_with_slices'. Signed-off-by: Wenwen Wang Signed-off-by: David S. Miller --- drivers/net/ethernet/myricom/myri10ge/myri10ge.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c index d8b7fba96d58..337b0cbfd153 100644 --- a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c +++ b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c @@ -3919,7 +3919,7 @@ static int myri10ge_probe(struct pci_dev *pdev, const struct pci_device_id *ent) * setup (if available). */ status = myri10ge_request_irq(mgp); if (status != 0) - goto abort_with_firmware; + goto abort_with_slices; myri10ge_free_irq(mgp); /* Save configuration space to be restored if the From 19bce474c45be69a284ecee660aa12d8f1e88f18 Mon Sep 17 00:00:00 2001 From: Hui Peng Date: Thu, 15 Aug 2019 00:31:34 -0400 Subject: [PATCH 452/482] ALSA: usb-audio: Fix a stack buffer overflow bug in check_input_term `check_input_term` recursively calls itself with input from device side (e.g., uac_input_terminal_descriptor.bCSourceID) as argument (id). In `check_input_term`, if `check_input_term` is called with the same `id` argument as the caller, it triggers endless recursive call, resulting kernel space stack overflow. This patch fixes the bug by adding a bitmap to `struct mixer_build` to keep track of the checked ids and stop the execution if some id has been checked (similar to how parse_audio_unit handles unitid argument). Reported-by: Hui Peng Reported-by: Mathias Payer Signed-off-by: Hui Peng Cc: Signed-off-by: Takashi Iwai --- sound/usb/mixer.c | 35 +++++++++++++++++++++++++++-------- 1 file changed, 27 insertions(+), 8 deletions(-) diff --git a/sound/usb/mixer.c b/sound/usb/mixer.c index ea487378be17..b5927c3d5bc0 100644 --- a/sound/usb/mixer.c +++ b/sound/usb/mixer.c @@ -68,6 +68,7 @@ struct mixer_build { unsigned char *buffer; unsigned int buflen; DECLARE_BITMAP(unitbitmap, MAX_ID_ELEMS); + DECLARE_BITMAP(termbitmap, MAX_ID_ELEMS); struct usb_audio_term oterm; const struct usbmix_name_map *map; const struct usbmix_selector_map *selector_map; @@ -775,16 +776,25 @@ static int uac_mixer_unit_get_channels(struct mixer_build *state, * parse the source unit recursively until it reaches to a terminal * or a branched unit. */ -static int check_input_term(struct mixer_build *state, int id, +static int __check_input_term(struct mixer_build *state, int id, struct usb_audio_term *term) { int protocol = state->mixer->protocol; int err; void *p1; + unsigned char *hdr; memset(term, 0, sizeof(*term)); - while ((p1 = find_audio_control_unit(state, id)) != NULL) { - unsigned char *hdr = p1; + for (;;) { + /* a loop in the terminal chain? */ + if (test_and_set_bit(id, state->termbitmap)) + return -EINVAL; + + p1 = find_audio_control_unit(state, id); + if (!p1) + break; + + hdr = p1; term->id = id; if (protocol == UAC_VERSION_1 || protocol == UAC_VERSION_2) { @@ -802,7 +812,7 @@ static int check_input_term(struct mixer_build *state, int id, /* call recursively to verify that the * referenced clock entity is valid */ - err = check_input_term(state, d->bCSourceID, term); + err = __check_input_term(state, d->bCSourceID, term); if (err < 0) return err; @@ -836,7 +846,7 @@ static int check_input_term(struct mixer_build *state, int id, case UAC2_CLOCK_SELECTOR: { struct uac_selector_unit_descriptor *d = p1; /* call recursively to retrieve the channel info */ - err = check_input_term(state, d->baSourceID[0], term); + err = __check_input_term(state, d->baSourceID[0], term); if (err < 0) return err; term->type = UAC3_SELECTOR_UNIT << 16; /* virtual type */ @@ -899,7 +909,7 @@ static int check_input_term(struct mixer_build *state, int id, /* call recursively to verify that the * referenced clock entity is valid */ - err = check_input_term(state, d->bCSourceID, term); + err = __check_input_term(state, d->bCSourceID, term); if (err < 0) return err; @@ -950,7 +960,7 @@ static int check_input_term(struct mixer_build *state, int id, case UAC3_CLOCK_SELECTOR: { struct uac_selector_unit_descriptor *d = p1; /* call recursively to retrieve the channel info */ - err = check_input_term(state, d->baSourceID[0], term); + err = __check_input_term(state, d->baSourceID[0], term); if (err < 0) return err; term->type = UAC3_SELECTOR_UNIT << 16; /* virtual type */ @@ -966,7 +976,7 @@ static int check_input_term(struct mixer_build *state, int id, return -EINVAL; /* call recursively to retrieve the channel info */ - err = check_input_term(state, d->baSourceID[0], term); + err = __check_input_term(state, d->baSourceID[0], term); if (err < 0) return err; @@ -984,6 +994,15 @@ static int check_input_term(struct mixer_build *state, int id, return -ENODEV; } + +static int check_input_term(struct mixer_build *state, int id, + struct usb_audio_term *term) +{ + memset(term, 0, sizeof(*term)); + memset(state->termbitmap, 0, sizeof(state->termbitmap)); + return __check_input_term(state, id, term); +} + /* * Feature Unit */ From 32d3182cd2cd29b2e7e04df7b0db350fbe11289f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 14 Aug 2019 02:11:57 -0700 Subject: [PATCH 453/482] net/packet: fix race in tpacket_snd() packet_sendmsg() checks tx_ring.pg_vec to decide if it must call tpacket_snd(). Problem is that the check is lockless, meaning another thread can issue a concurrent setsockopt(PACKET_TX_RING ) to flip tx_ring.pg_vec back to NULL. Given that tpacket_snd() grabs pg_vec_lock mutex, we can perform the check again to solve the race. syzbot reported : kasan: CONFIG_KASAN_INLINE enabled kasan: GPF could be caused by NULL-ptr deref or user memory access general protection fault: 0000 [#1] PREEMPT SMP KASAN CPU: 1 PID: 11429 Comm: syz-executor394 Not tainted 5.3.0-rc4+ #101 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:packet_lookup_frame+0x8d/0x270 net/packet/af_packet.c:474 Code: c1 ee 03 f7 73 0c 80 3c 0e 00 0f 85 cb 01 00 00 48 8b 0b 89 c0 4c 8d 24 c1 48 b8 00 00 00 00 00 fc ff df 4c 89 e1 48 c1 e9 03 <80> 3c 01 00 0f 85 94 01 00 00 48 8d 7b 10 4d 8b 3c 24 48 b8 00 00 RSP: 0018:ffff88809f82f7b8 EFLAGS: 00010246 RAX: dffffc0000000000 RBX: ffff8880a45c7030 RCX: 0000000000000000 RDX: 0000000000000000 RSI: 1ffff110148b8e06 RDI: ffff8880a45c703c RBP: ffff88809f82f7e8 R08: ffff888087aea200 R09: fffffbfff134ae50 R10: fffffbfff134ae4f R11: ffffffff89a5727f R12: 0000000000000000 R13: 0000000000000001 R14: ffff8880a45c6ac0 R15: 0000000000000000 FS: 00007fa04716f700(0000) GS:ffff8880ae900000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fa04716edb8 CR3: 0000000091eb4000 CR4: 00000000001406e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: packet_current_frame net/packet/af_packet.c:487 [inline] tpacket_snd net/packet/af_packet.c:2667 [inline] packet_sendmsg+0x590/0x6250 net/packet/af_packet.c:2975 sock_sendmsg_nosec net/socket.c:637 [inline] sock_sendmsg+0xd7/0x130 net/socket.c:657 ___sys_sendmsg+0x3e2/0x920 net/socket.c:2311 __sys_sendmmsg+0x1bf/0x4d0 net/socket.c:2413 __do_sys_sendmmsg net/socket.c:2442 [inline] __se_sys_sendmmsg net/socket.c:2439 [inline] __x64_sys_sendmmsg+0x9d/0x100 net/socket.c:2439 do_syscall_64+0xfd/0x6a0 arch/x86/entry/common.c:296 entry_SYSCALL_64_after_hwframe+0x49/0xbe Fixes: 69e3c75f4d54 ("net: TX_RING and packet mmap") Signed-off-by: Eric Dumazet Reported-by: syzbot Signed-off-by: David S. Miller --- net/packet/af_packet.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 8d54f3047768..e2742b006d25 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2618,6 +2618,13 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) mutex_lock(&po->pg_vec_lock); + /* packet_sendmsg() check on tx_ring.pg_vec was lockless, + * we need to confirm it under protection of pg_vec_lock. + */ + if (unlikely(!po->tx_ring.pg_vec)) { + err = -EBUSY; + goto out; + } if (likely(saddr == NULL)) { dev = packet_cached_dev_get(po); proto = po->num; From a90118c445cc7f07781de26a9684d4ec58bfcfd1 Mon Sep 17 00:00:00 2001 From: John Hubbard Date: Tue, 30 Jul 2019 22:46:27 -0700 Subject: [PATCH 454/482] x86/boot: Save fields explicitly, zero out everything else Recent gcc compilers (gcc 9.1) generate warnings about an out of bounds memset, if the memset goes accross several fields of a struct. This generated a couple of warnings on x86_64 builds in sanitize_boot_params(). Fix this by explicitly saving the fields in struct boot_params that are intended to be preserved, and zeroing all the rest. [ tglx: Tagged for stable as it breaks the warning free build there as well ] Suggested-by: Thomas Gleixner Suggested-by: H. Peter Anvin Signed-off-by: John Hubbard Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20190731054627.5627-2-jhubbard@nvidia.com --- arch/x86/include/asm/bootparam_utils.h | 63 ++++++++++++++++++++------ 1 file changed, 48 insertions(+), 15 deletions(-) diff --git a/arch/x86/include/asm/bootparam_utils.h b/arch/x86/include/asm/bootparam_utils.h index 101eb944f13c..f5e90a849bca 100644 --- a/arch/x86/include/asm/bootparam_utils.h +++ b/arch/x86/include/asm/bootparam_utils.h @@ -18,6 +18,20 @@ * Note: efi_info is commonly left uninitialized, but that field has a * private magic, so it is better to leave it unchanged. */ + +#define sizeof_mbr(type, member) ({ sizeof(((type *)0)->member); }) + +#define BOOT_PARAM_PRESERVE(struct_member) \ + { \ + .start = offsetof(struct boot_params, struct_member), \ + .len = sizeof_mbr(struct boot_params, struct_member), \ + } + +struct boot_params_to_save { + unsigned int start; + unsigned int len; +}; + static void sanitize_boot_params(struct boot_params *boot_params) { /* @@ -35,21 +49,40 @@ static void sanitize_boot_params(struct boot_params *boot_params) * problems again. */ if (boot_params->sentinel) { - /* fields in boot_params are left uninitialized, clear them */ - boot_params->acpi_rsdp_addr = 0; - memset(&boot_params->ext_ramdisk_image, 0, - (char *)&boot_params->efi_info - - (char *)&boot_params->ext_ramdisk_image); - memset(&boot_params->kbd_status, 0, - (char *)&boot_params->hdr - - (char *)&boot_params->kbd_status); - memset(&boot_params->_pad7[0], 0, - (char *)&boot_params->edd_mbr_sig_buffer[0] - - (char *)&boot_params->_pad7[0]); - memset(&boot_params->_pad8[0], 0, - (char *)&boot_params->eddbuf[0] - - (char *)&boot_params->_pad8[0]); - memset(&boot_params->_pad9[0], 0, sizeof(boot_params->_pad9)); + static struct boot_params scratch; + char *bp_base = (char *)boot_params; + char *save_base = (char *)&scratch; + int i; + + const struct boot_params_to_save to_save[] = { + BOOT_PARAM_PRESERVE(screen_info), + BOOT_PARAM_PRESERVE(apm_bios_info), + BOOT_PARAM_PRESERVE(tboot_addr), + BOOT_PARAM_PRESERVE(ist_info), + BOOT_PARAM_PRESERVE(acpi_rsdp_addr), + BOOT_PARAM_PRESERVE(hd0_info), + BOOT_PARAM_PRESERVE(hd1_info), + BOOT_PARAM_PRESERVE(sys_desc_table), + BOOT_PARAM_PRESERVE(olpc_ofw_header), + BOOT_PARAM_PRESERVE(efi_info), + BOOT_PARAM_PRESERVE(alt_mem_k), + BOOT_PARAM_PRESERVE(scratch), + BOOT_PARAM_PRESERVE(e820_entries), + BOOT_PARAM_PRESERVE(eddbuf_entries), + BOOT_PARAM_PRESERVE(edd_mbr_sig_buf_entries), + BOOT_PARAM_PRESERVE(edd_mbr_sig_buffer), + BOOT_PARAM_PRESERVE(e820_table), + BOOT_PARAM_PRESERVE(eddbuf), + }; + + memset(&scratch, 0, sizeof(scratch)); + + for (i = 0; i < ARRAY_SIZE(to_save); i++) { + memcpy(save_base + to_save[i].start, + bp_base + to_save[i].start, to_save[i].len); + } + + memcpy(boot_params, save_base, sizeof(*boot_params)); } } From b6143d10d23ebb4a77af311e8b8b7f019d0163e6 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 16 Aug 2019 14:57:43 +0100 Subject: [PATCH 455/482] arm64: ftrace: Ensure module ftrace trampoline is coherent with I-side The initial support for dynamic ftrace trampolines in modules made use of an indirect branch which loaded its target from the beginning of a special section (e71a4e1bebaf7 ("arm64: ftrace: add support for far branches to dynamic ftrace")). Since no instructions were being patched, no cache maintenance was needed. However, later in be0f272bfc83 ("arm64: ftrace: emit ftrace-mod.o contents through code") this code was reworked to output the trampoline instructions directly into the PLT entry but, unfortunately, the necessary cache maintenance was overlooked. Add a call to __flush_icache_range() after writing the new trampoline instructions but before patching in the branch to the trampoline. Cc: Ard Biesheuvel Cc: James Morse Cc: Fixes: be0f272bfc83 ("arm64: ftrace: emit ftrace-mod.o contents through code") Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/kernel/ftrace.c | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c index 1285c7b2947f..171773257974 100644 --- a/arch/arm64/kernel/ftrace.c +++ b/arch/arm64/kernel/ftrace.c @@ -73,7 +73,7 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) if (offset < -SZ_128M || offset >= SZ_128M) { #ifdef CONFIG_ARM64_MODULE_PLTS - struct plt_entry trampoline; + struct plt_entry trampoline, *dst; struct module *mod; /* @@ -106,23 +106,27 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) * to check if the actual opcodes are in fact identical, * regardless of the offset in memory so use memcmp() instead. */ - trampoline = get_plt_entry(addr, mod->arch.ftrace_trampoline); - if (memcmp(mod->arch.ftrace_trampoline, &trampoline, - sizeof(trampoline))) { - if (plt_entry_is_initialized(mod->arch.ftrace_trampoline)) { + dst = mod->arch.ftrace_trampoline; + trampoline = get_plt_entry(addr, dst); + if (memcmp(dst, &trampoline, sizeof(trampoline))) { + if (plt_entry_is_initialized(dst)) { pr_err("ftrace: far branches to multiple entry points unsupported inside a single module\n"); return -EINVAL; } /* point the trampoline to our ftrace entry point */ module_disable_ro(mod); - *mod->arch.ftrace_trampoline = trampoline; + *dst = trampoline; module_enable_ro(mod, true); - /* update trampoline before patching in the branch */ - smp_wmb(); + /* + * Ensure updated trampoline is visible to instruction + * fetch before we patch in the branch. + */ + __flush_icache_range((unsigned long)&dst[0], + (unsigned long)&dst[1]); } - addr = (unsigned long)(void *)mod->arch.ftrace_trampoline; + addr = (unsigned long)dst; #else /* CONFIG_ARM64_MODULE_PLTS */ return -EINVAL; #endif /* CONFIG_ARM64_MODULE_PLTS */ From 5a69e4980258c56f6d4c2048a1b9c260218785b7 Mon Sep 17 00:00:00 2001 From: Denis Efremov Date: Wed, 14 Aug 2019 15:58:00 +0300 Subject: [PATCH 456/482] MAINTAINERS: PHY LIBRARY: Update files in the record Update MAINTAINERS to reflect that sysfs-bus-mdio was removed in commit a6cd0d2d493a ("Documentation: net-sysfs: Remove duplicate PHY device documentation") and sysfs-class-net-phydev was added in commit 86f22d04dfb5 ("net: sysfs: Document PHY device sysfs attributes"). Cc: Florian Fainelli Cc: Andrew Lunn Cc: Heiner Kallweit Cc: David S. Miller Cc: netdev@vger.kernel.org Signed-off-by: Denis Efremov Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 47800d32cfbc..d7e44a29b13e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6065,7 +6065,7 @@ M: Florian Fainelli M: Heiner Kallweit L: netdev@vger.kernel.org S: Maintained -F: Documentation/ABI/testing/sysfs-bus-mdio +F: Documentation/ABI/testing/sysfs-class-net-phydev F: Documentation/devicetree/bindings/net/ethernet-phy.yaml F: Documentation/devicetree/bindings/net/mdio* F: Documentation/networking/phy.rst From 0a66c20a6a123d6dc96c6197f02455cb64615271 Mon Sep 17 00:00:00 2001 From: Denis Efremov Date: Wed, 14 Aug 2019 15:12:09 +0300 Subject: [PATCH 457/482] MAINTAINERS: r8169: Update path to the driver Update MAINTAINERS record to reflect the filename change. The file was moved in commit 25e992a4603c ("r8169: rename r8169.c to r8169_main.c") Cc: Heiner Kallweit Cc: nic_swsd@realtek.com Cc: David S. Miller Cc: netdev@vger.kernel.org Signed-off-by: Denis Efremov Reviewed-by: Heiner Kallweit Signed-off-by: David S. Miller --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index d7e44a29b13e..a416574780d6 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -183,7 +183,7 @@ M: Realtek linux nic maintainers M: Heiner Kallweit L: netdev@vger.kernel.org S: Maintained -F: drivers/net/ethernet/realtek/r8169.c +F: drivers/net/ethernet/realtek/r8169* 8250/16?50 (AND CLONE UARTS) SERIAL DRIVER M: Greg Kroah-Hartman From b9cbf8a64865b50fd0f4a3915fa00ac7365cdf8f Mon Sep 17 00:00:00 2001 From: Wenwen Wang Date: Wed, 14 Aug 2019 11:23:13 -0500 Subject: [PATCH 458/482] lan78xx: Fix memory leaks In lan78xx_probe(), a new urb is allocated through usb_alloc_urb() and saved to 'dev->urb_intr'. However, in the following execution, if an error occurs, 'dev->urb_intr' is not deallocated, leading to memory leaks. To fix this issue, invoke usb_free_urb() to free the allocated urb before returning from the function. Signed-off-by: Wenwen Wang Signed-off-by: David S. Miller --- drivers/net/usb/lan78xx.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index 3d92ea6fcc02..f033fee225a1 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -3792,7 +3792,7 @@ static int lan78xx_probe(struct usb_interface *intf, ret = register_netdev(netdev); if (ret != 0) { netif_err(dev, probe, netdev, "couldn't register the device\n"); - goto out3; + goto out4; } usb_set_intfdata(intf, dev); @@ -3807,12 +3807,14 @@ static int lan78xx_probe(struct usb_interface *intf, ret = lan78xx_phy_init(dev); if (ret < 0) - goto out4; + goto out5; return 0; -out4: +out5: unregister_netdev(netdev); +out4: + usb_free_urb(dev->urb_intr); out3: lan78xx_unbind(dev, intf); out2: From 712042313b23b5df7451faf4b279beb3025e990c Mon Sep 17 00:00:00 2001 From: Tuong Lien Date: Thu, 15 Aug 2019 10:24:08 +0700 Subject: [PATCH 459/482] tipc: fix false detection of retransmit failures This commit eliminates the use of the link 'stale_limit' & 'prev_from' (besides the already removed - 'stale_cnt') variables in the detection of repeated retransmit failures as there is no proper way to initialize them to avoid a false detection, i.e. it is not really a retransmission failure but due to a garbage values in the variables. Instead, a jiffies variable will be added to individual skbs (like the way we restrict the skb retransmissions) in order to mark the first skb retransmit time. Later on, at the next retransmissions, the timestamp will be checked to see if the skb in the link transmq is "too stale", that is, the link tolerance time has passed, so that a link reset will be ordered. Note, just checking on the first skb in the queue is fine enough since it must be the oldest one. A counter is also added to keep track the actual skb retransmissions' number for later checking when the failure happens. The downside of this approach is that the skb->cb[] buffer is about to be exhausted, however it is always able to allocate another memory area and keep a reference to it when needed. Fixes: 77cf8edbc0e7 ("tipc: simplify stale link failure criteria") Reported-by: Hoang Le Acked-by: Ying Xue Acked-by: Jon Maloy Signed-off-by: Tuong Lien Signed-off-by: David S. Miller --- net/tipc/link.c | 90 ++++++++++++++++++++++++++++--------------------- net/tipc/msg.h | 8 +++-- 2 files changed, 56 insertions(+), 42 deletions(-) diff --git a/net/tipc/link.c b/net/tipc/link.c index 66d3a07bc571..c2c5c53cad22 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -106,8 +106,6 @@ struct tipc_stats { * @transmitq: queue for sent, non-acked messages * @backlogq: queue for messages waiting to be sent * @snt_nxt: next sequence number to use for outbound messages - * @prev_from: sequence number of most previous retransmission request - * @stale_limit: time when repeated identical retransmits must force link reset * @ackers: # of peers that needs to ack each packet before it can be released * @acked: # last packet acked by a certain peer. Used for broadcast. * @rcv_nxt: next sequence number to expect for inbound messages @@ -164,9 +162,7 @@ struct tipc_link { u16 limit; } backlog[5]; u16 snd_nxt; - u16 prev_from; u16 window; - unsigned long stale_limit; /* Reception */ u16 rcv_nxt; @@ -1044,47 +1040,53 @@ static void tipc_link_advance_backlog(struct tipc_link *l, * link_retransmit_failure() - Detect repeated retransmit failures * @l: tipc link sender * @r: tipc link receiver (= l in case of unicast) - * @from: seqno of the 1st packet in retransmit request * @rc: returned code * * Return: true if the repeated retransmit failures happens, otherwise * false */ static bool link_retransmit_failure(struct tipc_link *l, struct tipc_link *r, - u16 from, int *rc) + int *rc) { struct sk_buff *skb = skb_peek(&l->transmq); struct tipc_msg *hdr; if (!skb) return false; + + if (!TIPC_SKB_CB(skb)->retr_cnt) + return false; + + if (!time_after(jiffies, TIPC_SKB_CB(skb)->retr_stamp + + msecs_to_jiffies(r->tolerance))) + return false; + hdr = buf_msg(skb); + if (link_is_bc_sndlink(l) && !less(r->acked, msg_seqno(hdr))) + return false; - /* Detect repeated retransmit failures on same packet */ - if (r->prev_from != from) { - r->prev_from = from; - r->stale_limit = jiffies + msecs_to_jiffies(r->tolerance); - } else if (time_after(jiffies, r->stale_limit)) { - pr_warn("Retransmission failure on link <%s>\n", l->name); - link_print(l, "State of link "); - pr_info("Failed msg: usr %u, typ %u, len %u, err %u\n", - msg_user(hdr), msg_type(hdr), msg_size(hdr), - msg_errcode(hdr)); - pr_info("sqno %u, prev: %x, src: %x\n", - msg_seqno(hdr), msg_prevnode(hdr), msg_orignode(hdr)); + pr_warn("Retransmission failure on link <%s>\n", l->name); + link_print(l, "State of link "); + pr_info("Failed msg: usr %u, typ %u, len %u, err %u\n", + msg_user(hdr), msg_type(hdr), msg_size(hdr), msg_errcode(hdr)); + pr_info("sqno %u, prev: %x, dest: %x\n", + msg_seqno(hdr), msg_prevnode(hdr), msg_destnode(hdr)); + pr_info("retr_stamp %d, retr_cnt %d\n", + jiffies_to_msecs(TIPC_SKB_CB(skb)->retr_stamp), + TIPC_SKB_CB(skb)->retr_cnt); - trace_tipc_list_dump(&l->transmq, true, "retrans failure!"); - trace_tipc_link_dump(l, TIPC_DUMP_NONE, "retrans failure!"); - trace_tipc_link_dump(r, TIPC_DUMP_NONE, "retrans failure!"); - - if (link_is_bc_sndlink(l)) - *rc = TIPC_LINK_DOWN_EVT; + trace_tipc_list_dump(&l->transmq, true, "retrans failure!"); + trace_tipc_link_dump(l, TIPC_DUMP_NONE, "retrans failure!"); + trace_tipc_link_dump(r, TIPC_DUMP_NONE, "retrans failure!"); + if (link_is_bc_sndlink(l)) { + r->state = LINK_RESET; + *rc = TIPC_LINK_DOWN_EVT; + } else { *rc = tipc_link_fsm_evt(l, LINK_FAILURE_EVT); - return true; } - return false; + return true; } /* tipc_link_bc_retrans() - retransmit zero or more packets @@ -1110,7 +1112,7 @@ static int tipc_link_bc_retrans(struct tipc_link *l, struct tipc_link *r, trace_tipc_link_retrans(r, from, to, &l->transmq); - if (link_retransmit_failure(l, r, from, &rc)) + if (link_retransmit_failure(l, r, &rc)) return rc; skb_queue_walk(&l->transmq, skb) { @@ -1119,11 +1121,10 @@ static int tipc_link_bc_retrans(struct tipc_link *l, struct tipc_link *r, continue; if (more(msg_seqno(hdr), to)) break; - if (link_is_bc_sndlink(l)) { - if (time_before(jiffies, TIPC_SKB_CB(skb)->nxt_retr)) - continue; - TIPC_SKB_CB(skb)->nxt_retr = TIPC_BC_RETR_LIM; - } + + if (time_before(jiffies, TIPC_SKB_CB(skb)->nxt_retr)) + continue; + TIPC_SKB_CB(skb)->nxt_retr = TIPC_BC_RETR_LIM; _skb = __pskb_copy(skb, LL_MAX_HEADER + MIN_H_SIZE, GFP_ATOMIC); if (!_skb) return 0; @@ -1133,6 +1134,10 @@ static int tipc_link_bc_retrans(struct tipc_link *l, struct tipc_link *r, _skb->priority = TC_PRIO_CONTROL; __skb_queue_tail(xmitq, _skb); l->stats.retransmitted++; + + /* Increase actual retrans counter & mark first time */ + if (!TIPC_SKB_CB(skb)->retr_cnt++) + TIPC_SKB_CB(skb)->retr_stamp = jiffies; } return 0; } @@ -1357,12 +1362,10 @@ static int tipc_link_advance_transmq(struct tipc_link *l, u16 acked, u16 gap, struct tipc_msg *hdr; u16 bc_ack = l->bc_rcvlink->rcv_nxt - 1; u16 ack = l->rcv_nxt - 1; + bool passed = false; u16 seqno, n = 0; int rc = 0; - if (gap && link_retransmit_failure(l, l, acked + 1, &rc)) - return rc; - skb_queue_walk_safe(&l->transmq, skb, tmp) { seqno = buf_seqno(skb); @@ -1372,12 +1375,17 @@ static int tipc_link_advance_transmq(struct tipc_link *l, u16 acked, u16 gap, __skb_unlink(skb, &l->transmq); kfree_skb(skb); } else if (less_eq(seqno, acked + gap)) { - /* retransmit skb */ + /* First, check if repeated retrans failures occurs? */ + if (!passed && link_retransmit_failure(l, l, &rc)) + return rc; + passed = true; + + /* retransmit skb if unrestricted*/ if (time_before(jiffies, TIPC_SKB_CB(skb)->nxt_retr)) continue; TIPC_SKB_CB(skb)->nxt_retr = TIPC_UC_RETR_TIME; - - _skb = __pskb_copy(skb, MIN_H_SIZE, GFP_ATOMIC); + _skb = __pskb_copy(skb, LL_MAX_HEADER + MIN_H_SIZE, + GFP_ATOMIC); if (!_skb) continue; hdr = buf_msg(_skb); @@ -1386,6 +1394,10 @@ static int tipc_link_advance_transmq(struct tipc_link *l, u16 acked, u16 gap, _skb->priority = TC_PRIO_CONTROL; __skb_queue_tail(xmitq, _skb); l->stats.retransmitted++; + + /* Increase actual retrans counter & mark first time */ + if (!TIPC_SKB_CB(skb)->retr_cnt++) + TIPC_SKB_CB(skb)->retr_stamp = jiffies; } else { /* retry with Gap ACK blocks if any */ if (!ga || n >= ga->gack_cnt) @@ -2577,7 +2589,7 @@ int tipc_link_dump(struct tipc_link *l, u16 dqueues, char *buf) i += scnprintf(buf + i, sz - i, " %x", l->peer_caps); i += scnprintf(buf + i, sz - i, " %u", l->silent_intv_cnt); i += scnprintf(buf + i, sz - i, " %u", l->rst_cnt); - i += scnprintf(buf + i, sz - i, " %u", l->prev_from); + i += scnprintf(buf + i, sz - i, " %u", 0); i += scnprintf(buf + i, sz - i, " %u", 0); i += scnprintf(buf + i, sz - i, " %u", l->acked); diff --git a/net/tipc/msg.h b/net/tipc/msg.h index da509f0eb9ca..d7ebc9e955f6 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -102,13 +102,15 @@ struct plist; #define TIPC_MEDIA_INFO_OFFSET 5 struct tipc_skb_cb { - u32 bytes_read; - u32 orig_member; struct sk_buff *tail; unsigned long nxt_retr; - bool validated; + unsigned long retr_stamp; + u32 bytes_read; + u32 orig_member; u16 chain_imp; u16 ackers; + u16 retr_cnt; + bool validated; }; #define TIPC_SKB_CB(__skb) ((struct tipc_skb_cb *)&((__skb)->cb[0])) From 58a96fc35375ab87db7c5b69336f5befde1b548f Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Tue, 16 Jul 2019 20:34:41 +0200 Subject: [PATCH 460/482] Bluetooth: Add debug setting for changing minimum encryption key size For testing and qualification purposes it is useful to allow changing the minimum encryption key size value that the host stack is going to enforce. This adds a new debugfs setting min_encrypt_key_size to achieve this functionality. Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- include/net/bluetooth/hci_core.h | 1 + net/bluetooth/hci_core.c | 1 + net/bluetooth/hci_debugfs.c | 31 +++++++++++++++++++++++++++++++ net/bluetooth/l2cap_core.c | 2 +- 4 files changed, 34 insertions(+), 1 deletion(-) diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index ded574b32c20..ffc95b382eb5 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -278,6 +278,7 @@ struct hci_dev { __u16 conn_info_min_age; __u16 conn_info_max_age; __u16 auth_payload_timeout; + __u8 min_enc_key_size; __u8 ssp_debug_mode; __u8 hw_error_code; __u32 clock; diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index b9585e7d9d2e..04bc79359a17 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -3202,6 +3202,7 @@ struct hci_dev *hci_alloc_dev(void) hdev->conn_info_min_age = DEFAULT_CONN_INFO_MIN_AGE; hdev->conn_info_max_age = DEFAULT_CONN_INFO_MAX_AGE; hdev->auth_payload_timeout = DEFAULT_AUTH_PAYLOAD_TIMEOUT; + hdev->min_enc_key_size = HCI_MIN_ENC_KEY_SIZE; mutex_init(&hdev->lock); mutex_init(&hdev->req_lock); diff --git a/net/bluetooth/hci_debugfs.c b/net/bluetooth/hci_debugfs.c index bb67f4a5479a..402e2cc54044 100644 --- a/net/bluetooth/hci_debugfs.c +++ b/net/bluetooth/hci_debugfs.c @@ -433,6 +433,35 @@ static int auto_accept_delay_set(void *data, u64 val) return 0; } +static int min_encrypt_key_size_set(void *data, u64 val) +{ + struct hci_dev *hdev = data; + + if (val < 1 || val > 16) + return -EINVAL; + + hci_dev_lock(hdev); + hdev->min_enc_key_size = val; + hci_dev_unlock(hdev); + + return 0; +} + +static int min_encrypt_key_size_get(void *data, u64 *val) +{ + struct hci_dev *hdev = data; + + hci_dev_lock(hdev); + *val = hdev->min_enc_key_size; + hci_dev_unlock(hdev); + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(min_encrypt_key_size_fops, + min_encrypt_key_size_get, + min_encrypt_key_size_set, "%llu\n"); + static int auto_accept_delay_get(void *data, u64 *val) { struct hci_dev *hdev = data; @@ -545,6 +574,8 @@ void hci_debugfs_create_bredr(struct hci_dev *hdev) if (lmp_ssp_capable(hdev)) { debugfs_create_file("ssp_debug_mode", 0444, hdev->debugfs, hdev, &ssp_debug_mode_fops); + debugfs_create_file("min_encrypt_key_size", 0644, hdev->debugfs, + hdev, &min_encrypt_key_size_fops); debugfs_create_file("auto_accept_delay", 0644, hdev->debugfs, hdev, &auto_accept_delay_fops); } diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index cc506fe99b4d..dfc1edb168b7 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -1361,7 +1361,7 @@ static bool l2cap_check_enc_key_size(struct hci_conn *hcon) * actually encrypted before enforcing a key size. */ return (!test_bit(HCI_CONN_ENCRYPT, &hcon->flags) || - hcon->enc_key_size >= HCI_MIN_ENC_KEY_SIZE); + hcon->enc_key_size >= hcon->hdev->min_enc_key_size); } static void l2cap_do_start(struct l2cap_chan *chan) From 5912e791f3018de0a007c8cfa9cb38c97d3e5f5c Mon Sep 17 00:00:00 2001 From: Adrian Vladu Date: Mon, 6 May 2019 17:27:37 +0000 Subject: [PATCH 461/482] tools: hv: fixed Python pep8/flake8 warnings for lsvmbus Fixed pep8/flake8 python style code for lsvmbus tool. The TAB indentation was on purpose ignored (pep8 rule W191) to make sure the code is complying with the Linux code guideline. The following command doe not show any warnings now: pep8 --ignore=W191 lsvmbus flake8 --ignore=W191 lsvmbus Signed-off-by: Adrian Vladu Cc: "K. Y. Srinivasan" Cc: Haiyang Zhang Cc: Stephen Hemminger Cc: Sasha Levin Cc: Dexuan Cui Cc: Alessandro Pilotti Signed-off-by: Sasha Levin --- tools/hv/lsvmbus | 75 +++++++++++++++++++++++++++--------------------- 1 file changed, 42 insertions(+), 33 deletions(-) diff --git a/tools/hv/lsvmbus b/tools/hv/lsvmbus index 55e7374bade0..099f2c44dbed 100644 --- a/tools/hv/lsvmbus +++ b/tools/hv/lsvmbus @@ -4,10 +4,10 @@ import os from optparse import OptionParser +help_msg = "print verbose messages. Try -vv, -vvv for more verbose messages" parser = OptionParser() -parser.add_option("-v", "--verbose", dest="verbose", - help="print verbose messages. Try -vv, -vvv for \ - more verbose messages", action="count") +parser.add_option( + "-v", "--verbose", dest="verbose", help=help_msg, action="count") (options, args) = parser.parse_args() @@ -21,27 +21,28 @@ if not os.path.isdir(vmbus_sys_path): exit(-1) vmbus_dev_dict = { - '{0e0b6031-5213-4934-818b-38d90ced39db}' : '[Operating system shutdown]', - '{9527e630-d0ae-497b-adce-e80ab0175caf}' : '[Time Synchronization]', - '{57164f39-9115-4e78-ab55-382f3bd5422d}' : '[Heartbeat]', - '{a9a0f4e7-5a45-4d96-b827-8a841e8c03e6}' : '[Data Exchange]', - '{35fa2e29-ea23-4236-96ae-3a6ebacba440}' : '[Backup (volume checkpoint)]', - '{34d14be3-dee4-41c8-9ae7-6b174977c192}' : '[Guest services]', - '{525074dc-8985-46e2-8057-a307dc18a502}' : '[Dynamic Memory]', - '{cfa8b69e-5b4a-4cc0-b98b-8ba1a1f3f95a}' : 'Synthetic mouse', - '{f912ad6d-2b17-48ea-bd65-f927a61c7684}' : 'Synthetic keyboard', - '{da0a7802-e377-4aac-8e77-0558eb1073f8}' : 'Synthetic framebuffer adapter', - '{f8615163-df3e-46c5-913f-f2d2f965ed0e}' : 'Synthetic network adapter', - '{32412632-86cb-44a2-9b5c-50d1417354f5}' : 'Synthetic IDE Controller', - '{ba6163d9-04a1-4d29-b605-72e2ffb1dc7f}' : 'Synthetic SCSI Controller', - '{2f9bcc4a-0069-4af3-b76b-6fd0be528cda}' : 'Synthetic fiber channel adapter', - '{8c2eaf3d-32a7-4b09-ab99-bd1f1c86b501}' : 'Synthetic RDMA adapter', - '{44c4f61d-4444-4400-9d52-802e27ede19f}' : 'PCI Express pass-through', - '{276aacf4-ac15-426c-98dd-7521ad3f01fe}' : '[Reserved system device]', - '{f8e65716-3cb3-4a06-9a60-1889c5cccab5}' : '[Reserved system device]', - '{3375baf4-9e15-4b30-b765-67acb10d607b}' : '[Reserved system device]', + '{0e0b6031-5213-4934-818b-38d90ced39db}': '[Operating system shutdown]', + '{9527e630-d0ae-497b-adce-e80ab0175caf}': '[Time Synchronization]', + '{57164f39-9115-4e78-ab55-382f3bd5422d}': '[Heartbeat]', + '{a9a0f4e7-5a45-4d96-b827-8a841e8c03e6}': '[Data Exchange]', + '{35fa2e29-ea23-4236-96ae-3a6ebacba440}': '[Backup (volume checkpoint)]', + '{34d14be3-dee4-41c8-9ae7-6b174977c192}': '[Guest services]', + '{525074dc-8985-46e2-8057-a307dc18a502}': '[Dynamic Memory]', + '{cfa8b69e-5b4a-4cc0-b98b-8ba1a1f3f95a}': 'Synthetic mouse', + '{f912ad6d-2b17-48ea-bd65-f927a61c7684}': 'Synthetic keyboard', + '{da0a7802-e377-4aac-8e77-0558eb1073f8}': 'Synthetic framebuffer adapter', + '{f8615163-df3e-46c5-913f-f2d2f965ed0e}': 'Synthetic network adapter', + '{32412632-86cb-44a2-9b5c-50d1417354f5}': 'Synthetic IDE Controller', + '{ba6163d9-04a1-4d29-b605-72e2ffb1dc7f}': 'Synthetic SCSI Controller', + '{2f9bcc4a-0069-4af3-b76b-6fd0be528cda}': 'Synthetic fiber channel adapter', + '{8c2eaf3d-32a7-4b09-ab99-bd1f1c86b501}': 'Synthetic RDMA adapter', + '{44c4f61d-4444-4400-9d52-802e27ede19f}': 'PCI Express pass-through', + '{276aacf4-ac15-426c-98dd-7521ad3f01fe}': '[Reserved system device]', + '{f8e65716-3cb3-4a06-9a60-1889c5cccab5}': '[Reserved system device]', + '{3375baf4-9e15-4b30-b765-67acb10d607b}': '[Reserved system device]', } + def get_vmbus_dev_attr(dev_name, attr): try: f = open('%s/%s/%s' % (vmbus_sys_path, dev_name, attr), 'r') @@ -52,6 +53,7 @@ def get_vmbus_dev_attr(dev_name, attr): return lines + class VMBus_Dev: pass @@ -66,12 +68,13 @@ for f in os.listdir(vmbus_sys_path): chn_vp_mapping = get_vmbus_dev_attr(f, 'channel_vp_mapping') chn_vp_mapping = [c.strip() for c in chn_vp_mapping] - chn_vp_mapping = sorted(chn_vp_mapping, - key = lambda c : int(c.split(':')[0])) + chn_vp_mapping = sorted( + chn_vp_mapping, key=lambda c: int(c.split(':')[0])) - chn_vp_mapping = ['\tRel_ID=%s, target_cpu=%s' % - (c.split(':')[0], c.split(':')[1]) - for c in chn_vp_mapping] + chn_vp_mapping = [ + '\tRel_ID=%s, target_cpu=%s' % + (c.split(':')[0], c.split(':')[1]) for c in chn_vp_mapping + ] d = VMBus_Dev() d.sysfs_path = '%s/%s' % (vmbus_sys_path, f) d.vmbus_id = vmbus_id @@ -85,7 +88,7 @@ for f in os.listdir(vmbus_sys_path): vmbus_dev_list.append(d) -vmbus_dev_list = sorted(vmbus_dev_list, key = lambda d : int(d.vmbus_id)) +vmbus_dev_list = sorted(vmbus_dev_list, key=lambda d: int(d.vmbus_id)) format0 = '%2s: %s' format1 = '%2s: Class_ID = %s - %s\n%s' @@ -95,9 +98,15 @@ for d in vmbus_dev_list: if verbose == 0: print(('VMBUS ID ' + format0) % (d.vmbus_id, d.dev_desc)) elif verbose == 1: - print (('VMBUS ID ' + format1) % \ - (d.vmbus_id, d.class_id, d.dev_desc, d.chn_vp_mapping)) + print( + ('VMBUS ID ' + format1) % + (d.vmbus_id, d.class_id, d.dev_desc, d.chn_vp_mapping) + ) else: - print (('VMBUS ID ' + format2) % \ - (d.vmbus_id, d.class_id, d.dev_desc, \ - d.device_id, d.sysfs_path, d.chn_vp_mapping)) + print( + ('VMBUS ID ' + format2) % + ( + d.vmbus_id, d.class_id, d.dev_desc, + d.device_id, d.sysfs_path, d.chn_vp_mapping + ) + ) From b0995156071b0ff29a5902964a9dc8cfad6f81c0 Mon Sep 17 00:00:00 2001 From: Adrian Vladu Date: Mon, 6 May 2019 16:50:58 +0000 Subject: [PATCH 462/482] tools: hv: fix KVP and VSS daemons exit code HyperV KVP and VSS daemons should exit with 0 when the '--help' or '-h' flags are used. Signed-off-by: Adrian Vladu Cc: "K. Y. Srinivasan" Cc: Haiyang Zhang Cc: Stephen Hemminger Cc: Sasha Levin Cc: Alessandro Pilotti Signed-off-by: Sasha Levin --- tools/hv/hv_kvp_daemon.c | 2 ++ tools/hv/hv_vss_daemon.c | 2 ++ 2 files changed, 4 insertions(+) diff --git a/tools/hv/hv_kvp_daemon.c b/tools/hv/hv_kvp_daemon.c index d7e06fe0270e..0ce50c319cfd 100644 --- a/tools/hv/hv_kvp_daemon.c +++ b/tools/hv/hv_kvp_daemon.c @@ -1386,6 +1386,8 @@ int main(int argc, char *argv[]) daemonize = 0; break; case 'h': + print_usage(argv); + exit(0); default: print_usage(argv); exit(EXIT_FAILURE); diff --git a/tools/hv/hv_vss_daemon.c b/tools/hv/hv_vss_daemon.c index efe1e34dd91b..8f813f5233d4 100644 --- a/tools/hv/hv_vss_daemon.c +++ b/tools/hv/hv_vss_daemon.c @@ -218,6 +218,8 @@ int main(int argc, char *argv[]) daemonize = 0; break; case 'h': + print_usage(argv); + exit(0); default: print_usage(argv); exit(EXIT_FAILURE); From 2d35c66036b2494c329a32468c85405493370e75 Mon Sep 17 00:00:00 2001 From: Adrian Vladu Date: Mon, 6 May 2019 16:51:24 +0000 Subject: [PATCH 463/482] tools: hv: fix typos in toolchain Fix typos in the HyperV toolchain. Signed-off-by: Adrian Vladu Cc: "K. Y. Srinivasan" Cc: Haiyang Zhang Cc: Stephen Hemminger Cc: Sasha Levin Cc: Alessandro Pilotti Signed-off-by: Sasha Levin --- tools/hv/hv_get_dhcp_info.sh | 2 +- tools/hv/hv_kvp_daemon.c | 6 +++--- tools/hv/hv_set_ifconfig.sh | 2 +- tools/hv/hv_vss_daemon.c | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/tools/hv/hv_get_dhcp_info.sh b/tools/hv/hv_get_dhcp_info.sh index c38686c44656..2f2a3c7df3de 100755 --- a/tools/hv/hv_get_dhcp_info.sh +++ b/tools/hv/hv_get_dhcp_info.sh @@ -13,7 +13,7 @@ # the script prints the string "Disabled" to stdout. # # Each Distro is expected to implement this script in a distro specific -# fashion. For instance on Distros that ship with Network Manager enabled, +# fashion. For instance, on Distros that ship with Network Manager enabled, # this script can be based on the Network Manager APIs for retrieving DHCP # information. diff --git a/tools/hv/hv_kvp_daemon.c b/tools/hv/hv_kvp_daemon.c index 0ce50c319cfd..f5597503c771 100644 --- a/tools/hv/hv_kvp_daemon.c +++ b/tools/hv/hv_kvp_daemon.c @@ -700,7 +700,7 @@ static void kvp_get_ipconfig_info(char *if_name, /* - * Gather the DNS state. + * Gather the DNS state. * Since there is no standard way to get this information * across various distributions of interest; we just invoke * an external script that needs to be ported across distros @@ -1051,7 +1051,7 @@ static int parse_ip_val_buffer(char *in_buf, int *offset, char *start; /* - * in_buf has sequence of characters that are seperated by + * in_buf has sequence of characters that are separated by * the character ';'. The last sequence does not have the * terminating ";" character. */ @@ -1492,7 +1492,7 @@ int main(int argc, char *argv[]) case KVP_OP_GET_IP_INFO: kvp_ip_val = &hv_msg->body.kvp_ip_val; - error = kvp_mac_to_ip(kvp_ip_val); + error = kvp_mac_to_ip(kvp_ip_val); if (error) hv_msg->error = error; diff --git a/tools/hv/hv_set_ifconfig.sh b/tools/hv/hv_set_ifconfig.sh index 7ed9f85ef908..d10fe35b7f25 100755 --- a/tools/hv/hv_set_ifconfig.sh +++ b/tools/hv/hv_set_ifconfig.sh @@ -12,7 +12,7 @@ # be used to configure the interface. # # Each Distro is expected to implement this script in a distro specific -# fashion. For instance on Distros that ship with Network Manager enabled, +# fashion. For instance, on Distros that ship with Network Manager enabled, # this script can be based on the Network Manager APIs for configuring the # interface. # diff --git a/tools/hv/hv_vss_daemon.c b/tools/hv/hv_vss_daemon.c index 8f813f5233d4..92902a88f671 100644 --- a/tools/hv/hv_vss_daemon.c +++ b/tools/hv/hv_vss_daemon.c @@ -42,7 +42,7 @@ static int vss_do_freeze(char *dir, unsigned int cmd) * If a partition is mounted more than once, only the first * FREEZE/THAW can succeed and the later ones will get * EBUSY/EINVAL respectively: there could be 2 cases: - * 1) a user may mount the same partition to differnt directories + * 1) a user may mount the same partition to different directories * by mistake or on purpose; * 2) The subvolume of btrfs appears to have the same partition * mounted more than once. From 61e0f39105b7926a41bc03158eccf5ed13207ebd Mon Sep 17 00:00:00 2001 From: Nishad Kamdar Date: Mon, 22 Jul 2019 19:01:17 +0530 Subject: [PATCH 464/482] tools: hv: Use the correct style for SPDX License Identifier This patch corrects the SPDX License Identifier style in the trace header file related to Microsoft Hyper-V client drivers. For C header files Documentation/process/license-rules.rst mandates C-like comments (opposed to C source files where C++ style should be used) Changes made by using a script provided by Joe Perches here: https://lkml.org/lkml/2019/2/7/46 Suggested-by: Joe Perches Signed-off-by: Nishad Kamdar Acked-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/hv/hv_trace.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hv/hv_trace.h b/drivers/hv/hv_trace.h index 999f80a63bff..e70783e33680 100644 --- a/drivers/hv/hv_trace.h +++ b/drivers/hv/hv_trace.h @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +/* SPDX-License-Identifier: GPL-2.0 */ #undef TRACE_SYSTEM #define TRACE_SYSTEM hyperv From bafe1e79e05de725e26b3f60c90b49e635b686b9 Mon Sep 17 00:00:00 2001 From: Lan Tianyu Date: Tue, 26 Mar 2019 14:28:21 +0800 Subject: [PATCH 465/482] MAINTAINERS: Fix Hyperv vIOMMU driver file name The Hyperv vIOMMU file name should be "hyperv-iommu.c" rather than "hyperv_iommu.c". This patch is to fix it. Signed-off-by: Lan Tianyu Signed-off-by: Sasha Levin --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 783569e3c4b4..b03179510e26 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7454,7 +7454,7 @@ F: drivers/net/hyperv/ F: drivers/scsi/storvsc_drv.c F: drivers/uio/uio_hv_generic.c F: drivers/video/fbdev/hyperv_fb.c -F: drivers/iommu/hyperv_iommu.c +F: drivers/iommu/hyperv-iommu.c F: net/vmw_vsock/hyperv_transport.c F: include/clocksource/hyperv_timer.h F: include/linux/hyperv.h From 4a4d2d372fb9b9229327e2ed01d5d9572eddf4de Mon Sep 17 00:00:00 2001 From: Manish Chopra Date: Sun, 18 Aug 2019 07:25:48 -0700 Subject: [PATCH 466/482] bnx2x: Fix VF's VLAN reconfiguration in reload. Commit 04f05230c5c13 ("bnx2x: Remove configured vlans as part of unload sequence."), introduced a regression in driver that as a part of VF's reload flow, VLANs created on the VF doesn't get re-configured in hardware as vlan metadata/info was not getting cleared for the VFs which causes vlan PING to stop. This patch clears the vlan metadata/info so that VLANs gets re-configured back in the hardware in VF's reload flow and PING/traffic continues for VLANs created over the VFs. Fixes: 04f05230c5c13 ("bnx2x: Remove configured vlans as part of unload sequence.") Signed-off-by: Manish Chopra Signed-off-by: Sudarsana Kalluru Signed-off-by: Shahed Shaikh Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c | 7 ++++--- drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h | 2 ++ .../net/ethernet/broadcom/bnx2x/bnx2x_main.c | 17 ++++++++++++----- 3 files changed, 18 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c index e47ea92e2ae3..d10b421ed1f1 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c @@ -3057,12 +3057,13 @@ int bnx2x_nic_unload(struct bnx2x *bp, int unload_mode, bool keep_link) /* if VF indicate to PF this function is going down (PF will delete sp * elements and clear initializations */ - if (IS_VF(bp)) + if (IS_VF(bp)) { + bnx2x_clear_vlan_info(bp); bnx2x_vfpf_close_vf(bp); - else if (unload_mode != UNLOAD_RECOVERY) + } else if (unload_mode != UNLOAD_RECOVERY) { /* if this is a normal/close unload need to clean up chip*/ bnx2x_chip_cleanup(bp, unload_mode, keep_link); - else { + } else { /* Send the UNLOAD_REQUEST to the MCP */ bnx2x_send_unload_req(bp, unload_mode); diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h index c2f6e44e9a3f..8b08cb18e363 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h @@ -425,6 +425,8 @@ void bnx2x_set_reset_global(struct bnx2x *bp); void bnx2x_disable_close_the_gate(struct bnx2x *bp); int bnx2x_init_hw_func_cnic(struct bnx2x *bp); +void bnx2x_clear_vlan_info(struct bnx2x *bp); + /** * bnx2x_sp_event - handle ramrods completion. * diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index 2cc14db8f0ec..192ff8d5da32 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -8482,11 +8482,21 @@ int bnx2x_set_vlan_one(struct bnx2x *bp, u16 vlan, return rc; } +void bnx2x_clear_vlan_info(struct bnx2x *bp) +{ + struct bnx2x_vlan_entry *vlan; + + /* Mark that hw forgot all entries */ + list_for_each_entry(vlan, &bp->vlan_reg, link) + vlan->hw = false; + + bp->vlan_cnt = 0; +} + static int bnx2x_del_all_vlans(struct bnx2x *bp) { struct bnx2x_vlan_mac_obj *vlan_obj = &bp->sp_objs[0].vlan_obj; unsigned long ramrod_flags = 0, vlan_flags = 0; - struct bnx2x_vlan_entry *vlan; int rc; __set_bit(RAMROD_COMP_WAIT, &ramrod_flags); @@ -8495,10 +8505,7 @@ static int bnx2x_del_all_vlans(struct bnx2x *bp) if (rc) return rc; - /* Mark that hw forgot all entries */ - list_for_each_entry(vlan, &bp->vlan_reg, link) - vlan->hw = false; - bp->vlan_cnt = 0; + bnx2x_clear_vlan_info(bp); return 0; } From 1eca92eef18719027d394bf1a2d276f43e7cf886 Mon Sep 17 00:00:00 2001 From: Wenwen Wang Date: Wed, 14 Aug 2019 13:03:38 -0500 Subject: [PATCH 467/482] cx82310_eth: fix a memory leak bug In cx82310_bind(), 'dev->partial_data' is allocated through kmalloc(). Then, the execution waits for the firmware to become ready. If the firmware is not ready in time, the execution is terminated. However, the allocated 'dev->partial_data' is not deallocated on this path, leading to a memory leak bug. To fix this issue, free 'dev->partial_data' before returning the error. Signed-off-by: Wenwen Wang Signed-off-by: David S. Miller --- drivers/net/usb/cx82310_eth.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/usb/cx82310_eth.c b/drivers/net/usb/cx82310_eth.c index 5519248a791e..32b08b18e120 100644 --- a/drivers/net/usb/cx82310_eth.c +++ b/drivers/net/usb/cx82310_eth.c @@ -163,7 +163,8 @@ static int cx82310_bind(struct usbnet *dev, struct usb_interface *intf) } if (!timeout) { dev_err(&udev->dev, "firmware not ready in time\n"); - return -ETIMEDOUT; + ret = -ETIMEDOUT; + goto err; } /* enable ethernet mode (?) */ From f1472cb09f11ddb41d4be84f0650835cb65a9073 Mon Sep 17 00:00:00 2001 From: Wenwen Wang Date: Wed, 14 Aug 2019 13:56:43 -0500 Subject: [PATCH 468/482] net: kalmia: fix memory leaks In kalmia_init_and_get_ethernet_addr(), 'usb_buf' is allocated through kmalloc(). In the following execution, if the 'status' returned by kalmia_send_init_packet() is not 0, 'usb_buf' is not deallocated, leading to memory leaks. To fix this issue, add the 'out' label to free 'usb_buf'. Signed-off-by: Wenwen Wang Signed-off-by: David S. Miller --- drivers/net/usb/kalmia.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/usb/kalmia.c b/drivers/net/usb/kalmia.c index d62b6706a537..fc5895f85cee 100644 --- a/drivers/net/usb/kalmia.c +++ b/drivers/net/usb/kalmia.c @@ -113,16 +113,16 @@ kalmia_init_and_get_ethernet_addr(struct usbnet *dev, u8 *ethernet_addr) status = kalmia_send_init_packet(dev, usb_buf, ARRAY_SIZE(init_msg_1), usb_buf, 24); if (status != 0) - return status; + goto out; memcpy(usb_buf, init_msg_2, 12); status = kalmia_send_init_packet(dev, usb_buf, ARRAY_SIZE(init_msg_2), usb_buf, 28); if (status != 0) - return status; + goto out; memcpy(ethernet_addr, usb_buf + 10, ETH_ALEN); - +out: kfree(usb_buf); return status; } From a46ecb116fb7f722fa8cb2da01959c36e4e10c41 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Sat, 17 Aug 2019 17:04:47 -0400 Subject: [PATCH 469/482] bnxt_en: Fix VNIC clearing logic for 57500 chips. During device shutdown, the VNIC clearing sequence needs to be modified to free the VNIC first before freeing the RSS contexts. The current code is doing the reverse and we can get mis-directed RX completions to CP ring ID 0 when the RSS contexts are freed and zeroed. The clearing of RSS contexts is not required with the new sequence. Refactor the VNIC clearing logic into a new function bnxt_clear_vnic() and do the chip specific VNIC clearing sequence. Fixes: 7b3af4f75b81 ("bnxt_en: Add RSS support for 57500 chips.") Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 26 ++++++++++++++++------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 7070349915bc..1ef224fbe302 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -7016,19 +7016,29 @@ static void bnxt_hwrm_clear_vnic_rss(struct bnxt *bp) bnxt_hwrm_vnic_set_rss(bp, i, false); } -static void bnxt_hwrm_resource_free(struct bnxt *bp, bool close_path, - bool irq_re_init) +static void bnxt_clear_vnic(struct bnxt *bp) { - if (bp->vnic_info) { - bnxt_hwrm_clear_vnic_filter(bp); + if (!bp->vnic_info) + return; + + bnxt_hwrm_clear_vnic_filter(bp); + if (!(bp->flags & BNXT_FLAG_CHIP_P5)) { /* clear all RSS setting before free vnic ctx */ bnxt_hwrm_clear_vnic_rss(bp); bnxt_hwrm_vnic_ctx_free(bp); - /* before free the vnic, undo the vnic tpa settings */ - if (bp->flags & BNXT_FLAG_TPA) - bnxt_set_tpa(bp, false); - bnxt_hwrm_vnic_free(bp); } + /* before free the vnic, undo the vnic tpa settings */ + if (bp->flags & BNXT_FLAG_TPA) + bnxt_set_tpa(bp, false); + bnxt_hwrm_vnic_free(bp); + if (bp->flags & BNXT_FLAG_CHIP_P5) + bnxt_hwrm_vnic_ctx_free(bp); +} + +static void bnxt_hwrm_resource_free(struct bnxt *bp, bool close_path, + bool irq_re_init) +{ + bnxt_clear_vnic(bp); bnxt_hwrm_ring_free(bp, close_path); bnxt_hwrm_ring_grp_free(bp); if (irq_re_init) { From e8f267b063208372f7a329c6d5288d58944d873c Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Sat, 17 Aug 2019 17:04:48 -0400 Subject: [PATCH 470/482] bnxt_en: Improve RX doorbell sequence. When both RX buffers and RX aggregation buffers have to be replenished at the end of NAPI, post the RX aggregation buffers first before RX buffers. Otherwise, we may run into a situation where there are only RX buffers without RX aggregation buffers for a split second. This will cause the hardware to abort the RX packet and report buffer errors, which will cause unnecessary cleanup by the driver. Ringing the Aggregation ring doorbell first before the RX ring doorbell will prevent some of these buffer errors. Use the same sequence during ring initialization as well. Fixes: 697197e5a173 ("bnxt_en: Re-structure doorbells.") Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 1ef224fbe302..8dce4069472b 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -2021,9 +2021,9 @@ static void __bnxt_poll_work_done(struct bnxt *bp, struct bnxt_napi *bnapi) if (bnapi->events & BNXT_RX_EVENT) { struct bnxt_rx_ring_info *rxr = bnapi->rx_ring; - bnxt_db_write(bp, &rxr->rx_db, rxr->rx_prod); if (bnapi->events & BNXT_AGG_EVENT) bnxt_db_write(bp, &rxr->rx_agg_db, rxr->rx_agg_prod); + bnxt_db_write(bp, &rxr->rx_db, rxr->rx_prod); } bnapi->events = 0; } @@ -5064,6 +5064,7 @@ static void bnxt_set_db(struct bnxt *bp, struct bnxt_db_info *db, u32 ring_type, static int bnxt_hwrm_ring_alloc(struct bnxt *bp) { + bool agg_rings = !!(bp->flags & BNXT_FLAG_AGG_RINGS); int i, rc = 0; u32 type; @@ -5139,7 +5140,9 @@ static int bnxt_hwrm_ring_alloc(struct bnxt *bp) if (rc) goto err_out; bnxt_set_db(bp, &rxr->rx_db, type, map_idx, ring->fw_ring_id); - bnxt_db_write(bp, &rxr->rx_db, rxr->rx_prod); + /* If we have agg rings, post agg buffers first. */ + if (!agg_rings) + bnxt_db_write(bp, &rxr->rx_db, rxr->rx_prod); bp->grp_info[map_idx].rx_fw_ring_id = ring->fw_ring_id; if (bp->flags & BNXT_FLAG_CHIP_P5) { struct bnxt_cp_ring_info *cpr = &bnapi->cp_ring; @@ -5158,7 +5161,7 @@ static int bnxt_hwrm_ring_alloc(struct bnxt *bp) } } - if (bp->flags & BNXT_FLAG_AGG_RINGS) { + if (agg_rings) { type = HWRM_RING_ALLOC_AGG; for (i = 0; i < bp->rx_nr_rings; i++) { struct bnxt_rx_ring_info *rxr = &bp->rx_ring[i]; @@ -5174,6 +5177,7 @@ static int bnxt_hwrm_ring_alloc(struct bnxt *bp) bnxt_set_db(bp, &rxr->rx_agg_db, type, map_idx, ring->fw_ring_id); bnxt_db_write(bp, &rxr->rx_agg_db, rxr->rx_agg_prod); + bnxt_db_write(bp, &rxr->rx_db, rxr->rx_prod); bp->grp_info[grp_idx].agg_fw_ring_id = ring->fw_ring_id; } } From dd2ebf3404c7c295014bc025dea23960960ceb1a Mon Sep 17 00:00:00 2001 From: Vasundhara Volam Date: Sat, 17 Aug 2019 17:04:49 -0400 Subject: [PATCH 471/482] bnxt_en: Fix handling FRAG_ERR when NVM_INSTALL_UPDATE cmd fails If FW returns FRAG_ERR in response error code, driver is resending the command only when HWRM command returns success. Fix the code to resend NVM_INSTALL_UPDATE command with DEFRAG install flags, if FW returns FRAG_ERR in its response error code. Fixes: cb4d1d626145 ("bnxt_en: Retry failed NVM_INSTALL_UPDATE with defragmentation flag enabled.") Signed-off-by: Vasundhara Volam Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index c7ee63d69679..8445a0cce849 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -2016,21 +2016,19 @@ static int bnxt_flash_package_from_file(struct net_device *dev, mutex_lock(&bp->hwrm_cmd_lock); hwrm_err = _hwrm_send_message(bp, &install, sizeof(install), INSTALL_PACKAGE_TIMEOUT); - if (hwrm_err) - goto flash_pkg_exit; - - if (resp->error_code) { + if (hwrm_err) { u8 error_code = ((struct hwrm_err_output *)resp)->cmd_err; - if (error_code == NVM_INSTALL_UPDATE_CMD_ERR_CODE_FRAG_ERR) { + if (resp->error_code && error_code == + NVM_INSTALL_UPDATE_CMD_ERR_CODE_FRAG_ERR) { install.flags |= cpu_to_le16( NVM_INSTALL_UPDATE_REQ_FLAGS_ALLOWED_TO_DEFRAG); hwrm_err = _hwrm_send_message(bp, &install, sizeof(install), INSTALL_PACKAGE_TIMEOUT); - if (hwrm_err) - goto flash_pkg_exit; } + if (hwrm_err) + goto flash_pkg_exit; } if (resp->result) { From b703ba751dbb4bcd086509ed4b28102bc1670b35 Mon Sep 17 00:00:00 2001 From: Vasundhara Volam Date: Sat, 17 Aug 2019 17:04:50 -0400 Subject: [PATCH 472/482] bnxt_en: Suppress HWRM errors for HWRM_NVM_GET_VARIABLE command For newly added NVM parameters, older firmware may not have the support. Suppress the error message to avoid the unncessary error message which is triggered when devlink calls the driver during initialization. Fixes: 782a624d00fa ("bnxt_en: Add bnxt_en initial params table and register it.") Signed-off-by: Vasundhara Volam Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c index 549c90d3e465..c05d663212b2 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c @@ -98,10 +98,13 @@ static int bnxt_hwrm_nvm_req(struct bnxt *bp, u32 param_id, void *msg, if (idx) req->dimensions = cpu_to_le16(1); - if (req->req_type == cpu_to_le16(HWRM_NVM_SET_VARIABLE)) + if (req->req_type == cpu_to_le16(HWRM_NVM_SET_VARIABLE)) { memcpy(data_addr, buf, bytesize); - - rc = hwrm_send_message(bp, msg, msg_len, HWRM_CMD_TIMEOUT); + rc = hwrm_send_message(bp, msg, msg_len, HWRM_CMD_TIMEOUT); + } else { + rc = hwrm_send_message_silent(bp, msg, msg_len, + HWRM_CMD_TIMEOUT); + } if (!rc && req->req_type == cpu_to_le16(HWRM_NVM_GET_VARIABLE)) memcpy(buf, data_addr, bytesize); From 685ec6a81bb0d47faf1dba49437d5bdaede2733d Mon Sep 17 00:00:00 2001 From: Venkat Duvvuru Date: Sat, 17 Aug 2019 17:04:51 -0400 Subject: [PATCH 473/482] bnxt_en: Use correct src_fid to determine direction of the flow Direction of the flow is determined using src_fid. For an RX flow, src_fid is PF's fid and for TX flow, src_fid is VF's fid. Direction of the flow must be specified, when getting statistics for that flow. Currently, for DECAP flow, direction is determined incorrectly, i.e., direction is initialized as TX for DECAP flow, instead of RX. Because of which, stats are not reported for this DECAP flow, though it is offloaded and there is traffic for that flow, resulting in flow age out. This patch fixes the problem by determining the DECAP flow's direction using correct fid. Set the flow direction in all cases for consistency even if 64-bit flow handle is not used. Fixes: abd43a13525d ("bnxt_en: Support for 64-bit flow handle.") Signed-off-by: Venkat Duvvuru Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c index 6fe4a7174271..6224c30f8821 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c @@ -1285,9 +1285,7 @@ static int bnxt_tc_add_flow(struct bnxt *bp, u16 src_fid, goto free_node; bnxt_tc_set_src_fid(bp, flow, src_fid); - - if (bp->fw_cap & BNXT_FW_CAP_OVS_64BIT_HANDLE) - bnxt_tc_set_flow_dir(bp, flow, src_fid); + bnxt_tc_set_flow_dir(bp, flow, flow->src_fid); if (!bnxt_tc_can_offload(bp, flow)) { rc = -EOPNOTSUPP; From 9bf46566e80fd94845527d01ebd888eb49313551 Mon Sep 17 00:00:00 2001 From: Somnath Kotur Date: Sat, 17 Aug 2019 17:04:52 -0400 Subject: [PATCH 474/482] bnxt_en: Fix to include flow direction in L2 key FW expects the driver to provide unique flow reference handles for Tx or Rx flows. When a Tx flow and an Rx flow end up sharing a reference handle, flow offload does not seem to work. This could happen in the case of 2 flows having their L2 fields wildcarded but in different direction. Fix to incorporate the flow direction as part of the L2 key v2: Move the dir field to the end of the bnxt_tc_l2_key struct to fix the warning reported by kbuild test robot . There is existing code that initializes the structure using nested initializer and will warn with the new u8 field added to the beginning. The structure also packs nicer when this new u8 is added to the end of the structure [MChan]. Fixes: abd43a13525d ("bnxt_en: Support for 64-bit flow handle.") Signed-off-by: Somnath Kotur Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c | 4 ++-- drivers/net/ethernet/broadcom/bnxt/bnxt_tc.h | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c index 6224c30f8821..dd621f6bd127 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c @@ -1236,7 +1236,7 @@ static int __bnxt_tc_del_flow(struct bnxt *bp, static void bnxt_tc_set_flow_dir(struct bnxt *bp, struct bnxt_tc_flow *flow, u16 src_fid) { - flow->dir = (bp->pf.fw_fid == src_fid) ? BNXT_DIR_RX : BNXT_DIR_TX; + flow->l2_key.dir = (bp->pf.fw_fid == src_fid) ? BNXT_DIR_RX : BNXT_DIR_TX; } static void bnxt_tc_set_src_fid(struct bnxt *bp, struct bnxt_tc_flow *flow, @@ -1405,7 +1405,7 @@ static void bnxt_fill_cfa_stats_req(struct bnxt *bp, * 2. 15th bit of flow_handle must specify the flow * direction (TX/RX). */ - if (flow_node->flow.dir == BNXT_DIR_RX) + if (flow_node->flow.l2_key.dir == BNXT_DIR_RX) handle = CFA_FLOW_INFO_REQ_FLOW_HANDLE_DIR_RX | CFA_FLOW_INFO_REQ_FLOW_HANDLE_MAX_MASK; else diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.h index ffec57d1a5ec..4f05305052f2 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.h @@ -23,6 +23,9 @@ struct bnxt_tc_l2_key { __be16 inner_vlan_tci; __be16 ether_type; u8 num_vlans; + u8 dir; +#define BNXT_DIR_RX 1 +#define BNXT_DIR_TX 0 }; struct bnxt_tc_l3_key { @@ -98,9 +101,6 @@ struct bnxt_tc_flow { /* flow applicable to pkts ingressing on this fid */ u16 src_fid; - u8 dir; -#define BNXT_DIR_RX 1 -#define BNXT_DIR_TX 0 struct bnxt_tc_l2_key l2_key; struct bnxt_tc_l2_key l2_mask; struct bnxt_tc_l3_key l3_key; From 80f0fe0934cd3daa13a5e4d48a103f469115b160 Mon Sep 17 00:00:00 2001 From: Thomas Falcon Date: Wed, 14 Aug 2019 14:57:05 -0500 Subject: [PATCH 475/482] ibmvnic: Unmap DMA address of TX descriptor buffers after use There's no need to wait until a completion is received to unmap TX descriptor buffers that have been passed to the hypervisor. Instead unmap it when the hypervisor call has completed. This patch avoids the possibility that a buffer will not be unmapped because a TX completion is lost or mishandled. Reported-by: Abdul Haleem Tested-by: Devesh K. Singh Signed-off-by: Thomas Falcon Signed-off-by: David S. Miller --- drivers/net/ethernet/ibm/ibmvnic.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 3da680073265..cebd20f3128d 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -1568,6 +1568,8 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) lpar_rc = send_subcrq_indirect(adapter, handle_array[queue_num], (u64)tx_buff->indir_dma, (u64)num_entries); + dma_unmap_single(dev, tx_buff->indir_dma, + sizeof(tx_buff->indir_arr), DMA_TO_DEVICE); } else { tx_buff->num_entries = num_entries; lpar_rc = send_subcrq(adapter, handle_array[queue_num], @@ -2788,7 +2790,6 @@ static int ibmvnic_complete_tx(struct ibmvnic_adapter *adapter, union sub_crq *next; int index; int i, j; - u8 *first; restart_loop: while (pending_scrq(adapter, scrq)) { @@ -2818,14 +2819,6 @@ static int ibmvnic_complete_tx(struct ibmvnic_adapter *adapter, txbuff->data_dma[j] = 0; } - /* if sub_crq was sent indirectly */ - first = &txbuff->indir_arr[0].generic.first; - if (*first == IBMVNIC_CRQ_CMD) { - dma_unmap_single(dev, txbuff->indir_dma, - sizeof(txbuff->indir_arr), - DMA_TO_DEVICE); - *first = 0; - } if (txbuff->last_frag) { dev_kfree_skb_any(txbuff->skb); From 3434341004a380f4e47c3a03d4320d43982162a0 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Thu, 15 Aug 2019 12:49:49 -0700 Subject: [PATCH 476/482] net: cavium: fix driver name The driver name gets exposed in sysfs under /sys/bus/pci/drivers so it should look like other devices. Change it to be common format (instead of "Cavium PTP"). This is a trivial fix that was observed by accident because Debian kernels were building this driver into kernel (bug). Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/ethernet/cavium/common/cavium_ptp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/cavium/common/cavium_ptp.c b/drivers/net/ethernet/cavium/common/cavium_ptp.c index 73632b843749..b821c9e1604c 100644 --- a/drivers/net/ethernet/cavium/common/cavium_ptp.c +++ b/drivers/net/ethernet/cavium/common/cavium_ptp.c @@ -10,7 +10,7 @@ #include "cavium_ptp.h" -#define DRV_NAME "Cavium PTP Driver" +#define DRV_NAME "cavium_ptp" #define PCI_DEVICE_ID_CAVIUM_PTP 0xA00C #define PCI_DEVICE_ID_CAVIUM_RST 0xA00E From 44ef3a03252844a8753479b0cea7f29e4a804bdc Mon Sep 17 00:00:00 2001 From: Wenwen Wang Date: Thu, 15 Aug 2019 15:29:51 -0500 Subject: [PATCH 477/482] wimax/i2400m: fix a memory leak bug In i2400m_barker_db_init(), 'options_orig' is allocated through kstrdup() to hold the original command line options. Then, the options are parsed. However, if an error occurs during the parsing process, 'options_orig' is not deallocated, leading to a memory leak bug. To fix this issue, free 'options_orig' before returning the error. Signed-off-by: Wenwen Wang Signed-off-by: David S. Miller --- drivers/net/wimax/i2400m/fw.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/wimax/i2400m/fw.c b/drivers/net/wimax/i2400m/fw.c index e9fc168bb734..489cba9b284d 100644 --- a/drivers/net/wimax/i2400m/fw.c +++ b/drivers/net/wimax/i2400m/fw.c @@ -351,13 +351,15 @@ int i2400m_barker_db_init(const char *_options) } result = i2400m_barker_db_add(barker); if (result < 0) - goto error_add; + goto error_parse_add; } kfree(options_orig); } return 0; +error_parse_add: error_parse: + kfree(options_orig); error_add: kfree(i2400m_barker_db); return result; From ef01adae0e43cfb2468d0ea07137cc63cf31495c Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 16 Aug 2019 03:24:09 +0200 Subject: [PATCH 478/482] net: sched: use major priority number as hardware priority tc transparently maps the software priority number to hardware. Update it to pass the major priority which is what most drivers expect. Update drivers too so they do not need to lshift the priority field of the flow_cls_common_offload object. The stmmac driver is an exception, since this code assumes the tc software priority is fine, therefore, lshift it just to be conservative. Signed-off-by: Pablo Neira Ayuso Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 2 +- drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c | 2 +- drivers/net/ethernet/mscc/ocelot_flower.c | 12 +++--------- drivers/net/ethernet/netronome/nfp/flower/qos_conf.c | 2 +- drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c | 2 +- include/net/pkt_cls.h | 2 +- 6 files changed, 8 insertions(+), 14 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index deeb65da99f3..00b2d4a86159 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -3167,7 +3167,7 @@ mlx5e_flow_esw_attr_init(struct mlx5_esw_flow_attr *esw_attr, esw_attr->parse_attr = parse_attr; esw_attr->chain = f->common.chain_index; - esw_attr->prio = TC_H_MAJ(f->common.prio) >> 16; + esw_attr->prio = f->common.prio; esw_attr->in_rep = in_rep; esw_attr->in_mdev = in_mdev; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c index e8ac90564dbe..84a87d059333 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c @@ -471,7 +471,7 @@ int mlxsw_sp_acl_rulei_commit(struct mlxsw_sp_acl_rule_info *rulei) void mlxsw_sp_acl_rulei_priority(struct mlxsw_sp_acl_rule_info *rulei, unsigned int priority) { - rulei->priority = priority >> 16; + rulei->priority = priority; } void mlxsw_sp_acl_rulei_keymask_u32(struct mlxsw_sp_acl_rule_info *rulei, diff --git a/drivers/net/ethernet/mscc/ocelot_flower.c b/drivers/net/ethernet/mscc/ocelot_flower.c index 59487d446a09..b894bc0c9c16 100644 --- a/drivers/net/ethernet/mscc/ocelot_flower.c +++ b/drivers/net/ethernet/mscc/ocelot_flower.c @@ -13,12 +13,6 @@ struct ocelot_port_block { struct ocelot_port *port; }; -static u16 get_prio(u32 prio) -{ - /* prio starts from 0x1000 while the ids starts from 0 */ - return prio >> 16; -} - static int ocelot_flower_parse_action(struct flow_cls_offload *f, struct ocelot_ace_rule *rule) { @@ -168,7 +162,7 @@ static int ocelot_flower_parse(struct flow_cls_offload *f, } finished_key_parsing: - ocelot_rule->prio = get_prio(f->common.prio); + ocelot_rule->prio = f->common.prio; ocelot_rule->id = f->cookie; return ocelot_flower_parse_action(f, ocelot_rule); } @@ -218,7 +212,7 @@ static int ocelot_flower_destroy(struct flow_cls_offload *f, struct ocelot_ace_rule rule; int ret; - rule.prio = get_prio(f->common.prio); + rule.prio = f->common.prio; rule.port = port_block->port; rule.id = f->cookie; @@ -236,7 +230,7 @@ static int ocelot_flower_stats_update(struct flow_cls_offload *f, struct ocelot_ace_rule rule; int ret; - rule.prio = get_prio(f->common.prio); + rule.prio = f->common.prio; rule.port = port_block->port; rule.id = f->cookie; ret = ocelot_ace_rule_stats_update(&rule); diff --git a/drivers/net/ethernet/netronome/nfp/flower/qos_conf.c b/drivers/net/ethernet/netronome/nfp/flower/qos_conf.c index 86e968cd5ffd..124a43dc136a 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/qos_conf.c +++ b/drivers/net/ethernet/netronome/nfp/flower/qos_conf.c @@ -93,7 +93,7 @@ nfp_flower_install_rate_limiter(struct nfp_app *app, struct net_device *netdev, return -EOPNOTSUPP; } - if (flow->common.prio != (1 << 16)) { + if (flow->common.prio != 1) { NL_SET_ERR_MSG_MOD(extack, "unsupported offload: qos rate limit offload requires highest priority"); return -EOPNOTSUPP; } diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c index 37c0bc699cd9..6c305b6ecad0 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c @@ -94,7 +94,7 @@ static int tc_fill_entry(struct stmmac_priv *priv, struct stmmac_tc_entry *entry, *frag = NULL; struct tc_u32_sel *sel = cls->knode.sel; u32 off, data, mask, real_off, rem; - u32 prio = cls->common.prio; + u32 prio = cls->common.prio << 16; int ret; /* Only 1 match per entry */ diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index e429809ca90d..98be18ef1ed3 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -646,7 +646,7 @@ tc_cls_common_offload_init(struct flow_cls_common_offload *cls_common, { cls_common->chain_index = tp->chain->index; cls_common->protocol = tp->protocol; - cls_common->prio = tp->prio; + cls_common->prio = tp->prio >> 16; if (tc_skip_sw(flags) || flags & TCA_CLS_FLAGS_VERBOSE) cls_common->extack = extack; } From 3bc158f8d0330f0ac58597c023acca2234c14616 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 16 Aug 2019 03:24:10 +0200 Subject: [PATCH 479/482] netfilter: nf_tables: map basechain priority to hardware priority This patch adds initial support for offloading basechains using the priority range from 1 to 65535. This is restricting the netfilter priority range to 16-bit integer since this is what most drivers assume so far from tc. It should be possible to extend this range of supported priorities later on once drivers are updated to support for 32-bit integer priorities. Signed-off-by: Pablo Neira Ayuso Signed-off-by: David S. Miller --- include/net/netfilter/nf_tables_offload.h | 2 ++ net/netfilter/nf_tables_api.c | 4 ++++ net/netfilter/nf_tables_offload.c | 17 ++++++++++++++--- 3 files changed, 20 insertions(+), 3 deletions(-) diff --git a/include/net/netfilter/nf_tables_offload.h b/include/net/netfilter/nf_tables_offload.h index 3196663a10e3..c8b9dec376f5 100644 --- a/include/net/netfilter/nf_tables_offload.h +++ b/include/net/netfilter/nf_tables_offload.h @@ -73,4 +73,6 @@ int nft_flow_rule_offload_commit(struct net *net); (__reg)->key = __key; \ memset(&(__reg)->mask, 0xff, (__reg)->len); +int nft_chain_offload_priority(struct nft_base_chain *basechain); + #endif diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 88abbddf8967..d47469f824a1 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -1667,6 +1667,10 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask, chain->flags |= NFT_BASE_CHAIN | flags; basechain->policy = NF_ACCEPT; + if (chain->flags & NFT_CHAIN_HW_OFFLOAD && + nft_chain_offload_priority(basechain) < 0) + return -EOPNOTSUPP; + flow_block_init(&basechain->flow_block); } else { chain = kzalloc(sizeof(*chain), GFP_KERNEL); diff --git a/net/netfilter/nf_tables_offload.c b/net/netfilter/nf_tables_offload.c index 64f5fd5f240e..c0d18c1d77ac 100644 --- a/net/netfilter/nf_tables_offload.c +++ b/net/netfilter/nf_tables_offload.c @@ -103,10 +103,11 @@ void nft_offload_update_dependency(struct nft_offload_ctx *ctx, } static void nft_flow_offload_common_init(struct flow_cls_common_offload *common, - __be16 proto, - struct netlink_ext_ack *extack) + __be16 proto, int priority, + struct netlink_ext_ack *extack) { common->protocol = proto; + common->prio = priority; common->extack = extack; } @@ -124,6 +125,15 @@ static int nft_setup_cb_call(struct nft_base_chain *basechain, return 0; } +int nft_chain_offload_priority(struct nft_base_chain *basechain) +{ + if (basechain->ops.priority <= 0 || + basechain->ops.priority > USHRT_MAX) + return -1; + + return 0; +} + static int nft_flow_offload_rule(struct nft_trans *trans, enum flow_cls_command command) { @@ -142,7 +152,8 @@ static int nft_flow_offload_rule(struct nft_trans *trans, if (flow) proto = flow->proto; - nft_flow_offload_common_init(&cls_flow.common, proto, &extack); + nft_flow_offload_common_init(&cls_flow.common, proto, + basechain->ops.priority, &extack); cls_flow.command = command; cls_flow.cookie = (unsigned long) rule; if (flow) From cfef46d692efd852a0da6803f920cc756eea2855 Mon Sep 17 00:00:00 2001 From: Tho Vu Date: Fri, 16 Aug 2019 17:17:02 +0200 Subject: [PATCH 480/482] ravb: Fix use-after-free ravb_tstamp_skb When a Tx timestamp is requested, a pointer to the skb is stored in the ravb_tstamp_skb struct. This was done without an skb_get. There exists the possibility that the skb could be freed by ravb_tx_free (when ravb_tx_free is called from ravb_start_xmit) before the timestamp was processed, leading to a use-after-free bug. Use skb_get when filling a ravb_tstamp_skb struct, and add appropriate frees/consumes when a ravb_tstamp_skb struct is freed. Fixes: c156633f1353 ("Renesas Ethernet AVB driver proper") Signed-off-by: Tho Vu Signed-off-by: Kazuya Mizuguchi Signed-off-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/ethernet/renesas/ravb_main.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index ef8f08931fe8..6cacd5e893ac 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* Renesas Ethernet AVB device driver * - * Copyright (C) 2014-2015 Renesas Electronics Corporation + * Copyright (C) 2014-2019 Renesas Electronics Corporation * Copyright (C) 2015 Renesas Solutions Corp. * Copyright (C) 2015-2016 Cogent Embedded, Inc. * @@ -513,7 +513,10 @@ static void ravb_get_tx_tstamp(struct net_device *ndev) kfree(ts_skb); if (tag == tfa_tag) { skb_tstamp_tx(skb, &shhwtstamps); + dev_consume_skb_any(skb); break; + } else { + dev_kfree_skb_any(skb); } } ravb_modify(ndev, TCCR, TCCR_TFR, TCCR_TFR); @@ -1564,7 +1567,7 @@ static netdev_tx_t ravb_start_xmit(struct sk_buff *skb, struct net_device *ndev) } goto unmap; } - ts_skb->skb = skb; + ts_skb->skb = skb_get(skb); ts_skb->tag = priv->ts_skb_tag++; priv->ts_skb_tag &= 0x3ff; list_add_tail(&ts_skb->list, &priv->ts_skb_list); @@ -1693,6 +1696,7 @@ static int ravb_close(struct net_device *ndev) /* Clear the timestamp list */ list_for_each_entry_safe(ts_skb, ts_skb2, &priv->ts_skb_list, list) { list_del(&ts_skb->list); + kfree_skb(ts_skb->skb); kfree(ts_skb); } From d1abaeb3be7b5fa6d7a1fbbd2e14e3310005c4c1 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 18 Aug 2019 14:31:08 -0700 Subject: [PATCH 481/482] Linux 5.3-rc5 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 1b23f95db176..9fa18613566f 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 5 PATCHLEVEL = 3 SUBLEVEL = 0 -EXTRAVERSION = -rc4 +EXTRAVERSION = -rc5 NAME = Bobtail Squid # *DOCUMENTATION* From 555df336c754ac9de1af9a5c72508918b3796b18 Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 19 Aug 2019 16:02:01 +0100 Subject: [PATCH 482/482] keys: Fix description size The maximum key description size is 4095. Commit f771fde82051 ("keys: Simplify key description management") inadvertantly reduced that to 255 and made sizes between 256 and 4095 work weirdly, and any size whereby size & 255 == 0 would cause an assertion in __key_link_begin() at the following line: BUG_ON(index_key->desc_len == 0); This can be fixed by simply increasing the size of desc_len in struct keyring_index_key to a u16. Note the argument length test in keyutils only checked empty descriptions and descriptions with a size around the limit (ie. 4095) and not for all the values in between, so it missed this. This has been addressed and https://git.kernel.org/pub/scm/linux/kernel/git/dhowells/keyutils.git/commit/?id=066bf56807c26cd3045a25f355b34c1d8a20a5aa now exhaustively tests all possible lengths of type, description and payload and then some. The assertion failure looks something like: kernel BUG at security/keys/keyring.c:1245! ... RIP: 0010:__key_link_begin+0x88/0xa0 ... Call Trace: key_create_or_update+0x211/0x4b0 __x64_sys_add_key+0x101/0x200 do_syscall_64+0x5b/0x1e0 entry_SYSCALL_64_after_hwframe+0x44/0xa9 It can be triggered by: keyctl add user "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" a @s Fixes: f771fde82051 ("keys: Simplify key description management") Reported-by: kernel test robot Signed-off-by: David Howells Signed-off-by: Linus Torvalds --- include/linux/key.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/linux/key.h b/include/linux/key.h index 91f391cd272e..50028338a4cc 100644 --- a/include/linux/key.h +++ b/include/linux/key.h @@ -94,11 +94,11 @@ struct keyring_index_key { union { struct { #ifdef __LITTLE_ENDIAN /* Put desc_len at the LSB of x */ - u8 desc_len; - char desc[sizeof(long) - 1]; /* First few chars of description */ + u16 desc_len; + char desc[sizeof(long) - 2]; /* First few chars of description */ #else - char desc[sizeof(long) - 1]; /* First few chars of description */ - u8 desc_len; + char desc[sizeof(long) - 2]; /* First few chars of description */ + u16 desc_len; #endif }; unsigned long x;