From 0d6f1693f255795d5c747dc444d69c6512586d98 Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Wed, 4 Dec 2019 09:29:20 +0100 Subject: [PATCH 01/57] s390/cpum_sf: Rework sampling buffer allocation Adjust sampling buffer allocation depending on frequency and correct comments. Investigation on the interrupt handler revealed that almost always one interupt services one SDB, even when running with the maximum frequency of 100000. Very rarely there have been 2 SBD serviced per interrupt. Therefore reduce the number of SBD per CPU. Each SDB is one page in size. The new formula results in freq:4000 n_sdb:32 new:16 freq:10000 n_sdb:80 new:16 freq:20000 n_sdb:159 new:17 freq:40000 n_sdb:318 new:19 freq:50000 n_sdb:397 new:20 freq:62500 n_sdb:497 new:22 freq:83333 n_sdb:662 new:24 freq:100000 n_sdb:794 new:25 Signed-off-by: Thomas Richter Signed-off-by: Vasily Gorbik --- arch/s390/kernel/perf_cpum_sf.c | 40 +++++++++++++++++---------------- 1 file changed, 21 insertions(+), 19 deletions(-) diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index b095b1c78987..cf2020b8db44 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -372,28 +372,33 @@ static void deallocate_buffers(struct cpu_hw_sf *cpuhw) static int allocate_buffers(struct cpu_hw_sf *cpuhw, struct hw_perf_event *hwc) { - unsigned long n_sdb, freq, factor; + unsigned long n_sdb, freq; size_t sample_size; /* Calculate sampling buffers using 4K pages * - * 1. Determine the sample data size which depends on the used - * sampling functions, for example, basic-sampling or - * basic-sampling with diagnostic-sampling. + * 1. The sampling size is 32 bytes for basic sampling. This size + * is the same for all machine types. Diagnostic + * sampling uses auxlilary data buffer setup which provides the + * memory for SDBs using linux common code auxiliary trace + * setup. * - * 2. Use the sampling frequency as input. The sampling buffer is - * designed for almost one second. This can be adjusted through - * the "factor" variable. - * In any case, alloc_sampling_buffer() sets the Alert Request + * 2. Function alloc_sampling_buffer() sets the Alert Request * Control indicator to trigger a measurement-alert to harvest - * sample-data-blocks (sdb). + * sample-data-blocks (SDB). This is done per SDB. This + * measurement alert interrupt fires quick enough to handle + * one SDB, on very high frequency and work loads there might + * be 2 to 3 SBDs available for sample processing. + * Currently there is no need for setup alert request on every + * n-th page. This is counterproductive as one IRQ triggers + * a very high number of samples to be processed at one IRQ. * - * 3. Compute the number of sample-data-blocks and ensure a minimum - * of CPUM_SF_MIN_SDB. Also ensure the upper limit does not - * exceed a "calculated" maximum. The symbolic maximum is - * designed for basic-sampling only and needs to be increased if - * diagnostic-sampling is active. - * See also the remarks for these symbolic constants. + * 3. Use the sampling frequency as input. + * Compute the number of SDBs and ensure a minimum + * of CPUM_SF_MIN_SDB. Depending on frequency add some more + * SDBs to handle a higher sampling rate. + * Use a minimum of CPUM_SF_MIN_SDB and allow for 100 samples + * (one SDB) for every 10000 HZ frequency increment. * * 4. Compute the number of sample-data-block-tables (SDBT) and * ensure a minimum of CPUM_SF_MIN_SDBT (one table can manage up @@ -401,10 +406,7 @@ static int allocate_buffers(struct cpu_hw_sf *cpuhw, struct hw_perf_event *hwc) */ sample_size = sizeof(struct hws_basic_entry); freq = sample_rate_to_freq(&cpuhw->qsi, SAMPL_RATE(hwc)); - factor = 1; - n_sdb = DIV_ROUND_UP(freq, factor * ((PAGE_SIZE-64) / sample_size)); - if (n_sdb < CPUM_SF_MIN_SDB) - n_sdb = CPUM_SF_MIN_SDB; + n_sdb = CPUM_SF_MIN_SDB + DIV_ROUND_UP(freq, 10000); /* If there is already a sampling buffer allocated, it is very likely * that the sampling facility is enabled too. If the event to be From c4f762ff6b7766e0053e39d1d87d599384288048 Mon Sep 17 00:00:00 2001 From: Harald Freudenberger Date: Fri, 7 Feb 2020 09:05:56 +0100 Subject: [PATCH 02/57] s390/zcrypt: Support for CCA protected key block version 2 There will come a new CCA keyblock version 2 for protected keys delivered back to the OS. The difference is only the amount of available buffer space to be up to 256 bytes for version 2. Signed-off-by: Harald Freudenberger Signed-off-by: Vasily Gorbik --- drivers/s390/crypto/zcrypt_ccamisc.c | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/drivers/s390/crypto/zcrypt_ccamisc.c b/drivers/s390/crypto/zcrypt_ccamisc.c index 110fe9d0cb91..e6899107c586 100644 --- a/drivers/s390/crypto/zcrypt_ccamisc.c +++ b/drivers/s390/crypto/zcrypt_ccamisc.c @@ -592,7 +592,7 @@ int cca_sec2protkey(u16 cardnr, u16 domain, u8 pad2[1]; u8 vptype; u8 vp[32]; /* verification pattern */ - } keyblock; + } ckb; } lv3; } __packed * prepparm; @@ -650,15 +650,16 @@ int cca_sec2protkey(u16 cardnr, u16 domain, prepparm = (struct uskrepparm *) prepcblk->rpl_parmb; /* check the returned keyblock */ - if (prepparm->lv3.keyblock.version != 0x01) { - DEBUG_ERR("%s reply param keyblock version mismatch 0x%02x != 0x01\n", - __func__, (int) prepparm->lv3.keyblock.version); + if (prepparm->lv3.ckb.version != 0x01 && + prepparm->lv3.ckb.version != 0x02) { + DEBUG_ERR("%s reply param keyblock version mismatch 0x%02x\n", + __func__, (int) prepparm->lv3.ckb.version); rc = -EIO; goto out; } /* copy the tanslated protected key */ - switch (prepparm->lv3.keyblock.len) { + switch (prepparm->lv3.ckb.len) { case 16+32: /* AES 128 protected key */ if (protkeytype) @@ -676,13 +677,13 @@ int cca_sec2protkey(u16 cardnr, u16 domain, break; default: DEBUG_ERR("%s unknown/unsupported keylen %d\n", - __func__, prepparm->lv3.keyblock.len); + __func__, prepparm->lv3.ckb.len); rc = -EIO; goto out; } - memcpy(protkey, prepparm->lv3.keyblock.key, prepparm->lv3.keyblock.len); + memcpy(protkey, prepparm->lv3.ckb.key, prepparm->lv3.ckb.len); if (protkeylen) - *protkeylen = prepparm->lv3.keyblock.len; + *protkeylen = prepparm->lv3.ckb.len; out: free_cprbmem(mem, PARMBSIZE, 0); @@ -1260,10 +1261,10 @@ int cca_cipher2protkey(u16 cardnr, u16 domain, const u8 *ckey, prepparm = (struct aurepparm *) prepcblk->rpl_parmb; /* check the returned keyblock */ - if (prepparm->vud.ckb.version != 0x01) { - DEBUG_ERR( - "%s reply param keyblock version mismatch 0x%02x != 0x01\n", - __func__, (int) prepparm->vud.ckb.version); + if (prepparm->vud.ckb.version != 0x01 && + prepparm->vud.ckb.version != 0x02) { + DEBUG_ERR("%s reply param keyblock version mismatch 0x%02x\n", + __func__, (int) prepparm->vud.ckb.version); rc = -EIO; goto out; } From dd62abd2d84d8fe09c644a7407a34c22cb3d43be Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Mon, 10 Feb 2020 14:56:41 +0100 Subject: [PATCH 03/57] s390/qdio: clean up cdev access in qdio_setup_irq() Some parts use init_data->cdev, others use irq_ptr->cdev. In the end it's all the same, but unnecessarily confusing. Use a single reference instead. Signed-off-by: Julian Wiedmann Reviewed-by: Steffen Maier Reviewed-by: Benjamin Block Signed-off-by: Vasily Gorbik --- drivers/s390/cio/qdio_setup.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/drivers/s390/cio/qdio_setup.c b/drivers/s390/cio/qdio_setup.c index 3ab8e80d7bbc..36890ed68083 100644 --- a/drivers/s390/cio/qdio_setup.c +++ b/drivers/s390/cio/qdio_setup.c @@ -453,7 +453,8 @@ static void setup_qib(struct qdio_irq *irq_ptr, int qdio_setup_irq(struct qdio_initialize *init_data) { struct ciw *ciw; - struct qdio_irq *irq_ptr = init_data->cdev->private->qdio_data; + struct ccw_device *cdev = init_data->cdev; + struct qdio_irq *irq_ptr = cdev->private->qdio_data; memset(&irq_ptr->qib, 0, sizeof(irq_ptr->qib)); memset(&irq_ptr->siga_flag, 0, sizeof(irq_ptr->siga_flag)); @@ -470,9 +471,9 @@ int qdio_setup_irq(struct qdio_initialize *init_data) irq_ptr->int_parm = init_data->int_parm; irq_ptr->nr_input_qs = init_data->no_input_qs; irq_ptr->nr_output_qs = init_data->no_output_qs; - irq_ptr->cdev = init_data->cdev; + irq_ptr->cdev = cdev; irq_ptr->scan_threshold = init_data->scan_threshold; - ccw_device_get_schid(irq_ptr->cdev, &irq_ptr->schid); + ccw_device_get_schid(cdev, &irq_ptr->schid); setup_queues(irq_ptr, init_data); setup_qib(irq_ptr, init_data); @@ -488,14 +489,14 @@ int qdio_setup_irq(struct qdio_initialize *init_data) /* qdr, qib, sls, slsbs, slibs, sbales are filled now */ /* get qdio commands */ - ciw = ccw_device_get_ciw(init_data->cdev, CIW_TYPE_EQUEUE); + ciw = ccw_device_get_ciw(cdev, CIW_TYPE_EQUEUE); if (!ciw) { DBF_ERROR("%4x NO EQ", irq_ptr->schid.sch_no); return -EINVAL; } irq_ptr->equeue = *ciw; - ciw = ccw_device_get_ciw(init_data->cdev, CIW_TYPE_AQUEUE); + ciw = ccw_device_get_ciw(cdev, CIW_TYPE_AQUEUE); if (!ciw) { DBF_ERROR("%4x NO AQ", irq_ptr->schid.sch_no); return -EINVAL; @@ -503,10 +504,10 @@ int qdio_setup_irq(struct qdio_initialize *init_data) irq_ptr->aqueue = *ciw; /* set new interrupt handler */ - spin_lock_irq(get_ccwdev_lock(irq_ptr->cdev)); - irq_ptr->orig_handler = init_data->cdev->handler; - init_data->cdev->handler = qdio_int_handler; - spin_unlock_irq(get_ccwdev_lock(irq_ptr->cdev)); + spin_lock_irq(get_ccwdev_lock(cdev)); + irq_ptr->orig_handler = cdev->handler; + cdev->handler = qdio_int_handler; + spin_unlock_irq(get_ccwdev_lock(cdev)); return 0; } From 014816b66218d9f5f90e6d92951abc9d3749b4cd Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Mon, 10 Feb 2020 14:58:07 +0100 Subject: [PATCH 04/57] s390/qdio: reduce access to cdev->private->qdio_data Remove all usage of cdev->private->qdio_data that's buried deep in internal code. This should only be used by the exported driver API, which can then pass around a proper qdio_irq pointer. Also trivially merge some initializations with their definitions. Signed-off-by: Julian Wiedmann Reviewed-by: Benjamin Block Signed-off-by: Vasily Gorbik --- drivers/s390/cio/qdio.h | 2 +- drivers/s390/cio/qdio_main.c | 13 +++++-------- drivers/s390/cio/qdio_setup.c | 3 +-- 3 files changed, 7 insertions(+), 11 deletions(-) diff --git a/drivers/s390/cio/qdio.h b/drivers/s390/cio/qdio.h index ff74eb5fce50..85d530927e4e 100644 --- a/drivers/s390/cio/qdio.h +++ b/drivers/s390/cio/qdio.h @@ -392,7 +392,7 @@ void qdio_setup_ssqd_info(struct qdio_irq *irq_ptr); int qdio_setup_get_ssqd(struct qdio_irq *irq_ptr, struct subchannel_id *schid, struct qdio_ssqd_desc *data); -int qdio_setup_irq(struct qdio_initialize *init_data); +int qdio_setup_irq(struct qdio_irq *irq_ptr, struct qdio_initialize *init_data); void qdio_print_subchannel_info(struct qdio_irq *irq_ptr, struct ccw_device *cdev); void qdio_release_memory(struct qdio_irq *irq_ptr); diff --git a/drivers/s390/cio/qdio_main.c b/drivers/s390/cio/qdio_main.c index 3475317c42e5..2886b95f4741 100644 --- a/drivers/s390/cio/qdio_main.c +++ b/drivers/s390/cio/qdio_main.c @@ -1105,9 +1105,8 @@ int qdio_get_ssqd_desc(struct ccw_device *cdev, } EXPORT_SYMBOL_GPL(qdio_get_ssqd_desc); -static void qdio_shutdown_queues(struct ccw_device *cdev) +static void qdio_shutdown_queues(struct qdio_irq *irq_ptr) { - struct qdio_irq *irq_ptr = cdev->private->qdio_data; struct qdio_q *q; int i; @@ -1155,7 +1154,7 @@ int qdio_shutdown(struct ccw_device *cdev, int how) qdio_set_state(irq_ptr, QDIO_IRQ_STATE_STOPPED); tiqdio_remove_device(irq_ptr); - qdio_shutdown_queues(cdev); + qdio_shutdown_queues(irq_ptr); qdio_shutdown_debug_entries(irq_ptr); /* cleanup subchannel */ @@ -1316,19 +1315,18 @@ static void qdio_detect_hsicq(struct qdio_irq *irq_ptr) int qdio_establish(struct qdio_initialize *init_data) { struct ccw_device *cdev = init_data->cdev; + struct qdio_irq *irq_ptr = cdev->private->qdio_data; struct subchannel_id schid; - struct qdio_irq *irq_ptr; int rc; ccw_device_get_schid(cdev, &schid); DBF_EVENT("qestablish:%4x", schid.sch_no); - irq_ptr = cdev->private->qdio_data; if (!irq_ptr) return -ENODEV; mutex_lock(&irq_ptr->setup_mutex); - qdio_setup_irq(init_data); + qdio_setup_irq(irq_ptr, init_data); rc = qdio_establish_thinint(irq_ptr); if (rc) { @@ -1386,14 +1384,13 @@ EXPORT_SYMBOL_GPL(qdio_establish); */ int qdio_activate(struct ccw_device *cdev) { + struct qdio_irq *irq_ptr = cdev->private->qdio_data; struct subchannel_id schid; - struct qdio_irq *irq_ptr; int rc; ccw_device_get_schid(cdev, &schid); DBF_EVENT("qactivate:%4x", schid.sch_no); - irq_ptr = cdev->private->qdio_data; if (!irq_ptr) return -ENODEV; diff --git a/drivers/s390/cio/qdio_setup.c b/drivers/s390/cio/qdio_setup.c index 36890ed68083..c56ff92c6e0c 100644 --- a/drivers/s390/cio/qdio_setup.c +++ b/drivers/s390/cio/qdio_setup.c @@ -450,11 +450,10 @@ static void setup_qib(struct qdio_irq *irq_ptr, memcpy(irq_ptr->qib.ebcnam, init_data->adapter_name, 8); } -int qdio_setup_irq(struct qdio_initialize *init_data) +int qdio_setup_irq(struct qdio_irq *irq_ptr, struct qdio_initialize *init_data) { struct ciw *ciw; struct ccw_device *cdev = init_data->cdev; - struct qdio_irq *irq_ptr = cdev->private->qdio_data; memset(&irq_ptr->qib, 0, sizeof(irq_ptr->qib)); memset(&irq_ptr->siga_flag, 0, sizeof(irq_ptr->siga_flag)); From b059a39cfa27c04e8e03e4ddf44f16501f36357d Mon Sep 17 00:00:00 2001 From: Stefan Raspl Date: Mon, 20 Jan 2020 13:10:37 +0100 Subject: [PATCH 05/57] s390/arch: install kernels with their proper version ID In case $INSTALLKERNEL is not available, we should install the kernel image with its version number, and save the previous one accordingly. Also, we're adding a hint so users know that they still need to perform one more configuration step (usually adjusting zipl config). Signed-off-by: Stefan Raspl Signed-off-by: Vasily Gorbik --- arch/s390/boot/install.sh | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/arch/s390/boot/install.sh b/arch/s390/boot/install.sh index bed227f267ae..515b27a996b3 100644 --- a/arch/s390/boot/install.sh +++ b/arch/s390/boot/install.sh @@ -21,15 +21,10 @@ if [ -x ~/bin/${INSTALLKERNEL} ]; then exec ~/bin/${INSTALLKERNEL} "$@"; fi if [ -x /sbin/${INSTALLKERNEL} ]; then exec /sbin/${INSTALLKERNEL} "$@"; fi -# Default install - same as make zlilo +echo "Warning: '${INSTALLKERNEL}' command not available - additional " \ + "bootloader config required" >&2 +if [ -f $4/vmlinuz-$1 ]; then mv $4/vmlinuz-$1 $4/vmlinuz-$1.old; fi +if [ -f $4/System.map-$1 ]; then mv $4/System.map-$1 $4/System.map-$1.old; fi -if [ -f $4/vmlinuz ]; then - mv $4/vmlinuz $4/vmlinuz.old -fi - -if [ -f $4/System.map ]; then - mv $4/System.map $4/System.old -fi - -cat $2 > $4/vmlinuz -cp $3 $4/System.map +cat $2 > $4/vmlinuz-$1 +cp $3 $4/System.map-$1 From 6e2a7b5171ec7fd6005639479961ca28e70d4929 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Wed, 19 Feb 2020 11:19:15 +0100 Subject: [PATCH 06/57] s390/qdio: use QDIO_IRQ_STATE_INACTIVE instead of 0 Don't rely on the numeric value of enum constants. Signed-off-by: Julian Wiedmann Reviewed-by: Benjamin Block Signed-off-by: Vasily Gorbik --- drivers/s390/cio/qdio_setup.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/s390/cio/qdio_setup.c b/drivers/s390/cio/qdio_setup.c index c56ff92c6e0c..d57b115867a3 100644 --- a/drivers/s390/cio/qdio_setup.c +++ b/drivers/s390/cio/qdio_setup.c @@ -462,7 +462,8 @@ int qdio_setup_irq(struct qdio_irq *irq_ptr, struct qdio_initialize *init_data) memset(&irq_ptr->perf_stat, 0, sizeof(irq_ptr->perf_stat)); irq_ptr->debugfs_dev = irq_ptr->debugfs_perf = NULL; - irq_ptr->sch_token = irq_ptr->state = irq_ptr->perf_stat_enabled = 0; + irq_ptr->sch_token = irq_ptr->perf_stat_enabled = 0; + irq_ptr->state = QDIO_IRQ_STATE_INACTIVE; /* wipes qib.ac, required by ar7063 */ memset(irq_ptr->qdr, 0, sizeof(struct qdr)); From d5d006fa0927c34fa083c8d48e33b1c30b29fd1b Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Fri, 21 Feb 2020 10:54:41 +0100 Subject: [PATCH 07/57] s390/qdio: simplify debugfs code There's no need for error handling, the debugfs core is smart enough to deal with IS_ERR() internally. This will also keep us from creating the debugfs files if the device directory doesn't exist. Currently (because irq_ptr->debugfs_dev gets set to NULL on error) the files would be placed into the debugfs root - without any association to their parent device. On teardown, use the debugfs_remove_recursive() helper to avoid keeping track of each created file/directory. Signed-off-by: Julian Wiedmann Reviewed-by: Benjamin Block Signed-off-by: Vasily Gorbik --- drivers/s390/cio/qdio.h | 2 -- drivers/s390/cio/qdio_debug.c | 34 ++++++++-------------------------- drivers/s390/cio/qdio_setup.c | 2 +- 3 files changed, 9 insertions(+), 29 deletions(-) diff --git a/drivers/s390/cio/qdio.h b/drivers/s390/cio/qdio.h index 85d530927e4e..e24a2cde487a 100644 --- a/drivers/s390/cio/qdio.h +++ b/drivers/s390/cio/qdio.h @@ -254,7 +254,6 @@ struct qdio_q { /* upper-layer program handler */ qdio_handler_t (*handler); - struct dentry *debugfs_q; struct qdio_irq *irq_ptr; struct sl *sl; /* @@ -270,7 +269,6 @@ struct qdio_irq { struct ccw_device *cdev; struct list_head entry; /* list of thinint devices */ struct dentry *debugfs_dev; - struct dentry *debugfs_perf; unsigned long int_parm; struct subchannel_id schid; diff --git a/drivers/s390/cio/qdio_debug.c b/drivers/s390/cio/qdio_debug.c index 9c0370b27426..bf8d82503870 100644 --- a/drivers/s390/cio/qdio_debug.c +++ b/drivers/s390/cio/qdio_debug.c @@ -284,17 +284,14 @@ static const struct file_operations debugfs_perf_fops = { .release = single_release, }; -static void setup_debugfs_entry(struct qdio_q *q) +static void setup_debugfs_entry(struct dentry *parent, struct qdio_q *q) { char name[QDIO_DEBUGFS_NAME_LEN]; snprintf(name, QDIO_DEBUGFS_NAME_LEN, "%s_%d", q->is_input_q ? "input" : "output", q->nr); - q->debugfs_q = debugfs_create_file(name, 0444, - q->irq_ptr->debugfs_dev, q, &qstat_fops); - if (IS_ERR(q->debugfs_q)) - q->debugfs_q = NULL; + debugfs_create_file(name, 0444, parent, q, &qstat_fops); } void qdio_setup_debug_entries(struct qdio_irq *irq_ptr, struct ccw_device *cdev) @@ -304,33 +301,18 @@ void qdio_setup_debug_entries(struct qdio_irq *irq_ptr, struct ccw_device *cdev) irq_ptr->debugfs_dev = debugfs_create_dir(dev_name(&cdev->dev), debugfs_root); - if (IS_ERR(irq_ptr->debugfs_dev)) - irq_ptr->debugfs_dev = NULL; - - irq_ptr->debugfs_perf = debugfs_create_file("statistics", - S_IFREG | S_IRUGO | S_IWUSR, - irq_ptr->debugfs_dev, irq_ptr, - &debugfs_perf_fops); - if (IS_ERR(irq_ptr->debugfs_perf)) - irq_ptr->debugfs_perf = NULL; + debugfs_create_file("statistics", S_IFREG | S_IRUGO | S_IWUSR, + irq_ptr->debugfs_dev, irq_ptr, &debugfs_perf_fops); for_each_input_queue(irq_ptr, q, i) - setup_debugfs_entry(q); + setup_debugfs_entry(irq_ptr->debugfs_dev, q); for_each_output_queue(irq_ptr, q, i) - setup_debugfs_entry(q); + setup_debugfs_entry(irq_ptr->debugfs_dev, q); } void qdio_shutdown_debug_entries(struct qdio_irq *irq_ptr) { - struct qdio_q *q; - int i; - - for_each_input_queue(irq_ptr, q, i) - debugfs_remove(q->debugfs_q); - for_each_output_queue(irq_ptr, q, i) - debugfs_remove(q->debugfs_q); - debugfs_remove(irq_ptr->debugfs_perf); - debugfs_remove(irq_ptr->debugfs_dev); + debugfs_remove_recursive(irq_ptr->debugfs_dev); } int __init qdio_debug_init(void) @@ -352,7 +334,7 @@ int __init qdio_debug_init(void) void qdio_debug_exit(void) { qdio_clear_dbf_list(); - debugfs_remove(debugfs_root); + debugfs_remove_recursive(debugfs_root); debug_unregister(qdio_dbf_setup); debug_unregister(qdio_dbf_error); } diff --git a/drivers/s390/cio/qdio_setup.c b/drivers/s390/cio/qdio_setup.c index d57b115867a3..ad04947a0032 100644 --- a/drivers/s390/cio/qdio_setup.c +++ b/drivers/s390/cio/qdio_setup.c @@ -461,7 +461,7 @@ int qdio_setup_irq(struct qdio_irq *irq_ptr, struct qdio_initialize *init_data) memset(&irq_ptr->ssqd_desc, 0, sizeof(irq_ptr->ssqd_desc)); memset(&irq_ptr->perf_stat, 0, sizeof(irq_ptr->perf_stat)); - irq_ptr->debugfs_dev = irq_ptr->debugfs_perf = NULL; + irq_ptr->debugfs_dev = NULL; irq_ptr->sch_token = irq_ptr->perf_stat_enabled = 0; irq_ptr->state = QDIO_IRQ_STATE_INACTIVE; From fa226f1d81e2d3798d30eaa14550d7f35c35e6f3 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Fri, 21 Feb 2020 09:06:12 -0600 Subject: [PATCH 08/57] s390: Replace zero-length array with flexible-array member The current codebase makes use of the zero-length array language extension to the C90 standard, but the preferred mechanism to declare variable-length types such as these ones is a flexible array member[1][2], introduced in C99: struct foo { int stuff; struct boo array[]; }; By making use of the mechanism above, we will get a compiler warning in case the flexible array does not occur last in the structure, which will help us prevent some kind of undefined behavior bugs from being inadvertently introduced[3] to the codebase from now on. Also, notice that, dynamic memory allocations won't be affected by this change: "Flexible array members have incomplete type, and so the sizeof operator may not be applied. As a quirk of the original implementation of zero-length arrays, sizeof evaluates to zero."[1] This issue was found with the help of Coccinelle. [1] https://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html [2] https://github.com/KSPP/linux/issues/21 [3] commit 76497732932f ("cxgb3/l2t: Fix undefined behaviour") Link: https://lkml.kernel.org/r/20200221150612.GA9717@embeddedor Signed-off-by: Gustavo A. R. Silva Signed-off-by: Vasily Gorbik --- arch/s390/appldata/appldata_os.c | 2 +- drivers/s390/block/dasd_diag.c | 2 +- drivers/s390/block/dasd_eckd.h | 2 +- drivers/s390/char/raw3270.h | 2 +- drivers/s390/char/sclp_pci.c | 2 +- drivers/s390/cio/idset.c | 2 +- drivers/s390/crypto/pkey_api.c | 2 +- drivers/s390/crypto/zcrypt_ccamisc.h | 2 +- drivers/s390/crypto/zcrypt_msgtype6.c | 2 +- 9 files changed, 9 insertions(+), 9 deletions(-) diff --git a/arch/s390/appldata/appldata_os.c b/arch/s390/appldata/appldata_os.c index 54f375627532..8bf46d705957 100644 --- a/arch/s390/appldata/appldata_os.c +++ b/arch/s390/appldata/appldata_os.c @@ -75,7 +75,7 @@ struct appldata_os_data { (waiting for I/O) */ /* per cpu data */ - struct appldata_os_per_cpu os_cpu[0]; + struct appldata_os_per_cpu os_cpu[]; } __attribute__((packed)); static struct appldata_os_data *appldata_os_data; diff --git a/drivers/s390/block/dasd_diag.c b/drivers/s390/block/dasd_diag.c index 8d4971645cf1..facb588d09e4 100644 --- a/drivers/s390/block/dasd_diag.c +++ b/drivers/s390/block/dasd_diag.c @@ -58,7 +58,7 @@ struct dasd_diag_private { struct dasd_diag_req { unsigned int block_count; - struct dasd_diag_bio bio[0]; + struct dasd_diag_bio bio[]; }; static const u8 DASD_DIAG_CMS1[] = { 0xc3, 0xd4, 0xe2, 0xf1 };/* EBCDIC CMS1 */ diff --git a/drivers/s390/block/dasd_eckd.h b/drivers/s390/block/dasd_eckd.h index 6943508d0f1d..ca24a78a256e 100644 --- a/drivers/s390/block/dasd_eckd.h +++ b/drivers/s390/block/dasd_eckd.h @@ -220,7 +220,7 @@ struct LRE_eckd_data { __u8 imbedded_count; __u8 extended_operation; __u16 extended_parameter_length; - __u8 extended_parameter[0]; + __u8 extended_parameter[]; } __attribute__ ((packed)); /* Prefix data for format 0x00 and 0x01 */ diff --git a/drivers/s390/char/raw3270.h b/drivers/s390/char/raw3270.h index 3afaa35f7351..08f36e973b43 100644 --- a/drivers/s390/char/raw3270.h +++ b/drivers/s390/char/raw3270.h @@ -211,7 +211,7 @@ struct string struct list_head update; unsigned long size; unsigned long len; - char string[0]; + char string[]; } __attribute__ ((aligned(8))); static inline struct string * diff --git a/drivers/s390/char/sclp_pci.c b/drivers/s390/char/sclp_pci.c index 995e9196852e..a3e5a5fb0c1e 100644 --- a/drivers/s390/char/sclp_pci.c +++ b/drivers/s390/char/sclp_pci.c @@ -39,7 +39,7 @@ struct err_notify_evbuf { u8 atype; u32 fh; u32 fid; - u8 data[0]; + u8 data[]; } __packed; struct err_notify_sccb { diff --git a/drivers/s390/cio/idset.c b/drivers/s390/cio/idset.c index 835de44dbbcc..77d0ea7b381b 100644 --- a/drivers/s390/cio/idset.c +++ b/drivers/s390/cio/idset.c @@ -13,7 +13,7 @@ struct idset { int num_ssid; int num_id; - unsigned long bitmap[0]; + unsigned long bitmap[]; }; static inline unsigned long bitmap_size(int num_ssid, int num_id) diff --git a/drivers/s390/crypto/pkey_api.c b/drivers/s390/crypto/pkey_api.c index 2f33c5fcf676..74e63ec49068 100644 --- a/drivers/s390/crypto/pkey_api.c +++ b/drivers/s390/crypto/pkey_api.c @@ -80,7 +80,7 @@ struct clearaeskeytoken { u8 res1[3]; u32 keytype; /* key type, one of the PKEY_KEYTYPE values */ u32 len; /* bytes actually stored in clearkey[] */ - u8 clearkey[0]; /* clear key value */ + u8 clearkey[]; /* clear key value */ } __packed; /* diff --git a/drivers/s390/crypto/zcrypt_ccamisc.h b/drivers/s390/crypto/zcrypt_ccamisc.h index 3a9876d5ab0e..8b7a641671c9 100644 --- a/drivers/s390/crypto/zcrypt_ccamisc.h +++ b/drivers/s390/crypto/zcrypt_ccamisc.h @@ -90,7 +90,7 @@ struct cipherkeytoken { u16 kmf1; /* key management field 1 */ u16 kmf2; /* key management field 2 */ u16 kmf3; /* key management field 3 */ - u8 vdata[0]; /* variable part data follows */ + u8 vdata[]; /* variable part data follows */ } __packed; /* Some defines for the CCA AES cipherkeytoken kmf1 field */ diff --git a/drivers/s390/crypto/zcrypt_msgtype6.c b/drivers/s390/crypto/zcrypt_msgtype6.c index a36251d138fb..eadd3a438a4b 100644 --- a/drivers/s390/crypto/zcrypt_msgtype6.c +++ b/drivers/s390/crypto/zcrypt_msgtype6.c @@ -590,7 +590,7 @@ struct type86x_reply { struct CPRBX cprbx; unsigned char pad[4]; /* 4 byte function code/rules block ? */ unsigned short length; - char text[0]; + char text[]; } __packed; struct type86_ep11_reply { From 4a559cd15dbc79958fa9b18ad4e8afe4a0bf4744 Mon Sep 17 00:00:00 2001 From: Torsten Duwe Date: Tue, 25 Feb 2020 15:34:30 +0100 Subject: [PATCH 09/57] s390/crypto: explicitly memzero stack key material in aes_s390.c aes_s390.c has several functions which allocate space for key material on the stack and leave the used keys there. It is considered good practice to clean these locations before the function returns. Link: https://lkml.kernel.org/r/20200221165511.GB6928@lst.de Signed-off-by: Torsten Duwe Signed-off-by: Harald Freudenberger Signed-off-by: Vasily Gorbik --- arch/s390/crypto/aes_s390.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/s390/crypto/aes_s390.c b/arch/s390/crypto/aes_s390.c index 1c23d84a9097..73044634d342 100644 --- a/arch/s390/crypto/aes_s390.c +++ b/arch/s390/crypto/aes_s390.c @@ -342,6 +342,7 @@ static int cbc_aes_crypt(struct skcipher_request *req, unsigned long modifier) memcpy(walk.iv, param.iv, AES_BLOCK_SIZE); ret = skcipher_walk_done(&walk, nbytes - n); } + memzero_explicit(¶m, sizeof(param)); return ret; } @@ -470,6 +471,8 @@ static int xts_aes_crypt(struct skcipher_request *req, unsigned long modifier) walk.dst.virt.addr, walk.src.virt.addr, n); ret = skcipher_walk_done(&walk, nbytes - n); } + memzero_explicit(&pcc_param, sizeof(pcc_param)); + memzero_explicit(&xts_param, sizeof(xts_param)); return ret; } From 701dc81e7412daaf3c5bf4bc55d35c8b1525112a Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 19 Feb 2020 13:29:15 +0100 Subject: [PATCH 10/57] s390/mm: remove fake numa support It turned out that fake numa support is rather useless on s390, since there are no scenarios where there is any performance or other benefit when used. However it does provide maintenance cost and breaks from time to time. Therefore remove it. CONFIG_NUMA is still supported with a very small backend and only one node. This way userspace applications which require NUMA interfaces continue to work. Note that NODES_SHIFT is set to 1 (= 2 nodes) instead of 0 (= 1 node), since there is quite a bit of kernel code which assumes that more than one node is possible if CONFIG_NUMA is enabled. Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/Kconfig | 61 +--- arch/s390/include/asm/numa.h | 13 +- arch/s390/include/asm/topology.h | 9 +- arch/s390/kernel/setup.c | 1 + arch/s390/kernel/topology.c | 2 - arch/s390/numa/Makefile | 2 - arch/s390/numa/mode_emu.c | 577 ------------------------------- arch/s390/numa/numa.c | 147 +------- arch/s390/numa/numa_mode.h | 25 -- arch/s390/numa/toptree.c | 351 ------------------- arch/s390/numa/toptree.h | 61 ---- drivers/s390/char/sclp_cmd.c | 2 +- 12 files changed, 20 insertions(+), 1231 deletions(-) delete mode 100644 arch/s390/numa/mode_emu.c delete mode 100644 arch/s390/numa/numa_mode.h delete mode 100644 arch/s390/numa/toptree.c delete mode 100644 arch/s390/numa/toptree.h diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 8abe77536d9d..6b1f715dd8bb 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -450,14 +450,6 @@ config NR_CPUS config HOTPLUG_CPU def_bool y -# Some NUMA nodes have memory ranges that span -# other nodes. Even though a pfn is valid and -# between a node's start and end pfns, it may not -# reside on that node. See memmap_init_zone() -# for details. <- They meant memory holes! -config NODES_SPAN_OTHER_NODES - def_bool NUMA - config NUMA bool "NUMA support" depends on SCHED_TOPOLOGY @@ -467,58 +459,9 @@ config NUMA This option adds NUMA support to the kernel. - An operation mode can be selected by appending - numa= to the kernel command line. - - The default behaviour is identical to appending numa=plain to - the command line. This will create just one node with all - available memory and all CPUs in it. - config NODES_SHIFT - int "Maximum NUMA nodes (as a power of 2)" - range 1 10 - depends on NUMA - default "4" - help - Specify the maximum number of NUMA nodes available on the target - system. Increases memory reserved to accommodate various tables. - -menu "Select NUMA modes" - depends on NUMA - -config NUMA_EMU - bool "NUMA emulation" - default y - help - Numa emulation mode will split the available system memory into - equal chunks which then are distributed over the configured number - of nodes in a round-robin manner. - - The number of fake nodes is limited by the number of available memory - chunks (i.e. memory size / fake size) and the number of supported - nodes in the kernel. - - The CPUs are assigned to the nodes in a way that partially respects - the original machine topology (if supported by the machine). - Fair distribution of the CPUs is not guaranteed. - -config EMU_SIZE - hex "NUMA emulation memory chunk size" - default 0x10000000 - range 0x400000 0x100000000 - depends on NUMA_EMU - help - Select the default size by which the memory is chopped and then - assigned to emulated NUMA nodes. - - This can be overridden by specifying - - emu_size= - - on the kernel command line where also suffixes K, M, G, and T are - supported. - -endmenu + int + default "1" config SCHED_SMT def_bool n diff --git a/arch/s390/include/asm/numa.h b/arch/s390/include/asm/numa.h index 35f8cbe7e5bb..23cd5d1b734b 100644 --- a/arch/s390/include/asm/numa.h +++ b/arch/s390/include/asm/numa.h @@ -13,24 +13,13 @@ #ifdef CONFIG_NUMA #include -#include void numa_setup(void); -int numa_pfn_to_nid(unsigned long pfn); -int __node_distance(int a, int b); -void numa_update_cpu_topology(void); - -extern cpumask_t node_to_cpumask_map[MAX_NUMNODES]; -extern int numa_debug_enabled; #else static inline void numa_setup(void) { } -static inline void numa_update_cpu_topology(void) { } -static inline int numa_pfn_to_nid(unsigned long pfn) -{ - return 0; -} #endif /* CONFIG_NUMA */ + #endif /* _ASM_S390_NUMA_H */ diff --git a/arch/s390/include/asm/topology.h b/arch/s390/include/asm/topology.h index cca406fdbe51..bd3417185e30 100644 --- a/arch/s390/include/asm/topology.h +++ b/arch/s390/include/asm/topology.h @@ -16,7 +16,6 @@ struct cpu_topology_s390 { unsigned short socket_id; unsigned short book_id; unsigned short drawer_id; - unsigned short node_id; unsigned short dedicated : 1; cpumask_t thread_mask; cpumask_t core_mask; @@ -71,19 +70,23 @@ static inline void topology_expect_change(void) { } #define cpu_to_node cpu_to_node static inline int cpu_to_node(int cpu) { - return cpu_topology[cpu].node_id; + return 0; } /* Returns a pointer to the cpumask of CPUs on node 'node'. */ #define cpumask_of_node cpumask_of_node static inline const struct cpumask *cpumask_of_node(int node) { - return &node_to_cpumask_map[node]; + return cpu_possible_mask; } #define pcibus_to_node(bus) __pcibus_to_node(bus) #define node_distance(a, b) __node_distance(a, b) +static inline int __node_distance(int a, int b) +{ + return 0; +} #else /* !CONFIG_NUMA */ diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index b2c2f75860e8..1158a63a8e0e 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -790,6 +790,7 @@ static void __init memblock_add_mem_detect_info(void) memblock_physmem_add(start, end - start); } memblock_set_bottom_up(false); + memblock_set_node(0, ULONG_MAX, &memblock.memory, 0); memblock_dump_all(); } diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c index 3627953007ed..c189f5d996ff 100644 --- a/arch/s390/kernel/topology.c +++ b/arch/s390/kernel/topology.c @@ -26,7 +26,6 @@ #include #include #include -#include #define PTF_HORIZONTAL (0UL) #define PTF_VERTICAL (1UL) @@ -267,7 +266,6 @@ static void update_cpu_masks(void) cpumask_set_cpu(cpu, &cpus_with_topology); } } - numa_update_cpu_topology(); } void store_topology(struct sysinfo_15_1_x *info) diff --git a/arch/s390/numa/Makefile b/arch/s390/numa/Makefile index 66c2dff74895..c89d26f4f77d 100644 --- a/arch/s390/numa/Makefile +++ b/arch/s390/numa/Makefile @@ -1,4 +1,2 @@ # SPDX-License-Identifier: GPL-2.0 obj-y += numa.o -obj-y += toptree.o -obj-$(CONFIG_NUMA_EMU) += mode_emu.o diff --git a/arch/s390/numa/mode_emu.c b/arch/s390/numa/mode_emu.c deleted file mode 100644 index 72d742bb2d17..000000000000 --- a/arch/s390/numa/mode_emu.c +++ /dev/null @@ -1,577 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * NUMA support for s390 - * - * NUMA emulation (aka fake NUMA) distributes the available memory to nodes - * without using real topology information about the physical memory of the - * machine. - * - * It distributes the available CPUs to nodes while respecting the original - * machine topology information. This is done by trying to avoid to separate - * CPUs which reside on the same book or even on the same MC. - * - * Because the current Linux scheduler code requires a stable cpu to node - * mapping, cores are pinned to nodes when the first CPU thread is set online. - * - * Copyright IBM Corp. 2015 - */ - -#define KMSG_COMPONENT "numa_emu" -#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt - -#include -#include -#include -#include -#include -#include -#include -#include -#include "numa_mode.h" -#include "toptree.h" - -/* Distances between the different system components */ -#define DIST_EMPTY 0 -#define DIST_CORE 1 -#define DIST_MC 2 -#define DIST_BOOK 3 -#define DIST_DRAWER 4 -#define DIST_MAX 5 - -/* Node distance reported to common code */ -#define EMU_NODE_DIST 10 - -/* Node ID for free (not yet pinned) cores */ -#define NODE_ID_FREE -1 - -/* Different levels of toptree */ -enum toptree_level {CORE, MC, BOOK, DRAWER, NODE, TOPOLOGY}; - -/* The two toptree IDs */ -enum {TOPTREE_ID_PHYS, TOPTREE_ID_NUMA}; - -/* Number of NUMA nodes */ -static int emu_nodes = 1; -/* NUMA stripe size */ -static unsigned long emu_size; - -/* - * Node to core pinning information updates are protected by - * "sched_domains_mutex". - */ -static struct { - s32 to_node_id[CONFIG_NR_CPUS]; /* Pinned core to node mapping */ - int total; /* Total number of pinned cores */ - int per_node_target; /* Cores per node without extra cores */ - int per_node[MAX_NUMNODES]; /* Number of cores pinned to node */ -} *emu_cores; - -/* - * Pin a core to a node - */ -static void pin_core_to_node(int core_id, int node_id) -{ - if (emu_cores->to_node_id[core_id] == NODE_ID_FREE) { - emu_cores->per_node[node_id]++; - emu_cores->to_node_id[core_id] = node_id; - emu_cores->total++; - } else { - WARN_ON(emu_cores->to_node_id[core_id] != node_id); - } -} - -/* - * Number of pinned cores of a node - */ -static int cores_pinned(struct toptree *node) -{ - return emu_cores->per_node[node->id]; -} - -/* - * ID of the node where the core is pinned (or NODE_ID_FREE) - */ -static int core_pinned_to_node_id(struct toptree *core) -{ - return emu_cores->to_node_id[core->id]; -} - -/* - * Number of cores in the tree that are not yet pinned - */ -static int cores_free(struct toptree *tree) -{ - struct toptree *core; - int count = 0; - - toptree_for_each(core, tree, CORE) { - if (core_pinned_to_node_id(core) == NODE_ID_FREE) - count++; - } - return count; -} - -/* - * Return node of core - */ -static struct toptree *core_node(struct toptree *core) -{ - return core->parent->parent->parent->parent; -} - -/* - * Return drawer of core - */ -static struct toptree *core_drawer(struct toptree *core) -{ - return core->parent->parent->parent; -} - -/* - * Return book of core - */ -static struct toptree *core_book(struct toptree *core) -{ - return core->parent->parent; -} - -/* - * Return mc of core - */ -static struct toptree *core_mc(struct toptree *core) -{ - return core->parent; -} - -/* - * Distance between two cores - */ -static int dist_core_to_core(struct toptree *core1, struct toptree *core2) -{ - if (core_drawer(core1)->id != core_drawer(core2)->id) - return DIST_DRAWER; - if (core_book(core1)->id != core_book(core2)->id) - return DIST_BOOK; - if (core_mc(core1)->id != core_mc(core2)->id) - return DIST_MC; - /* Same core or sibling on same MC */ - return DIST_CORE; -} - -/* - * Distance of a node to a core - */ -static int dist_node_to_core(struct toptree *node, struct toptree *core) -{ - struct toptree *core_node; - int dist_min = DIST_MAX; - - toptree_for_each(core_node, node, CORE) - dist_min = min(dist_min, dist_core_to_core(core_node, core)); - return dist_min == DIST_MAX ? DIST_EMPTY : dist_min; -} - -/* - * Unify will delete empty nodes, therefore recreate nodes. - */ -static void toptree_unify_tree(struct toptree *tree) -{ - int nid; - - toptree_unify(tree); - for (nid = 0; nid < emu_nodes; nid++) - toptree_get_child(tree, nid); -} - -/* - * Find the best/nearest node for a given core and ensure that no node - * gets more than "emu_cores->per_node_target + extra" cores. - */ -static struct toptree *node_for_core(struct toptree *numa, struct toptree *core, - int extra) -{ - struct toptree *node, *node_best = NULL; - int dist_cur, dist_best, cores_target; - - cores_target = emu_cores->per_node_target + extra; - dist_best = DIST_MAX; - node_best = NULL; - toptree_for_each(node, numa, NODE) { - /* Already pinned cores must use their nodes */ - if (core_pinned_to_node_id(core) == node->id) { - node_best = node; - break; - } - /* Skip nodes that already have enough cores */ - if (cores_pinned(node) >= cores_target) - continue; - dist_cur = dist_node_to_core(node, core); - if (dist_cur < dist_best) { - dist_best = dist_cur; - node_best = node; - } - } - return node_best; -} - -/* - * Find the best node for each core with respect to "extra" core count - */ -static void toptree_to_numa_single(struct toptree *numa, struct toptree *phys, - int extra) -{ - struct toptree *node, *core, *tmp; - - toptree_for_each_safe(core, tmp, phys, CORE) { - node = node_for_core(numa, core, extra); - if (!node) - return; - toptree_move(core, node); - pin_core_to_node(core->id, node->id); - } -} - -/* - * Move structures of given level to specified NUMA node - */ -static void move_level_to_numa_node(struct toptree *node, struct toptree *phys, - enum toptree_level level, bool perfect) -{ - int cores_free, cores_target = emu_cores->per_node_target; - struct toptree *cur, *tmp; - - toptree_for_each_safe(cur, tmp, phys, level) { - cores_free = cores_target - toptree_count(node, CORE); - if (perfect) { - if (cores_free == toptree_count(cur, CORE)) - toptree_move(cur, node); - } else { - if (cores_free >= toptree_count(cur, CORE)) - toptree_move(cur, node); - } - } -} - -/* - * Move structures of a given level to NUMA nodes. If "perfect" is specified - * move only perfectly fitting structures. Otherwise move also smaller - * than needed structures. - */ -static void move_level_to_numa(struct toptree *numa, struct toptree *phys, - enum toptree_level level, bool perfect) -{ - struct toptree *node; - - toptree_for_each(node, numa, NODE) - move_level_to_numa_node(node, phys, level, perfect); -} - -/* - * For the first run try to move the big structures - */ -static void toptree_to_numa_first(struct toptree *numa, struct toptree *phys) -{ - struct toptree *core; - - /* Always try to move perfectly fitting structures first */ - move_level_to_numa(numa, phys, DRAWER, true); - move_level_to_numa(numa, phys, DRAWER, false); - move_level_to_numa(numa, phys, BOOK, true); - move_level_to_numa(numa, phys, BOOK, false); - move_level_to_numa(numa, phys, MC, true); - move_level_to_numa(numa, phys, MC, false); - /* Now pin all the moved cores */ - toptree_for_each(core, numa, CORE) - pin_core_to_node(core->id, core_node(core)->id); -} - -/* - * Allocate new topology and create required nodes - */ -static struct toptree *toptree_new(int id, int nodes) -{ - struct toptree *tree; - int nid; - - tree = toptree_alloc(TOPOLOGY, id); - if (!tree) - goto fail; - for (nid = 0; nid < nodes; nid++) { - if (!toptree_get_child(tree, nid)) - goto fail; - } - return tree; -fail: - panic("NUMA emulation could not allocate topology"); -} - -/* - * Allocate and initialize core to node mapping - */ -static void __ref create_core_to_node_map(void) -{ - int i; - - emu_cores = memblock_alloc(sizeof(*emu_cores), 8); - if (!emu_cores) - panic("%s: Failed to allocate %zu bytes align=0x%x\n", - __func__, sizeof(*emu_cores), 8); - for (i = 0; i < ARRAY_SIZE(emu_cores->to_node_id); i++) - emu_cores->to_node_id[i] = NODE_ID_FREE; -} - -/* - * Move cores from physical topology into NUMA target topology - * and try to keep as much of the physical topology as possible. - */ -static struct toptree *toptree_to_numa(struct toptree *phys) -{ - static int first = 1; - struct toptree *numa; - int cores_total; - - cores_total = emu_cores->total + cores_free(phys); - emu_cores->per_node_target = cores_total / emu_nodes; - numa = toptree_new(TOPTREE_ID_NUMA, emu_nodes); - if (first) { - toptree_to_numa_first(numa, phys); - first = 0; - } - toptree_to_numa_single(numa, phys, 0); - toptree_to_numa_single(numa, phys, 1); - toptree_unify_tree(numa); - - WARN_ON(cpumask_weight(&phys->mask)); - return numa; -} - -/* - * Create a toptree out of the physical topology that we got from the hypervisor - */ -static struct toptree *toptree_from_topology(void) -{ - struct toptree *phys, *node, *drawer, *book, *mc, *core; - struct cpu_topology_s390 *top; - int cpu; - - phys = toptree_new(TOPTREE_ID_PHYS, 1); - - for_each_cpu(cpu, &cpus_with_topology) { - top = &cpu_topology[cpu]; - node = toptree_get_child(phys, 0); - drawer = toptree_get_child(node, top->drawer_id); - book = toptree_get_child(drawer, top->book_id); - mc = toptree_get_child(book, top->socket_id); - core = toptree_get_child(mc, smp_get_base_cpu(cpu)); - if (!drawer || !book || !mc || !core) - panic("NUMA emulation could not allocate memory"); - cpumask_set_cpu(cpu, &core->mask); - toptree_update_mask(mc); - } - return phys; -} - -/* - * Add toptree core to topology and create correct CPU masks - */ -static void topology_add_core(struct toptree *core) -{ - struct cpu_topology_s390 *top; - int cpu; - - for_each_cpu(cpu, &core->mask) { - top = &cpu_topology[cpu]; - cpumask_copy(&top->thread_mask, &core->mask); - cpumask_copy(&top->core_mask, &core_mc(core)->mask); - cpumask_copy(&top->book_mask, &core_book(core)->mask); - cpumask_copy(&top->drawer_mask, &core_drawer(core)->mask); - cpumask_set_cpu(cpu, &node_to_cpumask_map[core_node(core)->id]); - top->node_id = core_node(core)->id; - } -} - -/* - * Apply toptree to topology and create CPU masks - */ -static void toptree_to_topology(struct toptree *numa) -{ - struct toptree *core; - int i; - - /* Clear all node masks */ - for (i = 0; i < MAX_NUMNODES; i++) - cpumask_clear(&node_to_cpumask_map[i]); - - /* Rebuild all masks */ - toptree_for_each(core, numa, CORE) - topology_add_core(core); -} - -/* - * Show the node to core mapping - */ -static void print_node_to_core_map(void) -{ - int nid, cid; - - if (!numa_debug_enabled) - return; - printk(KERN_DEBUG "NUMA node to core mapping\n"); - for (nid = 0; nid < emu_nodes; nid++) { - printk(KERN_DEBUG " node %3d: ", nid); - for (cid = 0; cid < ARRAY_SIZE(emu_cores->to_node_id); cid++) { - if (emu_cores->to_node_id[cid] == nid) - printk(KERN_CONT "%d ", cid); - } - printk(KERN_CONT "\n"); - } -} - -static void pin_all_possible_cpus(void) -{ - int core_id, node_id, cpu; - static int initialized; - - if (initialized) - return; - print_node_to_core_map(); - node_id = 0; - for_each_possible_cpu(cpu) { - core_id = smp_get_base_cpu(cpu); - if (emu_cores->to_node_id[core_id] != NODE_ID_FREE) - continue; - pin_core_to_node(core_id, node_id); - cpu_topology[cpu].node_id = node_id; - node_id = (node_id + 1) % emu_nodes; - } - print_node_to_core_map(); - initialized = 1; -} - -/* - * Transfer physical topology into a NUMA topology and modify CPU masks - * according to the NUMA topology. - * - * Must be called with "sched_domains_mutex" lock held. - */ -static void emu_update_cpu_topology(void) -{ - struct toptree *phys, *numa; - - if (emu_cores == NULL) - create_core_to_node_map(); - phys = toptree_from_topology(); - numa = toptree_to_numa(phys); - toptree_free(phys); - toptree_to_topology(numa); - toptree_free(numa); - pin_all_possible_cpus(); -} - -/* - * If emu_size is not set, use CONFIG_EMU_SIZE. Then round to minimum - * alignment (needed for memory hotplug). - */ -static unsigned long emu_setup_size_adjust(unsigned long size) -{ - unsigned long size_new; - - size = size ? : CONFIG_EMU_SIZE; - size_new = roundup(size, memory_block_size_bytes()); - if (size_new == size) - return size; - pr_warn("Increasing memory stripe size from %ld MB to %ld MB\n", - size >> 20, size_new >> 20); - return size_new; -} - -/* - * If we have not enough memory for the specified nodes, reduce the node count. - */ -static int emu_setup_nodes_adjust(int nodes) -{ - int nodes_max; - - nodes_max = memblock.memory.total_size / emu_size; - nodes_max = max(nodes_max, 1); - if (nodes_max >= nodes) - return nodes; - pr_warn("Not enough memory for %d nodes, reducing node count\n", nodes); - return nodes_max; -} - -/* - * Early emu setup - */ -static void emu_setup(void) -{ - int nid; - - emu_size = emu_setup_size_adjust(emu_size); - emu_nodes = emu_setup_nodes_adjust(emu_nodes); - for (nid = 0; nid < emu_nodes; nid++) - node_set(nid, node_possible_map); - pr_info("Creating %d nodes with memory stripe size %ld MB\n", - emu_nodes, emu_size >> 20); -} - -/* - * Return node id for given page number - */ -static int emu_pfn_to_nid(unsigned long pfn) -{ - return (pfn / (emu_size >> PAGE_SHIFT)) % emu_nodes; -} - -/* - * Return stripe size - */ -static unsigned long emu_align(void) -{ - return emu_size; -} - -/* - * Return distance between two nodes - */ -static int emu_distance(int node1, int node2) -{ - return (node1 != node2) * EMU_NODE_DIST; -} - -/* - * Define callbacks for generic s390 NUMA infrastructure - */ -const struct numa_mode numa_mode_emu = { - .name = "emu", - .setup = emu_setup, - .update_cpu_topology = emu_update_cpu_topology, - .__pfn_to_nid = emu_pfn_to_nid, - .align = emu_align, - .distance = emu_distance, -}; - -/* - * Kernel parameter: emu_nodes= - */ -static int __init early_parse_emu_nodes(char *p) -{ - int count; - - if (!p || kstrtoint(p, 0, &count) != 0 || count <= 0) - return 0; - emu_nodes = min(count, MAX_NUMNODES); - return 0; -} -early_param("emu_nodes", early_parse_emu_nodes); - -/* - * Kernel parameter: emu_size=[[k|M|G|T]] - */ -static int __init early_parse_emu_size(char *p) -{ - if (p) - emu_size = memparse(p, NULL); - return 0; -} -early_param("emu_size", early_parse_emu_size); diff --git a/arch/s390/numa/numa.c b/arch/s390/numa/numa.c index d2910fa834c8..51c5a9f6e525 100644 --- a/arch/s390/numa/numa.c +++ b/arch/s390/numa/numa.c @@ -7,165 +7,36 @@ * Copyright IBM Corp. 2015 */ -#define KMSG_COMPONENT "numa" -#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt - #include #include #include #include -#include #include - #include -#include "numa_mode.h" -pg_data_t *node_data[MAX_NUMNODES]; +struct pglist_data *node_data[MAX_NUMNODES]; EXPORT_SYMBOL(node_data); -cpumask_t node_to_cpumask_map[MAX_NUMNODES]; -EXPORT_SYMBOL(node_to_cpumask_map); - -static void plain_setup(void) +void __init numa_setup(void) { + int nid; + + nodes_clear(node_possible_map); node_set(0, node_possible_map); -} - -const struct numa_mode numa_mode_plain = { - .name = "plain", - .setup = plain_setup, -}; - -static const struct numa_mode *mode = &numa_mode_plain; - -int numa_pfn_to_nid(unsigned long pfn) -{ - return mode->__pfn_to_nid ? mode->__pfn_to_nid(pfn) : 0; -} - -void numa_update_cpu_topology(void) -{ - if (mode->update_cpu_topology) - mode->update_cpu_topology(); -} - -int __node_distance(int a, int b) -{ - return mode->distance ? mode->distance(a, b) : 0; -} -EXPORT_SYMBOL(__node_distance); - -int numa_debug_enabled; - -/* - * numa_setup_memory() - Assign bootmem to nodes - * - * The memory is first added to memblock without any respect to nodes. - * This is fixed before remaining memblock memory is handed over to the - * buddy allocator. - * An important side effect is that large bootmem allocations might easily - * cross node boundaries, which can be needed for large allocations with - * smaller memory stripes in each node (i.e. when using NUMA emulation). - * - * Memory defines nodes: - * Therefore this routine also sets the nodes online with memory. - */ -static void __init numa_setup_memory(void) -{ - unsigned long cur_base, align, end_of_dram; - int nid = 0; - - end_of_dram = memblock_end_of_DRAM(); - align = mode->align ? mode->align() : ULONG_MAX; - - /* - * Step through all available memory and assign it to the nodes - * indicated by the mode implementation. - * All nodes which are seen here will be set online. - */ - cur_base = 0; - do { - nid = numa_pfn_to_nid(PFN_DOWN(cur_base)); - node_set_online(nid); - memblock_set_node(cur_base, align, &memblock.memory, nid); - cur_base += align; - } while (cur_base < end_of_dram); - - /* Allocate and fill out node_data */ + node_set_online(0); for (nid = 0; nid < MAX_NUMNODES; nid++) { NODE_DATA(nid) = memblock_alloc(sizeof(pg_data_t), 8); if (!NODE_DATA(nid)) panic("%s: Failed to allocate %zu bytes align=0x%x\n", __func__, sizeof(pg_data_t), 8); } - - for_each_online_node(nid) { - unsigned long start_pfn, end_pfn; - unsigned long t_start, t_end; - int i; - - start_pfn = ULONG_MAX; - end_pfn = 0; - for_each_mem_pfn_range(i, nid, &t_start, &t_end, NULL) { - if (t_start < start_pfn) - start_pfn = t_start; - if (t_end > end_pfn) - end_pfn = t_end; - } - NODE_DATA(nid)->node_spanned_pages = end_pfn - start_pfn; - NODE_DATA(nid)->node_id = nid; - } + NODE_DATA(0)->node_spanned_pages = memblock_end_of_DRAM() >> PAGE_SHIFT; + NODE_DATA(0)->node_id = 0; } -/* - * numa_setup() - Earliest initialization - * - * Assign the mode and call the mode's setup routine. - */ -void __init numa_setup(void) -{ - pr_info("NUMA mode: %s\n", mode->name); - nodes_clear(node_possible_map); - /* Initially attach all possible CPUs to node 0. */ - cpumask_copy(&node_to_cpumask_map[0], cpu_possible_mask); - if (mode->setup) - mode->setup(); - numa_setup_memory(); - memblock_dump_all(); -} - -/* - * numa_init_late() - Initialization initcall - * - * Register NUMA nodes. - */ static int __init numa_init_late(void) { - int nid; - - for_each_online_node(nid) - register_one_node(nid); + register_one_node(0); return 0; } arch_initcall(numa_init_late); - -static int __init parse_debug(char *parm) -{ - numa_debug_enabled = 1; - return 0; -} -early_param("numa_debug", parse_debug); - -static int __init parse_numa(char *parm) -{ - if (!parm) - return 1; - if (strcmp(parm, numa_mode_plain.name) == 0) - mode = &numa_mode_plain; -#ifdef CONFIG_NUMA_EMU - if (strcmp(parm, numa_mode_emu.name) == 0) - mode = &numa_mode_emu; -#endif - return 0; -} -early_param("numa", parse_numa); diff --git a/arch/s390/numa/numa_mode.h b/arch/s390/numa/numa_mode.h deleted file mode 100644 index dfd3e2784081..000000000000 --- a/arch/s390/numa/numa_mode.h +++ /dev/null @@ -1,25 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * NUMA support for s390 - * - * Define declarations used for communication between NUMA mode - * implementations and NUMA core functionality. - * - * Copyright IBM Corp. 2015 - */ -#ifndef __S390_NUMA_MODE_H -#define __S390_NUMA_MODE_H - -struct numa_mode { - char *name; /* Name of mode */ - void (*setup)(void); /* Initizalize mode */ - void (*update_cpu_topology)(void); /* Called by topology code */ - int (*__pfn_to_nid)(unsigned long pfn); /* PFN to node ID */ - unsigned long (*align)(void); /* Minimum node alignment */ - int (*distance)(int a, int b); /* Distance between two nodes */ -}; - -extern const struct numa_mode numa_mode_plain; -extern const struct numa_mode numa_mode_emu; - -#endif /* __S390_NUMA_MODE_H */ diff --git a/arch/s390/numa/toptree.c b/arch/s390/numa/toptree.c deleted file mode 100644 index 71a608cd4f61..000000000000 --- a/arch/s390/numa/toptree.c +++ /dev/null @@ -1,351 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * NUMA support for s390 - * - * A tree structure used for machine topology mangling - * - * Copyright IBM Corp. 2015 - */ - -#include -#include -#include -#include -#include -#include -#include - -#include "toptree.h" - -/** - * toptree_alloc - Allocate and initialize a new tree node. - * @level: The node's vertical level; level 0 contains the leaves. - * @id: ID number, explicitly not unique beyond scope of node's siblings - * - * Allocate a new tree node and initialize it. - * - * RETURNS: - * Pointer to the new tree node or NULL on error - */ -struct toptree __ref *toptree_alloc(int level, int id) -{ - struct toptree *res; - - if (slab_is_available()) - res = kzalloc(sizeof(*res), GFP_KERNEL); - else - res = memblock_alloc(sizeof(*res), 8); - if (!res) - return res; - - INIT_LIST_HEAD(&res->children); - INIT_LIST_HEAD(&res->sibling); - cpumask_clear(&res->mask); - res->level = level; - res->id = id; - return res; -} - -/** - * toptree_remove - Remove a tree node from a tree - * @cand: Pointer to the node to remove - * - * The node is detached from its parent node. The parent node's - * masks will be updated to reflect the loss of the child. - */ -static void toptree_remove(struct toptree *cand) -{ - struct toptree *oldparent; - - list_del_init(&cand->sibling); - oldparent = cand->parent; - cand->parent = NULL; - toptree_update_mask(oldparent); -} - -/** - * toptree_free - discard a tree node - * @cand: Pointer to the tree node to discard - * - * Checks if @cand is attached to a parent node. Detaches it - * cleanly using toptree_remove. Possible children are freed - * recursively. In the end @cand itself is freed. - */ -void __ref toptree_free(struct toptree *cand) -{ - struct toptree *child, *tmp; - - if (cand->parent) - toptree_remove(cand); - toptree_for_each_child_safe(child, tmp, cand) - toptree_free(child); - if (slab_is_available()) - kfree(cand); - else - memblock_free_early((unsigned long)cand, sizeof(*cand)); -} - -/** - * toptree_update_mask - Update node bitmasks - * @cand: Pointer to a tree node - * - * The node's cpumask will be updated by combining all children's - * masks. Then toptree_update_mask is called recursively for the - * parent if applicable. - * - * NOTE: - * This must not be called on leaves. If called on a leaf, its - * CPU mask is cleared and lost. - */ -void toptree_update_mask(struct toptree *cand) -{ - struct toptree *child; - - cpumask_clear(&cand->mask); - list_for_each_entry(child, &cand->children, sibling) - cpumask_or(&cand->mask, &cand->mask, &child->mask); - if (cand->parent) - toptree_update_mask(cand->parent); -} - -/** - * toptree_insert - Insert a tree node into tree - * @cand: Pointer to the node to insert - * @target: Pointer to the node to which @cand will added as a child - * - * Insert a tree node into a tree. Masks will be updated automatically. - * - * RETURNS: - * 0 on success, -1 if NULL is passed as argument or the node levels - * don't fit. - */ -static int toptree_insert(struct toptree *cand, struct toptree *target) -{ - if (!cand || !target) - return -1; - if (target->level != (cand->level + 1)) - return -1; - list_add_tail(&cand->sibling, &target->children); - cand->parent = target; - toptree_update_mask(target); - return 0; -} - -/** - * toptree_move_children - Move all child nodes of a node to a new place - * @cand: Pointer to the node whose children are to be moved - * @target: Pointer to the node to which @cand's children will be attached - * - * Take all child nodes of @cand and move them using toptree_move. - */ -static void toptree_move_children(struct toptree *cand, struct toptree *target) -{ - struct toptree *child, *tmp; - - toptree_for_each_child_safe(child, tmp, cand) - toptree_move(child, target); -} - -/** - * toptree_unify - Merge children with same ID - * @cand: Pointer to node whose direct children should be made unique - * - * When mangling the tree it is possible that a node has two or more children - * which have the same ID. This routine merges these children into one and - * moves all children of the merged nodes into the unified node. - */ -void toptree_unify(struct toptree *cand) -{ - struct toptree *child, *tmp, *cand_copy; - - /* Threads cannot be split, cores are not split */ - if (cand->level < 2) - return; - - cand_copy = toptree_alloc(cand->level, 0); - toptree_for_each_child_safe(child, tmp, cand) { - struct toptree *tmpchild; - - if (!cpumask_empty(&child->mask)) { - tmpchild = toptree_get_child(cand_copy, child->id); - toptree_move_children(child, tmpchild); - } - toptree_free(child); - } - toptree_move_children(cand_copy, cand); - toptree_free(cand_copy); - - toptree_for_each_child(child, cand) - toptree_unify(child); -} - -/** - * toptree_move - Move a node to another context - * @cand: Pointer to the node to move - * @target: Pointer to the node where @cand should go - * - * In the easiest case @cand is exactly on the level below @target - * and will be immediately moved to the target. - * - * If @target's level is not the direct parent level of @cand, - * nodes for the missing levels are created and put between - * @cand and @target. The "stacking" nodes' IDs are taken from - * @cand's parents. - * - * After this it is likely to have redundant nodes in the tree - * which are addressed by means of toptree_unify. - */ -void toptree_move(struct toptree *cand, struct toptree *target) -{ - struct toptree *stack_target, *real_insert_point, *ptr, *tmp; - - if (cand->level + 1 == target->level) { - toptree_remove(cand); - toptree_insert(cand, target); - return; - } - - real_insert_point = NULL; - ptr = cand; - stack_target = NULL; - - do { - tmp = stack_target; - stack_target = toptree_alloc(ptr->level + 1, - ptr->parent->id); - toptree_insert(tmp, stack_target); - if (!real_insert_point) - real_insert_point = stack_target; - ptr = ptr->parent; - } while (stack_target->level < (target->level - 1)); - - toptree_remove(cand); - toptree_insert(cand, real_insert_point); - toptree_insert(stack_target, target); -} - -/** - * toptree_get_child - Access a tree node's child by its ID - * @cand: Pointer to tree node whose child is to access - * @id: The desired child's ID - * - * @cand's children are searched for a child with matching ID. - * If no match can be found, a new child with the desired ID - * is created and returned. - */ -struct toptree *toptree_get_child(struct toptree *cand, int id) -{ - struct toptree *child; - - toptree_for_each_child(child, cand) - if (child->id == id) - return child; - child = toptree_alloc(cand->level-1, id); - toptree_insert(child, cand); - return child; -} - -/** - * toptree_first - Find the first descendant on specified level - * @context: Pointer to tree node whose descendants are to be used - * @level: The level of interest - * - * RETURNS: - * @context's first descendant on the specified level, or NULL - * if there is no matching descendant - */ -struct toptree *toptree_first(struct toptree *context, int level) -{ - struct toptree *child, *tmp; - - if (context->level == level) - return context; - - if (!list_empty(&context->children)) { - list_for_each_entry(child, &context->children, sibling) { - tmp = toptree_first(child, level); - if (tmp) - return tmp; - } - } - return NULL; -} - -/** - * toptree_next_sibling - Return next sibling - * @cur: Pointer to a tree node - * - * RETURNS: - * If @cur has a parent and is not the last in the parent's children list, - * the next sibling is returned. Or NULL when there are no siblings left. - */ -static struct toptree *toptree_next_sibling(struct toptree *cur) -{ - if (cur->parent == NULL) - return NULL; - - if (cur == list_last_entry(&cur->parent->children, - struct toptree, sibling)) - return NULL; - return (struct toptree *) list_next_entry(cur, sibling); -} - -/** - * toptree_next - Tree traversal function - * @cur: Pointer to current element - * @context: Pointer to the root node of the tree or subtree to - * be traversed. - * @level: The level of interest. - * - * RETURNS: - * Pointer to the next node on level @level - * or NULL when there is no next node. - */ -struct toptree *toptree_next(struct toptree *cur, struct toptree *context, - int level) -{ - struct toptree *cur_context, *tmp; - - if (!cur) - return NULL; - - if (context->level == level) - return NULL; - - tmp = toptree_next_sibling(cur); - if (tmp != NULL) - return tmp; - - cur_context = cur; - while (cur_context->level < context->level - 1) { - /* Step up */ - cur_context = cur_context->parent; - /* Step aside */ - tmp = toptree_next_sibling(cur_context); - if (tmp != NULL) { - /* Step down */ - tmp = toptree_first(tmp, level); - if (tmp != NULL) - return tmp; - } - } - return NULL; -} - -/** - * toptree_count - Count descendants on specified level - * @context: Pointer to node whose descendants are to be considered - * @level: Only descendants on the specified level will be counted - * - * RETURNS: - * Number of descendants on the specified level - */ -int toptree_count(struct toptree *context, int level) -{ - struct toptree *cur; - int cnt = 0; - - toptree_for_each(cur, context, level) - cnt++; - return cnt; -} diff --git a/arch/s390/numa/toptree.h b/arch/s390/numa/toptree.h deleted file mode 100644 index 5246371ec713..000000000000 --- a/arch/s390/numa/toptree.h +++ /dev/null @@ -1,61 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * NUMA support for s390 - * - * A tree structure used for machine topology mangling - * - * Copyright IBM Corp. 2015 - */ -#ifndef S390_TOPTREE_H -#define S390_TOPTREE_H - -#include -#include - -struct toptree { - int level; - int id; - cpumask_t mask; - struct toptree *parent; - struct list_head sibling; - struct list_head children; -}; - -struct toptree *toptree_alloc(int level, int id); -void toptree_free(struct toptree *cand); -void toptree_update_mask(struct toptree *cand); -void toptree_unify(struct toptree *cand); -struct toptree *toptree_get_child(struct toptree *cand, int id); -void toptree_move(struct toptree *cand, struct toptree *target); -int toptree_count(struct toptree *context, int level); - -struct toptree *toptree_first(struct toptree *context, int level); -struct toptree *toptree_next(struct toptree *cur, struct toptree *context, - int level); - -#define toptree_for_each_child(child, ptree) \ - list_for_each_entry(child, &ptree->children, sibling) - -#define toptree_for_each_child_safe(child, ptmp, ptree) \ - list_for_each_entry_safe(child, ptmp, &ptree->children, sibling) - -#define toptree_is_last(ptree) \ - ((ptree->parent == NULL) || \ - (ptree->parent->children.prev == &ptree->sibling)) - -#define toptree_for_each(ptree, cont, ttype) \ - for (ptree = toptree_first(cont, ttype); \ - ptree != NULL; \ - ptree = toptree_next(ptree, cont, ttype)) - -#define toptree_for_each_safe(ptree, tmp, cont, ttype) \ - for (ptree = toptree_first(cont, ttype), \ - tmp = toptree_next(ptree, cont, ttype); \ - ptree != NULL; \ - ptree = tmp, \ - tmp = toptree_next(ptree, cont, ttype)) - -#define toptree_for_each_sibling(ptree, start) \ - toptree_for_each(ptree, start->parent, start->level) - -#endif /* S390_TOPTREE_H */ diff --git a/drivers/s390/char/sclp_cmd.c b/drivers/s390/char/sclp_cmd.c index 37d42de06079..a864b21af602 100644 --- a/drivers/s390/char/sclp_cmd.c +++ b/drivers/s390/char/sclp_cmd.c @@ -406,7 +406,7 @@ static void __init add_memory_merged(u16 rn) if (!size) goto skip_add; for (addr = start; addr < start + size; addr += block_size) - add_memory(numa_pfn_to_nid(PFN_DOWN(addr)), addr, block_size); + add_memory(0, addr, block_size); skip_add: first_rn = rn; num = 1; From 12437759602315d1eaa5ece8169cd1eedd018ff2 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Thu, 27 Feb 2020 14:42:29 +0100 Subject: [PATCH 11/57] s390/mm: mark private defines for vm_fault_t as such This fixes several sparse warnings for fault.c: arch/s390/mm/fault.c:336:36: warning: restricted vm_fault_t degrades to integer arch/s390/mm/fault.c:573:23: warning: incorrect type in assignment (different base types) arch/s390/mm/fault.c:573:23: expected restricted vm_fault_t [usertype] fault arch/s390/mm/fault.c:573:23: got int Signed-off-by: Christian Borntraeger Reviewed-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/mm/fault.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 7b0bb475c166..151adef0d5dd 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -44,11 +44,11 @@ #define __SUBCODE_MASK 0x0600 #define __PF_RES_FIELD 0x8000000000000000ULL -#define VM_FAULT_BADCONTEXT 0x010000 -#define VM_FAULT_BADMAP 0x020000 -#define VM_FAULT_BADACCESS 0x040000 -#define VM_FAULT_SIGNAL 0x080000 -#define VM_FAULT_PFAULT 0x100000 +#define VM_FAULT_BADCONTEXT ((__force vm_fault_t) 0x010000) +#define VM_FAULT_BADMAP ((__force vm_fault_t) 0x020000) +#define VM_FAULT_BADACCESS ((__force vm_fault_t) 0x040000) +#define VM_FAULT_SIGNAL ((__force vm_fault_t) 0x080000) +#define VM_FAULT_PFAULT ((__force vm_fault_t) 0x100000) enum fault_type { KERNEL_FAULT, From e189b172d212adcba33146348d50072bf6323121 Mon Sep 17 00:00:00 2001 From: Peter Oberparleiter Date: Mon, 2 Mar 2020 09:48:08 +0100 Subject: [PATCH 12/57] MAINTAINERS: Update s390/cio maintainer Update maintainer entry for s390 Common I/O Layer. Sebastian has left IBM, Vineeth takes over maintainership. Signed-off-by: Peter Oberparleiter Acked-by: Vineeth Vijayan Signed-off-by: Vasily Gorbik --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index a0d86490c2c6..e170f74e0361 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -14461,7 +14461,7 @@ F: Documentation/s390/ F: Documentation/driver-api/s390-drivers.rst S390 COMMON I/O LAYER -M: Sebastian Ott +M: Vineeth Vijayan M: Peter Oberparleiter L: linux-s390@vger.kernel.org W: http://www.ibm.com/developerworks/linux/linux390/ From 5e674c308baba7b3c5912110aaff4fff2131de49 Mon Sep 17 00:00:00 2001 From: Peter Oberparleiter Date: Mon, 2 Mar 2020 09:54:26 +0100 Subject: [PATCH 13/57] MAINTAINERS: Update s390/pci maintainer Update maintainer entry for s390 PCI subsystem. Sebastian has left IBM, Niklas takes over maintainership. Signed-off-by: Peter Oberparleiter Acked-by: Niklas Schnelle Signed-off-by: Vasily Gorbik --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index e170f74e0361..2c73d6d0854e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -14503,7 +14503,7 @@ S: Supported F: drivers/s390/net/ S390 PCI SUBSYSTEM -M: Sebastian Ott +M: Niklas Schnelle M: Gerald Schaefer L: linux-s390@vger.kernel.org W: http://www.ibm.com/developerworks/linux/linux390/ From 014b020475d4b9670d3cff11b751a7c20208f78b Mon Sep 17 00:00:00 2001 From: Alexander Gordeev Date: Tue, 25 Feb 2020 12:44:06 +0100 Subject: [PATCH 14/57] s390/mm: cleanup phys_to_pfn() and friends Make page, frame, virtual and physical address conversion macros more expressive by avoiding redundant definitions and defining new macros using existing ones. Signed-off-by: Alexander Gordeev Reviewed-by: Heiko Carstens Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/page.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h index 85e944f04c70..2e53b27f4f1a 100644 --- a/arch/s390/include/asm/page.h +++ b/arch/s390/include/asm/page.h @@ -161,20 +161,20 @@ static inline int devmem_is_allowed(unsigned long pfn) #define __pa(x) ((unsigned long)(x)) #define __va(x) ((void *)(unsigned long)(x)) -#define virt_to_pfn(kaddr) (__pa(kaddr) >> PAGE_SHIFT) -#define pfn_to_virt(pfn) __va((pfn) << PAGE_SHIFT) +#define phys_to_pfn(phys) ((phys) >> PAGE_SHIFT) +#define pfn_to_phys(pfn) ((pfn) << PAGE_SHIFT) + +#define phys_to_page(phys) pfn_to_page(phys_to_pfn(phys)) +#define page_to_phys(page) pfn_to_phys(page_to_pfn(page)) + +#define pfn_to_virt(pfn) __va(pfn_to_phys(pfn)) +#define virt_to_pfn(kaddr) (phys_to_pfn(__pa(kaddr))) #define pfn_to_kaddr(pfn) pfn_to_virt(pfn) #define virt_to_page(kaddr) pfn_to_page(virt_to_pfn(kaddr)) #define page_to_virt(page) pfn_to_virt(page_to_pfn(page)) -#define phys_to_pfn(kaddr) ((kaddr) >> PAGE_SHIFT) -#define pfn_to_phys(pfn) ((pfn) << PAGE_SHIFT) - -#define phys_to_page(kaddr) pfn_to_page(phys_to_pfn(kaddr)) -#define page_to_phys(page) (page_to_pfn(page) << PAGE_SHIFT) - -#define virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT) +#define virt_addr_valid(kaddr) pfn_valid(virt_to_pfn(kaddr)) #define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | \ VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) From ad451abee4ba787508c2a344aae4c9316b96cab9 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Mon, 24 Feb 2020 09:53:47 +0100 Subject: [PATCH 15/57] s390/qdio: export SSQD via debugfs While we print out various SSQD fields at initialization time, having raw & full access to the current SSQD can help with debugging. Signed-off-by: Julian Wiedmann Signed-off-by: Vasily Gorbik --- drivers/s390/cio/qdio_debug.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/drivers/s390/cio/qdio_debug.c b/drivers/s390/cio/qdio_debug.c index bf8d82503870..8544faddf80c 100644 --- a/drivers/s390/cio/qdio_debug.c +++ b/drivers/s390/cio/qdio_debug.c @@ -190,6 +190,23 @@ static int qstat_show(struct seq_file *m, void *v) DEFINE_SHOW_ATTRIBUTE(qstat); +static int ssqd_show(struct seq_file *m, void *v) +{ + struct ccw_device *cdev = m->private; + struct qdio_ssqd_desc ssqd; + int rc; + + rc = qdio_get_ssqd_desc(cdev, &ssqd); + if (rc) + return rc; + + seq_hex_dump(m, "", DUMP_PREFIX_NONE, 16, 4, &ssqd, sizeof(ssqd), + false); + return 0; +} + +DEFINE_SHOW_ATTRIBUTE(ssqd); + static char *qperf_names[] = { "Assumed adapter interrupts", "QDIO interrupts", @@ -303,6 +320,8 @@ void qdio_setup_debug_entries(struct qdio_irq *irq_ptr, struct ccw_device *cdev) debugfs_root); debugfs_create_file("statistics", S_IFREG | S_IRUGO | S_IWUSR, irq_ptr->debugfs_dev, irq_ptr, &debugfs_perf_fops); + debugfs_create_file("ssqd", 0444, irq_ptr->debugfs_dev, cdev, + &ssqd_fops); for_each_input_queue(irq_ptr, q, i) setup_debugfs_entry(irq_ptr->debugfs_dev, q); From 035f212fa7f21035537cf6dea620fe5653191eb6 Mon Sep 17 00:00:00 2001 From: Pierre Morel Date: Mon, 10 Feb 2020 17:53:25 +0100 Subject: [PATCH 16/57] s390/pci: embedding hotplug_slot in zdev Embedding the hotplug_slot in zdev structure allows to greatly simplify the hotplug handling by eliminating the handling of the slot_list. Signed-off-by: Pierre Morel Reviewed-by: Niklas Schnelle Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/pci.h | 2 + drivers/pci/hotplug/s390_pci_hpc.c | 99 +++++++++--------------------- 2 files changed, 31 insertions(+), 70 deletions(-) diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h index b05187ce5dbd..73b69a777152 100644 --- a/arch/s390/include/asm/pci.h +++ b/arch/s390/include/asm/pci.h @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -96,6 +97,7 @@ struct s390_domain; struct zpci_dev { struct pci_bus *bus; struct list_head entry; /* list of all zpci_devices, needed for hotplug, etc. */ + struct hotplug_slot hotplug_slot; enum zpci_state state; u32 fid; /* function ID, used by sclp */ diff --git a/drivers/pci/hotplug/s390_pci_hpc.c b/drivers/pci/hotplug/s390_pci_hpc.c index 30ee72268790..39295d88f670 100644 --- a/drivers/pci/hotplug/s390_pci_hpc.c +++ b/drivers/pci/hotplug/s390_pci_hpc.c @@ -19,7 +19,6 @@ #include #define SLOT_NAME_SIZE 10 -static LIST_HEAD(s390_hotplug_slot_list); static int zpci_fn_configured(enum zpci_state state) { @@ -27,97 +26,86 @@ static int zpci_fn_configured(enum zpci_state state) state == ZPCI_FN_STATE_ONLINE; } -/* - * struct slot - slot information for each *physical* slot - */ -struct slot { - struct list_head slot_list; - struct hotplug_slot hotplug_slot; - struct zpci_dev *zdev; -}; - -static inline struct slot *to_slot(struct hotplug_slot *hotplug_slot) +static inline int zdev_configure(struct zpci_dev *zdev) { - return container_of(hotplug_slot, struct slot, hotplug_slot); -} + int ret = sclp_pci_configure(zdev->fid); -static inline int slot_configure(struct slot *slot) -{ - int ret = sclp_pci_configure(slot->zdev->fid); - - zpci_dbg(3, "conf fid:%x, rc:%d\n", slot->zdev->fid, ret); + zpci_dbg(3, "conf fid:%x, rc:%d\n", zdev->fid, ret); if (!ret) - slot->zdev->state = ZPCI_FN_STATE_CONFIGURED; + zdev->state = ZPCI_FN_STATE_CONFIGURED; return ret; } -static inline int slot_deconfigure(struct slot *slot) +static inline int zdev_deconfigure(struct zpci_dev *zdev) { - int ret = sclp_pci_deconfigure(slot->zdev->fid); + int ret = sclp_pci_deconfigure(zdev->fid); - zpci_dbg(3, "deconf fid:%x, rc:%d\n", slot->zdev->fid, ret); + zpci_dbg(3, "deconf fid:%x, rc:%d\n", zdev->fid, ret); if (!ret) - slot->zdev->state = ZPCI_FN_STATE_STANDBY; + zdev->state = ZPCI_FN_STATE_STANDBY; return ret; } static int enable_slot(struct hotplug_slot *hotplug_slot) { - struct slot *slot = to_slot(hotplug_slot); + struct zpci_dev *zdev = container_of(hotplug_slot, struct zpci_dev, + hotplug_slot); int rc; - if (slot->zdev->state != ZPCI_FN_STATE_STANDBY) + if (zdev->state != ZPCI_FN_STATE_STANDBY) return -EIO; - rc = slot_configure(slot); + rc = zdev_configure(zdev); if (rc) return rc; - rc = zpci_enable_device(slot->zdev); + rc = zpci_enable_device(zdev); if (rc) goto out_deconfigure; - pci_scan_slot(slot->zdev->bus, ZPCI_DEVFN); + pci_scan_slot(zdev->bus, ZPCI_DEVFN); pci_lock_rescan_remove(); - pci_bus_add_devices(slot->zdev->bus); + pci_bus_add_devices(zdev->bus); pci_unlock_rescan_remove(); return rc; out_deconfigure: - slot_deconfigure(slot); + zdev_deconfigure(zdev); return rc; } static int disable_slot(struct hotplug_slot *hotplug_slot) { - struct slot *slot = to_slot(hotplug_slot); + struct zpci_dev *zdev = container_of(hotplug_slot, struct zpci_dev, + hotplug_slot); struct pci_dev *pdev; int rc; - if (!zpci_fn_configured(slot->zdev->state)) + if (!zpci_fn_configured(zdev->state)) return -EIO; - pdev = pci_get_slot(slot->zdev->bus, ZPCI_DEVFN); + pdev = pci_get_slot(zdev->bus, ZPCI_DEVFN); if (pdev) { pci_stop_and_remove_bus_device_locked(pdev); pci_dev_put(pdev); } - rc = zpci_disable_device(slot->zdev); + rc = zpci_disable_device(zdev); if (rc) return rc; - return slot_deconfigure(slot); + return zdev_deconfigure(zdev); } static int get_power_status(struct hotplug_slot *hotplug_slot, u8 *value) { - struct slot *slot = to_slot(hotplug_slot); + struct zpci_dev *zdev = container_of(hotplug_slot, struct zpci_dev, + hotplug_slot); - switch (slot->zdev->state) { + switch (zdev->state) { case ZPCI_FN_STATE_STANDBY: *value = 0; break; @@ -145,44 +133,15 @@ static const struct hotplug_slot_ops s390_hotplug_slot_ops = { int zpci_init_slot(struct zpci_dev *zdev) { char name[SLOT_NAME_SIZE]; - struct slot *slot; - int rc; - if (!zdev) - return 0; - - slot = kzalloc(sizeof(*slot), GFP_KERNEL); - if (!slot) - goto error; - - slot->zdev = zdev; - slot->hotplug_slot.ops = &s390_hotplug_slot_ops; + zdev->hotplug_slot.ops = &s390_hotplug_slot_ops; snprintf(name, SLOT_NAME_SIZE, "%08x", zdev->fid); - rc = pci_hp_register(&slot->hotplug_slot, zdev->bus, - ZPCI_DEVFN, name); - if (rc) - goto error_reg; - - list_add(&slot->slot_list, &s390_hotplug_slot_list); - return 0; - -error_reg: - kfree(slot); -error: - return -ENOMEM; + return pci_hp_register(&zdev->hotplug_slot, zdev->bus, + ZPCI_DEVFN, name); } void zpci_exit_slot(struct zpci_dev *zdev) { - struct slot *slot, *next; - - list_for_each_entry_safe(slot, next, &s390_hotplug_slot_list, - slot_list) { - if (slot->zdev != zdev) - continue; - list_del(&slot->slot_list); - pci_hp_deregister(&slot->hotplug_slot); - kfree(slot); - } + pci_hp_deregister(&zdev->hotplug_slot); } From d68d5d51dc898895b4e15bea52e5668ca9e76180 Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Thu, 5 Mar 2020 07:44:09 +0100 Subject: [PATCH 17/57] s390/cpum_cf: Add new extended counters for IBM z15 Add CPU measurement counter facility event description for IBM z15. Signed-off-by: Thomas Richter Reviewed-by: Sumanth Korikkar Signed-off-by: Vasily Gorbik --- arch/s390/kernel/perf_cpum_cf_events.c | 123 ++++++++++++++++++++++++- 1 file changed, 122 insertions(+), 1 deletion(-) diff --git a/arch/s390/kernel/perf_cpum_cf_events.c b/arch/s390/kernel/perf_cpum_cf_events.c index 8b33e03e47b8..1e3df52b2b65 100644 --- a/arch/s390/kernel/perf_cpum_cf_events.c +++ b/arch/s390/kernel/perf_cpum_cf_events.c @@ -238,6 +238,64 @@ CPUMF_EVENT_ATTR(cf_z14, TX_C_TABORT_SPECIAL, 0x00f5); CPUMF_EVENT_ATTR(cf_z14, MT_DIAG_CYCLES_ONE_THR_ACTIVE, 0x01c0); CPUMF_EVENT_ATTR(cf_z14, MT_DIAG_CYCLES_TWO_THR_ACTIVE, 0x01c1); +CPUMF_EVENT_ATTR(cf_z15, L1D_RO_EXCL_WRITES, 0x0080); +CPUMF_EVENT_ATTR(cf_z15, DTLB2_WRITES, 0x0081); +CPUMF_EVENT_ATTR(cf_z15, DTLB2_MISSES, 0x0082); +CPUMF_EVENT_ATTR(cf_z15, DTLB2_HPAGE_WRITES, 0x0083); +CPUMF_EVENT_ATTR(cf_z15, DTLB2_GPAGE_WRITES, 0x0084); +CPUMF_EVENT_ATTR(cf_z15, L1D_L2D_SOURCED_WRITES, 0x0085); +CPUMF_EVENT_ATTR(cf_z15, ITLB2_WRITES, 0x0086); +CPUMF_EVENT_ATTR(cf_z15, ITLB2_MISSES, 0x0087); +CPUMF_EVENT_ATTR(cf_z15, L1I_L2I_SOURCED_WRITES, 0x0088); +CPUMF_EVENT_ATTR(cf_z15, TLB2_PTE_WRITES, 0x0089); +CPUMF_EVENT_ATTR(cf_z15, TLB2_CRSTE_WRITES, 0x008a); +CPUMF_EVENT_ATTR(cf_z15, TLB2_ENGINES_BUSY, 0x008b); +CPUMF_EVENT_ATTR(cf_z15, TX_C_TEND, 0x008c); +CPUMF_EVENT_ATTR(cf_z15, TX_NC_TEND, 0x008d); +CPUMF_EVENT_ATTR(cf_z15, L1C_TLB2_MISSES, 0x008f); +CPUMF_EVENT_ATTR(cf_z15, L1D_ONCHIP_L3_SOURCED_WRITES, 0x0090); +CPUMF_EVENT_ATTR(cf_z15, L1D_ONCHIP_MEMORY_SOURCED_WRITES, 0x0091); +CPUMF_EVENT_ATTR(cf_z15, L1D_ONCHIP_L3_SOURCED_WRITES_IV, 0x0092); +CPUMF_EVENT_ATTR(cf_z15, L1D_ONCLUSTER_L3_SOURCED_WRITES, 0x0093); +CPUMF_EVENT_ATTR(cf_z15, L1D_ONCLUSTER_MEMORY_SOURCED_WRITES, 0x0094); +CPUMF_EVENT_ATTR(cf_z15, L1D_ONCLUSTER_L3_SOURCED_WRITES_IV, 0x0095); +CPUMF_EVENT_ATTR(cf_z15, L1D_OFFCLUSTER_L3_SOURCED_WRITES, 0x0096); +CPUMF_EVENT_ATTR(cf_z15, L1D_OFFCLUSTER_MEMORY_SOURCED_WRITES, 0x0097); +CPUMF_EVENT_ATTR(cf_z15, L1D_OFFCLUSTER_L3_SOURCED_WRITES_IV, 0x0098); +CPUMF_EVENT_ATTR(cf_z15, L1D_OFFDRAWER_L3_SOURCED_WRITES, 0x0099); +CPUMF_EVENT_ATTR(cf_z15, L1D_OFFDRAWER_MEMORY_SOURCED_WRITES, 0x009a); +CPUMF_EVENT_ATTR(cf_z15, L1D_OFFDRAWER_L3_SOURCED_WRITES_IV, 0x009b); +CPUMF_EVENT_ATTR(cf_z15, L1D_ONDRAWER_L4_SOURCED_WRITES, 0x009c); +CPUMF_EVENT_ATTR(cf_z15, L1D_OFFDRAWER_L4_SOURCED_WRITES, 0x009d); +CPUMF_EVENT_ATTR(cf_z15, L1D_ONCHIP_L3_SOURCED_WRITES_RO, 0x009e); +CPUMF_EVENT_ATTR(cf_z15, L1I_ONCHIP_L3_SOURCED_WRITES, 0x00a2); +CPUMF_EVENT_ATTR(cf_z15, L1I_ONCHIP_MEMORY_SOURCED_WRITES, 0x00a3); +CPUMF_EVENT_ATTR(cf_z15, L1I_ONCHIP_L3_SOURCED_WRITES_IV, 0x00a4); +CPUMF_EVENT_ATTR(cf_z15, L1I_ONCLUSTER_L3_SOURCED_WRITES, 0x00a5); +CPUMF_EVENT_ATTR(cf_z15, L1I_ONCLUSTER_MEMORY_SOURCED_WRITES, 0x00a6); +CPUMF_EVENT_ATTR(cf_z15, L1I_ONCLUSTER_L3_SOURCED_WRITES_IV, 0x00a7); +CPUMF_EVENT_ATTR(cf_z15, L1I_OFFCLUSTER_L3_SOURCED_WRITES, 0x00a8); +CPUMF_EVENT_ATTR(cf_z15, L1I_OFFCLUSTER_MEMORY_SOURCED_WRITES, 0x00a9); +CPUMF_EVENT_ATTR(cf_z15, L1I_OFFCLUSTER_L3_SOURCED_WRITES_IV, 0x00aa); +CPUMF_EVENT_ATTR(cf_z15, L1I_OFFDRAWER_L3_SOURCED_WRITES, 0x00ab); +CPUMF_EVENT_ATTR(cf_z15, L1I_OFFDRAWER_MEMORY_SOURCED_WRITES, 0x00ac); +CPUMF_EVENT_ATTR(cf_z15, L1I_OFFDRAWER_L3_SOURCED_WRITES_IV, 0x00ad); +CPUMF_EVENT_ATTR(cf_z15, L1I_ONDRAWER_L4_SOURCED_WRITES, 0x00ae); +CPUMF_EVENT_ATTR(cf_z15, L1I_OFFDRAWER_L4_SOURCED_WRITES, 0x00af); +CPUMF_EVENT_ATTR(cf_z15, BCD_DFP_EXECUTION_SLOTS, 0x00e0); +CPUMF_EVENT_ATTR(cf_z15, VX_BCD_EXECUTION_SLOTS, 0x00e1); +CPUMF_EVENT_ATTR(cf_z15, DECIMAL_INSTRUCTIONS, 0x00e2); +CPUMF_EVENT_ATTR(cf_z15, LAST_HOST_TRANSLATIONS, 0x00e8); +CPUMF_EVENT_ATTR(cf_z15, TX_NC_TABORT, 0x00f3); +CPUMF_EVENT_ATTR(cf_z15, TX_C_TABORT_NO_SPECIAL, 0x00f4); +CPUMF_EVENT_ATTR(cf_z15, TX_C_TABORT_SPECIAL, 0x00f5); +CPUMF_EVENT_ATTR(cf_z15, DFLT_ACCESS, 0x00f7); +CPUMF_EVENT_ATTR(cf_z15, DFLT_CYCLES, 0x00fc); +CPUMF_EVENT_ATTR(cf_z15, DFLT_CC, 0x00108); +CPUMF_EVENT_ATTR(cf_z15, DFLT_CCERROR, 0x00109); +CPUMF_EVENT_ATTR(cf_z15, MT_DIAG_CYCLES_ONE_THR_ACTIVE, 0x01c0); +CPUMF_EVENT_ATTR(cf_z15, MT_DIAG_CYCLES_TWO_THR_ACTIVE, 0x01c1); + static struct attribute *cpumcf_fvn1_pmu_event_attr[] __initdata = { CPUMF_EVENT_PTR(cf_fvn1, CPU_CYCLES), CPUMF_EVENT_PTR(cf_fvn1, INSTRUCTIONS), @@ -516,6 +574,67 @@ static struct attribute *cpumcf_z14_pmu_event_attr[] __initdata = { NULL, }; +static struct attribute *cpumcf_z15_pmu_event_attr[] __initdata = { + CPUMF_EVENT_PTR(cf_z15, L1D_RO_EXCL_WRITES), + CPUMF_EVENT_PTR(cf_z15, DTLB2_WRITES), + CPUMF_EVENT_PTR(cf_z15, DTLB2_MISSES), + CPUMF_EVENT_PTR(cf_z15, DTLB2_HPAGE_WRITES), + CPUMF_EVENT_PTR(cf_z15, DTLB2_GPAGE_WRITES), + CPUMF_EVENT_PTR(cf_z15, L1D_L2D_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z15, ITLB2_WRITES), + CPUMF_EVENT_PTR(cf_z15, ITLB2_MISSES), + CPUMF_EVENT_PTR(cf_z15, L1I_L2I_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z15, TLB2_PTE_WRITES), + CPUMF_EVENT_PTR(cf_z15, TLB2_CRSTE_WRITES), + CPUMF_EVENT_PTR(cf_z15, TLB2_ENGINES_BUSY), + CPUMF_EVENT_PTR(cf_z15, TX_C_TEND), + CPUMF_EVENT_PTR(cf_z15, TX_NC_TEND), + CPUMF_EVENT_PTR(cf_z15, L1C_TLB2_MISSES), + CPUMF_EVENT_PTR(cf_z15, L1D_ONCHIP_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z15, L1D_ONCHIP_MEMORY_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z15, L1D_ONCHIP_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_z15, L1D_ONCLUSTER_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z15, L1D_ONCLUSTER_MEMORY_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z15, L1D_ONCLUSTER_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_z15, L1D_OFFCLUSTER_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z15, L1D_OFFCLUSTER_MEMORY_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z15, L1D_OFFCLUSTER_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_z15, L1D_OFFDRAWER_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z15, L1D_OFFDRAWER_MEMORY_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z15, L1D_OFFDRAWER_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_z15, L1D_ONDRAWER_L4_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z15, L1D_OFFDRAWER_L4_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z15, L1D_ONCHIP_L3_SOURCED_WRITES_RO), + CPUMF_EVENT_PTR(cf_z15, L1I_ONCHIP_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z15, L1I_ONCHIP_MEMORY_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z15, L1I_ONCHIP_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_z15, L1I_ONCLUSTER_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z15, L1I_ONCLUSTER_MEMORY_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z15, L1I_ONCLUSTER_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_z15, L1I_OFFCLUSTER_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z15, L1I_OFFCLUSTER_MEMORY_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z15, L1I_OFFCLUSTER_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_z15, L1I_OFFDRAWER_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z15, L1I_OFFDRAWER_MEMORY_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z15, L1I_OFFDRAWER_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_z15, L1I_ONDRAWER_L4_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z15, L1I_OFFDRAWER_L4_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z15, BCD_DFP_EXECUTION_SLOTS), + CPUMF_EVENT_PTR(cf_z15, VX_BCD_EXECUTION_SLOTS), + CPUMF_EVENT_PTR(cf_z15, DECIMAL_INSTRUCTIONS), + CPUMF_EVENT_PTR(cf_z15, LAST_HOST_TRANSLATIONS), + CPUMF_EVENT_PTR(cf_z15, TX_NC_TABORT), + CPUMF_EVENT_PTR(cf_z15, TX_C_TABORT_NO_SPECIAL), + CPUMF_EVENT_PTR(cf_z15, TX_C_TABORT_SPECIAL), + CPUMF_EVENT_PTR(cf_z15, DFLT_ACCESS), + CPUMF_EVENT_PTR(cf_z15, DFLT_CYCLES), + CPUMF_EVENT_PTR(cf_z15, DFLT_CC), + CPUMF_EVENT_PTR(cf_z15, DFLT_CCERROR), + CPUMF_EVENT_PTR(cf_z15, MT_DIAG_CYCLES_ONE_THR_ACTIVE), + CPUMF_EVENT_PTR(cf_z15, MT_DIAG_CYCLES_TWO_THR_ACTIVE), + NULL, +}; + /* END: CPUM_CF COUNTER DEFINITIONS ===================================== */ static struct attribute_group cpumcf_pmu_events_group = { @@ -624,9 +743,11 @@ __init const struct attribute_group **cpumf_cf_event_group(void) break; case 0x3906: case 0x3907: + model = cpumcf_z14_pmu_event_attr; + break; case 0x8561: case 0x8562: - model = cpumcf_z14_pmu_event_attr; + model = cpumcf_z15_pmu_event_attr; break; default: model = none; From d2abfbe4652d2b49d30fe77548cf663e63d2d469 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 5 Mar 2020 15:01:21 +0100 Subject: [PATCH 18/57] s390: enable bpf jit by default when not built as always-on This is the s390 variant of commit 81c22041d9f1 ("bpf, x86, arm64: Enable jit by default when not built as always-on"). Acked-by: Ilya Leoshkevich Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 6b1f715dd8bb..f4ff75ff62f2 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -109,6 +109,7 @@ config S390 select ARCH_USE_BUILTIN_BSWAP select ARCH_USE_CMPXCHG_LOCKREF select ARCH_WANTS_DYNAMIC_TASK_STRUCT + select ARCH_WANT_DEFAULT_BPF_JIT select ARCH_WANT_IPC_PARSE_VERSION select BUILDTIME_TABLE_SORT select CLONE_BACKWARDS2 From 0b38b5e1d0e2f361e418e05c179db05bb688bbd6 Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Wed, 22 Jan 2020 13:38:22 +0100 Subject: [PATCH 19/57] s390: prevent leaking kernel address in BEAR When userspace executes a syscall or gets interrupted, BEAR contains a kernel address when returning to userspace. This make it pretty easy to figure out where the kernel is mapped even with KASLR enabled. To fix this, add lpswe to lowcore and always execute it there, so userspace sees only the lowcore address of lpswe. For this we have to extend both critical_cleanup and the SWITCH_ASYNC macro to also check for lpswe addresses in lowcore. Fixes: b2d24b97b2a9 ("s390/kernel: add support for kernel address space layout randomization (KASLR)") Cc: # v5.2+ Reviewed-by: Gerald Schaefer Signed-off-by: Sven Schnelle Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/lowcore.h | 4 +- arch/s390/include/asm/processor.h | 1 + arch/s390/include/asm/setup.h | 7 ++++ arch/s390/kernel/asm-offsets.c | 2 + arch/s390/kernel/entry.S | 65 ++++++++++++++++++------------- arch/s390/kernel/process.c | 1 + arch/s390/kernel/setup.c | 3 ++ arch/s390/kernel/smp.c | 2 + arch/s390/mm/vmem.c | 4 ++ 9 files changed, 62 insertions(+), 27 deletions(-) diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h index 237ee0c4169f..612ed3c6d581 100644 --- a/arch/s390/include/asm/lowcore.h +++ b/arch/s390/include/asm/lowcore.h @@ -141,7 +141,9 @@ struct lowcore { /* br %r1 trampoline */ __u16 br_r1_trampoline; /* 0x0400 */ - __u8 pad_0x0402[0x0e00-0x0402]; /* 0x0402 */ + __u32 return_lpswe; /* 0x0402 */ + __u32 return_mcck_lpswe; /* 0x0406 */ + __u8 pad_0x040a[0x0e00-0x040a]; /* 0x040a */ /* * 0xe00 contains the address of the IPL Parameter Information diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index 361ef5eda468..c9522346799f 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -162,6 +162,7 @@ typedef struct thread_struct thread_struct; #define INIT_THREAD { \ .ksp = sizeof(init_stack) + (unsigned long) &init_stack, \ .fpu.regs = (void *) init_task.thread.fpu.fprs, \ + .last_break = 1, \ } /* diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h index b241ddb67caf..534f212753d6 100644 --- a/arch/s390/include/asm/setup.h +++ b/arch/s390/include/asm/setup.h @@ -8,6 +8,7 @@ #include #include +#include #define EP_OFFSET 0x10008 #define EP_STRING "S390EP" @@ -162,6 +163,12 @@ static inline unsigned long kaslr_offset(void) return __kaslr_offset; } +static inline u32 gen_lpswe(unsigned long addr) +{ + BUILD_BUG_ON(addr > 0xfff); + return 0xb2b20000 | addr; +} + #else /* __ASSEMBLY__ */ #define IPL_DEVICE (IPL_DEVICE_OFFSET) diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index ce33406cfe83..e80f0e6f5972 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -124,6 +124,8 @@ int main(void) OFFSET(__LC_EXT_DAMAGE_CODE, lowcore, external_damage_code); OFFSET(__LC_MCCK_FAIL_STOR_ADDR, lowcore, failing_storage_address); OFFSET(__LC_LAST_BREAK, lowcore, breaking_event_addr); + OFFSET(__LC_RETURN_LPSWE, lowcore, return_lpswe); + OFFSET(__LC_RETURN_MCCK_LPSWE, lowcore, return_mcck_lpswe); OFFSET(__LC_RST_OLD_PSW, lowcore, restart_old_psw); OFFSET(__LC_EXT_OLD_PSW, lowcore, external_old_psw); OFFSET(__LC_SVC_OLD_PSW, lowcore, svc_old_psw); diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 9205add8481d..3ae64914bd14 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -115,26 +115,29 @@ _LPP_OFFSET = __LC_LPP .macro SWITCH_ASYNC savearea,timer tmhh %r8,0x0001 # interrupting from user ? - jnz 1f + jnz 2f lgr %r14,%r9 + cghi %r14,__LC_RETURN_LPSWE + je 0f slg %r14,BASED(.Lcritical_start) clg %r14,BASED(.Lcritical_length) - jhe 0f + jhe 1f +0: lghi %r11,\savearea # inside critical section, do cleanup brasl %r14,cleanup_critical tmhh %r8,0x0001 # retest problem state after cleanup - jnz 1f -0: lg %r14,__LC_ASYNC_STACK # are we already on the target stack? + jnz 2f +1: lg %r14,__LC_ASYNC_STACK # are we already on the target stack? slgr %r14,%r15 srag %r14,%r14,STACK_SHIFT - jnz 2f + jnz 3f CHECK_STACK \savearea aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) - j 3f -1: UPDATE_VTIME %r14,%r15,\timer + j 4f +2: UPDATE_VTIME %r14,%r15,\timer BPENTER __TI_flags(%r12),_TIF_ISOLATE_BP -2: lg %r15,__LC_ASYNC_STACK # load async stack -3: la %r11,STACK_FRAME_OVERHEAD(%r15) +3: lg %r15,__LC_ASYNC_STACK # load async stack +4: la %r11,STACK_FRAME_OVERHEAD(%r15) .endm .macro UPDATE_VTIME w1,w2,enter_timer @@ -401,7 +404,7 @@ ENTRY(system_call) stpt __LC_EXIT_TIMER mvc __VDSO_ECTG_BASE(16,%r14),__LC_EXIT_TIMER lmg %r11,%r15,__PT_R11(%r11) - lpswe __LC_RETURN_PSW + b __LC_RETURN_LPSWE(%r0) .Lsysc_done: # @@ -608,43 +611,50 @@ ENTRY(pgm_check_handler) BPOFF stmg %r8,%r15,__LC_SAVE_AREA_SYNC lg %r10,__LC_LAST_BREAK - lg %r12,__LC_CURRENT + srag %r11,%r10,12 + jnz 0f + /* if __LC_LAST_BREAK is < 4096, it contains one of + * the lpswe addresses in lowcore. Set it to 1 (initial state) + * to prevent leaking that address to userspace. + */ + lghi %r10,1 +0: lg %r12,__LC_CURRENT lghi %r11,0 larl %r13,cleanup_critical lmg %r8,%r9,__LC_PGM_OLD_PSW tmhh %r8,0x0001 # test problem state bit - jnz 2f # -> fault in user space + jnz 3f # -> fault in user space #if IS_ENABLED(CONFIG_KVM) # cleanup critical section for program checks in sie64a lgr %r14,%r9 slg %r14,BASED(.Lsie_critical_start) clg %r14,BASED(.Lsie_critical_length) - jhe 0f + jhe 1f lg %r14,__SF_SIE_CONTROL(%r15) # get control block pointer ni __SIE_PROG0C+3(%r14),0xfe # no longer in SIE lctlg %c1,%c1,__LC_USER_ASCE # load primary asce larl %r9,sie_exit # skip forward to sie_exit lghi %r11,_PIF_GUEST_FAULT #endif -0: tmhh %r8,0x4000 # PER bit set in old PSW ? - jnz 1f # -> enabled, can't be a double fault +1: tmhh %r8,0x4000 # PER bit set in old PSW ? + jnz 2f # -> enabled, can't be a double fault tm __LC_PGM_ILC+3,0x80 # check for per exception jnz .Lpgm_svcper # -> single stepped svc -1: CHECK_STACK __LC_SAVE_AREA_SYNC +2: CHECK_STACK __LC_SAVE_AREA_SYNC aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) - # CHECK_VMAP_STACK branches to stack_overflow or 4f - CHECK_VMAP_STACK __LC_SAVE_AREA_SYNC,4f -2: UPDATE_VTIME %r14,%r15,__LC_SYNC_ENTER_TIMER + # CHECK_VMAP_STACK branches to stack_overflow or 5f + CHECK_VMAP_STACK __LC_SAVE_AREA_SYNC,5f +3: UPDATE_VTIME %r14,%r15,__LC_SYNC_ENTER_TIMER BPENTER __TI_flags(%r12),_TIF_ISOLATE_BP lg %r15,__LC_KERNEL_STACK lgr %r14,%r12 aghi %r14,__TASK_thread # pointer to thread_struct lghi %r13,__LC_PGM_TDB tm __LC_PGM_ILC+2,0x02 # check for transaction abort - jz 3f + jz 4f mvc __THREAD_trap_tdb(256,%r14),0(%r13) -3: stg %r10,__THREAD_last_break(%r14) -4: lgr %r13,%r11 +4: stg %r10,__THREAD_last_break(%r14) +5: lgr %r13,%r11 la %r11,STACK_FRAME_OVERHEAD(%r15) stmg %r0,%r7,__PT_R0(%r11) # clear user controlled registers to prevent speculative use @@ -663,14 +673,14 @@ ENTRY(pgm_check_handler) stg %r13,__PT_FLAGS(%r11) stg %r10,__PT_ARGS(%r11) tm __LC_PGM_ILC+3,0x80 # check for per exception - jz 5f + jz 6f tmhh %r8,0x0001 # kernel per event ? jz .Lpgm_kprobe oi __PT_FLAGS+7(%r11),_PIF_PER_TRAP mvc __THREAD_per_address(8,%r14),__LC_PER_ADDRESS mvc __THREAD_per_cause(2,%r14),__LC_PER_CODE mvc __THREAD_per_paid(1,%r14),__LC_PER_ACCESS_ID -5: REENABLE_IRQS +6: REENABLE_IRQS xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) larl %r1,pgm_check_table llgh %r10,__PT_INT_CODE+2(%r11) @@ -775,7 +785,7 @@ ENTRY(io_int_handler) mvc __VDSO_ECTG_BASE(16,%r14),__LC_EXIT_TIMER .Lio_exit_kernel: lmg %r11,%r15,__PT_R11(%r11) - lpswe __LC_RETURN_PSW + b __LC_RETURN_LPSWE(%r0) .Lio_done: # @@ -1214,7 +1224,7 @@ ENTRY(mcck_int_handler) stpt __LC_EXIT_TIMER mvc __VDSO_ECTG_BASE(16,%r14),__LC_EXIT_TIMER 0: lmg %r11,%r15,__PT_R11(%r11) - lpswe __LC_RETURN_MCCK_PSW + b __LC_RETURN_MCCK_LPSWE .Lmcck_panic: lg %r15,__LC_NODAT_STACK @@ -1271,6 +1281,8 @@ ENDPROC(stack_overflow) #endif ENTRY(cleanup_critical) + cghi %r9,__LC_RETURN_LPSWE + je .Lcleanup_lpswe #if IS_ENABLED(CONFIG_KVM) clg %r9,BASED(.Lcleanup_table_sie) # .Lsie_gmap jl 0f @@ -1424,6 +1436,7 @@ ENDPROC(cleanup_critical) mvc __LC_RETURN_PSW(16),__PT_PSW(%r9) mvc 0(64,%r11),__PT_R8(%r9) lmg %r0,%r7,__PT_R0(%r9) +.Lcleanup_lpswe: 1: lmg %r8,%r9,__LC_RETURN_PSW BR_EX %r14,%r11 .Lcleanup_sysc_restore_insn: diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index 6ccef5f29761..eb6e23ad15a2 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -106,6 +106,7 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long new_stackp, p->thread.system_timer = 0; p->thread.hardirq_timer = 0; p->thread.softirq_timer = 0; + p->thread.last_break = 1; frame->sf.back_chain = 0; /* new return point is ret_from_fork */ diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 1158a63a8e0e..26de59256466 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -73,6 +73,7 @@ #include #include #include +#include #include "entry.h" /* @@ -450,6 +451,8 @@ static void __init setup_lowcore_dat_off(void) lc->spinlock_index = 0; arch_spin_lock_setup(0); lc->br_r1_trampoline = 0x07f1; /* br %r1 */ + lc->return_lpswe = gen_lpswe(__LC_RETURN_PSW); + lc->return_mcck_lpswe = gen_lpswe(__LC_RETURN_MCCK_PSW); set_prefix((u32)(unsigned long) lc); lowcore_ptr[0] = lc; diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index a08bd2522dd9..f87d4e14269c 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -212,6 +212,8 @@ static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu) lc->spinlock_lockval = arch_spin_lockval(cpu); lc->spinlock_index = 0; lc->br_r1_trampoline = 0x07f1; /* br %r1 */ + lc->return_lpswe = gen_lpswe(__LC_RETURN_PSW); + lc->return_mcck_lpswe = gen_lpswe(__LC_RETURN_MCCK_PSW); if (nmi_alloc_per_cpu(lc)) goto out_async; if (vdso_alloc_per_cpu(lc)) diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index b403fa14847d..f810930aff42 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -415,6 +415,10 @@ void __init vmem_map_init(void) SET_MEMORY_RO | SET_MEMORY_X); __set_memory(__stext_dma, (__etext_dma - __stext_dma) >> PAGE_SHIFT, SET_MEMORY_RO | SET_MEMORY_X); + + /* we need lowcore executable for our LPSWE instructions */ + set_memory_x(0, 1); + pr_info("Write protected kernel read-only data: %luk\n", (unsigned long)(__end_rodata - _stext) >> 10); } From fb83510295d7a6cdeb46242515c3180f9adafc85 Mon Sep 17 00:00:00 2001 From: Alexander Gordeev Date: Wed, 29 Jan 2020 12:15:15 +0100 Subject: [PATCH 20/57] s390/cpuinfo: add system topology information This update adjusts /proc/cpuinfo format to meet some user level programs expectations. It also makes the layout consistent with x86 where CPU topology is presented as blocks of key-value pairs. Reviewed-by: Vasily Gorbik Signed-off-by: Alexander Gordeev Signed-off-by: Vasily Gorbik --- arch/s390/kernel/processor.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c index 6ebc2117c66c..2c13ca562b48 100644 --- a/arch/s390/kernel/processor.c +++ b/arch/s390/kernel/processor.c @@ -151,6 +151,26 @@ static void show_cpu_summary(struct seq_file *m, void *v) } } +static void show_cpu_topology(struct seq_file *m, unsigned long n) +{ +#ifdef CONFIG_SCHED_TOPOLOGY + seq_printf(m, "physical id : %d\n", topology_physical_package_id(n)); + seq_printf(m, "core id : %d\n", topology_core_id(n)); + seq_printf(m, "book id : %d\n", topology_book_id(n)); + seq_printf(m, "drawer id : %d\n", topology_drawer_id(n)); + seq_printf(m, "dedicated : %d\n", topology_cpu_dedicated(n)); +#endif /* CONFIG_SCHED_TOPOLOGY */ +} + +static void show_cpu_ids(struct seq_file *m, unsigned long n) +{ + struct cpuid *id = &per_cpu(cpu_info.cpu_id, n); + + seq_printf(m, "version : %02X\n", id->version); + seq_printf(m, "identification : %06X\n", id->ident); + seq_printf(m, "machine : %04X\n", id->machine); +} + static void show_cpu_mhz(struct seq_file *m, unsigned long n) { struct cpu_info *c = per_cpu_ptr(&cpu_info, n); @@ -171,6 +191,8 @@ static int show_cpuinfo(struct seq_file *m, void *v) if (!machine_has_cpu_mhz) return 0; seq_printf(m, "\ncpu number : %ld\n", n); + show_cpu_topology(m, n); + show_cpu_ids(m, n); show_cpu_mhz(m, n); return 0; } From 8719b6d29d2851fa84c4074bb2e5adc022911ab8 Mon Sep 17 00:00:00 2001 From: afzal mohammed Date: Wed, 4 Mar 2020 06:20:48 +0530 Subject: [PATCH 21/57] s390/irq: replace setup_irq() by request_irq() request_irq() is preferred over setup_irq(). Invocations of setup_irq() occur after memory allocators are ready. Per tglx[1], setup_irq() existed in olden days when allocators were not ready by the time early interrupts were initialized. Hence replace setup_irq() by request_irq(). [1] https://lkml.kernel.org/r/alpine.DEB.2.20.1710191609480.1971@nanos Signed-off-by: afzal mohammed Message-Id: <20200304005049.5291-1-afzal.mohd.ma@gmail.com> [heiko.carstens@de.ibm.com: replace pr_err with panic] Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/kernel/irq.c | 8 ++------ drivers/s390/cio/airq.c | 8 ++------ drivers/s390/cio/cio.c | 8 ++------ 3 files changed, 6 insertions(+), 18 deletions(-) diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c index 8371855042dc..da550cb8b31b 100644 --- a/arch/s390/kernel/irq.c +++ b/arch/s390/kernel/irq.c @@ -294,11 +294,6 @@ static irqreturn_t do_ext_interrupt(int irq, void *dummy) return IRQ_HANDLED; } -static struct irqaction external_interrupt = { - .name = "EXT", - .handler = do_ext_interrupt, -}; - void __init init_ext_interrupts(void) { int idx; @@ -308,7 +303,8 @@ void __init init_ext_interrupts(void) irq_set_chip_and_handler(EXT_INTERRUPT, &dummy_irq_chip, handle_percpu_irq); - setup_irq(EXT_INTERRUPT, &external_interrupt); + if (request_irq(EXT_INTERRUPT, do_ext_interrupt, 0, "EXT", NULL)) + panic("Failed to register EXT interrupt\n"); } static DEFINE_SPINLOCK(irq_subclass_lock); diff --git a/drivers/s390/cio/airq.c b/drivers/s390/cio/airq.c index 427b2e24a8ce..cb466ed7eb5e 100644 --- a/drivers/s390/cio/airq.c +++ b/drivers/s390/cio/airq.c @@ -105,16 +105,12 @@ static irqreturn_t do_airq_interrupt(int irq, void *dummy) return IRQ_HANDLED; } -static struct irqaction airq_interrupt = { - .name = "AIO", - .handler = do_airq_interrupt, -}; - void __init init_airq_interrupts(void) { irq_set_chip_and_handler(THIN_INTERRUPT, &dummy_irq_chip, handle_percpu_irq); - setup_irq(THIN_INTERRUPT, &airq_interrupt); + if (request_irq(THIN_INTERRUPT, do_airq_interrupt, 0, "AIO", NULL)) + panic("Failed to register AIO interrupt\n"); } static inline unsigned long iv_size(unsigned long bits) diff --git a/drivers/s390/cio/cio.c b/drivers/s390/cio/cio.c index 18f5458f90e8..6d716db2a46a 100644 --- a/drivers/s390/cio/cio.c +++ b/drivers/s390/cio/cio.c @@ -563,16 +563,12 @@ static irqreturn_t do_cio_interrupt(int irq, void *dummy) return IRQ_HANDLED; } -static struct irqaction io_interrupt = { - .name = "I/O", - .handler = do_cio_interrupt, -}; - void __init init_cio_interrupts(void) { irq_set_chip_and_handler(IO_INTERRUPT, &dummy_irq_chip, handle_percpu_irq); - setup_irq(IO_INTERRUPT, &io_interrupt); + if (request_irq(IO_INTERRUPT, do_cio_interrupt, 0, "I/O", NULL)) + panic("Failed to register I/O interrupt\n"); } #ifdef CONFIG_CCW_CONSOLE From 76fb118083eaf63f506fcbe695c1b12a38971b7a Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 10 Mar 2020 10:25:51 +0100 Subject: [PATCH 22/57] s390/irq: make init_ext_interrupts static Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/hw_irq.h | 1 - arch/s390/kernel/irq.c | 18 +++++++++--------- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/arch/s390/include/asm/hw_irq.h b/arch/s390/include/asm/hw_irq.h index adae176757ae..9078b5b6b837 100644 --- a/arch/s390/include/asm/hw_irq.h +++ b/arch/s390/include/asm/hw_irq.h @@ -7,6 +7,5 @@ void __init init_airq_interrupts(void); void __init init_cio_interrupts(void); -void __init init_ext_interrupts(void); #endif diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c index da550cb8b31b..3514420f0259 100644 --- a/arch/s390/kernel/irq.c +++ b/arch/s390/kernel/irq.c @@ -95,14 +95,6 @@ static const struct irq_class irqclass_sub_desc[] = { {.irq = CPU_RST, .name = "RST", .desc = "[CPU] CPU Restart"}, }; -void __init init_IRQ(void) -{ - BUILD_BUG_ON(ARRAY_SIZE(irqclass_sub_desc) != NR_ARCH_IRQS); - init_cio_interrupts(); - init_airq_interrupts(); - init_ext_interrupts(); -} - void do_IRQ(struct pt_regs *regs, int irq) { struct pt_regs *old_regs; @@ -294,7 +286,7 @@ static irqreturn_t do_ext_interrupt(int irq, void *dummy) return IRQ_HANDLED; } -void __init init_ext_interrupts(void) +static void __init init_ext_interrupts(void) { int idx; @@ -307,6 +299,14 @@ void __init init_ext_interrupts(void) panic("Failed to register EXT interrupt\n"); } +void __init init_IRQ(void) +{ + BUILD_BUG_ON(ARRAY_SIZE(irqclass_sub_desc) != NR_ARCH_IRQS); + init_cio_interrupts(); + init_airq_interrupts(); + init_ext_interrupts(); +} + static DEFINE_SPINLOCK(irq_subclass_lock); static unsigned char irq_subclass_refcount[64]; From 1d49688d2bc6406d74566bca35b3d67201a906fc Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 10 Mar 2020 10:29:43 +0100 Subject: [PATCH 23/57] s390/traps: mark test_monitor_call __init Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/kernel/traps.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c index dc75588d7894..ff9cc4c3290e 100644 --- a/arch/s390/kernel/traps.c +++ b/arch/s390/kernel/traps.c @@ -271,7 +271,7 @@ void kernel_stack_overflow(struct pt_regs *regs) } NOKPROBE_SYMBOL(kernel_stack_overflow); -static void test_monitor_call(void) +static void __init test_monitor_call(void) { int val = 1; From bb533ec8bacd064ee273ca3305db97938c3331ae Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Tue, 10 Mar 2020 09:01:44 +0100 Subject: [PATCH 24/57] s390/config: do not select VIRTIO_CONSOLE via Kconfig select does not ensure that dependencies are also selected. Instead of selecting VIRTIO_CONSOLE from S390_GUEST we should rather add this to the defconfigs. So we update those as well. Reported-by: Michael S. Tsirkin Signed-off-by: Christian Borntraeger Signed-off-by: Vasily Gorbik --- arch/s390/Kconfig | 1 - arch/s390/configs/debug_defconfig | 1 + arch/s390/configs/defconfig | 1 + 3 files changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index f4ff75ff62f2..334f3f2199e8 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -953,7 +953,6 @@ config S390_GUEST select TTY select VIRTUALIZATION select VIRTIO - select VIRTIO_CONSOLE help Enabling this option adds support for virtio based paravirtual device drivers on s390. diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index 2e60c80395ab..4ca5c7499cce 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -532,6 +532,7 @@ CONFIG_INPUT_EVDEV=y # CONFIG_SERIO is not set CONFIG_LEGACY_PTY_COUNT=0 CONFIG_NULL_TTY=m +CONFIG_VIRTIO_CONSOLE=y CONFIG_HW_RANDOM_VIRTIO=m CONFIG_RAW_DRIVER=m CONFIG_HANGCHECK_TIMER=m diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig index 25f799849582..c0950750fb50 100644 --- a/arch/s390/configs/defconfig +++ b/arch/s390/configs/defconfig @@ -528,6 +528,7 @@ CONFIG_INPUT_EVDEV=y # CONFIG_SERIO is not set CONFIG_LEGACY_PTY_COUNT=0 CONFIG_NULL_TTY=m +CONFIG_VIRTIO_CONSOLE=y CONFIG_HW_RANDOM_VIRTIO=m CONFIG_RAW_DRIVER=m CONFIG_HANGCHECK_TIMER=m From 31932757c6121b394cd4f158b6b8f1cca8ffe871 Mon Sep 17 00:00:00 2001 From: Alexander Gordeev Date: Sun, 8 Mar 2020 21:34:49 +0100 Subject: [PATCH 25/57] s390/mm: optimize page table upgrade routine There is a maximum of two new tables allocated on page table upgrade. Because we know that a loop the current implementation is based on could be unrolled with some improvements: * upgrade from 3 to 5 levels happens in one go - without an unnecessary re-take of page_table_lock in-between; * page tables initialization moved out of the atomic code; Signed-off-by: Alexander Gordeev Reviewed-by: Heiko Carstens Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/mm/pgalloc.c | 90 ++++++++++++++++++++++++++---------------- 1 file changed, 56 insertions(+), 34 deletions(-) diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c index 3dd253f81a77..d3be3fe2c55d 100644 --- a/arch/s390/mm/pgalloc.c +++ b/arch/s390/mm/pgalloc.c @@ -77,43 +77,65 @@ static void __crst_table_upgrade(void *arg) int crst_table_upgrade(struct mm_struct *mm, unsigned long end) { - unsigned long *table, *pgd; - int rc, notify; + unsigned long *pgd = NULL, *p4d = NULL, *__pgd; + unsigned long asce_limit = mm->context.asce_limit; /* upgrade should only happen from 3 to 4, 3 to 5, or 4 to 5 levels */ - VM_BUG_ON(mm->context.asce_limit < _REGION2_SIZE); - rc = 0; - notify = 0; - while (mm->context.asce_limit < end) { - table = crst_table_alloc(mm); - if (!table) { - rc = -ENOMEM; - break; - } - spin_lock_bh(&mm->page_table_lock); - pgd = (unsigned long *) mm->pgd; - if (mm->context.asce_limit == _REGION2_SIZE) { - crst_table_init(table, _REGION2_ENTRY_EMPTY); - p4d_populate(mm, (p4d_t *) table, (pud_t *) pgd); - mm->pgd = (pgd_t *) table; - mm->context.asce_limit = _REGION1_SIZE; - mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH | - _ASCE_USER_BITS | _ASCE_TYPE_REGION2; - mm_inc_nr_puds(mm); - } else { - crst_table_init(table, _REGION1_ENTRY_EMPTY); - pgd_populate(mm, (pgd_t *) table, (p4d_t *) pgd); - mm->pgd = (pgd_t *) table; - mm->context.asce_limit = -PAGE_SIZE; - mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH | - _ASCE_USER_BITS | _ASCE_TYPE_REGION1; - } - notify = 1; - spin_unlock_bh(&mm->page_table_lock); + VM_BUG_ON(asce_limit < _REGION2_SIZE); + + if (end <= asce_limit) + return 0; + + if (asce_limit == _REGION2_SIZE) { + p4d = crst_table_alloc(mm); + if (unlikely(!p4d)) + goto err_p4d; + crst_table_init(p4d, _REGION2_ENTRY_EMPTY); } - if (notify) - on_each_cpu(__crst_table_upgrade, mm, 0); - return rc; + if (end > _REGION1_SIZE) { + pgd = crst_table_alloc(mm); + if (unlikely(!pgd)) + goto err_pgd; + crst_table_init(pgd, _REGION1_ENTRY_EMPTY); + } + + spin_lock_bh(&mm->page_table_lock); + + /* + * This routine gets called with mmap_sem lock held and there is + * no reason to optimize for the case of otherwise. However, if + * that would ever change, the below check will let us know. + */ + VM_BUG_ON(asce_limit != mm->context.asce_limit); + + if (p4d) { + __pgd = (unsigned long *) mm->pgd; + p4d_populate(mm, (p4d_t *) p4d, (pud_t *) __pgd); + mm->pgd = (pgd_t *) p4d; + mm->context.asce_limit = _REGION1_SIZE; + mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH | + _ASCE_USER_BITS | _ASCE_TYPE_REGION2; + mm_inc_nr_puds(mm); + } + if (pgd) { + __pgd = (unsigned long *) mm->pgd; + pgd_populate(mm, (pgd_t *) pgd, (p4d_t *) __pgd); + mm->pgd = (pgd_t *) pgd; + mm->context.asce_limit = -PAGE_SIZE; + mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH | + _ASCE_USER_BITS | _ASCE_TYPE_REGION1; + } + + spin_unlock_bh(&mm->page_table_lock); + + on_each_cpu(__crst_table_upgrade, mm, 0); + + return 0; + +err_pgd: + crst_table_free(mm, p4d); +err_p4d: + return -ENOMEM; } void crst_table_downgrade(struct mm_struct *mm) From 42d211a1ae3b77008d4190b7dc79ad29b48bbcd2 Mon Sep 17 00:00:00 2001 From: Alexander Gordeev Date: Wed, 11 Mar 2020 14:18:05 +0100 Subject: [PATCH 26/57] s390/cpuinfo: show processor physical address Show CPU physical address as reported by STAP instruction Signed-off-by: Alexander Gordeev Reviewed-by: Heiko Carstens Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/smp.h | 1 + arch/s390/kernel/processor.c | 1 + arch/s390/kernel/smp.c | 5 +++++ 3 files changed, 7 insertions(+) diff --git a/arch/s390/include/asm/smp.h b/arch/s390/include/asm/smp.h index b157a81fb977..231a51e870fe 100644 --- a/arch/s390/include/asm/smp.h +++ b/arch/s390/include/asm/smp.h @@ -34,6 +34,7 @@ extern int smp_vcpu_scheduled(int cpu); extern void smp_yield_cpu(int cpu); extern void smp_cpu_set_polarization(int cpu, int val); extern int smp_cpu_get_polarization(int cpu); +extern int smp_cpu_get_cpu_address(int cpu); extern void smp_fill_possible_mask(void); extern void smp_detect_cpus(void); diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c index 2c13ca562b48..b98654d0ce41 100644 --- a/arch/s390/kernel/processor.c +++ b/arch/s390/kernel/processor.c @@ -159,6 +159,7 @@ static void show_cpu_topology(struct seq_file *m, unsigned long n) seq_printf(m, "book id : %d\n", topology_book_id(n)); seq_printf(m, "drawer id : %d\n", topology_drawer_id(n)); seq_printf(m, "dedicated : %d\n", topology_cpu_dedicated(n)); + seq_printf(m, "address : %d\n", smp_cpu_get_cpu_address(n)); #endif /* CONFIG_SCHED_TOPOLOGY */ } diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index f87d4e14269c..edc1bf39c542 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -703,6 +703,11 @@ int smp_cpu_get_polarization(int cpu) return pcpu_devices[cpu].polarization; } +int smp_cpu_get_cpu_address(int cpu) +{ + return pcpu_devices[cpu].address; +} + static void __ref smp_get_core_info(struct sclp_core_info *info, int early) { static int use_sigp_detection; From cd8e702f0db75f28d0fbdc574a8fcda4aca0b09b Mon Sep 17 00:00:00 2001 From: Alexander Gordeev Date: Thu, 12 Mar 2020 11:35:05 +0100 Subject: [PATCH 27/57] s390/numa: remove redundant cpus_with_topology variable Variable cpus_with_topology is a leftover that became unneeded once the fake NUMA support has been removed. Signed-off-by: Alexander Gordeev Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/topology.h | 1 - arch/s390/kernel/topology.c | 6 ------ 2 files changed, 7 deletions(-) diff --git a/arch/s390/include/asm/topology.h b/arch/s390/include/asm/topology.h index bd3417185e30..4648303e6958 100644 --- a/arch/s390/include/asm/topology.h +++ b/arch/s390/include/asm/topology.h @@ -24,7 +24,6 @@ struct cpu_topology_s390 { }; extern struct cpu_topology_s390 cpu_topology[NR_CPUS]; -extern cpumask_t cpus_with_topology; #define topology_physical_package_id(cpu) (cpu_topology[cpu].socket_id) #define topology_thread_id(cpu) (cpu_topology[cpu].thread_id) diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c index c189f5d996ff..ec1bffe6ce75 100644 --- a/arch/s390/kernel/topology.c +++ b/arch/s390/kernel/topology.c @@ -62,8 +62,6 @@ static struct mask_info drawer_info; struct cpu_topology_s390 cpu_topology[NR_CPUS]; EXPORT_SYMBOL_GPL(cpu_topology); -cpumask_t cpus_with_topology; - static cpumask_t cpu_group_map(struct mask_info *info, unsigned int cpu) { cpumask_t mask; @@ -137,7 +135,6 @@ static void add_cpus_to_mask(struct topology_core *tl_core, cpumask_set_cpu(lcpu + i, &drawer->mask); cpumask_set_cpu(lcpu + i, &book->mask); cpumask_set_cpu(lcpu + i, &socket->mask); - cpumask_set_cpu(lcpu + i, &cpus_with_topology); smp_cpu_set_polarization(lcpu + i, tl_core->pp); } } @@ -262,8 +259,6 @@ static void update_cpu_masks(void) topo->socket_id = id; topo->book_id = id; topo->drawer_id = id; - if (cpu_present(cpu)) - cpumask_set_cpu(cpu, &cpus_with_topology); } } } @@ -287,7 +282,6 @@ static int __arch_update_cpu_topology(void) int rc = 0; mutex_lock(&smp_cpu_state_mutex); - cpumask_clear(&cpus_with_topology); if (MACHINE_HAS_TOPOLOGY) { rc = 1; store_topology(info); From 52aeda7accb6d2e511a1b89142cbbf6fd2c12565 Mon Sep 17 00:00:00 2001 From: Alexander Gordeev Date: Thu, 12 Mar 2020 11:32:23 +0100 Subject: [PATCH 28/57] s390/topology: remove offline CPUs from CPU topology masks The CPU topology masks on s390 contain also bits of CPUs which are offline. Currently this is already a problem, since common code scheduler expects e.g. cpu_smt_mask() to reflect reality. This update changes the described behaviour and s390 starts to behave like all other architectures. Signed-off-by: Alexander Gordeev Reviewed-by: Heiko Carstens Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/topology.h | 2 ++ arch/s390/kernel/smp.c | 6 ++++-- arch/s390/kernel/topology.c | 4 +++- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/arch/s390/include/asm/topology.h b/arch/s390/include/asm/topology.h index 4648303e6958..56b14616fb6b 100644 --- a/arch/s390/include/asm/topology.h +++ b/arch/s390/include/asm/topology.h @@ -43,6 +43,7 @@ int topology_cpu_init(struct cpu *); int topology_set_cpu_management(int fc); void topology_schedule_update(void); void store_topology(struct sysinfo_15_1_x *info); +void update_cpu_masks(void); void topology_expect_change(void); const struct cpumask *cpu_coregroup_mask(int cpu); @@ -52,6 +53,7 @@ static inline void topology_init_early(void) { } static inline void topology_schedule_update(void) { } static inline int topology_cpu_init(struct cpu *cpu) { return 0; } static inline int topology_cpu_dedicated(int cpu_nr) { return 0; } +static inline void update_cpu_masks(void) { } static inline void topology_expect_change(void) { } #endif /* CONFIG_SCHED_TOPOLOGY */ diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index edc1bf39c542..7eaabbab2213 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -858,12 +858,13 @@ static void smp_init_secondary(void) init_cpu_timer(); vtime_init(); pfault_init(); - notify_cpu_starting(smp_processor_id()); + notify_cpu_starting(cpu); if (topology_cpu_dedicated(cpu)) set_cpu_flag(CIF_DEDICATED_CPU); else clear_cpu_flag(CIF_DEDICATED_CPU); - set_cpu_online(smp_processor_id(), true); + set_cpu_online(cpu, true); + update_cpu_masks(); inc_irq_stat(CPU_RST); local_irq_enable(); cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); @@ -935,6 +936,7 @@ int __cpu_disable(void) /* Handle possible pending IPIs */ smp_handle_ext_call(); set_cpu_online(smp_processor_id(), false); + update_cpu_masks(); /* Disable pseudo page faults on this cpu. */ pfault_fini(); /* Disable interrupt sources via control register. */ diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c index ec1bffe6ce75..09711d55f123 100644 --- a/arch/s390/kernel/topology.c +++ b/arch/s390/kernel/topology.c @@ -88,6 +88,7 @@ static cpumask_t cpu_group_map(struct mask_info *info, unsigned int cpu) cpumask_copy(&mask, cpumask_of(cpu)); break; } + cpumask_and(&mask, &mask, cpu_online_mask); return mask; } @@ -103,6 +104,7 @@ static cpumask_t cpu_thread_map(unsigned int cpu) for (i = 0; i <= smp_cpu_mtid; i++) if (cpu_present(cpu + i)) cpumask_set_cpu(cpu + i, &mask); + cpumask_and(&mask, &mask, cpu_online_mask); return mask; } @@ -241,7 +243,7 @@ int topology_set_cpu_management(int fc) return rc; } -static void update_cpu_masks(void) +void update_cpu_masks(void) { struct cpu_topology_s390 *topo; int cpu, id; From eb3e064b8dd12d0bc907dbbfc227bca9da252e6f Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 11 Mar 2020 10:09:15 +0100 Subject: [PATCH 29/57] s390/zcrypt: Use scnprintf() for avoiding potential buffer overflow Since snprintf() returns the would-be-output size instead of the actual output size, the succeeding calls may go beyond the given buffer limit. Fix it by replacing with scnprintf(). Message-Id: <20200311090915.21059-1-tiwai@suse.de> Signed-off-by: Takashi Iwai Signed-off-by: Harald Freudenberger Signed-off-by: Vasily Gorbik --- drivers/s390/crypto/zcrypt_cex4.c | 40 ++++++++++++++++--------------- 1 file changed, 21 insertions(+), 19 deletions(-) diff --git a/drivers/s390/crypto/zcrypt_cex4.c b/drivers/s390/crypto/zcrypt_cex4.c index 9a9d02e19774..6fc1ea77fbe9 100644 --- a/drivers/s390/crypto/zcrypt_cex4.c +++ b/drivers/s390/crypto/zcrypt_cex4.c @@ -128,16 +128,18 @@ static ssize_t cca_mkvps_show(struct device *dev, n = snprintf(buf, PAGE_SIZE, "AES NEW: - -\n"); if (ci.cur_mk_state >= '1' && ci.cur_mk_state <= '2') - n += snprintf(buf + n, PAGE_SIZE - n, "AES CUR: %s 0x%016llx\n", - cao_state[ci.cur_mk_state - '1'], ci.cur_mkvp); + n += scnprintf(buf + n, PAGE_SIZE - n, + "AES CUR: %s 0x%016llx\n", + cao_state[ci.cur_mk_state - '1'], ci.cur_mkvp); else - n += snprintf(buf + n, PAGE_SIZE - n, "AES CUR: - -\n"); + n += scnprintf(buf + n, PAGE_SIZE - n, "AES CUR: - -\n"); if (ci.old_mk_state >= '1' && ci.old_mk_state <= '2') - n += snprintf(buf + n, PAGE_SIZE - n, "AES OLD: %s 0x%016llx\n", - cao_state[ci.old_mk_state - '1'], ci.old_mkvp); + n += scnprintf(buf + n, PAGE_SIZE - n, + "AES OLD: %s 0x%016llx\n", + cao_state[ci.old_mk_state - '1'], ci.old_mkvp); else - n += snprintf(buf + n, PAGE_SIZE - n, "AES OLD: - -\n"); + n += scnprintf(buf + n, PAGE_SIZE - n, "AES OLD: - -\n"); return n; } @@ -251,11 +253,11 @@ static ssize_t ep11_card_op_modes_show(struct device *dev, if (ci.op_mode & (1 << ep11_op_modes[i].mode_bit)) { if (n > 0) buf[n++] = ' '; - n += snprintf(buf + n, PAGE_SIZE - n, - "%s", ep11_op_modes[i].mode_txt); + n += scnprintf(buf + n, PAGE_SIZE - n, + "%s", ep11_op_modes[i].mode_txt); } } - n += snprintf(buf + n, PAGE_SIZE - n, "\n"); + n += scnprintf(buf + n, PAGE_SIZE - n, "\n"); return n; } @@ -305,21 +307,21 @@ static ssize_t ep11_mkvps_show(struct device *dev, cwk_state[di.cur_wk_state - '0']); bin2hex(buf + n, di.cur_wkvp, sizeof(di.cur_wkvp)); n += 2 * sizeof(di.cur_wkvp); - n += snprintf(buf + n, PAGE_SIZE - n, "\n"); + n += scnprintf(buf + n, PAGE_SIZE - n, "\n"); } else n = snprintf(buf, PAGE_SIZE, "WK CUR: - -\n"); if (di.new_wk_state == '0') { - n += snprintf(buf + n, PAGE_SIZE - n, "WK NEW: %s -\n", - nwk_state[di.new_wk_state - '0']); + n += scnprintf(buf + n, PAGE_SIZE - n, "WK NEW: %s -\n", + nwk_state[di.new_wk_state - '0']); } else if (di.new_wk_state >= '1' && di.new_wk_state <= '2') { - n += snprintf(buf + n, PAGE_SIZE - n, "WK NEW: %s 0x", - nwk_state[di.new_wk_state - '0']); + n += scnprintf(buf + n, PAGE_SIZE - n, "WK NEW: %s 0x", + nwk_state[di.new_wk_state - '0']); bin2hex(buf + n, di.new_wkvp, sizeof(di.new_wkvp)); n += 2 * sizeof(di.new_wkvp); - n += snprintf(buf + n, PAGE_SIZE - n, "\n"); + n += scnprintf(buf + n, PAGE_SIZE - n, "\n"); } else - n += snprintf(buf + n, PAGE_SIZE - n, "WK NEW: - -\n"); + n += scnprintf(buf + n, PAGE_SIZE - n, "WK NEW: - -\n"); return n; } @@ -346,11 +348,11 @@ static ssize_t ep11_queue_op_modes_show(struct device *dev, if (di.op_mode & (1 << ep11_op_modes[i].mode_bit)) { if (n > 0) buf[n++] = ' '; - n += snprintf(buf + n, PAGE_SIZE - n, - "%s", ep11_op_modes[i].mode_txt); + n += scnprintf(buf + n, PAGE_SIZE - n, + "%s", ep11_op_modes[i].mode_txt); } } - n += snprintf(buf + n, PAGE_SIZE - n, "\n"); + n += scnprintf(buf + n, PAGE_SIZE - n, "\n"); return n; } From 40501c70e3f09e8018bf08457502a3a7b2d5a406 Mon Sep 17 00:00:00 2001 From: Harald Freudenberger Date: Thu, 12 Mar 2020 11:19:55 +0100 Subject: [PATCH 30/57] s390/zcrypt: replace snprintf/sprintf with scnprintf snprintf() may not always return the correct size of used bytes but instead the length the resulting string would be if it would fit into the buffer. So scnprintf() is the function to use when the real length of the resulting string is needed. Replace all occurrences of snprintf() with scnprintf() where the return code is further processed. Also find and fix some occurrences where sprintf() was used. Suggested-by: Takashi Iwai Signed-off-by: Harald Freudenberger Signed-off-by: Vasily Gorbik --- drivers/s390/crypto/ap_bus.c | 72 +++++++++++++++--------------- drivers/s390/crypto/ap_card.c | 17 +++---- drivers/s390/crypto/ap_queue.c | 18 ++++---- drivers/s390/crypto/zcrypt_card.c | 6 +-- drivers/s390/crypto/zcrypt_cex4.c | 34 +++++++------- drivers/s390/crypto/zcrypt_queue.c | 4 +- 6 files changed, 76 insertions(+), 75 deletions(-) diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c index 5256e3ce84e5..171b0a08e0f9 100644 --- a/drivers/s390/crypto/ap_bus.c +++ b/drivers/s390/crypto/ap_bus.c @@ -1021,7 +1021,7 @@ EXPORT_SYMBOL(ap_parse_mask_str); static ssize_t ap_domain_show(struct bus_type *bus, char *buf) { - return snprintf(buf, PAGE_SIZE, "%d\n", ap_domain_index); + return scnprintf(buf, PAGE_SIZE, "%d\n", ap_domain_index); } static ssize_t ap_domain_store(struct bus_type *bus, @@ -1047,14 +1047,14 @@ static BUS_ATTR_RW(ap_domain); static ssize_t ap_control_domain_mask_show(struct bus_type *bus, char *buf) { if (!ap_configuration) /* QCI not supported */ - return snprintf(buf, PAGE_SIZE, "not supported\n"); + return scnprintf(buf, PAGE_SIZE, "not supported\n"); - return snprintf(buf, PAGE_SIZE, - "0x%08x%08x%08x%08x%08x%08x%08x%08x\n", - ap_configuration->adm[0], ap_configuration->adm[1], - ap_configuration->adm[2], ap_configuration->adm[3], - ap_configuration->adm[4], ap_configuration->adm[5], - ap_configuration->adm[6], ap_configuration->adm[7]); + return scnprintf(buf, PAGE_SIZE, + "0x%08x%08x%08x%08x%08x%08x%08x%08x\n", + ap_configuration->adm[0], ap_configuration->adm[1], + ap_configuration->adm[2], ap_configuration->adm[3], + ap_configuration->adm[4], ap_configuration->adm[5], + ap_configuration->adm[6], ap_configuration->adm[7]); } static BUS_ATTR_RO(ap_control_domain_mask); @@ -1062,14 +1062,14 @@ static BUS_ATTR_RO(ap_control_domain_mask); static ssize_t ap_usage_domain_mask_show(struct bus_type *bus, char *buf) { if (!ap_configuration) /* QCI not supported */ - return snprintf(buf, PAGE_SIZE, "not supported\n"); + return scnprintf(buf, PAGE_SIZE, "not supported\n"); - return snprintf(buf, PAGE_SIZE, - "0x%08x%08x%08x%08x%08x%08x%08x%08x\n", - ap_configuration->aqm[0], ap_configuration->aqm[1], - ap_configuration->aqm[2], ap_configuration->aqm[3], - ap_configuration->aqm[4], ap_configuration->aqm[5], - ap_configuration->aqm[6], ap_configuration->aqm[7]); + return scnprintf(buf, PAGE_SIZE, + "0x%08x%08x%08x%08x%08x%08x%08x%08x\n", + ap_configuration->aqm[0], ap_configuration->aqm[1], + ap_configuration->aqm[2], ap_configuration->aqm[3], + ap_configuration->aqm[4], ap_configuration->aqm[5], + ap_configuration->aqm[6], ap_configuration->aqm[7]); } static BUS_ATTR_RO(ap_usage_domain_mask); @@ -1077,29 +1077,29 @@ static BUS_ATTR_RO(ap_usage_domain_mask); static ssize_t ap_adapter_mask_show(struct bus_type *bus, char *buf) { if (!ap_configuration) /* QCI not supported */ - return snprintf(buf, PAGE_SIZE, "not supported\n"); + return scnprintf(buf, PAGE_SIZE, "not supported\n"); - return snprintf(buf, PAGE_SIZE, - "0x%08x%08x%08x%08x%08x%08x%08x%08x\n", - ap_configuration->apm[0], ap_configuration->apm[1], - ap_configuration->apm[2], ap_configuration->apm[3], - ap_configuration->apm[4], ap_configuration->apm[5], - ap_configuration->apm[6], ap_configuration->apm[7]); + return scnprintf(buf, PAGE_SIZE, + "0x%08x%08x%08x%08x%08x%08x%08x%08x\n", + ap_configuration->apm[0], ap_configuration->apm[1], + ap_configuration->apm[2], ap_configuration->apm[3], + ap_configuration->apm[4], ap_configuration->apm[5], + ap_configuration->apm[6], ap_configuration->apm[7]); } static BUS_ATTR_RO(ap_adapter_mask); static ssize_t ap_interrupts_show(struct bus_type *bus, char *buf) { - return snprintf(buf, PAGE_SIZE, "%d\n", - ap_using_interrupts() ? 1 : 0); + return scnprintf(buf, PAGE_SIZE, "%d\n", + ap_using_interrupts() ? 1 : 0); } static BUS_ATTR_RO(ap_interrupts); static ssize_t config_time_show(struct bus_type *bus, char *buf) { - return snprintf(buf, PAGE_SIZE, "%d\n", ap_config_time); + return scnprintf(buf, PAGE_SIZE, "%d\n", ap_config_time); } static ssize_t config_time_store(struct bus_type *bus, @@ -1118,7 +1118,7 @@ static BUS_ATTR_RW(config_time); static ssize_t poll_thread_show(struct bus_type *bus, char *buf) { - return snprintf(buf, PAGE_SIZE, "%d\n", ap_poll_kthread ? 1 : 0); + return scnprintf(buf, PAGE_SIZE, "%d\n", ap_poll_kthread ? 1 : 0); } static ssize_t poll_thread_store(struct bus_type *bus, @@ -1141,7 +1141,7 @@ static BUS_ATTR_RW(poll_thread); static ssize_t poll_timeout_show(struct bus_type *bus, char *buf) { - return snprintf(buf, PAGE_SIZE, "%llu\n", poll_timeout); + return scnprintf(buf, PAGE_SIZE, "%llu\n", poll_timeout); } static ssize_t poll_timeout_store(struct bus_type *bus, const char *buf, @@ -1176,7 +1176,7 @@ static ssize_t ap_max_domain_id_show(struct bus_type *bus, char *buf) max_domain_id = ap_max_domain_id ? : -1; else max_domain_id = 15; - return snprintf(buf, PAGE_SIZE, "%d\n", max_domain_id); + return scnprintf(buf, PAGE_SIZE, "%d\n", max_domain_id); } static BUS_ATTR_RO(ap_max_domain_id); @@ -1187,10 +1187,10 @@ static ssize_t apmask_show(struct bus_type *bus, char *buf) if (mutex_lock_interruptible(&ap_perms_mutex)) return -ERESTARTSYS; - rc = snprintf(buf, PAGE_SIZE, - "0x%016lx%016lx%016lx%016lx\n", - ap_perms.apm[0], ap_perms.apm[1], - ap_perms.apm[2], ap_perms.apm[3]); + rc = scnprintf(buf, PAGE_SIZE, + "0x%016lx%016lx%016lx%016lx\n", + ap_perms.apm[0], ap_perms.apm[1], + ap_perms.apm[2], ap_perms.apm[3]); mutex_unlock(&ap_perms_mutex); return rc; @@ -1218,10 +1218,10 @@ static ssize_t aqmask_show(struct bus_type *bus, char *buf) if (mutex_lock_interruptible(&ap_perms_mutex)) return -ERESTARTSYS; - rc = snprintf(buf, PAGE_SIZE, - "0x%016lx%016lx%016lx%016lx\n", - ap_perms.aqm[0], ap_perms.aqm[1], - ap_perms.aqm[2], ap_perms.aqm[3]); + rc = scnprintf(buf, PAGE_SIZE, + "0x%016lx%016lx%016lx%016lx\n", + ap_perms.aqm[0], ap_perms.aqm[1], + ap_perms.aqm[2], ap_perms.aqm[3]); mutex_unlock(&ap_perms_mutex); return rc; diff --git a/drivers/s390/crypto/ap_card.c b/drivers/s390/crypto/ap_card.c index e85bfca1ed16..0a39dfdb6a1d 100644 --- a/drivers/s390/crypto/ap_card.c +++ b/drivers/s390/crypto/ap_card.c @@ -23,7 +23,7 @@ static ssize_t hwtype_show(struct device *dev, { struct ap_card *ac = to_ap_card(dev); - return snprintf(buf, PAGE_SIZE, "%d\n", ac->ap_dev.device_type); + return scnprintf(buf, PAGE_SIZE, "%d\n", ac->ap_dev.device_type); } static DEVICE_ATTR_RO(hwtype); @@ -33,7 +33,7 @@ static ssize_t raw_hwtype_show(struct device *dev, { struct ap_card *ac = to_ap_card(dev); - return snprintf(buf, PAGE_SIZE, "%d\n", ac->raw_hwtype); + return scnprintf(buf, PAGE_SIZE, "%d\n", ac->raw_hwtype); } static DEVICE_ATTR_RO(raw_hwtype); @@ -43,7 +43,7 @@ static ssize_t depth_show(struct device *dev, struct device_attribute *attr, { struct ap_card *ac = to_ap_card(dev); - return snprintf(buf, PAGE_SIZE, "%d\n", ac->queue_depth); + return scnprintf(buf, PAGE_SIZE, "%d\n", ac->queue_depth); } static DEVICE_ATTR_RO(depth); @@ -53,7 +53,7 @@ static ssize_t ap_functions_show(struct device *dev, { struct ap_card *ac = to_ap_card(dev); - return snprintf(buf, PAGE_SIZE, "0x%08X\n", ac->functions); + return scnprintf(buf, PAGE_SIZE, "0x%08X\n", ac->functions); } static DEVICE_ATTR_RO(ap_functions); @@ -69,7 +69,7 @@ static ssize_t request_count_show(struct device *dev, spin_lock_bh(&ap_list_lock); req_cnt = atomic64_read(&ac->total_request_count); spin_unlock_bh(&ap_list_lock); - return snprintf(buf, PAGE_SIZE, "%llu\n", req_cnt); + return scnprintf(buf, PAGE_SIZE, "%llu\n", req_cnt); } static ssize_t request_count_store(struct device *dev, @@ -102,7 +102,7 @@ static ssize_t requestq_count_show(struct device *dev, for_each_ap_queue(aq, ac) reqq_cnt += aq->requestq_count; spin_unlock_bh(&ap_list_lock); - return snprintf(buf, PAGE_SIZE, "%d\n", reqq_cnt); + return scnprintf(buf, PAGE_SIZE, "%d\n", reqq_cnt); } static DEVICE_ATTR_RO(requestq_count); @@ -119,7 +119,7 @@ static ssize_t pendingq_count_show(struct device *dev, for_each_ap_queue(aq, ac) penq_cnt += aq->pendingq_count; spin_unlock_bh(&ap_list_lock); - return snprintf(buf, PAGE_SIZE, "%d\n", penq_cnt); + return scnprintf(buf, PAGE_SIZE, "%d\n", penq_cnt); } static DEVICE_ATTR_RO(pendingq_count); @@ -127,7 +127,8 @@ static DEVICE_ATTR_RO(pendingq_count); static ssize_t modalias_show(struct device *dev, struct device_attribute *attr, char *buf) { - return sprintf(buf, "ap:t%02X\n", to_ap_dev(dev)->device_type); + return scnprintf(buf, PAGE_SIZE, "ap:t%02X\n", + to_ap_dev(dev)->device_type); } static DEVICE_ATTR_RO(modalias); diff --git a/drivers/s390/crypto/ap_queue.c b/drivers/s390/crypto/ap_queue.c index a317ab484932..39c0b246c69a 100644 --- a/drivers/s390/crypto/ap_queue.c +++ b/drivers/s390/crypto/ap_queue.c @@ -484,7 +484,7 @@ static ssize_t request_count_show(struct device *dev, spin_lock_bh(&aq->lock); req_cnt = aq->total_request_count; spin_unlock_bh(&aq->lock); - return snprintf(buf, PAGE_SIZE, "%llu\n", req_cnt); + return scnprintf(buf, PAGE_SIZE, "%llu\n", req_cnt); } static ssize_t request_count_store(struct device *dev, @@ -511,7 +511,7 @@ static ssize_t requestq_count_show(struct device *dev, spin_lock_bh(&aq->lock); reqq_cnt = aq->requestq_count; spin_unlock_bh(&aq->lock); - return snprintf(buf, PAGE_SIZE, "%d\n", reqq_cnt); + return scnprintf(buf, PAGE_SIZE, "%d\n", reqq_cnt); } static DEVICE_ATTR_RO(requestq_count); @@ -525,7 +525,7 @@ static ssize_t pendingq_count_show(struct device *dev, spin_lock_bh(&aq->lock); penq_cnt = aq->pendingq_count; spin_unlock_bh(&aq->lock); - return snprintf(buf, PAGE_SIZE, "%d\n", penq_cnt); + return scnprintf(buf, PAGE_SIZE, "%d\n", penq_cnt); } static DEVICE_ATTR_RO(pendingq_count); @@ -540,14 +540,14 @@ static ssize_t reset_show(struct device *dev, switch (aq->state) { case AP_STATE_RESET_START: case AP_STATE_RESET_WAIT: - rc = snprintf(buf, PAGE_SIZE, "Reset in progress.\n"); + rc = scnprintf(buf, PAGE_SIZE, "Reset in progress.\n"); break; case AP_STATE_WORKING: case AP_STATE_QUEUE_FULL: - rc = snprintf(buf, PAGE_SIZE, "Reset Timer armed.\n"); + rc = scnprintf(buf, PAGE_SIZE, "Reset Timer armed.\n"); break; default: - rc = snprintf(buf, PAGE_SIZE, "No Reset Timer set.\n"); + rc = scnprintf(buf, PAGE_SIZE, "No Reset Timer set.\n"); } spin_unlock_bh(&aq->lock); return rc; @@ -581,11 +581,11 @@ static ssize_t interrupt_show(struct device *dev, spin_lock_bh(&aq->lock); if (aq->state == AP_STATE_SETIRQ_WAIT) - rc = snprintf(buf, PAGE_SIZE, "Enable Interrupt pending.\n"); + rc = scnprintf(buf, PAGE_SIZE, "Enable Interrupt pending.\n"); else if (aq->interrupt == AP_INTR_ENABLED) - rc = snprintf(buf, PAGE_SIZE, "Interrupts enabled.\n"); + rc = scnprintf(buf, PAGE_SIZE, "Interrupts enabled.\n"); else - rc = snprintf(buf, PAGE_SIZE, "Interrupts disabled.\n"); + rc = scnprintf(buf, PAGE_SIZE, "Interrupts disabled.\n"); spin_unlock_bh(&aq->lock); return rc; } diff --git a/drivers/s390/crypto/zcrypt_card.c b/drivers/s390/crypto/zcrypt_card.c index d4f35a183c15..c53cab4b0c9e 100644 --- a/drivers/s390/crypto/zcrypt_card.c +++ b/drivers/s390/crypto/zcrypt_card.c @@ -41,7 +41,7 @@ static ssize_t type_show(struct device *dev, { struct zcrypt_card *zc = to_ap_card(dev)->private; - return snprintf(buf, PAGE_SIZE, "%s\n", zc->type_string); + return scnprintf(buf, PAGE_SIZE, "%s\n", zc->type_string); } static DEVICE_ATTR_RO(type); @@ -52,7 +52,7 @@ static ssize_t online_show(struct device *dev, { struct zcrypt_card *zc = to_ap_card(dev)->private; - return snprintf(buf, PAGE_SIZE, "%d\n", zc->online); + return scnprintf(buf, PAGE_SIZE, "%d\n", zc->online); } static ssize_t online_store(struct device *dev, @@ -86,7 +86,7 @@ static ssize_t load_show(struct device *dev, { struct zcrypt_card *zc = to_ap_card(dev)->private; - return snprintf(buf, PAGE_SIZE, "%d\n", atomic_read(&zc->load)); + return scnprintf(buf, PAGE_SIZE, "%d\n", atomic_read(&zc->load)); } static DEVICE_ATTR_RO(load); diff --git a/drivers/s390/crypto/zcrypt_cex4.c b/drivers/s390/crypto/zcrypt_cex4.c index 6fc1ea77fbe9..8cfb1e2e9e1b 100644 --- a/drivers/s390/crypto/zcrypt_cex4.c +++ b/drivers/s390/crypto/zcrypt_cex4.c @@ -87,7 +87,7 @@ static ssize_t cca_serialnr_show(struct device *dev, if (ap_domain_index >= 0) cca_get_info(ac->id, ap_domain_index, &ci, zc->online); - return snprintf(buf, PAGE_SIZE, "%s\n", ci.serial); + return scnprintf(buf, PAGE_SIZE, "%s\n", ci.serial); } static struct device_attribute dev_attr_cca_serialnr = @@ -122,10 +122,10 @@ static ssize_t cca_mkvps_show(struct device *dev, &ci, zq->online); if (ci.new_mk_state >= '1' && ci.new_mk_state <= '3') - n = snprintf(buf, PAGE_SIZE, "AES NEW: %s 0x%016llx\n", - new_state[ci.new_mk_state - '1'], ci.new_mkvp); + n = scnprintf(buf, PAGE_SIZE, "AES NEW: %s 0x%016llx\n", + new_state[ci.new_mk_state - '1'], ci.new_mkvp); else - n = snprintf(buf, PAGE_SIZE, "AES NEW: - -\n"); + n = scnprintf(buf, PAGE_SIZE, "AES NEW: - -\n"); if (ci.cur_mk_state >= '1' && ci.cur_mk_state <= '2') n += scnprintf(buf + n, PAGE_SIZE - n, @@ -172,9 +172,9 @@ static ssize_t ep11_api_ordinalnr_show(struct device *dev, ep11_get_card_info(ac->id, &ci, zc->online); if (ci.API_ord_nr > 0) - return snprintf(buf, PAGE_SIZE, "%u\n", ci.API_ord_nr); + return scnprintf(buf, PAGE_SIZE, "%u\n", ci.API_ord_nr); else - return snprintf(buf, PAGE_SIZE, "\n"); + return scnprintf(buf, PAGE_SIZE, "\n"); } static struct device_attribute dev_attr_ep11_api_ordinalnr = @@ -193,11 +193,11 @@ static ssize_t ep11_fw_version_show(struct device *dev, ep11_get_card_info(ac->id, &ci, zc->online); if (ci.FW_version > 0) - return snprintf(buf, PAGE_SIZE, "%d.%d\n", - (int)(ci.FW_version >> 8), - (int)(ci.FW_version & 0xFF)); + return scnprintf(buf, PAGE_SIZE, "%d.%d\n", + (int)(ci.FW_version >> 8), + (int)(ci.FW_version & 0xFF)); else - return snprintf(buf, PAGE_SIZE, "\n"); + return scnprintf(buf, PAGE_SIZE, "\n"); } static struct device_attribute dev_attr_ep11_fw_version = @@ -216,9 +216,9 @@ static ssize_t ep11_serialnr_show(struct device *dev, ep11_get_card_info(ac->id, &ci, zc->online); if (ci.serial[0]) - return snprintf(buf, PAGE_SIZE, "%16.16s\n", ci.serial); + return scnprintf(buf, PAGE_SIZE, "%16.16s\n", ci.serial); else - return snprintf(buf, PAGE_SIZE, "\n"); + return scnprintf(buf, PAGE_SIZE, "\n"); } static struct device_attribute dev_attr_ep11_serialnr = @@ -300,16 +300,16 @@ static ssize_t ep11_mkvps_show(struct device *dev, &di); if (di.cur_wk_state == '0') { - n = snprintf(buf, PAGE_SIZE, "WK CUR: %s -\n", - cwk_state[di.cur_wk_state - '0']); + n = scnprintf(buf, PAGE_SIZE, "WK CUR: %s -\n", + cwk_state[di.cur_wk_state - '0']); } else if (di.cur_wk_state == '1') { - n = snprintf(buf, PAGE_SIZE, "WK CUR: %s 0x", - cwk_state[di.cur_wk_state - '0']); + n = scnprintf(buf, PAGE_SIZE, "WK CUR: %s 0x", + cwk_state[di.cur_wk_state - '0']); bin2hex(buf + n, di.cur_wkvp, sizeof(di.cur_wkvp)); n += 2 * sizeof(di.cur_wkvp); n += scnprintf(buf + n, PAGE_SIZE - n, "\n"); } else - n = snprintf(buf, PAGE_SIZE, "WK CUR: - -\n"); + n = scnprintf(buf, PAGE_SIZE, "WK CUR: - -\n"); if (di.new_wk_state == '0') { n += scnprintf(buf + n, PAGE_SIZE - n, "WK NEW: %s -\n", diff --git a/drivers/s390/crypto/zcrypt_queue.c b/drivers/s390/crypto/zcrypt_queue.c index 522c4bc69a08..b7d9fa567880 100644 --- a/drivers/s390/crypto/zcrypt_queue.c +++ b/drivers/s390/crypto/zcrypt_queue.c @@ -42,7 +42,7 @@ static ssize_t online_show(struct device *dev, { struct zcrypt_queue *zq = to_ap_queue(dev)->private; - return snprintf(buf, PAGE_SIZE, "%d\n", zq->online); + return scnprintf(buf, PAGE_SIZE, "%d\n", zq->online); } static ssize_t online_store(struct device *dev, @@ -78,7 +78,7 @@ static ssize_t load_show(struct device *dev, { struct zcrypt_queue *zq = to_ap_queue(dev)->private; - return snprintf(buf, PAGE_SIZE, "%d\n", atomic_read(&zq->load)); + return scnprintf(buf, PAGE_SIZE, "%d\n", atomic_read(&zq->load)); } static DEVICE_ATTR_RO(load); From 1a2ae03b1938b050c3bbd79e79d5075e0307fe20 Mon Sep 17 00:00:00 2001 From: Gerald Schaefer Date: Thu, 20 Feb 2020 16:22:30 +0100 Subject: [PATCH 31/57] s390/ipl: add support to control memory clearing for FCP and CCW re-IPL Re-IPL for both CCW and FCP is currently done by using diag 308 with the "Load Clear" subcode, which means that all memory will be cleared. This can increase re-IPL duration considerably on very large machines. For CCW devices, there is also a "Load Normal" subcode that was only used for dump kernels so far. For FCP devices, a similar "Load Normal" subcode was introduced with z14. The "Load Normal" diag 308 subcode allows to re-IPL without clearing memory. This patch adds a new "clear" sysfs attribute to /sys/firmware/reipl for both the ccw and fcp subdirectories, which can be set to either "0" or "1" to disable or enable re-IPL with memory clearing. The default value is "0", which disables memory clearing. Signed-off-by: Gerald Schaefer Reviewed-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/ipl.h | 1 + arch/s390/kernel/ipl.c | 73 +++++++++++++++++++++++++++++++++---- 2 files changed, 67 insertions(+), 7 deletions(-) diff --git a/arch/s390/include/asm/ipl.h b/arch/s390/include/asm/ipl.h index 084e71b7272a..b63bd66404b8 100644 --- a/arch/s390/include/asm/ipl.h +++ b/arch/s390/include/asm/ipl.h @@ -119,6 +119,7 @@ enum diag308_subcode { DIAG308_LOAD_NORMAL_DUMP = 4, DIAG308_SET = 5, DIAG308_STORE = 6, + DIAG308_LOAD_NORMAL = 7, }; enum diag308_rc { diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index 6837affc19e8..4a71061974fd 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -144,6 +144,9 @@ static struct ipl_parameter_block *dump_block_ccw; static struct sclp_ipl_info sclp_ipl_info; +static bool reipl_fcp_clear; +static bool reipl_ccw_clear; + static inline int __diag308(unsigned long subcode, void *addr) { register unsigned long _addr asm("0") = (unsigned long) addr; @@ -691,6 +694,21 @@ static struct kobj_attribute sys_reipl_fcp_loadparm_attr = __ATTR(loadparm, S_IRUGO | S_IWUSR, reipl_fcp_loadparm_show, reipl_fcp_loadparm_store); +static ssize_t reipl_fcp_clear_show(struct kobject *kobj, + struct kobj_attribute *attr, char *page) +{ + return sprintf(page, "%u\n", reipl_fcp_clear); +} + +static ssize_t reipl_fcp_clear_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t len) +{ + if (strtobool(buf, &reipl_fcp_clear) < 0) + return -EINVAL; + return len; +} + static struct attribute *reipl_fcp_attrs[] = { &sys_reipl_fcp_device_attr.attr, &sys_reipl_fcp_wwpn_attr.attr, @@ -706,6 +724,9 @@ static struct attribute_group reipl_fcp_attr_group = { .bin_attrs = reipl_fcp_bin_attrs, }; +static struct kobj_attribute sys_reipl_fcp_clear_attr = + __ATTR(clear, 0644, reipl_fcp_clear_show, reipl_fcp_clear_store); + /* CCW reipl device attributes */ DEFINE_IPL_CCW_ATTR_RW(reipl_ccw, device, reipl_block_ccw->ccw); @@ -741,16 +762,36 @@ static struct kobj_attribute sys_reipl_ccw_loadparm_attr = __ATTR(loadparm, S_IRUGO | S_IWUSR, reipl_ccw_loadparm_show, reipl_ccw_loadparm_store); +static ssize_t reipl_ccw_clear_show(struct kobject *kobj, + struct kobj_attribute *attr, char *page) +{ + return sprintf(page, "%u\n", reipl_ccw_clear); +} + +static ssize_t reipl_ccw_clear_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t len) +{ + if (strtobool(buf, &reipl_ccw_clear) < 0) + return -EINVAL; + return len; +} + +static struct kobj_attribute sys_reipl_ccw_clear_attr = + __ATTR(clear, 0644, reipl_ccw_clear_show, reipl_ccw_clear_store); + static struct attribute *reipl_ccw_attrs_vm[] = { &sys_reipl_ccw_device_attr.attr, &sys_reipl_ccw_loadparm_attr.attr, &sys_reipl_ccw_vmparm_attr.attr, + &sys_reipl_ccw_clear_attr.attr, NULL, }; static struct attribute *reipl_ccw_attrs_lpar[] = { &sys_reipl_ccw_device_attr.attr, &sys_reipl_ccw_loadparm_attr.attr, + &sys_reipl_ccw_clear_attr.attr, NULL, }; @@ -892,11 +933,17 @@ static void __reipl_run(void *unused) switch (reipl_type) { case IPL_TYPE_CCW: diag308(DIAG308_SET, reipl_block_ccw); - diag308(DIAG308_LOAD_CLEAR, NULL); + if (reipl_ccw_clear) + diag308(DIAG308_LOAD_CLEAR, NULL); + else + diag308(DIAG308_LOAD_NORMAL_DUMP, NULL); break; case IPL_TYPE_FCP: diag308(DIAG308_SET, reipl_block_fcp); - diag308(DIAG308_LOAD_CLEAR, NULL); + if (reipl_fcp_clear) + diag308(DIAG308_LOAD_CLEAR, NULL); + else + diag308(DIAG308_LOAD_NORMAL, NULL); break; case IPL_TYPE_NSS: diag308(DIAG308_SET, reipl_block_nss); @@ -1008,11 +1055,16 @@ static int __init reipl_fcp_init(void) } rc = sysfs_create_group(&reipl_fcp_kset->kobj, &reipl_fcp_attr_group); - if (rc) { - kset_unregister(reipl_fcp_kset); - free_page((unsigned long) reipl_block_fcp); - return rc; - } + if (rc) + goto out1; + + if (test_facility(141)) { + rc = sysfs_create_file(&reipl_fcp_kset->kobj, + &sys_reipl_fcp_clear_attr.attr); + if (rc) + goto out2; + } else + reipl_fcp_clear = true; if (ipl_info.type == IPL_TYPE_FCP) { memcpy(reipl_block_fcp, &ipl_block, sizeof(ipl_block)); @@ -1032,6 +1084,13 @@ static int __init reipl_fcp_init(void) } reipl_capabilities |= IPL_TYPE_FCP; return 0; + +out2: + sysfs_remove_group(&reipl_fcp_kset->kobj, &reipl_fcp_attr_group); +out1: + kset_unregister(reipl_fcp_kset); + free_page((unsigned long) reipl_block_fcp); + return rc; } static int __init reipl_type_init(void) From 959684978d5a8443cfb0ed59a9d1fc59d2a80d09 Mon Sep 17 00:00:00 2001 From: Alexander Gordeev Date: Fri, 13 Mar 2020 16:52:44 +0100 Subject: [PATCH 32/57] s390/cpuinfo: show number of online cores Show number of cores that run at least one SMT thread Signed-off-by: Alexander Gordeev Reviewed-by: Heiko Carstens Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/topology.h | 3 +++ arch/s390/kernel/processor.c | 1 + arch/s390/kernel/topology.c | 20 ++++++++++++++++++-- 3 files changed, 22 insertions(+), 2 deletions(-) diff --git a/arch/s390/include/asm/topology.h b/arch/s390/include/asm/topology.h index 56b14616fb6b..fbb507504a3b 100644 --- a/arch/s390/include/asm/topology.h +++ b/arch/s390/include/asm/topology.h @@ -17,6 +17,7 @@ struct cpu_topology_s390 { unsigned short book_id; unsigned short drawer_id; unsigned short dedicated : 1; + int booted_cores; cpumask_t thread_mask; cpumask_t core_mask; cpumask_t book_mask; @@ -35,6 +36,7 @@ extern struct cpu_topology_s390 cpu_topology[NR_CPUS]; #define topology_drawer_id(cpu) (cpu_topology[cpu].drawer_id) #define topology_drawer_cpumask(cpu) (&cpu_topology[cpu].drawer_mask) #define topology_cpu_dedicated(cpu) (cpu_topology[cpu].dedicated) +#define topology_booted_cores(cpu) (cpu_topology[cpu].booted_cores) #define mc_capable() 1 @@ -53,6 +55,7 @@ static inline void topology_init_early(void) { } static inline void topology_schedule_update(void) { } static inline int topology_cpu_init(struct cpu *cpu) { return 0; } static inline int topology_cpu_dedicated(int cpu_nr) { return 0; } +static inline int topology_booted_cores(int cpu_nr) { return 1; } static inline void update_cpu_masks(void) { } static inline void topology_expect_change(void) { } diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c index b98654d0ce41..41172093160e 100644 --- a/arch/s390/kernel/processor.c +++ b/arch/s390/kernel/processor.c @@ -160,6 +160,7 @@ static void show_cpu_topology(struct seq_file *m, unsigned long n) seq_printf(m, "drawer id : %d\n", topology_drawer_id(n)); seq_printf(m, "dedicated : %d\n", topology_cpu_dedicated(n)); seq_printf(m, "address : %d\n", smp_cpu_get_cpu_address(n)); + seq_printf(m, "cpu cores : %d\n", topology_booted_cores(n)); #endif /* CONFIG_SCHED_TOPOLOGY */ } diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c index 09711d55f123..d03edebce754 100644 --- a/arch/s390/kernel/topology.c +++ b/arch/s390/kernel/topology.c @@ -245,8 +245,8 @@ int topology_set_cpu_management(int fc) void update_cpu_masks(void) { - struct cpu_topology_s390 *topo; - int cpu, id; + struct cpu_topology_s390 *topo, *topo_package, *topo_sibling; + int cpu, sibling, pkg_first, smt_first, id; for_each_possible_cpu(cpu) { topo = &cpu_topology[cpu]; @@ -254,6 +254,7 @@ void update_cpu_masks(void) topo->core_mask = cpu_group_map(&socket_info, cpu); topo->book_mask = cpu_group_map(&book_info, cpu); topo->drawer_mask = cpu_group_map(&drawer_info, cpu); + topo->booted_cores = 0; if (topology_mode != TOPOLOGY_MODE_HW) { id = topology_mode == TOPOLOGY_MODE_PACKAGE ? 0 : cpu; topo->thread_id = cpu; @@ -263,6 +264,21 @@ void update_cpu_masks(void) topo->drawer_id = id; } } + for_each_online_cpu(cpu) { + topo = &cpu_topology[cpu]; + pkg_first = cpumask_first(&topo->core_mask); + topo_package = &cpu_topology[pkg_first]; + if (cpu == pkg_first) { + for_each_cpu(sibling, &topo->core_mask) { + topo_sibling = &cpu_topology[sibling]; + smt_first = cpumask_first(&topo_sibling->thread_mask); + if (sibling == smt_first) + topo_package->booted_cores++; + } + } else { + topo->booted_cores = topo_package->booted_cores; + } + } } void store_topology(struct sysinfo_15_1_x *info) From 2db52dc353146e684d0b3ec4060f00ebefc5c4d2 Mon Sep 17 00:00:00 2001 From: Alexander Gordeev Date: Fri, 13 Mar 2020 16:55:10 +0100 Subject: [PATCH 33/57] s390/cpuinfo: show number of online CPUs within a package Show number of online CPUs within a package (which is the socket in case of s390). For what it worth, present that value as "siblings" field - just like x86 does. Signed-off-by: Alexander Gordeev Reviewed-by: Heiko Carstens Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/kernel/processor.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c index 41172093160e..39bf777e140e 100644 --- a/arch/s390/kernel/processor.c +++ b/arch/s390/kernel/processor.c @@ -160,6 +160,7 @@ static void show_cpu_topology(struct seq_file *m, unsigned long n) seq_printf(m, "drawer id : %d\n", topology_drawer_id(n)); seq_printf(m, "dedicated : %d\n", topology_cpu_dedicated(n)); seq_printf(m, "address : %d\n", smp_cpu_get_cpu_address(n)); + seq_printf(m, "siblings : %d\n", cpumask_weight(topology_core_cpumask(n))); seq_printf(m, "cpu cores : %d\n", topology_booted_cores(n)); #endif /* CONFIG_SCHED_TOPOLOGY */ } From 872f27103874a73783aeff2aac2b41a489f67d7c Mon Sep 17 00:00:00 2001 From: Alexander Gordeev Date: Mon, 16 Mar 2020 12:39:55 +0100 Subject: [PATCH 34/57] s390/cpuinfo: fix wrong output when CPU0 is offline /proc/cpuinfo should not print information about CPU 0 when it is offline. Fixes: 281eaa8cb67c ("s390/cpuinfo: simplify locking and skip offline cpus early") Signed-off-by: Alexander Gordeev Reviewed-by: Heiko Carstens [heiko.carstens@de.ibm.com: shortened commit message] Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/kernel/processor.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c index 39bf777e140e..f36acc8d2631 100644 --- a/arch/s390/kernel/processor.c +++ b/arch/s390/kernel/processor.c @@ -188,8 +188,9 @@ static void show_cpu_mhz(struct seq_file *m, unsigned long n) static int show_cpuinfo(struct seq_file *m, void *v) { unsigned long n = (unsigned long) v - 1; + unsigned long first = cpumask_first(cpu_online_mask); - if (!n) + if (n == first) show_cpu_summary(m, v); if (!machine_has_cpu_mhz) return 0; @@ -204,6 +205,8 @@ static inline void *c_update(loff_t *pos) { if (*pos) *pos = cpumask_next(*pos - 1, cpu_online_mask); + else + *pos = cpumask_first(cpu_online_mask); return *pos < nr_cpu_ids ? (void *)*pos + 1 : NULL; } From 1b648dfd544bd9d486926e221743b9bd143d7eca Mon Sep 17 00:00:00 2001 From: Alexander Gordeev Date: Mon, 16 Mar 2020 10:25:44 +0100 Subject: [PATCH 35/57] s390/cpuinfo: do not skip info for CPUs without MHz feature In the past there were no per-CPU information in /proc/cpuinfo other than CPU frequency. Hence, for machines without CPU MHz feature there were nothing to show. Now CPU topology and IDs still could be shown, so do not skip this information from the output. Signed-off-by: Alexander Gordeev Reviewed-by: Heiko Carstens [heiko.carstens@de.ibm.com: moved comparison] Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/kernel/processor.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c index f36acc8d2631..c92d04f876cb 100644 --- a/arch/s390/kernel/processor.c +++ b/arch/s390/kernel/processor.c @@ -178,6 +178,8 @@ static void show_cpu_mhz(struct seq_file *m, unsigned long n) { struct cpu_info *c = per_cpu_ptr(&cpu_info, n); + if (!machine_has_cpu_mhz) + return; seq_printf(m, "cpu MHz dynamic : %d\n", c->cpu_mhz_dynamic); seq_printf(m, "cpu MHz static : %d\n", c->cpu_mhz_static); } @@ -192,8 +194,6 @@ static int show_cpuinfo(struct seq_file *m, void *v) if (n == first) show_cpu_summary(m, v); - if (!machine_has_cpu_mhz) - return 0; seq_printf(m, "\ncpu number : %ld\n", n); show_cpu_topology(m, n); show_cpu_ids(m, n); From 394216275c7d503d966317da9a01ad6626a6091d Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 18 Mar 2020 20:55:24 +0100 Subject: [PATCH 36/57] s390: remove broken hibernate / power management support Hibernation is known to be broken for many years on s390. Given that there aren't any real use cases, remove the code instead of spending time to fix and maintain it. Without hibernate support it doesn't make too much sense to keep power management support; therefore remove it completely. Acked-by: Christian Borntraeger Acked-by: Peter Oberparleiter Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/Kconfig | 10 -- arch/s390/kernel/Makefile | 1 - arch/s390/kernel/machine_kexec.c | 31 ---- arch/s390/kernel/suspend.c | 240 --------------------------- arch/s390/kernel/swsusp.S | 276 ------------------------------- arch/s390/mm/cmm.c | 46 +----- arch/s390/mm/pageattr.c | 16 -- arch/s390/pci/pci.c | 43 ----- 8 files changed, 3 insertions(+), 660 deletions(-) delete mode 100644 arch/s390/kernel/suspend.c delete mode 100644 arch/s390/kernel/swsusp.S diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 334f3f2199e8..0e2ad7b2e073 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -102,7 +102,6 @@ config S390 select ARCH_INLINE_WRITE_UNLOCK_IRQ select ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE select ARCH_KEEP_MEMBLOCK - select ARCH_SAVE_PAGE_KEYS if HIBERNATION select ARCH_STACKWALK select ARCH_SUPPORTS_ATOMIC_RMW select ARCH_SUPPORTS_NUMA_BALANCING @@ -810,15 +809,6 @@ config SECCOMP If unsure, say Y. -menu "Power Management" - -config ARCH_HIBERNATION_POSSIBLE - def_bool y - -source "kernel/power/Kconfig" - -endmenu - config CCW def_bool y diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index 2b1203cf7be6..10f80071f945 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -54,7 +54,6 @@ CFLAGS_REMOVE_nospec-branch.o += $(CC_FLAGS_EXPOLINE) obj-$(CONFIG_MODULES) += module.o obj-$(CONFIG_SCHED_TOPOLOGY) += topology.o -obj-$(CONFIG_HIBERNATION) += suspend.o swsusp.o obj-$(CONFIG_AUDIT) += audit.o compat-obj-$(CONFIG_AUDIT) += compat_audit.o obj-$(CONFIG_COMPAT) += compat_linux.o compat_signal.o diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c index cb8b1cc285c9..3a854cb5a4c6 100644 --- a/arch/s390/kernel/machine_kexec.c +++ b/arch/s390/kernel/machine_kexec.c @@ -14,7 +14,6 @@ #include #include #include -#include #include #include #include @@ -38,36 +37,6 @@ extern const unsigned long long relocate_kernel_len; #ifdef CONFIG_CRASH_DUMP -/* - * PM notifier callback for kdump - */ -static int machine_kdump_pm_cb(struct notifier_block *nb, unsigned long action, - void *ptr) -{ - switch (action) { - case PM_SUSPEND_PREPARE: - case PM_HIBERNATION_PREPARE: - if (kexec_crash_image) - arch_kexec_unprotect_crashkres(); - break; - case PM_POST_SUSPEND: - case PM_POST_HIBERNATION: - if (kexec_crash_image) - arch_kexec_protect_crashkres(); - break; - default: - return NOTIFY_DONE; - } - return NOTIFY_OK; -} - -static int __init machine_kdump_pm_init(void) -{ - pm_notifier(machine_kdump_pm_cb, 0); - return 0; -} -arch_initcall(machine_kdump_pm_init); - /* * Reset the system, copy boot CPU registers to absolute zero, * and jump to the kdump image diff --git a/arch/s390/kernel/suspend.c b/arch/s390/kernel/suspend.c deleted file mode 100644 index 75b7b307946e..000000000000 --- a/arch/s390/kernel/suspend.c +++ /dev/null @@ -1,240 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Suspend support specific for s390. - * - * Copyright IBM Corp. 2009 - * - * Author(s): Hans-Joachim Picht - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include "entry.h" - -/* - * The restore of the saved pages in an hibernation image will set - * the change and referenced bits in the storage key for each page. - * Overindication of the referenced bits after an hibernation cycle - * does not cause any harm but the overindication of the change bits - * would cause trouble. - * Use the ARCH_SAVE_PAGE_KEYS hooks to save the storage key of each - * page to the most significant byte of the associated page frame - * number in the hibernation image. - */ - -/* - * Key storage is allocated as a linked list of pages. - * The size of the keys array is (PAGE_SIZE - sizeof(long)) - */ -struct page_key_data { - struct page_key_data *next; - unsigned char data[]; -}; - -#define PAGE_KEY_DATA_SIZE (PAGE_SIZE - sizeof(struct page_key_data *)) - -static struct page_key_data *page_key_data; -static struct page_key_data *page_key_rp, *page_key_wp; -static unsigned long page_key_rx, page_key_wx; -unsigned long suspend_zero_pages; - -/* - * For each page in the hibernation image one additional byte is - * stored in the most significant byte of the page frame number. - * On suspend no additional memory is required but on resume the - * keys need to be memorized until the page data has been restored. - * Only then can the storage keys be set to their old state. - */ -unsigned long page_key_additional_pages(unsigned long pages) -{ - return DIV_ROUND_UP(pages, PAGE_KEY_DATA_SIZE); -} - -/* - * Free page_key_data list of arrays. - */ -void page_key_free(void) -{ - struct page_key_data *pkd; - - while (page_key_data) { - pkd = page_key_data; - page_key_data = pkd->next; - free_page((unsigned long) pkd); - } -} - -/* - * Allocate page_key_data list of arrays with enough room to store - * one byte for each page in the hibernation image. - */ -int page_key_alloc(unsigned long pages) -{ - struct page_key_data *pk; - unsigned long size; - - size = DIV_ROUND_UP(pages, PAGE_KEY_DATA_SIZE); - while (size--) { - pk = (struct page_key_data *) get_zeroed_page(GFP_KERNEL); - if (!pk) { - page_key_free(); - return -ENOMEM; - } - pk->next = page_key_data; - page_key_data = pk; - } - page_key_rp = page_key_wp = page_key_data; - page_key_rx = page_key_wx = 0; - return 0; -} - -/* - * Save the storage key into the upper 8 bits of the page frame number. - */ -void page_key_read(unsigned long *pfn) -{ - struct page *page; - unsigned long addr; - unsigned char key; - - page = pfn_to_page(*pfn); - addr = (unsigned long) page_address(page); - key = (unsigned char) page_get_storage_key(addr) & 0x7f; - if (arch_test_page_nodat(page)) - key |= 0x80; - *(unsigned char *) pfn = key; -} - -/* - * Extract the storage key from the upper 8 bits of the page frame number - * and store it in the page_key_data list of arrays. - */ -void page_key_memorize(unsigned long *pfn) -{ - page_key_wp->data[page_key_wx] = *(unsigned char *) pfn; - *(unsigned char *) pfn = 0; - if (++page_key_wx < PAGE_KEY_DATA_SIZE) - return; - page_key_wp = page_key_wp->next; - page_key_wx = 0; -} - -/* - * Get the next key from the page_key_data list of arrays and set the - * storage key of the page referred by @address. If @address refers to - * a "safe" page the swsusp_arch_resume code will transfer the storage - * key from the buffer page to the original page. - */ -void page_key_write(void *address) -{ - struct page *page; - unsigned char key; - - key = page_key_rp->data[page_key_rx]; - page_set_storage_key((unsigned long) address, key & 0x7f, 0); - page = virt_to_page(address); - if (key & 0x80) - arch_set_page_nodat(page, 0); - else - arch_set_page_dat(page, 0); - if (++page_key_rx >= PAGE_KEY_DATA_SIZE) - return; - page_key_rp = page_key_rp->next; - page_key_rx = 0; -} - -int pfn_is_nosave(unsigned long pfn) -{ - unsigned long nosave_begin_pfn = PFN_DOWN(__pa(&__nosave_begin)); - unsigned long nosave_end_pfn = PFN_DOWN(__pa(&__nosave_end)); - unsigned long end_rodata_pfn = PFN_DOWN(__pa(__end_rodata)) - 1; - unsigned long stext_pfn = PFN_DOWN(__pa(_stext)); - - /* Always save lowcore pages (LC protection might be enabled). */ - if (pfn <= LC_PAGES) - return 0; - if (pfn >= nosave_begin_pfn && pfn < nosave_end_pfn) - return 1; - /* Skip memory holes and read-only pages (DCSS, ...). */ - if (pfn >= stext_pfn && pfn <= end_rodata_pfn) - return 0; - if (tprot(PFN_PHYS(pfn))) - return 1; - return 0; -} - -/* - * PM notifier callback for suspend - */ -static int suspend_pm_cb(struct notifier_block *nb, unsigned long action, - void *ptr) -{ - switch (action) { - case PM_SUSPEND_PREPARE: - case PM_HIBERNATION_PREPARE: - suspend_zero_pages = __get_free_pages(GFP_KERNEL, LC_ORDER); - if (!suspend_zero_pages) - return NOTIFY_BAD; - break; - case PM_POST_SUSPEND: - case PM_POST_HIBERNATION: - free_pages(suspend_zero_pages, LC_ORDER); - break; - default: - return NOTIFY_DONE; - } - return NOTIFY_OK; -} - -static int __init suspend_pm_init(void) -{ - pm_notifier(suspend_pm_cb, 0); - return 0; -} -arch_initcall(suspend_pm_init); - -void save_processor_state(void) -{ - /* swsusp_arch_suspend() actually saves all cpu register contents. - * Machine checks must be disabled since swsusp_arch_suspend() stores - * register contents to their lowcore save areas. That's the same - * place where register contents on machine checks would be saved. - * To avoid register corruption disable machine checks. - * We must also disable machine checks in the new psw mask for - * program checks, since swsusp_arch_suspend() may generate program - * checks. Disabling machine checks for all other new psw masks is - * just paranoia. - */ - local_mcck_disable(); - /* Disable lowcore protection */ - __ctl_clear_bit(0,28); - S390_lowcore.external_new_psw.mask &= ~PSW_MASK_MCHECK; - S390_lowcore.svc_new_psw.mask &= ~PSW_MASK_MCHECK; - S390_lowcore.io_new_psw.mask &= ~PSW_MASK_MCHECK; - S390_lowcore.program_new_psw.mask &= ~PSW_MASK_MCHECK; -} - -void restore_processor_state(void) -{ - S390_lowcore.external_new_psw.mask |= PSW_MASK_MCHECK; - S390_lowcore.svc_new_psw.mask |= PSW_MASK_MCHECK; - S390_lowcore.io_new_psw.mask |= PSW_MASK_MCHECK; - S390_lowcore.program_new_psw.mask |= PSW_MASK_MCHECK; - /* Enable lowcore protection */ - __ctl_set_bit(0,28); - local_mcck_enable(); -} - -/* Called at the end of swsusp_arch_resume */ -void s390_early_resume(void) -{ - lgr_info_log(); - channel_subsystem_reinit(); - zpci_rescan(); -} diff --git a/arch/s390/kernel/swsusp.S b/arch/s390/kernel/swsusp.S deleted file mode 100644 index a7baf0b5f818..000000000000 --- a/arch/s390/kernel/swsusp.S +++ /dev/null @@ -1,276 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * S390 64-bit swsusp implementation - * - * Copyright IBM Corp. 2009 - * - * Author(s): Hans-Joachim Picht - * Michael Holzheu - */ - -#include -#include -#include -#include -#include -#include -#include - -/* - * Save register context in absolute 0 lowcore and call swsusp_save() to - * create in-memory kernel image. The context is saved in the designated - * "store status" memory locations (see POP). - * We return from this function twice. The first time during the suspend to - * disk process. The second time via the swsusp_arch_resume() function - * (see below) in the resume process. - * This function runs with disabled interrupts. - */ - GEN_BR_THUNK %r14 - - .section .text -ENTRY(swsusp_arch_suspend) - lg %r1,__LC_NODAT_STACK - stmg %r6,%r15,__SF_GPRS(%r1) - aghi %r1,-STACK_FRAME_OVERHEAD - stg %r15,__SF_BACKCHAIN(%r1) - lgr %r15,%r1 - - /* Store FPU registers */ - brasl %r14,save_fpu_regs - - /* Deactivate DAT */ - stnsm __SF_EMPTY(%r15),0xfb - - /* Store prefix register on stack */ - stpx __SF_EMPTY(%r15) - - /* Save prefix register contents for lowcore copy */ - llgf %r10,__SF_EMPTY(%r15) - - /* Get pointer to save area */ - lghi %r1,0x1000 - - /* Save CPU address */ - stap __LC_EXT_CPU_ADDR(%r0) - - /* Store registers */ - mvc 0x318(4,%r1),__SF_EMPTY(%r15) /* move prefix to lowcore */ - stam %a0,%a15,0x340(%r1) /* store access registers */ - stctg %c0,%c15,0x380(%r1) /* store control registers */ - stmg %r0,%r15,0x280(%r1) /* store general registers */ - - stpt 0x328(%r1) /* store timer */ - stck __SF_EMPTY(%r15) /* store clock */ - stckc 0x330(%r1) /* store clock comparator */ - - /* Update cputime accounting before going to sleep */ - lg %r0,__LC_LAST_UPDATE_TIMER - slg %r0,0x328(%r1) - alg %r0,__LC_SYSTEM_TIMER - stg %r0,__LC_SYSTEM_TIMER - mvc __LC_LAST_UPDATE_TIMER(8),0x328(%r1) - lg %r0,__LC_LAST_UPDATE_CLOCK - slg %r0,__SF_EMPTY(%r15) - alg %r0,__LC_STEAL_TIMER - stg %r0,__LC_STEAL_TIMER - mvc __LC_LAST_UPDATE_CLOCK(8),__SF_EMPTY(%r15) - - /* Activate DAT */ - stosm __SF_EMPTY(%r15),0x04 - - /* Set prefix page to zero */ - xc __SF_EMPTY(4,%r15),__SF_EMPTY(%r15) - spx __SF_EMPTY(%r15) - - /* Save absolute zero pages */ - larl %r2,suspend_zero_pages - lg %r2,0(%r2) - lghi %r4,0 - lghi %r3,2*PAGE_SIZE - lghi %r5,2*PAGE_SIZE -1: mvcle %r2,%r4,0 - jo 1b - - /* Copy lowcore to absolute zero lowcore */ - lghi %r2,0 - lgr %r4,%r10 - lghi %r3,2*PAGE_SIZE - lghi %r5,2*PAGE_SIZE -1: mvcle %r2,%r4,0 - jo 1b - - /* Save image */ - brasl %r14,swsusp_save - - /* Restore prefix register and return */ - lghi %r1,0x1000 - spx 0x318(%r1) - lmg %r6,%r15,STACK_FRAME_OVERHEAD + __SF_GPRS(%r15) - lghi %r2,0 - BR_EX %r14 -ENDPROC(swsusp_arch_suspend) - -/* - * Restore saved memory image to correct place and restore register context. - * Then we return to the function that called swsusp_arch_suspend(). - * swsusp_arch_resume() runs with disabled interrupts. - */ -ENTRY(swsusp_arch_resume) - stmg %r6,%r15,__SF_GPRS(%r15) - lgr %r1,%r15 - aghi %r15,-STACK_FRAME_OVERHEAD - stg %r1,__SF_BACKCHAIN(%r15) - - /* Make all free pages stable */ - lghi %r2,1 - brasl %r14,arch_set_page_states - - /* Set prefix page to zero */ - xc __SF_EMPTY(4,%r15),__SF_EMPTY(%r15) - spx __SF_EMPTY(%r15) - - /* Deactivate DAT */ - stnsm __SF_EMPTY(%r15),0xfb - - /* Restore saved image */ - larl %r1,restore_pblist - lg %r1,0(%r1) - ltgr %r1,%r1 - jz 2f -0: - lg %r2,8(%r1) - lg %r4,0(%r1) - iske %r0,%r4 - lghi %r3,PAGE_SIZE - lghi %r5,PAGE_SIZE -1: - mvcle %r2,%r4,0 - jo 1b - lg %r2,8(%r1) - sske %r0,%r2 - lg %r1,16(%r1) - ltgr %r1,%r1 - jnz 0b -2: - ptlb /* flush tlb */ - - /* Reset System */ - larl %r1,.Lnew_pgm_check_psw - epsw %r2,%r3 - stm %r2,%r3,0(%r1) - mvc __LC_PGM_NEW_PSW(16,%r0),0(%r1) - larl %r1,__swsusp_reset_dma - lg %r1,0(%r1) - BASR_EX %r14,%r1 - larl %r1,smp_cpu_mt_shift - icm %r1,15,0(%r1) - jz smt_done - llgfr %r1,%r1 -smt_loop: - sigp %r1,%r0,SIGP_SET_MULTI_THREADING - brc 8,smt_done /* accepted */ - brc 2,smt_loop /* busy, try again */ -smt_done: - larl %r1,.Lnew_pgm_check_psw - lpswe 0(%r1) -pgm_check_entry: - - /* Switch to original suspend CPU */ - larl %r1,.Lresume_cpu /* Resume CPU address: r2 */ - stap 0(%r1) - llgh %r2,0(%r1) - llgh %r1,__LC_EXT_CPU_ADDR(%r0) /* Suspend CPU address: r1 */ - cgr %r1,%r2 - je restore_registers /* r1 = r2 -> nothing to do */ - larl %r4,.Lrestart_suspend_psw /* Set new restart PSW */ - mvc __LC_RST_NEW_PSW(16,%r0),0(%r4) -3: - sigp %r9,%r1,SIGP_INITIAL_CPU_RESET /* sigp initial cpu reset */ - brc 8,4f /* accepted */ - brc 2,3b /* busy, try again */ - - /* Suspend CPU not available -> panic */ - larl %r15,init_thread_union+THREAD_SIZE-STACK_FRAME_OVERHEAD - larl %r2,.Lpanic_string - brasl %r14,sclp_early_printk_force - larl %r3,.Ldisabled_wait_31 - lpsw 0(%r3) -4: - /* Switch to suspend CPU */ - sigp %r9,%r1,SIGP_RESTART /* sigp restart to suspend CPU */ - brc 2,4b /* busy, try again */ -5: - sigp %r9,%r2,SIGP_STOP /* sigp stop to current resume CPU */ - brc 2,5b /* busy, try again */ -6: j 6b - -restart_suspend: - larl %r1,.Lresume_cpu - llgh %r2,0(%r1) -7: - sigp %r9,%r2,SIGP_SENSE /* sigp sense, wait for resume CPU */ - brc 8,7b /* accepted, status 0, still running */ - brc 2,7b /* busy, try again */ - tmll %r9,0x40 /* Test if resume CPU is stopped */ - jz 7b - -restore_registers: - /* Restore registers */ - lghi %r13,0x1000 /* %r1 = pointer to save area */ - - /* Ignore time spent in suspended state. */ - llgf %r1,0x318(%r13) - stck __LC_LAST_UPDATE_CLOCK(%r1) - spt 0x328(%r13) /* reprogram timer */ - //sckc 0x330(%r13) /* set clock comparator */ - - lctlg %c0,%c15,0x380(%r13) /* load control registers */ - lam %a0,%a15,0x340(%r13) /* load access registers */ - - /* Load old stack */ - lg %r15,0x2f8(%r13) - - /* Save prefix register */ - mvc __SF_EMPTY(4,%r15),0x318(%r13) - - /* Restore absolute zero pages */ - lghi %r2,0 - larl %r4,suspend_zero_pages - lg %r4,0(%r4) - lghi %r3,2*PAGE_SIZE - lghi %r5,2*PAGE_SIZE -1: mvcle %r2,%r4,0 - jo 1b - - /* Restore prefix register */ - spx __SF_EMPTY(%r15) - - /* Activate DAT */ - stosm __SF_EMPTY(%r15),0x04 - - /* Make all free pages unstable */ - lghi %r2,0 - brasl %r14,arch_set_page_states - - /* Call arch specific early resume code */ - brasl %r14,s390_early_resume - - /* Return 0 */ - lmg %r6,%r15,STACK_FRAME_OVERHEAD + __SF_GPRS(%r15) - lghi %r2,0 - BR_EX %r14 -ENDPROC(swsusp_arch_resume) - - .section .data..nosave,"aw",@progbits - .align 8 -.Ldisabled_wait_31: - .long 0x000a0000,0x00000000 -.Lpanic_string: - .asciz "Resume not possible because suspend CPU is no longer available\n" - .align 8 -.Lrestart_suspend_psw: - .quad 0x0000000180000000,restart_suspend -.Lnew_pgm_check_psw: - .quad 0,pgm_check_entry -.Lresume_cpu: - .byte 0,0 diff --git a/arch/s390/mm/cmm.c b/arch/s390/mm/cmm.c index a51c892f14f3..ae989b740376 100644 --- a/arch/s390/mm/cmm.c +++ b/arch/s390/mm/cmm.c @@ -19,7 +19,6 @@ #include #include #include -#include #include #include @@ -49,7 +48,6 @@ static volatile long cmm_pages_target; static volatile long cmm_timed_pages_target; static long cmm_timeout_pages; static long cmm_timeout_seconds; -static int cmm_suspended; static struct cmm_page_array *cmm_page_list; static struct cmm_page_array *cmm_timed_page_list; @@ -151,9 +149,9 @@ static int cmm_thread(void *dummy) while (1) { rc = wait_event_interruptible(cmm_thread_wait, - (!cmm_suspended && (cmm_pages != cmm_pages_target || - cmm_timed_pages != cmm_timed_pages_target)) || - kthread_should_stop()); + cmm_pages != cmm_pages_target || + cmm_timed_pages != cmm_timed_pages_target || + kthread_should_stop()); if (kthread_should_stop() || rc == -ERESTARTSYS) { cmm_pages_target = cmm_pages; cmm_timed_pages_target = cmm_timed_pages; @@ -390,38 +388,6 @@ static void cmm_smsg_target(const char *from, char *msg) static struct ctl_table_header *cmm_sysctl_header; -static int cmm_suspend(void) -{ - cmm_suspended = 1; - cmm_free_pages(cmm_pages, &cmm_pages, &cmm_page_list); - cmm_free_pages(cmm_timed_pages, &cmm_timed_pages, &cmm_timed_page_list); - return 0; -} - -static int cmm_resume(void) -{ - cmm_suspended = 0; - cmm_kick_thread(); - return 0; -} - -static int cmm_power_event(struct notifier_block *this, - unsigned long event, void *ptr) -{ - switch (event) { - case PM_POST_HIBERNATION: - return cmm_resume(); - case PM_HIBERNATION_PREPARE: - return cmm_suspend(); - default: - return NOTIFY_DONE; - } -} - -static struct notifier_block cmm_power_notifier = { - .notifier_call = cmm_power_event, -}; - static int __init cmm_init(void) { int rc = -ENOMEM; @@ -446,16 +412,11 @@ static int __init cmm_init(void) rc = register_oom_notifier(&cmm_oom_nb); if (rc < 0) goto out_oom_notify; - rc = register_pm_notifier(&cmm_power_notifier); - if (rc) - goto out_pm; cmm_thread_ptr = kthread_run(cmm_thread, NULL, "cmmthread"); if (!IS_ERR(cmm_thread_ptr)) return 0; rc = PTR_ERR(cmm_thread_ptr); - unregister_pm_notifier(&cmm_power_notifier); -out_pm: unregister_oom_notifier(&cmm_oom_nb); out_oom_notify: #ifdef CONFIG_CMM_IUCV @@ -475,7 +436,6 @@ static void __exit cmm_exit(void) #ifdef CONFIG_CMM_IUCV smsg_unregister_callback(SMSG_PREFIX, cmm_smsg_target); #endif - unregister_pm_notifier(&cmm_power_notifier); unregister_oom_notifier(&cmm_oom_nb); kthread_stop(cmm_thread_ptr); del_timer_sync(&cmm_timer); diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c index f8c6faab41f4..e22c06d5f206 100644 --- a/arch/s390/mm/pageattr.c +++ b/arch/s390/mm/pageattr.c @@ -367,20 +367,4 @@ void __kernel_map_pages(struct page *page, int numpages, int enable) } } -#ifdef CONFIG_HIBERNATION -bool kernel_page_present(struct page *page) -{ - unsigned long addr; - int cc; - - addr = page_to_phys(page); - asm volatile( - " lra %1,0(%1)\n" - " ipm %0\n" - " srl %0,28" - : "=d" (cc), "+a" (addr) : : "cc"); - return cc == 0; -} -#endif /* CONFIG_HIBERNATION */ - #endif /* CONFIG_DEBUG_PAGEALLOC */ diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index bc61ea18e88d..d4eaba24e300 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -607,49 +607,6 @@ void pcibios_disable_device(struct pci_dev *pdev) zpci_debug_exit_device(zdev); } -#ifdef CONFIG_HIBERNATE_CALLBACKS -static int zpci_restore(struct device *dev) -{ - struct pci_dev *pdev = to_pci_dev(dev); - struct zpci_dev *zdev = to_zpci(pdev); - int ret = 0; - - if (zdev->state != ZPCI_FN_STATE_ONLINE) - goto out; - - ret = clp_enable_fh(zdev, ZPCI_NR_DMA_SPACES); - if (ret) - goto out; - - zpci_map_resources(pdev); - zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma, - (u64) zdev->dma_table); - -out: - return ret; -} - -static int zpci_freeze(struct device *dev) -{ - struct pci_dev *pdev = to_pci_dev(dev); - struct zpci_dev *zdev = to_zpci(pdev); - - if (zdev->state != ZPCI_FN_STATE_ONLINE) - return 0; - - zpci_unregister_ioat(zdev, 0); - zpci_unmap_resources(pdev); - return clp_disable_fh(zdev); -} - -struct dev_pm_ops pcibios_pm_ops = { - .thaw_noirq = zpci_restore, - .freeze_noirq = zpci_freeze, - .restore_noirq = zpci_restore, - .poweroff_noirq = zpci_freeze, -}; -#endif /* CONFIG_HIBERNATE_CALLBACKS */ - static int zpci_alloc_domain(struct zpci_dev *zdev) { if (zpci_unique_uid) { From 086b2d78375cffe58f5341359bebec0650793811 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 18 Mar 2020 20:55:20 +0100 Subject: [PATCH 37/57] PM: remove s390 specific callbacks ARCH_SAVE_PAGE_KEYS has been introduced in order to be able to save and restore s390 specific storage keys into a hibernation image. With hibernation support removed from s390 there is no point in keeping the callbacks. Acked-by: Christian Borntraeger Acked-by: Peter Oberparleiter Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- include/linux/suspend.h | 34 ---------------------------------- kernel/power/Kconfig | 3 --- kernel/power/snapshot.c | 18 ------------------ 3 files changed, 55 deletions(-) diff --git a/include/linux/suspend.h b/include/linux/suspend.h index 2b2055b035ee..4fcc6fd0cbd6 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -566,38 +566,4 @@ static inline void queue_up_suspend_work(void) {} #endif /* !CONFIG_PM_AUTOSLEEP */ -#ifdef CONFIG_ARCH_SAVE_PAGE_KEYS -/* - * The ARCH_SAVE_PAGE_KEYS functions can be used by an architecture - * to save/restore additional information to/from the array of page - * frame numbers in the hibernation image. For s390 this is used to - * save and restore the storage key for each page that is included - * in the hibernation image. - */ -unsigned long page_key_additional_pages(unsigned long pages); -int page_key_alloc(unsigned long pages); -void page_key_free(void); -void page_key_read(unsigned long *pfn); -void page_key_memorize(unsigned long *pfn); -void page_key_write(void *address); - -#else /* !CONFIG_ARCH_SAVE_PAGE_KEYS */ - -static inline unsigned long page_key_additional_pages(unsigned long pages) -{ - return 0; -} - -static inline int page_key_alloc(unsigned long pages) -{ - return 0; -} - -static inline void page_key_free(void) {} -static inline void page_key_read(unsigned long *pfn) {} -static inline void page_key_memorize(unsigned long *pfn) {} -static inline void page_key_write(void *address) {} - -#endif /* !CONFIG_ARCH_SAVE_PAGE_KEYS */ - #endif /* _LINUX_SUSPEND_H */ diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig index 7cbfbeacd68a..c208566c844b 100644 --- a/kernel/power/Kconfig +++ b/kernel/power/Kconfig @@ -80,9 +80,6 @@ config HIBERNATION For more information take a look at . -config ARCH_SAVE_PAGE_KEYS - bool - config PM_STD_PARTITION string "Default resume partition" depends on HIBERNATION diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index ddade80ad276..e99d13b0b8fc 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -1744,9 +1744,6 @@ int hibernate_preallocate_memory(void) count += highmem; count -= totalreserve_pages; - /* Add number of pages required for page keys (s390 only). */ - size += page_key_additional_pages(saveable); - /* Compute the maximum number of saveable pages to leave in memory. */ max_size = (count - (size + PAGES_FOR_IO)) / 2 - 2 * DIV_ROUND_UP(reserved_size, PAGE_SIZE); @@ -2075,8 +2072,6 @@ static inline void pack_pfns(unsigned long *buf, struct memory_bitmap *bm) buf[j] = memory_bm_next_pfn(bm); if (unlikely(buf[j] == BM_END_OF_MAP)) break; - /* Save page key for data page (s390 only). */ - page_key_read(buf + j); } } @@ -2226,9 +2221,6 @@ static int unpack_orig_pfns(unsigned long *buf, struct memory_bitmap *bm) if (unlikely(buf[j] == BM_END_OF_MAP)) break; - /* Extract and buffer page key for data page (s390 only). */ - page_key_memorize(buf + j); - if (pfn_valid(buf[j]) && memory_bm_pfn_present(bm, buf[j])) memory_bm_set_bit(bm, buf[j]); else @@ -2623,11 +2615,6 @@ int snapshot_write_next(struct snapshot_handle *handle) if (error) return error; - /* Allocate buffer for page keys. */ - error = page_key_alloc(nr_copy_pages); - if (error) - return error; - hibernate_restore_protection_begin(); } else if (handle->cur <= nr_meta_pages + 1) { error = unpack_orig_pfns(buffer, ©_bm); @@ -2649,8 +2636,6 @@ int snapshot_write_next(struct snapshot_handle *handle) } } else { copy_last_highmem_page(); - /* Restore page key for data page (s390 only). */ - page_key_write(handle->buffer); hibernate_restore_protect_page(handle->buffer); handle->buffer = get_buffer(&orig_bm, &ca); if (IS_ERR(handle->buffer)) @@ -2673,9 +2658,6 @@ int snapshot_write_next(struct snapshot_handle *handle) void snapshot_write_finalize(struct snapshot_handle *handle) { copy_last_highmem_page(); - /* Restore page key for data page (s390 only). */ - page_key_write(handle->buffer); - page_key_free(); hibernate_restore_protect_page(handle->buffer); /* Do that only if we have loaded the image entirely */ if (handle->cur > 1 && handle->cur > nr_meta_pages + nr_copy_pages) { From 9289224040cb120605ffa35263ea27a30105019e Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Mon, 16 Mar 2020 09:20:38 +0100 Subject: [PATCH 38/57] s390/qdio: pass ISC as parameter to chsc_sadc() When issuing a SADC for a QDIO device, don't hardcode the ISC but use whatever is specified in qdio's handler for Adapter Interrupts. Signed-off-by: Julian Wiedmann Reviewed-by: Benjamin Block Signed-off-by: Vasily Gorbik --- drivers/s390/cio/chsc.c | 5 +++-- drivers/s390/cio/chsc.h | 3 ++- drivers/s390/cio/qdio_thinint.c | 2 +- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/s390/cio/chsc.c b/drivers/s390/cio/chsc.c index 6392a1b95b02..1ca73c2e5a8f 100644 --- a/drivers/s390/cio/chsc.c +++ b/drivers/s390/cio/chsc.c @@ -180,11 +180,12 @@ EXPORT_SYMBOL_GPL(chsc_ssqd); * @scssc: request and response block for SADC * @summary_indicator_addr: summary indicator address * @subchannel_indicator_addr: subchannel indicator address + * @isc: Interruption Subclass for this subchannel * * Returns 0 on success. */ int chsc_sadc(struct subchannel_id schid, struct chsc_scssc_area *scssc, - u64 summary_indicator_addr, u64 subchannel_indicator_addr) + u64 summary_indicator_addr, u64 subchannel_indicator_addr, u8 isc) { memset(scssc, 0, sizeof(*scssc)); scssc->request.length = 0x0fe0; @@ -196,7 +197,7 @@ int chsc_sadc(struct subchannel_id schid, struct chsc_scssc_area *scssc, scssc->ks = PAGE_DEFAULT_KEY >> 4; scssc->kc = PAGE_DEFAULT_KEY >> 4; - scssc->isc = QDIO_AIRQ_ISC; + scssc->isc = isc; scssc->schid = schid; /* enable the time delay disablement facility */ diff --git a/drivers/s390/cio/chsc.h b/drivers/s390/cio/chsc.h index e57d68e325a3..34de6d77442c 100644 --- a/drivers/s390/cio/chsc.h +++ b/drivers/s390/cio/chsc.h @@ -163,7 +163,8 @@ void chsc_chp_offline(struct chp_id chpid); int chsc_get_channel_measurement_chars(struct channel_path *chp); int chsc_ssqd(struct subchannel_id schid, struct chsc_ssqd_area *ssqd); int chsc_sadc(struct subchannel_id schid, struct chsc_scssc_area *scssc, - u64 summary_indicator_addr, u64 subchannel_indicator_addr); + u64 summary_indicator_addr, u64 subchannel_indicator_addr, + u8 isc); int chsc_sgib(u32 origin); int chsc_error_from_response(int response); diff --git a/drivers/s390/cio/qdio_thinint.c b/drivers/s390/cio/qdio_thinint.c index 7c4e4ec08a12..999b998ea0c9 100644 --- a/drivers/s390/cio/qdio_thinint.c +++ b/drivers/s390/cio/qdio_thinint.c @@ -211,7 +211,7 @@ static int set_subchannel_ind(struct qdio_irq *irq_ptr, int reset) } rc = chsc_sadc(irq_ptr->schid, scssc, summary_indicator_addr, - subchannel_indicator_addr); + subchannel_indicator_addr, tiqdio_airq.isc); if (rc) { DBF_ERROR("%4x SSI r:%4x", irq_ptr->schid.sch_no, scssc->response.code); From 969ae01bab2fe938b4c8324836038b5ac1c78fac Mon Sep 17 00:00:00 2001 From: Niklas Schnelle Date: Tue, 17 Mar 2020 12:59:37 +0100 Subject: [PATCH 39/57] s390/pci: Fix zpci_alloc_domain() over allocation Until now zpci_alloc_domain() only prevented more than CONFIG_PCI_NR_FUNCTIONS from being added when using automatic domain allocation. When explicit UIDs were defined UIDs above CONFIG_PCI_NR_FUNCTIONS were not counted at all. When more PCI functions are added this could lead to various errors including under sized IRQ vectors and similar issues. Fix this by explicitly tracking the number of allocated domains. Signed-off-by: Niklas Schnelle Reviewed-by: Pierre Morel Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/pci.h | 1 + arch/s390/pci/pci.c | 34 ++++++++++++++++++---------------- 2 files changed, 19 insertions(+), 16 deletions(-) diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h index 73b69a777152..93527655e752 100644 --- a/arch/s390/include/asm/pci.h +++ b/arch/s390/include/asm/pci.h @@ -26,6 +26,7 @@ int pci_proc_domain(struct pci_bus *); #define ZPCI_NR_DMA_SPACES 1 #define ZPCI_NR_DEVICES CONFIG_PCI_NR_FUNCTIONS +#define ZPCI_DOMAIN_BITMAP_SIZE (1 << 16) /* PCI Function Controls */ #define ZPCI_FC_FN_ENABLED 0x80 diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index d4eaba24e300..2b90a90aa81d 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -40,8 +40,9 @@ static LIST_HEAD(zpci_list); static DEFINE_SPINLOCK(zpci_list_lock); -static DECLARE_BITMAP(zpci_domain, ZPCI_NR_DEVICES); +static DECLARE_BITMAP(zpci_domain, ZPCI_DOMAIN_BITMAP_SIZE); static DEFINE_SPINLOCK(zpci_domain_lock); +static unsigned int zpci_num_domains_allocated; #define ZPCI_IOMAP_ENTRIES \ min(((unsigned long) ZPCI_NR_DEVICES * PCI_STD_NUM_BARS / 2), \ @@ -609,12 +610,16 @@ void pcibios_disable_device(struct pci_dev *pdev) static int zpci_alloc_domain(struct zpci_dev *zdev) { + spin_lock(&zpci_domain_lock); + if (zpci_num_domains_allocated > (ZPCI_NR_DEVICES - 1)) { + spin_unlock(&zpci_domain_lock); + pr_err("Adding PCI function %08x failed because the configured limit of %d is reached\n", + zdev->fid, ZPCI_NR_DEVICES); + return -ENOSPC; + } + if (zpci_unique_uid) { zdev->domain = (u16) zdev->uid; - if (zdev->domain >= ZPCI_NR_DEVICES) - return 0; - - spin_lock(&zpci_domain_lock); if (test_bit(zdev->domain, zpci_domain)) { spin_unlock(&zpci_domain_lock); pr_err("Adding PCI function %08x failed because domain %04x is already assigned\n", @@ -622,30 +627,27 @@ static int zpci_alloc_domain(struct zpci_dev *zdev) return -EEXIST; } set_bit(zdev->domain, zpci_domain); + zpci_num_domains_allocated++; spin_unlock(&zpci_domain_lock); return 0; } - - spin_lock(&zpci_domain_lock); + /* + * We can always auto allocate domains below ZPCI_NR_DEVICES. + * There is either a free domain or we have reached the maximum in + * which case we would have bailed earlier. + */ zdev->domain = find_first_zero_bit(zpci_domain, ZPCI_NR_DEVICES); - if (zdev->domain == ZPCI_NR_DEVICES) { - spin_unlock(&zpci_domain_lock); - pr_err("Adding PCI function %08x failed because the configured limit of %d is reached\n", - zdev->fid, ZPCI_NR_DEVICES); - return -ENOSPC; - } set_bit(zdev->domain, zpci_domain); + zpci_num_domains_allocated++; spin_unlock(&zpci_domain_lock); return 0; } static void zpci_free_domain(struct zpci_dev *zdev) { - if (zdev->domain >= ZPCI_NR_DEVICES) - return; - spin_lock(&zpci_domain_lock); clear_bit(zdev->domain, zpci_domain); + zpci_num_domains_allocated--; spin_unlock(&zpci_domain_lock); } From 7a11c67a1ff9b0231eaaaa6a28294776d55b569a Mon Sep 17 00:00:00 2001 From: Niklas Schnelle Date: Wed, 18 Mar 2020 13:53:16 +0100 Subject: [PATCH 40/57] s390/pci: Improve handling of unset UID When UID checking is enabled a UID value of 0 is invalid and can not be set by the user. On z/VM it is however used to indicate an unset UID. Until now, this lead to the behavior that one PCI function could be attached with UID 0 after which z/VM would prohibit further attachment. Now if the user then turns off UID checking in z/VM the user could seemingly attach additional PCI functions that would however not show up in Linux as that would not be informed of the change in UID checking mode. This is unexpected and confusing and lead to bug reports against Linux. Instead now, if we encounter an unset UID value of 0 treat it as indicating that UID checking was turned off, switch to automatic domain allocation, and warn the user of the possible misconfiguration. Signed-off-by: Niklas Schnelle Reviewed-by: Peter Oberparleiter Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/pci.h | 3 +++ arch/s390/pci/pci.c | 8 ++++++++ arch/s390/pci/pci_clp.c | 2 +- 3 files changed, 12 insertions(+), 1 deletion(-) diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h index 93527655e752..7485ee561fec 100644 --- a/arch/s390/include/asm/pci.h +++ b/arch/s390/include/asm/pci.h @@ -189,6 +189,9 @@ int clp_enable_fh(struct zpci_dev *, u8); int clp_disable_fh(struct zpci_dev *); int clp_get_state(u32 fid, enum zpci_state *state); +/* UID */ +void update_uid_checking(bool new); + /* IOMMU Interface */ int zpci_init_iommu(struct zpci_dev *zdev); void zpci_destroy_iommu(struct zpci_dev *zdev); diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index 2b90a90aa81d..cf7485bdd7cf 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -620,6 +620,13 @@ static int zpci_alloc_domain(struct zpci_dev *zdev) if (zpci_unique_uid) { zdev->domain = (u16) zdev->uid; + if (zdev->domain == 0) { + pr_warn("UID checking is active but no UID is set for PCI function %08x, so automatic domain allocation is used instead\n", + zdev->fid); + update_uid_checking(false); + goto auto_allocate; + } + if (test_bit(zdev->domain, zpci_domain)) { spin_unlock(&zpci_domain_lock); pr_err("Adding PCI function %08x failed because domain %04x is already assigned\n", @@ -631,6 +638,7 @@ static int zpci_alloc_domain(struct zpci_dev *zdev) spin_unlock(&zpci_domain_lock); return 0; } +auto_allocate: /* * We can always auto allocate domains below ZPCI_NR_DEVICES. * There is either a free domain or we have reached the maximum in diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c index 0d3d8f170ea4..ea794ae755ae 100644 --- a/arch/s390/pci/pci_clp.c +++ b/arch/s390/pci/pci_clp.c @@ -24,7 +24,7 @@ bool zpci_unique_uid; -static void update_uid_checking(bool new) +void update_uid_checking(bool new) { if (zpci_unique_uid != new) zpci_dbg(1, "uid checking:%d\n", new); From 3cc7c927102d8ce836735c1005a7d102a148579c Mon Sep 17 00:00:00 2001 From: Harald Freudenberger Date: Thu, 19 Mar 2020 16:01:04 +0100 Subject: [PATCH 41/57] s390/ap: Remove ap device suspend and resume callbacks With the removal of the s390 hibernate support the suspend and resume callbacks for the ap devices are not needed any more. This patch removes the callbacks and the ap bus' registration struct for the power management. Signed-off-by: Harald Freudenberger Signed-off-by: Vasily Gorbik --- drivers/s390/crypto/ap_bus.c | 21 --------------------- 1 file changed, 21 deletions(-) diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c index 171b0a08e0f9..0246bb35790f 100644 --- a/drivers/s390/crypto/ap_bus.c +++ b/drivers/s390/crypto/ap_bus.c @@ -587,24 +587,6 @@ static int ap_uevent(struct device *dev, struct kobj_uevent_env *env) return retval; } -static int ap_dev_suspend(struct device *dev) -{ - struct ap_device *ap_dev = to_ap_dev(dev); - - if (ap_dev->drv && ap_dev->drv->suspend) - ap_dev->drv->suspend(ap_dev); - return 0; -} - -static int ap_dev_resume(struct device *dev) -{ - struct ap_device *ap_dev = to_ap_dev(dev); - - if (ap_dev->drv && ap_dev->drv->resume) - ap_dev->drv->resume(ap_dev); - return 0; -} - static void ap_bus_suspend(void) { AP_DBF(DBF_DEBUG, "%s running\n", __func__); @@ -694,13 +676,10 @@ static struct notifier_block ap_power_notifier = { .notifier_call = ap_power_event, }; -static SIMPLE_DEV_PM_OPS(ap_bus_pm_ops, ap_dev_suspend, ap_dev_resume); - static struct bus_type ap_bus_type = { .name = "ap", .match = &ap_bus_match, .uevent = &ap_uevent, - .pm = &ap_bus_pm_ops, }; static int __ap_revise_reserved(struct device *dev, void *dummy) From 6c7c851f1b666a8a455678a0b480b9162de86052 Mon Sep 17 00:00:00 2001 From: Michael Mueller Date: Tue, 3 Mar 2020 16:42:01 +0100 Subject: [PATCH 42/57] s390/diag: fix display of diagnose call statistics Show the full diag statistic table and not just parts of it. The issue surfaced in a KVM guest with a number of vcpus defined smaller than NR_DIAG_STAT. Fixes: 1ec2772e0c3c ("s390/diag: add a statistic for diagnose calls") Cc: stable@vger.kernel.org Signed-off-by: Michael Mueller Reviewed-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/kernel/diag.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/kernel/diag.c b/arch/s390/kernel/diag.c index e9dac9a24d3f..61f2b0412345 100644 --- a/arch/s390/kernel/diag.c +++ b/arch/s390/kernel/diag.c @@ -84,7 +84,7 @@ static int show_diag_stat(struct seq_file *m, void *v) static void *show_diag_stat_start(struct seq_file *m, loff_t *pos) { - return *pos <= nr_cpu_ids ? (void *)((unsigned long) *pos + 1) : NULL; + return *pos <= NR_DIAG_STAT ? (void *)((unsigned long) *pos + 1) : NULL; } static void *show_diag_stat_next(struct seq_file *m, void *v, loff_t *pos) From 4141b6a5e9f171325effc36a22eb92bf961e7a5c Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Mon, 23 Mar 2020 11:09:07 +0100 Subject: [PATCH 43/57] s390/cpum_sf: Fix wrong page count in error message When perf record -e SF_CYCLES_BASIC_DIAG runs with very high frequency, the samples arrive faster than the perf process can save them to file. Eventually, for longer running processes, this leads to the siutation where the trace buffers allocated by perf slowly fills up. At one point the auxiliary trace buffer is full and the CPU Measurement sampling facility is turned off. Furthermore a warning is printed to the kernel log buffer: cpum_sf: The AUX buffer with 0 pages for the diagnostic-sampling mode is full The number of allocated pages for the auxiliary trace buffer is shown as zero pages. That is wrong. Fix this by saving the number of allocated pages before entering the work loop in the interrupt handler. When the interrupt handler processes the samples, it may detect the buffer full condition and stop sampling, reducing the buffer size to zero. Print the correct value in the error message: cpum_sf: The AUX buffer with 256 pages for the diagnostic-sampling mode is full Signed-off-by: Thomas Richter Signed-off-by: Vasily Gorbik --- arch/s390/kernel/perf_cpum_sf.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index cf2020b8db44..85a711d783eb 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -1578,6 +1578,7 @@ static void hw_collect_aux(struct cpu_hw_sf *cpuhw) unsigned long range = 0, size; unsigned long long overflow = 0; struct perf_output_handle *handle = &cpuhw->handle; + unsigned long num_sdb; aux = perf_get_aux(handle); if (WARN_ON_ONCE(!aux)) @@ -1589,13 +1590,14 @@ static void hw_collect_aux(struct cpu_hw_sf *cpuhw) size >> PAGE_SHIFT); perf_aux_output_end(handle, size); + num_sdb = aux->sfb.num_sdb; while (!done) { /* Get an output handle */ aux = perf_aux_output_begin(handle, cpuhw->event); if (handle->size == 0) { pr_err("The AUX buffer with %lu pages for the " "diagnostic-sampling mode is full\n", - aux->sfb.num_sdb); + num_sdb); debug_sprintf_event(sfdbg, 1, "%s: AUX buffer used up\n", __func__); From 2c7749b90536b76795eab4cada028c2ddad25fc3 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 10 Mar 2020 13:47:30 -0700 Subject: [PATCH 44/57] s390: use fallthrough; Convert the various uses of fallthrough comments to fallthrough; Done via script Link: https://lore.kernel.org/lkml/b56602fcf79f849e733e7b521bb0e17895d390fa.1582230379.git.joe.com/ Signed-off-by: Joe Perches Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/kernel/signal.c | 4 ++-- arch/s390/kernel/topology.c | 2 +- arch/s390/mm/fault.c | 11 +++++------ arch/s390/mm/pgalloc.c | 2 +- drivers/s390/char/con3215.c | 2 +- drivers/s390/char/hmcdrv_ftp.c | 2 +- drivers/s390/char/sclp_sdias.c | 2 +- drivers/s390/char/tape_core.c | 6 +++--- 8 files changed, 15 insertions(+), 16 deletions(-) diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c index e6fca5498e1f..b295090e2ce6 100644 --- a/arch/s390/kernel/signal.c +++ b/arch/s390/kernel/signal.c @@ -487,7 +487,7 @@ void do_signal(struct pt_regs *regs) regs->gprs[2] = -EINTR; break; } - /* fallthrough */ + fallthrough; case -ERESTARTNOINTR: regs->gprs[2] = regs->orig_gpr2; regs->psw.addr = @@ -514,7 +514,7 @@ void do_signal(struct pt_regs *regs) case -ERESTART_RESTARTBLOCK: /* Restart with sys_restart_syscall */ regs->int_code = __NR_restart_syscall; - /* fallthrough */ + fallthrough; case -ERESTARTNOHAND: case -ERESTARTSYS: case -ERESTARTNOINTR: diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c index d03edebce754..5f70cefc13e4 100644 --- a/arch/s390/kernel/topology.c +++ b/arch/s390/kernel/topology.c @@ -83,7 +83,7 @@ static cpumask_t cpu_group_map(struct mask_info *info, unsigned int cpu) cpumask_copy(&mask, cpu_present_mask); break; default: - /* fallthrough */ + fallthrough; case TOPOLOGY_MODE_SINGLE: cpumask_copy(&mask, cpumask_of(cpu)); break; diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 151adef0d5dd..15c6c811d254 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -122,7 +122,7 @@ static void dump_pagetable(unsigned long asce, unsigned long address) if (*table & _REGION_ENTRY_INVALID) goto out; table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); - /* fallthrough */ + fallthrough; case _ASCE_TYPE_REGION2: table += (address & _REGION2_INDEX) >> _REGION2_SHIFT; if (bad_address(table)) @@ -131,7 +131,7 @@ static void dump_pagetable(unsigned long asce, unsigned long address) if (*table & _REGION_ENTRY_INVALID) goto out; table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); - /* fallthrough */ + fallthrough; case _ASCE_TYPE_REGION3: table += (address & _REGION3_INDEX) >> _REGION3_SHIFT; if (bad_address(table)) @@ -140,7 +140,7 @@ static void dump_pagetable(unsigned long asce, unsigned long address) if (*table & (_REGION_ENTRY_INVALID | _REGION3_ENTRY_LARGE)) goto out; table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); - /* fallthrough */ + fallthrough; case _ASCE_TYPE_SEGMENT: table += (address & _SEGMENT_INDEX) >> _SEGMENT_SHIFT; if (bad_address(table)) @@ -327,7 +327,7 @@ static noinline void do_fault_error(struct pt_regs *regs, int access, case VM_FAULT_BADACCESS: if (access == VM_EXEC && signal_return(regs) == 0) break; - /* fallthrough */ + fallthrough; case VM_FAULT_BADMAP: /* Bad memory access. Check if it is kernel or user space. */ if (user_mode(regs)) { @@ -337,9 +337,8 @@ static noinline void do_fault_error(struct pt_regs *regs, int access, do_sigsegv(regs, si_code); break; } - /* fallthrough */ + fallthrough; case VM_FAULT_BADCONTEXT: - /* fallthrough */ case VM_FAULT_PFAULT: do_no_context(regs); break; diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c index d3be3fe2c55d..af3bddd5e568 100644 --- a/arch/s390/mm/pgalloc.c +++ b/arch/s390/mm/pgalloc.c @@ -326,7 +326,7 @@ void __tlb_remove_table(void *_table) mask >>= 24; if (mask != 0) break; - /* fallthrough */ + fallthrough; case 3: /* 4K page table with pgstes */ if (mask & 3) atomic_xor_bits(&page->_refcount, 3 << 24); diff --git a/drivers/s390/char/con3215.c b/drivers/s390/char/con3215.c index e7cf0a1d4f71..92757f9bd010 100644 --- a/drivers/s390/char/con3215.c +++ b/drivers/s390/char/con3215.c @@ -398,7 +398,7 @@ static void raw3215_irq(struct ccw_device *cdev, unsigned long intparm, } if (dstat == 0x08) break; - /* else, fall through */ + fallthrough; case 0x04: /* Device end interrupt. */ if ((raw = req->info) == NULL) diff --git a/drivers/s390/char/hmcdrv_ftp.c b/drivers/s390/char/hmcdrv_ftp.c index 0e70397d6e04..37ee8f698c3b 100644 --- a/drivers/s390/char/hmcdrv_ftp.c +++ b/drivers/s390/char/hmcdrv_ftp.c @@ -137,7 +137,7 @@ static int hmcdrv_ftp_parse(char *cmd, struct hmcdrv_ftp_cmdspec *ftp) while ((*cmd != '\0') && !iscntrl(*cmd)) ++cmd; ftp->fname = start; - /* fall through */ + fallthrough; default: *cmd = '\0'; break; diff --git a/drivers/s390/char/sclp_sdias.c b/drivers/s390/char/sclp_sdias.c index 13f97fd73aca..644b61013679 100644 --- a/drivers/s390/char/sclp_sdias.c +++ b/drivers/s390/char/sclp_sdias.c @@ -214,7 +214,7 @@ int sclp_sdias_copy(void *dest, int start_blk, int nr_blks) break; case SDIAS_EVSTATE_NO_DATA: TRACE("no data\n"); - /* fall through */ + fallthrough; default: pr_err("Error from SCLP while copying hsa. Event status = %x\n", sdias_evbuf.event_status); diff --git a/drivers/s390/char/tape_core.c b/drivers/s390/char/tape_core.c index 3e0b2f63a9d2..380e6a67719c 100644 --- a/drivers/s390/char/tape_core.c +++ b/drivers/s390/char/tape_core.c @@ -677,7 +677,7 @@ tape_generic_remove(struct ccw_device *cdev) switch (device->tape_state) { case TS_INIT: tape_state_set(device, TS_NOT_OPER); - /* fallthrough */ + fallthrough; case TS_NOT_OPER: /* * Nothing to do. @@ -950,7 +950,7 @@ __tape_start_request(struct tape_device *device, struct tape_request *request) break; if (device->tape_state == TS_UNUSED) break; - /* fallthrough */ + fallthrough; default: if (device->tape_state == TS_BLKUSE) break; @@ -1118,7 +1118,7 @@ __tape_do_irq (struct ccw_device *cdev, unsigned long intparm, struct irb *irb) case -ETIMEDOUT: DBF_LH(1, "(%08x): Request timed out\n", device->cdev_id); - /* fallthrough */ + fallthrough; case -EIO: __tape_end_request(device, request, -EIO); break; From fcf0220abc5b65d12ff48ba9870cf3d90801b075 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 10 Mar 2020 13:39:51 -0700 Subject: [PATCH 45/57] s390/zcrypt: use fallthrough; Convert the various uses of fallthrough comments to fallthrough; Done via script Link: https://lore.kernel.org/lkml/b56602fcf79f849e733e7b521bb0e17895d390fa.1582230379.git.joe.com/ Signed-off-by: Joe Perches Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- drivers/s390/crypto/ap_bus.c | 2 +- drivers/s390/crypto/ap_queue.c | 8 ++++---- drivers/s390/crypto/zcrypt_msgtype6.c | 8 ++++---- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c index 0246bb35790f..c413211c6116 100644 --- a/drivers/s390/crypto/ap_bus.c +++ b/drivers/s390/crypto/ap_bus.c @@ -360,7 +360,7 @@ void ap_wait(enum ap_wait wait) wake_up(&ap_poll_wait); break; } - /* Fall through */ + fallthrough; case AP_WAIT_TIMEOUT: spin_lock_bh(&ap_poll_timer_lock); if (!hrtimer_is_queued(&ap_poll_timer)) { diff --git a/drivers/s390/crypto/ap_queue.c b/drivers/s390/crypto/ap_queue.c index 39c0b246c69a..9cb997cffa61 100644 --- a/drivers/s390/crypto/ap_queue.c +++ b/drivers/s390/crypto/ap_queue.c @@ -152,7 +152,7 @@ static struct ap_queue_status ap_sm_recv(struct ap_queue *aq) ap_msg->receive(aq, ap_msg, aq->reply); break; } - /* fall through */ + fallthrough; case AP_RESPONSE_NO_PENDING_REPLY: if (!status.queue_empty || aq->queue_count <= 0) break; @@ -219,7 +219,7 @@ static enum ap_wait ap_sm_suspend_read(struct ap_queue *aq) case AP_RESPONSE_NORMAL: if (aq->queue_count > 0) return AP_WAIT_AGAIN; - /* fall through */ + fallthrough; default: return AP_WAIT_NONE; } @@ -254,7 +254,7 @@ static enum ap_wait ap_sm_write(struct ap_queue *aq) aq->state = AP_STATE_WORKING; return AP_WAIT_AGAIN; } - /* fall through */ + fallthrough; case AP_RESPONSE_Q_FULL: aq->state = AP_STATE_QUEUE_FULL; return AP_WAIT_INTERRUPT; @@ -380,7 +380,7 @@ static enum ap_wait ap_sm_setirq_wait(struct ap_queue *aq) case AP_RESPONSE_NORMAL: if (aq->queue_count > 0) return AP_WAIT_AGAIN; - /* fallthrough */ + fallthrough; case AP_RESPONSE_NO_PENDING_REPLY: return AP_WAIT_TIMEOUT; default: diff --git a/drivers/s390/crypto/zcrypt_msgtype6.c b/drivers/s390/crypto/zcrypt_msgtype6.c index eadd3a438a4b..fd1cbb2d6b3f 100644 --- a/drivers/s390/crypto/zcrypt_msgtype6.c +++ b/drivers/s390/crypto/zcrypt_msgtype6.c @@ -801,7 +801,7 @@ static int convert_response_ica(struct zcrypt_queue *zq, if (msg->cprbx.cprb_ver_id == 0x02) return convert_type86_ica(zq, reply, outputdata, outputdatalength); - /* fall through - wrong cprb version is an unknown response */ + fallthrough; /* wrong cprb version is an unknown response */ default: /* Unknown response type, this should NEVER EVER happen */ zq->online = 0; pr_err("Cryptographic device %02x.%04x failed and was set offline\n", @@ -834,7 +834,7 @@ static int convert_response_xcrb(struct zcrypt_queue *zq, } if (msg->cprbx.cprb_ver_id == 0x02) return convert_type86_xcrb(zq, reply, xcRB); - /* fall through - wrong cprb version is an unknown response */ + fallthrough; /* wrong cprb version is an unknown response */ default: /* Unknown response type, this should NEVER EVER happen */ xcRB->status = 0x0008044DL; /* HDD_InvalidParm */ zq->online = 0; @@ -864,7 +864,7 @@ static int convert_response_ep11_xcrb(struct zcrypt_queue *zq, return convert_error(zq, reply); if (msg->cprbx.cprb_ver_id == 0x04) return convert_type86_ep11_xcrb(zq, reply, xcRB); - /* fall through - wrong cprb version is an unknown resp */ + fallthrough; /* wrong cprb version is an unknown resp */ default: /* Unknown response type, this should NEVER EVER happen */ zq->online = 0; pr_err("Cryptographic device %02x.%04x failed and was set offline\n", @@ -894,7 +894,7 @@ static int convert_response_rng(struct zcrypt_queue *zq, return -EINVAL; if (msg->cprbx.cprb_ver_id == 0x02) return convert_type86_rng(zq, reply, data); - /* fall through - wrong cprb version is an unknown response */ + fallthrough; /* wrong cprb version is an unknown response */ default: /* Unknown response type, this should NEVER EVER happen */ zq->online = 0; pr_err("Cryptographic device %02x.%04x failed and was set offline\n", From 0696178e7741c50e2634209276c344017e690bec Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 10 Mar 2020 13:39:50 -0700 Subject: [PATCH 46/57] s390/vfio: use fallthrough; Convert the various uses of fallthrough comments to fallthrough; Done via script Link: https://lore.kernel.org/lkml/b56602fcf79f849e733e7b521bb0e17895d390fa.1582230379.git.joe.com/ Signed-off-by: Joe Perches Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- drivers/s390/crypto/vfio_ap_ops.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c index 5c0f53c6dde7..e0bde8518745 100644 --- a/drivers/s390/crypto/vfio_ap_ops.c +++ b/drivers/s390/crypto/vfio_ap_ops.c @@ -90,7 +90,7 @@ static void vfio_ap_wait_for_irqclear(int apqn) case AP_RESPONSE_RESET_IN_PROGRESS: if (!status.irq_enabled) return; - /* Fall through */ + fallthrough; case AP_RESPONSE_BUSY: msleep(20); break; From b09fcecb6c91113cd39e50ba32c5afc29bcb20c9 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 10 Mar 2020 13:39:50 -0700 Subject: [PATCH 47/57] s390/cio: use fallthrough; Convert the various uses of fallthrough comments to fallthrough; Done via script Link: https://lore.kernel.org/lkml/b56602fcf79f849e733e7b521bb0e17895d390fa.1582230379.git.joe.com/ Signed-off-by: Joe Perches Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- drivers/s390/cio/device.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/s390/cio/device.c b/drivers/s390/cio/device.c index 0c6245fc7706..50007cb9be5b 100644 --- a/drivers/s390/cio/device.c +++ b/drivers/s390/cio/device.c @@ -1262,7 +1262,7 @@ static int recovery_check(struct device *dev, void *data) sch = to_subchannel(cdev->dev.parent); if ((sch->schib.pmcw.pam & sch->opm) == sch->vpm) break; - /* fall through */ + fallthrough; case DEV_STATE_DISCONNECTED: CIO_MSG_EVENT(3, "recovery: trigger 0.%x.%04x\n", cdev->private->dev_id.ssid, @@ -2091,7 +2091,7 @@ static void ccw_device_todo(struct work_struct *work) case CDEV_TODO_UNREG_EVAL: if (!sch_is_pseudo_sch(sch)) css_schedule_eval(sch->schid); - /* fall-through */ + fallthrough; case CDEV_TODO_UNREG: if (sch_is_pseudo_sch(sch)) ccw_device_unregister(cdev); From 194f75706b86847d1b4237958f7d6bd7ea7baf42 Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Fri, 20 Mar 2020 09:19:14 +0100 Subject: [PATCH 48/57] s390/ism: remove pm support As s390 no longer supports ARCH_HIBERNATION_POSSIBLE, drop the unused pm ops from the ism driver. Signed-off-by: Ursula Braun Signed-off-by: Vasily Gorbik --- drivers/s390/net/ism_drv.c | 20 -------------------- 1 file changed, 20 deletions(-) diff --git a/drivers/s390/net/ism_drv.c b/drivers/s390/net/ism_drv.c index 4fc2056bd227..c75112ee7b97 100644 --- a/drivers/s390/net/ism_drv.c +++ b/drivers/s390/net/ism_drv.c @@ -567,31 +567,11 @@ static void ism_remove(struct pci_dev *pdev) kfree(ism); } -static int ism_suspend(struct device *dev) -{ - struct ism_dev *ism = dev_get_drvdata(dev); - - ism_dev_exit(ism); - return 0; -} - -static int ism_resume(struct device *dev) -{ - struct ism_dev *ism = dev_get_drvdata(dev); - - return ism_dev_init(ism); -} - -static SIMPLE_DEV_PM_OPS(ism_pm_ops, ism_suspend, ism_resume); - static struct pci_driver ism_driver = { .name = DRV_NAME, .id_table = ism_device_table, .probe = ism_probe, .remove = ism_remove, - .driver = { - .pm = &ism_pm_ops, - }, }; static int __init ism_init(void) From 712fa5f294f377ee3103c36c178e7d62c65dd108 Mon Sep 17 00:00:00 2001 From: Alexander Gordeev Date: Mon, 23 Mar 2020 09:38:37 +0100 Subject: [PATCH 49/57] s390/mm: cleanup arch_get_unmapped_area() and friends Factor out check_asce_limit() function and fix few style defects in arch_get_unmapped_area() family of functions. Signed-off-by: Alexander Gordeev Reviewed-by: Heiko Carstens [heiko.carstens@de.ibm.com: small coding style changes] Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/pgalloc.h | 14 ++++++++++++ arch/s390/mm/hugetlbpage.c | 11 ++------- arch/s390/mm/mmap.c | 40 +++++++++------------------------ 3 files changed, 27 insertions(+), 38 deletions(-) diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h index 77606c4acd58..f0d7457fa1da 100644 --- a/arch/s390/include/asm/pgalloc.h +++ b/arch/s390/include/asm/pgalloc.h @@ -48,6 +48,20 @@ static inline unsigned long pgd_entry_type(struct mm_struct *mm) int crst_table_upgrade(struct mm_struct *mm, unsigned long limit); void crst_table_downgrade(struct mm_struct *); +static inline unsigned long check_asce_limit(struct mm_struct *mm, unsigned long addr, + unsigned long len) +{ + int rc; + + if (addr + len > mm->context.asce_limit && + addr + len <= TASK_SIZE) { + rc = crst_table_upgrade(mm, addr + len); + if (rc) + return (unsigned long) rc; + } + return addr; +} + static inline p4d_t *p4d_alloc_one(struct mm_struct *mm, unsigned long address) { unsigned long *table = crst_table_alloc(mm); diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c index 5674710a4841..f01daddcbc5e 100644 --- a/arch/s390/mm/hugetlbpage.c +++ b/arch/s390/mm/hugetlbpage.c @@ -326,7 +326,6 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, struct hstate *h = hstate_file(file); struct mm_struct *mm = current->mm; struct vm_area_struct *vma; - int rc; if (len & ~huge_page_mask(h)) return -EINVAL; @@ -353,15 +352,9 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, else addr = hugetlb_get_unmapped_area_topdown(file, addr, len, pgoff, flags); - if (addr & ~PAGE_MASK) + if (offset_in_page(addr)) return addr; check_asce_limit: - if (addr + len > current->mm->context.asce_limit && - addr + len <= TASK_SIZE) { - rc = crst_table_upgrade(mm, addr + len); - if (rc) - return (unsigned long) rc; - } - return addr; + return check_asce_limit(mm, addr, len); } diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c index cbc718ba6d78..1b78f630a9ca 100644 --- a/arch/s390/mm/mmap.c +++ b/arch/s390/mm/mmap.c @@ -72,14 +72,13 @@ static inline unsigned long mmap_base(unsigned long rnd, return PAGE_ALIGN(STACK_TOP - gap - rnd); } -unsigned long -arch_get_unmapped_area(struct file *filp, unsigned long addr, - unsigned long len, unsigned long pgoff, unsigned long flags) +unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, + unsigned long len, unsigned long pgoff, + unsigned long flags) { struct mm_struct *mm = current->mm; struct vm_area_struct *vma; struct vm_unmapped_area_info info; - int rc; if (len > TASK_SIZE - mmap_min_addr) return -ENOMEM; @@ -105,30 +104,20 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr, info.align_mask = 0; info.align_offset = pgoff << PAGE_SHIFT; addr = vm_unmapped_area(&info); - if (addr & ~PAGE_MASK) + if (offset_in_page(addr)) return addr; check_asce_limit: - if (addr + len > current->mm->context.asce_limit && - addr + len <= TASK_SIZE) { - rc = crst_table_upgrade(mm, addr + len); - if (rc) - return (unsigned long) rc; - } - - return addr; + return check_asce_limit(mm, addr, len); } -unsigned long -arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, - const unsigned long len, const unsigned long pgoff, - const unsigned long flags) +unsigned long arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr, + unsigned long len, unsigned long pgoff, + unsigned long flags) { struct vm_area_struct *vma; struct mm_struct *mm = current->mm; - unsigned long addr = addr0; struct vm_unmapped_area_info info; - int rc; /* requested length too big for entire address space */ if (len > TASK_SIZE - mmap_min_addr) @@ -163,25 +152,18 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, * can happen with large stack limits and large mmap() * allocations. */ - if (addr & ~PAGE_MASK) { + if (offset_in_page(addr)) { VM_BUG_ON(addr != -ENOMEM); info.flags = 0; info.low_limit = TASK_UNMAPPED_BASE; info.high_limit = TASK_SIZE; addr = vm_unmapped_area(&info); - if (addr & ~PAGE_MASK) + if (offset_in_page(addr)) return addr; } check_asce_limit: - if (addr + len > current->mm->context.asce_limit && - addr + len <= TASK_SIZE) { - rc = crst_table_upgrade(mm, addr + len); - if (rc) - return (unsigned long) rc; - } - - return addr; + return check_asce_limit(mm, addr, len); } /* From 34515df25d7e5ae19f66eadfb1351a3178344f36 Mon Sep 17 00:00:00 2001 From: Harald Freudenberger Date: Mon, 23 Mar 2020 14:32:04 +0100 Subject: [PATCH 50/57] s390/zcrypt: use kvmalloc instead of kmalloc for 256k alloc Tests showed that it may happen that a 256k kmalloc may fail due to a temporary shortage on 256k slab entries. The find functions for cca and ep11 use a 256k array to fetch the states of all possible crypto cards and their domains in one piece. With the patch now kvmalloc is used to allocate this temporary memory as there is no need to have this memory area physical continuously. Signed-off-by: Harald Freudenberger Reviewed-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- drivers/s390/crypto/zcrypt_ccamisc.c | 8 ++++---- drivers/s390/crypto/zcrypt_ep11misc.c | 10 +++++----- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/s390/crypto/zcrypt_ccamisc.c b/drivers/s390/crypto/zcrypt_ccamisc.c index e6899107c586..1b835398feec 100644 --- a/drivers/s390/crypto/zcrypt_ccamisc.c +++ b/drivers/s390/crypto/zcrypt_ccamisc.c @@ -1569,9 +1569,9 @@ static int findcard(u64 mkvp, u16 *pcardnr, u16 *pdomain, return -EINVAL; /* fetch status of all crypto cards */ - device_status = kmalloc_array(MAX_ZDEV_ENTRIES_EXT, - sizeof(struct zcrypt_device_status_ext), - GFP_KERNEL); + device_status = kvmalloc_array(MAX_ZDEV_ENTRIES_EXT, + sizeof(struct zcrypt_device_status_ext), + GFP_KERNEL); if (!device_status) return -ENOMEM; zcrypt_device_status_mask_ext(device_status); @@ -1641,7 +1641,7 @@ static int findcard(u64 mkvp, u16 *pcardnr, u16 *pdomain, } else rc = -ENODEV; - kfree(device_status); + kvfree(device_status); return rc; } diff --git a/drivers/s390/crypto/zcrypt_ep11misc.c b/drivers/s390/crypto/zcrypt_ep11misc.c index d4caf46ff9df..7dd987afcd55 100644 --- a/drivers/s390/crypto/zcrypt_ep11misc.c +++ b/drivers/s390/crypto/zcrypt_ep11misc.c @@ -1217,9 +1217,9 @@ int ep11_findcard2(u32 **apqns, u32 *nr_apqns, u16 cardnr, u16 domain, struct ep11_card_info eci; /* fetch status of all crypto cards */ - device_status = kmalloc_array(MAX_ZDEV_ENTRIES_EXT, - sizeof(struct zcrypt_device_status_ext), - GFP_KERNEL); + device_status = kvmalloc_array(MAX_ZDEV_ENTRIES_EXT, + sizeof(struct zcrypt_device_status_ext), + GFP_KERNEL); if (!device_status) return -ENOMEM; zcrypt_device_status_mask_ext(device_status); @@ -1227,7 +1227,7 @@ int ep11_findcard2(u32 **apqns, u32 *nr_apqns, u16 cardnr, u16 domain, /* allocate 1k space for up to 256 apqns */ _apqns = kmalloc_array(256, sizeof(u32), GFP_KERNEL); if (!_apqns) { - kfree(device_status); + kvfree(device_status); return -ENOMEM; } @@ -1282,7 +1282,7 @@ int ep11_findcard2(u32 **apqns, u32 *nr_apqns, u16 cardnr, u16 domain, rc = 0; } - kfree(device_status); + kvfree(device_status); return rc; } EXPORT_SYMBOL(ep11_findcard2); From 41677b1d9415f5512acf262c55bd48f71319ce29 Mon Sep 17 00:00:00 2001 From: Harald Freudenberger Date: Tue, 24 Mar 2020 17:30:39 +0100 Subject: [PATCH 51/57] s390/ap: remove power management code from ap bus and drivers The s390 power management support has been removed. So the api registration and the suspend and resume callbacks and all the code related to this for the ap bus and the ap drivers is removed with this patch. Signed-off-by: Harald Freudenberger Reviewed-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- drivers/s390/crypto/ap_bus.c | 117 ++--------------------------- drivers/s390/crypto/ap_bus.h | 5 -- drivers/s390/crypto/ap_queue.c | 51 ------------- drivers/s390/crypto/zcrypt_cex2a.c | 2 - drivers/s390/crypto/zcrypt_cex2c.c | 2 - drivers/s390/crypto/zcrypt_cex4.c | 2 - 6 files changed, 5 insertions(+), 174 deletions(-) diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c index c413211c6116..35064443e748 100644 --- a/drivers/s390/crypto/ap_bus.c +++ b/drivers/s390/crypto/ap_bus.c @@ -18,13 +18,13 @@ #include #include #include +#include #include #include #include #include #include #include -#include #include #include #include @@ -103,16 +103,9 @@ static struct hrtimer ap_poll_timer; */ static unsigned long long poll_timeout = 250000; -/* Suspend flag */ -static int ap_suspend_flag; /* Maximum domain id */ static int ap_max_domain_id; -/* - * Flag to check if domain was set through module parameter domain=. This is - * important when supsend and resume is done in a z/VM environment where the - * domain might change. - */ -static int user_set_domain; + static struct bus_type ap_bus_type; /* Adapter interrupt definitions */ @@ -386,8 +379,6 @@ void ap_request_timeout(struct timer_list *t) { struct ap_queue *aq = from_timer(aq, t, timeout); - if (ap_suspend_flag) - return; spin_lock_bh(&aq->lock); ap_wait(ap_sm_event(aq, AP_EVENT_TIMEOUT)); spin_unlock_bh(&aq->lock); @@ -401,8 +392,7 @@ void ap_request_timeout(struct timer_list *t) */ static enum hrtimer_restart ap_poll_timeout(struct hrtimer *unused) { - if (!ap_suspend_flag) - tasklet_schedule(&ap_tasklet); + tasklet_schedule(&ap_tasklet); return HRTIMER_NORESTART; } @@ -413,8 +403,7 @@ static enum hrtimer_restart ap_poll_timeout(struct hrtimer *unused) static void ap_interrupt_handler(struct airq_struct *airq, bool floating) { inc_irq_stat(IRQIO_APB); - if (!ap_suspend_flag) - tasklet_schedule(&ap_tasklet); + tasklet_schedule(&ap_tasklet); } /** @@ -486,7 +475,7 @@ static int ap_poll_thread(void *data) while (!kthread_should_stop()) { add_wait_queue(&ap_poll_wait, &wait); set_current_state(TASK_INTERRUPTIBLE); - if (ap_suspend_flag || !ap_pending_requests()) { + if (!ap_pending_requests()) { schedule(); try_to_freeze(); } @@ -587,33 +576,6 @@ static int ap_uevent(struct device *dev, struct kobj_uevent_env *env) return retval; } -static void ap_bus_suspend(void) -{ - AP_DBF(DBF_DEBUG, "%s running\n", __func__); - - ap_suspend_flag = 1; - /* - * Disable scanning for devices, thus we do not want to scan - * for them after removing. - */ - flush_work(&ap_scan_work); - tasklet_disable(&ap_tasklet); -} - -static int __ap_card_devices_unregister(struct device *dev, void *dummy) -{ - if (is_card_dev(dev)) - device_unregister(dev); - return 0; -} - -static int __ap_queue_devices_unregister(struct device *dev, void *dummy) -{ - if (is_queue_dev(dev)) - device_unregister(dev); - return 0; -} - static int __ap_queue_devices_with_id_unregister(struct device *dev, void *data) { if (is_queue_dev(dev) && @@ -622,60 +584,6 @@ static int __ap_queue_devices_with_id_unregister(struct device *dev, void *data) return 0; } -static void ap_bus_resume(void) -{ - int rc; - - AP_DBF(DBF_DEBUG, "%s running\n", __func__); - - /* remove all queue devices */ - bus_for_each_dev(&ap_bus_type, NULL, NULL, - __ap_queue_devices_unregister); - /* remove all card devices */ - bus_for_each_dev(&ap_bus_type, NULL, NULL, - __ap_card_devices_unregister); - - /* Reset thin interrupt setting */ - if (ap_interrupts_available() && !ap_using_interrupts()) { - rc = register_adapter_interrupt(&ap_airq); - ap_airq_flag = (rc == 0); - } - if (!ap_interrupts_available() && ap_using_interrupts()) { - unregister_adapter_interrupt(&ap_airq); - ap_airq_flag = 0; - } - /* Reset domain */ - if (!user_set_domain) - ap_domain_index = -1; - /* Get things going again */ - ap_suspend_flag = 0; - if (ap_airq_flag) - xchg(ap_airq.lsi_ptr, 0); - tasklet_enable(&ap_tasklet); - queue_work(system_long_wq, &ap_scan_work); -} - -static int ap_power_event(struct notifier_block *this, unsigned long event, - void *ptr) -{ - switch (event) { - case PM_HIBERNATION_PREPARE: - case PM_SUSPEND_PREPARE: - ap_bus_suspend(); - break; - case PM_POST_HIBERNATION: - case PM_POST_SUSPEND: - ap_bus_resume(); - break; - default: - break; - } - return NOTIFY_DONE; -} -static struct notifier_block ap_power_notifier = { - .notifier_call = ap_power_event, -}; - static struct bus_type ap_bus_type = { .name = "ap", .match = &ap_bus_match, @@ -852,8 +760,6 @@ EXPORT_SYMBOL(ap_driver_unregister); void ap_bus_force_rescan(void) { - if (ap_suspend_flag) - return; /* processing a asynchronous bus rescan */ del_timer(&ap_config_timer); queue_work(system_long_wq, &ap_scan_work); @@ -1546,8 +1452,6 @@ static void ap_scan_bus(struct work_struct *unused) static void ap_config_timeout(struct timer_list *unused) { - if (ap_suspend_flag) - return; queue_work(system_long_wq, &ap_scan_work); } @@ -1620,11 +1524,6 @@ static int __init ap_module_init(void) ap_domain_index); ap_domain_index = -1; } - /* In resume callback we need to know if the user had set the domain. - * If so, we can not just reset it. - */ - if (ap_domain_index >= 0) - user_set_domain = 1; if (ap_interrupts_available()) { rc = register_adapter_interrupt(&ap_airq); @@ -1667,17 +1566,11 @@ static int __init ap_module_init(void) goto out_work; } - rc = register_pm_notifier(&ap_power_notifier); - if (rc) - goto out_pm; - queue_work(system_long_wq, &ap_scan_work); initialised = true; return 0; -out_pm: - ap_poll_thread_stop(); out_work: hrtimer_cancel(&ap_poll_timer); root_device_unregister(ap_root_device); diff --git a/drivers/s390/crypto/ap_bus.h b/drivers/s390/crypto/ap_bus.h index 4348fdff1c61..8e8e37b6c0ee 100644 --- a/drivers/s390/crypto/ap_bus.h +++ b/drivers/s390/crypto/ap_bus.h @@ -91,7 +91,6 @@ enum ap_state { AP_STATE_IDLE, AP_STATE_WORKING, AP_STATE_QUEUE_FULL, - AP_STATE_SUSPEND_WAIT, AP_STATE_REMOVE, /* about to be removed from driver */ AP_STATE_UNBOUND, /* momentary not bound to a driver */ AP_STATE_BORKED, /* broken */ @@ -136,8 +135,6 @@ struct ap_driver { int (*probe)(struct ap_device *); void (*remove)(struct ap_device *); - void (*suspend)(struct ap_device *); - void (*resume)(struct ap_device *); }; #define to_ap_drv(x) container_of((x), struct ap_driver, driver) @@ -259,8 +256,6 @@ void ap_queue_init_reply(struct ap_queue *aq, struct ap_message *ap_msg); struct ap_queue *ap_queue_create(ap_qid_t qid, int device_type); void ap_queue_prepare_remove(struct ap_queue *aq); void ap_queue_remove(struct ap_queue *aq); -void ap_queue_suspend(struct ap_device *ap_dev); -void ap_queue_resume(struct ap_device *ap_dev); void ap_queue_init_state(struct ap_queue *aq); struct ap_card *ap_card_create(int id, int queue_depth, int raw_device_type, diff --git a/drivers/s390/crypto/ap_queue.c b/drivers/s390/crypto/ap_queue.c index 9cb997cffa61..0eaf1d04e8df 100644 --- a/drivers/s390/crypto/ap_queue.c +++ b/drivers/s390/crypto/ap_queue.c @@ -200,31 +200,6 @@ static enum ap_wait ap_sm_read(struct ap_queue *aq) } } -/** - * ap_sm_suspend_read(): Receive pending reply messages from an AP queue - * without changing the device state in between. In suspend mode we don't - * allow sending new requests, therefore just fetch pending replies. - * @aq: pointer to the AP queue - * - * Returns AP_WAIT_NONE or AP_WAIT_AGAIN - */ -static enum ap_wait ap_sm_suspend_read(struct ap_queue *aq) -{ - struct ap_queue_status status; - - if (!aq->reply) - return AP_WAIT_NONE; - status = ap_sm_recv(aq); - switch (status.response_code) { - case AP_RESPONSE_NORMAL: - if (aq->queue_count > 0) - return AP_WAIT_AGAIN; - fallthrough; - default: - return AP_WAIT_NONE; - } -} - /** * ap_sm_write(): Send messages from the request queue to an AP queue. * @aq: pointer to the AP queue @@ -417,10 +392,6 @@ static ap_func_t *ap_jumptable[NR_AP_STATES][NR_AP_EVENTS] = { [AP_EVENT_POLL] = ap_sm_read, [AP_EVENT_TIMEOUT] = ap_sm_reset, }, - [AP_STATE_SUSPEND_WAIT] = { - [AP_EVENT_POLL] = ap_sm_suspend_read, - [AP_EVENT_TIMEOUT] = ap_sm_nop, - }, [AP_STATE_REMOVE] = { [AP_EVENT_POLL] = ap_sm_nop, [AP_EVENT_TIMEOUT] = ap_sm_nop, @@ -449,28 +420,6 @@ enum ap_wait ap_sm_event_loop(struct ap_queue *aq, enum ap_event event) return wait; } -/* - * Power management for queue devices - */ -void ap_queue_suspend(struct ap_device *ap_dev) -{ - struct ap_queue *aq = to_ap_queue(&ap_dev->device); - - /* Poll on the device until all requests are finished. */ - spin_lock_bh(&aq->lock); - aq->state = AP_STATE_SUSPEND_WAIT; - while (ap_sm_event(aq, AP_EVENT_POLL) != AP_WAIT_NONE) - ; - aq->state = AP_STATE_BORKED; - spin_unlock_bh(&aq->lock); -} -EXPORT_SYMBOL(ap_queue_suspend); - -void ap_queue_resume(struct ap_device *ap_dev) -{ -} -EXPORT_SYMBOL(ap_queue_resume); - /* * AP queue related attributes. */ diff --git a/drivers/s390/crypto/zcrypt_cex2a.c b/drivers/s390/crypto/zcrypt_cex2a.c index 7cbb384ec535..b447f3e9e4a2 100644 --- a/drivers/s390/crypto/zcrypt_cex2a.c +++ b/drivers/s390/crypto/zcrypt_cex2a.c @@ -204,8 +204,6 @@ static void zcrypt_cex2a_queue_remove(struct ap_device *ap_dev) static struct ap_driver zcrypt_cex2a_queue_driver = { .probe = zcrypt_cex2a_queue_probe, .remove = zcrypt_cex2a_queue_remove, - .suspend = ap_queue_suspend, - .resume = ap_queue_resume, .ids = zcrypt_cex2a_queue_ids, .flags = AP_DRIVER_FLAG_DEFAULT, }; diff --git a/drivers/s390/crypto/zcrypt_cex2c.c b/drivers/s390/crypto/zcrypt_cex2c.c index c78c0d119806..266440168bb7 100644 --- a/drivers/s390/crypto/zcrypt_cex2c.c +++ b/drivers/s390/crypto/zcrypt_cex2c.c @@ -260,8 +260,6 @@ static void zcrypt_cex2c_queue_remove(struct ap_device *ap_dev) static struct ap_driver zcrypt_cex2c_queue_driver = { .probe = zcrypt_cex2c_queue_probe, .remove = zcrypt_cex2c_queue_remove, - .suspend = ap_queue_suspend, - .resume = ap_queue_resume, .ids = zcrypt_cex2c_queue_ids, .flags = AP_DRIVER_FLAG_DEFAULT, }; diff --git a/drivers/s390/crypto/zcrypt_cex4.c b/drivers/s390/crypto/zcrypt_cex4.c index 8cfb1e2e9e1b..cdaa8348ad04 100644 --- a/drivers/s390/crypto/zcrypt_cex4.c +++ b/drivers/s390/crypto/zcrypt_cex4.c @@ -656,8 +656,6 @@ static void zcrypt_cex4_queue_remove(struct ap_device *ap_dev) static struct ap_driver zcrypt_cex4_queue_driver = { .probe = zcrypt_cex4_queue_probe, .remove = zcrypt_cex4_queue_remove, - .suspend = ap_queue_suspend, - .resume = ap_queue_resume, .ids = zcrypt_cex4_queue_ids, .flags = AP_DRIVER_FLAG_DEFAULT, }; From 5e1fb45ec8e2eb80f00cce6661b747800e998c78 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Thu, 19 Mar 2020 10:36:32 +0100 Subject: [PATCH 52/57] s390/ccwgroup: remove pm support As s390 no longer supports ARCH_HIBERNATION_POSSIBLE, drop the unused pm ops from the ccwgroup bus driver. Signed-off-by: Julian Wiedmann Signed-off-by: Vasily Gorbik --- drivers/s390/cio/ccwgroup.c | 69 ------------------------------------- 1 file changed, 69 deletions(-) diff --git a/drivers/s390/cio/ccwgroup.c b/drivers/s390/cio/ccwgroup.c index b42a93736668..483a9ecfcbb1 100644 --- a/drivers/s390/cio/ccwgroup.c +++ b/drivers/s390/cio/ccwgroup.c @@ -485,79 +485,10 @@ static void ccwgroup_shutdown(struct device *dev) gdrv->shutdown(gdev); } -static int ccwgroup_pm_prepare(struct device *dev) -{ - struct ccwgroup_device *gdev = to_ccwgroupdev(dev); - struct ccwgroup_driver *gdrv = to_ccwgroupdrv(gdev->dev.driver); - - /* Fail while device is being set online/offline. */ - if (atomic_read(&gdev->onoff)) - return -EAGAIN; - - if (!gdev->dev.driver || gdev->state != CCWGROUP_ONLINE) - return 0; - - return gdrv->prepare ? gdrv->prepare(gdev) : 0; -} - -static void ccwgroup_pm_complete(struct device *dev) -{ - struct ccwgroup_device *gdev = to_ccwgroupdev(dev); - struct ccwgroup_driver *gdrv = to_ccwgroupdrv(dev->driver); - - if (!gdev->dev.driver || gdev->state != CCWGROUP_ONLINE) - return; - - if (gdrv->complete) - gdrv->complete(gdev); -} - -static int ccwgroup_pm_freeze(struct device *dev) -{ - struct ccwgroup_device *gdev = to_ccwgroupdev(dev); - struct ccwgroup_driver *gdrv = to_ccwgroupdrv(gdev->dev.driver); - - if (!gdev->dev.driver || gdev->state != CCWGROUP_ONLINE) - return 0; - - return gdrv->freeze ? gdrv->freeze(gdev) : 0; -} - -static int ccwgroup_pm_thaw(struct device *dev) -{ - struct ccwgroup_device *gdev = to_ccwgroupdev(dev); - struct ccwgroup_driver *gdrv = to_ccwgroupdrv(gdev->dev.driver); - - if (!gdev->dev.driver || gdev->state != CCWGROUP_ONLINE) - return 0; - - return gdrv->thaw ? gdrv->thaw(gdev) : 0; -} - -static int ccwgroup_pm_restore(struct device *dev) -{ - struct ccwgroup_device *gdev = to_ccwgroupdev(dev); - struct ccwgroup_driver *gdrv = to_ccwgroupdrv(gdev->dev.driver); - - if (!gdev->dev.driver || gdev->state != CCWGROUP_ONLINE) - return 0; - - return gdrv->restore ? gdrv->restore(gdev) : 0; -} - -static const struct dev_pm_ops ccwgroup_pm_ops = { - .prepare = ccwgroup_pm_prepare, - .complete = ccwgroup_pm_complete, - .freeze = ccwgroup_pm_freeze, - .thaw = ccwgroup_pm_thaw, - .restore = ccwgroup_pm_restore, -}; - static struct bus_type ccwgroup_bus_type = { .name = "ccwgroup", .remove = ccwgroup_remove, .shutdown = ccwgroup_shutdown, - .pm = &ccwgroup_pm_ops, }; bool dev_is_ccwgroup(struct device *dev) From dea284867701024d5058d7ab1542763a12006595 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Fri, 20 Mar 2020 14:00:00 +0100 Subject: [PATCH 53/57] s390/qdio: remove unused function declarations commit 50f769df1c4b ("[S390] qdio: improve inbound buffer acknowledgement") introduced these declarations, but noone added the actual code for them. Signed-off-by: Julian Wiedmann Reviewed-by: Benjamin Block Signed-off-by: Vasily Gorbik --- drivers/s390/cio/qdio.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/s390/cio/qdio.h b/drivers/s390/cio/qdio.h index e24a2cde487a..8564e8a8e359 100644 --- a/drivers/s390/cio/qdio.h +++ b/drivers/s390/cio/qdio.h @@ -394,8 +394,6 @@ int qdio_setup_irq(struct qdio_irq *irq_ptr, struct qdio_initialize *init_data); void qdio_print_subchannel_info(struct qdio_irq *irq_ptr, struct ccw_device *cdev); void qdio_release_memory(struct qdio_irq *irq_ptr); -int qdio_setup_create_sysfs(struct ccw_device *cdev); -void qdio_setup_destroy_sysfs(struct ccw_device *cdev); int qdio_setup_init(void); void qdio_setup_exit(void); int qdio_enable_async_operation(struct qdio_output_q *q); From b2745655be3658cd422ba2b07cf19eb64e0c0eaf Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Fri, 20 Mar 2020 14:00:00 +0100 Subject: [PATCH 54/57] s390/qdio: set qdio_irq->cdev at allocation time Set up qdio_irq->cdev right when the qdio_irq struct is allocated, so that all subsequent code can rely on this pointer. Then convert two helper functions to not pass a cdev parameter around. Signed-off-by: Julian Wiedmann Reviewed-by: Benjamin Block Signed-off-by: Vasily Gorbik --- drivers/s390/cio/qdio.h | 3 +-- drivers/s390/cio/qdio_debug.c | 8 ++++---- drivers/s390/cio/qdio_debug.h | 3 +-- drivers/s390/cio/qdio_main.c | 10 ++++++---- drivers/s390/cio/qdio_setup.c | 8 +++----- 5 files changed, 15 insertions(+), 17 deletions(-) diff --git a/drivers/s390/cio/qdio.h b/drivers/s390/cio/qdio.h index 8564e8a8e359..098bf8c9bbec 100644 --- a/drivers/s390/cio/qdio.h +++ b/drivers/s390/cio/qdio.h @@ -391,8 +391,7 @@ int qdio_setup_get_ssqd(struct qdio_irq *irq_ptr, struct subchannel_id *schid, struct qdio_ssqd_desc *data); int qdio_setup_irq(struct qdio_irq *irq_ptr, struct qdio_initialize *init_data); -void qdio_print_subchannel_info(struct qdio_irq *irq_ptr, - struct ccw_device *cdev); +void qdio_print_subchannel_info(struct qdio_irq *irq_ptr); void qdio_release_memory(struct qdio_irq *irq_ptr); int qdio_setup_init(void); void qdio_setup_exit(void); diff --git a/drivers/s390/cio/qdio_debug.c b/drivers/s390/cio/qdio_debug.c index 8544faddf80c..f21fb406bef0 100644 --- a/drivers/s390/cio/qdio_debug.c +++ b/drivers/s390/cio/qdio_debug.c @@ -81,7 +81,7 @@ int qdio_allocate_dbf(struct qdio_initialize *init_data, /* allocate trace view for the interface */ snprintf(text, QDIO_DBF_NAME_LEN, "qdio_%s", - dev_name(&init_data->cdev->dev)); + dev_name(&irq_ptr->cdev->dev)); irq_ptr->debug_area = qdio_get_dbf_entry(text); if (irq_ptr->debug_area) DBF_DEV_EVENT(DBF_ERR, irq_ptr, "dbf reused"); @@ -311,16 +311,16 @@ static void setup_debugfs_entry(struct dentry *parent, struct qdio_q *q) debugfs_create_file(name, 0444, parent, q, &qstat_fops); } -void qdio_setup_debug_entries(struct qdio_irq *irq_ptr, struct ccw_device *cdev) +void qdio_setup_debug_entries(struct qdio_irq *irq_ptr) { struct qdio_q *q; int i; - irq_ptr->debugfs_dev = debugfs_create_dir(dev_name(&cdev->dev), + irq_ptr->debugfs_dev = debugfs_create_dir(dev_name(&irq_ptr->cdev->dev), debugfs_root); debugfs_create_file("statistics", S_IFREG | S_IRUGO | S_IWUSR, irq_ptr->debugfs_dev, irq_ptr, &debugfs_perf_fops); - debugfs_create_file("ssqd", 0444, irq_ptr->debugfs_dev, cdev, + debugfs_create_file("ssqd", 0444, irq_ptr->debugfs_dev, irq_ptr->cdev, &ssqd_fops); for_each_input_queue(irq_ptr, q, i) diff --git a/drivers/s390/cio/qdio_debug.h b/drivers/s390/cio/qdio_debug.h index f85f5fa7cefc..122450ba6b90 100644 --- a/drivers/s390/cio/qdio_debug.h +++ b/drivers/s390/cio/qdio_debug.h @@ -66,8 +66,7 @@ static inline void DBF_DEV_HEX(struct qdio_irq *dev, void *addr, int qdio_allocate_dbf(struct qdio_initialize *init_data, struct qdio_irq *irq_ptr); -void qdio_setup_debug_entries(struct qdio_irq *irq_ptr, - struct ccw_device *cdev); +void qdio_setup_debug_entries(struct qdio_irq *irq_ptr); void qdio_shutdown_debug_entries(struct qdio_irq *irq_ptr); int qdio_debug_init(void); void qdio_debug_exit(void); diff --git a/drivers/s390/cio/qdio_main.c b/drivers/s390/cio/qdio_main.c index 2886b95f4741..e93155dbe887 100644 --- a/drivers/s390/cio/qdio_main.c +++ b/drivers/s390/cio/qdio_main.c @@ -1229,10 +1229,11 @@ EXPORT_SYMBOL_GPL(qdio_free); */ int qdio_allocate(struct qdio_initialize *init_data) { + struct ccw_device *cdev = init_data->cdev; struct subchannel_id schid; struct qdio_irq *irq_ptr; - ccw_device_get_schid(init_data->cdev, &schid); + ccw_device_get_schid(cdev, &schid); DBF_EVENT("qallocate:%4x", schid.sch_no); if ((init_data->no_input_qs && !init_data->input_handler) || @@ -1252,6 +1253,7 @@ int qdio_allocate(struct qdio_initialize *init_data) if (!irq_ptr) goto out_err; + irq_ptr->cdev = cdev; mutex_init(&irq_ptr->setup_mutex); if (qdio_allocate_dbf(init_data, irq_ptr)) goto out_rel; @@ -1276,7 +1278,7 @@ int qdio_allocate(struct qdio_initialize *init_data) goto out_rel; INIT_LIST_HEAD(&irq_ptr->entry); - init_data->cdev->private->qdio_data = irq_ptr; + cdev->private->qdio_data = irq_ptr; qdio_set_state(irq_ptr, QDIO_IRQ_STATE_INACTIVE); return 0; out_rel: @@ -1372,8 +1374,8 @@ int qdio_establish(struct qdio_initialize *init_data) qdio_init_buf_states(irq_ptr); mutex_unlock(&irq_ptr->setup_mutex); - qdio_print_subchannel_info(irq_ptr, cdev); - qdio_setup_debug_entries(irq_ptr, cdev); + qdio_print_subchannel_info(irq_ptr); + qdio_setup_debug_entries(irq_ptr); return 0; } EXPORT_SYMBOL_GPL(qdio_establish); diff --git a/drivers/s390/cio/qdio_setup.c b/drivers/s390/cio/qdio_setup.c index ad04947a0032..e1b7a9118f9d 100644 --- a/drivers/s390/cio/qdio_setup.c +++ b/drivers/s390/cio/qdio_setup.c @@ -452,8 +452,8 @@ static void setup_qib(struct qdio_irq *irq_ptr, int qdio_setup_irq(struct qdio_irq *irq_ptr, struct qdio_initialize *init_data) { + struct ccw_device *cdev = irq_ptr->cdev; struct ciw *ciw; - struct ccw_device *cdev = init_data->cdev; memset(&irq_ptr->qib, 0, sizeof(irq_ptr->qib)); memset(&irq_ptr->siga_flag, 0, sizeof(irq_ptr->siga_flag)); @@ -471,7 +471,6 @@ int qdio_setup_irq(struct qdio_irq *irq_ptr, struct qdio_initialize *init_data) irq_ptr->int_parm = init_data->int_parm; irq_ptr->nr_input_qs = init_data->no_input_qs; irq_ptr->nr_output_qs = init_data->no_output_qs; - irq_ptr->cdev = cdev; irq_ptr->scan_threshold = init_data->scan_threshold; ccw_device_get_schid(cdev, &irq_ptr->schid); setup_queues(irq_ptr, init_data); @@ -511,14 +510,13 @@ int qdio_setup_irq(struct qdio_irq *irq_ptr, struct qdio_initialize *init_data) return 0; } -void qdio_print_subchannel_info(struct qdio_irq *irq_ptr, - struct ccw_device *cdev) +void qdio_print_subchannel_info(struct qdio_irq *irq_ptr) { char s[80]; snprintf(s, 80, "qdio: %s %s on SC %x using " "AI:%d QEBSM:%d PRI:%d TDD:%d SIGA:%s%s%s%s%s\n", - dev_name(&cdev->dev), + dev_name(&irq_ptr->cdev->dev), (irq_ptr->qib.qfmt == QDIO_QETH_QFMT) ? "OSA" : ((irq_ptr->qib.qfmt == QDIO_ZFCP_QFMT) ? "ZFCP" : "HS"), irq_ptr->schid.sch_no, From 6a3eb35e56b3308966945b76ec1dfbc18537feef Mon Sep 17 00:00:00 2001 From: Alexander Gordeev Date: Fri, 28 Feb 2020 11:32:01 +0100 Subject: [PATCH 55/57] s390/mm: remove page table downgrade support This update consolidates page table handling code. Because there are hardly any 31-bit binaries left we do not need to optimize for that. No extra efforts are needed to ensure that a compat task does not map anything above 2GB. The TASK_SIZE limit for 31-bit tasks is 2GB already and the generic code does check that a resulting map address would not surpass that limit. Signed-off-by: Alexander Gordeev Reviewed-by: Heiko Carstens Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/mmu.h | 2 -- arch/s390/include/asm/mmu_context.h | 5 ----- arch/s390/include/asm/pgalloc.h | 16 +--------------- arch/s390/include/asm/processor.h | 1 - arch/s390/mm/pgalloc.c | 24 ------------------------ 5 files changed, 1 insertion(+), 47 deletions(-) diff --git a/arch/s390/include/asm/mmu.h b/arch/s390/include/asm/mmu.h index bcfb6371086f..a8418e1379eb 100644 --- a/arch/s390/include/asm/mmu.h +++ b/arch/s390/include/asm/mmu.h @@ -32,8 +32,6 @@ typedef struct { unsigned int uses_cmm:1; /* The gmaps associated with this context are allowed to use huge pages. */ unsigned int allow_gmap_hpage_1m:1; - /* The mmu context is for compat task */ - unsigned int compat_mm:1; } mm_context_t; #define INIT_MM_CONTEXT(name) \ diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h index 8d04e6f3f796..3763734965e4 100644 --- a/arch/s390/include/asm/mmu_context.h +++ b/arch/s390/include/asm/mmu_context.h @@ -25,7 +25,6 @@ static inline int init_new_context(struct task_struct *tsk, atomic_set(&mm->context.flush_count, 0); mm->context.gmap_asce = 0; mm->context.flush_mm = 0; - mm->context.compat_mm = test_thread_flag(TIF_31BIT); #ifdef CONFIG_PGSTE mm->context.alloc_pgste = page_table_allocate_pgste || test_thread_flag(TIF_PGSTE) || @@ -57,10 +56,6 @@ static inline int init_new_context(struct task_struct *tsk, mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH | _ASCE_USER_BITS | _ASCE_TYPE_REGION2; break; - case _REGION3_SIZE: - /* forked 2-level compat task, set new asce with new mm->pgd */ - mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH | - _ASCE_USER_BITS | _ASCE_TYPE_SEGMENT; } crst_table_init((unsigned long *) mm->pgd, pgd_entry_type(mm)); return 0; diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h index f0d7457fa1da..5e3ff9f7a586 100644 --- a/arch/s390/include/asm/pgalloc.h +++ b/arch/s390/include/asm/pgalloc.h @@ -46,7 +46,6 @@ static inline unsigned long pgd_entry_type(struct mm_struct *mm) } int crst_table_upgrade(struct mm_struct *mm, unsigned long limit); -void crst_table_downgrade(struct mm_struct *); static inline unsigned long check_asce_limit(struct mm_struct *mm, unsigned long addr, unsigned long len) @@ -130,24 +129,11 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) static inline pgd_t *pgd_alloc(struct mm_struct *mm) { - unsigned long *table = crst_table_alloc(mm); - - if (!table) - return NULL; - if (mm->context.asce_limit == _REGION3_SIZE) { - /* Forking a compat process with 2 page table levels */ - if (!pgtable_pmd_page_ctor(virt_to_page(table))) { - crst_table_free(mm, table); - return NULL; - } - } - return (pgd_t *) table; + return (pgd_t *) crst_table_alloc(mm); } static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) { - if (mm->context.asce_limit == _REGION3_SIZE) - pgtable_pmd_page_dtor(virt_to_page(pgd)); crst_table_free(mm, (unsigned long *) pgd); } diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index c9522346799f..dee5e57da518 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -179,7 +179,6 @@ typedef struct thread_struct thread_struct; regs->psw.mask = PSW_USER_BITS | PSW_MASK_BA; \ regs->psw.addr = new_psw; \ regs->gprs[15] = new_stackp; \ - crst_table_downgrade(current->mm); \ execve_tail(); \ } while (0) diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c index af3bddd5e568..4630fb7705ca 100644 --- a/arch/s390/mm/pgalloc.c +++ b/arch/s390/mm/pgalloc.c @@ -138,30 +138,6 @@ int crst_table_upgrade(struct mm_struct *mm, unsigned long end) return -ENOMEM; } -void crst_table_downgrade(struct mm_struct *mm) -{ - pgd_t *pgd; - - /* downgrade should only happen from 3 to 2 levels (compat only) */ - VM_BUG_ON(mm->context.asce_limit != _REGION2_SIZE); - - if (current->active_mm == mm) { - clear_user_asce(); - __tlb_flush_mm(mm); - } - - pgd = mm->pgd; - mm_dec_nr_pmds(mm); - mm->pgd = (pgd_t *) (pgd_val(*pgd) & _REGION_ENTRY_ORIGIN); - mm->context.asce_limit = _REGION3_SIZE; - mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH | - _ASCE_USER_BITS | _ASCE_TYPE_SEGMENT; - crst_table_free(mm, (unsigned long *) pgd); - - if (current->active_mm == mm) - set_user_asce(mm); -} - static inline unsigned int atomic_xor_bits(atomic_t *v, unsigned int bits) { unsigned int old, new; From f75556081afe5a565c2ce200837406303a59ae2b Mon Sep 17 00:00:00 2001 From: Alexander Gordeev Date: Thu, 19 Mar 2020 13:44:49 +0100 Subject: [PATCH 56/57] s390/mm: cleanup virtual memory constants usage Remove duplicate definitions and consolidate usage of virutal and address translation constants. Signed-off-by: Alexander Gordeev Reviewed-by: Heiko Carstens Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/mmu_context.h | 4 ++-- arch/s390/include/asm/processor.h | 8 ++++---- arch/s390/mm/pgalloc.c | 4 ++-- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h index 3763734965e4..248d51cdcad9 100644 --- a/arch/s390/include/asm/mmu_context.h +++ b/arch/s390/include/asm/mmu_context.h @@ -42,11 +42,11 @@ static inline int init_new_context(struct task_struct *tsk, */ case 0: /* context created by exec, set asce limit to 4TB */ - mm->context.asce_limit = STACK_TOP_MAX; + mm->context.asce_limit = _REGION2_SIZE; mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH | _ASCE_USER_BITS | _ASCE_TYPE_REGION3; break; - case -PAGE_SIZE: + case TASK_SIZE_MAX: /* forked 5-level task, set new asce with new_mm->pgd */ mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH | _ASCE_USER_BITS | _ASCE_TYPE_REGION1; diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index dee5e57da518..d052adfd53f3 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -93,15 +93,15 @@ extern void __bpon(void); */ #define TASK_SIZE_OF(tsk) (test_tsk_thread_flag(tsk, TIF_31BIT) ? \ - (1UL << 31) : -PAGE_SIZE) + _REGION3_SIZE : TASK_SIZE_MAX) #define TASK_UNMAPPED_BASE (test_thread_flag(TIF_31BIT) ? \ - (1UL << 30) : (1UL << 41)) + (_REGION3_SIZE >> 1) : (_REGION2_SIZE >> 1)) #define TASK_SIZE TASK_SIZE_OF(current) #define TASK_SIZE_MAX (-PAGE_SIZE) #define STACK_TOP (test_thread_flag(TIF_31BIT) ? \ - (1UL << 31) : (1UL << 42)) -#define STACK_TOP_MAX (1UL << 42) + _REGION3_SIZE : _REGION2_SIZE) +#define STACK_TOP_MAX _REGION2_SIZE #define HAVE_ARCH_PICK_MMAP_LAYOUT diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c index 4630fb7705ca..498c98a312f4 100644 --- a/arch/s390/mm/pgalloc.c +++ b/arch/s390/mm/pgalloc.c @@ -121,7 +121,7 @@ int crst_table_upgrade(struct mm_struct *mm, unsigned long end) __pgd = (unsigned long *) mm->pgd; pgd_populate(mm, (pgd_t *) pgd, (p4d_t *) __pgd); mm->pgd = (pgd_t *) pgd; - mm->context.asce_limit = -PAGE_SIZE; + mm->context.asce_limit = TASK_SIZE_MAX; mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH | _ASCE_USER_BITS | _ASCE_TYPE_REGION1; } @@ -527,7 +527,7 @@ void base_asce_free(unsigned long asce) base_region2_walk(table, 0, _REGION1_SIZE, 0); break; case _ASCE_TYPE_REGION1: - base_region1_walk(table, 0, -_PAGE_SIZE, 0); + base_region1_walk(table, 0, TASK_SIZE_MAX, 0); break; } base_crst_free(table); From 1058c163dc31b3335c9cf7c4fa42ccf87be73017 Mon Sep 17 00:00:00 2001 From: Alexander Gordeev Date: Thu, 19 Mar 2020 13:44:50 +0100 Subject: [PATCH 57/57] s390/mm: cleanup init_new_context() callback The set of values asce_limit may be assigned with is TASK_SIZE_MAX, _REGION1_SIZE, _REGION2_SIZE and 0 as a special case if the callback was called from execve(). Do VM_BUG_ON() if asce_limit is something else. Save few CPU cycles by removing unnecessary asce_limit re-assignment in case of 3-level task and redundant PGD entry type reconstruction. Signed-off-by: Alexander Gordeev Reviewed-by: Heiko Carstens Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/mmu_context.h | 35 +++++++++++++++++------------ arch/s390/include/asm/pgalloc.h | 11 --------- 2 files changed, 21 insertions(+), 25 deletions(-) diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h index 248d51cdcad9..844396b3735e 100644 --- a/arch/s390/include/asm/mmu_context.h +++ b/arch/s390/include/asm/mmu_context.h @@ -18,6 +18,8 @@ static inline int init_new_context(struct task_struct *tsk, struct mm_struct *mm) { + unsigned long asce_type, init_entry; + spin_lock_init(&mm->context.lock); INIT_LIST_HEAD(&mm->context.pgtable_list); INIT_LIST_HEAD(&mm->context.gmap_list); @@ -35,29 +37,34 @@ static inline int init_new_context(struct task_struct *tsk, mm->context.allow_gmap_hpage_1m = 0; #endif switch (mm->context.asce_limit) { - case _REGION2_SIZE: + default: /* - * forked 3-level task, fall through to set new asce with new - * mm->pgd + * context created by exec, the value of asce_limit can + * only be zero in this case */ - case 0: - /* context created by exec, set asce limit to 4TB */ + VM_BUG_ON(mm->context.asce_limit); + /* continue as 3-level task */ mm->context.asce_limit = _REGION2_SIZE; - mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH | - _ASCE_USER_BITS | _ASCE_TYPE_REGION3; + fallthrough; + case _REGION2_SIZE: + /* forked 3-level task */ + init_entry = _REGION3_ENTRY_EMPTY; + asce_type = _ASCE_TYPE_REGION3; break; case TASK_SIZE_MAX: - /* forked 5-level task, set new asce with new_mm->pgd */ - mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH | - _ASCE_USER_BITS | _ASCE_TYPE_REGION1; + /* forked 5-level task */ + init_entry = _REGION1_ENTRY_EMPTY; + asce_type = _ASCE_TYPE_REGION1; break; case _REGION1_SIZE: - /* forked 4-level task, set new asce with new mm->pgd */ - mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH | - _ASCE_USER_BITS | _ASCE_TYPE_REGION2; + /* forked 4-level task */ + init_entry = _REGION2_ENTRY_EMPTY; + asce_type = _ASCE_TYPE_REGION2; break; } - crst_table_init((unsigned long *) mm->pgd, pgd_entry_type(mm)); + mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH | + _ASCE_USER_BITS | asce_type; + crst_table_init((unsigned long *) mm->pgd, init_entry); return 0; } diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h index 5e3ff9f7a586..74a352f8c0d1 100644 --- a/arch/s390/include/asm/pgalloc.h +++ b/arch/s390/include/asm/pgalloc.h @@ -34,17 +34,6 @@ static inline void crst_table_init(unsigned long *crst, unsigned long entry) memset64((u64 *)crst, entry, _CRST_ENTRIES); } -static inline unsigned long pgd_entry_type(struct mm_struct *mm) -{ - if (mm_pmd_folded(mm)) - return _SEGMENT_ENTRY_EMPTY; - if (mm_pud_folded(mm)) - return _REGION3_ENTRY_EMPTY; - if (mm_p4d_folded(mm)) - return _REGION2_ENTRY_EMPTY; - return _REGION1_ENTRY_EMPTY; -} - int crst_table_upgrade(struct mm_struct *mm, unsigned long limit); static inline unsigned long check_asce_limit(struct mm_struct *mm, unsigned long addr,