libnvdimm for 4.8

1/ Replace pcommit with ADR / directed-flushing: The pcommit instruction, which has not shipped on any product, is deprecated. Instead, the requirement is that platforms implement either ADR, or provide one or more flush addresses per nvdimm. ADR (Asynchronous DRAM Refresh) flushes data in posted write buffers to the memory controller on a power-fail event. Flush addresses are defined in ACPI 6.x as an NVDIMM Firmware Interface Table (NFIT) sub-structure: "Flush Hint Address Structure". A flush hint is an mmio address that when written and fenced assures that all previous posted writes targeting a given dimm have been flushed to media. 2/ On-demand ARS (address range scrub): Linux uses the results of the ACPI ARS commands to track bad blocks in pmem devices. When latent errors are detected we re-scrub the media to refresh the bad block list, userspace can also request a re-scrub at any time. 3/ Support for the Microsoft DSM (device specific method) command format. 4/ Support for EDK2/OVMF virtual disk device memory ranges. 5/ Various fixes and cleanups across the subsystem. -----BEGIN PGP SIGNATURE----- Version: GnuPG v1 iQIcBAABAgAGBQJXmXBsAAoJEB7SkWpmfYgCEwwP/1IOt9ocP+iHLMDH9KE7VaTZ NmUDR+Zy6g5cRQM7SgcuU5BXUcx+OsSrSrUTVF1cW994o9Gbz1mFotkv0ZAsPcYY ZVRQxo2oqHrssyOcg+PsgKWiXn68rJOCgmpEyzaJywl5qTMst7pzsT1s1f7rSh6h trCf4VaJJwxZR8fARGtlHUnnhPe2Orp99EZRKEWprAsIv2kPuWpPHSjRjuEgN1JG KW8AYwWqFTtiLRUk86I4KBB0wcDrfctsjgN9Ogd6+aHyQBRnVSr2U+vDCFkC8KLu qiDCpYp+yyxBjclnljz7tRRT3GtzfCUWd4v2KVWqgg2IaobUc0Lbukp/rmikUXQP WLikT2OCQ994eFK5OX3Q3cIU/4j459TQnof8q14yVSpjAKrNUXVSR5puN7Hxa+V7 41wKrAsnsyY1oq+Yd/rMR8VfH7PHx3bFkrmRCGZCufLX1UQm4aYj+sWagDKiV3yA DiudghbOnhfurfGsnXUVw7y7GKs+gNWNBmB6ndAD6ZEHmKoGUhAEbJDLCc3DnANl b/2mv1MIdIcC1DlCmnbbcn6fv6bICe/r8poK3VrCK3UgOq/EOvKIWl7giP+k1JuC 6DdVYhlNYIVFXUNSLFAwz8OkLu8byx7WDm36iEqrKHtPw+8qa/2bWVgOU6OBgpjV cN3edFVIdxvZeMgM5Ubq =xCBG -----END PGP SIGNATURE----- Merge tag 'libnvdimm-for-4.8' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm Pull libnvdimm updates from Dan Williams: - Replace pcommit with ADR / directed-flushing. The pcommit instruction, which has not shipped on any product, is deprecated. Instead, the requirement is that platforms implement either ADR, or provide one or more flush addresses per nvdimm. ADR (Asynchronous DRAM Refresh) flushes data in posted write buffers to the memory controller on a power-fail event. Flush addresses are defined in ACPI 6.x as an NVDIMM Firmware Interface Table (NFIT) sub-structure: "Flush Hint Address Structure". A flush hint is an mmio address that when written and fenced assures that all previous posted writes targeting a given dimm have been flushed to media. - On-demand ARS (address range scrub). Linux uses the results of the ACPI ARS commands to track bad blocks in pmem devices. When latent errors are detected we re-scrub the media to refresh the bad block list, userspace can also request a re-scrub at any time. - Support for the Microsoft DSM (device specific method) command format. - Support for EDK2/OVMF virtual disk device memory ranges. - Various fixes and cleanups across the subsystem. * tag 'libnvdimm-for-4.8' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm: (41 commits) libnvdimm-btt: Delete an unnecessary check before the function call "__nd_device_register" nfit: do an ARS scrub on hitting a latent media error nfit: move to nfit/ sub-directory nfit, libnvdimm: allow an ARS scrub to be triggered on demand libnvdimm: register nvdimm_bus devices with an nd_bus driver pmem: clarify a debug print in pmem_clear_poison x86/insn: remove pcommit Revert "KVM: x86: add pcommit support" nfit, tools/testing/nvdimm/: unify shutdown paths libnvdimm: move ->module to struct nvdimm_bus_descriptor nfit: cleanup acpi_nfit_init calling convention nfit: fix _FIT evaluation memory leak + use after free tools/testing/nvdimm: add manufacturing_{date|location} dimm properties tools/testing/nvdimm: add virtual ramdisk range acpi, nfit: treat virtual ramdisk SPA as pmem region pmem: kill __pmem address space pmem: kill wmb_pmem() libnvdimm, pmem: use nvdimm_flush() for namespace I/O writes fs/dax: remove wmb_pmem() libnvdimm, pmem: flush posted-write queues on shutdown ...
2016-07-28 17:22:07 -07:00 · 2016-07-28 17:22:07 -07:00 · f0c98ebc57
parent d94ba9e7d8 0606263f24
commit f0c98ebc57
65 changed files with 1375 additions and 1015 deletions
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@ -395,7 +395,7 @@ prototypes:
 	int (*release) (struct gendisk *, fmode_t);
 	int (*ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
 	int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
-	int (*direct_access) (struct block_device *, sector_t, void __pmem **,
+	int (*direct_access) (struct block_device *, sector_t, void **,
 				unsigned long *);
 	int (*media_changed) (struct gendisk *);
 	void (*unlock_native_capacity) (struct gendisk *);
--- a/Documentation/nvdimm/btt.txt
+++ b/Documentation/nvdimm/btt.txt
@ -256,28 +256,18 @@ If any of these error conditions are encountered, the arena is put into a read
 only state using a flag in the info block.


-5. In-kernel usage
-==================
+5. Usage
+========

-Any block driver that supports byte granularity IO to the storage may register
-with the BTT. It will have to provide the rw_bytes interface in its
-block_device_operations struct:
+The BTT can be set up on any disk (namespace) exposed by the libnvdimm subsystem
+(pmem, or blk mode). The easiest way to set up such a namespace is using the
+'ndctl' utility [1]:

-	int (*rw_bytes)(struct gendisk *, void *, size_t, off_t, int rw);
+For example, the ndctl command line to setup a btt with a 4k sector size is:

-It may register with the BTT after it adds its own gendisk, using btt_init:
+    ndctl create-namespace -f -e namespace0.0 -m sector -l 4k

-	struct btt *btt_init(struct gendisk *disk, unsigned long long rawsize,
-			u32 lbasize, u8 uuid[], int maxlane);
+See ndctl create-namespace --help for more options.

-note that maxlane is the maximum amount of concurrency the driver wishes to
-allow the BTT to use.
-
-The BTT 'disk' appears as a stacked block device that grabs the underlying block
-device in the O_EXCL mode.
-
-When the driver wishes to remove the backing disk, it should similarly call
-btt_fini using the same struct btt* handle that was provided to it by btt_init.
-
-	void btt_fini(struct btt *btt);
+[1]: https://github.com/pmem/ndctl

--- a/arch/powerpc/sysdev/axonram.c
+++ b/arch/powerpc/sysdev/axonram.c
@ -143,12 +143,12 @@ axon_ram_make_request(struct request_queue *queue, struct bio *bio)
 */
 static long
 axon_ram_direct_access(struct block_device *device, sector_t sector,
-		       void __pmem **kaddr, pfn_t *pfn, long size)
+		       void **kaddr, pfn_t *pfn, long size)
 {
 	struct axon_ram_bank *bank = device->bd_disk->private_data;
 	loff_t offset = (loff_t)sector << AXON_RAM_SECTOR_SHIFT;

-	*kaddr = (void __pmem __force *) bank->io_addr + offset;
+	*kaddr = (void *) bank->io_addr + offset;
 	*pfn = phys_to_pfn_t(bank->ph_addr + offset, PFN_DEV);
 	return bank->size - offset;
 }
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@ -225,7 +225,6 @@
 #define X86_FEATURE_RDSEED	( 9*32+18) /* The RDSEED instruction */
 #define X86_FEATURE_ADX		( 9*32+19) /* The ADCX and ADOX instructions */
 #define X86_FEATURE_SMAP	( 9*32+20) /* Supervisor Mode Access Prevention */
-#define X86_FEATURE_PCOMMIT	( 9*32+22) /* PCOMMIT instruction */
 #define X86_FEATURE_CLFLUSHOPT	( 9*32+23) /* CLFLUSHOPT instruction */
 #define X86_FEATURE_CLWB	( 9*32+24) /* CLWB instruction */
 #define X86_FEATURE_AVX512PF	( 9*32+26) /* AVX-512 Prefetch */
--- a/arch/x86/include/asm/pmem.h
+++ b/arch/x86/include/asm/pmem.h
@ -26,13 +26,11 @@
 * @n: length of the copy in bytes
 *
 * Copy data to persistent memory media via non-temporal stores so that
- * a subsequent arch_wmb_pmem() can flush cpu and memory controller
- * write buffers to guarantee durability.
+ * a subsequent pmem driver flush operation will drain posted write queues.
 */
-static inline void arch_memcpy_to_pmem(void __pmem *dst, const void *src,
-		size_t n)
+static inline void arch_memcpy_to_pmem(void *dst, const void *src, size_t n)
 {
-	int unwritten;
+	int rem;

 	/*
 	 * We are copying between two kernel buffers, if
@ -40,59 +38,36 @@ static inline void arch_memcpy_to_pmem(void __pmem *dst, const void *src,
 	 * fault) we would have already reported a general protection fault
 	 * before the WARN+BUG.
 	 */
-	unwritten = __copy_from_user_inatomic_nocache((void __force *) dst,
-			(void __user *) src, n);
-	if (WARN(unwritten, "%s: fault copying %p <- %p unwritten: %d\n",
-				__func__, dst, src, unwritten))
+	rem = __copy_from_user_inatomic_nocache(dst, (void __user *) src, n);
+	if (WARN(rem, "%s: fault copying %p <- %p unwritten: %d\n",
+				__func__, dst, src, rem))
 		BUG();
 }

-static inline int arch_memcpy_from_pmem(void *dst, const void __pmem *src,
-		size_t n)
+static inline int arch_memcpy_from_pmem(void *dst, const void *src, size_t n)
 {
 	if (static_cpu_has(X86_FEATURE_MCE_RECOVERY))
-		return memcpy_mcsafe(dst, (void __force *) src, n);
-	memcpy(dst, (void __force *) src, n);
+		return memcpy_mcsafe(dst, src, n);
+	memcpy(dst, src, n);
 	return 0;
 }

-/**
- * arch_wmb_pmem - synchronize writes to persistent memory
- *
- * After a series of arch_memcpy_to_pmem() operations this drains data
- * from cpu write buffers and any platform (memory controller) buffers
- * to ensure that written data is durable on persistent memory media.
- */
-static inline void arch_wmb_pmem(void)
-{
-	/*
-	 * wmb() to 'sfence' all previous writes such that they are
-	 * architecturally visible to 'pcommit'.  Note, that we've
-	 * already arranged for pmem writes to avoid the cache via
-	 * arch_memcpy_to_pmem().
-	 */
-	wmb();
-	pcommit_sfence();
-}
-
 /**
 * arch_wb_cache_pmem - write back a cache range with CLWB
 * @vaddr:	virtual start address
 * @size:	number of bytes to write back
 *
 * Write back a cache range using the CLWB (cache line write back)
- * instruction.  This function requires explicit ordering with an
- * arch_wmb_pmem() call.
+ * instruction.
 */
-static inline void arch_wb_cache_pmem(void __pmem *addr, size_t size)
+static inline void arch_wb_cache_pmem(void *addr, size_t size)
 {
 	u16 x86_clflush_size = boot_cpu_data.x86_clflush_size;
 	unsigned long clflush_mask = x86_clflush_size - 1;
-	void *vaddr = (void __force *)addr;
-	void *vend = vaddr + size;
+	void *vend = addr + size;
 	void *p;

-	for (p = (void *)((unsigned long)vaddr & ~clflush_mask);
+	for (p = (void *)((unsigned long)addr & ~clflush_mask);
 	     p < vend; p += x86_clflush_size)
 		clwb(p);
 }
@ -113,16 +88,14 @@ static inline bool __iter_needs_pmem_wb(struct iov_iter *i)
 * @i:		iterator with source data
 *
 * Copy data from the iterator 'i' to the PMEM buffer starting at 'addr'.
- * This function requires explicit ordering with an arch_wmb_pmem() call.
 */
-static inline size_t arch_copy_from_iter_pmem(void __pmem *addr, size_t bytes,
+static inline size_t arch_copy_from_iter_pmem(void *addr, size_t bytes,
 		struct iov_iter *i)
 {
-	void *vaddr = (void __force *)addr;
 	size_t len;

 	/* TODO: skip the write-back by always using non-temporal stores */
-	len = copy_from_iter_nocache(vaddr, bytes, i);
+	len = copy_from_iter_nocache(addr, bytes, i);

 	if (__iter_needs_pmem_wb(i))
 		arch_wb_cache_pmem(addr, bytes);
@ -136,28 +109,16 @@ static inline size_t arch_copy_from_iter_pmem(void __pmem *addr, size_t bytes,
 * @size:	number of bytes to zero
 *
 * Write zeros into the memory range starting at 'addr' for 'size' bytes.
- * This function requires explicit ordering with an arch_wmb_pmem() call.
 */
-static inline void arch_clear_pmem(void __pmem *addr, size_t size)
+static inline void arch_clear_pmem(void *addr, size_t size)
 {
-	void *vaddr = (void __force *)addr;
-
-	memset(vaddr, 0, size);
+	memset(addr, 0, size);
 	arch_wb_cache_pmem(addr, size);
 }

-static inline void arch_invalidate_pmem(void __pmem *addr, size_t size)
+static inline void arch_invalidate_pmem(void *addr, size_t size)
 {
-	clflush_cache_range((void __force *) addr, size);
-}
-
-static inline bool __arch_has_wmb_pmem(void)
-{
-	/*
-	 * We require that wmb() be an 'sfence', that is only guaranteed on
-	 * 64-bit builds
-	 */
-	return static_cpu_has(X86_FEATURE_PCOMMIT);
+	clflush_cache_range(addr, size);
 }
 #endif /* CONFIG_ARCH_HAS_PMEM_API */
 #endif /* __ASM_X86_PMEM_H__ */
--- a/arch/x86/include/asm/special_insns.h
+++ b/arch/x86/include/asm/special_insns.h
@ -253,52 +253,6 @@ static inline void clwb(volatile void *__p)
 		: [pax] "a" (p));
 }

-/**
- * pcommit_sfence() - persistent commit and fence
- *
- * The PCOMMIT instruction ensures that data that has been flushed from the
- * processor's cache hierarchy with CLWB, CLFLUSHOPT or CLFLUSH is accepted to
- * memory and is durable on the DIMM.  The primary use case for this is
- * persistent memory.
- *
- * This function shows how to properly use CLWB/CLFLUSHOPT/CLFLUSH and PCOMMIT
- * with appropriate fencing.
- *
- * Example:
- * void flush_and_commit_buffer(void *vaddr, unsigned int size)
- * {
- *         unsigned long clflush_mask = boot_cpu_data.x86_clflush_size - 1;
- *         void *vend = vaddr + size;
- *         void *p;
- *
- *         for (p = (void *)((unsigned long)vaddr & ~clflush_mask);
- *              p < vend; p += boot_cpu_data.x86_clflush_size)
- *                 clwb(p);
- *
- *         // SFENCE to order CLWB/CLFLUSHOPT/CLFLUSH cache flushes
- *         // MFENCE via mb() also works
- *         wmb();
- *
- *         // PCOMMIT and the required SFENCE for ordering
- *         pcommit_sfence();
- * }
- *
- * After this function completes the data pointed to by 'vaddr' has been
- * accepted to memory and will be durable if the 'vaddr' points to persistent
- * memory.
- *
- * PCOMMIT must always be ordered by an MFENCE or SFENCE, so to help simplify
- * things we include both the PCOMMIT and the required SFENCE in the
- * alternatives generated by pcommit_sfence().
- */
-static inline void pcommit_sfence(void)
-{
-	alternative(ASM_NOP7,
-		    ".byte 0x66, 0x0f, 0xae, 0xf8\n\t" /* pcommit */
-		    "sfence",
-		    X86_FEATURE_PCOMMIT);
-}
-
 #define nop() asm volatile ("nop")


--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@ -72,7 +72,6 @@
 #define SECONDARY_EXEC_SHADOW_VMCS              0x00004000
 #define SECONDARY_EXEC_ENABLE_PML               0x00020000
 #define SECONDARY_EXEC_XSAVES			0x00100000
-#define SECONDARY_EXEC_PCOMMIT			0x00200000
 #define SECONDARY_EXEC_TSC_SCALING              0x02000000

 #define PIN_BASED_EXT_INTR_MASK                 0x00000001
--- a/arch/x86/include/uapi/asm/vmx.h
+++ b/arch/x86/include/uapi/asm/vmx.h
@ -78,7 +78,6 @@
 #define EXIT_REASON_PML_FULL            62
 #define EXIT_REASON_XSAVES              63
 #define EXIT_REASON_XRSTORS             64
-#define EXIT_REASON_PCOMMIT             65

 #define VMX_EXIT_REASONS \
 	{ EXIT_REASON_EXCEPTION_NMI,         "EXCEPTION_NMI" }, \
@ -127,8 +126,7 @@
 	{ EXIT_REASON_INVVPID,               "INVVPID" }, \
 	{ EXIT_REASON_INVPCID,               "INVPCID" }, \
 	{ EXIT_REASON_XSAVES,                "XSAVES" }, \
-	{ EXIT_REASON_XRSTORS,               "XRSTORS" }, \
-	{ EXIT_REASON_PCOMMIT,               "PCOMMIT" }
+	{ EXIT_REASON_XRSTORS,               "XRSTORS" }

 #define VMX_ABORT_SAVE_GUEST_MSR_FAIL        1
 #define VMX_ABORT_LOAD_HOST_MSR_FAIL         4
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@ -366,7 +366,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 		F(FSGSBASE) | F(BMI1) | F(HLE) | F(AVX2) | F(SMEP) |
 		F(BMI2) | F(ERMS) | f_invpcid | F(RTM) | f_mpx | F(RDSEED) |
 		F(ADX) | F(SMAP) | F(AVX512F) | F(AVX512PF) | F(AVX512ER) |
-		F(AVX512CD) | F(CLFLUSHOPT) | F(CLWB) | F(PCOMMIT);
+		F(AVX512CD) | F(CLFLUSHOPT) | F(CLWB);

 	/* cpuid 0xD.1.eax */
 	const u32 kvm_cpuid_D_1_eax_x86_features =
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@ -144,14 +144,6 @@ static inline bool guest_cpuid_has_rtm(struct kvm_vcpu *vcpu)
 	return best && (best->ebx & bit(X86_FEATURE_RTM));
 }

-static inline bool guest_cpuid_has_pcommit(struct kvm_vcpu *vcpu)
-{
-	struct kvm_cpuid_entry2 *best;
-
-	best = kvm_find_cpuid_entry(vcpu, 7, 0);
-	return best && (best->ebx & bit(X86_FEATURE_PCOMMIT));
-}
-
 static inline bool guest_cpuid_has_rdtscp(struct kvm_vcpu *vcpu)
 {
 	struct kvm_cpuid_entry2 *best;
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@ -2707,8 +2707,7 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
 		SECONDARY_EXEC_APIC_REGISTER_VIRT |
 		SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
 		SECONDARY_EXEC_WBINVD_EXITING |
-		SECONDARY_EXEC_XSAVES |
-		SECONDARY_EXEC_PCOMMIT;
+		SECONDARY_EXEC_XSAVES;

 	if (enable_ept) {
 		/* nested EPT: emulate EPT also to L1 */
@ -3270,7 +3269,6 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
 			SECONDARY_EXEC_SHADOW_VMCS |
 			SECONDARY_EXEC_XSAVES |
 			SECONDARY_EXEC_ENABLE_PML |
-			SECONDARY_EXEC_PCOMMIT |
 			SECONDARY_EXEC_TSC_SCALING;
 		if (adjust_vmx_controls(min2, opt2,
 					MSR_IA32_VMX_PROCBASED_CTLS2,
@ -4858,9 +4856,6 @@ static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx)
 	if (!enable_pml)
 		exec_control &= ~SECONDARY_EXEC_ENABLE_PML;

-	/* Currently, we allow L1 guest to directly run pcommit instruction. */
-	exec_control &= ~SECONDARY_EXEC_PCOMMIT;
-
 	return exec_control;
 }

@ -4904,9 +4899,10 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)

 	vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, vmx_exec_control(vmx));

-	if (cpu_has_secondary_exec_ctrls())
+	if (cpu_has_secondary_exec_ctrls()) {
 		vmcs_write32(SECONDARY_VM_EXEC_CONTROL,
 				vmx_secondary_exec_control(vmx));
+	}

 	if (kvm_vcpu_apicv_active(&vmx->vcpu)) {
 		vmcs_write64(EOI_EXIT_BITMAP0, 0);
@ -7564,13 +7560,6 @@ static int handle_pml_full(struct kvm_vcpu *vcpu)
 	return 1;
 }

-static int handle_pcommit(struct kvm_vcpu *vcpu)
-{
-	/* we never catch pcommit instruct for L1 guest. */
-	WARN_ON(1);
-	return 1;
-}
-
 /*
 * The exit handlers return 1 if the exit was handled fully and guest execution
 * may resume.  Otherwise they set the kvm_run parameter to indicate what needs
@ -7621,7 +7610,6 @@ static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
 	[EXIT_REASON_XSAVES]                  = handle_xsaves,
 	[EXIT_REASON_XRSTORS]                 = handle_xrstors,
 	[EXIT_REASON_PML_FULL]		      = handle_pml_full,
-	[EXIT_REASON_PCOMMIT]                 = handle_pcommit,
 };

 static const int kvm_vmx_max_exit_handlers =
@ -7930,8 +7918,6 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
 		 * the XSS exit bitmap in vmcs12.
 		 */
 		return nested_cpu_has2(vmcs12, SECONDARY_EXEC_XSAVES);
-	case EXIT_REASON_PCOMMIT:
-		return nested_cpu_has2(vmcs12, SECONDARY_EXEC_PCOMMIT);
 	default:
 		return true;
 	}
@ -9094,15 +9080,6 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu)

 	if (cpu_has_secondary_exec_ctrls())
 		vmcs_set_secondary_exec_control(secondary_exec_ctl);
-
-	if (static_cpu_has(X86_FEATURE_PCOMMIT) && nested) {
-		if (guest_cpuid_has_pcommit(vcpu))
-			vmx->nested.nested_vmx_secondary_ctls_high |=
-				SECONDARY_EXEC_PCOMMIT;
-		else
-			vmx->nested.nested_vmx_secondary_ctls_high &=
-				~SECONDARY_EXEC_PCOMMIT;
-	}
 }

 static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
@ -9715,8 +9692,7 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
 		exec_control &= ~(SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
 				  SECONDARY_EXEC_RDTSCP |
 				  SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
-				  SECONDARY_EXEC_APIC_REGISTER_VIRT |
-				  SECONDARY_EXEC_PCOMMIT);
+				  SECONDARY_EXEC_APIC_REGISTER_VIRT);
 		if (nested_cpu_has(vmcs12,
 				CPU_BASED_ACTIVATE_SECONDARY_CONTROLS))
 			exec_control |= vmcs12->secondary_vm_exec_control;
--- a/arch/x86/lib/x86-opcode-map.txt
+++ b/arch/x86/lib/x86-opcode-map.txt
@ -1012,7 +1012,7 @@ GrpTable: Grp15
 4: XSAVE
 5: XRSTOR | lfence (11B)
 6: XSAVEOPT | clwb (66) | mfence (11B)
-7: clflush | clflushopt (66) | sfence (11B) | pcommit (66),(11B)
+7: clflush | clflushopt (66) | sfence (11B)
 EndTable

 GrpTable: Grp16
--- a/drivers/acpi/Kconfig
+++ b/drivers/acpi/Kconfig
@ -454,32 +454,7 @@ config ACPI_REDUCED_HARDWARE_ONLY

 	  If you are unsure what to do, do not enable this option.

-config ACPI_NFIT
-	tristate "ACPI NVDIMM Firmware Interface Table (NFIT)"
-	depends on PHYS_ADDR_T_64BIT
-	depends on BLK_DEV
-	depends on ARCH_HAS_MMIO_FLUSH
-	select LIBNVDIMM
-	help
-	  Infrastructure to probe ACPI 6 compliant platforms for
-	  NVDIMMs (NFIT) and register a libnvdimm device tree.  In
-	  addition to storage devices this also enables libnvdimm to pass
-	  ACPI._DSM messages for platform/dimm configuration.
-
-	  To compile this driver as a module, choose M here:
-	  the module will be called nfit.
-
-config ACPI_NFIT_DEBUG
-	bool "NFIT DSM debug"
-	depends on ACPI_NFIT
-	depends on DYNAMIC_DEBUG
-	default n
-	help
-	  Enabling this option causes the nfit driver to dump the
-	  input and output buffers of _DSM operations on the ACPI0012
-	  device and its children.  This can be very verbose, so leave
-	  it disabled unless you are debugging a hardware / firmware
-	  issue.
+source "drivers/acpi/nfit/Kconfig"

 source "drivers/acpi/apei/Kconfig"
 source "drivers/acpi/dptf/Kconfig"
--- a/drivers/acpi/Makefile
+++ b/drivers/acpi/Makefile
@ -69,7 +69,7 @@ obj-$(CONFIG_ACPI_PCI_SLOT)	+= pci_slot.o
 obj-$(CONFIG_ACPI_PROCESSOR)	+= processor.o
 obj-$(CONFIG_ACPI)		+= container.o
 obj-$(CONFIG_ACPI_THERMAL)	+= thermal.o
-obj-$(CONFIG_ACPI_NFIT)		+= nfit.o
+obj-$(CONFIG_ACPI_NFIT)		+= nfit/
 obj-$(CONFIG_ACPI)		+= acpi_memhotplug.o
 obj-$(CONFIG_ACPI_HOTPLUG_IOAPIC) += ioapic.o
 obj-$(CONFIG_ACPI_BATTERY)	+= battery.o
--- a/drivers/acpi/nfit/Kconfig
+++ b/drivers/acpi/nfit/Kconfig
@ -0,0 +1,26 @@
+config ACPI_NFIT
+	tristate "ACPI NVDIMM Firmware Interface Table (NFIT)"
+	depends on PHYS_ADDR_T_64BIT
+	depends on BLK_DEV
+	depends on ARCH_HAS_MMIO_FLUSH
+	select LIBNVDIMM
+	help
+	  Infrastructure to probe ACPI 6 compliant platforms for
+	  NVDIMMs (NFIT) and register a libnvdimm device tree.  In
+	  addition to storage devices this also enables libnvdimm to pass
+	  ACPI._DSM messages for platform/dimm configuration.
+
+	  To compile this driver as a module, choose M here:
+	  the module will be called nfit.
+
+config ACPI_NFIT_DEBUG
+	bool "NFIT DSM debug"
+	depends on ACPI_NFIT
+	depends on DYNAMIC_DEBUG
+	default n
+	help
+	  Enabling this option causes the nfit driver to dump the
+	  input and output buffers of _DSM operations on the ACPI0012
+	  device and its children.  This can be very verbose, so leave
+	  it disabled unless you are debugging a hardware / firmware
+	  issue.
--- a/drivers/acpi/nfit/Makefile
+++ b/drivers/acpi/nfit/Makefile
@ -0,0 +1,3 @@
+obj-$(CONFIG_ACPI_NFIT) := nfit.o
+nfit-y := core.o
+nfit-$(CONFIG_X86_MCE) += mce.o
--- a/drivers/acpi/nfit/core.c
+++ b/drivers/acpi/nfit/core.c
--- a/drivers/acpi/nfit/mce.c
+++ b/drivers/acpi/nfit/mce.c
@ -0,0 +1,89 @@
+/*
+ * NFIT - Machine Check Handler
+ *
+ * Copyright(c) 2013-2016 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+#include <linux/notifier.h>
+#include <linux/acpi.h>
+#include <asm/mce.h>
+#include "nfit.h"
+
+static int nfit_handle_mce(struct notifier_block *nb, unsigned long val,
+			void *data)
+{
+	struct mce *mce = (struct mce *)data;
+	struct acpi_nfit_desc *acpi_desc;
+	struct nfit_spa *nfit_spa;
+
+	/* We only care about memory errors */
+	if (!(mce->status & MCACOD))
+		return NOTIFY_DONE;
+
+	/*
+	 * mce->addr contains the physical addr accessed that caused the
+	 * machine check. We need to walk through the list of NFITs, and see
+	 * if any of them matches that address, and only then start a scrub.
+	 */
+	mutex_lock(&acpi_desc_lock);
+	list_for_each_entry(acpi_desc, &acpi_descs, list) {
+		struct device *dev = acpi_desc->dev;
+		int found_match = 0;
+
+		mutex_lock(&acpi_desc->init_mutex);
+		list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
+			struct acpi_nfit_system_address *spa = nfit_spa->spa;
+
+			if (nfit_spa_type(spa) == NFIT_SPA_PM)
+				continue;
+			/* find the spa that covers the mce addr */
+			if (spa->address > mce->addr)
+				continue;
+			if ((spa->address + spa->length - 1) < mce->addr)
+				continue;
+			found_match = 1;
+			dev_dbg(dev, "%s: addr in SPA %d (0x%llx, 0x%llx)\n",
+				__func__, spa->range_index, spa->address,
+				spa->length);
+			/*
+			 * We can break at the first match because we're going
+			 * to rescan all the SPA ranges. There shouldn't be any
+			 * aliasing anyway.
+			 */
+			break;
+		}
+		mutex_unlock(&acpi_desc->init_mutex);
+
+		/*
+		 * We can ignore an -EBUSY here because if an ARS is already
+		 * in progress, just let that be the last authoritative one
+		 */
+		if (found_match)
+			acpi_nfit_ars_rescan(acpi_desc);
+	}
+
+	mutex_unlock(&acpi_desc_lock);
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block nfit_mce_dec = {
+	.notifier_call	= nfit_handle_mce,
+};
+
+void nfit_mce_register(void)
+{
+	mce_register_decode_chain(&nfit_mce_dec);
+}
+
+void nfit_mce_unregister(void)
+{
+	mce_unregister_decode_chain(&nfit_mce_dec);
+}
--- a/drivers/acpi/nfit/nfit.h
+++ b/drivers/acpi/nfit/nfit.h
@ -16,6 +16,7 @@
 #define __NFIT_H__
 #include <linux/workqueue.h>
 #include <linux/libnvdimm.h>
+#include <linux/ndctl.h>
 #include <linux/types.h>
 #include <linux/uuid.h>
 #include <linux/acpi.h>
@ -31,6 +32,9 @@
 #define UUID_NFIT_DIMM_N_HPE1 "9002c334-acf3-4c0e-9642-a235f0d53bc6"
 #define UUID_NFIT_DIMM_N_HPE2 "5008664b-b758-41a0-a03c-27c2f2d04f7e"

+/* https://msdn.microsoft.com/library/windows/hardware/mt604741 */
+#define UUID_NFIT_DIMM_N_MSFT "1ee68b36-d4bd-4a1a-9a16-4f8e53d46e05"
+
 #define ACPI_NFIT_MEM_FAILED_MASK (ACPI_NFIT_MEM_SAVE_FAILED \
 		| ACPI_NFIT_MEM_RESTORE_FAILED | ACPI_NFIT_MEM_FLUSH_FAILED \
 		| ACPI_NFIT_MEM_NOT_ARMED)
@ -40,6 +44,7 @@ enum nfit_uuids {
 	NFIT_DEV_DIMM = NVDIMM_FAMILY_INTEL,
 	NFIT_DEV_DIMM_N_HPE1 = NVDIMM_FAMILY_HPE1,
 	NFIT_DEV_DIMM_N_HPE2 = NVDIMM_FAMILY_HPE2,
+	NFIT_DEV_DIMM_N_MSFT = NVDIMM_FAMILY_MSFT,
 	NFIT_SPA_VOLATILE,
 	NFIT_SPA_PM,
 	NFIT_SPA_DCR,
@ -74,37 +79,37 @@ enum {
 };

 struct nfit_spa {
-	struct acpi_nfit_system_address *spa;
 	struct list_head list;
 	struct nd_region *nd_region;
-	unsigned int ars_done:1;
+	unsigned int ars_required:1;
 	u32 clear_err_unit;
 	u32 max_ars;
+	struct acpi_nfit_system_address spa[0];
 };

 struct nfit_dcr {
-	struct acpi_nfit_control_region *dcr;
 	struct list_head list;
+	struct acpi_nfit_control_region dcr[0];
 };

 struct nfit_bdw {
-	struct acpi_nfit_data_region *bdw;
 	struct list_head list;
+	struct acpi_nfit_data_region bdw[0];
 };

 struct nfit_idt {
-	struct acpi_nfit_interleave *idt;
 	struct list_head list;
+	struct acpi_nfit_interleave idt[0];
 };

 struct nfit_flush {
-	struct acpi_nfit_flush_address *flush;
 	struct list_head list;
+	struct acpi_nfit_flush_address flush[0];
 };

 struct nfit_memdev {
-	struct acpi_nfit_memory_map *memdev;
 	struct list_head list;
+	struct acpi_nfit_memory_map memdev[0];
 };

 /* assembled tables for a given dimm/memory-device */
@ -123,6 +128,7 @@ struct nfit_mem {
 	struct list_head list;
 	struct acpi_device *adev;
 	struct acpi_nfit_desc *acpi_desc;
+	struct resource *flush_wpq;
 	unsigned long dsm_mask;
 	int family;
 };
@ -130,10 +136,7 @@ struct nfit_mem {
 struct acpi_nfit_desc {
 	struct nvdimm_bus_descriptor nd_desc;
 	struct acpi_table_header acpi_header;
-	struct acpi_nfit_header *nfit;
-	struct mutex spa_map_mutex;
 	struct mutex init_mutex;
-	struct list_head spa_maps;
 	struct list_head memdevs;
 	struct list_head flushes;
 	struct list_head dimms;
@ -146,6 +149,9 @@ struct acpi_nfit_desc {
 	struct nd_cmd_ars_status *ars_status;
 	size_t ars_status_size;
 	struct work_struct work;
+	struct list_head list;
+	struct kernfs_node *scrub_count_state;
+	unsigned int scrub_count;
 	unsigned int cancel:1;
 	unsigned long dimm_cmd_force_en;
 	unsigned long bus_cmd_force_en;
@ -161,7 +167,7 @@ enum nd_blk_mmio_selector {
 struct nd_blk_addr {
 	union {
 		void __iomem *base;
-		void __pmem  *aperture;
+		void *aperture;
 	};
 };

@ -180,28 +186,26 @@ struct nfit_blk {
 	u64 bdw_offset; /* post interleave offset */
 	u64 stat_offset;
 	u64 cmd_offset;
-	void __iomem *nvdimm_flush;
 	u32 dimm_flags;
 };

-enum spa_map_type {
-	SPA_MAP_CONTROL,
-	SPA_MAP_APERTURE,
-};
+extern struct list_head acpi_descs;
+extern struct mutex acpi_desc_lock;
+int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc);

-struct nfit_spa_mapping {
-	struct acpi_nfit_desc *acpi_desc;
-	struct acpi_nfit_system_address *spa;
-	struct list_head list;
-	struct kref kref;
-	enum spa_map_type type;
-	struct nd_blk_addr addr;
-};
-
-static inline struct nfit_spa_mapping *to_spa_map(struct kref *kref)
+#ifdef CONFIG_X86_MCE
+void nfit_mce_register(void);
+void nfit_mce_unregister(void);
+#else
+static inline void nfit_mce_register(void)
 {
-	return container_of(kref, struct nfit_spa_mapping, kref);
 }
+static inline void nfit_mce_unregister(void)
+{
+}
+#endif
+
+int nfit_spa_type(struct acpi_nfit_system_address *spa);

 static inline struct acpi_nfit_memory_map *__to_nfit_memdev(
 		struct nfit_mem *nfit_mem)
@ -218,6 +222,6 @@ static inline struct acpi_nfit_desc *to_acpi_desc(
 }

 const u8 *to_nfit_uuid(enum nfit_uuids id);
-int acpi_nfit_init(struct acpi_nfit_desc *nfit, acpi_size sz);
+int acpi_nfit_init(struct acpi_nfit_desc *acpi_desc, void *nfit, acpi_size sz);
 void acpi_nfit_desc_init(struct acpi_nfit_desc *acpi_desc, struct device *dev);
 #endif /* __NFIT_H__ */
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@ -379,7 +379,7 @@ static int brd_rw_page(struct block_device *bdev, sector_t sector,

 #ifdef CONFIG_BLK_DEV_RAM_DAX
 static long brd_direct_access(struct block_device *bdev, sector_t sector,
-			void __pmem **kaddr, pfn_t *pfn, long size)
+			void **kaddr, pfn_t *pfn, long size)
 {
 	struct brd_device *brd = bdev->bd_disk->private_data;
 	struct page *page;
@ -389,7 +389,7 @@ static long brd_direct_access(struct block_device *bdev, sector_t sector,
 	page = brd_insert_page(brd, sector);
 	if (!page)
 		return -ENOSPC;
-	*kaddr = (void __pmem *)page_address(page);
+	*kaddr = page_address(page);
 	*pfn = page_to_pfn_t(page);

 	return PAGE_SIZE;
--- a/drivers/dax/dax.c
+++ b/drivers/dax/dax.c
@ -211,11 +211,9 @@ int devm_create_dax_dev(struct dax_region *dax_region, struct resource *res,
 	}
 	dax_dev->dev = dev;

-	rc = devm_add_action(dax_region->dev, unregister_dax_dev, dev);
-	if (rc) {
-		unregister_dax_dev(dev);
+	rc = devm_add_action_or_reset(dax_region->dev, unregister_dax_dev, dev);
+	if (rc)
 		return rc;
-	}

 	return 0;

--- a/drivers/dax/pmem.c
+++ b/drivers/dax/pmem.c
@ -102,21 +102,19 @@ static int dax_pmem_probe(struct device *dev)
 	if (rc)
 		return rc;

-	rc = devm_add_action(dev, dax_pmem_percpu_exit, &dax_pmem->ref);
-	if (rc) {
-		dax_pmem_percpu_exit(&dax_pmem->ref);
+	rc = devm_add_action_or_reset(dev, dax_pmem_percpu_exit,
+							&dax_pmem->ref);
+	if (rc)
 		return rc;
-	}

 	addr = devm_memremap_pages(dev, &res, &dax_pmem->ref, altmap);
 	if (IS_ERR(addr))
 		return PTR_ERR(addr);

-	rc = devm_add_action(dev, dax_pmem_percpu_kill, &dax_pmem->ref);
-	if (rc) {
-		dax_pmem_percpu_kill(&dax_pmem->ref);
+	rc = devm_add_action_or_reset(dev, dax_pmem_percpu_kill,
+							&dax_pmem->ref);
+	if (rc)
 		return rc;
-	}

 	nd_region = to_nd_region(dev->parent);
 	dax_region = alloc_dax_region(dev, nd_region->id, &res,
--- a/drivers/md/dm-linear.c
+++ b/drivers/md/dm-linear.c
@ -142,7 +142,7 @@ static int linear_iterate_devices(struct dm_target *ti,
 }

 static long linear_direct_access(struct dm_target *ti, sector_t sector,
-				 void __pmem **kaddr, pfn_t *pfn, long size)
+				 void **kaddr, pfn_t *pfn, long size)
 {
 	struct linear_c *lc = ti->private;
 	struct block_device *bdev = lc->dev->bdev;
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@ -2303,7 +2303,7 @@ static int origin_map(struct dm_target *ti, struct bio *bio)
 }

 static long origin_direct_access(struct dm_target *ti, sector_t sector,
-		void __pmem **kaddr, pfn_t *pfn, long size)
+		void **kaddr, pfn_t *pfn, long size)
 {
 	DMWARN("device does not support dax.");
 	return -EIO;
--- a/drivers/md/dm-stripe.c
+++ b/drivers/md/dm-stripe.c
@ -309,7 +309,7 @@ static int stripe_map(struct dm_target *ti, struct bio *bio)
 }

 static long stripe_direct_access(struct dm_target *ti, sector_t sector,
-				 void __pmem **kaddr, pfn_t *pfn, long size)
+				 void **kaddr, pfn_t *pfn, long size)
 {
 	struct stripe_c *sc = ti->private;
 	uint32_t stripe;
--- a/drivers/md/dm-target.c
+++ b/drivers/md/dm-target.c
@ -149,7 +149,7 @@ static void io_err_release_clone_rq(struct request *clone)
 }

 static long io_err_direct_access(struct dm_target *ti, sector_t sector,
-				 void __pmem **kaddr, pfn_t *pfn, long size)
+				 void **kaddr, pfn_t *pfn, long size)
 {
 	return -EIO;
 }
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@ -906,7 +906,7 @@ int dm_set_target_max_io_len(struct dm_target *ti, sector_t len)
 EXPORT_SYMBOL_GPL(dm_set_target_max_io_len);

 static long dm_blk_direct_access(struct block_device *bdev, sector_t sector,
-				 void __pmem **kaddr, pfn_t *pfn, long size)
+				 void **kaddr, pfn_t *pfn, long size)
 {
 	struct mapped_device *md = bdev->bd_disk->private_data;
 	struct dm_table *map;
--- a/drivers/nvdimm/Kconfig
+++ b/drivers/nvdimm/Kconfig
@ -1,6 +1,7 @@
 menuconfig LIBNVDIMM
 	tristate "NVDIMM (Non-Volatile Memory Device) Support"
 	depends on PHYS_ADDR_T_64BIT
+	depends on HAS_IOMEM
 	depends on BLK_DEV
 	help
 	  Generic support for non-volatile memory devices including
@ -19,7 +20,6 @@ if LIBNVDIMM
 config BLK_DEV_PMEM
 	tristate "PMEM: Persistent memory block device support"
 	default LIBNVDIMM
-	depends on HAS_IOMEM
 	select ND_BTT if BTT
 	select ND_PFN if NVDIMM_PFN
 	help
--- a/drivers/nvdimm/blk.c
+++ b/drivers/nvdimm/blk.c
@ -267,10 +267,8 @@ static int nsblk_attach_disk(struct nd_namespace_blk *nsblk)
 	q = blk_alloc_queue(GFP_KERNEL);
 	if (!q)
 		return -ENOMEM;
-	if (devm_add_action(dev, nd_blk_release_queue, q)) {
-		blk_cleanup_queue(q);
+	if (devm_add_action_or_reset(dev, nd_blk_release_queue, q))
 		return -ENOMEM;
-	}

 	blk_queue_make_request(q, nd_blk_make_request);
 	blk_queue_max_hw_sectors(q, UINT_MAX);
@ -282,10 +280,6 @@ static int nsblk_attach_disk(struct nd_namespace_blk *nsblk)
 	disk = alloc_disk(0);
 	if (!disk)
 		return -ENOMEM;
-	if (devm_add_action(dev, nd_blk_release_disk, disk)) {
-		put_disk(disk);
-		return -ENOMEM;
-	}

 	disk->first_minor	= 0;
 	disk->fops		= &nd_blk_fops;
@ -295,6 +289,9 @@ static int nsblk_attach_disk(struct nd_namespace_blk *nsblk)
 	set_capacity(disk, 0);
 	device_add_disk(dev, disk);

+	if (devm_add_action_or_reset(dev, nd_blk_release_disk, disk))
+		return -ENOMEM;
+
 	if (nsblk_meta_size(nsblk)) {
 		int rc = nd_integrity_init(disk, nsblk_meta_size(nsblk));

--- a/drivers/nvdimm/btt_devs.c
+++ b/drivers/nvdimm/btt_devs.c
@ -198,7 +198,6 @@ struct device *nd_btt_create(struct nd_region *nd_region)
 {
 	struct device *dev = __nd_btt_create(nd_region, 0, NULL, NULL);

-	if (dev)
 	__nd_device_register(dev);
 	return dev;
 }
--- a/drivers/nvdimm/bus.c
+++ b/drivers/nvdimm/bus.c
@ -31,6 +31,7 @@
 int nvdimm_major;
 static int nvdimm_bus_major;
 static struct class *nd_class;
+static DEFINE_IDA(nd_ida);

 static int to_nd_device_type(struct device *dev)
 {
@ -60,20 +61,13 @@ static int nvdimm_bus_uevent(struct device *dev, struct kobj_uevent_env *env)
 			to_nd_device_type(dev));
 }

-static int nvdimm_bus_match(struct device *dev, struct device_driver *drv)
-{
-	struct nd_device_driver *nd_drv = to_nd_device_driver(drv);
-
-	return !!test_bit(to_nd_device_type(dev), &nd_drv->type);
-}
-
 static struct module *to_bus_provider(struct device *dev)
 {
 	/* pin bus providers while regions are enabled */
 	if (is_nd_pmem(dev) || is_nd_blk(dev)) {
 		struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev);

-		return nvdimm_bus->module;
+		return nvdimm_bus->nd_desc->module;
 	}
 	return NULL;
 }
@ -136,6 +130,21 @@ static int nvdimm_bus_remove(struct device *dev)
 	return rc;
 }

+static void nvdimm_bus_shutdown(struct device *dev)
+{
+	struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev);
+	struct nd_device_driver *nd_drv = NULL;
+
+	if (dev->driver)
+		nd_drv = to_nd_device_driver(dev->driver);
+
+	if (nd_drv && nd_drv->shutdown) {
+		nd_drv->shutdown(dev);
+		dev_dbg(&nvdimm_bus->dev, "%s.shutdown(%s)\n",
+				dev->driver->name, dev_name(dev));
+	}
+}
+
 void nd_device_notify(struct device *dev, enum nvdimm_event event)
 {
 	device_lock(dev);
@ -208,14 +217,187 @@ long nvdimm_clear_poison(struct device *dev, phys_addr_t phys,
 }
 EXPORT_SYMBOL_GPL(nvdimm_clear_poison);

+static int nvdimm_bus_match(struct device *dev, struct device_driver *drv);
+
 static struct bus_type nvdimm_bus_type = {
 	.name = "nd",
 	.uevent = nvdimm_bus_uevent,
 	.match = nvdimm_bus_match,
 	.probe = nvdimm_bus_probe,
 	.remove = nvdimm_bus_remove,
+	.shutdown = nvdimm_bus_shutdown,
 };

+static void nvdimm_bus_release(struct device *dev)
+{
+	struct nvdimm_bus *nvdimm_bus;
+
+	nvdimm_bus = container_of(dev, struct nvdimm_bus, dev);
+	ida_simple_remove(&nd_ida, nvdimm_bus->id);
+	kfree(nvdimm_bus);
+}
+
+static bool is_nvdimm_bus(struct device *dev)
+{
+	return dev->release == nvdimm_bus_release;
+}
+
+struct nvdimm_bus *walk_to_nvdimm_bus(struct device *nd_dev)
+{
+	struct device *dev;
+
+	for (dev = nd_dev; dev; dev = dev->parent)
+		if (is_nvdimm_bus(dev))
+			break;
+	dev_WARN_ONCE(nd_dev, !dev, "invalid dev, not on nd bus\n");
+	if (dev)
+		return to_nvdimm_bus(dev);
+	return NULL;
+}
+
+struct nvdimm_bus *to_nvdimm_bus(struct device *dev)
+{
+	struct nvdimm_bus *nvdimm_bus;
+
+	nvdimm_bus = container_of(dev, struct nvdimm_bus, dev);
+	WARN_ON(!is_nvdimm_bus(dev));
+	return nvdimm_bus;
+}
+EXPORT_SYMBOL_GPL(to_nvdimm_bus);
+
+struct nvdimm_bus *nvdimm_bus_register(struct device *parent,
+		struct nvdimm_bus_descriptor *nd_desc)
+{
+	struct nvdimm_bus *nvdimm_bus;
+	int rc;
+
+	nvdimm_bus = kzalloc(sizeof(*nvdimm_bus), GFP_KERNEL);
+	if (!nvdimm_bus)
+		return NULL;
+	INIT_LIST_HEAD(&nvdimm_bus->list);
+	INIT_LIST_HEAD(&nvdimm_bus->mapping_list);
+	INIT_LIST_HEAD(&nvdimm_bus->poison_list);
+	init_waitqueue_head(&nvdimm_bus->probe_wait);
+	nvdimm_bus->id = ida_simple_get(&nd_ida, 0, 0, GFP_KERNEL);
+	mutex_init(&nvdimm_bus->reconfig_mutex);
+	if (nvdimm_bus->id < 0) {
+		kfree(nvdimm_bus);
+		return NULL;
+	}
+	nvdimm_bus->nd_desc = nd_desc;
+	nvdimm_bus->dev.parent = parent;
+	nvdimm_bus->dev.release = nvdimm_bus_release;
+	nvdimm_bus->dev.groups = nd_desc->attr_groups;
+	nvdimm_bus->dev.bus = &nvdimm_bus_type;
+	dev_set_name(&nvdimm_bus->dev, "ndbus%d", nvdimm_bus->id);
+	rc = device_register(&nvdimm_bus->dev);
+	if (rc) {
+		dev_dbg(&nvdimm_bus->dev, "registration failed: %d\n", rc);
+		goto err;
+	}
+
+	return nvdimm_bus;
+ err:
+	put_device(&nvdimm_bus->dev);
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(nvdimm_bus_register);
+
+void nvdimm_bus_unregister(struct nvdimm_bus *nvdimm_bus)
+{
+	if (!nvdimm_bus)
+		return;
+	device_unregister(&nvdimm_bus->dev);
+}
+EXPORT_SYMBOL_GPL(nvdimm_bus_unregister);
+
+static int child_unregister(struct device *dev, void *data)
+{
+	/*
+	 * the singular ndctl class device per bus needs to be
+	 * "device_destroy"ed, so skip it here
+	 *
+	 * i.e. remove classless children
+	 */
+	if (dev->class)
+		/* pass */;
+	else
+		nd_device_unregister(dev, ND_SYNC);
+	return 0;
+}
+
+static void free_poison_list(struct list_head *poison_list)
+{
+	struct nd_poison *pl, *next;
+
+	list_for_each_entry_safe(pl, next, poison_list, list) {
+		list_del(&pl->list);
+		kfree(pl);
+	}
+	list_del_init(poison_list);
+}
+
+static int nd_bus_remove(struct device *dev)
+{
+	struct nvdimm_bus *nvdimm_bus = to_nvdimm_bus(dev);
+
+	mutex_lock(&nvdimm_bus_list_mutex);
+	list_del_init(&nvdimm_bus->list);
+	mutex_unlock(&nvdimm_bus_list_mutex);
+
+	nd_synchronize();
+	device_for_each_child(&nvdimm_bus->dev, NULL, child_unregister);
+
+	nvdimm_bus_lock(&nvdimm_bus->dev);
+	free_poison_list(&nvdimm_bus->poison_list);
+	nvdimm_bus_unlock(&nvdimm_bus->dev);
+
+	nvdimm_bus_destroy_ndctl(nvdimm_bus);
+
+	return 0;
+}
+
+static int nd_bus_probe(struct device *dev)
+{
+	struct nvdimm_bus *nvdimm_bus = to_nvdimm_bus(dev);
+	int rc;
+
+	rc = nvdimm_bus_create_ndctl(nvdimm_bus);
+	if (rc)
+		return rc;
+
+	mutex_lock(&nvdimm_bus_list_mutex);
+	list_add_tail(&nvdimm_bus->list, &nvdimm_bus_list);
+	mutex_unlock(&nvdimm_bus_list_mutex);
+
+	/* enable bus provider attributes to look up their local context */
+	dev_set_drvdata(dev, nvdimm_bus->nd_desc);
+
+	return 0;
+}
+
+static struct nd_device_driver nd_bus_driver = {
+	.probe = nd_bus_probe,
+	.remove = nd_bus_remove,
+	.drv = {
+		.name = "nd_bus",
+		.suppress_bind_attrs = true,
+		.bus = &nvdimm_bus_type,
+		.owner = THIS_MODULE,
+		.mod_name = KBUILD_MODNAME,
+	},
+};
+
+static int nvdimm_bus_match(struct device *dev, struct device_driver *drv)
+{
+	struct nd_device_driver *nd_drv = to_nd_device_driver(drv);
+
+	if (is_nvdimm_bus(dev) && nd_drv == &nd_bus_driver)
+		return true;
+
+	return !!test_bit(to_nd_device_type(dev), &nd_drv->type);
+}
+
 static ASYNC_DOMAIN_EXCLUSIVE(nd_async_domain);

 void nd_synchronize(void)
@ -395,12 +577,10 @@ int nvdimm_bus_create_ndctl(struct nvdimm_bus *nvdimm_bus)
 	dev = device_create(nd_class, &nvdimm_bus->dev, devt, nvdimm_bus,
 			"ndctl%d", nvdimm_bus->id);

-	if (IS_ERR(dev)) {
+	if (IS_ERR(dev))
 		dev_dbg(&nvdimm_bus->dev, "failed to register ndctl%d: %ld\n",
 				nvdimm_bus->id, PTR_ERR(dev));
-		return PTR_ERR(dev);
-	}
-	return 0;
+	return PTR_ERR_OR_ZERO(dev);
 }

 void nvdimm_bus_destroy_ndctl(struct nvdimm_bus *nvdimm_bus)
@ -850,8 +1030,14 @@ int __init nvdimm_bus_init(void)
 		goto err_class;
 	}

+	rc = driver_register(&nd_bus_driver.drv);
+	if (rc)
+		goto err_nd_bus;
+
 	return 0;

+ err_nd_bus:
+	class_destroy(nd_class);
 err_class:
 	unregister_chrdev(nvdimm_major, "dimmctl");
 err_dimm_chrdev:
@ -864,8 +1050,10 @@ int __init nvdimm_bus_init(void)

 void nvdimm_bus_exit(void)
 {
+	driver_unregister(&nd_bus_driver.drv);
 	class_destroy(nd_class);
 	unregister_chrdev(nvdimm_bus_major, "ndctl");
 	unregister_chrdev(nvdimm_major, "dimmctl");
 	bus_unregister(&nvdimm_bus_type);
+	ida_destroy(&nd_ida);
 }
--- a/drivers/nvdimm/claim.c
+++ b/drivers/nvdimm/claim.c
@ -240,7 +240,7 @@ static int nsio_rw_bytes(struct nd_namespace_common *ndns,
 		return memcpy_from_pmem(buf, nsio->addr + offset, size);
 	} else {
 		memcpy_to_pmem(nsio->addr + offset, buf, size);
-		wmb_pmem();
+		nvdimm_flush(to_nd_region(ndns->dev.parent));
 	}

 	return 0;
@ -266,9 +266,8 @@ int devm_nsio_enable(struct device *dev, struct nd_namespace_io *nsio)

 	nsio->addr = devm_memremap(dev, res->start, resource_size(res),
 			ARCH_MEMREMAP_PMEM);
-	if (IS_ERR(nsio->addr))
-		return PTR_ERR(nsio->addr);
-	return 0;
+
+	return PTR_ERR_OR_ZERO(nsio->addr);
 }
 EXPORT_SYMBOL_GPL(devm_nsio_enable);

--- a/drivers/nvdimm/core.c
+++ b/drivers/nvdimm/core.c
@ -20,12 +20,12 @@
 #include <linux/ndctl.h>
 #include <linux/mutex.h>
 #include <linux/slab.h>
+#include <linux/io.h>
 #include "nd-core.h"
 #include "nd.h"

 LIST_HEAD(nvdimm_bus_list);
 DEFINE_MUTEX(nvdimm_bus_list_mutex);
-static DEFINE_IDA(nd_ida);

 void nvdimm_bus_lock(struct device *dev)
 {
@ -57,6 +57,127 @@ bool is_nvdimm_bus_locked(struct device *dev)
 }
 EXPORT_SYMBOL(is_nvdimm_bus_locked);

+struct nvdimm_map {
+	struct nvdimm_bus *nvdimm_bus;
+	struct list_head list;
+	resource_size_t offset;
+	unsigned long flags;
+	size_t size;
+	union {
+		void *mem;
+		void __iomem *iomem;
+	};
+	struct kref kref;
+};
+
+static struct nvdimm_map *find_nvdimm_map(struct device *dev,
+		resource_size_t offset)
+{
+	struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev);
+	struct nvdimm_map *nvdimm_map;
+
+	list_for_each_entry(nvdimm_map, &nvdimm_bus->mapping_list, list)
+		if (nvdimm_map->offset == offset)
+			return nvdimm_map;
+	return NULL;
+}
+
+static struct nvdimm_map *alloc_nvdimm_map(struct device *dev,
+		resource_size_t offset, size_t size, unsigned long flags)
+{
+	struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev);
+	struct nvdimm_map *nvdimm_map;
+
+	nvdimm_map = kzalloc(sizeof(*nvdimm_map), GFP_KERNEL);
+	if (!nvdimm_map)
+		return NULL;
+
+	INIT_LIST_HEAD(&nvdimm_map->list);
+	nvdimm_map->nvdimm_bus = nvdimm_bus;
+	nvdimm_map->offset = offset;
+	nvdimm_map->flags = flags;
+	nvdimm_map->size = size;
+	kref_init(&nvdimm_map->kref);
+
+	if (!request_mem_region(offset, size, dev_name(&nvdimm_bus->dev)))
+		goto err_request_region;
+
+	if (flags)
+		nvdimm_map->mem = memremap(offset, size, flags);
+	else
+		nvdimm_map->iomem = ioremap(offset, size);
+
+	if (!nvdimm_map->mem)
+		goto err_map;
+
+	dev_WARN_ONCE(dev, !is_nvdimm_bus_locked(dev), "%s: bus unlocked!",
+			__func__);
+	list_add(&nvdimm_map->list, &nvdimm_bus->mapping_list);
+
+	return nvdimm_map;
+
+ err_map:
+	release_mem_region(offset, size);
+ err_request_region:
+	kfree(nvdimm_map);
+	return NULL;
+}
+
+static void nvdimm_map_release(struct kref *kref)
+{
+	struct nvdimm_bus *nvdimm_bus;
+	struct nvdimm_map *nvdimm_map;
+
+	nvdimm_map = container_of(kref, struct nvdimm_map, kref);
+	nvdimm_bus = nvdimm_map->nvdimm_bus;
+
+	dev_dbg(&nvdimm_bus->dev, "%s: %pa\n", __func__, &nvdimm_map->offset);
+	list_del(&nvdimm_map->list);
+	if (nvdimm_map->flags)
+		memunmap(nvdimm_map->mem);
+	else
+		iounmap(nvdimm_map->iomem);
+	release_mem_region(nvdimm_map->offset, nvdimm_map->size);
+	kfree(nvdimm_map);
+}
+
+static void nvdimm_map_put(void *data)
+{
+	struct nvdimm_map *nvdimm_map = data;
+	struct nvdimm_bus *nvdimm_bus = nvdimm_map->nvdimm_bus;
+
+	nvdimm_bus_lock(&nvdimm_bus->dev);
+	kref_put(&nvdimm_map->kref, nvdimm_map_release);
+	nvdimm_bus_unlock(&nvdimm_bus->dev);
+}
+
+/**
+ * devm_nvdimm_memremap - map a resource that is shared across regions
+ * @dev: device that will own a reference to the shared mapping
+ * @offset: physical base address of the mapping
+ * @size: mapping size
+ * @flags: memremap flags, or, if zero, perform an ioremap instead
+ */
+void *devm_nvdimm_memremap(struct device *dev, resource_size_t offset,
+		size_t size, unsigned long flags)
+{
+	struct nvdimm_map *nvdimm_map;
+
+	nvdimm_bus_lock(dev);
+	nvdimm_map = find_nvdimm_map(dev, offset);
+	if (!nvdimm_map)
+		nvdimm_map = alloc_nvdimm_map(dev, offset, size, flags);
+	else
+		kref_get(&nvdimm_map->kref);
+	nvdimm_bus_unlock(dev);
+
+	if (devm_add_action_or_reset(dev, nvdimm_map_put, nvdimm_map))
+		return NULL;
+
+	return nvdimm_map->mem;
+}
+EXPORT_SYMBOL_GPL(devm_nvdimm_memremap);
+
 u64 nd_fletcher64(void *addr, size_t len, bool le)
 {
 	u32 *buf = addr;
@ -73,25 +194,6 @@ u64 nd_fletcher64(void *addr, size_t len, bool le)
 }
 EXPORT_SYMBOL_GPL(nd_fletcher64);

-static void nvdimm_bus_release(struct device *dev)
-{
-	struct nvdimm_bus *nvdimm_bus;
-
-	nvdimm_bus = container_of(dev, struct nvdimm_bus, dev);
-	ida_simple_remove(&nd_ida, nvdimm_bus->id);
-	kfree(nvdimm_bus);
-}
-
-struct nvdimm_bus *to_nvdimm_bus(struct device *dev)
-{
-	struct nvdimm_bus *nvdimm_bus;
-
-	nvdimm_bus = container_of(dev, struct nvdimm_bus, dev);
-	WARN_ON(nvdimm_bus->dev.release != nvdimm_bus_release);
-	return nvdimm_bus;
-}
-EXPORT_SYMBOL_GPL(to_nvdimm_bus);
-
 struct nvdimm_bus_descriptor *to_nd_desc(struct nvdimm_bus *nvdimm_bus)
 {
 	/* struct nvdimm_bus definition is private to libnvdimm */
@ -99,18 +201,12 @@ struct nvdimm_bus_descriptor *to_nd_desc(struct nvdimm_bus *nvdimm_bus)
 }
 EXPORT_SYMBOL_GPL(to_nd_desc);

-struct nvdimm_bus *walk_to_nvdimm_bus(struct device *nd_dev)
+struct device *to_nvdimm_bus_dev(struct nvdimm_bus *nvdimm_bus)
 {
-	struct device *dev;
-
-	for (dev = nd_dev; dev; dev = dev->parent)
-		if (dev->release == nvdimm_bus_release)
-			break;
-	dev_WARN_ONCE(nd_dev, !dev, "invalid dev, not on nd bus\n");
-	if (dev)
-		return to_nvdimm_bus(dev);
-	return NULL;
+	/* struct nvdimm_bus definition is private to libnvdimm */
+	return &nvdimm_bus->dev;
 }
+EXPORT_SYMBOL_GPL(to_nvdimm_bus_dev);

 static bool is_uuid_sep(char sep)
 {
@ -325,51 +421,6 @@ struct attribute_group nvdimm_bus_attribute_group = {
 };
 EXPORT_SYMBOL_GPL(nvdimm_bus_attribute_group);

-struct nvdimm_bus *__nvdimm_bus_register(struct device *parent,
-		struct nvdimm_bus_descriptor *nd_desc, struct module *module)
-{
-	struct nvdimm_bus *nvdimm_bus;
-	int rc;
-
-	nvdimm_bus = kzalloc(sizeof(*nvdimm_bus), GFP_KERNEL);
-	if (!nvdimm_bus)
-		return NULL;
-	INIT_LIST_HEAD(&nvdimm_bus->list);
-	INIT_LIST_HEAD(&nvdimm_bus->poison_list);
-	init_waitqueue_head(&nvdimm_bus->probe_wait);
-	nvdimm_bus->id = ida_simple_get(&nd_ida, 0, 0, GFP_KERNEL);
-	mutex_init(&nvdimm_bus->reconfig_mutex);
-	if (nvdimm_bus->id < 0) {
-		kfree(nvdimm_bus);
-		return NULL;
-	}
-	nvdimm_bus->nd_desc = nd_desc;
-	nvdimm_bus->module = module;
-	nvdimm_bus->dev.parent = parent;
-	nvdimm_bus->dev.release = nvdimm_bus_release;
-	nvdimm_bus->dev.groups = nd_desc->attr_groups;
-	dev_set_name(&nvdimm_bus->dev, "ndbus%d", nvdimm_bus->id);
-	rc = device_register(&nvdimm_bus->dev);
-	if (rc) {
-		dev_dbg(&nvdimm_bus->dev, "registration failed: %d\n", rc);
-		goto err;
-	}
-
-	rc = nvdimm_bus_create_ndctl(nvdimm_bus);
-	if (rc)
-		goto err;
-
-	mutex_lock(&nvdimm_bus_list_mutex);
-	list_add_tail(&nvdimm_bus->list, &nvdimm_bus_list);
-	mutex_unlock(&nvdimm_bus_list_mutex);
-
-	return nvdimm_bus;
- err:
-	put_device(&nvdimm_bus->dev);
-	return NULL;
-}
-EXPORT_SYMBOL_GPL(__nvdimm_bus_register);
-
 static void set_badblock(struct badblocks *bb, sector_t s, int num)
 {
 	dev_dbg(bb->dev, "Found a poison range (0x%llx, 0x%llx)\n",
@ -545,54 +596,6 @@ int nvdimm_bus_add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length)
 }
 EXPORT_SYMBOL_GPL(nvdimm_bus_add_poison);

-static void free_poison_list(struct list_head *poison_list)
-{
-	struct nd_poison *pl, *next;
-
-	list_for_each_entry_safe(pl, next, poison_list, list) {
-		list_del(&pl->list);
-		kfree(pl);
-	}
-	list_del_init(poison_list);
-}
-
-static int child_unregister(struct device *dev, void *data)
-{
-	/*
-	 * the singular ndctl class device per bus needs to be
-	 * "device_destroy"ed, so skip it here
-	 *
-	 * i.e. remove classless children
-	 */
-	if (dev->class)
-		/* pass */;
-	else
-		nd_device_unregister(dev, ND_SYNC);
-	return 0;
-}
-
-void nvdimm_bus_unregister(struct nvdimm_bus *nvdimm_bus)
-{
-	if (!nvdimm_bus)
-		return;
-
-	mutex_lock(&nvdimm_bus_list_mutex);
-	list_del_init(&nvdimm_bus->list);
-	mutex_unlock(&nvdimm_bus_list_mutex);
-
-	nd_synchronize();
-	device_for_each_child(&nvdimm_bus->dev, NULL, child_unregister);
-
-	nvdimm_bus_lock(&nvdimm_bus->dev);
-	free_poison_list(&nvdimm_bus->poison_list);
-	nvdimm_bus_unlock(&nvdimm_bus->dev);
-
-	nvdimm_bus_destroy_ndctl(nvdimm_bus);
-
-	device_unregister(&nvdimm_bus->dev);
-}
-EXPORT_SYMBOL_GPL(nvdimm_bus_unregister);
-
 #ifdef CONFIG_BLK_DEV_INTEGRITY
 int nd_integrity_init(struct gendisk *disk, unsigned long meta_size)
 {
@ -601,7 +604,8 @@ int nd_integrity_init(struct gendisk *disk, unsigned long meta_size)
 	if (meta_size == 0)
 		return 0;

-	bi.profile = NULL;
+	memset(&bi, 0, sizeof(bi));
+
 	bi.tuple_size = meta_size;
 	bi.tag_size = meta_size;

@ -650,7 +654,6 @@ static __exit void libnvdimm_exit(void)
 	nvdimm_bus_exit();
 	nd_region_devs_exit();
 	nvdimm_devs_exit();
-	ida_destroy(&nd_ida);
 }

 MODULE_LICENSE("GPL v2");
--- a/drivers/nvdimm/dimm_devs.c
+++ b/drivers/nvdimm/dimm_devs.c
@ -346,7 +346,8 @@ EXPORT_SYMBOL_GPL(nvdimm_attribute_group);

 struct nvdimm *nvdimm_create(struct nvdimm_bus *nvdimm_bus, void *provider_data,
 		const struct attribute_group **groups, unsigned long flags,
-		unsigned long cmd_mask)
+		unsigned long cmd_mask, int num_flush,
+		struct resource *flush_wpq)
 {
 	struct nvdimm *nvdimm = kzalloc(sizeof(*nvdimm), GFP_KERNEL);
 	struct device *dev;
@ -362,6 +363,8 @@ struct nvdimm *nvdimm_create(struct nvdimm_bus *nvdimm_bus, void *provider_data,
 	nvdimm->provider_data = provider_data;
 	nvdimm->flags = flags;
 	nvdimm->cmd_mask = cmd_mask;
+	nvdimm->num_flush = num_flush;
+	nvdimm->flush_wpq = flush_wpq;
 	atomic_set(&nvdimm->busy, 0);
 	dev = &nvdimm->dev;
 	dev_set_name(dev, "nmem%d", nvdimm->id);
--- a/drivers/nvdimm/e820.c
+++ b/drivers/nvdimm/e820.c
@ -47,6 +47,7 @@ static int e820_pmem_probe(struct platform_device *pdev)

 	nd_desc.attr_groups = e820_pmem_attribute_groups;
 	nd_desc.provider_name = "e820";
+	nd_desc.module = THIS_MODULE;
 	nvdimm_bus = nvdimm_bus_register(dev, &nd_desc);
 	if (!nvdimm_bus)
 		goto err;
--- a/drivers/nvdimm/nd-core.h
+++ b/drivers/nvdimm/nd-core.h
@ -26,11 +26,11 @@ extern int nvdimm_major;
 struct nvdimm_bus {
 	struct nvdimm_bus_descriptor *nd_desc;
 	wait_queue_head_t probe_wait;
-	struct module *module;
 	struct list_head list;
 	struct device dev;
 	int id, probe_active;
 	struct list_head poison_list;
+	struct list_head mapping_list;
 	struct mutex reconfig_mutex;
 };

@ -40,7 +40,8 @@ struct nvdimm {
 	unsigned long cmd_mask;
 	struct device dev;
 	atomic_t busy;
-	int id;
+	int id, num_flush;
+	struct resource *flush_wpq;
 };

 bool is_nvdimm(struct device *dev);
--- a/drivers/nvdimm/nd.h
+++ b/drivers/nvdimm/nd.h
@ -49,9 +49,11 @@ struct nvdimm_drvdata {
 	struct kref kref;
 };

-struct nd_region_namespaces {
-	int count;
-	int active;
+struct nd_region_data {
+	int ns_count;
+	int ns_active;
+	unsigned int flush_mask;
+	void __iomem *flush_wpq[0][0];
 };

 static inline struct nd_namespace_index *to_namespace_index(
@ -119,7 +121,6 @@ struct nd_region {

 struct nd_blk_region {
 	int (*enable)(struct nvdimm_bus *nvdimm_bus, struct device *dev);
-	void (*disable)(struct nvdimm_bus *nvdimm_bus, struct device *dev);
 	int (*do_io)(struct nd_blk_region *ndbr, resource_size_t dpa,
 			void *iobuf, u64 len, int rw);
 	void *blk_provider_data;
@ -325,6 +326,7 @@ static inline void devm_nsio_disable(struct device *dev,
 }
 #endif
 int nd_blk_region_init(struct nd_region *nd_region);
+int nd_region_activate(struct nd_region *nd_region);
 void __nd_iostat_start(struct bio *bio, unsigned long *start);
 static inline bool nd_iostat_start(struct bio *bio, unsigned long *start)
 {
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@ -29,27 +29,28 @@
 #include <linux/slab.h>
 #include <linux/pmem.h>
 #include <linux/nd.h>
+#include "pmem.h"
 #include "pfn.h"
 #include "nd.h"

-struct pmem_device {
-	/* One contiguous memory region per device */
-	phys_addr_t		phys_addr;
-	/* when non-zero this device is hosting a 'pfn' instance */
-	phys_addr_t		data_offset;
-	u64			pfn_flags;
-	void __pmem		*virt_addr;
-	/* immutable base size of the namespace */
-	size_t			size;
-	/* trim size when namespace capacity has been section aligned */
-	u32			pfn_pad;
-	struct badblocks	bb;
-};
+static struct device *to_dev(struct pmem_device *pmem)
+{
+	/*
+	 * nvdimm bus services need a 'dev' parameter, and we record the device
+	 * at init in bb.dev.
+	 */
+	return pmem->bb.dev;
+}
+
+static struct nd_region *to_region(struct pmem_device *pmem)
+{
+	return to_nd_region(to_dev(pmem)->parent);
+}

 static void pmem_clear_poison(struct pmem_device *pmem, phys_addr_t offset,
 		unsigned int len)
 {
-	struct device *dev = pmem->bb.dev;
+	struct device *dev = to_dev(pmem);
 	sector_t sector;
 	long cleared;

@ -57,7 +58,7 @@ static void pmem_clear_poison(struct pmem_device *pmem, phys_addr_t offset,
 	cleared = nvdimm_clear_poison(dev, pmem->phys_addr + offset, len);

 	if (cleared > 0 && cleared / 512) {
-		dev_dbg(dev, "%s: %llx clear %ld sector%s\n",
+		dev_dbg(dev, "%s: %#llx clear %ld sector%s\n",
 				__func__, (unsigned long long) sector,
 				cleared / 512, cleared / 512 > 1 ? "s" : "");
 		badblocks_clear(&pmem->bb, sector, cleared / 512);
@ -73,7 +74,7 @@ static int pmem_do_bvec(struct pmem_device *pmem, struct page *page,
 	bool bad_pmem = false;
 	void *mem = kmap_atomic(page);
 	phys_addr_t pmem_off = sector * 512 + pmem->data_offset;
-	void __pmem *pmem_addr = pmem->virt_addr + pmem_off;
+	void *pmem_addr = pmem->virt_addr + pmem_off;

 	if (unlikely(is_bad_pmem(&pmem->bb, sector, len)))
 		bad_pmem = true;
@ -112,6 +113,11 @@ static int pmem_do_bvec(struct pmem_device *pmem, struct page *page,
 	return rc;
 }

+/* account for REQ_FLUSH rename, replace with REQ_PREFLUSH after v4.8-rc1 */
+#ifndef REQ_FLUSH
+#define REQ_FLUSH REQ_PREFLUSH
+#endif
+
 static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio)
 {
 	int rc = 0;
@ -120,6 +126,10 @@ static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio)
 	struct bio_vec bvec;
 	struct bvec_iter iter;
 	struct pmem_device *pmem = q->queuedata;
+	struct nd_region *nd_region = to_region(pmem);
+
+	if (bio->bi_rw & REQ_FLUSH)
+		nvdimm_flush(nd_region);

 	do_acct = nd_iostat_start(bio, &start);
 	bio_for_each_segment(bvec, bio, iter) {
@ -134,8 +144,8 @@ static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio)
 	if (do_acct)
 		nd_iostat_end(bio, start);

-	if (bio_data_dir(bio))
-		wmb_pmem();
+	if (bio->bi_rw & REQ_FUA)
+		nvdimm_flush(nd_region);

 	bio_endio(bio);
 	return BLK_QC_T_NONE;
@ -148,8 +158,6 @@ static int pmem_rw_page(struct block_device *bdev, sector_t sector,
 	int rc;

 	rc = pmem_do_bvec(pmem, page, PAGE_SIZE, 0, rw, sector);
-	if (rw & WRITE)
-		wmb_pmem();

 	/*
 	 * The ->rw_page interface is subtle and tricky.  The core
@ -163,8 +171,9 @@ static int pmem_rw_page(struct block_device *bdev, sector_t sector,
 	return rc;
 }

-static long pmem_direct_access(struct block_device *bdev, sector_t sector,
-		      void __pmem **kaddr, pfn_t *pfn, long size)
+/* see "strong" declaration in tools/testing/nvdimm/pmem-dax.c */
+__weak long pmem_direct_access(struct block_device *bdev, sector_t sector,
+		      void **kaddr, pfn_t *pfn, long size)
 {
 	struct pmem_device *pmem = bdev->bd_queue->queuedata;
 	resource_size_t offset = sector * 512 + pmem->data_offset;
@ -195,7 +204,7 @@ static void pmem_release_queue(void *q)
 	blk_cleanup_queue(q);
 }

-void pmem_release_disk(void *disk)
+static void pmem_release_disk(void *disk)
 {
 	del_gendisk(disk);
 	put_disk(disk);
@ -205,6 +214,7 @@ static int pmem_attach_disk(struct device *dev,
 		struct nd_namespace_common *ndns)
 {
 	struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev);
+	struct nd_region *nd_region = to_nd_region(dev->parent);
 	struct vmem_altmap __altmap, *altmap = NULL;
 	struct resource *res = &nsio->res;
 	struct nd_pfn *nd_pfn = NULL;
@ -234,7 +244,7 @@ static int pmem_attach_disk(struct device *dev,
 	dev_set_drvdata(dev, pmem);
 	pmem->phys_addr = res->start;
 	pmem->size = resource_size(res);
-	if (!arch_has_wmb_pmem())
+	if (nvdimm_has_flush(nd_region) < 0)
 		dev_warn(dev, "unable to guarantee persistence of writes\n");

 	if (!devm_request_mem_region(dev, res->start, resource_size(res),
@ -269,15 +279,14 @@ static int pmem_attach_disk(struct device *dev,
 	 * At release time the queue must be dead before
 	 * devm_memremap_pages is unwound
 	 */
-	if (devm_add_action(dev, pmem_release_queue, q)) {
-		blk_cleanup_queue(q);
+	if (devm_add_action_or_reset(dev, pmem_release_queue, q))
 		return -ENOMEM;
-	}

 	if (IS_ERR(addr))
 		return PTR_ERR(addr);
-	pmem->virt_addr = (void __pmem *) addr;
+	pmem->virt_addr = addr;

+	blk_queue_write_cache(q, true, true);
 	blk_queue_make_request(q, pmem_make_request);
 	blk_queue_physical_block_size(q, PAGE_SIZE);
 	blk_queue_max_hw_sectors(q, UINT_MAX);
@ -289,10 +298,6 @@ static int pmem_attach_disk(struct device *dev,
 	disk = alloc_disk_node(0, nid);
 	if (!disk)
 		return -ENOMEM;
-	if (devm_add_action(dev, pmem_release_disk, disk)) {
-		put_disk(disk);
-		return -ENOMEM;
-	}

 	disk->fops		= &pmem_fops;
 	disk->queue		= q;
@ -302,9 +307,13 @@ static int pmem_attach_disk(struct device *dev,
 			/ 512);
 	if (devm_init_badblocks(dev, &pmem->bb))
 		return -ENOMEM;
-	nvdimm_badblocks_populate(to_nd_region(dev->parent), &pmem->bb, res);
+	nvdimm_badblocks_populate(nd_region, &pmem->bb, res);
 	disk->bb = &pmem->bb;
 	device_add_disk(dev, disk);
+
+	if (devm_add_action_or_reset(dev, pmem_release_disk, disk))
+		return -ENOMEM;
+
 	revalidate_disk(disk);

 	return 0;
@ -340,13 +349,20 @@ static int nd_pmem_remove(struct device *dev)
 {
 	if (is_nd_btt(dev))
 		nvdimm_namespace_detach_btt(to_nd_btt(dev));
+	nvdimm_flush(to_nd_region(dev->parent));
+
 	return 0;
 }

+static void nd_pmem_shutdown(struct device *dev)
+{
+	nvdimm_flush(to_nd_region(dev->parent));
+}
+
 static void nd_pmem_notify(struct device *dev, enum nvdimm_event event)
 {
-	struct nd_region *nd_region = to_nd_region(dev->parent);
 	struct pmem_device *pmem = dev_get_drvdata(dev);
+	struct nd_region *nd_region = to_region(pmem);
 	resource_size_t offset = 0, end_trunc = 0;
 	struct nd_namespace_common *ndns;
 	struct nd_namespace_io *nsio;
@ -382,6 +398,7 @@ static struct nd_device_driver nd_pmem_driver = {
 	.probe = nd_pmem_probe,
 	.remove = nd_pmem_remove,
 	.notify = nd_pmem_notify,
+	.shutdown = nd_pmem_shutdown,
 	.drv = {
 		.name = "nd_pmem",
 	},
--- a/drivers/nvdimm/pmem.h
+++ b/drivers/nvdimm/pmem.h
@ -0,0 +1,24 @@
+#ifndef __NVDIMM_PMEM_H__
+#define __NVDIMM_PMEM_H__
+#include <linux/badblocks.h>
+#include <linux/types.h>
+#include <linux/pfn_t.h>
+#include <linux/fs.h>
+
+long pmem_direct_access(struct block_device *bdev, sector_t sector,
+		      void **kaddr, pfn_t *pfn, long size);
+/* this definition is in it's own header for tools/testing/nvdimm to consume */
+struct pmem_device {
+	/* One contiguous memory region per device */
+	phys_addr_t		phys_addr;
+	/* when non-zero this device is hosting a 'pfn' instance */
+	phys_addr_t		data_offset;
+	u64			pfn_flags;
+	void			*virt_addr;
+	/* immutable base size of the namespace */
+	size_t			size;
+	/* trim size when namespace capacity has been section aligned */
+	u32			pfn_pad;
+	struct badblocks	bb;
+};
+#endif /* __NVDIMM_PMEM_H__ */
--- a/drivers/nvdimm/region.c
+++ b/drivers/nvdimm/region.c
@ -20,7 +20,7 @@ static int nd_region_probe(struct device *dev)
 {
 	int err, rc;
 	static unsigned long once;
-	struct nd_region_namespaces *num_ns;
+	struct nd_region_data *ndrd;
 	struct nd_region *nd_region = to_nd_region(dev);

 	if (nd_region->num_lanes > num_online_cpus()
@ -33,21 +33,21 @@ static int nd_region_probe(struct device *dev)
 				nd_region->num_lanes);
 	}

+	rc = nd_region_activate(nd_region);
+	if (rc)
+		return rc;
+
 	rc = nd_blk_region_init(nd_region);
 	if (rc)
 		return rc;

 	rc = nd_region_register_namespaces(nd_region, &err);
-	num_ns = devm_kzalloc(dev, sizeof(*num_ns), GFP_KERNEL);
-	if (!num_ns)
-		return -ENOMEM;
-
 	if (rc < 0)
 		return rc;

-	num_ns->active = rc;
-	num_ns->count = rc + err;
-	dev_set_drvdata(dev, num_ns);
+	ndrd = dev_get_drvdata(dev);
+	ndrd->ns_active = rc;
+	ndrd->ns_count = rc + err;

 	if (rc && err && rc == err)
 		return -ENODEV;
@ -82,6 +82,8 @@ static int nd_region_remove(struct device *dev)
 {
 	struct nd_region *nd_region = to_nd_region(dev);

+	device_for_each_child(dev, NULL, child_unregister);
+
 	/* flush attribute readers and disable */
 	nvdimm_bus_lock(dev);
 	nd_region->ns_seed = NULL;
@ -91,7 +93,6 @@ static int nd_region_remove(struct device *dev)
 	dev_set_drvdata(dev, NULL);
 	nvdimm_bus_unlock(dev);

-	device_for_each_child(dev, NULL, child_unregister);
 	return 0;
 }

--- a/drivers/nvdimm/region_devs.c
+++ b/drivers/nvdimm/region_devs.c
@ -14,13 +14,97 @@
 #include <linux/highmem.h>
 #include <linux/sched.h>
 #include <linux/slab.h>
+#include <linux/hash.h>
+#include <linux/pmem.h>
 #include <linux/sort.h>
 #include <linux/io.h>
 #include <linux/nd.h>
 #include "nd-core.h"
 #include "nd.h"

+/*
+ * For readq() and writeq() on 32-bit builds, the hi-lo, lo-hi order is
+ * irrelevant.
+ */
+#include <linux/io-64-nonatomic-hi-lo.h>
+
 static DEFINE_IDA(region_ida);
+static DEFINE_PER_CPU(int, flush_idx);
+
+static int nvdimm_map_flush(struct device *dev, struct nvdimm *nvdimm, int dimm,
+		struct nd_region_data *ndrd)
+{
+	int i, j;
+
+	dev_dbg(dev, "%s: map %d flush address%s\n", nvdimm_name(nvdimm),
+			nvdimm->num_flush, nvdimm->num_flush == 1 ? "" : "es");
+	for (i = 0; i < nvdimm->num_flush; i++) {
+		struct resource *res = &nvdimm->flush_wpq[i];
+		unsigned long pfn = PHYS_PFN(res->start);
+		void __iomem *flush_page;
+
+		/* check if flush hints share a page */
+		for (j = 0; j < i; j++) {
+			struct resource *res_j = &nvdimm->flush_wpq[j];
+			unsigned long pfn_j = PHYS_PFN(res_j->start);
+
+			if (pfn == pfn_j)
+				break;
+		}
+
+		if (j < i)
+			flush_page = (void __iomem *) ((unsigned long)
+					ndrd->flush_wpq[dimm][j] & PAGE_MASK);
+		else
+			flush_page = devm_nvdimm_ioremap(dev,
+					PHYS_PFN(pfn), PAGE_SIZE);
+		if (!flush_page)
+			return -ENXIO;
+		ndrd->flush_wpq[dimm][i] = flush_page
+			+ (res->start & ~PAGE_MASK);
+	}
+
+	return 0;
+}
+
+int nd_region_activate(struct nd_region *nd_region)
+{
+	int i, num_flush = 0;
+	struct nd_region_data *ndrd;
+	struct device *dev = &nd_region->dev;
+	size_t flush_data_size = sizeof(void *);
+
+	nvdimm_bus_lock(&nd_region->dev);
+	for (i = 0; i < nd_region->ndr_mappings; i++) {
+		struct nd_mapping *nd_mapping = &nd_region->mapping[i];
+		struct nvdimm *nvdimm = nd_mapping->nvdimm;
+
+		/* at least one null hint slot per-dimm for the "no-hint" case */
+		flush_data_size += sizeof(void *);
+		num_flush = min_not_zero(num_flush, nvdimm->num_flush);
+		if (!nvdimm->num_flush)
+			continue;
+		flush_data_size += nvdimm->num_flush * sizeof(void *);
+	}
+	nvdimm_bus_unlock(&nd_region->dev);
+
+	ndrd = devm_kzalloc(dev, sizeof(*ndrd) + flush_data_size, GFP_KERNEL);
+	if (!ndrd)
+		return -ENOMEM;
+	dev_set_drvdata(dev, ndrd);
+
+	ndrd->flush_mask = (1 << ilog2(num_flush)) - 1;
+	for (i = 0; i < nd_region->ndr_mappings; i++) {
+		struct nd_mapping *nd_mapping = &nd_region->mapping[i];
+		struct nvdimm *nvdimm = nd_mapping->nvdimm;
+		int rc = nvdimm_map_flush(&nd_region->dev, nvdimm, i, ndrd);
+
+		if (rc)
+			return rc;
+	}
+
+	return 0;
+}

 static void nd_region_release(struct device *dev)
 {
@ -242,12 +326,12 @@ static DEVICE_ATTR_RO(available_size);
 static ssize_t init_namespaces_show(struct device *dev,
 		struct device_attribute *attr, char *buf)
 {
-	struct nd_region_namespaces *num_ns = dev_get_drvdata(dev);
+	struct nd_region_data *ndrd = dev_get_drvdata(dev);
 	ssize_t rc;

 	nvdimm_bus_lock(dev);
-	if (num_ns)
-		rc = sprintf(buf, "%d/%d\n", num_ns->active, num_ns->count);
+	if (ndrd)
+		rc = sprintf(buf, "%d/%d\n", ndrd->ns_active, ndrd->ns_count);
 	else
 		rc = -ENXIO;
 	nvdimm_bus_unlock(dev);
@ -433,8 +517,6 @@ static void nd_region_notify_driver_action(struct nvdimm_bus *nvdimm_bus,

 		if (is_nd_pmem(dev))
 			return;
-
-		to_nd_blk_region(dev)->disable(nvdimm_bus, dev);
 	}
 	if (dev->parent && is_nd_blk(dev->parent) && probe) {
 		nd_region = to_nd_region(dev->parent);
@ -698,7 +780,6 @@ static struct nd_region *nd_region_create(struct nvdimm_bus *nvdimm_bus,
 		if (ndbr) {
 			nd_region = &ndbr->nd_region;
 			ndbr->enable = ndbr_desc->enable;
-			ndbr->disable = ndbr_desc->disable;
 			ndbr->do_io = ndbr_desc->do_io;
 		}
 		region_buf = ndbr;
@ -794,6 +875,67 @@ struct nd_region *nvdimm_volatile_region_create(struct nvdimm_bus *nvdimm_bus,
 }
 EXPORT_SYMBOL_GPL(nvdimm_volatile_region_create);

+/**
+ * nvdimm_flush - flush any posted write queues between the cpu and pmem media
+ * @nd_region: blk or interleaved pmem region
+ */
+void nvdimm_flush(struct nd_region *nd_region)
+{
+	struct nd_region_data *ndrd = dev_get_drvdata(&nd_region->dev);
+	int i, idx;
+
+	/*
+	 * Try to encourage some diversity in flush hint addresses
+	 * across cpus assuming a limited number of flush hints.
+	 */
+	idx = this_cpu_read(flush_idx);
+	idx = this_cpu_add_return(flush_idx, hash_32(current->pid + idx, 8));
+
+	/*
+	 * The first wmb() is needed to 'sfence' all previous writes
+	 * such that they are architecturally visible for the platform
+	 * buffer flush.  Note that we've already arranged for pmem
+	 * writes to avoid the cache via arch_memcpy_to_pmem().  The
+	 * final wmb() ensures ordering for the NVDIMM flush write.
+	 */
+	wmb();
+	for (i = 0; i < nd_region->ndr_mappings; i++)
+		if (ndrd->flush_wpq[i][0])
+			writeq(1, ndrd->flush_wpq[i][idx & ndrd->flush_mask]);
+	wmb();
+}
+EXPORT_SYMBOL_GPL(nvdimm_flush);
+
+/**
+ * nvdimm_has_flush - determine write flushing requirements
+ * @nd_region: blk or interleaved pmem region
+ *
+ * Returns 1 if writes require flushing
+ * Returns 0 if writes do not require flushing
+ * Returns -ENXIO if flushing capability can not be determined
+ */
+int nvdimm_has_flush(struct nd_region *nd_region)
+{
+	struct nd_region_data *ndrd = dev_get_drvdata(&nd_region->dev);
+	int i;
+
+	/* no nvdimm == flushing capability unknown */
+	if (nd_region->ndr_mappings == 0)
+		return -ENXIO;
+
+	for (i = 0; i < nd_region->ndr_mappings; i++)
+		/* flush hints present, flushing required */
+		if (ndrd->flush_wpq[i][0])
+			return 1;
+
+	/*
+	 * The platform defines dimm devices without hints, assume
+	 * platform persistence mechanism like ADR
+	 */
+	return 0;
+}
+EXPORT_SYMBOL_GPL(nvdimm_has_flush);
+
 void __exit nd_region_devs_exit(void)
 {
 	ida_destroy(&region_ida);
--- a/drivers/s390/block/dcssblk.c
+++ b/drivers/s390/block/dcssblk.c
@ -31,7 +31,7 @@ static void dcssblk_release(struct gendisk *disk, fmode_t mode);
 static blk_qc_t dcssblk_make_request(struct request_queue *q,
 						struct bio *bio);
 static long dcssblk_direct_access(struct block_device *bdev, sector_t secnum,
-			 void __pmem **kaddr, pfn_t *pfn, long size);
+			 void **kaddr, pfn_t *pfn, long size);

 static char dcssblk_segments[DCSSBLK_PARM_LEN] = "\0";

@ -884,7 +884,7 @@ dcssblk_make_request(struct request_queue *q, struct bio *bio)

 static long
 dcssblk_direct_access (struct block_device *bdev, sector_t secnum,
-			void __pmem **kaddr, pfn_t *pfn, long size)
+			void **kaddr, pfn_t *pfn, long size)
 {
 	struct dcssblk_dev_info *dev_info;
 	unsigned long offset, dev_sz;
@ -894,7 +894,7 @@ dcssblk_direct_access (struct block_device *bdev, sector_t secnum,
 		return -ENODEV;
 	dev_sz = dev_info->end - dev_info->start;
 	offset = secnum * 512;
-	*kaddr = (void __pmem *) (dev_info->start + offset);
+	*kaddr = (void *) dev_info->start + offset;
 	*pfn = __pfn_to_pfn_t(PFN_DOWN(dev_info->start + offset), PFN_DEV);

 	return dev_sz - offset;
--- a/fs/dax.c
+++ b/fs/dax.c
@ -75,13 +75,13 @@ static long dax_map_atomic(struct block_device *bdev, struct blk_dax_ctl *dax)
 	struct request_queue *q = bdev->bd_queue;
 	long rc = -EIO;

-	dax->addr = (void __pmem *) ERR_PTR(-EIO);
+	dax->addr = ERR_PTR(-EIO);
 	if (blk_queue_enter(q, true) != 0)
 		return rc;

 	rc = bdev_direct_access(bdev, dax);
 	if (rc < 0) {
-		dax->addr = (void __pmem *) ERR_PTR(rc);
+		dax->addr = ERR_PTR(rc);
 		blk_queue_exit(q);
 		return rc;
 	}
@ -147,12 +147,12 @@ static ssize_t dax_io(struct inode *inode, struct iov_iter *iter,
 		      struct buffer_head *bh)
 {
 	loff_t pos = start, max = start, bh_max = start;
-	bool hole = false, need_wmb = false;
+	bool hole = false;
 	struct block_device *bdev = NULL;
 	int rw = iov_iter_rw(iter), rc;
 	long map_len = 0;
 	struct blk_dax_ctl dax = {
-		.addr = (void __pmem *) ERR_PTR(-EIO),
+		.addr = ERR_PTR(-EIO),
 	};
 	unsigned blkbits = inode->i_blkbits;
 	sector_t file_blks = (i_size_read(inode) + (1 << blkbits) - 1)
@ -218,7 +218,6 @@ static ssize_t dax_io(struct inode *inode, struct iov_iter *iter,

 		if (iov_iter_rw(iter) == WRITE) {
 			len = copy_from_iter_pmem(dax.addr, max - pos, iter);
-			need_wmb = true;
 		} else if (!hole)
 			len = copy_to_iter((void __force *) dax.addr, max - pos,
 					iter);
@ -235,8 +234,6 @@ static ssize_t dax_io(struct inode *inode, struct iov_iter *iter,
 			dax.addr += len;
 	}

-	if (need_wmb)
-		wmb_pmem();
 	dax_unmap_atomic(bdev, &dax);

 	return (pos == start) ? rc : pos - start;
@ -788,7 +785,6 @@ int dax_writeback_mapping_range(struct address_space *mapping,
 				return ret;
 		}
 	}
-	wmb_pmem();
 	return 0;
 }
 EXPORT_SYMBOL_GPL(dax_writeback_mapping_range);
@ -1187,7 +1183,6 @@ int __dax_zero_page_range(struct block_device *bdev, sector_t sector,
 		if (dax_map_atomic(bdev, &dax) < 0)
 			return PTR_ERR(dax.addr);
 		clear_pmem(dax.addr + offset, length);
-		wmb_pmem();
 		dax_unmap_atomic(bdev, &dax);
 	}
 	return 0;
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@ -1665,7 +1665,7 @@ static inline bool integrity_req_gap_front_merge(struct request *req,
 */
 struct blk_dax_ctl {
 	sector_t sector;
-	void __pmem *addr;
+	void *addr;
 	long size;
 	pfn_t pfn;
 };
@ -1676,8 +1676,8 @@ struct block_device_operations {
 	int (*rw_page)(struct block_device *, sector_t, struct page *, int rw);
 	int (*ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
 	int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
-	long (*direct_access)(struct block_device *, sector_t, void __pmem **,
-			pfn_t *, long);
+	long (*direct_access)(struct block_device *, sector_t, void **, pfn_t *,
+			long);
 	unsigned int (*check_events) (struct gendisk *disk,
 				      unsigned int clearing);
 	/* ->media_changed() is DEPRECATED, use ->check_events() instead */
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@ -17,7 +17,6 @@
 # define __release(x)	__context__(x,-1)
 # define __cond_lock(x,c)	((c) ? ({ __acquire(x); 1; }) : 0)
 # define __percpu	__attribute__((noderef, address_space(3)))
-# define __pmem		__attribute__((noderef, address_space(5)))
 #ifdef CONFIG_SPARSE_RCU_POINTER
 # define __rcu		__attribute__((noderef, address_space(4)))
 #else /* CONFIG_SPARSE_RCU_POINTER */
@ -45,7 +44,6 @@ extern void __chk_io_ptr(const volatile void __iomem *);
 # define __cond_lock(x,c) (c)
 # define __percpu
 # define __rcu
-# define __pmem
 # define __private
 # define ACCESS_PRIVATE(p, member) ((p)->member)
 #endif /* __CHECKER__ */
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@ -131,7 +131,7 @@ typedef int (*dm_busy_fn) (struct dm_target *ti);
 * >= 0 : the number of bytes accessible at the address
 */
 typedef long (*dm_direct_access_fn) (struct dm_target *ti, sector_t sector,
-				     void __pmem **kaddr, pfn_t *pfn, long size);
+				     void **kaddr, pfn_t *pfn, long size);

 void dm_error(const char *message);

--- a/include/linux/libnvdimm.h
+++ b/include/linux/libnvdimm.h
@ -52,6 +52,7 @@ typedef int (*ndctl_fn)(struct nvdimm_bus_descriptor *nd_desc,

 struct nd_namespace_label;
 struct nvdimm_drvdata;
+
 struct nd_mapping {
 	struct nvdimm *nvdimm;
 	struct nd_namespace_label **labels;
@ -69,6 +70,7 @@ struct nd_mapping {
 struct nvdimm_bus_descriptor {
 	const struct attribute_group **attr_groups;
 	unsigned long cmd_mask;
+	struct module *module;
 	char *provider_name;
 	ndctl_fn ndctl;
 	int (*flush_probe)(struct nvdimm_bus_descriptor *nd_desc);
@ -99,13 +101,21 @@ struct nd_region_desc {
 	unsigned long flags;
 };

+struct device;
+void *devm_nvdimm_memremap(struct device *dev, resource_size_t offset,
+		size_t size, unsigned long flags);
+static inline void __iomem *devm_nvdimm_ioremap(struct device *dev,
+		resource_size_t offset, size_t size)
+{
+	return (void __iomem *) devm_nvdimm_memremap(dev, offset, size, 0);
+}
+
 struct nvdimm_bus;
 struct module;
 struct device;
 struct nd_blk_region;
 struct nd_blk_region_desc {
 	int (*enable)(struct nvdimm_bus *nvdimm_bus, struct device *dev);
-	void (*disable)(struct nvdimm_bus *nvdimm_bus, struct device *dev);
 	int (*do_io)(struct nd_blk_region *ndbr, resource_size_t dpa,
 			void *iobuf, u64 len, int rw);
 	struct nd_region_desc ndr_desc;
@ -119,22 +129,22 @@ static inline struct nd_blk_region_desc *to_blk_region_desc(
 }

 int nvdimm_bus_add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length);
-struct nvdimm_bus *__nvdimm_bus_register(struct device *parent,
-		struct nvdimm_bus_descriptor *nfit_desc, struct module *module);
-#define nvdimm_bus_register(parent, desc) \
-	__nvdimm_bus_register(parent, desc, THIS_MODULE)
+struct nvdimm_bus *nvdimm_bus_register(struct device *parent,
+		struct nvdimm_bus_descriptor *nfit_desc);
 void nvdimm_bus_unregister(struct nvdimm_bus *nvdimm_bus);
 struct nvdimm_bus *to_nvdimm_bus(struct device *dev);
 struct nvdimm *to_nvdimm(struct device *dev);
 struct nd_region *to_nd_region(struct device *dev);
 struct nd_blk_region *to_nd_blk_region(struct device *dev);
 struct nvdimm_bus_descriptor *to_nd_desc(struct nvdimm_bus *nvdimm_bus);
+struct device *to_nvdimm_bus_dev(struct nvdimm_bus *nvdimm_bus);
 const char *nvdimm_name(struct nvdimm *nvdimm);
 unsigned long nvdimm_cmd_mask(struct nvdimm *nvdimm);
 void *nvdimm_provider_data(struct nvdimm *nvdimm);
 struct nvdimm *nvdimm_create(struct nvdimm_bus *nvdimm_bus, void *provider_data,
 		const struct attribute_group **groups, unsigned long flags,
-		unsigned long cmd_mask);
+		unsigned long cmd_mask, int num_flush,
+		struct resource *flush_wpq);
 const struct nd_cmd_desc *nd_cmd_dimm_desc(int cmd);
 const struct nd_cmd_desc *nd_cmd_bus_desc(int cmd);
 u32 nd_cmd_in_size(struct nvdimm *nvdimm, int cmd,
@ -156,4 +166,6 @@ struct nvdimm *nd_blk_region_to_dimm(struct nd_blk_region *ndbr);
 unsigned int nd_region_acquire_lane(struct nd_region *nd_region);
 void nd_region_release_lane(struct nd_region *nd_region, unsigned int lane);
 u64 nd_fletcher64(void *addr, size_t len, bool le);
+void nvdimm_flush(struct nd_region *nd_region);
+int nvdimm_has_flush(struct nd_region *nd_region);
 #endif /* __LIBNVDIMM_H__ */
--- a/include/linux/nd.h
+++ b/include/linux/nd.h
@ -26,6 +26,7 @@ struct nd_device_driver {
 	unsigned long type;
 	int (*probe)(struct device *dev);
 	int (*remove)(struct device *dev);
+	void (*shutdown)(struct device *dev);
 	void (*notify)(struct device *dev, enum nvdimm_event event);
 };

@ -67,7 +68,7 @@ struct nd_namespace_io {
 	struct nd_namespace_common common;
 	struct resource res;
 	resource_size_t size;
-	void __pmem *addr;
+	void *addr;
 	struct badblocks bb;
 };

--- a/include/linux/pfn_t.h
+++ b/include/linux/pfn_t.h
@ -28,7 +28,10 @@ static inline pfn_t pfn_to_pfn_t(unsigned long pfn)
 	return __pfn_to_pfn_t(pfn, 0);
 }

-extern pfn_t phys_to_pfn_t(phys_addr_t addr, u64 flags);
+static inline pfn_t phys_to_pfn_t(phys_addr_t addr, u64 flags)
+{
+	return __pfn_to_pfn_t(addr >> PAGE_SHIFT, flags);
+}

 static inline bool pfn_t_has_page(pfn_t pfn)
 {
--- a/include/linux/pmem.h
+++ b/include/linux/pmem.h
@ -26,47 +26,35 @@
 * calling these symbols with arch_has_pmem_api() and redirect to the
 * implementation in asm/pmem.h.
 */
-static inline bool __arch_has_wmb_pmem(void)
-{
-	return false;
-}
-
-static inline void arch_wmb_pmem(void)
+static inline void arch_memcpy_to_pmem(void *dst, const void *src, size_t n)
 {
 	BUG();
 }

-static inline void arch_memcpy_to_pmem(void __pmem *dst, const void *src,
-		size_t n)
-{
-	BUG();
-}
-
-static inline int arch_memcpy_from_pmem(void *dst, const void __pmem *src,
-		size_t n)
+static inline int arch_memcpy_from_pmem(void *dst, const void *src, size_t n)
 {
 	BUG();
 	return -EFAULT;
 }

-static inline size_t arch_copy_from_iter_pmem(void __pmem *addr, size_t bytes,
+static inline size_t arch_copy_from_iter_pmem(void *addr, size_t bytes,
 		struct iov_iter *i)
 {
 	BUG();
 	return 0;
 }

-static inline void arch_clear_pmem(void __pmem *addr, size_t size)
+static inline void arch_clear_pmem(void *addr, size_t size)
 {
 	BUG();
 }

-static inline void arch_wb_cache_pmem(void __pmem *addr, size_t size)
+static inline void arch_wb_cache_pmem(void *addr, size_t size)
 {
 	BUG();
 }

-static inline void arch_invalidate_pmem(void __pmem *addr, size_t size)
+static inline void arch_invalidate_pmem(void *addr, size_t size)
 {
 	BUG();
 }
@ -77,13 +65,6 @@ static inline bool arch_has_pmem_api(void)
 	return IS_ENABLED(CONFIG_ARCH_HAS_PMEM_API);
 }

-static inline int default_memcpy_from_pmem(void *dst, void __pmem const *src,
-		size_t size)
-{
-	memcpy(dst, (void __force *) src, size);
-	return 0;
-}
-
 /*
 * memcpy_from_pmem - read from persistent memory with error handling
 * @dst: destination buffer
@ -92,54 +73,13 @@ static inline int default_memcpy_from_pmem(void *dst, void __pmem const *src,
 *
 * Returns 0 on success negative error code on failure.
 */
-static inline int memcpy_from_pmem(void *dst, void __pmem const *src,
-		size_t size)
+static inline int memcpy_from_pmem(void *dst, void const *src, size_t size)
 {
 	if (arch_has_pmem_api())
 		return arch_memcpy_from_pmem(dst, src, size);
 	else
-		return default_memcpy_from_pmem(dst, src, size);
-}
-
-/**
- * arch_has_wmb_pmem - true if wmb_pmem() ensures durability
- *
- * For a given cpu implementation within an architecture it is possible
- * that wmb_pmem() resolves to a nop.  In the case this returns
- * false, pmem api users are unable to ensure durability and may want to
- * fall back to a different data consistency model, or otherwise notify
- * the user.
- */
-static inline bool arch_has_wmb_pmem(void)
-{
-	return arch_has_pmem_api() && __arch_has_wmb_pmem();
-}
-
-/*
- * These defaults seek to offer decent performance and minimize the
- * window between i/o completion and writes being durable on media.
- * However, it is undefined / architecture specific whether
- * ARCH_MEMREMAP_PMEM + default_memcpy_to_pmem is sufficient for
- * making data durable relative to i/o completion.
- */
-static inline void default_memcpy_to_pmem(void __pmem *dst, const void *src,
-		size_t size)
-{
-	memcpy((void __force *) dst, src, size);
-}
-
-static inline size_t default_copy_from_iter_pmem(void __pmem *addr,
-		size_t bytes, struct iov_iter *i)
-{
-	return copy_from_iter_nocache((void __force *)addr, bytes, i);
-}
-
-static inline void default_clear_pmem(void __pmem *addr, size_t size)
-{
-	if (size == PAGE_SIZE && ((unsigned long)addr & ~PAGE_MASK) == 0)
-		clear_page((void __force *)addr);
-	else
-		memset((void __force *)addr, 0, size);
+		memcpy(dst, src, size);
+	return 0;
 }

 /**
@ -152,29 +92,14 @@ static inline void default_clear_pmem(void __pmem *addr, size_t size)
 * being effectively evicted from, or never written to, the processor
 * cache hierarchy after the copy completes.  After memcpy_to_pmem()
 * data may still reside in cpu or platform buffers, so this operation
- * must be followed by a wmb_pmem().
+ * must be followed by a blkdev_issue_flush() on the pmem block device.
 */
-static inline void memcpy_to_pmem(void __pmem *dst, const void *src, size_t n)
+static inline void memcpy_to_pmem(void *dst, const void *src, size_t n)
 {
 	if (arch_has_pmem_api())
 		arch_memcpy_to_pmem(dst, src, n);
 	else
-		default_memcpy_to_pmem(dst, src, n);
-}
-
-/**
- * wmb_pmem - synchronize writes to persistent memory
- *
- * After a series of memcpy_to_pmem() operations this drains data from
- * cpu write buffers and any platform (memory controller) buffers to
- * ensure that written data is durable on persistent memory media.
- */
-static inline void wmb_pmem(void)
-{
-	if (arch_has_wmb_pmem())
-		arch_wmb_pmem();
-	else
-		wmb();
+		memcpy(dst, src, n);
 }

 /**
@ -184,14 +109,14 @@ static inline void wmb_pmem(void)
 * @i:		iterator with source data
 *
 * Copy data from the iterator 'i' to the PMEM buffer starting at 'addr'.
- * This function requires explicit ordering with a wmb_pmem() call.
+ * See blkdev_issue_flush() note for memcpy_to_pmem().
 */
-static inline size_t copy_from_iter_pmem(void __pmem *addr, size_t bytes,
+static inline size_t copy_from_iter_pmem(void *addr, size_t bytes,
 		struct iov_iter *i)
 {
 	if (arch_has_pmem_api())
 		return arch_copy_from_iter_pmem(addr, bytes, i);
-	return default_copy_from_iter_pmem(addr, bytes, i);
+	return copy_from_iter_nocache(addr, bytes, i);
 }

 /**
@ -200,14 +125,14 @@ static inline size_t copy_from_iter_pmem(void __pmem *addr, size_t bytes,
 * @size:	number of bytes to zero
 *
 * Write zeros into the memory range starting at 'addr' for 'size' bytes.
- * This function requires explicit ordering with a wmb_pmem() call.
+ * See blkdev_issue_flush() note for memcpy_to_pmem().
 */
-static inline void clear_pmem(void __pmem *addr, size_t size)
+static inline void clear_pmem(void *addr, size_t size)
 {
 	if (arch_has_pmem_api())
 		arch_clear_pmem(addr, size);
 	else
-		default_clear_pmem(addr, size);
+		memset(addr, 0, size);
 }

 /**
@ -218,7 +143,7 @@ static inline void clear_pmem(void __pmem *addr, size_t size)
 * For platforms that support clearing poison this flushes any poisoned
 * ranges out of the cache
 */
-static inline void invalidate_pmem(void __pmem *addr, size_t size)
+static inline void invalidate_pmem(void *addr, size_t size)
 {
 	if (arch_has_pmem_api())
 		arch_invalidate_pmem(addr, size);
@ -230,9 +155,9 @@ static inline void invalidate_pmem(void __pmem *addr, size_t size)
 * @size:	number of bytes to write back
 *
 * Write back the processor cache range starting at 'addr' for 'size' bytes.
- * This function requires explicit ordering with a wmb_pmem() call.
+ * See blkdev_issue_flush() note for memcpy_to_pmem().
 */
-static inline void wb_cache_pmem(void __pmem *addr, size_t size)
+static inline void wb_cache_pmem(void *addr, size_t size)
 {
 	if (arch_has_pmem_api())
 		arch_wb_cache_pmem(addr, size);
--- a/include/uapi/linux/ndctl.h
+++ b/include/uapi/linux/ndctl.h
@ -298,6 +298,7 @@ struct nd_cmd_pkg {
 #define NVDIMM_FAMILY_INTEL 0
 #define NVDIMM_FAMILY_HPE1 1
 #define NVDIMM_FAMILY_HPE2 2
+#define NVDIMM_FAMILY_MSFT 3

 #define ND_IOCTL_CALL			_IOWR(ND_IOCTL, ND_CMD_CALL,\
 					struct nd_cmd_pkg)
--- a/kernel/memremap.c
+++ b/kernel/memremap.c
@ -169,12 +169,6 @@ void devm_memunmap(struct device *dev, void *addr)
 }
 EXPORT_SYMBOL(devm_memunmap);

-pfn_t phys_to_pfn_t(phys_addr_t addr, u64 flags)
-{
-	return __pfn_to_pfn_t(addr >> PAGE_SHIFT, flags);
-}
-EXPORT_SYMBOL(phys_to_pfn_t);
-
 #ifdef CONFIG_ZONE_DEVICE
 static DEFINE_MUTEX(pgmap_lock);
 static RADIX_TREE(pgmap_radix, GFP_KERNEL);
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@ -313,7 +313,6 @@ our $Sparse	= qr{
 			__kernel|
 			__force|
 			__iomem|
-			__pmem|
 			__must_check|
 			__init_refok|
 			__kprobes|
--- a/tools/objtool/arch/x86/insn/x86-opcode-map.txt
+++ b/tools/objtool/arch/x86/insn/x86-opcode-map.txt
@ -947,7 +947,7 @@ GrpTable: Grp15
 4: XSAVE
 5: XRSTOR | lfence (11B)
 6: XSAVEOPT | clwb (66) | mfence (11B)
-7: clflush | clflushopt (66) | sfence (11B) | pcommit (66),(11B)
+7: clflush | clflushopt (66) | sfence (11B)
 EndTable

 GrpTable: Grp16
--- a/tools/perf/arch/x86/tests/insn-x86-dat-32.c
+++ b/tools/perf/arch/x86/tests/insn-x86-dat-32.c
@ -1664,5 +1664,3 @@
 "0f c7 1d 78 56 34 12 \txrstors 0x12345678",},
 {{0x0f, 0xc7, 0x9c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
 "0f c7 9c c8 78 56 34 12 \txrstors 0x12345678(%eax,%ecx,8)",},
-{{0x66, 0x0f, 0xae, 0xf8, }, 4, 0, "", "",
-"66 0f ae f8          \tpcommit ",},
--- a/tools/perf/arch/x86/tests/insn-x86-dat-64.c
+++ b/tools/perf/arch/x86/tests/insn-x86-dat-64.c
@ -1696,5 +1696,3 @@
 "0f c7 9c c8 78 56 34 12 \txrstors 0x12345678(%rax,%rcx,8)",},
 {{0x41, 0x0f, 0xc7, 0x9c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
 "41 0f c7 9c c8 78 56 34 12 \txrstors 0x12345678(%r8,%rcx,8)",},
-{{0x66, 0x0f, 0xae, 0xf8, }, 4, 0, "", "",
-"66 0f ae f8          \tpcommit ",},
--- a/tools/perf/arch/x86/tests/insn-x86-dat-src.c
+++ b/tools/perf/arch/x86/tests/insn-x86-dat-src.c
@ -2655,10 +2655,6 @@ int main(void)

 #endif /* #ifndef __x86_64__ */

-	/* pcommit */
-
-	asm volatile("pcommit");
-
 	/* Following line is a marker for the awk script - do not change */
 	asm volatile("rdtsc"); /* Stop here */

--- a/tools/perf/util/intel-pt-decoder/x86-opcode-map.txt
+++ b/tools/perf/util/intel-pt-decoder/x86-opcode-map.txt
@ -1012,7 +1012,7 @@ GrpTable: Grp15
 4: XSAVE
 5: XRSTOR | lfence (11B)
 6: XSAVEOPT | clwb (66) | mfence (11B)
-7: clflush | clflushopt (66) | sfence (11B) | pcommit (66),(11B)
+7: clflush | clflushopt (66) | sfence (11B)
 EndTable

 GrpTable: Grp16
--- a/tools/testing/nvdimm/Kbuild
+++ b/tools/testing/nvdimm/Kbuild
@ -11,12 +11,14 @@ ldflags-y += --wrap=__devm_release_region
 ldflags-y += --wrap=__request_region
 ldflags-y += --wrap=__release_region
 ldflags-y += --wrap=devm_memremap_pages
-ldflags-y += --wrap=phys_to_pfn_t
+ldflags-y += --wrap=insert_resource
+ldflags-y += --wrap=remove_resource

 DRIVERS := ../../../drivers
 NVDIMM_SRC := $(DRIVERS)/nvdimm
-ACPI_SRC := $(DRIVERS)/acpi
+ACPI_SRC := $(DRIVERS)/acpi/nfit
 DAX_SRC := $(DRIVERS)/dax
+ccflags-y := -I$(src)/$(NVDIMM_SRC)/

 obj-$(CONFIG_LIBNVDIMM) += libnvdimm.o
 obj-$(CONFIG_BLK_DEV_PMEM) += nd_pmem.o
@ -27,10 +29,12 @@ obj-$(CONFIG_ACPI_NFIT) += nfit.o
 obj-$(CONFIG_DEV_DAX) += dax.o
 obj-$(CONFIG_DEV_DAX_PMEM) += dax_pmem.o

-nfit-y := $(ACPI_SRC)/nfit.o
+nfit-y := $(ACPI_SRC)/core.o
+nfit-$(CONFIG_X86_MCE) += $(ACPI_SRC)/mce.o
 nfit-y += config_check.o

 nd_pmem-y := $(NVDIMM_SRC)/pmem.o
+nd_pmem-y += pmem-dax.o
 nd_pmem-y += config_check.o

 nd_btt-y := $(NVDIMM_SRC)/btt.o
--- a/tools/testing/nvdimm/config_check.c
+++ b/tools/testing/nvdimm/config_check.c
@ -10,6 +10,7 @@ void check(void)
 	BUILD_BUG_ON(!IS_MODULE(CONFIG_LIBNVDIMM));
 	BUILD_BUG_ON(!IS_MODULE(CONFIG_BLK_DEV_PMEM));
 	BUILD_BUG_ON(!IS_MODULE(CONFIG_ND_BTT));
+	BUILD_BUG_ON(!IS_MODULE(CONFIG_ND_PFN));
 	BUILD_BUG_ON(!IS_MODULE(CONFIG_ND_BLK));
 	BUILD_BUG_ON(!IS_MODULE(CONFIG_ACPI_NFIT));
 	BUILD_BUG_ON(!IS_MODULE(CONFIG_DEV_DAX));
--- a/tools/testing/nvdimm/pmem-dax.c
+++ b/tools/testing/nvdimm/pmem-dax.c
@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2014-2016, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+#include "test/nfit_test.h"
+#include <linux/blkdev.h>
+#include <pmem.h>
+#include <nd.h>
+
+long pmem_direct_access(struct block_device *bdev, sector_t sector,
+		void **kaddr, pfn_t *pfn, long size)
+{
+	struct pmem_device *pmem = bdev->bd_queue->queuedata;
+	resource_size_t offset = sector * 512 + pmem->data_offset;
+
+	if (unlikely(is_bad_pmem(&pmem->bb, sector, size)))
+		return -EIO;
+
+	/*
+	 * Limit dax to a single page at a time given vmalloc()-backed
+	 * in the nfit_test case.
+	 */
+	if (get_nfit_res(pmem->phys_addr + offset)) {
+		struct page *page;
+
+		*kaddr = pmem->virt_addr + offset;
+		page = vmalloc_to_page(pmem->virt_addr + offset);
+		*pfn = page_to_pfn_t(page);
+		dev_dbg_ratelimited(disk_to_dev(bdev->bd_disk)->parent,
+				"%s: sector: %#llx pfn: %#lx\n", __func__,
+				(unsigned long long) sector, page_to_pfn(page));
+
+		return PAGE_SIZE;
+	}
+
+	*kaddr = pmem->virt_addr + offset;
+	*pfn = phys_to_pfn_t(pmem->phys_addr + offset, pmem->pfn_flags);
+
+	/*
+	 * If badblocks are present, limit known good range to the
+	 * requested range.
+	 */
+	if (unlikely(pmem->bb.count))
+		return size;
+	return pmem->size - pmem->pfn_pad - offset;
+}
--- a/tools/testing/nvdimm/test/Kbuild
+++ b/tools/testing/nvdimm/test/Kbuild
@ -1,5 +1,5 @@
 ccflags-y := -I$(src)/../../../../drivers/nvdimm/
-ccflags-y += -I$(src)/../../../../drivers/acpi/
+ccflags-y += -I$(src)/../../../../drivers/acpi/nfit/

 obj-m += nfit_test.o
 obj-m += nfit_test_iomap.o
--- a/tools/testing/nvdimm/test/iomap.c
+++ b/tools/testing/nvdimm/test/iomap.c
@ -10,11 +10,13 @@
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 */
+#include <linux/memremap.h>
 #include <linux/rculist.h>
 #include <linux/export.h>
 #include <linux/ioport.h>
 #include <linux/module.h>
 #include <linux/types.h>
+#include <linux/pfn_t.h>
 #include <linux/io.h>
 #include <linux/mm.h>
 #include "nfit_test.h"
@ -52,7 +54,7 @@ static struct nfit_test_resource *__get_nfit_res(resource_size_t resource)
 	return NULL;
 }

-static struct nfit_test_resource *get_nfit_res(resource_size_t resource)
+struct nfit_test_resource *get_nfit_res(resource_size_t resource)
 {
 	struct nfit_test_resource *res;

@ -62,6 +64,7 @@ static struct nfit_test_resource *get_nfit_res(resource_size_t resource)

 	return res;
 }
+EXPORT_SYMBOL(get_nfit_res);

 void __iomem *__nfit_test_ioremap(resource_size_t offset, unsigned long size,
 		void __iomem *(*fallback_fn)(resource_size_t, unsigned long))
@ -97,10 +100,6 @@ void *__wrap_devm_memremap(struct device *dev, resource_size_t offset,
 }
 EXPORT_SYMBOL(__wrap_devm_memremap);

-#ifdef __HAVE_ARCH_PTE_DEVMAP
-#include <linux/memremap.h>
-#include <linux/pfn_t.h>
-
 void *__wrap_devm_memremap_pages(struct device *dev, struct resource *res,
 		struct percpu_ref *ref, struct vmem_altmap *altmap)
 {
@ -122,19 +121,6 @@ pfn_t __wrap_phys_to_pfn_t(phys_addr_t addr, unsigned long flags)
        return phys_to_pfn_t(addr, flags);
 }
 EXPORT_SYMBOL(__wrap_phys_to_pfn_t);
-#else
-/* to be removed post 4.5-rc1 */
-void *__wrap_devm_memremap_pages(struct device *dev, struct resource *res)
-{
-	resource_size_t offset = res->start;
-	struct nfit_test_resource *nfit_res = get_nfit_res(offset);
-
-	if (nfit_res)
-		return nfit_res->buf + offset - nfit_res->res->start;
-	return devm_memremap_pages(dev, res);
-}
-EXPORT_SYMBOL(__wrap_devm_memremap_pages);
-#endif

 void *__wrap_memremap(resource_size_t offset, size_t size,
 		unsigned long flags)
@ -229,6 +215,22 @@ struct resource *__wrap___request_region(struct resource *parent,
 }
 EXPORT_SYMBOL(__wrap___request_region);

+int __wrap_insert_resource(struct resource *parent, struct resource *res)
+{
+	if (get_nfit_res(res->start))
+		return 0;
+	return insert_resource(parent, res);
+}
+EXPORT_SYMBOL(__wrap_insert_resource);
+
+int __wrap_remove_resource(struct resource *res)
+{
+	if (get_nfit_res(res->start))
+		return 0;
+	return remove_resource(res);
+}
+EXPORT_SYMBOL(__wrap_remove_resource);
+
 struct resource *__wrap___devm_request_region(struct device *dev,
 		struct resource *parent, resource_size_t start,
 		resource_size_t n, const char *name)
--- a/tools/testing/nvdimm/test/nfit.c
+++ b/tools/testing/nvdimm/test/nfit.c
@ -98,11 +98,13 @@
 enum {
 	NUM_PM  = 3,
 	NUM_DCR = 5,
+	NUM_HINTS = 8,
 	NUM_BDW = NUM_DCR,
 	NUM_SPA = NUM_PM + NUM_DCR + NUM_BDW,
 	NUM_MEM = NUM_DCR + NUM_BDW + 2 /* spa0 iset */ + 4 /* spa1 iset */,
 	DIMM_SIZE = SZ_32M,
 	LABEL_SIZE = SZ_128K,
+	SPA_VCD_SIZE = SZ_4M,
 	SPA0_SIZE = DIMM_SIZE,
 	SPA1_SIZE = DIMM_SIZE*2,
 	SPA2_SIZE = DIMM_SIZE,
@ -470,11 +472,7 @@ static void release_nfit_res(void *data)
 	list_del(&nfit_res->list);
 	spin_unlock(&nfit_test_lock);

-	if (is_vmalloc_addr(nfit_res->buf))
 	vfree(nfit_res->buf);
-	else
-		dma_free_coherent(nfit_res->dev, resource_size(res),
-				nfit_res->buf, res->start);
 	kfree(res);
 	kfree(nfit_res);
 }
@ -507,9 +505,7 @@ static void *__test_alloc(struct nfit_test *t, size_t size, dma_addr_t *dma,

 	return nfit_res->buf;
 err:
-	if (buf && !is_vmalloc_addr(buf))
-		dma_free_coherent(dev, size, buf, *dma);
-	else if (buf)
+	if (buf)
 		vfree(buf);
 	kfree(res);
 	kfree(nfit_res);
@ -524,15 +520,6 @@ static void *test_alloc(struct nfit_test *t, size_t size, dma_addr_t *dma)
 	return __test_alloc(t, size, dma, buf);
 }

-static void *test_alloc_coherent(struct nfit_test *t, size_t size,
-		dma_addr_t *dma)
-{
-	struct device *dev = &t->pdev.dev;
-	void *buf = dma_alloc_coherent(dev, size, dma, GFP_KERNEL);
-
-	return __test_alloc(t, size, dma, buf);
-}
-
 static struct nfit_test_resource *nfit_test_lookup(resource_size_t addr)
 {
 	int i;
@ -584,7 +571,8 @@ static int nfit_test0_alloc(struct nfit_test *t)
 			+ offsetof(struct acpi_nfit_control_region,
 					window_size) * NUM_DCR
 			+ sizeof(struct acpi_nfit_data_region) * NUM_BDW
-			+ sizeof(struct acpi_nfit_flush_address) * NUM_DCR;
+			+ (sizeof(struct acpi_nfit_flush_address)
+					+ sizeof(u64) * NUM_HINTS) * NUM_DCR;
 	int i;

 	t->nfit_buf = test_alloc(t, nfit_size, &t->nfit_dma);
@ -592,15 +580,15 @@ static int nfit_test0_alloc(struct nfit_test *t)
 		return -ENOMEM;
 	t->nfit_size = nfit_size;

-	t->spa_set[0] = test_alloc_coherent(t, SPA0_SIZE, &t->spa_set_dma[0]);
+	t->spa_set[0] = test_alloc(t, SPA0_SIZE, &t->spa_set_dma[0]);
 	if (!t->spa_set[0])
 		return -ENOMEM;

-	t->spa_set[1] = test_alloc_coherent(t, SPA1_SIZE, &t->spa_set_dma[1]);
+	t->spa_set[1] = test_alloc(t, SPA1_SIZE, &t->spa_set_dma[1]);
 	if (!t->spa_set[1])
 		return -ENOMEM;

-	t->spa_set[2] = test_alloc_coherent(t, SPA0_SIZE, &t->spa_set_dma[2]);
+	t->spa_set[2] = test_alloc(t, SPA0_SIZE, &t->spa_set_dma[2]);
 	if (!t->spa_set[2])
 		return -ENOMEM;

@ -614,7 +602,8 @@ static int nfit_test0_alloc(struct nfit_test *t)
 			return -ENOMEM;
 		sprintf(t->label[i], "label%d", i);

-		t->flush[i] = test_alloc(t, 8, &t->flush_dma[i]);
+		t->flush[i] = test_alloc(t, sizeof(u64) * NUM_HINTS,
+				&t->flush_dma[i]);
 		if (!t->flush[i])
 			return -ENOMEM;
 	}
@ -630,7 +619,7 @@ static int nfit_test0_alloc(struct nfit_test *t)

 static int nfit_test1_alloc(struct nfit_test *t)
 {
-	size_t nfit_size = sizeof(struct acpi_nfit_system_address)
+	size_t nfit_size = sizeof(struct acpi_nfit_system_address) * 2
 		+ sizeof(struct acpi_nfit_memory_map)
 		+ offsetof(struct acpi_nfit_control_region, window_size);

@ -639,15 +628,31 @@ static int nfit_test1_alloc(struct nfit_test *t)
 		return -ENOMEM;
 	t->nfit_size = nfit_size;

-	t->spa_set[0] = test_alloc_coherent(t, SPA2_SIZE, &t->spa_set_dma[0]);
+	t->spa_set[0] = test_alloc(t, SPA2_SIZE, &t->spa_set_dma[0]);
 	if (!t->spa_set[0])
 		return -ENOMEM;

+	t->spa_set[1] = test_alloc(t, SPA_VCD_SIZE, &t->spa_set_dma[1]);
+	if (!t->spa_set[1])
+		return -ENOMEM;
+
 	return ars_state_init(&t->pdev.dev, &t->ars_state);
 }

+static void dcr_common_init(struct acpi_nfit_control_region *dcr)
+{
+	dcr->vendor_id = 0xabcd;
+	dcr->device_id = 0;
+	dcr->revision_id = 1;
+	dcr->valid_fields = 1;
+	dcr->manufacturing_location = 0xa;
+	dcr->manufacturing_date = cpu_to_be16(2016);
+}
+
 static void nfit_test0_setup(struct nfit_test *t)
 {
+	const int flush_hint_size = sizeof(struct acpi_nfit_flush_address)
+		+ (sizeof(u64) * NUM_HINTS);
 	struct acpi_nfit_desc *acpi_desc;
 	struct acpi_nfit_memory_map *memdev;
 	void *nfit_buf = t->nfit_buf;
@ -655,7 +660,7 @@ static void nfit_test0_setup(struct nfit_test *t)
 	struct acpi_nfit_control_region *dcr;
 	struct acpi_nfit_data_region *bdw;
 	struct acpi_nfit_flush_address *flush;
-	unsigned int offset;
+	unsigned int offset, i;

 	/*
 	 * spa0 (interleave first half of dimm0 and dimm1, note storage
@ -972,9 +977,7 @@ static void nfit_test0_setup(struct nfit_test *t)
 	dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
 	dcr->header.length = sizeof(struct acpi_nfit_control_region);
 	dcr->region_index = 0+1;
-	dcr->vendor_id = 0xabcd;
-	dcr->device_id = 0;
-	dcr->revision_id = 1;
+	dcr_common_init(dcr);
 	dcr->serial_number = ~handle[0];
 	dcr->code = NFIT_FIC_BLK;
 	dcr->windows = 1;
@ -989,9 +992,7 @@ static void nfit_test0_setup(struct nfit_test *t)
 	dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
 	dcr->header.length = sizeof(struct acpi_nfit_control_region);
 	dcr->region_index = 1+1;
-	dcr->vendor_id = 0xabcd;
-	dcr->device_id = 0;
-	dcr->revision_id = 1;
+	dcr_common_init(dcr);
 	dcr->serial_number = ~handle[1];
 	dcr->code = NFIT_FIC_BLK;
 	dcr->windows = 1;
@ -1006,9 +1007,7 @@ static void nfit_test0_setup(struct nfit_test *t)
 	dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
 	dcr->header.length = sizeof(struct acpi_nfit_control_region);
 	dcr->region_index = 2+1;
-	dcr->vendor_id = 0xabcd;
-	dcr->device_id = 0;
-	dcr->revision_id = 1;
+	dcr_common_init(dcr);
 	dcr->serial_number = ~handle[2];
 	dcr->code = NFIT_FIC_BLK;
 	dcr->windows = 1;
@ -1023,9 +1022,7 @@ static void nfit_test0_setup(struct nfit_test *t)
 	dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
 	dcr->header.length = sizeof(struct acpi_nfit_control_region);
 	dcr->region_index = 3+1;
-	dcr->vendor_id = 0xabcd;
-	dcr->device_id = 0;
-	dcr->revision_id = 1;
+	dcr_common_init(dcr);
 	dcr->serial_number = ~handle[3];
 	dcr->code = NFIT_FIC_BLK;
 	dcr->windows = 1;
@ -1042,9 +1039,7 @@ static void nfit_test0_setup(struct nfit_test *t)
 	dcr->header.length = offsetof(struct acpi_nfit_control_region,
 			window_size);
 	dcr->region_index = 4+1;
-	dcr->vendor_id = 0xabcd;
-	dcr->device_id = 0;
-	dcr->revision_id = 1;
+	dcr_common_init(dcr);
 	dcr->serial_number = ~handle[0];
 	dcr->code = NFIT_FIC_BYTEN;
 	dcr->windows = 0;
@ -1056,9 +1051,7 @@ static void nfit_test0_setup(struct nfit_test *t)
 	dcr->header.length = offsetof(struct acpi_nfit_control_region,
 			window_size);
 	dcr->region_index = 5+1;
-	dcr->vendor_id = 0xabcd;
-	dcr->device_id = 0;
-	dcr->revision_id = 1;
+	dcr_common_init(dcr);
 	dcr->serial_number = ~handle[1];
 	dcr->code = NFIT_FIC_BYTEN;
 	dcr->windows = 0;
@ -1070,9 +1063,7 @@ static void nfit_test0_setup(struct nfit_test *t)
 	dcr->header.length = offsetof(struct acpi_nfit_control_region,
 			window_size);
 	dcr->region_index = 6+1;
-	dcr->vendor_id = 0xabcd;
-	dcr->device_id = 0;
-	dcr->revision_id = 1;
+	dcr_common_init(dcr);
 	dcr->serial_number = ~handle[2];
 	dcr->code = NFIT_FIC_BYTEN;
 	dcr->windows = 0;
@ -1084,9 +1075,7 @@ static void nfit_test0_setup(struct nfit_test *t)
 	dcr->header.length = offsetof(struct acpi_nfit_control_region,
 			window_size);
 	dcr->region_index = 7+1;
-	dcr->vendor_id = 0xabcd;
-	dcr->device_id = 0;
-	dcr->revision_id = 1;
+	dcr_common_init(dcr);
 	dcr->serial_number = ~handle[3];
 	dcr->code = NFIT_FIC_BYTEN;
 	dcr->windows = 0;
@ -1141,45 +1130,47 @@ static void nfit_test0_setup(struct nfit_test *t)
 	/* flush0 (dimm0) */
 	flush = nfit_buf + offset;
 	flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS;
-	flush->header.length = sizeof(struct acpi_nfit_flush_address);
+	flush->header.length = flush_hint_size;
 	flush->device_handle = handle[0];
-	flush->hint_count = 1;
-	flush->hint_address[0] = t->flush_dma[0];
+	flush->hint_count = NUM_HINTS;
+	for (i = 0; i < NUM_HINTS; i++)
+		flush->hint_address[i] = t->flush_dma[0] + i * sizeof(u64);

 	/* flush1 (dimm1) */
-	flush = nfit_buf + offset + sizeof(struct acpi_nfit_flush_address) * 1;
+	flush = nfit_buf + offset + flush_hint_size * 1;
 	flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS;
-	flush->header.length = sizeof(struct acpi_nfit_flush_address);
+	flush->header.length = flush_hint_size;
 	flush->device_handle = handle[1];
-	flush->hint_count = 1;
-	flush->hint_address[0] = t->flush_dma[1];
+	flush->hint_count = NUM_HINTS;
+	for (i = 0; i < NUM_HINTS; i++)
+		flush->hint_address[i] = t->flush_dma[1] + i * sizeof(u64);

 	/* flush2 (dimm2) */
-	flush = nfit_buf + offset + sizeof(struct acpi_nfit_flush_address) * 2;
+	flush = nfit_buf + offset + flush_hint_size  * 2;
 	flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS;
-	flush->header.length = sizeof(struct acpi_nfit_flush_address);
+	flush->header.length = flush_hint_size;
 	flush->device_handle = handle[2];
-	flush->hint_count = 1;
-	flush->hint_address[0] = t->flush_dma[2];
+	flush->hint_count = NUM_HINTS;
+	for (i = 0; i < NUM_HINTS; i++)
+		flush->hint_address[i] = t->flush_dma[2] + i * sizeof(u64);

 	/* flush3 (dimm3) */
-	flush = nfit_buf + offset + sizeof(struct acpi_nfit_flush_address) * 3;
+	flush = nfit_buf + offset + flush_hint_size * 3;
 	flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS;
-	flush->header.length = sizeof(struct acpi_nfit_flush_address);
+	flush->header.length = flush_hint_size;
 	flush->device_handle = handle[3];
-	flush->hint_count = 1;
-	flush->hint_address[0] = t->flush_dma[3];
+	flush->hint_count = NUM_HINTS;
+	for (i = 0; i < NUM_HINTS; i++)
+		flush->hint_address[i] = t->flush_dma[3] + i * sizeof(u64);

 	if (t->setup_hotplug) {
-		offset = offset + sizeof(struct acpi_nfit_flush_address) * 4;
+		offset = offset + flush_hint_size * 4;
 		/* dcr-descriptor4: blk */
 		dcr = nfit_buf + offset;
 		dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
 		dcr->header.length = sizeof(struct acpi_nfit_control_region);
 		dcr->region_index = 8+1;
-		dcr->vendor_id = 0xabcd;
-		dcr->device_id = 0;
-		dcr->revision_id = 1;
+		dcr_common_init(dcr);
 		dcr->serial_number = ~handle[4];
 		dcr->code = NFIT_FIC_BLK;
 		dcr->windows = 1;
@ -1196,9 +1187,7 @@ static void nfit_test0_setup(struct nfit_test *t)
 		dcr->header.length = offsetof(struct acpi_nfit_control_region,
 				window_size);
 		dcr->region_index = 9+1;
-		dcr->vendor_id = 0xabcd;
-		dcr->device_id = 0;
-		dcr->revision_id = 1;
+		dcr_common_init(dcr);
 		dcr->serial_number = ~handle[4];
 		dcr->code = NFIT_FIC_BYTEN;
 		dcr->windows = 0;
@ -1300,10 +1289,12 @@ static void nfit_test0_setup(struct nfit_test *t)
 		/* flush3 (dimm4) */
 		flush = nfit_buf + offset;
 		flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS;
-		flush->header.length = sizeof(struct acpi_nfit_flush_address);
+		flush->header.length = flush_hint_size;
 		flush->device_handle = handle[4];
-		flush->hint_count = 1;
-		flush->hint_address[0] = t->flush_dma[4];
+		flush->hint_count = NUM_HINTS;
+		for (i = 0; i < NUM_HINTS; i++)
+			flush->hint_address[i] = t->flush_dma[4]
+				+ i * sizeof(u64);
 	}

 	post_ars_status(&t->ars_state, t->spa_set_dma[0], SPA0_SIZE);
@ -1339,7 +1330,16 @@ static void nfit_test1_setup(struct nfit_test *t)
 	spa->address = t->spa_set_dma[0];
 	spa->length = SPA2_SIZE;

-	offset += sizeof(*spa);
+	/* virtual cd region */
+	spa = nfit_buf + sizeof(*spa);
+	spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
+	spa->header.length = sizeof(*spa);
+	memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_VCD), 16);
+	spa->range_index = 0;
+	spa->address = t->spa_set_dma[1];
+	spa->length = SPA_VCD_SIZE;
+
+	offset += sizeof(*spa) * 2;
 	/* mem-region0 (spa0, dimm0) */
 	memdev = nfit_buf + offset;
 	memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
@ -1365,9 +1365,7 @@ static void nfit_test1_setup(struct nfit_test *t)
 	dcr->header.length = offsetof(struct acpi_nfit_control_region,
 			window_size);
 	dcr->region_index = 0+1;
-	dcr->vendor_id = 0xabcd;
-	dcr->device_id = 0;
-	dcr->revision_id = 1;
+	dcr_common_init(dcr);
 	dcr->serial_number = ~0;
 	dcr->code = NFIT_FIC_BYTE;
 	dcr->windows = 0;
@ -1462,20 +1460,16 @@ static int nfit_test_probe(struct platform_device *pdev)
 	nfit_test->setup(nfit_test);
 	acpi_desc = &nfit_test->acpi_desc;
 	acpi_nfit_desc_init(acpi_desc, &pdev->dev);
-	acpi_desc->nfit = nfit_test->nfit_buf;
 	acpi_desc->blk_do_io = nfit_test_blk_do_io;
 	nd_desc = &acpi_desc->nd_desc;
 	nd_desc->provider_name = NULL;
+	nd_desc->module = THIS_MODULE;
 	nd_desc->ndctl = nfit_test_ctl;
-	acpi_desc->nvdimm_bus = nvdimm_bus_register(&pdev->dev, nd_desc);
-	if (!acpi_desc->nvdimm_bus)
-		return -ENXIO;

-	rc = acpi_nfit_init(acpi_desc, nfit_test->nfit_size);
-	if (rc) {
-		nvdimm_bus_unregister(acpi_desc->nvdimm_bus);
+	rc = acpi_nfit_init(acpi_desc, nfit_test->nfit_buf,
+			nfit_test->nfit_size);
+	if (rc)
 		return rc;
-	}

 	if (nfit_test->setup != nfit_test0_setup)
 		return 0;
@ -1483,22 +1477,16 @@ static int nfit_test_probe(struct platform_device *pdev)
 	nfit_test->setup_hotplug = 1;
 	nfit_test->setup(nfit_test);

-	rc = acpi_nfit_init(acpi_desc, nfit_test->nfit_size);
-	if (rc) {
-		nvdimm_bus_unregister(acpi_desc->nvdimm_bus);
+	rc = acpi_nfit_init(acpi_desc, nfit_test->nfit_buf,
+			nfit_test->nfit_size);
+	if (rc)
 		return rc;
-	}

 	return 0;
 }

 static int nfit_test_remove(struct platform_device *pdev)
 {
-	struct nfit_test *nfit_test = to_nfit_test(&pdev->dev);
-	struct acpi_nfit_desc *acpi_desc = &nfit_test->acpi_desc;
-
-	nvdimm_bus_unregister(acpi_desc->nvdimm_bus);
-
 	return 0;
 }

@ -1523,12 +1511,6 @@ static struct platform_driver nfit_test_driver = {
 	.id_table = nfit_test_id,
 };

-#ifdef CONFIG_CMA_SIZE_MBYTES
-#define CMA_SIZE_MBYTES CONFIG_CMA_SIZE_MBYTES
-#else
-#define CMA_SIZE_MBYTES 0
-#endif
-
 static __init int nfit_test_init(void)
 {
 	int rc, i;
@ -1538,7 +1520,6 @@ static __init int nfit_test_init(void)
 	for (i = 0; i < NUM_NFITS; i++) {
 		struct nfit_test *nfit_test;
 		struct platform_device *pdev;
-		static int once;

 		nfit_test = kzalloc(sizeof(*nfit_test), GFP_KERNEL);
 		if (!nfit_test) {
@ -1577,20 +1558,6 @@ static __init int nfit_test_init(void)
 			goto err_register;

 		instances[i] = nfit_test;
-
-		if (!once++) {
-			dma_addr_t dma;
-			void *buf;
-
-			buf = dma_alloc_coherent(&pdev->dev, SZ_128M, &dma,
-					GFP_KERNEL);
-			if (!buf) {
-				rc = -ENOMEM;
-				dev_warn(&pdev->dev, "need 128M of free cma\n");
-				goto err_register;
-			}
-			dma_free_coherent(&pdev->dev, SZ_128M, buf, dma);
-		}
 	}

 	rc = platform_driver_register(&nfit_test_driver);
--- a/tools/testing/nvdimm/test/nfit_test.h
+++ b/tools/testing/nvdimm/test/nfit_test.h
@ -12,6 +12,7 @@
 */
 #ifndef __NFIT_TEST_H__
 #define __NFIT_TEST_H__
+#include <linux/list.h>

 struct nfit_test_resource {
 	struct list_head list;
@ -26,4 +27,5 @@ void __iomem *__wrap_ioremap_nocache(resource_size_t offset,
 void __wrap_iounmap(volatile void __iomem *addr);
 void nfit_test_setup(nfit_test_lookup_fn lookup);
 void nfit_test_teardown(void);
+struct nfit_test_resource *get_nfit_res(resource_size_t resource);
 #endif