Merge branch 'akpm' (patches from Andrew)

Merge more updates from Andrew Morton:

 - almost all of the rest of MM

 - kasan updates

 - lots of procfs work

 - misc things

 - lib/ updates

 - checkpatch

 - rapidio

 - ipc/shm updates

 - the start of willy's XArray conversion

* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (140 commits)
  page cache: use xa_lock
  xarray: add the xa_lock to the radix_tree_root
  fscache: use appropriate radix tree accessors
  export __set_page_dirty
  unicore32: turn flush_dcache_mmap_lock into a no-op
  arm64: turn flush_dcache_mmap_lock into a no-op
  mac80211_hwsim: use DEFINE_IDA
  radix tree: use GFP_ZONEMASK bits of gfp_t for flags
  linux/const.h: refactor _BITUL and _BITULL a bit
  linux/const.h: move UL() macro to include/linux/const.h
  linux/const.h: prefix include guard of uapi/linux/const.h with _UAPI
  xen, mm: allow deferred page initialization for xen pv domains
  elf: enforce MAP_FIXED on overlaying elf segments
  fs, elf: drop MAP_FIXED usage from elf_map
  mm: introduce MAP_FIXED_NOREPLACE
  MAINTAINERS: update bouncing aacraid@adaptec.com addresses
  fs/dcache.c: add cond_resched() in shrink_dentry_list()
  include/linux/kfifo.h: fix comment
  ipc/shm.c: shm_split(): remove unneeded test for NULL shm_file_data.vm_ops
  kernel/sysctl.c: add kdoc comments to do_proc_do{u}intvec_minmax_conv_param
  ...
This commit is contained in:
Linus Torvalds 2018-04-11 10:51:26 -07:00
commit 8837c70d53
182 changed files with 4164 additions and 2100 deletions

428
.clang-format Normal file
View File

@ -0,0 +1,428 @@
# SPDX-License-Identifier: GPL-2.0
#
# clang-format configuration file. Intended for clang-format >= 4.
#
# For more information, see:
#
# Documentation/process/clang-format.rst
# https://clang.llvm.org/docs/ClangFormat.html
# https://clang.llvm.org/docs/ClangFormatStyleOptions.html
#
---
AccessModifierOffset: -4
AlignAfterOpenBracket: Align
AlignConsecutiveAssignments: false
AlignConsecutiveDeclarations: false
#AlignEscapedNewlines: Left # Unknown to clang-format-4.0
AlignOperands: true
AlignTrailingComments: false
AllowAllParametersOfDeclarationOnNextLine: false
AllowShortBlocksOnASingleLine: false
AllowShortCaseLabelsOnASingleLine: false
AllowShortFunctionsOnASingleLine: None
AllowShortIfStatementsOnASingleLine: false
AllowShortLoopsOnASingleLine: false
AlwaysBreakAfterDefinitionReturnType: None
AlwaysBreakAfterReturnType: None
AlwaysBreakBeforeMultilineStrings: false
AlwaysBreakTemplateDeclarations: false
BinPackArguments: true
BinPackParameters: true
BraceWrapping:
AfterClass: false
AfterControlStatement: false
AfterEnum: false
AfterFunction: true
AfterNamespace: true
AfterObjCDeclaration: false
AfterStruct: false
AfterUnion: false
#AfterExternBlock: false # Unknown to clang-format-5.0
BeforeCatch: false
BeforeElse: false
IndentBraces: false
#SplitEmptyFunction: true # Unknown to clang-format-4.0
#SplitEmptyRecord: true # Unknown to clang-format-4.0
#SplitEmptyNamespace: true # Unknown to clang-format-4.0
BreakBeforeBinaryOperators: None
BreakBeforeBraces: Custom
#BreakBeforeInheritanceComma: false # Unknown to clang-format-4.0
BreakBeforeTernaryOperators: false
BreakConstructorInitializersBeforeComma: false
#BreakConstructorInitializers: BeforeComma # Unknown to clang-format-4.0
BreakAfterJavaFieldAnnotations: false
BreakStringLiterals: false
ColumnLimit: 80
CommentPragmas: '^ IWYU pragma:'
#CompactNamespaces: false # Unknown to clang-format-4.0
ConstructorInitializerAllOnOneLineOrOnePerLine: false
ConstructorInitializerIndentWidth: 8
ContinuationIndentWidth: 8
Cpp11BracedListStyle: false
DerivePointerAlignment: false
DisableFormat: false
ExperimentalAutoDetectBinPacking: false
#FixNamespaceComments: false # Unknown to clang-format-4.0
# Taken from:
# git grep -h '^#define [^[:space:]]*for_each[^[:space:]]*(' include/ \
# | sed "s,^#define \([^[:space:]]*for_each[^[:space:]]*\)(.*$, - '\1'," \
# | sort | uniq
ForEachMacros:
- 'apei_estatus_for_each_section'
- 'ata_for_each_dev'
- 'ata_for_each_link'
- 'ax25_for_each'
- 'ax25_uid_for_each'
- 'bio_for_each_integrity_vec'
- '__bio_for_each_segment'
- 'bio_for_each_segment'
- 'bio_for_each_segment_all'
- 'bio_list_for_each'
- 'bip_for_each_vec'
- 'blkg_for_each_descendant_post'
- 'blkg_for_each_descendant_pre'
- 'blk_queue_for_each_rl'
- 'bond_for_each_slave'
- 'bond_for_each_slave_rcu'
- 'btree_for_each_safe128'
- 'btree_for_each_safe32'
- 'btree_for_each_safe64'
- 'btree_for_each_safel'
- 'card_for_each_dev'
- 'cgroup_taskset_for_each'
- 'cgroup_taskset_for_each_leader'
- 'cpufreq_for_each_entry'
- 'cpufreq_for_each_entry_idx'
- 'cpufreq_for_each_valid_entry'
- 'cpufreq_for_each_valid_entry_idx'
- 'css_for_each_child'
- 'css_for_each_descendant_post'
- 'css_for_each_descendant_pre'
- 'device_for_each_child_node'
- 'drm_atomic_crtc_for_each_plane'
- 'drm_atomic_crtc_state_for_each_plane'
- 'drm_atomic_crtc_state_for_each_plane_state'
- 'drm_for_each_connector_iter'
- 'drm_for_each_crtc'
- 'drm_for_each_encoder'
- 'drm_for_each_encoder_mask'
- 'drm_for_each_fb'
- 'drm_for_each_legacy_plane'
- 'drm_for_each_plane'
- 'drm_for_each_plane_mask'
- 'drm_mm_for_each_hole'
- 'drm_mm_for_each_node'
- 'drm_mm_for_each_node_in_range'
- 'drm_mm_for_each_node_safe'
- 'for_each_active_drhd_unit'
- 'for_each_active_iommu'
- 'for_each_available_child_of_node'
- 'for_each_bio'
- 'for_each_board_func_rsrc'
- 'for_each_bvec'
- 'for_each_child_of_node'
- 'for_each_clear_bit'
- 'for_each_clear_bit_from'
- 'for_each_cmsghdr'
- 'for_each_compatible_node'
- 'for_each_console'
- 'for_each_cpu'
- 'for_each_cpu_and'
- 'for_each_cpu_not'
- 'for_each_cpu_wrap'
- 'for_each_dev_addr'
- 'for_each_dma_cap_mask'
- 'for_each_drhd_unit'
- 'for_each_dss_dev'
- 'for_each_efi_memory_desc'
- 'for_each_efi_memory_desc_in_map'
- 'for_each_endpoint_of_node'
- 'for_each_evictable_lru'
- 'for_each_fib6_node_rt_rcu'
- 'for_each_fib6_walker_rt'
- 'for_each_free_mem_range'
- 'for_each_free_mem_range_reverse'
- 'for_each_func_rsrc'
- 'for_each_hstate'
- 'for_each_if'
- 'for_each_iommu'
- 'for_each_ip_tunnel_rcu'
- 'for_each_irq_nr'
- 'for_each_lru'
- 'for_each_matching_node'
- 'for_each_matching_node_and_match'
- 'for_each_memblock'
- 'for_each_memblock_type'
- 'for_each_memcg_cache_index'
- 'for_each_mem_pfn_range'
- 'for_each_mem_range'
- 'for_each_mem_range_rev'
- 'for_each_migratetype_order'
- 'for_each_msi_entry'
- 'for_each_net'
- 'for_each_netdev'
- 'for_each_netdev_continue'
- 'for_each_netdev_continue_rcu'
- 'for_each_netdev_feature'
- 'for_each_netdev_in_bond_rcu'
- 'for_each_netdev_rcu'
- 'for_each_netdev_reverse'
- 'for_each_netdev_safe'
- 'for_each_net_rcu'
- 'for_each_new_connector_in_state'
- 'for_each_new_crtc_in_state'
- 'for_each_new_plane_in_state'
- 'for_each_new_private_obj_in_state'
- 'for_each_node'
- 'for_each_node_by_name'
- 'for_each_node_by_type'
- 'for_each_node_mask'
- 'for_each_node_state'
- 'for_each_node_with_cpus'
- 'for_each_node_with_property'
- 'for_each_of_allnodes'
- 'for_each_of_allnodes_from'
- 'for_each_of_pci_range'
- 'for_each_old_connector_in_state'
- 'for_each_old_crtc_in_state'
- 'for_each_oldnew_connector_in_state'
- 'for_each_oldnew_crtc_in_state'
- 'for_each_oldnew_plane_in_state'
- 'for_each_oldnew_private_obj_in_state'
- 'for_each_old_plane_in_state'
- 'for_each_old_private_obj_in_state'
- 'for_each_online_cpu'
- 'for_each_online_node'
- 'for_each_online_pgdat'
- 'for_each_pci_bridge'
- 'for_each_pci_dev'
- 'for_each_pci_msi_entry'
- 'for_each_populated_zone'
- 'for_each_possible_cpu'
- 'for_each_present_cpu'
- 'for_each_prime_number'
- 'for_each_prime_number_from'
- 'for_each_process'
- 'for_each_process_thread'
- 'for_each_property_of_node'
- 'for_each_reserved_mem_region'
- 'for_each_resv_unavail_range'
- 'for_each_rtdcom'
- 'for_each_rtdcom_safe'
- 'for_each_set_bit'
- 'for_each_set_bit_from'
- 'for_each_sg'
- 'for_each_sg_page'
- '__for_each_thread'
- 'for_each_thread'
- 'for_each_zone'
- 'for_each_zone_zonelist'
- 'for_each_zone_zonelist_nodemask'
- 'fwnode_for_each_available_child_node'
- 'fwnode_for_each_child_node'
- 'fwnode_graph_for_each_endpoint'
- 'gadget_for_each_ep'
- 'hash_for_each'
- 'hash_for_each_possible'
- 'hash_for_each_possible_rcu'
- 'hash_for_each_possible_rcu_notrace'
- 'hash_for_each_possible_safe'
- 'hash_for_each_rcu'
- 'hash_for_each_safe'
- 'hctx_for_each_ctx'
- 'hlist_bl_for_each_entry'
- 'hlist_bl_for_each_entry_rcu'
- 'hlist_bl_for_each_entry_safe'
- 'hlist_for_each'
- 'hlist_for_each_entry'
- 'hlist_for_each_entry_continue'
- 'hlist_for_each_entry_continue_rcu'
- 'hlist_for_each_entry_continue_rcu_bh'
- 'hlist_for_each_entry_from'
- 'hlist_for_each_entry_from_rcu'
- 'hlist_for_each_entry_rcu'
- 'hlist_for_each_entry_rcu_bh'
- 'hlist_for_each_entry_rcu_notrace'
- 'hlist_for_each_entry_safe'
- '__hlist_for_each_rcu'
- 'hlist_for_each_safe'
- 'hlist_nulls_for_each_entry'
- 'hlist_nulls_for_each_entry_from'
- 'hlist_nulls_for_each_entry_rcu'
- 'hlist_nulls_for_each_entry_safe'
- 'ide_host_for_each_port'
- 'ide_port_for_each_dev'
- 'ide_port_for_each_present_dev'
- 'idr_for_each_entry'
- 'idr_for_each_entry_continue'
- 'idr_for_each_entry_ul'
- 'inet_bind_bucket_for_each'
- 'inet_lhash2_for_each_icsk_rcu'
- 'iov_for_each'
- 'key_for_each'
- 'key_for_each_safe'
- 'klp_for_each_func'
- 'klp_for_each_object'
- 'kvm_for_each_memslot'
- 'kvm_for_each_vcpu'
- 'list_for_each'
- 'list_for_each_entry'
- 'list_for_each_entry_continue'
- 'list_for_each_entry_continue_rcu'
- 'list_for_each_entry_continue_reverse'
- 'list_for_each_entry_from'
- 'list_for_each_entry_from_reverse'
- 'list_for_each_entry_lockless'
- 'list_for_each_entry_rcu'
- 'list_for_each_entry_reverse'
- 'list_for_each_entry_safe'
- 'list_for_each_entry_safe_continue'
- 'list_for_each_entry_safe_from'
- 'list_for_each_entry_safe_reverse'
- 'list_for_each_prev'
- 'list_for_each_prev_safe'
- 'list_for_each_safe'
- 'llist_for_each'
- 'llist_for_each_entry'
- 'llist_for_each_entry_safe'
- 'llist_for_each_safe'
- 'media_device_for_each_entity'
- 'media_device_for_each_intf'
- 'media_device_for_each_link'
- 'media_device_for_each_pad'
- 'netdev_for_each_lower_dev'
- 'netdev_for_each_lower_private'
- 'netdev_for_each_lower_private_rcu'
- 'netdev_for_each_mc_addr'
- 'netdev_for_each_uc_addr'
- 'netdev_for_each_upper_dev_rcu'
- 'netdev_hw_addr_list_for_each'
- 'nft_rule_for_each_expr'
- 'nla_for_each_attr'
- 'nla_for_each_nested'
- 'nlmsg_for_each_attr'
- 'nlmsg_for_each_msg'
- 'nr_neigh_for_each'
- 'nr_neigh_for_each_safe'
- 'nr_node_for_each'
- 'nr_node_for_each_safe'
- 'of_for_each_phandle'
- 'of_property_for_each_string'
- 'of_property_for_each_u32'
- 'pci_bus_for_each_resource'
- 'ping_portaddr_for_each_entry'
- 'plist_for_each'
- 'plist_for_each_continue'
- 'plist_for_each_entry'
- 'plist_for_each_entry_continue'
- 'plist_for_each_entry_safe'
- 'plist_for_each_safe'
- 'pnp_for_each_card'
- 'pnp_for_each_dev'
- 'protocol_for_each_card'
- 'protocol_for_each_dev'
- 'queue_for_each_hw_ctx'
- 'radix_tree_for_each_contig'
- 'radix_tree_for_each_slot'
- 'radix_tree_for_each_tagged'
- 'rbtree_postorder_for_each_entry_safe'
- 'resource_list_for_each_entry'
- 'resource_list_for_each_entry_safe'
- 'rhl_for_each_entry_rcu'
- 'rhl_for_each_rcu'
- 'rht_for_each'
- 'rht_for_each_continue'
- 'rht_for_each_entry'
- 'rht_for_each_entry_continue'
- 'rht_for_each_entry_rcu'
- 'rht_for_each_entry_rcu_continue'
- 'rht_for_each_entry_safe'
- 'rht_for_each_rcu'
- 'rht_for_each_rcu_continue'
- '__rq_for_each_bio'
- 'rq_for_each_segment'
- 'scsi_for_each_prot_sg'
- 'scsi_for_each_sg'
- 'sctp_for_each_hentry'
- 'sctp_skb_for_each'
- 'shdma_for_each_chan'
- '__shost_for_each_device'
- 'shost_for_each_device'
- 'sk_for_each'
- 'sk_for_each_bound'
- 'sk_for_each_entry_offset_rcu'
- 'sk_for_each_from'
- 'sk_for_each_rcu'
- 'sk_for_each_safe'
- 'sk_nulls_for_each'
- 'sk_nulls_for_each_from'
- 'sk_nulls_for_each_rcu'
- 'snd_pcm_group_for_each_entry'
- 'snd_soc_dapm_widget_for_each_path'
- 'snd_soc_dapm_widget_for_each_path_safe'
- 'snd_soc_dapm_widget_for_each_sink_path'
- 'snd_soc_dapm_widget_for_each_source_path'
- 'tb_property_for_each'
- 'udp_portaddr_for_each_entry'
- 'udp_portaddr_for_each_entry_rcu'
- 'usb_hub_for_each_child'
- 'v4l2_device_for_each_subdev'
- 'v4l2_m2m_for_each_dst_buf'
- 'v4l2_m2m_for_each_dst_buf_safe'
- 'v4l2_m2m_for_each_src_buf'
- 'v4l2_m2m_for_each_src_buf_safe'
- 'zorro_for_each_dev'
#IncludeBlocks: Preserve # Unknown to clang-format-5.0
IncludeCategories:
- Regex: '.*'
Priority: 1
IncludeIsMainRegex: '(Test)?$'
IndentCaseLabels: false
#IndentPPDirectives: None # Unknown to clang-format-5.0
IndentWidth: 8
IndentWrappedFunctionNames: true
JavaScriptQuotes: Leave
JavaScriptWrapImports: true
KeepEmptyLinesAtTheStartOfBlocks: false
MacroBlockBegin: ''
MacroBlockEnd: ''
MaxEmptyLinesToKeep: 1
NamespaceIndentation: Inner
#ObjCBinPackProtocolList: Auto # Unknown to clang-format-5.0
ObjCBlockIndentWidth: 8
ObjCSpaceAfterProperty: true
ObjCSpaceBeforeProtocolList: true
# Taken from git's rules
#PenaltyBreakAssignment: 10 # Unknown to clang-format-4.0
PenaltyBreakBeforeFirstCallParameter: 30
PenaltyBreakComment: 10
PenaltyBreakFirstLessLess: 0
PenaltyBreakString: 10
PenaltyExcessCharacter: 100
PenaltyReturnTypeOnItsOwnLine: 60
PointerAlignment: Right
ReflowComments: false
SortIncludes: false
#SortUsingDeclarations: false # Unknown to clang-format-4.0
SpaceAfterCStyleCast: false
SpaceAfterTemplateKeyword: true
SpaceBeforeAssignmentOperators: true
#SpaceBeforeCtorInitializerColon: true # Unknown to clang-format-5.0
#SpaceBeforeInheritanceColon: true # Unknown to clang-format-5.0
SpaceBeforeParens: ControlStatements
#SpaceBeforeRangeBasedForLoopColon: true # Unknown to clang-format-5.0
SpaceInEmptyParentheses: false
SpacesBeforeTrailingComments: 1
SpacesInAngles: false
SpacesInContainerLiterals: false
SpacesInCStyleCastParentheses: false
SpacesInParentheses: false
SpacesInSquareBrackets: false
Standard: Cpp03
TabWidth: 8
UseTab: Always
...

1
.gitignore vendored
View File

@ -81,6 +81,7 @@ modules.builtin
!.gitignore !.gitignore
!.mailmap !.mailmap
!.cocciconfig !.cocciconfig
!.clang-format
# #
# Generated include files # Generated include files

View File

@ -262,7 +262,7 @@ When oom event notifier is registered, event will be delivered.
2.6 Locking 2.6 Locking
lock_page_cgroup()/unlock_page_cgroup() should not be called under lock_page_cgroup()/unlock_page_cgroup() should not be called under
mapping->tree_lock. the i_pages lock.
Other lock order is following: Other lock order is following:
PG_locked. PG_locked.

View File

@ -58,6 +58,14 @@ can never be transgressed. If there is a good reason to go against the
style (a line which becomes far less readable if split to fit within the style (a line which becomes far less readable if split to fit within the
80-column limit, for example), just do it. 80-column limit, for example), just do it.
Note that you can also use the ``clang-format`` tool to help you with
these rules, to quickly re-format parts of your code automatically,
and to review full files in order to spot coding style mistakes,
typos and possible improvements. It is also handy for sorting ``#includes``,
for aligning variables/macros, for reflowing text and other similar tasks.
See the file :ref:`Documentation/process/clang-format.rst <clangformat>`
for more details.
Abstraction layers Abstraction layers
****************** ******************

View File

@ -0,0 +1,184 @@
.. _clangformat:
clang-format
============
``clang-format`` is a tool to format C/C++/... code according to
a set of rules and heuristics. Like most tools, it is not perfect
and does not cover every single case, but it is good enough to be helpful.
``clang-format`` can be used for several purposes:
- Quickly reformat a block of code to the kernel style. Especially useful
when moving code around and aligning/sorting. See clangformatreformat_.
- Spot style mistakes, typos and possible improvements in files
you maintain, patches you review, diffs, etc. See clangformatreview_.
- Help you follow the coding style rules, especially useful for those
new to kernel development or working at the same time in several
projects with different coding styles.
Its configuration file is ``.clang-format`` in the root of the kernel tree.
The rules contained there try to approximate the most common kernel
coding style. They also try to follow :ref:`Documentation/process/coding-style.rst <codingstyle>`
as much as possible. Since not all the kernel follows the same style,
it is possible that you may want to tweak the defaults for a particular
subsystem or folder. To do so, you can override the defaults by writing
another ``.clang-format`` file in a subfolder.
The tool itself has already been included in the repositories of popular
Linux distributions for a long time. Search for ``clang-format`` in
your repositories. Otherwise, you can either download pre-built
LLVM/clang binaries or build the source code from:
http://releases.llvm.org/download.html
See more information about the tool at:
https://clang.llvm.org/docs/ClangFormat.html
https://clang.llvm.org/docs/ClangFormatStyleOptions.html
.. _clangformatreview:
Review files and patches for coding style
-----------------------------------------
By running the tool in its inline mode, you can review full subsystems,
folders or individual files for code style mistakes, typos or improvements.
To do so, you can run something like::
# Make sure your working directory is clean!
clang-format -i kernel/*.[ch]
And then take a look at the git diff.
Counting the lines of such a diff is also useful for improving/tweaking
the style options in the configuration file; as well as testing new
``clang-format`` features/versions.
``clang-format`` also supports reading unified diffs, so you can review
patches and git diffs easily. See the documentation at:
https://clang.llvm.org/docs/ClangFormat.html#script-for-patch-reformatting
To avoid ``clang-format`` formatting some portion of a file, you can do::
int formatted_code;
// clang-format off
void unformatted_code ;
// clang-format on
void formatted_code_again;
While it might be tempting to use this to keep a file always in sync with
``clang-format``, especially if you are writing new files or if you are
a maintainer, please note that people might be running different
``clang-format`` versions or not have it available at all. Therefore,
you should probably refrain from using this in kernel sources;
at least until we see if ``clang-format`` becomes commonplace.
.. _clangformatreformat:
Reformatting blocks of code
---------------------------
By using an integration with your text editor, you can reformat arbitrary
blocks (selections) of code with a single keystroke. This is especially
useful when moving code around, for complex code that is deeply indented,
for multi-line macros (and aligning their backslashes), etc.
Remember that you can always tweak the changes afterwards in those cases
where the tool did not do an optimal job. But as a first approximation,
it can be very useful.
There are integrations for many popular text editors. For some of them,
like vim, emacs, BBEdit and Visual Studio you can find support built-in.
For instructions, read the appropriate section at:
https://clang.llvm.org/docs/ClangFormat.html
For Atom, Eclipse, Sublime Text, Visual Studio Code, XCode and other
editors and IDEs you should be able to find ready-to-use plugins.
For this use case, consider using a secondary ``.clang-format``
so that you can tweak a few options. See clangformatextra_.
.. _clangformatmissing:
Missing support
---------------
``clang-format`` is missing support for some things that are common
in kernel code. They are easy to remember, so if you use the tool
regularly, you will quickly learn to avoid/ignore those.
In particular, some very common ones you will notice are:
- Aligned blocks of one-line ``#defines``, e.g.::
#define TRACING_MAP_BITS_DEFAULT       11
#define TRACING_MAP_BITS_MAX           17
#define TRACING_MAP_BITS_MIN           7
vs.::
#define TRACING_MAP_BITS_DEFAULT 11
#define TRACING_MAP_BITS_MAX 17
#define TRACING_MAP_BITS_MIN 7
- Aligned designated initializers, e.g.::
static const struct file_operations uprobe_events_ops = {
.owner          = THIS_MODULE,
.open           = probes_open,
.read           = seq_read,
.llseek         = seq_lseek,
.release        = seq_release,
.write          = probes_write,
};
vs.::
static const struct file_operations uprobe_events_ops = {
.owner = THIS_MODULE,
.open = probes_open,
.read = seq_read,
.llseek = seq_lseek,
.release = seq_release,
.write = probes_write,
};
.. _clangformatextra:
Extra features/options
----------------------
Some features/style options are not enabled by default in the configuration
file in order to minimize the differences between the output and the current
code. In other words, to make the difference as small as possible,
which makes reviewing full-file style, as well as diffs and patches as easy
as possible.
In other cases (e.g. particular subsystems/folders/files), the kernel style
might be different and enabling some of these options may better
approximate the style there.
For instance:
- Aligning assignments (``AlignConsecutiveAssignments``).
- Aligning declarations (``AlignConsecutiveDeclarations``).
- Reflowing text in comments (``ReflowComments``).
- Sorting ``#includes`` (``SortIncludes``).
They are typically useful for block re-formatting, rather than full-file.
You might want to create another ``.clang-format`` file and use that one
from your editor/IDE instead.

View File

@ -631,6 +631,14 @@ options ``-kr -i8`` (stands for ``K&R, 8 character indents``), or use
re-formatting you may want to take a look at the man page. But re-formatting you may want to take a look at the man page. But
remember: ``indent`` is not a fix for bad programming. remember: ``indent`` is not a fix for bad programming.
Note that you can also use the ``clang-format`` tool to help you with
these rules, to quickly re-format parts of your code automatically,
and to review full files in order to spot coding style mistakes,
typos and possible improvements. It is also handy for sorting ``#includes``,
for aligning variables/macros, for reflowing text and other similar tasks.
See the file :ref:`Documentation/process/clang-format.rst <clangformat>`
for more details.
10) Kconfig configuration files 10) Kconfig configuration files
------------------------------- -------------------------------

View File

@ -964,32 +964,34 @@ detect a hard lockup condition.
tainted: tainted:
Non-zero if the kernel has been tainted. Numeric values, which Non-zero if the kernel has been tainted. Numeric values, which can be
can be ORed together: ORed together. The letters are seen in "Tainted" line of Oops reports.
1 - A module with a non-GPL license has been loaded, this 1 (P): A module with a non-GPL license has been loaded, this
includes modules with no license. includes modules with no license.
Set by modutils >= 2.4.9 and module-init-tools. Set by modutils >= 2.4.9 and module-init-tools.
2 - A module was force loaded by insmod -f. 2 (F): A module was force loaded by insmod -f.
Set by modutils >= 2.4.9 and module-init-tools. Set by modutils >= 2.4.9 and module-init-tools.
4 - Unsafe SMP processors: SMP with CPUs not designed for SMP. 4 (S): Unsafe SMP processors: SMP with CPUs not designed for SMP.
8 - A module was forcibly unloaded from the system by rmmod -f. 8 (R): A module was forcibly unloaded from the system by rmmod -f.
16 - A hardware machine check error occurred on the system. 16 (M): A hardware machine check error occurred on the system.
32 - A bad page was discovered on the system. 32 (B): A bad page was discovered on the system.
64 - The user has asked that the system be marked "tainted". This 64 (U): The user has asked that the system be marked "tainted". This
could be because they are running software that directly modifies could be because they are running software that directly modifies
the hardware, or for other reasons. the hardware, or for other reasons.
128 - The system has died. 128 (D): The system has died.
256 - The ACPI DSDT has been overridden with one supplied by the user 256 (A): The ACPI DSDT has been overridden with one supplied by the user
instead of using the one provided by the hardware. instead of using the one provided by the hardware.
512 - A kernel warning has occurred. 512 (W): A kernel warning has occurred.
1024 - A module from drivers/staging was loaded. 1024 (C): A module from drivers/staging was loaded.
2048 - The system is working around a severe firmware bug. 2048 (I): The system is working around a severe firmware bug.
4096 - An out-of-tree module has been loaded. 4096 (O): An out-of-tree module has been loaded.
8192 - An unsigned module has been loaded in a kernel supporting module 8192 (E): An unsigned module has been loaded in a kernel supporting module
signature. signature.
16384 - A soft lockup has previously occurred on the system. 16384 (L): A soft lockup has previously occurred on the system.
32768 - The kernel has been live patched. 32768 (K): The kernel has been live patched.
65536 (X): Auxiliary taint, defined for and used by distros.
131072 (T): The kernel was built with the struct randomization plugin.
============================================================== ==============================================================

View File

@ -312,8 +312,6 @@ The lowmem_reserve_ratio is an array. You can see them by reading this file.
% cat /proc/sys/vm/lowmem_reserve_ratio % cat /proc/sys/vm/lowmem_reserve_ratio
256 256 32 256 256 32
- -
Note: # of this elements is one fewer than number of zones. Because the highest
zone's value is not necessary for following calculation.
But, these values are not used directly. The kernel calculates # of protection But, these values are not used directly. The kernel calculates # of protection
pages for each zones from them. These are shown as array of protection pages pages for each zones from them. These are shown as array of protection pages
@ -364,7 +362,8 @@ As above expression, they are reciprocal number of ratio.
pages of higher zones on the node. pages of higher zones on the node.
If you would like to protect more pages, smaller values are effective. If you would like to protect more pages, smaller values are effective.
The minimum value is 1 (1/1 -> 100%). The minimum value is 1 (1/1 -> 100%). A value less than 1 completely
disables protection of the pages.
============================================================== ==============================================================

View File

@ -1,152 +1,160 @@
Heterogeneous Memory Management (HMM) Heterogeneous Memory Management (HMM)
Transparently allow any component of a program to use any memory region of said Provide infrastructure and helpers to integrate non-conventional memory (device
program with a device without using device specific memory allocator. This is memory like GPU on board memory) into regular kernel path, with the cornerstone
becoming a requirement to simplify the use of advance heterogeneous computing of this being specialized struct page for such memory (see sections 5 to 7 of
where GPU, DSP or FPGA are use to perform various computations. this document).
This document is divided as follow, in the first section i expose the problems HMM also provides optional helpers for SVM (Share Virtual Memory), i.e.,
related to the use of a device specific allocator. The second section i expose allowing a device to transparently access program address coherently with the
the hardware limitations that are inherent to many platforms. The third section CPU meaning that any valid pointer on the CPU is also a valid pointer for the
gives an overview of HMM designs. The fourth section explains how CPU page- device. This is becoming mandatory to simplify the use of advanced hetero-
table mirroring works and what is HMM purpose in this context. Fifth section geneous computing where GPU, DSP, or FPGA are used to perform various
deals with how device memory is represented inside the kernel. Finaly the last computations on behalf of a process.
section present the new migration helper that allow to leverage the device DMA
engine. This document is divided as follows: in the first section I expose the problems
related to using device specific memory allocators. In the second section, I
expose the hardware limitations that are inherent to many platforms. The third
section gives an overview of the HMM design. The fourth section explains how
CPU page-table mirroring works and the purpose of HMM in this context. The
fifth section deals with how device memory is represented inside the kernel.
Finally, the last section presents a new migration helper that allows lever-
aging the device DMA engine.
1) Problems of using device specific memory allocator: 1) Problems of using a device specific memory allocator:
2) System bus, device memory characteristics 2) I/O bus, device memory characteristics
3) Share address space and migration 3) Shared address space and migration
4) Address space mirroring implementation and API 4) Address space mirroring implementation and API
5) Represent and manage device memory from core kernel point of view 5) Represent and manage device memory from core kernel point of view
6) Migrate to and from device memory 6) Migration to and from device memory
7) Memory cgroup (memcg) and rss accounting 7) Memory cgroup (memcg) and rss accounting
------------------------------------------------------------------------------- -------------------------------------------------------------------------------
1) Problems of using device specific memory allocator: 1) Problems of using a device specific memory allocator:
Device with large amount of on board memory (several giga bytes) like GPU have Devices with a large amount of on board memory (several gigabytes) like GPUs
historically manage their memory through dedicated driver specific API. This have historically managed their memory through dedicated driver specific APIs.
creates a disconnect between memory allocated and managed by device driver and This creates a disconnect between memory allocated and managed by a device
regular application memory (private anonymous, share memory or regular file driver and regular application memory (private anonymous, shared memory, or
back memory). From here on i will refer to this aspect as split address space. regular file backed memory). From here on I will refer to this aspect as split
I use share address space to refer to the opposite situation ie one in which address space. I use shared address space to refer to the opposite situation:
any memory region can be use by device transparently. i.e., one in which any application memory region can be used by a device
transparently.
Split address space because device can only access memory allocated through the Split address space happens because device can only access memory allocated
device specific API. This imply that all memory object in a program are not through device specific API. This implies that all memory objects in a program
equal from device point of view which complicate large program that rely on a are not equal from the device point of view which complicates large programs
wide set of libraries. that rely on a wide set of libraries.
Concretly this means that code that wants to leverage device like GPU need to Concretely this means that code that wants to leverage devices like GPUs needs
copy object between genericly allocated memory (malloc, mmap private/share/) to copy object between generically allocated memory (malloc, mmap private, mmap
and memory allocated through the device driver API (this still end up with an share) and memory allocated through the device driver API (this still ends up
mmap but of the device file). with an mmap but of the device file).
For flat dataset (array, grid, image, ...) this isn't too hard to achieve but For flat data sets (array, grid, image, ...) this isn't too hard to achieve but
complex data-set (list, tree, ...) are hard to get right. Duplicating a complex complex data sets (list, tree, ...) are hard to get right. Duplicating a
data-set need to re-map all the pointer relations between each of its elements. complex data set needs to re-map all the pointer relations between each of its
This is error prone and program gets harder to debug because of the duplicate elements. This is error prone and program gets harder to debug because of the
data-set. duplicate data set and addresses.
Split address space also means that library can not transparently use data they Split address space also means that libraries cannot transparently use data
are getting from core program or other library and thus each library might have they are getting from the core program or another library and thus each library
to duplicate its input data-set using specific memory allocator. Large project might have to duplicate its input data set using the device specific memory
suffer from this and waste resources because of the various memory copy. allocator. Large projects suffer from this and waste resources because of the
various memory copies.
Duplicating each library API to accept as input or output memory allocted by Duplicating each library API to accept as input or output memory allocated by
each device specific allocator is not a viable option. It would lead to a each device specific allocator is not a viable option. It would lead to a
combinatorial explosions in the library entry points. combinatorial explosion in the library entry points.
Finaly with the advance of high level language constructs (in C++ but in other Finally, with the advance of high level language constructs (in C++ but in
language too) it is now possible for compiler to leverage GPU or other devices other languages too) it is now possible for the compiler to leverage GPUs and
without even the programmer knowledge. Some of compiler identified patterns are other devices without programmer knowledge. Some compiler identified patterns
only do-able with a share address. It is as well more reasonable to use a share are only do-able with a shared address space. It is also more reasonable to use
address space for all the other patterns. a shared address space for all other patterns.
------------------------------------------------------------------------------- -------------------------------------------------------------------------------
2) System bus, device memory characteristics 2) I/O bus, device memory characteristics
System bus cripple share address due to few limitations. Most system bus only I/O buses cripple shared address spaces due to a few limitations. Most I/O
allow basic memory access from device to main memory, even cache coherency is buses only allow basic memory access from device to main memory; even cache
often optional. Access to device memory from CPU is even more limited, most coherency is often optional. Access to device memory from CPU is even more
often than not it is not cache coherent. limited. More often than not, it is not cache coherent.
If we only consider the PCIE bus than device can access main memory (often If we only consider the PCIE bus, then a device can access main memory (often
through an IOMMU) and be cache coherent with the CPUs. However it only allows through an IOMMU) and be cache coherent with the CPUs. However, it only allows
a limited set of atomic operation from device on main memory. This is worse a limited set of atomic operations from device on main memory. This is worse
in the other direction the CPUs can only access a limited range of the device in the other direction: the CPU can only access a limited range of the device
memory and can not perform atomic operations on it. Thus device memory can not memory and cannot perform atomic operations on it. Thus device memory cannot
be consider like regular memory from kernel point of view. be considered the same as regular memory from the kernel point of view.
Another crippling factor is the limited bandwidth (~32GBytes/s with PCIE 4.0 Another crippling factor is the limited bandwidth (~32GBytes/s with PCIE 4.0
and 16 lanes). This is 33 times less that fastest GPU memory (1 TBytes/s). and 16 lanes). This is 33 times less than the fastest GPU memory (1 TBytes/s).
The final limitation is latency, access to main memory from the device has an The final limitation is latency. Access to main memory from the device has an
order of magnitude higher latency than when the device access its own memory. order of magnitude higher latency than when the device accesses its own memory.
Some platform are developing new system bus or additions/modifications to PCIE Some platforms are developing new I/O buses or additions/modifications to PCIE
to address some of those limitations (OpenCAPI, CCIX). They mainly allow two to address some of these limitations (OpenCAPI, CCIX). They mainly allow two-
way cache coherency between CPU and device and allow all atomic operations the way cache coherency between CPU and device and allow all atomic operations the
architecture supports. Saddly not all platform are following this trends and architecture supports. Sadly, not all platforms are following this trend and
some major architecture are left without hardware solutions to those problems. some major architectures are left without hardware solutions to these problems.
So for share address space to make sense not only we must allow device to So for shared address space to make sense, not only must we allow devices to
access any memory memory but we must also permit any memory to be migrated to access any memory but we must also permit any memory to be migrated to device
device memory while device is using it (blocking CPU access while it happens). memory while device is using it (blocking CPU access while it happens).
------------------------------------------------------------------------------- -------------------------------------------------------------------------------
3) Share address space and migration 3) Shared address space and migration
HMM intends to provide two main features. First one is to share the address HMM intends to provide two main features. First one is to share the address
space by duplication the CPU page table into the device page table so same space by duplicating the CPU page table in the device page table so the same
address point to same memory and this for any valid main memory address in address points to the same physical memory for any valid main memory address in
the process address space. the process address space.
To achieve this, HMM offer a set of helpers to populate the device page table To achieve this, HMM offers a set of helpers to populate the device page table
while keeping track of CPU page table updates. Device page table updates are while keeping track of CPU page table updates. Device page table updates are
not as easy as CPU page table updates. To update the device page table you must not as easy as CPU page table updates. To update the device page table, you must
allow a buffer (or use a pool of pre-allocated buffer) and write GPU specifics allocate a buffer (or use a pool of pre-allocated buffers) and write GPU
commands in it to perform the update (unmap, cache invalidations and flush, specific commands in it to perform the update (unmap, cache invalidations, and
...). This can not be done through common code for all device. Hence why HMM flush, ...). This cannot be done through common code for all devices. Hence
provides helpers to factor out everything that can be while leaving the gory why HMM provides helpers to factor out everything that can be while leaving the
details to the device driver. hardware specific details to the device driver.
The second mechanism HMM provide is a new kind of ZONE_DEVICE memory that does The second mechanism HMM provides is a new kind of ZONE_DEVICE memory that
allow to allocate a struct page for each page of the device memory. Those page allows allocating a struct page for each page of the device memory. Those pages
are special because the CPU can not map them. They however allow to migrate are special because the CPU cannot map them. However, they allow migrating
main memory to device memory using exhisting migration mechanism and everything main memory to device memory using existing migration mechanisms and everything
looks like if page was swap out to disk from CPU point of view. Using a struct looks like a page is swapped out to disk from the CPU point of view. Using a
page gives the easiest and cleanest integration with existing mm mechanisms. struct page gives the easiest and cleanest integration with existing mm mech-
Again here HMM only provide helpers, first to hotplug new ZONE_DEVICE memory anisms. Here again, HMM only provides helpers, first to hotplug new ZONE_DEVICE
for the device memory and second to perform migration. Policy decision of what memory for the device memory and second to perform migration. Policy decisions
and when to migrate things is left to the device driver. of what and when to migrate things are left to the device driver.
Note that any CPU access to a device page trigger a page fault and a migration Note that any CPU access to a device page triggers a page fault and a migration
back to main memory ie when a page backing an given address A is migrated from back to main memory. For example, when a page backing a given CPU address A is
a main memory page to a device page then any CPU access to address A trigger a migrated from a main memory page to a device page, then any CPU access to
page fault and initiate a migration back to main memory. address A triggers a page fault and initiates a migration back to main memory.
With these two features, HMM not only allows a device to mirror process address
With this two features, HMM not only allow a device to mirror a process address space and keeping both CPU and device page table synchronized, but also lever-
space and keeps both CPU and device page table synchronize, but also allow to ages device memory by migrating the part of the data set that is actively being
leverage device memory by migrating part of data-set that is actively use by a used by the device.
device.
------------------------------------------------------------------------------- -------------------------------------------------------------------------------
4) Address space mirroring implementation and API 4) Address space mirroring implementation and API
Address space mirroring main objective is to allow to duplicate range of CPU Address space mirroring's main objective is to allow duplication of a range of
page table into a device page table and HMM helps keeping both synchronize. A CPU page table into a device page table; HMM helps keep both synchronized. A
device driver that want to mirror a process address space must start with the device driver that wants to mirror a process address space must start with the
registration of an hmm_mirror struct: registration of an hmm_mirror struct:
int hmm_mirror_register(struct hmm_mirror *mirror, int hmm_mirror_register(struct hmm_mirror *mirror,
@ -154,9 +162,9 @@ registration of an hmm_mirror struct:
int hmm_mirror_register_locked(struct hmm_mirror *mirror, int hmm_mirror_register_locked(struct hmm_mirror *mirror,
struct mm_struct *mm); struct mm_struct *mm);
The locked variant is to be use when the driver is already holding the mmap_sem The locked variant is to be used when the driver is already holding mmap_sem
of the mm in write mode. The mirror struct has a set of callback that are use of the mm in write mode. The mirror struct has a set of callbacks that are used
to propagate CPU page table: to propagate CPU page tables:
struct hmm_mirror_ops { struct hmm_mirror_ops {
/* sync_cpu_device_pagetables() - synchronize page tables /* sync_cpu_device_pagetables() - synchronize page tables
@ -181,13 +189,13 @@ to propagate CPU page table:
unsigned long end); unsigned long end);
}; };
Device driver must perform update to the range following action (turn range The device driver must perform the update action to the range (mark range
read only, or fully unmap, ...). Once driver callback returns the device must read only, or fully unmap, ...). The device must be done with the update before
be done with the update. the driver callback returns.
When device driver wants to populate a range of virtual address it can use When the device driver wants to populate a range of virtual addresses, it can
either: use either:
int hmm_vma_get_pfns(struct vm_area_struct *vma, int hmm_vma_get_pfns(struct vm_area_struct *vma,
struct hmm_range *range, struct hmm_range *range,
unsigned long start, unsigned long start,
@ -201,17 +209,19 @@ either:
bool write, bool write,
bool block); bool block);
First one (hmm_vma_get_pfns()) will only fetch present CPU page table entry and The first one (hmm_vma_get_pfns()) will only fetch present CPU page table
will not trigger a page fault on missing or non present entry. The second one entries and will not trigger a page fault on missing or non-present entries.
do trigger page fault on missing or read only entry if write parameter is true. The second one does trigger a page fault on missing or read-only entry if the
Page fault use the generic mm page fault code path just like a CPU page fault. write parameter is true. Page faults use the generic mm page fault code path
just like a CPU page fault.
Both function copy CPU page table into their pfns array argument. Each entry in Both functions copy CPU page table entries into their pfns array argument. Each
that array correspond to an address in the virtual range. HMM provide a set of entry in that array corresponds to an address in the virtual range. HMM
flags to help driver identify special CPU page table entries. provides a set of flags to help the driver identify special CPU page table
entries.
Locking with the update() callback is the most important aspect the driver must Locking with the update() callback is the most important aspect the driver must
respect in order to keep things properly synchronize. The usage pattern is : respect in order to keep things properly synchronized. The usage pattern is:
int driver_populate_range(...) int driver_populate_range(...)
{ {
@ -233,43 +243,44 @@ respect in order to keep things properly synchronize. The usage pattern is :
return 0; return 0;
} }
The driver->update lock is the same lock that driver takes inside its update() The driver->update lock is the same lock that the driver takes inside its
callback. That lock must be call before hmm_vma_range_done() to avoid any race update() callback. That lock must be held before hmm_vma_range_done() to avoid
with a concurrent CPU page table update. any race with a concurrent CPU page table update.
HMM implements all this on top of the mmu_notifier API because we wanted to a HMM implements all this on top of the mmu_notifier API because we wanted a
simpler API and also to be able to perform optimization latter own like doing simpler API and also to be able to perform optimizations later on like doing
concurrent device update in multi-devices scenario. concurrent device updates in multi-devices scenario.
HMM also serve as an impedence missmatch between how CPU page table update are HMM also serves as an impedance mismatch between how CPU page table updates
done (by CPU write to the page table and TLB flushes) from how device update are done (by CPU write to the page table and TLB flushes) and how devices
their own page table. Device update is a multi-step process, first appropriate update their own page table. Device updates are a multi-step process. First,
commands are write to a buffer, then this buffer is schedule for execution on appropriate commands are written to a buffer, then this buffer is scheduled for
the device. It is only once the device has executed commands in the buffer that execution on the device. It is only once the device has executed commands in
the update is done. Creating and scheduling update command buffer can happen the buffer that the update is done. Creating and scheduling the update command
concurrently for multiple devices. Waiting for each device to report commands buffer can happen concurrently for multiple devices. Waiting for each device to
as executed is serialize (there is no point in doing this concurrently). report commands as executed is serialized (there is no point in doing this
concurrently).
------------------------------------------------------------------------------- -------------------------------------------------------------------------------
5) Represent and manage device memory from core kernel point of view 5) Represent and manage device memory from core kernel point of view
Several differents design were try to support device memory. First one use Several different designs were tried to support device memory. First one used
device specific data structure to keep information about migrated memory and a device specific data structure to keep information about migrated memory and
HMM hooked itself in various place of mm code to handle any access to address HMM hooked itself in various places of mm code to handle any access to
that were back by device memory. It turns out that this ended up replicating addresses that were backed by device memory. It turns out that this ended up
most of the fields of struct page and also needed many kernel code path to be replicating most of the fields of struct page and also needed many kernel code
updated to understand this new kind of memory. paths to be updated to understand this new kind of memory.
Thing is most kernel code path never try to access the memory behind a page Most kernel code paths never try to access the memory behind a page
but only care about struct page contents. Because of this HMM switchted to but only care about struct page contents. Because of this, HMM switched to
directly using struct page for device memory which left most kernel code path directly using struct page for device memory which left most kernel code paths
un-aware of the difference. We only need to make sure that no one ever try to unaware of the difference. We only need to make sure that no one ever tries to
map those page from the CPU side. map those pages from the CPU side.
HMM provide a set of helpers to register and hotplug device memory as a new HMM provides a set of helpers to register and hotplug device memory as a new
region needing struct page. This is offer through a very simple API: region needing a struct page. This is offered through a very simple API:
struct hmm_devmem *hmm_devmem_add(const struct hmm_devmem_ops *ops, struct hmm_devmem *hmm_devmem_add(const struct hmm_devmem_ops *ops,
struct device *device, struct device *device,
@ -289,18 +300,19 @@ The hmm_devmem_ops is where most of the important things are:
}; };
The first callback (free()) happens when the last reference on a device page is The first callback (free()) happens when the last reference on a device page is
drop. This means the device page is now free and no longer use by anyone. The dropped. This means the device page is now free and no longer used by anyone.
second callback happens whenever CPU try to access a device page which it can The second callback happens whenever the CPU tries to access a device page
not do. This second callback must trigger a migration back to system memory. which it cannot do. This second callback must trigger a migration back to
system memory.
------------------------------------------------------------------------------- -------------------------------------------------------------------------------
6) Migrate to and from device memory 6) Migration to and from device memory
Because CPU can not access device memory, migration must use device DMA engine Because the CPU cannot access device memory, migration must use the device DMA
to perform copy from and to device memory. For this we need a new migration engine to perform copy from and to device memory. For this we need a new
helper: migration helper:
int migrate_vma(const struct migrate_vma_ops *ops, int migrate_vma(const struct migrate_vma_ops *ops,
struct vm_area_struct *vma, struct vm_area_struct *vma,
@ -311,15 +323,15 @@ helper:
unsigned long *dst, unsigned long *dst,
void *private); void *private);
Unlike other migration function it works on a range of virtual address, there Unlike other migration functions it works on a range of virtual addresses; there
is two reasons for that. First device DMA copy has a high setup overhead cost are two reasons for that. First, device DMA copy has a high setup overhead cost
and thus batching multiple pages is needed as otherwise the migration overhead and thus batching multiple pages is needed as otherwise the migration overhead
make the whole excersie pointless. The second reason is because driver trigger makes the whole exercise pointless. The second reason is because the
such migration base on range of address the device is actively accessing. migration might be for a range of addresses the device is actively accessing.
The migrate_vma_ops struct define two callbacks. First one (alloc_and_copy()) The migrate_vma_ops struct defines two callbacks. First one (alloc_and_copy())
control destination memory allocation and copy operation. Second one is there controls destination memory allocation and copy operation. Second one is there
to allow device driver to perform cleanup operation after migration. to allow the device driver to perform cleanup operations after migration.
struct migrate_vma_ops { struct migrate_vma_ops {
void (*alloc_and_copy)(struct vm_area_struct *vma, void (*alloc_and_copy)(struct vm_area_struct *vma,
@ -336,19 +348,19 @@ to allow device driver to perform cleanup operation after migration.
void *private); void *private);
}; };
It is important to stress that this migration helpers allow for hole in the It is important to stress that these migration helpers allow for holes in the
virtual address range. Some pages in the range might not be migrated for all virtual address range. Some pages in the range might not be migrated for all
the usual reasons (page is pin, page is lock, ...). This helper does not fail the usual reasons (page is pinned, page is locked, ...). This helper does not
but just skip over those pages. fail but just skips over those pages.
The alloc_and_copy() might as well decide to not migrate all pages in the The alloc_and_copy() might decide to not migrate all pages in the
range (for reasons under the callback control). For those the callback just range (for reasons under the callback control). For those, the callback just
have to leave the corresponding dst entry empty. has to leave the corresponding dst entry empty.
Finaly the migration of the struct page might fails (for file back page) for Finally, the migration of the struct page might fail (for file backed page) for
various reasons (failure to freeze reference, or update page cache, ...). If various reasons (failure to freeze reference, or update page cache, ...). If
that happens then the finalize_and_map() can catch any pages that was not that happens, then the finalize_and_map() can catch any pages that were not
migrated. Note those page were still copied to new page and thus we wasted migrated. Note those pages were still copied to a new page and thus we wasted
bandwidth but this is considered as a rare event and a price that we are bandwidth but this is considered as a rare event and a price that we are
willing to pay to keep all the code simpler. willing to pay to keep all the code simpler.
@ -358,27 +370,27 @@ willing to pay to keep all the code simpler.
7) Memory cgroup (memcg) and rss accounting 7) Memory cgroup (memcg) and rss accounting
For now device memory is accounted as any regular page in rss counters (either For now device memory is accounted as any regular page in rss counters (either
anonymous if device page is use for anonymous, file if device page is use for anonymous if device page is used for anonymous, file if device page is used for
file back page or shmem if device page is use for share memory). This is a file backed page or shmem if device page is used for shared memory). This is a
deliberate choice to keep existing application that might start using device deliberate choice to keep existing applications, that might start using device
memory without knowing about it to keep runing unimpacted. memory without knowing about it, running unimpacted.
Drawbacks is that OOM killer might kill an application using a lot of device A drawback is that the OOM killer might kill an application using a lot of
memory and not a lot of regular system memory and thus not freeing much system device memory and not a lot of regular system memory and thus not freeing much
memory. We want to gather more real world experience on how application and system memory. We want to gather more real world experience on how applications
system react under memory pressure in the presence of device memory before and system react under memory pressure in the presence of device memory before
deciding to account device memory differently. deciding to account device memory differently.
Same decision was made for memory cgroup. Device memory page are accounted Same decision was made for memory cgroup. Device memory pages are accounted
against same memory cgroup a regular page would be accounted to. This does against same memory cgroup a regular page would be accounted to. This does
simplify migration to and from device memory. This also means that migration simplify migration to and from device memory. This also means that migration
back from device memory to regular memory can not fail because it would back from device memory to regular memory cannot fail because it would
go above memory cgroup limit. We might revisit this choice later on once we go above memory cgroup limit. We might revisit this choice later on once we
get more experience in how device memory is use and its impact on memory get more experience in how device memory is used and its impact on memory
resource control. resource control.
Note that device memory can never be pin nor by device driver nor through GUP Note that device memory can never be pinned, either by a device driver or through GUP
and thus such memory is always free upon process exit. Or when last reference and thus such memory is always free upon process exit. Or when last reference
is drop in case of share memory or file back memory. is dropped in case of shared memory or file backed memory.

View File

@ -90,7 +90,7 @@ Steps:
1. Lock the page to be migrated 1. Lock the page to be migrated
2. Insure that writeback is complete. 2. Ensure that writeback is complete.
3. Lock the new page that we want to move to. It is locked so that accesses to 3. Lock the new page that we want to move to. It is locked so that accesses to
this (not yet uptodate) page immediately lock while the move is in progress. this (not yet uptodate) page immediately lock while the move is in progress.
@ -100,8 +100,8 @@ Steps:
mapcount is not zero then we do not migrate the page. All user space mapcount is not zero then we do not migrate the page. All user space
processes that attempt to access the page will now wait on the page lock. processes that attempt to access the page will now wait on the page lock.
5. The radix tree lock is taken. This will cause all processes trying 5. The i_pages lock is taken. This will cause all processes trying
to access the page via the mapping to block on the radix tree spinlock. to access the page via the mapping to block on the spinlock.
6. The refcount of the page is examined and we back out if references remain 6. The refcount of the page is examined and we back out if references remain
otherwise we know that we are the only one referencing this page. otherwise we know that we are the only one referencing this page.
@ -114,12 +114,12 @@ Steps:
9. The radix tree is changed to point to the new page. 9. The radix tree is changed to point to the new page.
10. The reference count of the old page is dropped because the radix tree 10. The reference count of the old page is dropped because the address space
reference is gone. A reference to the new page is established because reference is gone. A reference to the new page is established because
the new page is referenced to by the radix tree. the new page is referenced by the address space.
11. The radix tree lock is dropped. With that lookups in the mapping 11. The i_pages lock is dropped. With that lookups in the mapping
become possible again. Processes will move from spinning on the tree_lock become possible again. Processes will move from spinning on the lock
to sleeping on the locked new page. to sleeping on the locked new page.
12. The page contents are copied to the new page. 12. The page contents are copied to the new page.

View File

@ -4392,7 +4392,7 @@ S: Maintained
F: drivers/staging/fsl-dpaa2/ethsw F: drivers/staging/fsl-dpaa2/ethsw
DPT_I2O SCSI RAID DRIVER DPT_I2O SCSI RAID DRIVER
M: Adaptec OEM Raid Solutions <aacraid@adaptec.com> M: Adaptec OEM Raid Solutions <aacraid@microsemi.com>
L: linux-scsi@vger.kernel.org L: linux-scsi@vger.kernel.org
W: http://www.adaptec.com/ W: http://www.adaptec.com/
S: Maintained S: Maintained
@ -6410,6 +6410,7 @@ L: linux-mm@kvack.org
S: Maintained S: Maintained
F: mm/hmm* F: mm/hmm*
F: include/linux/hmm* F: include/linux/hmm*
F: Documentation/vm/hmm.txt
HOST AP DRIVER HOST AP DRIVER
M: Jouni Malinen <j@w1.fi> M: Jouni Malinen <j@w1.fi>
@ -7344,7 +7345,7 @@ F: include/linux/ipmi*
F: include/uapi/linux/ipmi* F: include/uapi/linux/ipmi*
IPS SCSI RAID DRIVER IPS SCSI RAID DRIVER
M: Adaptec OEM Raid Solutions <aacraid@adaptec.com> M: Adaptec OEM Raid Solutions <aacraid@microsemi.com>
L: linux-scsi@vger.kernel.org L: linux-scsi@vger.kernel.org
W: http://www.adaptec.com/ W: http://www.adaptec.com/
S: Maintained S: Maintained
@ -11762,7 +11763,7 @@ F: drivers/char/random.c
RAPIDIO SUBSYSTEM RAPIDIO SUBSYSTEM
M: Matt Porter <mporter@kernel.crashing.org> M: Matt Porter <mporter@kernel.crashing.org>
M: Alexandre Bounine <alexandre.bounine@idt.com> M: Alexandre Bounine <alex.bou9@gmail.com>
S: Maintained S: Maintained
F: drivers/rapidio/ F: drivers/rapidio/

View File

@ -32,6 +32,7 @@
#define MAP_NONBLOCK 0x40000 /* do not block on IO */ #define MAP_NONBLOCK 0x40000 /* do not block on IO */
#define MAP_STACK 0x80000 /* give out an address that is best suited for process/thread stacks */ #define MAP_STACK 0x80000 /* give out an address that is best suited for process/thread stacks */
#define MAP_HUGETLB 0x100000 /* create a huge page mapping */ #define MAP_HUGETLB 0x100000 /* create a huge page mapping */
#define MAP_FIXED_NOREPLACE 0x200000/* MAP_FIXED which doesn't unmap underlying mapping */
#define MS_ASYNC 1 /* sync memory asynchronously */ #define MS_ASYNC 1 /* sync memory asynchronously */
#define MS_SYNC 2 /* synchronous memory sync */ #define MS_SYNC 2 /* synchronous memory sync */

View File

@ -318,10 +318,8 @@ static inline void flush_anon_page(struct vm_area_struct *vma,
#define ARCH_HAS_FLUSH_KERNEL_DCACHE_PAGE #define ARCH_HAS_FLUSH_KERNEL_DCACHE_PAGE
extern void flush_kernel_dcache_page(struct page *); extern void flush_kernel_dcache_page(struct page *);
#define flush_dcache_mmap_lock(mapping) \ #define flush_dcache_mmap_lock(mapping) xa_lock_irq(&mapping->i_pages)
spin_lock_irq(&(mapping)->tree_lock) #define flush_dcache_mmap_unlock(mapping) xa_unlock_irq(&mapping->i_pages)
#define flush_dcache_mmap_unlock(mapping) \
spin_unlock_irq(&(mapping)->tree_lock)
#define flush_icache_user_range(vma,page,addr,len) \ #define flush_icache_user_range(vma,page,addr,len) \
flush_dcache_page(page) flush_dcache_page(page)

View File

@ -22,12 +22,6 @@
#include <mach/memory.h> #include <mach/memory.h>
#endif #endif
/*
* Allow for constants defined here to be used from assembly code
* by prepending the UL suffix only with actual C code compilation.
*/
#define UL(x) _AC(x, UL)
/* PAGE_OFFSET - the virtual address of the start of the kernel image */ /* PAGE_OFFSET - the virtual address of the start of the kernel image */
#define PAGE_OFFSET UL(CONFIG_PAGE_OFFSET) #define PAGE_OFFSET UL(CONFIG_PAGE_OFFSET)

View File

@ -466,6 +466,12 @@ void __init dma_contiguous_early_fixup(phys_addr_t base, unsigned long size)
void __init dma_contiguous_remap(void) void __init dma_contiguous_remap(void)
{ {
int i; int i;
if (!dma_mmu_remap_num)
return;
/* call flush_cache_all() since CMA area would be large enough */
flush_cache_all();
for (i = 0; i < dma_mmu_remap_num; i++) { for (i = 0; i < dma_mmu_remap_num; i++) {
phys_addr_t start = dma_mmu_remap[i].base; phys_addr_t start = dma_mmu_remap[i].base;
phys_addr_t end = start + dma_mmu_remap[i].size; phys_addr_t end = start + dma_mmu_remap[i].size;
@ -498,7 +504,15 @@ void __init dma_contiguous_remap(void)
flush_tlb_kernel_range(__phys_to_virt(start), flush_tlb_kernel_range(__phys_to_virt(start),
__phys_to_virt(end)); __phys_to_virt(end));
iotable_init(&map, 1); /*
* All the memory in CMA region will be on ZONE_MOVABLE.
* If that zone is considered as highmem, the memory in CMA
* region is also considered as highmem even if it's
* physical address belong to lowmem. In this case,
* re-mapping isn't required.
*/
if (!is_highmem_idx(ZONE_MOVABLE))
iotable_init(&map, 1);
} }
} }

View File

@ -21,20 +21,20 @@
#define MIN_GAP (128*1024*1024UL) #define MIN_GAP (128*1024*1024UL)
#define MAX_GAP ((TASK_SIZE)/6*5) #define MAX_GAP ((TASK_SIZE)/6*5)
static int mmap_is_legacy(void) static int mmap_is_legacy(struct rlimit *rlim_stack)
{ {
if (current->personality & ADDR_COMPAT_LAYOUT) if (current->personality & ADDR_COMPAT_LAYOUT)
return 1; return 1;
if (rlimit(RLIMIT_STACK) == RLIM_INFINITY) if (rlim_stack->rlim_cur == RLIM_INFINITY)
return 1; return 1;
return sysctl_legacy_va_layout; return sysctl_legacy_va_layout;
} }
static unsigned long mmap_base(unsigned long rnd) static unsigned long mmap_base(unsigned long rnd, struct rlimit *rlim_stack)
{ {
unsigned long gap = rlimit(RLIMIT_STACK); unsigned long gap = rlim_stack->rlim_cur;
if (gap < MIN_GAP) if (gap < MIN_GAP)
gap = MIN_GAP; gap = MIN_GAP;
@ -180,18 +180,18 @@ unsigned long arch_mmap_rnd(void)
return rnd << PAGE_SHIFT; return rnd << PAGE_SHIFT;
} }
void arch_pick_mmap_layout(struct mm_struct *mm) void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack)
{ {
unsigned long random_factor = 0UL; unsigned long random_factor = 0UL;
if (current->flags & PF_RANDOMIZE) if (current->flags & PF_RANDOMIZE)
random_factor = arch_mmap_rnd(); random_factor = arch_mmap_rnd();
if (mmap_is_legacy()) { if (mmap_is_legacy(rlim_stack)) {
mm->mmap_base = TASK_UNMAPPED_BASE + random_factor; mm->mmap_base = TASK_UNMAPPED_BASE + random_factor;
mm->get_unmapped_area = arch_get_unmapped_area; mm->get_unmapped_area = arch_get_unmapped_area;
} else { } else {
mm->mmap_base = mmap_base(random_factor); mm->mmap_base = mmap_base(random_factor, rlim_stack);
mm->get_unmapped_area = arch_get_unmapped_area_topdown; mm->get_unmapped_area = arch_get_unmapped_area_topdown;
} }
} }

View File

@ -140,10 +140,8 @@ static inline void __flush_icache_all(void)
dsb(ish); dsb(ish);
} }
#define flush_dcache_mmap_lock(mapping) \ #define flush_dcache_mmap_lock(mapping) do { } while (0)
spin_lock_irq(&(mapping)->tree_lock) #define flush_dcache_mmap_unlock(mapping) do { } while (0)
#define flush_dcache_mmap_unlock(mapping) \
spin_unlock_irq(&(mapping)->tree_lock)
/* /*
* We don't appear to need to do anything here. In fact, if we did, we'd * We don't appear to need to do anything here. In fact, if we did, we'd

View File

@ -28,12 +28,6 @@
#include <asm/page-def.h> #include <asm/page-def.h>
#include <asm/sizes.h> #include <asm/sizes.h>
/*
* Allow for constants defined here to be used from assembly code
* by prepending the UL suffix only with actual C code compilation.
*/
#define UL(x) _AC(x, UL)
/* /*
* Size of the PCI I/O space. This must remain a power of two so that * Size of the PCI I/O space. This must remain a power of two so that
* IO_SPACE_LIMIT acts as a mask for the low bits of I/O addresses. * IO_SPACE_LIMIT acts as a mask for the low bits of I/O addresses.

View File

@ -38,12 +38,12 @@
#define MIN_GAP (SZ_128M) #define MIN_GAP (SZ_128M)
#define MAX_GAP (STACK_TOP/6*5) #define MAX_GAP (STACK_TOP/6*5)
static int mmap_is_legacy(void) static int mmap_is_legacy(struct rlimit *rlim_stack)
{ {
if (current->personality & ADDR_COMPAT_LAYOUT) if (current->personality & ADDR_COMPAT_LAYOUT)
return 1; return 1;
if (rlimit(RLIMIT_STACK) == RLIM_INFINITY) if (rlim_stack->rlim_cur == RLIM_INFINITY)
return 1; return 1;
return sysctl_legacy_va_layout; return sysctl_legacy_va_layout;
@ -62,9 +62,9 @@ unsigned long arch_mmap_rnd(void)
return rnd << PAGE_SHIFT; return rnd << PAGE_SHIFT;
} }
static unsigned long mmap_base(unsigned long rnd) static unsigned long mmap_base(unsigned long rnd, struct rlimit *rlim_stack)
{ {
unsigned long gap = rlimit(RLIMIT_STACK); unsigned long gap = rlim_stack->rlim_cur;
unsigned long pad = (STACK_RND_MASK << PAGE_SHIFT) + stack_guard_gap; unsigned long pad = (STACK_RND_MASK << PAGE_SHIFT) + stack_guard_gap;
/* Values close to RLIM_INFINITY can overflow. */ /* Values close to RLIM_INFINITY can overflow. */
@ -83,7 +83,7 @@ static unsigned long mmap_base(unsigned long rnd)
* This function, called very early during the creation of a new process VM * This function, called very early during the creation of a new process VM
* image, sets up which VM layout function to use: * image, sets up which VM layout function to use:
*/ */
void arch_pick_mmap_layout(struct mm_struct *mm) void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack)
{ {
unsigned long random_factor = 0UL; unsigned long random_factor = 0UL;
@ -94,11 +94,11 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
* Fall back to the standard layout if the personality bit is set, or * Fall back to the standard layout if the personality bit is set, or
* if the expected stack growth is unlimited: * if the expected stack growth is unlimited:
*/ */
if (mmap_is_legacy()) { if (mmap_is_legacy(rlim_stack)) {
mm->mmap_base = TASK_UNMAPPED_BASE + random_factor; mm->mmap_base = TASK_UNMAPPED_BASE + random_factor;
mm->get_unmapped_area = arch_get_unmapped_area; mm->get_unmapped_area = arch_get_unmapped_area;
} else { } else {
mm->mmap_base = mmap_base(random_factor); mm->mmap_base = mmap_base(random_factor, rlim_stack);
mm->get_unmapped_area = arch_get_unmapped_area_topdown; mm->get_unmapped_area = arch_get_unmapped_area_topdown;
} }
} }

View File

@ -50,6 +50,7 @@
#define MAP_NONBLOCK 0x20000 /* do not block on IO */ #define MAP_NONBLOCK 0x20000 /* do not block on IO */
#define MAP_STACK 0x40000 /* give out an address that is best suited for process/thread stacks */ #define MAP_STACK 0x40000 /* give out an address that is best suited for process/thread stacks */
#define MAP_HUGETLB 0x80000 /* create a huge page mapping */ #define MAP_HUGETLB 0x80000 /* create a huge page mapping */
#define MAP_FIXED_NOREPLACE 0x100000 /* MAP_FIXED which doesn't unmap underlying mapping */
/* /*
* Flags for msync * Flags for msync

View File

@ -24,20 +24,20 @@ EXPORT_SYMBOL(shm_align_mask);
#define MIN_GAP (128*1024*1024UL) #define MIN_GAP (128*1024*1024UL)
#define MAX_GAP ((TASK_SIZE)/6*5) #define MAX_GAP ((TASK_SIZE)/6*5)
static int mmap_is_legacy(void) static int mmap_is_legacy(struct rlimit *rlim_stack)
{ {
if (current->personality & ADDR_COMPAT_LAYOUT) if (current->personality & ADDR_COMPAT_LAYOUT)
return 1; return 1;
if (rlimit(RLIMIT_STACK) == RLIM_INFINITY) if (rlim_stack->rlim_cur == RLIM_INFINITY)
return 1; return 1;
return sysctl_legacy_va_layout; return sysctl_legacy_va_layout;
} }
static unsigned long mmap_base(unsigned long rnd) static unsigned long mmap_base(unsigned long rnd, struct rlimit *rlim_stack)
{ {
unsigned long gap = rlimit(RLIMIT_STACK); unsigned long gap = rlim_stack->rlim_cur;
if (gap < MIN_GAP) if (gap < MIN_GAP)
gap = MIN_GAP; gap = MIN_GAP;
@ -158,18 +158,18 @@ unsigned long arch_mmap_rnd(void)
return rnd << PAGE_SHIFT; return rnd << PAGE_SHIFT;
} }
void arch_pick_mmap_layout(struct mm_struct *mm) void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack)
{ {
unsigned long random_factor = 0UL; unsigned long random_factor = 0UL;
if (current->flags & PF_RANDOMIZE) if (current->flags & PF_RANDOMIZE)
random_factor = arch_mmap_rnd(); random_factor = arch_mmap_rnd();
if (mmap_is_legacy()) { if (mmap_is_legacy(rlim_stack)) {
mm->mmap_base = TASK_UNMAPPED_BASE + random_factor; mm->mmap_base = TASK_UNMAPPED_BASE + random_factor;
mm->get_unmapped_area = arch_get_unmapped_area; mm->get_unmapped_area = arch_get_unmapped_area;
} else { } else {
mm->mmap_base = mmap_base(random_factor); mm->mmap_base = mmap_base(random_factor, rlim_stack);
mm->get_unmapped_area = arch_get_unmapped_area_topdown; mm->get_unmapped_area = arch_get_unmapped_area_topdown;
} }
} }

View File

@ -34,8 +34,8 @@ void flush_anon_page(struct vm_area_struct *vma,
void flush_kernel_dcache_page(struct page *page); void flush_kernel_dcache_page(struct page *page);
void flush_icache_range(unsigned long start, unsigned long end); void flush_icache_range(unsigned long start, unsigned long end);
void flush_icache_page(struct vm_area_struct *vma, struct page *page); void flush_icache_page(struct vm_area_struct *vma, struct page *page);
#define flush_dcache_mmap_lock(mapping) spin_lock_irq(&(mapping)->tree_lock) #define flush_dcache_mmap_lock(mapping) xa_lock_irq(&(mapping)->i_pages)
#define flush_dcache_mmap_unlock(mapping) spin_unlock_irq(&(mapping)->tree_lock) #define flush_dcache_mmap_unlock(mapping) xa_unlock_irq(&(mapping)->i_pages)
#else #else
#include <asm-generic/cacheflush.h> #include <asm-generic/cacheflush.h>

View File

@ -46,9 +46,7 @@ extern void copy_from_user_page(struct vm_area_struct *vma, struct page *page,
extern void flush_dcache_range(unsigned long start, unsigned long end); extern void flush_dcache_range(unsigned long start, unsigned long end);
extern void invalidate_dcache_range(unsigned long start, unsigned long end); extern void invalidate_dcache_range(unsigned long start, unsigned long end);
#define flush_dcache_mmap_lock(mapping) \ #define flush_dcache_mmap_lock(mapping) xa_lock_irq(&mapping->i_pages)
spin_lock_irq(&(mapping)->tree_lock) #define flush_dcache_mmap_unlock(mapping) xa_unlock_irq(&mapping->i_pages)
#define flush_dcache_mmap_unlock(mapping) \
spin_unlock_irq(&(mapping)->tree_lock)
#endif /* _ASM_NIOS2_CACHEFLUSH_H */ #endif /* _ASM_NIOS2_CACHEFLUSH_H */

View File

@ -55,10 +55,8 @@ void invalidate_kernel_vmap_range(void *vaddr, int size);
#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1 #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
extern void flush_dcache_page(struct page *page); extern void flush_dcache_page(struct page *page);
#define flush_dcache_mmap_lock(mapping) \ #define flush_dcache_mmap_lock(mapping) xa_lock_irq(&mapping->i_pages)
spin_lock_irq(&(mapping)->tree_lock) #define flush_dcache_mmap_unlock(mapping) xa_unlock_irq(&mapping->i_pages)
#define flush_dcache_mmap_unlock(mapping) \
spin_unlock_irq(&(mapping)->tree_lock)
#define flush_icache_page(vma,page) do { \ #define flush_icache_page(vma,page) do { \
flush_kernel_dcache_page(page); \ flush_kernel_dcache_page(page); \

View File

@ -26,6 +26,7 @@
#define MAP_NONBLOCK 0x20000 /* do not block on IO */ #define MAP_NONBLOCK 0x20000 /* do not block on IO */
#define MAP_STACK 0x40000 /* give out an address that is best suited for process/thread stacks */ #define MAP_STACK 0x40000 /* give out an address that is best suited for process/thread stacks */
#define MAP_HUGETLB 0x80000 /* create a huge page mapping */ #define MAP_HUGETLB 0x80000 /* create a huge page mapping */
#define MAP_FIXED_NOREPLACE 0x100000 /* MAP_FIXED which doesn't unmap underlying mapping */
#define MS_SYNC 1 /* synchronous memory sync */ #define MS_SYNC 1 /* synchronous memory sync */
#define MS_ASYNC 2 /* sync memory asynchronously */ #define MS_ASYNC 2 /* sync memory asynchronously */

View File

@ -70,12 +70,18 @@ static inline unsigned long COLOR_ALIGN(unsigned long addr,
* Top of mmap area (just below the process stack). * Top of mmap area (just below the process stack).
*/ */
static unsigned long mmap_upper_limit(void) /*
* When called from arch_get_unmapped_area(), rlim_stack will be NULL,
* indicating that "current" should be used instead of a passed-in
* value from the exec bprm as done with arch_pick_mmap_layout().
*/
static unsigned long mmap_upper_limit(struct rlimit *rlim_stack)
{ {
unsigned long stack_base; unsigned long stack_base;
/* Limit stack size - see setup_arg_pages() in fs/exec.c */ /* Limit stack size - see setup_arg_pages() in fs/exec.c */
stack_base = rlimit_max(RLIMIT_STACK); stack_base = rlim_stack ? rlim_stack->rlim_max
: rlimit_max(RLIMIT_STACK);
if (stack_base > STACK_SIZE_MAX) if (stack_base > STACK_SIZE_MAX)
stack_base = STACK_SIZE_MAX; stack_base = STACK_SIZE_MAX;
@ -127,7 +133,7 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr,
info.flags = 0; info.flags = 0;
info.length = len; info.length = len;
info.low_limit = mm->mmap_legacy_base; info.low_limit = mm->mmap_legacy_base;
info.high_limit = mmap_upper_limit(); info.high_limit = mmap_upper_limit(NULL);
info.align_mask = last_mmap ? (PAGE_MASK & (SHM_COLOUR - 1)) : 0; info.align_mask = last_mmap ? (PAGE_MASK & (SHM_COLOUR - 1)) : 0;
info.align_offset = shared_align_offset(last_mmap, pgoff); info.align_offset = shared_align_offset(last_mmap, pgoff);
addr = vm_unmapped_area(&info); addr = vm_unmapped_area(&info);
@ -250,10 +256,10 @@ static unsigned long mmap_legacy_base(void)
* This function, called very early during the creation of a new * This function, called very early during the creation of a new
* process VM image, sets up which VM layout function to use: * process VM image, sets up which VM layout function to use:
*/ */
void arch_pick_mmap_layout(struct mm_struct *mm) void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack)
{ {
mm->mmap_legacy_base = mmap_legacy_base(); mm->mmap_legacy_base = mmap_legacy_base();
mm->mmap_base = mmap_upper_limit(); mm->mmap_base = mmap_upper_limit(rlim_stack);
if (mmap_is_legacy()) { if (mmap_is_legacy()) {
mm->mmap_base = mm->mmap_legacy_base; mm->mmap_base = mm->mmap_legacy_base;

View File

@ -39,12 +39,12 @@
#define MIN_GAP (128*1024*1024) #define MIN_GAP (128*1024*1024)
#define MAX_GAP (TASK_SIZE/6*5) #define MAX_GAP (TASK_SIZE/6*5)
static inline int mmap_is_legacy(void) static inline int mmap_is_legacy(struct rlimit *rlim_stack)
{ {
if (current->personality & ADDR_COMPAT_LAYOUT) if (current->personality & ADDR_COMPAT_LAYOUT)
return 1; return 1;
if (rlimit(RLIMIT_STACK) == RLIM_INFINITY) if (rlim_stack->rlim_cur == RLIM_INFINITY)
return 1; return 1;
return sysctl_legacy_va_layout; return sysctl_legacy_va_layout;
@ -76,9 +76,10 @@ static inline unsigned long stack_maxrandom_size(void)
return (1<<30); return (1<<30);
} }
static inline unsigned long mmap_base(unsigned long rnd) static inline unsigned long mmap_base(unsigned long rnd,
struct rlimit *rlim_stack)
{ {
unsigned long gap = rlimit(RLIMIT_STACK); unsigned long gap = rlim_stack->rlim_cur;
unsigned long pad = stack_maxrandom_size() + stack_guard_gap; unsigned long pad = stack_maxrandom_size() + stack_guard_gap;
/* Values close to RLIM_INFINITY can overflow. */ /* Values close to RLIM_INFINITY can overflow. */
@ -196,26 +197,28 @@ radix__arch_get_unmapped_area_topdown(struct file *filp,
} }
static void radix__arch_pick_mmap_layout(struct mm_struct *mm, static void radix__arch_pick_mmap_layout(struct mm_struct *mm,
unsigned long random_factor) unsigned long random_factor,
struct rlimit *rlim_stack)
{ {
if (mmap_is_legacy()) { if (mmap_is_legacy(rlim_stack)) {
mm->mmap_base = TASK_UNMAPPED_BASE; mm->mmap_base = TASK_UNMAPPED_BASE;
mm->get_unmapped_area = radix__arch_get_unmapped_area; mm->get_unmapped_area = radix__arch_get_unmapped_area;
} else { } else {
mm->mmap_base = mmap_base(random_factor); mm->mmap_base = mmap_base(random_factor, rlim_stack);
mm->get_unmapped_area = radix__arch_get_unmapped_area_topdown; mm->get_unmapped_area = radix__arch_get_unmapped_area_topdown;
} }
} }
#else #else
/* dummy */ /* dummy */
extern void radix__arch_pick_mmap_layout(struct mm_struct *mm, extern void radix__arch_pick_mmap_layout(struct mm_struct *mm,
unsigned long random_factor); unsigned long random_factor,
struct rlimit *rlim_stack);
#endif #endif
/* /*
* This function, called very early during the creation of a new * This function, called very early during the creation of a new
* process VM image, sets up which VM layout function to use: * process VM image, sets up which VM layout function to use:
*/ */
void arch_pick_mmap_layout(struct mm_struct *mm) void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack)
{ {
unsigned long random_factor = 0UL; unsigned long random_factor = 0UL;
@ -223,16 +226,17 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
random_factor = arch_mmap_rnd(); random_factor = arch_mmap_rnd();
if (radix_enabled()) if (radix_enabled())
return radix__arch_pick_mmap_layout(mm, random_factor); return radix__arch_pick_mmap_layout(mm, random_factor,
rlim_stack);
/* /*
* Fall back to the standard layout if the personality * Fall back to the standard layout if the personality
* bit is set, or if the expected stack growth is unlimited: * bit is set, or if the expected stack growth is unlimited:
*/ */
if (mmap_is_legacy()) { if (mmap_is_legacy(rlim_stack)) {
mm->mmap_base = TASK_UNMAPPED_BASE; mm->mmap_base = TASK_UNMAPPED_BASE;
mm->get_unmapped_area = arch_get_unmapped_area; mm->get_unmapped_area = arch_get_unmapped_area;
} else { } else {
mm->mmap_base = mmap_base(random_factor); mm->mmap_base = mmap_base(random_factor, rlim_stack);
mm->get_unmapped_area = arch_get_unmapped_area_topdown; mm->get_unmapped_area = arch_get_unmapped_area_topdown;
} }
} }

View File

@ -75,8 +75,7 @@ EXPORT_SYMBOL_GPL(mm_iommu_preregistered);
/* /*
* Taken from alloc_migrate_target with changes to remove CMA allocations * Taken from alloc_migrate_target with changes to remove CMA allocations
*/ */
struct page *new_iommu_non_cma_page(struct page *page, unsigned long private, struct page *new_iommu_non_cma_page(struct page *page, unsigned long private)
int **resultp)
{ {
gfp_t gfp_mask = GFP_USER; gfp_t gfp_mask = GFP_USER;
struct page *new_page; struct page *new_page;

View File

@ -37,11 +37,11 @@ static unsigned long stack_maxrandom_size(void)
#define MIN_GAP (32*1024*1024) #define MIN_GAP (32*1024*1024)
#define MAX_GAP (STACK_TOP/6*5) #define MAX_GAP (STACK_TOP/6*5)
static inline int mmap_is_legacy(void) static inline int mmap_is_legacy(struct rlimit *rlim_stack)
{ {
if (current->personality & ADDR_COMPAT_LAYOUT) if (current->personality & ADDR_COMPAT_LAYOUT)
return 1; return 1;
if (rlimit(RLIMIT_STACK) == RLIM_INFINITY) if (rlim_stack->rlim_cur == RLIM_INFINITY)
return 1; return 1;
return sysctl_legacy_va_layout; return sysctl_legacy_va_layout;
} }
@ -56,9 +56,10 @@ static unsigned long mmap_base_legacy(unsigned long rnd)
return TASK_UNMAPPED_BASE + rnd; return TASK_UNMAPPED_BASE + rnd;
} }
static inline unsigned long mmap_base(unsigned long rnd) static inline unsigned long mmap_base(unsigned long rnd,
struct rlimit *rlim_stack)
{ {
unsigned long gap = rlimit(RLIMIT_STACK); unsigned long gap = rlim_stack->rlim_cur;
if (gap < MIN_GAP) if (gap < MIN_GAP)
gap = MIN_GAP; gap = MIN_GAP;
@ -184,7 +185,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
* This function, called very early during the creation of a new * This function, called very early during the creation of a new
* process VM image, sets up which VM layout function to use: * process VM image, sets up which VM layout function to use:
*/ */
void arch_pick_mmap_layout(struct mm_struct *mm) void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack)
{ {
unsigned long random_factor = 0UL; unsigned long random_factor = 0UL;
@ -195,11 +196,11 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
* Fall back to the standard layout if the personality * Fall back to the standard layout if the personality
* bit is set, or if the expected stack growth is unlimited: * bit is set, or if the expected stack growth is unlimited:
*/ */
if (mmap_is_legacy()) { if (mmap_is_legacy(rlim_stack)) {
mm->mmap_base = mmap_base_legacy(random_factor); mm->mmap_base = mmap_base_legacy(random_factor);
mm->get_unmapped_area = arch_get_unmapped_area; mm->get_unmapped_area = arch_get_unmapped_area;
} else { } else {
mm->mmap_base = mmap_base(random_factor); mm->mmap_base = mmap_base(random_factor, rlim_stack);
mm->get_unmapped_area = arch_get_unmapped_area_topdown; mm->get_unmapped_area = arch_get_unmapped_area_topdown;
} }
} }

View File

@ -276,7 +276,7 @@ static unsigned long mmap_rnd(void)
return rnd << PAGE_SHIFT; return rnd << PAGE_SHIFT;
} }
void arch_pick_mmap_layout(struct mm_struct *mm) void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack)
{ {
unsigned long random_factor = mmap_rnd(); unsigned long random_factor = mmap_rnd();
unsigned long gap; unsigned long gap;
@ -285,7 +285,7 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
* Fall back to the standard layout if the personality * Fall back to the standard layout if the personality
* bit is set, or if the expected stack growth is unlimited: * bit is set, or if the expected stack growth is unlimited:
*/ */
gap = rlimit(RLIMIT_STACK); gap = rlim_stack->rlim_cur;
if (!test_thread_flag(TIF_32BIT) || if (!test_thread_flag(TIF_32BIT) ||
(current->personality & ADDR_COMPAT_LAYOUT) || (current->personality & ADDR_COMPAT_LAYOUT) ||
gap == RLIM_INFINITY || gap == RLIM_INFINITY ||

View File

@ -170,10 +170,8 @@ extern void flush_cache_page(struct vm_area_struct *vma,
#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1 #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
extern void flush_dcache_page(struct page *); extern void flush_dcache_page(struct page *);
#define flush_dcache_mmap_lock(mapping) \ #define flush_dcache_mmap_lock(mapping) do { } while (0)
spin_lock_irq(&(mapping)->tree_lock) #define flush_dcache_mmap_unlock(mapping) do { } while (0)
#define flush_dcache_mmap_unlock(mapping) \
spin_unlock_irq(&(mapping)->tree_lock)
#define flush_icache_user_range(vma, page, addr, len) \ #define flush_icache_user_range(vma, page, addr, len) \
flush_dcache_page(page) flush_dcache_page(page)

View File

@ -19,12 +19,6 @@
#include <asm/sizes.h> #include <asm/sizes.h>
#include <mach/memory.h> #include <mach/memory.h>
/*
* Allow for constants defined here to be used from assembly code
* by prepending the UL suffix only with actual C code compilation.
*/
#define UL(x) _AC(x, UL)
/* /*
* PAGE_OFFSET - the virtual address of the start of the kernel image * PAGE_OFFSET - the virtual address of the start of the kernel image
* TASK_SIZE - the maximum size of a user space task. * TASK_SIZE - the maximum size of a user space task.

View File

@ -122,12 +122,14 @@ struct x86_init_pci {
* @guest_late_init: guest late init * @guest_late_init: guest late init
* @x2apic_available: X2APIC detection * @x2apic_available: X2APIC detection
* @init_mem_mapping: setup early mappings during init_mem_mapping() * @init_mem_mapping: setup early mappings during init_mem_mapping()
* @init_after_bootmem: guest init after boot allocator is finished
*/ */
struct x86_hyper_init { struct x86_hyper_init {
void (*init_platform)(void); void (*init_platform)(void);
void (*guest_late_init)(void); void (*guest_late_init)(void);
bool (*x2apic_available)(void); bool (*x2apic_available)(void);
void (*init_mem_mapping)(void); void (*init_mem_mapping)(void);
void (*init_after_bootmem)(void);
}; };
/** /**

View File

@ -92,6 +92,7 @@ struct x86_init_ops x86_init __initdata = {
.guest_late_init = x86_init_noop, .guest_late_init = x86_init_noop,
.x2apic_available = bool_x86_init_noop, .x2apic_available = bool_x86_init_noop,
.init_mem_mapping = x86_init_noop, .init_mem_mapping = x86_init_noop,
.init_after_bootmem = x86_init_noop,
}, },
.acpi = { .acpi = {

View File

@ -778,6 +778,7 @@ void __init mem_init(void)
free_all_bootmem(); free_all_bootmem();
after_bootmem = 1; after_bootmem = 1;
x86_init.hyper.init_after_bootmem();
mem_init_print_info(NULL); mem_init_print_info(NULL);
printk(KERN_INFO "virtual kernel memory layout:\n" printk(KERN_INFO "virtual kernel memory layout:\n"

View File

@ -1185,6 +1185,7 @@ void __init mem_init(void)
/* this will put all memory onto the freelists */ /* this will put all memory onto the freelists */
free_all_bootmem(); free_all_bootmem();
after_bootmem = 1; after_bootmem = 1;
x86_init.hyper.init_after_bootmem();
/* /*
* Must be done after boot memory is put on freelist, because here we * Must be done after boot memory is put on freelist, because here we

View File

@ -90,9 +90,10 @@ unsigned long arch_mmap_rnd(void)
return arch_rnd(mmap_is_ia32() ? mmap32_rnd_bits : mmap64_rnd_bits); return arch_rnd(mmap_is_ia32() ? mmap32_rnd_bits : mmap64_rnd_bits);
} }
static unsigned long mmap_base(unsigned long rnd, unsigned long task_size) static unsigned long mmap_base(unsigned long rnd, unsigned long task_size,
struct rlimit *rlim_stack)
{ {
unsigned long gap = rlimit(RLIMIT_STACK); unsigned long gap = rlim_stack->rlim_cur;
unsigned long pad = stack_maxrandom_size(task_size) + stack_guard_gap; unsigned long pad = stack_maxrandom_size(task_size) + stack_guard_gap;
unsigned long gap_min, gap_max; unsigned long gap_min, gap_max;
@ -126,16 +127,17 @@ static unsigned long mmap_legacy_base(unsigned long rnd,
* process VM image, sets up which VM layout function to use: * process VM image, sets up which VM layout function to use:
*/ */
static void arch_pick_mmap_base(unsigned long *base, unsigned long *legacy_base, static void arch_pick_mmap_base(unsigned long *base, unsigned long *legacy_base,
unsigned long random_factor, unsigned long task_size) unsigned long random_factor, unsigned long task_size,
struct rlimit *rlim_stack)
{ {
*legacy_base = mmap_legacy_base(random_factor, task_size); *legacy_base = mmap_legacy_base(random_factor, task_size);
if (mmap_is_legacy()) if (mmap_is_legacy())
*base = *legacy_base; *base = *legacy_base;
else else
*base = mmap_base(random_factor, task_size); *base = mmap_base(random_factor, task_size, rlim_stack);
} }
void arch_pick_mmap_layout(struct mm_struct *mm) void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack)
{ {
if (mmap_is_legacy()) if (mmap_is_legacy())
mm->get_unmapped_area = arch_get_unmapped_area; mm->get_unmapped_area = arch_get_unmapped_area;
@ -143,7 +145,8 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
mm->get_unmapped_area = arch_get_unmapped_area_topdown; mm->get_unmapped_area = arch_get_unmapped_area_topdown;
arch_pick_mmap_base(&mm->mmap_base, &mm->mmap_legacy_base, arch_pick_mmap_base(&mm->mmap_base, &mm->mmap_legacy_base,
arch_rnd(mmap64_rnd_bits), task_size_64bit(0)); arch_rnd(mmap64_rnd_bits), task_size_64bit(0),
rlim_stack);
#ifdef CONFIG_HAVE_ARCH_COMPAT_MMAP_BASES #ifdef CONFIG_HAVE_ARCH_COMPAT_MMAP_BASES
/* /*
@ -153,7 +156,8 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
* mmap_base, the compat syscall uses mmap_compat_base. * mmap_base, the compat syscall uses mmap_compat_base.
*/ */
arch_pick_mmap_base(&mm->mmap_compat_base, &mm->mmap_compat_legacy_base, arch_pick_mmap_base(&mm->mmap_compat_base, &mm->mmap_compat_legacy_base,
arch_rnd(mmap32_rnd_bits), task_size_32bit()); arch_rnd(mmap32_rnd_bits), task_size_32bit(),
rlim_stack);
#endif #endif
} }

View File

@ -116,6 +116,8 @@ DEFINE_PER_CPU(unsigned long, xen_current_cr3); /* actual vcpu cr3 */
static phys_addr_t xen_pt_base, xen_pt_size __initdata; static phys_addr_t xen_pt_base, xen_pt_size __initdata;
static DEFINE_STATIC_KEY_FALSE(xen_struct_pages_ready);
/* /*
* Just beyond the highest usermode address. STACK_TOP_MAX has a * Just beyond the highest usermode address. STACK_TOP_MAX has a
* redzone above it, so round it up to a PGD boundary. * redzone above it, so round it up to a PGD boundary.
@ -155,11 +157,18 @@ void make_lowmem_page_readwrite(void *vaddr)
} }
/*
* During early boot all page table pages are pinned, but we do not have struct
* pages, so return true until struct pages are ready.
*/
static bool xen_page_pinned(void *ptr) static bool xen_page_pinned(void *ptr)
{ {
struct page *page = virt_to_page(ptr); if (static_branch_likely(&xen_struct_pages_ready)) {
struct page *page = virt_to_page(ptr);
return PagePinned(page); return PagePinned(page);
}
return true;
} }
static void xen_extend_mmu_update(const struct mmu_update *update) static void xen_extend_mmu_update(const struct mmu_update *update)
@ -836,11 +845,6 @@ void xen_mm_pin_all(void)
spin_unlock(&pgd_lock); spin_unlock(&pgd_lock);
} }
/*
* The init_mm pagetable is really pinned as soon as its created, but
* that's before we have page structures to store the bits. So do all
* the book-keeping now.
*/
static int __init xen_mark_pinned(struct mm_struct *mm, struct page *page, static int __init xen_mark_pinned(struct mm_struct *mm, struct page *page,
enum pt_level level) enum pt_level level)
{ {
@ -848,8 +852,18 @@ static int __init xen_mark_pinned(struct mm_struct *mm, struct page *page,
return 0; return 0;
} }
static void __init xen_mark_init_mm_pinned(void) /*
* The init_mm pagetable is really pinned as soon as its created, but
* that's before we have page structures to store the bits. So do all
* the book-keeping now once struct pages for allocated pages are
* initialized. This happens only after free_all_bootmem() is called.
*/
static void __init xen_after_bootmem(void)
{ {
static_branch_enable(&xen_struct_pages_ready);
#ifdef CONFIG_X86_64
SetPagePinned(virt_to_page(level3_user_vsyscall));
#endif
xen_pgd_walk(&init_mm, xen_mark_pinned, FIXADDR_TOP); xen_pgd_walk(&init_mm, xen_mark_pinned, FIXADDR_TOP);
} }
@ -1623,14 +1637,15 @@ static inline void __set_pfn_prot(unsigned long pfn, pgprot_t prot)
static inline void xen_alloc_ptpage(struct mm_struct *mm, unsigned long pfn, static inline void xen_alloc_ptpage(struct mm_struct *mm, unsigned long pfn,
unsigned level) unsigned level)
{ {
bool pinned = PagePinned(virt_to_page(mm->pgd)); bool pinned = xen_page_pinned(mm->pgd);
trace_xen_mmu_alloc_ptpage(mm, pfn, level, pinned); trace_xen_mmu_alloc_ptpage(mm, pfn, level, pinned);
if (pinned) { if (pinned) {
struct page *page = pfn_to_page(pfn); struct page *page = pfn_to_page(pfn);
SetPagePinned(page); if (static_branch_likely(&xen_struct_pages_ready))
SetPagePinned(page);
if (!PageHighMem(page)) { if (!PageHighMem(page)) {
xen_mc_batch(); xen_mc_batch();
@ -2364,9 +2379,7 @@ static void __init xen_post_allocator_init(void)
#ifdef CONFIG_X86_64 #ifdef CONFIG_X86_64
pv_mmu_ops.write_cr3 = &xen_write_cr3; pv_mmu_ops.write_cr3 = &xen_write_cr3;
SetPagePinned(virt_to_page(level3_user_vsyscall));
#endif #endif
xen_mark_init_mm_pinned();
} }
static void xen_leave_lazy_mmu(void) static void xen_leave_lazy_mmu(void)
@ -2450,6 +2463,7 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = {
void __init xen_init_mmu_ops(void) void __init xen_init_mmu_ops(void)
{ {
x86_init.paging.pagetable_init = xen_pagetable_init; x86_init.paging.pagetable_init = xen_pagetable_init;
x86_init.hyper.init_after_bootmem = xen_after_bootmem;
pv_mmu_ops = xen_mmu_ops; pv_mmu_ops = xen_mmu_ops;

View File

@ -57,6 +57,7 @@
#define MAP_NONBLOCK 0x20000 /* do not block on IO */ #define MAP_NONBLOCK 0x20000 /* do not block on IO */
#define MAP_STACK 0x40000 /* give out an address that is best suited for process/thread stacks */ #define MAP_STACK 0x40000 /* give out an address that is best suited for process/thread stacks */
#define MAP_HUGETLB 0x80000 /* create a huge page mapping */ #define MAP_HUGETLB 0x80000 /* create a huge page mapping */
#define MAP_FIXED_NOREPLACE 0x100000 /* MAP_FIXED which doesn't unmap underlying mapping */
#ifdef CONFIG_MMAP_ALLOW_UNINITIALIZED #ifdef CONFIG_MMAP_ALLOW_UNINITIALIZED
# define MAP_UNINITIALIZED 0x4000000 /* For anonymous mmap, memory could be # define MAP_UNINITIALIZED 0x4000000 /* For anonymous mmap, memory could be
* uninitialized */ * uninitialized */

View File

@ -837,11 +837,8 @@ int __init memory_dev_init(void)
* during boot and have been initialized * during boot and have been initialized
*/ */
mutex_lock(&mem_sysfs_mutex); mutex_lock(&mem_sysfs_mutex);
for (i = 0; i < NR_MEM_SECTIONS; i += sections_per_block) { for (i = 0; i <= __highest_present_section_nr;
/* Don't iterate over sections we know are !present: */ i += sections_per_block) {
if (i > __highest_present_section_nr)
break;
err = add_memory_block(i); err = add_memory_block(i);
if (!ret) if (!ret)
ret = err; ret = err;

View File

@ -253,7 +253,7 @@ static inline void hwsim_clear_chanctx_magic(struct ieee80211_chanctx_conf *c)
static unsigned int hwsim_net_id; static unsigned int hwsim_net_id;
static struct ida hwsim_netgroup_ida = IDA_INIT; static DEFINE_IDA(hwsim_netgroup_ida);
struct hwsim_net { struct hwsim_net {
int netgroup; int netgroup;

View File

@ -295,7 +295,7 @@ static void __init of_unittest_printf(void)
return; return;
} }
num_to_str(phandle_str, sizeof(phandle_str), np->phandle); num_to_str(phandle_str, sizeof(phandle_str), np->phandle, 0);
of_unittest_printf_one(np, "%pOF", full_name); of_unittest_printf_one(np, "%pOF", full_name);
of_unittest_printf_one(np, "%pOFf", full_name); of_unittest_printf_one(np, "%pOFf", full_name);

View File

@ -212,7 +212,6 @@ struct mport_cdev_priv {
#ifdef CONFIG_RAPIDIO_DMA_ENGINE #ifdef CONFIG_RAPIDIO_DMA_ENGINE
struct dma_chan *dmach; struct dma_chan *dmach;
struct list_head async_list; struct list_head async_list;
struct list_head pend_list;
spinlock_t req_lock; spinlock_t req_lock;
struct mutex dma_lock; struct mutex dma_lock;
struct kref dma_ref; struct kref dma_ref;
@ -258,8 +257,6 @@ static DECLARE_WAIT_QUEUE_HEAD(mport_cdev_wait);
static struct class *dev_class; static struct class *dev_class;
static dev_t dev_number; static dev_t dev_number;
static struct workqueue_struct *dma_wq;
static void mport_release_mapping(struct kref *ref); static void mport_release_mapping(struct kref *ref);
static int rio_mport_maint_rd(struct mport_cdev_priv *priv, void __user *arg, static int rio_mport_maint_rd(struct mport_cdev_priv *priv, void __user *arg,
@ -539,6 +536,7 @@ static int maint_comptag_set(struct mport_cdev_priv *priv, void __user *arg)
#ifdef CONFIG_RAPIDIO_DMA_ENGINE #ifdef CONFIG_RAPIDIO_DMA_ENGINE
struct mport_dma_req { struct mport_dma_req {
struct kref refcount;
struct list_head node; struct list_head node;
struct file *filp; struct file *filp;
struct mport_cdev_priv *priv; struct mport_cdev_priv *priv;
@ -554,11 +552,6 @@ struct mport_dma_req {
struct completion req_comp; struct completion req_comp;
}; };
struct mport_faf_work {
struct work_struct work;
struct mport_dma_req *req;
};
static void mport_release_def_dma(struct kref *dma_ref) static void mport_release_def_dma(struct kref *dma_ref)
{ {
struct mport_dev *md = struct mport_dev *md =
@ -578,8 +571,10 @@ static void mport_release_dma(struct kref *dma_ref)
complete(&priv->comp); complete(&priv->comp);
} }
static void dma_req_free(struct mport_dma_req *req) static void dma_req_free(struct kref *ref)
{ {
struct mport_dma_req *req = container_of(ref, struct mport_dma_req,
refcount);
struct mport_cdev_priv *priv = req->priv; struct mport_cdev_priv *priv = req->priv;
unsigned int i; unsigned int i;
@ -611,30 +606,7 @@ static void dma_xfer_callback(void *param)
req->status = dma_async_is_tx_complete(priv->dmach, req->cookie, req->status = dma_async_is_tx_complete(priv->dmach, req->cookie,
NULL, NULL); NULL, NULL);
complete(&req->req_comp); complete(&req->req_comp);
} kref_put(&req->refcount, dma_req_free);
static void dma_faf_cleanup(struct work_struct *_work)
{
struct mport_faf_work *work = container_of(_work,
struct mport_faf_work, work);
struct mport_dma_req *req = work->req;
dma_req_free(req);
kfree(work);
}
static void dma_faf_callback(void *param)
{
struct mport_dma_req *req = (struct mport_dma_req *)param;
struct mport_faf_work *work;
work = kmalloc(sizeof(*work), GFP_ATOMIC);
if (!work)
return;
INIT_WORK(&work->work, dma_faf_cleanup);
work->req = req;
queue_work(dma_wq, &work->work);
} }
/* /*
@ -765,16 +737,14 @@ static int do_dma_request(struct mport_dma_req *req,
goto err_out; goto err_out;
} }
if (sync == RIO_TRANSFER_FAF) tx->callback = dma_xfer_callback;
tx->callback = dma_faf_callback;
else
tx->callback = dma_xfer_callback;
tx->callback_param = req; tx->callback_param = req;
req->dmach = chan; req->dmach = chan;
req->sync = sync; req->sync = sync;
req->status = DMA_IN_PROGRESS; req->status = DMA_IN_PROGRESS;
init_completion(&req->req_comp); init_completion(&req->req_comp);
kref_get(&req->refcount);
cookie = dmaengine_submit(tx); cookie = dmaengine_submit(tx);
req->cookie = cookie; req->cookie = cookie;
@ -785,6 +755,7 @@ static int do_dma_request(struct mport_dma_req *req,
if (dma_submit_error(cookie)) { if (dma_submit_error(cookie)) {
rmcd_error("submit err=%d (addr:0x%llx len:0x%llx)", rmcd_error("submit err=%d (addr:0x%llx len:0x%llx)",
cookie, xfer->rio_addr, xfer->length); cookie, xfer->rio_addr, xfer->length);
kref_put(&req->refcount, dma_req_free);
ret = -EIO; ret = -EIO;
goto err_out; goto err_out;
} }
@ -860,6 +831,8 @@ rio_dma_transfer(struct file *filp, u32 transfer_mode,
if (!req) if (!req)
return -ENOMEM; return -ENOMEM;
kref_init(&req->refcount);
ret = get_dma_channel(priv); ret = get_dma_channel(priv);
if (ret) { if (ret) {
kfree(req); kfree(req);
@ -968,42 +941,20 @@ rio_dma_transfer(struct file *filp, u32 transfer_mode,
ret = do_dma_request(req, xfer, sync, nents); ret = do_dma_request(req, xfer, sync, nents);
if (ret >= 0) { if (ret >= 0) {
if (sync == RIO_TRANSFER_SYNC) if (sync == RIO_TRANSFER_ASYNC)
goto sync_out; return ret; /* return ASYNC cookie */
return ret; /* return ASYNC cookie */ } else {
rmcd_debug(DMA, "do_dma_request failed with err=%d", ret);
} }
if (ret == -ETIMEDOUT || ret == -EINTR) {
/*
* This can happen only in case of SYNC transfer.
* Do not free unfinished request structure immediately.
* Place it into pending list and deal with it later
*/
spin_lock(&priv->req_lock);
list_add_tail(&req->node, &priv->pend_list);
spin_unlock(&priv->req_lock);
return ret;
}
rmcd_debug(DMA, "do_dma_request failed with err=%d", ret);
sync_out:
dma_unmap_sg(chan->device->dev, req->sgt.sgl, req->sgt.nents, dir);
sg_free_table(&req->sgt);
err_pg: err_pg:
if (page_list) { if (!req->page_list) {
for (i = 0; i < nr_pages; i++) for (i = 0; i < nr_pages; i++)
put_page(page_list[i]); put_page(page_list[i]);
kfree(page_list); kfree(page_list);
} }
err_req: err_req:
if (req->map) { kref_put(&req->refcount, dma_req_free);
mutex_lock(&md->buf_mutex);
kref_put(&req->map->ref, mport_release_mapping);
mutex_unlock(&md->buf_mutex);
}
put_dma_channel(priv);
kfree(req);
return ret; return ret;
} }
@ -1121,7 +1072,7 @@ static int rio_mport_wait_for_async_dma(struct file *filp, void __user *arg)
ret = 0; ret = 0;
if (req->status != DMA_IN_PROGRESS && req->status != DMA_PAUSED) if (req->status != DMA_IN_PROGRESS && req->status != DMA_PAUSED)
dma_req_free(req); kref_put(&req->refcount, dma_req_free);
return ret; return ret;
@ -1966,7 +1917,6 @@ static int mport_cdev_open(struct inode *inode, struct file *filp)
#ifdef CONFIG_RAPIDIO_DMA_ENGINE #ifdef CONFIG_RAPIDIO_DMA_ENGINE
INIT_LIST_HEAD(&priv->async_list); INIT_LIST_HEAD(&priv->async_list);
INIT_LIST_HEAD(&priv->pend_list);
spin_lock_init(&priv->req_lock); spin_lock_init(&priv->req_lock);
mutex_init(&priv->dma_lock); mutex_init(&priv->dma_lock);
#endif #endif
@ -2006,8 +1956,6 @@ static void mport_cdev_release_dma(struct file *filp)
md = priv->md; md = priv->md;
flush_workqueue(dma_wq);
spin_lock(&priv->req_lock); spin_lock(&priv->req_lock);
if (!list_empty(&priv->async_list)) { if (!list_empty(&priv->async_list)) {
rmcd_debug(EXIT, "async list not empty filp=%p %s(%d)", rmcd_debug(EXIT, "async list not empty filp=%p %s(%d)",
@ -2023,20 +1971,7 @@ static void mport_cdev_release_dma(struct file *filp)
req->filp, req->cookie, req->filp, req->cookie,
completion_done(&req->req_comp)?"yes":"no"); completion_done(&req->req_comp)?"yes":"no");
list_del(&req->node); list_del(&req->node);
dma_req_free(req); kref_put(&req->refcount, dma_req_free);
}
}
if (!list_empty(&priv->pend_list)) {
rmcd_debug(EXIT, "Free pending DMA requests for filp=%p %s(%d)",
filp, current->comm, task_pid_nr(current));
list_for_each_entry_safe(req,
req_next, &priv->pend_list, node) {
rmcd_debug(EXIT, "free req->filp=%p cookie=%d compl=%s",
req->filp, req->cookie,
completion_done(&req->req_comp)?"yes":"no");
list_del(&req->node);
dma_req_free(req);
} }
} }
@ -2048,15 +1983,6 @@ static void mport_cdev_release_dma(struct file *filp)
current->comm, task_pid_nr(current), wret); current->comm, task_pid_nr(current), wret);
} }
spin_lock(&priv->req_lock);
if (!list_empty(&priv->pend_list)) {
rmcd_debug(EXIT, "ATTN: pending DMA requests, filp=%p %s(%d)",
filp, current->comm, task_pid_nr(current));
}
spin_unlock(&priv->req_lock);
if (priv->dmach != priv->md->dma_chan) { if (priv->dmach != priv->md->dma_chan) {
rmcd_debug(EXIT, "Release DMA channel for filp=%p %s(%d)", rmcd_debug(EXIT, "Release DMA channel for filp=%p %s(%d)",
filp, current->comm, task_pid_nr(current)); filp, current->comm, task_pid_nr(current));
@ -2573,8 +2499,6 @@ static void mport_cdev_remove(struct mport_dev *md)
cdev_device_del(&md->cdev, &md->dev); cdev_device_del(&md->cdev, &md->dev);
mport_cdev_kill_fasync(md); mport_cdev_kill_fasync(md);
flush_workqueue(dma_wq);
/* TODO: do we need to give clients some time to close file /* TODO: do we need to give clients some time to close file
* descriptors? Simple wait for XX, or kref? * descriptors? Simple wait for XX, or kref?
*/ */
@ -2691,17 +2615,8 @@ static int __init mport_init(void)
goto err_cli; goto err_cli;
} }
dma_wq = create_singlethread_workqueue("dma_wq");
if (!dma_wq) {
rmcd_error("failed to create DMA work queue");
ret = -ENOMEM;
goto err_wq;
}
return 0; return 0;
err_wq:
class_interface_unregister(&rio_mport_interface);
err_cli: err_cli:
unregister_chrdev_region(dev_number, RIO_MAX_MPORTS); unregister_chrdev_region(dev_number, RIO_MAX_MPORTS);
err_chr: err_chr:
@ -2717,7 +2632,6 @@ static void __exit mport_exit(void)
class_interface_unregister(&rio_mport_interface); class_interface_unregister(&rio_mport_interface);
class_destroy(dev_class); class_destroy(dev_class);
unregister_chrdev_region(dev_number, RIO_MAX_MPORTS); unregister_chrdev_region(dev_number, RIO_MAX_MPORTS);
destroy_workqueue(dma_wq);
} }
module_init(mport_init); module_init(mport_init);

View File

@ -76,7 +76,7 @@ static u16 rio_destid_alloc(struct rio_net *net)
} }
/** /**
* rio_destid_reserve - Reserve the specivied destID * rio_destid_reserve - Reserve the specified destID
* @net: RIO network * @net: RIO network
* @destid: destID to reserve * @destid: destID to reserve
* *
@ -885,7 +885,7 @@ static struct rio_net *rio_scan_alloc_net(struct rio_mport *mport,
* *
* For each enumerated device, ensure that each switch in a system * For each enumerated device, ensure that each switch in a system
* has correct routing entries. Add routes for devices that where * has correct routing entries. Add routes for devices that where
* unknown dirung the first enumeration pass through the switch. * unknown during the first enumeration pass through the switch.
*/ */
static void rio_update_route_tables(struct rio_net *net) static void rio_update_route_tables(struct rio_net *net)
{ {
@ -983,7 +983,7 @@ static int rio_enum_mport(struct rio_mport *mport, u32 flags)
/* reserve mport destID in new net */ /* reserve mport destID in new net */
rio_destid_reserve(net, mport->host_deviceid); rio_destid_reserve(net, mport->host_deviceid);
/* Enable Input Output Port (transmitter reviever) */ /* Enable Input Output Port (transmitter receiver) */
rio_enable_rx_tx_port(mport, 1, 0, 0, 0); rio_enable_rx_tx_port(mport, 1, 0, 0, 0);
/* Set component tag for host */ /* Set component tag for host */

View File

@ -69,7 +69,7 @@ blkcnt_t dirty_cnt(struct inode *inode)
void *results[1]; void *results[1];
if (inode->i_mapping) if (inode->i_mapping)
cnt += radix_tree_gang_lookup_tag(&inode->i_mapping->page_tree, cnt += radix_tree_gang_lookup_tag(&inode->i_mapping->i_pages,
results, 0, 1, results, 0, 1,
PAGECACHE_TAG_DIRTY); PAGECACHE_TAG_DIRTY);
if (cnt == 0 && atomic_read(&vob->vob_mmap_cnt) > 0) if (cnt == 0 && atomic_read(&vob->vob_mmap_cnt) > 0)

View File

@ -934,14 +934,14 @@ static struct page *mdc_page_locate(struct address_space *mapping, __u64 *hash,
struct page *page; struct page *page;
int found; int found;
spin_lock_irq(&mapping->tree_lock); xa_lock_irq(&mapping->i_pages);
found = radix_tree_gang_lookup(&mapping->page_tree, found = radix_tree_gang_lookup(&mapping->i_pages,
(void **)&page, offset, 1); (void **)&page, offset, 1);
if (found > 0 && !radix_tree_exceptional_entry(page)) { if (found > 0 && !radix_tree_exceptional_entry(page)) {
struct lu_dirpage *dp; struct lu_dirpage *dp;
get_page(page); get_page(page);
spin_unlock_irq(&mapping->tree_lock); xa_unlock_irq(&mapping->i_pages);
/* /*
* In contrast to find_lock_page() we are sure that directory * In contrast to find_lock_page() we are sure that directory
* page cannot be truncated (while DLM lock is held) and, * page cannot be truncated (while DLM lock is held) and,
@ -989,7 +989,7 @@ static struct page *mdc_page_locate(struct address_space *mapping, __u64 *hash,
page = ERR_PTR(-EIO); page = ERR_PTR(-EIO);
} }
} else { } else {
spin_unlock_irq(&mapping->tree_lock); xa_unlock_irq(&mapping->i_pages);
page = NULL; page = NULL;
} }
return page; return page;

View File

@ -570,10 +570,11 @@ static int afs_writepages_region(struct address_space *mapping,
_debug("wback %lx", page->index); _debug("wback %lx", page->index);
/* at this point we hold neither mapping->tree_lock nor lock on /*
* the page itself: the page may be truncated or invalidated * at this point we hold neither the i_pages lock nor the
* (changing page->mapping to NULL), or even swizzled back from * page lock: the page may be truncated or invalidated
* swapper_space to tmpfs file mapping * (changing page->mapping to NULL), or even swizzled
* back from swapper_space to tmpfs file mapping
*/ */
ret = lock_page_killable(page); ret = lock_page_killable(page);
if (ret < 0) { if (ret < 0) {

View File

@ -19,9 +19,6 @@
*/ */
static autofs_wqt_t autofs4_next_wait_queue = 1; static autofs_wqt_t autofs4_next_wait_queue = 1;
/* These are the signals we allow interrupting a pending mount */
#define SHUTDOWN_SIGS (sigmask(SIGKILL) | sigmask(SIGINT) | sigmask(SIGQUIT))
void autofs4_catatonic_mode(struct autofs_sb_info *sbi) void autofs4_catatonic_mode(struct autofs_sb_info *sbi)
{ {
struct autofs_wait_queue *wq, *nwq; struct autofs_wait_queue *wq, *nwq;
@ -486,29 +483,7 @@ int autofs4_wait(struct autofs_sb_info *sbi,
* wq->name.name is NULL iff the lock is already released * wq->name.name is NULL iff the lock is already released
* or the mount has been made catatonic. * or the mount has been made catatonic.
*/ */
if (wq->name.name) { wait_event_killable(wq->queue, wq->name.name == NULL);
/* Block all but "shutdown" signals while waiting */
unsigned long shutdown_sigs_mask;
unsigned long irqflags;
sigset_t oldset;
spin_lock_irqsave(&current->sighand->siglock, irqflags);
oldset = current->blocked;
shutdown_sigs_mask = SHUTDOWN_SIGS & ~oldset.sig[0];
siginitsetinv(&current->blocked, shutdown_sigs_mask);
recalc_sigpending();
spin_unlock_irqrestore(&current->sighand->siglock, irqflags);
wait_event_interruptible(wq->queue, wq->name.name == NULL);
spin_lock_irqsave(&current->sighand->siglock, irqflags);
current->blocked = oldset;
recalc_sigpending();
spin_unlock_irqrestore(&current->sighand->siglock, irqflags);
} else {
pr_debug("skipped sleeping\n");
}
status = wq->status; status = wq->status;
/* /*
@ -574,7 +549,7 @@ int autofs4_wait_release(struct autofs_sb_info *sbi, autofs_wqt_t wait_queue_tok
kfree(wq->name.name); kfree(wq->name.name);
wq->name.name = NULL; /* Do not wait on this queue */ wq->name.name = NULL; /* Do not wait on this queue */
wq->status = status; wq->status = status;
wake_up_interruptible(&wq->queue); wake_up(&wq->queue);
if (!--wq->wait_ctr) if (!--wq->wait_ctr)
kfree(wq); kfree(wq);
mutex_unlock(&sbi->wq_mutex); mutex_unlock(&sbi->wq_mutex);

View File

@ -330,6 +330,7 @@ static int load_aout_binary(struct linux_binprm * bprm)
#ifdef __alpha__ #ifdef __alpha__
regs->gp = ex.a_gpvalue; regs->gp = ex.a_gpvalue;
#endif #endif
finalize_exec(bprm);
start_thread(regs, ex.a_entry, current->mm->start_stack); start_thread(regs, ex.a_entry, current->mm->start_stack);
return 0; return 0;
} }

View File

@ -377,6 +377,11 @@ static unsigned long elf_map(struct file *filep, unsigned long addr,
} else } else
map_addr = vm_mmap(filep, addr, size, prot, type, off); map_addr = vm_mmap(filep, addr, size, prot, type, off);
if ((type & MAP_FIXED_NOREPLACE) && BAD_ADDR(map_addr))
pr_info("%d (%s): Uhuuh, elf segment at %p requested but the memory is mapped already\n",
task_pid_nr(current), current->comm,
(void *)addr);
return(map_addr); return(map_addr);
} }
@ -575,7 +580,7 @@ static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
elf_prot |= PROT_EXEC; elf_prot |= PROT_EXEC;
vaddr = eppnt->p_vaddr; vaddr = eppnt->p_vaddr;
if (interp_elf_ex->e_type == ET_EXEC || load_addr_set) if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
elf_type |= MAP_FIXED; elf_type |= MAP_FIXED_NOREPLACE;
else if (no_base && interp_elf_ex->e_type == ET_DYN) else if (no_base && interp_elf_ex->e_type == ET_DYN)
load_addr = -vaddr; load_addr = -vaddr;
@ -890,7 +895,7 @@ static int load_elf_binary(struct linux_binprm *bprm)
the correct location in memory. */ the correct location in memory. */
for(i = 0, elf_ppnt = elf_phdata; for(i = 0, elf_ppnt = elf_phdata;
i < loc->elf_ex.e_phnum; i++, elf_ppnt++) { i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
int elf_prot = 0, elf_flags; int elf_prot = 0, elf_flags, elf_fixed = MAP_FIXED_NOREPLACE;
unsigned long k, vaddr; unsigned long k, vaddr;
unsigned long total_size = 0; unsigned long total_size = 0;
@ -922,6 +927,13 @@ static int load_elf_binary(struct linux_binprm *bprm)
*/ */
} }
} }
/*
* Some binaries have overlapping elf segments and then
* we have to forcefully map over an existing mapping
* e.g. over this newly established brk mapping.
*/
elf_fixed = MAP_FIXED;
} }
if (elf_ppnt->p_flags & PF_R) if (elf_ppnt->p_flags & PF_R)
@ -939,7 +951,7 @@ static int load_elf_binary(struct linux_binprm *bprm)
* the ET_DYN load_addr calculations, proceed normally. * the ET_DYN load_addr calculations, proceed normally.
*/ */
if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) { if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
elf_flags |= MAP_FIXED; elf_flags |= elf_fixed;
} else if (loc->elf_ex.e_type == ET_DYN) { } else if (loc->elf_ex.e_type == ET_DYN) {
/* /*
* This logic is run once for the first LOAD Program * This logic is run once for the first LOAD Program
@ -975,7 +987,7 @@ static int load_elf_binary(struct linux_binprm *bprm)
load_bias = ELF_ET_DYN_BASE; load_bias = ELF_ET_DYN_BASE;
if (current->flags & PF_RANDOMIZE) if (current->flags & PF_RANDOMIZE)
load_bias += arch_mmap_rnd(); load_bias += arch_mmap_rnd();
elf_flags |= MAP_FIXED; elf_flags |= elf_fixed;
} else } else
load_bias = 0; load_bias = 0;
@ -1155,6 +1167,7 @@ static int load_elf_binary(struct linux_binprm *bprm)
ELF_PLAT_INIT(regs, reloc_func_desc); ELF_PLAT_INIT(regs, reloc_func_desc);
#endif #endif
finalize_exec(bprm);
start_thread(regs, elf_entry, bprm->p); start_thread(regs, elf_entry, bprm->p);
retval = 0; retval = 0;
out: out:
@ -1234,7 +1247,7 @@ static int load_elf_library(struct file *file)
(eppnt->p_filesz + (eppnt->p_filesz +
ELF_PAGEOFFSET(eppnt->p_vaddr)), ELF_PAGEOFFSET(eppnt->p_vaddr)),
PROT_READ | PROT_WRITE | PROT_EXEC, PROT_READ | PROT_WRITE | PROT_EXEC,
MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE, MAP_FIXED_NOREPLACE | MAP_PRIVATE | MAP_DENYWRITE,
(eppnt->p_offset - (eppnt->p_offset -
ELF_PAGEOFFSET(eppnt->p_vaddr))); ELF_PAGEOFFSET(eppnt->p_vaddr)));
if (error != ELF_PAGESTART(eppnt->p_vaddr)) if (error != ELF_PAGESTART(eppnt->p_vaddr))

View File

@ -463,6 +463,7 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm)
dynaddr); dynaddr);
#endif #endif
finalize_exec(bprm);
/* everything is now ready... get the userspace context ready to roll */ /* everything is now ready... get the userspace context ready to roll */
entryaddr = interp_params.entry_addr ?: exec_params.entry_addr; entryaddr = interp_params.entry_addr ?: exec_params.entry_addr;
start_thread(regs, entryaddr, current->mm->start_stack); start_thread(regs, entryaddr, current->mm->start_stack);

View File

@ -994,6 +994,7 @@ static int load_flat_binary(struct linux_binprm *bprm)
FLAT_PLAT_INIT(regs); FLAT_PLAT_INIT(regs);
#endif #endif
finalize_exec(bprm);
pr_debug("start_thread(regs=0x%p, entry=0x%lx, start_stack=0x%lx)\n", pr_debug("start_thread(regs=0x%p, entry=0x%lx, start_stack=0x%lx)\n",
regs, start_addr, current->mm->start_stack); regs, start_addr, current->mm->start_stack);
start_thread(regs, start_addr, current->mm->start_stack); start_thread(regs, start_addr, current->mm->start_stack);

View File

@ -458,7 +458,7 @@ static noinline int add_ra_bio_pages(struct inode *inode,
break; break;
rcu_read_lock(); rcu_read_lock();
page = radix_tree_lookup(&mapping->page_tree, pg_index); page = radix_tree_lookup(&mapping->i_pages, pg_index);
rcu_read_unlock(); rcu_read_unlock();
if (page && !radix_tree_exceptional_entry(page)) { if (page && !radix_tree_exceptional_entry(page)) {
misses++; misses++;

View File

@ -3963,11 +3963,11 @@ static int extent_write_cache_pages(struct address_space *mapping,
done_index = page->index; done_index = page->index;
/* /*
* At this point we hold neither mapping->tree_lock nor * At this point we hold neither the i_pages lock nor
* lock on the page itself: the page may be truncated or * the page lock: the page may be truncated or
* invalidated (changing page->mapping to NULL), or even * invalidated (changing page->mapping to NULL),
* swizzled back from swapper_space to tmpfs file * or even swizzled back from swapper_space to
* mapping * tmpfs file mapping
*/ */
if (!trylock_page(page)) { if (!trylock_page(page)) {
flush_write_bio(epd); flush_write_bio(epd);
@ -5174,13 +5174,13 @@ void clear_extent_buffer_dirty(struct extent_buffer *eb)
WARN_ON(!PagePrivate(page)); WARN_ON(!PagePrivate(page));
clear_page_dirty_for_io(page); clear_page_dirty_for_io(page);
spin_lock_irq(&page->mapping->tree_lock); xa_lock_irq(&page->mapping->i_pages);
if (!PageDirty(page)) { if (!PageDirty(page)) {
radix_tree_tag_clear(&page->mapping->page_tree, radix_tree_tag_clear(&page->mapping->i_pages,
page_index(page), page_index(page),
PAGECACHE_TAG_DIRTY); PAGECACHE_TAG_DIRTY);
} }
spin_unlock_irq(&page->mapping->tree_lock); xa_unlock_irq(&page->mapping->i_pages);
ClearPageError(page); ClearPageError(page);
unlock_page(page); unlock_page(page);
} }

View File

@ -185,10 +185,9 @@ EXPORT_SYMBOL(end_buffer_write_sync);
* we get exclusion from try_to_free_buffers with the blockdev mapping's * we get exclusion from try_to_free_buffers with the blockdev mapping's
* private_lock. * private_lock.
* *
* Hack idea: for the blockdev mapping, i_bufferlist_lock contention * Hack idea: for the blockdev mapping, private_lock contention
* may be quite high. This code could TryLock the page, and if that * may be quite high. This code could TryLock the page, and if that
* succeeds, there is no need to take private_lock. (But if * succeeds, there is no need to take private_lock.
* private_lock is contended then so is mapping->tree_lock).
*/ */
static struct buffer_head * static struct buffer_head *
__find_get_block_slow(struct block_device *bdev, sector_t block) __find_get_block_slow(struct block_device *bdev, sector_t block)
@ -594,20 +593,21 @@ EXPORT_SYMBOL(mark_buffer_dirty_inode);
* *
* The caller must hold lock_page_memcg(). * The caller must hold lock_page_memcg().
*/ */
static void __set_page_dirty(struct page *page, struct address_space *mapping, void __set_page_dirty(struct page *page, struct address_space *mapping,
int warn) int warn)
{ {
unsigned long flags; unsigned long flags;
spin_lock_irqsave(&mapping->tree_lock, flags); xa_lock_irqsave(&mapping->i_pages, flags);
if (page->mapping) { /* Race with truncate? */ if (page->mapping) { /* Race with truncate? */
WARN_ON_ONCE(warn && !PageUptodate(page)); WARN_ON_ONCE(warn && !PageUptodate(page));
account_page_dirtied(page, mapping); account_page_dirtied(page, mapping);
radix_tree_tag_set(&mapping->page_tree, radix_tree_tag_set(&mapping->i_pages,
page_index(page), PAGECACHE_TAG_DIRTY); page_index(page), PAGECACHE_TAG_DIRTY);
} }
spin_unlock_irqrestore(&mapping->tree_lock, flags); xa_unlock_irqrestore(&mapping->i_pages, flags);
} }
EXPORT_SYMBOL_GPL(__set_page_dirty);
/* /*
* Add a page to the dirty page list. * Add a page to the dirty page list.
@ -1095,7 +1095,7 @@ __getblk_slow(struct block_device *bdev, sector_t block,
* inode list. * inode list.
* *
* mark_buffer_dirty() is atomic. It takes bh->b_page->mapping->private_lock, * mark_buffer_dirty() is atomic. It takes bh->b_page->mapping->private_lock,
* mapping->tree_lock and mapping->host->i_lock. * i_pages lock and mapping->host->i_lock.
*/ */
void mark_buffer_dirty(struct buffer_head *bh) void mark_buffer_dirty(struct buffer_head *bh)
{ {

View File

@ -1987,11 +1987,10 @@ wdata_prepare_pages(struct cifs_writedata *wdata, unsigned int found_pages,
for (i = 0; i < found_pages; i++) { for (i = 0; i < found_pages; i++) {
page = wdata->pages[i]; page = wdata->pages[i];
/* /*
* At this point we hold neither mapping->tree_lock nor * At this point we hold neither the i_pages lock nor the
* lock on the page itself: the page may be truncated or * page lock: the page may be truncated or invalidated
* invalidated (changing page->mapping to NULL), or even * (changing page->mapping to NULL), or even swizzled
* swizzled back from swapper_space to tmpfs file * back from swapper_space to tmpfs file mapping
* mapping
*/ */
if (nr_pages == 0) if (nr_pages == 0)

124
fs/dax.c
View File

@ -158,11 +158,9 @@ static int wake_exceptional_entry_func(wait_queue_entry_t *wait, unsigned int mo
} }
/* /*
* We do not necessarily hold the mapping->tree_lock when we call this * @entry may no longer be the entry at the index in the mapping.
* function so it is possible that 'entry' is no longer a valid item in the * The important information it's conveying is whether the entry at
* radix tree. This is okay because all we really need to do is to find the * this index used to be a PMD entry.
* correct waitqueue where tasks might be waiting for that old 'entry' and
* wake them.
*/ */
static void dax_wake_mapping_entry_waiter(struct address_space *mapping, static void dax_wake_mapping_entry_waiter(struct address_space *mapping,
pgoff_t index, void *entry, bool wake_all) pgoff_t index, void *entry, bool wake_all)
@ -174,7 +172,7 @@ static void dax_wake_mapping_entry_waiter(struct address_space *mapping,
/* /*
* Checking for locked entry and prepare_to_wait_exclusive() happens * Checking for locked entry and prepare_to_wait_exclusive() happens
* under mapping->tree_lock, ditto for entry handling in our callers. * under the i_pages lock, ditto for entry handling in our callers.
* So at this point all tasks that could have seen our entry locked * So at this point all tasks that could have seen our entry locked
* must be in the waitqueue and the following check will see them. * must be in the waitqueue and the following check will see them.
*/ */
@ -183,41 +181,39 @@ static void dax_wake_mapping_entry_waiter(struct address_space *mapping,
} }
/* /*
* Check whether the given slot is locked. The function must be called with * Check whether the given slot is locked. Must be called with the i_pages
* mapping->tree_lock held * lock held.
*/ */
static inline int slot_locked(struct address_space *mapping, void **slot) static inline int slot_locked(struct address_space *mapping, void **slot)
{ {
unsigned long entry = (unsigned long) unsigned long entry = (unsigned long)
radix_tree_deref_slot_protected(slot, &mapping->tree_lock); radix_tree_deref_slot_protected(slot, &mapping->i_pages.xa_lock);
return entry & RADIX_DAX_ENTRY_LOCK; return entry & RADIX_DAX_ENTRY_LOCK;
} }
/* /*
* Mark the given slot is locked. The function must be called with * Mark the given slot as locked. Must be called with the i_pages lock held.
* mapping->tree_lock held
*/ */
static inline void *lock_slot(struct address_space *mapping, void **slot) static inline void *lock_slot(struct address_space *mapping, void **slot)
{ {
unsigned long entry = (unsigned long) unsigned long entry = (unsigned long)
radix_tree_deref_slot_protected(slot, &mapping->tree_lock); radix_tree_deref_slot_protected(slot, &mapping->i_pages.xa_lock);
entry |= RADIX_DAX_ENTRY_LOCK; entry |= RADIX_DAX_ENTRY_LOCK;
radix_tree_replace_slot(&mapping->page_tree, slot, (void *)entry); radix_tree_replace_slot(&mapping->i_pages, slot, (void *)entry);
return (void *)entry; return (void *)entry;
} }
/* /*
* Mark the given slot is unlocked. The function must be called with * Mark the given slot as unlocked. Must be called with the i_pages lock held.
* mapping->tree_lock held
*/ */
static inline void *unlock_slot(struct address_space *mapping, void **slot) static inline void *unlock_slot(struct address_space *mapping, void **slot)
{ {
unsigned long entry = (unsigned long) unsigned long entry = (unsigned long)
radix_tree_deref_slot_protected(slot, &mapping->tree_lock); radix_tree_deref_slot_protected(slot, &mapping->i_pages.xa_lock);
entry &= ~(unsigned long)RADIX_DAX_ENTRY_LOCK; entry &= ~(unsigned long)RADIX_DAX_ENTRY_LOCK;
radix_tree_replace_slot(&mapping->page_tree, slot, (void *)entry); radix_tree_replace_slot(&mapping->i_pages, slot, (void *)entry);
return (void *)entry; return (void *)entry;
} }
@ -228,7 +224,7 @@ static inline void *unlock_slot(struct address_space *mapping, void **slot)
* put_locked_mapping_entry() when he locked the entry and now wants to * put_locked_mapping_entry() when he locked the entry and now wants to
* unlock it. * unlock it.
* *
* The function must be called with mapping->tree_lock held. * Must be called with the i_pages lock held.
*/ */
static void *get_unlocked_mapping_entry(struct address_space *mapping, static void *get_unlocked_mapping_entry(struct address_space *mapping,
pgoff_t index, void ***slotp) pgoff_t index, void ***slotp)
@ -241,7 +237,7 @@ static void *get_unlocked_mapping_entry(struct address_space *mapping,
ewait.wait.func = wake_exceptional_entry_func; ewait.wait.func = wake_exceptional_entry_func;
for (;;) { for (;;) {
entry = __radix_tree_lookup(&mapping->page_tree, index, NULL, entry = __radix_tree_lookup(&mapping->i_pages, index, NULL,
&slot); &slot);
if (!entry || if (!entry ||
WARN_ON_ONCE(!radix_tree_exceptional_entry(entry)) || WARN_ON_ONCE(!radix_tree_exceptional_entry(entry)) ||
@ -254,10 +250,10 @@ static void *get_unlocked_mapping_entry(struct address_space *mapping,
wq = dax_entry_waitqueue(mapping, index, entry, &ewait.key); wq = dax_entry_waitqueue(mapping, index, entry, &ewait.key);
prepare_to_wait_exclusive(wq, &ewait.wait, prepare_to_wait_exclusive(wq, &ewait.wait,
TASK_UNINTERRUPTIBLE); TASK_UNINTERRUPTIBLE);
spin_unlock_irq(&mapping->tree_lock); xa_unlock_irq(&mapping->i_pages);
schedule(); schedule();
finish_wait(wq, &ewait.wait); finish_wait(wq, &ewait.wait);
spin_lock_irq(&mapping->tree_lock); xa_lock_irq(&mapping->i_pages);
} }
} }
@ -266,15 +262,15 @@ static void dax_unlock_mapping_entry(struct address_space *mapping,
{ {
void *entry, **slot; void *entry, **slot;
spin_lock_irq(&mapping->tree_lock); xa_lock_irq(&mapping->i_pages);
entry = __radix_tree_lookup(&mapping->page_tree, index, NULL, &slot); entry = __radix_tree_lookup(&mapping->i_pages, index, NULL, &slot);
if (WARN_ON_ONCE(!entry || !radix_tree_exceptional_entry(entry) || if (WARN_ON_ONCE(!entry || !radix_tree_exceptional_entry(entry) ||
!slot_locked(mapping, slot))) { !slot_locked(mapping, slot))) {
spin_unlock_irq(&mapping->tree_lock); xa_unlock_irq(&mapping->i_pages);
return; return;
} }
unlock_slot(mapping, slot); unlock_slot(mapping, slot);
spin_unlock_irq(&mapping->tree_lock); xa_unlock_irq(&mapping->i_pages);
dax_wake_mapping_entry_waiter(mapping, index, entry, false); dax_wake_mapping_entry_waiter(mapping, index, entry, false);
} }
@ -388,7 +384,7 @@ static void *grab_mapping_entry(struct address_space *mapping, pgoff_t index,
void *entry, **slot; void *entry, **slot;
restart: restart:
spin_lock_irq(&mapping->tree_lock); xa_lock_irq(&mapping->i_pages);
entry = get_unlocked_mapping_entry(mapping, index, &slot); entry = get_unlocked_mapping_entry(mapping, index, &slot);
if (WARN_ON_ONCE(entry && !radix_tree_exceptional_entry(entry))) { if (WARN_ON_ONCE(entry && !radix_tree_exceptional_entry(entry))) {
@ -420,12 +416,12 @@ static void *grab_mapping_entry(struct address_space *mapping, pgoff_t index,
if (pmd_downgrade) { if (pmd_downgrade) {
/* /*
* Make sure 'entry' remains valid while we drop * Make sure 'entry' remains valid while we drop
* mapping->tree_lock. * the i_pages lock.
*/ */
entry = lock_slot(mapping, slot); entry = lock_slot(mapping, slot);
} }
spin_unlock_irq(&mapping->tree_lock); xa_unlock_irq(&mapping->i_pages);
/* /*
* Besides huge zero pages the only other thing that gets * Besides huge zero pages the only other thing that gets
* downgraded are empty entries which don't need to be * downgraded are empty entries which don't need to be
@ -442,27 +438,27 @@ static void *grab_mapping_entry(struct address_space *mapping, pgoff_t index,
put_locked_mapping_entry(mapping, index); put_locked_mapping_entry(mapping, index);
return ERR_PTR(err); return ERR_PTR(err);
} }
spin_lock_irq(&mapping->tree_lock); xa_lock_irq(&mapping->i_pages);
if (!entry) { if (!entry) {
/* /*
* We needed to drop the page_tree lock while calling * We needed to drop the i_pages lock while calling
* radix_tree_preload() and we didn't have an entry to * radix_tree_preload() and we didn't have an entry to
* lock. See if another thread inserted an entry at * lock. See if another thread inserted an entry at
* our index during this time. * our index during this time.
*/ */
entry = __radix_tree_lookup(&mapping->page_tree, index, entry = __radix_tree_lookup(&mapping->i_pages, index,
NULL, &slot); NULL, &slot);
if (entry) { if (entry) {
radix_tree_preload_end(); radix_tree_preload_end();
spin_unlock_irq(&mapping->tree_lock); xa_unlock_irq(&mapping->i_pages);
goto restart; goto restart;
} }
} }
if (pmd_downgrade) { if (pmd_downgrade) {
dax_disassociate_entry(entry, mapping, false); dax_disassociate_entry(entry, mapping, false);
radix_tree_delete(&mapping->page_tree, index); radix_tree_delete(&mapping->i_pages, index);
mapping->nrexceptional--; mapping->nrexceptional--;
dax_wake_mapping_entry_waiter(mapping, index, entry, dax_wake_mapping_entry_waiter(mapping, index, entry,
true); true);
@ -470,11 +466,11 @@ static void *grab_mapping_entry(struct address_space *mapping, pgoff_t index,
entry = dax_radix_locked_entry(0, size_flag | RADIX_DAX_EMPTY); entry = dax_radix_locked_entry(0, size_flag | RADIX_DAX_EMPTY);
err = __radix_tree_insert(&mapping->page_tree, index, err = __radix_tree_insert(&mapping->i_pages, index,
dax_radix_order(entry), entry); dax_radix_order(entry), entry);
radix_tree_preload_end(); radix_tree_preload_end();
if (err) { if (err) {
spin_unlock_irq(&mapping->tree_lock); xa_unlock_irq(&mapping->i_pages);
/* /*
* Our insertion of a DAX entry failed, most likely * Our insertion of a DAX entry failed, most likely
* because we were inserting a PMD entry and it * because we were inserting a PMD entry and it
@ -487,12 +483,12 @@ static void *grab_mapping_entry(struct address_space *mapping, pgoff_t index,
} }
/* Good, we have inserted empty locked entry into the tree. */ /* Good, we have inserted empty locked entry into the tree. */
mapping->nrexceptional++; mapping->nrexceptional++;
spin_unlock_irq(&mapping->tree_lock); xa_unlock_irq(&mapping->i_pages);
return entry; return entry;
} }
entry = lock_slot(mapping, slot); entry = lock_slot(mapping, slot);
out_unlock: out_unlock:
spin_unlock_irq(&mapping->tree_lock); xa_unlock_irq(&mapping->i_pages);
return entry; return entry;
} }
@ -501,23 +497,23 @@ static int __dax_invalidate_mapping_entry(struct address_space *mapping,
{ {
int ret = 0; int ret = 0;
void *entry; void *entry;
struct radix_tree_root *page_tree = &mapping->page_tree; struct radix_tree_root *pages = &mapping->i_pages;
spin_lock_irq(&mapping->tree_lock); xa_lock_irq(pages);
entry = get_unlocked_mapping_entry(mapping, index, NULL); entry = get_unlocked_mapping_entry(mapping, index, NULL);
if (!entry || WARN_ON_ONCE(!radix_tree_exceptional_entry(entry))) if (!entry || WARN_ON_ONCE(!radix_tree_exceptional_entry(entry)))
goto out; goto out;
if (!trunc && if (!trunc &&
(radix_tree_tag_get(page_tree, index, PAGECACHE_TAG_DIRTY) || (radix_tree_tag_get(pages, index, PAGECACHE_TAG_DIRTY) ||
radix_tree_tag_get(page_tree, index, PAGECACHE_TAG_TOWRITE))) radix_tree_tag_get(pages, index, PAGECACHE_TAG_TOWRITE)))
goto out; goto out;
dax_disassociate_entry(entry, mapping, trunc); dax_disassociate_entry(entry, mapping, trunc);
radix_tree_delete(page_tree, index); radix_tree_delete(pages, index);
mapping->nrexceptional--; mapping->nrexceptional--;
ret = 1; ret = 1;
out: out:
put_unlocked_mapping_entry(mapping, index, entry); put_unlocked_mapping_entry(mapping, index, entry);
spin_unlock_irq(&mapping->tree_lock); xa_unlock_irq(pages);
return ret; return ret;
} }
/* /*
@ -587,7 +583,7 @@ static void *dax_insert_mapping_entry(struct address_space *mapping,
void *entry, pfn_t pfn_t, void *entry, pfn_t pfn_t,
unsigned long flags, bool dirty) unsigned long flags, bool dirty)
{ {
struct radix_tree_root *page_tree = &mapping->page_tree; struct radix_tree_root *pages = &mapping->i_pages;
unsigned long pfn = pfn_t_to_pfn(pfn_t); unsigned long pfn = pfn_t_to_pfn(pfn_t);
pgoff_t index = vmf->pgoff; pgoff_t index = vmf->pgoff;
void *new_entry; void *new_entry;
@ -604,7 +600,7 @@ static void *dax_insert_mapping_entry(struct address_space *mapping,
unmap_mapping_pages(mapping, vmf->pgoff, 1, false); unmap_mapping_pages(mapping, vmf->pgoff, 1, false);
} }
spin_lock_irq(&mapping->tree_lock); xa_lock_irq(pages);
new_entry = dax_radix_locked_entry(pfn, flags); new_entry = dax_radix_locked_entry(pfn, flags);
if (dax_entry_size(entry) != dax_entry_size(new_entry)) { if (dax_entry_size(entry) != dax_entry_size(new_entry)) {
dax_disassociate_entry(entry, mapping, false); dax_disassociate_entry(entry, mapping, false);
@ -624,17 +620,17 @@ static void *dax_insert_mapping_entry(struct address_space *mapping,
void **slot; void **slot;
void *ret; void *ret;
ret = __radix_tree_lookup(page_tree, index, &node, &slot); ret = __radix_tree_lookup(pages, index, &node, &slot);
WARN_ON_ONCE(ret != entry); WARN_ON_ONCE(ret != entry);
__radix_tree_replace(page_tree, node, slot, __radix_tree_replace(pages, node, slot,
new_entry, NULL); new_entry, NULL);
entry = new_entry; entry = new_entry;
} }
if (dirty) if (dirty)
radix_tree_tag_set(page_tree, index, PAGECACHE_TAG_DIRTY); radix_tree_tag_set(pages, index, PAGECACHE_TAG_DIRTY);
spin_unlock_irq(&mapping->tree_lock); xa_unlock_irq(pages);
return entry; return entry;
} }
@ -723,7 +719,7 @@ static void dax_mapping_entry_mkclean(struct address_space *mapping,
static int dax_writeback_one(struct dax_device *dax_dev, static int dax_writeback_one(struct dax_device *dax_dev,
struct address_space *mapping, pgoff_t index, void *entry) struct address_space *mapping, pgoff_t index, void *entry)
{ {
struct radix_tree_root *page_tree = &mapping->page_tree; struct radix_tree_root *pages = &mapping->i_pages;
void *entry2, **slot; void *entry2, **slot;
unsigned long pfn; unsigned long pfn;
long ret = 0; long ret = 0;
@ -736,7 +732,7 @@ static int dax_writeback_one(struct dax_device *dax_dev,
if (WARN_ON(!radix_tree_exceptional_entry(entry))) if (WARN_ON(!radix_tree_exceptional_entry(entry)))
return -EIO; return -EIO;
spin_lock_irq(&mapping->tree_lock); xa_lock_irq(pages);
entry2 = get_unlocked_mapping_entry(mapping, index, &slot); entry2 = get_unlocked_mapping_entry(mapping, index, &slot);
/* Entry got punched out / reallocated? */ /* Entry got punched out / reallocated? */
if (!entry2 || WARN_ON_ONCE(!radix_tree_exceptional_entry(entry2))) if (!entry2 || WARN_ON_ONCE(!radix_tree_exceptional_entry(entry2)))
@ -755,7 +751,7 @@ static int dax_writeback_one(struct dax_device *dax_dev,
} }
/* Another fsync thread may have already written back this entry */ /* Another fsync thread may have already written back this entry */
if (!radix_tree_tag_get(page_tree, index, PAGECACHE_TAG_TOWRITE)) if (!radix_tree_tag_get(pages, index, PAGECACHE_TAG_TOWRITE))
goto put_unlocked; goto put_unlocked;
/* Lock the entry to serialize with page faults */ /* Lock the entry to serialize with page faults */
entry = lock_slot(mapping, slot); entry = lock_slot(mapping, slot);
@ -763,11 +759,11 @@ static int dax_writeback_one(struct dax_device *dax_dev,
* We can clear the tag now but we have to be careful so that concurrent * We can clear the tag now but we have to be careful so that concurrent
* dax_writeback_one() calls for the same index cannot finish before we * dax_writeback_one() calls for the same index cannot finish before we
* actually flush the caches. This is achieved as the calls will look * actually flush the caches. This is achieved as the calls will look
* at the entry only under tree_lock and once they do that they will * at the entry only under the i_pages lock and once they do that
* see the entry locked and wait for it to unlock. * they will see the entry locked and wait for it to unlock.
*/ */
radix_tree_tag_clear(page_tree, index, PAGECACHE_TAG_TOWRITE); radix_tree_tag_clear(pages, index, PAGECACHE_TAG_TOWRITE);
spin_unlock_irq(&mapping->tree_lock); xa_unlock_irq(pages);
/* /*
* Even if dax_writeback_mapping_range() was given a wbc->range_start * Even if dax_writeback_mapping_range() was given a wbc->range_start
@ -787,16 +783,16 @@ static int dax_writeback_one(struct dax_device *dax_dev,
* the pfn mappings are writeprotected and fault waits for mapping * the pfn mappings are writeprotected and fault waits for mapping
* entry lock. * entry lock.
*/ */
spin_lock_irq(&mapping->tree_lock); xa_lock_irq(pages);
radix_tree_tag_clear(page_tree, index, PAGECACHE_TAG_DIRTY); radix_tree_tag_clear(pages, index, PAGECACHE_TAG_DIRTY);
spin_unlock_irq(&mapping->tree_lock); xa_unlock_irq(pages);
trace_dax_writeback_one(mapping->host, index, size >> PAGE_SHIFT); trace_dax_writeback_one(mapping->host, index, size >> PAGE_SHIFT);
put_locked_mapping_entry(mapping, index); put_locked_mapping_entry(mapping, index);
return ret; return ret;
put_unlocked: put_unlocked:
put_unlocked_mapping_entry(mapping, index, entry2); put_unlocked_mapping_entry(mapping, index, entry2);
spin_unlock_irq(&mapping->tree_lock); xa_unlock_irq(pages);
return ret; return ret;
} }
@ -1566,21 +1562,21 @@ static int dax_insert_pfn_mkwrite(struct vm_fault *vmf,
pgoff_t index = vmf->pgoff; pgoff_t index = vmf->pgoff;
int vmf_ret, error; int vmf_ret, error;
spin_lock_irq(&mapping->tree_lock); xa_lock_irq(&mapping->i_pages);
entry = get_unlocked_mapping_entry(mapping, index, &slot); entry = get_unlocked_mapping_entry(mapping, index, &slot);
/* Did we race with someone splitting entry or so? */ /* Did we race with someone splitting entry or so? */
if (!entry || if (!entry ||
(pe_size == PE_SIZE_PTE && !dax_is_pte_entry(entry)) || (pe_size == PE_SIZE_PTE && !dax_is_pte_entry(entry)) ||
(pe_size == PE_SIZE_PMD && !dax_is_pmd_entry(entry))) { (pe_size == PE_SIZE_PMD && !dax_is_pmd_entry(entry))) {
put_unlocked_mapping_entry(mapping, index, entry); put_unlocked_mapping_entry(mapping, index, entry);
spin_unlock_irq(&mapping->tree_lock); xa_unlock_irq(&mapping->i_pages);
trace_dax_insert_pfn_mkwrite_no_entry(mapping->host, vmf, trace_dax_insert_pfn_mkwrite_no_entry(mapping->host, vmf,
VM_FAULT_NOPAGE); VM_FAULT_NOPAGE);
return VM_FAULT_NOPAGE; return VM_FAULT_NOPAGE;
} }
radix_tree_tag_set(&mapping->page_tree, index, PAGECACHE_TAG_DIRTY); radix_tree_tag_set(&mapping->i_pages, index, PAGECACHE_TAG_DIRTY);
entry = lock_slot(mapping, slot); entry = lock_slot(mapping, slot);
spin_unlock_irq(&mapping->tree_lock); xa_unlock_irq(&mapping->i_pages);
switch (pe_size) { switch (pe_size) {
case PE_SIZE_PTE: case PE_SIZE_PTE:
error = vm_insert_mixed_mkwrite(vmf->vma, vmf->address, pfn); error = vm_insert_mixed_mkwrite(vmf->vma, vmf->address, pfn);

View File

@ -257,11 +257,25 @@ static void __d_free(struct rcu_head *head)
kmem_cache_free(dentry_cache, dentry); kmem_cache_free(dentry_cache, dentry);
} }
/*
 * Release an external (out-of-line) dentry name.
 *
 * Has the rcu_head callback signature so it can be handed to call_rcu();
 * @head is the u.head member embedded in the struct external_name to free.
 */
static void __d_free_external_name(struct rcu_head *head)
{
struct external_name *name = container_of(head, struct external_name,
u.head);
/*
 * Subtract this allocation's ksize() from the node's
 * NR_INDIRECTLY_RECLAIMABLE_BYTES counter before the memory is freed;
 * ksize() must be read while @name is still allocated.
 */
mod_node_page_state(page_pgdat(virt_to_page(name)),
NR_INDIRECTLY_RECLAIMABLE_BYTES,
-ksize(name));
kfree(name);
}
static void __d_free_external(struct rcu_head *head) static void __d_free_external(struct rcu_head *head)
{ {
struct dentry *dentry = container_of(head, struct dentry, d_u.d_rcu); struct dentry *dentry = container_of(head, struct dentry, d_u.d_rcu);
kfree(external_name(dentry));
kmem_cache_free(dentry_cache, dentry); __d_free_external_name(&external_name(dentry)->u.head);
kmem_cache_free(dentry_cache, dentry);
} }
static inline int dname_external(const struct dentry *dentry) static inline int dname_external(const struct dentry *dentry)
@ -291,7 +305,7 @@ void release_dentry_name_snapshot(struct name_snapshot *name)
struct external_name *p; struct external_name *p;
p = container_of(name->name, struct external_name, name[0]); p = container_of(name->name, struct external_name, name[0]);
if (unlikely(atomic_dec_and_test(&p->u.count))) if (unlikely(atomic_dec_and_test(&p->u.count)))
kfree_rcu(p, u.head); call_rcu(&p->u.head, __d_free_external_name);
} }
} }
EXPORT_SYMBOL(release_dentry_name_snapshot); EXPORT_SYMBOL(release_dentry_name_snapshot);
@ -1038,6 +1052,8 @@ static void shrink_dentry_list(struct list_head *list)
while (!list_empty(list)) { while (!list_empty(list)) {
struct dentry *dentry, *parent; struct dentry *dentry, *parent;
cond_resched();
dentry = list_entry(list->prev, struct dentry, d_lru); dentry = list_entry(list->prev, struct dentry, d_lru);
spin_lock(&dentry->d_lock); spin_lock(&dentry->d_lock);
rcu_read_lock(); rcu_read_lock();
@ -1191,7 +1207,6 @@ void shrink_dcache_sb(struct super_block *sb)
this_cpu_sub(nr_dentry_unused, freed); this_cpu_sub(nr_dentry_unused, freed);
shrink_dentry_list(&dispose); shrink_dentry_list(&dispose);
cond_resched();
} while (list_lru_count(&sb->s_dentry_lru) > 0); } while (list_lru_count(&sb->s_dentry_lru) > 0);
} }
EXPORT_SYMBOL(shrink_dcache_sb); EXPORT_SYMBOL(shrink_dcache_sb);
@ -1473,7 +1488,6 @@ void shrink_dcache_parent(struct dentry *parent)
break; break;
shrink_dentry_list(&data.dispose); shrink_dentry_list(&data.dispose);
cond_resched();
} }
} }
EXPORT_SYMBOL(shrink_dcache_parent); EXPORT_SYMBOL(shrink_dcache_parent);
@ -1600,7 +1614,6 @@ void d_invalidate(struct dentry *dentry)
detach_mounts(data.mountpoint); detach_mounts(data.mountpoint);
dput(data.mountpoint); dput(data.mountpoint);
} }
cond_resched();
} }
} }
EXPORT_SYMBOL(d_invalidate); EXPORT_SYMBOL(d_invalidate);
@ -1617,6 +1630,7 @@ EXPORT_SYMBOL(d_invalidate);
struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name) struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name)
{ {
struct external_name *ext = NULL;
struct dentry *dentry; struct dentry *dentry;
char *dname; char *dname;
int err; int err;
@ -1637,14 +1651,14 @@ struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name)
dname = dentry->d_iname; dname = dentry->d_iname;
} else if (name->len > DNAME_INLINE_LEN-1) { } else if (name->len > DNAME_INLINE_LEN-1) {
size_t size = offsetof(struct external_name, name[1]); size_t size = offsetof(struct external_name, name[1]);
struct external_name *p = kmalloc(size + name->len,
GFP_KERNEL_ACCOUNT); ext = kmalloc(size + name->len, GFP_KERNEL_ACCOUNT);
if (!p) { if (!ext) {
kmem_cache_free(dentry_cache, dentry); kmem_cache_free(dentry_cache, dentry);
return NULL; return NULL;
} }
atomic_set(&p->u.count, 1); atomic_set(&ext->u.count, 1);
dname = p->name; dname = ext->name;
} else { } else {
dname = dentry->d_iname; dname = dentry->d_iname;
} }
@ -1683,6 +1697,12 @@ struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name)
} }
} }
if (unlikely(ext)) {
pg_data_t *pgdat = page_pgdat(virt_to_page(ext));
mod_node_page_state(pgdat, NR_INDIRECTLY_RECLAIMABLE_BYTES,
ksize(ext));
}
this_cpu_inc(nr_dentry); this_cpu_inc(nr_dentry);
return dentry; return dentry;
@ -2770,7 +2790,7 @@ static void copy_name(struct dentry *dentry, struct dentry *target)
dentry->d_name.hash_len = target->d_name.hash_len; dentry->d_name.hash_len = target->d_name.hash_len;
} }
if (old_name && likely(atomic_dec_and_test(&old_name->u.count))) if (old_name && likely(atomic_dec_and_test(&old_name->u.count)))
kfree_rcu(old_name, u.head); call_rcu(&old_name->u.head, __d_free_external_name);
} }
/* /*

View File

@ -257,7 +257,7 @@ static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
* to work from. * to work from.
*/ */
limit = _STK_LIM / 4 * 3; limit = _STK_LIM / 4 * 3;
limit = min(limit, rlimit(RLIMIT_STACK) / 4); limit = min(limit, bprm->rlim_stack.rlim_cur / 4);
if (size > limit) if (size > limit)
goto fail; goto fail;
} }
@ -411,6 +411,11 @@ static int bprm_mm_init(struct linux_binprm *bprm)
if (!mm) if (!mm)
goto err; goto err;
/* Save current stack limit for all calculations made during exec. */
task_lock(current->group_leader);
bprm->rlim_stack = current->signal->rlim[RLIMIT_STACK];
task_unlock(current->group_leader);
err = __bprm_mm_init(bprm); err = __bprm_mm_init(bprm);
if (err) if (err)
goto err; goto err;
@ -697,7 +702,7 @@ int setup_arg_pages(struct linux_binprm *bprm,
#ifdef CONFIG_STACK_GROWSUP #ifdef CONFIG_STACK_GROWSUP
/* Limit stack size */ /* Limit stack size */
stack_base = rlimit_max(RLIMIT_STACK); stack_base = bprm->rlim_stack.rlim_max;
if (stack_base > STACK_SIZE_MAX) if (stack_base > STACK_SIZE_MAX)
stack_base = STACK_SIZE_MAX; stack_base = STACK_SIZE_MAX;
@ -770,7 +775,7 @@ int setup_arg_pages(struct linux_binprm *bprm,
* Align this down to a page boundary as expand_stack * Align this down to a page boundary as expand_stack
* will align it up. * will align it up.
*/ */
rlim_stack = rlimit(RLIMIT_STACK) & PAGE_MASK; rlim_stack = bprm->rlim_stack.rlim_cur & PAGE_MASK;
#ifdef CONFIG_STACK_GROWSUP #ifdef CONFIG_STACK_GROWSUP
if (stack_size + stack_expand > rlim_stack) if (stack_size + stack_expand > rlim_stack)
stack_base = vma->vm_start + rlim_stack; stack_base = vma->vm_start + rlim_stack;
@ -1341,11 +1346,11 @@ void setup_new_exec(struct linux_binprm * bprm)
* RLIMIT_STACK, but after the point of no return to avoid * RLIMIT_STACK, but after the point of no return to avoid
* needing to clean up the change on failure. * needing to clean up the change on failure.
*/ */
if (current->signal->rlim[RLIMIT_STACK].rlim_cur > _STK_LIM) if (bprm->rlim_stack.rlim_cur > _STK_LIM)
current->signal->rlim[RLIMIT_STACK].rlim_cur = _STK_LIM; bprm->rlim_stack.rlim_cur = _STK_LIM;
} }
arch_pick_mmap_layout(current->mm); arch_pick_mmap_layout(current->mm, &bprm->rlim_stack);
current->sas_ss_sp = current->sas_ss_size = 0; current->sas_ss_sp = current->sas_ss_size = 0;
@ -1378,6 +1383,16 @@ void setup_new_exec(struct linux_binprm * bprm)
} }
EXPORT_SYMBOL(setup_new_exec); EXPORT_SYMBOL(setup_new_exec);
/*
 * Runs immediately before start_thread() takes over.
 *
 * Copies the stack rlimit carried in @bprm->rlim_stack into
 * current->signal->rlim[RLIMIT_STACK]. The copy is done with task_lock()
 * held on the thread group leader.
 */
void finalize_exec(struct linux_binprm *bprm)
{
/* Store any stack rlimit changes before starting thread. */
task_lock(current->group_leader);
current->signal->rlim[RLIMIT_STACK] = bprm->rlim_stack;
task_unlock(current->group_leader);
}
EXPORT_SYMBOL(finalize_exec);
/* /*
* Prepare credentials and lock ->cred_guard_mutex. * Prepare credentials and lock ->cred_guard_mutex.
* install_exec_creds() commits the new creds and drops the lock. * install_exec_creds() commits the new creds and drops the lock.

View File

@ -2424,12 +2424,12 @@ void f2fs_set_page_dirty_nobuffers(struct page *page)
SetPageDirty(page); SetPageDirty(page);
spin_unlock(&mapping->private_lock); spin_unlock(&mapping->private_lock);
spin_lock_irqsave(&mapping->tree_lock, flags); xa_lock_irqsave(&mapping->i_pages, flags);
WARN_ON_ONCE(!PageUptodate(page)); WARN_ON_ONCE(!PageUptodate(page));
account_page_dirtied(page, mapping); account_page_dirtied(page, mapping);
radix_tree_tag_set(&mapping->page_tree, radix_tree_tag_set(&mapping->i_pages,
page_index(page), PAGECACHE_TAG_DIRTY); page_index(page), PAGECACHE_TAG_DIRTY);
spin_unlock_irqrestore(&mapping->tree_lock, flags); xa_unlock_irqrestore(&mapping->i_pages, flags);
unlock_page_memcg(page); unlock_page_memcg(page);
__mark_inode_dirty(mapping->host, I_DIRTY_PAGES); __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);

View File

@ -732,10 +732,10 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
if (bit_pos == NR_DENTRY_IN_BLOCK && if (bit_pos == NR_DENTRY_IN_BLOCK &&
!truncate_hole(dir, page->index, page->index + 1)) { !truncate_hole(dir, page->index, page->index + 1)) {
spin_lock_irqsave(&mapping->tree_lock, flags); xa_lock_irqsave(&mapping->i_pages, flags);
radix_tree_tag_clear(&mapping->page_tree, page_index(page), radix_tree_tag_clear(&mapping->i_pages, page_index(page),
PAGECACHE_TAG_DIRTY); PAGECACHE_TAG_DIRTY);
spin_unlock_irqrestore(&mapping->tree_lock, flags); xa_unlock_irqrestore(&mapping->i_pages, flags);
clear_page_dirty_for_io(page); clear_page_dirty_for_io(page);
ClearPagePrivate(page); ClearPagePrivate(page);

View File

@ -1015,7 +1015,7 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync,
unsigned int init_segno = segno; unsigned int init_segno = segno;
struct gc_inode_list gc_list = { struct gc_inode_list gc_list = {
.ilist = LIST_HEAD_INIT(gc_list.ilist), .ilist = LIST_HEAD_INIT(gc_list.ilist),
.iroot = RADIX_TREE_INIT(GFP_NOFS), .iroot = RADIX_TREE_INIT(gc_list.iroot, GFP_NOFS),
}; };
trace_f2fs_gc_begin(sbi->sb, sync, background, trace_f2fs_gc_begin(sbi->sb, sync, background,

View File

@ -226,10 +226,10 @@ int f2fs_write_inline_data(struct inode *inode, struct page *page)
kunmap_atomic(src_addr); kunmap_atomic(src_addr);
set_page_dirty(dn.inode_page); set_page_dirty(dn.inode_page);
spin_lock_irqsave(&mapping->tree_lock, flags); xa_lock_irqsave(&mapping->i_pages, flags);
radix_tree_tag_clear(&mapping->page_tree, page_index(page), radix_tree_tag_clear(&mapping->i_pages, page_index(page),
PAGECACHE_TAG_DIRTY); PAGECACHE_TAG_DIRTY);
spin_unlock_irqrestore(&mapping->tree_lock, flags); xa_unlock_irqrestore(&mapping->i_pages, flags);
set_inode_flag(inode, FI_APPEND_WRITE); set_inode_flag(inode, FI_APPEND_WRITE);
set_inode_flag(inode, FI_DATA_EXIST); set_inode_flag(inode, FI_DATA_EXIST);

View File

@ -91,11 +91,11 @@ static void clear_node_page_dirty(struct page *page)
unsigned int long flags; unsigned int long flags;
if (PageDirty(page)) { if (PageDirty(page)) {
spin_lock_irqsave(&mapping->tree_lock, flags); xa_lock_irqsave(&mapping->i_pages, flags);
radix_tree_tag_clear(&mapping->page_tree, radix_tree_tag_clear(&mapping->i_pages,
page_index(page), page_index(page),
PAGECACHE_TAG_DIRTY); PAGECACHE_TAG_DIRTY);
spin_unlock_irqrestore(&mapping->tree_lock, flags); xa_unlock_irqrestore(&mapping->i_pages, flags);
clear_page_dirty_for_io(page); clear_page_dirty_for_io(page);
dec_page_count(F2FS_M_SB(mapping), F2FS_DIRTY_NODES); dec_page_count(F2FS_M_SB(mapping), F2FS_DIRTY_NODES);
@ -1161,7 +1161,7 @@ void ra_node_page(struct f2fs_sb_info *sbi, nid_t nid)
f2fs_bug_on(sbi, check_nid_range(sbi, nid)); f2fs_bug_on(sbi, check_nid_range(sbi, nid));
rcu_read_lock(); rcu_read_lock();
apage = radix_tree_lookup(&NODE_MAPPING(sbi)->page_tree, nid); apage = radix_tree_lookup(&NODE_MAPPING(sbi)->i_pages, nid);
rcu_read_unlock(); rcu_read_unlock();
if (apage) if (apage)
return; return;

View File

@ -347,9 +347,9 @@ static void inode_switch_wbs_work_fn(struct work_struct *work)
* By the time control reaches here, RCU grace period has passed * By the time control reaches here, RCU grace period has passed
* since I_WB_SWITCH assertion and all wb stat update transactions * since I_WB_SWITCH assertion and all wb stat update transactions
* between unlocked_inode_to_wb_begin/end() are guaranteed to be * between unlocked_inode_to_wb_begin/end() are guaranteed to be
* synchronizing against mapping->tree_lock. * synchronizing against the i_pages lock.
* *
* Grabbing old_wb->list_lock, inode->i_lock and mapping->tree_lock * Grabbing old_wb->list_lock, inode->i_lock and the i_pages lock
* gives us exclusion against all wb related operations on @inode * gives us exclusion against all wb related operations on @inode
* including IO list manipulations and stat updates. * including IO list manipulations and stat updates.
*/ */
@ -361,7 +361,7 @@ static void inode_switch_wbs_work_fn(struct work_struct *work)
spin_lock_nested(&old_wb->list_lock, SINGLE_DEPTH_NESTING); spin_lock_nested(&old_wb->list_lock, SINGLE_DEPTH_NESTING);
} }
spin_lock(&inode->i_lock); spin_lock(&inode->i_lock);
spin_lock_irq(&mapping->tree_lock); xa_lock_irq(&mapping->i_pages);
/* /*
* Once I_FREEING is visible under i_lock, the eviction path owns * Once I_FREEING is visible under i_lock, the eviction path owns
@ -373,22 +373,22 @@ static void inode_switch_wbs_work_fn(struct work_struct *work)
/* /*
* Count and transfer stats. Note that PAGECACHE_TAG_DIRTY points * Count and transfer stats. Note that PAGECACHE_TAG_DIRTY points
* to possibly dirty pages while PAGECACHE_TAG_WRITEBACK points to * to possibly dirty pages while PAGECACHE_TAG_WRITEBACK points to
* pages actually under underwriteback. * pages actually under writeback.
*/ */
radix_tree_for_each_tagged(slot, &mapping->page_tree, &iter, 0, radix_tree_for_each_tagged(slot, &mapping->i_pages, &iter, 0,
PAGECACHE_TAG_DIRTY) { PAGECACHE_TAG_DIRTY) {
struct page *page = radix_tree_deref_slot_protected(slot, struct page *page = radix_tree_deref_slot_protected(slot,
&mapping->tree_lock); &mapping->i_pages.xa_lock);
if (likely(page) && PageDirty(page)) { if (likely(page) && PageDirty(page)) {
dec_wb_stat(old_wb, WB_RECLAIMABLE); dec_wb_stat(old_wb, WB_RECLAIMABLE);
inc_wb_stat(new_wb, WB_RECLAIMABLE); inc_wb_stat(new_wb, WB_RECLAIMABLE);
} }
} }
radix_tree_for_each_tagged(slot, &mapping->page_tree, &iter, 0, radix_tree_for_each_tagged(slot, &mapping->i_pages, &iter, 0,
PAGECACHE_TAG_WRITEBACK) { PAGECACHE_TAG_WRITEBACK) {
struct page *page = radix_tree_deref_slot_protected(slot, struct page *page = radix_tree_deref_slot_protected(slot,
&mapping->tree_lock); &mapping->i_pages.xa_lock);
if (likely(page)) { if (likely(page)) {
WARN_ON_ONCE(!PageWriteback(page)); WARN_ON_ONCE(!PageWriteback(page));
dec_wb_stat(old_wb, WB_WRITEBACK); dec_wb_stat(old_wb, WB_WRITEBACK);
@ -430,7 +430,7 @@ static void inode_switch_wbs_work_fn(struct work_struct *work)
*/ */
smp_store_release(&inode->i_state, inode->i_state & ~I_WB_SWITCH); smp_store_release(&inode->i_state, inode->i_state & ~I_WB_SWITCH);
spin_unlock_irq(&mapping->tree_lock); xa_unlock_irq(&mapping->i_pages);
spin_unlock(&inode->i_lock); spin_unlock(&inode->i_lock);
spin_unlock(&new_wb->list_lock); spin_unlock(&new_wb->list_lock);
spin_unlock(&old_wb->list_lock); spin_unlock(&old_wb->list_lock);
@ -506,8 +506,8 @@ static void inode_switch_wbs(struct inode *inode, int new_wb_id)
/* /*
* In addition to synchronizing among switchers, I_WB_SWITCH tells * In addition to synchronizing among switchers, I_WB_SWITCH tells
* the RCU protected stat update paths to grab the mapping's * the RCU protected stat update paths to grab the i_pages
* tree_lock so that stat transfer can synchronize against them. * lock so that stat transfer can synchronize against them.
* Let's continue after I_WB_SWITCH is guaranteed to be visible. * Let's continue after I_WB_SWITCH is guaranteed to be visible.
*/ */
call_rcu(&isw->rcu_head, inode_switch_wbs_rcu_fn); call_rcu(&isw->rcu_head, inode_switch_wbs_rcu_fn);

View File

@ -832,7 +832,7 @@ void __fscache_relinquish_cookie(struct fscache_cookie *cookie,
/* Clear pointers back to the netfs */ /* Clear pointers back to the netfs */
cookie->netfs_data = NULL; cookie->netfs_data = NULL;
cookie->def = NULL; cookie->def = NULL;
BUG_ON(cookie->stores.rnode); BUG_ON(!radix_tree_empty(&cookie->stores));
if (cookie->parent) { if (cookie->parent) {
ASSERTCMP(atomic_read(&cookie->parent->usage), >, 0); ASSERTCMP(atomic_read(&cookie->parent->usage), >, 0);

View File

@ -973,7 +973,7 @@ static const struct fscache_state *_fscache_invalidate_object(struct fscache_obj
* retire the object instead. * retire the object instead.
*/ */
if (!fscache_use_cookie(object)) { if (!fscache_use_cookie(object)) {
ASSERT(object->cookie->stores.rnode == NULL); ASSERT(radix_tree_empty(&object->cookie->stores));
set_bit(FSCACHE_OBJECT_RETIRED, &object->flags); set_bit(FSCACHE_OBJECT_RETIRED, &object->flags);
_leave(" [no cookie]"); _leave(" [no cookie]");
return transit_to(KILL_OBJECT); return transit_to(KILL_OBJECT);

View File

@ -348,8 +348,7 @@ EXPORT_SYMBOL(inc_nlink);
static void __address_space_init_once(struct address_space *mapping) static void __address_space_init_once(struct address_space *mapping)
{ {
INIT_RADIX_TREE(&mapping->page_tree, GFP_ATOMIC | __GFP_ACCOUNT); INIT_RADIX_TREE(&mapping->i_pages, GFP_ATOMIC | __GFP_ACCOUNT);
spin_lock_init(&mapping->tree_lock);
init_rwsem(&mapping->i_mmap_rwsem); init_rwsem(&mapping->i_mmap_rwsem);
INIT_LIST_HEAD(&mapping->private_list); INIT_LIST_HEAD(&mapping->private_list);
spin_lock_init(&mapping->private_lock); spin_lock_init(&mapping->private_lock);
@ -504,14 +503,14 @@ EXPORT_SYMBOL(__remove_inode_hash);
void clear_inode(struct inode *inode) void clear_inode(struct inode *inode)
{ {
/* /*
* We have to cycle tree_lock here because reclaim can be still in the * We have to cycle the i_pages lock here because reclaim can be in the
* process of removing the last page (in __delete_from_page_cache()) * process of removing the last page (in __delete_from_page_cache())
* and we must not free mapping under it. * and we must not free the mapping under it.
*/ */
spin_lock_irq(&inode->i_data.tree_lock); xa_lock_irq(&inode->i_data.i_pages);
BUG_ON(inode->i_data.nrpages); BUG_ON(inode->i_data.nrpages);
BUG_ON(inode->i_data.nrexceptional); BUG_ON(inode->i_data.nrexceptional);
spin_unlock_irq(&inode->i_data.tree_lock); xa_unlock_irq(&inode->i_data.i_pages);
BUG_ON(!list_empty(&inode->i_data.private_list)); BUG_ON(!list_empty(&inode->i_data.private_list));
BUG_ON(!(inode->i_state & I_FREEING)); BUG_ON(!(inode->i_state & I_FREEING));
BUG_ON(inode->i_state & I_CLEAR); BUG_ON(inode->i_state & I_CLEAR);

View File

@ -193,9 +193,9 @@ int nilfs_btnode_prepare_change_key(struct address_space *btnc,
(unsigned long long)oldkey, (unsigned long long)oldkey,
(unsigned long long)newkey); (unsigned long long)newkey);
spin_lock_irq(&btnc->tree_lock); xa_lock_irq(&btnc->i_pages);
err = radix_tree_insert(&btnc->page_tree, newkey, obh->b_page); err = radix_tree_insert(&btnc->i_pages, newkey, obh->b_page);
spin_unlock_irq(&btnc->tree_lock); xa_unlock_irq(&btnc->i_pages);
/* /*
* Note: page->index will not change to newkey until * Note: page->index will not change to newkey until
* nilfs_btnode_commit_change_key() is called. * nilfs_btnode_commit_change_key() is called.
@ -251,11 +251,11 @@ void nilfs_btnode_commit_change_key(struct address_space *btnc,
(unsigned long long)newkey); (unsigned long long)newkey);
mark_buffer_dirty(obh); mark_buffer_dirty(obh);
spin_lock_irq(&btnc->tree_lock); xa_lock_irq(&btnc->i_pages);
radix_tree_delete(&btnc->page_tree, oldkey); radix_tree_delete(&btnc->i_pages, oldkey);
radix_tree_tag_set(&btnc->page_tree, newkey, radix_tree_tag_set(&btnc->i_pages, newkey,
PAGECACHE_TAG_DIRTY); PAGECACHE_TAG_DIRTY);
spin_unlock_irq(&btnc->tree_lock); xa_unlock_irq(&btnc->i_pages);
opage->index = obh->b_blocknr = newkey; opage->index = obh->b_blocknr = newkey;
unlock_page(opage); unlock_page(opage);
@ -283,9 +283,9 @@ void nilfs_btnode_abort_change_key(struct address_space *btnc,
return; return;
if (nbh == NULL) { /* blocksize == pagesize */ if (nbh == NULL) { /* blocksize == pagesize */
spin_lock_irq(&btnc->tree_lock); xa_lock_irq(&btnc->i_pages);
radix_tree_delete(&btnc->page_tree, newkey); radix_tree_delete(&btnc->i_pages, newkey);
spin_unlock_irq(&btnc->tree_lock); xa_unlock_irq(&btnc->i_pages);
unlock_page(ctxt->bh->b_page); unlock_page(ctxt->bh->b_page);
} else } else
brelse(nbh); brelse(nbh);

View File

@ -331,15 +331,15 @@ void nilfs_copy_back_pages(struct address_space *dmap,
struct page *page2; struct page *page2;
/* move the page to the destination cache */ /* move the page to the destination cache */
spin_lock_irq(&smap->tree_lock); xa_lock_irq(&smap->i_pages);
page2 = radix_tree_delete(&smap->page_tree, offset); page2 = radix_tree_delete(&smap->i_pages, offset);
WARN_ON(page2 != page); WARN_ON(page2 != page);
smap->nrpages--; smap->nrpages--;
spin_unlock_irq(&smap->tree_lock); xa_unlock_irq(&smap->i_pages);
spin_lock_irq(&dmap->tree_lock); xa_lock_irq(&dmap->i_pages);
err = radix_tree_insert(&dmap->page_tree, offset, page); err = radix_tree_insert(&dmap->i_pages, offset, page);
if (unlikely(err < 0)) { if (unlikely(err < 0)) {
WARN_ON(err == -EEXIST); WARN_ON(err == -EEXIST);
page->mapping = NULL; page->mapping = NULL;
@ -348,11 +348,11 @@ void nilfs_copy_back_pages(struct address_space *dmap,
page->mapping = dmap; page->mapping = dmap;
dmap->nrpages++; dmap->nrpages++;
if (PageDirty(page)) if (PageDirty(page))
radix_tree_tag_set(&dmap->page_tree, radix_tree_tag_set(&dmap->i_pages,
offset, offset,
PAGECACHE_TAG_DIRTY); PAGECACHE_TAG_DIRTY);
} }
spin_unlock_irq(&dmap->tree_lock); xa_unlock_irq(&dmap->i_pages);
} }
unlock_page(page); unlock_page(page);
} }
@ -474,15 +474,15 @@ int __nilfs_clear_page_dirty(struct page *page)
struct address_space *mapping = page->mapping; struct address_space *mapping = page->mapping;
if (mapping) { if (mapping) {
spin_lock_irq(&mapping->tree_lock); xa_lock_irq(&mapping->i_pages);
if (test_bit(PG_dirty, &page->flags)) { if (test_bit(PG_dirty, &page->flags)) {
radix_tree_tag_clear(&mapping->page_tree, radix_tree_tag_clear(&mapping->i_pages,
page_index(page), page_index(page),
PAGECACHE_TAG_DIRTY); PAGECACHE_TAG_DIRTY);
spin_unlock_irq(&mapping->tree_lock); xa_unlock_irq(&mapping->i_pages);
return clear_page_dirty_for_io(page); return clear_page_dirty_for_io(page);
} }
spin_unlock_irq(&mapping->tree_lock); xa_unlock_irq(&mapping->i_pages);
return 0; return 0;
} }
return TestClearPageDirty(page); return TestClearPageDirty(page);

View File

@ -141,25 +141,12 @@ static inline const char *get_task_state(struct task_struct *tsk)
return task_state_array[task_state_index(tsk)]; return task_state_array[task_state_index(tsk)];
} }
static inline int get_task_umask(struct task_struct *tsk)
{
struct fs_struct *fs;
int umask = -ENOENT;
task_lock(tsk);
fs = tsk->fs;
if (fs)
umask = fs->umask;
task_unlock(tsk);
return umask;
}
static inline void task_state(struct seq_file *m, struct pid_namespace *ns, static inline void task_state(struct seq_file *m, struct pid_namespace *ns,
struct pid *pid, struct task_struct *p) struct pid *pid, struct task_struct *p)
{ {
struct user_namespace *user_ns = seq_user_ns(m); struct user_namespace *user_ns = seq_user_ns(m);
struct group_info *group_info; struct group_info *group_info;
int g, umask; int g, umask = -1;
struct task_struct *tracer; struct task_struct *tracer;
const struct cred *cred; const struct cred *cred;
pid_t ppid, tpid = 0, tgid, ngid; pid_t ppid, tpid = 0, tgid, ngid;
@ -177,17 +164,18 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns,
ngid = task_numa_group_id(p); ngid = task_numa_group_id(p);
cred = get_task_cred(p); cred = get_task_cred(p);
umask = get_task_umask(p);
if (umask >= 0)
seq_printf(m, "Umask:\t%#04o\n", umask);
task_lock(p); task_lock(p);
if (p->fs)
umask = p->fs->umask;
if (p->files) if (p->files)
max_fds = files_fdtable(p->files)->max_fds; max_fds = files_fdtable(p->files)->max_fds;
task_unlock(p); task_unlock(p);
rcu_read_unlock(); rcu_read_unlock();
seq_printf(m, "State:\t%s", get_task_state(p)); if (umask >= 0)
seq_printf(m, "Umask:\t%#04o\n", umask);
seq_puts(m, "State:\t");
seq_puts(m, get_task_state(p));
seq_put_decimal_ull(m, "\nTgid:\t", tgid); seq_put_decimal_ull(m, "\nTgid:\t", tgid);
seq_put_decimal_ull(m, "\nNgid:\t", ngid); seq_put_decimal_ull(m, "\nNgid:\t", ngid);
@ -313,8 +301,8 @@ static void render_cap_t(struct seq_file *m, const char *header,
seq_puts(m, header); seq_puts(m, header);
CAP_FOR_EACH_U32(__capi) { CAP_FOR_EACH_U32(__capi) {
seq_printf(m, "%08x", seq_put_hex_ll(m, NULL,
a->cap[CAP_LAST_U32 - __capi]); a->cap[CAP_LAST_U32 - __capi], 8);
} }
seq_putc(m, '\n'); seq_putc(m, '\n');
} }
@ -368,7 +356,8 @@ static void task_cpus_allowed(struct seq_file *m, struct task_struct *task)
static inline void task_core_dumping(struct seq_file *m, struct mm_struct *mm) static inline void task_core_dumping(struct seq_file *m, struct mm_struct *mm)
{ {
seq_printf(m, "CoreDumping:\t%d\n", !!mm->core_state); seq_put_decimal_ull(m, "CoreDumping:\t", !!mm->core_state);
seq_putc(m, '\n');
} }
int proc_pid_status(struct seq_file *m, struct pid_namespace *ns, int proc_pid_status(struct seq_file *m, struct pid_namespace *ns,
@ -504,7 +493,11 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
/* convert nsec -> ticks */ /* convert nsec -> ticks */
start_time = nsec_to_clock_t(task->real_start_time); start_time = nsec_to_clock_t(task->real_start_time);
seq_printf(m, "%d (%s) %c", pid_nr_ns(pid, ns), tcomm, state); seq_put_decimal_ull(m, "", pid_nr_ns(pid, ns));
seq_puts(m, " (");
seq_puts(m, tcomm);
seq_puts(m, ") ");
seq_putc(m, state);
seq_put_decimal_ll(m, " ", ppid); seq_put_decimal_ll(m, " ", ppid);
seq_put_decimal_ll(m, " ", pgid); seq_put_decimal_ll(m, " ", pgid);
seq_put_decimal_ll(m, " ", sid); seq_put_decimal_ll(m, " ", sid);

View File

@ -388,14 +388,17 @@ static int proc_pid_wchan(struct seq_file *m, struct pid_namespace *ns,
unsigned long wchan; unsigned long wchan;
char symname[KSYM_NAME_LEN]; char symname[KSYM_NAME_LEN];
if (!ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS))
goto print0;
wchan = get_wchan(task); wchan = get_wchan(task);
if (wchan && !lookup_symbol_name(wchan, symname)) {
seq_puts(m, symname);
return 0;
}
if (wchan && ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS) print0:
&& !lookup_symbol_name(wchan, symname)) seq_putc(m, '0');
seq_printf(m, "%s", symname);
else
seq_putc(m, '0');
return 0; return 0;
} }
#endif /* CONFIG_KALLSYMS */ #endif /* CONFIG_KALLSYMS */
@ -1910,6 +1913,8 @@ static int dname_to_vma_addr(struct dentry *dentry,
unsigned long long sval, eval; unsigned long long sval, eval;
unsigned int len; unsigned int len;
if (str[0] == '0' && str[1] != '-')
return -EINVAL;
len = _parse_integer(str, 16, &sval); len = _parse_integer(str, 16, &sval);
if (len & KSTRTOX_OVERFLOW) if (len & KSTRTOX_OVERFLOW)
return -EINVAL; return -EINVAL;
@ -1921,6 +1926,8 @@ static int dname_to_vma_addr(struct dentry *dentry,
return -EINVAL; return -EINVAL;
str++; str++;
if (str[0] == '0' && str[1])
return -EINVAL;
len = _parse_integer(str, 16, &eval); len = _parse_integer(str, 16, &eval);
if (len & KSTRTOX_OVERFLOW) if (len & KSTRTOX_OVERFLOW)
return -EINVAL; return -EINVAL;
@ -2204,6 +2211,7 @@ proc_map_files_readdir(struct file *file, struct dir_context *ctx)
} }
} }
up_read(&mm->mmap_sem); up_read(&mm->mmap_sem);
mmput(mm);
for (i = 0; i < nr_files; i++) { for (i = 0; i < nr_files; i++) {
char buf[4 * sizeof(long) + 2]; /* max: %lx-%lx\0 */ char buf[4 * sizeof(long) + 2]; /* max: %lx-%lx\0 */
@ -2221,7 +2229,6 @@ proc_map_files_readdir(struct file *file, struct dir_context *ctx)
} }
if (fa) if (fa)
flex_array_free(fa); flex_array_free(fa);
mmput(mm);
out_put_task: out_put_task:
put_task_struct(task); put_task_struct(task);

View File

@ -6,7 +6,8 @@
static int cmdline_proc_show(struct seq_file *m, void *v) static int cmdline_proc_show(struct seq_file *m, void *v)
{ {
seq_printf(m, "%s\n", saved_command_line); seq_puts(m, saved_command_line);
seq_putc(m, '\n');
return 0; return 0;
} }

View File

@ -8,6 +8,7 @@
* Copyright (C) 1997 Theodore Ts'o * Copyright (C) 1997 Theodore Ts'o
*/ */
#include <linux/cache.h>
#include <linux/errno.h> #include <linux/errno.h>
#include <linux/time.h> #include <linux/time.h>
#include <linux/proc_fs.h> #include <linux/proc_fs.h>
@ -28,6 +29,17 @@
static DEFINE_RWLOCK(proc_subdir_lock); static DEFINE_RWLOCK(proc_subdir_lock);
struct kmem_cache *proc_dir_entry_cache __ro_after_init;
void pde_free(struct proc_dir_entry *pde)
{
if (S_ISLNK(pde->mode))
kfree(pde->data);
if (pde->name != pde->inline_name)
kfree(pde->name);
kmem_cache_free(proc_dir_entry_cache, pde);
}
static int proc_match(const char *name, struct proc_dir_entry *de, unsigned int len) static int proc_match(const char *name, struct proc_dir_entry *de, unsigned int len)
{ {
if (len < de->namelen) if (len < de->namelen)
@ -40,8 +52,8 @@ static int proc_match(const char *name, struct proc_dir_entry *de, unsigned int
static struct proc_dir_entry *pde_subdir_first(struct proc_dir_entry *dir) static struct proc_dir_entry *pde_subdir_first(struct proc_dir_entry *dir)
{ {
return rb_entry_safe(rb_first_cached(&dir->subdir), return rb_entry_safe(rb_first(&dir->subdir), struct proc_dir_entry,
struct proc_dir_entry, subdir_node); subdir_node);
} }
static struct proc_dir_entry *pde_subdir_next(struct proc_dir_entry *dir) static struct proc_dir_entry *pde_subdir_next(struct proc_dir_entry *dir)
@ -54,7 +66,7 @@ static struct proc_dir_entry *pde_subdir_find(struct proc_dir_entry *dir,
const char *name, const char *name,
unsigned int len) unsigned int len)
{ {
struct rb_node *node = dir->subdir.rb_root.rb_node; struct rb_node *node = dir->subdir.rb_node;
while (node) { while (node) {
struct proc_dir_entry *de = rb_entry(node, struct proc_dir_entry *de = rb_entry(node,
@ -75,9 +87,8 @@ static struct proc_dir_entry *pde_subdir_find(struct proc_dir_entry *dir,
static bool pde_subdir_insert(struct proc_dir_entry *dir, static bool pde_subdir_insert(struct proc_dir_entry *dir,
struct proc_dir_entry *de) struct proc_dir_entry *de)
{ {
struct rb_root_cached *root = &dir->subdir; struct rb_root *root = &dir->subdir;
struct rb_node **new = &root->rb_root.rb_node, *parent = NULL; struct rb_node **new = &root->rb_node, *parent = NULL;
bool leftmost = true;
/* Figure out where to put new node */ /* Figure out where to put new node */
while (*new) { while (*new) {
@ -89,16 +100,15 @@ static bool pde_subdir_insert(struct proc_dir_entry *dir,
parent = *new; parent = *new;
if (result < 0) if (result < 0)
new = &(*new)->rb_left; new = &(*new)->rb_left;
else if (result > 0) { else if (result > 0)
new = &(*new)->rb_right; new = &(*new)->rb_right;
leftmost = false; else
} else
return false; return false;
} }
/* Add new node and rebalance tree. */ /* Add new node and rebalance tree. */
rb_link_node(&de->subdir_node, parent, new); rb_link_node(&de->subdir_node, parent, new);
rb_insert_color_cached(&de->subdir_node, root, leftmost); rb_insert_color(&de->subdir_node, root);
return true; return true;
} }
@ -354,6 +364,14 @@ static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent,
WARN(1, "name len %u\n", qstr.len); WARN(1, "name len %u\n", qstr.len);
return NULL; return NULL;
} }
if (qstr.len == 1 && fn[0] == '.') {
WARN(1, "name '.'\n");
return NULL;
}
if (qstr.len == 2 && fn[0] == '.' && fn[1] == '.') {
WARN(1, "name '..'\n");
return NULL;
}
if (*parent == &proc_root && name_to_int(&qstr) != ~0U) { if (*parent == &proc_root && name_to_int(&qstr) != ~0U) {
WARN(1, "create '/proc/%s' by hand\n", qstr.name); WARN(1, "create '/proc/%s' by hand\n", qstr.name);
return NULL; return NULL;
@ -363,16 +381,26 @@ static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent,
return NULL; return NULL;
} }
ent = kzalloc(sizeof(struct proc_dir_entry) + qstr.len + 1, GFP_KERNEL); ent = kmem_cache_zalloc(proc_dir_entry_cache, GFP_KERNEL);
if (!ent) if (!ent)
goto out; goto out;
if (qstr.len + 1 <= sizeof(ent->inline_name)) {
ent->name = ent->inline_name;
} else {
ent->name = kmalloc(qstr.len + 1, GFP_KERNEL);
if (!ent->name) {
pde_free(ent);
return NULL;
}
}
memcpy(ent->name, fn, qstr.len + 1); memcpy(ent->name, fn, qstr.len + 1);
ent->namelen = qstr.len; ent->namelen = qstr.len;
ent->mode = mode; ent->mode = mode;
ent->nlink = nlink; ent->nlink = nlink;
ent->subdir = RB_ROOT_CACHED; ent->subdir = RB_ROOT;
atomic_set(&ent->count, 1); refcount_set(&ent->refcnt, 1);
spin_lock_init(&ent->pde_unload_lock); spin_lock_init(&ent->pde_unload_lock);
INIT_LIST_HEAD(&ent->pde_openers); INIT_LIST_HEAD(&ent->pde_openers);
proc_set_user(ent, (*parent)->uid, (*parent)->gid); proc_set_user(ent, (*parent)->uid, (*parent)->gid);
@ -395,12 +423,11 @@ struct proc_dir_entry *proc_symlink(const char *name,
strcpy((char*)ent->data,dest); strcpy((char*)ent->data,dest);
ent->proc_iops = &proc_link_inode_operations; ent->proc_iops = &proc_link_inode_operations;
if (proc_register(parent, ent) < 0) { if (proc_register(parent, ent) < 0) {
kfree(ent->data); pde_free(ent);
kfree(ent);
ent = NULL; ent = NULL;
} }
} else { } else {
kfree(ent); pde_free(ent);
ent = NULL; ent = NULL;
} }
} }
@ -423,7 +450,7 @@ struct proc_dir_entry *proc_mkdir_data(const char *name, umode_t mode,
ent->proc_iops = &proc_dir_inode_operations; ent->proc_iops = &proc_dir_inode_operations;
parent->nlink++; parent->nlink++;
if (proc_register(parent, ent) < 0) { if (proc_register(parent, ent) < 0) {
kfree(ent); pde_free(ent);
parent->nlink--; parent->nlink--;
ent = NULL; ent = NULL;
} }
@ -458,7 +485,7 @@ struct proc_dir_entry *proc_create_mount_point(const char *name)
ent->proc_iops = NULL; ent->proc_iops = NULL;
parent->nlink++; parent->nlink++;
if (proc_register(parent, ent) < 0) { if (proc_register(parent, ent) < 0) {
kfree(ent); pde_free(ent);
parent->nlink--; parent->nlink--;
ent = NULL; ent = NULL;
} }
@ -495,7 +522,7 @@ struct proc_dir_entry *proc_create_data(const char *name, umode_t mode,
goto out_free; goto out_free;
return pde; return pde;
out_free: out_free:
kfree(pde); pde_free(pde);
out: out:
return NULL; return NULL;
} }
@ -522,19 +549,12 @@ void proc_set_user(struct proc_dir_entry *de, kuid_t uid, kgid_t gid)
} }
EXPORT_SYMBOL(proc_set_user); EXPORT_SYMBOL(proc_set_user);
static void free_proc_entry(struct proc_dir_entry *de)
{
proc_free_inum(de->low_ino);
if (S_ISLNK(de->mode))
kfree(de->data);
kfree(de);
}
void pde_put(struct proc_dir_entry *pde) void pde_put(struct proc_dir_entry *pde)
{ {
if (atomic_dec_and_test(&pde->count)) if (refcount_dec_and_test(&pde->refcnt)) {
free_proc_entry(pde); proc_free_inum(pde->low_ino);
pde_free(pde);
}
} }
/* /*
@ -555,7 +575,7 @@ void remove_proc_entry(const char *name, struct proc_dir_entry *parent)
de = pde_subdir_find(parent, fn, len); de = pde_subdir_find(parent, fn, len);
if (de) if (de)
rb_erase_cached(&de->subdir_node, &parent->subdir); rb_erase(&de->subdir_node, &parent->subdir);
write_unlock(&proc_subdir_lock); write_unlock(&proc_subdir_lock);
if (!de) { if (!de) {
WARN(1, "name '%s'\n", name); WARN(1, "name '%s'\n", name);
@ -592,13 +612,13 @@ int remove_proc_subtree(const char *name, struct proc_dir_entry *parent)
write_unlock(&proc_subdir_lock); write_unlock(&proc_subdir_lock);
return -ENOENT; return -ENOENT;
} }
rb_erase_cached(&root->subdir_node, &parent->subdir); rb_erase(&root->subdir_node, &parent->subdir);
de = root; de = root;
while (1) { while (1) {
next = pde_subdir_first(de); next = pde_subdir_first(de);
if (next) { if (next) {
rb_erase_cached(&next->subdir_node, &de->subdir); rb_erase(&next->subdir_node, &de->subdir);
de = next; de = next;
continue; continue;
} }

View File

@ -54,6 +54,7 @@ static void proc_evict_inode(struct inode *inode)
} }
static struct kmem_cache *proc_inode_cachep __ro_after_init; static struct kmem_cache *proc_inode_cachep __ro_after_init;
static struct kmem_cache *pde_opener_cache __ro_after_init;
static struct inode *proc_alloc_inode(struct super_block *sb) static struct inode *proc_alloc_inode(struct super_block *sb)
{ {
@ -92,7 +93,7 @@ static void init_once(void *foo)
inode_init_once(&ei->vfs_inode); inode_init_once(&ei->vfs_inode);
} }
void __init proc_init_inodecache(void) void __init proc_init_kmemcache(void)
{ {
proc_inode_cachep = kmem_cache_create("proc_inode_cache", proc_inode_cachep = kmem_cache_create("proc_inode_cache",
sizeof(struct proc_inode), sizeof(struct proc_inode),
@ -100,6 +101,13 @@ void __init proc_init_inodecache(void)
SLAB_MEM_SPREAD|SLAB_ACCOUNT| SLAB_MEM_SPREAD|SLAB_ACCOUNT|
SLAB_PANIC), SLAB_PANIC),
init_once); init_once);
pde_opener_cache =
kmem_cache_create("pde_opener", sizeof(struct pde_opener), 0,
SLAB_ACCOUNT|SLAB_PANIC, NULL);
proc_dir_entry_cache = kmem_cache_create_usercopy(
"proc_dir_entry", sizeof(struct proc_dir_entry), 0, SLAB_PANIC,
offsetof(struct proc_dir_entry, inline_name),
sizeof_field(struct proc_dir_entry, inline_name), NULL);
} }
static int proc_show_options(struct seq_file *seq, struct dentry *root) static int proc_show_options(struct seq_file *seq, struct dentry *root)
@ -138,7 +146,7 @@ static void unuse_pde(struct proc_dir_entry *pde)
complete(pde->pde_unload_completion); complete(pde->pde_unload_completion);
} }
/* pde is locked */ /* pde is locked on entry, unlocked on exit */
static void close_pdeo(struct proc_dir_entry *pde, struct pde_opener *pdeo) static void close_pdeo(struct proc_dir_entry *pde, struct pde_opener *pdeo)
{ {
/* /*
@ -157,9 +165,10 @@ static void close_pdeo(struct proc_dir_entry *pde, struct pde_opener *pdeo)
pdeo->c = &c; pdeo->c = &c;
spin_unlock(&pde->pde_unload_lock); spin_unlock(&pde->pde_unload_lock);
wait_for_completion(&c); wait_for_completion(&c);
spin_lock(&pde->pde_unload_lock);
} else { } else {
struct file *file; struct file *file;
struct completion *c;
pdeo->closing = true; pdeo->closing = true;
spin_unlock(&pde->pde_unload_lock); spin_unlock(&pde->pde_unload_lock);
file = pdeo->file; file = pdeo->file;
@ -167,9 +176,11 @@ static void close_pdeo(struct proc_dir_entry *pde, struct pde_opener *pdeo)
spin_lock(&pde->pde_unload_lock); spin_lock(&pde->pde_unload_lock);
/* After ->release. */ /* After ->release. */
list_del(&pdeo->lh); list_del(&pdeo->lh);
if (unlikely(pdeo->c)) c = pdeo->c;
complete(pdeo->c); spin_unlock(&pde->pde_unload_lock);
kfree(pdeo); if (unlikely(c))
complete(c);
kmem_cache_free(pde_opener_cache, pdeo);
} }
} }
@ -188,6 +199,7 @@ void proc_entry_rundown(struct proc_dir_entry *de)
struct pde_opener *pdeo; struct pde_opener *pdeo;
pdeo = list_first_entry(&de->pde_openers, struct pde_opener, lh); pdeo = list_first_entry(&de->pde_openers, struct pde_opener, lh);
close_pdeo(de, pdeo); close_pdeo(de, pdeo);
spin_lock(&de->pde_unload_lock);
} }
spin_unlock(&de->pde_unload_lock); spin_unlock(&de->pde_unload_lock);
} }
@ -338,31 +350,36 @@ static int proc_reg_open(struct inode *inode, struct file *file)
* *
* Save every "struct file" with custom ->release hook. * Save every "struct file" with custom ->release hook.
*/ */
pdeo = kmalloc(sizeof(struct pde_opener), GFP_KERNEL); if (!use_pde(pde))
if (!pdeo)
return -ENOMEM;
if (!use_pde(pde)) {
kfree(pdeo);
return -ENOENT; return -ENOENT;
}
open = pde->proc_fops->open;
release = pde->proc_fops->release;
release = pde->proc_fops->release;
if (release) {
pdeo = kmem_cache_alloc(pde_opener_cache, GFP_KERNEL);
if (!pdeo) {
rv = -ENOMEM;
goto out_unuse;
}
}
open = pde->proc_fops->open;
if (open) if (open)
rv = open(inode, file); rv = open(inode, file);
if (rv == 0 && release) { if (release) {
/* To know what to release. */ if (rv == 0) {
pdeo->file = file; /* To know what to release. */
pdeo->closing = false; pdeo->file = file;
pdeo->c = NULL; pdeo->closing = false;
spin_lock(&pde->pde_unload_lock); pdeo->c = NULL;
list_add(&pdeo->lh, &pde->pde_openers); spin_lock(&pde->pde_unload_lock);
spin_unlock(&pde->pde_unload_lock); list_add(&pdeo->lh, &pde->pde_openers);
} else spin_unlock(&pde->pde_unload_lock);
kfree(pdeo); } else
kmem_cache_free(pde_opener_cache, pdeo);
}
out_unuse:
unuse_pde(pde); unuse_pde(pde);
return rv; return rv;
} }
@ -375,7 +392,7 @@ static int proc_reg_release(struct inode *inode, struct file *file)
list_for_each_entry(pdeo, &pde->pde_openers, lh) { list_for_each_entry(pdeo, &pde->pde_openers, lh) {
if (pdeo->file == file) { if (pdeo->file == file) {
close_pdeo(pde, pdeo); close_pdeo(pde, pdeo);
break; return 0;
} }
} }
spin_unlock(&pde->pde_unload_lock); spin_unlock(&pde->pde_unload_lock);

View File

@ -11,6 +11,7 @@
#include <linux/proc_fs.h> #include <linux/proc_fs.h>
#include <linux/proc_ns.h> #include <linux/proc_ns.h>
#include <linux/refcount.h>
#include <linux/spinlock.h> #include <linux/spinlock.h>
#include <linux/atomic.h> #include <linux/atomic.h>
#include <linux/binfmts.h> #include <linux/binfmts.h>
@ -36,7 +37,7 @@ struct proc_dir_entry {
* negative -> it's going away RSN * negative -> it's going away RSN
*/ */
atomic_t in_use; atomic_t in_use;
atomic_t count; /* use count */ refcount_t refcnt;
struct list_head pde_openers; /* who did ->open, but not ->release */ struct list_head pde_openers; /* who did ->open, but not ->release */
/* protects ->pde_openers and all struct pde_opener instances */ /* protects ->pde_openers and all struct pde_opener instances */
spinlock_t pde_unload_lock; spinlock_t pde_unload_lock;
@ -50,13 +51,22 @@ struct proc_dir_entry {
kgid_t gid; kgid_t gid;
loff_t size; loff_t size;
struct proc_dir_entry *parent; struct proc_dir_entry *parent;
struct rb_root_cached subdir; struct rb_root subdir;
struct rb_node subdir_node; struct rb_node subdir_node;
char *name;
umode_t mode; umode_t mode;
u8 namelen; u8 namelen;
char name[]; #ifdef CONFIG_64BIT
#define SIZEOF_PDE_INLINE_NAME (192-139)
#else
#define SIZEOF_PDE_INLINE_NAME (128-87)
#endif
char inline_name[SIZEOF_PDE_INLINE_NAME];
} __randomize_layout; } __randomize_layout;
extern struct kmem_cache *proc_dir_entry_cache;
void pde_free(struct proc_dir_entry *pde);
union proc_op { union proc_op {
int (*proc_get_link)(struct dentry *, struct path *); int (*proc_get_link)(struct dentry *, struct path *);
int (*proc_show)(struct seq_file *m, int (*proc_show)(struct seq_file *m,
@ -159,7 +169,7 @@ int proc_readdir_de(struct file *, struct dir_context *, struct proc_dir_entry *
static inline struct proc_dir_entry *pde_get(struct proc_dir_entry *pde) static inline struct proc_dir_entry *pde_get(struct proc_dir_entry *pde)
{ {
atomic_inc(&pde->count); refcount_inc(&pde->refcnt);
return pde; return pde;
} }
extern void pde_put(struct proc_dir_entry *); extern void pde_put(struct proc_dir_entry *);
@ -177,12 +187,12 @@ struct pde_opener {
struct list_head lh; struct list_head lh;
bool closing; bool closing;
struct completion *c; struct completion *c;
}; } __randomize_layout;
extern const struct inode_operations proc_link_inode_operations; extern const struct inode_operations proc_link_inode_operations;
extern const struct inode_operations proc_pid_link_inode_operations; extern const struct inode_operations proc_pid_link_inode_operations;
extern void proc_init_inodecache(void); void proc_init_kmemcache(void);
void set_proc_pid_nlink(void); void set_proc_pid_nlink(void);
extern struct inode *proc_get_inode(struct super_block *, struct proc_dir_entry *); extern struct inode *proc_get_inode(struct super_block *, struct proc_dir_entry *);
extern int proc_fill_super(struct super_block *, void *data, int flags); extern int proc_fill_super(struct super_block *, void *data, int flags);

View File

@ -26,20 +26,7 @@ void __attribute__((weak)) arch_report_meminfo(struct seq_file *m)
static void show_val_kb(struct seq_file *m, const char *s, unsigned long num) static void show_val_kb(struct seq_file *m, const char *s, unsigned long num)
{ {
char v[32]; seq_put_decimal_ull_width(m, s, num << (PAGE_SHIFT - 10), 8);
static const char blanks[7] = {' ', ' ', ' ', ' ',' ', ' ', ' '};
int len;
len = num_to_str(v, sizeof(v), num << (PAGE_SHIFT - 10));
seq_write(m, s, 16);
if (len > 0) {
if (len < 8)
seq_write(m, blanks, 8 - len);
seq_write(m, v, len);
}
seq_write(m, " kB\n", 4); seq_write(m, " kB\n", 4);
} }

View File

@ -192,15 +192,16 @@ static __net_init int proc_net_ns_init(struct net *net)
int err; int err;
err = -ENOMEM; err = -ENOMEM;
netd = kzalloc(sizeof(*netd) + 4, GFP_KERNEL); netd = kmem_cache_zalloc(proc_dir_entry_cache, GFP_KERNEL);
if (!netd) if (!netd)
goto out; goto out;
netd->subdir = RB_ROOT_CACHED; netd->subdir = RB_ROOT;
netd->data = net; netd->data = net;
netd->nlink = 2; netd->nlink = 2;
netd->namelen = 3; netd->namelen = 3;
netd->parent = &proc_root; netd->parent = &proc_root;
netd->name = netd->inline_name;
memcpy(netd->name, "net", 4); memcpy(netd->name, "net", 4);
uid = make_kuid(net->user_ns, 0); uid = make_kuid(net->user_ns, 0);
@ -223,7 +224,7 @@ static __net_init int proc_net_ns_init(struct net *net)
return 0; return 0;
free_net: free_net:
kfree(netd); pde_free(netd);
out: out:
return err; return err;
} }
@ -231,7 +232,7 @@ static __net_init int proc_net_ns_init(struct net *net)
static __net_exit void proc_net_ns_exit(struct net *net) static __net_exit void proc_net_ns_exit(struct net *net)
{ {
remove_proc_entry("stat", net->proc_net); remove_proc_entry("stat", net->proc_net);
kfree(net->proc_net); pde_free(net->proc_net);
} }
static struct pernet_operations __net_initdata proc_net_ns_ops = { static struct pernet_operations __net_initdata proc_net_ns_ops = {

View File

@ -707,14 +707,14 @@ static bool proc_sys_link_fill_cache(struct file *file,
struct ctl_table *table) struct ctl_table *table)
{ {
bool ret = true; bool ret = true;
head = sysctl_head_grab(head);
if (S_ISLNK(table->mode)) { head = sysctl_head_grab(head);
/* It is not an error if we cannot follow the link; ignore it. */ if (IS_ERR(head))
int err = sysctl_follow_link(&head, &table); return false;
if (err)
goto out; /* It is not an error if we cannot follow the link; ignore it. */
} if (sysctl_follow_link(&head, &table))
goto out;
ret = proc_sys_fill_cache(file, ctx, head, table); ret = proc_sys_fill_cache(file, ctx, head, table);
out: out:
@ -1086,7 +1086,7 @@ static int sysctl_check_table_array(const char *path, struct ctl_table *table)
if ((table->proc_handler == proc_douintvec) || if ((table->proc_handler == proc_douintvec) ||
(table->proc_handler == proc_douintvec_minmax)) { (table->proc_handler == proc_douintvec_minmax)) {
if (table->maxlen != sizeof(unsigned int)) if (table->maxlen != sizeof(unsigned int))
err |= sysctl_err(path, table, "array now allowed"); err |= sysctl_err(path, table, "array not allowed");
} }
return err; return err;

View File

@ -123,23 +123,13 @@ static struct file_system_type proc_fs_type = {
void __init proc_root_init(void) void __init proc_root_init(void)
{ {
int err; proc_init_kmemcache();
proc_init_inodecache();
set_proc_pid_nlink(); set_proc_pid_nlink();
err = register_filesystem(&proc_fs_type);
if (err)
return;
proc_self_init(); proc_self_init();
proc_thread_self_init(); proc_thread_self_init();
proc_symlink("mounts", NULL, "self/mounts"); proc_symlink("mounts", NULL, "self/mounts");
proc_net_init(); proc_net_init();
#ifdef CONFIG_SYSVIPC
proc_mkdir("sysvipc", NULL);
#endif
proc_mkdir("fs", NULL); proc_mkdir("fs", NULL);
proc_mkdir("driver", NULL); proc_mkdir("driver", NULL);
proc_create_mount_point("fs/nfsd"); /* somewhere for the nfsd filesystem to be mounted */ proc_create_mount_point("fs/nfsd"); /* somewhere for the nfsd filesystem to be mounted */
@ -150,6 +140,8 @@ void __init proc_root_init(void)
proc_tty_init(); proc_tty_init();
proc_mkdir("bus", NULL); proc_mkdir("bus", NULL);
proc_sys_init(); proc_sys_init();
register_filesystem(&proc_fs_type);
} }
static int proc_root_getattr(const struct path *path, struct kstat *stat, static int proc_root_getattr(const struct path *path, struct kstat *stat,
@ -207,12 +199,13 @@ struct proc_dir_entry proc_root = {
.namelen = 5, .namelen = 5,
.mode = S_IFDIR | S_IRUGO | S_IXUGO, .mode = S_IFDIR | S_IRUGO | S_IXUGO,
.nlink = 2, .nlink = 2,
.count = ATOMIC_INIT(1), .refcnt = REFCOUNT_INIT(1),
.proc_iops = &proc_root_inode_operations, .proc_iops = &proc_root_inode_operations,
.proc_fops = &proc_root_operations, .proc_fops = &proc_root_operations,
.parent = &proc_root, .parent = &proc_root,
.subdir = RB_ROOT_CACHED, .subdir = RB_ROOT,
.name = "/proc", .name = proc_root.inline_name,
.inline_name = "/proc",
}; };
int pid_ns_prepare_proc(struct pid_namespace *ns) int pid_ns_prepare_proc(struct pid_namespace *ns)

View File

@ -24,6 +24,8 @@
#include <asm/tlbflush.h> #include <asm/tlbflush.h>
#include "internal.h" #include "internal.h"
/*
 * Emit @str followed by @val shifted left by (PAGE_SHIFT - 10), as a decimal
 * with a minimum field width of 8. Expands to a call on a variable named 'm',
 * so it can only be used where such a seq_file pointer is in scope.
 * NOTE(review): the shift presumably converts a page count to kB - confirm.
 */
#define SEQ_PUT_DEC(str, val) \
seq_put_decimal_ull_width(m, str, (val) << (PAGE_SHIFT-10), 8)
void task_mem(struct seq_file *m, struct mm_struct *mm) void task_mem(struct seq_file *m, struct mm_struct *mm)
{ {
unsigned long text, lib, swap, anon, file, shmem; unsigned long text, lib, swap, anon, file, shmem;
@ -53,39 +55,28 @@ void task_mem(struct seq_file *m, struct mm_struct *mm)
lib = (mm->exec_vm << PAGE_SHIFT) - text; lib = (mm->exec_vm << PAGE_SHIFT) - text;
swap = get_mm_counter(mm, MM_SWAPENTS); swap = get_mm_counter(mm, MM_SWAPENTS);
seq_printf(m, SEQ_PUT_DEC("VmPeak:\t", hiwater_vm);
"VmPeak:\t%8lu kB\n" SEQ_PUT_DEC(" kB\nVmSize:\t", total_vm);
"VmSize:\t%8lu kB\n" SEQ_PUT_DEC(" kB\nVmLck:\t", mm->locked_vm);
"VmLck:\t%8lu kB\n" SEQ_PUT_DEC(" kB\nVmPin:\t", mm->pinned_vm);
"VmPin:\t%8lu kB\n" SEQ_PUT_DEC(" kB\nVmHWM:\t", hiwater_rss);
"VmHWM:\t%8lu kB\n" SEQ_PUT_DEC(" kB\nVmRSS:\t", total_rss);
"VmRSS:\t%8lu kB\n" SEQ_PUT_DEC(" kB\nRssAnon:\t", anon);
"RssAnon:\t%8lu kB\n" SEQ_PUT_DEC(" kB\nRssFile:\t", file);
"RssFile:\t%8lu kB\n" SEQ_PUT_DEC(" kB\nRssShmem:\t", shmem);
"RssShmem:\t%8lu kB\n" SEQ_PUT_DEC(" kB\nVmData:\t", mm->data_vm);
"VmData:\t%8lu kB\n" SEQ_PUT_DEC(" kB\nVmStk:\t", mm->stack_vm);
"VmStk:\t%8lu kB\n" seq_put_decimal_ull_width(m,
"VmExe:\t%8lu kB\n" " kB\nVmExe:\t", text >> 10, 8);
"VmLib:\t%8lu kB\n" seq_put_decimal_ull_width(m,
"VmPTE:\t%8lu kB\n" " kB\nVmLib:\t", lib >> 10, 8);
"VmSwap:\t%8lu kB\n", seq_put_decimal_ull_width(m,
hiwater_vm << (PAGE_SHIFT-10), " kB\nVmPTE:\t", mm_pgtables_bytes(mm) >> 10, 8);
total_vm << (PAGE_SHIFT-10), SEQ_PUT_DEC(" kB\nVmSwap:\t", swap);
mm->locked_vm << (PAGE_SHIFT-10), seq_puts(m, " kB\n");
mm->pinned_vm << (PAGE_SHIFT-10),
hiwater_rss << (PAGE_SHIFT-10),
total_rss << (PAGE_SHIFT-10),
anon << (PAGE_SHIFT-10),
file << (PAGE_SHIFT-10),
shmem << (PAGE_SHIFT-10),
mm->data_vm << (PAGE_SHIFT-10),
mm->stack_vm << (PAGE_SHIFT-10),
text >> 10,
lib >> 10,
mm_pgtables_bytes(mm) >> 10,
swap << (PAGE_SHIFT-10));
hugetlb_report_usage(m, mm); hugetlb_report_usage(m, mm);
} }
#undef SEQ_PUT_DEC
unsigned long task_vsize(struct mm_struct *mm) unsigned long task_vsize(struct mm_struct *mm)
{ {
@ -287,15 +278,18 @@ static void show_vma_header_prefix(struct seq_file *m,
dev_t dev, unsigned long ino) dev_t dev, unsigned long ino)
{ {
seq_setwidth(m, 25 + sizeof(void *) * 6 - 1); seq_setwidth(m, 25 + sizeof(void *) * 6 - 1);
seq_printf(m, "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu ", seq_put_hex_ll(m, NULL, start, 8);
start, seq_put_hex_ll(m, "-", end, 8);
end, seq_putc(m, ' ');
flags & VM_READ ? 'r' : '-', seq_putc(m, flags & VM_READ ? 'r' : '-');
flags & VM_WRITE ? 'w' : '-', seq_putc(m, flags & VM_WRITE ? 'w' : '-');
flags & VM_EXEC ? 'x' : '-', seq_putc(m, flags & VM_EXEC ? 'x' : '-');
flags & VM_MAYSHARE ? 's' : 'p', seq_putc(m, flags & VM_MAYSHARE ? 's' : 'p');
pgoff, seq_put_hex_ll(m, " ", pgoff, 8);
MAJOR(dev), MINOR(dev), ino); seq_put_hex_ll(m, " ", MAJOR(dev), 2);
seq_put_hex_ll(m, ":", MINOR(dev), 2);
seq_put_decimal_ull(m, " ", ino);
seq_putc(m, ' ');
} }
static void static void
@ -694,8 +688,9 @@ static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma)
if (!mnemonics[i][0]) if (!mnemonics[i][0])
continue; continue;
if (vma->vm_flags & (1UL << i)) { if (vma->vm_flags & (1UL << i)) {
seq_printf(m, "%c%c ", seq_putc(m, mnemonics[i][0]);
mnemonics[i][0], mnemonics[i][1]); seq_putc(m, mnemonics[i][1]);
seq_putc(m, ' ');
} }
} }
seq_putc(m, '\n'); seq_putc(m, '\n');
@ -736,6 +731,8 @@ void __weak arch_show_smap(struct seq_file *m, struct vm_area_struct *vma)
{ {
} }
/*
 * Emit @str followed by (@val >> 10) as a decimal with a minimum field width
 * of 8. Expands to a call on a variable named 'm', so it can only be used
 * where such a seq_file pointer is in scope.
 * NOTE(review): the shift presumably converts a byte count to kB - confirm.
 */
#define SEQ_PUT_DEC(str, val) \
seq_put_decimal_ull_width(m, str, (val) >> 10, 8)
static int show_smap(struct seq_file *m, void *v, int is_pid) static int show_smap(struct seq_file *m, void *v, int is_pid)
{ {
struct proc_maps_private *priv = m->private; struct proc_maps_private *priv = m->private;
@ -809,51 +806,34 @@ static int show_smap(struct seq_file *m, void *v, int is_pid)
ret = SEQ_SKIP; ret = SEQ_SKIP;
} }
if (!rollup_mode) if (!rollup_mode) {
seq_printf(m, SEQ_PUT_DEC("Size: ", vma->vm_end - vma->vm_start);
"Size: %8lu kB\n" SEQ_PUT_DEC(" kB\nKernelPageSize: ", vma_kernel_pagesize(vma));
"KernelPageSize: %8lu kB\n" SEQ_PUT_DEC(" kB\nMMUPageSize: ", vma_mmu_pagesize(vma));
"MMUPageSize: %8lu kB\n", seq_puts(m, " kB\n");
(vma->vm_end - vma->vm_start) >> 10, }
vma_kernel_pagesize(vma) >> 10,
vma_mmu_pagesize(vma) >> 10);
if (!rollup_mode || last_vma)
seq_printf(m,
"Rss: %8lu kB\n"
"Pss: %8lu kB\n"
"Shared_Clean: %8lu kB\n"
"Shared_Dirty: %8lu kB\n"
"Private_Clean: %8lu kB\n"
"Private_Dirty: %8lu kB\n"
"Referenced: %8lu kB\n"
"Anonymous: %8lu kB\n"
"LazyFree: %8lu kB\n"
"AnonHugePages: %8lu kB\n"
"ShmemPmdMapped: %8lu kB\n"
"Shared_Hugetlb: %8lu kB\n"
"Private_Hugetlb: %7lu kB\n"
"Swap: %8lu kB\n"
"SwapPss: %8lu kB\n"
"Locked: %8lu kB\n",
mss->resident >> 10,
(unsigned long)(mss->pss >> (10 + PSS_SHIFT)),
mss->shared_clean >> 10,
mss->shared_dirty >> 10,
mss->private_clean >> 10,
mss->private_dirty >> 10,
mss->referenced >> 10,
mss->anonymous >> 10,
mss->lazyfree >> 10,
mss->anonymous_thp >> 10,
mss->shmem_thp >> 10,
mss->shared_hugetlb >> 10,
mss->private_hugetlb >> 10,
mss->swap >> 10,
(unsigned long)(mss->swap_pss >> (10 + PSS_SHIFT)),
(unsigned long)(mss->pss >> (10 + PSS_SHIFT)));
if (!rollup_mode || last_vma) {
SEQ_PUT_DEC("Rss: ", mss->resident);
SEQ_PUT_DEC(" kB\nPss: ", mss->pss >> PSS_SHIFT);
SEQ_PUT_DEC(" kB\nShared_Clean: ", mss->shared_clean);
SEQ_PUT_DEC(" kB\nShared_Dirty: ", mss->shared_dirty);
SEQ_PUT_DEC(" kB\nPrivate_Clean: ", mss->private_clean);
SEQ_PUT_DEC(" kB\nPrivate_Dirty: ", mss->private_dirty);
SEQ_PUT_DEC(" kB\nReferenced: ", mss->referenced);
SEQ_PUT_DEC(" kB\nAnonymous: ", mss->anonymous);
SEQ_PUT_DEC(" kB\nLazyFree: ", mss->lazyfree);
SEQ_PUT_DEC(" kB\nAnonHugePages: ", mss->anonymous_thp);
SEQ_PUT_DEC(" kB\nShmemPmdMapped: ", mss->shmem_thp);
SEQ_PUT_DEC(" kB\nShared_Hugetlb: ", mss->shared_hugetlb);
seq_put_decimal_ull_width(m, " kB\nPrivate_Hugetlb: ",
mss->private_hugetlb >> 10, 7);
SEQ_PUT_DEC(" kB\nSwap: ", mss->swap);
SEQ_PUT_DEC(" kB\nSwapPss: ",
mss->swap_pss >> PSS_SHIFT);
SEQ_PUT_DEC(" kB\nLocked: ", mss->pss >> PSS_SHIFT);
seq_puts(m, " kB\n");
}
if (!rollup_mode) { if (!rollup_mode) {
arch_show_smap(m, vma); arch_show_smap(m, vma);
show_smap_vma_flags(m, vma); show_smap_vma_flags(m, vma);
@ -861,6 +841,7 @@ static int show_smap(struct seq_file *m, void *v, int is_pid)
m_cache_vma(m, vma); m_cache_vma(m, vma);
return ret; return ret;
} }
#undef SEQ_PUT_DEC
static int show_pid_smap(struct seq_file *m, void *v) static int show_pid_smap(struct seq_file *m, void *v)
{ {

View File

@ -2643,7 +2643,7 @@ static int journal_init_dev(struct super_block *super,
if (IS_ERR(journal->j_dev_bd)) { if (IS_ERR(journal->j_dev_bd)) {
result = PTR_ERR(journal->j_dev_bd); result = PTR_ERR(journal->j_dev_bd);
journal->j_dev_bd = NULL; journal->j_dev_bd = NULL;
reiserfs_warning(super, reiserfs_warning(super, "sh-457",
"journal_init_dev: Cannot open '%s': %i", "journal_init_dev: Cannot open '%s': %i",
jdev_name, result); jdev_name, result);
return result; return result;

View File

@ -6,6 +6,7 @@
* initial implementation -- AV, Oct 2001. * initial implementation -- AV, Oct 2001.
*/ */
#include <linux/cache.h>
#include <linux/fs.h> #include <linux/fs.h>
#include <linux/export.h> #include <linux/export.h>
#include <linux/seq_file.h> #include <linux/seq_file.h>
@ -19,6 +20,8 @@
#include <linux/uaccess.h> #include <linux/uaccess.h>
#include <asm/page.h> #include <asm/page.h>
static struct kmem_cache *seq_file_cache __ro_after_init;
static void seq_set_overflow(struct seq_file *m) static void seq_set_overflow(struct seq_file *m)
{ {
m->count = m->size; m->count = m->size;
@ -26,7 +29,7 @@ static void seq_set_overflow(struct seq_file *m)
static void *seq_buf_alloc(unsigned long size) static void *seq_buf_alloc(unsigned long size)
{ {
return kvmalloc(size, GFP_KERNEL); return kvmalloc(size, GFP_KERNEL_ACCOUNT);
} }
/** /**
@ -51,7 +54,7 @@ int seq_open(struct file *file, const struct seq_operations *op)
WARN_ON(file->private_data); WARN_ON(file->private_data);
p = kzalloc(sizeof(*p), GFP_KERNEL); p = kmem_cache_zalloc(seq_file_cache, GFP_KERNEL);
if (!p) if (!p)
return -ENOMEM; return -ENOMEM;
@ -366,7 +369,7 @@ int seq_release(struct inode *inode, struct file *file)
{ {
struct seq_file *m = file->private_data; struct seq_file *m = file->private_data;
kvfree(m->buf); kvfree(m->buf);
kfree(m); kmem_cache_free(seq_file_cache, m);
return 0; return 0;
} }
EXPORT_SYMBOL(seq_release); EXPORT_SYMBOL(seq_release);
@ -563,7 +566,7 @@ static void single_stop(struct seq_file *p, void *v)
int single_open(struct file *file, int (*show)(struct seq_file *, void *), int single_open(struct file *file, int (*show)(struct seq_file *, void *),
void *data) void *data)
{ {
struct seq_operations *op = kmalloc(sizeof(*op), GFP_KERNEL); struct seq_operations *op = kmalloc(sizeof(*op), GFP_KERNEL_ACCOUNT);
int res = -ENOMEM; int res = -ENOMEM;
if (op) { if (op) {
@ -625,7 +628,7 @@ void *__seq_open_private(struct file *f, const struct seq_operations *ops,
void *private; void *private;
struct seq_file *seq; struct seq_file *seq;
private = kzalloc(psize, GFP_KERNEL); private = kzalloc(psize, GFP_KERNEL_ACCOUNT);
if (private == NULL) if (private == NULL)
goto out; goto out;
@ -673,29 +676,37 @@ void seq_puts(struct seq_file *m, const char *s)
} }
EXPORT_SYMBOL(seq_puts); EXPORT_SYMBOL(seq_puts);
/* /**
* A helper routine for putting decimal numbers without rich format of printf(). * A helper routine for putting decimal numbers without rich format of printf().
* only 'unsigned long long' is supported. * only 'unsigned long long' is supported.
* This routine will put strlen(delimiter) + number into seq_file. * @m: seq_file identifying the buffer to which data should be written
* @delimiter: a string which is printed before the number
* @num: the number
* @width: a minimum field width
*
* This routine will put strlen(delimiter) + number into seq_file.
* This routine is very quick when you show lots of numbers. * This routine is very quick when you show lots of numbers.
* In usual cases, it will be better to use seq_printf(). It's easier to read. * In usual cases, it will be better to use seq_printf(). It's easier to read.
*/ */
void seq_put_decimal_ull(struct seq_file *m, const char *delimiter, void seq_put_decimal_ull_width(struct seq_file *m, const char *delimiter,
unsigned long long num) unsigned long long num, unsigned int width)
{ {
int len; int len;
if (m->count + 2 >= m->size) /* we'll write 2 bytes at least */ if (m->count + 2 >= m->size) /* we'll write 2 bytes at least */
goto overflow; goto overflow;
len = strlen(delimiter); if (delimiter && delimiter[0]) {
if (m->count + len >= m->size) if (delimiter[1] == 0)
goto overflow; seq_putc(m, delimiter[0]);
else
seq_puts(m, delimiter);
}
memcpy(m->buf + m->count, delimiter, len); if (!width)
m->count += len; width = 1;
if (m->count + 1 >= m->size) if (m->count + width >= m->size)
goto overflow; goto overflow;
if (num < 10) { if (num < 10) {
@ -703,7 +714,7 @@ void seq_put_decimal_ull(struct seq_file *m, const char *delimiter,
return; return;
} }
len = num_to_str(m->buf + m->count, m->size - m->count, num); len = num_to_str(m->buf + m->count, m->size - m->count, num, width);
if (!len) if (!len)
goto overflow; goto overflow;
@ -713,8 +724,60 @@ void seq_put_decimal_ull(struct seq_file *m, const char *delimiter,
overflow: overflow:
seq_set_overflow(m); seq_set_overflow(m);
} }
/*
 * seq_put_decimal_ull - put a delimiter and an unsigned decimal number
 * @m: seq_file identifying the buffer to which data should be written
 * @delimiter: a string which is printed before the number
 * @num: the number
 *
 * Thin wrapper around seq_put_decimal_ull_width() that passes a width of 0,
 * i.e. it requests no minimum field width.
 */
void seq_put_decimal_ull(struct seq_file *m, const char *delimiter,
			 unsigned long long num)
{
	/*
	 * No 'return' here: returning an expression from a void function is
	 * a constraint violation in ISO C, even when the expression is void.
	 */
	seq_put_decimal_ull_width(m, delimiter, num, 0);
}
EXPORT_SYMBOL(seq_put_decimal_ull); EXPORT_SYMBOL(seq_put_decimal_ull);
/**
 * seq_put_hex_ll - append a delimiter and a hexadecimal number to a seq_file
 * @m: seq_file identifying the buffer to which data should be written
 * @delimiter: string emitted before the number; NULL or "" emits nothing
 * @v: the value to print
 * @width: minimum number of hex digits; shorter values are zero-padded
 *
 * seq_put_hex_ll(m, "", v, 8) is equal to seq_printf(m, "%08llx", v)
 *
 * The digits are written straight into the buffer without going through the
 * printf machinery. If they do not fit, the seq_file is marked as overflowed
 * and no digit is written.
 */
void seq_put_hex_ll(struct seq_file *m, const char *delimiter,
		    unsigned long long v, unsigned int width)
{
	unsigned int ndigits;
	int idx;

	if (delimiter && delimiter[0]) {
		/* a one-character delimiter takes the cheaper seq_putc() path */
		if (delimiter[1])
			seq_puts(m, delimiter);
		else
			seq_putc(m, delimiter[0]);
	}

	/*
	 * __builtin_clzll(0) is undefined, so zero is special-cased as one
	 * digit; otherwise round the count of significant bits up to nibbles.
	 */
	ndigits = v ? (sizeof(v) * 8 - __builtin_clzll(v) + 3) / 4 : 1;
	if (width > ndigits)
		ndigits = width;

	if (m->count + ndigits > m->size) {
		seq_set_overflow(m);
		return;
	}

	/* fill from the least significant digit backwards */
	idx = ndigits - 1;
	while (idx >= 0) {
		m->buf[m->count + idx] = hex_asc[0xf & v];
		v >>= 4;
		idx--;
	}
	m->count += ndigits;
}
void seq_put_decimal_ll(struct seq_file *m, const char *delimiter, long long num) void seq_put_decimal_ll(struct seq_file *m, const char *delimiter, long long num)
{ {
int len; int len;
@ -722,12 +785,12 @@ void seq_put_decimal_ll(struct seq_file *m, const char *delimiter, long long num
if (m->count + 3 >= m->size) /* we'll write 2 bytes at least */ if (m->count + 3 >= m->size) /* we'll write 2 bytes at least */
goto overflow; goto overflow;
len = strlen(delimiter); if (delimiter && delimiter[0]) {
if (m->count + len >= m->size) if (delimiter[1] == 0)
goto overflow; seq_putc(m, delimiter[0]);
else
memcpy(m->buf + m->count, delimiter, len); seq_puts(m, delimiter);
m->count += len; }
if (m->count + 2 >= m->size) if (m->count + 2 >= m->size)
goto overflow; goto overflow;
@ -742,7 +805,7 @@ void seq_put_decimal_ll(struct seq_file *m, const char *delimiter, long long num
return; return;
} }
len = num_to_str(m->buf + m->count, m->size - m->count, num); len = num_to_str(m->buf + m->count, m->size - m->count, num, 0);
if (!len) if (!len)
goto overflow; goto overflow;
@ -782,8 +845,14 @@ EXPORT_SYMBOL(seq_write);
void seq_pad(struct seq_file *m, char c) void seq_pad(struct seq_file *m, char c)
{ {
int size = m->pad_until - m->count; int size = m->pad_until - m->count;
if (size > 0) if (size > 0) {
seq_printf(m, "%*s", size, ""); if (size + m->count > m->size) {
seq_set_overflow(m);
return;
}
memset(m->buf + m->count, ' ', size);
m->count += size;
}
if (c) if (c)
seq_putc(m, c); seq_putc(m, c);
} }
@ -1040,3 +1109,8 @@ seq_hlist_next_percpu(void *v, struct hlist_head __percpu *head,
return NULL; return NULL;
} }
EXPORT_SYMBOL(seq_hlist_next_percpu); EXPORT_SYMBOL(seq_hlist_next_percpu);
/*
 * Init-time setup for seq_file: create the slab cache for struct seq_file
 * (flags SLAB_ACCOUNT | SLAB_PANIC) and store it in seq_file_cache.
 */
void __init seq_file_init(void)
{
seq_file_cache = KMEM_CACHE(seq_file, SLAB_ACCOUNT|SLAB_PANIC);
}

View File

@ -1467,19 +1467,8 @@ xfs_vm_set_page_dirty(
newly_dirty = !TestSetPageDirty(page); newly_dirty = !TestSetPageDirty(page);
spin_unlock(&mapping->private_lock); spin_unlock(&mapping->private_lock);
if (newly_dirty) { if (newly_dirty)
/* sigh - __set_page_dirty() is static, so copy it here, too */ __set_page_dirty(page, mapping, 1);
unsigned long flags;
spin_lock_irqsave(&mapping->tree_lock, flags);
if (page->mapping) { /* Race with truncate? */
WARN_ON_ONCE(!PageUptodate(page));
account_page_dirtied(page, mapping);
radix_tree_tag_set(&mapping->page_tree,
page_index(page), PAGECACHE_TAG_DIRTY);
}
spin_unlock_irqrestore(&mapping->tree_lock, flags);
}
unlock_page_memcg(page); unlock_page_memcg(page);
if (newly_dirty) if (newly_dirty)
__mark_inode_dirty(mapping->host, I_DIRTY_PAGES); __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);

View File

@ -175,7 +175,7 @@ static inline int wb_congested(struct bdi_writeback *wb, int cong_bits)
} }
long congestion_wait(int sync, long timeout); long congestion_wait(int sync, long timeout);
long wait_iff_congested(struct pglist_data *pgdat, int sync, long timeout); long wait_iff_congested(int sync, long timeout);
static inline bool bdi_cap_synchronous_io(struct backing_dev_info *bdi) static inline bool bdi_cap_synchronous_io(struct backing_dev_info *bdi)
{ {
@ -329,7 +329,7 @@ static inline bool inode_to_wb_is_valid(struct inode *inode)
* @inode: inode of interest * @inode: inode of interest
* *
* Returns the wb @inode is currently associated with. The caller must be * Returns the wb @inode is currently associated with. The caller must be
* holding either @inode->i_lock, @inode->i_mapping->tree_lock, or the * holding either @inode->i_lock, the i_pages lock, or the
* associated wb's list_lock. * associated wb's list_lock.
*/ */
static inline struct bdi_writeback *inode_to_wb(const struct inode *inode) static inline struct bdi_writeback *inode_to_wb(const struct inode *inode)
@ -337,7 +337,7 @@ static inline struct bdi_writeback *inode_to_wb(const struct inode *inode)
#ifdef CONFIG_LOCKDEP #ifdef CONFIG_LOCKDEP
WARN_ON_ONCE(debug_locks && WARN_ON_ONCE(debug_locks &&
(!lockdep_is_held(&inode->i_lock) && (!lockdep_is_held(&inode->i_lock) &&
!lockdep_is_held(&inode->i_mapping->tree_lock) && !lockdep_is_held(&inode->i_mapping->i_pages.xa_lock) &&
!lockdep_is_held(&inode->i_wb->list_lock))); !lockdep_is_held(&inode->i_wb->list_lock)));
#endif #endif
return inode->i_wb; return inode->i_wb;
@ -349,7 +349,7 @@ static inline struct bdi_writeback *inode_to_wb(const struct inode *inode)
* @lockedp: temp bool output param, to be passed to the end function * @lockedp: temp bool output param, to be passed to the end function
* *
* The caller wants to access the wb associated with @inode but isn't * The caller wants to access the wb associated with @inode but isn't
* holding inode->i_lock, mapping->tree_lock or wb->list_lock. This * holding inode->i_lock, the i_pages lock or wb->list_lock. This
* function determines the wb associated with @inode and ensures that the * function determines the wb associated with @inode and ensures that the
* association doesn't change until the transaction is finished with * association doesn't change until the transaction is finished with
* unlocked_inode_to_wb_end(). * unlocked_inode_to_wb_end().
@ -370,11 +370,11 @@ unlocked_inode_to_wb_begin(struct inode *inode, bool *lockedp)
*lockedp = smp_load_acquire(&inode->i_state) & I_WB_SWITCH; *lockedp = smp_load_acquire(&inode->i_state) & I_WB_SWITCH;
if (unlikely(*lockedp)) if (unlikely(*lockedp))
spin_lock_irq(&inode->i_mapping->tree_lock); xa_lock_irq(&inode->i_mapping->i_pages);
/* /*
* Protected by either !I_WB_SWITCH + rcu_read_lock() or tree_lock. * Protected by either !I_WB_SWITCH + rcu_read_lock() or the i_pages
* inode_to_wb() will bark. Deref directly. * lock. inode_to_wb() will bark. Deref directly.
*/ */
return inode->i_wb; return inode->i_wb;
} }
@ -387,7 +387,7 @@ unlocked_inode_to_wb_begin(struct inode *inode, bool *lockedp)
static inline void unlocked_inode_to_wb_end(struct inode *inode, bool locked) static inline void unlocked_inode_to_wb_end(struct inode *inode, bool locked)
{ {
if (unlikely(locked)) if (unlikely(locked))
spin_unlock_irq(&inode->i_mapping->tree_lock); xa_unlock_irq(&inode->i_mapping->i_pages);
rcu_read_unlock(); rcu_read_unlock();
} }

View File

@ -61,6 +61,8 @@ struct linux_binprm {
unsigned interp_flags; unsigned interp_flags;
unsigned interp_data; unsigned interp_data;
unsigned long loader, exec; unsigned long loader, exec;
struct rlimit rlim_stack; /* Saved RLIMIT_STACK used during exec. */
} __randomize_layout; } __randomize_layout;
#define BINPRM_FLAGS_ENFORCE_NONDUMP_BIT 0 #define BINPRM_FLAGS_ENFORCE_NONDUMP_BIT 0
@ -118,6 +120,7 @@ extern int __must_check remove_arg_zero(struct linux_binprm *);
extern int search_binary_handler(struct linux_binprm *); extern int search_binary_handler(struct linux_binprm *);
extern int flush_old_exec(struct linux_binprm * bprm); extern int flush_old_exec(struct linux_binprm * bprm);
extern void setup_new_exec(struct linux_binprm * bprm); extern void setup_new_exec(struct linux_binprm * bprm);
extern void finalize_exec(struct linux_binprm *bprm);
extern void would_dump(struct linux_binprm *, struct file *); extern void would_dump(struct linux_binprm *, struct file *);
extern int suid_dumpable; extern int suid_dumpable;

View File

@ -17,9 +17,6 @@
*/ */
#define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __COUNTER__) #define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __COUNTER__)
#define randomized_struct_fields_start struct {
#define randomized_struct_fields_end };
/* all clang versions usable with the kernel support KASAN ABI version 5 */ /* all clang versions usable with the kernel support KASAN ABI version 5 */
#define KASAN_ABI_VERSION 5 #define KASAN_ABI_VERSION 5

View File

@ -242,6 +242,9 @@
#if defined(RANDSTRUCT_PLUGIN) && !defined(__CHECKER__) #if defined(RANDSTRUCT_PLUGIN) && !defined(__CHECKER__)
#define __randomize_layout __attribute__((randomize_layout)) #define __randomize_layout __attribute__((randomize_layout))
#define __no_randomize_layout __attribute__((no_randomize_layout)) #define __no_randomize_layout __attribute__((no_randomize_layout))
/* This anon struct can add padding, so only enable it under randstruct. */
#define randomized_struct_fields_start struct {
#define randomized_struct_fields_end } __randomize_layout;
#endif #endif
#endif /* GCC_VERSION >= 40500 */ #endif /* GCC_VERSION >= 40500 */
@ -256,15 +259,6 @@
*/ */
#define __visible __attribute__((externally_visible)) #define __visible __attribute__((externally_visible))
/*
* RANDSTRUCT_PLUGIN wants to use an anonymous struct, but it is only
* possible since GCC 4.6. To provide as much build testing coverage
* as possible, this is used for all GCC 4.6+ builds, and not just on
* RANDSTRUCT_PLUGIN builds.
*/
#define randomized_struct_fields_start struct {
#define randomized_struct_fields_end } __randomize_layout;
#endif /* GCC_VERSION >= 40600 */ #endif /* GCC_VERSION >= 40600 */

9
include/linux/const.h Normal file
View File

@ -0,0 +1,9 @@
/* Kernel-internal constant helpers layered on top of the uapi header. */
#ifndef _LINUX_CONST_H
#define _LINUX_CONST_H
#include <uapi/linux/const.h>
/*
 * UL()/ULL() simply forward to _UL()/_ULL().
 * NOTE(review): those are presumably provided by <uapi/linux/const.h> - confirm.
 */
#define UL(x) (_UL(x))
#define ULL(x) (_ULL(x))
#endif /* _LINUX_CONST_H */

View File

@ -13,6 +13,7 @@
#include <linux/list_lru.h> #include <linux/list_lru.h>
#include <linux/llist.h> #include <linux/llist.h>
#include <linux/radix-tree.h> #include <linux/radix-tree.h>
#include <linux/xarray.h>
#include <linux/rbtree.h> #include <linux/rbtree.h>
#include <linux/init.h> #include <linux/init.h>
#include <linux/pid.h> #include <linux/pid.h>
@ -390,12 +391,11 @@ int pagecache_write_end(struct file *, struct address_space *mapping,
struct address_space { struct address_space {
struct inode *host; /* owner: inode, block_device */ struct inode *host; /* owner: inode, block_device */
struct radix_tree_root page_tree; /* radix tree of all pages */ struct radix_tree_root i_pages; /* cached pages */
spinlock_t tree_lock; /* and lock protecting it */
atomic_t i_mmap_writable;/* count VM_SHARED mappings */ atomic_t i_mmap_writable;/* count VM_SHARED mappings */
struct rb_root_cached i_mmap; /* tree of private and shared mappings */ struct rb_root_cached i_mmap; /* tree of private and shared mappings */
struct rw_semaphore i_mmap_rwsem; /* protect tree, count, list */ struct rw_semaphore i_mmap_rwsem; /* protect tree, count, list */
/* Protected by tree_lock together with the radix tree */ /* Protected by the i_pages lock */
unsigned long nrpages; /* number of total pages */ unsigned long nrpages; /* number of total pages */
/* number of shadow or DAX exceptional entries */ /* number of shadow or DAX exceptional entries */
unsigned long nrexceptional; unsigned long nrexceptional;
@ -1989,7 +1989,7 @@ static inline void init_sync_kiocb(struct kiocb *kiocb, struct file *filp)
* *
* I_WB_SWITCH Cgroup bdi_writeback switching in progress. Used to * I_WB_SWITCH Cgroup bdi_writeback switching in progress. Used to
* synchronize competing switching instances and to tell * synchronize competing switching instances and to tell
* wb stat updates to grab mapping->tree_lock. See * wb stat updates to grab the i_pages lock. See
* inode_switch_wb_work_fn() for details. * inode_switch_wb_work_fn() for details.
* *
* I_OVL_INUSE Used by overlayfs to get exclusive ownership on upper * I_OVL_INUSE Used by overlayfs to get exclusive ownership on upper

View File

@ -80,76 +80,145 @@
struct hmm; struct hmm;
/* /*
* hmm_pfn_t - HMM uses its own pfn type to keep several flags per page * hmm_pfn_flag_e - HMM flag enums
* *
* Flags: * Flags:
* HMM_PFN_VALID: pfn is valid * HMM_PFN_VALID: pfn is valid. It has, at least, read permission.
* HMM_PFN_READ: CPU page table has read permission set
* HMM_PFN_WRITE: CPU page table has write permission set * HMM_PFN_WRITE: CPU page table has write permission set
* HMM_PFN_DEVICE_PRIVATE: private device memory (ZONE_DEVICE)
*
* The driver provides a flags array. For example, if the driver's valid bit
* for an entry is bit 3, i.e. (entry & (1 << 3)) is true when the entry is
* valid, then the driver must provide an array in hmm_range.flags with
* hmm_range.flags[HMM_PFN_VALID] == 1 << 3. The same logic applies to all
* flags. This is the same idea as vm_page_prot in a vma, except that it is
* per device driver rather than per architecture.
*/
enum hmm_pfn_flag_e {
HMM_PFN_VALID = 0,
HMM_PFN_WRITE,
HMM_PFN_DEVICE_PRIVATE,
HMM_PFN_FLAG_MAX
};
/*
* hmm_pfn_value_e - HMM pfn special value
*
* Flags:
* HMM_PFN_ERROR: corresponding CPU page table entry points to poisoned memory * HMM_PFN_ERROR: corresponding CPU page table entry points to poisoned memory
* HMM_PFN_EMPTY: corresponding CPU page table entry is pte_none() * HMM_PFN_NONE: corresponding CPU page table entry is pte_none()
* HMM_PFN_SPECIAL: corresponding CPU page table entry is special; i.e., the * HMM_PFN_SPECIAL: corresponding CPU page table entry is special; i.e., the
* result of vm_insert_pfn() or vm_insert_page(). Therefore, it should not * result of vm_insert_pfn() or vm_insert_page(). Therefore, it should not
* be mirrored by a device, because the entry will never have HMM_PFN_VALID * be mirrored by a device, because the entry will never have HMM_PFN_VALID
* set and the pfn value is undefined. * set and the pfn value is undefined.
* HMM_PFN_DEVICE_UNADDRESSABLE: unaddressable device memory (ZONE_DEVICE)
*/
typedef unsigned long hmm_pfn_t;
#define HMM_PFN_VALID (1 << 0)
#define HMM_PFN_READ (1 << 1)
#define HMM_PFN_WRITE (1 << 2)
#define HMM_PFN_ERROR (1 << 3)
#define HMM_PFN_EMPTY (1 << 4)
#define HMM_PFN_SPECIAL (1 << 5)
#define HMM_PFN_DEVICE_UNADDRESSABLE (1 << 6)
#define HMM_PFN_SHIFT 7
/*
* hmm_pfn_t_to_page() - return struct page pointed to by a valid hmm_pfn_t
* @pfn: hmm_pfn_t to convert to struct page
* Returns: struct page pointer if pfn is a valid hmm_pfn_t, NULL otherwise
* *
* If the hmm_pfn_t is valid (ie valid flag set) then return the struct page * Driver provide entry value for none entry, error entry and special entry,
* matching the pfn value stored in the hmm_pfn_t. Otherwise return NULL. * driver can alias (ie use same value for error and special for instance). It
* should not alias none and error or special.
*
* HMM pfn value returned by hmm_vma_get_pfns() or hmm_vma_fault() will be:
* hmm_range.values[HMM_PFN_ERROR] if CPU page table entry is poisonous,
* hmm_range.values[HMM_PFN_NONE] if there is no CPU page table
* hmm_range.values[HMM_PFN_SPECIAL] if CPU page table entry is a special one
*/ */
static inline struct page *hmm_pfn_t_to_page(hmm_pfn_t pfn) enum hmm_pfn_value_e {
HMM_PFN_ERROR,
HMM_PFN_NONE,
HMM_PFN_SPECIAL,
HMM_PFN_VALUE_MAX
};
/*
* struct hmm_range - track invalidation lock on virtual address range
*
* @vma: the vm area struct for the range
* @list: all range lock are on a list
* @start: range virtual start address (inclusive)
* @end: range virtual end address (exclusive)
* @pfns: array of pfns (big enough for the range)
* @flags: pfn flags to match device driver page table
* @values: pfn value for some special case (none, special, error, ...)
* @pfn_shift: pfn shift value (should be <= PAGE_SHIFT)
* @valid: pfns array did not change since it was filled by an HMM function
*/
struct hmm_range {
struct vm_area_struct *vma;
struct list_head list;
unsigned long start;
unsigned long end;
uint64_t *pfns;
const uint64_t *flags;
const uint64_t *values;
uint8_t pfn_shift;
bool valid;
};
/*
* hmm_pfn_to_page() - return struct page pointed to by a valid HMM pfn
* @range: range use to decode HMM pfn value
* @pfn: HMM pfn value to get corresponding struct page from
* Returns: struct page pointer if pfn is a valid HMM pfn, NULL otherwise
*
* If the HMM pfn is valid (ie valid flag set) then return the struct page
* matching the pfn value stored in the HMM pfn. Otherwise return NULL.
*/
static inline struct page *hmm_pfn_to_page(const struct hmm_range *range,
uint64_t pfn)
{ {
if (!(pfn & HMM_PFN_VALID)) if (pfn == range->values[HMM_PFN_NONE])
return NULL; return NULL;
return pfn_to_page(pfn >> HMM_PFN_SHIFT); if (pfn == range->values[HMM_PFN_ERROR])
return NULL;
if (pfn == range->values[HMM_PFN_SPECIAL])
return NULL;
if (!(pfn & range->flags[HMM_PFN_VALID]))
return NULL;
return pfn_to_page(pfn >> range->pfn_shift);
} }
/* /*
* hmm_pfn_t_to_pfn() - return pfn value store in a hmm_pfn_t * hmm_pfn_to_pfn() - return pfn value store in a HMM pfn
* @pfn: hmm_pfn_t to extract pfn from * @range: range use to decode HMM pfn value
* Returns: pfn value if hmm_pfn_t is valid, -1UL otherwise * @pfn: HMM pfn value to extract pfn from
* Returns: pfn value if HMM pfn is valid, -1UL otherwise
*/ */
static inline unsigned long hmm_pfn_t_to_pfn(hmm_pfn_t pfn) static inline unsigned long hmm_pfn_to_pfn(const struct hmm_range *range,
uint64_t pfn)
{ {
if (!(pfn & HMM_PFN_VALID)) if (pfn == range->values[HMM_PFN_NONE])
return -1UL; return -1UL;
return (pfn >> HMM_PFN_SHIFT); if (pfn == range->values[HMM_PFN_ERROR])
return -1UL;
if (pfn == range->values[HMM_PFN_SPECIAL])
return -1UL;
if (!(pfn & range->flags[HMM_PFN_VALID]))
return -1UL;
return (pfn >> range->pfn_shift);
} }
/* /*
* hmm_pfn_t_from_page() - create a valid hmm_pfn_t value from struct page * hmm_pfn_from_page() - create a valid HMM pfn value from struct page
* @page: struct page pointer for which to create the hmm_pfn_t * @range: range use to encode HMM pfn value
* Returns: valid hmm_pfn_t for the page * @page: struct page pointer for which to create the HMM pfn
* Returns: valid HMM pfn for the page
*/ */
static inline hmm_pfn_t hmm_pfn_t_from_page(struct page *page) static inline uint64_t hmm_pfn_from_page(const struct hmm_range *range,
struct page *page)
{ {
return (page_to_pfn(page) << HMM_PFN_SHIFT) | HMM_PFN_VALID; return (page_to_pfn(page) << range->pfn_shift) |
range->flags[HMM_PFN_VALID];
} }
/* /*
* hmm_pfn_t_from_pfn() - create a valid hmm_pfn_t value from pfn * hmm_pfn_from_pfn() - create a valid HMM pfn value from pfn
* @pfn: pfn value for which to create the hmm_pfn_t * @range: range use to encode HMM pfn value
* Returns: valid hmm_pfn_t for the pfn * @pfn: pfn value for which to create the HMM pfn
* Returns: valid HMM pfn for the pfn
*/ */
static inline hmm_pfn_t hmm_pfn_t_from_pfn(unsigned long pfn) static inline uint64_t hmm_pfn_from_pfn(const struct hmm_range *range,
unsigned long pfn)
{ {
return (pfn << HMM_PFN_SHIFT) | HMM_PFN_VALID; return (pfn << range->pfn_shift) |
range->flags[HMM_PFN_VALID];
} }
@ -218,6 +287,16 @@ enum hmm_update_type {
* @update: callback to update range on a device * @update: callback to update range on a device
*/ */
struct hmm_mirror_ops { struct hmm_mirror_ops {
/* release() - release hmm_mirror
*
* @mirror: pointer to struct hmm_mirror
*
* This is called when the mm_struct is being released.
* The callback should make sure no references to the mirror occur
* after the callback returns.
*/
void (*release)(struct hmm_mirror *mirror);
/* sync_cpu_device_pagetables() - synchronize page tables /* sync_cpu_device_pagetables() - synchronize page tables
* *
* @mirror: pointer to struct hmm_mirror * @mirror: pointer to struct hmm_mirror
@ -261,23 +340,6 @@ int hmm_mirror_register(struct hmm_mirror *mirror, struct mm_struct *mm);
void hmm_mirror_unregister(struct hmm_mirror *mirror); void hmm_mirror_unregister(struct hmm_mirror *mirror);
/*
* struct hmm_range - track invalidation lock on virtual address range
*
* @list: all range lock are on a list
* @start: range virtual start address (inclusive)
* @end: range virtual end address (exclusive)
* @pfns: array of pfns (big enough for the range)
* @valid: pfns array did not change since it has been fill by an HMM function
*/
struct hmm_range {
struct list_head list;
unsigned long start;
unsigned long end;
hmm_pfn_t *pfns;
bool valid;
};
/* /*
* To snapshot the CPU page table, call hmm_vma_get_pfns(), then take a device * To snapshot the CPU page table, call hmm_vma_get_pfns(), then take a device
* driver lock that serializes device page table updates, then call * driver lock that serializes device page table updates, then call
@ -291,17 +353,13 @@ struct hmm_range {
* *
* IF YOU DO NOT FOLLOW THE ABOVE RULE THE SNAPSHOT CONTENT MIGHT BE INVALID ! * IF YOU DO NOT FOLLOW THE ABOVE RULE THE SNAPSHOT CONTENT MIGHT BE INVALID !
*/ */
int hmm_vma_get_pfns(struct vm_area_struct *vma, int hmm_vma_get_pfns(struct hmm_range *range);
struct hmm_range *range, bool hmm_vma_range_done(struct hmm_range *range);
unsigned long start,
unsigned long end,
hmm_pfn_t *pfns);
bool hmm_vma_range_done(struct vm_area_struct *vma, struct hmm_range *range);
/* /*
* Fault memory on behalf of device driver. Unlike handle_mm_fault(), this will * Fault memory on behalf of device driver. Unlike handle_mm_fault(), this will
* not migrate any device memory back to system memory. The hmm_pfn_t array will * not migrate any device memory back to system memory. The HMM pfn array will
* be updated with the fault result and current snapshot of the CPU page table * be updated with the fault result and current snapshot of the CPU page table
* for the range. * for the range.
* *
@ -310,22 +368,26 @@ bool hmm_vma_range_done(struct vm_area_struct *vma, struct hmm_range *range);
* function returns -EAGAIN. * function returns -EAGAIN.
* *
* Return value does not reflect if the fault was successful for every single * Return value does not reflect if the fault was successful for every single
* address or not. Therefore, the caller must to inspect the hmm_pfn_t array to * address or not. Therefore, the caller must to inspect the HMM pfn array to
* determine fault status for each address. * determine fault status for each address.
* *
* Trying to fault inside an invalid vma will result in -EINVAL. * Trying to fault inside an invalid vma will result in -EINVAL.
* *
* See the function description in mm/hmm.c for further documentation. * See the function description in mm/hmm.c for further documentation.
*/ */
int hmm_vma_fault(struct vm_area_struct *vma, int hmm_vma_fault(struct hmm_range *range, bool block);
struct hmm_range *range,
unsigned long start,
unsigned long end,
hmm_pfn_t *pfns,
bool write,
bool block);
#endif /* IS_ENABLED(CONFIG_HMM_MIRROR) */
/* Below are for HMM internal use only! Not to be used by device driver! */
void hmm_mm_destroy(struct mm_struct *mm);
static inline void hmm_mm_init(struct mm_struct *mm)
{
mm->hmm = NULL;
}
#else /* IS_ENABLED(CONFIG_HMM_MIRROR) */
static inline void hmm_mm_destroy(struct mm_struct *mm) {}
static inline void hmm_mm_init(struct mm_struct *mm) {}
#endif /* IS_ENABLED(CONFIG_HMM_MIRROR) */
#if IS_ENABLED(CONFIG_DEVICE_PRIVATE) || IS_ENABLED(CONFIG_DEVICE_PUBLIC) #if IS_ENABLED(CONFIG_DEVICE_PRIVATE) || IS_ENABLED(CONFIG_DEVICE_PUBLIC)
struct hmm_devmem; struct hmm_devmem;
@ -498,23 +560,9 @@ struct hmm_device {
struct hmm_device *hmm_device_new(void *drvdata); struct hmm_device *hmm_device_new(void *drvdata);
void hmm_device_put(struct hmm_device *hmm_device); void hmm_device_put(struct hmm_device *hmm_device);
#endif /* CONFIG_DEVICE_PRIVATE || CONFIG_DEVICE_PUBLIC */ #endif /* CONFIG_DEVICE_PRIVATE || CONFIG_DEVICE_PUBLIC */
#endif /* IS_ENABLED(CONFIG_HMM) */
/* Below are for HMM internal use only! Not to be used by device driver! */
#if IS_ENABLED(CONFIG_HMM_MIRROR)
void hmm_mm_destroy(struct mm_struct *mm);
static inline void hmm_mm_init(struct mm_struct *mm)
{
mm->hmm = NULL;
}
#else /* IS_ENABLED(CONFIG_HMM_MIRROR) */
static inline void hmm_mm_destroy(struct mm_struct *mm) {}
static inline void hmm_mm_init(struct mm_struct *mm) {}
#endif /* IS_ENABLED(CONFIG_HMM_MIRROR) */
#else /* IS_ENABLED(CONFIG_HMM) */ #else /* IS_ENABLED(CONFIG_HMM) */
static inline void hmm_mm_destroy(struct mm_struct *mm) {} static inline void hmm_mm_destroy(struct mm_struct *mm) {}
static inline void hmm_mm_init(struct mm_struct *mm) {} static inline void hmm_mm_init(struct mm_struct *mm) {}
#endif /* IS_ENABLED(CONFIG_HMM) */
#endif /* LINUX_HMM_H */ #endif /* LINUX_HMM_H */

View File

@ -29,29 +29,31 @@ struct idr {
#define IDR_FREE 0 #define IDR_FREE 0
/* Set the IDR flag and the IDR_FREE tag */ /* Set the IDR flag and the IDR_FREE tag */
#define IDR_RT_MARKER ((__force gfp_t)(3 << __GFP_BITS_SHIFT)) #define IDR_RT_MARKER (ROOT_IS_IDR | (__force gfp_t) \
(1 << (ROOT_TAG_SHIFT + IDR_FREE)))
#define IDR_INIT_BASE(base) { \ #define IDR_INIT_BASE(name, base) { \
.idr_rt = RADIX_TREE_INIT(IDR_RT_MARKER), \ .idr_rt = RADIX_TREE_INIT(name, IDR_RT_MARKER), \
.idr_base = (base), \ .idr_base = (base), \
.idr_next = 0, \ .idr_next = 0, \
} }
/** /**
* IDR_INIT() - Initialise an IDR. * IDR_INIT() - Initialise an IDR.
* @name: Name of IDR.
* *
* A freshly-initialised IDR contains no IDs. * A freshly-initialised IDR contains no IDs.
*/ */
#define IDR_INIT IDR_INIT_BASE(0) #define IDR_INIT(name) IDR_INIT_BASE(name, 0)
/** /**
* DEFINE_IDR() - Define a statically-allocated IDR * DEFINE_IDR() - Define a statically-allocated IDR.
* @name: Name of IDR * @name: Name of IDR.
* *
* An IDR defined using this macro is ready for use with no additional * An IDR defined using this macro is ready for use with no additional
* initialisation required. It contains no IDs. * initialisation required. It contains no IDs.
*/ */
#define DEFINE_IDR(name) struct idr name = IDR_INIT #define DEFINE_IDR(name) struct idr name = IDR_INIT(name)
/** /**
* idr_get_cursor - Return the current position of the cyclic allocator * idr_get_cursor - Return the current position of the cyclic allocator
@ -218,10 +220,10 @@ struct ida {
struct radix_tree_root ida_rt; struct radix_tree_root ida_rt;
}; };
#define IDA_INIT { \ #define IDA_INIT(name) { \
.ida_rt = RADIX_TREE_INIT(IDR_RT_MARKER | GFP_NOWAIT), \ .ida_rt = RADIX_TREE_INIT(name, IDR_RT_MARKER | GFP_NOWAIT), \
} }
#define DEFINE_IDA(name) struct ida name = IDA_INIT #define DEFINE_IDA(name) struct ida name = IDA_INIT(name)
int ida_pre_get(struct ida *ida, gfp_t gfp_mask); int ida_pre_get(struct ida *ida, gfp_t gfp_mask);
int ida_get_new_above(struct ida *ida, int starting_id, int *p_id); int ida_get_new_above(struct ida *ida, int starting_id, int *p_id);

View File

@ -439,7 +439,8 @@ extern long simple_strtol(const char *,char **,unsigned int);
extern unsigned long long simple_strtoull(const char *,char **,unsigned int); extern unsigned long long simple_strtoull(const char *,char **,unsigned int);
extern long long simple_strtoll(const char *,char **,unsigned int); extern long long simple_strtoll(const char *,char **,unsigned int);
extern int num_to_str(char *buf, int size, unsigned long long num); extern int num_to_str(char *buf, int size,
unsigned long long num, unsigned int width);
/* lib/printf utilities */ /* lib/printf utilities */
@ -543,6 +544,7 @@ extern enum system_states {
SYSTEM_RESTART, SYSTEM_RESTART,
} system_state; } system_state;
/* This cannot be an enum because some may be used in assembly source. */
#define TAINT_PROPRIETARY_MODULE 0 #define TAINT_PROPRIETARY_MODULE 0
#define TAINT_FORCED_MODULE 1 #define TAINT_FORCED_MODULE 1
#define TAINT_CPU_OUT_OF_SPEC 2 #define TAINT_CPU_OUT_OF_SPEC 2
@ -560,7 +562,8 @@ extern enum system_states {
#define TAINT_SOFTLOCKUP 14 #define TAINT_SOFTLOCKUP 14
#define TAINT_LIVEPATCH 15 #define TAINT_LIVEPATCH 15
#define TAINT_AUX 16 #define TAINT_AUX 16
#define TAINT_FLAGS_COUNT 17 #define TAINT_RANDSTRUCT 17
#define TAINT_FLAGS_COUNT 18
struct taint_flag { struct taint_flag {
char c_true; /* character printed when tainted */ char c_true; /* character printed when tainted */

View File

@ -41,11 +41,11 @@
*/ */
/* /*
* Note about locking : There is no locking required until only * one reader * Note about locking: There is no locking required until only one reader
* and one writer is using the fifo and no kfifo_reset() will be * called * and one writer is using the fifo and no kfifo_reset() will be called.
* kfifo_reset_out() can be safely used, until it will be only called * kfifo_reset_out() can be safely used, until it will be only called
* in the reader thread. * in the reader thread.
* For multiple writer and one reader there is only a need to lock the writer. * For multiple writer and one reader there is only a need to lock the writer.
* And vice versa for only one writer and multiple reader there is only a need * And vice versa for only one writer and multiple reader there is only a need
* to lock the reader. * to lock the reader.
*/ */

View File

@ -48,13 +48,12 @@ enum memcg_stat_item {
MEMCG_NR_STAT, MEMCG_NR_STAT,
}; };
/* Cgroup-specific events, on top of universal VM events */ enum memcg_memory_event {
enum memcg_event_item { MEMCG_LOW,
MEMCG_LOW = NR_VM_EVENT_ITEMS,
MEMCG_HIGH, MEMCG_HIGH,
MEMCG_MAX, MEMCG_MAX,
MEMCG_OOM, MEMCG_OOM,
MEMCG_NR_EVENTS, MEMCG_NR_MEMORY_EVENTS,
}; };
struct mem_cgroup_reclaim_cookie { struct mem_cgroup_reclaim_cookie {
@ -88,7 +87,7 @@ enum mem_cgroup_events_target {
struct mem_cgroup_stat_cpu { struct mem_cgroup_stat_cpu {
long count[MEMCG_NR_STAT]; long count[MEMCG_NR_STAT];
unsigned long events[MEMCG_NR_EVENTS]; unsigned long events[NR_VM_EVENT_ITEMS];
unsigned long nr_page_events; unsigned long nr_page_events;
unsigned long targets[MEM_CGROUP_NTARGETS]; unsigned long targets[MEM_CGROUP_NTARGETS];
}; };
@ -120,6 +119,9 @@ struct mem_cgroup_per_node {
unsigned long usage_in_excess;/* Set to the value by which */ unsigned long usage_in_excess;/* Set to the value by which */
/* the soft limit is exceeded*/ /* the soft limit is exceeded*/
bool on_tree; bool on_tree;
bool congested; /* memcg has many dirty pages */
/* backed by a congested BDI */
struct mem_cgroup *memcg; /* Back pointer, we cannot */ struct mem_cgroup *memcg; /* Back pointer, we cannot */
/* use container_of */ /* use container_of */
}; };
@ -202,7 +204,8 @@ struct mem_cgroup {
/* OOM-Killer disable */ /* OOM-Killer disable */
int oom_kill_disable; int oom_kill_disable;
/* handle for "memory.events" */ /* memory.events */
atomic_long_t memory_events[MEMCG_NR_MEMORY_EVENTS];
struct cgroup_file events_file; struct cgroup_file events_file;
/* protect arrays of thresholds */ /* protect arrays of thresholds */
@ -231,9 +234,10 @@ struct mem_cgroup {
struct task_struct *move_lock_task; struct task_struct *move_lock_task;
unsigned long move_lock_flags; unsigned long move_lock_flags;
/* memory.stat */
struct mem_cgroup_stat_cpu __percpu *stat_cpu; struct mem_cgroup_stat_cpu __percpu *stat_cpu;
atomic_long_t stat[MEMCG_NR_STAT]; atomic_long_t stat[MEMCG_NR_STAT];
atomic_long_t events[MEMCG_NR_EVENTS]; atomic_long_t events[NR_VM_EVENT_ITEMS];
unsigned long socket_pressure; unsigned long socket_pressure;
@ -645,9 +649,9 @@ unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order,
gfp_t gfp_mask, gfp_t gfp_mask,
unsigned long *total_scanned); unsigned long *total_scanned);
/* idx can be of type enum memcg_event_item or vm_event_item */
static inline void __count_memcg_events(struct mem_cgroup *memcg, static inline void __count_memcg_events(struct mem_cgroup *memcg,
int idx, unsigned long count) enum vm_event_item idx,
unsigned long count)
{ {
unsigned long x; unsigned long x;
@ -663,7 +667,8 @@ static inline void __count_memcg_events(struct mem_cgroup *memcg,
} }
static inline void count_memcg_events(struct mem_cgroup *memcg, static inline void count_memcg_events(struct mem_cgroup *memcg,
int idx, unsigned long count) enum vm_event_item idx,
unsigned long count)
{ {
unsigned long flags; unsigned long flags;
@ -672,9 +677,8 @@ static inline void count_memcg_events(struct mem_cgroup *memcg,
local_irq_restore(flags); local_irq_restore(flags);
} }
/* idx can be of type enum memcg_event_item or vm_event_item */
static inline void count_memcg_page_event(struct page *page, static inline void count_memcg_page_event(struct page *page,
int idx) enum vm_event_item idx)
{ {
if (page->mem_cgroup) if (page->mem_cgroup)
count_memcg_events(page->mem_cgroup, idx, 1); count_memcg_events(page->mem_cgroup, idx, 1);
@ -698,10 +702,10 @@ static inline void count_memcg_event_mm(struct mm_struct *mm,
rcu_read_unlock(); rcu_read_unlock();
} }
static inline void mem_cgroup_event(struct mem_cgroup *memcg, static inline void memcg_memory_event(struct mem_cgroup *memcg,
enum memcg_event_item event) enum memcg_memory_event event)
{ {
count_memcg_events(memcg, event, 1); atomic_long_inc(&memcg->memory_events[event]);
cgroup_file_notify(&memcg->events_file); cgroup_file_notify(&memcg->events_file);
} }
@ -721,8 +725,8 @@ static inline bool mem_cgroup_disabled(void)
return true; return true;
} }
static inline void mem_cgroup_event(struct mem_cgroup *memcg, static inline void memcg_memory_event(struct mem_cgroup *memcg,
enum memcg_event_item event) enum memcg_memory_event event)
{ {
} }

View File

@ -216,9 +216,6 @@ void put_online_mems(void);
void mem_hotplug_begin(void); void mem_hotplug_begin(void);
void mem_hotplug_done(void); void mem_hotplug_done(void);
extern void set_zone_contiguous(struct zone *zone);
extern void clear_zone_contiguous(struct zone *zone);
#else /* ! CONFIG_MEMORY_HOTPLUG */ #else /* ! CONFIG_MEMORY_HOTPLUG */
#define pfn_to_online_page(pfn) \ #define pfn_to_online_page(pfn) \
({ \ ({ \

View File

@ -7,8 +7,7 @@
#include <linux/migrate_mode.h> #include <linux/migrate_mode.h>
#include <linux/hugetlb.h> #include <linux/hugetlb.h>
typedef struct page *new_page_t(struct page *page, unsigned long private, typedef struct page *new_page_t(struct page *page, unsigned long private);
int **reason);
typedef void free_page_t(struct page *page, unsigned long private); typedef void free_page_t(struct page *page, unsigned long private);
/* /*
@ -43,9 +42,9 @@ static inline struct page *new_page_nodemask(struct page *page,
return alloc_huge_page_nodemask(page_hstate(compound_head(page)), return alloc_huge_page_nodemask(page_hstate(compound_head(page)),
preferred_nid, nodemask); preferred_nid, nodemask);
if (thp_migration_supported() && PageTransHuge(page)) { if (PageTransHuge(page)) {
order = HPAGE_PMD_ORDER;
gfp_mask |= GFP_TRANSHUGE; gfp_mask |= GFP_TRANSHUGE;
order = HPAGE_PMD_ORDER;
} }
if (PageHighMem(page) || (zone_idx(page_zone(page)) == ZONE_MOVABLE)) if (PageHighMem(page) || (zone_idx(page_zone(page)) == ZONE_MOVABLE))

View File

@ -747,7 +747,7 @@ int finish_mkwrite_fault(struct vm_fault *vmf);
* refcount. The each user mapping also has a reference to the page. * refcount. The each user mapping also has a reference to the page.
* *
* The pagecache pages are stored in a per-mapping radix tree, which is * The pagecache pages are stored in a per-mapping radix tree, which is
* rooted at mapping->page_tree, and indexed by offset. * rooted at mapping->i_pages, and indexed by offset.
* Where 2.4 and early 2.6 kernels kept dirty/clean pages in per-address_space * Where 2.4 and early 2.6 kernels kept dirty/clean pages in per-address_space
* lists, we instead now tag pages as dirty/writeback in the radix tree. * lists, we instead now tag pages as dirty/writeback in the radix tree.
* *
@ -1466,6 +1466,7 @@ extern int try_to_release_page(struct page * page, gfp_t gfp_mask);
extern void do_invalidatepage(struct page *page, unsigned int offset, extern void do_invalidatepage(struct page *page, unsigned int offset,
unsigned int length); unsigned int length);
void __set_page_dirty(struct page *, struct address_space *, int warn);
int __set_page_dirty_nobuffers(struct page *page); int __set_page_dirty_nobuffers(struct page *page);
int __set_page_dirty_no_writeback(struct page *page); int __set_page_dirty_no_writeback(struct page *page);
int redirty_page_for_writepage(struct writeback_control *wbc, int redirty_page_for_writepage(struct writeback_control *wbc,
@ -2108,6 +2109,7 @@ extern void setup_per_cpu_pageset(void);
extern void zone_pcp_update(struct zone *zone); extern void zone_pcp_update(struct zone *zone);
extern void zone_pcp_reset(struct zone *zone); extern void zone_pcp_reset(struct zone *zone);
extern void setup_zone_pageset(struct zone *zone);
/* page_alloc.c */ /* page_alloc.c */
extern int min_free_kbytes; extern int min_free_kbytes;

View File

@ -180,6 +180,7 @@ enum node_stat_item {
NR_VMSCAN_IMMEDIATE, /* Prioritise for reclaim when writeback ends */ NR_VMSCAN_IMMEDIATE, /* Prioritise for reclaim when writeback ends */
NR_DIRTIED, /* page dirtyings since bootup */ NR_DIRTIED, /* page dirtyings since bootup */
NR_WRITTEN, /* page writings since bootup */ NR_WRITTEN, /* page writings since bootup */
NR_INDIRECTLY_RECLAIMABLE_BYTES, /* measured in bytes */
NR_VM_NODE_STAT_ITEMS NR_VM_NODE_STAT_ITEMS
}; };
@ -884,7 +885,7 @@ int min_free_kbytes_sysctl_handler(struct ctl_table *, int,
void __user *, size_t *, loff_t *); void __user *, size_t *, loff_t *);
int watermark_scale_factor_sysctl_handler(struct ctl_table *, int, int watermark_scale_factor_sysctl_handler(struct ctl_table *, int,
void __user *, size_t *, loff_t *); void __user *, size_t *, loff_t *);
extern int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES-1]; extern int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES];
int lowmem_reserve_ratio_sysctl_handler(struct ctl_table *, int, int lowmem_reserve_ratio_sysctl_handler(struct ctl_table *, int,
void __user *, size_t *, loff_t *); void __user *, size_t *, loff_t *);
int percpu_pagelist_fraction_sysctl_handler(struct ctl_table *, int, int percpu_pagelist_fraction_sysctl_handler(struct ctl_table *, int,

View File

@ -63,7 +63,6 @@ undo_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
int test_pages_isolated(unsigned long start_pfn, unsigned long end_pfn, int test_pages_isolated(unsigned long start_pfn, unsigned long end_pfn,
bool skip_hwpoisoned_pages); bool skip_hwpoisoned_pages);
struct page *alloc_migrate_target(struct page *page, unsigned long private, struct page *alloc_migrate_target(struct page *page, unsigned long private);
int **resultp);
#endif #endif

Some files were not shown because too many files have changed in this diff Show More