Merge remote-tracking branch 'net-next/master' into mac80211-next

Merge net-next so that we get the changes from net, which would
otherwise conflict with the NLA_POLICY_NESTED/_ARRAY changes.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
This commit is contained in:
Johannes Berg 2019-02-01 11:05:27 +01:00
commit 752cfee90d
1123 changed files with 44218 additions and 27535 deletions

View File

@ -72,6 +72,10 @@ ForEachMacros:
- 'apei_estatus_for_each_section'
- 'ata_for_each_dev'
- 'ata_for_each_link'
- '__ata_qc_for_each'
- 'ata_qc_for_each'
- 'ata_qc_for_each_raw'
- 'ata_qc_for_each_with_internal'
- 'ax25_for_each'
- 'ax25_uid_for_each'
- 'bio_for_each_integrity_vec'
@ -85,6 +89,7 @@ ForEachMacros:
- 'blk_queue_for_each_rl'
- 'bond_for_each_slave'
- 'bond_for_each_slave_rcu'
- 'bpf_for_each_spilled_reg'
- 'btree_for_each_safe128'
- 'btree_for_each_safe32'
- 'btree_for_each_safe64'
@ -103,6 +108,8 @@ ForEachMacros:
- 'drm_atomic_crtc_for_each_plane'
- 'drm_atomic_crtc_state_for_each_plane'
- 'drm_atomic_crtc_state_for_each_plane_state'
- 'drm_atomic_for_each_plane_damage'
- 'drm_connector_for_each_possible_encoder'
- 'drm_for_each_connector_iter'
- 'drm_for_each_crtc'
- 'drm_for_each_encoder'
@ -121,11 +128,21 @@ ForEachMacros:
- 'for_each_bio'
- 'for_each_board_func_rsrc'
- 'for_each_bvec'
- 'for_each_card_components'
- 'for_each_card_links'
- 'for_each_card_links_safe'
- 'for_each_card_prelinks'
- 'for_each_card_rtds'
- 'for_each_card_rtds_safe'
- 'for_each_cgroup_storage_type'
- 'for_each_child_of_node'
- 'for_each_clear_bit'
- 'for_each_clear_bit_from'
- 'for_each_cmsghdr'
- 'for_each_compatible_node'
- 'for_each_component_dais'
- 'for_each_component_dais_safe'
- 'for_each_comp_order'
- 'for_each_console'
- 'for_each_cpu'
- 'for_each_cpu_and'
@ -133,6 +150,10 @@ ForEachMacros:
- 'for_each_cpu_wrap'
- 'for_each_dev_addr'
- 'for_each_dma_cap_mask'
- 'for_each_dpcm_be'
- 'for_each_dpcm_be_rollback'
- 'for_each_dpcm_be_safe'
- 'for_each_dpcm_fe'
- 'for_each_drhd_unit'
- 'for_each_dss_dev'
- 'for_each_efi_memory_desc'
@ -149,6 +170,7 @@ ForEachMacros:
- 'for_each_iommu'
- 'for_each_ip_tunnel_rcu'
- 'for_each_irq_nr'
- 'for_each_link_codecs'
- 'for_each_lru'
- 'for_each_matching_node'
- 'for_each_matching_node_and_match'
@ -160,6 +182,7 @@ ForEachMacros:
- 'for_each_mem_range_rev'
- 'for_each_migratetype_order'
- 'for_each_msi_entry'
- 'for_each_msi_entry_safe'
- 'for_each_net'
- 'for_each_netdev'
- 'for_each_netdev_continue'
@ -183,12 +206,14 @@ ForEachMacros:
- 'for_each_node_with_property'
- 'for_each_of_allnodes'
- 'for_each_of_allnodes_from'
- 'for_each_of_cpu_node'
- 'for_each_of_pci_range'
- 'for_each_old_connector_in_state'
- 'for_each_old_crtc_in_state'
- 'for_each_oldnew_connector_in_state'
- 'for_each_oldnew_crtc_in_state'
- 'for_each_oldnew_plane_in_state'
- 'for_each_oldnew_plane_in_state_reverse'
- 'for_each_oldnew_private_obj_in_state'
- 'for_each_old_plane_in_state'
- 'for_each_old_private_obj_in_state'
@ -206,14 +231,17 @@ ForEachMacros:
- 'for_each_process'
- 'for_each_process_thread'
- 'for_each_property_of_node'
- 'for_each_registered_fb'
- 'for_each_reserved_mem_region'
- 'for_each_resv_unavail_range'
- 'for_each_rtd_codec_dai'
- 'for_each_rtd_codec_dai_rollback'
- 'for_each_rtdcom'
- 'for_each_rtdcom_safe'
- 'for_each_set_bit'
- 'for_each_set_bit_from'
- 'for_each_sg'
- 'for_each_sg_page'
- 'for_each_sibling_event'
- '__for_each_thread'
- 'for_each_thread'
- 'for_each_zone'
@ -251,6 +279,8 @@ ForEachMacros:
- 'hlist_nulls_for_each_entry_from'
- 'hlist_nulls_for_each_entry_rcu'
- 'hlist_nulls_for_each_entry_safe'
- 'i3c_bus_for_each_i2cdev'
- 'i3c_bus_for_each_i3cdev'
- 'ide_host_for_each_port'
- 'ide_port_for_each_dev'
- 'ide_port_for_each_present_dev'
@ -267,11 +297,14 @@ ForEachMacros:
- 'kvm_for_each_memslot'
- 'kvm_for_each_vcpu'
- 'list_for_each'
- 'list_for_each_codec'
- 'list_for_each_codec_safe'
- 'list_for_each_entry'
- 'list_for_each_entry_continue'
- 'list_for_each_entry_continue_rcu'
- 'list_for_each_entry_continue_reverse'
- 'list_for_each_entry_from'
- 'list_for_each_entry_from_rcu'
- 'list_for_each_entry_from_reverse'
- 'list_for_each_entry_lockless'
- 'list_for_each_entry_rcu'
@ -291,6 +324,7 @@ ForEachMacros:
- 'media_device_for_each_intf'
- 'media_device_for_each_link'
- 'media_device_for_each_pad'
- 'nanddev_io_for_each_page'
- 'netdev_for_each_lower_dev'
- 'netdev_for_each_lower_private'
- 'netdev_for_each_lower_private_rcu'
@ -357,12 +391,14 @@ ForEachMacros:
- 'sk_nulls_for_each'
- 'sk_nulls_for_each_from'
- 'sk_nulls_for_each_rcu'
- 'snd_array_for_each'
- 'snd_pcm_group_for_each_entry'
- 'snd_soc_dapm_widget_for_each_path'
- 'snd_soc_dapm_widget_for_each_path_safe'
- 'snd_soc_dapm_widget_for_each_sink_path'
- 'snd_soc_dapm_widget_for_each_source_path'
- 'tb_property_for_each'
- 'tcf_exts_for_each_action'
- 'udp_portaddr_for_each_entry'
- 'udp_portaddr_for_each_entry_rcu'
- 'usb_hub_for_each_child'
@ -371,6 +407,11 @@ ForEachMacros:
- 'v4l2_m2m_for_each_dst_buf_safe'
- 'v4l2_m2m_for_each_src_buf'
- 'v4l2_m2m_for_each_src_buf_safe'
- 'virtio_device_for_each_vq'
- 'xa_for_each'
- 'xas_for_each'
- 'xas_for_each_conflict'
- 'xas_for_each_marked'
- 'zorro_for_each_dev'
#IncludeBlocks: Preserve # Unknown to clang-format-5.0

870
Documentation/bpf/btf.rst Normal file
View File

@ -0,0 +1,870 @@
=====================
BPF Type Format (BTF)
=====================
1. Introduction
***************
BTF (BPF Type Format) is the meta data format which
encodes the debug info related to BPF program/map.
The name BTF was used initially to describe
data types. The BTF was later extended to include
function info for defined subroutines, and line info
for source/line information.
The debug info is used for map pretty print, function
signature, etc. The function signature enables better
bpf program/function kernel symbol.
The line info helps generate
source annotated translated byte code, jited code
and verifier log.
The BTF specification contains two parts,
* BTF kernel API
* BTF ELF file format
The kernel API is the contract between
user space and kernel. The kernel verifies
the BTF info before using it.
The ELF file format is a user space contract
between ELF file and libbpf loader.
The type and string sections are part of the
BTF kernel API, describing the debug info
(mostly types related) referenced by the bpf program.
These two sections are discussed in
details in :ref:`BTF_Type_String`.
.. _BTF_Type_String:
2. BTF Type and String Encoding
*******************************
The file ``include/uapi/linux/btf.h`` provides high
level definition on how types/strings are encoded.
The beginning of data blob must be::
struct btf_header {
__u16 magic;
__u8 version;
__u8 flags;
__u32 hdr_len;
/* All offsets are in bytes relative to the end of this header */
__u32 type_off; /* offset of type section */
__u32 type_len; /* length of type section */
__u32 str_off; /* offset of string section */
__u32 str_len; /* length of string section */
};
The magic is ``0xeB9F``, which has different encoding for big and little
endian system, and can be used to test whether BTF is generated for
big or little endian target.
The btf_header is designed to be extensible with hdr_len equal to
``sizeof(struct btf_header)`` when the data blob is generated.
2.1 String Encoding
===================
The first string in the string section must be a null string.
The rest of string table is a concatenation of other null-treminated
strings.
2.2 Type Encoding
=================
The type id ``0`` is reserved for ``void`` type.
The type section is parsed sequentially and the type id is assigned to
each recognized type starting from id ``1``.
Currently, the following types are supported::
#define BTF_KIND_INT 1 /* Integer */
#define BTF_KIND_PTR 2 /* Pointer */
#define BTF_KIND_ARRAY 3 /* Array */
#define BTF_KIND_STRUCT 4 /* Struct */
#define BTF_KIND_UNION 5 /* Union */
#define BTF_KIND_ENUM 6 /* Enumeration */
#define BTF_KIND_FWD 7 /* Forward */
#define BTF_KIND_TYPEDEF 8 /* Typedef */
#define BTF_KIND_VOLATILE 9 /* Volatile */
#define BTF_KIND_CONST 10 /* Const */
#define BTF_KIND_RESTRICT 11 /* Restrict */
#define BTF_KIND_FUNC 12 /* Function */
#define BTF_KIND_FUNC_PROTO 13 /* Function Proto */
Note that the type section encodes debug info, not just pure types.
``BTF_KIND_FUNC`` is not a type, and it represents a defined subprogram.
Each type contains the following common data::
struct btf_type {
__u32 name_off;
/* "info" bits arrangement
* bits 0-15: vlen (e.g. # of struct's members)
* bits 16-23: unused
* bits 24-27: kind (e.g. int, ptr, array...etc)
* bits 28-30: unused
* bit 31: kind_flag, currently used by
* struct, union and fwd
*/
__u32 info;
/* "size" is used by INT, ENUM, STRUCT and UNION.
* "size" tells the size of the type it is describing.
*
* "type" is used by PTR, TYPEDEF, VOLATILE, CONST, RESTRICT,
* FUNC and FUNC_PROTO.
* "type" is a type_id referring to another type.
*/
union {
__u32 size;
__u32 type;
};
};
For certain kinds, the common data are followed by kind specific data.
The ``name_off`` in ``struct btf_type`` specifies the offset in the string table.
The following details encoding of each kind.
2.2.1 BTF_KIND_INT
~~~~~~~~~~~~~~~~~~
``struct btf_type`` encoding requirement:
* ``name_off``: any valid offset
* ``info.kind_flag``: 0
* ``info.kind``: BTF_KIND_INT
* ``info.vlen``: 0
* ``size``: the size of the int type in bytes.
``btf_type`` is followed by a ``u32`` with following bits arrangement::
#define BTF_INT_ENCODING(VAL) (((VAL) & 0x0f000000) >> 24)
#define BTF_INT_OFFSET(VAL) (((VAL & 0x00ff0000)) >> 16)
#define BTF_INT_BITS(VAL) ((VAL) & 0x000000ff)
The ``BTF_INT_ENCODING`` has the following attributes::
#define BTF_INT_SIGNED (1 << 0)
#define BTF_INT_CHAR (1 << 1)
#define BTF_INT_BOOL (1 << 2)
The ``BTF_INT_ENCODING()`` provides extra information, signness,
char, or bool, for the int type. The char and bool encoding
are mostly useful for pretty print. At most one encoding can
be specified for the int type.
The ``BTF_INT_BITS()`` specifies the number of actual bits held by
this int type. For example, a 4-bit bitfield encodes
``BTF_INT_BITS()`` equals to 4. The ``btf_type.size * 8``
must be equal to or greater than ``BTF_INT_BITS()`` for the type.
The maximum value of ``BTF_INT_BITS()`` is 128.
The ``BTF_INT_OFFSET()`` specifies the starting bit offset to
calculate values for this int. For example, a bitfield struct
member has
* btf member bit offset 100 from the start of the structure,
* btf member pointing to an int type,
* the int type has ``BTF_INT_OFFSET() = 2`` and ``BTF_INT_BITS() = 4``
Then in the struct memory layout, this member will occupy
``4`` bits starting from bits ``100 + 2 = 102``.
Alternatively, the bitfield struct member can be the following to
access the same bits as the above:
* btf member bit offset 102,
* btf member pointing to an int type,
* the int type has ``BTF_INT_OFFSET() = 0`` and ``BTF_INT_BITS() = 4``
The original intention of ``BTF_INT_OFFSET()`` is to provide
flexibility of bitfield encoding.
Currently, both llvm and pahole generates ``BTF_INT_OFFSET() = 0``
for all int types.
2.2.2 BTF_KIND_PTR
~~~~~~~~~~~~~~~~~~
``struct btf_type`` encoding requirement:
* ``name_off``: 0
* ``info.kind_flag``: 0
* ``info.kind``: BTF_KIND_PTR
* ``info.vlen``: 0
* ``type``: the pointee type of the pointer
No additional type data follow ``btf_type``.
2.2.3 BTF_KIND_ARRAY
~~~~~~~~~~~~~~~~~~~~
``struct btf_type`` encoding requirement:
* ``name_off``: 0
* ``info.kind_flag``: 0
* ``info.kind``: BTF_KIND_ARRAY
* ``info.vlen``: 0
* ``size/type``: 0, not used
btf_type is followed by one "struct btf_array"::
struct btf_array {
__u32 type;
__u32 index_type;
__u32 nelems;
};
The ``struct btf_array`` encoding:
* ``type``: the element type
* ``index_type``: the index type
* ``nelems``: the number of elements for this array (``0`` is also allowed).
The ``index_type`` can be any regular int types
(u8, u16, u32, u64, unsigned __int128).
The original design of including ``index_type`` follows dwarf
which has a ``index_type`` for its array type.
Currently in BTF, beyond type verification, the ``index_type`` is not used.
The ``struct btf_array`` allows chaining through element type to represent
multiple dimensional arrays. For example, ``int a[5][6]``, the following
type system illustrates the chaining:
* [1]: int
* [2]: array, ``btf_array.type = [1]``, ``btf_array.nelems = 6``
* [3]: array, ``btf_array.type = [2]``, ``btf_array.nelems = 5``
Currently, both pahole and llvm collapse multiple dimensional array
into one dimensional array, e.g., ``a[5][6]``, the btf_array.nelems
equal to ``30``. This is because the original use case is map pretty
print where the whole array is dumped out so one dimensional array
is enough. As more BTF usage is explored, pahole and llvm can be
changed to generate proper chained representation for
multiple dimensional arrays.
2.2.4 BTF_KIND_STRUCT
~~~~~~~~~~~~~~~~~~~~~
2.2.5 BTF_KIND_UNION
~~~~~~~~~~~~~~~~~~~~
``struct btf_type`` encoding requirement:
* ``name_off``: 0 or offset to a valid C identifier
* ``info.kind_flag``: 0 or 1
* ``info.kind``: BTF_KIND_STRUCT or BTF_KIND_UNION
* ``info.vlen``: the number of struct/union members
* ``info.size``: the size of the struct/union in bytes
``btf_type`` is followed by ``info.vlen`` number of ``struct btf_member``.::
struct btf_member {
__u32 name_off;
__u32 type;
__u32 offset;
};
``struct btf_member`` encoding:
* ``name_off``: offset to a valid C identifier
* ``type``: the member type
* ``offset``: <see below>
If the type info ``kind_flag`` is not set, the offset contains
only bit offset of the member. Note that the base type of the
bitfield can only be int or enum type. If the bitfield size
is 32, the base type can be either int or enum type.
If the bitfield size is not 32, the base type must be int,
and int type ``BTF_INT_BITS()`` encodes the bitfield size.
If the ``kind_flag`` is set, the ``btf_member.offset``
contains both member bitfield size and bit offset. The
bitfield size and bit offset are calculated as below.::
#define BTF_MEMBER_BITFIELD_SIZE(val) ((val) >> 24)
#define BTF_MEMBER_BIT_OFFSET(val) ((val) & 0xffffff)
In this case, if the base type is an int type, it must
be a regular int type:
* ``BTF_INT_OFFSET()`` must be 0.
* ``BTF_INT_BITS()`` must be equal to ``{1,2,4,8,16} * 8``.
The following kernel patch introduced ``kind_flag`` and
explained why both modes exist:
https://github.com/torvalds/linux/commit/9d5f9f701b1891466fb3dbb1806ad97716f95cc3#diff-fa650a64fdd3968396883d2fe8215ff3
2.2.6 BTF_KIND_ENUM
~~~~~~~~~~~~~~~~~~~
``struct btf_type`` encoding requirement:
* ``name_off``: 0 or offset to a valid C identifier
* ``info.kind_flag``: 0
* ``info.kind``: BTF_KIND_ENUM
* ``info.vlen``: number of enum values
* ``size``: 4
``btf_type`` is followed by ``info.vlen`` number of ``struct btf_enum``.::
struct btf_enum {
__u32 name_off;
__s32 val;
};
The ``btf_enum`` encoding:
* ``name_off``: offset to a valid C identifier
* ``val``: any value
2.2.7 BTF_KIND_FWD
~~~~~~~~~~~~~~~~~~
``struct btf_type`` encoding requirement:
* ``name_off``: offset to a valid C identifier
* ``info.kind_flag``: 0 for struct, 1 for union
* ``info.kind``: BTF_KIND_FWD
* ``info.vlen``: 0
* ``type``: 0
No additional type data follow ``btf_type``.
2.2.8 BTF_KIND_TYPEDEF
~~~~~~~~~~~~~~~~~~~~~~
``struct btf_type`` encoding requirement:
* ``name_off``: offset to a valid C identifier
* ``info.kind_flag``: 0
* ``info.kind``: BTF_KIND_TYPEDEF
* ``info.vlen``: 0
* ``type``: the type which can be referred by name at ``name_off``
No additional type data follow ``btf_type``.
2.2.9 BTF_KIND_VOLATILE
~~~~~~~~~~~~~~~~~~~~~~~
``struct btf_type`` encoding requirement:
* ``name_off``: 0
* ``info.kind_flag``: 0
* ``info.kind``: BTF_KIND_VOLATILE
* ``info.vlen``: 0
* ``type``: the type with ``volatile`` qualifier
No additional type data follow ``btf_type``.
2.2.10 BTF_KIND_CONST
~~~~~~~~~~~~~~~~~~~~~
``struct btf_type`` encoding requirement:
* ``name_off``: 0
* ``info.kind_flag``: 0
* ``info.kind``: BTF_KIND_CONST
* ``info.vlen``: 0
* ``type``: the type with ``const`` qualifier
No additional type data follow ``btf_type``.
2.2.11 BTF_KIND_RESTRICT
~~~~~~~~~~~~~~~~~~~~~~~~
``struct btf_type`` encoding requirement:
* ``name_off``: 0
* ``info.kind_flag``: 0
* ``info.kind``: BTF_KIND_RESTRICT
* ``info.vlen``: 0
* ``type``: the type with ``restrict`` qualifier
No additional type data follow ``btf_type``.
2.2.12 BTF_KIND_FUNC
~~~~~~~~~~~~~~~~~~~~
``struct btf_type`` encoding requirement:
* ``name_off``: offset to a valid C identifier
* ``info.kind_flag``: 0
* ``info.kind``: BTF_KIND_FUNC
* ``info.vlen``: 0
* ``type``: a BTF_KIND_FUNC_PROTO type
No additional type data follow ``btf_type``.
A BTF_KIND_FUNC defines, not a type, but a subprogram (function) whose
signature is defined by ``type``. The subprogram is thus an instance of
that type. The BTF_KIND_FUNC may in turn be referenced by a func_info in
the :ref:`BTF_Ext_Section` (ELF) or in the arguments to
:ref:`BPF_Prog_Load` (ABI).
2.2.13 BTF_KIND_FUNC_PROTO
~~~~~~~~~~~~~~~~~~~~~~~~~~
``struct btf_type`` encoding requirement:
* ``name_off``: 0
* ``info.kind_flag``: 0
* ``info.kind``: BTF_KIND_FUNC_PROTO
* ``info.vlen``: # of parameters
* ``type``: the return type
``btf_type`` is followed by ``info.vlen`` number of ``struct btf_param``.::
struct btf_param {
__u32 name_off;
__u32 type;
};
If a BTF_KIND_FUNC_PROTO type is referred by a BTF_KIND_FUNC type,
then ``btf_param.name_off`` must point to a valid C identifier
except for the possible last argument representing the variable
argument. The btf_param.type refers to parameter type.
If the function has variable arguments, the last parameter
is encoded with ``name_off = 0`` and ``type = 0``.
3. BTF Kernel API
*****************
The following bpf syscall command involves BTF:
* BPF_BTF_LOAD: load a blob of BTF data into kernel
* BPF_MAP_CREATE: map creation with btf key and value type info.
* BPF_PROG_LOAD: prog load with btf function and line info.
* BPF_BTF_GET_FD_BY_ID: get a btf fd
* BPF_OBJ_GET_INFO_BY_FD: btf, func_info, line_info
and other btf related info are returned.
The workflow typically looks like:
::
Application:
BPF_BTF_LOAD
|
v
BPF_MAP_CREATE and BPF_PROG_LOAD
|
V
......
Introspection tool:
......
BPF_{PROG,MAP}_GET_NEXT_ID (get prog/map id's)
|
V
BPF_{PROG,MAP}_GET_FD_BY_ID (get a prog/map fd)
|
V
BPF_OBJ_GET_INFO_BY_FD (get bpf_prog_info/bpf_map_info with btf_id)
| |
V |
BPF_BTF_GET_FD_BY_ID (get btf_fd) |
| |
V |
BPF_OBJ_GET_INFO_BY_FD (get btf) |
| |
V V
pretty print types, dump func signatures and line info, etc.
3.1 BPF_BTF_LOAD
================
Load a blob of BTF data into kernel. A blob of data
described in :ref:`BTF_Type_String`
can be directly loaded into the kernel.
A ``btf_fd`` returns to userspace.
3.2 BPF_MAP_CREATE
==================
A map can be created with ``btf_fd`` and specified key/value type id.::
__u32 btf_fd; /* fd pointing to a BTF type data */
__u32 btf_key_type_id; /* BTF type_id of the key */
__u32 btf_value_type_id; /* BTF type_id of the value */
In libbpf, the map can be defined with extra annotation like below:
::
struct bpf_map_def SEC("maps") btf_map = {
.type = BPF_MAP_TYPE_ARRAY,
.key_size = sizeof(int),
.value_size = sizeof(struct ipv_counts),
.max_entries = 4,
};
BPF_ANNOTATE_KV_PAIR(btf_map, int, struct ipv_counts);
Here, the parameters for macro BPF_ANNOTATE_KV_PAIR are map name,
key and value types for the map.
During ELF parsing, libbpf is able to extract key/value type_id's
and assigned them to BPF_MAP_CREATE attributes automatically.
.. _BPF_Prog_Load:
3.3 BPF_PROG_LOAD
=================
During prog_load, func_info and line_info can be passed to kernel with
proper values for the following attributes:
::
__u32 insn_cnt;
__aligned_u64 insns;
......
__u32 prog_btf_fd; /* fd pointing to BTF type data */
__u32 func_info_rec_size; /* userspace bpf_func_info size */
__aligned_u64 func_info; /* func info */
__u32 func_info_cnt; /* number of bpf_func_info records */
__u32 line_info_rec_size; /* userspace bpf_line_info size */
__aligned_u64 line_info; /* line info */
__u32 line_info_cnt; /* number of bpf_line_info records */
The func_info and line_info are an array of below, respectively.::
struct bpf_func_info {
__u32 insn_off; /* [0, insn_cnt - 1] */
__u32 type_id; /* pointing to a BTF_KIND_FUNC type */
};
struct bpf_line_info {
__u32 insn_off; /* [0, insn_cnt - 1] */
__u32 file_name_off; /* offset to string table for the filename */
__u32 line_off; /* offset to string table for the source line */
__u32 line_col; /* line number and column number */
};
func_info_rec_size is the size of each func_info record, and line_info_rec_size
is the size of each line_info record. Passing the record size to kernel make
it possible to extend the record itself in the future.
Below are requirements for func_info:
* func_info[0].insn_off must be 0.
* the func_info insn_off is in strictly increasing order and matches
bpf func boundaries.
Below are requirements for line_info:
* the first insn in each func must points to a line_info record.
* the line_info insn_off is in strictly increasing order.
For line_info, the line number and column number are defined as below:
::
#define BPF_LINE_INFO_LINE_NUM(line_col) ((line_col) >> 10)
#define BPF_LINE_INFO_LINE_COL(line_col) ((line_col) & 0x3ff)
3.4 BPF_{PROG,MAP}_GET_NEXT_ID
In kernel, every loaded program, map or btf has a unique id.
The id won't change during the life time of the program, map or btf.
The bpf syscall command BPF_{PROG,MAP}_GET_NEXT_ID
returns all id's, one for each command, to user space, for bpf
program or maps,
so the inspection tool can inspect all programs and maps.
3.5 BPF_{PROG,MAP}_GET_FD_BY_ID
The introspection tool cannot use id to get details about program or maps.
A file descriptor needs to be obtained first for reference counting purpose.
3.6 BPF_OBJ_GET_INFO_BY_FD
==========================
Once a program/map fd is acquired, the introspection tool can
get the detailed information from kernel about this fd,
some of which is btf related. For example,
``bpf_map_info`` returns ``btf_id``, key/value type id.
``bpf_prog_info`` returns ``btf_id``, func_info and line info
for translated bpf byte codes, and jited_line_info.
3.7 BPF_BTF_GET_FD_BY_ID
========================
With ``btf_id`` obtained in ``bpf_map_info`` and ``bpf_prog_info``,
bpf syscall command BPF_BTF_GET_FD_BY_ID can retrieve a btf fd.
Then, with command BPF_OBJ_GET_INFO_BY_FD, the btf blob, originally
loaded into the kernel with BPF_BTF_LOAD, can be retrieved.
With the btf blob, ``bpf_map_info`` and ``bpf_prog_info``, the introspection
tool has full btf knowledge and is able to pretty print map key/values,
dump func signatures, dump line info along with byte/jit codes.
4. ELF File Format Interface
****************************
4.1 .BTF section
================
The .BTF section contains type and string data. The format of this section
is same as the one describe in :ref:`BTF_Type_String`.
.. _BTF_Ext_Section:
4.2 .BTF.ext section
====================
The .BTF.ext section encodes func_info and line_info which
needs loader manipulation before loading into the kernel.
The specification for .BTF.ext section is defined at
``tools/lib/bpf/btf.h`` and ``tools/lib/bpf/btf.c``.
The current header of .BTF.ext section::
struct btf_ext_header {
__u16 magic;
__u8 version;
__u8 flags;
__u32 hdr_len;
/* All offsets are in bytes relative to the end of this header */
__u32 func_info_off;
__u32 func_info_len;
__u32 line_info_off;
__u32 line_info_len;
};
It is very similar to .BTF section. Instead of type/string section,
it contains func_info and line_info section. See :ref:`BPF_Prog_Load`
for details about func_info and line_info record format.
The func_info is organized as below.::
func_info_rec_size
btf_ext_info_sec for section #1 /* func_info for section #1 */
btf_ext_info_sec for section #2 /* func_info for section #2 */
...
``func_info_rec_size`` specifies the size of ``bpf_func_info`` structure
when .BTF.ext is generated. btf_ext_info_sec, defined below, is
the func_info for each specific ELF section.::
struct btf_ext_info_sec {
__u32 sec_name_off; /* offset to section name */
__u32 num_info;
/* Followed by num_info * record_size number of bytes */
__u8 data[0];
};
Here, num_info must be greater than 0.
The line_info is organized as below.::
line_info_rec_size
btf_ext_info_sec for section #1 /* line_info for section #1 */
btf_ext_info_sec for section #2 /* line_info for section #2 */
...
``line_info_rec_size`` specifies the size of ``bpf_line_info`` structure
when .BTF.ext is generated.
The interpretation of ``bpf_func_info->insn_off`` and
``bpf_line_info->insn_off`` is different between kernel API and ELF API.
For kernel API, the ``insn_off`` is the instruction offset in the unit
of ``struct bpf_insn``. For ELF API, the ``insn_off`` is the byte offset
from the beginning of section (``btf_ext_info_sec->sec_name_off``).
5. Using BTF
************
5.1 bpftool map pretty print
============================
With BTF, the map key/value can be printed based on fields rather than
simply raw bytes. This is especially
valuable for large structure or if you data structure
has bitfields. For example, for the following map,::
enum A { A1, A2, A3, A4, A5 };
typedef enum A ___A;
struct tmp_t {
char a1:4;
int a2:4;
int :4;
__u32 a3:4;
int b;
___A b1:4;
enum A b2:4;
};
struct bpf_map_def SEC("maps") tmpmap = {
.type = BPF_MAP_TYPE_ARRAY,
.key_size = sizeof(__u32),
.value_size = sizeof(struct tmp_t),
.max_entries = 1,
};
BPF_ANNOTATE_KV_PAIR(tmpmap, int, struct tmp_t);
bpftool is able to pretty print like below:
::
[{
"key": 0,
"value": {
"a1": 0x2,
"a2": 0x4,
"a3": 0x6,
"b": 7,
"b1": 0x8,
"b2": 0xa
}
}
]
5.2 bpftool prog dump
=====================
The following is an example to show func_info and line_info
can help prog dump with better kernel symbol name, function prototype
and line information.::
$ bpftool prog dump jited pinned /sys/fs/bpf/test_btf_haskv
[...]
int test_long_fname_2(struct dummy_tracepoint_args * arg):
bpf_prog_44a040bf25481309_test_long_fname_2:
; static int test_long_fname_2(struct dummy_tracepoint_args *arg)
0: push %rbp
1: mov %rsp,%rbp
4: sub $0x30,%rsp
b: sub $0x28,%rbp
f: mov %rbx,0x0(%rbp)
13: mov %r13,0x8(%rbp)
17: mov %r14,0x10(%rbp)
1b: mov %r15,0x18(%rbp)
1f: xor %eax,%eax
21: mov %rax,0x20(%rbp)
25: xor %esi,%esi
; int key = 0;
27: mov %esi,-0x4(%rbp)
; if (!arg->sock)
2a: mov 0x8(%rdi),%rdi
; if (!arg->sock)
2e: cmp $0x0,%rdi
32: je 0x0000000000000070
34: mov %rbp,%rsi
; counts = bpf_map_lookup_elem(&btf_map, &key);
[...]
5.3 verifier log
================
The following is an example how line_info can help verifier failure debug.::
/* The code at tools/testing/selftests/bpf/test_xdp_noinline.c
* is modified as below.
*/
data = (void *)(long)xdp->data;
data_end = (void *)(long)xdp->data_end;
/*
if (data + 4 > data_end)
return XDP_DROP;
*/
*(u32 *)data = dst->dst;
$ bpftool prog load ./test_xdp_noinline.o /sys/fs/bpf/test_xdp_noinline type xdp
; data = (void *)(long)xdp->data;
224: (79) r2 = *(u64 *)(r10 -112)
225: (61) r2 = *(u32 *)(r2 +0)
; *(u32 *)data = dst->dst;
226: (63) *(u32 *)(r2 +0) = r1
invalid access to packet, off=0 size=4, R2(id=0,off=0,r=0)
R2 offset is outside of the packet
6. BTF Generation
*****************
You need latest pahole
https://git.kernel.org/pub/scm/devel/pahole/pahole.git/
or llvm (8.0 or later). The pahole acts as a dwarf2btf converter. It doesn't support .BTF.ext
and btf BTF_KIND_FUNC type yet. For example,::
-bash-4.4$ cat t.c
struct t {
int a:2;
int b:3;
int c:2;
} g;
-bash-4.4$ gcc -c -O2 -g t.c
-bash-4.4$ pahole -JV t.o
File t.o:
[1] STRUCT t kind_flag=1 size=4 vlen=3
a type_id=2 bitfield_size=2 bits_offset=0
b type_id=2 bitfield_size=3 bits_offset=2
c type_id=2 bitfield_size=2 bits_offset=5
[2] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
The llvm is able to generate .BTF and .BTF.ext directly with -g for bpf target only.
The assembly code (-S) is able to show the BTF encoding in assembly format.::
-bash-4.4$ cat t2.c
typedef int __int32;
struct t2 {
int a2;
int (*f2)(char q1, __int32 q2, ...);
int (*f3)();
} g2;
int main() { return 0; }
int test() { return 0; }
-bash-4.4$ clang -c -g -O2 -target bpf t2.c
-bash-4.4$ readelf -S t2.o
......
[ 8] .BTF PROGBITS 0000000000000000 00000247
000000000000016e 0000000000000000 0 0 1
[ 9] .BTF.ext PROGBITS 0000000000000000 000003b5
0000000000000060 0000000000000000 0 0 1
[10] .rel.BTF.ext REL 0000000000000000 000007e0
0000000000000040 0000000000000010 16 9 8
......
-bash-4.4$ clang -S -g -O2 -target bpf t2.c
-bash-4.4$ cat t2.s
......
.section .BTF,"",@progbits
.short 60319 # 0xeb9f
.byte 1
.byte 0
.long 24
.long 0
.long 220
.long 220
.long 122
.long 0 # BTF_KIND_FUNC_PROTO(id = 1)
.long 218103808 # 0xd000000
.long 2
.long 83 # BTF_KIND_INT(id = 2)
.long 16777216 # 0x1000000
.long 4
.long 16777248 # 0x1000020
......
.byte 0 # string offset=0
.ascii ".text" # string offset=1
.byte 0
.ascii "/home/yhs/tmp-pahole/t2.c" # string offset=7
.byte 0
.ascii "int main() { return 0; }" # string offset=33
.byte 0
.ascii "int test() { return 0; }" # string offset=58
.byte 0
.ascii "int" # string offset=83
......
.section .BTF.ext,"",@progbits
.short 60319 # 0xeb9f
.byte 1
.byte 0
.long 24
.long 0
.long 28
.long 28
.long 44
.long 8 # FuncInfo
.long 1 # FuncInfo section string offset=1
.long 2
.long .Lfunc_begin0
.long 3
.long .Lfunc_begin1
.long 5
.long 16 # LineInfo
.long 1 # LineInfo section string offset=1
.long 2
.long .Ltmp0
.long 7
.long 33
.long 7182 # Line 7 Col 14
.long .Ltmp3
.long 7
.long 58
.long 8206 # Line 8 Col 14
7. Testing
**********
Kernel bpf selftest `test_btf.c` provides extensive set of BTF related tests.

View File

@ -15,6 +15,13 @@ that goes into great technical depth about the BPF Architecture.
The primary info for the bpf syscall is available in the `man-pages`_
for `bpf(2)`_.
BPF Type Format (BTF)
=====================
.. toctree::
:maxdepth: 1
btf
Frequently asked questions (FAQ)

View File

@ -108,12 +108,13 @@ some, but not all of the other indices changing.
Sometimes you need to ensure that a subsequent call to :c:func:`xa_store`
will not need to allocate memory. The :c:func:`xa_reserve` function
will store a reserved entry at the indicated index. Users of the normal
API will see this entry as containing ``NULL``. If you do not need to
use the reserved entry, you can call :c:func:`xa_release` to remove the
unused entry. If another user has stored to the entry in the meantime,
:c:func:`xa_release` will do nothing; if instead you want the entry to
become ``NULL``, you should use :c:func:`xa_erase`.
will store a reserved entry at the indicated index. Users of the
normal API will see this entry as containing ``NULL``. If you do
not need to use the reserved entry, you can call :c:func:`xa_release`
to remove the unused entry. If another user has stored to the entry
in the meantime, :c:func:`xa_release` will do nothing; if instead you
want the entry to become ``NULL``, you should use :c:func:`xa_erase`.
Using :c:func:`xa_insert` on a reserved entry will fail.
If all entries in the array are ``NULL``, the :c:func:`xa_empty` function
will return ``true``.
@ -183,6 +184,8 @@ Takes xa_lock internally:
* :c:func:`xa_store_bh`
* :c:func:`xa_store_irq`
* :c:func:`xa_insert`
* :c:func:`xa_insert_bh`
* :c:func:`xa_insert_irq`
* :c:func:`xa_erase`
* :c:func:`xa_erase_bh`
* :c:func:`xa_erase_irq`

View File

@ -235,4 +235,4 @@ cpus {
===========================================
[1] ARM Linux Kernel documentation - CPUs bindings
Documentation/devicetree/bindings/arm/cpus.txt
Documentation/devicetree/bindings/arm/cpus.yaml

View File

@ -684,7 +684,7 @@ cpus {
===========================================
[1] ARM Linux Kernel documentation - CPUs bindings
Documentation/devicetree/bindings/arm/cpus.txt
Documentation/devicetree/bindings/arm/cpus.yaml
[2] ARM Linux Kernel documentation - PSCI bindings
Documentation/devicetree/bindings/arm/psci.txt

View File

@ -4,7 +4,7 @@ SP810 System Controller
Required properties:
- compatible: standard compatible string for a Primecell peripheral,
see Documentation/devicetree/bindings/arm/primecell.txt
see Documentation/devicetree/bindings/arm/primecell.yaml
for more details
should be: "arm,sp810", "arm,primecell"

View File

@ -472,4 +472,4 @@ cpus {
===============================================================================
[1] ARM Linux kernel documentation
Documentation/devicetree/bindings/arm/cpus.txt
Documentation/devicetree/bindings/arm/cpus.yaml

View File

@ -18,4 +18,4 @@ Required Properties:
Each clock is assigned an identifier and client nodes use this identifier
to specify the clock which they consume.
All these identifier could be found in <dt-bindings/clock/marvell-mmp2.h>.
All these identifiers could be found in <dt-bindings/clock/marvell,mmp2.h>.

View File

@ -1,6 +1,6 @@
* ARM PrimeCell Color LCD Controller PL110/PL111
See also Documentation/devicetree/bindings/arm/primecell.txt
See also Documentation/devicetree/bindings/arm/primecell.yaml
Required properties:

View File

@ -27,7 +27,6 @@ Example:
reg = <0x04300000 0x20000>;
reg-names = "kgsl_3d0_reg_memory";
interrupts = <GIC_SPI 80 0>;
interrupt-names = "kgsl_3d0_irq";
clock-names =
"core",
"iface",

View File

@ -14,8 +14,6 @@ Required properties:
"marvell,armada-8k-gpio" should be used for the Armada 7K and 8K
SoCs (either from AP or CP), see
Documentation/devicetree/bindings/arm/marvell/cp110-system-controller0.txt
and
Documentation/devicetree/bindings/arm/marvell/ap806-system-controller.txt
for specific details about the offset property.

View File

@ -78,7 +78,7 @@ Sub-nodes:
PPI affinity can be expressed as a single "ppi-partitions" node,
containing a set of sub-nodes, each with the following property:
- affinity: Should be a list of phandles to CPU nodes (as described in
Documentation/devicetree/bindings/arm/cpus.txt).
Documentation/devicetree/bindings/arm/cpus.yaml).
GICv3 has one or more Interrupt Translation Services (ITS) that are
used to route Message Signalled Interrupts (MSI) to the CPUs.

View File

@ -3,12 +3,16 @@ Mediatek MT7530 Ethernet switch
Required properties:
- compatible: Must be compatible = "mediatek,mt7530";
- compatible: may be compatible = "mediatek,mt7530"
or compatible = "mediatek,mt7621"
- #address-cells: Must be 1.
- #size-cells: Must be 0.
- mediatek,mcm: Boolean; if defined, indicates that either MT7530 is the part
on multi-chip module belong to MT7623A has or the remotely standalone
chip as the function MT7623N reference board provided for.
If compatible mediatek,mt7530 is set then the following properties are required
- core-supply: Phandle to the regulator node necessary for the core power.
- io-supply: Phandle to the regulator node necessary for the I/O power.
See Documentation/devicetree/bindings/regulator/mt6323-regulator.txt

View File

@ -0,0 +1,64 @@
Qualcomm Ethernet ETHQOS device
This documents dwmmac based ethernet device which supports Gigabit
ethernet for version v2.3.0 onwards.
This device has following properties:
Required properties:
- compatible: Should be qcom,qcs404-ethqos"
- reg: Address and length of the register set for the device
- reg-names: Should contain register names "stmmaceth", "rgmii"
- clocks: Should contain phandle to clocks
- clock-names: Should contain clock names "stmmaceth", "pclk",
"ptp_ref", "rgmii"
- interrupts: Should contain phandle to interrupts
- interrupt-names: Should contain interrupt names "macirq", "eth_lpi"
Rest of the properties are defined in stmmac.txt file in same directory
Example:
ethernet: ethernet@7a80000 {
compatible = "qcom,qcs404-ethqos";
reg = <0x07a80000 0x10000>,
<0x07a96000 0x100>;
reg-names = "stmmaceth", "rgmii";
clock-names = "stmmaceth", "pclk", "ptp_ref", "rgmii";
clocks = <&gcc GCC_ETH_AXI_CLK>,
<&gcc GCC_ETH_SLAVE_AHB_CLK>,
<&gcc GCC_ETH_PTP_CLK>,
<&gcc GCC_ETH_RGMII_CLK>;
interrupts = <GIC_SPI 56 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 55 IRQ_TYPE_LEVEL_HIGH>;
interrupt-names = "macirq", "eth_lpi";
snps,reset-gpio = <&tlmm 60 GPIO_ACTIVE_LOW>;
snps,reset-active-low;
snps,txpbl = <8>;
snps,rxpbl = <2>;
snps,aal;
snps,tso;
phy-handle = <&phy1>;
phy-mode = "rgmii";
mdio {
#address-cells = <0x1>;
#size-cells = <0x0>;
compatible = "snps,dwmac-mdio";
phy1: phy@4 {
device_type = "ethernet-phy";
reg = <0x4>;
};
};
};

View File

@ -17,6 +17,8 @@ Clock Properties:
- fsl,tmr-fiper1 Fixed interval period pulse generator.
- fsl,tmr-fiper2 Fixed interval period pulse generator.
- fsl,max-adj Maximum frequency adjustment in parts per billion.
- fsl,extts-fifo The presence of this property indicates hardware
support for the external trigger stamp FIFO.
These properties set the operational parameters for the PTP
clock. You must choose these carefully for the clock to work right.

View File

@ -55,7 +55,7 @@ of these nodes are defined by the individual bindings for the specific function
= EXAMPLE
The following example represents the GLINK RPM node on a MSM8996 device, with
the function for the "rpm_request" channel defined, which is used for
regualtors and root clocks.
regulators and root clocks.
apcs_glb: mailbox@9820000 {
compatible = "qcom,msm8996-apcs-hmss-global";

View File

@ -41,12 +41,12 @@ processor ID) and a string identifier.
- qcom,local-pid:
Usage: required
Value type: <u32>
Definition: specifies the identfier of the local endpoint of this edge
Definition: specifies the identifier of the local endpoint of this edge
- qcom,remote-pid:
Usage: required
Value type: <u32>
Definition: specifies the identfier of the remote endpoint of this edge
Definition: specifies the identifier of the remote endpoint of this edge
= SUBNODES
Each SMP2P pair contain a set of inbound and outbound entries, these are

View File

@ -163,6 +163,14 @@ C. Boot options
be preserved until there actually is some text is output to the console.
This option causes fbcon to bind immediately to the fbdev device.
7. fbcon=logo-pos:<location>
The only possible 'location' is 'center' (without quotes), and when
given, the bootup logo is moved from the default top-left corner
location to the center of the framebuffer. If more than one logo is
displayed due to multiple CPUs, the collected line of logos is moved
as a whole.
C. Attaching, Detaching and Unloading
Before going on to how to attach, detach and unload the framebuffer console, an

View File

@ -1,86 +0,0 @@
The health mechanism is targeted for Real Time Alerting, in order to know when
something bad had happened to a PCI device
- Provide alert debug information
- Self healing
- If problem needs vendor support, provide a way to gather all needed debugging
information.
The main idea is to unify and centralize driver health reports in the
generic devlink instance and allow the user to set different
attributes of the health reporting and recovery procedures.
The devlink health reporter:
Device driver creates a "health reporter" per each error/health type.
Error/Health type can be a known/generic (eg pci error, fw error, rx/tx error)
or unknown (driver specific).
For each registered health reporter a driver can issue error/health reports
asynchronously. All health reports handling is done by devlink.
Device driver can provide specific callbacks for each "health reporter", e.g.
- Recovery procedures
- Diagnostics and object dump procedures
- OOB initial parameters
Different parts of the driver can register different types of health reporters
with different handlers.
Once an error is reported, devlink health will do the following actions:
* A log is being send to the kernel trace events buffer
* Health status and statistics are being updated for the reporter instance
* Object dump is being taken and saved at the reporter instance (as long as
there is no other dump which is already stored)
* Auto recovery attempt is being done. Depends on:
- Auto-recovery configuration
- Grace period vs. time passed since last recover
The user interface:
User can access/change each reporter's parameters and driver specific callbacks
via devlink, e.g per error type (per health reporter)
- Configure reporter's generic parameters (like: disable/enable auto recovery)
- Invoke recovery procedure
- Run diagnostics
- Object dump
The devlink health interface (via netlink):
DEVLINK_CMD_HEALTH_REPORTER_GET
Retrieves status and configuration info per DEV and reporter.
DEVLINK_CMD_HEALTH_REPORTER_SET
Allows reporter-related configuration setting.
DEVLINK_CMD_HEALTH_REPORTER_RECOVER
Triggers a reporter's recovery procedure.
DEVLINK_CMD_HEALTH_REPORTER_DIAGNOSE
Retrieves diagnostics data from a reporter on a device.
DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET
Retrieves the last stored dump. Devlink health
saves a single dump. If an dump is not already stored by the devlink
for this reporter, devlink generates a new dump.
dump output is defined by the reporter.
DEVLINK_CMD_HEALTH_REPORTER_DUMP_CLEAR
Clears the last saved dump file for the specified reporter.
netlink
+--------------------------+
| |
| + |
| | |
+--------------------------+
|request for ops
|(diagnose,
mlx5_core devlink |recover,
|dump)
+--------+ +--------------------------+
| | | reporter| |
| | | +---------v----------+ |
| | ops execution | | | |
| <----------------------------------+ | |
| | | | | |
| | | + ^------------------+ |
| | | | request for ops |
| | | | (recover, dump) |
| | | | |
| | | +-+------------------+ |
| | health report | | health handler | |
| +-------------------------------> | |
| | | +--------------------+ |
| | health reporter create | |
| +----------------------------> |
+--------+ +--------------------------+

View File

@ -0,0 +1,2 @@
fw_load_policy [DEVICE, GENERIC]
Configuration mode: driverinit

View File

@ -865,7 +865,7 @@ Three LSB bits store instruction class which is one of:
BPF_STX 0x03 BPF_STX 0x03
BPF_ALU 0x04 BPF_ALU 0x04
BPF_JMP 0x05 BPF_JMP 0x05
BPF_RET 0x06 [ class 6 unused, for future if needed ]
BPF_RET 0x06 BPF_JMP32 0x06
BPF_MISC 0x07 BPF_ALU64 0x07
When BPF_CLASS(code) == BPF_ALU or BPF_JMP, 4th bit encodes source operand ...
@ -902,9 +902,9 @@ If BPF_CLASS(code) == BPF_ALU or BPF_ALU64 [ in eBPF ], BPF_OP(code) is one of:
BPF_ARSH 0xc0 /* eBPF only: sign extending shift right */
BPF_END 0xd0 /* eBPF only: endianness conversion */
If BPF_CLASS(code) == BPF_JMP, BPF_OP(code) is one of:
If BPF_CLASS(code) == BPF_JMP or BPF_JMP32 [ in eBPF ], BPF_OP(code) is one of:
BPF_JA 0x00
BPF_JA 0x00 /* BPF_JMP only */
BPF_JEQ 0x10
BPF_JGT 0x20
BPF_JGE 0x30
@ -912,8 +912,8 @@ If BPF_CLASS(code) == BPF_JMP, BPF_OP(code) is one of:
BPF_JNE 0x50 /* eBPF only: jump != */
BPF_JSGT 0x60 /* eBPF only: signed '>' */
BPF_JSGE 0x70 /* eBPF only: signed '>=' */
BPF_CALL 0x80 /* eBPF only: function call */
BPF_EXIT 0x90 /* eBPF only: function return */
BPF_CALL 0x80 /* eBPF BPF_JMP only: function call */
BPF_EXIT 0x90 /* eBPF BPF_JMP only: function return */
BPF_JLT 0xa0 /* eBPF only: unsigned '<' */
BPF_JLE 0xb0 /* eBPF only: unsigned '<=' */
BPF_JSLT 0xc0 /* eBPF only: signed '<' */
@ -936,8 +936,9 @@ Classic BPF wastes the whole BPF_RET class to represent a single 'ret'
operation. Classic BPF_RET | BPF_K means copy imm32 into return register
and perform function exit. eBPF is modeled to match CPU, so BPF_JMP | BPF_EXIT
in eBPF means function exit only. The eBPF program needs to store return
value into register R0 before doing a BPF_EXIT. Class 6 in eBPF is currently
unused and reserved for future use.
value into register R0 before doing a BPF_EXIT. Class 6 in eBPF is used as
BPF_JMP32 to mean exactly the same operations as BPF_JMP, but with 32-bit wide
operands for the comparisons instead.
For load and store instructions the 8-bit 'code' field is divided as:

View File

@ -11,24 +11,25 @@ Contents:
batman-adv
can
can_ucan_protocol
dpaa2/index
e100
e1000
e1000e
fm10k
igb
igbvf
ixgb
ixgbe
ixgbevf
i40e
iavf
ice
device_drivers/freescale/dpaa2/index
device_drivers/intel/e100
device_drivers/intel/e1000
device_drivers/intel/e1000e
device_drivers/intel/fm10k
device_drivers/intel/igb
device_drivers/intel/igbvf
device_drivers/intel/ixgb
device_drivers/intel/ixgbe
device_drivers/intel/ixgbevf
device_drivers/intel/i40e
device_drivers/intel/iavf
device_drivers/intel/ice
kapi
z8530book
msg_zerocopy
failover
net_failover
phy
alias
bridge
snmp_counter

View File

@ -0,0 +1,447 @@
=====================
PHY Abstraction Layer
=====================
Purpose
=======
Most network devices consist of set of registers which provide an interface
to a MAC layer, which communicates with the physical connection through a
PHY. The PHY concerns itself with negotiating link parameters with the link
partner on the other side of the network connection (typically, an ethernet
cable), and provides a register interface to allow drivers to determine what
settings were chosen, and to configure what settings are allowed.
While these devices are distinct from the network devices, and conform to a
standard layout for the registers, it has been common practice to integrate
the PHY management code with the network driver. This has resulted in large
amounts of redundant code. Also, on embedded systems with multiple (and
sometimes quite different) ethernet controllers connected to the same
management bus, it is difficult to ensure safe use of the bus.
Since the PHYs are devices, and the management busses through which they are
accessed are, in fact, busses, the PHY Abstraction Layer treats them as such.
In doing so, it has these goals:
#. Increase code-reuse
#. Increase overall code-maintainability
#. Speed development time for new network drivers, and for new systems
Basically, this layer is meant to provide an interface to PHY devices which
allows network driver writers to write as little code as possible, while
still providing a full feature set.
The MDIO bus
============
Most network devices are connected to a PHY by means of a management bus.
Different devices use different busses (though some share common interfaces).
In order to take advantage of the PAL, each bus interface needs to be
registered as a distinct device.
#. read and write functions must be implemented. Their prototypes are::
int write(struct mii_bus *bus, int mii_id, int regnum, u16 value);
int read(struct mii_bus *bus, int mii_id, int regnum);
mii_id is the address on the bus for the PHY, and regnum is the register
number. These functions are guaranteed not to be called from interrupt
time, so it is safe for them to block, waiting for an interrupt to signal
the operation is complete
#. A reset function is optional. This is used to return the bus to an
initialized state.
#. A probe function is needed. This function should set up anything the bus
driver needs, setup the mii_bus structure, and register with the PAL using
mdiobus_register. Similarly, there's a remove function to undo all of
that (use mdiobus_unregister).
#. Like any driver, the device_driver structure must be configured, and init
exit functions are used to register the driver.
#. The bus must also be declared somewhere as a device, and registered.
As an example for how one driver implemented an mdio bus driver, see
drivers/net/ethernet/freescale/fsl_pq_mdio.c and an associated DTS file
for one of the users. (e.g. "git grep fsl,.*-mdio arch/powerpc/boot/dts/")
(RG)MII/electrical interface considerations
===========================================
The Reduced Gigabit Medium Independent Interface (RGMII) is a 12-pin
electrical signal interface using a synchronous 125Mhz clock signal and several
data lines. Due to this design decision, a 1.5ns to 2ns delay must be added
between the clock line (RXC or TXC) and the data lines to let the PHY (clock
sink) have enough setup and hold times to sample the data lines correctly. The
PHY library offers different types of PHY_INTERFACE_MODE_RGMII* values to let
the PHY driver and optionally the MAC driver, implement the required delay. The
values of phy_interface_t must be understood from the perspective of the PHY
device itself, leading to the following:
* PHY_INTERFACE_MODE_RGMII: the PHY is not responsible for inserting any
internal delay by itself, it assumes that either the Ethernet MAC (if capable
or the PCB traces) insert the correct 1.5-2ns delay
* PHY_INTERFACE_MODE_RGMII_TXID: the PHY should insert an internal delay
for the transmit data lines (TXD[3:0]) processed by the PHY device
* PHY_INTERFACE_MODE_RGMII_RXID: the PHY should insert an internal delay
for the receive data lines (RXD[3:0]) processed by the PHY device
* PHY_INTERFACE_MODE_RGMII_ID: the PHY should insert internal delays for
both transmit AND receive data lines from/to the PHY device
Whenever possible, use the PHY side RGMII delay for these reasons:
* PHY devices may offer sub-nanosecond granularity in how they allow a
receiver/transmitter side delay (e.g: 0.5, 1.0, 1.5ns) to be specified. Such
precision may be required to account for differences in PCB trace lengths
* PHY devices are typically qualified for a large range of applications
(industrial, medical, automotive...), and they provide a constant and
reliable delay across temperature/pressure/voltage ranges
* PHY device drivers in PHYLIB being reusable by nature, being able to
configure correctly a specified delay enables more designs with similar delay
requirements to be operate correctly
For cases where the PHY is not capable of providing this delay, but the
Ethernet MAC driver is capable of doing so, the correct phy_interface_t value
should be PHY_INTERFACE_MODE_RGMII, and the Ethernet MAC driver should be
configured correctly in order to provide the required transmit and/or receive
side delay from the perspective of the PHY device. Conversely, if the Ethernet
MAC driver looks at the phy_interface_t value, for any other mode but
PHY_INTERFACE_MODE_RGMII, it should make sure that the MAC-level delays are
disabled.
In case neither the Ethernet MAC, nor the PHY are capable of providing the
required delays, as defined per the RGMII standard, several options may be
available:
* Some SoCs may offer a pin pad/mux/controller capable of configuring a given
set of pins'strength, delays, and voltage; and it may be a suitable
option to insert the expected 2ns RGMII delay.
* Modifying the PCB design to include a fixed delay (e.g: using a specifically
designed serpentine), which may not require software configuration at all.
Common problems with RGMII delay mismatch
-----------------------------------------
When there is a RGMII delay mismatch between the Ethernet MAC and the PHY, this
will most likely result in the clock and data line signals to be unstable when
the PHY or MAC take a snapshot of these signals to translate them into logical
1 or 0 states and reconstruct the data being transmitted/received. Typical
symptoms include:
* Transmission/reception partially works, and there is frequent or occasional
packet loss observed
* Ethernet MAC may report some or all packets ingressing with a FCS/CRC error,
or just discard them all
* Switching to lower speeds such as 10/100Mbits/sec makes the problem go away
(since there is enough setup/hold time in that case)
Connecting to a PHY
===================
Sometime during startup, the network driver needs to establish a connection
between the PHY device, and the network device. At this time, the PHY's bus
and drivers need to all have been loaded, so it is ready for the connection.
At this point, there are several ways to connect to the PHY:
#. The PAL handles everything, and only calls the network driver when
the link state changes, so it can react.
#. The PAL handles everything except interrupts (usually because the
controller has the interrupt registers).
#. The PAL handles everything, but checks in with the driver every second,
allowing the network driver to react first to any changes before the PAL
does.
#. The PAL serves only as a library of functions, with the network device
manually calling functions to update status, and configure the PHY
Letting the PHY Abstraction Layer do Everything
===============================================
If you choose option 1 (The hope is that every driver can, but to still be
useful to drivers that can't), connecting to the PHY is simple:
First, you need a function to react to changes in the link state. This
function follows this protocol::
static void adjust_link(struct net_device *dev);
Next, you need to know the device name of the PHY connected to this device.
The name will look something like, "0:00", where the first number is the
bus id, and the second is the PHY's address on that bus. Typically,
the bus is responsible for making its ID unique.
Now, to connect, just call this function::
phydev = phy_connect(dev, phy_name, &adjust_link, interface);
*phydev* is a pointer to the phy_device structure which represents the PHY.
If phy_connect is successful, it will return the pointer. dev, here, is the
pointer to your net_device. Once done, this function will have started the
PHY's software state machine, and registered for the PHY's interrupt, if it
has one. The phydev structure will be populated with information about the
current state, though the PHY will not yet be truly operational at this
point.
PHY-specific flags should be set in phydev->dev_flags prior to the call
to phy_connect() such that the underlying PHY driver can check for flags
and perform specific operations based on them.
This is useful if the system has put hardware restrictions on
the PHY/controller, of which the PHY needs to be aware.
*interface* is a u32 which specifies the connection type used
between the controller and the PHY. Examples are GMII, MII,
RGMII, and SGMII. For a full list, see include/linux/phy.h
Now just make sure that phydev->supported and phydev->advertising have any
values pruned from them which don't make sense for your controller (a 10/100
controller may be connected to a gigabit capable PHY, so you would need to
mask off SUPPORTED_1000baseT*). See include/linux/ethtool.h for definitions
for these bitfields. Note that you should not SET any bits, except the
SUPPORTED_Pause and SUPPORTED_AsymPause bits (see below), or the PHY may get
put into an unsupported state.
Lastly, once the controller is ready to handle network traffic, you call
phy_start(phydev). This tells the PAL that you are ready, and configures the
PHY to connect to the network. If the MAC interrupt of your network driver
also handles PHY status changes, just set phydev->irq to PHY_IGNORE_INTERRUPT
before you call phy_start and use phy_mac_interrupt() from the network
driver. If you don't want to use interrupts, set phydev->irq to PHY_POLL.
phy_start() enables the PHY interrupts (if applicable) and starts the
phylib state machine.
When you want to disconnect from the network (even if just briefly), you call
phy_stop(phydev). This function also stops the phylib state machine and
disables PHY interrupts.
Pause frames / flow control
===========================
The PHY does not participate directly in flow control/pause frames except by
making sure that the SUPPORTED_Pause and SUPPORTED_AsymPause bits are set in
MII_ADVERTISE to indicate towards the link partner that the Ethernet MAC
controller supports such a thing. Since flow control/pause frames generation
involves the Ethernet MAC driver, it is recommended that this driver takes care
of properly indicating advertisement and support for such features by setting
the SUPPORTED_Pause and SUPPORTED_AsymPause bits accordingly. This can be done
either before or after phy_connect() and/or as a result of implementing the
ethtool::set_pauseparam feature.
Keeping Close Tabs on the PAL
=============================
It is possible that the PAL's built-in state machine needs a little help to
keep your network device and the PHY properly in sync. If so, you can
register a helper function when connecting to the PHY, which will be called
every second before the state machine reacts to any changes. To do this, you
need to manually call phy_attach() and phy_prepare_link(), and then call
phy_start_machine() with the second argument set to point to your special
handler.
Currently there are no examples of how to use this functionality, and testing
on it has been limited because the author does not have any drivers which use
it (they all use option 1). So Caveat Emptor.
Doing it all yourself
=====================
There's a remote chance that the PAL's built-in state machine cannot track
the complex interactions between the PHY and your network device. If this is
so, you can simply call phy_attach(), and not call phy_start_machine or
phy_prepare_link(). This will mean that phydev->state is entirely yours to
handle (phy_start and phy_stop toggle between some of the states, so you
might need to avoid them).
An effort has been made to make sure that useful functionality can be
accessed without the state-machine running, and most of these functions are
descended from functions which did not interact with a complex state-machine.
However, again, no effort has been made so far to test running without the
state machine, so tryer beware.
Here is a brief rundown of the functions::
int phy_read(struct phy_device *phydev, u16 regnum);
int phy_write(struct phy_device *phydev, u16 regnum, u16 val);
Simple read/write primitives. They invoke the bus's read/write function
pointers.
::
void phy_print_status(struct phy_device *phydev);
A convenience function to print out the PHY status neatly.
::
void phy_request_interrupt(struct phy_device *phydev);
Requests the IRQ for the PHY interrupts.
::
struct phy_device * phy_attach(struct net_device *dev, const char *phy_id,
phy_interface_t interface);
Attaches a network device to a particular PHY, binding the PHY to a generic
driver if none was found during bus initialization.
::
int phy_start_aneg(struct phy_device *phydev);
Using variables inside the phydev structure, either configures advertising
and resets autonegotiation, or disables autonegotiation, and configures
forced settings.
::
static inline int phy_read_status(struct phy_device *phydev);
Fills the phydev structure with up-to-date information about the current
settings in the PHY.
::
int phy_ethtool_sset(struct phy_device *phydev, struct ethtool_cmd *cmd);
Ethtool convenience functions.
::
int phy_mii_ioctl(struct phy_device *phydev,
struct mii_ioctl_data *mii_data, int cmd);
The MII ioctl. Note that this function will completely screw up the state
machine if you write registers like BMCR, BMSR, ADVERTISE, etc. Best to
use this only to write registers which are not standard, and don't set off
a renegotiation.
PHY Device Drivers
==================
With the PHY Abstraction Layer, adding support for new PHYs is
quite easy. In some cases, no work is required at all! However,
many PHYs require a little hand-holding to get up-and-running.
Generic PHY driver
------------------
If the desired PHY doesn't have any errata, quirks, or special
features you want to support, then it may be best to not add
support, and let the PHY Abstraction Layer's Generic PHY Driver
do all of the work.
Writing a PHY driver
--------------------
If you do need to write a PHY driver, the first thing to do is
make sure it can be matched with an appropriate PHY device.
This is done during bus initialization by reading the device's
UID (stored in registers 2 and 3), then comparing it to each
driver's phy_id field by ANDing it with each driver's
phy_id_mask field. Also, it needs a name. Here's an example::
static struct phy_driver dm9161_driver = {
.phy_id = 0x0181b880,
.name = "Davicom DM9161E",
.phy_id_mask = 0x0ffffff0,
...
}
Next, you need to specify what features (speed, duplex, autoneg,
etc) your PHY device and driver support. Most PHYs support
PHY_BASIC_FEATURES, but you can look in include/mii.h for other
features.
Each driver consists of a number of function pointers, documented
in include/linux/phy.h under the phy_driver structure.
Of these, only config_aneg and read_status are required to be
assigned by the driver code. The rest are optional. Also, it is
preferred to use the generic phy driver's versions of these two
functions if at all possible: genphy_read_status and
genphy_config_aneg. If this is not possible, it is likely that
you only need to perform some actions before and after invoking
these functions, and so your functions will wrap the generic
ones.
Feel free to look at the Marvell, Cicada, and Davicom drivers in
drivers/net/phy/ for examples (the lxt and qsemi drivers have
not been tested as of this writing).
The PHY's MMD register accesses are handled by the PAL framework
by default, but can be overridden by a specific PHY driver if
required. This could be the case if a PHY was released for
manufacturing before the MMD PHY register definitions were
standardized by the IEEE. Most modern PHYs will be able to use
the generic PAL framework for accessing the PHY's MMD registers.
An example of such usage is for Energy Efficient Ethernet support,
implemented in the PAL. This support uses the PAL to access MMD
registers for EEE query and configuration if the PHY supports
the IEEE standard access mechanisms, or can use the PHY's specific
access interfaces if overridden by the specific PHY driver. See
the Micrel driver in drivers/net/phy/ for an example of how this
can be implemented.
Board Fixups
============
Sometimes the specific interaction between the platform and the PHY requires
special handling. For instance, to change where the PHY's clock input is,
or to add a delay to account for latency issues in the data path. In order
to support such contingencies, the PHY Layer allows platform code to register
fixups to be run when the PHY is brought up (or subsequently reset).
When the PHY Layer brings up a PHY it checks to see if there are any fixups
registered for it, matching based on UID (contained in the PHY device's phy_id
field) and the bus identifier (contained in phydev->dev.bus_id). Both must
match, however two constants, PHY_ANY_ID and PHY_ANY_UID, are provided as
wildcards for the bus ID and UID, respectively.
When a match is found, the PHY layer will invoke the run function associated
with the fixup. This function is passed a pointer to the phy_device of
interest. It should therefore only operate on that PHY.
The platform code can either register the fixup using phy_register_fixup()::
int phy_register_fixup(const char *phy_id,
u32 phy_uid, u32 phy_uid_mask,
int (*run)(struct phy_device *));
Or using one of the two stubs, phy_register_fixup_for_uid() and
phy_register_fixup_for_id()::
int phy_register_fixup_for_uid(u32 phy_uid, u32 phy_uid_mask,
int (*run)(struct phy_device *));
int phy_register_fixup_for_id(const char *phy_id,
int (*run)(struct phy_device *));
The stubs set one of the two matching criteria, and set the other one to
match anything.
When phy_register_fixup() or \*_for_uid()/\*_for_id() is called at module,
unregister fixup and free allocate memory are required.
Call one of following function before unloading module::
int phy_unregister_fixup(const char *phy_id, u32 phy_uid, u32 phy_uid_mask);
int phy_unregister_fixup_for_uid(u32 phy_uid, u32 phy_uid_mask);
int phy_register_fixup_for_id(const char *phy_id);
Standards
=========
IEEE Standard 802.3: CSMA/CD Access Method and Physical Layer Specifications, Section Two:
http://standards.ieee.org/getieee802/download/802.3-2008_section2.pdf
RGMII v1.3:
http://web.archive.org/web/20160303212629/http://www.hp.com/rnd/pdfs/RGMIIv1_3.pdf
RGMII v2.0:
http://web.archive.org/web/20160303171328/http://www.hp.com/rnd/pdfs/RGMIIv2_0_final_hp.pdf

View File

@ -1,427 +0,0 @@
-------
PHY Abstraction Layer
(Updated 2008-04-08)
Purpose
Most network devices consist of set of registers which provide an interface
to a MAC layer, which communicates with the physical connection through a
PHY. The PHY concerns itself with negotiating link parameters with the link
partner on the other side of the network connection (typically, an ethernet
cable), and provides a register interface to allow drivers to determine what
settings were chosen, and to configure what settings are allowed.
While these devices are distinct from the network devices, and conform to a
standard layout for the registers, it has been common practice to integrate
the PHY management code with the network driver. This has resulted in large
amounts of redundant code. Also, on embedded systems with multiple (and
sometimes quite different) ethernet controllers connected to the same
management bus, it is difficult to ensure safe use of the bus.
Since the PHYs are devices, and the management busses through which they are
accessed are, in fact, busses, the PHY Abstraction Layer treats them as such.
In doing so, it has these goals:
1) Increase code-reuse
2) Increase overall code-maintainability
3) Speed development time for new network drivers, and for new systems
Basically, this layer is meant to provide an interface to PHY devices which
allows network driver writers to write as little code as possible, while
still providing a full feature set.
The MDIO bus
Most network devices are connected to a PHY by means of a management bus.
Different devices use different busses (though some share common interfaces).
In order to take advantage of the PAL, each bus interface needs to be
registered as a distinct device.
1) read and write functions must be implemented. Their prototypes are:
int write(struct mii_bus *bus, int mii_id, int regnum, u16 value);
int read(struct mii_bus *bus, int mii_id, int regnum);
mii_id is the address on the bus for the PHY, and regnum is the register
number. These functions are guaranteed not to be called from interrupt
time, so it is safe for them to block, waiting for an interrupt to signal
the operation is complete
2) A reset function is optional. This is used to return the bus to an
initialized state.
3) A probe function is needed. This function should set up anything the bus
driver needs, setup the mii_bus structure, and register with the PAL using
mdiobus_register. Similarly, there's a remove function to undo all of
that (use mdiobus_unregister).
4) Like any driver, the device_driver structure must be configured, and init
exit functions are used to register the driver.
5) The bus must also be declared somewhere as a device, and registered.
As an example for how one driver implemented an mdio bus driver, see
drivers/net/ethernet/freescale/fsl_pq_mdio.c and an associated DTS file
for one of the users. (e.g. "git grep fsl,.*-mdio arch/powerpc/boot/dts/")
(RG)MII/electrical interface considerations
The Reduced Gigabit Medium Independent Interface (RGMII) is a 12-pin
electrical signal interface using a synchronous 125Mhz clock signal and several
data lines. Due to this design decision, a 1.5ns to 2ns delay must be added
between the clock line (RXC or TXC) and the data lines to let the PHY (clock
sink) have enough setup and hold times to sample the data lines correctly. The
PHY library offers different types of PHY_INTERFACE_MODE_RGMII* values to let
the PHY driver and optionally the MAC driver, implement the required delay. The
values of phy_interface_t must be understood from the perspective of the PHY
device itself, leading to the following:
* PHY_INTERFACE_MODE_RGMII: the PHY is not responsible for inserting any
internal delay by itself, it assumes that either the Ethernet MAC (if capable
or the PCB traces) insert the correct 1.5-2ns delay
* PHY_INTERFACE_MODE_RGMII_TXID: the PHY should insert an internal delay
for the transmit data lines (TXD[3:0]) processed by the PHY device
* PHY_INTERFACE_MODE_RGMII_RXID: the PHY should insert an internal delay
for the receive data lines (RXD[3:0]) processed by the PHY device
* PHY_INTERFACE_MODE_RGMII_ID: the PHY should insert internal delays for
both transmit AND receive data lines from/to the PHY device
Whenever possible, use the PHY side RGMII delay for these reasons:
* PHY devices may offer sub-nanosecond granularity in how they allow a
receiver/transmitter side delay (e.g: 0.5, 1.0, 1.5ns) to be specified. Such
precision may be required to account for differences in PCB trace lengths
* PHY devices are typically qualified for a large range of applications
(industrial, medical, automotive...), and they provide a constant and
reliable delay across temperature/pressure/voltage ranges
* PHY device drivers in PHYLIB being reusable by nature, being able to
configure correctly a specified delay enables more designs with similar delay
requirements to be operate correctly
For cases where the PHY is not capable of providing this delay, but the
Ethernet MAC driver is capable of doing so, the correct phy_interface_t value
should be PHY_INTERFACE_MODE_RGMII, and the Ethernet MAC driver should be
configured correctly in order to provide the required transmit and/or receive
side delay from the perspective of the PHY device. Conversely, if the Ethernet
MAC driver looks at the phy_interface_t value, for any other mode but
PHY_INTERFACE_MODE_RGMII, it should make sure that the MAC-level delays are
disabled.
In case neither the Ethernet MAC, nor the PHY are capable of providing the
required delays, as defined per the RGMII standard, several options may be
available:
* Some SoCs may offer a pin pad/mux/controller capable of configuring a given
set of pins'strength, delays, and voltage; and it may be a suitable
option to insert the expected 2ns RGMII delay.
* Modifying the PCB design to include a fixed delay (e.g: using a specifically
designed serpentine), which may not require software configuration at all.
Common problems with RGMII delay mismatch
When there is a RGMII delay mismatch between the Ethernet MAC and the PHY, this
will most likely result in the clock and data line signals to be unstable when
the PHY or MAC take a snapshot of these signals to translate them into logical
1 or 0 states and reconstruct the data being transmitted/received. Typical
symptoms include:
* Transmission/reception partially works, and there is frequent or occasional
packet loss observed
* Ethernet MAC may report some or all packets ingressing with a FCS/CRC error,
or just discard them all
* Switching to lower speeds such as 10/100Mbits/sec makes the problem go away
(since there is enough setup/hold time in that case)
Connecting to a PHY
Sometime during startup, the network driver needs to establish a connection
between the PHY device, and the network device. At this time, the PHY's bus
and drivers need to all have been loaded, so it is ready for the connection.
At this point, there are several ways to connect to the PHY:
1) The PAL handles everything, and only calls the network driver when
the link state changes, so it can react.
2) The PAL handles everything except interrupts (usually because the
controller has the interrupt registers).
3) The PAL handles everything, but checks in with the driver every second,
allowing the network driver to react first to any changes before the PAL
does.
4) The PAL serves only as a library of functions, with the network device
manually calling functions to update status, and configure the PHY
Letting the PHY Abstraction Layer do Everything
If you choose option 1 (The hope is that every driver can, but to still be
useful to drivers that can't), connecting to the PHY is simple:
First, you need a function to react to changes in the link state. This
function follows this protocol:
static void adjust_link(struct net_device *dev);
Next, you need to know the device name of the PHY connected to this device.
The name will look something like, "0:00", where the first number is the
bus id, and the second is the PHY's address on that bus. Typically,
the bus is responsible for making its ID unique.
Now, to connect, just call this function:
phydev = phy_connect(dev, phy_name, &adjust_link, interface);
phydev is a pointer to the phy_device structure which represents the PHY. If
phy_connect is successful, it will return the pointer. dev, here, is the
pointer to your net_device. Once done, this function will have started the
PHY's software state machine, and registered for the PHY's interrupt, if it
has one. The phydev structure will be populated with information about the
current state, though the PHY will not yet be truly operational at this
point.
PHY-specific flags should be set in phydev->dev_flags prior to the call
to phy_connect() such that the underlying PHY driver can check for flags
and perform specific operations based on them.
This is useful if the system has put hardware restrictions on
the PHY/controller, of which the PHY needs to be aware.
interface is a u32 which specifies the connection type used
between the controller and the PHY. Examples are GMII, MII,
RGMII, and SGMII. For a full list, see include/linux/phy.h
Now just make sure that phydev->supported and phydev->advertising have any
values pruned from them which don't make sense for your controller (a 10/100
controller may be connected to a gigabit capable PHY, so you would need to
mask off SUPPORTED_1000baseT*). See include/linux/ethtool.h for definitions
for these bitfields. Note that you should not SET any bits, except the
SUPPORTED_Pause and SUPPORTED_AsymPause bits (see below), or the PHY may get
put into an unsupported state.
Lastly, once the controller is ready to handle network traffic, you call
phy_start(phydev). This tells the PAL that you are ready, and configures the
PHY to connect to the network. If you want to handle your own interrupts,
just set phydev->irq to PHY_IGNORE_INTERRUPT before you call phy_start.
Similarly, if you don't want to use interrupts, set phydev->irq to PHY_POLL.
When you want to disconnect from the network (even if just briefly), you call
phy_stop(phydev).
Pause frames / flow control
The PHY does not participate directly in flow control/pause frames except by
making sure that the SUPPORTED_Pause and SUPPORTED_AsymPause bits are set in
MII_ADVERTISE to indicate towards the link partner that the Ethernet MAC
controller supports such a thing. Since flow control/pause frames generation
involves the Ethernet MAC driver, it is recommended that this driver takes care
of properly indicating advertisement and support for such features by setting
the SUPPORTED_Pause and SUPPORTED_AsymPause bits accordingly. This can be done
either before or after phy_connect() and/or as a result of implementing the
ethtool::set_pauseparam feature.
Keeping Close Tabs on the PAL
It is possible that the PAL's built-in state machine needs a little help to
keep your network device and the PHY properly in sync. If so, you can
register a helper function when connecting to the PHY, which will be called
every second before the state machine reacts to any changes. To do this, you
need to manually call phy_attach() and phy_prepare_link(), and then call
phy_start_machine() with the second argument set to point to your special
handler.
Currently there are no examples of how to use this functionality, and testing
on it has been limited because the author does not have any drivers which use
it (they all use option 1). So Caveat Emptor.
Doing it all yourself
There's a remote chance that the PAL's built-in state machine cannot track
the complex interactions between the PHY and your network device. If this is
so, you can simply call phy_attach(), and not call phy_start_machine or
phy_prepare_link(). This will mean that phydev->state is entirely yours to
handle (phy_start and phy_stop toggle between some of the states, so you
might need to avoid them).
An effort has been made to make sure that useful functionality can be
accessed without the state-machine running, and most of these functions are
descended from functions which did not interact with a complex state-machine.
However, again, no effort has been made so far to test running without the
state machine, so tryer beware.
Here is a brief rundown of the functions:
int phy_read(struct phy_device *phydev, u16 regnum);
int phy_write(struct phy_device *phydev, u16 regnum, u16 val);
Simple read/write primitives. They invoke the bus's read/write function
pointers.
void phy_print_status(struct phy_device *phydev);
A convenience function to print out the PHY status neatly.
int phy_start_interrupts(struct phy_device *phydev);
int phy_stop_interrupts(struct phy_device *phydev);
Requests the IRQ for the PHY interrupts, then enables them for
start, or disables then frees them for stop.
struct phy_device * phy_attach(struct net_device *dev, const char *phy_id,
phy_interface_t interface);
Attaches a network device to a particular PHY, binding the PHY to a generic
driver if none was found during bus initialization.
int phy_start_aneg(struct phy_device *phydev);
Using variables inside the phydev structure, either configures advertising
and resets autonegotiation, or disables autonegotiation, and configures
forced settings.
static inline int phy_read_status(struct phy_device *phydev);
Fills the phydev structure with up-to-date information about the current
settings in the PHY.
int phy_ethtool_sset(struct phy_device *phydev, struct ethtool_cmd *cmd);
Ethtool convenience functions.
int phy_mii_ioctl(struct phy_device *phydev,
struct mii_ioctl_data *mii_data, int cmd);
The MII ioctl. Note that this function will completely screw up the state
machine if you write registers like BMCR, BMSR, ADVERTISE, etc. Best to
use this only to write registers which are not standard, and don't set off
a renegotiation.
PHY Device Drivers
With the PHY Abstraction Layer, adding support for new PHYs is
quite easy. In some cases, no work is required at all! However,
many PHYs require a little hand-holding to get up-and-running.
Generic PHY driver
If the desired PHY doesn't have any errata, quirks, or special
features you want to support, then it may be best to not add
support, and let the PHY Abstraction Layer's Generic PHY Driver
do all of the work.
Writing a PHY driver
If you do need to write a PHY driver, the first thing to do is
make sure it can be matched with an appropriate PHY device.
This is done during bus initialization by reading the device's
UID (stored in registers 2 and 3), then comparing it to each
driver's phy_id field by ANDing it with each driver's
phy_id_mask field. Also, it needs a name. Here's an example:
static struct phy_driver dm9161_driver = {
.phy_id = 0x0181b880,
.name = "Davicom DM9161E",
.phy_id_mask = 0x0ffffff0,
...
}
Next, you need to specify what features (speed, duplex, autoneg,
etc) your PHY device and driver support. Most PHYs support
PHY_BASIC_FEATURES, but you can look in include/mii.h for other
features.
Each driver consists of a number of function pointers, documented
in include/linux/phy.h under the phy_driver structure.
Of these, only config_aneg and read_status are required to be
assigned by the driver code. The rest are optional. Also, it is
preferred to use the generic phy driver's versions of these two
functions if at all possible: genphy_read_status and
genphy_config_aneg. If this is not possible, it is likely that
you only need to perform some actions before and after invoking
these functions, and so your functions will wrap the generic
ones.
Feel free to look at the Marvell, Cicada, and Davicom drivers in
drivers/net/phy/ for examples (the lxt and qsemi drivers have
not been tested as of this writing).
The PHY's MMD register accesses are handled by the PAL framework
by default, but can be overridden by a specific PHY driver if
required. This could be the case if a PHY was released for
manufacturing before the MMD PHY register definitions were
standardized by the IEEE. Most modern PHYs will be able to use
the generic PAL framework for accessing the PHY's MMD registers.
An example of such usage is for Energy Efficient Ethernet support,
implemented in the PAL. This support uses the PAL to access MMD
registers for EEE query and configuration if the PHY supports
the IEEE standard access mechanisms, or can use the PHY's specific
access interfaces if overridden by the specific PHY driver. See
the Micrel driver in drivers/net/phy/ for an example of how this
can be implemented.
Board Fixups
Sometimes the specific interaction between the platform and the PHY requires
special handling. For instance, to change where the PHY's clock input is,
or to add a delay to account for latency issues in the data path. In order
to support such contingencies, the PHY Layer allows platform code to register
fixups to be run when the PHY is brought up (or subsequently reset).
When the PHY Layer brings up a PHY it checks to see if there are any fixups
registered for it, matching based on UID (contained in the PHY device's phy_id
field) and the bus identifier (contained in phydev->dev.bus_id). Both must
match, however two constants, PHY_ANY_ID and PHY_ANY_UID, are provided as
wildcards for the bus ID and UID, respectively.
When a match is found, the PHY layer will invoke the run function associated
with the fixup. This function is passed a pointer to the phy_device of
interest. It should therefore only operate on that PHY.
The platform code can either register the fixup using phy_register_fixup():
int phy_register_fixup(const char *phy_id,
u32 phy_uid, u32 phy_uid_mask,
int (*run)(struct phy_device *));
Or using one of the two stubs, phy_register_fixup_for_uid() and
phy_register_fixup_for_id():
int phy_register_fixup_for_uid(u32 phy_uid, u32 phy_uid_mask,
int (*run)(struct phy_device *));
int phy_register_fixup_for_id(const char *phy_id,
int (*run)(struct phy_device *));
The stubs set one of the two matching criteria, and set the other one to
match anything.
When phy_register_fixup() or *_for_uid()/*_for_id() is called at module,
unregister fixup and free allocate memory are required.
Call one of following function before unloading module.
int phy_unregister_fixup(const char *phy_id, u32 phy_uid, u32 phy_uid_mask);
int phy_unregister_fixup_for_uid(u32 phy_uid, u32 phy_uid_mask);
int phy_register_fixup_for_id(const char *phy_id);
Standards
IEEE Standard 802.3: CSMA/CD Access Method and Physical Layer Specifications, Section Two:
http://standards.ieee.org/getieee802/download/802.3-2008_section2.pdf
RGMII v1.3:
http://web.archive.org/web/20160303212629/http://www.hp.com/rnd/pdfs/RGMIIv1_3.pdf
RGMII v2.0:
http://web.archive.org/web/20160303171328/http://www.hp.com/rnd/pdfs/RGMIIv2_0_final_hp.pdf

View File

@ -1000,51 +1000,6 @@ The kernel interface functions are as follows:
size should be set when the call is begun. tx_total_len may not be less
than zero.
(*) Check to see the completion state of a call so that the caller can assess
whether it needs to be retried.
enum rxrpc_call_completion {
RXRPC_CALL_SUCCEEDED,
RXRPC_CALL_REMOTELY_ABORTED,
RXRPC_CALL_LOCALLY_ABORTED,
RXRPC_CALL_LOCAL_ERROR,
RXRPC_CALL_NETWORK_ERROR,
};
int rxrpc_kernel_check_call(struct socket *sock, struct rxrpc_call *call,
enum rxrpc_call_completion *_compl,
u32 *_abort_code);
On return, -EINPROGRESS will be returned if the call is still ongoing; if
it is finished, *_compl will be set to indicate the manner of completion,
*_abort_code will be set to any abort code that occurred. 0 will be
returned on a successful completion, -ECONNABORTED will be returned if the
client failed due to a remote abort and anything else will return an
appropriate error code.
The caller should look at this information to decide if it's worth
retrying the call.
(*) Retry a client call.
int rxrpc_kernel_retry_call(struct socket *sock,
struct rxrpc_call *call,
struct sockaddr_rxrpc *srx,
struct key *key);
This attempts to partially reinitialise a call and submit it again while
reusing the original call's Tx queue to avoid the need to repackage and
re-encrypt the data to be sent. call indicates the call to retry, srx the
new address to send it to and key the encryption key to use for signing or
encrypting the packets.
For this to work, the first Tx data packet must still be in the transmit
queue, and currently this is only permitted for local and network errors
and the call must not have been aborted. Any partially constructed Tx
packet is left as is and can continue being filled afterwards.
It returns 0 if the call was requeued and an error otherwise.
(*) Get call RTT.
u64 rxrpc_kernel_get_rtt(struct socket *sock, struct rxrpc_call *call);

View File

@ -366,6 +366,27 @@ to the accept queue.
TCP Fast Open
=============
* TcpEstabResets
Defined in `RFC1213 tcpEstabResets`_.
.. _RFC1213 tcpEstabResets: https://tools.ietf.org/html/rfc1213#page-48
* TcpAttemptFails
Defined in `RFC1213 tcpAttemptFails`_.
.. _RFC1213 tcpAttemptFails: https://tools.ietf.org/html/rfc1213#page-48
* TcpOutRsts
Defined in `RFC1213 tcpOutRsts`_. The RFC says this counter indicates
the 'segments sent containing the RST flag', but in linux kernel, this
couner indicates the segments kerenl tried to send. The sending
process might be failed due to some errors (e.g. memory alloc failed).
.. _RFC1213 tcpOutRsts: https://tools.ietf.org/html/rfc1213#page-52
TCP Fast Path
============
When kernel receives a TCP packet, it has two paths to handler the
packet, one is fast path, another is slow path. The comment in kernel
code provides a good explanation of them, I pasted them below::
@ -413,7 +434,6 @@ increase 1.
TCP abort
=========
* TcpExtTCPAbortOnData
It means TCP layer has data in flight, but need to close the
@ -589,7 +609,6 @@ packet yet, the sender would know packet 4 is out of order. The TCP
stack of kernel will increase TcpExtTCPSACKReorder for both of the
above scenarios.
DSACK
=====
The DSACK is defined in `RFC2883`_. The receiver uses DSACK to report
@ -612,8 +631,7 @@ The TCP stack receives an out of order duplicate packet, so it sends a
DSACK to the sender.
* TcpExtTCPDSACKRecv
The TCP stack receives a DSACK, which indicate an acknowledged
The TCP stack receives a DSACK, which indicates an acknowledged
duplicate packet is received.
* TcpExtTCPDSACKOfoRecv
@ -621,6 +639,56 @@ duplicate packet is received.
The TCP stack receives a DSACK, which indicate an out of order
duplicate packet is received.
invalid SACK and DSACK
====================
When a SACK (or DSACK) block is invalid, a corresponding counter would
be updated. The validation method is base on the start/end sequence
number of the SACK block. For more details, please refer the comment
of the function tcp_is_sackblock_valid in the kernel source code. A
SACK option could have up to 4 blocks, they are checked
individually. E.g., if 3 blocks of a SACk is invalid, the
corresponding counter would be updated 3 times. The comment of the
`Add counters for discarded SACK blocks`_ patch has additional
explaination:
.. _Add counters for discarded SACK blocks: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=18f02545a9a16c9a89778b91a162ad16d510bb32
* TcpExtTCPSACKDiscard
This counter indicates how many SACK blocks are invalid. If the invalid
SACK block is caused by ACK recording, the TCP stack will only ignore
it and won't update this counter.
* TcpExtTCPDSACKIgnoredOld and TcpExtTCPDSACKIgnoredNoUndo
When a DSACK block is invalid, one of these two counters would be
updated. Which counter will be updated depends on the undo_marker flag
of the TCP socket. If the undo_marker is not set, the TCP stack isn't
likely to re-transmit any packets, and we still receive an invalid
DSACK block, the reason might be that the packet is duplicated in the
middle of the network. In such scenario, TcpExtTCPDSACKIgnoredNoUndo
will be updated. If the undo_marker is set, TcpExtTCPDSACKIgnoredOld
will be updated. As implied in its name, it might be an old packet.
SACK shift
=========
The linux networking stack stores data in sk_buff struct (skb for
short). If a SACK block acrosses multiple skb, the TCP stack will try
to re-arrange data in these skb. E.g. if a SACK block acknowledges seq
10 to 15, skb1 has seq 10 to 13, skb2 has seq 14 to 20. The seq 14 and
15 in skb2 would be moved to skb1. This operation is 'shift'. If a
SACK block acknowledges seq 10 to 20, skb1 has seq 10 to 13, skb2 has
seq 14 to 20. All data in skb2 will be moved to skb1, and skb2 will be
discard, this operation is 'merge'.
* TcpExtTCPSackShifted
A skb is shifted
* TcpExtTCPSackMerged
A skb is merged
* TcpExtTCPSackShiftFallback
A skb should be shifted or merged, but the TCP stack doesn't do it for
some reasons.
TCP out of order
================
* TcpExtTCPOFOQueue
@ -721,6 +789,60 @@ unacknowledged number (more strict than `RFC 5961 section 5.2`_).
.. _RFC 5961 section 4.2: https://tools.ietf.org/html/rfc5961#page-9
.. _RFC 5961 section 5.2: https://tools.ietf.org/html/rfc5961#page-11
TCP receive window
=================
* TcpExtTCPWantZeroWindowAdv
Depending on current memory usage, the TCP stack tries to set receive
window to zero. But the receive window might still be a no-zero
value. For example, if the previous window size is 10, and the TCP
stack receives 3 bytes, the current window size would be 7 even if the
window size calculated by the memory usage is zero.
* TcpExtTCPToZeroWindowAdv
The TCP receive window is set to zero from a no-zero value.
* TcpExtTCPFromZeroWindowAdv
The TCP receive window is set to no-zero value from zero.
Delayed ACK
==========
The TCP Delayed ACK is a technique which is used for reducing the
packet count in the network. For more details, please refer the
`Delayed ACK wiki`_
.. _Delayed ACK wiki: https://en.wikipedia.org/wiki/TCP_delayed_acknowledgment
* TcpExtDelayedACKs
A delayed ACK timer expires. The TCP stack will send a pure ACK packet
and exit the delayed ACK mode.
* TcpExtDelayedACKLocked
A delayed ACK timer expires, but the TCP stack can't send an ACK
immediately due to the socket is locked by a userspace program. The
TCP stack will send a pure ACK later (after the userspace program
unlock the socket). When the TCP stack sends the pure ACK later, the
TCP stack will also update TcpExtDelayedACKs and exit the delayed ACK
mode.
* TcpExtDelayedACKLost
It will be updated when the TCP stack receives a packet which has been
ACKed. A Delayed ACK loss might cause this issue, but it would also be
triggered by other reasons, such as a packet is duplicated in the
network.
Tail Loss Probe (TLP)
===================
TLP is an algorithm which is used to detect TCP packet loss. For more
details, please refer the `TLP paper`_.
.. _TLP paper: https://tools.ietf.org/html/draft-dukkipati-tcpm-tcp-loss-probe-01
* TcpExtTCPLossProbes
A TLP probe packet is sent.
* TcpExtTCPLossProbeRecovery
A packet loss is detected and recovered by TLP.
examples
========

View File

@ -417,7 +417,7 @@ is again deprecated and ts[2] holds a hardware timestamp if set.
Hardware time stamping must also be initialized for each device driver
that is expected to do hardware time stamping. The parameter is defined in
/include/linux/net_tstamp.h as:
include/uapi/linux/net_tstamp.h as:
struct hwtstamp_config {
int flags; /* no flags defined right now, must be zero */
@ -487,7 +487,7 @@ enum {
HWTSTAMP_FILTER_PTP_V1_L4_EVENT,
/* for the complete list of values, please check
* the include file /include/linux/net_tstamp.h
* the include file include/uapi/linux/net_tstamp.h
*/
};

View File

@ -291,6 +291,20 @@ user space is responsible for creating them if needed.
Default : 0 (for compatibility reasons)
devconf_inherit_init_net
----------------------------
Controls if a new network namespace should inherit all current
settings under /proc/sys/net/{ipv4,ipv6}/conf/{all,default}/. By
default, we keep the current behavior: for IPv4 we inherit all current
settings from init_net and for IPv6 we reset all settings to default.
If set to 1, both IPv4 and IPv6 settings are forced to inherit from
current ones in init_net. If set to 2, both IPv4 and IPv6 settings are
forced to reset to their default values.
Default : 0 (for compatibility reasons)
2. /proc/sys/net/unix - Parameters for Unix domain sockets
-------------------------------------------------------

View File

@ -3052,8 +3052,8 @@ F: include/linux/bcm963xx_nvram.h
F: include/linux/bcm963xx_tag.h
BROADCOM BNX2 GIGABIT ETHERNET DRIVER
M: Rasesh Mody <rasesh.mody@cavium.com>
M: Dept-GELinuxNICDev@cavium.com
M: Rasesh Mody <rmody@marvell.com>
M: GR-Linux-NIC-Dev@marvell.com
L: netdev@vger.kernel.org
S: Supported
F: drivers/net/ethernet/broadcom/bnx2.*
@ -3072,9 +3072,9 @@ S: Supported
F: drivers/scsi/bnx2i/
BROADCOM BNX2X 10 GIGABIT ETHERNET DRIVER
M: Ariel Elior <ariel.elior@cavium.com>
M: Sudarsana Kalluru <sudarsana.kalluru@cavium.com>
M: everest-linux-l2@cavium.com
M: Ariel Elior <aelior@marvell.com>
M: Sudarsana Kalluru <skalluru@marvell.com>
M: GR-everest-linux-l2@marvell.com
L: netdev@vger.kernel.org
S: Supported
F: drivers/net/ethernet/broadcom/bnx2x/
@ -3249,9 +3249,9 @@ S: Supported
F: drivers/scsi/bfa/
BROCADE BNA 10 GIGABIT ETHERNET DRIVER
M: Rasesh Mody <rasesh.mody@cavium.com>
M: Sudarsana Kalluru <sudarsana.kalluru@cavium.com>
M: Dept-GELinuxNICDev@cavium.com
M: Rasesh Mody <rmody@marvell.com>
M: Sudarsana Kalluru <skalluru@marvell.com>
M: GR-Linux-NIC-Dev@marvell.com
L: netdev@vger.kernel.org
S: Supported
F: drivers/net/ethernet/brocade/bna/
@ -3471,10 +3471,9 @@ F: drivers/i2c/busses/i2c-octeon*
F: drivers/i2c/busses/i2c-thunderx*
CAVIUM LIQUIDIO NETWORK DRIVER
M: Derek Chickles <derek.chickles@caviumnetworks.com>
M: Satanand Burla <satananda.burla@caviumnetworks.com>
M: Felix Manlunas <felix.manlunas@caviumnetworks.com>
M: Raghu Vatsavayi <raghu.vatsavayi@caviumnetworks.com>
M: Derek Chickles <dchickles@marvell.com>
M: Satanand Burla <sburla@marvell.com>
M: Felix Manlunas <fmanlunas@marvell.com>
L: netdev@vger.kernel.org
W: http://www.cavium.com
S: Supported
@ -3979,6 +3978,7 @@ F: drivers/cpufreq/arm_big_little.c
CPU POWER MONITORING SUBSYSTEM
M: Thomas Renninger <trenn@suse.com>
M: Shuah Khan <shuah@kernel.org>
M: Shuah Khan <skhan@linuxfoundation.org>
L: linux-pm@vger.kernel.org
S: Maintained
F: tools/power/cpupower/
@ -4123,7 +4123,7 @@ S: Maintained
F: drivers/media/dvb-frontends/cxd2820r*
CXGB3 ETHERNET DRIVER (CXGB3)
M: Arjun Vynipadath <arjun@chelsio.com>
M: Vishal Kulkarni <vishal@chelsio.com>
L: netdev@vger.kernel.org
W: http://www.chelsio.com
S: Supported
@ -4152,7 +4152,7 @@ S: Supported
F: drivers/crypto/chelsio
CXGB4 ETHERNET DRIVER (CXGB4)
M: Arjun Vynipadath <arjun@chelsio.com>
M: Vishal Kulkarni <vishal@chelsio.com>
L: netdev@vger.kernel.org
W: http://www.chelsio.com
S: Supported
@ -6024,6 +6024,12 @@ L: linuxppc-dev@lists.ozlabs.org
S: Maintained
F: drivers/dma/fsldma.*
FREESCALE ENETC ETHERNET DRIVERS
M: Claudiu Manoil <claudiu.manoil@nxp.com>
L: netdev@vger.kernel.org
S: Maintained
F: drivers/net/ethernet/freescale/enetc/
FREESCALE eTSEC ETHERNET DRIVER (GIANFAR)
M: Claudiu Manoil <claudiu.manoil@nxp.com>
L: netdev@vger.kernel.org
@ -6088,6 +6094,7 @@ M: Yangbo Lu <yangbo.lu@nxp.com>
L: netdev@vger.kernel.org
S: Maintained
F: drivers/ptp/ptp_qoriq.c
F: drivers/ptp/ptp_qoriq_debugfs.c
F: include/linux/fsl/ptp_qoriq.h
F: Documentation/devicetree/bindings/ptp/ptp-qoriq.txt
@ -8259,6 +8266,7 @@ F: include/uapi/linux/sunrpc/
KERNEL SELFTEST FRAMEWORK
M: Shuah Khan <shuah@kernel.org>
M: Shuah Khan <skhan@linuxfoundation.org>
L: linux-kselftest@vger.kernel.org
T: git git://git.kernel.org/pub/scm/linux/kernel/git/shuah/linux-kselftest.git
Q: https://patchwork.kernel.org/project/linux-kselftest/list/
@ -10690,9 +10698,9 @@ S: Maintained
F: drivers/net/netdevsim/*
NETXEN (1/10) GbE SUPPORT
M: Manish Chopra <manish.chopra@cavium.com>
M: Rahul Verma <rahul.verma@cavium.com>
M: Dept-GELinuxNICDev@cavium.com
M: Manish Chopra <manishc@marvell.com>
M: Rahul Verma <rahulv@marvell.com>
M: GR-Linux-NIC-Dev@marvell.com
L: netdev@vger.kernel.org
S: Supported
F: drivers/net/ethernet/qlogic/netxen/
@ -12476,8 +12484,8 @@ S: Supported
F: drivers/scsi/qedi/
QLOGIC QL4xxx ETHERNET DRIVER
M: Ariel Elior <Ariel.Elior@cavium.com>
M: everest-linux-l2@cavium.com
M: Ariel Elior <aelior@marvell.com>
M: GR-everest-linux-l2@marvell.com
L: netdev@vger.kernel.org
S: Supported
F: drivers/net/ethernet/qlogic/qed/
@ -12485,8 +12493,8 @@ F: include/linux/qed/
F: drivers/net/ethernet/qlogic/qede/
QLOGIC QL4xxx RDMA DRIVER
M: Michal Kalderon <Michal.Kalderon@cavium.com>
M: Ariel Elior <Ariel.Elior@cavium.com>
M: Michal Kalderon <mkalderon@marvell.com>
M: Ariel Elior <aelior@marvell.com>
L: linux-rdma@vger.kernel.org
S: Supported
F: drivers/infiniband/hw/qedr/
@ -12506,7 +12514,7 @@ F: Documentation/scsi/LICENSE.qla2xxx
F: drivers/scsi/qla2xxx/
QLOGIC QLA3XXX NETWORK DRIVER
M: Dept-GELinuxNICDev@cavium.com
M: GR-Linux-NIC-Dev@marvell.com
L: netdev@vger.kernel.org
S: Supported
F: Documentation/networking/device_drivers/qlogic/LICENSE.qla3xxx
@ -12520,16 +12528,16 @@ F: Documentation/scsi/LICENSE.qla4xxx
F: drivers/scsi/qla4xxx/
QLOGIC QLCNIC (1/10)Gb ETHERNET DRIVER
M: Shahed Shaikh <Shahed.Shaikh@cavium.com>
M: Manish Chopra <manish.chopra@cavium.com>
M: Dept-GELinuxNICDev@cavium.com
M: Shahed Shaikh <shshaikh@marvell.com>
M: Manish Chopra <manishc@marvell.com>
M: GR-Linux-NIC-Dev@marvell.com
L: netdev@vger.kernel.org
S: Supported
F: drivers/net/ethernet/qlogic/qlcnic/
QLOGIC QLGE 10Gb ETHERNET DRIVER
M: Manish Chopra <manish.chopra@cavium.com>
M: Dept-GELinuxNICDev@cavium.com
M: Manish Chopra <manishc@marvell.com>
M: GR-Linux-NIC-Dev@marvell.com
L: netdev@vger.kernel.org
S: Supported
F: drivers/net/ethernet/qlogic/qlge/
@ -12609,6 +12617,14 @@ L: netdev@vger.kernel.org
S: Maintained
F: drivers/net/ethernet/qualcomm/emac/
QUALCOMM ETHQOS ETHERNET DRIVER
M: Vinod Koul <vkoul@kernel.org>
M: Niklas Cassel <niklas.cassel@linaro.org>
L: netdev@vger.kernel.org
S: Maintained
F: drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c
F: Documentation/devicetree/bindings/net/qcom,dwmac.txt
QUALCOMM GENERIC INTERFACE I2C DRIVER
M: Alok Chauhan <alokc@codeaurora.org>
M: Karthikeyan Ramasubramanian <kramasub@codeaurora.org>
@ -15843,6 +15859,7 @@ F: drivers/usb/common/usb-otg-fsm.c
USB OVER IP DRIVER
M: Valentina Manea <valentina.manea.m@gmail.com>
M: Shuah Khan <shuah@kernel.org>
M: Shuah Khan <skhan@linuxfoundation.org>
L: linux-usb@vger.kernel.org
S: Maintained
F: Documentation/usb/usbip_protocol.txt
@ -16672,6 +16689,24 @@ T: git git://linuxtv.org/media_tree.git
S: Maintained
F: drivers/media/tuners/tuner-xc2028.*
XDP (eXpress Data Path)
M: Alexei Starovoitov <ast@kernel.org>
M: Daniel Borkmann <daniel@iogearbox.net>
M: David S. Miller <davem@davemloft.net>
M: Jakub Kicinski <jakub.kicinski@netronome.com>
M: Jesper Dangaard Brouer <hawk@kernel.org>
M: John Fastabend <john.fastabend@gmail.com>
L: netdev@vger.kernel.org
L: xdp-newbies@vger.kernel.org
S: Supported
F: net/core/xdp.c
F: include/net/xdp.h
F: kernel/bpf/devmap.c
F: kernel/bpf/cpumap.c
F: include/trace/events/xdp.h
K: xdp
N: xdp
XDP SOCKETS (AF_XDP)
M: Björn Töpel <bjorn.topel@intel.com>
M: Magnus Karlsson <magnus.karlsson@intel.com>

View File

@ -2,7 +2,7 @@
VERSION = 5
PATCHLEVEL = 0
SUBLEVEL = 0
EXTRAVERSION = -rc2
EXTRAVERSION = -rc4
NAME = Shy Crocodile
# *DOCUMENTATION*
@ -955,6 +955,7 @@ ifdef CONFIG_STACK_VALIDATION
endif
endif
PHONY += prepare0
ifeq ($(KBUILD_EXTMOD),)
core-y += kernel/ certs/ mm/ fs/ ipc/ security/ crypto/ block/
@ -1061,8 +1062,7 @@ scripts: scripts_basic scripts_dtc
# archprepare is used in arch Makefiles and when processed asm symlink,
# version.h and scripts_basic is processed / created.
# Listed in dependency order
PHONY += prepare archprepare prepare0 prepare1 prepare2 prepare3
PHONY += prepare archprepare prepare1 prepare2 prepare3
# prepare3 is used to check if we are building in a separate output directory,
# and if so do:
@ -1360,11 +1360,11 @@ mrproper: rm-dirs := $(wildcard $(MRPROPER_DIRS))
mrproper: rm-files := $(wildcard $(MRPROPER_FILES))
mrproper-dirs := $(addprefix _mrproper_,scripts)
PHONY += $(mrproper-dirs) mrproper archmrproper
PHONY += $(mrproper-dirs) mrproper
$(mrproper-dirs):
$(Q)$(MAKE) $(clean)=$(patsubst _mrproper_%,%,$@)
mrproper: clean archmrproper $(mrproper-dirs)
mrproper: clean $(mrproper-dirs)
$(call cmd,rmdirs)
$(call cmd,rmfiles)

View File

@ -3,23 +3,19 @@ generic-y += bugs.h
generic-y += compat.h
generic-y += device.h
generic-y += div64.h
generic-y += dma-mapping.h
generic-y += emergency-restart.h
generic-y += extable.h
generic-y += fb.h
generic-y += ftrace.h
generic-y += hardirq.h
generic-y += hw_irq.h
generic-y += irq_regs.h
generic-y += irq_work.h
generic-y += kmap_types.h
generic-y += local.h
generic-y += local64.h
generic-y += mcs_spinlock.h
generic-y += mm-arch-hooks.h
generic-y += msi.h
generic-y += parport.h
generic-y += pci.h
generic-y += percpu.h
generic-y += preempt.h
generic-y += topology.h

View File

@ -216,6 +216,14 @@ struct bcr_fp_arcv2 {
#endif
};
struct bcr_actionpoint {
#ifdef CONFIG_CPU_BIG_ENDIAN
unsigned int pad:21, min:1, num:2, ver:8;
#else
unsigned int ver:8, num:2, min:1, pad:21;
#endif
};
#include <soc/arc/timers.h>
struct bcr_bpu_arcompact {
@ -283,7 +291,7 @@ struct cpuinfo_arc_cache {
};
struct cpuinfo_arc_bpu {
unsigned int ver, full, num_cache, num_pred;
unsigned int ver, full, num_cache, num_pred, ret_stk;
};
struct cpuinfo_arc_ccm {
@ -302,7 +310,7 @@ struct cpuinfo_arc {
struct {
unsigned int swap:1, norm:1, minmax:1, barrel:1, crc:1, swape:1, pad1:2,
fpu_sp:1, fpu_dp:1, dual:1, dual_enb:1, pad2:4,
debug:1, ap:1, smart:1, rtt:1, pad3:4,
ap_num:4, ap_full:1, smart:1, rtt:1, pad3:1,
timer0:1, timer1:1, rtc:1, gfrc:1, pad4:4;
} extn;
struct bcr_mpy extn_mpy;

View File

@ -340,7 +340,7 @@ static inline __attribute__ ((const)) int __fls(unsigned long x)
/*
* __ffs: Similar to ffs, but zero based (0-31)
*/
static inline __attribute__ ((const)) int __ffs(unsigned long word)
static inline __attribute__ ((const)) unsigned long __ffs(unsigned long word)
{
if (!word)
return word;
@ -400,9 +400,9 @@ static inline __attribute__ ((const)) int ffs(unsigned long x)
/*
* __ffs: Similar to ffs, but zero based (0-31)
*/
static inline __attribute__ ((const)) int __ffs(unsigned long x)
static inline __attribute__ ((const)) unsigned long __ffs(unsigned long x)
{
int n;
unsigned long n;
asm volatile(
" ffs.f %0, %1 \n" /* 0:31; 31(Z) if src 0 */

View File

@ -103,7 +103,8 @@ static const char * const arc_pmu_ev_hw_map[] = {
/* counts condition */
[PERF_COUNT_HW_INSTRUCTIONS] = "iall",
[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = "ijmp", /* Excludes ZOL jumps */
/* All jump instructions that are taken */
[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = "ijmptak",
[PERF_COUNT_ARC_BPOK] = "bpok", /* NP-NT, PT-T, PNT-NT */
#ifdef CONFIG_ISA_ARCV2
[PERF_COUNT_HW_BRANCH_MISSES] = "bpmp",

View File

@ -1,15 +1,10 @@
/*
* Linux performance counter support for ARC700 series
*
* Copyright (C) 2013-2015 Synopsys, Inc. (www.synopsys.com)
*
* This code is inspired by the perf support of various other architectures.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
*/
// SPDX-License-Identifier: GPL-2.0+
//
// Linux performance counter support for ARC CPUs.
// This code is inspired by the perf support of various other architectures.
//
// Copyright (C) 2013-2018 Synopsys, Inc. (www.synopsys.com)
#include <linux/errno.h>
#include <linux/interrupt.h>
#include <linux/module.h>
@ -19,12 +14,31 @@
#include <asm/arcregs.h>
#include <asm/stacktrace.h>
/* HW holds 8 symbols + one for null terminator */
#define ARCPMU_EVENT_NAME_LEN 9
enum arc_pmu_attr_groups {
ARCPMU_ATTR_GR_EVENTS,
ARCPMU_ATTR_GR_FORMATS,
ARCPMU_NR_ATTR_GR
};
struct arc_pmu_raw_event_entry {
char name[ARCPMU_EVENT_NAME_LEN];
};
struct arc_pmu {
struct pmu pmu;
unsigned int irq;
int n_counters;
int n_events;
u64 max_period;
int ev_hw_idx[PERF_COUNT_ARC_HW_MAX];
struct arc_pmu_raw_event_entry *raw_entry;
struct attribute **attrs;
struct perf_pmu_events_attr *attr;
const struct attribute_group *attr_groups[ARCPMU_NR_ATTR_GR + 1];
};
struct arc_pmu_cpu {
@ -49,6 +63,7 @@ static int callchain_trace(unsigned int addr, void *data)
{
struct arc_callchain_trace *ctrl = data;
struct perf_callchain_entry_ctx *entry = ctrl->perf_stuff;
perf_callchain_store(entry, addr);
if (ctrl->depth++ < 3)
@ -57,8 +72,8 @@ static int callchain_trace(unsigned int addr, void *data)
return -1;
}
void
perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs)
void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry,
struct pt_regs *regs)
{
struct arc_callchain_trace ctrl = {
.depth = 0,
@ -68,8 +83,8 @@ perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *re
arc_unwind_core(NULL, regs, callchain_trace, &ctrl);
}
void
perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs)
void perf_callchain_user(struct perf_callchain_entry_ctx *entry,
struct pt_regs *regs)
{
/*
* User stack can't be unwound trivially with kernel dwarf unwinder
@ -82,10 +97,10 @@ static struct arc_pmu *arc_pmu;
static DEFINE_PER_CPU(struct arc_pmu_cpu, arc_pmu_cpu);
/* read counter #idx; note that counter# != event# on ARC! */
static uint64_t arc_pmu_read_counter(int idx)
static u64 arc_pmu_read_counter(int idx)
{
uint32_t tmp;
uint64_t result;
u32 tmp;
u64 result;
/*
* ARC supports making 'snapshots' of the counters, so we don't
@ -94,7 +109,7 @@ static uint64_t arc_pmu_read_counter(int idx)
write_aux_reg(ARC_REG_PCT_INDEX, idx);
tmp = read_aux_reg(ARC_REG_PCT_CONTROL);
write_aux_reg(ARC_REG_PCT_CONTROL, tmp | ARC_REG_PCT_CONTROL_SN);
result = (uint64_t) (read_aux_reg(ARC_REG_PCT_SNAPH)) << 32;
result = (u64) (read_aux_reg(ARC_REG_PCT_SNAPH)) << 32;
result |= read_aux_reg(ARC_REG_PCT_SNAPL);
return result;
@ -103,9 +118,9 @@ static uint64_t arc_pmu_read_counter(int idx)
static void arc_perf_event_update(struct perf_event *event,
struct hw_perf_event *hwc, int idx)
{
uint64_t prev_raw_count = local64_read(&hwc->prev_count);
uint64_t new_raw_count = arc_pmu_read_counter(idx);
int64_t delta = new_raw_count - prev_raw_count;
u64 prev_raw_count = local64_read(&hwc->prev_count);
u64 new_raw_count = arc_pmu_read_counter(idx);
s64 delta = new_raw_count - prev_raw_count;
/*
* We aren't afraid of hwc->prev_count changing beneath our feet
@ -155,7 +170,7 @@ static int arc_pmu_event_init(struct perf_event *event)
int ret;
if (!is_sampling_event(event)) {
hwc->sample_period = arc_pmu->max_period;
hwc->sample_period = arc_pmu->max_period;
hwc->last_period = hwc->sample_period;
local64_set(&hwc->period_left, hwc->sample_period);
}
@ -192,6 +207,18 @@ static int arc_pmu_event_init(struct perf_event *event)
pr_debug("init cache event with h/w %08x \'%s\'\n",
(int)hwc->config, arc_pmu_ev_hw_map[ret]);
return 0;
case PERF_TYPE_RAW:
if (event->attr.config >= arc_pmu->n_events)
return -ENOENT;
hwc->config |= event->attr.config;
pr_debug("init raw event with idx %lld \'%s\'\n",
event->attr.config,
arc_pmu->raw_entry[event->attr.config].name);
return 0;
default:
return -ENOENT;
}
@ -200,7 +227,7 @@ static int arc_pmu_event_init(struct perf_event *event)
/* starts all counters */
static void arc_pmu_enable(struct pmu *pmu)
{
uint32_t tmp;
u32 tmp;
tmp = read_aux_reg(ARC_REG_PCT_CONTROL);
write_aux_reg(ARC_REG_PCT_CONTROL, (tmp & 0xffff0000) | 0x1);
}
@ -208,7 +235,7 @@ static void arc_pmu_enable(struct pmu *pmu)
/* stops all counters */
static void arc_pmu_disable(struct pmu *pmu)
{
uint32_t tmp;
u32 tmp;
tmp = read_aux_reg(ARC_REG_PCT_CONTROL);
write_aux_reg(ARC_REG_PCT_CONTROL, (tmp & 0xffff0000) | 0x0);
}
@ -228,7 +255,7 @@ static int arc_pmu_event_set_period(struct perf_event *event)
local64_set(&hwc->period_left, left);
hwc->last_period = period;
overflow = 1;
} else if (unlikely(left <= 0)) {
} else if (unlikely(left <= 0)) {
/* left underflowed by less than period. */
left += period;
local64_set(&hwc->period_left, left);
@ -246,8 +273,8 @@ static int arc_pmu_event_set_period(struct perf_event *event)
write_aux_reg(ARC_REG_PCT_INDEX, idx);
/* Write value */
write_aux_reg(ARC_REG_PCT_COUNTL, (u32)value);
write_aux_reg(ARC_REG_PCT_COUNTH, (value >> 32));
write_aux_reg(ARC_REG_PCT_COUNTL, lower_32_bits(value));
write_aux_reg(ARC_REG_PCT_COUNTH, upper_32_bits(value));
perf_event_update_userpage(event);
@ -277,7 +304,7 @@ static void arc_pmu_start(struct perf_event *event, int flags)
/* Enable interrupt for this counter */
if (is_sampling_event(event))
write_aux_reg(ARC_REG_PCT_INT_CTRL,
read_aux_reg(ARC_REG_PCT_INT_CTRL) | (1 << idx));
read_aux_reg(ARC_REG_PCT_INT_CTRL) | BIT(idx));
/* enable ARC pmu here */
write_aux_reg(ARC_REG_PCT_INDEX, idx); /* counter # */
@ -295,9 +322,9 @@ static void arc_pmu_stop(struct perf_event *event, int flags)
* Reset interrupt flag by writing of 1. This is required
* to make sure pending interrupt was not left.
*/
write_aux_reg(ARC_REG_PCT_INT_ACT, 1 << idx);
write_aux_reg(ARC_REG_PCT_INT_ACT, BIT(idx));
write_aux_reg(ARC_REG_PCT_INT_CTRL,
read_aux_reg(ARC_REG_PCT_INT_CTRL) & ~(1 << idx));
read_aux_reg(ARC_REG_PCT_INT_CTRL) & ~BIT(idx));
}
if (!(event->hw.state & PERF_HES_STOPPED)) {
@ -349,9 +376,10 @@ static int arc_pmu_add(struct perf_event *event, int flags)
if (is_sampling_event(event)) {
/* Mimic full counter overflow as other arches do */
write_aux_reg(ARC_REG_PCT_INT_CNTL, (u32)arc_pmu->max_period);
write_aux_reg(ARC_REG_PCT_INT_CNTL,
lower_32_bits(arc_pmu->max_period));
write_aux_reg(ARC_REG_PCT_INT_CNTH,
(arc_pmu->max_period >> 32));
upper_32_bits(arc_pmu->max_period));
}
write_aux_reg(ARC_REG_PCT_CONFIG, 0);
@ -392,7 +420,7 @@ static irqreturn_t arc_pmu_intr(int irq, void *dev)
idx = __ffs(active_ints);
/* Reset interrupt flag by writing of 1 */
write_aux_reg(ARC_REG_PCT_INT_ACT, 1 << idx);
write_aux_reg(ARC_REG_PCT_INT_ACT, BIT(idx));
/*
* On reset of "interrupt active" bit corresponding
@ -400,7 +428,7 @@ static irqreturn_t arc_pmu_intr(int irq, void *dev)
* Now we need to re-enable interrupt for the counter.
*/
write_aux_reg(ARC_REG_PCT_INT_CTRL,
read_aux_reg(ARC_REG_PCT_INT_CTRL) | (1 << idx));
read_aux_reg(ARC_REG_PCT_INT_CTRL) | BIT(idx));
event = pmu_cpu->act_counter[idx];
hwc = &event->hw;
@ -414,7 +442,7 @@ static irqreturn_t arc_pmu_intr(int irq, void *dev)
arc_pmu_stop(event, 0);
}
active_ints &= ~(1U << idx);
active_ints &= ~BIT(idx);
} while (active_ints);
done:
@ -441,19 +469,108 @@ static void arc_cpu_pmu_irq_init(void *data)
write_aux_reg(ARC_REG_PCT_INT_ACT, 0xffffffff);
}
/* Event field occupies the bottom 15 bits of our config field */
PMU_FORMAT_ATTR(event, "config:0-14");
static struct attribute *arc_pmu_format_attrs[] = {
&format_attr_event.attr,
NULL,
};
static struct attribute_group arc_pmu_format_attr_gr = {
.name = "format",
.attrs = arc_pmu_format_attrs,
};
static ssize_t arc_pmu_events_sysfs_show(struct device *dev,
struct device_attribute *attr,
char *page)
{
struct perf_pmu_events_attr *pmu_attr;
pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr);
return sprintf(page, "event=0x%04llx\n", pmu_attr->id);
}
/*
* We don't add attrs here as we don't have pre-defined list of perf events.
* We will generate and add attrs dynamically in probe() after we read HW
* configuration.
*/
static struct attribute_group arc_pmu_events_attr_gr = {
.name = "events",
};
static void arc_pmu_add_raw_event_attr(int j, char *str)
{
memmove(arc_pmu->raw_entry[j].name, str, ARCPMU_EVENT_NAME_LEN - 1);
arc_pmu->attr[j].attr.attr.name = arc_pmu->raw_entry[j].name;
arc_pmu->attr[j].attr.attr.mode = VERIFY_OCTAL_PERMISSIONS(0444);
arc_pmu->attr[j].attr.show = arc_pmu_events_sysfs_show;
arc_pmu->attr[j].id = j;
arc_pmu->attrs[j] = &(arc_pmu->attr[j].attr.attr);
}
static int arc_pmu_raw_alloc(struct device *dev)
{
arc_pmu->attr = devm_kmalloc_array(dev, arc_pmu->n_events + 1,
sizeof(*arc_pmu->attr), GFP_KERNEL | __GFP_ZERO);
if (!arc_pmu->attr)
return -ENOMEM;
arc_pmu->attrs = devm_kmalloc_array(dev, arc_pmu->n_events + 1,
sizeof(*arc_pmu->attrs), GFP_KERNEL | __GFP_ZERO);
if (!arc_pmu->attrs)
return -ENOMEM;
arc_pmu->raw_entry = devm_kmalloc_array(dev, arc_pmu->n_events,
sizeof(*arc_pmu->raw_entry), GFP_KERNEL | __GFP_ZERO);
if (!arc_pmu->raw_entry)
return -ENOMEM;
return 0;
}
static inline bool event_in_hw_event_map(int i, char *name)
{
if (!arc_pmu_ev_hw_map[i])
return false;
if (!strlen(arc_pmu_ev_hw_map[i]))
return false;
if (strcmp(arc_pmu_ev_hw_map[i], name))
return false;
return true;
}
static void arc_pmu_map_hw_event(int j, char *str)
{
int i;
/* See if HW condition has been mapped to a perf event_id */
for (i = 0; i < ARRAY_SIZE(arc_pmu_ev_hw_map); i++) {
if (event_in_hw_event_map(i, str)) {
pr_debug("mapping perf event %2d to h/w event \'%8s\' (idx %d)\n",
i, str, j);
arc_pmu->ev_hw_idx[i] = j;
}
}
}
static int arc_pmu_device_probe(struct platform_device *pdev)
{
struct arc_reg_pct_build pct_bcr;
struct arc_reg_cc_build cc_bcr;
int i, j, has_interrupts;
int i, has_interrupts;
int counter_size; /* in bits */
union cc_name {
struct {
uint32_t word0, word1;
u32 word0, word1;
char sentinel;
} indiv;
char str[9];
char str[ARCPMU_EVENT_NAME_LEN];
} cc_name;
@ -463,15 +580,22 @@ static int arc_pmu_device_probe(struct platform_device *pdev)
return -ENODEV;
}
BUILD_BUG_ON(ARC_PERF_MAX_COUNTERS > 32);
BUG_ON(pct_bcr.c > ARC_PERF_MAX_COUNTERS);
if (WARN_ON(pct_bcr.c > ARC_PERF_MAX_COUNTERS))
return -EINVAL;
READ_BCR(ARC_REG_CC_BUILD, cc_bcr);
BUG_ON(!cc_bcr.v); /* Counters exist but No countable conditions ? */
if (WARN(!cc_bcr.v, "Counters exist but No countable conditions?"))
return -EINVAL;
arc_pmu = devm_kzalloc(&pdev->dev, sizeof(struct arc_pmu), GFP_KERNEL);
if (!arc_pmu)
return -ENOMEM;
arc_pmu->n_events = cc_bcr.c;
if (arc_pmu_raw_alloc(&pdev->dev))
return -ENOMEM;
has_interrupts = is_isa_arcv2() ? pct_bcr.i : 0;
arc_pmu->n_counters = pct_bcr.c;
@ -481,30 +605,26 @@ static int arc_pmu_device_probe(struct platform_device *pdev)
pr_info("ARC perf\t: %d counters (%d bits), %d conditions%s\n",
arc_pmu->n_counters, counter_size, cc_bcr.c,
has_interrupts ? ", [overflow IRQ support]":"");
has_interrupts ? ", [overflow IRQ support]" : "");
cc_name.str[8] = 0;
cc_name.str[ARCPMU_EVENT_NAME_LEN - 1] = 0;
for (i = 0; i < PERF_COUNT_ARC_HW_MAX; i++)
arc_pmu->ev_hw_idx[i] = -1;
/* loop thru all available h/w condition indexes */
for (j = 0; j < cc_bcr.c; j++) {
write_aux_reg(ARC_REG_CC_INDEX, j);
for (i = 0; i < cc_bcr.c; i++) {
write_aux_reg(ARC_REG_CC_INDEX, i);
cc_name.indiv.word0 = read_aux_reg(ARC_REG_CC_NAME0);
cc_name.indiv.word1 = read_aux_reg(ARC_REG_CC_NAME1);
/* See if it has been mapped to a perf event_id */
for (i = 0; i < ARRAY_SIZE(arc_pmu_ev_hw_map); i++) {
if (arc_pmu_ev_hw_map[i] &&
!strcmp(arc_pmu_ev_hw_map[i], cc_name.str) &&
strlen(arc_pmu_ev_hw_map[i])) {
pr_debug("mapping perf event %2d to h/w event \'%8s\' (idx %d)\n",
i, cc_name.str, j);
arc_pmu->ev_hw_idx[i] = j;
}
}
arc_pmu_map_hw_event(i, cc_name.str);
arc_pmu_add_raw_event_attr(i, cc_name.str);
}
arc_pmu_events_attr_gr.attrs = arc_pmu->attrs;
arc_pmu->attr_groups[ARCPMU_ATTR_GR_EVENTS] = &arc_pmu_events_attr_gr;
arc_pmu->attr_groups[ARCPMU_ATTR_GR_FORMATS] = &arc_pmu_format_attr_gr;
arc_pmu->pmu = (struct pmu) {
.pmu_enable = arc_pmu_enable,
.pmu_disable = arc_pmu_disable,
@ -514,6 +634,7 @@ static int arc_pmu_device_probe(struct platform_device *pdev)
.start = arc_pmu_start,
.stop = arc_pmu_stop,
.read = arc_pmu_read,
.attr_groups = arc_pmu->attr_groups,
};
if (has_interrupts) {
@ -535,17 +656,19 @@ static int arc_pmu_device_probe(struct platform_device *pdev)
} else
arc_pmu->pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT;
return perf_pmu_register(&arc_pmu->pmu, pdev->name, PERF_TYPE_RAW);
/*
* perf parser doesn't really like '-' symbol in events name, so let's
* use '_' in arc pct name as it goes to kernel PMU event prefix.
*/
return perf_pmu_register(&arc_pmu->pmu, "arc_pct", PERF_TYPE_RAW);
}
#ifdef CONFIG_OF
static const struct of_device_id arc_pmu_match[] = {
{ .compatible = "snps,arc700-pct" },
{ .compatible = "snps,archs-pct" },
{},
};
MODULE_DEVICE_TABLE(of, arc_pmu_match);
#endif
static struct platform_driver arc_pmu_driver = {
.driver = {

View File

@ -123,6 +123,7 @@ static void read_arc_build_cfg_regs(void)
struct cpuinfo_arc *cpu = &cpuinfo_arc700[smp_processor_id()];
const struct id_to_str *tbl;
struct bcr_isa_arcv2 isa;
struct bcr_actionpoint ap;
FIX_PTR(cpu);
@ -195,6 +196,7 @@ static void read_arc_build_cfg_regs(void)
cpu->bpu.full = bpu.ft;
cpu->bpu.num_cache = 256 << bpu.bce;
cpu->bpu.num_pred = 2048 << bpu.pte;
cpu->bpu.ret_stk = 4 << bpu.rse;
if (cpu->core.family >= 0x54) {
unsigned int exec_ctrl;
@ -207,8 +209,11 @@ static void read_arc_build_cfg_regs(void)
}
}
READ_BCR(ARC_REG_AP_BCR, bcr);
cpu->extn.ap = bcr.ver ? 1 : 0;
READ_BCR(ARC_REG_AP_BCR, ap);
if (ap.ver) {
cpu->extn.ap_num = 2 << ap.num;
cpu->extn.ap_full = !!ap.min;
}
READ_BCR(ARC_REG_SMART_BCR, bcr);
cpu->extn.smart = bcr.ver ? 1 : 0;
@ -216,8 +221,6 @@ static void read_arc_build_cfg_regs(void)
READ_BCR(ARC_REG_RTT_BCR, bcr);
cpu->extn.rtt = bcr.ver ? 1 : 0;
cpu->extn.debug = cpu->extn.ap | cpu->extn.smart | cpu->extn.rtt;
READ_BCR(ARC_REG_ISA_CFG_BCR, isa);
/* some hacks for lack of feature BCR info in old ARC700 cores */
@ -299,10 +302,10 @@ static char *arc_cpu_mumbojumbo(int cpu_id, char *buf, int len)
if (cpu->bpu.ver)
n += scnprintf(buf + n, len - n,
"BPU\t\t: %s%s match, cache:%d, Predict Table:%d",
"BPU\t\t: %s%s match, cache:%d, Predict Table:%d Return stk: %d",
IS_AVAIL1(cpu->bpu.full, "full"),
IS_AVAIL1(!cpu->bpu.full, "partial"),
cpu->bpu.num_cache, cpu->bpu.num_pred);
cpu->bpu.num_cache, cpu->bpu.num_pred, cpu->bpu.ret_stk);
if (is_isa_arcv2()) {
struct bcr_lpb lpb;
@ -336,11 +339,17 @@ static char *arc_extn_mumbojumbo(int cpu_id, char *buf, int len)
IS_AVAIL1(cpu->extn.fpu_sp, "SP "),
IS_AVAIL1(cpu->extn.fpu_dp, "DP "));
if (cpu->extn.debug)
n += scnprintf(buf + n, len - n, "DEBUG\t\t: %s%s%s\n",
IS_AVAIL1(cpu->extn.ap, "ActionPoint "),
if (cpu->extn.ap_num | cpu->extn.smart | cpu->extn.rtt) {
n += scnprintf(buf + n, len - n, "DEBUG\t\t: %s%s",
IS_AVAIL1(cpu->extn.smart, "smaRT "),
IS_AVAIL1(cpu->extn.rtt, "RTT "));
if (cpu->extn.ap_num) {
n += scnprintf(buf + n, len - n, "ActionPoint %d/%s",
cpu->extn.ap_num,
cpu->extn.ap_full ? "full":"min");
}
n += scnprintf(buf + n, len - n, "\n");
}
if (cpu->dccm.sz || cpu->iccm.sz)
n += scnprintf(buf + n, len - n, "Extn [CCM]\t: DCCM @ %x, %d KB / ICCM: @ %x, %d KB\n",

View File

@ -18,6 +18,8 @@
#include <asm/arcregs.h>
#include <asm/irqflags.h>
#define ARC_PATH_MAX 256
/*
* Common routine to print scratch regs (r0-r12) or callee regs (r13-r25)
* -Prints 3 regs per line and a CR.
@ -58,11 +60,12 @@ static void show_callee_regs(struct callee_regs *cregs)
print_reg_file(&(cregs->r13), 13);
}
static void print_task_path_n_nm(struct task_struct *tsk, char *buf)
static void print_task_path_n_nm(struct task_struct *tsk)
{
char *path_nm = NULL;
struct mm_struct *mm;
struct file *exe_file;
char buf[ARC_PATH_MAX];
mm = get_task_mm(tsk);
if (!mm)
@ -72,7 +75,7 @@ static void print_task_path_n_nm(struct task_struct *tsk, char *buf)
mmput(mm);
if (exe_file) {
path_nm = file_path(exe_file, buf, 255);
path_nm = file_path(exe_file, buf, ARC_PATH_MAX-1);
fput(exe_file);
}
@ -80,10 +83,9 @@ static void print_task_path_n_nm(struct task_struct *tsk, char *buf)
pr_info("Path: %s\n", !IS_ERR(path_nm) ? path_nm : "?");
}
static void show_faulting_vma(unsigned long address, char *buf)
static void show_faulting_vma(unsigned long address)
{
struct vm_area_struct *vma;
char *nm = buf;
struct mm_struct *active_mm = current->active_mm;
/* can't use print_vma_addr() yet as it doesn't check for
@ -96,8 +98,11 @@ static void show_faulting_vma(unsigned long address, char *buf)
* if the container VMA is not found
*/
if (vma && (vma->vm_start <= address)) {
char buf[ARC_PATH_MAX];
char *nm = "?";
if (vma->vm_file) {
nm = file_path(vma->vm_file, buf, PAGE_SIZE - 1);
nm = file_path(vma->vm_file, buf, ARC_PATH_MAX-1);
if (IS_ERR(nm))
nm = "?";
}
@ -173,13 +178,14 @@ void show_regs(struct pt_regs *regs)
{
struct task_struct *tsk = current;
struct callee_regs *cregs;
char *buf;
buf = (char *)__get_free_page(GFP_KERNEL);
if (!buf)
return;
/*
* generic code calls us with preemption disabled, but some calls
* here could sleep, so re-enable to avoid lockdep splat
*/
preempt_enable();
print_task_path_n_nm(tsk, buf);
print_task_path_n_nm(tsk);
show_regs_print_info(KERN_INFO);
show_ecr_verbose(regs);
@ -189,7 +195,7 @@ void show_regs(struct pt_regs *regs)
(void *)regs->blink, (void *)regs->ret);
if (user_mode(regs))
show_faulting_vma(regs->ret, buf); /* faulting code, not data */
show_faulting_vma(regs->ret); /* faulting code, not data */
pr_info("[STAT32]: 0x%08lx", regs->status32);
@ -222,7 +228,7 @@ void show_regs(struct pt_regs *regs)
if (cregs)
show_callee_regs(cregs);
free_page((unsigned long)buf);
preempt_disable();
}
void show_kernel_fault_diag(const char *str, struct pt_regs *regs,

View File

@ -7,11 +7,39 @@
*/
#include <linux/linkage.h>
#include <asm/cache.h>
#undef PREALLOC_NOT_AVAIL
/*
* The memset implementation below is optimized to use prefetchw and prealloc
* instruction in case of CPU with 64B L1 data cache line (L1_CACHE_SHIFT == 6)
* If you want to implement optimized memset for other possible L1 data cache
* line lengths (32B and 128B) you should rewrite code carefully checking
* we don't call any prefetchw/prealloc instruction for L1 cache lines which
* don't belongs to memset area.
*/
#if L1_CACHE_SHIFT == 6
.macro PREALLOC_INSTR reg, off
prealloc [\reg, \off]
.endm
.macro PREFETCHW_INSTR reg, off
prefetchw [\reg, \off]
.endm
#else
.macro PREALLOC_INSTR
.endm
.macro PREFETCHW_INSTR
.endm
#endif
ENTRY_CFI(memset)
prefetchw [r0] ; Prefetch the write location
PREFETCHW_INSTR r0, 0 ; Prefetch the first write location
mov.f 0, r2
;;; if size is zero
jz.d [blink]
@ -48,11 +76,8 @@ ENTRY_CFI(memset)
lpnz @.Lset64bytes
;; LOOP START
#ifdef PREALLOC_NOT_AVAIL
prefetchw [r3, 64] ;Prefetch the next write location
#else
prealloc [r3, 64]
#endif
PREALLOC_INSTR r3, 64 ; alloc next line w/o fetching
#ifdef CONFIG_ARC_HAS_LL64
std.ab r4, [r3, 8]
std.ab r4, [r3, 8]
@ -85,7 +110,6 @@ ENTRY_CFI(memset)
lsr.f lp_count, r2, 5 ;Last remaining max 124 bytes
lpnz .Lset32bytes
;; LOOP START
prefetchw [r3, 32] ;Prefetch the next write location
#ifdef CONFIG_ARC_HAS_LL64
std.ab r4, [r3, 8]
std.ab r4, [r3, 8]

View File

@ -141,12 +141,17 @@ void do_page_fault(unsigned long address, struct pt_regs *regs)
*/
fault = handle_mm_fault(vma, address, flags);
/* If Pagefault was interrupted by SIGKILL, exit page fault "early" */
if (fatal_signal_pending(current)) {
if ((fault & VM_FAULT_ERROR) && !(fault & VM_FAULT_RETRY))
up_read(&mm->mmap_sem);
if (user_mode(regs))
/*
* if fault retry, mmap_sem already relinquished by core mm
* so OK to return to user mode (with signal handled first)
*/
if (fault & VM_FAULT_RETRY) {
if (!user_mode(regs))
goto no_context;
return;
}
}
perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);

View File

@ -119,7 +119,8 @@ void __init setup_arch_memory(void)
*/
memblock_add_node(low_mem_start, low_mem_sz, 0);
memblock_reserve(low_mem_start, __pa(_end) - low_mem_start);
memblock_reserve(CONFIG_LINUX_LINK_BASE,
__pa(_end) - CONFIG_LINUX_LINK_BASE);
#ifdef CONFIG_BLK_DEV_INITRD
if (phys_initrd_size) {

View File

@ -706,6 +706,7 @@ ptp_clock@2d10e00 {
fsl,tmr-fiper1 = <999999995>;
fsl,tmr-fiper2 = <99990>;
fsl,max-adj = <499999999>;
fsl,extts-fifo;
};
enet0: ethernet@2d10000 {

View File

@ -1 +1,95 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_ARM_XEN_PAGE_COHERENT_H
#define _ASM_ARM_XEN_PAGE_COHERENT_H
#include <linux/dma-mapping.h>
#include <asm/page.h>
#include <xen/arm/page-coherent.h>
static inline const struct dma_map_ops *xen_get_dma_ops(struct device *dev)
{
if (dev && dev->archdata.dev_dma_ops)
return dev->archdata.dev_dma_ops;
return get_arch_dma_ops(NULL);
}
static inline void *xen_alloc_coherent_pages(struct device *hwdev, size_t size,
dma_addr_t *dma_handle, gfp_t flags, unsigned long attrs)
{
return xen_get_dma_ops(hwdev)->alloc(hwdev, size, dma_handle, flags, attrs);
}
static inline void xen_free_coherent_pages(struct device *hwdev, size_t size,
void *cpu_addr, dma_addr_t dma_handle, unsigned long attrs)
{
xen_get_dma_ops(hwdev)->free(hwdev, size, cpu_addr, dma_handle, attrs);
}
static inline void xen_dma_map_page(struct device *hwdev, struct page *page,
dma_addr_t dev_addr, unsigned long offset, size_t size,
enum dma_data_direction dir, unsigned long attrs)
{
unsigned long page_pfn = page_to_xen_pfn(page);
unsigned long dev_pfn = XEN_PFN_DOWN(dev_addr);
unsigned long compound_pages =
(1<<compound_order(page)) * XEN_PFN_PER_PAGE;
bool local = (page_pfn <= dev_pfn) &&
(dev_pfn - page_pfn < compound_pages);
/*
* Dom0 is mapped 1:1, while the Linux page can span across
* multiple Xen pages, it's not possible for it to contain a
* mix of local and foreign Xen pages. So if the first xen_pfn
* == mfn the page is local otherwise it's a foreign page
* grant-mapped in dom0. If the page is local we can safely
* call the native dma_ops function, otherwise we call the xen
* specific function.
*/
if (local)
xen_get_dma_ops(hwdev)->map_page(hwdev, page, offset, size, dir, attrs);
else
__xen_dma_map_page(hwdev, page, dev_addr, offset, size, dir, attrs);
}
static inline void xen_dma_unmap_page(struct device *hwdev, dma_addr_t handle,
size_t size, enum dma_data_direction dir, unsigned long attrs)
{
unsigned long pfn = PFN_DOWN(handle);
/*
* Dom0 is mapped 1:1, while the Linux page can be spanned accross
* multiple Xen page, it's not possible to have a mix of local and
* foreign Xen page. Dom0 is mapped 1:1, so calling pfn_valid on a
* foreign mfn will always return false. If the page is local we can
* safely call the native dma_ops function, otherwise we call the xen
* specific function.
*/
if (pfn_valid(pfn)) {
if (xen_get_dma_ops(hwdev)->unmap_page)
xen_get_dma_ops(hwdev)->unmap_page(hwdev, handle, size, dir, attrs);
} else
__xen_dma_unmap_page(hwdev, handle, size, dir, attrs);
}
static inline void xen_dma_sync_single_for_cpu(struct device *hwdev,
dma_addr_t handle, size_t size, enum dma_data_direction dir)
{
unsigned long pfn = PFN_DOWN(handle);
if (pfn_valid(pfn)) {
if (xen_get_dma_ops(hwdev)->sync_single_for_cpu)
xen_get_dma_ops(hwdev)->sync_single_for_cpu(hwdev, handle, size, dir);
} else
__xen_dma_sync_single_for_cpu(hwdev, handle, size, dir);
}
static inline void xen_dma_sync_single_for_device(struct device *hwdev,
dma_addr_t handle, size_t size, enum dma_data_direction dir)
{
unsigned long pfn = PFN_DOWN(handle);
if (pfn_valid(pfn)) {
if (xen_get_dma_ops(hwdev)->sync_single_for_device)
xen_get_dma_ops(hwdev)->sync_single_for_device(hwdev, handle, size, dir);
} else
__xen_dma_sync_single_for_device(hwdev, handle, size, dir);
}
#endif /* _ASM_ARM_XEN_PAGE_COHERENT_H */

View File

@ -1083,12 +1083,17 @@ static inline void emit_ldx_r(const s8 dst[], const s8 src,
/* Arithmatic Operation */
static inline void emit_ar_r(const u8 rd, const u8 rt, const u8 rm,
const u8 rn, struct jit_ctx *ctx, u8 op) {
const u8 rn, struct jit_ctx *ctx, u8 op,
bool is_jmp64) {
switch (op) {
case BPF_JSET:
emit(ARM_AND_R(ARM_IP, rt, rn), ctx);
emit(ARM_AND_R(ARM_LR, rd, rm), ctx);
emit(ARM_ORRS_R(ARM_IP, ARM_LR, ARM_IP), ctx);
if (is_jmp64) {
emit(ARM_AND_R(ARM_IP, rt, rn), ctx);
emit(ARM_AND_R(ARM_LR, rd, rm), ctx);
emit(ARM_ORRS_R(ARM_IP, ARM_LR, ARM_IP), ctx);
} else {
emit(ARM_ANDS_R(ARM_IP, rt, rn), ctx);
}
break;
case BPF_JEQ:
case BPF_JNE:
@ -1096,18 +1101,25 @@ static inline void emit_ar_r(const u8 rd, const u8 rt, const u8 rm,
case BPF_JGE:
case BPF_JLE:
case BPF_JLT:
emit(ARM_CMP_R(rd, rm), ctx);
_emit(ARM_COND_EQ, ARM_CMP_R(rt, rn), ctx);
if (is_jmp64) {
emit(ARM_CMP_R(rd, rm), ctx);
/* Only compare low halve if high halve are equal. */
_emit(ARM_COND_EQ, ARM_CMP_R(rt, rn), ctx);
} else {
emit(ARM_CMP_R(rt, rn), ctx);
}
break;
case BPF_JSLE:
case BPF_JSGT:
emit(ARM_CMP_R(rn, rt), ctx);
emit(ARM_SBCS_R(ARM_IP, rm, rd), ctx);
if (is_jmp64)
emit(ARM_SBCS_R(ARM_IP, rm, rd), ctx);
break;
case BPF_JSLT:
case BPF_JSGE:
emit(ARM_CMP_R(rt, rn), ctx);
emit(ARM_SBCS_R(ARM_IP, rd, rm), ctx);
if (is_jmp64)
emit(ARM_SBCS_R(ARM_IP, rd, rm), ctx);
break;
}
}
@ -1615,6 +1627,17 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
case BPF_JMP | BPF_JLT | BPF_X:
case BPF_JMP | BPF_JSLT | BPF_X:
case BPF_JMP | BPF_JSLE | BPF_X:
case BPF_JMP32 | BPF_JEQ | BPF_X:
case BPF_JMP32 | BPF_JGT | BPF_X:
case BPF_JMP32 | BPF_JGE | BPF_X:
case BPF_JMP32 | BPF_JNE | BPF_X:
case BPF_JMP32 | BPF_JSGT | BPF_X:
case BPF_JMP32 | BPF_JSGE | BPF_X:
case BPF_JMP32 | BPF_JSET | BPF_X:
case BPF_JMP32 | BPF_JLE | BPF_X:
case BPF_JMP32 | BPF_JLT | BPF_X:
case BPF_JMP32 | BPF_JSLT | BPF_X:
case BPF_JMP32 | BPF_JSLE | BPF_X:
/* Setup source registers */
rm = arm_bpf_get_reg32(src_hi, tmp2[0], ctx);
rn = arm_bpf_get_reg32(src_lo, tmp2[1], ctx);
@ -1641,6 +1664,17 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
case BPF_JMP | BPF_JLE | BPF_K:
case BPF_JMP | BPF_JSLT | BPF_K:
case BPF_JMP | BPF_JSLE | BPF_K:
case BPF_JMP32 | BPF_JEQ | BPF_K:
case BPF_JMP32 | BPF_JGT | BPF_K:
case BPF_JMP32 | BPF_JGE | BPF_K:
case BPF_JMP32 | BPF_JNE | BPF_K:
case BPF_JMP32 | BPF_JSGT | BPF_K:
case BPF_JMP32 | BPF_JSGE | BPF_K:
case BPF_JMP32 | BPF_JSET | BPF_K:
case BPF_JMP32 | BPF_JLT | BPF_K:
case BPF_JMP32 | BPF_JLE | BPF_K:
case BPF_JMP32 | BPF_JSLT | BPF_K:
case BPF_JMP32 | BPF_JSLE | BPF_K:
if (off == 0)
break;
rm = tmp2[0];
@ -1652,7 +1686,8 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
rd = arm_bpf_get_reg64(dst, tmp, ctx);
/* Check for the condition */
emit_ar_r(rd[0], rd[1], rm, rn, ctx, BPF_OP(code));
emit_ar_r(rd[0], rd[1], rm, rn, ctx, BPF_OP(code),
BPF_CLASS(code) == BPF_JMP);
/* Setup JUMP instruction */
jmp_offset = bpf2a32_offset(i+off, i, ctx);

View File

@ -62,6 +62,7 @@
#define ARM_INST_ADDS_I 0x02900000
#define ARM_INST_AND_R 0x00000000
#define ARM_INST_ANDS_R 0x00100000
#define ARM_INST_AND_I 0x02000000
#define ARM_INST_BIC_R 0x01c00000
@ -172,6 +173,7 @@
#define ARM_ADC_I(rd, rn, imm) _AL3_I(ARM_INST_ADC, rd, rn, imm)
#define ARM_AND_R(rd, rn, rm) _AL3_R(ARM_INST_AND, rd, rn, rm)
#define ARM_ANDS_R(rd, rn, rm) _AL3_R(ARM_INST_ANDS, rd, rn, rm)
#define ARM_AND_I(rd, rn, imm) _AL3_I(ARM_INST_AND, rd, rn, imm)
#define ARM_BIC_R(rd, rn, rm) _AL3_R(ARM_INST_BIC, rd, rn, rm)

View File

@ -60,8 +60,6 @@
#ifdef CONFIG_KASAN_SW_TAGS
#define ARCH_SLAB_MINALIGN (1ULL << KASAN_SHADOW_SCALE_SHIFT)
#else
#define ARCH_SLAB_MINALIGN __alignof__(unsigned long long)
#endif
#ifndef __ASSEMBLY__

View File

@ -20,9 +20,6 @@ struct dev_archdata {
#ifdef CONFIG_IOMMU_API
void *iommu; /* private IOMMU data */
#endif
#ifdef CONFIG_XEN
const struct dma_map_ops *dev_dma_ops;
#endif
};
struct pdev_archdata {

View File

@ -60,8 +60,11 @@ static inline bool arm64_kernel_use_ng_mappings(void)
* later determine that kpti is required, then
* kpti_install_ng_mappings() will make them non-global.
*/
if (arm64_kernel_unmapped_at_el0())
return true;
if (!IS_ENABLED(CONFIG_RANDOMIZE_BASE))
return arm64_kernel_unmapped_at_el0();
return false;
/*
* KASLR is enabled so we're going to be enabling kpti on non-broken

View File

@ -1 +1,77 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_ARM64_XEN_PAGE_COHERENT_H
#define _ASM_ARM64_XEN_PAGE_COHERENT_H
#include <linux/dma-mapping.h>
#include <asm/page.h>
#include <xen/arm/page-coherent.h>
static inline void *xen_alloc_coherent_pages(struct device *hwdev, size_t size,
dma_addr_t *dma_handle, gfp_t flags, unsigned long attrs)
{
return dma_direct_alloc(hwdev, size, dma_handle, flags, attrs);
}
static inline void xen_free_coherent_pages(struct device *hwdev, size_t size,
void *cpu_addr, dma_addr_t dma_handle, unsigned long attrs)
{
dma_direct_free(hwdev, size, cpu_addr, dma_handle, attrs);
}
static inline void xen_dma_sync_single_for_cpu(struct device *hwdev,
dma_addr_t handle, size_t size, enum dma_data_direction dir)
{
unsigned long pfn = PFN_DOWN(handle);
if (pfn_valid(pfn))
dma_direct_sync_single_for_cpu(hwdev, handle, size, dir);
else
__xen_dma_sync_single_for_cpu(hwdev, handle, size, dir);
}
static inline void xen_dma_sync_single_for_device(struct device *hwdev,
dma_addr_t handle, size_t size, enum dma_data_direction dir)
{
unsigned long pfn = PFN_DOWN(handle);
if (pfn_valid(pfn))
dma_direct_sync_single_for_device(hwdev, handle, size, dir);
else
__xen_dma_sync_single_for_device(hwdev, handle, size, dir);
}
static inline void xen_dma_map_page(struct device *hwdev, struct page *page,
dma_addr_t dev_addr, unsigned long offset, size_t size,
enum dma_data_direction dir, unsigned long attrs)
{
unsigned long page_pfn = page_to_xen_pfn(page);
unsigned long dev_pfn = XEN_PFN_DOWN(dev_addr);
unsigned long compound_pages =
(1<<compound_order(page)) * XEN_PFN_PER_PAGE;
bool local = (page_pfn <= dev_pfn) &&
(dev_pfn - page_pfn < compound_pages);
if (local)
dma_direct_map_page(hwdev, page, offset, size, dir, attrs);
else
__xen_dma_map_page(hwdev, page, dev_addr, offset, size, dir, attrs);
}
static inline void xen_dma_unmap_page(struct device *hwdev, dma_addr_t handle,
size_t size, enum dma_data_direction dir, unsigned long attrs)
{
unsigned long pfn = PFN_DOWN(handle);
/*
* Dom0 is mapped 1:1, while the Linux page can be spanned accross
* multiple Xen page, it's not possible to have a mix of local and
* foreign Xen page. Dom0 is mapped 1:1, so calling pfn_valid on a
* foreign mfn will always return false. If the page is local we can
* safely call the native dma_ops function, otherwise we call the xen
* specific function.
*/
if (pfn_valid(pfn))
dma_direct_unmap_page(hwdev, handle, size, dir, attrs);
else
__xen_dma_unmap_page(hwdev, handle, size, dir, attrs);
}
#endif /* _ASM_ARM64_XEN_PAGE_COHERENT_H */

View File

@ -14,6 +14,7 @@
#include <linux/sched.h>
#include <linux/types.h>
#include <asm/cacheflush.h>
#include <asm/fixmap.h>
#include <asm/kernel-pgtable.h>
#include <asm/memory.h>
@ -43,7 +44,7 @@ static __init u64 get_kaslr_seed(void *fdt)
return ret;
}
static __init const u8 *get_cmdline(void *fdt)
static __init const u8 *kaslr_get_cmdline(void *fdt)
{
static __initconst const u8 default_cmdline[] = CONFIG_CMDLINE;
@ -109,7 +110,7 @@ u64 __init kaslr_early_init(u64 dt_phys)
* Check if 'nokaslr' appears on the command line, and
* return 0 if that is the case.
*/
cmdline = get_cmdline(fdt);
cmdline = kaslr_get_cmdline(fdt);
str = strstr(cmdline, "nokaslr");
if (str == cmdline || (str > cmdline && *(str - 1) == ' '))
return 0;
@ -169,5 +170,8 @@ u64 __init kaslr_early_init(u64 dt_phys)
module_alloc_base += (module_range * (seed & ((1 << 21) - 1))) >> 21;
module_alloc_base &= PAGE_MASK;
__flush_dcache_area(&module_alloc_base, sizeof(module_alloc_base));
__flush_dcache_area(&memstart_offset_seed, sizeof(memstart_offset_seed));
return offset;
}

View File

@ -466,9 +466,7 @@ void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
__iommu_setup_dma_ops(dev, dma_base, size, iommu);
#ifdef CONFIG_XEN
if (xen_initial_domain()) {
dev->archdata.dev_dma_ops = dev->dma_ops;
if (xen_initial_domain())
dev->dma_ops = xen_dma_ops;
}
#endif
}

View File

@ -362,7 +362,8 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
const s16 off = insn->off;
const s32 imm = insn->imm;
const int i = insn - ctx->prog->insnsi;
const bool is64 = BPF_CLASS(code) == BPF_ALU64;
const bool is64 = BPF_CLASS(code) == BPF_ALU64 ||
BPF_CLASS(code) == BPF_JMP;
const bool isdw = BPF_SIZE(code) == BPF_DW;
u8 jmp_cond;
s32 jmp_offset;
@ -559,7 +560,17 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
case BPF_JMP | BPF_JSLT | BPF_X:
case BPF_JMP | BPF_JSGE | BPF_X:
case BPF_JMP | BPF_JSLE | BPF_X:
emit(A64_CMP(1, dst, src), ctx);
case BPF_JMP32 | BPF_JEQ | BPF_X:
case BPF_JMP32 | BPF_JGT | BPF_X:
case BPF_JMP32 | BPF_JLT | BPF_X:
case BPF_JMP32 | BPF_JGE | BPF_X:
case BPF_JMP32 | BPF_JLE | BPF_X:
case BPF_JMP32 | BPF_JNE | BPF_X:
case BPF_JMP32 | BPF_JSGT | BPF_X:
case BPF_JMP32 | BPF_JSLT | BPF_X:
case BPF_JMP32 | BPF_JSGE | BPF_X:
case BPF_JMP32 | BPF_JSLE | BPF_X:
emit(A64_CMP(is64, dst, src), ctx);
emit_cond_jmp:
jmp_offset = bpf2a64_offset(i + off, i, ctx);
check_imm19(jmp_offset);
@ -601,7 +612,8 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
emit(A64_B_(jmp_cond, jmp_offset), ctx);
break;
case BPF_JMP | BPF_JSET | BPF_X:
emit(A64_TST(1, dst, src), ctx);
case BPF_JMP32 | BPF_JSET | BPF_X:
emit(A64_TST(is64, dst, src), ctx);
goto emit_cond_jmp;
/* IF (dst COND imm) JUMP off */
case BPF_JMP | BPF_JEQ | BPF_K:
@ -614,12 +626,23 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
case BPF_JMP | BPF_JSLT | BPF_K:
case BPF_JMP | BPF_JSGE | BPF_K:
case BPF_JMP | BPF_JSLE | BPF_K:
emit_a64_mov_i(1, tmp, imm, ctx);
emit(A64_CMP(1, dst, tmp), ctx);
case BPF_JMP32 | BPF_JEQ | BPF_K:
case BPF_JMP32 | BPF_JGT | BPF_K:
case BPF_JMP32 | BPF_JLT | BPF_K:
case BPF_JMP32 | BPF_JGE | BPF_K:
case BPF_JMP32 | BPF_JLE | BPF_K:
case BPF_JMP32 | BPF_JNE | BPF_K:
case BPF_JMP32 | BPF_JSGT | BPF_K:
case BPF_JMP32 | BPF_JSLT | BPF_K:
case BPF_JMP32 | BPF_JSGE | BPF_K:
case BPF_JMP32 | BPF_JSLE | BPF_K:
emit_a64_mov_i(is64, tmp, imm, ctx);
emit(A64_CMP(is64, dst, tmp), ctx);
goto emit_cond_jmp;
case BPF_JMP | BPF_JSET | BPF_K:
emit_a64_mov_i(1, tmp, imm, ctx);
emit(A64_TST(1, dst, tmp), ctx);
case BPF_JMP32 | BPF_JSET | BPF_K:
emit_a64_mov_i(is64, tmp, imm, ctx);
emit(A64_TST(is64, dst, tmp), ctx);
goto emit_cond_jmp;
/* function call */
case BPF_JMP | BPF_CALL:

View File

@ -37,8 +37,6 @@ libs-y += arch/$(ARCH)/lib/
boot := arch/h8300/boot
archmrproper:
archclean:
$(Q)$(MAKE) $(clean)=$(boot)

View File

@ -16,8 +16,6 @@ KBUILD_DEFCONFIG := generic_defconfig
NM := $(CROSS_COMPILE)nm -B
READELF := $(CROSS_COMPILE)readelf
export AWK
CHECKFLAGS += -D__ia64=1 -D__ia64__=1 -D_LP64 -D__LP64__
OBJCOPYFLAGS := --strip-all

View File

@ -3155,6 +3155,7 @@ config MIPS32_O32
config MIPS32_N32
bool "Kernel support for n32 binaries"
depends on 64BIT
select ARCH_WANT_COMPAT_IPC_PARSE_VERSION
select COMPAT
select MIPS32_COMPAT
select SYSVIPC_COMPAT if SYSVIPC

View File

@ -173,6 +173,31 @@ void __init plat_mem_setup(void)
pm_power_off = bcm47xx_machine_halt;
}
#ifdef CONFIG_BCM47XX_BCMA
static struct device * __init bcm47xx_setup_device(void)
{
struct device *dev;
int err;
dev = kzalloc(sizeof(*dev), GFP_KERNEL);
if (!dev)
return NULL;
err = dev_set_name(dev, "bcm47xx_soc");
if (err) {
pr_err("Failed to set SoC device name: %d\n", err);
kfree(dev);
return NULL;
}
err = dma_coerce_mask_and_coherent(dev, DMA_BIT_MASK(32));
if (err)
pr_err("Failed to set SoC DMA mask: %d\n", err);
return dev;
}
#endif
/*
* This finishes bus initialization doing things that were not possible without
* kmalloc. Make sure to call it late enough (after mm_init).
@ -183,6 +208,10 @@ void __init bcm47xx_bus_setup(void)
if (bcm47xx_bus_type == BCM47XX_BUS_TYPE_BCMA) {
int err;
bcm47xx_bus.bcma.dev = bcm47xx_setup_device();
if (!bcm47xx_bus.bcma.dev)
panic("Failed to setup SoC device\n");
err = bcma_host_soc_init(&bcm47xx_bus.bcma);
if (err)
panic("Failed to initialize BCMA bus (err %d)", err);
@ -235,6 +264,8 @@ static int __init bcm47xx_register_bus_complete(void)
#endif
#ifdef CONFIG_BCM47XX_BCMA
case BCM47XX_BUS_TYPE_BCMA:
if (device_register(bcm47xx_bus.bcma.dev))
pr_err("Failed to register SoC device\n");
bcma_bus_register(&bcm47xx_bus.bcma.bus);
break;
#endif

View File

@ -98,7 +98,7 @@ static void octeon_kexec_smp_down(void *ignored)
" sync \n"
" synci ($0) \n");
relocated_kexec_smp_wait(NULL);
kexec_reboot();
}
#endif

View File

@ -66,6 +66,7 @@ CONFIG_SERIAL_8250_CONSOLE=y
# CONFIG_SERIAL_8250_PCI is not set
CONFIG_SERIAL_8250_NR_UARTS=1
CONFIG_SERIAL_8250_RUNTIME_UARTS=1
CONFIG_SERIAL_OF_PLATFORM=y
CONFIG_SERIAL_AR933X=y
CONFIG_SERIAL_AR933X_CONSOLE=y
# CONFIG_HW_RANDOM is not set

View File

@ -18,8 +18,6 @@
#define INT_NUM_EXTRA_START (INT_NUM_IM4_IRL0 + 32)
#define INT_NUM_IM_OFFSET (INT_NUM_IM1_IRL0 - INT_NUM_IM0_IRL0)
#define MIPS_CPU_TIMER_IRQ 7
#define MAX_IM 5
#endif /* _FALCON_IRQ__ */

View File

@ -19,8 +19,6 @@
#define LTQ_DMA_CH0_INT (INT_NUM_IM2_IRL0)
#define MIPS_CPU_TIMER_IRQ 7
#define MAX_IM 5
#endif

View File

@ -74,14 +74,15 @@ static int __init vdma_init(void)
get_order(VDMA_PGTBL_SIZE));
BUG_ON(!pgtbl);
dma_cache_wback_inv((unsigned long)pgtbl, VDMA_PGTBL_SIZE);
pgtbl = (VDMA_PGTBL_ENTRY *)KSEG1ADDR(pgtbl);
pgtbl = (VDMA_PGTBL_ENTRY *)CKSEG1ADDR((unsigned long)pgtbl);
/*
* Clear the R4030 translation table
*/
vdma_pgtbl_init();
r4030_write_reg32(JAZZ_R4030_TRSTBL_BASE, CPHYSADDR(pgtbl));
r4030_write_reg32(JAZZ_R4030_TRSTBL_BASE,
CPHYSADDR((unsigned long)pgtbl));
r4030_write_reg32(JAZZ_R4030_TRSTBL_LIM, VDMA_PGTBL_SIZE);
r4030_write_reg32(JAZZ_R4030_TRSTBL_INV, 0);

View File

@ -224,9 +224,11 @@ static struct irq_chip ltq_eiu_type = {
.irq_set_type = ltq_eiu_settype,
};
static void ltq_hw_irqdispatch(int module)
static void ltq_hw_irq_handler(struct irq_desc *desc)
{
int module = irq_desc_get_irq(desc) - 2;
u32 irq;
int hwirq;
irq = ltq_icu_r32(module, LTQ_ICU_IM0_IOSR);
if (irq == 0)
@ -237,7 +239,8 @@ static void ltq_hw_irqdispatch(int module)
* other bits might be bogus
*/
irq = __fls(irq);
do_IRQ((int)irq + MIPS_CPU_IRQ_CASCADE + (INT_NUM_IM_OFFSET * module));
hwirq = irq + MIPS_CPU_IRQ_CASCADE + (INT_NUM_IM_OFFSET * module);
generic_handle_irq(irq_linear_revmap(ltq_domain, hwirq));
/* if this is a EBU irq, we need to ack it or get a deadlock */
if ((irq == LTQ_ICU_EBU_IRQ) && (module == 0) && LTQ_EBU_PCC_ISTAT)
@ -245,49 +248,6 @@ static void ltq_hw_irqdispatch(int module)
LTQ_EBU_PCC_ISTAT);
}
#define DEFINE_HWx_IRQDISPATCH(x) \
static void ltq_hw ## x ## _irqdispatch(void) \
{ \
ltq_hw_irqdispatch(x); \
}
DEFINE_HWx_IRQDISPATCH(0)
DEFINE_HWx_IRQDISPATCH(1)
DEFINE_HWx_IRQDISPATCH(2)
DEFINE_HWx_IRQDISPATCH(3)
DEFINE_HWx_IRQDISPATCH(4)
#if MIPS_CPU_TIMER_IRQ == 7
static void ltq_hw5_irqdispatch(void)
{
do_IRQ(MIPS_CPU_TIMER_IRQ);
}
#else
DEFINE_HWx_IRQDISPATCH(5)
#endif
static void ltq_hw_irq_handler(struct irq_desc *desc)
{
ltq_hw_irqdispatch(irq_desc_get_irq(desc) - 2);
}
asmlinkage void plat_irq_dispatch(void)
{
unsigned int pending = read_c0_status() & read_c0_cause() & ST0_IM;
int irq;
if (!pending) {
spurious_interrupt();
return;
}
pending >>= CAUSEB_IP;
while (pending) {
irq = fls(pending) - 1;
do_IRQ(MIPS_CPU_IRQ_BASE + irq);
pending &= ~BIT(irq);
}
}
static int icu_map(struct irq_domain *d, unsigned int irq, irq_hw_number_t hw)
{
struct irq_chip *chip = &ltq_irq_type;
@ -343,38 +303,13 @@ int __init icu_of_init(struct device_node *node, struct device_node *parent)
for (i = 0; i < MAX_IM; i++)
irq_set_chained_handler(i + 2, ltq_hw_irq_handler);
if (cpu_has_vint) {
pr_info("Setting up vectored interrupts\n");
set_vi_handler(2, ltq_hw0_irqdispatch);
set_vi_handler(3, ltq_hw1_irqdispatch);
set_vi_handler(4, ltq_hw2_irqdispatch);
set_vi_handler(5, ltq_hw3_irqdispatch);
set_vi_handler(6, ltq_hw4_irqdispatch);
set_vi_handler(7, ltq_hw5_irqdispatch);
}
ltq_domain = irq_domain_add_linear(node,
(MAX_IM * INT_NUM_IM_OFFSET) + MIPS_CPU_IRQ_CASCADE,
&irq_domain_ops, 0);
#ifndef CONFIG_MIPS_MT_SMP
set_c0_status(IE_IRQ0 | IE_IRQ1 | IE_IRQ2 |
IE_IRQ3 | IE_IRQ4 | IE_IRQ5);
#else
set_c0_status(IE_SW0 | IE_SW1 | IE_IRQ0 | IE_IRQ1 |
IE_IRQ2 | IE_IRQ3 | IE_IRQ4 | IE_IRQ5);
#endif
/* tell oprofile which irq to use */
ltq_perfcount_irq = irq_create_mapping(ltq_domain, LTQ_PERF_IRQ);
/*
* if the timer irq is not one of the mips irqs we need to
* create a mapping
*/
if (MIPS_CPU_TIMER_IRQ != 7)
irq_create_mapping(ltq_domain, MIPS_CPU_TIMER_IRQ);
/* the external interrupts are optional and xway only */
eiu_node = of_find_compatible_node(NULL, NULL, "lantiq,eiu-xway");
if (eiu_node && !of_address_to_resource(eiu_node, 0, &res)) {
@ -411,7 +346,7 @@ EXPORT_SYMBOL_GPL(get_c0_perfcount_int);
unsigned int get_c0_compare_int(void)
{
return MIPS_CPU_TIMER_IRQ;
return CP0_LEGACY_COMPARE_IRQ;
}
static struct of_device_id __initdata of_irq_ids[] = {

View File

@ -369,7 +369,9 @@ int __init octeon_msi_initialize(void)
int irq;
struct irq_chip *msi;
if (octeon_dma_bar_type == OCTEON_DMA_BAR_TYPE_PCIE) {
if (octeon_dma_bar_type == OCTEON_DMA_BAR_TYPE_INVALID) {
return 0;
} else if (octeon_dma_bar_type == OCTEON_DMA_BAR_TYPE_PCIE) {
msi_rcv_reg[0] = CVMX_PEXP_NPEI_MSI_RCV0;
msi_rcv_reg[1] = CVMX_PEXP_NPEI_MSI_RCV1;
msi_rcv_reg[2] = CVMX_PEXP_NPEI_MSI_RCV2;

View File

@ -3,9 +3,6 @@ OBJCOPYFLAGS := -O binary -R .note -R .note.gnu.build-id -R .comment -S
KBUILD_DEFCONFIG := defconfig
comma = ,
ifdef CONFIG_FUNCTION_TRACER
arch-y += -malways-save-lp -mno-relax
endif
@ -54,8 +51,6 @@ endif
boot := arch/nds32/boot
core-y += $(boot)/dts/
.PHONY: FORCE
Image: vmlinux
$(Q)$(MAKE) $(build)=$(boot) $(boot)/$@
@ -68,9 +63,6 @@ prepare: vdso_prepare
vdso_prepare: prepare0
$(Q)$(MAKE) $(build)=arch/nds32/kernel/vdso include/generated/vdso-offsets.h
CLEAN_FILES += include/asm-nds32/constants.h*
# We use MRPROPER_FILES and CLEAN_FILES now
archclean:
$(Q)$(MAKE) $(clean)=$(boot)

View File

@ -20,7 +20,6 @@
KBUILD_DEFCONFIG := or1ksim_defconfig
OBJCOPYFLAGS := -O binary -R .note -R .comment -S
LDFLAGS_vmlinux :=
LIBGCC := $(shell $(CC) $(KBUILD_CFLAGS) -print-libgcc-file-name)
KBUILD_CFLAGS += -pipe -ffixed-r10 -D__linux__
@ -50,5 +49,3 @@ else
BUILTIN_DTB := n
endif
core-$(BUILTIN_DTB) += arch/openrisc/boot/dts/
all: vmlinux

View File

@ -337,6 +337,7 @@
#define PPC_INST_DIVWU 0x7c000396
#define PPC_INST_DIVD 0x7c0003d2
#define PPC_INST_RLWINM 0x54000000
#define PPC_INST_RLWINM_DOT 0x54000001
#define PPC_INST_RLWIMI 0x50000000
#define PPC_INST_RLDICL 0x78000000
#define PPC_INST_RLDICR 0x78000004

View File

@ -47,6 +47,7 @@ enum perf_event_powerpc_regs {
PERF_REG_POWERPC_DAR,
PERF_REG_POWERPC_DSISR,
PERF_REG_POWERPC_SIER,
PERF_REG_POWERPC_MMCRA,
PERF_REG_POWERPC_MAX,
};
#endif /* _UAPI_ASM_POWERPC_PERF_REGS_H */

View File

@ -852,11 +852,12 @@ start_here:
/* set up the PTE pointers for the Abatron bdiGDB.
*/
tovirt(r6,r6)
lis r5, abatron_pteptrs@h
ori r5, r5, abatron_pteptrs@l
stw r5, 0xf0(0) /* Must match your Abatron config file */
tophys(r5,r5)
lis r6, swapper_pg_dir@h
ori r6, r6, swapper_pg_dir@l
stw r6, 0(r5)
/* Now turn on the MMU for real! */

View File

@ -755,11 +755,12 @@ SYSCALL_DEFINE0(rt_sigreturn)
if (restore_tm_sigcontexts(current, &uc->uc_mcontext,
&uc_transact->uc_mcontext))
goto badframe;
}
} else
#endif
/* Fall through, for non-TM restore */
if (!MSR_TM_ACTIVE(msr)) {
{
/*
* Fall through, for non-TM restore
*
* Unset MSR[TS] on the thread regs since MSR from user
* context does not have MSR active, and recheckpoint was
* not called since restore_tm_sigcontexts() was not called

View File

@ -967,13 +967,6 @@ unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip)
}
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
#if defined(CONFIG_FTRACE_SYSCALLS) && defined(CONFIG_PPC64)
unsigned long __init arch_syscall_addr(int nr)
{
return sys_call_table[nr*2];
}
#endif /* CONFIG_FTRACE_SYSCALLS && CONFIG_PPC64 */
#ifdef PPC64_ELF_ABI_v1
char *arch_ftrace_match_adjust(char *str, const char *search)
{

View File

@ -165,6 +165,10 @@
#define PPC_RLWINM(d, a, i, mb, me) EMIT(PPC_INST_RLWINM | ___PPC_RA(d) | \
___PPC_RS(a) | __PPC_SH(i) | \
__PPC_MB(mb) | __PPC_ME(me))
#define PPC_RLWINM_DOT(d, a, i, mb, me) EMIT(PPC_INST_RLWINM_DOT | \
___PPC_RA(d) | ___PPC_RS(a) | \
__PPC_SH(i) | __PPC_MB(mb) | \
__PPC_ME(me))
#define PPC_RLWIMI(d, a, i, mb, me) EMIT(PPC_INST_RLWIMI | ___PPC_RA(d) | \
___PPC_RS(a) | __PPC_SH(i) | \
__PPC_MB(mb) | __PPC_ME(me))

View File

@ -768,36 +768,58 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
case BPF_JMP | BPF_JGT | BPF_X:
case BPF_JMP | BPF_JSGT | BPF_K:
case BPF_JMP | BPF_JSGT | BPF_X:
case BPF_JMP32 | BPF_JGT | BPF_K:
case BPF_JMP32 | BPF_JGT | BPF_X:
case BPF_JMP32 | BPF_JSGT | BPF_K:
case BPF_JMP32 | BPF_JSGT | BPF_X:
true_cond = COND_GT;
goto cond_branch;
case BPF_JMP | BPF_JLT | BPF_K:
case BPF_JMP | BPF_JLT | BPF_X:
case BPF_JMP | BPF_JSLT | BPF_K:
case BPF_JMP | BPF_JSLT | BPF_X:
case BPF_JMP32 | BPF_JLT | BPF_K:
case BPF_JMP32 | BPF_JLT | BPF_X:
case BPF_JMP32 | BPF_JSLT | BPF_K:
case BPF_JMP32 | BPF_JSLT | BPF_X:
true_cond = COND_LT;
goto cond_branch;
case BPF_JMP | BPF_JGE | BPF_K:
case BPF_JMP | BPF_JGE | BPF_X:
case BPF_JMP | BPF_JSGE | BPF_K:
case BPF_JMP | BPF_JSGE | BPF_X:
case BPF_JMP32 | BPF_JGE | BPF_K:
case BPF_JMP32 | BPF_JGE | BPF_X:
case BPF_JMP32 | BPF_JSGE | BPF_K:
case BPF_JMP32 | BPF_JSGE | BPF_X:
true_cond = COND_GE;
goto cond_branch;
case BPF_JMP | BPF_JLE | BPF_K:
case BPF_JMP | BPF_JLE | BPF_X:
case BPF_JMP | BPF_JSLE | BPF_K:
case BPF_JMP | BPF_JSLE | BPF_X:
case BPF_JMP32 | BPF_JLE | BPF_K:
case BPF_JMP32 | BPF_JLE | BPF_X:
case BPF_JMP32 | BPF_JSLE | BPF_K:
case BPF_JMP32 | BPF_JSLE | BPF_X:
true_cond = COND_LE;
goto cond_branch;
case BPF_JMP | BPF_JEQ | BPF_K:
case BPF_JMP | BPF_JEQ | BPF_X:
case BPF_JMP32 | BPF_JEQ | BPF_K:
case BPF_JMP32 | BPF_JEQ | BPF_X:
true_cond = COND_EQ;
goto cond_branch;
case BPF_JMP | BPF_JNE | BPF_K:
case BPF_JMP | BPF_JNE | BPF_X:
case BPF_JMP32 | BPF_JNE | BPF_K:
case BPF_JMP32 | BPF_JNE | BPF_X:
true_cond = COND_NE;
goto cond_branch;
case BPF_JMP | BPF_JSET | BPF_K:
case BPF_JMP | BPF_JSET | BPF_X:
case BPF_JMP32 | BPF_JSET | BPF_K:
case BPF_JMP32 | BPF_JSET | BPF_X:
true_cond = COND_NE;
/* Fall through */
@ -809,18 +831,44 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
case BPF_JMP | BPF_JLE | BPF_X:
case BPF_JMP | BPF_JEQ | BPF_X:
case BPF_JMP | BPF_JNE | BPF_X:
case BPF_JMP32 | BPF_JGT | BPF_X:
case BPF_JMP32 | BPF_JLT | BPF_X:
case BPF_JMP32 | BPF_JGE | BPF_X:
case BPF_JMP32 | BPF_JLE | BPF_X:
case BPF_JMP32 | BPF_JEQ | BPF_X:
case BPF_JMP32 | BPF_JNE | BPF_X:
/* unsigned comparison */
PPC_CMPLD(dst_reg, src_reg);
if (BPF_CLASS(code) == BPF_JMP32)
PPC_CMPLW(dst_reg, src_reg);
else
PPC_CMPLD(dst_reg, src_reg);
break;
case BPF_JMP | BPF_JSGT | BPF_X:
case BPF_JMP | BPF_JSLT | BPF_X:
case BPF_JMP | BPF_JSGE | BPF_X:
case BPF_JMP | BPF_JSLE | BPF_X:
case BPF_JMP32 | BPF_JSGT | BPF_X:
case BPF_JMP32 | BPF_JSLT | BPF_X:
case BPF_JMP32 | BPF_JSGE | BPF_X:
case BPF_JMP32 | BPF_JSLE | BPF_X:
/* signed comparison */
PPC_CMPD(dst_reg, src_reg);
if (BPF_CLASS(code) == BPF_JMP32)
PPC_CMPW(dst_reg, src_reg);
else
PPC_CMPD(dst_reg, src_reg);
break;
case BPF_JMP | BPF_JSET | BPF_X:
PPC_AND_DOT(b2p[TMP_REG_1], dst_reg, src_reg);
case BPF_JMP32 | BPF_JSET | BPF_X:
if (BPF_CLASS(code) == BPF_JMP) {
PPC_AND_DOT(b2p[TMP_REG_1], dst_reg,
src_reg);
} else {
int tmp_reg = b2p[TMP_REG_1];
PPC_AND(tmp_reg, dst_reg, src_reg);
PPC_RLWINM_DOT(tmp_reg, tmp_reg, 0, 0,
31);
}
break;
case BPF_JMP | BPF_JNE | BPF_K:
case BPF_JMP | BPF_JEQ | BPF_K:
@ -828,43 +876,87 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
case BPF_JMP | BPF_JLT | BPF_K:
case BPF_JMP | BPF_JGE | BPF_K:
case BPF_JMP | BPF_JLE | BPF_K:
case BPF_JMP32 | BPF_JNE | BPF_K:
case BPF_JMP32 | BPF_JEQ | BPF_K:
case BPF_JMP32 | BPF_JGT | BPF_K:
case BPF_JMP32 | BPF_JLT | BPF_K:
case BPF_JMP32 | BPF_JGE | BPF_K:
case BPF_JMP32 | BPF_JLE | BPF_K:
{
bool is_jmp32 = BPF_CLASS(code) == BPF_JMP32;
/*
* Need sign-extended load, so only positive
* values can be used as imm in cmpldi
*/
if (imm >= 0 && imm < 32768)
PPC_CMPLDI(dst_reg, imm);
else {
if (imm >= 0 && imm < 32768) {
if (is_jmp32)
PPC_CMPLWI(dst_reg, imm);
else
PPC_CMPLDI(dst_reg, imm);
} else {
/* sign-extending load */
PPC_LI32(b2p[TMP_REG_1], imm);
/* ... but unsigned comparison */
PPC_CMPLD(dst_reg, b2p[TMP_REG_1]);
if (is_jmp32)
PPC_CMPLW(dst_reg,
b2p[TMP_REG_1]);
else
PPC_CMPLD(dst_reg,
b2p[TMP_REG_1]);
}
break;
}
case BPF_JMP | BPF_JSGT | BPF_K:
case BPF_JMP | BPF_JSLT | BPF_K:
case BPF_JMP | BPF_JSGE | BPF_K:
case BPF_JMP | BPF_JSLE | BPF_K:
case BPF_JMP32 | BPF_JSGT | BPF_K:
case BPF_JMP32 | BPF_JSLT | BPF_K:
case BPF_JMP32 | BPF_JSGE | BPF_K:
case BPF_JMP32 | BPF_JSLE | BPF_K:
{
bool is_jmp32 = BPF_CLASS(code) == BPF_JMP32;
/*
* signed comparison, so any 16-bit value
* can be used in cmpdi
*/
if (imm >= -32768 && imm < 32768)
PPC_CMPDI(dst_reg, imm);
else {
if (imm >= -32768 && imm < 32768) {
if (is_jmp32)
PPC_CMPWI(dst_reg, imm);
else
PPC_CMPDI(dst_reg, imm);
} else {
PPC_LI32(b2p[TMP_REG_1], imm);
PPC_CMPD(dst_reg, b2p[TMP_REG_1]);
if (is_jmp32)
PPC_CMPW(dst_reg,
b2p[TMP_REG_1]);
else
PPC_CMPD(dst_reg,
b2p[TMP_REG_1]);
}
break;
}
case BPF_JMP | BPF_JSET | BPF_K:
case BPF_JMP32 | BPF_JSET | BPF_K:
/* andi does not sign-extend the immediate */
if (imm >= 0 && imm < 32768)
/* PPC_ANDI is _only/always_ dot-form */
PPC_ANDI(b2p[TMP_REG_1], dst_reg, imm);
else {
PPC_LI32(b2p[TMP_REG_1], imm);
PPC_AND_DOT(b2p[TMP_REG_1], dst_reg,
b2p[TMP_REG_1]);
int tmp_reg = b2p[TMP_REG_1];
PPC_LI32(tmp_reg, imm);
if (BPF_CLASS(code) == BPF_JMP) {
PPC_AND_DOT(tmp_reg, dst_reg,
tmp_reg);
} else {
PPC_AND(tmp_reg, dst_reg,
tmp_reg);
PPC_RLWINM_DOT(tmp_reg, tmp_reg,
0, 0, 31);
}
}
break;
}

View File

@ -70,6 +70,7 @@ static unsigned int pt_regs_offset[PERF_REG_POWERPC_MAX] = {
PT_REGS_OFFSET(PERF_REG_POWERPC_DAR, dar),
PT_REGS_OFFSET(PERF_REG_POWERPC_DSISR, dsisr),
PT_REGS_OFFSET(PERF_REG_POWERPC_SIER, dar),
PT_REGS_OFFSET(PERF_REG_POWERPC_MMCRA, dsisr),
};
u64 perf_reg_value(struct pt_regs *regs, int idx)
@ -83,6 +84,11 @@ u64 perf_reg_value(struct pt_regs *regs, int idx)
!is_sier_available()))
return 0;
if (idx == PERF_REG_POWERPC_MMCRA &&
(IS_ENABLED(CONFIG_FSL_EMB_PERF_EVENT) ||
IS_ENABLED(CONFIG_PPC32)))
return 0;
return regs_get_register(regs, pt_regs_offset[idx]);
}

View File

@ -237,12 +237,12 @@ static int ocm_debugfs_show(struct seq_file *m, void *v)
continue;
seq_printf(m, "PPC4XX OCM : %d\n", ocm->index);
seq_printf(m, "PhysAddr : %pa[p]\n", &(ocm->phys));
seq_printf(m, "PhysAddr : %pa\n", &(ocm->phys));
seq_printf(m, "MemTotal : %d Bytes\n", ocm->memtotal);
seq_printf(m, "MemTotal(NC) : %d Bytes\n", ocm->nc.memtotal);
seq_printf(m, "MemTotal(C) : %d Bytes\n\n", ocm->c.memtotal);
seq_printf(m, "NC.PhysAddr : %pa[p]\n", &(ocm->nc.phys));
seq_printf(m, "NC.PhysAddr : %pa\n", &(ocm->nc.phys));
seq_printf(m, "NC.VirtAddr : 0x%p\n", ocm->nc.virt);
seq_printf(m, "NC.MemTotal : %d Bytes\n", ocm->nc.memtotal);
seq_printf(m, "NC.MemFree : %d Bytes\n", ocm->nc.memfree);
@ -252,7 +252,7 @@ static int ocm_debugfs_show(struct seq_file *m, void *v)
blk->size, blk->owner);
}
seq_printf(m, "\nC.PhysAddr : %pa[p]\n", &(ocm->c.phys));
seq_printf(m, "\nC.PhysAddr : %pa\n", &(ocm->c.phys));
seq_printf(m, "C.VirtAddr : 0x%p\n", ocm->c.virt);
seq_printf(m, "C.MemTotal : %d Bytes\n", ocm->c.memtotal);
seq_printf(m, "C.MemFree : %d Bytes\n", ocm->c.memfree);

View File

@ -538,8 +538,7 @@ static void __init chrp_init_IRQ(void)
/* see if there is a keyboard in the device tree
with a parent of type "adb" */
for_each_node_by_name(kbd, "keyboard")
if (kbd->parent && kbd->parent->type
&& strcmp(kbd->parent->type, "adb") == 0)
if (of_node_is_type(kbd->parent, "adb"))
break;
of_node_put(kbd);
if (kbd)

View File

@ -564,7 +564,7 @@ struct iommu_table_group *pnv_try_setup_npu_table_group(struct pnv_ioda_pe *pe)
}
} else {
/* Create a group for 1 GPU and attached NPUs for POWER8 */
pe->npucomp = kzalloc(sizeof(pe->npucomp), GFP_KERNEL);
pe->npucomp = kzalloc(sizeof(*pe->npucomp), GFP_KERNEL);
table_group = &pe->npucomp->table_group;
table_group->ops = &pnv_npu_peers_ops;
iommu_register_group(table_group, hose->global_number,

View File

@ -2681,7 +2681,8 @@ static void pnv_pci_ioda_setup_iommu_api(void)
list_for_each_entry(hose, &hose_list, list_node) {
phb = hose->private_data;
if (phb->type == PNV_PHB_NPU_NVLINK)
if (phb->type == PNV_PHB_NPU_NVLINK ||
phb->type == PNV_PHB_NPU_OCAPI)
continue;
list_for_each_entry(pe, &phb->ioda.pe_list, list) {

View File

@ -264,7 +264,9 @@ void __init pSeries_final_fixup(void)
if (!of_device_is_compatible(nvdn->parent,
"ibm,power9-npu"))
continue;
#ifdef CONFIG_PPC_POWERNV
WARN_ON_ONCE(pnv_npu2_init(hose));
#endif
break;
}
}

View File

@ -25,7 +25,7 @@ static inline int init_new_context(struct task_struct *tsk,
atomic_set(&mm->context.flush_count, 0);
mm->context.gmap_asce = 0;
mm->context.flush_mm = 0;
mm->context.compat_mm = 0;
mm->context.compat_mm = test_thread_flag(TIF_31BIT);
#ifdef CONFIG_PGSTE
mm->context.alloc_pgste = page_table_allocate_pgste ||
test_thread_flag(TIF_PGSTE) ||
@ -90,8 +90,6 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
{
int cpu = smp_processor_id();
if (prev == next)
return;
S390_lowcore.user_asce = next->context.asce;
cpumask_set_cpu(cpu, &next->context.cpu_attach_mask);
/* Clear previous user-ASCE from CR1 and CR7 */
@ -103,7 +101,8 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
__ctl_load(S390_lowcore.vdso_asce, 7, 7);
clear_cpu_flag(CIF_ASCE_SECONDARY);
}
cpumask_clear_cpu(cpu, &prev->context.cpu_attach_mask);
if (prev != next)
cpumask_clear_cpu(cpu, &prev->context.cpu_attach_mask);
}
#define finish_arch_post_lock_switch finish_arch_post_lock_switch

View File

@ -63,10 +63,10 @@ static noinline __init void detect_machine_type(void)
if (stsi(vmms, 3, 2, 2) || !vmms->count)
return;
/* Running under KVM? If not we assume z/VM */
/* Detect known hypervisors */
if (!memcmp(vmms->vm[0].cpi, "\xd2\xe5\xd4", 3))
S390_lowcore.machine_flags |= MACHINE_FLAG_KVM;
else
else if (!memcmp(vmms->vm[0].cpi, "\xa9\x61\xe5\xd4", 4))
S390_lowcore.machine_flags |= MACHINE_FLAG_VM;
}

View File

@ -1006,6 +1006,8 @@ void __init setup_arch(char **cmdline_p)
pr_info("Linux is running under KVM in 64-bit mode\n");
else if (MACHINE_IS_LPAR)
pr_info("Linux is running natively in 64-bit mode\n");
else
pr_info("Linux is running as a guest in 64-bit mode\n");
/* Have one command line that is parsed and saved in /proc/cmdline */
/* boot_command_line has been already set up in early.c */

View File

@ -381,8 +381,13 @@ void smp_call_online_cpu(void (*func)(void *), void *data)
*/
void smp_call_ipl_cpu(void (*func)(void *), void *data)
{
struct lowcore *lc = pcpu_devices->lowcore;
if (pcpu_devices[0].address == stap())
lc = &S390_lowcore;
pcpu_delegate(&pcpu_devices[0], func, data,
pcpu_devices->lowcore->nodat_stack);
lc->nodat_stack);
}
int smp_find_processor_id(u16 address)
@ -1166,7 +1171,11 @@ static ssize_t __ref rescan_store(struct device *dev,
{
int rc;
rc = lock_device_hotplug_sysfs();
if (rc)
return rc;
rc = smp_rescan_cpus();
unlock_device_hotplug();
return rc ? rc : count;
}
static DEVICE_ATTR_WO(rescan);

View File

@ -224,10 +224,9 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
vdso_pages = vdso64_pages;
#ifdef CONFIG_COMPAT
if (is_compat_task()) {
mm->context.compat_mm = is_compat_task();
if (mm->context.compat_mm)
vdso_pages = vdso32_pages;
mm->context.compat_mm = 1;
}
#endif
/*
* vDSO has a problem and was disabled, just don't "enable" it for

View File

@ -1110,103 +1110,141 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i
mask = 0xf000; /* j */
goto branch_oc;
case BPF_JMP | BPF_JSGT | BPF_K: /* ((s64) dst > (s64) imm) */
case BPF_JMP32 | BPF_JSGT | BPF_K: /* ((s32) dst > (s32) imm) */
mask = 0x2000; /* jh */
goto branch_ks;
case BPF_JMP | BPF_JSLT | BPF_K: /* ((s64) dst < (s64) imm) */
case BPF_JMP32 | BPF_JSLT | BPF_K: /* ((s32) dst < (s32) imm) */
mask = 0x4000; /* jl */
goto branch_ks;
case BPF_JMP | BPF_JSGE | BPF_K: /* ((s64) dst >= (s64) imm) */
case BPF_JMP32 | BPF_JSGE | BPF_K: /* ((s32) dst >= (s32) imm) */
mask = 0xa000; /* jhe */
goto branch_ks;
case BPF_JMP | BPF_JSLE | BPF_K: /* ((s64) dst <= (s64) imm) */
case BPF_JMP32 | BPF_JSLE | BPF_K: /* ((s32) dst <= (s32) imm) */
mask = 0xc000; /* jle */
goto branch_ks;
case BPF_JMP | BPF_JGT | BPF_K: /* (dst_reg > imm) */
case BPF_JMP32 | BPF_JGT | BPF_K: /* ((u32) dst_reg > (u32) imm) */
mask = 0x2000; /* jh */
goto branch_ku;
case BPF_JMP | BPF_JLT | BPF_K: /* (dst_reg < imm) */
case BPF_JMP32 | BPF_JLT | BPF_K: /* ((u32) dst_reg < (u32) imm) */
mask = 0x4000; /* jl */
goto branch_ku;
case BPF_JMP | BPF_JGE | BPF_K: /* (dst_reg >= imm) */
case BPF_JMP32 | BPF_JGE | BPF_K: /* ((u32) dst_reg >= (u32) imm) */
mask = 0xa000; /* jhe */
goto branch_ku;
case BPF_JMP | BPF_JLE | BPF_K: /* (dst_reg <= imm) */
case BPF_JMP32 | BPF_JLE | BPF_K: /* ((u32) dst_reg <= (u32) imm) */
mask = 0xc000; /* jle */
goto branch_ku;
case BPF_JMP | BPF_JNE | BPF_K: /* (dst_reg != imm) */
case BPF_JMP32 | BPF_JNE | BPF_K: /* ((u32) dst_reg != (u32) imm) */
mask = 0x7000; /* jne */
goto branch_ku;
case BPF_JMP | BPF_JEQ | BPF_K: /* (dst_reg == imm) */
case BPF_JMP32 | BPF_JEQ | BPF_K: /* ((u32) dst_reg == (u32) imm) */
mask = 0x8000; /* je */
goto branch_ku;
case BPF_JMP | BPF_JSET | BPF_K: /* (dst_reg & imm) */
case BPF_JMP32 | BPF_JSET | BPF_K: /* ((u32) dst_reg & (u32) imm) */
mask = 0x7000; /* jnz */
/* lgfi %w1,imm (load sign extend imm) */
EMIT6_IMM(0xc0010000, REG_W1, imm);
/* ngr %w1,%dst */
EMIT4(0xb9800000, REG_W1, dst_reg);
if (BPF_CLASS(insn->code) == BPF_JMP32) {
/* llilf %w1,imm (load zero extend imm) */
EMIT6_IMM(0xc0010000, REG_W1, imm);
/* nr %w1,%dst */
EMIT2(0x1400, REG_W1, dst_reg);
} else {
/* lgfi %w1,imm (load sign extend imm) */
EMIT6_IMM(0xc0010000, REG_W1, imm);
/* ngr %w1,%dst */
EMIT4(0xb9800000, REG_W1, dst_reg);
}
goto branch_oc;
case BPF_JMP | BPF_JSGT | BPF_X: /* ((s64) dst > (s64) src) */
case BPF_JMP32 | BPF_JSGT | BPF_X: /* ((s32) dst > (s32) src) */
mask = 0x2000; /* jh */
goto branch_xs;
case BPF_JMP | BPF_JSLT | BPF_X: /* ((s64) dst < (s64) src) */
case BPF_JMP32 | BPF_JSLT | BPF_X: /* ((s32) dst < (s32) src) */
mask = 0x4000; /* jl */
goto branch_xs;
case BPF_JMP | BPF_JSGE | BPF_X: /* ((s64) dst >= (s64) src) */
case BPF_JMP32 | BPF_JSGE | BPF_X: /* ((s32) dst >= (s32) src) */
mask = 0xa000; /* jhe */
goto branch_xs;
case BPF_JMP | BPF_JSLE | BPF_X: /* ((s64) dst <= (s64) src) */
case BPF_JMP32 | BPF_JSLE | BPF_X: /* ((s32) dst <= (s32) src) */
mask = 0xc000; /* jle */
goto branch_xs;
case BPF_JMP | BPF_JGT | BPF_X: /* (dst > src) */
case BPF_JMP32 | BPF_JGT | BPF_X: /* ((u32) dst > (u32) src) */
mask = 0x2000; /* jh */
goto branch_xu;
case BPF_JMP | BPF_JLT | BPF_X: /* (dst < src) */
case BPF_JMP32 | BPF_JLT | BPF_X: /* ((u32) dst < (u32) src) */
mask = 0x4000; /* jl */
goto branch_xu;
case BPF_JMP | BPF_JGE | BPF_X: /* (dst >= src) */
case BPF_JMP32 | BPF_JGE | BPF_X: /* ((u32) dst >= (u32) src) */
mask = 0xa000; /* jhe */
goto branch_xu;
case BPF_JMP | BPF_JLE | BPF_X: /* (dst <= src) */
case BPF_JMP32 | BPF_JLE | BPF_X: /* ((u32) dst <= (u32) src) */
mask = 0xc000; /* jle */
goto branch_xu;
case BPF_JMP | BPF_JNE | BPF_X: /* (dst != src) */
case BPF_JMP32 | BPF_JNE | BPF_X: /* ((u32) dst != (u32) src) */
mask = 0x7000; /* jne */
goto branch_xu;
case BPF_JMP | BPF_JEQ | BPF_X: /* (dst == src) */
case BPF_JMP32 | BPF_JEQ | BPF_X: /* ((u32) dst == (u32) src) */
mask = 0x8000; /* je */
goto branch_xu;
case BPF_JMP | BPF_JSET | BPF_X: /* (dst & src) */
case BPF_JMP32 | BPF_JSET | BPF_X: /* ((u32) dst & (u32) src) */
{
bool is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32;
mask = 0x7000; /* jnz */
/* ngrk %w1,%dst,%src */
EMIT4_RRF(0xb9e40000, REG_W1, dst_reg, src_reg);
/* nrk or ngrk %w1,%dst,%src */
EMIT4_RRF((is_jmp32 ? 0xb9f40000 : 0xb9e40000),
REG_W1, dst_reg, src_reg);
goto branch_oc;
branch_ks:
/* lgfi %w1,imm (load sign extend imm) */
EMIT6_IMM(0xc0010000, REG_W1, imm);
/* cgrj %dst,%w1,mask,off */
EMIT6_PCREL(0xec000000, 0x0064, dst_reg, REG_W1, i, off, mask);
/* crj or cgrj %dst,%w1,mask,off */
EMIT6_PCREL(0xec000000, (is_jmp32 ? 0x0076 : 0x0064),
dst_reg, REG_W1, i, off, mask);
break;
branch_ku:
/* lgfi %w1,imm (load sign extend imm) */
EMIT6_IMM(0xc0010000, REG_W1, imm);
/* clgrj %dst,%w1,mask,off */
EMIT6_PCREL(0xec000000, 0x0065, dst_reg, REG_W1, i, off, mask);
/* clrj or clgrj %dst,%w1,mask,off */
EMIT6_PCREL(0xec000000, (is_jmp32 ? 0x0077 : 0x0065),
dst_reg, REG_W1, i, off, mask);
break;
branch_xs:
/* cgrj %dst,%src,mask,off */
EMIT6_PCREL(0xec000000, 0x0064, dst_reg, src_reg, i, off, mask);
/* crj or cgrj %dst,%src,mask,off */
EMIT6_PCREL(0xec000000, (is_jmp32 ? 0x0076 : 0x0064),
dst_reg, src_reg, i, off, mask);
break;
branch_xu:
/* clgrj %dst,%src,mask,off */
EMIT6_PCREL(0xec000000, 0x0065, dst_reg, src_reg, i, off, mask);
/* clrj or clgrj %dst,%src,mask,off */
EMIT6_PCREL(0xec000000, (is_jmp32 ? 0x0077 : 0x0065),
dst_reg, src_reg, i, off, mask);
break;
branch_oc:
/* brc mask,jmp_off (branch instruction needs 4 bytes) */
jmp_off = addrs[i + off + 1] - (addrs[i + 1] - 4);
EMIT4_PCREL(0xa7040000 | mask << 8, jmp_off);
break;
}
default: /* too complex, give up */
pr_err("Unknown opcode %02x\n", insn->code);
return -1;

View File

@ -198,7 +198,7 @@ config X86
select IRQ_FORCED_THREADING
select NEED_SG_DMA_LENGTH
select PCI_DOMAINS if PCI
select PCI_LOCKLESS_CONFIG
select PCI_LOCKLESS_CONFIG if PCI
select PERF_EVENTS
select RTC_LIB
select RTC_MC146818_LIB
@ -617,7 +617,7 @@ config X86_INTEL_QUARK
config X86_INTEL_LPSS
bool "Intel Low Power Subsystem Support"
depends on X86 && ACPI
depends on X86 && ACPI && PCI
select COMMON_CLK
select PINCTRL
select IOSF_MBI

View File

@ -361,7 +361,8 @@ ENTRY(entry_INT80_compat)
/* Need to switch before accessing the thread stack. */
SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi
movq %rsp, %rdi
/* In the Xen PV case we already run on the thread stack. */
ALTERNATIVE "movq %rsp, %rdi", "jmp .Lint80_keep_stack", X86_FEATURE_XENPV
movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
pushq 6*8(%rdi) /* regs->ss */
@ -370,8 +371,9 @@ ENTRY(entry_INT80_compat)
pushq 3*8(%rdi) /* regs->cs */
pushq 2*8(%rdi) /* regs->ip */
pushq 1*8(%rdi) /* regs->orig_ax */
pushq (%rdi) /* pt_regs->di */
.Lint80_keep_stack:
pushq %rsi /* pt_regs->si */
xorl %esi, %esi /* nospec si */
pushq %rdx /* pt_regs->dx */

View File

@ -178,6 +178,10 @@ static inline void switch_ldt(struct mm_struct *prev, struct mm_struct *next)
void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk);
/*
* Init a new mm. Used on mm copies, like at fork()
* and on mm's that are brand-new, like at execve().
*/
static inline int init_new_context(struct task_struct *tsk,
struct mm_struct *mm)
{
@ -228,8 +232,22 @@ do { \
} while (0)
#endif
static inline void arch_dup_pkeys(struct mm_struct *oldmm,
struct mm_struct *mm)
{
#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
if (!cpu_feature_enabled(X86_FEATURE_OSPKE))
return;
/* Duplicate the oldmm pkey state in mm: */
mm->context.pkey_allocation_map = oldmm->context.pkey_allocation_map;
mm->context.execute_only_pkey = oldmm->context.execute_only_pkey;
#endif
}
static inline int arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
{
arch_dup_pkeys(oldmm, mm);
paravirt_arch_dup_mmap(oldmm, mm);
return ldt_dup_context(oldmm, mm);
}

View File

@ -711,7 +711,7 @@ static __must_check inline bool user_access_begin(const void __user *ptr, size_t
{
if (unlikely(!access_ok(ptr,len)))
return 0;
__uaccess_begin();
__uaccess_begin_nospec();
return 1;
}
#define user_access_begin(a,b) user_access_begin(a,b)

View File

@ -470,6 +470,7 @@ int crash_load_segments(struct kimage *image)
kbuf.memsz = kbuf.bufsz;
kbuf.buf_align = ELF_CORE_HEADER_ALIGN;
kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;
ret = kexec_add_buffer(&kbuf);
if (ret) {
vfree((void *)image->arch.elf_headers);

View File

@ -21,10 +21,6 @@
#define HPET_MASK CLOCKSOURCE_MASK(32)
/* FSEC = 10^-15
NSEC = 10^-9 */
#define FSEC_PER_NSEC 1000000L
#define HPET_DEV_USED_BIT 2
#define HPET_DEV_USED (1 << HPET_DEV_USED_BIT)
#define HPET_DEV_VALID 0x8

View File

@ -434,6 +434,7 @@ static void *bzImage64_load(struct kimage *image, char *kernel,
kbuf.memsz = PAGE_ALIGN(header->init_size);
kbuf.buf_align = header->kernel_alignment;
kbuf.buf_min = MIN_KERNEL_LOAD_ADDR;
kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;
ret = kexec_add_buffer(&kbuf);
if (ret)
goto out_free_params;
@ -448,6 +449,7 @@ static void *bzImage64_load(struct kimage *image, char *kernel,
kbuf.bufsz = kbuf.memsz = initrd_len;
kbuf.buf_align = PAGE_SIZE;
kbuf.buf_min = MIN_INITRD_LOAD_ADDR;
kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;
ret = kexec_add_buffer(&kbuf);
if (ret)
goto out_free_params;

View File

@ -457,6 +457,7 @@ static void __send_ipi_mask(const struct cpumask *mask, int vector)
#else
u64 ipi_bitmap = 0;
#endif
long ret;
if (cpumask_empty(mask))
return;
@ -482,8 +483,9 @@ static void __send_ipi_mask(const struct cpumask *mask, int vector)
} else if (apic_id < min + KVM_IPI_CLUSTER_SIZE) {
max = apic_id < max ? max : apic_id;
} else {
kvm_hypercall4(KVM_HC_SEND_IPI, (unsigned long)ipi_bitmap,
ret = kvm_hypercall4(KVM_HC_SEND_IPI, (unsigned long)ipi_bitmap,
(unsigned long)(ipi_bitmap >> BITS_PER_LONG), min, icr);
WARN_ONCE(ret < 0, "KVM: failed to send PV IPI: %ld", ret);
min = max = apic_id;
ipi_bitmap = 0;
}
@ -491,8 +493,9 @@ static void __send_ipi_mask(const struct cpumask *mask, int vector)
}
if (ipi_bitmap) {
kvm_hypercall4(KVM_HC_SEND_IPI, (unsigned long)ipi_bitmap,
ret = kvm_hypercall4(KVM_HC_SEND_IPI, (unsigned long)ipi_bitmap,
(unsigned long)(ipi_bitmap >> BITS_PER_LONG), min, icr);
WARN_ONCE(ret < 0, "KVM: failed to send PV IPI: %ld", ret);
}
local_irq_restore(flags);

View File

@ -297,15 +297,16 @@ static int __init tsc_setup(char *str)
__setup("tsc=", tsc_setup);
#define MAX_RETRIES 5
#define SMI_TRESHOLD 50000
#define MAX_RETRIES 5
#define TSC_DEFAULT_THRESHOLD 0x20000
/*
* Read TSC and the reference counters. Take care of SMI disturbance
* Read TSC and the reference counters. Take care of any disturbances
*/
static u64 tsc_read_refs(u64 *p, int hpet)
{
u64 t1, t2;
u64 thresh = tsc_khz ? tsc_khz >> 5 : TSC_DEFAULT_THRESHOLD;
int i;
for (i = 0; i < MAX_RETRIES; i++) {
@ -315,7 +316,7 @@ static u64 tsc_read_refs(u64 *p, int hpet)
else
*p = acpi_pm_read_early();
t2 = get_cycles();
if ((t2 - t1) < SMI_TRESHOLD)
if ((t2 - t1) < thresh)
return t2;
}
return ULLONG_MAX;
@ -703,15 +704,15 @@ static unsigned long pit_hpet_ptimer_calibrate_cpu(void)
* zero. In each wait loop iteration we read the TSC and check
* the delta to the previous read. We keep track of the min
* and max values of that delta. The delta is mostly defined
* by the IO time of the PIT access, so we can detect when a
* SMI/SMM disturbance happened between the two reads. If the
* by the IO time of the PIT access, so we can detect when
* any disturbance happened between the two reads. If the
* maximum time is significantly larger than the minimum time,
* then we discard the result and have another try.
*
* 2) Reference counter. If available we use the HPET or the
* PMTIMER as a reference to check the sanity of that value.
* We use separate TSC readouts and check inside of the
* reference read for a SMI/SMM disturbance. We dicard
* reference read for any possible disturbance. We dicard
* disturbed values here as well. We do that around the PIT
* calibration delay loop as we have to wait for a certain
* amount of time anyway.
@ -744,7 +745,7 @@ static unsigned long pit_hpet_ptimer_calibrate_cpu(void)
if (ref1 == ref2)
continue;
/* Check, whether the sampling was disturbed by an SMI */
/* Check, whether the sampling was disturbed */
if (tsc1 == ULLONG_MAX || tsc2 == ULLONG_MAX)
continue;
@ -1268,7 +1269,7 @@ static DECLARE_DELAYED_WORK(tsc_irqwork, tsc_refine_calibration_work);
*/
static void tsc_refine_calibration_work(struct work_struct *work)
{
static u64 tsc_start = -1, ref_start;
static u64 tsc_start = ULLONG_MAX, ref_start;
static int hpet;
u64 tsc_stop, ref_stop, delta;
unsigned long freq;
@ -1283,14 +1284,15 @@ static void tsc_refine_calibration_work(struct work_struct *work)
* delayed the first time we expire. So set the workqueue
* again once we know timers are working.
*/
if (tsc_start == -1) {
if (tsc_start == ULLONG_MAX) {
restart:
/*
* Only set hpet once, to avoid mixing hardware
* if the hpet becomes enabled later.
*/
hpet = is_hpet_enabled();
schedule_delayed_work(&tsc_irqwork, HZ);
tsc_start = tsc_read_refs(&ref_start, hpet);
schedule_delayed_work(&tsc_irqwork, HZ);
return;
}
@ -1300,9 +1302,9 @@ static void tsc_refine_calibration_work(struct work_struct *work)
if (ref_start == ref_stop)
goto out;
/* Check, whether the sampling was disturbed by an SMI */
if (tsc_start == ULLONG_MAX || tsc_stop == ULLONG_MAX)
goto out;
/* Check, whether the sampling was disturbed */
if (tsc_stop == ULLONG_MAX)
goto restart;
delta = tsc_stop - tsc_start;
delta *= 1000000LL;

View File

@ -2,10 +2,6 @@
ccflags-y += -Iarch/x86/kvm
CFLAGS_x86.o := -I.
CFLAGS_svm.o := -I.
CFLAGS_vmx.o := -I.
KVM := ../../../virt/kvm
kvm-y += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o \

View File

@ -1636,7 +1636,7 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
ret = kvm_hvcall_signal_event(vcpu, fast, ingpa);
if (ret != HV_STATUS_INVALID_PORT_ID)
break;
/* maybe userspace knows this conn_id: fall through */
/* fall through - maybe userspace knows this conn_id. */
case HVCALL_POST_MESSAGE:
/* don't bother userspace if it has no way to handle it */
if (unlikely(rep || !vcpu_to_synic(vcpu)->active)) {
@ -1832,7 +1832,6 @@ int kvm_vcpu_ioctl_get_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid,
ent->eax |= HV_X64_MSR_VP_INDEX_AVAILABLE;
ent->eax |= HV_X64_MSR_RESET_AVAILABLE;
ent->eax |= HV_MSR_REFERENCE_TSC_AVAILABLE;
ent->eax |= HV_X64_MSR_GUEST_IDLE_AVAILABLE;
ent->eax |= HV_X64_ACCESS_FREQUENCY_MSRS;
ent->eax |= HV_X64_ACCESS_REENLIGHTENMENT;
@ -1848,11 +1847,11 @@ int kvm_vcpu_ioctl_get_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid,
case HYPERV_CPUID_ENLIGHTMENT_INFO:
ent->eax |= HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED;
ent->eax |= HV_X64_APIC_ACCESS_RECOMMENDED;
ent->eax |= HV_X64_SYSTEM_RESET_RECOMMENDED;
ent->eax |= HV_X64_RELAXED_TIMING_RECOMMENDED;
ent->eax |= HV_X64_CLUSTER_IPI_RECOMMENDED;
ent->eax |= HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED;
ent->eax |= HV_X64_ENLIGHTENED_VMCS_RECOMMENDED;
if (evmcs_ver)
ent->eax |= HV_X64_ENLIGHTENED_VMCS_RECOMMENDED;
/*
* Default number of spinlock retry attempts, matches

View File

@ -1035,6 +1035,7 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
switch (delivery_mode) {
case APIC_DM_LOWEST:
vcpu->arch.apic_arb_prio++;
/* fall through */
case APIC_DM_FIXED:
if (unlikely(trig_mode && !level))
break;
@ -1874,6 +1875,7 @@ int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
case APIC_LVT0:
apic_manage_nmi_watchdog(apic, val);
/* fall through */
case APIC_LVTTHMR:
case APIC_LVTPC:
case APIC_LVT1:

View File

@ -4371,6 +4371,7 @@ __reset_rsvds_bits_mask(struct kvm_vcpu *vcpu,
rsvd_bits(maxphyaddr, 51);
rsvd_check->rsvd_bits_mask[1][4] =
rsvd_check->rsvd_bits_mask[0][4];
/* fall through */
case PT64_ROOT_4LEVEL:
rsvd_check->rsvd_bits_mask[0][3] = exb_bit_rsvd |
nonleaf_bit8_rsvd | rsvd_bits(7, 7) |

View File

@ -3414,6 +3414,14 @@ static int nested_svm_vmexit(struct vcpu_svm *svm)
kvm_mmu_reset_context(&svm->vcpu);
kvm_mmu_load(&svm->vcpu);
/*
* Drop what we picked up for L2 via svm_complete_interrupts() so it
* doesn't end up in L1.
*/
svm->vcpu.arch.nmi_injected = false;
kvm_clear_exception_queue(&svm->vcpu);
kvm_clear_interrupt_queue(&svm->vcpu);
return 0;
}
@ -4395,7 +4403,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
case MSR_IA32_APICBASE:
if (kvm_vcpu_apicv_active(vcpu))
avic_update_vapic_bar(to_svm(vcpu), data);
/* Follow through */
/* Fall through */
default:
return kvm_set_msr_common(vcpu, msr);
}
@ -4504,28 +4512,19 @@ static int avic_incomplete_ipi_interception(struct vcpu_svm *svm)
kvm_lapic_reg_write(apic, APIC_ICR, icrl);
break;
case AVIC_IPI_FAILURE_TARGET_NOT_RUNNING: {
int i;
struct kvm_vcpu *vcpu;
struct kvm *kvm = svm->vcpu.kvm;
struct kvm_lapic *apic = svm->vcpu.arch.apic;
/*
* At this point, we expect that the AVIC HW has already
* set the appropriate IRR bits on the valid target
* vcpus. So, we just need to kick the appropriate vcpu.
* Update ICR high and low, then emulate sending IPI,
* which is handled when writing APIC_ICR.
*/
kvm_for_each_vcpu(i, vcpu, kvm) {
bool m = kvm_apic_match_dest(vcpu, apic,
icrl & KVM_APIC_SHORT_MASK,
GET_APIC_DEST_FIELD(icrh),
icrl & KVM_APIC_DEST_MASK);
if (m && !avic_vcpu_is_running(vcpu))
kvm_vcpu_wake_up(vcpu);
}
kvm_lapic_reg_write(apic, APIC_ICR2, icrh);
kvm_lapic_reg_write(apic, APIC_ICR, icrl);
break;
}
case AVIC_IPI_FAILURE_INVALID_TARGET:
WARN_ONCE(1, "Invalid IPI target: index=%u, vcpu=%d, icr=%#0x:%#0x\n",
index, svm->vcpu.vcpu_id, icrh, icrl);
break;
case AVIC_IPI_FAILURE_INVALID_BACKING_PAGE:
WARN_ONCE(1, "Invalid backing page\n");

View File

@ -1465,7 +1465,7 @@ TRACE_EVENT(kvm_hv_send_ipi_ex,
#endif /* _TRACE_KVM_H */
#undef TRACE_INCLUDE_PATH
#define TRACE_INCLUDE_PATH arch/x86/kvm
#define TRACE_INCLUDE_PATH ../../arch/x86/kvm
#undef TRACE_INCLUDE_FILE
#define TRACE_INCLUDE_FILE trace

View File

@ -332,16 +332,17 @@ int nested_enable_evmcs(struct kvm_vcpu *vcpu,
uint16_t *vmcs_version)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
bool evmcs_already_enabled = vmx->nested.enlightened_vmcs_enabled;
vmx->nested.enlightened_vmcs_enabled = true;
if (vmcs_version)
*vmcs_version = nested_get_evmcs_version(vcpu);
/* We don't support disabling the feature for simplicity. */
if (vmx->nested.enlightened_vmcs_enabled)
if (evmcs_already_enabled)
return 0;
vmx->nested.enlightened_vmcs_enabled = true;
vmx->nested.msrs.pinbased_ctls_high &= ~EVMCS1_UNSUPPORTED_PINCTRL;
vmx->nested.msrs.entry_ctls_high &= ~EVMCS1_UNSUPPORTED_VMENTRY_CTRL;
vmx->nested.msrs.exit_ctls_high &= ~EVMCS1_UNSUPPORTED_VMEXIT_CTRL;

Some files were not shown because too many files have changed in this diff Show More