diff --git a/Documentation/admin-guide/device-mapper/dm-ima.rst b/Documentation/admin-guide/device-mapper/dm-ima.rst new file mode 100644 index 000000000000..a4aa50a828e0 --- /dev/null +++ b/Documentation/admin-guide/device-mapper/dm-ima.rst @@ -0,0 +1,715 @@ +====== +dm-ima +====== + +For a given system, various external services/infrastructure tools +(including the attestation service) interact with it - both during the +setup and during rest of the system run-time. They share sensitive data +and/or execute critical workload on that system. The external services +may want to verify the current run-time state of the relevant kernel +subsystems before fully trusting the system with business-critical +data/workload. + +Device mapper plays a critical role on a given system by providing +various important functionalities to the block devices using various +target types like crypt, verity, integrity etc. Each of these target +types’ functionalities can be configured with various attributes. +The attributes chosen to configure these target types can significantly +impact the security profile of the block device, and in-turn, of the +system itself. For instance, the type of encryption algorithm and the +key size determines the strength of encryption for a given block device. + +Therefore, verifying the current state of various block devices as well +as their various target attributes is crucial for external services before +fully trusting the system with business-critical data/workload. + +IMA kernel subsystem provides the necessary functionality for +device mapper to measure the state and configuration of +various block devices - + +- by device mapper itself, from within the kernel, +- in a tamper resistant way, +- and re-measured - triggered on state/configuration change. + +Setting the IMA Policy: +======================= +For IMA to measure the data on a given system, the IMA policy on the +system needs to be updated to have following line, and the system needs +to be restarted for the measurements to take effect. + +:: + + /etc/ima/ima-policy + measure func=CRITICAL_DATA label=device-mapper template=ima-buf + +The measurements will be reflected in the IMA logs, which are located at: + +:: + + /sys/kernel/security/integrity/ima/ascii_runtime_measurements + /sys/kernel/security/integrity/ima/binary_runtime_measurements + +Then IMA ASCII measurement log has the following format: + +:: + + + + PCR := Platform Configuration Register, in which the values are registered. + This is applicable if TPM chip is in use. + + TEMPLATE_DATA_DIGEST := Template data digest of the IMA record. + TEMPLATE_NAME := Template name that registered the integrity value (e.g. ima-buf). + + TEMPLATE_DATA := ":" + It contains data for the specific event to be measured, + in a given template data format. + + ALG := Algorithm to compute event digest + EVENT_DIGEST := Digest of the event data + EVENT_NAME := Description of the event (e.g. 'dm_table_load'). + EVENT_DATA := The event data to be measured. + +| + +| *NOTE #1:* +| The DM target data measured by IMA subsystem can alternatively + be queried from userspace by setting DM_IMA_MEASUREMENT_FLAG with + DM_TABLE_STATUS_CMD. + +| + +| *NOTE #2:* +| The Kernel configuration CONFIG_IMA_DISABLE_HTABLE allows measurement of duplicate records. +| To support recording duplicate IMA events in the IMA log, the Kernel needs to be configured with + CONFIG_IMA_DISABLE_HTABLE=y. + +Supported Device States: +======================== +Following device state changes will trigger IMA measurements: + + 1. Table load + #. Device resume + #. Device remove + #. Table clear + #. Device rename + +1. Table load: +--------------- +When a new table is loaded in a device's inactive table slot, +the device information and target specific details from the +targets in the table are measured. + +The IMA measurement log has the following format for 'dm_table_load': + +:: + + EVENT_NAME := "dm_table_load" + EVENT_DATA := ";" ";" + + dm_version_str := "dm_version=" "." "." + Same as Device Mapper driver version. + device_metadata := "," "," "," "," + "," ";" + + device_name := "name=" + device_uuid := "uuid=" + device_major := "major=" + device_minor := "minor=" + minor_count := "minor_count=" + num_device_targets := "num_targets=" + dm-device-name := Name of the device. If it contains special characters like '\', ',', ';', + they are prefixed with '\'. + dm-device-uuid := UUID of the device. If it contains special characters like '\', ',', ';', + they are prefixed with '\'. + + table_load_data := + Represents the data (as name=value pairs) from various targets in the table, + which is being loaded into the DM device's inactive table slot. + target_data := | + + target_data_row := "," "," "," "," + "," ";" + target_index := "target_index=" + Represents nth target in the table (from 0 to N-1 targets specified in ) + If all the data for N targets doesn't fit in the given buffer - then the data that fits + in the buffer (say from target 0 to x) is measured in a given IMA event. + The remaining data from targets x+1 to N-1 is measured in the subsequent IMA events, + with the same format as that of 'dm_table_load' + i.e. ";" ";" . + + target_begin := "target_begin=" + target_len := "target_len=" + target_name := Name of the target. 'linear', 'crypt', 'integrity' etc. + The targets that are supported for IMA measurements are documented below in the + 'Supported targets' section. + target_version := "target_version=" "." "." + target_attributes := Data containing comma separated list of name=value pairs of target specific attributes. + + For instance, if a linear device is created with the following table entries, + # dmsetup create linear1 + 0 2 linear /dev/loop0 512 + 2 2 linear /dev/loop0 512 + 4 2 linear /dev/loop0 512 + 6 2 linear /dev/loop0 512 + + Then IMA ASCII measurement log will have the following entry: + (converted from ASCII to text for readability) + + 10 a8c5ff755561c7a28146389d1514c318592af49a ima-buf sha256:4d73481ecce5eadba8ab084640d85bb9ca899af4d0a122989252a76efadc5b72 + dm_table_load + dm_version=4.45.0; + name=linear1,uuid=,major=253,minor=0,minor_count=1,num_targets=4; + target_index=0,target_begin=0,target_len=2,target_name=linear,target_version=1.4.0,device_name=7:0,start=512; + target_index=1,target_begin=2,target_len=2,target_name=linear,target_version=1.4.0,device_name=7:0,start=512; + target_index=2,target_begin=4,target_len=2,target_name=linear,target_version=1.4.0,device_name=7:0,start=512; + target_index=3,target_begin=6,target_len=2,target_name=linear,target_version=1.4.0,device_name=7:0,start=512; + +2. Device resume: +------------------ +When a suspended device is resumed, the device information and the hash of the +data from previous load of an active table are measured. + +The IMA measurement log has the following format for 'dm_device_resume': + +:: + + EVENT_NAME := "dm_device_resume" + EVENT_DATA := ";" ";" ";" ";" + + dm_version_str := As described in the 'Table load' section above. + device_metadata := As described in the 'Table load' section above. + active_table_hash := "active_table_hash=" ":" + Rerpresents the hash of the IMA data being measured for the + active table for the device. + table_hash_alg := Algorithm used to compute the hash. + table_hash := Hash of the ( ";" ";" ";") + as described in the 'dm_table_load' above. + Note: If the table_load data spans across multiple IMA 'dm_table_load' + events for a given device, the hash is computed combining all the event data + i.e. ( ";" ";" ";") + across all those events. + current_device_capacity := "current_device_capacity=" + + For instance, if a linear device is resumed with the following command, + #dmsetup resume linear1 + + then IMA ASCII measurement log will have an entry with: + (converted from ASCII to text for readability) + + 10 56c00cc062ffc24ccd9ac2d67d194af3282b934e ima-buf sha256:e7d12c03b958b4e0e53e7363a06376be88d98a1ac191fdbd3baf5e4b77f329b6 + dm_device_resume + dm_version=4.45.0; + name=linear1,uuid=,major=253,minor=0,minor_count=1,num_targets=4; + active_table_hash=sha256:4d73481ecce5eadba8ab084640d85bb9ca899af4d0a122989252a76efadc5b72;current_device_capacity=8; + +3. Device remove: +------------------ +When a device is removed, the device information and a sha256 hash of the +data from an active and inactive table are measured. + +The IMA measurement log has the following format for 'dm_device_remove': + +:: + + EVENT_NAME := "dm_device_remove" + EVENT_DATA := ";" ";" ";" + "," "," ";" ";" + + dm_version_str := As described in the 'Table load' section above. + device_active_metadata := Device metadata that reflects the currently loaded active table. + The format is same as 'device_metadata' described in the 'Table load' section above. + device_inactive_metadata := Device metadata that reflects the inactive table. + The format is same as 'device_metadata' described in the 'Table load' section above. + active_table_hash := Hash of the currently loaded active table. + The format is same as 'active_table_hash' described in the 'Device resume' section above. + inactive_table_hash := Hash of the inactive table. + The format is same as 'active_table_hash' described in the 'Device resume' section above. + remove_all := "remove_all=" + yes_no := "y" | "n" + current_device_capacity := "current_device_capacity=" + + For instance, if a linear device is removed with the following command, + #dmsetup remove l1 + + then IMA ASCII measurement log will have the following entry: + (converted from ASCII to text for readability) + + 10 790e830a3a7a31590824ac0642b3b31c2d0e8b38 ima-buf sha256:ab9f3c959367a8f5d4403d6ce9c3627dadfa8f9f0e7ec7899299782388de3840 + dm_device_remove + dm_version=4.45.0; + device_active_metadata=name=l1,uuid=,major=253,minor=2,minor_count=1,num_targets=2; + device_inactive_metadata=name=l1,uuid=,major=253,minor=2,minor_count=1,num_targets=1; + active_table_hash=sha256:4a7e62efaebfc86af755831998b7db6f59b60d23c9534fb16a4455907957953a, + inactive_table_hash=sha256:9d79c175bc2302d55a183e8f50ad4bafd60f7692fd6249e5fd213e2464384b86,remove_all=n; + current_device_capacity=2048; + +4. Table clear: +---------------- +When an inactive table is cleared from the device, the device information and a sha256 hash of the +data from an inactive table are measured. + +The IMA measurement log has the following format for 'dm_table_clear': + +:: + + EVENT_NAME := "dm_table_clear" + EVENT_DATA := ";" ";" ";" ";" + + dm_version_str := As described in the 'Table load' section above. + device_inactive_metadata := Device metadata that was captured during the load time inactive table being cleared. + The format is same as 'device_metadata' described in the 'Table load' section above. + inactive_table_hash := Hash of the inactive table being cleared from the device. + The format is same as 'active_table_hash' described in the 'Device resume' section above. + current_device_capacity := "current_device_capacity=" + + For instance, if a linear device's inactive table is cleared, + #dmsetup clear l1 + + then IMA ASCII measurement log will have an entry with: + (converted from ASCII to text for readability) + + 10 77d347408f557f68f0041acb0072946bb2367fe5 ima-buf sha256:42f9ca22163fdfa548e6229dece2959bc5ce295c681644240035827ada0e1db5 + dm_table_clear + dm_version=4.45.0; + name=l1,uuid=,major=253,minor=2,minor_count=1,num_targets=1; + inactive_table_hash=sha256:75c0dc347063bf474d28a9907037eba060bfe39d8847fc0646d75e149045d545;current_device_capacity=1024; + +5. Device rename: +------------------ +When an device's NAME or UUID is changed, the device information and the new NAME and UUID +are measured. + +The IMA measurement log has the following format for 'dm_device_rename': + +:: + + EVENT_NAME := "dm_device_rename" + EVENT_DATA := ";" ";" "," ";" ";" + + dm_version_str := As described in the 'Table load' section above. + device_active_metadata := Device metadata that reflects the currently loaded active table. + The format is same as 'device_metadata' described in the 'Table load' section above. + new_device_name := "new_name=" + dm-device-name := Same as described in 'Table load' section above + new_device_uuid := "new_uuid=" + dm-device-uuid := Same as described in 'Table load' section above + current_device_capacity := "current_device_capacity=" + + E.g 1: if a linear device's name is changed with the following command, + #dmsetup rename linear1 --setuuid 1234-5678 + + then IMA ASCII measurement log will have an entry with: + (converted from ASCII to text for readability) + + 10 8b0423209b4c66ac1523f4c9848c9b51ee332f48 ima-buf sha256:6847b7258134189531db593e9230b257c84f04038b5a18fd2e1473860e0569ac + dm_device_rename + dm_version=4.45.0; + name=linear1,uuid=,major=253,minor=2,minor_count=1,num_targets=1;new_name=linear1,new_uuid=1234-5678; + current_device_capacity=1024; + + E.g 2: if a linear device's name is changed with the following command, + # dmsetup rename linear1 linear=2 + + then IMA ASCII measurement log will have an entry with: + (converted from ASCII to text for readability) + + 10 bef70476b99c2bdf7136fae033aa8627da1bf76f ima-buf sha256:8c6f9f53b9ef9dc8f92a2f2cca8910e622543d0f0d37d484870cb16b95111402 + dm_device_rename + dm_version=4.45.0; + name=linear1,uuid=1234-5678,major=253,minor=2,minor_count=1,num_targets=1; + new_name=linear\=2,new_uuid=1234-5678; + current_device_capacity=1024; + +Supported targets: +================== + +Following targets are supported to measure their data using IMA: + + 1. cache + #. crypt + #. integrity + #. linear + #. mirror + #. multipath + #. raid + #. snapshot + #. striped + #. verity + +1. cache +--------- +The 'target_attributes' (described as part of EVENT_DATA in 'Table load' +section above) has the following data format for 'cache' target. + +:: + + target_attributes := "," "," "," "," + "," "," "," "," + "," ";" + + target_name := "target_name=cache" + target_version := "target_version=" "." "." + metadata_mode := "metadata_mode=" + cache_metadata_mode := "fail" | "ro" | "rw" + cache_device := "cache_device=" + cache_origin_device := "cache_origin_device=" + writethrough := "writethrough=" + writeback := "writeback=" + passthrough := "passthrough=" + no_discard_passdown := "no_discard_passdown=" + yes_no := "y" | "n" + + E.g. + When a 'cache' target is loaded, then IMA ASCII measurement log will have an entry + similar to the following, depicting what 'cache' attributes are measured in EVENT_DATA + for 'dm_table_load' event. + (converted from ASCII to text for readability) + + dm_version=4.45.0;name=cache1,uuid=cache_uuid,major=253,minor=2,minor_count=1,num_targets=1; + target_index=0,target_begin=0,target_len=28672,target_name=cache,target_version=2.2.0,metadata_mode=rw, + cache_metadata_device=253:4,cache_device=253:3,cache_origin_device=253:5,writethrough=y,writeback=n, + passthrough=n,metadata2=y,no_discard_passdown=n; + + +2. crypt +--------- +The 'target_attributes' (described as part of EVENT_DATA in 'Table load' +section above) has the following data format for 'crypt' target. + +:: + + target_attributes := "," "," "," "," + "," "," "," + "," "," [ ","] [ ","] + [ ","] [ ","] "," "," + "," ";" + + target_name := "target_name=crypt" + target_version := "target_version=" "." "." + allow_discards := "allow_discards=" + same_cpu_crypt := "same_cpu_crypt=" + submit_from_crypt_cpus := "submit_from_crypt_cpus=" + no_read_workqueue := "no_read_workqueue=" + no_write_workqueue := "no_write_workqueue=" + iv_large_sectors := "iv_large_sectors=" + integrity_tag_size := "integrity_tag_size=" + cipher_auth := "cipher_auth=" + sector_size := "sector_size=" + cipher_string := "cipher_string=" + key_size := "key_size=" + key_parts := "key_parts=" + key_extra_size := "key_extra_size=" + key_mac_size := "key_mac_size=" + yes_no := "y" | "n" + + E.g. + When a 'crypt' target is loaded, then IMA ASCII measurement log will have an entry + similar to the following, depicting what 'crypt' attributes are measured in EVENT_DATA + for 'dm_table_load' event. + (converted from ASCII to text for readability) + + dm_version=4.45.0; + name=crypt1,uuid=crypt_uuid1,major=253,minor=0,minor_count=1,num_targets=1; + target_index=0,target_begin=0,target_len=1953125,target_name=crypt,target_version=1.23.0, + allow_discards=y,same_cpu=n,submit_from_crypt_cpus=n,no_read_workqueue=n,no_write_workqueue=n, + iv_large_sectors=n,cipher_string=aes-xts-plain64,key_size=32,key_parts=1,key_extra_size=0,key_mac_size=0; + +3. integrity +------------- +The 'target_attributes' (described as part of EVENT_DATA in 'Table load' +section above) has the following data format for 'integrity' target. + +:: + + target_attributes := "," "," "," + "," "," [ ","] [ ","] "," + "," "," "," "," + "," "," ";" + + target_name := "target_name=integrity" + target_version := "target_version=" "." "." + dev_name := "dev_name=" + start := "start=" + tag_size := "tag_size=" + mode := "mode=" + integrity_mode_str := "J" | "B" | "D" | "R" + meta_device := "meta_device=" + block_size := "block_size=" + recalculate := "recalculate=" + allow_discards := "allow_discards=" + fix_padding := "fix_padding=" + fix_hmac := "fix_hmac=" + legacy_recalculate := "legacy_recalculate=" + journal_sectors := "journal_sectors=" + interleave_sectors := "interleave_sectors=" + buffer_sectors := "buffer_sectors=" + yes_no := "y" | "n" + + E.g. + When a 'integrity' target is loaded, then IMA ASCII measurement log will have an entry + similar to the following, depicting what 'integrity' attributes are measured in EVENT_DATA + for 'dm_table_load' event. + (converted from ASCII to text for readability) + + dm_version=4.45.0; + name=integrity1,uuid=,major=253,minor=1,minor_count=1,num_targets=1; + target_index=0,target_begin=0,target_len=7856,target_name=integrity,target_version=1.10.0, + dev_name=253:0,start=0,tag_size=32,mode=J,recalculate=n,allow_discards=n,fix_padding=n, + fix_hmac=n,legacy_recalculate=n,journal_sectors=88,interleave_sectors=32768,buffer_sectors=128; + + +4. linear +---------- +The 'target_attributes' (described as part of EVENT_DATA in 'Table load' +section above) has the following data format for 'linear' target. + +:: + + target_attributes := "," "," <,> ";" + + target_name := "target_name=linear" + target_version := "target_version=" "." "." + device_name := "device_name=" + start := "start=" + + E.g. + When a 'linear' target is loaded, then IMA ASCII measurement log will have an entry + similar to the following, depicting what 'linear' attributes are measured in EVENT_DATA + for 'dm_table_load' event. + (converted from ASCII to text for readability) + + dm_version=4.45.0; + name=linear1,uuid=linear_uuid1,major=253,minor=2,minor_count=1,num_targets=1; + target_index=0,target_begin=0,target_len=28672,target_name=linear,target_version=1.4.0, + device_name=253:1,start=2048; + +5. mirror +---------- +The 'target_attributes' (described as part of EVENT_DATA in 'Table load' +section above) has the following data format for 'mirror' target. + +:: + + target_attributes := "," "," "," + "," "," "," ";" + + target_name := "target_name=mirror" + target_version := "target_version=" "." "." + nr_mirrors := "nr_mirrors=" + mirror_device_data := | + mirror_device_row is repeated times - for described in . + mirror_device_row := "," + mirror_device_name := "mirror_device_" "=" + where ranges from 0 to ( -1) - for described in . + mirror_device_status := "mirror_device_" "_status=" + where ranges from 0 to ( -1) - for described in . + mirror_device_status_char := "A" | "F" | "D" | "S" | "R" | "U" + handle_errors := "handle_errors=" + keep_log := "keep_log=" + log_type_status := "log_type_status=" + yes_no := "y" | "n" + + E.g. + When a 'mirror' target is loaded, then IMA ASCII measurement log will have an entry + similar to the following, depicting what 'mirror' attributes are measured in EVENT_DATA + for 'dm_table_load' event. + (converted from ASCII to text for readability) + + dm_version=4.45.0; + name=mirror1,uuid=mirror_uuid1,major=253,minor=6,minor_count=1,num_targets=1; + target_index=0,target_begin=0,target_len=2048,target_name=mirror,target_version=1.14.0,nr_mirrors=2, + mirror_device_0=253:4,mirror_device_0_status=A, + mirror_device_1=253:5,mirror_device_1_status=A, + handle_errors=y,keep_log=n,log_type_status=; + +6. multipath +------------- +The 'target_attributes' (described as part of EVENT_DATA in 'Table load' +section above) has the following data format for 'multipath' target. + +:: + + target_attributes := "," "," + ["," "," "," ] ";" + + target_name := "target_name=multipath" + target_version := "target_version=" "." "." + nr_priority_groups := "nr_priority_groups=" + priority_groups := | + priority_groups_row := "pg_state_" "=" "," "nr_pgpaths_" "=" "," + "path_selector_name_" "=" "," + where ranges from 0 to ( -1) - for described in . + pg_state_str := "E" | "A" | "D" + := | + priority_group_paths_row := "path_name_" "_" "=" "," "is_active_" "_" "=" + "fail_count_" "_" "=" "," "path_selector_status_" "_" "=" + where ranges from 0 to ( -1) - for described in , + and ranges from 0 to ( -1) - for described in . + is_active_str := "A" | "F" + + E.g. + When a 'multipath' target is loaded, then IMA ASCII measurement log will have an entry + similar to the following, depicting what 'multipath' attributes are measured in EVENT_DATA + for 'dm_table_load' event. + (converted from ASCII to text for readability) + + dm_version=4.45.0; + name=mp,uuid=,major=253,minor=0,minor_count=1,num_targets=1; + target_index=0,target_begin=0,target_len=2097152,target_name=multipath,target_version=1.14.0,nr_priority_groups=2, + pg_state_0=E,nr_pgpaths_0=2,path_selector_name_0=queue-length, + path_name_0_0=8:16,is_active_0_0=A,fail_count_0_0=0,path_selector_status_0_0=, + path_name_0_1=8:32,is_active_0_1=A,fail_count_0_1=0,path_selector_status_0_1=, + pg_state_1=E,nr_pgpaths_1=2,path_selector_name_1=queue-length, + path_name_1_0=8:48,is_active_1_0=A,fail_count_1_0=0,path_selector_status_1_0=, + path_name_1_1=8:64,is_active_1_1=A,fail_count_1_1=0,path_selector_status_1_1=; + +7. raid +-------- +The 'target_attributes' (described as part of EVENT_DATA in 'Table load' +section above) has the following data format for 'raid' target. + +:: + + target_attributes := "," "," "," "," + ["," journal_dev_mode] ";" + + target_name := "target_name=raid" + target_version := "target_version=" "." "." + raid_type := "raid_type=" + raid_disks := "raid_disks=" + raid_state := "raid_state=" + raid_state_str := "frozen" | "reshape" |"resync" | "check" | "repair" | "recover" | "idle" |"undef" + raid_device_status := | + is repeated times - for described in . + raid_device_status_row := "raid_device_" "_status=" + where ranges from 0 to ( -1) - for described in . + raid_device_status_str := "A" | "D" | "a" | "-" + journal_dev_mode := "journal_dev_mode=" + journal_dev_mode_str := "writethrough" | "writeback" | "invalid" + + E.g. + When a 'raid' target is loaded, then IMA ASCII measurement log will have an entry + similar to the following, depicting what 'raid' attributes are measured in EVENT_DATA + for 'dm_table_load' event. + (converted from ASCII to text for readability) + + dm_version=4.45.0; + name=raid_LV1,uuid=uuid_raid_LV1,major=253,minor=12,minor_count=1,num_targets=1; + target_index=0,target_begin=0,target_len=2048,target_name=raid,target_version=1.15.1, + raid_type=raid10,raid_disks=4,raid_state=idle, + raid_device_0_status=A, + raid_device_1_status=A, + raid_device_2_status=A, + raid_device_3_status=A; + + +8. snapshot +------------ +The 'target_attributes' (described as part of EVENT_DATA in 'Table load' +section above) has the following data format for 'snapshot' target. + +:: + + target_attributes := "," "," "," + "," "," "," ";" + + target_name := "target_name=snapshot" + target_version := "target_version=" "." "." + snap_origin_name := "snap_origin_name=" + snap_cow_name := "snap_cow_name=" + snap_valid := "snap_valid=" + snap_merge_failed := "snap_merge_failed=" + snapshot_overflowed := "snapshot_overflowed=" + yes_no := "y" | "n" + + E.g. + When a 'snapshot' target is loaded, then IMA ASCII measurement log will have an entry + similar to the following, depicting what 'snapshot' attributes are measured in EVENT_DATA + for 'dm_table_load' event. + (converted from ASCII to text for readability) + + dm_version=4.45.0; + name=snap1,uuid=snap_uuid1,major=253,minor=13,minor_count=1,num_targets=1; + target_index=0,target_begin=0,target_len=4096,target_name=snapshot,target_version=1.16.0, + snap_origin_name=253:11,snap_cow_name=253:12,snap_valid=y,snap_merge_failed=n,snapshot_overflowed=n; + +9. striped +----------- +The 'target_attributes' (described as part of EVENT_DATA in 'Table load' +section above) has the following data format for 'striped' target. + +:: + + target_attributes := "," "," "," "," + ";" + + target_name := "target_name=striped" + target_version := "target_version=" "." "." + stripes := "stripes=" + chunk_size := "chunk_size=" + stripe_data := | + stripe_data_row := "," "," + stripe_device_name := "stripe_" "_device_name=" + where ranges from 0 to ( -1) - for described in . + stripe_physical_start := "stripe_" "_physical_start=" + where ranges from 0 to ( -1) - for described in . + stripe_status := "stripe_" "_status=" + where ranges from 0 to ( -1) - for described in . + stripe_status_str := "D" | "A" + + E.g. + When a 'striped' target is loaded, then IMA ASCII measurement log will have an entry + similar to the following, depicting what 'striped' attributes are measured in EVENT_DATA + for 'dm_table_load' event. + (converted from ASCII to text for readability) + + dm_version=4.45.0; + name=striped1,uuid=striped_uuid1,major=253,minor=5,minor_count=1,num_targets=1; + target_index=0,target_begin=0,target_len=640,target_name=striped,target_version=1.6.0,stripes=2,chunk_size=64, + stripe_0_device_name=253:0,stripe_0_physical_start=2048,stripe_0_status=A, + stripe_1_device_name=253:3,stripe_1_physical_start=2048,stripe_1_status=A; + +10. verity +---------- +The 'target_attributes' (described as part of EVENT_DATA in 'Table load' +section above) has the following data format for 'verity' target. + +:: + + target_attributes := "," "," "," "," + "," "," "," "," + "," "," ["," ] + ["," ] ";" + + target_name := "target_name=verity" + target_version := "target_version=" "." "." + hash_failed := "hash_failed=" + hash_failed_str := "C" | "V" + verity_version := "verity_version=" + data_device_name := "data_device_name=" + hash_device_name := "hash_device_name=" + verity_algorithm := "verity_algorithm=" + root_digest := "root_digest=" + salt := "salt=" + salt_str := "-" + ignore_zero_blocks := "ignore_zero_blocks=" + check_at_most_once := "check_at_most_once=" + root_hash_sig_key_desc := "root_hash_sig_key_desc=" + verity_mode := "verity_mode=" + verity_mode_str := "ignore_corruption" | "restart_on_corruption" | "panic_on_corruption" | "invalid" + yes_no := "y" | "n" + + E.g. + When a 'verity' target is loaded, then IMA ASCII measurement log will have an entry + similar to the following, depicting what 'verity' attributes are measured in EVENT_DATA + for 'dm_table_load' event. + (converted from ASCII to text for readability) + + dm_version=4.45.0; + name=test-verity,uuid=,major=253,minor=2,minor_count=1,num_targets=1; + target_index=0,target_begin=0,target_len=1953120,target_name=verity,target_version=1.8.0,hash_failed=V, + verity_version=1,data_device_name=253:1,hash_device_name=253:0,verity_algorithm=sha256, + root_digest=29cb87e60ce7b12b443ba6008266f3e41e93e403d7f298f8e3f316b29ff89c5e, + salt=e48da609055204e89ae53b655ca2216dd983cf3cb829f34f63a297d106d53e2d, + ignore_zero_blocks=n,check_at_most_once=n; diff --git a/Documentation/admin-guide/device-mapper/index.rst b/Documentation/admin-guide/device-mapper/index.rst index 6cf8adc86fa8..cde52cc09645 100644 --- a/Documentation/admin-guide/device-mapper/index.rst +++ b/Documentation/admin-guide/device-mapper/index.rst @@ -13,6 +13,7 @@ Device Mapper dm-dust dm-ebs dm-flakey + dm-ima dm-init dm-integrity dm-io diff --git a/Documentation/admin-guide/device-mapper/writecache.rst b/Documentation/admin-guide/device-mapper/writecache.rst index 65427d8dfca6..10429779a91a 100644 --- a/Documentation/admin-guide/device-mapper/writecache.rst +++ b/Documentation/admin-guide/device-mapper/writecache.rst @@ -78,13 +78,23 @@ Status: 2. the number of blocks 3. the number of free blocks 4. the number of blocks under writeback +5. the number of read requests +6. the number of read requests that hit the cache +7. the number of write requests +8. the number of write requests that hit uncommitted block +9. the number of write requests that hit committed block +10. the number of write requests that bypass the cache +11. the number of write requests that are allocated in the cache +12. the number of write requests that are blocked on the freelist +13. the number of flush requests +14. the number of discard requests Messages: flush - flush the cache device. The message returns successfully + Flush the cache device. The message returns successfully if the cache device was flushed without an error flush_on_suspend - flush the cache device on next suspend. Use this message + Flush the cache device on next suspend. Use this message when you are going to remove the cache device. The proper sequence for removing the cache device is: @@ -98,3 +108,5 @@ Messages: 6. the cache device is now inactive and it can be deleted cleaner See above "cleaner" constructor documentation. + clear_stats + Clear the statistics that are reported on the status line diff --git a/drivers/md/Makefile b/drivers/md/Makefile index a74aaf8b1445..816945eeed7f 100644 --- a/drivers/md/Makefile +++ b/drivers/md/Makefile @@ -96,6 +96,10 @@ ifeq ($(CONFIG_BLK_DEV_ZONED),y) dm-mod-objs += dm-zone.o endif +ifeq ($(CONFIG_IMA),y) +dm-mod-objs += dm-ima.o +endif + ifeq ($(CONFIG_DM_VERITY_FEC),y) dm-verity-objs += dm-verity-fec.o endif diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index 8e4ced5a2516..bdd500447dea 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -3122,6 +3122,30 @@ static void cache_status(struct dm_target *ti, status_type_t type, DMEMIT(" %s", cache->ctr_args[i]); if (cache->nr_ctr_args) DMEMIT(" %s", cache->ctr_args[cache->nr_ctr_args - 1]); + break; + + case STATUSTYPE_IMA: + DMEMIT_TARGET_NAME_VERSION(ti->type); + if (get_cache_mode(cache) == CM_FAIL) + DMEMIT(",metadata_mode=fail"); + else if (get_cache_mode(cache) == CM_READ_ONLY) + DMEMIT(",metadata_mode=ro"); + else + DMEMIT(",metadata_mode=rw"); + + format_dev_t(buf, cache->metadata_dev->bdev->bd_dev); + DMEMIT(",cache_metadata_device=%s", buf); + format_dev_t(buf, cache->cache_dev->bdev->bd_dev); + DMEMIT(",cache_device=%s", buf); + format_dev_t(buf, cache->origin_dev->bdev->bd_dev); + DMEMIT(",cache_origin_device=%s", buf); + DMEMIT(",writethrough=%c", writethrough_mode(cache) ? 'y' : 'n'); + DMEMIT(",writeback=%c", writeback_mode(cache) ? 'y' : 'n'); + DMEMIT(",passthrough=%c", passthrough_mode(cache) ? 'y' : 'n'); + DMEMIT(",metadata2=%c", cache->features.metadata_version == 2 ? 'y' : 'n'); + DMEMIT(",no_discard_passdown=%c", cache->features.discard_passdown ? 'n' : 'y'); + DMEMIT(";"); + break; } return; diff --git a/drivers/md/dm-clone-target.c b/drivers/md/dm-clone-target.c index a90bdf9b2ca6..84dbe08ad205 100644 --- a/drivers/md/dm-clone-target.c +++ b/drivers/md/dm-clone-target.c @@ -1499,6 +1499,11 @@ static void clone_status(struct dm_target *ti, status_type_t type, for (i = 0; i < clone->nr_ctr_args; i++) DMEMIT(" %s", clone->ctr_args[i]); + break; + + case STATUSTYPE_IMA: + *result = '\0'; + break; } return; diff --git a/drivers/md/dm-core.h b/drivers/md/dm-core.h index edc1553c4eea..55dccdfbcb22 100644 --- a/drivers/md/dm-core.h +++ b/drivers/md/dm-core.h @@ -18,6 +18,7 @@ #include #include "dm.h" +#include "dm-ima.h" #define DM_RESERVED_MAX_IOS 1024 @@ -119,6 +120,10 @@ struct mapped_device { unsigned int nr_zones; unsigned int *zwp_offset; #endif + +#ifdef CONFIG_IMA + struct dm_ima_measurements ima; +#endif }; /* diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 50f4cbd600d5..916b7da16de2 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -2223,11 +2223,11 @@ static void kcryptd_queue_crypt(struct dm_crypt_io *io) if ((bio_data_dir(io->base_bio) == READ && test_bit(DM_CRYPT_NO_READ_WORKQUEUE, &cc->flags)) || (bio_data_dir(io->base_bio) == WRITE && test_bit(DM_CRYPT_NO_WRITE_WORKQUEUE, &cc->flags))) { /* - * in_irq(): Crypto API's skcipher_walk_first() refuses to work in hard IRQ context. + * in_hardirq(): Crypto API's skcipher_walk_first() refuses to work in hard IRQ context. * irqs_disabled(): the kernel may run some IO completion from the idle thread, but * it is being executed with irqs disabled. */ - if (in_irq() || irqs_disabled()) { + if (in_hardirq() || irqs_disabled()) { tasklet_init(&io->tasklet, kcryptd_crypt_tasklet, (unsigned long)&io->work); tasklet_schedule(&io->tasklet); return; @@ -2661,7 +2661,12 @@ static void *crypt_page_alloc(gfp_t gfp_mask, void *pool_data) struct crypt_config *cc = pool_data; struct page *page; - if (unlikely(percpu_counter_compare(&cc->n_allocated_pages, dm_crypt_pages_per_client) >= 0) && + /* + * Note, percpu_counter_read_positive() may over (and under) estimate + * the current usage by at most (batch - 1) * num_online_cpus() pages, + * but avoids potential spinlock contention of an exact result. + */ + if (unlikely(percpu_counter_read_positive(&cc->n_allocated_pages) >= dm_crypt_pages_per_client) && likely(gfp_mask & __GFP_NORETRY)) return NULL; @@ -3485,7 +3490,34 @@ static void crypt_status(struct dm_target *ti, status_type_t type, if (test_bit(CRYPT_IV_LARGE_SECTORS, &cc->cipher_flags)) DMEMIT(" iv_large_sectors"); } + break; + case STATUSTYPE_IMA: + DMEMIT_TARGET_NAME_VERSION(ti->type); + DMEMIT(",allow_discards=%c", ti->num_discard_bios ? 'y' : 'n'); + DMEMIT(",same_cpu_crypt=%c", test_bit(DM_CRYPT_SAME_CPU, &cc->flags) ? 'y' : 'n'); + DMEMIT(",submit_from_crypt_cpus=%c", test_bit(DM_CRYPT_NO_OFFLOAD, &cc->flags) ? + 'y' : 'n'); + DMEMIT(",no_read_workqueue=%c", test_bit(DM_CRYPT_NO_READ_WORKQUEUE, &cc->flags) ? + 'y' : 'n'); + DMEMIT(",no_write_workqueue=%c", test_bit(DM_CRYPT_NO_WRITE_WORKQUEUE, &cc->flags) ? + 'y' : 'n'); + DMEMIT(",iv_large_sectors=%c", test_bit(CRYPT_IV_LARGE_SECTORS, &cc->cipher_flags) ? + 'y' : 'n'); + + if (cc->on_disk_tag_size) + DMEMIT(",integrity_tag_size=%u,cipher_auth=%s", + cc->on_disk_tag_size, cc->cipher_auth); + if (cc->sector_size != (1 << SECTOR_SHIFT)) + DMEMIT(",sector_size=%d", cc->sector_size); + if (cc->cipher_string) + DMEMIT(",cipher_string=%s", cc->cipher_string); + + DMEMIT(",key_size=%u", cc->key_size); + DMEMIT(",key_parts=%u", cc->key_parts); + DMEMIT(",key_extra_size=%u", cc->key_extra_size); + DMEMIT(",key_mac_size=%u", cc->key_mac_size); + DMEMIT(";"); break; } } diff --git a/drivers/md/dm-delay.c b/drivers/md/dm-delay.c index 2628a832787b..59e51d285b0e 100644 --- a/drivers/md/dm-delay.c +++ b/drivers/md/dm-delay.c @@ -326,6 +326,10 @@ static void delay_status(struct dm_target *ti, status_type_t type, DMEMIT_DELAY_CLASS(&dc->flush); } break; + + case STATUSTYPE_IMA: + *result = '\0'; + break; } } diff --git a/drivers/md/dm-dust.c b/drivers/md/dm-dust.c index cbe1058ee589..3163e2b1418e 100644 --- a/drivers/md/dm-dust.c +++ b/drivers/md/dm-dust.c @@ -527,6 +527,10 @@ static void dust_status(struct dm_target *ti, status_type_t type, DMEMIT("%s %llu %u", dd->dev->name, (unsigned long long)dd->start, dd->blksz); break; + + case STATUSTYPE_IMA: + *result = '\0'; + break; } } diff --git a/drivers/md/dm-ebs-target.c b/drivers/md/dm-ebs-target.c index 0c509dae0ff8..d25989660a76 100644 --- a/drivers/md/dm-ebs-target.c +++ b/drivers/md/dm-ebs-target.c @@ -401,6 +401,9 @@ static void ebs_status(struct dm_target *ti, status_type_t type, snprintf(result, maxlen, ec->u_bs_set ? "%s %llu %u %u" : "%s %llu %u", ec->dev->name, (unsigned long long) ec->start, ec->e_bs, ec->u_bs); break; + case STATUSTYPE_IMA: + *result = '\0'; + break; } } diff --git a/drivers/md/dm-era-target.c b/drivers/md/dm-era-target.c index 3b748393fca5..2a78f6874143 100644 --- a/drivers/md/dm-era-target.c +++ b/drivers/md/dm-era-target.c @@ -1644,6 +1644,10 @@ static void era_status(struct dm_target *ti, status_type_t type, format_dev_t(buf, era->origin_dev->bdev->bd_dev); DMEMIT("%s %u", buf, era->sectors_per_block); break; + + case STATUSTYPE_IMA: + *result = '\0'; + break; } return; diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c index 5877220c01ed..4b94ffe6f2d4 100644 --- a/drivers/md/dm-flakey.c +++ b/drivers/md/dm-flakey.c @@ -440,6 +440,10 @@ static void flakey_status(struct dm_target *ti, status_type_t type, fc->corrupt_bio_value, fc->corrupt_bio_flags); break; + + case STATUSTYPE_IMA: + result[0] = '\0'; + break; } } diff --git a/drivers/md/dm-ima.c b/drivers/md/dm-ima.c new file mode 100644 index 000000000000..3fd69ab12a8e --- /dev/null +++ b/drivers/md/dm-ima.c @@ -0,0 +1,750 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2021 Microsoft Corporation + * + * Author: Tushar Sugandhi + * + * File: dm-ima.c + * Enables IMA measurements for DM targets + */ + +#include "dm-core.h" +#include "dm-ima.h" + +#include +#include +#include +#include + +#define DM_MSG_PREFIX "ima" + +/* + * Internal function to prefix separator characters in input buffer with escape + * character, so that they don't interfere with the construction of key-value pairs, + * and clients can split the key1=val1,key2=val2,key3=val3; pairs properly. + */ +static void fix_separator_chars(char **buf) +{ + int l = strlen(*buf); + int i, j, sp = 0; + + for (i = 0; i < l; i++) + if ((*buf)[i] == '\\' || (*buf)[i] == ';' || (*buf)[i] == '=' || (*buf)[i] == ',') + sp++; + + if (!sp) + return; + + for (i = l-1, j = i+sp; i >= 0; i--) { + (*buf)[j--] = (*buf)[i]; + if ((*buf)[i] == '\\' || (*buf)[i] == ';' || (*buf)[i] == '=' || (*buf)[i] == ',') + (*buf)[j--] = '\\'; + } +} + +/* + * Internal function to allocate memory for IMA measurements. + */ +static void *dm_ima_alloc(size_t len, gfp_t flags, bool noio) +{ + unsigned int noio_flag; + void *ptr; + + if (noio) + noio_flag = memalloc_noio_save(); + + ptr = kzalloc(len, flags); + + if (noio) + memalloc_noio_restore(noio_flag); + + return ptr; +} + +/* + * Internal function to allocate and copy name and uuid for IMA measurements. + */ +static int dm_ima_alloc_and_copy_name_uuid(struct mapped_device *md, char **dev_name, + char **dev_uuid, bool noio) +{ + int r; + *dev_name = dm_ima_alloc(DM_NAME_LEN*2, GFP_KERNEL, noio); + if (!(*dev_name)) { + r = -ENOMEM; + goto error; + } + + *dev_uuid = dm_ima_alloc(DM_UUID_LEN*2, GFP_KERNEL, noio); + if (!(*dev_uuid)) { + r = -ENOMEM; + goto error; + } + + r = dm_copy_name_and_uuid(md, *dev_name, *dev_uuid); + if (r) + goto error; + + fix_separator_chars(dev_name); + fix_separator_chars(dev_uuid); + + return 0; +error: + kfree(*dev_name); + kfree(*dev_uuid); + *dev_name = NULL; + *dev_uuid = NULL; + return r; +} + +/* + * Internal function to allocate and copy device data for IMA measurements. + */ +static int dm_ima_alloc_and_copy_device_data(struct mapped_device *md, char **device_data, + unsigned int num_targets, bool noio) +{ + char *dev_name = NULL, *dev_uuid = NULL; + int r; + + r = dm_ima_alloc_and_copy_name_uuid(md, &dev_name, &dev_uuid, noio); + if (r) + return r; + + *device_data = dm_ima_alloc(DM_IMA_DEVICE_BUF_LEN, GFP_KERNEL, noio); + if (!(*device_data)) { + r = -ENOMEM; + goto error; + } + + scnprintf(*device_data, DM_IMA_DEVICE_BUF_LEN, + "name=%s,uuid=%s,major=%d,minor=%d,minor_count=%d,num_targets=%u;", + dev_name, dev_uuid, md->disk->major, md->disk->first_minor, + md->disk->minors, num_targets); +error: + kfree(dev_name); + kfree(dev_uuid); + return r; +} + +/* + * Internal wrapper function to call IMA to measure DM data. + */ +static void dm_ima_measure_data(const char *event_name, const void *buf, size_t buf_len, + bool noio) +{ + unsigned int noio_flag; + + if (noio) + noio_flag = memalloc_noio_save(); + + ima_measure_critical_data(DM_NAME, event_name, buf, buf_len, false); + + if (noio) + memalloc_noio_restore(noio_flag); +} + +/* + * Internal function to allocate and copy current device capacity for IMA measurements. + */ +static int dm_ima_alloc_and_copy_capacity_str(struct mapped_device *md, char **capacity_str, + bool noio) +{ + sector_t capacity; + + capacity = get_capacity(md->disk); + + *capacity_str = dm_ima_alloc(DM_IMA_DEVICE_CAPACITY_BUF_LEN, GFP_KERNEL, noio); + if (!(*capacity_str)) + return -ENOMEM; + + scnprintf(*capacity_str, DM_IMA_DEVICE_BUF_LEN, "current_device_capacity=%llu;", + capacity); + + return 0; +} + +/* + * Initialize/reset the dm ima related data structure variables. + */ +void dm_ima_reset_data(struct mapped_device *md) +{ + memset(&(md->ima), 0, sizeof(md->ima)); + md->ima.dm_version_str_len = strlen(DM_IMA_VERSION_STR); +} + +/* + * Build up the IMA data for each target, and finally measure. + */ +void dm_ima_measure_on_table_load(struct dm_table *table, unsigned int status_flags) +{ + size_t device_data_buf_len, target_metadata_buf_len, target_data_buf_len, l = 0; + char *target_metadata_buf = NULL, *target_data_buf = NULL, *digest_buf = NULL; + char *ima_buf = NULL, *device_data_buf = NULL; + int digest_size, last_target_measured = -1, r; + status_type_t type = STATUSTYPE_IMA; + size_t cur_total_buf_len = 0; + unsigned int num_targets, i; + SHASH_DESC_ON_STACK(shash, NULL); + struct crypto_shash *tfm = NULL; + u8 *digest = NULL; + bool noio = false; + /* + * In below hash_alg_prefix_len assignment +1 is for the additional char (':'), + * when prefixing the hash value with the hash algorithm name. e.g. sha256:. + */ + const size_t hash_alg_prefix_len = strlen(DM_IMA_TABLE_HASH_ALG) + 1; + char table_load_event_name[] = "dm_table_load"; + + ima_buf = dm_ima_alloc(DM_IMA_MEASUREMENT_BUF_LEN, GFP_KERNEL, noio); + if (!ima_buf) + return; + + target_metadata_buf = dm_ima_alloc(DM_IMA_TARGET_METADATA_BUF_LEN, GFP_KERNEL, noio); + if (!target_metadata_buf) + goto error; + + target_data_buf = dm_ima_alloc(DM_IMA_TARGET_DATA_BUF_LEN, GFP_KERNEL, noio); + if (!target_data_buf) + goto error; + + num_targets = dm_table_get_num_targets(table); + + if (dm_ima_alloc_and_copy_device_data(table->md, &device_data_buf, num_targets, noio)) + goto error; + + tfm = crypto_alloc_shash(DM_IMA_TABLE_HASH_ALG, 0, 0); + if (IS_ERR(tfm)) + goto error; + + shash->tfm = tfm; + digest_size = crypto_shash_digestsize(tfm); + digest = dm_ima_alloc(digest_size, GFP_KERNEL, noio); + if (!digest) + goto error; + + r = crypto_shash_init(shash); + if (r) + goto error; + + memcpy(ima_buf + l, DM_IMA_VERSION_STR, table->md->ima.dm_version_str_len); + l += table->md->ima.dm_version_str_len; + + device_data_buf_len = strlen(device_data_buf); + memcpy(ima_buf + l, device_data_buf, device_data_buf_len); + l += device_data_buf_len; + + for (i = 0; i < num_targets; i++) { + struct dm_target *ti = dm_table_get_target(table, i); + + if (!ti) + goto error; + + last_target_measured = 0; + + /* + * First retrieve the target metadata. + */ + scnprintf(target_metadata_buf, DM_IMA_TARGET_METADATA_BUF_LEN, + "target_index=%d,target_begin=%llu,target_len=%llu,", + i, ti->begin, ti->len); + target_metadata_buf_len = strlen(target_metadata_buf); + + /* + * Then retrieve the actual target data. + */ + if (ti->type->status) + ti->type->status(ti, type, status_flags, target_data_buf, + DM_IMA_TARGET_DATA_BUF_LEN); + else + target_data_buf[0] = '\0'; + + target_data_buf_len = strlen(target_data_buf); + + /* + * Check if the total data can fit into the IMA buffer. + */ + cur_total_buf_len = l + target_metadata_buf_len + target_data_buf_len; + + /* + * IMA measurements for DM targets are best-effort. + * If the total data buffered so far, including the current target, + * is too large to fit into DM_IMA_MEASUREMENT_BUF_LEN, measure what + * we have in the current buffer, and continue measuring the remaining + * targets by prefixing the device metadata again. + */ + if (unlikely(cur_total_buf_len >= DM_IMA_MEASUREMENT_BUF_LEN)) { + dm_ima_measure_data(table_load_event_name, ima_buf, l, noio); + r = crypto_shash_update(shash, (const u8 *)ima_buf, l); + if (r < 0) + goto error; + + memset(ima_buf, 0, DM_IMA_MEASUREMENT_BUF_LEN); + l = 0; + + /* + * Each new "dm_table_load" entry in IMA log should have device data + * prefix, so that multiple records from the same "dm_table_load" for + * a given device can be linked together. + */ + memcpy(ima_buf + l, DM_IMA_VERSION_STR, table->md->ima.dm_version_str_len); + l += table->md->ima.dm_version_str_len; + + memcpy(ima_buf + l, device_data_buf, device_data_buf_len); + l += device_data_buf_len; + + /* + * If this iteration of the for loop turns out to be the last target + * in the table, dm_ima_measure_data("dm_table_load", ...) doesn't need + * to be called again, just the hash needs to be finalized. + * "last_target_measured" tracks this state. + */ + last_target_measured = 1; + } + + /* + * Fill-in all the target metadata, so that multiple targets for the same + * device can be linked together. + */ + memcpy(ima_buf + l, target_metadata_buf, target_metadata_buf_len); + l += target_metadata_buf_len; + + memcpy(ima_buf + l, target_data_buf, target_data_buf_len); + l += target_data_buf_len; + } + + if (!last_target_measured) { + dm_ima_measure_data(table_load_event_name, ima_buf, l, noio); + + r = crypto_shash_update(shash, (const u8 *)ima_buf, l); + if (r < 0) + goto error; + } + + /* + * Finalize the table hash, and store it in table->md->ima.inactive_table.hash, + * so that the table data can be verified against the future device state change + * events, e.g. resume, rename, remove, table-clear etc. + */ + r = crypto_shash_final(shash, digest); + if (r < 0) + goto error; + + digest_buf = dm_ima_alloc((digest_size*2) + hash_alg_prefix_len + 1, GFP_KERNEL, noio); + + if (!digest_buf) + goto error; + + snprintf(digest_buf, hash_alg_prefix_len + 1, "%s:", DM_IMA_TABLE_HASH_ALG); + + for (i = 0; i < digest_size; i++) + snprintf((digest_buf + hash_alg_prefix_len + (i*2)), 3, "%02x", digest[i]); + + if (table->md->ima.active_table.hash != table->md->ima.inactive_table.hash) + kfree(table->md->ima.inactive_table.hash); + + table->md->ima.inactive_table.hash = digest_buf; + table->md->ima.inactive_table.hash_len = strlen(digest_buf); + table->md->ima.inactive_table.num_targets = num_targets; + + if (table->md->ima.active_table.device_metadata != + table->md->ima.inactive_table.device_metadata) + kfree(table->md->ima.inactive_table.device_metadata); + + table->md->ima.inactive_table.device_metadata = device_data_buf; + table->md->ima.inactive_table.device_metadata_len = device_data_buf_len; + + goto exit; +error: + kfree(digest_buf); + kfree(device_data_buf); +exit: + kfree(digest); + if (tfm) + crypto_free_shash(tfm); + kfree(ima_buf); + kfree(target_metadata_buf); + kfree(target_data_buf); +} + +/* + * Measure IMA data on device resume. + */ +void dm_ima_measure_on_device_resume(struct mapped_device *md, bool swap) +{ + char *device_table_data, *dev_name = NULL, *dev_uuid = NULL, *capacity_str = NULL; + char active[] = "active_table_hash="; + unsigned int active_len = strlen(active), capacity_len = 0; + unsigned int l = 0; + bool noio = true; + bool nodata = true; + int r; + + device_table_data = dm_ima_alloc(DM_IMA_DEVICE_BUF_LEN, GFP_KERNEL, noio); + if (!device_table_data) + return; + + r = dm_ima_alloc_and_copy_capacity_str(md, &capacity_str, noio); + if (r) + goto error; + + memcpy(device_table_data + l, DM_IMA_VERSION_STR, md->ima.dm_version_str_len); + l += md->ima.dm_version_str_len; + + if (swap) { + if (md->ima.active_table.hash != md->ima.inactive_table.hash) + kfree(md->ima.active_table.hash); + + md->ima.active_table.hash = NULL; + md->ima.active_table.hash_len = 0; + + if (md->ima.active_table.device_metadata != + md->ima.inactive_table.device_metadata) + kfree(md->ima.active_table.device_metadata); + + md->ima.active_table.device_metadata = NULL; + md->ima.active_table.device_metadata_len = 0; + md->ima.active_table.num_targets = 0; + + if (md->ima.inactive_table.hash) { + md->ima.active_table.hash = md->ima.inactive_table.hash; + md->ima.active_table.hash_len = md->ima.inactive_table.hash_len; + md->ima.inactive_table.hash = NULL; + md->ima.inactive_table.hash_len = 0; + } + + if (md->ima.inactive_table.device_metadata) { + md->ima.active_table.device_metadata = + md->ima.inactive_table.device_metadata; + md->ima.active_table.device_metadata_len = + md->ima.inactive_table.device_metadata_len; + md->ima.active_table.num_targets = md->ima.inactive_table.num_targets; + md->ima.inactive_table.device_metadata = NULL; + md->ima.inactive_table.device_metadata_len = 0; + md->ima.inactive_table.num_targets = 0; + } + } + + if (md->ima.active_table.device_metadata) { + memcpy(device_table_data + l, md->ima.active_table.device_metadata, + md->ima.active_table.device_metadata_len); + l += md->ima.active_table.device_metadata_len; + + nodata = false; + } + + if (md->ima.active_table.hash) { + memcpy(device_table_data + l, active, active_len); + l += active_len; + + memcpy(device_table_data + l, md->ima.active_table.hash, + md->ima.active_table.hash_len); + l += md->ima.active_table.hash_len; + + memcpy(device_table_data + l, ";", 1); + l++; + + nodata = false; + } + + if (nodata) { + r = dm_ima_alloc_and_copy_name_uuid(md, &dev_name, &dev_uuid, noio); + if (r) + goto error; + + scnprintf(device_table_data, DM_IMA_DEVICE_BUF_LEN, + "%sname=%s,uuid=%s;device_resume=no_data;", + DM_IMA_VERSION_STR, dev_name, dev_uuid); + l += strlen(device_table_data); + + } + + capacity_len = strlen(capacity_str); + memcpy(device_table_data + l, capacity_str, capacity_len); + l += capacity_len; + + dm_ima_measure_data("dm_device_resume", device_table_data, l, noio); + + kfree(dev_name); + kfree(dev_uuid); +error: + kfree(capacity_str); + kfree(device_table_data); +} + +/* + * Measure IMA data on remove. + */ +void dm_ima_measure_on_device_remove(struct mapped_device *md, bool remove_all) +{ + char *device_table_data, *dev_name = NULL, *dev_uuid = NULL, *capacity_str = NULL; + char active_table_str[] = "active_table_hash="; + char inactive_table_str[] = "inactive_table_hash="; + char device_active_str[] = "device_active_metadata="; + char device_inactive_str[] = "device_inactive_metadata="; + char remove_all_str[] = "remove_all="; + unsigned int active_table_len = strlen(active_table_str); + unsigned int inactive_table_len = strlen(inactive_table_str); + unsigned int device_active_len = strlen(device_active_str); + unsigned int device_inactive_len = strlen(device_inactive_str); + unsigned int remove_all_len = strlen(remove_all_str); + unsigned int capacity_len = 0; + unsigned int l = 0; + bool noio = true; + bool nodata = true; + int r; + + device_table_data = dm_ima_alloc(DM_IMA_DEVICE_BUF_LEN*2, GFP_KERNEL, noio); + if (!device_table_data) + goto exit; + + r = dm_ima_alloc_and_copy_capacity_str(md, &capacity_str, noio); + if (r) { + kfree(device_table_data); + goto exit; + } + + memcpy(device_table_data + l, DM_IMA_VERSION_STR, md->ima.dm_version_str_len); + l += md->ima.dm_version_str_len; + + if (md->ima.active_table.device_metadata) { + memcpy(device_table_data + l, device_active_str, device_active_len); + l += device_active_len; + + memcpy(device_table_data + l, md->ima.active_table.device_metadata, + md->ima.active_table.device_metadata_len); + l += md->ima.active_table.device_metadata_len; + + nodata = false; + } + + if (md->ima.inactive_table.device_metadata) { + memcpy(device_table_data + l, device_inactive_str, device_inactive_len); + l += device_inactive_len; + + memcpy(device_table_data + l, md->ima.inactive_table.device_metadata, + md->ima.inactive_table.device_metadata_len); + l += md->ima.inactive_table.device_metadata_len; + + nodata = false; + } + + if (md->ima.active_table.hash) { + memcpy(device_table_data + l, active_table_str, active_table_len); + l += active_table_len; + + memcpy(device_table_data + l, md->ima.active_table.hash, + md->ima.active_table.hash_len); + l += md->ima.active_table.hash_len; + + memcpy(device_table_data + l, ",", 1); + l++; + + nodata = false; + } + + if (md->ima.inactive_table.hash) { + memcpy(device_table_data + l, inactive_table_str, inactive_table_len); + l += inactive_table_len; + + memcpy(device_table_data + l, md->ima.inactive_table.hash, + md->ima.inactive_table.hash_len); + l += md->ima.inactive_table.hash_len; + + memcpy(device_table_data + l, ",", 1); + l++; + + nodata = false; + } + /* + * In case both active and inactive tables, and corresponding + * device metadata is cleared/missing - record the name and uuid + * in IMA measurements. + */ + if (nodata) { + if (dm_ima_alloc_and_copy_name_uuid(md, &dev_name, &dev_uuid, noio)) + goto error; + + scnprintf(device_table_data, DM_IMA_DEVICE_BUF_LEN, + "%sname=%s,uuid=%s;device_remove=no_data;", + DM_IMA_VERSION_STR, dev_name, dev_uuid); + l += strlen(device_table_data); + } + + memcpy(device_table_data + l, remove_all_str, remove_all_len); + l += remove_all_len; + memcpy(device_table_data + l, remove_all ? "y;" : "n;", 2); + l += 2; + + capacity_len = strlen(capacity_str); + memcpy(device_table_data + l, capacity_str, capacity_len); + l += capacity_len; + + dm_ima_measure_data("dm_device_remove", device_table_data, l, noio); + +error: + kfree(device_table_data); + kfree(capacity_str); +exit: + kfree(md->ima.active_table.device_metadata); + + if (md->ima.active_table.device_metadata != + md->ima.inactive_table.device_metadata) + kfree(md->ima.inactive_table.device_metadata); + + kfree(md->ima.active_table.hash); + + if (md->ima.active_table.hash != md->ima.inactive_table.hash) + kfree(md->ima.inactive_table.hash); + + dm_ima_reset_data(md); + + kfree(dev_name); + kfree(dev_uuid); +} + +/* + * Measure ima data on table clear. + */ +void dm_ima_measure_on_table_clear(struct mapped_device *md, bool new_map) +{ + unsigned int l = 0, capacity_len = 0; + char *device_table_data = NULL, *dev_name = NULL, *dev_uuid = NULL, *capacity_str = NULL; + char inactive_str[] = "inactive_table_hash="; + unsigned int inactive_len = strlen(inactive_str); + bool noio = true; + bool nodata = true; + int r; + + device_table_data = dm_ima_alloc(DM_IMA_DEVICE_BUF_LEN, GFP_KERNEL, noio); + if (!device_table_data) + return; + + r = dm_ima_alloc_and_copy_capacity_str(md, &capacity_str, noio); + if (r) + goto error1; + + memcpy(device_table_data + l, DM_IMA_VERSION_STR, md->ima.dm_version_str_len); + l += md->ima.dm_version_str_len; + + if (md->ima.inactive_table.device_metadata_len && + md->ima.inactive_table.hash_len) { + memcpy(device_table_data + l, md->ima.inactive_table.device_metadata, + md->ima.inactive_table.device_metadata_len); + l += md->ima.inactive_table.device_metadata_len; + + memcpy(device_table_data + l, inactive_str, inactive_len); + l += inactive_len; + + memcpy(device_table_data + l, md->ima.inactive_table.hash, + md->ima.inactive_table.hash_len); + + l += md->ima.inactive_table.hash_len; + + memcpy(device_table_data + l, ";", 1); + l++; + + nodata = false; + } + + if (nodata) { + if (dm_ima_alloc_and_copy_name_uuid(md, &dev_name, &dev_uuid, noio)) + goto error2; + + scnprintf(device_table_data, DM_IMA_DEVICE_BUF_LEN, + "%sname=%s,uuid=%s;table_clear=no_data;", + DM_IMA_VERSION_STR, dev_name, dev_uuid); + l += strlen(device_table_data); + } + + capacity_len = strlen(capacity_str); + memcpy(device_table_data + l, capacity_str, capacity_len); + l += capacity_len; + + dm_ima_measure_data("dm_table_clear", device_table_data, l, noio); + + if (new_map) { + if (md->ima.inactive_table.hash && + md->ima.inactive_table.hash != md->ima.active_table.hash) + kfree(md->ima.inactive_table.hash); + + md->ima.inactive_table.hash = NULL; + md->ima.inactive_table.hash_len = 0; + + if (md->ima.inactive_table.device_metadata && + md->ima.inactive_table.device_metadata != md->ima.active_table.device_metadata) + kfree(md->ima.inactive_table.device_metadata); + + md->ima.inactive_table.device_metadata = NULL; + md->ima.inactive_table.device_metadata_len = 0; + md->ima.inactive_table.num_targets = 0; + + if (md->ima.active_table.hash) { + md->ima.inactive_table.hash = md->ima.active_table.hash; + md->ima.inactive_table.hash_len = md->ima.active_table.hash_len; + } + + if (md->ima.active_table.device_metadata) { + md->ima.inactive_table.device_metadata = + md->ima.active_table.device_metadata; + md->ima.inactive_table.device_metadata_len = + md->ima.active_table.device_metadata_len; + md->ima.inactive_table.num_targets = + md->ima.active_table.num_targets; + } + } + + kfree(dev_name); + kfree(dev_uuid); +error2: + kfree(capacity_str); +error1: + kfree(device_table_data); +} + +/* + * Measure IMA data on device rename. + */ +void dm_ima_measure_on_device_rename(struct mapped_device *md) +{ + char *old_device_data = NULL, *new_device_data = NULL, *combined_device_data = NULL; + char *new_dev_name = NULL, *new_dev_uuid = NULL, *capacity_str = NULL; + bool noio = true; + int r; + + if (dm_ima_alloc_and_copy_device_data(md, &new_device_data, + md->ima.active_table.num_targets, noio)) + return; + + if (dm_ima_alloc_and_copy_name_uuid(md, &new_dev_name, &new_dev_uuid, noio)) + goto error; + + combined_device_data = dm_ima_alloc(DM_IMA_DEVICE_BUF_LEN * 2, GFP_KERNEL, noio); + if (!combined_device_data) + goto error; + + r = dm_ima_alloc_and_copy_capacity_str(md, &capacity_str, noio); + if (r) + goto error; + + old_device_data = md->ima.active_table.device_metadata; + + md->ima.active_table.device_metadata = new_device_data; + md->ima.active_table.device_metadata_len = strlen(new_device_data); + + scnprintf(combined_device_data, DM_IMA_DEVICE_BUF_LEN * 2, + "%s%snew_name=%s,new_uuid=%s;%s", DM_IMA_VERSION_STR, old_device_data, + new_dev_name, new_dev_uuid, capacity_str); + + dm_ima_measure_data("dm_device_rename", combined_device_data, strlen(combined_device_data), + noio); + + goto exit; + +error: + kfree(new_device_data); +exit: + kfree(capacity_str); + kfree(combined_device_data); + kfree(old_device_data); + kfree(new_dev_name); + kfree(new_dev_uuid); +} diff --git a/drivers/md/dm-ima.h b/drivers/md/dm-ima.h new file mode 100644 index 000000000000..b8c3b614670b --- /dev/null +++ b/drivers/md/dm-ima.h @@ -0,0 +1,78 @@ +/* SPDX-License-Identifier: GPL-2.0 + * + * Copyright (C) 2021 Microsoft Corporation + * + * Author: Tushar Sugandhi + * + * File: dm-ima.h + * Header file for device mapper IMA measurements. + */ + +#ifndef DM_IMA_H +#define DM_IMA_H + +#define DM_IMA_MEASUREMENT_BUF_LEN 4096 +#define DM_IMA_DEVICE_BUF_LEN 1024 +#define DM_IMA_TARGET_METADATA_BUF_LEN 128 +#define DM_IMA_TARGET_DATA_BUF_LEN 2048 +#define DM_IMA_DEVICE_CAPACITY_BUF_LEN 128 +#define DM_IMA_TABLE_HASH_ALG "sha256" + +#define __dm_ima_stringify(s) #s +#define __dm_ima_str(s) __dm_ima_stringify(s) + +#define DM_IMA_VERSION_STR "dm_version=" \ + __dm_ima_str(DM_VERSION_MAJOR) "." \ + __dm_ima_str(DM_VERSION_MINOR) "." \ + __dm_ima_str(DM_VERSION_PATCHLEVEL) ";" + +#ifdef CONFIG_IMA + +struct dm_ima_device_table_metadata { + /* + * Contains data specific to the device which is common across + * all the targets in the table (e.g. name, uuid, major, minor, etc). + * The values are stored in comma separated list of key1=val1,key2=val2; + * pairs delimited by a semicolon at the end of the list. + */ + char *device_metadata; + unsigned int device_metadata_len; + unsigned int num_targets; + + /* + * Contains the sha256 hashes of the IMA measurements of the target + * attributes' key-value pairs from the active/inactive tables. + */ + char *hash; + unsigned int hash_len; +}; + +/* + * This structure contains device metadata, and table hash for + * active and inactive tables for ima measurements. + */ +struct dm_ima_measurements { + struct dm_ima_device_table_metadata active_table; + struct dm_ima_device_table_metadata inactive_table; + unsigned int dm_version_str_len; +}; + +void dm_ima_reset_data(struct mapped_device *md); +void dm_ima_measure_on_table_load(struct dm_table *table, unsigned int status_flags); +void dm_ima_measure_on_device_resume(struct mapped_device *md, bool swap); +void dm_ima_measure_on_device_remove(struct mapped_device *md, bool remove_all); +void dm_ima_measure_on_table_clear(struct mapped_device *md, bool new_map); +void dm_ima_measure_on_device_rename(struct mapped_device *md); + +#else + +static inline void dm_ima_reset_data(struct mapped_device *md) {} +static inline void dm_ima_measure_on_table_load(struct dm_table *table, unsigned int status_flags) {} +static inline void dm_ima_measure_on_device_resume(struct mapped_device *md, bool swap) {} +static inline void dm_ima_measure_on_device_remove(struct mapped_device *md, bool remove_all) {} +static inline void dm_ima_measure_on_table_clear(struct mapped_device *md, bool new_map) {} +static inline void dm_ima_measure_on_device_rename(struct mapped_device *md) {} + +#endif /* CONFIG_IMA */ + +#endif /* DM_IMA_H */ diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index a9ea361769a7..dc03b70f6e65 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -3306,6 +3306,30 @@ static void dm_integrity_status(struct dm_target *ti, status_type_t type, EMIT_ALG(journal_mac_alg, "journal_mac"); break; } + case STATUSTYPE_IMA: + DMEMIT_TARGET_NAME_VERSION(ti->type); + DMEMIT(",dev_name=%s,start=%llu,tag_size=%u,mode=%c", + ic->dev->name, ic->start, ic->tag_size, ic->mode); + + if (ic->meta_dev) + DMEMIT(",meta_device=%s", ic->meta_dev->name); + if (ic->sectors_per_block != 1) + DMEMIT(",block_size=%u", ic->sectors_per_block << SECTOR_SHIFT); + + DMEMIT(",recalculate=%c", (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING)) ? + 'y' : 'n'); + DMEMIT(",allow_discards=%c", ic->discard ? 'y' : 'n'); + DMEMIT(",fix_padding=%c", + ((ic->sb->flags & cpu_to_le32(SB_FLAG_FIXED_PADDING)) != 0) ? 'y' : 'n'); + DMEMIT(",fix_hmac=%c", + ((ic->sb->flags & cpu_to_le32(SB_FLAG_FIXED_HMAC)) != 0) ? 'y' : 'n'); + DMEMIT(",legacy_recalculate=%c", ic->legacy_recalculate ? 'y' : 'n'); + + DMEMIT(",journal_sectors=%u", ic->initial_sectors - SB_SECTORS); + DMEMIT(",interleave_sectors=%u", 1U << ic->sb->log2_interleave_sectors); + DMEMIT(",buffer_sectors=%u", 1U << ic->log2_buffer_sectors); + DMEMIT(";"); + break; } } diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index 2575074a2204..21fe8652b095 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -6,7 +6,7 @@ */ #include "dm-core.h" - +#include "dm-ima.h" #include #include #include @@ -20,6 +20,7 @@ #include #include +#include #define DM_MSG_PREFIX "ioctl" #define DM_DRIVER_EMAIL "dm-devel@redhat.com" @@ -347,6 +348,7 @@ static void dm_hash_remove_all(bool keep_open_devices, bool mark_deferred, bool dm_sync_table(md); dm_table_destroy(t); } + dm_ima_measure_on_device_remove(md, true); dm_put(md); if (likely(keep_open_devices)) dm_destroy(md); @@ -483,6 +485,9 @@ static struct mapped_device *dm_hash_rename(struct dm_ioctl *param, param->flags |= DM_UEVENT_GENERATED_FLAG; md = hc->md; + + dm_ima_measure_on_device_rename(md); + up_write(&_hash_lock); kfree(old_name); @@ -981,6 +986,8 @@ static int dev_remove(struct file *filp, struct dm_ioctl *param, size_t param_si param->flags &= ~DM_DEFERRED_REMOVE; + dm_ima_measure_on_device_remove(md, false); + if (!dm_kobject_uevent(md, KOBJ_REMOVE, param->event_nr)) param->flags |= DM_UEVENT_GENERATED_FLAG; @@ -1159,8 +1166,12 @@ static int do_resume(struct dm_ioctl *param) if (dm_suspended_md(md)) { r = dm_resume(md); - if (!r && !dm_kobject_uevent(md, KOBJ_CHANGE, param->event_nr)) - param->flags |= DM_UEVENT_GENERATED_FLAG; + if (!r) { + dm_ima_measure_on_device_resume(md, new_map ? true : false); + + if (!dm_kobject_uevent(md, KOBJ_CHANGE, param->event_nr)) + param->flags |= DM_UEVENT_GENERATED_FLAG; + } } /* @@ -1224,6 +1235,8 @@ static void retrieve_status(struct dm_table *table, if (param->flags & DM_STATUS_TABLE_FLAG) type = STATUSTYPE_TABLE; + else if (param->flags & DM_IMA_MEASUREMENT_FLAG) + type = STATUSTYPE_IMA; else type = STATUSTYPE_INFO; @@ -1425,6 +1438,8 @@ static int table_load(struct file *filp, struct dm_ioctl *param, size_t param_si if (r) goto err_unlock_md_type; + dm_ima_measure_on_table_load(t, STATUSTYPE_IMA); + immutable_target_type = dm_get_immutable_target_type(md); if (immutable_target_type && (immutable_target_type != dm_table_get_immutable_target_type(t)) && @@ -1493,6 +1508,7 @@ static int table_clear(struct file *filp, struct dm_ioctl *param, size_t param_s struct hash_cell *hc; struct mapped_device *md; struct dm_table *old_map = NULL; + bool has_new_map = false; down_write(&_hash_lock); @@ -1506,6 +1522,7 @@ static int table_clear(struct file *filp, struct dm_ioctl *param, size_t param_s if (hc->new_map) { old_map = hc->new_map; hc->new_map = NULL; + has_new_map = true; } param->flags &= ~DM_INACTIVE_PRESENT_FLAG; @@ -1517,6 +1534,7 @@ static int table_clear(struct file *filp, struct dm_ioctl *param, size_t param_s dm_sync_table(md); dm_table_destroy(old_map); } + dm_ima_measure_on_table_clear(md, has_new_map); dm_put(md); return 0; diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c index c91f1e2e2f65..679b4c0a2eea 100644 --- a/drivers/md/dm-linear.c +++ b/drivers/md/dm-linear.c @@ -106,6 +106,7 @@ static void linear_status(struct dm_target *ti, status_type_t type, unsigned status_flags, char *result, unsigned maxlen) { struct linear_c *lc = (struct linear_c *) ti->private; + size_t sz = 0; switch (type) { case STATUSTYPE_INFO: @@ -113,8 +114,13 @@ static void linear_status(struct dm_target *ti, status_type_t type, break; case STATUSTYPE_TABLE: - snprintf(result, maxlen, "%s %llu", lc->dev->name, - (unsigned long long)lc->start); + DMEMIT("%s %llu", lc->dev->name, (unsigned long long)lc->start); + break; + + case STATUSTYPE_IMA: + DMEMIT_TARGET_NAME_VERSION(ti->type); + DMEMIT(",device_name=%s,start=%llu;", lc->dev->name, + (unsigned long long)lc->start); break; } } diff --git a/drivers/md/dm-log-userspace-base.c b/drivers/md/dm-log-userspace-base.c index 52090bee17c2..9ab93ebea889 100644 --- a/drivers/md/dm-log-userspace-base.c +++ b/drivers/md/dm-log-userspace-base.c @@ -820,6 +820,9 @@ static int userspace_status(struct dm_dirty_log *log, status_type_t status_type, DMEMIT("integrated_flush "); DMEMIT("%s ", table_args); break; + case STATUSTYPE_IMA: + *result = '\0'; + break; } return (r) ? 0 : (int)sz; } diff --git a/drivers/md/dm-log-writes.c b/drivers/md/dm-log-writes.c index 57882654ffee..d93a4db23512 100644 --- a/drivers/md/dm-log-writes.c +++ b/drivers/md/dm-log-writes.c @@ -834,6 +834,10 @@ static void log_writes_status(struct dm_target *ti, status_type_t type, case STATUSTYPE_TABLE: DMEMIT("%s %s", lc->dev->name, lc->logdev->name); break; + + case STATUSTYPE_IMA: + *result = '\0'; + break; } } diff --git a/drivers/md/dm-log.c b/drivers/md/dm-log.c index 33e71ea6cc14..1ecf75ef276a 100644 --- a/drivers/md/dm-log.c +++ b/drivers/md/dm-log.c @@ -793,6 +793,11 @@ static int core_status(struct dm_dirty_log *log, status_type_t status, DMEMIT("%s %u %u ", log->type->name, lc->sync == DEFAULTSYNC ? 1 : 2, lc->region_size); DMEMIT_SYNC; + break; + + case STATUSTYPE_IMA: + *result = '\0'; + break; } return sz; @@ -817,6 +822,11 @@ static int disk_status(struct dm_dirty_log *log, status_type_t status, lc->sync == DEFAULTSYNC ? 2 : 3, lc->log_dev->name, lc->region_size); DMEMIT_SYNC; + break; + + case STATUSTYPE_IMA: + *result = '\0'; + break; } return sz; diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index bced42f082b0..694aaca4eea2 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -1790,7 +1790,7 @@ static void multipath_resume(struct dm_target *ti) static void multipath_status(struct dm_target *ti, status_type_t type, unsigned status_flags, char *result, unsigned maxlen) { - int sz = 0; + int sz = 0, pg_counter, pgpath_counter; unsigned long flags; struct multipath *m = ti->private; struct priority_group *pg; @@ -1904,6 +1904,44 @@ static void multipath_status(struct dm_target *ti, status_type_t type, } } break; + + case STATUSTYPE_IMA: + sz = 0; /*reset the result pointer*/ + + DMEMIT_TARGET_NAME_VERSION(ti->type); + DMEMIT(",nr_priority_groups=%u", m->nr_priority_groups); + + pg_counter = 0; + list_for_each_entry(pg, &m->priority_groups, list) { + if (pg->bypassed) + state = 'D'; /* Disabled */ + else if (pg == m->current_pg) + state = 'A'; /* Currently Active */ + else + state = 'E'; /* Enabled */ + DMEMIT(",pg_state_%d=%c", pg_counter, state); + DMEMIT(",nr_pgpaths_%d=%u", pg_counter, pg->nr_pgpaths); + DMEMIT(",path_selector_name_%d=%s", pg_counter, pg->ps.type->name); + + pgpath_counter = 0; + list_for_each_entry(p, &pg->pgpaths, list) { + DMEMIT(",path_name_%d_%d=%s,is_active_%d_%d=%c,fail_count_%d_%d=%u", + pg_counter, pgpath_counter, p->path.dev->name, + pg_counter, pgpath_counter, p->is_active ? 'A' : 'F', + pg_counter, pgpath_counter, p->fail_count); + if (pg->ps.type->status) { + DMEMIT(",path_selector_status_%d_%d=", + pg_counter, pgpath_counter); + sz += pg->ps.type->status(&pg->ps, &p->path, + type, result + sz, + maxlen - sz); + } + pgpath_counter++; + } + pg_counter++; + } + DMEMIT(";"); + break; } spin_unlock_irqrestore(&m->lock, flags); diff --git a/drivers/md/dm-ps-historical-service-time.c b/drivers/md/dm-ps-historical-service-time.c index 186f91e2752c..1856a1b125cc 100644 --- a/drivers/md/dm-ps-historical-service-time.c +++ b/drivers/md/dm-ps-historical-service-time.c @@ -255,6 +255,9 @@ static int hst_status(struct path_selector *ps, struct dm_path *path, case STATUSTYPE_TABLE: DMEMIT("0 "); break; + case STATUSTYPE_IMA: + *result = '\0'; + break; } } diff --git a/drivers/md/dm-ps-io-affinity.c b/drivers/md/dm-ps-io-affinity.c index cb8e83bfb1a7..f74501e65a8e 100644 --- a/drivers/md/dm-ps-io-affinity.c +++ b/drivers/md/dm-ps-io-affinity.c @@ -170,6 +170,9 @@ static int ioa_status(struct path_selector *ps, struct dm_path *path, pi = path->pscontext; DMEMIT("%*pb ", cpumask_pr_args(pi->cpumask)); break; + case STATUSTYPE_IMA: + *result = '\0'; + break; } return sz; diff --git a/drivers/md/dm-ps-queue-length.c b/drivers/md/dm-ps-queue-length.c index 5fd018d18418..cef70657bbbc 100644 --- a/drivers/md/dm-ps-queue-length.c +++ b/drivers/md/dm-ps-queue-length.c @@ -102,6 +102,9 @@ static int ql_status(struct path_selector *ps, struct dm_path *path, case STATUSTYPE_TABLE: DMEMIT("%u ", pi->repeat_count); break; + case STATUSTYPE_IMA: + *result = '\0'; + break; } } diff --git a/drivers/md/dm-ps-round-robin.c b/drivers/md/dm-ps-round-robin.c index bdbb7e6e8212..27f44c5fa04e 100644 --- a/drivers/md/dm-ps-round-robin.c +++ b/drivers/md/dm-ps-round-robin.c @@ -100,6 +100,10 @@ static int rr_status(struct path_selector *ps, struct dm_path *path, pi = path->pscontext; DMEMIT("%u ", pi->repeat_count); break; + + case STATUSTYPE_IMA: + *result = '\0'; + break; } } diff --git a/drivers/md/dm-ps-service-time.c b/drivers/md/dm-ps-service-time.c index 9cfda665e9eb..3ec9c33265c5 100644 --- a/drivers/md/dm-ps-service-time.c +++ b/drivers/md/dm-ps-service-time.c @@ -99,6 +99,9 @@ static int st_status(struct path_selector *ps, struct dm_path *path, DMEMIT("%u %u ", pi->repeat_count, pi->relative_throughput); break; + case STATUSTYPE_IMA: + result[0] = '\0'; + break; } } diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index bf4a467fc73a..d9ef52159a22 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -3671,6 +3671,45 @@ static void raid_status(struct dm_target *ti, status_type_t type, for (i = 0; i < rs->raid_disks; i++) DMEMIT(" %s %s", __get_dev_name(rs->dev[i].meta_dev), __get_dev_name(rs->dev[i].data_dev)); + break; + + case STATUSTYPE_IMA: + rt = get_raid_type_by_ll(mddev->new_level, mddev->new_layout); + if (!rt) + return; + + DMEMIT_TARGET_NAME_VERSION(ti->type); + DMEMIT(",raid_type=%s,raid_disks=%d", rt->name, mddev->raid_disks); + + /* Access most recent mddev properties for status output */ + smp_rmb(); + recovery = rs->md.recovery; + state = decipher_sync_action(mddev, recovery); + DMEMIT(",raid_state=%s", sync_str(state)); + + for (i = 0; i < rs->raid_disks; i++) { + DMEMIT(",raid_device_%d_status=", i); + DMEMIT(__raid_dev_status(rs, &rs->dev[i].rdev)); + } + + if (rt_is_raid456(rt)) { + DMEMIT(",journal_dev_mode="); + switch (rs->journal_dev.mode) { + case R5C_JOURNAL_MODE_WRITE_THROUGH: + DMEMIT("%s", + _raid456_journal_mode[R5C_JOURNAL_MODE_WRITE_THROUGH].param); + break; + case R5C_JOURNAL_MODE_WRITE_BACK: + DMEMIT("%s", + _raid456_journal_mode[R5C_JOURNAL_MODE_WRITE_BACK].param); + break; + default: + DMEMIT("invalid"); + break; + } + } + DMEMIT(";"); + break; } } diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index ebb4810cc3b4..8811d484fdd1 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c @@ -1435,6 +1435,23 @@ static void mirror_status(struct dm_target *ti, status_type_t type, } break; + + case STATUSTYPE_IMA: + DMEMIT_TARGET_NAME_VERSION(ti->type); + DMEMIT(",nr_mirrors=%d", ms->nr_mirrors); + for (m = 0; m < ms->nr_mirrors; m++) { + DMEMIT(",mirror_device_%d=%s", m, ms->mirror[m].dev->name); + DMEMIT(",mirror_device_%d_status=%c", + m, device_status_char(&(ms->mirror[m]))); + } + + DMEMIT(",handle_errors=%c", errors_handled(ms) ? 'y' : 'n'); + DMEMIT(",keep_log=%c", keep_log(ms) ? 'y' : 'n'); + + DMEMIT(",log_type_status="); + sz += log->type->status(log, type, result+sz, maxlen-sz); + DMEMIT(";"); + break; } } diff --git a/drivers/md/dm-snap-persistent.c b/drivers/md/dm-snap-persistent.c index 9ab4bf651ca9..3bb5cff5d6fc 100644 --- a/drivers/md/dm-snap-persistent.c +++ b/drivers/md/dm-snap-persistent.c @@ -908,6 +908,10 @@ static unsigned persistent_status(struct dm_exception_store *store, case STATUSTYPE_TABLE: DMEMIT(" %s %llu", store->userspace_supports_overflow ? "PO" : "P", (unsigned long long)store->chunk_size); + break; + case STATUSTYPE_IMA: + *result = '\0'; + break; } return sz; diff --git a/drivers/md/dm-snap-transient.c b/drivers/md/dm-snap-transient.c index 4d50a12cf00c..0e0ae4c36b37 100644 --- a/drivers/md/dm-snap-transient.c +++ b/drivers/md/dm-snap-transient.c @@ -95,6 +95,10 @@ static unsigned transient_status(struct dm_exception_store *store, break; case STATUSTYPE_TABLE: DMEMIT(" N %llu", (unsigned long long)store->chunk_size); + break; + case STATUSTYPE_IMA: + *result = '\0'; + break; } return sz; diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index 751ec5ea1dbb..dcf34c6b05ad 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -2390,6 +2390,16 @@ static void snapshot_status(struct dm_target *ti, status_type_t type, DMEMIT(" discard_passdown_origin"); } break; + + case STATUSTYPE_IMA: + DMEMIT_TARGET_NAME_VERSION(ti->type); + DMEMIT(",snap_origin_name=%s", snap->origin->name); + DMEMIT(",snap_cow_name=%s", snap->cow->name); + DMEMIT(",snap_valid=%c", snap->valid ? 'y' : 'n'); + DMEMIT(",snap_merge_failed=%c", snap->merge_failed ? 'y' : 'n'); + DMEMIT(",snapshot_overflowed=%c", snap->snapshot_overflowed ? 'y' : 'n'); + DMEMIT(";"); + break; } } @@ -2734,6 +2744,9 @@ static void origin_status(struct dm_target *ti, status_type_t type, case STATUSTYPE_TABLE: snprintf(result, maxlen, "%s", o->dev->name); break; + case STATUSTYPE_IMA: + result[0] = '\0'; + break; } } diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c index df359d33cda8..6660b6b53d5b 100644 --- a/drivers/md/dm-stripe.c +++ b/drivers/md/dm-stripe.c @@ -428,6 +428,21 @@ static void stripe_status(struct dm_target *ti, status_type_t type, DMEMIT(" %s %llu", sc->stripe[i].dev->name, (unsigned long long)sc->stripe[i].physical_start); break; + + case STATUSTYPE_IMA: + DMEMIT_TARGET_NAME_VERSION(ti->type); + DMEMIT(",stripes=%d,chunk_size=%llu", sc->stripes, + (unsigned long long)sc->chunk_size); + + for (i = 0; i < sc->stripes; i++) { + DMEMIT(",stripe_%d_device_name=%s", i, sc->stripe[i].dev->name); + DMEMIT(",stripe_%d_physical_start=%llu", i, + (unsigned long long)sc->stripe[i].physical_start); + DMEMIT(",stripe_%d_status=%c", i, + atomic_read(&(sc->stripe[i].error_count)) ? 'D' : 'A'); + } + DMEMIT(";"); + break; } } diff --git a/drivers/md/dm-switch.c b/drivers/md/dm-switch.c index 262e2b0fd975..028a92ff6d57 100644 --- a/drivers/md/dm-switch.c +++ b/drivers/md/dm-switch.c @@ -504,6 +504,10 @@ static void switch_status(struct dm_target *ti, status_type_t type, DMEMIT(" %s %llu", sctx->path_list[path_nr].dmdev->name, (unsigned long long)sctx->path_list[path_nr].start); break; + + case STATUSTYPE_IMA: + result[0] = '\0'; + break; } } diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index 985baee3a678..4c67b77c23c1 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -4012,6 +4012,10 @@ static void pool_status(struct dm_target *ti, status_type_t type, (unsigned long long)pt->low_water_blocks); emit_flags(&pt->requested_pf, result, sz, maxlen); break; + + case STATUSTYPE_IMA: + *result = '\0'; + break; } return; @@ -4423,6 +4427,10 @@ static void thin_status(struct dm_target *ti, status_type_t type, if (tc->origin_dev) DMEMIT(" %s", format_dev_t(buf, tc->origin_dev->bdev->bd_dev)); break; + + case STATUSTYPE_IMA: + *result = '\0'; + break; } } diff --git a/drivers/md/dm-unstripe.c b/drivers/md/dm-unstripe.c index 7357c1bd5863..fdc8921e5c19 100644 --- a/drivers/md/dm-unstripe.c +++ b/drivers/md/dm-unstripe.c @@ -156,6 +156,10 @@ static void unstripe_status(struct dm_target *ti, status_type_t type, uc->stripes, (unsigned long long)uc->chunk_size, uc->unstripe, uc->dev->name, (unsigned long long)uc->physical_start); break; + + case STATUSTYPE_IMA: + *result = '\0'; + break; } } diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c index d3e76aefc1a6..22a5ac82446a 100644 --- a/drivers/md/dm-verity-target.c +++ b/drivers/md/dm-verity-target.c @@ -772,6 +772,49 @@ static void verity_status(struct dm_target *ti, status_type_t type, DMEMIT(" " DM_VERITY_ROOT_HASH_VERIFICATION_OPT_SIG_KEY " %s", v->signature_key_desc); break; + + case STATUSTYPE_IMA: + DMEMIT_TARGET_NAME_VERSION(ti->type); + DMEMIT(",hash_failed=%c", v->hash_failed ? 'C' : 'V'); + DMEMIT(",verity_version=%u", v->version); + DMEMIT(",data_device_name=%s", v->data_dev->name); + DMEMIT(",hash_device_name=%s", v->hash_dev->name); + DMEMIT(",verity_algorithm=%s", v->alg_name); + + DMEMIT(",root_digest="); + for (x = 0; x < v->digest_size; x++) + DMEMIT("%02x", v->root_digest[x]); + + DMEMIT(",salt="); + if (!v->salt_size) + DMEMIT("-"); + else + for (x = 0; x < v->salt_size; x++) + DMEMIT("%02x", v->salt[x]); + + DMEMIT(",ignore_zero_blocks=%c", v->zero_digest ? 'y' : 'n'); + DMEMIT(",check_at_most_once=%c", v->validated_blocks ? 'y' : 'n'); + if (v->signature_key_desc) + DMEMIT(",root_hash_sig_key_desc=%s", v->signature_key_desc); + + if (v->mode != DM_VERITY_MODE_EIO) { + DMEMIT(",verity_mode="); + switch (v->mode) { + case DM_VERITY_MODE_LOGGING: + DMEMIT(DM_VERITY_OPT_LOGGING); + break; + case DM_VERITY_MODE_RESTART: + DMEMIT(DM_VERITY_OPT_RESTART); + break; + case DM_VERITY_MODE_PANIC: + DMEMIT(DM_VERITY_OPT_PANIC); + break; + default: + DMEMIT("invalid"); + } + } + DMEMIT(";"); + break; } } diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c index 3d2cf811ec3e..18320444fb0a 100644 --- a/drivers/md/dm-writecache.c +++ b/drivers/md/dm-writecache.c @@ -206,6 +206,19 @@ struct dm_writecache { struct bio_set bio_set; mempool_t copy_pool; + + struct { + unsigned long long reads; + unsigned long long read_hits; + unsigned long long writes; + unsigned long long write_hits_uncommitted; + unsigned long long write_hits_committed; + unsigned long long writes_around; + unsigned long long writes_allocate; + unsigned long long writes_blocked_on_freelist; + unsigned long long flushes; + unsigned long long discards; + } stats; }; #define WB_LIST_INLINE 16 @@ -1157,6 +1170,18 @@ static int process_cleaner_mesg(unsigned argc, char **argv, struct dm_writecache return 0; } +static int process_clear_stats_mesg(unsigned argc, char **argv, struct dm_writecache *wc) +{ + if (argc != 1) + return -EINVAL; + + wc_lock(wc); + memset(&wc->stats, 0, sizeof wc->stats); + wc_unlock(wc); + + return 0; +} + static int writecache_message(struct dm_target *ti, unsigned argc, char **argv, char *result, unsigned maxlen) { @@ -1169,6 +1194,8 @@ static int writecache_message(struct dm_target *ti, unsigned argc, char **argv, r = process_flush_on_suspend_mesg(argc, argv, wc); else if (!strcasecmp(argv[0], "cleaner")) r = process_cleaner_mesg(argc, argv, wc); + else if (!strcasecmp(argv[0], "clear_stats")) + r = process_clear_stats_mesg(argc, argv, wc); else DMERR("unrecognised message received: %s", argv[0]); @@ -1293,31 +1320,218 @@ static void writecache_offload_bio(struct dm_writecache *wc, struct bio *bio) bio_list_add(&wc->flush_list, bio); } -static int writecache_map(struct dm_target *ti, struct bio *bio) +enum wc_map_op { + WC_MAP_SUBMIT, + WC_MAP_REMAP, + WC_MAP_REMAP_ORIGIN, + WC_MAP_RETURN, + WC_MAP_ERROR, +}; + +static enum wc_map_op writecache_map_remap_origin(struct dm_writecache *wc, struct bio *bio, + struct wc_entry *e) +{ + if (e) { + sector_t next_boundary = + read_original_sector(wc, e) - bio->bi_iter.bi_sector; + if (next_boundary < bio->bi_iter.bi_size >> SECTOR_SHIFT) + dm_accept_partial_bio(bio, next_boundary); + } + + return WC_MAP_REMAP_ORIGIN; +} + +static enum wc_map_op writecache_map_read(struct dm_writecache *wc, struct bio *bio) +{ + enum wc_map_op map_op; + struct wc_entry *e; + +read_next_block: + wc->stats.reads++; + e = writecache_find_entry(wc, bio->bi_iter.bi_sector, WFE_RETURN_FOLLOWING); + if (e && read_original_sector(wc, e) == bio->bi_iter.bi_sector) { + wc->stats.read_hits++; + if (WC_MODE_PMEM(wc)) { + bio_copy_block(wc, bio, memory_data(wc, e)); + if (bio->bi_iter.bi_size) + goto read_next_block; + map_op = WC_MAP_SUBMIT; + } else { + dm_accept_partial_bio(bio, wc->block_size >> SECTOR_SHIFT); + bio_set_dev(bio, wc->ssd_dev->bdev); + bio->bi_iter.bi_sector = cache_sector(wc, e); + if (!writecache_entry_is_committed(wc, e)) + writecache_wait_for_ios(wc, WRITE); + map_op = WC_MAP_REMAP; + } + } else { + map_op = writecache_map_remap_origin(wc, bio, e); + } + + return map_op; +} + +static enum wc_map_op writecache_bio_copy_ssd(struct dm_writecache *wc, struct bio *bio, + struct wc_entry *e, bool search_used) +{ + unsigned bio_size = wc->block_size; + sector_t start_cache_sec = cache_sector(wc, e); + sector_t current_cache_sec = start_cache_sec + (bio_size >> SECTOR_SHIFT); + + while (bio_size < bio->bi_iter.bi_size) { + if (!search_used) { + struct wc_entry *f = writecache_pop_from_freelist(wc, current_cache_sec); + if (!f) + break; + write_original_sector_seq_count(wc, f, bio->bi_iter.bi_sector + + (bio_size >> SECTOR_SHIFT), wc->seq_count); + writecache_insert_entry(wc, f); + wc->uncommitted_blocks++; + } else { + struct wc_entry *f; + struct rb_node *next = rb_next(&e->rb_node); + if (!next) + break; + f = container_of(next, struct wc_entry, rb_node); + if (f != e + 1) + break; + if (read_original_sector(wc, f) != + read_original_sector(wc, e) + (wc->block_size >> SECTOR_SHIFT)) + break; + if (unlikely(f->write_in_progress)) + break; + if (writecache_entry_is_committed(wc, f)) + wc->overwrote_committed = true; + e = f; + } + bio_size += wc->block_size; + current_cache_sec += wc->block_size >> SECTOR_SHIFT; + } + + bio_set_dev(bio, wc->ssd_dev->bdev); + bio->bi_iter.bi_sector = start_cache_sec; + dm_accept_partial_bio(bio, bio_size >> SECTOR_SHIFT); + + if (unlikely(wc->uncommitted_blocks >= wc->autocommit_blocks)) { + wc->uncommitted_blocks = 0; + queue_work(wc->writeback_wq, &wc->flush_work); + } else { + writecache_schedule_autocommit(wc); + } + + return WC_MAP_REMAP; +} + +static enum wc_map_op writecache_map_write(struct dm_writecache *wc, struct bio *bio) { struct wc_entry *e; + + do { + bool found_entry = false; + bool search_used = false; + wc->stats.writes++; + if (writecache_has_error(wc)) + return WC_MAP_ERROR; + e = writecache_find_entry(wc, bio->bi_iter.bi_sector, 0); + if (e) { + if (!writecache_entry_is_committed(wc, e)) { + wc->stats.write_hits_uncommitted++; + search_used = true; + goto bio_copy; + } + wc->stats.write_hits_committed++; + if (!WC_MODE_PMEM(wc) && !e->write_in_progress) { + wc->overwrote_committed = true; + search_used = true; + goto bio_copy; + } + found_entry = true; + } else { + if (unlikely(wc->cleaner) || + (wc->metadata_only && !(bio->bi_opf & REQ_META))) + goto direct_write; + } + e = writecache_pop_from_freelist(wc, (sector_t)-1); + if (unlikely(!e)) { + if (!WC_MODE_PMEM(wc) && !found_entry) { +direct_write: + wc->stats.writes_around++; + e = writecache_find_entry(wc, bio->bi_iter.bi_sector, WFE_RETURN_FOLLOWING); + return writecache_map_remap_origin(wc, bio, e); + } + wc->stats.writes_blocked_on_freelist++; + writecache_wait_on_freelist(wc); + continue; + } + write_original_sector_seq_count(wc, e, bio->bi_iter.bi_sector, wc->seq_count); + writecache_insert_entry(wc, e); + wc->uncommitted_blocks++; + wc->stats.writes_allocate++; +bio_copy: + if (WC_MODE_PMEM(wc)) + bio_copy_block(wc, bio, memory_data(wc, e)); + else + return writecache_bio_copy_ssd(wc, bio, e, search_used); + } while (bio->bi_iter.bi_size); + + if (unlikely(bio->bi_opf & REQ_FUA || wc->uncommitted_blocks >= wc->autocommit_blocks)) + writecache_flush(wc); + else + writecache_schedule_autocommit(wc); + + return WC_MAP_SUBMIT; +} + +static enum wc_map_op writecache_map_flush(struct dm_writecache *wc, struct bio *bio) +{ + if (writecache_has_error(wc)) + return WC_MAP_ERROR; + + if (WC_MODE_PMEM(wc)) { + wc->stats.flushes++; + writecache_flush(wc); + if (writecache_has_error(wc)) + return WC_MAP_ERROR; + else if (unlikely(wc->cleaner) || unlikely(wc->metadata_only)) + return WC_MAP_REMAP_ORIGIN; + return WC_MAP_SUBMIT; + } + /* SSD: */ + if (dm_bio_get_target_bio_nr(bio)) + return WC_MAP_REMAP_ORIGIN; + wc->stats.flushes++; + writecache_offload_bio(wc, bio); + return WC_MAP_RETURN; +} + +static enum wc_map_op writecache_map_discard(struct dm_writecache *wc, struct bio *bio) +{ + wc->stats.discards++; + + if (writecache_has_error(wc)) + return WC_MAP_ERROR; + + if (WC_MODE_PMEM(wc)) { + writecache_discard(wc, bio->bi_iter.bi_sector, bio_end_sector(bio)); + return WC_MAP_REMAP_ORIGIN; + } + /* SSD: */ + writecache_offload_bio(wc, bio); + return WC_MAP_RETURN; +} + +static int writecache_map(struct dm_target *ti, struct bio *bio) +{ struct dm_writecache *wc = ti->private; + enum wc_map_op map_op; bio->bi_private = NULL; wc_lock(wc); if (unlikely(bio->bi_opf & REQ_PREFLUSH)) { - if (writecache_has_error(wc)) - goto unlock_error; - if (WC_MODE_PMEM(wc)) { - writecache_flush(wc); - if (writecache_has_error(wc)) - goto unlock_error; - if (unlikely(wc->cleaner) || unlikely(wc->metadata_only)) - goto unlock_remap_origin; - goto unlock_submit; - } else { - if (dm_bio_get_target_bio_nr(bio)) - goto unlock_remap_origin; - writecache_offload_bio(wc, bio); - goto unlock_return; - } + map_op = writecache_map_flush(wc, bio); + goto done; } bio->bi_iter.bi_sector = dm_target_offset(ti, bio->bi_iter.bi_sector); @@ -1327,182 +1541,57 @@ static int writecache_map(struct dm_target *ti, struct bio *bio) DMERR("I/O is not aligned, sector %llu, size %u, block size %u", (unsigned long long)bio->bi_iter.bi_sector, bio->bi_iter.bi_size, wc->block_size); - goto unlock_error; + map_op = WC_MAP_ERROR; + goto done; } if (unlikely(bio_op(bio) == REQ_OP_DISCARD)) { - if (writecache_has_error(wc)) - goto unlock_error; - if (WC_MODE_PMEM(wc)) { - writecache_discard(wc, bio->bi_iter.bi_sector, bio_end_sector(bio)); - goto unlock_remap_origin; - } else { - writecache_offload_bio(wc, bio); - goto unlock_return; - } + map_op = writecache_map_discard(wc, bio); + goto done; } - if (bio_data_dir(bio) == READ) { -read_next_block: - e = writecache_find_entry(wc, bio->bi_iter.bi_sector, WFE_RETURN_FOLLOWING); - if (e && read_original_sector(wc, e) == bio->bi_iter.bi_sector) { - if (WC_MODE_PMEM(wc)) { - bio_copy_block(wc, bio, memory_data(wc, e)); - if (bio->bi_iter.bi_size) - goto read_next_block; - goto unlock_submit; - } else { - dm_accept_partial_bio(bio, wc->block_size >> SECTOR_SHIFT); - bio_set_dev(bio, wc->ssd_dev->bdev); - bio->bi_iter.bi_sector = cache_sector(wc, e); - if (!writecache_entry_is_committed(wc, e)) - writecache_wait_for_ios(wc, WRITE); - goto unlock_remap; + if (bio_data_dir(bio) == READ) + map_op = writecache_map_read(wc, bio); + else + map_op = writecache_map_write(wc, bio); +done: + switch (map_op) { + case WC_MAP_REMAP_ORIGIN: + if (likely(wc->pause != 0)) { + if (bio_op(bio) == REQ_OP_WRITE) { + dm_iot_io_begin(&wc->iot, 1); + bio->bi_private = (void *)2; } - } else { - if (e) { - sector_t next_boundary = - read_original_sector(wc, e) - bio->bi_iter.bi_sector; - if (next_boundary < bio->bi_iter.bi_size >> SECTOR_SHIFT) { - dm_accept_partial_bio(bio, next_boundary); - } - } - goto unlock_remap_origin; } - } else { - do { - bool found_entry = false; - bool search_used = false; - if (writecache_has_error(wc)) - goto unlock_error; - e = writecache_find_entry(wc, bio->bi_iter.bi_sector, 0); - if (e) { - if (!writecache_entry_is_committed(wc, e)) { - search_used = true; - goto bio_copy; - } - if (!WC_MODE_PMEM(wc) && !e->write_in_progress) { - wc->overwrote_committed = true; - search_used = true; - goto bio_copy; - } - found_entry = true; - } else { - if (unlikely(wc->cleaner) || - (wc->metadata_only && !(bio->bi_opf & REQ_META))) - goto direct_write; - } - e = writecache_pop_from_freelist(wc, (sector_t)-1); - if (unlikely(!e)) { - if (!WC_MODE_PMEM(wc) && !found_entry) { -direct_write: - e = writecache_find_entry(wc, bio->bi_iter.bi_sector, WFE_RETURN_FOLLOWING); - if (e) { - sector_t next_boundary = read_original_sector(wc, e) - bio->bi_iter.bi_sector; - BUG_ON(!next_boundary); - if (next_boundary < bio->bi_iter.bi_size >> SECTOR_SHIFT) { - dm_accept_partial_bio(bio, next_boundary); - } - } - goto unlock_remap_origin; - } - writecache_wait_on_freelist(wc); - continue; - } - write_original_sector_seq_count(wc, e, bio->bi_iter.bi_sector, wc->seq_count); - writecache_insert_entry(wc, e); - wc->uncommitted_blocks++; -bio_copy: - if (WC_MODE_PMEM(wc)) { - bio_copy_block(wc, bio, memory_data(wc, e)); - } else { - unsigned bio_size = wc->block_size; - sector_t start_cache_sec = cache_sector(wc, e); - sector_t current_cache_sec = start_cache_sec + (bio_size >> SECTOR_SHIFT); + bio_set_dev(bio, wc->dev->bdev); + wc_unlock(wc); + return DM_MAPIO_REMAPPED; - while (bio_size < bio->bi_iter.bi_size) { - if (!search_used) { - struct wc_entry *f = writecache_pop_from_freelist(wc, current_cache_sec); - if (!f) - break; - write_original_sector_seq_count(wc, f, bio->bi_iter.bi_sector + - (bio_size >> SECTOR_SHIFT), wc->seq_count); - writecache_insert_entry(wc, f); - wc->uncommitted_blocks++; - } else { - struct wc_entry *f; - struct rb_node *next = rb_next(&e->rb_node); - if (!next) - break; - f = container_of(next, struct wc_entry, rb_node); - if (f != e + 1) - break; - if (read_original_sector(wc, f) != - read_original_sector(wc, e) + (wc->block_size >> SECTOR_SHIFT)) - break; - if (unlikely(f->write_in_progress)) - break; - if (writecache_entry_is_committed(wc, f)) - wc->overwrote_committed = true; - e = f; - } - bio_size += wc->block_size; - current_cache_sec += wc->block_size >> SECTOR_SHIFT; - } + case WC_MAP_REMAP: + /* make sure that writecache_end_io decrements bio_in_progress: */ + bio->bi_private = (void *)1; + atomic_inc(&wc->bio_in_progress[bio_data_dir(bio)]); + wc_unlock(wc); + return DM_MAPIO_REMAPPED; - bio_set_dev(bio, wc->ssd_dev->bdev); - bio->bi_iter.bi_sector = start_cache_sec; - dm_accept_partial_bio(bio, bio_size >> SECTOR_SHIFT); + case WC_MAP_SUBMIT: + wc_unlock(wc); + bio_endio(bio); + return DM_MAPIO_SUBMITTED; - if (unlikely(wc->uncommitted_blocks >= wc->autocommit_blocks)) { - wc->uncommitted_blocks = 0; - queue_work(wc->writeback_wq, &wc->flush_work); - } else { - writecache_schedule_autocommit(wc); - } - goto unlock_remap; - } - } while (bio->bi_iter.bi_size); + case WC_MAP_RETURN: + wc_unlock(wc); + return DM_MAPIO_SUBMITTED; - if (unlikely(bio->bi_opf & REQ_FUA || - wc->uncommitted_blocks >= wc->autocommit_blocks)) - writecache_flush(wc); - else - writecache_schedule_autocommit(wc); - goto unlock_submit; + case WC_MAP_ERROR: + wc_unlock(wc); + bio_io_error(bio); + return DM_MAPIO_SUBMITTED; + + default: + BUG(); + return -1; } - -unlock_remap_origin: - if (likely(wc->pause != 0)) { - if (bio_op(bio) == REQ_OP_WRITE) { - dm_iot_io_begin(&wc->iot, 1); - bio->bi_private = (void *)2; - } - } - bio_set_dev(bio, wc->dev->bdev); - wc_unlock(wc); - return DM_MAPIO_REMAPPED; - -unlock_remap: - /* make sure that writecache_end_io decrements bio_in_progress: */ - bio->bi_private = (void *)1; - atomic_inc(&wc->bio_in_progress[bio_data_dir(bio)]); - wc_unlock(wc); - return DM_MAPIO_REMAPPED; - -unlock_submit: - wc_unlock(wc); - bio_endio(bio); - return DM_MAPIO_SUBMITTED; - -unlock_return: - wc_unlock(wc); - return DM_MAPIO_SUBMITTED; - -unlock_error: - wc_unlock(wc); - bio_io_error(bio); - return DM_MAPIO_SUBMITTED; } static int writecache_end_io(struct dm_target *ti, struct bio *bio, blk_status_t *status) @@ -2568,9 +2657,20 @@ static void writecache_status(struct dm_target *ti, status_type_t type, switch (type) { case STATUSTYPE_INFO: - DMEMIT("%ld %llu %llu %llu", writecache_has_error(wc), + DMEMIT("%ld %llu %llu %llu %llu %llu %llu %llu %llu %llu %llu %llu %llu %llu", + writecache_has_error(wc), (unsigned long long)wc->n_blocks, (unsigned long long)wc->freelist_size, - (unsigned long long)wc->writeback_size); + (unsigned long long)wc->writeback_size, + wc->stats.reads, + wc->stats.read_hits, + wc->stats.writes, + wc->stats.write_hits_uncommitted, + wc->stats.write_hits_committed, + wc->stats.writes_around, + wc->stats.writes_allocate, + wc->stats.writes_blocked_on_freelist, + wc->stats.flushes, + wc->stats.discards); break; case STATUSTYPE_TABLE: DMEMIT("%c %s %s %u ", WC_MODE_PMEM(wc) ? 'p' : 's', @@ -2623,12 +2723,15 @@ static void writecache_status(struct dm_target *ti, status_type_t type, if (wc->pause_set) DMEMIT(" pause_writeback %u", wc->pause_value); break; + case STATUSTYPE_IMA: + *result = '\0'; + break; } } static struct target_type writecache_target = { .name = "writecache", - .version = {1, 5, 0}, + .version = {1, 6, 0}, .module = THIS_MODULE, .ctr = writecache_ctr, .dtr = writecache_dtr, diff --git a/drivers/md/dm-zoned-target.c b/drivers/md/dm-zoned-target.c index 7e88df64d197..ae1bc48c0043 100644 --- a/drivers/md/dm-zoned-target.c +++ b/drivers/md/dm-zoned-target.c @@ -1119,6 +1119,9 @@ static void dmz_status(struct dm_target *ti, status_type_t type, DMEMIT(" %s", buf); } break; + case STATUSTYPE_IMA: + *result = '\0'; + break; } return; } diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 7981b7287628..84e9145b1714 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -8,6 +8,7 @@ #include "dm-core.h" #include "dm-rq.h" #include "dm-uevent.h" +#include "dm-ima.h" #include #include @@ -261,9 +262,13 @@ static void (*_exits[])(void) = { static int __init dm_init(void) { const int count = ARRAY_SIZE(_inits); - int r, i; +#if (IS_ENABLED(CONFIG_IMA) && !IS_ENABLED(CONFIG_IMA_DISABLE_HTABLE)) + DMWARN("CONFIG_IMA_DISABLE_HTABLE is disabled." + " Duplicate IMA measurements will not be recorded in the IMA log."); +#endif + for (i = 0; i < count; i++) { r = _inits[i](); if (r) @@ -271,8 +276,7 @@ static int __init dm_init(void) } return 0; - - bad: +bad: while (i--) _exits[i](); @@ -1997,6 +2001,8 @@ int dm_create(int minor, struct mapped_device **result) if (!md) return -ENXIO; + dm_ima_reset_data(md); + *result = md; return 0; } diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 94f2cd6a8e83..114553b487ef 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -31,7 +31,7 @@ enum dm_queue_mode { DM_TYPE_DAX_BIO_BASED = 3, }; -typedef enum { STATUSTYPE_INFO, STATUSTYPE_TABLE } status_type_t; +typedef enum { STATUSTYPE_INFO, STATUSTYPE_TABLE, STATUSTYPE_IMA } status_type_t; union map_info { void *ptr; @@ -602,6 +602,10 @@ void dm_destroy_keyslot_manager(struct blk_keyslot_manager *ksm); #define DMEMIT(x...) sz += ((sz >= maxlen) ? \ 0 : scnprintf(result + sz, maxlen - sz, x)) +#define DMEMIT_TARGET_NAME_VERSION(y) \ + DMEMIT("target_name=%s,target_version=%u.%u.%u", \ + (y)->name, (y)->version[0], (y)->version[1], (y)->version[2]) + /* * Definitions of return values from target end_io function. */ diff --git a/include/uapi/linux/dm-ioctl.h b/include/uapi/linux/dm-ioctl.h index e5c6e458bdf7..c12ce30b52df 100644 --- a/include/uapi/linux/dm-ioctl.h +++ b/include/uapi/linux/dm-ioctl.h @@ -376,4 +376,10 @@ enum { */ #define DM_INTERNAL_SUSPEND_FLAG (1 << 18) /* Out */ +/* + * If set, returns in the in buffer passed by UM, the raw table information + * that would be measured by IMA subsystem on device state change. + */ +#define DM_IMA_MEASUREMENT_FLAG (1 << 19) /* In */ + #endif /* _LINUX_DM_IOCTL_H */ diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c index 287b90509006..673833f94069 100644 --- a/security/integrity/ima/ima_main.c +++ b/security/integrity/ima/ima_main.c @@ -985,6 +985,7 @@ void ima_measure_critical_data(const char *event_label, CRITICAL_DATA, 0, event_label, hash); } +EXPORT_SYMBOL_GPL(ima_measure_critical_data); static int __init init_ima(void) {