mirror of https://gitee.com/openkylin/linux.git
Drivers: hv: Introduce a policy for controlling channel affinity
Introduce a mechanism to control how channels will be affinitized. We will
support two policies:

1. HV_BALANCED: All performance-critical channels will be distributed
   evenly amongst all the available NUMA nodes. Once the node is assigned,
   we will assign the CPU based on a simple round-robin scheme.

2. HV_LOCALIZED: Only the primary channels are distributed across all
   NUMA nodes. Sub-channels will be in the same NUMA node as the primary
   channel. This is the current behaviour.

The default policy will be HV_BALANCED, as it can minimize remote memory
accesses on NUMA machines running applications that span NUMA nodes.

Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
commit 509879bdb3
parent bb08d431a9
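The HV_LOCALIZED policy is opt-in; the accessor added to the hyperv header by
this patch is the intended knob. Below is a minimal sketch of how a driver
might request it; the function example_setup_channel and its call site are
hypothetical and not part of this patch.

/*
 * Hypothetical illustration only: request HV_LOCALIZED placement for a
 * channel using the helper introduced by this patch.
 */
#include <linux/hyperv.h>

static void example_setup_channel(struct vmbus_channel *channel)
{
	/* Keep this device's sub-channels on the primary channel's NUMA node. */
	set_channel_affinity_state(channel, HV_LOCALIZED);
}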
@@ -356,8 +356,9 @@ void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid)
	 * We need to free the bit for init_vp_index() to work in the case
	 * of sub-channel, when we reload drivers like hv_netvsc.
	 */
-	cpumask_clear_cpu(channel->target_cpu,
-			  &primary_channel->alloced_cpus_in_node);
+	if (channel->affinity_policy == HV_LOCALIZED)
+		cpumask_clear_cpu(channel->target_cpu,
+				  &primary_channel->alloced_cpus_in_node);
 
 	vmbus_release_relid(relid);
 
@@ -548,17 +549,17 @@ static void init_vp_index(struct vmbus_channel *channel, u16 dev_type)
 	}
 
 	/*
-	 * We distribute primary channels evenly across all the available
-	 * NUMA nodes and within the assigned NUMA node we will assign the
-	 * first available CPU to the primary channel.
-	 * The sub-channels will be assigned to the CPUs available in the
-	 * NUMA node evenly.
+	 * Based on the channel affinity policy, we will assign the NUMA
+	 * nodes.
	 */
-	if (!primary) {
+
+	if ((channel->affinity_policy == HV_BALANCED) || (!primary)) {
 		while (true) {
 			next_node = next_numa_node_id++;
-			if (next_node == nr_node_ids)
+			if (next_node == nr_node_ids) {
 				next_node = next_numa_node_id = 0;
+				continue;
+			}
 			if (cpumask_empty(cpumask_of_node(next_node)))
 				continue;
 			break;
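For reference, the HV_BALANCED node selection above amounts to a round-robin
walk over the NUMA nodes with wrap-around and a skip over CPU-less nodes. The
following standalone userspace sketch mirrors that loop; the node count and
the "node has CPUs" test are simulated and not part of the kernel code.

#include <stdbool.h>
#include <stdio.h>

#define NR_NODES 4

static int next_numa_node_id;
static const bool node_has_cpus[NR_NODES] = { true, false, true, true };

static int pick_next_node(void)
{
	int next_node;

	while (true) {
		next_node = next_numa_node_id++;
		if (next_node == NR_NODES) {
			/* Wrap around and re-examine the first node. */
			next_node = next_numa_node_id = 0;
			continue;
		}
		if (!node_has_cpus[next_node])
			continue;	/* Skip nodes with no usable CPUs. */
		break;
	}
	return next_node;
}

int main(void)
{
	for (int i = 0; i < 6; i++)
		printf("channel %d -> node %d\n", i, pick_next_node());
	return 0;
}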
@@ -582,15 +583,17 @@ static void init_vp_index(struct vmbus_channel *channel, u16 dev_type)
 
 	cur_cpu = -1;
 
-	/*
-	 * Normally Hyper-V host doesn't create more subchannels than there
-	 * are VCPUs on the node but it is possible when not all present VCPUs
-	 * on the node are initialized by guest. Clear the alloced_cpus_in_node
-	 * to start over.
-	 */
-	if (cpumask_equal(&primary->alloced_cpus_in_node,
-			  cpumask_of_node(primary->numa_node)))
-		cpumask_clear(&primary->alloced_cpus_in_node);
+	if (primary->affinity_policy == HV_LOCALIZED) {
+		/*
+		 * Normally Hyper-V host doesn't create more subchannels
+		 * than there are VCPUs on the node but it is possible when not
+		 * all present VCPUs on the node are initialized by guest.
+		 * Clear the alloced_cpus_in_node to start over.
+		 */
+		if (cpumask_equal(&primary->alloced_cpus_in_node,
+				  cpumask_of_node(primary->numa_node)))
+			cpumask_clear(&primary->alloced_cpus_in_node);
+	}
 
 	while (true) {
 		cur_cpu = cpumask_next(cur_cpu, &available_mask);
@@ -601,17 +604,24 @@ static void init_vp_index(struct vmbus_channel *channel, u16 dev_type)
 			continue;
 		}
 
-		/*
-		 * NOTE: in the case of sub-channel, we clear the sub-channel
-		 * related bit(s) in primary->alloced_cpus_in_node in
-		 * hv_process_channel_removal(), so when we reload drivers
-		 * like hv_netvsc in SMP guest, here we're able to re-allocate
-		 * bit from primary->alloced_cpus_in_node.
-		 */
-		if (!cpumask_test_cpu(cur_cpu,
-				&primary->alloced_cpus_in_node)) {
-			cpumask_set_cpu(cur_cpu,
-					&primary->alloced_cpus_in_node);
+		if (primary->affinity_policy == HV_LOCALIZED) {
+			/*
+			 * NOTE: in the case of sub-channel, we clear the
+			 * sub-channel related bit(s) in
+			 * primary->alloced_cpus_in_node in
+			 * hv_process_channel_removal(), so when we
+			 * reload drivers like hv_netvsc in SMP guest, here
+			 * we're able to re-allocate
+			 * bit from primary->alloced_cpus_in_node.
+			 */
+			if (!cpumask_test_cpu(cur_cpu,
+					&primary->alloced_cpus_in_node)) {
+				cpumask_set_cpu(cur_cpu,
+						&primary->alloced_cpus_in_node);
+				cpumask_set_cpu(cur_cpu, alloced_mask);
+				break;
+			}
+		} else {
 			cpumask_set_cpu(cur_cpu, alloced_mask);
 			break;
 		}
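The HV_LOCALIZED branches above keep sub-channels on the primary channel's
node by remembering which CPUs of that node were already handed out and
starting over once the node is exhausted. The standalone userspace sketch
below mirrors that bookkeeping; the node's CPU mask is simulated with a plain
bitmask and is not the kernel cpumask API.

#include <stdio.h>

#define NODE_CPU_MASK 0x0fu	/* pretend the node has CPUs 0-3 */

static unsigned int alloced_cpus_in_node;

static int pick_localized_cpu(void)
{
	/* Every CPU in the node already used: clear and start over. */
	if (alloced_cpus_in_node == NODE_CPU_MASK)
		alloced_cpus_in_node = 0;

	for (int cpu = 0; cpu < 4; cpu++) {
		unsigned int bit = 1u << cpu;

		if (!(NODE_CPU_MASK & bit))
			continue;	/* CPU not in this node */
		if (alloced_cpus_in_node & bit)
			continue;	/* already assigned to a sub-channel */
		alloced_cpus_in_node |= bit;
		return cpu;
	}
	return -1;	/* unreachable while NODE_CPU_MASK is non-empty */
}

int main(void)
{
	for (int i = 0; i < 6; i++)
		printf("sub-channel %d -> cpu %d\n", i, pick_localized_cpu());
	return 0;
}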
@@ -674,6 +674,11 @@ enum hv_signal_policy {
 	HV_SIGNAL_POLICY_EXPLICIT,
 };
 
+enum hv_numa_policy {
+	HV_BALANCED = 0,
+	HV_LOCALIZED,
+};
+
 enum vmbus_device_type {
 	HV_IDE = 0,
 	HV_SCSI,
@@ -876,6 +881,18 @@ struct vmbus_channel {
	 */
 	bool low_latency;
 
+	/*
+	 * NUMA distribution policy:
+	 * We support two policies:
+	 * 1) Balanced: Here all performance critical channels are
+	 *    distributed evenly amongst all the NUMA nodes.
+	 *    This policy will be the default policy.
+	 * 2) Localized: All channels of a given instance of a
+	 *    performance critical service will be assigned CPUs
+	 *    within a selected NUMA node.
+	 */
+	enum hv_numa_policy affinity_policy;
+
 };
 
 static inline void set_channel_lock_state(struct vmbus_channel *c, bool state)
@@ -895,6 +912,12 @@ static inline void set_channel_signal_state(struct vmbus_channel *c,
 	c->signal_policy = policy;
 }
 
+static inline void set_channel_affinity_state(struct vmbus_channel *c,
+					      enum hv_numa_policy policy)
+{
+	c->affinity_policy = policy;
+}
+
 static inline void set_channel_read_state(struct vmbus_channel *c, bool state)
 {
 	c->batched_reading = state;