qemu/net/tap-win32.c

833 lines
24 KiB
C
Raw Normal View History

/*
* TAP-Win32 -- A kernel driver to provide virtual tap device functionality
* on Windows. Originally derived from the CIPE-Win32
* project by Damion K. Wilson, with extensive modifications by
* James Yonan.
*
* All source code which derives from the CIPE-Win32 project is
* Copyright (C) Damion K. Wilson, 2003, and is released under the
* GPL version 2 (see below).
*
* All other source code is Copyright (C) James Yonan, 2003-2004,
* and is released under the GPL version 2 (see below).
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program (see the file COPYING included with this
* distribution); if not, see <http://www.gnu.org/licenses/>.
*/
#include "qemu/osdep.h"
#include "tap_int.h"
#include "qemu-common.h"
#include "clients.h" /* net_init_tap */
net: Pad short frames to minimum size before sending from SLiRP/TAP The minimum Ethernet frame length is 60 bytes. For short frames with smaller length like ARP packets (only 42 bytes), on a real world NIC it can choose either padding its length to the minimum required 60 bytes, or sending it out directly to the wire. Such behavior can be hardcoded or controled by a register bit. Similarly on the receive path, NICs can choose either dropping such short frames directly or handing them over to software to handle. On the other hand, for the network backends like SLiRP/TAP, they don't expose a way to control the short frame behavior. As of today they just send/receive data from/to the other end connected to them, which means any sized packet is acceptable. So they can send and receive short frames without any problem. It is observed that ARP packets sent from SLiRP/TAP are 42 bytes, and SLiRP/TAP just send these ARP packets to the other end which might be a NIC model that does not allow short frames to pass through. To provide better compatibility, for packets sent from QEMU network backends like SLiRP/TAP, we change to pad short frames before sending it out to the other end, if the other end does not forbid it via the nc->do_not_pad flag. This ensures a backend as an Ethernet sender does not violate the spec. But with this change, the behavior of dropping short frames from SLiRP/TAP interfaces in the NIC model cannot be emulated because it always receives a packet that is spec complaint. The capability of sending short frames from NIC models is still supported and short frames can still pass through SLiRP/TAP. This commit should be able to fix the issue as reported with some NIC models before, that ARP requests get dropped, preventing the guest from becoming visible on the network. It was workarounded in these NIC models on the receive path, that when a short frame is received, it is padded up to 60 bytes. The following 2 commits seem to be the one to workaround this issue in e1000 and vmxenet3 before, and should probably be reverted. commit 78aeb23eded2 ("e1000: Pad short frames to minimum size (60 bytes)") commit 40a87c6c9b11 ("vmxnet3: Pad short frames to minimum size (60 bytes)") Signed-off-by: Bin Meng <bmeng.cn@gmail.com> Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com> Signed-off-by: Jason Wang <jasowang@redhat.com>
2021-03-17 14:26:29 +08:00
#include "net/eth.h"
#include "net/net.h"
#include "net/tap.h" /* tap_has_ufo, ... */
#include "qemu/error-report.h"
#include "qemu/main-loop.h"
#include <windows.h>
#include <winioctl.h>
//=============
// TAP IOCTLs
//=============
#define TAP_CONTROL_CODE(request,method) \
CTL_CODE (FILE_DEVICE_UNKNOWN, request, method, FILE_ANY_ACCESS)
#define TAP_IOCTL_GET_MAC TAP_CONTROL_CODE (1, METHOD_BUFFERED)
#define TAP_IOCTL_GET_VERSION TAP_CONTROL_CODE (2, METHOD_BUFFERED)
#define TAP_IOCTL_GET_MTU TAP_CONTROL_CODE (3, METHOD_BUFFERED)
#define TAP_IOCTL_GET_INFO TAP_CONTROL_CODE (4, METHOD_BUFFERED)
#define TAP_IOCTL_CONFIG_POINT_TO_POINT TAP_CONTROL_CODE (5, METHOD_BUFFERED)
#define TAP_IOCTL_SET_MEDIA_STATUS TAP_CONTROL_CODE (6, METHOD_BUFFERED)
#define TAP_IOCTL_CONFIG_DHCP_MASQ TAP_CONTROL_CODE (7, METHOD_BUFFERED)
#define TAP_IOCTL_GET_LOG_LINE TAP_CONTROL_CODE (8, METHOD_BUFFERED)
#define TAP_IOCTL_CONFIG_DHCP_SET_OPT TAP_CONTROL_CODE (9, METHOD_BUFFERED)
//=================
// Registry keys
//=================
#define ADAPTER_KEY "SYSTEM\\CurrentControlSet\\Control\\Class\\{4D36E972-E325-11CE-BFC1-08002BE10318}"
#define NETWORK_CONNECTIONS_KEY "SYSTEM\\CurrentControlSet\\Control\\Network\\{4D36E972-E325-11CE-BFC1-08002BE10318}"
//======================
// Filesystem prefixes
//======================
#define USERMODEDEVICEDIR "\\\\.\\Global\\"
#define TAPSUFFIX ".tap"
//======================
// Compile time configuration
//======================
//#define DEBUG_TAP_WIN32
/* FIXME: The asynch write path appears to be broken at
* present. WriteFile() ignores the lpNumberOfBytesWritten parameter
* for overlapped writes, with the result we return zero bytes sent,
* and after handling a single packet, receive is disabled for this
* interface. */
/* #define TUN_ASYNCHRONOUS_WRITES 1 */
#define TUN_BUFFER_SIZE 1560
#define TUN_MAX_BUFFER_COUNT 32
/*
* The data member "buffer" must be the first element in the tun_buffer
* structure. See the function, tap_win32_free_buffer.
*/
typedef struct tun_buffer_s {
unsigned char buffer [TUN_BUFFER_SIZE];
unsigned long read_size;
struct tun_buffer_s* next;
} tun_buffer_t;
typedef struct tap_win32_overlapped {
HANDLE handle;
HANDLE read_event;
HANDLE write_event;
HANDLE output_queue_semaphore;
HANDLE free_list_semaphore;
HANDLE tap_semaphore;
CRITICAL_SECTION output_queue_cs;
CRITICAL_SECTION free_list_cs;
OVERLAPPED read_overlapped;
OVERLAPPED write_overlapped;
tun_buffer_t buffers[TUN_MAX_BUFFER_COUNT];
tun_buffer_t* free_list;
tun_buffer_t* output_queue_front;
tun_buffer_t* output_queue_back;
} tap_win32_overlapped_t;
static tap_win32_overlapped_t tap_overlapped;
static tun_buffer_t* get_buffer_from_free_list(tap_win32_overlapped_t* const overlapped)
{
tun_buffer_t* buffer = NULL;
WaitForSingleObject(overlapped->free_list_semaphore, INFINITE);
EnterCriticalSection(&overlapped->free_list_cs);
buffer = overlapped->free_list;
// assert(buffer != NULL);
overlapped->free_list = buffer->next;
LeaveCriticalSection(&overlapped->free_list_cs);
buffer->next = NULL;
return buffer;
}
static void put_buffer_on_free_list(tap_win32_overlapped_t* const overlapped, tun_buffer_t* const buffer)
{
EnterCriticalSection(&overlapped->free_list_cs);
buffer->next = overlapped->free_list;
overlapped->free_list = buffer;
LeaveCriticalSection(&overlapped->free_list_cs);
ReleaseSemaphore(overlapped->free_list_semaphore, 1, NULL);
}
static tun_buffer_t* get_buffer_from_output_queue(tap_win32_overlapped_t* const overlapped, const int block)
{
tun_buffer_t* buffer = NULL;
DWORD result, timeout = block ? INFINITE : 0L;
// Non-blocking call
result = WaitForSingleObject(overlapped->output_queue_semaphore, timeout);
switch (result)
{
// The semaphore object was signaled.
case WAIT_OBJECT_0:
EnterCriticalSection(&overlapped->output_queue_cs);
buffer = overlapped->output_queue_front;
overlapped->output_queue_front = buffer->next;
if(overlapped->output_queue_front == NULL) {
overlapped->output_queue_back = NULL;
}
LeaveCriticalSection(&overlapped->output_queue_cs);
break;
// Semaphore was nonsignaled, so a time-out occurred.
case WAIT_TIMEOUT:
// Cannot open another window.
break;
}
return buffer;
}
static tun_buffer_t* get_buffer_from_output_queue_immediate (tap_win32_overlapped_t* const overlapped)
{
return get_buffer_from_output_queue(overlapped, 0);
}
static void put_buffer_on_output_queue(tap_win32_overlapped_t* const overlapped, tun_buffer_t* const buffer)
{
EnterCriticalSection(&overlapped->output_queue_cs);
if(overlapped->output_queue_front == NULL && overlapped->output_queue_back == NULL) {
overlapped->output_queue_front = overlapped->output_queue_back = buffer;
} else {
buffer->next = NULL;
overlapped->output_queue_back->next = buffer;
overlapped->output_queue_back = buffer;
}
LeaveCriticalSection(&overlapped->output_queue_cs);
ReleaseSemaphore(overlapped->output_queue_semaphore, 1, NULL);
}
static int is_tap_win32_dev(const char *guid)
{
HKEY netcard_key;
LONG status;
DWORD len;
int i = 0;
status = RegOpenKeyEx(
HKEY_LOCAL_MACHINE,
ADAPTER_KEY,
0,
KEY_READ,
&netcard_key);
if (status != ERROR_SUCCESS) {
return FALSE;
}
for (;;) {
char enum_name[256];
char unit_string[256];
HKEY unit_key;
char component_id_string[] = "ComponentId";
char component_id[256];
char net_cfg_instance_id_string[] = "NetCfgInstanceId";
char net_cfg_instance_id[256];
DWORD data_type;
len = sizeof (enum_name);
status = RegEnumKeyEx(
netcard_key,
i,
enum_name,
&len,
NULL,
NULL,
NULL,
NULL);
if (status == ERROR_NO_MORE_ITEMS)
break;
else if (status != ERROR_SUCCESS) {
return FALSE;
}
snprintf (unit_string, sizeof(unit_string), "%s\\%s",
ADAPTER_KEY, enum_name);
status = RegOpenKeyEx(
HKEY_LOCAL_MACHINE,
unit_string,
0,
KEY_READ,
&unit_key);
if (status != ERROR_SUCCESS) {
return FALSE;
} else {
len = sizeof (component_id);
status = RegQueryValueEx(
unit_key,
component_id_string,
NULL,
&data_type,
(LPBYTE)component_id,
&len);
if (!(status != ERROR_SUCCESS || data_type != REG_SZ)) {
len = sizeof (net_cfg_instance_id);
status = RegQueryValueEx(
unit_key,
net_cfg_instance_id_string,
NULL,
&data_type,
(LPBYTE)net_cfg_instance_id,
&len);
if (status == ERROR_SUCCESS && data_type == REG_SZ) {
if (/* !strcmp (component_id, TAP_COMPONENT_ID) &&*/
!strcmp (net_cfg_instance_id, guid)) {
RegCloseKey (unit_key);
RegCloseKey (netcard_key);
return TRUE;
}
}
}
RegCloseKey (unit_key);
}
++i;
}
RegCloseKey (netcard_key);
return FALSE;
}
static int get_device_guid(
char *name,
int name_size,
char *actual_name,
int actual_name_size)
{
LONG status;
HKEY control_net_key;
DWORD len;
int i = 0;
int stop = 0;
status = RegOpenKeyEx(
HKEY_LOCAL_MACHINE,
NETWORK_CONNECTIONS_KEY,
0,
KEY_READ,
&control_net_key);
if (status != ERROR_SUCCESS) {
return -1;
}
while (!stop)
{
char enum_name[256];
char connection_string[256];
HKEY connection_key;
char name_data[256];
DWORD name_type;
const char name_string[] = "Name";
len = sizeof (enum_name);
status = RegEnumKeyEx(
control_net_key,
i,
enum_name,
&len,
NULL,
NULL,
NULL,
NULL);
if (status == ERROR_NO_MORE_ITEMS)
break;
else if (status != ERROR_SUCCESS) {
return -1;
}
snprintf(connection_string,
sizeof(connection_string),
"%s\\%s\\Connection",
NETWORK_CONNECTIONS_KEY, enum_name);
status = RegOpenKeyEx(
HKEY_LOCAL_MACHINE,
connection_string,
0,
KEY_READ,
&connection_key);
if (status == ERROR_SUCCESS) {
len = sizeof (name_data);
status = RegQueryValueEx(
connection_key,
name_string,
NULL,
&name_type,
(LPBYTE)name_data,
&len);
if (status != ERROR_SUCCESS || name_type != REG_SZ) {
++i;
continue;
}
else {
if (is_tap_win32_dev(enum_name)) {
snprintf(name, name_size, "%s", enum_name);
if (actual_name) {
if (strcmp(actual_name, "") != 0) {
if (strcmp(name_data, actual_name) != 0) {
RegCloseKey (connection_key);
++i;
continue;
}
}
else {
snprintf(actual_name, actual_name_size, "%s", name_data);
}
}
stop = 1;
}
}
RegCloseKey (connection_key);
}
++i;
}
RegCloseKey (control_net_key);
if (stop == 0)
return -1;
return 0;
}
static int tap_win32_set_status(HANDLE handle, int status)
{
unsigned long len = 0;
return DeviceIoControl(handle, TAP_IOCTL_SET_MEDIA_STATUS,
&status, sizeof (status),
&status, sizeof (status), &len, NULL);
}
static void tap_win32_overlapped_init(tap_win32_overlapped_t* const overlapped, const HANDLE handle)
{
overlapped->handle = handle;
overlapped->read_event = CreateEvent(NULL, FALSE, FALSE, NULL);
overlapped->write_event = CreateEvent(NULL, FALSE, FALSE, NULL);
overlapped->read_overlapped.Offset = 0;
overlapped->read_overlapped.OffsetHigh = 0;
overlapped->read_overlapped.hEvent = overlapped->read_event;
overlapped->write_overlapped.Offset = 0;
overlapped->write_overlapped.OffsetHigh = 0;
overlapped->write_overlapped.hEvent = overlapped->write_event;
InitializeCriticalSection(&overlapped->output_queue_cs);
InitializeCriticalSection(&overlapped->free_list_cs);
overlapped->output_queue_semaphore = CreateSemaphore(
NULL, // default security attributes
0, // initial count
TUN_MAX_BUFFER_COUNT, // maximum count
NULL); // unnamed semaphore
if(!overlapped->output_queue_semaphore) {
fprintf(stderr, "error creating output queue semaphore!\n");
}
overlapped->free_list_semaphore = CreateSemaphore(
NULL, // default security attributes
TUN_MAX_BUFFER_COUNT, // initial count
TUN_MAX_BUFFER_COUNT, // maximum count
NULL); // unnamed semaphore
if(!overlapped->free_list_semaphore) {
fprintf(stderr, "error creating free list semaphore!\n");
}
overlapped->free_list = overlapped->output_queue_front = overlapped->output_queue_back = NULL;
{
unsigned index;
for(index = 0; index < TUN_MAX_BUFFER_COUNT; index++) {
tun_buffer_t* element = &overlapped->buffers[index];
element->next = overlapped->free_list;
overlapped->free_list = element;
}
}
/* To count buffers, initially no-signal. */
overlapped->tap_semaphore = CreateSemaphore(NULL, 0, TUN_MAX_BUFFER_COUNT, NULL);
if(!overlapped->tap_semaphore)
fprintf(stderr, "error creating tap_semaphore.\n");
}
static int tap_win32_write(tap_win32_overlapped_t *overlapped,
const void *buffer, unsigned long size)
{
unsigned long write_size;
BOOL result;
DWORD error;
#ifdef TUN_ASYNCHRONOUS_WRITES
result = GetOverlappedResult( overlapped->handle, &overlapped->write_overlapped,
&write_size, FALSE);
if (!result && GetLastError() == ERROR_IO_INCOMPLETE)
WaitForSingleObject(overlapped->write_event, INFINITE);
#endif
result = WriteFile(overlapped->handle, buffer, size,
&write_size, &overlapped->write_overlapped);
#ifdef TUN_ASYNCHRONOUS_WRITES
/* FIXME: we can't sensibly set write_size here, without waiting
* for the IO to complete! Moreover, we can't return zero,
* because that will disable receive on this interface, and we
* also can't assume it will succeed and return the full size,
* because that will result in the buffer being reclaimed while
* the IO is in progress. */
#error Async writes are broken. Please disable TUN_ASYNCHRONOUS_WRITES.
#else /* !TUN_ASYNCHRONOUS_WRITES */
if (!result) {
error = GetLastError();
if (error == ERROR_IO_PENDING) {
result = GetOverlappedResult(overlapped->handle,
&overlapped->write_overlapped,
&write_size, TRUE);
}
}
#endif
if (!result) {
#ifdef DEBUG_TAP_WIN32
LPTSTR msgbuf;
error = GetLastError();
FormatMessage(FORMAT_MESSAGE_ALLOCATE_BUFFER|FORMAT_MESSAGE_FROM_SYSTEM,
NULL, error, MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT),
&msgbuf, 0, NULL);
fprintf(stderr, "Tap-Win32: Error WriteFile %d - %s\n", error, msgbuf);
LocalFree(msgbuf);
#endif
return 0;
}
return write_size;
}
static DWORD WINAPI tap_win32_thread_entry(LPVOID param)
{
tap_win32_overlapped_t *overlapped = (tap_win32_overlapped_t*)param;
unsigned long read_size;
BOOL result;
DWORD dwError;
tun_buffer_t* buffer = get_buffer_from_free_list(overlapped);
for (;;) {
result = ReadFile(overlapped->handle,
buffer->buffer,
sizeof(buffer->buffer),
&read_size,
&overlapped->read_overlapped);
if (!result) {
dwError = GetLastError();
if (dwError == ERROR_IO_PENDING) {
WaitForSingleObject(overlapped->read_event, INFINITE);
result = GetOverlappedResult( overlapped->handle, &overlapped->read_overlapped,
&read_size, FALSE);
if (!result) {
#ifdef DEBUG_TAP_WIN32
LPVOID lpBuffer;
dwError = GetLastError();
FormatMessage( FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM,
NULL, dwError, MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT),
(LPTSTR) & lpBuffer, 0, NULL );
fprintf(stderr, "Tap-Win32: Error GetOverlappedResult %d - %s\n", dwError, lpBuffer);
LocalFree( lpBuffer );
#endif
}
} else {
#ifdef DEBUG_TAP_WIN32
LPVOID lpBuffer;
FormatMessage( FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM,
NULL, dwError, MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT),
(LPTSTR) & lpBuffer, 0, NULL );
fprintf(stderr, "Tap-Win32: Error ReadFile %d - %s\n", dwError, lpBuffer);
LocalFree( lpBuffer );
#endif
}
}
if(read_size > 0) {
buffer->read_size = read_size;
put_buffer_on_output_queue(overlapped, buffer);
ReleaseSemaphore(overlapped->tap_semaphore, 1, NULL);
buffer = get_buffer_from_free_list(overlapped);
}
}
return 0;
}
static int tap_win32_read(tap_win32_overlapped_t *overlapped,
uint8_t **pbuf, int max_size)
{
int size = 0;
tun_buffer_t* buffer = get_buffer_from_output_queue_immediate(overlapped);
if(buffer != NULL) {
*pbuf = buffer->buffer;
size = (int)buffer->read_size;
if(size > max_size) {
size = max_size;
}
}
return size;
}
static void tap_win32_free_buffer(tap_win32_overlapped_t *overlapped,
uint8_t *pbuf)
{
tun_buffer_t* buffer = (tun_buffer_t*)pbuf;
put_buffer_on_free_list(overlapped, buffer);
}
static int tap_win32_open(tap_win32_overlapped_t **phandle,
const char *preferred_name)
{
char device_path[256];
char device_guid[0x100];
int rc;
HANDLE handle;
BOOL bret;
char name_buffer[0x100] = {0, };
struct {
unsigned long major;
unsigned long minor;
unsigned long debug;
} version;
DWORD version_len;
DWORD idThread;
if (preferred_name != NULL) {
snprintf(name_buffer, sizeof(name_buffer), "%s", preferred_name);
}
rc = get_device_guid(device_guid, sizeof(device_guid), name_buffer, sizeof(name_buffer));
if (rc)
return -1;
snprintf (device_path, sizeof(device_path), "%s%s%s",
USERMODEDEVICEDIR,
device_guid,
TAPSUFFIX);
handle = CreateFile (
device_path,
GENERIC_READ | GENERIC_WRITE,
0,
0,
OPEN_EXISTING,
FILE_ATTRIBUTE_SYSTEM | FILE_FLAG_OVERLAPPED,
0 );
if (handle == INVALID_HANDLE_VALUE) {
return -1;
}
bret = DeviceIoControl(handle, TAP_IOCTL_GET_VERSION,
&version, sizeof (version),
&version, sizeof (version), &version_len, NULL);
if (bret == FALSE) {
CloseHandle(handle);
return -1;
}
if (!tap_win32_set_status(handle, TRUE)) {
return -1;
}
tap_win32_overlapped_init(&tap_overlapped, handle);
*phandle = &tap_overlapped;
CreateThread(NULL, 0, tap_win32_thread_entry,
(LPVOID)&tap_overlapped, 0, &idThread);
return 0;
}
/********************************************/
typedef struct TAPState {
NetClientState nc;
tap_win32_overlapped_t *handle;
} TAPState;
static void tap_cleanup(NetClientState *nc)
{
TAPState *s = DO_UPCAST(TAPState, nc, nc);
qemu_del_wait_object(s->handle->tap_semaphore, NULL, NULL);
/* FIXME: need to kill thread and close file handle:
tap_win32_close(s);
*/
}
static ssize_t tap_receive(NetClientState *nc, const uint8_t *buf, size_t size)
{
TAPState *s = DO_UPCAST(TAPState, nc, nc);
return tap_win32_write(s->handle, buf, size);
}
static void tap_win32_send(void *opaque)
{
TAPState *s = opaque;
uint8_t *buf, *orig_buf;
int max_size = 4096;
int size;
net: Pad short frames to minimum size before sending from SLiRP/TAP The minimum Ethernet frame length is 60 bytes. For short frames with smaller length like ARP packets (only 42 bytes), on a real world NIC it can choose either padding its length to the minimum required 60 bytes, or sending it out directly to the wire. Such behavior can be hardcoded or controled by a register bit. Similarly on the receive path, NICs can choose either dropping such short frames directly or handing them over to software to handle. On the other hand, for the network backends like SLiRP/TAP, they don't expose a way to control the short frame behavior. As of today they just send/receive data from/to the other end connected to them, which means any sized packet is acceptable. So they can send and receive short frames without any problem. It is observed that ARP packets sent from SLiRP/TAP are 42 bytes, and SLiRP/TAP just send these ARP packets to the other end which might be a NIC model that does not allow short frames to pass through. To provide better compatibility, for packets sent from QEMU network backends like SLiRP/TAP, we change to pad short frames before sending it out to the other end, if the other end does not forbid it via the nc->do_not_pad flag. This ensures a backend as an Ethernet sender does not violate the spec. But with this change, the behavior of dropping short frames from SLiRP/TAP interfaces in the NIC model cannot be emulated because it always receives a packet that is spec complaint. The capability of sending short frames from NIC models is still supported and short frames can still pass through SLiRP/TAP. This commit should be able to fix the issue as reported with some NIC models before, that ARP requests get dropped, preventing the guest from becoming visible on the network. It was workarounded in these NIC models on the receive path, that when a short frame is received, it is padded up to 60 bytes. The following 2 commits seem to be the one to workaround this issue in e1000 and vmxenet3 before, and should probably be reverted. commit 78aeb23eded2 ("e1000: Pad short frames to minimum size (60 bytes)") commit 40a87c6c9b11 ("vmxnet3: Pad short frames to minimum size (60 bytes)") Signed-off-by: Bin Meng <bmeng.cn@gmail.com> Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com> Signed-off-by: Jason Wang <jasowang@redhat.com>
2021-03-17 14:26:29 +08:00
uint8_t min_pkt[ETH_ZLEN];
size_t min_pktsz = sizeof(min_pkt);
size = tap_win32_read(s->handle, &buf, max_size);
if (size > 0) {
orig_buf = buf;
if (net_peer_needs_padding(&s->nc)) {
net: Pad short frames to minimum size before sending from SLiRP/TAP The minimum Ethernet frame length is 60 bytes. For short frames with smaller length like ARP packets (only 42 bytes), on a real world NIC it can choose either padding its length to the minimum required 60 bytes, or sending it out directly to the wire. Such behavior can be hardcoded or controled by a register bit. Similarly on the receive path, NICs can choose either dropping such short frames directly or handing them over to software to handle. On the other hand, for the network backends like SLiRP/TAP, they don't expose a way to control the short frame behavior. As of today they just send/receive data from/to the other end connected to them, which means any sized packet is acceptable. So they can send and receive short frames without any problem. It is observed that ARP packets sent from SLiRP/TAP are 42 bytes, and SLiRP/TAP just send these ARP packets to the other end which might be a NIC model that does not allow short frames to pass through. To provide better compatibility, for packets sent from QEMU network backends like SLiRP/TAP, we change to pad short frames before sending it out to the other end, if the other end does not forbid it via the nc->do_not_pad flag. This ensures a backend as an Ethernet sender does not violate the spec. But with this change, the behavior of dropping short frames from SLiRP/TAP interfaces in the NIC model cannot be emulated because it always receives a packet that is spec complaint. The capability of sending short frames from NIC models is still supported and short frames can still pass through SLiRP/TAP. This commit should be able to fix the issue as reported with some NIC models before, that ARP requests get dropped, preventing the guest from becoming visible on the network. It was workarounded in these NIC models on the receive path, that when a short frame is received, it is padded up to 60 bytes. The following 2 commits seem to be the one to workaround this issue in e1000 and vmxenet3 before, and should probably be reverted. commit 78aeb23eded2 ("e1000: Pad short frames to minimum size (60 bytes)") commit 40a87c6c9b11 ("vmxnet3: Pad short frames to minimum size (60 bytes)") Signed-off-by: Bin Meng <bmeng.cn@gmail.com> Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com> Signed-off-by: Jason Wang <jasowang@redhat.com>
2021-03-17 14:26:29 +08:00
if (eth_pad_short_frame(min_pkt, &min_pktsz, buf, size)) {
buf = min_pkt;
size = min_pktsz;
}
}
qemu_send_packet(&s->nc, buf, size);
tap_win32_free_buffer(s->handle, orig_buf);
}
}
static bool tap_has_ufo(NetClientState *nc)
{
return false;
}
static bool tap_has_vnet_hdr(NetClientState *nc)
{
return false;
}
int tap_probe_vnet_hdr_len(int fd, int len)
{
return 0;
}
void tap_fd_set_vnet_hdr_len(int fd, int len)
{
}
int tap_fd_set_vnet_le(int fd, int is_le)
{
return -EINVAL;
}
int tap_fd_set_vnet_be(int fd, int is_be)
{
return -EINVAL;
}
static void tap_using_vnet_hdr(NetClientState *nc, bool using_vnet_hdr)
{
}
static void tap_set_offload(NetClientState *nc, int csum, int tso4,
int tso6, int ecn, int ufo)
{
}
struct vhost_net *tap_get_vhost_net(NetClientState *nc)
{
return NULL;
}
static bool tap_has_vnet_hdr_len(NetClientState *nc, int len)
{
return false;
}
static void tap_set_vnet_hdr_len(NetClientState *nc, int len)
{
abort();
}
static NetClientInfo net_tap_win32_info = {
qapi: Change Netdev into a flat union This is a mostly-mechanical conversion that creates a new flat union 'Netdev' QAPI type that covers all the branches of the former 'NetClientOptions' simple union, where the branches are now listed in a new 'NetClientDriver' enum rather than generated from the simple union. The existence of a flat union has no change to the command line syntax accepted for new code, and will make it possible for a future patch to switch the QMP command to parse a boxed union for no change to valid QMP; but it does have some ripple effect on the C code when dealing with the new types. While making the conversion, note that the 'NetLegacy' type remains unchanged: it applies only to legacy command line options, and will not be ported to QMP, so it should remain a wrapper around a simple union; to avoid confusion, the type named 'NetClientOptions' is now gone, and we introduce 'NetLegacyOptions' in its place. Then, in the C code, we convert from NetLegacy to Netdev as soon as possible, so that the bulk of the net stack only has to deal with one QAPI type, not two. Note that since the old legacy code always rejected 'hubport', we can just omit that branch from the new 'NetLegacyOptions' simple union. Based on an idea originally by Zoltán Kővágó <DirtY.iCE.hu@gmail.com>: Message-Id: <01a527fbf1a5de880091f98cf011616a78adeeee.1441627176.git.DirtY.iCE.hu@gmail.com> although the sed script in that patch no longer applies due to other changes in the tree since then, and I also did some manual cleanups (such as fixing whitespace to keep checkpatch happy). Signed-off-by: Eric Blake <eblake@redhat.com> Message-Id: <1468468228-27827-13-git-send-email-eblake@redhat.com> Reviewed-by: Markus Armbruster <armbru@redhat.com> [Fixup from Eric squashed in] Signed-off-by: Markus Armbruster <armbru@redhat.com>
2016-07-14 11:50:23 +08:00
.type = NET_CLIENT_DRIVER_TAP,
.size = sizeof(TAPState),
.receive = tap_receive,
.cleanup = tap_cleanup,
.has_ufo = tap_has_ufo,
.has_vnet_hdr = tap_has_vnet_hdr,
.has_vnet_hdr_len = tap_has_vnet_hdr_len,
.using_vnet_hdr = tap_using_vnet_hdr,
.set_offload = tap_set_offload,
.set_vnet_hdr_len = tap_set_vnet_hdr_len,
};
static int tap_win32_init(NetClientState *peer, const char *model,
const char *name, const char *ifname)
{
NetClientState *nc;
TAPState *s;
tap_win32_overlapped_t *handle;
if (tap_win32_open(&handle, ifname) < 0) {
printf("tap: Could not open '%s'\n", ifname);
return -1;
}
nc = qemu_new_net_client(&net_tap_win32_info, peer, model, name);
s = DO_UPCAST(TAPState, nc, nc);
snprintf(s->nc.info_str, sizeof(s->nc.info_str),
"tap: ifname=%s", ifname);
s->handle = handle;
qemu_add_wait_object(s->handle->tap_semaphore, tap_win32_send, s);
return 0;
}
int net_init_tap(const Netdev *netdev, const char *name,
NetClientState *peer, Error **errp)
{
/* FIXME error_setg(errp, ...) on failure */
const NetdevTapOptions *tap;
qapi: Change Netdev into a flat union This is a mostly-mechanical conversion that creates a new flat union 'Netdev' QAPI type that covers all the branches of the former 'NetClientOptions' simple union, where the branches are now listed in a new 'NetClientDriver' enum rather than generated from the simple union. The existence of a flat union has no change to the command line syntax accepted for new code, and will make it possible for a future patch to switch the QMP command to parse a boxed union for no change to valid QMP; but it does have some ripple effect on the C code when dealing with the new types. While making the conversion, note that the 'NetLegacy' type remains unchanged: it applies only to legacy command line options, and will not be ported to QMP, so it should remain a wrapper around a simple union; to avoid confusion, the type named 'NetClientOptions' is now gone, and we introduce 'NetLegacyOptions' in its place. Then, in the C code, we convert from NetLegacy to Netdev as soon as possible, so that the bulk of the net stack only has to deal with one QAPI type, not two. Note that since the old legacy code always rejected 'hubport', we can just omit that branch from the new 'NetLegacyOptions' simple union. Based on an idea originally by Zoltán Kővágó <DirtY.iCE.hu@gmail.com>: Message-Id: <01a527fbf1a5de880091f98cf011616a78adeeee.1441627176.git.DirtY.iCE.hu@gmail.com> although the sed script in that patch no longer applies due to other changes in the tree since then, and I also did some manual cleanups (such as fixing whitespace to keep checkpatch happy). Signed-off-by: Eric Blake <eblake@redhat.com> Message-Id: <1468468228-27827-13-git-send-email-eblake@redhat.com> Reviewed-by: Markus Armbruster <armbru@redhat.com> [Fixup from Eric squashed in] Signed-off-by: Markus Armbruster <armbru@redhat.com>
2016-07-14 11:50:23 +08:00
assert(netdev->type == NET_CLIENT_DRIVER_TAP);
tap = &netdev->u.tap;
if (!tap->has_ifname) {
error_report("tap: no interface name");
return -1;
}
if (tap_win32_init(peer, "tap", name, tap->ifname) == -1) {
return -1;
}
return 0;
}
int tap_enable(NetClientState *nc)
{
abort();
}
int tap_disable(NetClientState *nc)
{
abort();
}