/* ****************************************************************
 * Portions Copyright 2005, 2009-2011 VMware, Inc.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 * ****************************************************************/

/******************************************************************
 *
 * linux_net.c
 *
 * From linux-2.6.24-7/include/linux/netdevice.h:
 *
 * Authors:     Ross Biro
 *              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *              Corey Minyard <wf-rch!minyard@relay.EU.net>
 *              Donald J. Becker, <becker@cesdis.gsfc.nasa.gov>
 *              Alan Cox, <Alan.Cox@linux.org>
 *              Bjorn Ekwall. <bj0rn@blox.se>
 *              Pekka Riikonen <priikone@poseidon.pspt.fi>
 *
 * From linux-2.6.27-rc9/net/core/dev.c:
 *
 * Authors:     Ross Biro
 *              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *              Mark Evans, <evansmp@uhura.aston.ac.uk>
 *
 * Additional Authors:
 *              Florian la Roche <rzsfl@rz.uni-sb.de>
 *              Alan Cox <gw4pts@gw4pts.ampr.org>
 *              David Hinds <dahinds@users.sourceforge.net>
 *              Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
 *              Adam Sulmicki <adam@cfar.umd.edu>
 *              Pekka Riikonen <priikone@poesidon.pspt.fi>
 *
 * From linux-2.6.27-rc9/net/sched/sch_generic.c:
 *
 * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *              Jamal Hadi Salim, <hadi@cyberus.ca> 990601
 *
 ******************************************************************/

#define NET_DRIVER      // Special case for Net portion of VMKLINUX

#include <linux/version.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/pci.h>
#include <linux/netdevice.h>
#include <linux/if_vlan.h>
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/tcp.h>
#include <linux/ethtool.h>
#include <linux/rtnetlink.h>    /* BUG_TRAP */
#include <linux/workqueue.h>
#include <linux/dma-mapping.h>
#include <asm/uaccess.h>
#include <asm/page.h>           /* phys_to_page */

#include "vmkapi.h"
#include "linux_stubs.h"
#include "linux_pci.h"
#include "linux_stress.h"
#include "linux_task.h"
#include "linux_net.h"
#include "linux_cna.h"
#include "linux_dcb.h"

#include <vmkplexer_chardevs.h>

#define VMKLNX_LOG_HANDLE LinNet
#include "vmklinux_log.h"

/* default watchdog timeout value and timer period for device */
#define WATCHDOG_DEF_TIMEO 5 * HZ
#define WATCHDOG_DEF_TIMER 1000


enum {
   LIN_NET_HARD_QUEUE_XOFF = 0x0001,   /* hardware queue is stopped */
};

/*
 * NOTE: Try not to put any critical (data path) fields in LinNetDev.
 *       Instead, embed them in net_device, where they are next to
 *       their cache line brethren.
 */
struct LinNetDev {
   unsigned int   napiNextId;    /* Next unique id for napi context. */
   unsigned long  flags;         /* vmklinux private device flags */
   unsigned short padded;        /* Padding added by alloc_netdev() */
   struct net_device linNetDev __attribute__((aligned(NETDEV_ALIGN)));
   /*
    * WARNING: linNetDev must be last because it is assumed that
    * private data area follows immediately after.
    */
};

typedef struct LinNetDev LinNetDev;
typedef int (*PollHandler) (void* clientData, vmk_uint32 vector);

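/*
 * Recover the enclosing LinNetDev from an embedded struct net_device
 * pointer (container_of-style): linNetDev sits at a fixed offset inside
 * LinNetDev, so subtracting that offset yields the wrapper structure.
 */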
#define get_LinNetDev(net_device) \
   ((LinNetDev*)(((char*)net_device)-(offsetof(struct LinNetDev, linNetDev))))

static vmk_Timer devWatchdogTimer;
static void link_state_work_cb(struct work_struct *work);
static void watchdog_work_cb(struct work_struct *work);
static struct delayed_work linkStateWork;
static struct delayed_work watchdogWork;
static unsigned linkStateTimerPeriod;
static vmk_ConfigParamHandle linkStateTimerPeriodConfigHandle;
static vmk_ConfigParamHandle maxNetifTxQueueLenConfigHandle;
static unsigned blockTotalSleepMsec;
static vmk_ConfigParamHandle blockTotalSleepMsecHandle;
struct net_device *dev_base = NULL;
EXPORT_SYMBOL(dev_base);
DEFINE_RWLOCK(dev_base_lock);
int netdev_max_backlog = 300;
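/*
 * 0xEDB88320 is the bit-reversed (LSB-first) form of the standard
 * Ethernet CRC-32 polynomial; eth_crc32_poly_tbl_le is the matching
 * per-byte lookup table used for byte-at-a-time CRC computation.
 */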
static const unsigned eth_crc32_poly_le = 0xedb88320;
static unsigned eth_crc32_poly_tbl_le[256];
static uint64_t max_phys_addr;

static vmk_ConfigParamHandle useHwIPv6CsumHandle;
static vmk_ConfigParamHandle useHwCsumForIPv6CsumHandle;
static vmk_ConfigParamHandle useHwTSO6Handle;
static vmk_ConfigParamHandle useHwTSOHandle;

/*
 * The global packet list for receiving packets when the system is in
 * the panic/debug status.
 */
static vmk_PktList debugPktList = NULL;

/* Stress option handles */
static vmk_StressOptionHandle stressNetGenTinyArpRarp;
static vmk_StressOptionHandle stressNetIfCorruptEthHdr;
static vmk_StressOptionHandle stressNetIfCorruptRxData;
static vmk_StressOptionHandle stressNetIfCorruptRxTcpUdp;
static vmk_StressOptionHandle stressNetIfCorruptTx;
static vmk_StressOptionHandle stressNetIfFailHardTx;
static vmk_StressOptionHandle stressNetIfFailRx;
static vmk_StressOptionHandle stressNetIfFailTxAndStopQueue;
static vmk_StressOptionHandle stressNetIfForceHighDMAOverflow;
static vmk_StressOptionHandle stressNetIfForceRxSWCsum;
static vmk_StressOptionHandle stressNetNapiForceBackupWorldlet;
static vmk_StressOptionHandle stressNetBlockDevIsSluggish;

/* LRO config option */
static vmk_ConfigParamHandle vmklnxLROEnabledConfigHandle;
static vmk_ConfigParamHandle vmklnxLROMaxAggrConfigHandle;
unsigned int vmklnxLROEnabled;
unsigned int vmklnxLROMaxAggr;

extern void LinStress_SetupStress(void);
extern void LinStress_CleanupStress(void);
extern void LinStress_CorruptSkbData(struct sk_buff*, unsigned int,
                                     unsigned int);
extern void LinStress_CorruptRxData(vmk_PktHandle*, struct sk_buff *);
extern void LinStress_CorruptEthHdr(struct sk_buff *skb);

static VMK_ReturnStatus map_pkt_to_skb(struct net_device *dev,
                                       struct netdev_queue *queue,
                                       vmk_PktHandle *pkt,
                                       struct sk_buff **pskb);
static void do_free_skb(struct sk_buff *skb);
static struct sk_buff *do_alloc_skb(kmem_cache_t *skb, gfp_t flags);
static VMK_ReturnStatus BlockNetDev(void *clientData);
static void SetNICLinkStatus(struct net_device *dev);
static VMK_ReturnStatus skb_gen_pkt_frags(struct sk_buff *skb);

static inline VMK_ReturnStatus
marshall_from_vmknetq_id(vmk_NetqueueQueueID vmkqid,
                         vmknetddi_queueops_queueid_t *qid);
static ATOMIC_NOTIFIER_HEAD(netdev_notifier_list);
static vmk_Bool napi_poll(void *ptr);

inline void vmklnx_set_skb_frags_owner_vmkernel(struct sk_buff *);

/*
 * Deal with the transition away from exposing vmk_Worldlet and
 * vmk_Uplink* directly through the vmklnx headers.
 */
VMK_ASSERT_LIST(VMKLNX_NET,
   VMK_ASSERT_ON_COMPILE(sizeof(vmk_Worldlet) == sizeof(void *));
   VMK_ASSERT_ON_COMPILE(sizeof(vmk_LinkState) ==
                         sizeof(vmklnx_uplink_link_state));
   VMK_ASSERT_ON_COMPILE(sizeof(vmk_UplinkPTOpFunc) ==
                         sizeof(void *));
   VMK_ASSERT_ON_COMPILE(sizeof(vmk_NetqueueQueueID) ==
                         sizeof(vmk_uint64));
   VMK_ASSERT_ON_COMPILE(VMKLNX_UPLINK_LINK_DOWN == VMK_LINK_STATE_DOWN);
   VMK_ASSERT_ON_COMPILE(VMKLNX_UPLINK_LINK_UP == VMK_LINK_STATE_UP);
   VMK_ASSERT_ON_COMPILE(sizeof(vmk_UplinkWatchdogPanicModState) ==
                         sizeof(vmklnx_uplink_watchdog_panic_mod_state));
   VMK_ASSERT_ON_COMPILE(sizeof(vmk_UplinkWatchdogPanicModState) ==
                         sizeof(vmklnx_uplink_watchdog_panic_mod_state));
   VMK_ASSERT_ON_COMPILE(VMKLNX_UPLINK_WATCHDOG_PANIC_MOD_DISABLE ==
                         VMK_UPLINK_WATCHDOG_PANIC_MOD_DISABLE);
   VMK_ASSERT_ON_COMPILE(VMKLNX_UPLINK_WATCHDOG_PANIC_MOD_ENABLE ==
                         VMK_UPLINK_WATCHDOG_PANIC_MOD_ENABLE);
   VMK_ASSERT_ON_COMPILE(sizeof(vmk_NetqueueQueueID) == sizeof(vmk_uint64));
   VMK_ASSERT_ON_COMPILE(VMKLNX_PKT_HEAP_MAX_SIZE == VMK_PKT_HEAP_MAX_SIZE);
)

/*
 * Section: Receive path
 */

/*
 *----------------------------------------------------------------------------
 *
 * map_skb_to_pkt --
 *
 *    Converts sk_buff to PktHandle before handing packet to vmkernel.
 *
 * Results:
 *    NET_RX_SUCCESS on success; NET_RX_DROP if the packet is dropped.
 *
 * Side effects:
 *    Drops packet on the floor if unsuccessful.
 *
 *----------------------------------------------------------------------------
 */
static int
map_skb_to_pkt(struct sk_buff *skb)
{
   VMK_ReturnStatus status;
   vmk_PktHandle *pkt = NULL;
   struct net_device *dev = skb->dev;

   /* we need to ensure the blocked status */
   if (unlikely(test_bit(__LINK_STATE_BLOCKED, &dev->state))) {
      VMK_ASSERT(!(dev->features & NETIF_F_CNA));
      goto drop;
   }
   if (unlikely(skb->len == 0)) {
      static uint32_t logThrottleCounter = 0;
      VMKLNX_THROTTLED_INFO(logThrottleCounter,
                            "dropping zero length packet "
                            "(skb->len=%u, skb->data_len=%u)\n",
                            skb->len, skb->data_len);
      VMK_ASSERT(!(dev->features & NETIF_F_CNA));
      goto drop;
   }

   if (unlikely(skb_gen_pkt_frags(skb) != VMK_OK)) {
      VMK_ASSERT(!(dev->features & NETIF_F_CNA));
      goto drop;
   }
   pkt = skb->pkt;

   if (unlikely(vmk_PktFrameLenSet(pkt, skb->len) != VMK_OK)) {
      printk("unable to set skb->pkt %p frame length with skb->len = %u\n",
             pkt, skb->len);
      VMK_ASSERT(VMK_FALSE);
      goto drop;
   }

   if (skb_shinfo(skb)->gso_type != 0) {
      switch (skb_shinfo(skb)->gso_type) {
      case SKB_GSO_TCPV4:
         status = vmk_PktSetLargeTcpPacket(pkt, skb_shinfo(skb)->gso_size);
         VMK_ASSERT(status == VMK_OK);
         break;
      default:
         printk("unable to process gso type 0x%x on the rx path\n",
                skb_shinfo(skb)->gso_type);
         VMK_ASSERT(VMK_FALSE);
         goto drop;
      }
   }

   /*
    * The following extracts vlan tag from skb.
    * The check just looks at a field of skb, so we
    * don't bother to check whether vlan is enabled.
    */
   if (vlan_rx_tag_present(skb)) {
      VMK_ASSERT(vmk_PktVlanIDGet(pkt) == 0);

      if ((vlan_rx_tag_get(skb) & VLAN_VID_MASK) > VLAN_MAX_VALID_VID) {
         static uint32_t logThrottleCounter = 0;
         VMKLNX_THROTTLED_INFO(logThrottleCounter,
                               "invalid vlan tag: %d dropped",
                               vlan_rx_tag_get(skb) & VLAN_VID_MASK);
         VMK_ASSERT(!(dev->features & NETIF_F_CNA));
         goto drop;
      }
      status = vmk_PktVlanIDSet(pkt, vlan_rx_tag_get(skb) & VLAN_VID_MASK);
      VMK_ASSERT(status == VMK_OK);
      status = vmk_PktPrioritySet(pkt,
                  (vlan_rx_tag_get(skb) & VLAN_1PTAG_MASK) >> VLAN_1PTAG_SHIFT);
      VMK_ASSERT(status == VMK_OK);
      VMKLNX_DEBUG(2, "%s: rx vlan tag %u present with priority %u",
                   dev->name, vmk_PktVlanIDGet(pkt), vmk_PktPriorityGet(pkt));

#ifdef VMX86_DEBUG
      {
         // generate arp/rarp frames that are < ETH_MIN_FRAME_LEN to
         // create test cases for PR 106153.
         struct ethhdr *eh = (struct ethhdr *)vmk_PktFrameMappedPointerGet(pkt);

         if ((eh->h_proto == ntohs(ETH_P_ARP)
              || eh->h_proto == ntohs(ETH_P_RARP))
             && VMKLNX_STRESS_DEBUG_COUNTER(stressNetGenTinyArpRarp)) {
            int old_frameMappedLen;
            int target_len = (ETH_ZLEN - VLAN_HLEN);

            old_frameMappedLen = vmk_PktFrameMappedLenGet(pkt);

            if (target_len <= old_frameMappedLen) {
               int old_len;
               int len;

               old_len = vmk_PktFrameLenGet(pkt);
               vmk_PktFrameLenSet(pkt, target_len);
               len = vmk_PktFrameLenGet(pkt);
               VMKLNX_DEBUG(1, "shorten arp/rarp pkt to %d from %d",
                            len, old_len);
            }
         }
      }
#endif
   }

   if (skb->ip_summed != CHECKSUM_NONE &&
       !VMKLNX_STRESS_DEBUG_OPTION(stressNetIfForceRxSWCsum)) {
      status = vmk_PktSetCsumVfd(pkt);
      VMK_ASSERT(status == VMK_OK);
   }

   if (likely(!(dev->features & NETIF_F_CNA))) {
      if (VMKLNX_STRESS_DEBUG_COUNTER(stressNetIfCorruptRxTcpUdp)) {
         LinStress_CorruptSkbData(skb, 40, 14);
      }
      if (VMKLNX_STRESS_DEBUG_COUNTER(stressNetIfCorruptRxData)) {
         LinStress_CorruptRxData(pkt, skb);
      }
      if (VMKLNX_STRESS_DEBUG_COUNTER(stressNetIfCorruptEthHdr)) {
         LinStress_CorruptEthHdr(skb);
      }
   }

   dev->linnet_rx_packets++;

   if (!(dev->features & NETIF_F_CNA)) {
      do_free_skb(skb);
   } else {
      /*
       * Packets received for FCOE will be free'd by the OpenFCOE stack.
       */
      vmk_PktSetCompletionData(pkt, skb, dev->genCount, VMK_TRUE);
   }
   return NET_RX_SUCCESS;

drop:
   dev_kfree_skb_any(skb);
   dev->linnet_rx_dropped++;
   VMK_ASSERT(!(dev->features & NETIF_F_CNA));
   return NET_RX_DROP;
}

/**
 * netif_rx - post buffer to the network code
 * @skb: buffer to post
 *
 * This function receives a packet from a device driver and queues it for
 * the upper (protocol) levels to process. It always succeeds. The buffer
 * may be dropped during processing for congestion control or by the
 * protocol layers.
 *
 * RETURN VALUE:
 * NET_RX_SUCCESS (no congestion)
 * NET_RX_DROP (packet was dropped)
 *
 */
/* _VMKLNX_CODECHECK_: netif_rx */
int
netif_rx(struct sk_buff *skb)
{
   struct net_device *dev = skb->dev;
   vmk_PktHandle *pkt;
   int status;

   VMK_ASSERT(dev);

   VMKLNX_DEBUG(1, "Napi is not enabled for device %s\n", dev->name);

   pkt = skb->pkt;
   VMK_ASSERT(pkt);

   status = map_skb_to_pkt(skb);
   if (likely(status == NET_RX_SUCCESS)) {
      vmk_PktQueueForRxProcess(pkt, dev->uplinkDev);
   }

   return status;
}
EXPORT_SYMBOL(netif_rx);

/**
 * netif_receive_skb - process receive buffer from network
 * @skb: buffer to process
 *
 * netif_receive_skb() is the main receive data processing function.
 * It always succeeds. The buffer may be dropped during processing
 * for congestion control or by the protocol layers.
 *
 * ESX Deviation Notes:
 * This function may only be called from the napi poll callback routine.
 *
 * RETURN VALUE:
 * NET_RX_SUCCESS (no congestion)
 * NET_RX_DROP (packet was dropped)
 */
/* _VMKLNX_CODECHECK_: netif_receive_skb */
int
netif_receive_skb(struct sk_buff *skb)
{
   struct net_device *dev = skb->dev;
   vmk_NetPoll pollPriv;
   vmk_Worldlet wdt;
   struct napi_struct *napi = NULL;
   vmk_PktHandle *pkt;
   int status;

   VMK_ASSERT(dev);

   /*
    * When the system is not in the panic/debug status, put the arrived packets into
    * skb->napi->rxPktList.
    */
   if (skb->napi == NULL) {
      if (unlikely(vmk_WorldletGetCurrent(&wdt, (void **)&pollPriv) != VMK_OK)) {
         VMK_ASSERT(VMK_FALSE);
         dev_kfree_skb_any(skb);
         dev->linnet_rx_dropped++;
         status = NET_RX_DROP;
         goto done;
      } else {
         /*
          * When the system is in the panic/debug status, the current worldlet is the
          * debug worldlet rather than the napi_poll worldlet. In this case, put the
          * arrived packets into debugPktList. This list will be processed by
          * FlushRxBuffers, because netdump/netdebug will bypass the vswitch to read
          * the packets.
          */
         if (vmk_NetPollGetCurrent(&pollPriv) == VMK_OK) {
            napi = (struct napi_struct *)vmk_NetPollGetPrivate(pollPriv);
         }
         if (!napi) {
            pkt = skb->pkt;
            status = map_skb_to_pkt(skb);
            if (likely(status == NET_RX_SUCCESS)) {
               if (debugPktList == NULL) {
                  if (vmk_PktListAlloc(&debugPktList) != VMK_OK) {
                     dev_kfree_skb_any(skb);
                     dev->linnet_rx_dropped++;
                     status = NET_RX_DROP;
                     goto done;
                  }
                  vmk_PktListInit(debugPktList);
               }
               VMK_ASSERT(pkt);
               vmk_PktListAppendPkt(debugPktList, pkt);
            }
            goto done;
         } else {
            VMK_ASSERT(pollPriv != NULL);
            skb->napi = napi;
         }
      }
   }

   VMK_ASSERT(skb->napi != NULL);
   VMK_ASSERT(skb->napi->dev == skb->dev);

   pkt = skb->pkt;
   napi = skb->napi;

   status = map_skb_to_pkt(skb);
   if (likely(status == NET_RX_SUCCESS)) {
      VMK_ASSERT(napi);
      VMK_ASSERT(pkt);
      vmk_NetPollQueueRxPkt(napi->net_poll, pkt);
   }

done:
   return status;
}
EXPORT_SYMBOL(netif_receive_skb);

/*
 *----------------------------------------------------------------------------
 *
 * napi_poll --
 *
 *    Callback registered with the net poll handler.
 *    This handler is responsible for polling the different napi contexts.
 *
 * Results:
 *    VMK_TRUE if we need to keep polling and VMK_FALSE otherwise.
 *
 * Side effects:
 *    None.
 *
 *----------------------------------------------------------------------------
 */

static vmk_Bool
napi_poll(void *ptr)
{
   VMK_ReturnStatus status = VMK_OK;
   struct napi_struct *napi = (struct napi_struct *)ptr;

   /*
    * napi_schedule_prep()/napi_schedule() depend on accurately seeing whether
    * or not the worldlet is running and assume that the check for polling
    * executes only after the worldlet has been dispatched. If the CPU
    * aggressively prefetches the test_bit() load here so that it occurs
    * prior to the worldlet being dispatched then __napi_schedule() could
    * avoid kicking the worldlet (seeing that it had not yet run), but at
    * the same time the aggressive prefetch would result in us seeing a
    * clear napi->state and returning VMK_WDT_SUSPEND from here.
    * Consequently an smp_mb() is required here; we need to ensure that none of
    * our loads here occur prior to any stores that may have occurred by the
    * caller of this function.
    */
   smp_mb();

   if (test_bit(NAPI_STATE_SCHED, &napi->state)) {
      VMKAPI_MODULE_CALL(napi->dev->module_id, status, napi->poll, napi,
                         napi->weight);
      if (vmklnxLROEnabled && !(napi->dev->features & NETIF_F_SW_LRO)) {
         /* Flush all the lro sessions as we are done polling the napi context */
         lro_flush_all(&napi->lro_mgr);
      }
   }

   if (test_bit(NAPI_STATE_SCHED, &napi->state)) {
      return VMK_TRUE;
   } else {
      return VMK_FALSE;
   }
}

/*
 *----------------------------------------------------------------------------
 *
 * netdev_poll --
 *
 *    Callback registered for the devices that are unable to create their own
 *    poll. This handler is responsible for polling the different napi contexts.
 *
 * Results:
 *    VMK_TRUE if we need to keep polling and VMK_FALSE otherwise.
 *
 * Side effects:
 *    None.
 *
 *----------------------------------------------------------------------------
 */

static vmk_Bool
netdev_poll(void *private)
{
   struct net_device *dev = private;
   vmk_Bool needWork;
   struct napi_struct *napi;
   VMK_ReturnStatus status = VMK_OK;

   needWork = VMK_FALSE;

   spin_lock(&dev->napi_lock);
   list_for_each_entry(napi, &dev->napi_list, dev_list) {
      if (napi->dev_poll &&
          (test_bit(NAPI_STATE_SCHED, &napi->state))) {
         needWork = VMK_TRUE;
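         /*
          * Rotate the napi context we are about to service to the tail of
          * the device list so that successive backup polls round-robin
          * across the device's napi contexts.
          */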
         list_move_tail(&napi->dev_list, &dev->napi_list);
         break;
      }
   }
   spin_unlock(&dev->napi_lock);

   if (!needWork) {
      return VMK_FALSE;
   }

   VMKAPI_MODULE_CALL(napi->dev->module_id, status, napi->poll, napi,
                      napi->weight);
   if (vmklnxLROEnabled && !(napi->dev->features & NETIF_F_SW_LRO)) {
      /* Flush all the lro sessions as we are done polling the napi context */
      lro_flush_all(&napi->lro_mgr);
   }

   return VMK_TRUE;
}


/*
 *----------------------------------------------------------------------------
 *
 * napi_poll_init --
 *
 *    Initialize a napi context. If the function is unable to create a unique
 *    net poll, it will attach the napi context to the one provided by the
 *    device it belongs to.
 *
 * Results:
 *    VMK_OK on success, VMK_NO_MEMORY if resources could not be allocated.
 *
 * Side effects:
 *    None.
 *
 *----------------------------------------------------------------------------
 */
static VMK_ReturnStatus
napi_poll_init(struct napi_struct *napi)
{
   VMK_ReturnStatus ret;
   vmk_ServiceAcctID serviceID;
   vmk_NetPollProperties pollInit;

   spin_lock(&napi->dev->napi_lock);
   napi->napi_id = get_LinNetDev(napi->dev)->napiNextId++;
   spin_unlock(&napi->dev->napi_lock);

   ret = vmk_ServiceGetID("netdev", &serviceID);
   VMK_ASSERT(ret == VMK_OK);

   napi->napi_wdt_priv.dev = napi->dev;
   napi->napi_wdt_priv.napi = napi;
   napi->dev_poll = VMK_FALSE;
   napi->vector = 0;

   pollInit.poll = napi_poll;
   pollInit.priv = napi;

   if (napi->dev->features & NETIF_F_CNA) {
      pollInit.deliveryCallback = LinuxCNA_Poll;
      pollInit.features = VMK_NETPOLL_CUSTOM_DELIVERY_CALLBACK;
   } else {
      pollInit.deliveryCallback = NULL;
      pollInit.features = VMK_NETPOLL_NONE;
   }
   ret = vmk_NetPollInit(&pollInit, serviceID, (vmk_NetPoll *)&napi->net_poll);
   if (ret != VMK_OK) {
      VMKLNX_WARN("Unable to create net poll for %s, using backup",
                  napi->dev->name);
      if (napi->dev->reg_state == NETREG_REGISTERED) {
         napi->net_poll = napi->dev->net_poll;
         napi->net_poll_type = NETPOLL_BACKUP;

         /*
          * Use device global net poll for polling this napi_struct,
          * if net poll creation fails
          */
         napi->dev_poll = VMK_TRUE;
      } else {
         napi->dev->reg_state = NETREG_EARLY_NAPI_ADD_FAILED;
         return VMK_FAILURE;
      }
   } else {
      napi->net_poll_type = NETPOLL_DEFAULT;
   }

   if (napi->dev->uplinkDev) {
      vmk_Name pollName;
      (void) vmk_NameFormat(&pollName, "-%d", napi->napi_id);
      vmk_NetPollRegisterUplink(napi->net_poll, napi->dev->uplinkDev, pollName, VMK_TRUE);
   }

   spin_lock(&napi->dev->napi_lock);
   list_add(&napi->dev_list, &napi->dev->napi_list);
   spin_unlock(&napi->dev->napi_lock);

   /*
    * Keep track of which poll is (most probably) driving the
    * default queue. For netqueue capable nics, we call
    * VMKNETDDI_QUEUEOPS_OP_GET_DEFAULT_QUEUE to figure out the
    * default poll. For non-netqueue nics, the first successful
    * netif_napi_add wins.
    */
   if (!napi->dev->default_net_poll && napi->net_poll) {
      napi->dev->default_net_poll = napi->net_poll;
   }

   return VMK_OK;
}

/*
 *----------------------------------------------------------------------------
 *
 * netdev_poll_init --
 *
 *    Initialize a device's backup net poll for the napi contexts that are
 *    not able to create their own.
 *
 * Results:
 *    VMK_OK if everything is ok, VMK_* otherwise.
 *
 * Side effects:
 *    None.
 *
 *----------------------------------------------------------------------------
 */
static VMK_ReturnStatus
netdev_poll_init(struct net_device *dev)
{
   VMK_ReturnStatus ret;
   vmk_ServiceAcctID serviceID;
   vmk_NetPollProperties pollInit;

   VMK_ASSERT(dev);

   ret = vmk_ServiceGetID("netdev", &serviceID);
   VMK_ASSERT(ret == VMK_OK);

   dev->napi_wdt_priv.dev = dev;
   dev->napi_wdt_priv.napi = NULL;

   pollInit.poll = netdev_poll;
   pollInit.priv = dev;

   if (dev->features & NETIF_F_CNA) {
      pollInit.deliveryCallback = LinuxCNADev_Poll;
      pollInit.features = VMK_NETPOLL_CUSTOM_DELIVERY_CALLBACK;
   } else {
      pollInit.deliveryCallback = NULL;
      pollInit.features = VMK_NETPOLL_NONE;
   }

   ret = vmk_NetPollInit(&pollInit, serviceID, (vmk_NetPoll *)&dev->net_poll);
   return ret;
}

/*
 *----------------------------------------------------------------------------
 *
 * napi_poll_cleanup --
 *
 *    Cleanup a napi structure.
 *
 * Results:
 *    None.
 *
 * Side effects:
 *    None.
 *
 *----------------------------------------------------------------------------
 */
static void
napi_poll_cleanup(struct napi_struct *napi)
{
   VMK_ASSERT(napi);

   if (napi->net_poll == napi->dev->default_net_poll) {
      napi->dev->default_net_poll = NULL;
   }

   if (likely(!napi->dev_poll)) {
      if (napi->vector) {
         vmk_NetPollVectorUnSet(napi->net_poll);
         napi->vector = 0;
      }

      if (napi->net_poll) {
         vmk_NetPollCleanup(napi->net_poll);
         napi->net_poll = NULL;
      }
   }
   list_del_init(&napi->dev_list);
}

/*
 *----------------------------------------------------------------------------
 *
 * netdev_poll_cleanup --
 *
 *    Cleanup all napi structures associated with the device.
 *
 * Results:
 *    None.
 *
 * Side effects:
 *    None.
 *
 *----------------------------------------------------------------------------
 */
static void
netdev_poll_cleanup(struct net_device *dev)
{
   VMK_ASSERT(dev);
   struct list_head *ele, *next;
   struct napi_struct *napi;

   /*
    * Cleanup all napi structs
    */
   list_for_each_safe(ele, next, &dev->napi_list) {
      napi = list_entry(ele, struct napi_struct, dev_list);
      napi_poll_cleanup(napi);
   }

   if (dev->net_poll) {
      vmk_NetPollCleanup(dev->net_poll);
      dev->net_poll = NULL;
   }
}

/**
 * __napi_schedule - schedule for receive
 * @napi: entry to schedule
 *
 * The entry's receive function will be scheduled to run
 *
 * RETURN VALUE:
 * None
 */
/* _VMKLNX_CODECHECK_: __napi_schedule */
void
__napi_schedule(struct napi_struct *napi)
{
   vmk_uint32 myVector = 0;
   vmk_Bool inIntr = vmk_ContextIsInterruptHandler(&myVector);
   VMK_ASSERT(napi);

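   /*
    * When invoked from an interrupt handler, (re)bind the net poll to the
    * vector that raised the interrupt so the poll stays associated with
    * the device's interrupt vector.
    */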
   if (unlikely(napi->vector != myVector)) {
      if (likely(inIntr)) {
         vmk_NetPollVectorSet(napi->net_poll, myVector);
         napi->vector = myVector;
      }
   }

   vmk_NetPollActivate(napi->net_poll);

}
EXPORT_SYMBOL(__napi_schedule);

/**
 * napi_disable_timeout - prevent NAPI from scheduling
 * @napi: napi context
 * @timeout: number of ticks to keep retrying, or -1 to wait indefinitely
 *
 * Stop NAPI from being scheduled on this context.
 * Waits until any outstanding processing completes.
 *
 * RETURN VALUE:
 * VMK_TRUE if the wait timed out, VMK_FALSE otherwise
 */
static vmk_Bool
napi_disable_timeout(struct napi_struct *napi, int timeout)
{
   VMK_ReturnStatus status;
   vmk_NetPollState state;
   vmk_Bool doTimeout = (timeout == -1) ? VMK_FALSE : VMK_TRUE;
   vmk_Bool timedOut = VMK_TRUE;

   VMK_ASSERT(napi);

   while (timeout) {
      set_bit(NAPI_STATE_DISABLE, &napi->state);
      status = vmk_NetPollCheckState(napi->net_poll, &state);
      VMK_ASSERT(status == VMK_OK);
      /* If the poll isn't running/set to run, then we see if we can
       * disable it from running in the future by blocking off
       * NAPI_STATE_SCHED.
       */
      if (state == VMK_NETPOLL_DISABLED &&
          !test_and_set_bit(NAPI_STATE_SCHED, &napi->state)) {
         vmk_NetPollCheckState(napi->net_poll, &state);
         VMK_ASSERT(state == VMK_NETPOLL_DISABLED);
         timedOut = VMK_FALSE;
         break;
      }
      /**
       * Give the flush a chance to run.
       */
      schedule_timeout_interruptible(1);
      if (doTimeout) {
         timeout--;
      }
   }
   if (!timedOut) {
      set_bit(NAPI_STATE_UNUSED, &napi->state);
   }

   if (napi->vector) {
      vmk_NetPollVectorUnSet(napi->net_poll);
      napi->vector = 0;
   }

   clear_bit(NAPI_STATE_DISABLE, &napi->state);
   return timedOut;
}

/**
 * napi_disable - prevent NAPI from scheduling
 * @napi: napi context
 *
 * Stop NAPI from being scheduled on this context.
 * Waits till any outstanding processing completes.
 *
 * RETURN VALUE:
 * None
 */
/* _VMKLNX_CODECHECK_: napi_disable */
void
napi_disable(struct napi_struct *napi)
{
   napi_disable_timeout(napi, -1);
}
EXPORT_SYMBOL(napi_disable);

/**
 * netif_napi_add - initialize a napi context
 * @dev: network device
 * @napi: napi context
 * @poll: polling function
 * @weight: default weight
 *
 * netif_napi_add() must be used to initialize a napi context prior to calling
 * *any* of the other napi related functions.
 *
 * RETURN VALUE:
 * None
 */
/* _VMKLNX_CODECHECK_: netif_napi_add */
void
netif_napi_add(struct net_device *dev,
               struct napi_struct *napi,
               int (*poll)(struct napi_struct *, int),
               int weight)
{
   struct net_lro_mgr *lro_mgr;

   napi->poll = poll;
   napi->weight = weight;
   napi->dev = dev;

   lro_mgr = &napi->lro_mgr;
   lro_mgr->dev = dev;
   lro_mgr->features = LRO_F_NAPI;
   lro_mgr->ip_summed = CHECKSUM_UNNECESSARY;
   lro_mgr->ip_summed_aggr = CHECKSUM_UNNECESSARY;
   lro_mgr->max_desc = LRO_DEFAULT_MAX_DESC;
   lro_mgr->lro_arr = napi->lro_desc;
   lro_mgr->get_skb_header = vmklnx_net_lro_get_skb_header;
   lro_mgr->get_frag_header = NULL;
   lro_mgr->max_aggr = vmklnxLROMaxAggr;
   lro_mgr->frag_align_pad = 0;

   napi_poll_init(napi);

   set_bit(NAPI_STATE_SCHED, &napi->state);
   set_bit(NAPI_STATE_UNUSED, &napi->state);
}
EXPORT_SYMBOL(netif_napi_add);

/**
 * netif_napi_del - remove a napi context
 * @napi: napi context
 *
 * netif_napi_del() removes a napi context from the network device napi list
 *
 * RETURN VALUE:
 * None
 */
/* _VMKLNX_CODECHECK_: netif_napi_del */
void
netif_napi_del(struct napi_struct *napi)
{
   napi_poll_cleanup(napi);
}
EXPORT_SYMBOL(netif_napi_del);

/**
 * napi_enable - enable NAPI scheduling
 * @n: napi context
 *
 * Resume NAPI from being scheduled on this context.
 * Must be paired with napi_disable.
 *
 * RETURN VALUE:
 * None
 */
/* _VMKLNX_CODECHECK_: napi_enable */
void
napi_enable(struct napi_struct *napi)
{
   struct net_lro_mgr *lro_mgr;
   int idx;

   BUG_ON(!test_bit(NAPI_STATE_SCHED, &napi->state));

   lro_mgr = &napi->lro_mgr;
   for (idx = 0; idx < lro_mgr->max_desc; idx++) {
      memset(&napi->lro_desc[idx], 0, sizeof(struct net_lro_desc));
   }

   smp_mb__before_clear_bit();
   clear_bit(NAPI_STATE_SCHED, &napi->state);
   clear_bit(NAPI_STATE_UNUSED, &napi->state);
}
EXPORT_SYMBOL(napi_enable);


/*
 * Section: Skb helpers
 */

/*
 *----------------------------------------------------------------------------
 *
 * skb_append_frags_to_pkt --
 *
 *    Append skb frags to the packet handle associated with it.
 *
 * Results:
 *    VMK_OK on success; VMK_* otherwise.
 *
 * Side effects:
 *    Drops packet on the floor if unsuccessful.
 *
 *----------------------------------------------------------------------------
 */
static inline VMK_ReturnStatus
skb_append_frags_to_pkt(struct sk_buff *skb)
{
   VMK_ReturnStatus status = VMK_OK;
   int i;

   for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
      skb_frag_t *frag = &skb_shinfo(skb)->frags[i];

      status = vmk_PktAppendFrag(skb->pkt,
                                 page_to_phys(frag->page) + frag->page_offset,
                                 frag->size);
      if (unlikely(status != VMK_OK)) {
         return status;
      }

      /*
       * The frags should not be coalesced with the first sg entry (flat buffer).
       * If this happens let's just drop the packet instead of leaking.
       */
      if (unlikely(vmk_PktFragsNb(skb->pkt) <= 1)) {
         VMK_ASSERT(VMK_FALSE);
         return VMK_FAILURE;
      }
   }

   /*
    * Let vmkernel know it needs to release those frags explicitly.
    */
   vmk_PktSetPageFrags(skb->pkt);

   return status;
}

/*
 *----------------------------------------------------------------------------
 *
 * skb_append_fraglist_to_pkt --
 *
 *    Append skb frag list to the packet handle associated with it.
 *
 * Results:
 *    VMK_OK on success; VMK_* otherwise.
 *
 * Side effects:
 *    Drops packet on the floor if unsuccessful.
 *
 *----------------------------------------------------------------------------
 */
static inline VMK_ReturnStatus
skb_append_fraglist_to_pkt(struct sk_buff *skb)
{
   VMK_ReturnStatus status = VMK_OK;
   struct sk_buff *frag_skb = skb_shinfo(skb)->frag_list;

   while (frag_skb) {
      /*
       * LRO might have pulled in the whole flat buffer if header split mode
       * is activated.
       */
      if (skb_headlen(frag_skb)) {
         status = vmk_PktAppend(skb->pkt, frag_skb->pkt,
                                skb_headroom(frag_skb), skb_headlen(frag_skb));
         if (unlikely(status != VMK_OK)) {
            return status;
         }
      }

      if (skb_shinfo(frag_skb)->nr_frags) {
         int i;

         for (i = 0; i < skb_shinfo(frag_skb)->nr_frags; i++) {
            skb_frag_t *frag = &skb_shinfo(frag_skb)->frags[i];

            status = vmk_PktAppendFrag(skb->pkt,
                                       page_to_phys(frag->page) + frag->page_offset,
                                       frag->size);
            if (unlikely(status != VMK_OK)) {
               return status;
            }
         }
      }

      frag_skb = frag_skb->next;
   }

   return status;
}

/*
 *----------------------------------------------------------------------------
 *
 * skb_gen_pkt_frags --
 *
 *    Append the skb frags and frag list to the packet handle associated
 *    with it.
 *
 * Results:
 *    VMK_OK on success; VMK_* otherwise.
 *
 * Side effects:
 *    Drops packet on the floor if unsuccessful.
 *
 *----------------------------------------------------------------------------
 */
static VMK_ReturnStatus
skb_gen_pkt_frags(struct sk_buff *skb)
{
   VMK_ReturnStatus status;

   status = vmk_PktAdjust(skb->pkt, skb_headroom(skb), skb_headlen(skb));
   VMK_ASSERT(status == VMK_OK);

   if (skb_shinfo(skb)->nr_frags) {
      status = skb_append_frags_to_pkt(skb);
      if (unlikely(status != VMK_OK)) {
         return status;
      }
   }

   /*
    * Since we removed packet completion in vmklinux, we
    * cannot support skb chaining anymore.
    */
   if (skb_shinfo(skb)->frag_list) {
      VMK_ASSERT(VMK_FALSE);
      return VMK_NOT_SUPPORTED;
   }

   return status;
}


/*
 *----------------------------------------------------------------------------
 *
 * do_init_skb_bits --
 *
 *    Initialize a socket buffer.
 *
 * Results:
 *    None.
 *
 * Side effects:
 *    None.
 *
 *----------------------------------------------------------------------------
 */
static inline void
do_init_skb_bits(struct sk_buff *skb, kmem_cache_t *cache)
{
   skb->qid = VMKNETDDI_QUEUEOPS_INVALID_QUEUEID;
   skb->next = NULL;
   skb->prev = NULL;
   skb->head = NULL;
   skb->data = NULL;
   skb->tail = NULL;
   skb->end = NULL;
   skb->dev = NULL;
   skb->pkt = NULL;
   atomic_set(&skb->users, 1);
   skb->cache = cache;
   skb->mhead = 0;
   skb->len = 0;
   skb->data_len = 0;
   skb->ip_summed = CHECKSUM_NONE;
   skb->csum = 0;
   skb->priority = 0;
   skb->protocol = 0;
   skb->truesize = 0;
   skb->mac.raw = NULL;
   skb->nh.raw = NULL;
   skb->h.raw = NULL;
   skb->napi = NULL;
   skb->lro_ready = 0;

   /* VLAN_RX_SKB_CB shares the same space so this is sufficient */
   VLAN_TX_SKB_CB(skb)->magic = 0;
   VLAN_TX_SKB_CB(skb)->vlan_tag = 0;

   atomic_set(&(skb_shinfo(skb)->dataref), 1);
   atomic_set(&(skb_shinfo(skb)->fragsref), 1);
   skb_shinfo(skb)->nr_frags = 0;
   skb_shinfo(skb)->frag_list = NULL;
   skb_shinfo(skb)->gso_size = 0;
   skb_shinfo(skb)->gso_segs = 0;
   skb_shinfo(skb)->gso_type = 0;
   skb_shinfo(skb)->ip6_frag_id = 0;

   get_LinSkb(skb)->flags = LIN_SKB_FLAGS_FRAGSOWNER_VMKLNX;
}


/*
 *----------------------------------------------------------------------------
 *
 * do_bind_skb_to_pkt --
 *
 *    Bind a socket buffer to a packet handle.
 *
 * Results:
 *    None.
 *
 * Side effects:
 *    None.
 *
 *----------------------------------------------------------------------------
 */
static inline void
do_bind_skb_to_pkt(struct sk_buff *skb, vmk_PktHandle *pkt, unsigned int size)
{
   skb->pkt = pkt;
   skb->head = (void *) vmk_PktFrameMappedPointerGet(pkt);
   skb->end = skb->head + size;
   skb->data = skb->head;
   skb->tail = skb->head;

#ifdef VMX86_DEBUG
   VMK_ASSERT(vmk_PktFrameMappedLenGet(pkt) >= size);

   /*
    * linux guarantees physical contiguity of the pages backing
    * skb's returned by this routine, so the drivers assume that.
    * we guarantee this by backing the buffers returned from
    * vmk_PktAlloc with a large-page, low-memory heap which is
    * guaranteed to be physically contiguous, so we just double
    * check it here.
    */
   {
      vmk_Bool isFlat;

      isFlat = vmk_PktIsFlatBuffer(pkt);
      VMK_ASSERT(isFlat);
   }
#endif // VMX86_DEBUG
}

/*
 *----------------------------------------------------------------------------
 *
 * do_alloc_skb --
 *
 *    Allocate a socket buffer.
 *
 * Results:
 *    A pointer to the allocated socket buffer.
 *
 * Side effects:
 *    None.
 *
 *----------------------------------------------------------------------------
 */
static struct sk_buff *
do_alloc_skb(kmem_cache_t *cache, gfp_t flags)
{
   struct LinSkb *linSkb;

   VMK_ASSERT(cache != NULL);

   if (!cache) {
      VMKLNX_WARN("No skb cache provided.");
      return NULL;
   }

   linSkb = vmklnx_kmem_cache_alloc(cache, flags);
   if (unlikely(linSkb == NULL)) {
      return NULL;
   }

   do_init_skb_bits(&linSkb->skb, cache);
   return &linSkb->skb;
}

/*
 *----------------------------------------------------------------------------
 *
 * vmklnx_net_alloc_skb --
 *
 *    Allocate a socket buffer for a specified size and bind it to a packet
 *    handle.
 *
 * Results:
 *    A pointer to the allocated socket buffer.
 *
 * Side effects:
 *    None.
 *
 *----------------------------------------------------------------------------
 */
struct sk_buff *
vmklnx_net_alloc_skb(struct kmem_cache_s *cache, unsigned int size, struct net_device *dev, gfp_t flags)
{
   vmk_PktHandle *pkt;
   struct sk_buff *skb;

   skb = do_alloc_skb(cache, flags);

   if (unlikely(skb == NULL)) {
      goto done;
   }

   if (dev && dev->uplinkDev) {
      /*
       * Do a packet allocation aimed at the specified device.
       * The packet will be allocated in memory that will be
       * easy to DMA map to.
       */
      vmk_PktAllocForUplink(size, dev->uplinkDev, &pkt);
   } else {
      /* Do a simple packet allocation. */
      vmk_PktAlloc(size, &pkt);
   }

   if (unlikely(pkt == NULL)) {
      do_free_skb(skb);
      skb = NULL;
      goto done;
   }

   do_bind_skb_to_pkt(skb, pkt, size);

done:
   return skb;
}
EXPORT_SYMBOL(vmklnx_net_alloc_skb);

/*
 *-----------------------------------------------------------------------------
 *
 * vmklnx_set_skb_frags_owner_vmkernel --
 *
 *    Toggle skb frag ownership for the given skb to VMkernel.
 *
 * Results:
 *    None.
 *
 * Side effects:
 *    Sets the skb frag ownership to VMkernel for the given skb.
 *
 *-----------------------------------------------------------------------------
 */

inline void
vmklnx_set_skb_frags_owner_vmkernel(struct sk_buff *skb)
{
   get_LinSkb(skb)->flags &= ~LIN_SKB_FLAGS_FRAGSOWNER_VMKLNX;
   get_LinSkb(skb)->flags |= LIN_SKB_FLAGS_FRAGSOWNER_VMKERNEL;
   return;
}
EXPORT_SYMBOL(vmklnx_set_skb_frags_owner_vmkernel);

/*
 *-----------------------------------------------------------------------------
 *
 * vmklnx_is_skb_frags_owner --
 *
 *    Indicate if the skb frags belong to vmklinux.
 *
 *    We do not always want to call put_page() on skb frags. For
 *    instance, in the TX path the frags belong to the guest
 *    OS. However, in the RX path with packet split and others we
 *    need to call put_page() since the frags belong to vmklinux.
 *
 * Results:
 *    1 if the frags belong to vmklinux, 0 otherwise.
 *
 * Side effects:
 *    None.
 *
 *-----------------------------------------------------------------------------
 */

int
vmklnx_is_skb_frags_owner(struct sk_buff *skb)
{
   VMK_ASSERT(skb_shinfo(skb)->nr_frags);

   return (get_LinSkb(skb)->flags & LIN_SKB_FLAGS_FRAGSOWNER_VMKLNX);
}
EXPORT_SYMBOL(vmklnx_is_skb_frags_owner);

/*
 *----------------------------------------------------------------------------
 *
 * skb_release_data --
 *
 *    Release data associated with an skb.
 *
 * Results:
 *    None.
 *
 * Side effects:
 *    None.
 *
 *----------------------------------------------------------------------------
 */
void
skb_release_data(struct sk_buff *skb)
{
   VMK_ASSERT((atomic_read(&skb_shinfo(skb)->dataref) & SKB_DATAREF_MASK) == 1);

   if (atomic_dec_and_test(&(skb_shinfo(skb)->dataref))) {
      if (unlikely(skb->mhead)) {
         skb->mhead = 0;
         vmklnx_kfree(vmklnxLowHeap, skb->head);
      }

      if (likely(atomic_dec_and_test(&(skb_shinfo(skb)->fragsref)))) {
         if (skb->pkt) {
            if ((in_irq() || irqs_disabled()) && !vmklnx_is_panic()) {
               vmk_PktReleaseIRQ(skb->pkt);
            } else {
               vmk_NetPoll pollPriv;
               struct napi_struct *napi;

               /*
                * Try to queue packets in NAPI's compPktList in order to
                * release them in batch, but first thoroughly check if we
                * got called from a napi context (PR #396873).
                */
               if (vmk_NetPollGetCurrent(&pollPriv) == VMK_OK &&
                   (napi = (struct napi_struct *) vmk_NetPollGetPrivate(pollPriv)) != NULL &&
                   napi->net_poll_type == NETPOLL_DEFAULT) {
                  vmk_NetPollQueueCompPkt(pollPriv, skb->pkt);
               } else {
                  vmk_PktRelease(skb->pkt);
               }
            }
         }

         if (skb_shinfo(skb)->nr_frags && vmklnx_is_skb_frags_owner(skb)) {
            int i;

            for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
               put_page(skb_shinfo(skb)->frags[i].page);
            }
            skb_shinfo(skb)->nr_frags = 0;
         }

         if (skb_shinfo(skb)->frag_list) {
            struct sk_buff *frag_skb = skb_shinfo(skb)->frag_list;
            struct sk_buff *next_skb;

            while (frag_skb) {
               next_skb = frag_skb->next;
               kfree_skb(frag_skb);
               frag_skb = next_skb;
            }
            skb_shinfo(skb)->frag_list = NULL;
         }
      }
   }
}

/*
 *----------------------------------------------------------------------------
 *
 * do_free_skb --
 *
 *    Release socket buffer.
 *
 * Results:
 *    None.
 *
 * Side effects:
 *    None.
 *
 *----------------------------------------------------------------------------
 */
static void
do_free_skb(struct sk_buff *skb)
{
   vmklnx_kmem_cache_free(skb->cache, get_LinSkb(skb));
}

/**
 * __kfree_skb - private function
 * @skb: buffer
 *
 * Free an sk_buff. Release anything attached to the buffer.
 * Clean the state. This is an internal helper function. Users should
 * always call kfree_skb
 *
 * RETURN VALUE:
 * None
 */
/* _VMKLNX_CODECHECK_: __kfree_skb */
void
__kfree_skb(struct sk_buff *skb)
{
   if (unlikely(!atomic_dec_and_test(&skb->users))) {
      return;
   }

   skb_release_data(skb);
   do_free_skb(skb);
}
EXPORT_SYMBOL(__kfree_skb);

/*
 *----------------------------------------------------------------------------
 *
 * skb_debug_info --
 *    Debug function to print contents of a socket buffer.
 *
 * Results:
 *    None.
 *
 * Side effects:
 *    None.
 *----------------------------------------------------------------------------
 */
void
skb_debug_info(struct sk_buff *skb)
{
   int f;
   skb_frag_t *frag;

   printk(KERN_ERR "skb\n"
          "  head     <%p>\n"
          "  mhead    <%u>\n"
          "  data     <%p>\n"
          "  tail     <%p>\n"
          "  end      <%p>\n"
          "  data_len <%u>\n"
          "  nr_frags <%u>\n"
          "  dataref  <%u>\n"
          "  gso_size <%u>\n",
          skb->head, skb->mhead,
          skb->data, skb->tail, skb->end,
          skb->data_len,
          skb_shinfo(skb)->nr_frags,
          atomic_read(&(skb_shinfo(skb)->dataref)),
          skb_shinfo(skb)->gso_size);

   for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) {
      frag = &skb_shinfo(skb)->frags[f];
      printk(KERN_ERR "skb frag %d\n"
             "  page        <0x%llx>\n"
             "  page_offset <%u>\n"
             "  size        <%u>\n",
             f, page_to_phys(frag->page),
             frag->page_offset, frag->size);
   }
}


/*
 * Section: Transmit path
 */

/*
 *----------------------------------------------------------------------------
 *
 * ipv6_set_hraw --
 *
 *    Parse an IPv6 skb to find the appropriate value for initializing
 *    skb->h.raw. If skb->h.raw is initialized, also sets *protocol to
 *    the last nexthdr found.
 *
 * Results:
 *    None
 *
 * Side effects:
 *    None
 *
 *----------------------------------------------------------------------------
 */
static void
ipv6_set_hraw(struct sk_buff *skb, vmk_uint8 *protocol)
{
   vmk_uint8 nextHdr = skb->nh.ipv6h->nexthdr;
   vmk_uint8 *nextHdrPtr = (vmk_uint8 *) (skb->nh.ipv6h + 1);

   if (nextHdrPtr > skb->end) {
      // this happens if the source doesn't take care to map the entire header
      return;
   }
   // take care of most common situation:
   if ((nextHdr == IPPROTO_TCP)
       || (nextHdr == IPPROTO_UDP)
       || (nextHdr == IPPROTO_ICMPV6)) {
      skb->h.raw = nextHdrPtr;
      (*protocol) = nextHdr;
      return;
   }

   /*
    * This will be the value if "end" not found within
    * linear region.
    */
   VMK_ASSERT(skb->h.raw == NULL);
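   /*
    * Walk the IPv6 extension header chain. For the generic extension
    * headers the "Hdr Ext Len" byte (nextHdrPtr[1]) is counted in 8-byte
    * units beyond the first 8 bytes, while AH counts it in 4-byte units.
    */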
   do {
      switch (nextHdr) {
      case IPPROTO_ROUTING:
      case IPPROTO_HOPOPTS:
      case IPPROTO_DSTOPTS:
         // continue searching
         nextHdr = *nextHdrPtr;
         nextHdrPtr += nextHdrPtr[1] * 8 + 8;
         break;

      case IPPROTO_AH:
         // continue searching
         nextHdr = *nextHdrPtr;
         nextHdrPtr += nextHdrPtr[1] * 4 + 8;
         break;

      /*
       * We do NOT handle the IPPROTO_FRAGMENT case here. Thus,
       * if any packet has a IPv6 fragment header, this function
       * will return protocol == IPPROTO_FRAGMENT and *not*
       * find the L4 protocol. As the returned protocol is only
       * used for TSO and CSUM cases, and a fragment header is
       * not allowed in either case, this behavior is desirable,
       * as it allows handling this case in the caller.
       */

      default:
         // not recursing
         skb->h.raw = nextHdrPtr;
         (*protocol) = nextHdr;
         return;
         break;
      }
   } while (nextHdrPtr < skb->end);
}

/*
 *----------------------------------------------------------------------------
 *
 * map_pkt_to_skb --
 *
 *    Converts PktHandle to sk_buff before handing packet to linux driver.
 *
 * Results:
 *    Returns VMK_ReturnStatus
 *
 * Side effects:
 *    This is ugly. Too many memory writes per packet. We should look at
 *    optimizing this. Maybe an skb cache or something, instead of
 *    having to touch 20+ variables for each packet.
 *
 *----------------------------------------------------------------------------
 */
static VMK_ReturnStatus
map_pkt_to_skb(struct net_device *dev,
               struct netdev_queue *queue,
               vmk_PktHandle *pkt,
               struct sk_buff **pskb)
{
   VMK_ReturnStatus status;
   struct sk_buff *skb;
   int i;
   vmk_uint16 sgInspected;
   unsigned int headLen, bytesLeft;
   vmk_uint32 frameLen;
   VMK_ReturnStatus ret = VMK_OK;
   vmk_PktFrag frag;
   vmk_Bool must_vlantag, must_tso, must_csum, pkt_ipv4;
   vmk_uint8 protocol, ipVersion;
   vmk_uint32 ehLen;
   vmk_uint32 ipHdrLength;

   skb = do_alloc_skb(dev->skb_pool, GFP_ATOMIC);

   if (unlikely(skb == NULL)) {
      ret = VMK_NO_MEMORY;
      goto done;
   }

   skb->pkt = pkt;
   skb->queue_mapping = queue - dev->_tx;

   VMK_ASSERT(dev);
   VMK_ASSERT(pkt);
#ifdef VMX86_DEBUG
   {
      vmk_Bool consistent;

      consistent = vmk_PktCheckInternalConsistency(pkt);
      VMK_ASSERT(consistent);
   }
#endif

   VMK_ASSERT(vmk_PktFrameMappedLenGet(pkt) > 0);

   skb->head = (void *) vmk_PktFrameMappedPointerGet(pkt);

   frameLen = vmk_PktFrameLenGet(pkt);

   skb->len = frameLen;
   skb->dev = dev;
   skb->data = skb->head;

   headLen = min(vmk_PktFrameMappedLenGet(pkt), frameLen);
   skb->end = skb->tail = skb->head + headLen;
   skb->mac.raw = skb->data;

   must_csum = vmk_PktIsMustCsum(pkt);


   status = vmk_PktInetFrameLayoutGetComponents(pkt,
                                                &ehLen,
                                                &ipHdrLength,
                                                &ipVersion,
                                                &protocol);
   if (status == VMK_OK) {
      /*
       * Pkt has inet layout attributes associated, populate
       * the skb details from the layout components.
       */
      skb->nh.raw = skb->mac.raw + ehLen;
      skb->h.raw = skb->nh.raw + ipHdrLength;

      if (ipVersion == 4) {
         const int eth_p_ip_nbo = htons(ETH_P_IP);
         pkt_ipv4 = VMK_TRUE;
         skb->protocol = eth_p_ip_nbo;
      } else {
         pkt_ipv4 = VMK_FALSE;
         if (ipVersion == 6) {
            skb->protocol = ETH_P_IPV6_NBO;
         }
      }
      VMKLNX_DEBUG(3, "inet layout %d %d %d protocol %x ipVers %d proto %x "
                   "ipv4 %d csum %d tso %d",
                   ehLen,
                   ipHdrLength,
                   vmk_PktInetFrameLayoutGetL4HdrLength(pkt, VMK_FALSE),
                   skb->protocol,
                   protocol,
                   pkt_ipv4,
                   ipVersion,
                   must_csum,
                   vmk_PktIsLargeTcpPacket(pkt));
   } else {
      struct ethhdr *eh;

      eh = (struct ethhdr *) skb->head;
      ehLen = eth_header_len(eh);
      skb->nh.raw = skb->mac.raw + ehLen;
      skb->protocol = eth_header_frame_type(eh);
      sgInspected = 1;

      if (eth_header_is_ipv4(eh)) {
         if (headLen < ehLen + sizeof(*skb->nh.iph)) {
            ret = VMK_FAILURE;
            goto done;
         }
         skb->h.raw = skb->nh.raw + skb->nh.iph->ihl*4;
         pkt_ipv4 = VMK_TRUE;
         protocol = skb->nh.iph->protocol;
      } else {
         pkt_ipv4 = VMK_FALSE;
         protocol = 0xff; // unused value.
         if (skb->protocol == ETH_P_IPV6_NBO) {
            ipv6_set_hraw(skb, &protocol);
            VMKLNX_DEBUG(3, "ipv6 %ld offset %ld %d",
                         skb->h.raw - (vmk_uint8 *)(skb->data),
                         skb->h.raw - (vmk_uint8 *)(skb->nh.ipv6h), protocol);
         }
      }
   }

   VMKLNX_DEBUG(10, "head: %u bytes at VA 0x%p", headLen, skb->head);

   /*
    * See if the packet requires VLAN tagging
    */
   must_vlantag = vmk_PktMustVlanTag(pkt);

   if (must_vlantag) {
      vmk_VlanID vlanID;
      vmk_VlanPriority priority;

      VMKLNX_DEBUG(2, "%s: tx vlan tag %u present with priority %u",
                   dev->name, vmk_PktVlanIDGet(pkt), vmk_PktPriorityGet(pkt));

      vlanID = vmk_PktVlanIDGet(pkt);
      priority = vmk_PktPriorityGet(pkt);

      vlan_put_tag(skb, vlanID | (priority << VLAN_1PTAG_SHIFT));
   }

   /*
    * See if the packet requires checksum offloading or TSO
    */

   must_tso = vmk_PktIsLargeTcpPacket(pkt);

   if (must_tso) {
      vmk_uint32 tsoMss = vmk_PktGetLargeTcpPacketMss(pkt);
      unsigned short inetHdrLen;

      /*
       * backends should check the tsoMss before setting MUST_TSO flag
       */
      VMK_ASSERT(tsoMss);

      if (!pkt_ipv4 &&
          (skb->protocol != ntohs(ETH_P_IPV6))) {
         static uint32_t throttle = 0;
         VMKLNX_THROTTLED_WARN(throttle,
                               "%s: non-ip packet with TSO (proto=0x%x)",
                               dev->name,
                               skb->protocol);
         ret = VMK_FAILURE;
         goto done;
      }

      if (!skb->h.raw || (protocol != IPPROTO_TCP)) {
         /*
          * This check will also trigger for IPv6 packets that
          * have a fragment header, as ipv6_set_hraw() sets protocol
          * to IPPROTO_FRAGMENT.
          */
         static uint32_t throttle = 0;
         VMKLNX_THROTTLED_WARN(throttle,
                               "%s: non-tcp packet with TSO (ip%s, proto=0x%x, hraw=%p)",
                               dev->name,
                               pkt_ipv4 ? "v4" : "v6",
                               protocol, skb->h.raw);
         ret = VMK_FAILURE;
         goto done;
      }

      /*
       * Perform some sanity checks on TSO frames, because buggy and/or
       * malicious guests might generate invalid packets which may wedge
       * the physical hardware if we let them through.
       */
      inetHdrLen = (skb->h.raw + tcp_hdrlen(skb)) - skb->nh.raw;

      // Reject if the frame doesn't require TSO in the first place
      if (unlikely(frameLen - ehLen - inetHdrLen <= tsoMss)) {
         static uint32_t throttle = 0;
         VMKLNX_THROTTLED_WARN(throttle,
                               "%s: runt TSO packet (tsoMss=%d, frameLen=%d)",
                               dev->name, tsoMss, frameLen);
         ret = VMK_FAILURE;
         goto done;
      }

      // Reject if segmented frame will exceed MTU
      if (unlikely(tsoMss + inetHdrLen > dev->mtu)) {
         static uint32_t logThrottleCounter = 0;
         VMKLNX_THROTTLED_WARN(logThrottleCounter,
                               "%s: oversized tsoMss: %d, mtu=%d",
                               dev->name, tsoMss, dev->mtu);
         ret = VMK_FAILURE;
         goto done;
      }

      skb_shinfo(skb)->gso_size = tsoMss;
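      /* gso_segs = ceil(skb->len / tsoMss), computed with integer arithmetic. */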
      skb_shinfo(skb)->gso_segs = (skb->len + tsoMss - 1) / tsoMss;
      skb_shinfo(skb)->gso_type = pkt_ipv4 ? SKB_GSO_TCPV4 : SKB_GSO_TCPV6;

      /*
       * If congestion window has been reduced due to the
       * previous TCP segment
       */
      if (unlikely(skb->h.th->cwr == 1)) {
         skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN;
      }
   } else {
      /*
       * We are dropping packets that are larger than the MTU of the NIC
       * since they could potentially wedge the NIC or PSOD in the driver.
       */
      if (unlikely(frameLen - ehLen > dev->mtu)) {
         static uint32_t linuxTxWarnCounter;
         VMKLNX_THROTTLED_WARN(linuxTxWarnCounter,
                               "%s: %d bytes packet couldn't be sent (mtu=%d)",
                               dev->name, frameLen, dev->mtu);
         ret = VMK_FAILURE;
         goto done;
      }
   }

   if (must_csum || must_tso) {

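      /*
       * With CHECKSUM_HW, skb->csum carries the offset of the checksum
       * field within the L4 header: 16 bytes into the TCP header and
       * 6 bytes into the UDP header.
       */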
      switch (protocol) {

      case IPPROTO_TCP:
         skb->csum = 16;
         skb->ip_summed = CHECKSUM_HW;
         break;

      case IPPROTO_UDP:
         skb->csum = 6;
         skb->ip_summed = CHECKSUM_HW;
         break;

      /*
       * XXX add cases for other protos once we use NETIF_F_HW_CSUM
       * in some device. I think the e1000 can do it, but the Intel
       * driver doesn't advertise so.
       */

      default:
         VMKLNX_DEBUG(0, "%s: guest driver requested xsum offload on "
                      "unsupported type %d", dev->name, protocol);
         ret = VMK_FAILURE;
         goto done;
      }

      VMK_ASSERT(skb->h.raw);
   } else {
      skb->ip_summed = CHECKSUM_NONE; // XXX: for now
   }

   bytesLeft = frameLen - headLen;
   for (i = sgInspected; bytesLeft > 0; i++) {
      skb_frag_t *skb_frag;

      if (unlikely(i - sgInspected >= MAX_SKB_FRAGS)) {
         static uint32_t fragsThrottleCounter = 0;
         VMKLNX_THROTTLED_INFO(fragsThrottleCounter,
                               "too many frags (> %u) bytesLeft %d",
                               MAX_SKB_FRAGS, bytesLeft);
#ifdef VMX86_DEBUG
         VMK_ASSERT(VMK_FALSE);
#endif
         ret = VMK_FAILURE;
         goto done;
      }

      if (vmk_PktFragGet(pkt, &frag, i) != VMK_OK) {
         ret = VMK_FAILURE;
         goto done;
      }
      skb_frag = &skb_shinfo(skb)->frags[i - sgInspected];
      /* Going to use the frag->page to store page number and
         frag->page_offset for offset within that page */
      skb_frag->page = phys_to_page(frag.addr);
      skb_frag->page_offset = offset_in_page(frag.addr);
      skb_frag->size = min(frag.length, bytesLeft);
      VMKLNX_DEBUG(10, "frag: %u bytes at MA 0x%llx",
                   skb_frag->size, page_to_phys(skb_frag->page) + skb_frag->page_offset);
      skb->data_len += skb_frag->size;
      bytesLeft -= skb_frag->size;
      skb_shinfo(skb)->nr_frags++;

      vmk_MAAssertIOAbility(frag.addr, frag.length);
   }

   /*
    * Those frags are VMkernel's buffers. Nothing special to do in the
    * Vmklinux layer for completion.
    */
   vmklnx_set_skb_frags_owner_vmkernel(skb);

   if (VMKLNX_STRESS_DEBUG_COUNTER(stressNetIfCorruptTx)) {
      LinStress_CorruptSkbData(skb, 60, 0);
   }

done:

   if ((ret != VMK_OK) && (skb != NULL)) {
      do_free_skb(skb);
      skb = NULL;
   }

   *pskb = skb;

   return ret;
}

/*
 *----------------------------------------------------------------------------
 *
 * netdev_pick_tx_queue --
 *
 *    Pick device tx subqueue for transmission. The upper layers must ensure
 *    that all packets in pktList are destined for the same queue.
 *
 * Results:
 *    pointer to netdev_queue
 *
 * Side effects:
 *    None.
 *
 *----------------------------------------------------------------------------
 */
static inline struct netdev_queue *
netdev_pick_tx_queue(struct net_device *dev, vmk_NetqueueQueueID vmkqid)
{
   int queue_idx = 0;
   vmknetddi_queueops_queueid_t qid = VMKNETDDI_QUEUEOPS_INVALID_QUEUEID;
   VMK_ReturnStatus status;

   if (!vmkqid) {
      goto out;
   }

   status = marshall_from_vmknetq_id(vmkqid, &qid);
   VMK_ASSERT(status == VMK_OK);
   if (status == VMK_OK) {
      queue_idx = VMKNETDDI_QUEUEOPS_QUEUEID_VAL(qid);
      if (unlikely(queue_idx >= dev->real_num_tx_queues ||
                   queue_idx >= dev->num_tx_queues)) {
         queue_idx = 0;
      }
   }

out:
   VMK_ASSERT(queue_idx < dev->num_tx_queues);
   VMK_ASSERT(queue_idx >= 0);
   return &dev->_tx[queue_idx];
}

/*
 *----------------------------------------------------------------------------
 *
 * netdev_tx --
 *
 *    Transmit packets
 *
 * Results:
 *    VMK_ReturnStatus indicating the outcome.
 *
 * Side effects:
 *    None.
 *
 *----------------------------------------------------------------------------
 */
static VMK_ReturnStatus
netdev_tx(struct net_device *dev,
          vmk_PktList pktList,
          vmk_NetqueueQueueID vmkqid)
{
   VMK_ReturnStatus ret = VMK_OK;
   VMK_PKTLIST_STACK_DEF_INIT(freeList);
   vmk_uint32 pktsCount;
   vmk_PktHandle *pkt;
   struct sk_buff *skb;
   struct netdev_queue *queue;

   queue = netdev_pick_tx_queue(dev, vmkqid);
   VMK_ASSERT(queue);

   if (VMKLNX_STRESS_DEBUG_COUNTER(stressNetIfFailTxAndStopQueue)) {
      netif_tx_stop_queue(queue);
   }

   if (unlikely(test_bit(__LINK_STATE_BLOCKED, &dev->state)) ||
       VMKLNX_STRESS_DEBUG_COUNTER(stressNetIfFailHardTx)) {
      vmk_PktListAppend(freeList, pktList);
      goto out;
   }

   spin_lock(&queue->_xmit_lock);
   while (!vmk_PktListIsEmpty(pktList)) {
      int xmit_status = -1;
      VMK_ReturnStatus mapRet = VMK_OK;

      VMK_ASSERT(dev->flags & IFF_UP);

      /*
       * Queue state can change even before the device is opened!
       * Upper layers have no way of knowing about it until after
       * the device is opened. All we can do is check for a stopped
       * queue here and return the appropriate error.
       */
      if (unlikely(netif_tx_queue_stopped(queue))) {
         spin_unlock(&queue->_xmit_lock);
         ret = VMK_BUSY;
         goto out;
      }

      if (VMKLNX_STRESS_DEBUG_COUNTER(stressNetIfFailHardTx)) {
         pkt = vmk_PktListPopFirstPkt(pktList);
         VMK_ASSERT(pkt);
         VMKLNX_DEBUG(1, "Failing Hard Transmit. pkt = %p, device = %s\n",
                      pkt, dev->name);
         vmk_PktListAppendPkt(freeList, pkt);
         continue;
      }

      pkt = vmk_PktListPopFirstPkt(pktList);
      VMK_ASSERT(pkt);

      mapRet = map_pkt_to_skb(dev, queue, pkt, &skb);
      if (unlikely(mapRet != VMK_OK)) {
#if defined(VMX86_LOG)
         static uint32_t logThrottleCounter = 0;
#endif
         VMKLNX_THROTTLED_DEBUG(logThrottleCounter, 0,
                                "%s: Unable to map packet to skb (%s). Dropping",
                                dev->name, vmk_StatusToString(mapRet));
         vmk_PktListAppendPkt(freeList, pkt);
         continue;
      }

      VMKAPI_MODULE_CALL(dev->module_id, xmit_status,
                         *dev->hard_start_xmit, skb, dev);

if (unlikely(xmit_status != NETDEV_TX_OK)) {
|
|
spin_unlock(&queue->_xmit_lock);
|
|
VMKLNX_DEBUG(1, "hard_start_xmit failed (status %d; Q stopped %d. "
|
|
"Queuing packet. pkt=%p dev=%s\n",
|
|
xmit_status, netif_tx_queue_stopped(queue),
|
|
skb->pkt, dev->name);
|
|
|
|
/* destroy skb and its resources besides the packet handle itself. */
|
|
atomic_inc(&(skb_shinfo(skb)->fragsref));
|
|
dev_kfree_skb_any(skb);
|
|
|
|
/*
|
|
* sticking pkt back this way may cause tx re-ordering,
|
|
* but this should be very rare.
|
|
*/
|
|
vmk_PktListAppendPkt(pktList, pkt);
|
|
if (xmit_status == NETDEV_TX_BUSY) {
|
|
ret = VMK_BUSY;
|
|
} else {
|
|
ret = VMK_FAILURE;
|
|
}
|
|
goto out;
|
|
}
|
|
|
|
dev->linnet_tx_packets++;
|
|
}
|
|
|
|
spin_unlock(&queue->_xmit_lock);
|
|
|
|
out:
|
|
/*
|
|
* Free whatever could not be txed
|
|
*/
|
|
pktsCount = vmk_PktListGetCount(freeList);
|
|
if (unlikely(pktsCount)) {
|
|
dev->linnet_tx_dropped += pktsCount;
|
|
vmk_PktListReleaseAllPkts(freeList);
|
|
}
|
|
|
|
return ret;
|
|
}
|
|
|
|
/*
|
|
* Section: Control operations and queue management
|
|
*/
|
|
|
|
void __netif_schedule(struct netdev_queue *queue)
|
|
{
|
|
//XXX: does nothing. scheduling is done by the vmkernel now.
|
|
}
|
|
EXPORT_SYMBOL(__netif_schedule);
|
|
|
|
/*
|
|
*----------------------------------------------------------------------------
|
|
*
|
|
* vmklnx_netif_start_tx_queue --
|
|
*
|
|
 *      Notify the uplink layer that the given tx queue has been started.
|
|
*
|
|
* Results:
|
|
* None.
|
|
*
|
|
* Side effects:
|
|
* None.
|
|
*
|
|
*----------------------------------------------------------------------------
|
|
*/
|
|
void
|
|
vmklnx_netif_start_tx_queue(struct netdev_queue *queue)
|
|
{
|
|
struct net_device *dev = queue->dev;
|
|
u16 qidx = queue - dev->_tx;
|
|
VMK_ASSERT(qidx < dev->num_tx_queues);
|
|
|
|
if (dev->uplinkDev) {
|
|
struct tx_netqueue_info *txinfo = dev->tx_netqueue_info;
|
|
VMK_ASSERT(txinfo);
|
|
|
|
if (txinfo[qidx].valid) {
|
|
VMK_ASSERT(txinfo[qidx].vmkqid != VMK_NETQUEUE_INVALID_QUEUEID);
|
|
vmk_UplinkQueueStart(dev->uplinkDev, txinfo[qidx].vmkqid);
|
|
}
|
|
}
|
|
}
|
|
EXPORT_SYMBOL(vmklnx_netif_start_tx_queue);
|
|
|
|
|
|
/*
|
|
*----------------------------------------------------------------------------
|
|
*
|
|
* vmklnx_netif_stop_tx_queue --
|
|
*
|
|
 *      Notify the uplink layer that the given tx queue has been stopped.
|
|
*
|
|
* Results:
|
|
* None.
|
|
*
|
|
* Side effects:
|
|
* None.
|
|
*
|
|
*----------------------------------------------------------------------------
|
|
*/
|
|
void
|
|
vmklnx_netif_stop_tx_queue(struct netdev_queue *queue)
|
|
{
|
|
struct net_device *dev = queue->dev;
|
|
u16 qidx = queue - dev->_tx;
|
|
VMK_ASSERT(qidx < dev->num_tx_queues);
|
|
|
|
if (dev->uplinkDev) {
|
|
struct tx_netqueue_info *txinfo = dev->tx_netqueue_info;
|
|
VMK_ASSERT(txinfo);
|
|
|
|
if (txinfo[qidx].valid) {
|
|
VMK_ASSERT(txinfo[qidx].vmkqid != VMK_NETQUEUE_INVALID_QUEUEID);
|
|
vmk_UplinkQueueStop(dev->uplinkDev, txinfo[qidx].vmkqid);
|
|
}
|
|
}
|
|
}
|
|
EXPORT_SYMBOL(vmklnx_netif_stop_tx_queue);
|
|
|
|
/*
|
|
*----------------------------------------------------------------------------
|
|
*
|
|
* vmklnx_netif_set_poll_cna --
|
|
*
|
|
* Change net poll routine to do CNA processing.
|
|
*
|
|
* Results:
|
|
* None.
|
|
*
|
|
* Side effects:
|
|
* None.
|
|
*
|
|
*----------------------------------------------------------------------------
|
|
*/
|
|
void
|
|
vmklnx_netif_set_poll_cna(struct napi_struct *napi)
|
|
{
|
|
if (napi->net_poll) {
|
|
vmk_NetPollProperties pollInit;
|
|
if (napi->net_poll_type == NETPOLL_BACKUP) {
|
|
pollInit.poll = netdev_poll;
|
|
pollInit.priv = napi->dev;
|
|
pollInit.deliveryCallback = LinuxCNADev_Poll;
|
|
} else {
|
|
pollInit.poll = napi_poll;
|
|
pollInit.priv = napi;
|
|
pollInit.deliveryCallback = LinuxCNA_Poll;
|
|
}
|
|
pollInit.features = VMK_NETPOLL_CUSTOM_DELIVERY_CALLBACK;
|
|
vmk_NetPollChangeCallback(napi->net_poll, &pollInit);
|
|
}
|
|
}
|
|
EXPORT_SYMBOL(vmklnx_netif_set_poll_cna);
|
|
|
|
/**
|
|
* dev_close - shutdown an interface.
|
|
* @dev: device to shutdown
|
|
*
|
|
* This function moves an active device into down state. The device's
|
|
* private close function is invoked.
|
|
*
|
|
* ESX Deviation Notes:
|
|
* netdev notifier chain is not called.
|
|
*
|
|
* RETURN VALUE:
|
|
* 0
|
|
*/
|
|
/* _VMKLNX_CODECHECK_: dev_close */
|
|
int
|
|
dev_close(struct net_device *dev)
|
|
{
|
|
unsigned int i;
|
|
|
|
ASSERT_RTNL();
|
|
|
|
#ifdef VMX86_DEBUG
|
|
{
|
|
VMK_ASSERT(test_bit(__LINK_STATE_START, &dev->state));
|
|
VMK_ASSERT(dev->flags & IFF_UP);
|
|
}
|
|
#endif
|
|
|
|
for (i = 0; i < dev->num_tx_queues; i++) {
|
|
struct netdev_queue *queue = &dev->_tx[i];
|
|
spin_unlock_wait(&queue->_xmit_lock);
|
|
}
|
|
|
|
clear_bit(__LINK_STATE_START, &dev->state);
|
|
smp_mb__after_clear_bit(); /* Commit netif_running(). */
|
|
|
|
if (dev->stop) {
|
|
VMKLNX_DEBUG(0, "Calling device stop %p", dev->stop);
|
|
VMKAPI_MODULE_CALL_VOID(dev->module_id, dev->stop, dev);
|
|
VMKLNX_DEBUG(0, "Device stopped");
|
|
}
|
|
|
|
dev->flags &= ~IFF_UP;
|
|
|
|
return 0;
|
|
}
|
|
EXPORT_SYMBOL(dev_close);
|
|
|
|
/*
|
|
*----------------------------------------------------------------------------
|
|
*
|
|
* init_watchdog_timeo --
|
|
*
|
|
* Init watchdog timeout
|
|
*
|
|
* Results:
|
|
* None.
|
|
*
|
|
* Side effects:
|
|
* None.
|
|
*
|
|
*----------------------------------------------------------------------------
|
|
*/
|
|
static void
|
|
init_watchdog_timeo(struct net_device *dev)
|
|
{
|
|
if (dev->tx_timeout) {
|
|
if (dev->watchdog_timeo <= 0) {
|
|
dev->watchdog_timeo = WATCHDOG_DEF_TIMEO;
|
|
}
|
|
dev->watchdog_timeohit_period_start = jiffies;
|
|
dev->watchdog_timeohit_cnt = 0;
|
|
}
|
|
}
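/*
 * Illustrative sketch (not compiled in): how a NIC driver typically arms the
 * watchdog that init_watchdog_timeo() above initializes -- it supplies a
 * tx_timeout handler and, optionally, its own timeout before registering the
 * device. The example_* names are hypothetical.
 */
#if 0
static void example_tx_timeout(struct net_device *dev)
{
   /* reset the hardware tx path here */
}

static void example_setup_watchdog(struct net_device *dev)
{
   dev->tx_timeout = example_tx_timeout;
   dev->watchdog_timeo = 10 * HZ; /* optional; <= 0 falls back to WATCHDOG_DEF_TIMEO */
}
#endif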
|
|
|
|
/**
|
|
* dev_open - prepare an interface for use.
|
|
* @dev: device to open
|
|
*
|
|
* Takes a device from down to up state. The device's private open
|
|
* function is invoked.
|
|
*
|
|
* ESX Deviation Notes:
|
|
* Device's notifier chain is not called.
|
|
* Device is put in promiscuous mode after it is opened unless it is
|
|
* a passthru device, in which case RX filters are pushed through the
|
|
* passthru APIs.
|
|
*
|
|
* Calling this function on an active interface is a nop. On a failure
|
|
* a negative errno code is returned.
|
|
*
|
|
* RETURN VALUE:
|
|
* 0 on success
|
|
* negative error code returned by the device on error
|
|
*
|
|
*/
|
|
/* _VMKLNX_CODECHECK_: dev_open */
|
|
int
|
|
dev_open(struct net_device *dev)
|
|
{
|
|
int ret = 0;
|
|
|
|
ASSERT_RTNL();
|
|
|
|
if (dev->flags & IFF_UP) {
|
|
return 0;
|
|
}
|
|
|
|
set_bit(__LINK_STATE_START, &dev->state);
|
|
if (dev->open) {
|
|
VMKAPI_MODULE_CALL(dev->module_id, ret, dev->open, dev);
|
|
if (ret == 0) {
|
|
VMKLNX_DEBUG(0, "%s opened successfully\n", dev->name);
|
|
|
|
dev->flags |= IFF_UP;
|
|
if (!(dev->features & NETIF_F_CNA)) {
|
|
init_watchdog_timeo(dev);
|
|
|
|
if (!dev->pt_ops) {
|
|
/*
|
|
* Regular uplinks are put in promiscuous mode.
|
|
*/
|
|
dev->flags |= IFF_PROMISC;
|
|
} else {
|
|
/*
|
|
* Passthru devices should not be in promiscuous mode:
|
|
*
|
|
* UPT: device is used only for one vNIC, vf_set_mc,
|
|
* vf_set_rx_mode and vf_set_multicast are used to
|
|
* program filtering.
|
|
*
|
|
* NPA: device has embedded l2 switching and adds filter
|
|
* for every unicast MAC addresses on the vSwitch.
|
|
* pf_add_mac_filter/pf_del_mac_filter and pf_mirror_all
|
|
* are used to program filtering.
|
|
*
|
|
* However, for NPA, device must be in all-multi mode.
|
|
*/
|
|
if (!(dev->features & NETIF_F_UPT)) {
|
|
dev->flags |= IFF_ALLMULTI;
|
|
}
|
|
}
|
|
|
|
VMKLNX_DEBUG(0, "%s set_multi %x %lx %p\n", dev->name, dev->flags, dev->features, dev->pt_ops);
|
|
if (dev->set_multicast_list) {
|
|
VMKAPI_MODULE_CALL_VOID(dev->module_id,
|
|
dev->set_multicast_list,
|
|
dev);
|
|
}
|
|
} else {
|
|
/* unblock the device */
|
|
clear_bit(__LINK_STATE_BLOCKED, &dev->state);
|
|
}
|
|
} else {
|
|
clear_bit(__LINK_STATE_START, &dev->state);
|
|
}
|
|
}
|
|
|
|
return ret;
|
|
}
|
|
EXPORT_SYMBOL(dev_open);
|
|
|
|
/*
|
|
*----------------------------------------------------------------------------
|
|
*
|
|
* vmklnx_free_netdev
|
|
*
|
|
* Internal implementation of free_netdev, frees net_device and associated
|
|
 *    structures. The exposed version of free_netdev is an inline because it
|
|
* touches driver private data structs.
|
|
*
|
|
* Results:
|
|
* None.
|
|
*
|
|
* Side effects:
|
|
* None.
|
|
*
|
|
*----------------------------------------------------------------------------
|
|
*/
|
|
void
|
|
vmklnx_free_netdev(struct kmem_cache_s *pmCache, struct net_device *dev)
|
|
{
|
|
LinNetDev *linDev = get_LinNetDev(dev);
|
|
|
|
if (dev->skb_pool) {
|
|
dev->skb_pool = NULL;
|
|
}
|
|
|
|
kfree(dev->tx_netqueue_info);
|
|
kfree(dev->_tx);
|
|
kfree((char *)linDev - linDev->padded);
|
|
}
|
|
EXPORT_SYMBOL(vmklnx_free_netdev);
|
|
|
|
static void
|
|
netdev_init_one_queue(struct net_device *dev,
|
|
struct netdev_queue *queue,
|
|
void *_unused)
|
|
{
|
|
queue->dev = dev;
|
|
}
|
|
|
|
static void
|
|
netdev_init_queues(struct net_device *dev)
|
|
{
|
|
netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL);
|
|
}
|
|
|
|
struct net_device *
|
|
vmklnx_alloc_netdev_mq(struct module *this_module,
|
|
int sizeof_priv,
|
|
const char *name,
|
|
void (*setup)(struct net_device *),
|
|
unsigned int queue_count)
|
|
{
|
|
int i;
|
|
LinNetDev *linDev;
|
|
struct netdev_queue *tx;
|
|
struct net_device *dev;
|
|
int alloc_size;
|
|
void *p;
|
|
struct tx_netqueue_info *tx_netqueue_info;
|
|
|
|
VMK_ASSERT(this_module->skb_cache);
|
|
VMK_ASSERT(this_module->moduleID != 0 && this_module->moduleID != VMK_INVALID_MODULE_ID);
|
|
|
|
BUG_ON(strlen(name) >= sizeof(dev->name));
|
|
|
|
alloc_size = sizeof(struct LinNetDev);
|
|
|
|
if (sizeof_priv) {
|
|
/* ensure 32-byte alignment of private area */
|
|
alloc_size = (alloc_size + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST;
|
|
alloc_size += sizeof_priv;
|
|
}
|
|
|
|
/* ensure 32-byte alignment of whole construct */
|
|
alloc_size += NETDEV_ALIGN_CONST;
|
|
|
|
p = kzalloc(alloc_size, GFP_KERNEL);
|
|
if (!p) {
|
|
printk(KERN_ERR "alloc_netdev: Unable to allocate device.\n");
|
|
return NULL;
|
|
}
|
|
|
|
linDev = (LinNetDev *)
|
|
(((long)p + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST);
|
|
linDev->padded = (char *)linDev - (char *)p;
|
|
|
|
tx = kzalloc(sizeof(struct netdev_queue) * queue_count, GFP_KERNEL);
|
|
if (!tx) {
|
|
printk(KERN_ERR "alloc_netdev: Unable to allocate "
|
|
"tx qdiscs.\n");
|
|
kfree(p);
|
|
return NULL;
|
|
}
|
|
|
|
alloc_size = sizeof (struct tx_netqueue_info) * queue_count;
|
|
tx_netqueue_info = kzalloc(alloc_size, GFP_KERNEL);
|
|
if (!tx_netqueue_info) {
|
|
printk(KERN_ERR "alloc_netdev: Unable to allocate tx_netqueue_info.\n");
|
|
kfree(tx);
|
|
kfree(p);
|
|
return NULL;
|
|
}
|
|
|
|
/* make default queue valid */
|
|
tx_netqueue_info[0].valid = VMK_TRUE;
|
|
tx_netqueue_info[0].vmkqid = VMK_NETQUEUE_DEFAULT_QUEUEID;
|
|
|
|
for (i = 1; i < queue_count; i++) {
|
|
tx_netqueue_info[i].valid = VMK_FALSE;
|
|
tx_netqueue_info[i].vmkqid = VMK_NETQUEUE_INVALID_QUEUEID;
|
|
}
|
|
|
|
dev = &linDev->linNetDev;
|
|
dev->skb_pool = this_module->skb_cache;
|
|
dev->_tx = tx;
|
|
dev->num_tx_queues = queue_count;
|
|
dev->real_num_tx_queues = queue_count;
|
|
dev->tx_netqueue_info = tx_netqueue_info;
|
|
|
|
if (sizeof_priv) {
|
|
dev->priv = ((char *)dev +
|
|
((sizeof(struct net_device) + NETDEV_ALIGN_CONST)
|
|
& ~NETDEV_ALIGN_CONST));
|
|
}
|
|
|
|
netdev_init_queues(dev);
|
|
|
|
dev->module_id = this_module->moduleID;
|
|
INIT_LIST_HEAD(&dev->napi_list);
|
|
spin_lock_init(&dev->napi_lock);
|
|
set_bit(__NETQUEUE_STATE, (void*)&dev->netq_state);
|
|
|
|
VMKAPI_MODULE_CALL_VOID(dev->module_id, setup, dev);
|
|
strcpy(dev->name, name);
|
|
|
|
return dev;
|
|
}
|
|
EXPORT_SYMBOL(vmklnx_alloc_netdev_mq);
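/*
 * Illustrative sketch (not compiled in): allocating a multi-queue net_device
 * through vmklnx_alloc_netdev_mq(). Drivers normally reach this function via
 * the alloc_netdev_mq()/alloc_etherdev_mq() wrapper macros rather than
 * calling it directly; the explicit THIS_MODULE call and the example_*
 * names below are hypothetical.
 */
#if 0
struct example_priv {
   int example_field;
};

static void example_setup(struct net_device *dev)
{
   /* driver-specific initialization of the freshly zeroed net_device */
}

static struct net_device *example_alloc_mq(void)
{
   /* one default queue plus three additional tx queues */
   return vmklnx_alloc_netdev_mq(THIS_MODULE, sizeof(struct example_priv),
                                 "vmnic%d", example_setup, 4);
}
#endif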
|
|
|
|
#ifndef ARPHRD_ETHER
|
|
#define ARPHRD_ETHER 1 /* Ethernet 10Mbps. */
|
|
#endif
|
|
|
|
/**
|
|
* ether_setup - setup the given Ethernet network device
|
|
* @dev: network device
|
|
*
|
|
* Initializes fields of the given network device with Ethernet-generic
|
|
* values
|
|
*
|
|
* ESX Deviation Notes:
|
|
* This function does not initialize any function pointers in the
|
|
* given net_device
|
|
*
|
|
* RETURN VALUE:
|
|
* This function does not return a value
|
|
*/
|
|
/* _VMKLNX_CODECHECK_: ether_setup */
|
|
void
|
|
ether_setup(struct net_device *dev)
|
|
{
|
|
dev->type = ARPHRD_ETHER;
|
|
dev->hard_header_len = ETH_HLEN; /* XXX should this include 802.1pq? */
|
|
dev->mtu = ETH_DATA_LEN; /* eth_mtu */
|
|
dev->addr_len = ETH_ALEN;
|
|
/* XXX */
|
|
dev->tx_queue_len = 100; /* Ethernet wants good queues */
|
|
|
|
memset(dev->broadcast, 0xFF, ETH_ALEN);
|
|
|
|
dev->flags = IFF_BROADCAST|IFF_MULTICAST;
|
|
}
|
|
EXPORT_SYMBOL(ether_setup);
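/*
 * Illustrative sketch (not compiled in): ether_setup() is normally passed as
 * the setup callback when a driver allocates its net_device, so the Ethernet
 * defaults above are applied before the driver fills in its own fields.
 * alloc_netdev() here is the usual wrapper around vmklnx_alloc_netdev_mq();
 * example_alloc_ethdev is hypothetical.
 */
#if 0
static struct net_device *example_alloc_ethdev(void)
{
   /* the "vmnic%d" format is expanded later by register_netdev() */
   return alloc_netdev(0, "vmnic%d", ether_setup);
}
#endif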
|
|
|
|
|
|
/**
|
|
* netif_device_attach - mark device as attached
|
|
* @dev: network device
|
|
*
|
|
* Mark device as attached from system and restart if needed.
|
|
*
|
|
* RETURN VALUE:
|
|
* None
|
|
*/
|
|
/* _VMKLNX_CODECHECK_: netif_device_attach */
|
|
void
|
|
netif_device_attach(struct net_device *dev)
|
|
{
|
|
if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) &&
|
|
netif_running(dev)) {
|
|
netif_tx_wake_all_queues(dev);
|
|
__netdev_watchdog_up(dev);
|
|
}
|
|
}
|
|
EXPORT_SYMBOL(netif_device_attach);
|
|
|
|
/**
|
|
* netif_device_detach - mark device as removed
|
|
* @dev: network device
|
|
*
|
|
* Mark device as removed from system and therefore no longer available.
|
|
*
|
|
* RETURN VALUE:
|
|
* None
|
|
*/
|
|
/* _VMKLNX_CODECHECK_: netif_device_detach */
|
|
void
|
|
netif_device_detach(struct net_device *dev)
|
|
{
|
|
if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) &&
|
|
netif_running(dev)) {
|
|
netif_tx_stop_all_queues(dev);
|
|
}
|
|
}
|
|
EXPORT_SYMBOL(netif_device_detach);
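/*
 * Illustrative sketch (not compiled in): the usual suspend/resume pairing for
 * netif_device_detach()/netif_device_attach() in a PCI NIC driver. The
 * example_* names are hypothetical.
 */
#if 0
static int example_suspend(struct pci_dev *pdev, pm_message_t state)
{
   struct net_device *dev = pci_get_drvdata(pdev);

   netif_device_detach(dev);     /* stops all tx queues if the device is running */
   /* ... quiesce and power down the hardware ... */
   return 0;
}

static int example_resume(struct pci_dev *pdev)
{
   struct net_device *dev = pci_get_drvdata(pdev);

   /* ... re-initialize the hardware ... */
   netif_device_attach(dev);     /* restarts the queues and the watchdog */
   return 0;
}
#endif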
|
|
|
|
static void
|
|
__netdev_init_queue_locks_one(struct net_device *dev,
|
|
struct netdev_queue *queue,
|
|
void *_unused)
|
|
{
|
|
VMK_ReturnStatus status;
|
|
struct netdev_soft_queue *softq = &queue->softq;
|
|
|
|
spin_lock_init(&queue->_xmit_lock);
|
|
queue->xmit_lock_owner = -1;
|
|
queue->processing_tx = 0;
|
|
|
|
spin_lock_init(&softq->queue_lock);
|
|
softq->state = 0;
|
|
softq->outputList = (vmk_PktList) vmk_HeapAlloc(vmklnxLowHeap,
|
|
vmk_PktListSizeInBytes);
|
|
if (softq->outputList == NULL) {
|
|
VMK_ASSERT(VMK_FALSE);
|
|
return;
|
|
}
|
|
vmk_PktListInit(softq->outputList);
|
|
status = vmk_ConfigParamGetUint(maxNetifTxQueueLenConfigHandle,
|
|
&softq->outputListMaxSize);
|
|
VMK_ASSERT(status == VMK_OK);
|
|
}
|
|
|
|
/*
|
|
*----------------------------------------------------------------------------
|
|
*
|
|
* netdev_init_queue_locks --
|
|
*
|
|
* Init device queues locks.
|
|
*
|
|
* Results:
|
|
* None.
|
|
*
|
|
* Side effects:
|
|
* None.
|
|
*
|
|
*----------------------------------------------------------------------------
|
|
*/
|
|
static void
|
|
netdev_init_queue_locks(struct net_device *dev)
|
|
{
|
|
netdev_for_each_tx_queue(dev, __netdev_init_queue_locks_one, NULL);
|
|
}
|
|
|
|
static void
|
|
__netdev_destroy_queue_locks_one(struct net_device *dev,
|
|
struct netdev_queue *queue,
|
|
void *_unused)
|
|
{
|
|
struct netdev_soft_queue *softq = &queue->softq;
|
|
|
|
vmk_HeapFree(vmklnxLowHeap, softq->outputList);
|
|
}
|
|
|
|
/*
|
|
*----------------------------------------------------------------------------
|
|
*
|
|
* netdev_destroy_queue_locks --
|
|
*
|
|
* Destroy device queues locks.
|
|
*
|
|
* Results:
|
|
* None.
|
|
*
|
|
* Side effects:
|
|
* None.
|
|
*
|
|
*----------------------------------------------------------------------------
|
|
*/
|
|
static void
|
|
netdev_destroy_queue_locks(struct net_device *dev)
|
|
{
|
|
netdev_for_each_tx_queue(dev, __netdev_destroy_queue_locks_one, NULL);
|
|
}
|
|
|
|
/*
|
|
*----------------------------------------------------------------------------
|
|
*
|
|
* netdev_ioctl --
|
|
* Process an ioctl request for a given device.
|
|
*
|
|
* Results:
|
|
* VMK_ReturnStatus indicating the outcome.
|
|
*
|
|
* Side effects:
|
|
* None.
|
|
*
|
|
*----------------------------------------------------------------------------
|
|
*/
|
|
static VMK_ReturnStatus
|
|
netdev_ioctl(struct net_device *dev, uint32_t cmd, void *args, uint32_t *result,
|
|
vmk_IoctlCallerSize callerSize, vmk_Bool callerHasRtnlLock)
|
|
{
|
|
VMK_ReturnStatus ret = VMK_OK;
|
|
|
|
VMK_ASSERT(dev);
|
|
|
|
if (args && result) {
|
|
if (cmd == SIOCGIFHWADDR) {
|
|
struct ifreq *ifr = args;
|
|
memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr, 6);
|
|
ifr->ifr_hwaddr.sa_family = dev->type;
|
|
*result = 0;
|
|
return VMK_OK;
|
|
}
|
|
|
|
if (cmd == SIOCETHTOOL) {
|
|
struct ifreq *ifr = args;
|
|
|
|
if (callerHasRtnlLock == VMK_FALSE) {
|
|
rtnl_lock();
|
|
}
|
|
|
|
ret = vmklnx_ethtool_ioctl(dev, ifr, result, callerSize);
|
|
|
|
         /* Some drivers call dev_close() when ethtool ops like .set_ringparam fail.
|
|
* The following check will update dev->gflags accordingly to avoid a second
|
|
* dev_close() when CloseNetDev() is called.
|
|
*/
|
|
if (ret && !(dev->flags & IFF_UP))
|
|
dev->gflags &= ~IFF_DEV_IS_OPEN;
|
|
|
|
if (callerHasRtnlLock == VMK_FALSE) {
|
|
rtnl_unlock();
|
|
}
|
|
|
|
return ret;
|
|
}
|
|
|
|
if (dev->do_ioctl) {
|
|
if (callerHasRtnlLock == VMK_FALSE) {
|
|
rtnl_lock();
|
|
}
|
|
VMKAPI_MODULE_CALL(dev->module_id, *result, dev->do_ioctl, dev,
|
|
args, cmd);
|
|
if (callerHasRtnlLock == VMK_FALSE) {
|
|
rtnl_unlock();
|
|
}
|
|
ret = VMK_OK;
|
|
} else {
|
|
ret = VMK_NOT_SUPPORTED;
|
|
}
|
|
} else {
|
|
VMKLNX_DEBUG(0, "net_device: %p, cmd: 0x%x, args: %p, result: %p",
|
|
dev, cmd, args, result);
|
|
ret = VMK_FAILURE;
|
|
}
|
|
|
|
return ret;
|
|
}
|
|
|
|
/*
|
|
*----------------------------------------------------------------------------
|
|
*
|
|
* link_state_work_cb --
|
|
*
|
|
* Periodic work function to check the status of various physical NICS.
|
|
*
|
|
* Results:
|
|
* None.
|
|
*
|
|
* Side effects:
|
|
* None.
|
|
*
|
|
*----------------------------------------------------------------------------
|
|
*/
|
|
static void
|
|
link_state_work_cb(struct work_struct *work)
|
|
{
|
|
struct net_device *cur;
|
|
uint32_t result;
|
|
unsigned speed = 0, duplex = 0, linkState = 0;
|
|
VMK_ReturnStatus status;
|
|
unsigned newLinkStateTimerPeriod;
|
|
struct ethtool_cmd *cmd;
|
|
|
|
cmd = compat_alloc_user_space(sizeof(*cmd));
|
|
if (cmd == NULL) {
|
|
VMKLNX_WARN("Aborting link state watchdog due to compat_alloc_user_space() failure.");
|
|
goto reschedule_work;
|
|
}
|
|
|
|
/*
|
|
* Since the ethtool ioctls require the rtnl_lock,
|
|
* we should acquire the lock first before getting
|
|
* dev_base_lock. This is the order used by other
|
|
* code paths that require both locks.
|
|
*/
|
|
rtnl_lock();
|
|
write_lock(&dev_base_lock);
|
|
|
|
cur = dev_base;
|
|
while (cur) {
|
|
struct ifreq ifr;
|
|
|
|
vmk_Bool link_changed = VMK_FALSE;
|
|
|
|
|
|
memset(&ifr, 0, sizeof(ifr));
|
|
memcpy(ifr.ifr_name, cur->name, sizeof(ifr.ifr_name));
|
|
|
|
/* get link speed and duplexity */
|
|
put_user(ETHTOOL_GSET, &cmd->cmd);
|
|
ifr.ifr_data = (void *) cmd;
|
|
if (netdev_ioctl(cur, SIOCETHTOOL, &ifr, &result,
|
|
VMK_IOCTL_CALLER_64, VMK_TRUE) == VMK_OK) {
|
|
get_user(speed, &cmd->speed);
|
|
get_user(duplex, &cmd->duplex);
|
|
}
|
|
|
|
/* get link state */
|
|
put_user(ETHTOOL_GLINK, &cmd->cmd);
|
|
ifr.ifr_data = (void *) cmd;
|
|
if (netdev_ioctl(cur, SIOCETHTOOL, &ifr, &result,
|
|
VMK_IOCTL_CALLER_64, VMK_TRUE) == VMK_OK) {
|
|
struct ethtool_value value;
|
|
copy_from_user(&value, cmd, sizeof(struct ethtool_value));
|
|
linkState = value.data ? VMKLNX_UPLINK_LINK_UP :
|
|
VMKLNX_UPLINK_LINK_DOWN;
|
|
}
|
|
|
|
/* set speed, duplexity and link state if changed */
|
|
if (cur->link_state != linkState) {
|
|
cur->link_state = linkState;
|
|
link_changed = VMK_TRUE;
|
|
if (linkState == VMKLNX_UPLINK_LINK_DOWN) {
|
|
/* Tell people we are going down */
|
|
call_netdevice_notifiers(NETDEV_GOING_DOWN, cur);
|
|
} else {
|
|
call_netdevice_notifiers(NETDEV_UP, cur);
|
|
}
|
|
netif_toggled_clear(cur);
|
|
} else if(netif_carrier_ok(cur)) {
|
|
if (netif_toggled_test_and_clear(cur)) {
|
|
/* Tell people we had a link flap */
|
|
VMKLNX_DEBUG(0, "link flap on %s", cur->name);
|
|
call_netdevice_notifiers(NETDEV_GOING_DOWN, cur);
|
|
call_netdevice_notifiers(NETDEV_UP, cur);
|
|
}
|
|
}
|
|
if (netif_carrier_ok(cur)) {
|
|
if (cur->full_duplex != duplex) {
|
|
cur->full_duplex = duplex;
|
|
link_changed = VMK_TRUE;
|
|
}
|
|
if (cur->link_speed != speed) {
|
|
cur->link_speed = speed;
|
|
link_changed = VMK_TRUE;
|
|
}
|
|
}
|
|
if (link_changed) {
|
|
SetNICLinkStatus(cur);
|
|
}
|
|
|
|
cur = cur->next;
|
|
}
|
|
|
|
write_unlock(&dev_base_lock);
|
|
rtnl_unlock();
|
|
|
|
reschedule_work:
|
|
status = vmk_ConfigParamGetUint(linkStateTimerPeriodConfigHandle,
|
|
&newLinkStateTimerPeriod);
|
|
VMK_ASSERT(status == VMK_OK);
|
|
if (linkStateTimerPeriod != newLinkStateTimerPeriod) {
|
|
linkStateTimerPeriod = newLinkStateTimerPeriod;
|
|
}
|
|
schedule_delayed_work(&linkStateWork,
|
|
msecs_to_jiffies(linkStateTimerPeriod));
|
|
|
|
/* Periodic update of the LRO config option */
|
|
status = vmk_ConfigParamGetUint(vmklnxLROEnabledConfigHandle,
|
|
&vmklnxLROEnabled);
|
|
VMK_ASSERT(status == VMK_OK);
|
|
status = vmk_ConfigParamGetUint(vmklnxLROMaxAggrConfigHandle,
|
|
&vmklnxLROMaxAggr);
|
|
VMK_ASSERT(status == VMK_OK);
|
|
}
|
|
|
|
/*
|
|
*----------------------------------------------------------------------------
|
|
*
|
|
* netdev_watchdog --
|
|
*
|
|
* Device watchdog
|
|
*
|
|
* Results:
|
|
* None.
|
|
*
|
|
* Side effects:
|
|
* None.
|
|
*
|
|
*----------------------------------------------------------------------------
|
|
*/
|
|
static void
|
|
netdev_watchdog(struct net_device *dev)
|
|
{
|
|
int some_queue_stopped = 0;
|
|
|
|
netif_tx_lock(dev);
|
|
if (netif_device_present(dev) &&
|
|
/* don't bother if the device is being closed */
|
|
netif_running(dev) &&
|
|
/* only after the device is opened */
|
|
(dev->flags & IFF_UP) &&
|
|
netif_carrier_ok(dev)) {
|
|
unsigned int i;
|
|
|
|
for (i = 0; i < dev->real_num_tx_queues; i++) {
|
|
struct netdev_queue *txq;
|
|
|
|
txq = netdev_get_tx_queue(dev, i);
|
|
if (netif_tx_queue_stopped(txq)) {
|
|
some_queue_stopped = 1;
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (some_queue_stopped &&
|
|
time_after(jiffies, (dev->trans_start +
|
|
dev->watchdog_timeo))) {
|
|
VMKLNX_WARN("NETDEV WATCHDOG: %s: transmit timed out", dev->name);
|
|
|
|
dev->watchdog_timeohit_stats++;
|
|
vmk_UplinkWatchdogTimeoutHit(dev->uplinkDev);
|
|
|
|
/* call driver to reset the device */
|
|
VMKAPI_MODULE_CALL_VOID(dev->module_id, dev->tx_timeout, dev);
|
|
WARN_ON_ONCE(1);
|
|
|
|
#ifdef VMX86_DEBUG
|
|
// PR 167776: Reset counter every hour or so. We'll panic
|
|
         // only if we go beyond a certain number of watchdog timeouts
|
|
// in an hour.
|
|
if (time_after(jiffies,
|
|
dev->watchdog_timeohit_period_start + NETDEV_TICKS_PER_HOUR)) {
|
|
dev->watchdog_timeohit_cnt = 0;
|
|
dev->watchdog_timeohit_period_start = jiffies;
|
|
}
|
|
|
|
if (!VMKLNX_STRESS_DEBUG_OPTION(stressNetIfFailTxAndStopQueue)) {
|
|
dev->watchdog_timeohit_cnt++;
|
|
|
|
if (dev->watchdog_timeohit_cnt >= dev->watchdog_timeohit_cfg) {
|
|
dev->watchdog_timeohit_cnt = 0;
|
|
if (dev->watchdog_timeohit_panic == VMKLNX_UPLINK_WATCHDOG_PANIC_MOD_ENABLE) {
|
|
VMK_ASSERT_BUG(VMK_FALSE);
|
|
}
|
|
}
|
|
}
|
|
#endif
|
|
}
|
|
}
|
|
netif_tx_unlock(dev);
|
|
}
|
|
|
|
|
|
/*
|
|
*----------------------------------------------------------------------------
|
|
*
|
|
* watchdog_timer_cb --
|
|
*
|
|
* Watchdog timer callback for all registered devices.
|
|
*
|
|
* Results:
|
|
* None.
|
|
*
|
|
* Side effects:
|
|
* None.
|
|
*
|
|
*----------------------------------------------------------------------------
|
|
*/
|
|
static void
|
|
watchdog_work_cb(struct work_struct *work)
|
|
{
|
|
struct net_device *dev = NULL;
|
|
|
|
write_lock(&dev_base_lock);
|
|
|
|
for (dev = dev_base; dev; dev = dev->next) {
|
|
netdev_watchdog(dev);
|
|
}
|
|
|
|
write_unlock(&dev_base_lock);
|
|
|
|
schedule_delayed_work(&watchdogWork,
|
|
msecs_to_jiffies(WATCHDOG_DEF_TIMER));
|
|
}
|
|
|
|
/**
|
|
* __dev_get_by_name - find a device by its name
|
|
* @name: name to find
|
|
*
|
|
* Find an interface by name. The returned handle does not have the
|
|
 * usage count incremented and the caller must be careful before using
|
|
* the handle. %NULL is returned if no matching device is found.
|
|
*
|
|
* RETURN VALUE:
|
|
* Pointer to device structure on success
|
|
* %NULL is returned if no matching device is found
|
|
*/
|
|
/* _VMKLNX_CODECHECK_: __dev_get_by_name */
|
|
struct net_device *
|
|
__dev_get_by_name(const char *name)
|
|
{
|
|
struct net_device *dev;
|
|
|
|
read_lock(&dev_base_lock);
|
|
|
|
dev = dev_base;
|
|
while (dev) {
|
|
if (!strncmp(dev->name, name, sizeof(dev->name))) {
|
|
break;
|
|
}
|
|
dev = dev->next;
|
|
}
|
|
|
|
read_unlock(&dev_base_lock);
|
|
|
|
return dev;
|
|
}
|
|
EXPORT_SYMBOL(__dev_get_by_name);
|
|
|
|
/**
|
|
* dev_get_by_name - find a device by its name
|
|
* @name: name to find
|
|
*
|
|
* Find an interface by name. The returned handle has the usage count
|
|
* incremented and the caller must use dev_put() to release it when it
|
|
* is no longer needed. %NULL is returned if no matching device is
|
|
* found.
|
|
*
|
|
* RETURN VALUE:
|
|
* Pointer to device structure on success
|
|
* %NULL is returned if no matching device is found
|
|
*/
|
|
/* _VMKLNX_CODECHECK_: dev_get_by_name */
|
|
struct net_device *
|
|
dev_get_by_name(const char *name)
|
|
{
|
|
struct net_device *dev;
|
|
|
|
dev = __dev_get_by_name(name);
|
|
if (dev) {
|
|
dev_hold(dev);
|
|
}
|
|
return dev;
|
|
}
|
|
EXPORT_SYMBOL(dev_get_by_name);
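/*
 * Illustrative sketch (not compiled in): dev_get_by_name() takes a reference
 * that must be dropped with dev_put() once the caller is done, as
 * IoctlNetDev() further down does. example_find_mtu is hypothetical.
 */
#if 0
static int example_find_mtu(const char *name)
{
   struct net_device *dev = dev_get_by_name(name);
   int mtu;

   if (!dev) {
      return -ENODEV;
   }
   mtu = dev->mtu;
   dev_put(dev);
   return mtu;
}
#endif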
|
|
|
|
/**
|
|
* dev_alloc_name - allocate a name for a device
|
|
* @dev: device
|
|
* @name: name format string
|
|
*
|
|
 *      Passed a format string - e.g. "lt%d" - it will try to find a suitable
|
|
 *      id. It scans the list of devices to build up a free map, then chooses
|
|
* the first empty slot. Returns the number of the unit assigned or
|
|
* a negative errno code.
|
|
*
|
|
* RETURN VALUE:
|
|
* Number of the unit assigned on success
|
|
* Negative errno code on error
|
|
*/
|
|
/* _VMKLNX_CODECHECK_: dev_alloc_name */
|
|
int
|
|
dev_alloc_name(struct net_device *dev, const char *name)
|
|
{
|
|
int i;
|
|
char buf[VMK_DEVICE_NAME_MAX_LENGTH];
|
|
const int max_netdevices = 8*PAGE_SIZE;
|
|
char *p;
|
|
|
|
p = strnchr(name, VMK_DEVICE_NAME_MAX_LENGTH - 1, '%');
|
|
if (p && (p[1] != 'd' || strchr(p+2, '%'))) {
|
|
return -EINVAL;
|
|
}
|
|
|
|
for (i = 0; i < max_netdevices; i++) {
|
|
snprintf(buf, sizeof(buf), name, i);
|
|
|
|
if (vmk_UplinkIsNameAvailable(buf)) {
|
|
strcpy(dev->name, buf);
|
|
return i;
|
|
}
|
|
}
|
|
return -ENFILE;
|
|
}
|
|
EXPORT_SYMBOL(dev_alloc_name);
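/*
 * Illustrative sketch (not compiled in): dev_alloc_name() fills in dev->name
 * from a format string, picking the first unit number the uplink layer
 * reports as free. register_netdev() below does this automatically; a direct
 * call, as in this hypothetical snippet, is only needed when a driver wants
 * to name the device itself before registration.
 */
#if 0
static int example_name_device(struct net_device *dev)
{
   int unit = dev_alloc_name(dev, "vmnic%d");

   if (unit < 0) {
      return unit;            /* negative errno, e.g. -EINVAL or -ENFILE */
   }
   return 0;                  /* dev->name is now e.g. "vmnic3" */
}
#endif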
|
|
|
|
/*
|
|
*----------------------------------------------------------------------------
|
|
*
|
|
* set_device_pci_name --
|
|
*
|
|
* Set device's pci name
|
|
*
|
|
* Results:
|
|
* None.
|
|
*
|
|
* Side effects:
|
|
* None.
|
|
*
|
|
*----------------------------------------------------------------------------
|
|
*/
|
|
static void
|
|
set_device_pci_name(struct net_device *dev, struct pci_dev *pdev)
|
|
{
|
|
/* We normally have the pci device name, because
|
|
    * esxcfg-init or esxcfg-init-eesx generates the pci device names.
|
|
*
|
|
* We just override it with the one named by the driver.
|
|
*/
|
|
VMK_ASSERT_ON_COMPILE(VMK_DEVICE_NAME_MAX_LENGTH >= IFNAMSIZ);
|
|
if (LinuxPCI_IsValidPCIBusDev(pdev)) {
|
|
LinuxPCIDevExt *pe = container_of(pdev, LinuxPCIDevExt, linuxDev);
|
|
vmk_PCISetDeviceName(pe->vmkDev, dev->name);
|
|
strncpy(pdev->name, dev->name, sizeof(pdev->name));
|
|
}
|
|
if (strnlen(dev->name, VMK_DEVICE_NAME_MAX_LENGTH) > (IFNAMSIZ - 1)) {
|
|
VMKLNX_WARN("Net device name length(%zd) exceeds IFNAMSIZ - 1(%d)",
|
|
strnlen(dev->name, VMK_DEVICE_NAME_MAX_LENGTH), IFNAMSIZ - 1);
|
|
}
|
|
}
|
|
|
|
/**
|
|
* register_netdevice - register a network device
|
|
* @dev: device to register
|
|
*
|
|
* Take a completed network device structure and add it to the kernel
|
|
* interfaces. 0 is returned on success. A negative errno code is returned
|
|
* on a failure to set up the device, or if the name is a duplicate.
|
|
*
|
|
* RETURN VALUE:
|
|
* 0 on success
|
|
* negative errno code on error
|
|
*/
|
|
/* _VMKLNX_CODECHECK_: register_netdevice */
|
|
int
|
|
register_netdevice(struct net_device *dev)
|
|
{
|
|
int ret = 0;
|
|
|
|
   /*
    * netif_napi_add() can be called before register_netdev(), unfortunately.
    * Fail register_netdev() if the prior napi_add had failed; it's most
    * likely a low-memory condition and we'll fail somewhere further down
    * the line if we go on.
    */
|
|
if (dev->reg_state == NETREG_EARLY_NAPI_ADD_FAILED) {
|
|
VMKLNX_WARN("%s: early napi registration failed, bailing", dev->name);
|
|
ret = -EIO;
|
|
goto out;
|
|
}
|
|
|
|
netdev_init_queue_locks(dev);
|
|
dev->iflink = -1;
|
|
dev->vlan_group = NULL;
|
|
|
|
/* Init, if this function is available */
|
|
int rv = 0;
|
|
if (dev->init != 0) {
|
|
VMKAPI_MODULE_CALL(dev->module_id, rv, dev->init, dev);
|
|
if (rv != 0) {
|
|
ret = -EIO;
|
|
goto out;
|
|
}
|
|
}
|
|
|
|
if (netdev_poll_init(dev) != VMK_OK) {
|
|
ret = -ENOMEM;
|
|
goto err_uninit;
|
|
}
|
|
|
|
set_bit(__LINK_STATE_PRESENT, &dev->state);
|
|
|
|
write_lock(&dev_base_lock);
|
|
|
|
/* CNA devices don't belong to the same uplink namespace. */
|
|
if (dev->features & NETIF_F_CNA) {
|
|
if (LinuxCNA_RegisterNetDev(dev) != VMK_OK) {
|
|
ret = -EIO;
|
|
write_unlock(&dev_base_lock);
|
|
goto err_cna_reg;
|
|
}
|
|
} else {
|
|
dev->next = dev_base;
|
|
dev_base = dev;
|
|
}
|
|
|
|
|
|
write_unlock(&dev_base_lock);
|
|
|
|
dev_hold(dev);
|
|
dev->reg_state = NETREG_REGISTERED;
|
|
|
|
out:
|
|
return ret;
|
|
|
|
err_cna_reg:
|
|
netdev_poll_cleanup(dev);
|
|
err_uninit:
|
|
if (dev->uninit) {
|
|
VMKAPI_MODULE_CALL_VOID(dev->module_id, dev->uninit, dev);
|
|
}
|
|
goto out;
|
|
}
|
|
|
|
/**
|
|
* register_netdev - register a network device
|
|
* @dev: device to register
|
|
*
|
|
* Take a completed network device structure and add it to the kernel
|
|
* interfaces. 0 is returned on success. A negative errno code is returned
|
|
* on a failure to set up the device, or if the name is a duplicate.
|
|
*
|
|
* This is a wrapper around register_netdevice that expands the device name
|
|
* if you passed a format string to alloc_netdev.
|
|
*
|
|
* RETURN VALUE:
|
|
* 0 on success
|
|
* negative errno code on error
|
|
*/
|
|
/* _VMKLNX_CODECHECK_: register_netdev */
|
|
int
|
|
register_netdev(struct net_device *dev)
|
|
{
|
|
int err = 0;
|
|
|
|
rtnl_lock();
|
|
|
|
if (strchr(dev->name, '%')) {
|
|
err = dev_alloc_name(dev, dev->name);
|
|
} else if (dev->name[0]==0 || dev->name[0]==' ') {
|
|
err = dev_alloc_name(dev, "vmnic%d");
|
|
}
|
|
|
|
if (err >= 0) {
|
|
struct pci_dev *pdev = dev->pdev;
|
|
|
|
if (dev->useDriverNamingDevice) {
|
|
/* net_device already named, we need update the PCI device name list */
|
|
set_device_pci_name(dev, pdev);
|
|
}
|
|
err = register_netdevice(dev);
|
|
}
|
|
|
|
rtnl_unlock();
|
|
|
|
if (dev->pdev == NULL) {
|
|
/*
|
|
* For pseudo network interfaces, we connect and open the
|
|
* uplink at this point. For Real PCI NIC's, they do
|
|
* this in pci_announce_device() and vmk_PCIPostInsert()
|
|
* respectively.
|
|
*/
|
|
if (LinNet_ConnectUplink(dev, NULL)
|
|
|| (vmk_UplinkOpen(dev->uplinkDev) != VMK_OK)) {
|
|
err = -EIO;
|
|
}
|
|
}
|
|
|
|
return err;
|
|
}
|
|
EXPORT_SYMBOL(register_netdev);
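/*
 * Illustrative sketch (not compiled in): the typical probe-time flow a NIC
 * driver follows around register_netdev(). example_probe is hypothetical and
 * error handling is reduced to the essentials.
 */
#if 0
static int example_probe(struct pci_dev *pdev)
{
   struct net_device *dev;
   int err;

   dev = alloc_netdev(0, "vmnic%d", ether_setup);
   if (!dev) {
      return -ENOMEM;
   }

   /* fill in dev->open, dev->stop, dev->hard_start_xmit, etc. here */

   err = register_netdev(dev);   /* expands "vmnic%d" and registers the device */
   if (err) {
      free_netdev(dev);
      return err;
   }
   return 0;
}
#endif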
|
|
|
|
int
|
|
unregister_netdevice(struct net_device *dev)
|
|
{
|
|
struct net_device **cur;
|
|
|
|
VMK_ASSERT(atomic_read(&dev->refcnt) == 1);
|
|
|
|
if (dev->nicMajor > 0) {
|
|
vmkplxr_UnregisterChardev(dev->nicMajor, 0, dev->name);
|
|
}
|
|
|
|
if (dev->flags & IFF_UP) {
|
|
dev_close(dev);
|
|
}
|
|
|
|
VMK_ASSERT(dev->reg_state == NETREG_REGISTERED);
|
|
dev->reg_state = NETREG_UNREGISTERING;
|
|
|
|
if (dev->uninit) {
|
|
VMKAPI_MODULE_CALL_VOID(dev->module_id, dev->uninit, dev);
|
|
}
|
|
|
|
/* CNA devices don't belong to the same uplink namespace. */
|
|
if (dev->features & NETIF_F_CNA) {
|
|
LinuxCNA_UnRegisterNetDev(dev);
|
|
} else {
|
|
write_lock(&dev_base_lock);
|
|
cur = &dev_base;
|
|
while (*cur && *cur != dev) {
|
|
cur = &(*cur)->next;
|
|
}
|
|
if (*cur) {
|
|
*cur = (*cur)->next;
|
|
}
|
|
write_unlock(&dev_base_lock);
|
|
}
|
|
|
|
dev->reg_state = NETREG_UNREGISTERED;
|
|
|
|
netdev_poll_cleanup(dev);
|
|
|
|
VMK_ASSERT(dev->vlan_group == NULL);
|
|
if (dev->vlan_group) {
|
|
vmk_HeapFree(VMK_MODULE_HEAP_ID, dev->vlan_group);
|
|
dev->vlan_group = NULL;
|
|
}
|
|
|
|
netdev_destroy_queue_locks(dev);
|
|
|
|
/*
|
|
* Disassociate the pci_dev from this net device
|
|
*/
|
|
if (dev->pdev != NULL) {
|
|
dev->pdev->netdev = NULL;
|
|
dev->pdev = NULL;
|
|
}
|
|
|
|
dev_put(dev);
|
|
return 0;
|
|
}
|
|
|
|
/**
|
|
* unregister_netdev - remove device from the kernel
|
|
* @dev: device
|
|
*
|
|
* This function shuts down a device interface and removes it from the
|
|
* kernel tables.
|
|
*
|
|
* This is just a wrapper for unregister_netdevice. In general you want
|
|
* to use this and not unregister_netdevice.
|
|
*
|
|
* RETURN VALUE:
|
|
* None
|
|
*/
|
|
/* _VMKLNX_CODECHECK_: unregister_netdev */
|
|
void
|
|
unregister_netdev(struct net_device *dev)
|
|
{
|
|
unsigned long warning_time;
|
|
|
|
VMKLNX_DEBUG(0, "Unregistering %s", dev->name);
|
|
|
|
if (dev->pdev == NULL) {
|
|
/*
|
|
* Close and disconnect the uplink here if
|
|
* the device is a pseudo NIC. For real PCI
|
|
* NIC, the uplink is closed and disconnected
|
|
* via vmk_PCIDoPreRemove().
|
|
*/
|
|
vmk_UplinkClose(dev->uplinkDev);
|
|
}
|
|
|
|
/*
|
|
* Fixed PR366444 - Moved the 'refcnt' check here from within
|
|
* unregister_netdevice()
|
|
*
|
|
* We will be stuck in the while loop below if someone forgot
|
|
* to drop the reference count.
|
|
*/
|
|
warning_time = jiffies;
|
|
rtnl_lock();
|
|
while (atomic_read(&dev->refcnt) > 1) {
|
|
rtnl_unlock();
|
|
|
|
if ((jiffies - warning_time) > 10*HZ) {
|
|
VMKLNX_WARN("waiting for %s to become free. Usage count = %d",
|
|
dev->name, atomic_read(&dev->refcnt));
|
|
warning_time = jiffies;
|
|
}
|
|
|
|
current->state = TASK_INTERRUPTIBLE;
|
|
schedule_timeout(HZ/4);
|
|
current->state = TASK_RUNNING;
|
|
|
|
rtnl_lock();
|
|
}
|
|
|
|
unregister_netdevice(dev);
|
|
rtnl_unlock();
|
|
VMKLNX_DEBUG(0, "Done Unregistering %s", dev->name);
|
|
}
|
|
EXPORT_SYMBOL(unregister_netdev);
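/*
 * Illustrative sketch (not compiled in): the matching teardown for the probe
 * flow above. unregister_netdev() waits until the last reference is dropped,
 * so free_netdev() is safe immediately afterwards. example_remove is
 * hypothetical.
 */
#if 0
static void example_remove(struct net_device *dev)
{
   unregister_netdev(dev);   /* closes the device if it is still IFF_UP */
   free_netdev(dev);
}
#endif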
|
|
|
|
/*
|
|
* register_netdevice_notifier - register a network notifier block
|
|
* @nb: notifier
|
|
*
|
|
* Register a notifier to be called when network device events occur.
|
|
* When registered, all registration and up events are replayed
|
|
 * to the new notifier to allow the device to have a race-free
|
|
* view of the network device list.
|
|
*
|
|
* RETURN VALUE:
|
|
* 0 on success, -1 on failure.
|
|
*/
|
|
/* _VMKLNX_CODECHECK_: register_netdevice_notifier */
|
|
|
|
int register_netdevice_notifier(struct notifier_block *nb)
|
|
{
|
|
return atomic_notifier_chain_register(&netdev_notifier_list, nb);
|
|
}
|
|
EXPORT_SYMBOL(register_netdevice_notifier);
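/*
 * Illustrative sketch (not compiled in): a notifier block watching the
 * NETDEV_UP / NETDEV_GOING_DOWN events that link_state_work_cb() earlier in
 * this file raises through call_netdevice_notifiers(). example_netdev_event
 * and example_nb are hypothetical.
 */
#if 0
static int example_netdev_event(struct notifier_block *nb,
                                unsigned long event, void *data)
{
   struct net_device *dev = data;

   switch (event) {
   case NETDEV_UP:
      printk(KERN_INFO "%s: link up\n", dev->name);
      break;
   case NETDEV_GOING_DOWN:
      printk(KERN_INFO "%s: link going down\n", dev->name);
      break;
   }
   return NOTIFY_DONE;
}

static struct notifier_block example_nb = {
   .notifier_call = example_netdev_event,
};

/* registered with: register_netdevice_notifier(&example_nb); */
#endif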
|
|
|
|
/*
|
|
* unregister_netdevice_notifier - unregister a network notifier block
|
|
* @nb: notifier
|
|
*
|
|
 * Unregister a previously registered notifier block.
|
|
*
|
|
* RETURN VALUE:
|
|
* 0 on success, -1 on failure.
|
|
*/
|
|
/* _VMKLNX_CODECHECK_: unregister_netdevice_notifier */
|
|
int unregister_netdevice_notifier(struct notifier_block *nb)
|
|
{
|
|
return atomic_notifier_chain_unregister(&netdev_notifier_list, nb);
|
|
}
|
|
EXPORT_SYMBOL(unregister_netdevice_notifier);
|
|
|
|
int call_netdevice_notifiers(unsigned long val, void *v)
|
|
{
|
|
return atomic_notifier_call_chain (&netdev_notifier_list, val,
|
|
(struct net_device *)v);
|
|
}
|
|
|
|
/*
|
|
*-----------------------------------------------------------------------------
|
|
*
|
|
* create_dev_name --
|
|
*
|
|
* create a unique name for a network device.
|
|
*
|
|
* Results:
|
|
* none
|
|
*
|
|
* Side effects:
|
|
* pdev->name field is set to vmnic%d
|
|
*
|
|
*-----------------------------------------------------------------------------
|
|
*/
|
|
static void
|
|
create_dev_name(char *name, int length)
|
|
{
|
|
/*
|
|
* We use 32 as the starting number because we do not want to overlap with
|
|
    * the names used in the init process. It is assumed that the first 32
|
|
* devices (vmnic0 - vmnic31) may be used during boot.
|
|
*/
|
|
#define NET_ANON_START_ID VMK_CONST64U(32)
|
|
static vmk_atomic64 nameCounter = NET_ANON_START_ID;
|
|
|
|
snprintf(name, length, "vmnic%"VMK_FMT64"u",
|
|
vmk_AtomicReadInc64(&nameCounter));
|
|
}
|
|
|
|
/*
|
|
*-----------------------------------------------------------------------------
|
|
*
|
|
* netdev_name_adapter --
|
|
*
|
|
* Set the PCI adapter name, if not already set. If the PCI adapter
|
|
* already has a name and the name is registered as an uplink then
|
|
* create a new name for a new uplink port. Copy it to the net_device
|
|
* structure.
|
|
*
|
|
* Results:
|
|
* none
|
|
*
|
|
* Side effects:
|
|
* dev->name field is set.
|
|
*
|
|
*-----------------------------------------------------------------------------
|
|
*/
|
|
static void
|
|
netdev_name_adapter(struct net_device *dev, struct pci_dev *pdev)
|
|
{
|
|
LinuxPCIDevExt *pe;
|
|
char devName[VMK_DEVICE_NAME_MAX_LENGTH];
|
|
char *name = NULL;
|
|
|
|
if (pdev == NULL) {
|
|
// Pseudo devices may handle their own naming.
|
|
if (dev->name[0] != 0) {
|
|
return;
|
|
}
|
|
create_dev_name(dev->name, sizeof dev->name);
|
|
VMKLNX_INFO("Pseudo device %s", dev->name);
|
|
return;
|
|
}
|
|
|
|
pe = container_of(pdev, LinuxPCIDevExt, linuxDev);
|
|
|
|
/* Make sure a name exists */
|
|
devName[0] = '\0';
|
|
vmk_PCIGetDeviceName(pe->vmkDev, devName, sizeof devName);
|
|
|
|
   /*
    * If we do not have a name for the physical device then create one.
    * Otherwise, if the uplink port has already been registered, we assume
    * that we are called for a new port on the device and therefore create
    * a new name, which we do not pass on to the physical device.
    */
|
|
if (devName[0] == '\0') {
|
|
create_dev_name(pdev->name, sizeof pdev->name);
|
|
vmk_PCISetDeviceName(pe->vmkDev, pdev->name);
|
|
name = pdev->name;
|
|
VMKLNX_INFO("%s at " PCI_DEVICE_BUS_ADDRESS, pdev->name,
|
|
pci_domain_nr(pdev->bus),
|
|
pdev->bus->number,
|
|
PCI_SLOT(pdev->devfn),
|
|
PCI_FUNC(pdev->devfn));
|
|
} else {
|
|
if (!vmk_UplinkIsNameAvailable(devName)) {
|
|
create_dev_name(pdev->name, sizeof pdev->name);
|
|
name = pdev->name;
|
|
} else {
|
|
name = devName;
|
|
/*
|
|
* If we already have a name for the physical device in vmkernel,
|
|
* copy the name into pdev->name.
|
|
*/
|
|
snprintf(pdev->name, sizeof(pdev->name), "%s", name);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Give the PCI device name to net_device
|
|
*/
|
|
snprintf(dev->name, sizeof (dev->name), "%s", name);
|
|
|
|
}
|
|
|
|
/*
|
|
*----------------------------------------------------------------------------
|
|
*
|
|
* netdev_query_capabilities --
|
|
*
|
|
 *      Checks the hardware device's capabilities and returns the information in a
|
|
* 32 bit "capability" value
|
|
*
|
|
* Results:
|
|
* None.
|
|
*
|
|
* Side effects:
|
|
* None.
|
|
*
|
|
*----------------------------------------------------------------------------
|
|
*/
|
|
static vmk_UplinkCapabilities
|
|
netdev_query_capabilities(struct net_device *dev)
|
|
{
|
|
vmk_UplinkCapabilities capability = 0;
|
|
VMK_ReturnStatus status;
|
|
unsigned int permitHwIPv6Csum = 0;
|
|
unsigned int permitHwCsumForIPv6Csum = 0;
|
|
unsigned int permitHwTSO6 = 0;
|
|
unsigned int permitHwTSO = 0;
|
|
vmk_MA maxPhysAddr = vmk_MachMemMaxAddr();
|
|
|
|
status = vmk_ConfigParamGetUint(useHwIPv6CsumHandle, &permitHwIPv6Csum);
|
|
VMK_ASSERT(status == VMK_OK);
|
|
status = vmk_ConfigParamGetUint(useHwCsumForIPv6CsumHandle, &permitHwCsumForIPv6Csum);
|
|
VMK_ASSERT(status == VMK_OK);
|
|
status = vmk_ConfigParamGetUint(useHwTSOHandle, &permitHwTSO);
|
|
VMK_ASSERT(status == VMK_OK);
|
|
status = vmk_ConfigParamGetUint(useHwTSO6Handle, &permitHwTSO6);
|
|
VMK_ASSERT(status == VMK_OK);
|
|
|
|
VMKLNX_DEBUG(0, "Checking device: %s's capabilities", dev->name);
|
|
if (dev->features & NETIF_F_HW_VLAN_TX) {
|
|
VMKLNX_DEBUG(0, "device: %s has hw_vlan_tx capability", dev->name);
|
|
vmk_UplinkCapabilitySet(&capability, VMK_PORT_CLIENT_CAP_HW_TX_VLAN, VMK_TRUE);
|
|
}
|
|
if (dev->features & NETIF_F_HW_VLAN_RX) {
|
|
VMKLNX_DEBUG(0, "device: %s has hw_vlan_rx capability", dev->name);
|
|
vmk_UplinkCapabilitySet(&capability, VMK_PORT_CLIENT_CAP_HW_RX_VLAN, VMK_TRUE);
|
|
}
|
|
if (dev->features & NETIF_F_IP_CSUM) {
|
|
VMKLNX_DEBUG(0, "device: %s has IP CSUM capability", dev->name);
|
|
vmk_UplinkCapabilitySet(&capability, VMK_PORT_CLIENT_CAP_IP4_CSUM, VMK_TRUE);
|
|
}
|
|
if (permitHwIPv6Csum) {
|
|
if (dev->features & NETIF_F_IPV6_CSUM) {
|
|
VMKLNX_DEBUG(0, "device: %s has IPV6 CSUM capability", dev->name);
|
|
vmk_UplinkCapabilitySet(&capability, VMK_PORT_CLIENT_CAP_IP6_CSUM, VMK_TRUE);
|
|
} else {
|
|
/*
|
|
* When NETIF_F_IPV6_CSUM isn't available, then software
|
|
* CSUM for IP6 headers will be done. If software csum
|
|
* is included, there's no reason to also examine the pktLists
|
|
* for ip6 extension header offloads
|
|
*/
|
|
if (!(dev->features & NETIF_F_HW_CSUM)) {
|
|
vmk_UplinkCapabilitySet(&capability,
|
|
VMK_PORT_CLIENT_CAP_IP6_CSUM_EXT_HDRS,
|
|
VMK_TRUE);
|
|
}
|
|
}
|
|
}
|
|
if (dev->features & NETIF_F_HW_CSUM) {
|
|
VMKLNX_DEBUG(0, "device: %s has HW CSUM capability", dev->name);
|
|
// IP is the subset of HW we support.
|
|
vmk_UplinkCapabilitySet(&capability, VMK_PORT_CLIENT_CAP_IP4_CSUM, VMK_TRUE);
|
|
if (permitHwCsumForIPv6Csum) {
|
|
VMKLNX_DEBUG(0, "device: %s has HW CSUM => IPv6 CSUM capability", dev->name);
|
|
vmk_UplinkCapabilitySet(&capability, VMK_PORT_CLIENT_CAP_IP6_CSUM, VMK_TRUE);
|
|
}
|
|
}
|
|
if ((dev->features & NETIF_F_SG) &&
|
|
(MAX_SKB_FRAGS >= VMK_PKT_FRAGS_MAX_LENGTH)) {
|
|
VMKLNX_DEBUG(0, "device: %s has SG capability", dev->name);
|
|
vmk_UplinkCapabilitySet(&capability, VMK_PORT_CLIENT_CAP_SG, VMK_TRUE);
|
|
}
|
|
if (!(dev->features & NETIF_F_FRAG_CANT_SPAN_PAGES)) {
|
|
VMKLNX_DEBUG(0, "device: %s has Frag Span Pages capability", dev->name);
|
|
vmk_UplinkCapabilitySet(&capability, VMK_PORT_CLIENT_CAP_SG_SPAN_PAGES,
|
|
VMK_TRUE);
|
|
}
|
|
if ((dev->features & NETIF_F_HIGHDMA) ||
|
|
((dev->features & NETIF_F_DMA39) && maxPhysAddr <= DMA_BIT_MASK(39)) ||
|
|
((dev->features & NETIF_F_DMA40) && maxPhysAddr <= DMA_BIT_MASK(40)) ||
|
|
((dev->features & NETIF_F_DMA48) && maxPhysAddr <= DMA_BIT_MASK(48))) {
|
|
VMKLNX_DEBUG(0, "device: %s has high dma capability", dev->name);
|
|
vmk_UplinkCapabilitySet(&capability, VMK_PORT_CLIENT_CAP_HIGH_DMA, VMK_TRUE);
|
|
}
|
|
if (permitHwTSO && (dev->features & NETIF_F_TSO)) {
|
|
VMKLNX_DEBUG(0, "device: %s has TSO capability", dev->name);
|
|
vmk_UplinkCapabilitySet(&capability, VMK_PORT_CLIENT_CAP_TSO, VMK_TRUE);
|
|
}
|
|
|
|
if (permitHwTSO6) {
|
|
if (dev->features & NETIF_F_TSO6) {
|
|
VMKLNX_DEBUG(0, "device: %s has TSO6 capability", dev->name);
|
|
vmk_UplinkCapabilitySet(&capability, VMK_PORT_CLIENT_CAP_TSO6, VMK_TRUE);
|
|
} else {
|
|
/*
|
|
* When NETIF_F_TSO6 isn't available, then software TSO6
|
|
* will be done, but when software TSO6 is enabled, there's
|
|
* no reason to also review the pktLists for IP6 extension
|
|
* headers.
|
|
*/
|
|
vmk_UplinkCapabilitySet(&capability,
|
|
VMK_PORT_CLIENT_CAP_TSO6_EXT_HDRS,
|
|
VMK_TRUE);
|
|
}
|
|
}
|
|
|
|
if (dev->features & NETIF_F_UPT) {
|
|
VMKLNX_DEBUG(0, "device: %s has UPT capability", dev->name);
|
|
vmk_UplinkCapabilitySet(&capability, VMK_PORT_CLIENT_CAP_UPT, VMK_TRUE);
|
|
}
|
|
|
|
if (dev->pt_ops && !(dev->features & NETIF_F_UPT)) {
|
|
VMKLNX_DEBUG(0, "device: %s has NPA capability", dev->name);
|
|
vmk_UplinkCapabilitySet(&capability, VMK_PORT_CLIENT_CAP_NPA, VMK_TRUE);
|
|
}
|
|
|
|
if (dev->dcbnl_ops) {
|
|
VMKLNX_DEBUG(0, "device: %s has DCB capability", dev->name);
|
|
vmk_UplinkCapabilitySet(&capability, VMK_PORT_CLIENT_CAP_DCB, VMK_TRUE);
|
|
}
|
|
|
|
   /*
    * All devices have the RDONLY_INETHDRS capability. It is a property
    * of a device driver: when VMK_TRUE, it means the device driver does
    * NOT modify the inet headers. When VMK_FALSE, it means the device
    * driver DOES modify the inet headers, and that private copies of
    * the pktHandles need to be made for the safety of the pktHandles
    * without private writable buffers.
    */
|
|
if (dev->features & NETIF_F_RDONLYINETHDRS) {
|
|
VMKLNX_DEBUG(0, "device: %s has RDONLY_INETHDRS capability", dev->name);
|
|
vmk_UplinkCapabilitySet(&capability,
|
|
VMK_PORT_CLIENT_CAP_RDONLY_INETHDRS, VMK_TRUE);
|
|
} else {
|
|
VMKLNX_DEBUG(0, "device: %s does not have RDONLY_INETHDRS capability",
|
|
dev->name);
|
|
vmk_UplinkCapabilitySet(&capability,
|
|
VMK_PORT_CLIENT_CAP_RDONLY_INETHDRS, VMK_FALSE);
|
|
}
|
|
|
|
/*
|
|
* PR #324545: Artificially turn this feature on so that the VMkernel
|
|
* doesn't activate any unnecessary & wasteful SW workaround.
|
|
    * The VMkernel shouldn't generate this kind of frame anyway.
|
|
*/
|
|
if (VMK_TRUE) {
|
|
VMKLNX_DEBUG(0, "device: %s has TSO256k capability", dev->name);
|
|
vmk_UplinkCapabilitySet(&capability, VMK_PORT_CLIENT_CAP_TSO256k, VMK_TRUE);
|
|
}
|
|
|
|
if (dev->features & NETIF_F_TSO) {
|
|
/*
|
|
* If a pNIC can do TSO, but not any of the following,
|
|
* our software path for any of these missing functions
|
|
* may end up trying to allocate very large buffers and
|
|
* not able to do it. We'd like to know about such
|
|
* devices during development.
|
|
* NB: we already know that some e1000 devices,
|
|
* e.g. 82544EI (e1000 XT), can do TSO but not High_DMA.
|
|
*/
|
|
VMK_ASSERT(dev->features & NETIF_F_SG);
|
|
VMK_ASSERT(!(dev->features & NETIF_F_FRAG_CANT_SPAN_PAGES));
|
|
|
|
if (!(dev->features & NETIF_F_SG) ||
|
|
(dev->features & NETIF_F_FRAG_CANT_SPAN_PAGES)) {
|
|
VMKLNX_WARN("%s: disabling hardware TSO because dev "
|
|
"has no hardware SG",
|
|
dev->name);
|
|
vmk_UplinkCapabilitySet(&capability, VMK_PORT_CLIENT_CAP_TSO, VMK_FALSE);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* To support encapsulated offloads, the pNic must be able to
|
|
* parameterize the location of the header, csum, etc. Some
|
|
* nics can parameterize, some can't. Some nics use 8-bit
|
|
* offsets, some use 16-bits.
|
|
*
|
|
*/
|
|
|
|
if (dev->features & NETIF_F_OFFLOAD_16OFFSET) {
|
|
VMKLNX_DEBUG(0, "device: %s has TSO-CSUM offloads "
|
|
"with 16 bit offsets (8-bit also enabled)",
|
|
dev->name);
|
|
|
|
vmk_UplinkCapabilitySet(&capability,
|
|
VMK_PORT_CLIENT_CAP_OFFLOAD_16OFFSET,
|
|
VMK_TRUE);
|
|
if (!((dev->features & NETIF_F_OFFLOAD_8OFFSET))) {
|
|
VMKLNX_DEBUG(0, "device: %s missing 8-bit offsets also enabled",
|
|
dev->name);
|
|
}
|
|
vmk_UplinkCapabilitySet(&capability,
|
|
VMK_PORT_CLIENT_CAP_OFFLOAD_8OFFSET,
|
|
VMK_TRUE);
|
|
} else if (dev->features & NETIF_F_OFFLOAD_8OFFSET) {
|
|
VMKLNX_DEBUG(0, "device: %s has TSO-CSUM with 8 bit offset capability",
|
|
dev->name);
|
|
|
|
vmk_UplinkCapabilitySet(&capability,
|
|
VMK_PORT_CLIENT_CAP_OFFLOAD_8OFFSET,
|
|
VMK_TRUE);
|
|
} else {
|
|
VMKLNX_DEBUG(0, "device: %s no TSO-CSUM offset capability",
|
|
dev->name);
|
|
/*
|
|
       * By enabling 16OFFSET, 8OFFSET is still disabled, and the
|
|
* software version of the cap will be inserted.
|
|
*/
|
|
vmk_UplinkCapabilitySet(&capability,
|
|
VMK_PORT_CLIENT_CAP_OFFLOAD_16OFFSET,
|
|
VMK_TRUE);
|
|
}
|
|
|
|
if (!(dev->features & NETIF_F_NO_SCHED)) {
|
|
vmk_UplinkCapabilitySet(&capability,
|
|
VMK_PORT_CLIENT_CAP_SCHED,
|
|
VMK_TRUE);
|
|
VMKLNX_DEBUG(0, "device: %s is network scheduling compliant",
|
|
dev->name);
|
|
} else {
|
|
VMKLNX_DEBUG(0, "device: %s is not network scheduling compliant",
|
|
dev->name);
|
|
}
|
|
|
|
VMKLNX_DEBUG(0, "device %s vmnet cap is 0x%"VMK_FMT64"x",
|
|
dev->name, capability);
|
|
|
|
return capability;
|
|
}
|
|
|
|
/*
|
|
* Section: calltable functions, called through vmk_UplinkFunctions
|
|
*/
|
|
|
|
/*
|
|
*----------------------------------------------------------------------------
|
|
*
|
|
* IoctlNetDev --
|
|
*
|
|
* Handle an ioctl request from the VMKernel for the given device name.
|
|
*
|
|
* Results:
|
|
* VMK_ReturnStatus indicating the outcome.
|
|
*
|
|
* Side effects:
|
|
* None.
|
|
*
|
|
*----------------------------------------------------------------------------
|
|
*/
|
|
static VMK_ReturnStatus
|
|
IoctlNetDev(char *uplinkName, uint32_t cmd, void *args, uint32_t *result)
|
|
{
|
|
VMK_ReturnStatus status;
|
|
struct net_device *dev;
|
|
|
|
dev = dev_get_by_name(uplinkName);
|
|
if (!dev) {
|
|
return VMK_NOT_FOUND;
|
|
}
|
|
|
|
status = netdev_ioctl(dev, cmd, args, result, VMK_IOCTL_CALLER_64, VMK_FALSE);
|
|
dev_put(dev);
|
|
return status;
|
|
}
|
|
|
|
/*
|
|
*-----------------------------------------------------------------------------
|
|
*
|
|
* SetNICLinkStatus --
|
|
*
|
|
* Push new link status up to the vmkernel.
|
|
*
|
|
* Results:
|
|
* None.
|
|
*
|
|
* Side effects:
|
|
* May cause teaming failover events to be scheduled.
|
|
*
|
|
*-----------------------------------------------------------------------------
|
|
*/
|
|
void
|
|
SetNICLinkStatus(struct net_device *dev)
|
|
{
|
|
vmk_UplinkLinkInfo linkInfo;
|
|
|
|
linkInfo.linkState = dev->link_state;
|
|
linkInfo.linkSpeed = linkInfo.linkState ? dev->link_speed : 0;
|
|
linkInfo.fullDuplex = linkInfo.linkState ? dev->full_duplex : VMK_FALSE;
|
|
|
|
/* Test if the uplink is connected (for a pseudo device) */
|
|
if (dev->uplinkDev) {
|
|
vmk_UplinkUpdateLinkState(dev->uplinkDev, &linkInfo);
|
|
}
|
|
}
|
|
|
|
/*
|
|
*----------------------------------------------------------------------------
|
|
*
|
|
* DevStartTxImmediate --
|
|
*
|
|
* External entry point for transmitting packets. Packets are queued and
|
|
* then Tx-ed immediately.
|
|
*
|
|
*
|
|
* Results:
|
|
* VMK_ReturnStatus indicating the outcome.
|
|
*
|
|
* Side effects:
|
|
* None.
|
|
*
|
|
*----------------------------------------------------------------------------
|
|
*/
|
|
static VMK_ReturnStatus
|
|
DevStartTxImmediate(void *clientData, vmk_PktList pktList)
|
|
{
|
|
struct net_device *dev = (struct net_device *)clientData;
|
|
vmk_PktHandle *pkt = vmk_PktListGetFirstPkt(pktList);
|
|
vmk_NetqueueQueueID vmkqid;
|
|
|
|
VMK_ASSERT(pkt);
|
|
vmkqid = vmk_PktQueueIDGet(pkt);
|
|
#ifdef VMX86_DEBUG
|
|
{
|
|
VMK_PKTLIST_ITER_STACK_DEF(iter);
|
|
vmk_PktListIterStart(iter, pktList);
|
|
while (!vmk_PktListIterIsAtEnd(iter)) {
|
|
pkt = vmk_PktListIterGetPkt(iter);
|
|
VMK_ASSERT(vmk_PktQueueIDGet(pkt) == vmkqid);
|
|
vmk_PktListIterMove(iter);
|
|
}
|
|
}
|
|
#endif
|
|
|
|
return netdev_tx(dev, pktList, vmkqid);
|
|
}
|
|
|
|
/*
|
|
*----------------------------------------------------------------------------
|
|
*
|
|
* OpenNetDev --
|
|
*
|
|
* Handler for calling the device's open function. If successful, the device
|
|
* state is changed to indicate that the device has been opened.
|
|
*
|
|
* Results:
|
|
* Returns whatever the device's open function returns.
|
|
*
|
|
* Side effects:
|
|
* None.
|
|
*
|
|
*----------------------------------------------------------------------------
|
|
*/
|
|
static VMK_ReturnStatus
|
|
OpenNetDev(void *clientData)
|
|
{
|
|
struct net_device *dev = (struct net_device *)clientData;
|
|
int status = 0;
|
|
|
|
if (dev->open == NULL) {
|
|
VMKLNX_WARN("NULL open function for device %s", dev->name);
|
|
return 1;
|
|
}
|
|
|
|
rtnl_lock();
|
|
if ((dev->gflags & IFF_DEV_IS_OPEN) == 0) {
|
|
status = dev_open(dev);
|
|
if (status == 0) {
|
|
dev->gflags |= IFF_DEV_IS_OPEN;
|
|
}
|
|
}
|
|
rtnl_unlock();
|
|
|
|
return status == 0 ? VMK_OK : VMK_FAILURE;
|
|
}
|
|
|
|
/*
|
|
*----------------------------------------------------------------------------
|
|
*
|
|
* CloseNetDev --
|
|
*
|
|
* Handler for closing the device. If successful, the device state is
|
|
* modified to indicate that the device is now non-functional.
|
|
*
|
|
* Results:
|
|
* Returns whatever the stop function of the module owning the device
|
|
* returns.
|
|
*
|
|
* Side effects:
|
|
* None.
|
|
*
|
|
*----------------------------------------------------------------------------
|
|
*/
|
|
static VMK_ReturnStatus
|
|
CloseNetDev(void *clientData)
|
|
{
|
|
struct net_device *dev = (struct net_device *)clientData;
|
|
int status = 0;
|
|
|
|
VMK_ASSERT(dev->stop != NULL);
|
|
VMKLNX_DEBUG(0, "Stopping device %s", dev->name);
|
|
|
|
rtnl_lock();
|
|
if (dev->gflags & IFF_DEV_IS_OPEN ) {
|
|
status = dev_close(dev);
|
|
if (status == 0) {
|
|
dev->gflags &= ~IFF_DEV_IS_OPEN;
|
|
}
|
|
}
|
|
rtnl_unlock();
|
|
|
|
return status == 0 ? VMK_OK : VMK_FAILURE;
|
|
}
|
|
|
|
|
|
/*
|
|
*----------------------------------------------------------------------------
|
|
*
|
|
* BlockNetDev --
|
|
*
|
|
* Handler for blocking the device. If successful, the device state is
|
|
* modified to indicate that the device is now blocked.
|
|
*
|
|
* Results:
|
|
* VMK_OK always.
|
|
*
|
|
* Side effects:
|
|
* None.
|
|
*
|
|
*----------------------------------------------------------------------------
|
|
*/
|
|
static VMK_ReturnStatus
|
|
BlockNetDev(void *clientData)
|
|
{
|
|
struct net_device *dev = (struct net_device *)clientData;
|
|
struct napi_struct *napi;
|
|
|
|
if (test_and_set_bit(__LINK_STATE_BLOCKED, &dev->state)) {
|
|
VMKLNX_DEBUG(0, "%s is actually already blocked.", dev->name);
|
|
return VMK_OK;
|
|
}
|
|
|
|
// Disable napi so as to give a chance for all packets in the middle of
|
|
// rx processing to be handed off to the kernel
|
|
spin_lock(&dev->napi_lock);
|
|
list_for_each_entry(napi, &dev->napi_list, dev_list)
|
|
if (!(test_bit(NAPI_STATE_UNUSED, &napi->state))) {
|
|
while (1) {
|
|
if (!napi_disable_timeout(napi, 50)) {
|
|
// make sure we don't have packets stuck in the napi context
|
|
VMKLNX_DEBUG(0, "Flushing napi context (%d) pending packets for %s",
|
|
napi->napi_id, dev->name);
|
|
vmk_NetPollProcessRx(napi->net_poll);
|
|
napi_enable (napi);
|
|
break;
|
|
}
|
|
if (test_bit(NAPI_STATE_UNUSED, &napi->state)) {
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
spin_unlock(&dev->napi_lock);
|
|
|
|
/* Emulate a case where it takes longer to complete the rx packets in flight */
|
|
if (VMKLNX_STRESS_DEBUG_COUNTER(stressNetBlockDevIsSluggish)) {
|
|
msleep(blockTotalSleepMsec);
|
|
}
|
|
|
|
VMKLNX_DEBUG(0, "%s is blocked.", dev->name);
|
|
return VMK_OK;
|
|
}
|
|
|
|
/*
|
|
*----------------------------------------------------------------------------
|
|
*
|
|
* UnblockNetDev --
|
|
*
|
|
* Handler for unblocking the device. If successful, the device state is
|
|
* modified to indicate that the device is now unblocked.
|
|
*
|
|
* Results:
|
|
* VMK_OK always.
|
|
*
|
|
* Side effects:
|
|
* None.
|
|
*
|
|
*----------------------------------------------------------------------------
|
|
*/
|
|
static VMK_ReturnStatus
|
|
UnblockNetDev(void *clientData)
|
|
{
|
|
struct net_device *dev = (struct net_device *)clientData;
|
|
|
|
if (!test_bit(__LINK_STATE_BLOCKED, &dev->state)) {
|
|
VMKLNX_DEBUG(0, "%s is actually already unblocked.", dev->name);
|
|
return VMK_OK;
|
|
}
|
|
|
|
smp_mb__before_clear_bit();
|
|
clear_bit(__LINK_STATE_BLOCKED, &dev->state);
|
|
|
|
VMKLNX_DEBUG(0, "%s is unblocked.", dev->name);
|
|
return VMK_OK;
|
|
}
|
|
|
|
|
|
/*
|
|
*-----------------------------------------------------------------------------
|
|
*
|
|
* LinNet_EnableHwVlan --
|
|
*
|
|
 *    Enable HW vlan acceleration on the netdev. If dev->vlan_group has
 *    already been allocated, HW vlan is assumed to be enabled already and
 *    nothing is done.
|
|
*
|
|
* Results:
|
|
* Return VMK_OK if there is VLan HW tx/rx acceleration support;
|
|
* Return VMK_VLAN_NO_HW_ACCEL otherwise.
|
|
*
|
|
* Side effects:
|
|
* hw vlan register is updated.
|
|
*
|
|
*-----------------------------------------------------------------------------
|
|
*/
|
|
VMK_ReturnStatus
|
|
LinNet_EnableHwVlan(struct net_device *dev)
|
|
{
|
|
struct vlan_group *grp = dev->vlan_group;
|
|
|
|
/*
|
|
* dev->vlan_group is only allocated after vlan_rx_register() has been
|
|
* called successfully. If dev->vlan_group is not NULL, it means
|
|
* vlan has already been enabled and no need to do it again
|
|
*/
|
|
if(grp != NULL) {
|
|
VMKLNX_DEBUG(1, "%s: HW VLAN already enabled", dev->name);
|
|
return VMK_OK;
|
|
}
|
|
|
|
VMK_ASSERT(dev->features & NETIF_F_HW_VLAN_RX);
|
|
|
|
/* call driver's vlan_rx_register handler to enable vlan */
|
|
VMK_ASSERT(dev->vlan_rx_register);
|
|
if (!dev->vlan_rx_register) {
|
|
VMKLNX_DEBUG(0, "%s: no vlan_rx_register handler", dev->name);
|
|
return VMK_VLAN_NO_HW_ACCEL;
|
|
}
|
|
|
|
grp = vmk_HeapAlloc(VMK_MODULE_HEAP_ID, sizeof (struct vlan_group));
|
|
if (grp == NULL) {
|
|
VMKLNX_DEBUG(0, "%s: failed to allocate vlan_group", dev->name);
|
|
return VMK_NO_MEMORY;
|
|
}
|
|
vmk_Memset(grp, 0, sizeof (struct vlan_group));
|
|
dev->vlan_group = grp;
|
|
|
|
VMKLNX_DEBUG(0, "%s: enabling vlan", dev->name);
|
|
VMKAPI_MODULE_CALL_VOID(dev->module_id, dev->vlan_rx_register, dev, grp);
|
|
|
|
return VMK_OK;
|
|
}
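
/*
 * Example (illustrative sketch, not part of the build):
 *
 *    LinNet_EnableHwVlan() above hands the freshly allocated vlan_group to
 *    the driver's vlan_rx_register handler. For reference, a minimal
 *    driver-side handler might look like the sketch below. The adapter
 *    structure, register offset and enable bit are assumptions made up for
 *    illustration, not taken from any real driver.
 */
#if 0
#include <linux/netdevice.h>
#include <linux/if_vlan.h>
#include <asm/io.h>

struct example_adapter {
   struct vlan_group *vlgrp;          /* group cached from vmklinux       */
   void __iomem      *regs;           /* BAR mapping                      */
};

static void
example_vlan_rx_register(struct net_device *netdev, struct vlan_group *grp)
{
   struct example_adapter *adapter = netdev_priv(netdev);

   adapter->vlgrp = grp;              /* NULL means "disable HW VLAN"     */

   /* hypothetical control register: bit 0 enables HW VLAN tag stripping */
   writel(grp != NULL ? 0x1 : 0x0, adapter->regs + 0x100);
}
#endif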
|
|
|
|
|
|
/*
|
|
*-----------------------------------------------------------------------------
|
|
*
|
|
* SetupVlanGroupDevice --
|
|
*
|
|
 *    Enable HW vlan and add new vlan id's based on the bitmap.
 *    If enable is FALSE, hardware vlan is expected to be enabled
 *    already. If bitmap is NULL, just do the enable.
|
|
*
|
|
* Results:
|
|
* Return VMK_OK if there is VLan HW tx/rx acceleration support;
|
|
* Return VMK_VLAN_NO_HW_ACCEL otherwise.
|
|
*
|
|
* Side effects:
|
|
* hw vlan register is updated.
|
|
*
|
|
*-----------------------------------------------------------------------------
|
|
*/
|
|
static VMK_ReturnStatus
|
|
SetupVlanGroupDevice(void *clientData, vmk_Bool enable, void *bitmap)
|
|
{
|
|
struct net_device *dev = (struct net_device *) clientData;
|
|
struct vlan_group *grp = dev->vlan_group;
|
|
VMK_ReturnStatus status;
|
|
|
|
rtnl_lock();
|
|
if (enable || grp == NULL) {
|
|
status = LinNet_EnableHwVlan(dev);
|
|
if (status != VMK_OK) {
|
|
goto end;
|
|
}
|
|
grp = dev->vlan_group;
|
|
}
|
|
|
|
/* if hw doesn't support rx vlan filter, bail out here */
|
|
if (!(dev->features & NETIF_F_HW_VLAN_FILTER)) {
|
|
status = VMK_OK;
|
|
goto end;
|
|
}
|
|
|
|
/* now compare bitmap with vlan_group and make up the difference */
|
|
if (bitmap) {
|
|
vmk_VlanID vid;
|
|
VMK_ASSERT(dev->vlan_rx_add_vid);
|
|
if (!dev->vlan_rx_add_vid) {
|
|
VMKLNX_DEBUG(0, "%s: driver has no vlan_rx_add_vid handler",
|
|
dev->name);
|
|
status = VMK_FAILURE;
|
|
goto end;
|
|
}
|
|
|
|
for (vid = 0; vid < VLAN_GROUP_ARRAY_LEN; vid++) {
|
|
if (test_bit(vid, bitmap) && grp->vlan_devices[vid] == NULL) {
|
|
grp->vlan_devices[vid] = dev;
|
|
VMKLNX_DEBUG(1, "%s: adding vlan id %d", dev->name, (int)vid);
|
|
VMKAPI_MODULE_CALL_VOID(dev->module_id, dev->vlan_rx_add_vid, dev,
|
|
vid);
|
|
}
|
|
}
|
|
}
|
|
status = VMK_OK;
|
|
end:
|
|
rtnl_unlock();
|
|
return status;
|
|
}
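
/*
 * Example (illustrative sketch, not part of the build):
 *
 *    The bitmap argument above is a plain bit array indexed by VLAN id,
 *    VLAN_GROUP_ARRAY_LEN bits long. A caller could build one as sketched
 *    below; the specific VLAN ids (10 and 42) are arbitrary example values
 *    and the helper name is made up. The sketch relies on this file's
 *    existing includes.
 */
#if 0
static VMK_ReturnStatus
example_enable_vlans(struct net_device *dev)
{
   /* one bit per possible VLAN id */
   DECLARE_BITMAP(vlanBitmap, VLAN_GROUP_ARRAY_LEN);

   bitmap_zero(vlanBitmap, VLAN_GROUP_ARRAY_LEN);
   set_bit(10, vlanBitmap);
   set_bit(42, vlanBitmap);

   /* enable == VMK_TRUE makes sure LinNet_EnableHwVlan() runs first */
   return SetupVlanGroupDevice(dev, VMK_TRUE, vlanBitmap);
}
#endif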
|
|
|
|
/*
|
|
*-----------------------------------------------------------------------------
|
|
*
|
|
* LinNet_RemoveVlanGroupDevice --
|
|
*
|
|
* Delete vlan id's based on bitmap and disable hw vlan.
|
|
* Either bitmap or disable should be set, but not both.
|
|
* If neither is set, there is no work to do (illegal?).
|
|
*
|
|
* Results:
|
|
* VMK_OK if successfully added/deleted.
|
|
* VMK_FAILURE otherwise.
|
|
*
|
|
* Side effects:
|
|
 *    HW vlan table is updated. HW may stop passing vlan traffic.
|
|
*
|
|
*-----------------------------------------------------------------------------
|
|
*/
|
|
VMK_ReturnStatus
|
|
LinNet_RemoveVlanGroupDevice(void *clientData, vmk_Bool disable, void *bitmap)
|
|
{
|
|
struct net_device *dev = (struct net_device *) clientData;
|
|
struct vlan_group *grp = dev->vlan_group;
|
|
VMK_ReturnStatus status;
|
|
|
|
VMK_ASSERT(dev->features & NETIF_F_HW_VLAN_RX);
|
|
|
|
rtnl_lock();
|
|
/* Unregister vid's if hardware supports vlan filter */
|
|
if (dev->features & NETIF_F_HW_VLAN_FILTER) {
|
|
vmk_VlanID vid;
|
|
VMK_ASSERT(dev->vlan_rx_kill_vid);
|
|
if (!dev->vlan_rx_kill_vid) {
|
|
VMKLNX_DEBUG(0, "%s: no vlan_rx_kill_vid handler", dev->name);
|
|
status = VMK_FAILURE;
|
|
goto end;
|
|
}
|
|
|
|
if (grp == NULL) {
|
|
VMKLNX_DEBUG(0, "%s: the vlan_group of this device is NULL",
|
|
dev->name);
|
|
status = VMK_FAILURE;
|
|
goto end;
|
|
}
|
|
|
|
for (vid = 0; vid < VLAN_GROUP_ARRAY_LEN; vid++) {
|
|
if (grp->vlan_devices[vid] == NULL) {
|
|
continue;
|
|
}
|
|
/* delete all if disable is true, else consult bitmap */
|
|
if (disable || (bitmap && !test_bit(vid, bitmap))) {
|
|
grp->vlan_devices[vid] = NULL;
|
|
VMKLNX_DEBUG(1, "%s: deleting vlan id %d", dev->name, (int)vid);
|
|
VMKAPI_MODULE_CALL_VOID(dev->module_id, dev->vlan_rx_kill_vid, dev,
|
|
vid);
|
|
}
|
|
}
|
|
}
|
|
|
|
if (disable) {
|
|
VMK_ASSERT(dev->vlan_rx_register);
|
|
if (!dev->vlan_rx_register) {
|
|
VMKLNX_DEBUG(0, "%s: no vlan_rx_register handler", dev->name);
|
|
status = VMK_VLAN_NO_HW_ACCEL;
|
|
goto end;
|
|
}
|
|
|
|
VMKLNX_DEBUG(0, "%s: disabling vlan", dev->name);
|
|
VMKAPI_MODULE_CALL_VOID(dev->module_id, dev->vlan_rx_register, dev, NULL);
|
|
|
|
VMK_ASSERT(grp);
|
|
if (grp) {
|
|
dev->vlan_group = NULL;
|
|
vmk_HeapFree(VMK_MODULE_HEAP_ID, grp);
|
|
}
|
|
}
|
|
status = VMK_OK;
|
|
end:
|
|
rtnl_unlock();
|
|
return status;
|
|
}
|
|
|
|
/*
|
|
*-----------------------------------------------------------------------------
|
|
*
|
|
* NICGetMTU --
|
|
*
|
|
* Returns the MTU value for the given NIC
|
|
*
|
|
* Results:
|
|
* MTU for the given device.
|
|
*
|
|
* Side effects:
|
|
* none.
|
|
*
|
|
*-----------------------------------------------------------------------------
|
|
*/
|
|
static VMK_ReturnStatus
|
|
NICGetMTU(void *device, vmk_uint32 *mtu)
|
|
{
|
|
struct net_device *dev = (struct net_device *) device;
|
|
|
|
*mtu = dev->mtu;
|
|
|
|
return VMK_OK;
|
|
}
|
|
|
|
/*
|
|
*-----------------------------------------------------------------------------
|
|
*
|
|
* NICSetMTU --
|
|
*
|
|
* Set new MTU for the given NIC
|
|
*
|
|
* Results:
|
|
* VMK_OK if the new_mtu is accepted by the device.
|
|
* VMK_FAILURE or VMK_NOT_SUPPORTED otherwise.
|
|
*
|
|
* Side effects:
|
|
* The device queue is stopped. For most devices the entire ring is
|
|
* reallocated, and the device is reset.
|
|
*
|
|
*-----------------------------------------------------------------------------
|
|
*/
|
|
static VMK_ReturnStatus
|
|
NICSetMTU(void *device, vmk_uint32 new_mtu)
|
|
{
|
|
int ret = 0;
|
|
struct net_device *dev = (struct net_device *) device;
|
|
|
|
if (!dev->change_mtu) { // 3Com doesn't even register change_mtu!
|
|
VMKLNX_DEBUG(0, "Changing MTU not supported by device.");
|
|
return VMK_NOT_SUPPORTED;
|
|
}
|
|
   /* PRs 478842, 478939
    * Update trans_start here so that netdev_watchdog will not mistake
    * a stopped tx_queue as a sign of pNIC hang while the MTU change is in
    * progress.
    */
|
|
rtnl_lock();
|
|
dev->trans_start = jiffies;
|
|
VMKAPI_MODULE_CALL(dev->module_id, ret, dev->change_mtu, dev, new_mtu);
|
|
|
|
/* Some drivers call dev_close() when change_mtu failed. The following check
|
|
* will update dev->gflags accordingly to avoid a second dev_close()
|
|
* when CloseNetDev() is called.
|
|
*/
|
|
if (ret && !(dev->flags & IFF_UP))
|
|
dev->gflags &= ~IFF_DEV_IS_OPEN;
|
|
rtnl_unlock();
|
|
|
|
if (ret == 0) {
|
|
VMKLNX_DEBUG(0, "%s: MTU changed to %d", dev->name, new_mtu);
|
|
} else {
|
|
VMKLNX_DEBUG(0, "%s: Failed to change MTU to %d", dev->name, new_mtu);
|
|
return VMK_FAILURE;
|
|
}
|
|
|
|
return VMK_OK;
|
|
}
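
/*
 * Example (illustrative sketch, not part of the build):
 *
 *    As the comment in NICSetMTU() notes, some drivers call dev_close()
 *    when change_mtu fails, which is why dev->gflags is patched up after
 *    the call. A typical driver-side change_mtu handler is sketched below;
 *    the MTU bounds and the ring re-init helper are assumptions for
 *    illustration only.
 */
#if 0
#include <linux/netdevice.h>
#include <linux/errno.h>

#define EXAMPLE_MIN_MTU   68
#define EXAMPLE_MAX_MTU   9000

extern int example_reinit_rings(struct net_device *netdev);

static int
example_change_mtu(struct net_device *netdev, int new_mtu)
{
   if (new_mtu < EXAMPLE_MIN_MTU || new_mtu > EXAMPLE_MAX_MTU) {
      return -EINVAL;
   }

   netdev->mtu = new_mtu;

   /* most drivers rebuild the rx/tx rings so buffers fit the new MTU */
   if (netif_running(netdev)) {
      return example_reinit_rings(netdev);
   }
   return 0;
}
#endif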
|
|
|
|
/*
|
|
*----------------------------------------------------------------------------
|
|
*
|
|
* NICSetLinkStateDown --
|
|
* Set NIC hardware to link down state to inform link peer.
|
|
*
|
|
* Results:
|
|
* VMK_OK or failure code.
|
|
*
|
|
* Side effects:
|
|
* Device is closed and settings may be lost.
|
|
*
|
|
*----------------------------------------------------------------------------
|
|
*/
|
|
static VMK_ReturnStatus
|
|
NICSetLinkStateDown(struct net_device *dev)
|
|
{
|
|
struct ethtool_ops *ops;
|
|
|
|
if ((dev->gflags & IFF_DEV_IS_OPEN) == 0) {
|
|
return VMK_OK;
|
|
}
|
|
|
|
/* disable wol so link is down */
|
|
ops = dev->ethtool_ops;
|
|
if (ops && ops->set_wol) {
|
|
int error;
|
|
struct ethtool_wolinfo wolInfo[1];
|
|
vmk_LogMessage("Disable WOL on device %s", dev->name);
|
|
memset(wolInfo, 0, sizeof (wolInfo));
|
|
rtnl_lock();
|
|
VMKAPI_MODULE_CALL(dev->module_id, error, ops->set_wol, dev, wolInfo);
|
|
rtnl_unlock();
|
|
if (error != 0) {
|
|
vmk_LogMessage("Failed to disable wol on device %s", dev->name);
|
|
}
|
|
}
|
|
|
|
/* now close the device to take the link down */
|
|
return CloseNetDev((void *)dev);
|
|
}
|
|
|
|
/*
|
|
*----------------------------------------------------------------------------
|
|
*
|
|
* NICSetLinkStateUp --
|
|
* Set NIC hardware to link up state to inform link peer.
|
|
*
|
|
* Results:
|
|
* VMK_OK or failure code.
|
|
*
|
|
* Side effects:
|
|
* Device is opened.
|
|
*
|
|
*----------------------------------------------------------------------------
|
|
*/
|
|
static VMK_ReturnStatus
|
|
NICSetLinkStateUp(struct net_device *dev)
|
|
{
|
|
VMK_ReturnStatus status;
|
|
|
|
if (dev->gflags & IFF_DEV_IS_OPEN) {
|
|
return VMK_OK; /* nothing to do */
|
|
}
|
|
|
|
status = OpenNetDev((void *)dev);
|
|
if (status != VMK_OK) {
|
|
return status;
|
|
}
|
|
|
|
/* Now the link is up, unblock device and restore wol state */
|
|
if (UnblockNetDev((void *)dev) != VMK_OK) {
|
|
vmk_LogMessage("Failed to unblock device %s", dev->name);
|
|
}
|
|
|
|
/* hostd will reenable wol when it processes link up */
|
|
return VMK_OK;
|
|
}
|
|
|
|
/*
|
|
*----------------------------------------------------------------------------
|
|
*
|
|
* NICSetLinkStatus --
|
|
* Set NIC hardware speed and duplex.
|
|
*
|
|
* Results:
|
|
* VMK_OK or failure code.
|
|
*
|
|
* Side effects:
|
|
* None.
|
|
*
|
|
*----------------------------------------------------------------------------
|
|
*/
|
|
static VMK_ReturnStatus
|
|
NICSetLinkStatus(void *clientData, vmk_UplinkLinkInfo *linkInfo)
|
|
{
|
|
struct net_device *dev = (struct net_device *)clientData;
|
|
struct ethtool_cmd cmd;
|
|
uint32_t result;
|
|
VMK_ReturnStatus status;
|
|
|
|
if (linkInfo->linkState == VMK_LINK_STATE_DOWN) {
|
|
vmk_LogMessage("Taking down link on device %s", dev->name);
|
|
return NICSetLinkStateDown(dev);
|
|
}
|
|
|
|
status = NICSetLinkStateUp(dev);
|
|
if (status != VMK_OK) {
|
|
vmk_LogMessage("Failed to bring link up on device %s", dev->name);
|
|
return status;
|
|
}
|
|
|
|
/* get meaningful ethtool_cmd value first */
|
|
if (!dev->ethtool_ops || !dev->ethtool_ops->get_settings) {
|
|
return VMK_NOT_SUPPORTED;
|
|
}
|
|
|
|
memset(&cmd, 0, sizeof(struct ethtool_cmd));
|
|
cmd.cmd = ETHTOOL_GSET;
|
|
rtnl_lock();
|
|
VMKAPI_MODULE_CALL(dev->module_id, result, dev->ethtool_ops->get_settings,
|
|
dev, &cmd);
|
|
rtnl_unlock();
|
|
|
|
if (result)
|
|
return vmklnx_errno_to_vmk_return_status(result);
|
|
|
|
   /* set link speed and duplex according to linkInfo */
|
|
cmd.cmd = ETHTOOL_SSET;
|
|
if (linkInfo->linkState == VMK_LINK_STATE_DOWN) {
|
|
cmd.autoneg = 1;
|
|
cmd.speed = ~0;
|
|
cmd.duplex = ~0;
|
|
} else {
|
|
cmd.speed = linkInfo->linkSpeed;
|
|
cmd.duplex = linkInfo->fullDuplex;
|
|
if (cmd.speed != 0) {
|
|
cmd.autoneg = 0;
|
|
} else {
|
|
cmd.autoneg = 1;
|
|
cmd.advertising = cmd.supported &
|
|
(ADVERTISED_100baseT_Full |
|
|
ADVERTISED_100baseT_Half |
|
|
ADVERTISED_10baseT_Full |
|
|
ADVERTISED_10baseT_Half |
|
|
ADVERTISED_1000baseT_Full |
|
|
ADVERTISED_1000baseT_Half |
|
|
ADVERTISED_Autoneg |
|
|
ADVERTISED_2500baseX_Full |
|
|
ADVERTISED_10000baseT_Full);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* We call ethtool_ops directly to bypass copy_from_user(),
|
|
* which doesn't handle in-kernel buffers (except for BH callers).
|
|
*
|
|
* See ethtool_set_settings()
|
|
*/
|
|
if (!dev->ethtool_ops || !dev->ethtool_ops->set_settings) {
|
|
return VMK_NOT_SUPPORTED;
|
|
}
|
|
|
|
rtnl_lock();
|
|
VMKAPI_MODULE_CALL(dev->module_id, result, dev->ethtool_ops->set_settings,
|
|
dev, &cmd);
|
|
rtnl_unlock();
|
|
return vmklnx_errno_to_vmk_return_status(result);
|
|
}
|
|
|
|
/*
|
|
 *----------------------------------------------------------------------------
 *
|
|
* NICResetDev --
|
|
*
|
|
* Handler for resetting the device. If successful, the device state is
|
|
* reset and the link state should go down and then up.
|
|
*
|
|
* Results:
|
|
* VMK_OK always.
|
|
*
|
|
* Side effects:
|
|
* Link state should bounce as seen from physical switch.
|
|
*
|
|
 *----------------------------------------------------------------------------
 */
|
|
|
|
static VMK_ReturnStatus
|
|
NICResetDev(void *clientData)
|
|
{
|
|
struct net_device *dev = (struct net_device *)clientData;
|
|
|
|
netif_tx_lock(dev);
|
|
VMK_ASSERT(dev->tx_timeout != NULL);
|
|
VMKAPI_MODULE_CALL_VOID(dev->module_id, dev->tx_timeout, dev);
|
|
netif_tx_unlock(dev);
|
|
|
|
return VMK_OK;
|
|
}
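
/*
 * Example (illustrative sketch, not part of the build):
 *
 *    NICResetDev() relies on the driver's tx_timeout handler. Because it is
 *    invoked with netif_tx_lock held, drivers normally just schedule a work
 *    item that performs the actual reset. The adapter layout and the reset
 *    body below are assumptions for illustration only.
 */
#if 0
#include <linux/netdevice.h>
#include <linux/workqueue.h>

struct example_nic {
   struct net_device *netdev;
   struct work_struct reset_task;
};

static void
example_tx_timeout(struct net_device *netdev)
{
   struct example_nic *nic = netdev_priv(netdev);

   /* called under netif_tx_lock: defer the heavy lifting */
   schedule_work(&nic->reset_task);
}

static void
example_reset_task(struct work_struct *work)
{
   struct example_nic *nic = container_of(work, struct example_nic, reset_task);

   netif_device_detach(nic->netdev);
   /* ... hardware-specific re-initialization would go here ... */
   netif_device_attach(nic->netdev);
}
#endif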
|
|
|
|
static inline VMK_ReturnStatus
|
|
marshall_to_vmknetq_features(vmknetddi_queueops_features_t features,
|
|
vmk_NetqueueFeatures *vmkfeatures)
|
|
{
|
|
if (features & VMKNETDDI_QUEUEOPS_FEATURE_RXQUEUES) {
|
|
*vmkfeatures |= VMK_NETQUEUE_FEATURE_RXQUEUES;
|
|
}
|
|
if (features & VMKNETDDI_QUEUEOPS_FEATURE_TXQUEUES) {
|
|
*vmkfeatures |= VMK_NETQUEUE_FEATURE_TXQUEUES;
|
|
}
|
|
|
|
return VMK_OK;
|
|
}
|
|
|
|
static inline VMK_ReturnStatus
|
|
marshall_from_vmknetq_type(vmk_NetqueueQueueType vmkqtype,
|
|
vmknetddi_queueops_queue_t *qtype)
|
|
{
|
|
if (vmkqtype == VMK_NETQUEUE_QUEUE_TYPE_TX) {
|
|
*qtype = VMKNETDDI_QUEUEOPS_QUEUE_TYPE_TX;
|
|
} else if (vmkqtype == VMK_NETQUEUE_QUEUE_TYPE_RX) {
|
|
*qtype = VMKNETDDI_QUEUEOPS_QUEUE_TYPE_RX;
|
|
} else {
|
|
VMKLNX_DEBUG(0, "invalid vmkqueue type 0x%x", (uint32_t)vmkqtype);
|
|
return VMK_FAILURE;
|
|
}
|
|
|
|
return VMK_OK;
|
|
}
|
|
|
|
static inline VMK_ReturnStatus
|
|
marshall_to_vmknetq_id(vmknetddi_queueops_queueid_t qid,
|
|
vmk_NetqueueQueueID *vmkqid)
|
|
{
|
|
if ( !VMKNETDDI_QUEUEOPS_IS_TX_QUEUEID(qid) &&
|
|
!VMKNETDDI_QUEUEOPS_IS_RX_QUEUEID(qid) ) {
|
|
VMKLNX_WARN("invalid queue id 0x%x", qid);
|
|
return VMK_FAILURE;
|
|
}
|
|
|
|
vmk_NetqueueSetQueueIDUserVal(vmkqid, qid);
|
|
return VMK_OK;
|
|
}
|
|
|
|
static inline VMK_ReturnStatus
|
|
marshall_from_vmknetq_id(vmk_NetqueueQueueID vmkqid,
|
|
vmknetddi_queueops_queueid_t *qid)
|
|
{
|
|
VMK_DEBUG_ONLY(
|
|
vmk_NetqueueQueueType qtype = vmk_NetqueueQueueIDType(vmkqid);
|
|
|
|
if (unlikely((qtype != VMK_NETQUEUE_QUEUE_TYPE_TX) &&
|
|
(qtype != VMK_NETQUEUE_QUEUE_TYPE_RX))) {
|
|
VMKLNX_WARN("invalid vmk queue type 0x%"VMK_FMT64"x", vmkqid);
|
|
return VMK_FAILURE;
|
|
});
|
|
|
|
*qid = vmk_NetqueueQueueIDUserVal(vmkqid);
|
|
return VMK_OK;
|
|
}
|
|
|
|
static inline VMK_ReturnStatus
|
|
marshall_from_vmknetq_filter_type(vmk_NetqueueFilter *vmkfilter,
|
|
vmknetddi_queueops_filter_t *filter)
|
|
{
|
|
if (vmkfilter->class != VMK_NETQUEUE_FILTER_MACADDR &&
|
|
vmkfilter->class != VMK_NETQUEUE_FILTER_VLAN &&
|
|
vmkfilter->class != VMK_NETQUEUE_FILTER_VLANMACADDR) {
|
|
VMKLNX_DEBUG(0, "unsupported vmk filter class");
|
|
return VMK_NOT_SUPPORTED;
|
|
}
|
|
|
|
if (vmkfilter->class == VMK_NETQUEUE_FILTER_MACADDR) {
|
|
filter->class = VMKNETDDI_QUEUEOPS_FILTER_MACADDR;
|
|
memcpy(filter->u.macaddr, vmkfilter->u.macaddr, 6);
|
|
}
|
|
|
|
if (vmkfilter->class == VMK_NETQUEUE_FILTER_VLAN) {
|
|
filter->class = VMKNETDDI_QUEUEOPS_FILTER_VLAN;
|
|
filter->u.vlan_id = vmkfilter->u.vlan_id;
|
|
}
|
|
|
|
if (vmkfilter->class == VMK_NETQUEUE_FILTER_VLANMACADDR) {
|
|
filter->class = VMKNETDDI_QUEUEOPS_FILTER_VLANMACADDR;
|
|
memcpy(filter->u.vlanmac.macaddr, vmkfilter->u.vlanmac.macaddr, 6);
|
|
filter->u.vlanmac.vlan_id = vmkfilter->u.vlanmac.vlan_id;
|
|
}
|
|
|
|
return VMK_OK;
|
|
}
|
|
|
|
static inline VMK_ReturnStatus
|
|
marshall_to_vmknetq_supported_filter_class(vmknetddi_queueops_filter_class_t class,
|
|
vmk_NetqueueFilterClass *vmkclass)
|
|
{
|
|
*vmkclass = VMK_NETQUEUE_FILTER_NONE;
|
|
|
|
if (class & VMKNETDDI_QUEUEOPS_FILTER_MACADDR) {
|
|
*vmkclass |= VMK_NETQUEUE_FILTER_MACADDR;
|
|
}
|
|
|
|
if (class & VMKNETDDI_QUEUEOPS_FILTER_VLAN) {
|
|
*vmkclass |= VMK_NETQUEUE_FILTER_VLAN;
|
|
}
|
|
|
|
if (class & VMKNETDDI_QUEUEOPS_FILTER_VLANMACADDR) {
|
|
*vmkclass |= VMK_NETQUEUE_FILTER_VLANMACADDR;
|
|
}
|
|
|
|
return VMK_OK;
|
|
}
|
|
|
|
static inline VMK_ReturnStatus
|
|
marshall_to_vmknetq_filter_id(vmknetddi_queueops_filterid_t fid,
|
|
vmk_NetqueueFilterID *vmkfid)
|
|
{
|
|
return vmk_NetqueueMkFilterID(vmkfid, VMKNETDDI_QUEUEOPS_FILTERID_VAL(fid));
|
|
}
|
|
|
|
static VMK_ReturnStatus
|
|
marshall_from_vmknetq_filter_id(vmk_NetqueueFilterID vmkfid,
|
|
vmknetddi_queueops_filterid_t *fid)
|
|
{
|
|
*fid = VMKNETDDI_QUEUEOPS_MK_FILTERID(vmk_NetqueueFilterIDVal(vmkfid));
|
|
return VMK_OK;
|
|
}
|
|
|
|
static VMK_ReturnStatus
|
|
marshall_from_vmknetq_pri(vmk_NetqueuePriority vmkpri,
|
|
vmknetddi_queueops_tx_priority_t *pri)
|
|
{
|
|
*pri = (vmknetddi_queueops_tx_priority_t)vmkpri;
|
|
return VMK_OK;
|
|
}
|
|
|
|
static inline VMK_ReturnStatus
|
|
marshall_to_vmknetq_queue_features(vmknetddi_queueops_queue_features_t features,
|
|
vmk_NetqueueQueueFeatures *vmkfeatures)
|
|
{
|
|
if (features & VMKNETDDI_QUEUEOPS_QUEUE_FEAT_LRO) {
|
|
*vmkfeatures |= VMK_NETQUEUE_QUEUE_FEAT_LRO;
|
|
}
|
|
if (features & VMKNETDDI_QUEUEOPS_QUEUE_FEAT_PAIR) {
|
|
*vmkfeatures |= VMK_NETQUEUE_QUEUE_FEAT_PAIR;
|
|
}
|
|
|
|
return VMK_OK;
|
|
}
|
|
|
|
static inline VMK_ReturnStatus
|
|
marshall_from_vmknetq_queue_features(vmk_NetqueueQueueFeatures vmkfeatures,
|
|
vmknetddi_queueops_queue_features_t *features)
|
|
{
|
|
if (vmkfeatures & VMK_NETQUEUE_QUEUE_FEAT_LRO) {
|
|
*features |= VMKNETDDI_QUEUEOPS_QUEUE_FEAT_LRO;
|
|
}
|
|
if (vmkfeatures & VMK_NETQUEUE_QUEUE_FEAT_PAIR) {
|
|
*features |= VMKNETDDI_QUEUEOPS_QUEUE_FEAT_PAIR;
|
|
}
|
|
|
|
return VMK_OK;
|
|
}
|
|
|
|
static VMK_ReturnStatus
|
|
marshall_from_vmknetq_attr(vmk_NetqueueQueueAttr *vmkattr,
|
|
u16 nattr,
|
|
vmknetddi_queueops_queueattr_t *attr)
|
|
{
|
|
int i;
|
|
|
|
for (i = 0; i < nattr; i++) {
|
|
switch (vmkattr[i].type) {
|
|
case VMK_NETQUEUE_QUEUE_ATTR_PRIOR:
|
|
attr[i].type = VMKNETDDI_QUEUEOPS_QUEUE_ATTR_PRIOR;
|
|
marshall_from_vmknetq_pri(vmkattr[i].args.priority,
|
|
&attr[i].args.priority);
|
|
break;
|
|
|
|
case VMK_NETQUEUE_QUEUE_ATTR_FEAT:
|
|
attr[i].type = VMKNETDDI_QUEUEOPS_QUEUE_ATTR_FEAT;
|
|
marshall_from_vmknetq_queue_features(vmkattr[i].args.features,
|
|
&attr[i].args.features);
|
|
break;
|
|
|
|
default:
|
|
return VMK_FAILURE;
|
|
}
|
|
}
|
|
|
|
return VMK_OK;
|
|
}
|
|
|
|
/*
|
|
*----------------------------------------------------------------------------
|
|
*
|
|
* netqueue_op_get_version --
|
|
*
|
|
* Get driver Netqueue version
|
|
*
|
|
* Results:
|
|
 *    VMK_OK on success. VMK_NOT_SUPPORTED if the operation is not supported
 *    by the device. VMK_FAILURE if the operation fails.
|
|
*
|
|
* Side effects:
|
|
* None.
|
|
*
|
|
*----------------------------------------------------------------------------
|
|
*/
|
|
static VMK_ReturnStatus
|
|
netqueue_op_get_version(void *clientData,
|
|
void *opArgs)
|
|
{
|
|
int result;
|
|
VMK_ReturnStatus ret;
|
|
vmknetddi_queueop_get_version_args_t args;
|
|
|
|
vmk_NetqueueOpGetVersionArgs *vmkargs = (vmk_NetqueueOpGetVersionArgs *)opArgs;
|
|
struct net_device *dev = (struct net_device *)clientData;
|
|
VMK_ASSERT(dev);
|
|
|
|
if (dev->netqueue_ops) {
|
|
VMKAPI_MODULE_CALL(dev->module_id, result, dev->netqueue_ops,
|
|
VMKNETDDI_QUEUEOPS_OP_GET_VERSION, &args);
|
|
if (result != 0) {
|
|
ret = VMK_FAILURE;
|
|
} else {
|
|
vmkargs->major = args.major;
|
|
vmkargs->minor = args.minor;
|
|
ret = VMK_OK;
|
|
}
|
|
} else {
|
|
VMKLNX_DEBUG(0, "!dev->netqueue_ops");
|
|
ret = VMK_NOT_SUPPORTED;
|
|
}
|
|
|
|
return ret;
|
|
}
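
/*
 * Example (illustrative sketch, not part of the build):
 *
 *    Every netqueue_op_* wrapper in this file funnels into the single
 *    driver entry point dev->netqueue_ops, passing an op code and an
 *    op-specific argument structure. A driver-side dispatcher handling only
 *    the version query might look like the sketch below; the version
 *    numbers are placeholders, and the parameter type name and DDI header
 *    name are assumptions.
 */
#if 0
#include "vmknetddi_queueops.h"      /* assumed DDI header name */

static int
example_netqueue_ops(vmknetddi_queueops_op_t op, void *args)
{
   switch (op) {
   case VMKNETDDI_QUEUEOPS_OP_GET_VERSION: {
      vmknetddi_queueop_get_version_args_t *vargs = args;

      vargs->major = 1;             /* placeholder values for this sketch  */
      vargs->minor = 0;
      return 0;                     /* 0 == success, as checked above      */
   }
   default:
      return -1;                    /* unhandled ops are treated as errors */
   }
}
#endif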
|
|
|
|
/*
|
|
*----------------------------------------------------------------------------
|
|
*
|
|
* netqueue_op_get_features --
|
|
*
|
|
* Get driver Netqueue features
|
|
*
|
|
* Results:
|
|
* VMK_OK on success, VMK_FAILURE on error, VMK_NOT_SUPPORTED if
|
|
 *    Netqueue ops are not supported by the driver
|
|
*
|
|
* Side effects:
|
|
* None.
|
|
*
|
|
*----------------------------------------------------------------------------
|
|
*/
|
|
static VMK_ReturnStatus
|
|
netqueue_op_get_features(void *clientData,
|
|
void *opArgs)
|
|
{
|
|
int result;
|
|
VMK_ReturnStatus ret;
|
|
vmknetddi_queueop_get_features_args_t args;
|
|
|
|
vmk_NetqueueOpGetFeaturesArgs *vmkargs = (vmk_NetqueueOpGetFeaturesArgs *)opArgs;
|
|
struct net_device *dev = (struct net_device *)clientData;
|
|
VMK_ASSERT(dev);
|
|
args.netdev = dev;
|
|
args.features = VMKNETDDI_QUEUEOPS_FEATURE_NONE;
|
|
|
|
if (dev->netqueue_ops) {
|
|
VMKAPI_MODULE_CALL(dev->module_id, result, dev->netqueue_ops,
|
|
VMKNETDDI_QUEUEOPS_OP_GET_FEATURES, &args);
|
|
if (result != 0) {
|
|
ret = VMK_FAILURE;
|
|
} else {
|
|
ret = marshall_to_vmknetq_features(args.features, &vmkargs->features);
|
|
VMK_ASSERT(ret == VMK_OK);
|
|
}
|
|
} else {
|
|
VMKLNX_DEBUG(0, "!dev->netqueue_ops");
|
|
ret = VMK_NOT_SUPPORTED;
|
|
}
|
|
|
|
return ret;
|
|
}
|
|
|
|
/*
|
|
*----------------------------------------------------------------------------
|
|
*
|
|
* netqueue_op_get_queue_count --
|
|
*
|
|
 *    Get count of tx or rx queues supported by the driver
|
|
*
|
|
* Results:
|
|
* VMK_OK on success, VMK_FAILURE on error, VMK_NOT_SUPPORTED if
|
|
* not supported
|
|
*
|
|
* Side effects:
|
|
* None.
|
|
*
|
|
*----------------------------------------------------------------------------
|
|
*/
|
|
static VMK_ReturnStatus
|
|
netqueue_op_get_queue_count(void *clientData,
|
|
void *opArgs)
|
|
{
|
|
int result;
|
|
VMK_ReturnStatus ret;
|
|
vmknetddi_queueop_get_queue_count_args_t args;
|
|
|
|
vmk_NetqueueOpGetQueueCountArgs *vmkargs =
|
|
(vmk_NetqueueOpGetQueueCountArgs *)opArgs;
|
|
struct net_device *dev = (struct net_device *)clientData;
|
|
VMK_ASSERT(dev);
|
|
args.netdev = dev;
|
|
|
|
if (marshall_from_vmknetq_type(vmkargs->qtype, &args.type) != VMK_OK) {
|
|
return VMK_FAILURE;
|
|
}
|
|
|
|
if (dev->netqueue_ops) {
|
|
VMKAPI_MODULE_CALL(dev->module_id, result, dev->netqueue_ops,
|
|
VMKNETDDI_QUEUEOPS_OP_GET_QUEUE_COUNT, &args);
|
|
if (result != 0) {
|
|
ret = VMK_FAILURE;
|
|
} else {
|
|
vmkargs->count = args.count;
|
|
ret = VMK_OK;
|
|
}
|
|
} else {
|
|
VMKLNX_DEBUG(0, "!dev->netqueue_ops");
|
|
ret = VMK_NOT_SUPPORTED;
|
|
}
|
|
|
|
return ret;
|
|
}
|
|
|
|
/*
|
|
*----------------------------------------------------------------------------
|
|
*
|
|
* netqueue_op_get_filter_count --
|
|
*
|
|
 *    Get number of rx filters supported by the driver
|
|
*
|
|
* Results:
|
|
* VMK_OK on success, VMK_FAILURE on error, VMK_NOT_SUPPORTED if
|
|
* not supported
|
|
*
|
|
* Side effects:
|
|
* None.
|
|
*
|
|
*----------------------------------------------------------------------------
|
|
*/
|
|
static VMK_ReturnStatus
|
|
netqueue_op_get_filter_count(void *clientData,
|
|
void *opArgs)
|
|
{
|
|
int result;
|
|
VMK_ReturnStatus ret;
|
|
vmknetddi_queueop_get_filter_count_args_t args;
|
|
|
|
vmk_NetqueueOpGetFilterCountArgs *vmkargs = (vmk_NetqueueOpGetFilterCountArgs *)opArgs;
|
|
struct net_device *dev = (struct net_device *)clientData;
|
|
VMK_ASSERT(dev);
|
|
args.netdev = dev;
|
|
|
|
if (marshall_from_vmknetq_type(vmkargs->qtype, &args.type) != VMK_OK) {
|
|
return VMK_FAILURE;
|
|
}
|
|
|
|
if (dev->netqueue_ops) {
|
|
VMKAPI_MODULE_CALL(dev->module_id, result, dev->netqueue_ops,
|
|
VMKNETDDI_QUEUEOPS_OP_GET_FILTER_COUNT, &args);
|
|
if (result != 0) {
|
|
ret = VMK_FAILURE;
|
|
} else {
|
|
vmkargs->count = args.count;
|
|
ret = VMK_OK;
|
|
}
|
|
} else {
|
|
VMKLNX_DEBUG(0, "!dev->netqueue_ops");
|
|
ret = VMK_NOT_SUPPORTED;
|
|
}
|
|
|
|
return ret;
|
|
}
|
|
|
|
/*
|
|
*----------------------------------------------------------------------------
|
|
*
|
|
* dev_add_netqueue_qid --
|
|
*
|
|
* Record new netqueue qid
|
|
*
|
|
* Results:
|
|
* VMK_ReturnStatus
|
|
*
|
|
* Side effects:
|
|
* None.
|
|
*
|
|
*----------------------------------------------------------------------------
|
|
*/
|
|
static VMK_ReturnStatus
|
|
dev_add_netqueue_qid(struct net_device *dev,
|
|
u16 qidx,
|
|
vmk_NetqueueQueueID vmkqid)
|
|
{
|
|
VMK_ReturnStatus ret = VMK_OK;
|
|
struct tx_netqueue_info *txinfo = dev->tx_netqueue_info;
|
|
VMK_ASSERT(txinfo);
|
|
|
|
if (qidx < dev->num_tx_queues) {
|
|
VMK_ASSERT(txinfo[qidx].valid == VMK_FALSE);
|
|
txinfo[qidx].valid = VMK_TRUE;
|
|
txinfo[qidx].vmkqid = vmkqid;
|
|
} else {
|
|
ret = VMK_FAILURE;
|
|
}
|
|
|
|
return ret;
|
|
}
|
|
|
|
/*
|
|
*----------------------------------------------------------------------------
|
|
*
|
|
* dev_remove_netqueue_qid --
|
|
*
|
|
 *    Remove a previously recorded netqueue qid
|
|
*
|
|
* Results:
|
|
* None.
|
|
*
|
|
* Side effects:
|
|
* None.
|
|
*
|
|
*----------------------------------------------------------------------------
|
|
*/
|
|
static void
|
|
dev_remove_netqueue_qid(struct net_device *dev,
|
|
u32 qidx)
|
|
{
|
|
struct tx_netqueue_info *txinfo = dev->tx_netqueue_info;
|
|
VMK_ASSERT(txinfo);
|
|
|
|
VMK_ASSERT(txinfo[qidx].valid == VMK_TRUE);
|
|
txinfo[qidx].valid = VMK_FALSE;
|
|
txinfo[qidx].vmkqid = VMK_NETQUEUE_INVALID_QUEUEID;
|
|
}
|
|
|
|
/*
|
|
*----------------------------------------------------------------------------
|
|
*
|
|
* netqueue_op_alloc_queue --
|
|
*
|
|
* Call driver netqueue_op for allocating queue
|
|
*
|
|
* Results:
|
|
* VMK_OK on success, VMK_FAILURE on error, VMK_NOT_SUPPORTED if
|
|
* not supported
|
|
*
|
|
* Side effects:
|
|
* None.
|
|
*
|
|
*----------------------------------------------------------------------------
|
|
*/
|
|
static VMK_ReturnStatus
|
|
netqueue_op_alloc_queue(void *clientData,
|
|
void *opArgs)
|
|
{
|
|
int result;
|
|
VMK_ReturnStatus ret;
|
|
vmknetddi_queueop_alloc_queue_args_t args;
|
|
vmknetddi_queueop_free_queue_args_t freeargs;
|
|
vmk_NetqueueOpAllocQueueArgs *vmkargs = (vmk_NetqueueOpAllocQueueArgs *)opArgs;
|
|
struct net_device *dev = (struct net_device *)clientData;
|
|
vmk_NetqueueQueueType qtype = vmkargs->qtype;
|
|
|
|
VMK_ASSERT(dev);
|
|
|
|
args.netdev = dev;
|
|
args.napi = NULL;
|
|
args.queue_mapping = 0;
|
|
|
|
if (!(qtype == VMK_NETQUEUE_QUEUE_TYPE_RX) &&
|
|
!(qtype == VMK_NETQUEUE_QUEUE_TYPE_TX)) {
|
|
VMKLNX_DEBUG(0, "invalid vmkqueue type 0x%x", qtype);
|
|
return VMK_FAILURE;
|
|
}
|
|
|
|
ret = marshall_from_vmknetq_type(qtype, &args.type);
|
|
VMK_ASSERT(ret == VMK_OK);
|
|
if (ret != VMK_OK) {
|
|
return VMK_FAILURE;
|
|
}
|
|
|
|
if (dev->netqueue_ops) {
|
|
VMKAPI_MODULE_CALL(dev->module_id, result, dev->netqueue_ops,
|
|
VMKNETDDI_QUEUEOPS_OP_ALLOC_QUEUE, &args);
|
|
if (result != 0) {
|
|
ret = VMK_FAILURE;
|
|
} else {
|
|
if (qtype == VMK_NETQUEUE_QUEUE_TYPE_TX) {
|
|
VMK_ASSERT(VMKNETDDI_QUEUEOPS_QUEUEID_VAL(args.queueid) < dev->num_tx_queues);
|
|
if (args.queue_mapping) {
|
|
VMK_ASSERT(args.queue_mapping ==
|
|
VMKNETDDI_QUEUEOPS_QUEUEID_VAL(args.queueid));
|
|
}
|
|
} else {
|
|
VMK_ASSERT(qtype == VMK_NETQUEUE_QUEUE_TYPE_RX);
|
|
if (args.napi != NULL) {
|
|
vmkargs->net_poll = args.napi->net_poll;
|
|
}
|
|
}
|
|
|
|
ret = marshall_to_vmknetq_id(args.queueid, &vmkargs->qid);
|
|
VMK_ASSERT(ret == VMK_OK);
|
|
if (unlikely(ret != VMK_OK)) {
|
|
goto error_free;
|
|
}
|
|
|
|
if (qtype == VMK_NETQUEUE_QUEUE_TYPE_TX) {
|
|
u16 qidx = VMKNETDDI_QUEUEOPS_QUEUEID_VAL(args.queueid);
|
|
ret = dev_add_netqueue_qid(dev, qidx, vmkargs->qid);
|
|
if (ret != VMK_OK) {
|
|
VMKLNX_DEBUG(0, "%s: failed to add netqueue qidx=%d", dev->name, qidx);
|
|
goto error_free;
|
|
}
|
|
}
|
|
}
|
|
} else {
|
|
VMKLNX_DEBUG(0, "!dev->netqueue_ops");
|
|
ret = VMK_NOT_SUPPORTED;
|
|
}
|
|
|
|
out:
|
|
return ret;
|
|
|
|
error_free:
|
|
VMK_ASSERT(ret != VMK_OK);
|
|
|
|
freeargs.netdev = dev;
|
|
freeargs.queueid = args.queueid;
|
|
VMKAPI_MODULE_CALL(dev->module_id, result, dev->netqueue_ops,
|
|
VMKNETDDI_QUEUEOPS_OP_FREE_QUEUE, &freeargs);
|
|
goto out;
|
|
}
|
|
|
|
/*
|
|
*----------------------------------------------------------------------------
|
|
*
|
|
* netqueue_op_alloc_queue_with_attr --
|
|
*
|
|
* Call driver netqueue_op for allocating queue with attributes
|
|
*
|
|
* Results:
|
|
* VMK_OK on success, VMK_FAILURE on error, VMK_NOT_SUPPORTED if
|
|
* not supported
|
|
*
|
|
* Side effects:
|
|
* None.
|
|
*
|
|
*----------------------------------------------------------------------------
|
|
*/
|
|
static VMK_ReturnStatus
|
|
netqueue_op_alloc_queue_with_attr(void *clientData,
|
|
void *opArgs)
|
|
{
|
|
int result;
|
|
VMK_ReturnStatus ret;
|
|
vmknetddi_queueop_alloc_queue_with_attr_args_t args;
|
|
vmknetddi_queueop_free_queue_args_t freeargs;
|
|
vmk_NetqueueOpAllocQueueArgs vmkallocargs;
|
|
vmk_NetqueueOpAllocQueueWithAttrArgs *vmkargs =
|
|
(vmk_NetqueueOpAllocQueueWithAttrArgs *)opArgs;
|
|
struct net_device *dev = (struct net_device *)clientData;
|
|
vmk_NetqueueQueueType qtype = vmkargs->qtype;
|
|
vmknetddi_queueops_queueattr_t attr[VMKNETDDI_QUEUEOPS_QUEUE_ATTR_NUM];
|
|
|
|
/* If alloc without attributes, just call normal alloc queue */
|
|
if (vmkargs->nattr == 0) {
|
|
memset(&vmkallocargs, 0, sizeof(vmkallocargs));
|
|
vmkallocargs.net_poll = NULL;
|
|
vmkallocargs.qtype = qtype;
|
|
vmkallocargs.qid = vmkargs->qid;
|
|
ret = netqueue_op_alloc_queue(clientData, &vmkallocargs);
|
|
if (ret == VMK_OK) {
|
|
vmkargs->net_poll = vmkallocargs.net_poll;
|
|
vmkargs->qid = vmkallocargs.qid;
|
|
return VMK_OK;
|
|
} else {
|
|
return VMK_FAILURE;
|
|
}
|
|
}
|
|
|
|
VMK_ASSERT(dev);
|
|
args.netdev = dev;
|
|
args.napi = NULL;
|
|
args.queue_mapping = 0;
|
|
|
|
if (vmkargs->nattr > VMKNETDDI_QUEUEOPS_QUEUE_ATTR_NUM) {
|
|
VMK_ASSERT(VMK_FALSE);
|
|
return VMK_LIMIT_EXCEEDED;
|
|
}
|
|
|
|
args.nattr = vmkargs->nattr;
|
|
|
|
if (!(qtype == VMK_NETQUEUE_QUEUE_TYPE_RX) &&
|
|
!(qtype == VMK_NETQUEUE_QUEUE_TYPE_TX)) {
|
|
VMKLNX_DEBUG(0, "invalid vmkqueue type 0x%x", qtype);
|
|
return VMK_FAILURE;
|
|
}
|
|
|
|
ret = marshall_from_vmknetq_type(qtype, &args.type);
|
|
VMK_ASSERT(ret == VMK_OK);
|
|
if (ret != VMK_OK) {
|
|
return VMK_FAILURE;
|
|
}
|
|
|
|
memset(attr, 0, sizeof(attr));
|
|
ret = marshall_from_vmknetq_attr(vmkargs->attr, vmkargs->nattr, attr);
|
|
VMK_ASSERT(ret == VMK_OK);
|
|
if (ret != VMK_OK) {
|
|
return VMK_FAILURE;
|
|
}
|
|
args.attr = attr;
|
|
|
|
if (dev->netqueue_ops) {
|
|
VMKAPI_MODULE_CALL(dev->module_id, result, dev->netqueue_ops,
|
|
VMKNETDDI_QUEUEOPS_OP_ALLOC_QUEUE_WITH_ATTR, &args);
|
|
if (result != 0) {
|
|
ret = VMK_FAILURE;
|
|
} else {
|
|
if (qtype == VMK_NETQUEUE_QUEUE_TYPE_TX) {
|
|
VMK_ASSERT(VMKNETDDI_QUEUEOPS_QUEUEID_VAL(args.queueid) < dev->num_tx_queues);
|
|
if (args.queue_mapping) {
|
|
VMK_ASSERT(args.queue_mapping ==
|
|
VMKNETDDI_QUEUEOPS_QUEUEID_VAL(args.queueid));
|
|
}
|
|
} else {
|
|
VMK_ASSERT(qtype == VMK_NETQUEUE_QUEUE_TYPE_RX);
|
|
if (args.napi != NULL) {
|
|
vmkargs->net_poll = args.napi->net_poll;
|
|
}
|
|
}
|
|
|
|
ret = marshall_to_vmknetq_id(args.queueid, &vmkargs->qid);
|
|
VMK_ASSERT(ret == VMK_OK);
|
|
if (unlikely(ret != VMK_OK)) {
|
|
VMKLNX_DEBUG(0, "invalid qid. freeing allocated queue");
|
|
goto error_free;
|
|
}
|
|
|
|
if (qtype == VMK_NETQUEUE_QUEUE_TYPE_TX) {
|
|
u16 qidx = VMKNETDDI_QUEUEOPS_QUEUEID_VAL(args.queueid);
|
|
ret = dev_add_netqueue_qid(dev, qidx, vmkargs->qid);
|
|
if (ret != VMK_OK) {
|
|
VMKLNX_DEBUG(0, "%s: failed to add netqueue qidx=%d", dev->name, qidx);
|
|
goto error_free;
|
|
}
|
|
}
|
|
}
|
|
} else {
|
|
VMKLNX_DEBUG(0, "!dev->netqueue_ops");
|
|
ret = VMK_NOT_SUPPORTED;
|
|
}
|
|
|
|
out:
|
|
return ret;
|
|
|
|
error_free:
|
|
VMK_ASSERT(ret != VMK_OK);
|
|
|
|
freeargs.netdev = dev;
|
|
freeargs.queueid = args.queueid;
|
|
VMKAPI_MODULE_CALL(dev->module_id, result, dev->netqueue_ops,
|
|
VMKNETDDI_QUEUEOPS_OP_FREE_QUEUE, &freeargs);
|
|
goto out;
|
|
}
|
|
|
|
/*
|
|
*----------------------------------------------------------------------------
|
|
*
|
|
* netqueue_op_free_queue --
|
|
*
|
|
* Free queue
|
|
*
|
|
* Results:
|
|
* VMK_OK on success, VMK_FAILURE on error, VMK_NOT_SUPPORTED if
|
|
* not supported
|
|
*
|
|
* Side effects:
|
|
* None.
|
|
*
|
|
*----------------------------------------------------------------------------
|
|
*/
|
|
static VMK_ReturnStatus
|
|
netqueue_op_free_queue(void *clientData,
|
|
void *opArgs)
|
|
{
|
|
int result;
|
|
VMK_ReturnStatus ret;
|
|
vmknetddi_queueop_free_queue_args_t args;
|
|
vmk_NetqueueOpFreeQueueArgs *vmkargs = (vmk_NetqueueOpFreeQueueArgs *)opArgs;
|
|
vmk_NetqueueQueueID vmkqid = vmkargs->qid;
|
|
vmk_NetqueueQueueType qtype = vmk_NetqueueQueueIDType(vmkqid);
|
|
struct net_device *dev = (struct net_device *)clientData;
|
|
|
|
VMK_ASSERT(dev);
|
|
args.netdev = dev;
|
|
|
|
ret = marshall_from_vmknetq_id(vmkqid, &args.queueid);
|
|
VMK_ASSERT(ret == VMK_OK);
|
|
if (ret != VMK_OK) {
|
|
return ret;
|
|
}
|
|
|
|
if (qtype == VMK_NETQUEUE_QUEUE_TYPE_TX) {
|
|
dev_remove_netqueue_qid(dev,
|
|
VMKNETDDI_QUEUEOPS_QUEUEID_VAL(args.queueid));
|
|
}
|
|
|
|
if (dev->netqueue_ops) {
|
|
VMKAPI_MODULE_CALL(dev->module_id, result, dev->netqueue_ops,
|
|
VMKNETDDI_QUEUEOPS_OP_FREE_QUEUE, &args);
|
|
if (result != 0) {
|
|
ret = VMK_FAILURE;
|
|
} else {
|
|
ret = VMK_OK;
|
|
}
|
|
} else {
|
|
VMKLNX_DEBUG(0, "!dev->netqueue_ops");
|
|
ret = VMK_NOT_SUPPORTED;
|
|
}
|
|
|
|
return ret;
|
|
}
|
|
|
|
/*
|
|
*----------------------------------------------------------------------------
|
|
*
|
|
* netqueue_op_get_queue_vector --
|
|
*
|
|
* Get interrupt vector for the queue
|
|
*
|
|
* Results:
|
|
* VMK_OK on success, VMK_FAILURE on error, VMK_NOT_SUPPORTED if
|
|
* not supported
|
|
*
|
|
* Side effects:
|
|
* None.
|
|
*
|
|
*----------------------------------------------------------------------------
|
|
*/
|
|
static VMK_ReturnStatus
|
|
netqueue_op_get_queue_vector(void *clientData,
|
|
void *opArgs)
|
|
{
|
|
int result;
|
|
VMK_ReturnStatus ret;
|
|
vmknetddi_queueop_get_queue_vector_args_t args;
|
|
|
|
vmk_NetqueueOpGetQueueVectorArgs *vmkargs = (vmk_NetqueueOpGetQueueVectorArgs *)opArgs;
|
|
struct net_device *dev = (struct net_device *)clientData;
|
|
VMK_ASSERT(dev);
|
|
args.netdev = dev;
|
|
|
|
ret = marshall_from_vmknetq_id(vmkargs->qid, &args.queueid);
|
|
VMK_ASSERT(ret == VMK_OK);
|
|
if (ret != VMK_OK) {
|
|
return ret;
|
|
}
|
|
|
|
if (dev->netqueue_ops) {
|
|
VMKAPI_MODULE_CALL(dev->module_id, result, dev->netqueue_ops,
|
|
VMKNETDDI_QUEUEOPS_OP_GET_QUEUE_VECTOR, &args);
|
|
if (result != 0) {
|
|
ret = VMK_FAILURE;
|
|
} else {
|
|
vmkargs->vector = args.vector;
|
|
ret = VMK_OK;
|
|
}
|
|
} else {
|
|
VMKLNX_DEBUG(0, "!dev->netqueue_ops");
|
|
ret = VMK_NOT_SUPPORTED;
|
|
}
|
|
|
|
return ret;
|
|
}
|
|
|
|
|
|
/*
|
|
*----------------------------------------------------------------------------
|
|
*
|
|
* netqueue_op_get_default_queue --
|
|
*
|
|
* Get default queue for tx/rx operations
|
|
*
|
|
* Results:
|
|
* VMK_OK on success, VMK_FAILURE on error, VMK_NOT_SUPPORTED if
|
|
* not supported
|
|
*
|
|
* Side effects:
|
|
* None.
|
|
*
|
|
*----------------------------------------------------------------------------
|
|
*/
|
|
static VMK_ReturnStatus
|
|
netqueue_op_get_default_queue(void *clientData,
|
|
void *opArgs)
|
|
{
|
|
int result;
|
|
VMK_ReturnStatus ret;
|
|
vmknetddi_queueop_get_default_queue_args_t args;
|
|
vmk_NetqueueOpGetDefaultQueueArgs *vmkargs =
|
|
(vmk_NetqueueOpGetDefaultQueueArgs *)opArgs;
|
|
struct net_device *dev = (struct net_device *)clientData;
|
|
vmk_NetqueueQueueType qtype = vmkargs->qtype;
|
|
|
|
VMK_ASSERT(dev);
|
|
args.netdev = dev;
|
|
args.napi = NULL;
|
|
args.queue_mapping = 0;
|
|
|
|
ret = marshall_from_vmknetq_type(qtype, &args.type);
|
|
VMK_ASSERT(ret == VMK_OK);
|
|
if (ret != VMK_OK) {
|
|
return ret;
|
|
}
|
|
|
|
if (dev->netqueue_ops) {
|
|
VMKAPI_MODULE_CALL(dev->module_id, result, dev->netqueue_ops,
|
|
VMKNETDDI_QUEUEOPS_OP_GET_DEFAULT_QUEUE, &args);
|
|
if (result != 0) {
|
|
ret = VMK_FAILURE;
|
|
} else {
|
|
if (qtype == VMK_NETQUEUE_QUEUE_TYPE_TX) {
|
|
VMK_ASSERT(VMKNETDDI_QUEUEOPS_QUEUEID_VAL(args.queueid) < dev->num_tx_queues);
|
|
} else {
|
|
VMK_ASSERT(qtype == VMK_NETQUEUE_QUEUE_TYPE_RX);
|
|
if (args.napi != NULL) {
|
|
vmkargs->net_poll = args.napi->net_poll;
|
|
}
|
|
}
|
|
|
|
ret = marshall_to_vmknetq_id(args.queueid, &vmkargs->qid);
|
|
VMK_ASSERT(ret == VMK_OK);
|
|
}
|
|
} else {
|
|
VMKLNX_DEBUG(0, "!dev->netqueue_ops");
|
|
ret = VMK_NOT_SUPPORTED;
|
|
}
|
|
|
|
return ret;
|
|
}
|
|
|
|
|
|
/*
|
|
*----------------------------------------------------------------------------
|
|
*
|
|
* netqueue_op_apply_rx_filter --
|
|
*
|
|
* Apply rx filter on queue
|
|
*
|
|
* Results:
|
|
* VMK_OK on success, VMK_FAILURE on error, VMK_NOT_SUPPORTED if
|
|
* not supported
|
|
*
|
|
* Side effects:
|
|
* None.
|
|
*
|
|
*----------------------------------------------------------------------------
|
|
*/
|
|
static VMK_ReturnStatus
|
|
netqueue_op_apply_rx_filter(void *clientData,
|
|
void *opArgs)
|
|
{
|
|
int result;
|
|
VMK_ReturnStatus ret;
|
|
vmknetddi_queueop_apply_rx_filter_args_t args;
|
|
|
|
vmk_NetqueueOpApplyRxFilterArgs *vmkargs =
|
|
(vmk_NetqueueOpApplyRxFilterArgs *)opArgs;
|
|
struct net_device *dev = (struct net_device *)clientData;
|
|
VMK_ASSERT(dev);
|
|
args.netdev = dev;
|
|
|
|
ret = marshall_from_vmknetq_id(vmkargs->qid, &args.queueid);
|
|
VMK_ASSERT(ret == VMK_OK);
|
|
if (ret != VMK_OK) {
|
|
return ret;
|
|
}
|
|
|
|
ret = marshall_from_vmknetq_filter_type(&vmkargs->filter, &args.filter);
|
|
VMK_ASSERT(ret == VMK_OK);
|
|
if (ret != VMK_OK) {
|
|
return ret;
|
|
}
|
|
|
|
if (dev->netqueue_ops) {
|
|
VMKAPI_MODULE_CALL(dev->module_id, result, dev->netqueue_ops,
|
|
VMKNETDDI_QUEUEOPS_OP_APPLY_RX_FILTER, &args);
|
|
if (result != 0) {
|
|
VMKLNX_DEBUG(0, "vmknetddi_queueops_apply_rx_filter returned %d", result);
|
|
ret = VMK_FAILURE;
|
|
} else {
|
|
ret = marshall_to_vmknetq_filter_id(args.filterid, &vmkargs->fid);
|
|
vmkargs->pairhwqid = args.pairtxqid;
|
|
VMK_ASSERT(ret == VMK_OK);
|
|
}
|
|
} else {
|
|
VMKLNX_DEBUG(0, "!dev->netqueue_ops");
|
|
ret = VMK_NOT_SUPPORTED;
|
|
}
|
|
|
|
return ret;
|
|
}
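
/*
 * Example (illustrative sketch, not part of the build):
 *
 *    The argument block unpacked above carries the queue id, the filter
 *    description and, on return, the filter id and paired tx queue.
 *    Applying a MAC filter to an already allocated rx queue could look like
 *    the sketch below; the MAC address is an arbitrary example value, rxQid
 *    is assumed to come from a prior alloc-queue op, and the helper name is
 *    made up.
 */
#if 0
static VMK_ReturnStatus
example_apply_mac_filter(struct net_device *dev, vmk_NetqueueQueueID rxQid)
{
   vmk_NetqueueOpApplyRxFilterArgs args;
   vmk_uint8 mac[6] = { 0x00, 0x50, 0x56, 0x00, 0x00, 0x01 };

   memset(&args, 0, sizeof(args));
   args.qid = rxQid;
   args.filter.class = VMK_NETQUEUE_FILTER_MACADDR;
   memcpy(args.filter.u.macaddr, mac, sizeof(mac));

   /* args.fid and args.pairhwqid are filled in on success */
   return netqueue_op_apply_rx_filter(dev, &args);
}
#endif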
|
|
|
|
/*
|
|
*----------------------------------------------------------------------------
|
|
*
|
|
* netqueue_op_remove_rx_filter --
|
|
*
|
|
* Remove rx filter from queue
|
|
*
|
|
* Results:
|
|
* VMK_OK on success, VMK_FAILURE on error, VMK_NOT_SUPPORTED if
|
|
* not supported
|
|
*
|
|
* Side effects:
|
|
* None.
|
|
*
|
|
*----------------------------------------------------------------------------
|
|
*/
|
|
static VMK_ReturnStatus
|
|
netqueue_op_remove_rx_filter(void *clientData,
|
|
void *opArgs)
|
|
{
|
|
int result;
|
|
VMK_ReturnStatus ret;
|
|
vmknetddi_queueop_remove_rx_filter_args_t args;
|
|
|
|
vmk_NetqueueOpRemoveRxFilterArgs *vmkargs =
|
|
(vmk_NetqueueOpRemoveRxFilterArgs *)opArgs;
|
|
struct net_device *dev = (struct net_device *)clientData;
|
|
VMK_ASSERT(dev);
|
|
args.netdev = dev;
|
|
|
|
ret = marshall_from_vmknetq_id(vmkargs->qid, &args.queueid);
|
|
VMK_ASSERT(ret == VMK_OK);
|
|
if (ret != VMK_OK) {
|
|
return ret;
|
|
}
|
|
|
|
ret = marshall_from_vmknetq_filter_id(vmkargs->fid, &args.filterid);
|
|
VMK_ASSERT(ret == VMK_OK);
|
|
if (ret != VMK_OK) {
|
|
return ret;
|
|
}
|
|
|
|
if (dev->netqueue_ops) {
|
|
VMKAPI_MODULE_CALL(dev->module_id, result, dev->netqueue_ops,
|
|
VMKNETDDI_QUEUEOPS_OP_REMOVE_RX_FILTER, &args);
|
|
if (result != 0) {
|
|
VMKLNX_DEBUG(0, "vmknetddi_queueops_remove_rx_filter returned %d",
|
|
result);
|
|
ret = VMK_FAILURE;
|
|
} else {
|
|
ret = VMK_OK;
|
|
}
|
|
} else {
|
|
VMKLNX_DEBUG(0, "!dev->netqueue_ops");
|
|
ret = VMK_NOT_SUPPORTED;
|
|
}
|
|
|
|
return ret;
|
|
}
|
|
|
|
/*
|
|
*----------------------------------------------------------------------------
|
|
*
|
|
* netqueue_op_get_queue_stats --
|
|
*
|
|
* Get queue statistics
|
|
*
|
|
* Results:
|
|
* VMK_OK on success, VMK_FAILURE on error, VMK_NOT_SUPPORTED if
|
|
* not supported
|
|
*
|
|
* Side effects:
|
|
* None.
|
|
*
|
|
*----------------------------------------------------------------------------
|
|
*/
|
|
static VMK_ReturnStatus
|
|
netqueue_op_get_queue_stats(void *clientData,
|
|
void *opArgs)
|
|
{
|
|
return VMK_NOT_SUPPORTED;
|
|
}
|
|
|
|
/*
|
|
*----------------------------------------------------------------------------
|
|
*
|
|
* netqueue_op_set_tx_priority --
|
|
*
|
|
* Set tx queue priority
|
|
*
|
|
* Results:
|
|
* VMK_OK on success, VMK_FAILURE on error, VMK_NOT_SUPPORTED if
|
|
* not supported
|
|
*
|
|
* Side effects:
|
|
* None.
|
|
*
|
|
*----------------------------------------------------------------------------
|
|
*/
|
|
static VMK_ReturnStatus
|
|
netqueue_op_set_tx_priority(void *clientData,
|
|
void *opArgs)
|
|
{
|
|
VMK_ReturnStatus ret;
|
|
vmk_NetqueueOpSetTxPriorityArgs *vmkargs = opArgs;
|
|
vmknetddi_queueop_set_tx_priority_args_t args;
|
|
struct net_device *dev = (struct net_device *)clientData;
|
|
|
|
args.netdev = dev;
|
|
|
|
ret = marshall_from_vmknetq_id(vmkargs->qid, &args.queueid);
|
|
VMK_ASSERT(ret == VMK_OK);
|
|
if (ret != VMK_OK) {
|
|
return ret;
|
|
}
|
|
|
|
ret = marshall_from_vmknetq_pri(vmkargs->priority, &args.priority);
|
|
VMK_ASSERT(ret == VMK_OK);
|
|
if (ret != VMK_OK) {
|
|
return ret;
|
|
}
|
|
|
|
if (dev->netqueue_ops) {
|
|
int result;
|
|
|
|
VMKAPI_MODULE_CALL(dev->module_id, result, dev->netqueue_ops,
|
|
VMKNETDDI_QUEUEOPS_OP_SET_TX_PRIORITY, &args);
|
|
if (result != 0) {
|
|
return VMK_FAILURE;
|
|
} else {
|
|
return VMK_OK;
|
|
}
|
|
} else {
|
|
VMKLNX_DEBUG(0, "!dev->netqueue_ops");
|
|
return VMK_NOT_SUPPORTED;
|
|
}
|
|
}
|
|
|
|
/*
|
|
*----------------------------------------------------------------------------
|
|
*
|
|
* netqueue_op_getset_state --
|
|
* Get and Set Netqueue Valid State
|
|
*
|
|
* Results:
|
|
 *    VMK_OK if the driver supports netqueue ops, VMK_NOT_SUPPORTED otherwise.
 *    The previous netqueue state is returned through opArgs.
|
|
*
|
|
* Side effects:
|
|
* None.
|
|
*
|
|
*----------------------------------------------------------------------------
|
|
*/
|
|
static VMK_ReturnStatus
|
|
netqueue_op_getset_state(void *clientData,
|
|
void *opArgs)
|
|
{
|
|
vmk_NetqueueOpGetSetQueueStateArgs *vmkargs =
|
|
(vmk_NetqueueOpGetSetQueueStateArgs *)opArgs;
|
|
struct net_device *dev = (struct net_device *)clientData;
|
|
VMK_ASSERT(dev);
|
|
|
|
if (dev->netqueue_ops) {
|
|
vmkargs->oldState = vmknetddi_queueops_getset_state(dev, vmkargs->newState);
|
|
return VMK_OK;
|
|
} else {
|
|
VMKLNX_DEBUG(0, "!dev->netqueue_ops");
|
|
return VMK_NOT_SUPPORTED;
|
|
}
|
|
}
|
|
|
|
/*
|
|
*----------------------------------------------------------------------------
|
|
*
|
|
* netqueue_op_enable_queue_feat --
|
|
* Enable queue's features
|
|
*
|
|
* Results:
|
|
* VMK_ReturnStatus
|
|
*
|
|
* Side effects:
|
|
* None.
|
|
*
|
|
*----------------------------------------------------------------------------
|
|
*/
|
|
static VMK_ReturnStatus
|
|
netqueue_op_enable_queue_feat(void *clientData,
|
|
void *opArgs)
|
|
{
|
|
VMK_ReturnStatus ret;
|
|
vmk_NetqueueOpEnableQueueFeatArgs *vmkargs = opArgs;
|
|
vmknetddi_queueop_enable_feat_args_t args;
|
|
struct net_device *dev = (struct net_device *)clientData;
|
|
|
|
args.netdev = dev;
|
|
args.features = 0;
|
|
|
|
ret = marshall_from_vmknetq_id(vmkargs->qid, &args.queueid);
|
|
VMK_ASSERT(ret == VMK_OK);
|
|
if (ret != VMK_OK) {
|
|
return ret;
|
|
}
|
|
|
|
marshall_from_vmknetq_queue_features(vmkargs->features,
|
|
&args.features);
|
|
if (dev->netqueue_ops) {
|
|
int result;
|
|
|
|
VMKAPI_MODULE_CALL(dev->module_id, result, dev->netqueue_ops,
|
|
VMKNETDDI_QUEUEOPS_OP_ENABLE_FEAT, &args);
|
|
if (result != 0) {
|
|
return VMK_FAILURE;
|
|
} else {
|
|
return VMK_OK;
|
|
}
|
|
} else {
|
|
VMKLNX_DEBUG(0, "!dev->netqueue_ops");
|
|
return VMK_NOT_SUPPORTED;
|
|
}
|
|
}
|
|
|
|
/*
|
|
*----------------------------------------------------------------------------
|
|
*
|
|
* netqueue_op_disable_queue_feat --
|
|
* Disable queue's features
|
|
*
|
|
* Results:
|
|
* VMK_ReturnStatus
|
|
*
|
|
* Side effects:
|
|
* None.
|
|
*
|
|
*----------------------------------------------------------------------------
|
|
*/
|
|
static VMK_ReturnStatus
|
|
netqueue_op_disable_queue_feat(void *clientData,
|
|
void *opArgs)
|
|
{
|
|
VMK_ReturnStatus ret;
|
|
vmk_NetqueueOpDisableQueueFeatArgs *vmkargs = opArgs;
|
|
vmknetddi_queueop_disable_feat_args_t args;
|
|
struct net_device *dev = (struct net_device *)clientData;
|
|
|
|
args.netdev = dev;
|
|
args.features = 0;
|
|
|
|
ret = marshall_from_vmknetq_id(vmkargs->qid, &args.queueid);
|
|
VMK_ASSERT(ret == VMK_OK);
|
|
if (ret != VMK_OK) {
|
|
return ret;
|
|
}
|
|
|
|
marshall_from_vmknetq_queue_features(vmkargs->features,
|
|
&args.features);
|
|
if (dev->netqueue_ops) {
|
|
int result;
|
|
|
|
VMKAPI_MODULE_CALL(dev->module_id, result, dev->netqueue_ops,
|
|
VMKNETDDI_QUEUEOPS_OP_DISABLE_FEAT, &args);
|
|
if (result != 0) {
|
|
return VMK_FAILURE;
|
|
} else {
|
|
return VMK_OK;
|
|
}
|
|
} else {
|
|
VMKLNX_DEBUG(0, "!dev->netqueue_ops");
|
|
return VMK_NOT_SUPPORTED;
|
|
}
|
|
}
|
|
|
|
/*
|
|
*----------------------------------------------------------------------------
|
|
*
|
|
* netqueue_op_get_queue_supported_feat --
|
|
* Get supported queues' features
|
|
*
|
|
* Results:
|
|
* VMK_ReturnStatus
|
|
*
|
|
* Side effects:
|
|
* None.
|
|
*
|
|
*----------------------------------------------------------------------------
|
|
*/
|
|
static VMK_ReturnStatus
|
|
netqueue_op_get_queue_supported_feat(void *clientData,
|
|
void *opArgs)
|
|
{
|
|
VMK_ReturnStatus ret;
|
|
vmk_NetqueueOpGetQueueSupFeatArgs *vmkargs = opArgs;
|
|
vmknetddi_queueop_get_sup_feat_args_t args;
|
|
struct net_device *dev = (struct net_device *)clientData;
|
|
|
|
args.netdev = dev;
|
|
|
|
ret = marshall_from_vmknetq_type(vmkargs->qtype, &args.type);
|
|
VMK_ASSERT(ret == VMK_OK);
|
|
if (ret != VMK_OK) {
|
|
return VMK_FAILURE;
|
|
}
|
|
|
|
if (dev->netqueue_ops) {
|
|
int result;
|
|
|
|
VMKAPI_MODULE_CALL(dev->module_id, result, dev->netqueue_ops,
|
|
VMKNETDDI_QUEUEOPS_OP_GET_SUPPORTED_FEAT, &args);
|
|
if (result != 0) {
|
|
return VMK_FAILURE;
|
|
} else {
|
|
marshall_to_vmknetq_queue_features(args.features,
|
|
&vmkargs->features);
|
|
return VMK_OK;
|
|
}
|
|
} else {
|
|
VMKLNX_DEBUG(0, "!dev->netqueue_ops");
|
|
return VMK_NOT_SUPPORTED;
|
|
}
|
|
}
|
|
|
|
/*
|
|
*----------------------------------------------------------------------------
|
|
*
|
|
* netqueue_op_get_queue_supported_filter_class --
|
|
* Get supported queues' filter class
|
|
*
|
|
* Results:
|
|
* VMK_ReturnStatus
|
|
*
|
|
* Side effects:
|
|
* None.
|
|
*
|
|
*----------------------------------------------------------------------------
|
|
*/
|
|
static VMK_ReturnStatus
|
|
netqueue_op_get_queue_supported_filter_class(void *clientData,
|
|
void *opArgs)
|
|
{
|
|
VMK_ReturnStatus ret;
|
|
vmk_NetqueueOpGetQueueSupFilterArgs *vmkargs = opArgs;
|
|
vmknetddi_queueop_get_sup_filter_class_args_t args;
|
|
struct net_device *dev = (struct net_device *)clientData;
|
|
|
|
args.netdev = dev;
|
|
|
|
ret = marshall_from_vmknetq_type(vmkargs->qtype, &args.type);
|
|
VMK_ASSERT(ret == VMK_OK);
|
|
if (ret != VMK_OK) {
|
|
return VMK_FAILURE;
|
|
}
|
|
|
|
if (dev->netqueue_ops) {
|
|
int result;
|
|
|
|
VMKAPI_MODULE_CALL(dev->module_id, result, dev->netqueue_ops,
|
|
VMKNETDDI_QUEUEOPS_OP_GET_SUPPORTED_FILTER_CLASS,
|
|
&args);
|
|
if (result != 0) {
|
|
         /* Assume by default that only MAC address filters are supported */
|
|
vmkargs->class = VMK_NETQUEUE_FILTER_MACADDR;
|
|
} else {
|
|
marshall_to_vmknetq_supported_filter_class(args.class,
|
|
&vmkargs->class);
|
|
}
|
|
return VMK_OK;
|
|
} else {
|
|
VMKLNX_DEBUG(0, "!dev->netqueue_ops");
|
|
return VMK_NOT_SUPPORTED;
|
|
}
|
|
}
|
|
|
|
/*
|
|
*----------------------------------------------------------------------------
|
|
*
|
|
* LinNet_NetqueueSkbXmit --
|
|
*
|
|
* Transmit a skb on a pre-allocated Tx queue for a specific device
|
|
*
|
|
* Results:
|
|
* VMK_OK on success, VMK_FAILURE|VMK_BUSY on error
|
|
*
|
|
* Side effects:
|
|
* None.
|
|
*
|
|
*----------------------------------------------------------------------------
|
|
*/
|
|
VMK_ReturnStatus
|
|
LinNet_NetqueueSkbXmit(struct net_device *dev,
|
|
vmk_NetqueueQueueID vmkqid,
|
|
struct sk_buff *skb)
|
|
{
|
|
VMK_ReturnStatus status = VMK_OK;
|
|
struct netdev_queue *queue;
|
|
int xmit_status = -1;
|
|
|
|
queue = netdev_pick_tx_queue(dev, vmkqid);
|
|
VMK_ASSERT(queue != NULL);
|
|
skb->queue_mapping = queue - dev->_tx;
|
|
|
|
spin_lock(&queue->_xmit_lock);
|
|
queue->processing_tx = 1;
|
|
|
|
if (unlikely(netif_tx_queue_stopped(queue))) {
|
|
status = VMK_BUSY;
|
|
goto done;
|
|
}
|
|
|
|
VMKAPI_MODULE_CALL(dev->module_id, xmit_status,
|
|
*dev->hard_start_xmit, skb, dev);
|
|
|
|
   /*
    * Map NETDEV_TX_OK and NETDEV_TX_BUSY to VMK_OK and VMK_BUSY.
    * NETDEV_TX_LOCKED is also treated as VMK_BUSY; any other value cannot be
    * mapped directly and is reported as VMK_FAILURE.
    */
|
|
if (xmit_status == NETDEV_TX_OK) {
|
|
status = VMK_OK;
|
|
} else if (xmit_status == NETDEV_TX_BUSY) {
|
|
status = VMK_BUSY;
|
|
} else if (xmit_status == NETDEV_TX_LOCKED) {
|
|
status = VMK_BUSY;
|
|
} else {
|
|
VMKLNX_WARN("Unknown NETDEV_TX status %d, map to VMK_FAILURE\n",
|
|
xmit_status);
|
|
status = VMK_FAILURE;
|
|
}
|
|
|
|
done:
|
|
queue->processing_tx = 0;
|
|
spin_unlock(&queue->_xmit_lock);
|
|
|
|
return status;
|
|
}
|
|
|
|
/*
|
|
*----------------------------------------------------------------------------
|
|
*
|
|
* LinNetNetqueueOpFunc --
|
|
* Netqueue ops handler for vmklinux
|
|
*
|
|
* Results:
|
|
* VMK_OK on success, VMK_FAILURE on error
|
|
*
|
|
* Side effects:
|
|
* None.
|
|
*
|
|
*----------------------------------------------------------------------------
|
|
*/
|
|
static VMK_ReturnStatus
|
|
LinNetNetqueueOpFunc(void *clientData,
|
|
vmk_NetqueueOp op,
|
|
void *opArgs)
|
|
{
|
|
|
|
switch (op) {
|
|
case VMK_NETQUEUE_OP_GET_VERSION:
|
|
return netqueue_op_get_version(clientData, opArgs);
|
|
|
|
case VMK_NETQUEUE_OP_GET_FEATURES:
|
|
return netqueue_op_get_features(clientData, opArgs);
|
|
|
|
case VMK_NETQUEUE_OP_QUEUE_COUNT:
|
|
return netqueue_op_get_queue_count(clientData, opArgs);
|
|
|
|
case VMK_NETQUEUE_OP_FILTER_COUNT:
|
|
return netqueue_op_get_filter_count(clientData, opArgs);
|
|
|
|
case VMK_NETQUEUE_OP_ALLOC_QUEUE:
|
|
return netqueue_op_alloc_queue(clientData, opArgs);
|
|
|
|
case VMK_NETQUEUE_OP_FREE_QUEUE:
|
|
return netqueue_op_free_queue(clientData, opArgs);
|
|
|
|
case VMK_NETQUEUE_OP_GET_QUEUE_VECTOR:
|
|
return netqueue_op_get_queue_vector(clientData, opArgs);
|
|
|
|
case VMK_NETQUEUE_OP_GET_DEFAULT_QUEUE:
|
|
return netqueue_op_get_default_queue(clientData, opArgs);
|
|
|
|
case VMK_NETQUEUE_OP_APPLY_RX_FILTER:
|
|
return netqueue_op_apply_rx_filter(clientData, opArgs);
|
|
|
|
case VMK_NETQUEUE_OP_REMOVE_RX_FILTER:
|
|
return netqueue_op_remove_rx_filter(clientData, opArgs);
|
|
|
|
case VMK_NETQUEUE_OP_GET_QUEUE_STATS:
|
|
return netqueue_op_get_queue_stats(clientData, opArgs);
|
|
|
|
case VMK_NETQUEUE_OP_SET_TX_PRIORITY:
|
|
return netqueue_op_set_tx_priority(clientData, opArgs);
|
|
|
|
case VMK_NETQUEUE_OP_GETSET_QUEUE_STATE:
|
|
return netqueue_op_getset_state(clientData, opArgs);
|
|
|
|
case VMK_NETQUEUE_OP_ALLOC_QUEUE_WITH_ATTR:
|
|
return netqueue_op_alloc_queue_with_attr(clientData, opArgs);
|
|
|
|
case VMK_NETQUEUE_OP_ENABLE_QUEUE_FEAT:
|
|
return netqueue_op_enable_queue_feat(clientData, opArgs);
|
|
|
|
case VMK_NETQUEUE_OP_DISABLE_QUEUE_FEAT:
|
|
return netqueue_op_disable_queue_feat(clientData, opArgs);
|
|
|
|
case VMK_NETQUEUE_OP_GET_QUEUE_SUPPORTED_FEAT:
|
|
return netqueue_op_get_queue_supported_feat(clientData, opArgs);
|
|
|
|
case VMK_NETQUEUE_OP_GET_QUEUE_SUPPORTED_FILTER_CLASS:
|
|
return netqueue_op_get_queue_supported_filter_class(clientData, opArgs);
|
|
|
|
default:
|
|
return VMK_FAILURE;
|
|
}
|
|
}
|
|
|
|
/*
|
|
*----------------------------------------------------------------------------
|
|
*
|
|
* LinNet_NetqueueOp --
|
|
*
|
|
* Submit a netqueue operation to a specific device
|
|
*
|
|
* Results:
|
|
* VMK_OK on success, VMK_FAILURE on error
|
|
*
|
|
* Side effects:
|
|
* None.
|
|
*
|
|
*----------------------------------------------------------------------------
|
|
*/
|
|
|
|
VMK_ReturnStatus
|
|
LinNet_NetqueueOp(struct net_device *dev,
|
|
vmk_NetqueueOp op,
|
|
void *opArgs)
|
|
{
|
|
return LinNetNetqueueOpFunc((void *) dev, op, opArgs);
|
|
}
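
/*
 * Example (illustrative sketch, not part of the build):
 *
 *    A short usage sketch of this entry point: query how many rx queues the
 *    device exposes, then allocate one. Error handling is reduced to
 *    pass/fail and the helper name is made up for illustration.
 */
#if 0
static VMK_ReturnStatus
example_alloc_rx_queue(struct net_device *dev, vmk_NetqueueQueueID *qid)
{
   vmk_NetqueueOpGetQueueCountArgs countArgs;
   vmk_NetqueueOpAllocQueueArgs allocArgs;
   VMK_ReturnStatus status;

   memset(&countArgs, 0, sizeof(countArgs));
   countArgs.qtype = VMK_NETQUEUE_QUEUE_TYPE_RX;
   status = LinNet_NetqueueOp(dev, VMK_NETQUEUE_OP_QUEUE_COUNT, &countArgs);
   if (status != VMK_OK || countArgs.count == 0) {
      return VMK_NOT_SUPPORTED;
   }

   memset(&allocArgs, 0, sizeof(allocArgs));
   allocArgs.qtype = VMK_NETQUEUE_QUEUE_TYPE_RX;
   status = LinNet_NetqueueOp(dev, VMK_NETQUEUE_OP_ALLOC_QUEUE, &allocArgs);
   if (status == VMK_OK) {
      /* the poll context for the new queue is in allocArgs.net_poll */
      *qid = allocArgs.qid;
   }
   return status;
}
#endif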

/*
 *-----------------------------------------------------------------------------
 *
 * LinNetPTOpFunc --
 *
 *    This function dispatches the requested passthru control or
 *    eSwitch operation to the corresponding driver.
 *
 * Results:
 *    VMK_NOT_SUPPORTED if the uplink doesn't support PT/eSwitch or
 *    if the desired operation is not implemented. VMK_OK on
 *    success. Any other error code from the driver on failure.
 *
 * Side effects:
 *    Calls the uplink driver.
 *
 *-----------------------------------------------------------------------------
 */

static VMK_ReturnStatus
LinNetPTOpFunc(void *clientData, vmk_NetPTOP op, void *args)
{
   struct net_device *dev = (struct net_device *)clientData;
   VMK_ReturnStatus status;

   VMK_ASSERT(dev);

   if (!dev->pt_ops) {
      return VMK_NOT_SUPPORTED;
   }

   if (op == VMK_NETPTOP_IS_SUPPORTED) {
      return VMK_OK;
   }

   /*
    * If _attempting_ to get a VF, increment the refCount up front.
    */
   if (op == VMK_NETPTOP_VF_ACQUIRE) {
      vmk_ModuleIncUseCount(dev->module_id);
   }

   rtnl_lock();
   VMKAPI_MODULE_CALL(dev->module_id,
                      status,
                      (vmk_UplinkPTOpFunc) dev->pt_ops,
                      dev,
                      op,
                      args);
   rtnl_unlock();

   /*
    * If we succeeded in acquiring a VF, keep the reference we took above.
    * If the acquire failed, decrement the refCount. If we successfully
    * released a VF, decrement the refCount as well.
    */
   if ((op == VMK_NETPTOP_VF_ACQUIRE && status != VMK_OK) ||
       (op == VMK_NETPTOP_VF_RELEASE && status == VMK_OK)) {
      vmk_ModuleDecUseCount(dev->module_id);
   }

   return status;
}

/*
 *----------------------------------------------------------------------------
 *
 * GetMACAddr --
 *
 *    Return the MAC address of the NIC.
 *
 * Results:
 *    None
 *
 * Side effects:
 *    None
 *
 *----------------------------------------------------------------------------
 */
static VMK_ReturnStatus
GetMACAddr(void *clientData, vmk_uint8 *macAddr)
{
   struct net_device *dev = (struct net_device *)clientData;

   memcpy(macAddr, dev->dev_addr, 6);

   return VMK_OK;
}

/*
 *----------------------------------------------------------------------------
 *
 * GetDeviceName --
 *
 *    Return the system name of the corresponding device
 *
 * Results:
 *    None
 *
 * Side effects:
 *    When dev->pdev is NULL, we return dev->name (the pseudo device name)
 *    instead
 *
 *----------------------------------------------------------------------------
 */
static VMK_ReturnStatus
GetDeviceName(void *device,
              char *devName,
              vmk_ByteCount devNameLen)
{
   VMK_ReturnStatus status;
   struct net_device *dev = device;

   /* Check if the associated pdev is NULL (a pseudo device) */
   if (dev->pdev) {
      status = vmk_StringCopy(devName, dev->pdev->name, devNameLen);
   } else {
      status = vmk_StringCopy(devName, dev->name, devNameLen);
   }

   return status;
}

/*
 *----------------------------------------------------------------------------
 *
 * GetDeviceStats --
 *
 *    Return the stats of the corresponding device.
 *
 *    There are two kinds of statistics :
 *
 *    - General statistics : retrieved through the struct net_device_stats
 *                           enclosed in the struct net_device.
 *                           These stats are common to all devices and are
 *                           stored in the stats fields.
 *
 *    - Specific statistics : retrieved through the ethtool functions provided
 *                            by the driver. A global string is created from
 *                            the driver's gstrings, containing all formatted
 *                            statistics.
 *
 * Results:
 *    VMK_OK if the general statistics could be read, VMK_FAILURE otherwise.
 *
 * Side effects:
 *    None
 *
 *----------------------------------------------------------------------------
 */
static VMK_ReturnStatus
GetDeviceStats(void *device, vmk_PortClientStats *stats)
{
   struct net_device *dev = device;
   struct net_device_stats *st = NULL;
   struct ethtool_ops *ops = dev->ethtool_ops;
   struct ethtool_stats stat;
   u64 *data;
   char *buf;
   char *pbuf;
   int idx = 0;
   int pidx = 0;

   if (dev->get_stats) {
      VMKAPI_MODULE_CALL(dev->module_id, st, dev->get_stats, dev);
   }

   if (!st) {
      return VMK_FAILURE;
   } else {
      VMK_ASSERT_ON_COMPILE(sizeof stats->rxPkt == sizeof st->rx_packets);
      stats->rxPkt = st->rx_packets;
      stats->txPkt = st->tx_packets;
      stats->rxBytes = st->rx_bytes;
      stats->txBytes = st->tx_bytes;
      stats->rxErr = st->rx_errors;
      stats->txErr = st->tx_errors;
      stats->rxDrp = st->rx_dropped;
      stats->txDrp = st->tx_dropped;
      stats->mltCast = st->multicast;
      stats->col = st->collisions;
      stats->rxLgtErr = st->rx_length_errors;
      stats->rxOvErr = st->rx_over_errors;
      stats->rxCrcErr = st->rx_crc_errors;
      stats->rxFrmErr = st->rx_frame_errors;
      stats->rxFifoErr = st->rx_fifo_errors;
      stats->rxMissErr = st->rx_missed_errors;
      stats->txAbortErr = st->tx_aborted_errors;
      stats->txCarErr = st->tx_carrier_errors;
      stats->txFifoErr = st->tx_fifo_errors;
      stats->txHeartErr = st->tx_heartbeat_errors;
      stats->txWinErr = st->tx_window_errors;
      stats->intRxPkt = dev->linnet_rx_packets;
      stats->intTxPkt = dev->linnet_tx_packets;
      stats->intRxDrp = dev->linnet_rx_dropped;
      stats->intTxDrp = dev->linnet_tx_dropped;
   }

   if (!ops ||
       !ops->get_ethtool_stats ||
       (!ops->get_stats_count && !ops->get_sset_count) ||
       !ops->get_strings) {
      goto done;
   }

   rtnl_lock();
   if (ops->get_stats_count) {
      /* 2.6.18 network drivers method to retrieve the number of stats */
      VMKAPI_MODULE_CALL(dev->module_id, stat.n_stats, ops->get_stats_count, dev);
   } else {
      /* 2.6.18+ network drivers method to retrieve the number of stats */
      VMKAPI_MODULE_CALL(dev->module_id, stat.n_stats, ops->get_sset_count, dev, ETH_SS_STATS);
   }
   rtnl_unlock();

   data = kmalloc(stat.n_stats * sizeof(u64), GFP_ATOMIC);
   pbuf = buf = kmalloc(stat.n_stats * ETH_GSTRING_LEN, GFP_ATOMIC);

   if (!data) {
      kfree(buf);   /* buf may have been allocated even though data failed */
      goto done;
   }

   if (!buf) {
      kfree(data);
      goto done;
   }

   rtnl_lock();
   VMKAPI_MODULE_CALL_VOID(dev->module_id, ops->get_ethtool_stats, dev, &stat, data);
   VMKAPI_MODULE_CALL_VOID(dev->module_id, ops->get_strings, dev, ETH_SS_STATS, (vmk_uint8 *)buf);
   rtnl_unlock();

   stats->privateStats[pidx++] = '\n';
   for (; (pidx < sizeof stats->privateStats - 1) && (idx < stat.n_stats); idx++) {
      char tmp[128];

      snprintf(tmp, 128, " %s : %lld\n", pbuf, data[idx]);
      memcpy(stats->privateStats + pidx, tmp,
             min(strlen(tmp), sizeof stats->privateStats - pidx - 1));

      pidx += min(strlen(tmp), sizeof stats->privateStats - pidx - 1);
      pbuf += ETH_GSTRING_LEN;
   }

   stats->privateStats[pidx] = '\0';

   kfree(data);
   kfree(buf);

 done:

   return VMK_OK;
}
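
/*
 * Illustrative only: the private-statistics path above expects the driver's
 * ethtool_ops to provide a consistent triple -- a stat count, one
 * ETH_GSTRING_LEN-sized name per stat, and one u64 value per stat.  The
 * sketch below shows the shape of such a driver-side implementation under
 * those assumptions; the private structure and counters ("example_priv",
 * "rx_csum_err", "tx_restart") are made up for illustration.  Not compiled in.
 */
#if 0
static const char example_stat_names[][ETH_GSTRING_LEN] = {
   "rx_csum_err",
   "tx_restart",
};

static int
example_get_sset_count(struct net_device *dev, int sset)
{
   return (sset == ETH_SS_STATS) ? ARRAY_SIZE(example_stat_names) : 0;
}

static void
example_get_strings(struct net_device *dev, u32 sset, u8 *buf)
{
   if (sset == ETH_SS_STATS) {
      memcpy(buf, example_stat_names, sizeof(example_stat_names));
   }
}

static void
example_get_ethtool_stats(struct net_device *dev,
                          struct ethtool_stats *stats, u64 *data)
{
   struct example_priv *priv = netdev_priv(dev);   /* hypothetical priv */

   data[0] = priv->rx_csum_err;   /* must match example_stat_names order */
   data[1] = priv->tx_restart;
}
#endif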

/*
 *----------------------------------------------------------------------------
 *
 * GetDriverInfo --
 *
 *    Return information about the corresponding device's driver.
 *
 * Results:
 *    VMK_OK if the driver provided the information, VMK_FAILURE otherwise.
 *
 * Side effects:
 *    None
 *
 *----------------------------------------------------------------------------
 */
static VMK_ReturnStatus
GetDriverInfo(void *device, vmk_UplinkDriverInfo *driverInfo)
{
   struct net_device *dev = device;
   struct ethtool_ops *ops = dev->ethtool_ops;
   struct ethtool_drvinfo drv;
   VMK_ReturnStatus status;

   snprintf(driverInfo->moduleInterface,
            sizeof driverInfo->moduleInterface, "vmklinux");

   if (!ops || !ops->get_drvinfo) {
      snprintf(driverInfo->driver,
               sizeof driverInfo->driver, "(none)");
      snprintf(driverInfo->version,
               sizeof driverInfo->version, "(none)");
      snprintf(driverInfo->firmwareVersion,
               sizeof driverInfo->firmwareVersion, "(none)");
      status = VMK_FAILURE;
   } else {
      memset(&drv, 0, sizeof(struct ethtool_drvinfo));

      rtnl_lock();
      VMKAPI_MODULE_CALL_VOID(dev->module_id, ops->get_drvinfo, dev, &drv);
      rtnl_unlock();

      memset(driverInfo->driver, 0, sizeof driverInfo->driver);
      memset(driverInfo->version, 0, sizeof driverInfo->version);
      memset(driverInfo->firmwareVersion, 0, sizeof driverInfo->firmwareVersion);

      memcpy(driverInfo->driver, drv.driver,
             min((size_t)(sizeof driverInfo->driver - 1), sizeof drv.driver));
      memcpy(driverInfo->version, drv.version,
             min((size_t)(sizeof driverInfo->version - 1), sizeof drv.version));
      memcpy(driverInfo->firmwareVersion, drv.fw_version,
             min((size_t)(sizeof driverInfo->firmwareVersion - 1), sizeof(drv.fw_version)));

      status = VMK_OK;
   }

   return status;
}

/*
 *----------------------------------------------------------------------------
 *
 * wolLinuxCapsToVmkCaps --
 *
 *    Translate Linux WoL capability bits into VMK uplink WoL capabilities.
 *
 * Results:
 *    vmk_UplinkWolCaps bitmask
 *
 * Side effects:
 *    None.
 *
 *----------------------------------------------------------------------------
 */
static vmk_UplinkWolCaps
wolLinuxCapsToVmkCaps(vmk_uint32 caps)
{
   vmk_UplinkWolCaps vmkCaps = 0;

   if (caps & WAKE_PHY) {
      vmkCaps |= VMK_UPLINK_WAKE_ON_PHY;
   }
   if (caps & WAKE_UCAST) {
      vmkCaps |= VMK_UPLINK_WAKE_ON_UCAST;
   }
   if (caps & WAKE_MCAST) {
      vmkCaps |= VMK_UPLINK_WAKE_ON_MCAST;
   }
   if (caps & WAKE_BCAST) {
      vmkCaps |= VMK_UPLINK_WAKE_ON_BCAST;
   }
   if (caps & WAKE_ARP) {
      vmkCaps |= VMK_UPLINK_WAKE_ON_ARP;
   }
   if (caps & WAKE_MAGIC) {
      vmkCaps |= VMK_UPLINK_WAKE_ON_MAGIC;
   }
   if (caps & WAKE_MAGICSECURE) {
      vmkCaps |= VMK_UPLINK_WAKE_ON_MAGICSECURE;
   }

   return vmkCaps;
}
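
/*
 * Illustrative only: a quick sanity example of the bit-for-bit translation
 * performed above.  A driver reporting WAKE_MAGIC|WAKE_PHY through ethtool
 * maps to exactly the corresponding VMK uplink flags.  Not compiled in.
 */
#if 0
static void
example_wol_translation(void)
{
   vmk_UplinkWolCaps vmkCaps = wolLinuxCapsToVmkCaps(WAKE_MAGIC | WAKE_PHY);

   /* Expect both VMK flags, and only those, to be set. */
   VMK_ASSERT(vmkCaps == (VMK_UPLINK_WAKE_ON_MAGIC | VMK_UPLINK_WAKE_ON_PHY));
}
#endif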

/*
 *----------------------------------------------------------------------------
 *
 * GetWolState --
 *
 *    Use the ethtool interface to populate a vmk_UplinkWolState struct.
 *
 * Results:
 *    VMK_OK on success, VMK_NOT_SUPPORTED if the driver has no get_wol,
 *    VMK_LIMIT_EXCEEDED if the SecureON password had to be truncated.
 *
 * Side effects:
 *    None.
 *
 *----------------------------------------------------------------------------
 */
static VMK_ReturnStatus
GetWolState(void *device, vmk_UplinkWolState *wolState)
{
   struct net_device *dev = device;
   struct ethtool_ops *ops = dev->ethtool_ops;

   if (!ops || !ops->get_wol) {
      return VMK_NOT_SUPPORTED;
   } else {
      struct ethtool_wolinfo wolInfo[1];
      VMK_ReturnStatus status = VMK_OK;

      memset(wolInfo, 0, sizeof(wolInfo));
      rtnl_lock();
      VMKAPI_MODULE_CALL_VOID(dev->module_id, ops->get_wol, dev, wolInfo);
      rtnl_unlock();

      wolState->supported = wolLinuxCapsToVmkCaps(wolInfo->supported);
      wolState->enabled = wolLinuxCapsToVmkCaps(wolInfo->wolopts);

      if (strlen((char *)wolInfo->sopass) > 0) {
         vmk_uint32 length = strlen((char *)wolInfo->sopass);

         memset(wolState->secureONPassword, 0,
                sizeof wolState->secureONPassword);

         length++;
         if (length > sizeof wolState->secureONPassword) {
            status = VMK_LIMIT_EXCEEDED; // truncated
            length = sizeof wolState->secureONPassword;
         }
         memcpy(wolState->secureONPassword, wolInfo->sopass, length);
      }
      return status;
   }
}

/*
 *----------------------------------------------------------------------------
 *
 * GetCoalesceParams --
 *
 *    Use the ethtool interface to get the device's coalescing properties.
 *
 * Results:
 *    VMK_ReturnStatus
 *
 * Side effects:
 *    None.
 *
 *----------------------------------------------------------------------------
 */
static VMK_ReturnStatus
GetCoalesceParams(void *device,
                  vmk_UplinkCoalesceParams *coalesceParams)
{
   struct net_device *dev = device;
   struct ethtool_ops *ops = dev->ethtool_ops;
   struct ethtool_coalesce coalesce;
   VMK_ReturnStatus status;

   if (!ops || !ops->get_coalesce) {
      status = VMK_NOT_SUPPORTED;
   } else {
      int ret = -1;
      memset(&coalesce, 0, sizeof(struct ethtool_coalesce));

      rtnl_lock();
      coalesce.cmd = ETHTOOL_GCOALESCE;
      VMKAPI_MODULE_CALL(dev->module_id,
                         ret,
                         ops->get_coalesce,
                         dev,
                         &coalesce);
      rtnl_unlock();

      if (ret == 0) {
         if (coalesce.rx_coalesce_usecs) {
            coalesceParams->rxUsecs = coalesce.rx_coalesce_usecs;
         }

         if (coalesce.rx_max_coalesced_frames) {
            coalesceParams->rxMaxFrames = coalesce.rx_max_coalesced_frames;
         }

         if (coalesce.tx_coalesce_usecs) {
            coalesceParams->txUsecs = coalesce.tx_coalesce_usecs;
         }

         if (coalesce.tx_max_coalesced_frames) {
            coalesceParams->txMaxFrames = coalesce.tx_max_coalesced_frames;
         }

         status = VMK_OK;
      } else {
         status = VMK_FAILURE;
      }
   }

   return status;
}

/*
 *----------------------------------------------------------------------------
 *
 * SetCoalesceParams --
 *
 *    Use the ethtool interface to set the device's coalescing properties.
 *
 * Results:
 *    VMK_ReturnStatus
 *
 * Side effects:
 *    None.
 *
 *----------------------------------------------------------------------------
 */
static VMK_ReturnStatus
SetCoalesceParams(void *device,
                  vmk_UplinkCoalesceParams *coalesceParams)
{
   struct net_device *dev = device;
   struct ethtool_ops *ops = dev->ethtool_ops;
   struct ethtool_coalesce coalesce;
   VMK_ReturnStatus status;

   /* We read-modify-write below, so get_coalesce is required as well. */
   if (!ops || !ops->set_coalesce || !ops->get_coalesce) {
      status = VMK_NOT_SUPPORTED;
   } else {
      int ret = -1;
      memset(&coalesce, 0, sizeof(struct ethtool_coalesce));

      // get first, then set
      rtnl_lock();
      coalesce.cmd = ETHTOOL_GCOALESCE;
      VMKAPI_MODULE_CALL(dev->module_id,
                         ret,
                         ops->get_coalesce,
                         dev,
                         &coalesce);

      if (ret == 0) {
         if (coalesceParams->rxUsecs) {
            coalesce.rx_coalesce_usecs = coalesceParams->rxUsecs;
         }

         if (coalesceParams->rxMaxFrames) {
            coalesce.rx_max_coalesced_frames = coalesceParams->rxMaxFrames;
         }

         if (coalesceParams->txUsecs) {
            coalesce.tx_coalesce_usecs = coalesceParams->txUsecs;
         }

         if (coalesceParams->txMaxFrames) {
            coalesce.tx_max_coalesced_frames = coalesceParams->txMaxFrames;
         }

         coalesce.cmd = ETHTOOL_SCOALESCE;
         VMKAPI_MODULE_CALL(dev->module_id,
                            ret,
                            ops->set_coalesce,
                            dev,
                            &coalesce);
      }
      rtnl_unlock();

      if (ret == 0) {
         status = VMK_OK;
      } else {
         status = VMK_FAILURE;
      }
   }

   return status;
}
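
/*
 * Illustrative only: the setter above deliberately does a read-modify-write,
 * so a caller that wants to change just one knob can leave the others zero
 * and the current driver values are preserved.  A minimal sketch, assuming
 * the caller already holds a valid net_device pointer.  Not compiled in.
 */
#if 0
static VMK_ReturnStatus
example_set_rx_coalescing(struct net_device *dev, vmk_uint32 rxUsecs)
{
   vmk_UplinkCoalesceParams params;

   memset(&params, 0, sizeof(params));
   params.rxUsecs = rxUsecs;      /* only this field is overridden */

   return SetCoalesceParams(dev, &params);
}
#endif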

/*
 *----------------------------------------------------------------------------
 *
 * wolVmkCapsToLinuxCaps --
 *
 *    Translate VMK uplink WoL capabilities into Linux WoL capability bits.
 *
 * Results:
 *    Linux WoL cap bits
 *
 * Side effects:
 *    None.
 *
 *----------------------------------------------------------------------------
 */
static vmk_uint32
wolVmkCapsToLinuxCaps(vmk_UplinkWolCaps vmkCaps)
{
   vmk_uint32 caps = 0;

   if (vmkCaps & VMK_UPLINK_WAKE_ON_PHY) {
      caps |= WAKE_PHY;
   }
   if (vmkCaps & VMK_UPLINK_WAKE_ON_UCAST) {
      caps |= WAKE_UCAST;
   }
   if (vmkCaps & VMK_UPLINK_WAKE_ON_MCAST) {
      caps |= WAKE_MCAST;
   }
   if (vmkCaps & VMK_UPLINK_WAKE_ON_BCAST) {
      caps |= WAKE_BCAST;
   }
   if (vmkCaps & VMK_UPLINK_WAKE_ON_ARP) {
      caps |= WAKE_ARP;
   }
   if (vmkCaps & VMK_UPLINK_WAKE_ON_MAGIC) {
      caps |= WAKE_MAGIC;
   }
   if (vmkCaps & VMK_UPLINK_WAKE_ON_MAGICSECURE) {
      caps |= WAKE_MAGICSECURE;
   }

   return caps;
}

/*
 *----------------------------------------------------------------------------
 *
 * SetWolState --
 *
 *    Set the WoL state via ethtool from a vmk_UplinkWolState struct.
 *
 * Results:
 *    VMK_OK, VMK_NOT_SUPPORTED, or various other failures.
 *
 * Side effects:
 *    Can set state within the pNic.
 *
 *----------------------------------------------------------------------------
 */
static VMK_ReturnStatus
SetWolState(void *device, vmk_UplinkWolState *wolState)
{
   struct net_device *dev = device;
   struct ethtool_ops *ops = dev->ethtool_ops;
   VMK_ReturnStatus status = VMK_FAILURE;

   if (!ops || !ops->set_wol) {
      return VMK_NOT_SUPPORTED;
   } else {
      vmk_uint32 length;
      struct ethtool_wolinfo wolInfo[1];
      int error;

      wolInfo->supported = wolVmkCapsToLinuxCaps(wolState->supported);
      wolInfo->wolopts = wolVmkCapsToLinuxCaps(wolState->enabled);

      length = strlen(wolState->secureONPassword);
      if (length > 0) {
         if (length > sizeof(wolInfo->sopass)) {
            length = sizeof(wolInfo->sopass);
         }
         memcpy(wolInfo->sopass, wolState->secureONPassword, length);
      }
      rtnl_lock();
      VMKAPI_MODULE_CALL(dev->module_id, error, ops->set_wol, dev, wolInfo);
      rtnl_unlock();
      if (error == 0) {
         status = VMK_OK;
      }
   }

   return status;
}
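
/*
 * Illustrative only: enabling wake-on-magic-packet through the setter above.
 * A minimal sketch; the supported mask would normally be taken from a prior
 * GetWolState() call rather than hard-coded.  Not compiled in.
 */
#if 0
static VMK_ReturnStatus
example_enable_magic_wol(struct net_device *dev)
{
   vmk_UplinkWolState wolState;

   memset(&wolState, 0, sizeof(wolState));
   wolState.supported = VMK_UPLINK_WAKE_ON_MAGIC;
   wolState.enabled = VMK_UPLINK_WAKE_ON_MAGIC;   /* no SecureON password */

   return SetWolState(dev, &wolState);
}
#endif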

/*
 *----------------------------------------------------------------------------
 *
 * GetNICState --
 *    For the given NIC, return its current state flags: present, queue OK,
 *    link up, running, ready, promiscuous, broadcast and multicast.
 *
 * Results:
 *    VMK_OK if successful. Other VMK_ReturnStatus codes returned on failure.
 *
 * Side effects:
 *    None.
 *
 *----------------------------------------------------------------------------
 */
static VMK_ReturnStatus
GetNICState(void *clientData, vmk_PortClientStates *states)
{
   if (clientData && states) {
      struct net_device *dev = (struct net_device *)clientData;

      if (test_bit(__LINK_STATE_PRESENT, &dev->state)) {
         *states |= VMK_PORT_CLIENT_STATE_PRESENT;
      }

      if (!test_bit(__LINK_STATE_XOFF, &dev->state)) {
         *states |= VMK_PORT_CLIENT_STATE_QUEUE_OK;
      }

      if (!test_bit(__LINK_STATE_NOCARRIER, &dev->state)) {
         *states |= VMK_PORT_CLIENT_STATE_LINK_OK;
      }

      if (test_bit(__LINK_STATE_START, &dev->state)) {
         *states |= VMK_PORT_CLIENT_STATE_RUNNING;
      }

      if (dev->flags & IFF_UP) {
         *states |= VMK_PORT_CLIENT_STATE_READY;
      }

      if (dev->flags & IFF_PROMISC) {
         *states |= VMK_PORT_CLIENT_STATE_PROMISC;
      }

      if (dev->flags & IFF_BROADCAST) {
         *states |= VMK_PORT_CLIENT_STATE_BROADCAST;
      }

      if (dev->flags & IFF_MULTICAST) {
         *states |= VMK_PORT_CLIENT_STATE_MULTICAST;
      }

      return VMK_OK;
   } else {
      VMKLNX_DEBUG(0, "clientData: %p, states %p", clientData, states);
      return VMK_FAILURE;
   }
}

static VMK_ReturnStatus
GetNICMemResources(void *clientData, vmk_UplinkMemResources *resources)
{
   if (clientData && resources) {
      struct net_device *dev = (struct net_device *) clientData;

      resources->baseAddr = (void *)dev->base_addr;
      resources->memStart = (void *)dev->mem_start;
      resources->memEnd = (void *)dev->mem_end;
      resources->dma = dev->dma;

      return VMK_OK;
   } else {
      VMKLNX_DEBUG(0, "clientData: %p, resources %p", clientData, resources);
      return VMK_FAILURE;
   }
}

static VMK_ReturnStatus
GetNICDeviceProperties(void *clientData, vmk_UplinkDeviceInfo *devInfo)
{
   VMK_ReturnStatus status;
   struct net_device *dev;
   struct pci_dev *pdev;
   vmk_PCIDevice vmkPciDev;

   if (clientData == NULL || devInfo == NULL) {
      VMKLNX_DEBUG(0, "clientData: %p, pciInfo %p", clientData, devInfo);
      return VMK_FAILURE;
   }

   dev = (struct net_device *)clientData;
   pdev = dev->pdev;

   if (dev->features & NETIF_F_PSEUDO_REG) {
      // If physical device but registered as a pseudo-device,
      // get the actual pdev from dev->pdev_pseudo (saved by the
      // NIC driver).
      VMK_ASSERT(pdev == NULL);
      pdev = (struct pci_dev *)dev->pdev_pseudo;
      VMKLNX_WARN("PCI device registered as pseudo-device %u:%u:%u.%u",
                  pci_domain_nr(pdev->bus), pdev->bus->number,
                  PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
   }
   else if (pdev == NULL) {
      /*
       * Pseudo NICs don't have PCI properties
       */
      status = VMK_NOT_SUPPORTED;
      goto out;
   }

   /*
    * Get the device info and the DMA constraints for the device
    */
   status = vmk_PCIGetPCIDevice(pci_domain_nr(pdev->bus), pdev->bus->number,
                                PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
                                &vmkPciDev);
   if (status != VMK_OK) {
      VMK_ASSERT(status == VMK_OK);
      VMKLNX_WARN("Unable to find vmk_PCIDevice for PCI device %u:%u:%u.%u %s",
                  pci_domain_nr(pdev->bus), pdev->bus->number,
                  PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
                  vmk_StatusToString(status));
      status = VMK_FAILURE;
      goto out;
   }

   status = vmk_PCIGetGenDevice(vmkPciDev, &devInfo->device);
   if (status != VMK_OK) {
      VMK_ASSERT(status == VMK_OK);
      VMKLNX_WARN("Unable to get vmk_Device for PCI device %u:%u:%u.%u: %s",
                  pci_domain_nr(pdev->bus), pdev->bus->number,
                  PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
                  vmk_StatusToString(status));
      status = VMK_FAILURE;
      goto out;
   }

   // If it is a physical device being registered as a pseudo-device,
   // return here prior to other setup.
   if (dev->features & NETIF_F_PSEUDO_REG) {
      return VMK_OK;
   }

   /* Most constraints don't apply so set them to zero. */
   memset(&devInfo->constraints, 0, sizeof(devInfo->constraints));
   devInfo->constraints.addressMask = pdev->dma_mask;
   devInfo->constraints.sgMaxEntries = MAX_SKB_FRAGS + 1;

   return VMK_OK;

 out:
   return status;
}

/*
 *----------------------------------------------------------------------------
 *
 * GetNICPanicInfo --
 *    Fill in vmk_UplinkPanicInfo struct.
 *
 * Results:
 *    VMK_OK if properties filled in. VMK_FAILURE otherwise.
 *
 * Side effects:
 *    None.
 *
 *----------------------------------------------------------------------------
 */
static VMK_ReturnStatus
GetNICPanicInfo(void *clientData,
                vmk_UplinkPanicInfo *intInfo)
{
   if (clientData && intInfo) {
      struct net_device* dev = (struct net_device*)clientData;

      if (dev->pdev == NULL) {
         /*
          * Pseudo NIC does not support remote
          * debugging.
          */
         intInfo->vector = 0;
         intInfo->clientData = NULL;
      } else {
         intInfo->vector = dev->pdev->irq;
         intInfo->clientData = dev;
      }

      return VMK_OK;
   } else {
      VMKLNX_DEBUG(0, "clientData: %p, intInfo %p", clientData, intInfo);
      return VMK_FAILURE;
   }
}

/*
 *----------------------------------------------------------------------------
 *
 * FlushRxBuffers --
 *
 *    Called by the net debugger
 *
 * Results:
 *    None.
 *
 * Side effects:
 *    None.
 *
 *----------------------------------------------------------------------------
 */
static VMK_ReturnStatus
FlushRxBuffers(void* clientData)
{
   struct net_device* dev = (struct net_device*)clientData;
   struct napi_struct* napi = NULL;
   vmk_NetPoll pollPriv;

   VMKLNX_DEBUG(1, "client data, now net_device:%p", dev);

   list_for_each_entry(napi, &dev->napi_list, dev_list) {
      if (napi != NULL) {
         VMKLNX_DEBUG(1, "Calling Pkt List Rx Process on napi:%p", napi);
         VMK_ASSERT(napi->dev != NULL);

         /*
          * Bypass the vswitch to receive the packets when the system is in the
          * panic/debug mode.
          */
         if (vmk_NetPollGetCurrent(&pollPriv) != VMK_OK) {
            if (debugPktList == NULL) {
               debugPktList = (vmk_PktList) vmk_HeapAlloc(vmklnxLowHeap,
                                                          vmk_PktListSizeInBytes);
               if (debugPktList == NULL) {
                  return VMK_NO_MEMORY;
               }
               vmk_PktListInit(debugPktList);
            }
            return vmk_NetDebugRxProcess(debugPktList);
         } else {
            vmk_NetPollProcessRx(napi->net_poll);
         }
      }
   }

   return VMK_OK;
}

/*
 *----------------------------------------------------------------------------
 *
 * PanicPoll --
 *    Poll for rx packets.
 *
 * Results:
 *    result of napi->poll: the number of packets received and processed.
 *
 * Side effects:
 *    None.
 *
 *----------------------------------------------------------------------------
 */
static VMK_ReturnStatus
PanicPoll(void* clientData,
          vmk_uint32 budget,
          vmk_int32* workDone)
{
   struct net_device* dev = (struct net_device*)clientData;
   struct napi_struct* napi = NULL;
   vmk_int32 ret = 0;
   vmk_int32 modRet = 0;

   VMKLNX_DEBUG(1, "data:%p budget:%u", dev, budget);
   VMK_ASSERT(dev != NULL);

   if (dev->poll_controller) {
      // device supports NET_POLL interface
      VMKAPI_MODULE_CALL_VOID(dev->module_id, dev->poll_controller, dev);
      VMKLNX_DEBUG(1, "%s: poll_controller called\n", dev->name);
   } else {
      list_for_each_entry(napi, &dev->napi_list, dev_list) {
         if ((napi != NULL) && (napi->poll != NULL)) {
            set_bit(NAPI_STATE_SCHED, &napi->state);
            VMKAPI_MODULE_CALL(napi->dev->module_id, modRet, napi->poll, napi,
                               budget);
            ret += modRet;
            VMKLNX_DEBUG(1, "poll:%p napi:%p budget:%u poll returned:%d",
                         napi->poll, napi, budget, ret);
         }
      }
      if (workDone != NULL) {
         *workDone = ret;
      }
   }
   return VMK_OK;
}

static VMK_ReturnStatus
GetWatchdogTimeoHitCnt(void *device, vmk_int16 *hitcnt)
{
   struct net_device *dev = device;

   *hitcnt = dev->watchdog_timeohit_cfg;

   return VMK_OK;
}

static VMK_ReturnStatus
SetWatchdogTimeoHitCnt(void *device, vmk_int16 hitcnt)
{
   struct net_device *dev = device;

   dev->watchdog_timeohit_cfg = hitcnt;

   return VMK_OK;
}

static VMK_ReturnStatus
GetWatchdogTimeoStats(void *device, vmk_int16 *stats)
{
   struct net_device *dev = device;

   *stats = dev->watchdog_timeohit_stats;

   return VMK_OK;
}

static VMK_ReturnStatus
GetWatchdogTimeoPanicMod(void *device, vmk_UplinkWatchdogPanicModState *state)
{
   struct net_device *dev = device;

   *state = dev->watchdog_timeohit_panic;

   return VMK_OK;
}

static VMK_ReturnStatus
SetWatchdogTimeoPanicMod(void *device, vmk_UplinkWatchdogPanicModState state)
{
   struct net_device *dev = device;

   dev->watchdog_timeohit_panic = state;

   return VMK_OK;
}

#define NET_DEVICE_MAKE_PROPERTIES_FUNCTIONS        \
{                                                   \
   getStates:           GetNICState,                \
   getMemResources:     GetNICMemResources,         \
   getDeviceProperties: GetNICDeviceProperties,     \
   getPanicInfo:        GetNICPanicInfo,            \
   getMACAddr:          GetMACAddr,                 \
   getName:             GetDeviceName,              \
   getStats:            GetDeviceStats,             \
   getDriverInfo:       GetDriverInfo,              \
   getWolState:         GetWolState,                \
   setWolState:         SetWolState,                \
   getCoalesceParams:   GetCoalesceParams,          \
   setCoalesceParams:   SetCoalesceParams,          \
}

#define NET_DEVICE_MAKE_WATCHDOG_FUNCTIONS          \
{                                                   \
   getHitCnt:           GetWatchdogTimeoHitCnt,     \
   setHitCnt:           SetWatchdogTimeoHitCnt,     \
   getStats:            GetWatchdogTimeoStats,      \
   getPanicMod:         GetWatchdogTimeoPanicMod,   \
   setPanicMod:         SetWatchdogTimeoPanicMod    \
}

#define NET_DEVICE_MAKE_NETQUEUE_FUNCTIONS          \
{                                                   \
   netqOpFunc:          LinNetNetqueueOpFunc,       \
   netqXmit:            NULL,                       \
}

#define NET_DEVICE_MAKE_PT_FUNCTIONS                \
{                                                   \
   ptOpFunc:            LinNetPTOpFunc              \
}

#define NET_DEVICE_MAKE_VLAN_FUNCTIONS              \
{                                                   \
   setupVlan:           SetupVlanGroupDevice,       \
   removeVlan:          LinNet_RemoveVlanGroupDevice \
}

#define NET_DEVICE_MAKE_MTU_FUNCTIONS               \
{                                                   \
   getMTU:              NICGetMTU,                  \
   setMTU:              NICSetMTU                   \
}

#define NET_DEVICE_MAKE_CORE_FUNCTIONS              \
{                                                   \
   startTxImmediate:    DevStartTxImmediate,        \
   open:                OpenNetDev,                 \
   close:               CloseNetDev,                \
   panicPoll:           PanicPoll,                  \
   flushRxBuffers:      FlushRxBuffers,             \
   ioctl:               IoctlNetDev,                \
   block:               BlockNetDev,                \
   unblock:             UnblockNetDev,              \
   setLinkStatus:       NICSetLinkStatus,           \
   reset:               NICResetDev                 \
}

#define NET_DEVICE_MAKE_DCB_FUNCTIONS               \
{                                                   \
   isDCBEnabled:        NICDCBIsEnabled,            \
   enableDCB:           NICDCBEnable,               \
   disableDCB:          NICDCBDisable,              \
   getNumTCs:           NICDCBGetNumTCs,            \
   getPG:               NICDCBGetPriorityGroup,     \
   setPG:               NICDCBSetPriorityGroup,     \
   getPFCCfg:           NICDCBGetPFCCfg,            \
   setPFCCfg:           NICDCBSetPFCCfg,            \
   isPFCEnabled:        NICDCBIsPFCEnabled,         \
   enablePFC:           NICDCBEnablePFC,            \
   disablePFC:          NICDCBDisablePFC,           \
   getApps:             NICDCBGetApplications,      \
   setApp:              NICDCBSetApplication,       \
   getCaps:             NICDCBGetCapabilities,      \
   applySettings:       NICDCBApplySettings,        \
   getSettings:         NICDCBGetSettings           \
}

vmk_UplinkFunctions linNetFunctions = {
   coreFns:     NET_DEVICE_MAKE_CORE_FUNCTIONS,
   mtuFns:      NET_DEVICE_MAKE_MTU_FUNCTIONS,
   vlanFns:     NET_DEVICE_MAKE_VLAN_FUNCTIONS,
   propFns:     NET_DEVICE_MAKE_PROPERTIES_FUNCTIONS,
   watchdogFns: NET_DEVICE_MAKE_WATCHDOG_FUNCTIONS,
   netqueueFns: NET_DEVICE_MAKE_NETQUEUE_FUNCTIONS,
   ptFns:       NET_DEVICE_MAKE_PT_FUNCTIONS,
   dcbFns:      NET_DEVICE_MAKE_DCB_FUNCTIONS,
};
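
/*
 * Illustrative only: the table above is what the vmkernel uplink layer uses
 * to reach back into vmklinux.  A minimal sketch of such an indirect call,
 * equivalent to invoking GetNICState() directly, assuming the sub-structure
 * member names implied by the designated initializers above.  Not compiled in.
 */
#if 0
static VMK_ReturnStatus
example_query_states(struct net_device *dev)
{
   vmk_PortClientStates states = 0;

   /* propFns.getStates was bound to GetNICState above. */
   return linNetFunctions.propFns.getStates(dev, &states);
}
#endif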

static VMK_ReturnStatus
NicCharOpsIoctl(vmk_CharDevFdAttr *attr,
                unsigned int cmd,
                vmk_uintptr_t userData,
                vmk_IoctlCallerSize callerSize,
                vmk_int32 *result)
{
   struct net_device *dev;
   vmkplxr_ChardevHandles *handles;
   struct ifreq ifr;
   VMK_ReturnStatus status;

   if (copy_from_user(&ifr, (void *)userData, sizeof(ifr))) {
      return VMK_INVALID_ADDRESS;
   }

   handles = (vmkplxr_ChardevHandles *) attr->clientDeviceData.ptr;
   VMK_ASSERT(handles != NULL);

   dev = handles->vmklinuxInfo.ptr;
   VMK_ASSERT(dev != NULL);

   status = netdev_ioctl(dev, cmd, &ifr, (uint32_t *) result, callerSize, VMK_FALSE);
   if (status == VMK_OK) {
      if (copy_to_user((void *)userData, &ifr, sizeof(ifr))) {
         return VMK_INVALID_ADDRESS;
      }
   }

   return status;
}

static VMK_ReturnStatus
NicCharOpsOpen(vmk_CharDevFdAttr *attr)
{
   struct net_device *dev;
   vmkplxr_ChardevHandles *handles;

   handles = (vmkplxr_ChardevHandles *) attr->clientDeviceData.ptr;
   VMK_ASSERT(handles != NULL);

   dev = handles->vmklinuxInfo.ptr;
   VMK_ASSERT(dev != NULL);

   dev_hold(dev);

   return VMK_OK;
}

static VMK_ReturnStatus
NicCharOpsClose(vmk_CharDevFdAttr *attr)
{
   struct net_device *dev;
   vmkplxr_ChardevHandles *handles;

   handles = (vmkplxr_ChardevHandles *) attr->clientDeviceData.ptr;
   VMK_ASSERT(handles != NULL);

   dev = handles->vmklinuxInfo.ptr;
   VMK_ASSERT(dev != NULL);

   dev_put(dev);

   return VMK_OK;
}

static vmk_CharDevOps nicCharOps = {
   NicCharOpsOpen,
   NicCharOpsClose,
   NicCharOpsIoctl,
   NULL,
   NULL,
   NULL
};

static VMK_ReturnStatus
NicCharDataDestructor(vmk_AddrCookie charData)
{
   /*
    * The device-private data is in fact the struct net_device,
    * which is destroyed separately from unregistration of the
    * character device. So, do nothing here.
    */
   return VMK_OK;
}

static int
register_nic_chrdev(struct net_device *dev)
{
   VMK_ReturnStatus status;
   int major = VMKPLXR_DYNAMIC_MAJOR;
   int minor = 0;
   vmk_AddrCookie devCookie;

   if (dev->name) {
      devCookie.ptr = dev;
      status = vmkplxr_RegisterChardev(&major, &minor, dev->name,
                                       &nicCharOps, devCookie,
                                       NicCharDataDestructor,
                                       dev->module_id);
      if (status == VMK_OK) {
         dev->nicMajor = major;
         return 0;
      } else if (status == VMK_BUSY) {
         return -EBUSY;
      }
   } else {
      printk("Device has no name\n");
   }

   return -EINVAL;
}

/*
 *----------------------------------------------------------------------------
 *
 * LinNet_ConnectUplink --
 *
 *    Register the device with the vmkernel. Initializes various device fields
 *    and sets up PCI hotplug notification handlers.
 *
 * Results:
 *    0 if successful, non-zero on failure.
 *
 * Side effects:
 *    None.
 *
 *----------------------------------------------------------------------------
 */
int
LinNet_ConnectUplink(struct net_device *dev, struct pci_dev *pdev)
{
   vmk_UplinkCapabilities capabilities = 0;
   vmk_Name pollName;

   vmk_ModuleID moduleID = VMK_INVALID_MODULE_ID;
   vmk_UplinkConnectInfo connectInfo;

   struct napi_struct *napi;

   /*
    * We should only make this call once per net_device
    */
   VMK_ASSERT(dev->uplinkDev == NULL);

   /*
    * Driver should have made the association with
    * the PCI device via the macro SET_NETDEV_DEV()
    */
   VMK_ASSERT(dev->pdev == pdev);

   /* CNA devices shouldn't go through this path. */
   VMK_ASSERT(!(dev->features & NETIF_F_CNA));

   /*
    * A driver that names its own device already has the device name
    * in net_device.
    */
   if (!dev->useDriverNamingDevice) {
      netdev_name_adapter(dev, pdev);
   }

   capabilities = netdev_query_capabilities(dev);

   moduleID = dev->module_id;

   VMK_ASSERT(moduleID != VMK_INVALID_MODULE_ID);

   connectInfo.devName = dev->name;
   connectInfo.clientData = dev;
   connectInfo.moduleID = moduleID;
   connectInfo.functions = &linNetFunctions;
   connectInfo.cap = capabilities;

   if (dev->features & NETIF_F_HIDDEN_UPLINK) {
      connectInfo.flags = VMK_UPLINK_FLAG_HIDDEN;
   } else {
      connectInfo.flags = 0;
   }

   if (dev->features & NETIF_F_PSEUDO_REG) {
      connectInfo.flags |= VMK_UPLINK_FLAG_PSEUDO_REG;
   }

   if (vmk_UplinkRegister((vmk_Uplink *)&dev->uplinkDev, &connectInfo) != VMK_OK) {
      goto fail;
   }

   VMK_ASSERT(dev->net_poll);

   (void) vmk_NameFormat(&pollName, "-backup");
   vmk_NetPollRegisterUplink(dev->net_poll, dev->uplinkDev, pollName, VMK_FALSE);

   list_for_each_entry(napi, &dev->napi_list, dev_list) {
      vmk_Name pollName;
      (void) vmk_NameFormat(&pollName, "-%d", napi->napi_id);
      vmk_NetPollRegisterUplink(napi->net_poll, napi->dev->uplinkDev, pollName, VMK_TRUE);
   }

   dev->link_speed = -1;
   dev->full_duplex = 0;

   dev->link_state = VMKLNX_UPLINK_LINK_DOWN;
   dev->watchdog_timeohit_cnt = 0;
   dev->watchdog_timeohit_cfg = VMK_UPLINK_WATCHDOG_HIT_CNT_DEFAULT;
   dev->watchdog_timeohit_stats = 0;
   dev->watchdog_timeohit_panic = VMKLNX_UPLINK_WATCHDOG_PANIC_MOD_ENABLE;
   dev->watchdog_timeohit_period_start = jiffies;

   return register_nic_chrdev(dev);

fail:
   return -1;
}

/*
 *----------------------------------------------------------------------------
 *
 * vmklnx_netdev_high_dma_workaround --
 *    Make a copy of an skb in low DMA memory.
 *
 * Results:
 *    If the copy succeeds, the previous skb is released and the new one
 *    is returned.
 *    If not, NULL is returned.
 *
 * Side effects:
 *    The skb buffer passed to the function might be released.
 *
 *----------------------------------------------------------------------------
 */
struct sk_buff *
vmklnx_netdev_high_dma_workaround(struct sk_buff *base)
{
   struct sk_buff *skb = skb_copy(base, GFP_ATOMIC);

   if (skb) {
      vmk_PktRelease(base->pkt);
   }

   return skb;
}

/*
 *----------------------------------------------------------------------------
 *
 * vmklnx_netdev_high_dma_overflow --
 *    Check whether the skb's data is located beyond a specified DMA limit.
 *
 * Results:
 *    Returns TRUE if there is an overflow with the passed skb and FALSE
 *    otherwise.
 *
 * Side effects:
 *    None.
 *
 *----------------------------------------------------------------------------
 */
#define GB (1024LL * 1024 * 1024)
int
vmklnx_netdev_high_dma_overflow(struct sk_buff *skb,
                                short gb_limit)
{
   uint64_t dma_addr;
   uint64_t dma_addr_limit;
   int idx_frags;
   int nr_frags;
   skb_frag_t *skb_frag;
   vmk_PktFrag pkt_frag;

   if (VMKLNX_STRESS_DEBUG_COUNTER(stressNetIfForceHighDMAOverflow)) {
      return VMK_TRUE;
   }

   dma_addr_limit = (uint64_t) gb_limit * GB;
   if (dma_addr_limit > max_phys_addr) {
      return VMK_FALSE;
   }

   if (vmk_PktFragGet(skb->pkt, &pkt_frag, 0) != VMK_OK) {
      return VMK_FALSE;
   }

   dma_addr = pkt_frag.addr + (skb->end - skb->head);
   if (dma_addr >= dma_addr_limit) {
      return VMK_TRUE;
   }

   nr_frags = skb_shinfo(skb)->nr_frags;
   for (idx_frags = 0; idx_frags < nr_frags; idx_frags++) {
      skb_frag = &skb_shinfo(skb)->frags[idx_frags];
      dma_addr = page_to_phys(skb_frag->page) + skb_frag->page_offset + skb_frag->size;

      if (dma_addr >= dma_addr_limit) {
         return VMK_TRUE;
      }
   }

   return VMK_FALSE;
}
EXPORT_SYMBOL(vmklnx_netdev_high_dma_overflow);
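
/*
 * Illustrative only: how a driver limited to, say, a 4 GB DMA reach might
 * combine the two exported helpers above in its transmit path.  The wrapper
 * function and the 4 GB limit are assumptions for this sketch; a real driver
 * would use its own constraint.  Not compiled in.
 */
#if 0
static struct sk_buff *
example_fixup_high_dma(struct sk_buff *skb)
{
   /* 4 == limit expressed in GB, matching the gb_limit parameter above. */
   if (vmklnx_netdev_high_dma_overflow(skb, 4)) {
      /* Copy into low memory; the original skb is released on success. */
      skb = vmklnx_netdev_high_dma_workaround(skb);
      /* NULL here means the copy failed and the caller must drop the frame. */
   }
   return skb;
}
#endif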

/*
 *----------------------------------------------------------------------------
 *
 * vmklnx_skb_real_size --
 *    This call exists to hide the size of "struct LinSkb" so that it is not
 *    baked into binary-compatibility constraints. We can expand LinSkb in
 *    the future when the need arises and do not have to worry about binary
 *    compatibility.
 *
 * Results:
 *    sizeof(struct LinSkb)
 *
 * Side effects:
 *    None.
 *
 *----------------------------------------------------------------------------
 */

size_t
vmklnx_skb_real_size()
{
   return sizeof(struct LinSkb);
}
EXPORT_SYMBOL(vmklnx_skb_real_size);

static void
LinNetComputeEthCRCTableLE(void)
{
   unsigned i, crc, j;

   for (i = 0; i < 256; i++) {
      crc = i;
      for (j = 0; j < 8; j++) {
         crc = (crc >> 1) ^ ((crc & 0x1) ? eth_crc32_poly_le : 0);
      }
      eth_crc32_poly_tbl_le[i] = crc;
   }
}

static uint32_t
LinNetComputeEthCRCLE(unsigned crc, const unsigned char *frame, uint32_t frameLen)
{
   int i, j;

   for (i = 0; i + 4 <= frameLen; i += 4) {
      crc ^= *(unsigned *)&frame[i];
      for (j = 0; j < 4; j++) {
         crc = eth_crc32_poly_tbl_le[crc & 0xff] ^ (crc >> 8);
      }
   }

   while (i < frameLen) {
      crc = eth_crc32_poly_tbl_le[(crc ^ frame[i++]) & 0xff] ^ (crc >> 8);
   }

   return crc;
}

/**
 * crc32_le - Calculate bitwise little-endian Ethernet CRC
 * @crc: seed value for computation
 * @p: pointer to buffer over which CRC is run
 * @len: length of buffer p
 *
 * Calculates bitwise little-endian Ethernet CRC from an
 * initial seed value that could be 0 or a previous value if
 * computing incrementally.
 *
 * RETURN VALUE:
 * 32-bit CRC value.
 *
 */
/* _VMKLNX_CODECHECK_: crc32_le */
uint32_t
crc32_le(uint32_t crc, unsigned char const *p, size_t len)
{
   return LinNetComputeEthCRCLE(crc, p, len);
}
EXPORT_SYMBOL(crc32_le);
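
/*
 * Illustrative only: the seed parameter above allows incremental computation,
 * so hashing a buffer in two chunks yields the same CRC as hashing it in one
 * pass.  A minimal sketch over an arbitrary split point.  Not compiled in.
 */
#if 0
static void
example_crc32_le_incremental(const unsigned char *buf, size_t len, size_t split)
{
   uint32_t whole = crc32_le(0, buf, len);
   uint32_t part = crc32_le(0, buf, split);            /* first chunk  */

   part = crc32_le(part, buf + split, len - split);    /* second chunk */

   VMK_ASSERT(part == whole);                          /* same result  */
}
#endif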

/*
 *----------------------------------------------------------------------------
 *
 * LinNet_Init --
 *
 *    Initialize LinNet data structures.
 *
 * Results:
 *    None.
 *
 * Side effects:
 *    None.
 *
 *----------------------------------------------------------------------------
 */
void
LinNet_Init(void)
{
   VMK_ReturnStatus status;

   VMKLNX_CREATE_LOG();

   LinStress_SetupStress();
   LinNetComputeEthCRCTableLE();

   /* set up link state timer */
   status = vmk_ConfigParamOpen("Net", "LinkStatePollTimeout",
                                &linkStateTimerPeriodConfigHandle);
   VMK_ASSERT(status == VMK_OK);
   status = vmk_ConfigParamGetUint(linkStateTimerPeriodConfigHandle,
                                   &linkStateTimerPeriod);
   VMK_ASSERT(status == VMK_OK);

   status = vmk_ConfigParamOpen("Net", "VmklnxLROEnabled",
                                &vmklnxLROEnabledConfigHandle);
   VMK_ASSERT(status == VMK_OK);
   status = vmk_ConfigParamGetUint(vmklnxLROEnabledConfigHandle,
                                   &vmklnxLROEnabled);
   VMK_ASSERT(status == VMK_OK);

   status = vmk_ConfigParamOpen("Net", "VmklnxLROMaxAggr",
                                &vmklnxLROMaxAggrConfigHandle);
   VMK_ASSERT(status == VMK_OK);
   status = vmk_ConfigParamGetUint(vmklnxLROMaxAggrConfigHandle,
                                   &vmklnxLROMaxAggr);
   VMK_ASSERT(status == VMK_OK);

   INIT_DELAYED_WORK(&linkStateWork, link_state_work_cb);
   schedule_delayed_work(&linkStateWork,
                         msecs_to_jiffies(linkStateTimerPeriod));

   INIT_DELAYED_WORK(&watchdogWork, watchdog_work_cb);
   schedule_delayed_work(&watchdogWork,
                         msecs_to_jiffies(WATCHDOG_DEF_TIMER));

   status = vmk_ConfigParamOpen("Net", "PortDisableTimeout",
                                &blockTotalSleepMsecHandle);
   VMK_ASSERT(status == VMK_OK);
   status = vmk_ConfigParamGetUint(blockTotalSleepMsecHandle, &blockTotalSleepMsec);
   VMK_ASSERT(status == VMK_OK);

   max_phys_addr = vmk_MachMemMaxAddr();

   status = vmk_ConfigParamOpen("Net", "MaxNetifTxQueueLen",
                                &maxNetifTxQueueLenConfigHandle);
   VMK_ASSERT(status == VMK_OK);

   status = vmk_ConfigParamOpen("Net", "UseHwIPv6Csum",
                                &useHwIPv6CsumHandle);
   VMK_ASSERT(status == VMK_OK);

   status = vmk_ConfigParamOpen("Net", "UseHwCsumForIPv6Csum",
                                &useHwCsumForIPv6CsumHandle);
   VMK_ASSERT(status == VMK_OK);

   status = vmk_ConfigParamOpen("Net", "UseHwTSO", &useHwTSOHandle);
   VMK_ASSERT(status == VMK_OK);

   status = vmk_ConfigParamOpen("Net", "UseHwTSO6", &useHwTSO6Handle);
   VMK_ASSERT(status == VMK_OK);

   status = vmk_StressOptionOpen(VMK_STRESS_OPT_NET_GEN_TINY_ARP_RARP,
                                 &stressNetGenTinyArpRarp);
   VMK_ASSERT(status == VMK_OK);
   status = vmk_StressOptionOpen(VMK_STRESS_OPT_NET_IF_CORRUPT_ETHERNET_HDR,
                                 &stressNetIfCorruptEthHdr);
   VMK_ASSERT(status == VMK_OK);
   status = vmk_StressOptionOpen(VMK_STRESS_OPT_NET_IF_CORRUPT_RX_DATA,
                                 &stressNetIfCorruptRxData);
   VMK_ASSERT(status == VMK_OK);
   status = vmk_StressOptionOpen(VMK_STRESS_OPT_NET_IF_CORRUPT_RX_TCP_UDP,
                                 &stressNetIfCorruptRxTcpUdp);
   VMK_ASSERT(status == VMK_OK);
   status = vmk_StressOptionOpen(VMK_STRESS_OPT_NET_IF_CORRUPT_TX,
                                 &stressNetIfCorruptTx);
   VMK_ASSERT(status == VMK_OK);
   status = vmk_StressOptionOpen(VMK_STRESS_OPT_NET_IF_FAIL_HARD_TX,
                                 &stressNetIfFailHardTx);
   VMK_ASSERT(status == VMK_OK);
   status = vmk_StressOptionOpen(VMK_STRESS_OPT_NET_IF_FAIL_RX,
                                 &stressNetIfFailRx);
   VMK_ASSERT(status == VMK_OK);
   status = vmk_StressOptionOpen(VMK_STRESS_OPT_NET_IF_FAIL_TX_AND_STOP_QUEUE,
                                 &stressNetIfFailTxAndStopQueue);
   VMK_ASSERT(status == VMK_OK);
   status = vmk_StressOptionOpen(VMK_STRESS_OPT_NET_IF_FORCE_HIGH_DMA_OVERFLOW,
                                 &stressNetIfForceHighDMAOverflow);
   VMK_ASSERT(status == VMK_OK);
   status = vmk_StressOptionOpen(VMK_STRESS_OPT_NET_IF_FORCE_RX_SW_CSUM,
                                 &stressNetIfForceRxSWCsum);
   VMK_ASSERT(status == VMK_OK);
   status = vmk_StressOptionOpen(VMK_STRESS_OPT_NET_NAPI_FORCE_BACKUP_WORLDLET,
                                 &stressNetNapiForceBackupWorldlet);
   VMK_ASSERT(status == VMK_OK);
   status = vmk_StressOptionOpen(VMK_STRESS_OPT_NET_BLOCK_DEV_IS_SLUGGISH,
                                 &stressNetBlockDevIsSluggish);
   VMK_ASSERT(status == VMK_OK);
}

/*
 *----------------------------------------------------------------------------
 *
 * LinNet_Cleanup --
 *
 *    Cleanup function for linux_net. Release and cleanup all resources.
 *
 * Results:
 *    None.
 *
 * Side effects:
 *    None.
 *
 *----------------------------------------------------------------------------
 */
void LinNet_Cleanup(void)
{
   VMK_ReturnStatus status;

   LinStress_CleanupStress();
   cancel_delayed_work_sync(&linkStateWork);
   cancel_delayed_work_sync(&watchdogWork);
   vmk_TimerRemoveSync(devWatchdogTimer);

   status = vmk_ConfigParamClose(linkStateTimerPeriodConfigHandle);
   VMK_ASSERT(status == VMK_OK);
   status = vmk_ConfigParamClose(maxNetifTxQueueLenConfigHandle);
   VMK_ASSERT(status == VMK_OK);
   status = vmk_ConfigParamClose(useHwIPv6CsumHandle);
   VMK_ASSERT(status == VMK_OK);
   status = vmk_ConfigParamClose(useHwCsumForIPv6CsumHandle);
   VMK_ASSERT(status == VMK_OK);
   status = vmk_ConfigParamClose(useHwTSOHandle);
   VMK_ASSERT(status == VMK_OK);
   status = vmk_ConfigParamClose(useHwTSO6Handle);
   VMK_ASSERT(status == VMK_OK);
   status = vmk_ConfigParamClose(blockTotalSleepMsecHandle);
   VMK_ASSERT(status == VMK_OK);
   status = vmk_ConfigParamClose(vmklnxLROEnabledConfigHandle);
   VMK_ASSERT(status == VMK_OK);
   status = vmk_ConfigParamClose(vmklnxLROMaxAggrConfigHandle);
   VMK_ASSERT(status == VMK_OK);

   status = vmk_StressOptionClose(stressNetGenTinyArpRarp);
   VMK_ASSERT(status == VMK_OK);
   status = vmk_StressOptionClose(stressNetIfCorruptEthHdr);
   VMK_ASSERT(status == VMK_OK);
   status = vmk_StressOptionClose(stressNetIfCorruptRxData);
   VMK_ASSERT(status == VMK_OK);
   status = vmk_StressOptionClose(stressNetIfCorruptRxTcpUdp);
   VMK_ASSERT(status == VMK_OK);
   status = vmk_StressOptionClose(stressNetIfCorruptTx);
   VMK_ASSERT(status == VMK_OK);
   status = vmk_StressOptionClose(stressNetIfFailHardTx);
   VMK_ASSERT(status == VMK_OK);
   status = vmk_StressOptionClose(stressNetIfFailRx);
   VMK_ASSERT(status == VMK_OK);
   status = vmk_StressOptionClose(stressNetIfFailTxAndStopQueue);
   VMK_ASSERT(status == VMK_OK);
   status = vmk_StressOptionClose(stressNetIfForceHighDMAOverflow);
   VMK_ASSERT(status == VMK_OK);
   status = vmk_StressOptionClose(stressNetIfForceRxSWCsum);
   VMK_ASSERT(status == VMK_OK);
   status = vmk_StressOptionClose(stressNetNapiForceBackupWorldlet);
   VMK_ASSERT(status == VMK_OK);
   status = vmk_StressOptionClose(stressNetBlockDevIsSluggish);
   VMK_ASSERT(status == VMK_OK);

   VMKLNX_DESTROY_LOG();
}