/* ****************************************************************
* Portions Copyright 2005, 2009-2011 VMware, Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
* ****************************************************************/
/******************************************************************
*
* linux_net.c
*
* From linux-2.6.24-7/include/linux/netdevice.h:
*
* Authors: Ross Biro
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
* Corey Minyard <wf-rch!minyard@relay.EU.net>
* Donald J. Becker, <becker@cesdis.gsfc.nasa.gov>
* Alan Cox, <Alan.Cox@linux.org>
* Bjorn Ekwall. <bj0rn@blox.se>
* Pekka Riikonen <priikone@poseidon.pspt.fi>
*
* From linux-2.6.27-rc9/net/core/dev.c:
*
* Authors: Ross Biro
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
* Mark Evans, <evansmp@uhura.aston.ac.uk>
*
* Additional Authors:
* Florian la Roche <rzsfl@rz.uni-sb.de>
* Alan Cox <gw4pts@gw4pts.ampr.org>
* David Hinds <dahinds@users.sourceforge.net>
* Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
* Adam Sulmicki <adam@cfar.umd.edu>
* Pekka Riikonen <priikone@poseidon.pspt.fi>
*
* From linux-2.6.27-rc9/net/sched/sch_generic.c:
*
* Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
* Jamal Hadi Salim, <hadi@cyberus.ca> 990601
*
******************************************************************/
#define NET_DRIVER // Special case for Net portion of VMKLINUX
#include <linux/version.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/pci.h>
#include <linux/netdevice.h>
#include <linux/if_vlan.h>
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/tcp.h>
#include <linux/ethtool.h>
#include <linux/rtnetlink.h> /* BUG_TRAP */
#include <linux/workqueue.h>
#include <linux/dma-mapping.h>
#include <asm/uaccess.h>
#include <asm/page.h> /* phys_to_page */
#include "vmkapi.h"
#include "linux_stubs.h"
#include "linux_pci.h"
#include "linux_stress.h"
#include "linux_task.h"
#include "linux_net.h"
#include "linux_cna.h"
#include "linux_dcb.h"
#include <vmkplexer_chardevs.h>
#define VMKLNX_LOG_HANDLE LinNet
#include "vmklinux_log.h"
/* default watchdog timeout value and timer period for device */
#define WATCHDOG_DEF_TIMEO (5 * HZ)
#define WATCHDOG_DEF_TIMER 1000
enum {
LIN_NET_HARD_QUEUE_XOFF = 0x0001, /* hardware queue is stopped */
};
/*
* NOTE: Try not to put any critical (data path) fields in LinNetDev.
* Instead, embed them in net_device, where they are next to
* their cache-line brethren.
*/
struct LinNetDev {
unsigned int napiNextId; /* Next unique id for napi context. */
unsigned long flags; /* vmklinux private device flags */
unsigned short padded; /* Padding added by alloc_netdev() */
struct net_device linNetDev __attribute__((aligned(NETDEV_ALIGN)));
/*
* WARNING: linNetDev must be last because it is assumed that
* private data area follows immediately after.
*/
};
typedef struct LinNetDev LinNetDev;
typedef int (*PollHandler) (void* clientData, vmk_uint32 vector);
#define get_LinNetDev(net_device) \
((LinNetDev*)(((char*)net_device)-(offsetof(struct LinNetDev, linNetDev))))
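/*
 * A minimal usage sketch of the container-of pattern above
 * (hypothetical call site, not from this file): any callback that
 * receives the driver-visible net_device can recover the vmklinux
 * wrapper and its private flags:
 *
 *   LinNetDev *ldev = get_LinNetDev(dev);
 *   ldev->flags |= LIN_NET_HARD_QUEUE_XOFF;
 */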
static vmk_Timer devWatchdogTimer;
static void link_state_work_cb(struct work_struct *work);
static void watchdog_work_cb(struct work_struct *work);
static struct delayed_work linkStateWork;
static struct delayed_work watchdogWork;
static unsigned linkStateTimerPeriod;
static vmk_ConfigParamHandle linkStateTimerPeriodConfigHandle;
static vmk_ConfigParamHandle maxNetifTxQueueLenConfigHandle;
static unsigned blockTotalSleepMsec;
static vmk_ConfigParamHandle blockTotalSleepMsecHandle;
struct net_device *dev_base = NULL;
EXPORT_SYMBOL(dev_base);
DEFINE_RWLOCK(dev_base_lock);
int netdev_max_backlog = 300;
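/*
 * 0xedb88320 is the reflected (LSB-first) form of the IEEE 802.3
 * CRC-32 polynomial, used to build the byte-wise lookup table
 * declared below.
 */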
static const unsigned eth_crc32_poly_le = 0xedb88320;
static unsigned eth_crc32_poly_tbl_le[256];
static uint64_t max_phys_addr;
static vmk_ConfigParamHandle useHwIPv6CsumHandle;
static vmk_ConfigParamHandle useHwCsumForIPv6CsumHandle;
static vmk_ConfigParamHandle useHwTSO6Handle;
static vmk_ConfigParamHandle useHwTSOHandle;
/*
* The global packet list used to receive packets while the system is
* in panic/debug mode.
*/
static vmk_PktList debugPktList = NULL;
/* Stress option handles */
static vmk_StressOptionHandle stressNetGenTinyArpRarp;
static vmk_StressOptionHandle stressNetIfCorruptEthHdr;
static vmk_StressOptionHandle stressNetIfCorruptRxData;
static vmk_StressOptionHandle stressNetIfCorruptRxTcpUdp;
static vmk_StressOptionHandle stressNetIfCorruptTx;
static vmk_StressOptionHandle stressNetIfFailHardTx;
static vmk_StressOptionHandle stressNetIfFailRx;
static vmk_StressOptionHandle stressNetIfFailTxAndStopQueue;
static vmk_StressOptionHandle stressNetIfForceHighDMAOverflow;
static vmk_StressOptionHandle stressNetIfForceRxSWCsum;
static vmk_StressOptionHandle stressNetNapiForceBackupWorldlet;
static vmk_StressOptionHandle stressNetBlockDevIsSluggish;
/* LRO config option */
static vmk_ConfigParamHandle vmklnxLROEnabledConfigHandle;
static vmk_ConfigParamHandle vmklnxLROMaxAggrConfigHandle;
unsigned int vmklnxLROEnabled;
unsigned int vmklnxLROMaxAggr;
extern void LinStress_SetupStress(void);
extern void LinStress_CleanupStress(void);
extern void LinStress_CorruptSkbData(struct sk_buff*, unsigned int,
unsigned int);
extern void LinStress_CorruptRxData(vmk_PktHandle*, struct sk_buff *);
extern void LinStress_CorruptEthHdr(struct sk_buff *skb);
static VMK_ReturnStatus map_pkt_to_skb(struct net_device *dev,
struct netdev_queue *queue,
vmk_PktHandle *pkt,
struct sk_buff **pskb);
static void do_free_skb(struct sk_buff *skb);
static struct sk_buff *do_alloc_skb(kmem_cache_t *cache, gfp_t flags);
static VMK_ReturnStatus BlockNetDev(void *clientData);
static void SetNICLinkStatus(struct net_device *dev);
static VMK_ReturnStatus skb_gen_pkt_frags(struct sk_buff *skb);
static inline VMK_ReturnStatus
marshall_from_vmknetq_id(vmk_NetqueueQueueID vmkqid,
vmknetddi_queueops_queueid_t *qid);
static ATOMIC_NOTIFIER_HEAD(netdev_notifier_list);
static vmk_Bool napi_poll(void *ptr);
inline void vmklnx_set_skb_frags_owner_vmkernel(struct sk_buff *);
/*
* Deal with the transition away from exposing vmk_Worldlet and
* vmk_Uplink* directly through the vmklnx headers.
*/
VMK_ASSERT_LIST(VMKLNX_NET,
VMK_ASSERT_ON_COMPILE(sizeof(vmk_Worldlet) == sizeof(void *));
VMK_ASSERT_ON_COMPILE(sizeof(vmk_LinkState) ==
sizeof(vmklnx_uplink_link_state));
VMK_ASSERT_ON_COMPILE(sizeof(vmk_UplinkPTOpFunc) ==
sizeof(void *));
VMK_ASSERT_ON_COMPILE(sizeof(vmk_NetqueueQueueID) ==
sizeof(vmk_uint64));
VMK_ASSERT_ON_COMPILE(VMKLNX_UPLINK_LINK_DOWN == VMK_LINK_STATE_DOWN);
VMK_ASSERT_ON_COMPILE(VMKLNX_UPLINK_LINK_UP == VMK_LINK_STATE_UP);
VMK_ASSERT_ON_COMPILE(sizeof(vmk_UplinkWatchdogPanicModState) ==
sizeof(vmklnx_uplink_watchdog_panic_mod_state));
VMK_ASSERT_ON_COMPILE(VMKLNX_UPLINK_WATCHDOG_PANIC_MOD_DISABLE ==
VMK_UPLINK_WATCHDOG_PANIC_MOD_DISABLE);
VMK_ASSERT_ON_COMPILE(VMKLNX_UPLINK_WATCHDOG_PANIC_MOD_ENABLE ==
VMK_UPLINK_WATCHDOG_PANIC_MOD_ENABLE);
VMK_ASSERT_ON_COMPILE(VMKLNX_PKT_HEAP_MAX_SIZE == VMK_PKT_HEAP_MAX_SIZE);
)
/*
* Section: Receive path
*/
/*
*----------------------------------------------------------------------------
*
* map_skb_to_pkt --
*
* Converts sk_buff to PktHandle before handing packet to vmkernel.
*
* Results:
* NET_RX_SUCCESS on success; NET_RX_DROP if the packet is dropped.
*
* Side effects:
* Drops packet on the floor if unsuccessful.
*
*----------------------------------------------------------------------------
*/
static int
map_skb_to_pkt(struct sk_buff *skb)
{
VMK_ReturnStatus status;
vmk_PktHandle *pkt = NULL;
struct net_device *dev = skb->dev;
/* drop packets while the device is administratively blocked */
if (unlikely(test_bit(__LINK_STATE_BLOCKED, &dev->state))) {
VMK_ASSERT(!(dev->features & NETIF_F_CNA));
goto drop;
}
if (unlikely(skb->len == 0)) {
static uint32_t logThrottleCounter = 0;
VMKLNX_THROTTLED_INFO(logThrottleCounter,
"dropping zero length packet "
"(skb->len=%u, skb->data_len=%u)\n",
skb->len, skb->data_len);
VMK_ASSERT(!(dev->features & NETIF_F_CNA));
goto drop;
}
if (unlikely(skb_gen_pkt_frags(skb) != VMK_OK)) {
VMK_ASSERT(!(dev->features & NETIF_F_CNA));
goto drop;
}
pkt = skb->pkt;
if (unlikely(vmk_PktFrameLenSet(pkt, skb->len) != VMK_OK)) {
printk("unable to set skb->pkt %p frame length with skb->len = %u\n",
pkt, skb->len);
VMK_ASSERT(VMK_FALSE);
goto drop;
}
if (skb_shinfo(skb)->gso_type != 0) {
switch (skb_shinfo(skb)->gso_type) {
case SKB_GSO_TCPV4:
status = vmk_PktSetLargeTcpPacket(pkt, skb_shinfo(skb)->gso_size);
VMK_ASSERT(status == VMK_OK);
break;
default:
printk("unable to process gso type 0x%x on the rx path\n",
skb_shinfo(skb)->gso_type);
VMK_ASSERT(VMK_FALSE);
goto drop;
}
}
/*
* The following extracts vlan tag from skb.
* The check just looks at a field of skb, so we
* don't bother to check whether vlan is enabled.
*/
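/*
 * For reference, the 802.1Q TCI layout assumed by the masks below:
 * bits 15-13 carry the priority (PCP), bit 12 the CFI/DEI flag, and
 * bits 11-0 the VLAN ID.
 */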
if (vlan_rx_tag_present(skb)) {
VMK_ASSERT(vmk_PktVlanIDGet(pkt) == 0);
if ((vlan_rx_tag_get(skb) & VLAN_VID_MASK) > VLAN_MAX_VALID_VID) {
static uint32_t logThrottleCounter = 0;
VMKLNX_THROTTLED_INFO(logThrottleCounter,
"invalid vlan tag: %d dropped",
vlan_rx_tag_get(skb) & VLAN_VID_MASK);
VMK_ASSERT(!(dev->features & NETIF_F_CNA));
goto drop;
}
status = vmk_PktVlanIDSet(pkt, vlan_rx_tag_get(skb) & VLAN_VID_MASK);
VMK_ASSERT(status == VMK_OK);
status = vmk_PktPrioritySet(pkt,
(vlan_rx_tag_get(skb) & VLAN_1PTAG_MASK) >> VLAN_1PTAG_SHIFT);
VMK_ASSERT(status == VMK_OK);
VMKLNX_DEBUG(2, "%s: rx vlan tag %u present with priority %u",
dev->name, vmk_PktVlanIDGet(pkt), vmk_PktPriorityGet(pkt));
#ifdef VMX86_DEBUG
{
// generate arp/rarp frames that are < ETH_MIN_FRAME_LEN to
// create test cases for PR 106153.
struct ethhdr *eh = (struct ethhdr *)vmk_PktFrameMappedPointerGet(pkt);
if ((eh->h_proto == ntohs(ETH_P_ARP)
|| eh->h_proto == ntohs(ETH_P_RARP))
&& VMKLNX_STRESS_DEBUG_COUNTER(stressNetGenTinyArpRarp)) {
int old_frameMappedLen;
int target_len = (ETH_ZLEN - VLAN_HLEN);
old_frameMappedLen = vmk_PktFrameMappedLenGet(pkt);
if (target_len <= old_frameMappedLen) {
int old_len;
int len;
old_len = vmk_PktFrameLenGet(pkt);
vmk_PktFrameLenSet(pkt, target_len);
len = vmk_PktFrameLenGet(pkt);
VMKLNX_DEBUG(1, "shorten arp/rarp pkt to %d from %d",
len, old_len);
}
}
}
#endif
}
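/*
 * The device reported the checksum as already validated (ip_summed
 * set), so mark the packet as checksum-verified for the vmkernel,
 * unless stress testing forces a software checksum.
 */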
if (skb->ip_summed != CHECKSUM_NONE &&
!VMKLNX_STRESS_DEBUG_OPTION(stressNetIfForceRxSWCsum)) {
status = vmk_PktSetCsumVfd(pkt);
VMK_ASSERT(status == VMK_OK);
}
if (likely(!(dev->features & NETIF_F_CNA))) {
if (VMKLNX_STRESS_DEBUG_COUNTER(stressNetIfCorruptRxTcpUdp)) {
LinStress_CorruptSkbData(skb, 40, 14);
}
if (VMKLNX_STRESS_DEBUG_COUNTER(stressNetIfCorruptRxData)) {
LinStress_CorruptRxData(pkt, skb);
}
if (VMKLNX_STRESS_DEBUG_COUNTER(stressNetIfCorruptEthHdr)) {
LinStress_CorruptEthHdr(skb);
}
}
dev->linnet_rx_packets++;
if (!(dev->features & NETIF_F_CNA)) {
do_free_skb(skb);
} else {
/*
* Packets received for FCOE will be free'd by the OpenFCOE stack.
*/
vmk_PktSetCompletionData(pkt, skb, dev->genCount, VMK_TRUE);
}
return NET_RX_SUCCESS;
drop:
dev_kfree_skb_any(skb);
dev->linnet_rx_dropped++;
VMK_ASSERT(!(dev->features & NETIF_F_CNA));
return NET_RX_DROP;
}
/**
* netif_rx - post buffer to the network code
* @skb: buffer to post
*
* This function receives a packet from a device driver and queues it for
* the upper (protocol) levels to process. It always succeeds. The buffer
* may be dropped during processing for congestion control or by the
* protocol layers.
*
* RETURN VALUE:
* NET_RX_SUCCESS (no congestion)
* NET_RX_DROP (packet was dropped)
*
*/
/* _VMKLNX_CODECHECK_: netif_rx */
int
netif_rx(struct sk_buff *skb)
{
struct net_device *dev = skb->dev;
vmk_PktHandle *pkt;
int status;
VMK_ASSERT(dev);
VMKLNX_DEBUG(1, "Napi is not enabled for device %s\n", dev->name);
pkt = skb->pkt;
VMK_ASSERT(pkt);
status = map_skb_to_pkt(skb);
if (likely(status == NET_RX_SUCCESS)) {
vmk_PktQueueForRxProcess(pkt, dev->uplinkDev);
}
return status;
}
EXPORT_SYMBOL(netif_rx);
/**
* netif_receive_skb - process receive buffer from network
* @skb: buffer to process
*
* netif_receive_skb() is the main receive data processing function.
* It always succeeds. The buffer may be dropped during processing
* for congestion control or by the protocol layers.
*
* ESX Deviation Notes:
* This function may only be called from the napi poll callback routine.
*
* RETURN VALUE:
* NET_RX_SUCCESS (no congestion)
* NET_RX_DROP (packet was dropped)
*/
/* _VMKLNX_CODECHECK_: netif_receive_skb */
int
netif_receive_skb(struct sk_buff *skb)
{
struct net_device *dev = skb->dev;
vmk_NetPoll pollPriv;
vmk_Worldlet wdt;
struct napi_struct *napi = NULL;
vmk_PktHandle *pkt;
int status;
VMK_ASSERT(dev);
/*
* In normal (non-panic/debug) operation, arriving packets are queued
* on skb->napi's rx packet list.
*/
if (skb->napi == NULL) {
if (unlikely(vmk_WorldletGetCurrent(&wdt, (void **)&pollPriv) != VMK_OK)) {
VMK_ASSERT(VMK_FALSE);
dev_kfree_skb_any(skb);
dev->linnet_rx_dropped++;
status = NET_RX_DROP;
goto done;
} else {
/*
* When the system is in panic/debug mode, the current worldlet is the
* debug worldlet rather than a napi_poll worldlet. In that case, put
* arriving packets on debugPktList; FlushRxBuffers processes that
* list, because netdump/netdebug bypasses the vswitch to read the
* packets.
*/
if (vmk_NetPollGetCurrent(&pollPriv) == VMK_OK) {
napi = (struct napi_struct *)vmk_NetPollGetPrivate(pollPriv);
}
if (!napi) {
pkt = skb->pkt;
status = map_skb_to_pkt(skb);
if (likely(status == NET_RX_SUCCESS)) {
if (debugPktList == NULL) {
if (vmk_PktListAlloc(&debugPktList) != VMK_OK) {
dev_kfree_skb_any(skb);
dev->linnet_rx_dropped++;
status = NET_RX_DROP;
goto done;
}
vmk_PktListInit(debugPktList);
}
VMK_ASSERT(pkt);
vmk_PktListAppendPkt(debugPktList, pkt);
}
goto done;
} else {
VMK_ASSERT(pollPriv != NULL);
skb->napi = napi;
}
}
}
VMK_ASSERT(skb->napi != NULL);
VMK_ASSERT(skb->napi->dev == skb->dev);
pkt = skb->pkt;
napi = skb->napi;
status = map_skb_to_pkt(skb);
if (likely(status == NET_RX_SUCCESS)) {
VMK_ASSERT(napi);
VMK_ASSERT(pkt);
vmk_NetPollQueueRxPkt(napi->net_poll, pkt);
}
done:
return status;
}
EXPORT_SYMBOL(netif_receive_skb);
/*
*----------------------------------------------------------------------------
*
* napi_poll --
*
* Callback registered with the net poll handler.
* This handler is responsible for polling the napi context it is
* bound to.
*
* Results:
* VMK_TRUE if we need to keep polling and VMK_FALSE otherwise.
*
* Side effects:
* None.
*
*----------------------------------------------------------------------------
*/
static vmk_Bool
napi_poll(void *ptr)
{
VMK_ReturnStatus status = VMK_OK;
struct napi_struct *napi = (struct napi_struct *)ptr;
/*
* napi_schedule_prep()/napi_schedule() depend on accurately seeing whether
* or not the worldlet is running and assume that the check for polling
* executes only after the worldlet has been dispatched. If the CPU
* aggressively prefetches the test_bit() load here so that it occurs
* prior to the worldlet being dispatched then __napi_schedule() could
* avoid kicking the worldlet (seeing that it had not yet run), but at
* the same time the aggressive prefetch would result in us seeing a
* clear napi->state and returning VMK_WDT_SUSPEND from here.
* Consequently an smp_mb() is required here; we need to ensure that none of
* our loads here occur prior to any stores that may have occurred by the
* caller of this function.
*/
smp_mb();
if (test_bit(NAPI_STATE_SCHED, &napi->state)) {
VMKAPI_MODULE_CALL(napi->dev->module_id, status, napi->poll, napi,
napi->weight);
if (vmklnxLROEnabled && !(napi->dev->features & NETIF_F_SW_LRO)) {
/* Flush all the lro sessions as we are done polling the napi context */
lro_flush_all(&napi->lro_mgr);
}
}
if (test_bit(NAPI_STATE_SCHED, &napi->state)) {
return VMK_TRUE;
} else {
return VMK_FALSE;
}
}
/*
*----------------------------------------------------------------------------
*
* netdev_poll --
*
* Callback registered for devices whose napi contexts were unable to
* create their own poll. This handler polls those napi contexts
* through the device's backup poll.
*
* Results:
* VMK_TRUE if we need to keep polling and VMK_FALSE otherwise.
*
* Side effects:
* None.
*
*----------------------------------------------------------------------------
*/
static vmk_Bool
netdev_poll(void *private)
{
struct net_device *dev = private;
vmk_Bool needWork;
struct napi_struct *napi;
VMK_ReturnStatus status = VMK_OK;
needWork = VMK_FALSE;
spin_lock(&dev->napi_lock);
list_for_each_entry(napi, &dev->napi_list, dev_list) {
if (napi->dev_poll &&
(test_bit(NAPI_STATE_SCHED, &napi->state))) {
needWork = VMK_TRUE;
list_move_tail(&napi->dev_list, &dev->napi_list);
break;
}
}
spin_unlock(&dev->napi_lock);
if (!needWork) {
return VMK_FALSE;
}
VMKAPI_MODULE_CALL(napi->dev->module_id, status, napi->poll, napi,
napi->weight);
if (vmklnxLROEnabled && !(napi->dev->features & NETIF_F_SW_LRO)) {
/* Flush all the lro sessions as we are done polling the napi context */
lro_flush_all(&napi->lro_mgr);
}
return VMK_TRUE;
}
/*
*----------------------------------------------------------------------------
*
* napi_poll_init --
*
* Initialize a napi context. If a dedicated net poll cannot be
* created, the napi context is attached to the backup poll provided
* by the device it belongs to.
*
* Results:
* VMK_OK on success, VMK_NO_MEMORY if resources could not be allocated.
*
* Side effects:
* None.
*
*----------------------------------------------------------------------------
*/
static VMK_ReturnStatus
napi_poll_init(struct napi_struct *napi)
{
VMK_ReturnStatus ret;
vmk_ServiceAcctID serviceID;
vmk_NetPollProperties pollInit;
spin_lock(&napi->dev->napi_lock);
napi->napi_id = get_LinNetDev(napi->dev)->napiNextId++;
spin_unlock(&napi->dev->napi_lock);
ret = vmk_ServiceGetID("netdev", &serviceID);
VMK_ASSERT(ret == VMK_OK);
napi->napi_wdt_priv.dev = napi->dev;
napi->napi_wdt_priv.napi = napi;
napi->dev_poll = VMK_FALSE;
napi->vector = 0;
pollInit.poll = napi_poll;
pollInit.priv = napi;
if (napi->dev->features & NETIF_F_CNA) {
pollInit.deliveryCallback = LinuxCNA_Poll;
pollInit.features = VMK_NETPOLL_CUSTOM_DELIVERY_CALLBACK;
} else {
pollInit.deliveryCallback = NULL;
pollInit.features = VMK_NETPOLL_NONE;
}
ret = vmk_NetPollInit(&pollInit, serviceID, (vmk_NetPoll *)&napi->net_poll);
if (ret != VMK_OK) {
VMKLNX_WARN("Unable to create net poll for %s, using backup",
napi->dev->name);
if (napi->dev->reg_state == NETREG_REGISTERED) {
napi->net_poll = napi->dev->net_poll;
napi->net_poll_type = NETPOLL_BACKUP;
/*
* Use device global net poll for polling this napi_struct,
* if net poll creation fails
*/
napi->dev_poll = VMK_TRUE;
} else {
napi->dev->reg_state = NETREG_EARLY_NAPI_ADD_FAILED;
return VMK_FAILURE;
}
} else {
napi->net_poll_type = NETPOLL_DEFAULT;
}
if (napi->dev->uplinkDev) {
vmk_Name pollName;
(void) vmk_NameFormat(&pollName, "-%d", napi->napi_id);
vmk_NetPollRegisterUplink(napi->net_poll, napi->dev->uplinkDev, pollName, VMK_TRUE);
}
spin_lock(&napi->dev->napi_lock);
list_add(&napi->dev_list, &napi->dev->napi_list);
spin_unlock(&napi->dev->napi_lock);
/*
* Keep track of which poll is (most probably) driving the
* default queue. For netqueue capable nics, we call
* VMKNETDDI_QUEUEOPS_OP_GET_DEFAULT_QUEUE to figure out the
* default poll. For non-netqueue nics, the first successful
* netif_napi_add wins.
*/
if (!napi->dev->default_net_poll && napi->net_poll) {
napi->dev->default_net_poll = napi->net_poll;
}
return VMK_OK;
}
/*
*----------------------------------------------------------------------------
*
* netdev_poll_init --
*
* Initialize a device's backup net poll, used by napi contexts that
* are unable to create their own.
*
* Results:
* VMK_OK if everything is ok, VMK_* otherwise.
*
* Side effects:
* None.
*
*----------------------------------------------------------------------------
*/
static VMK_ReturnStatus
netdev_poll_init(struct net_device *dev)
{
VMK_ReturnStatus ret;
vmk_ServiceAcctID serviceID;
vmk_NetPollProperties pollInit;
VMK_ASSERT(dev);
ret = vmk_ServiceGetID("netdev", &serviceID);
VMK_ASSERT(ret == VMK_OK);
dev->napi_wdt_priv.dev = dev;
dev->napi_wdt_priv.napi = NULL;
pollInit.poll = netdev_poll;
pollInit.priv = dev;
if (dev->features & NETIF_F_CNA) {
pollInit.deliveryCallback = LinuxCNADev_Poll;
pollInit.features = VMK_NETPOLL_CUSTOM_DELIVERY_CALLBACK;
} else {
pollInit.deliveryCallback = NULL;
pollInit.features = VMK_NETPOLL_NONE;
}
ret = vmk_NetPollInit(&pollInit, serviceID, (vmk_NetPoll *)&dev->net_poll);
return ret;
}
/*
*----------------------------------------------------------------------------
*
* napi_poll_cleanup --
*
* Cleanup a napi structure.
*
* Results:
* None.
*
* Side effects:
* None.
*
*----------------------------------------------------------------------------
*/
static void
napi_poll_cleanup(struct napi_struct *napi)
{
VMK_ASSERT(napi);
if (napi->net_poll == napi->dev->default_net_poll) {
napi->dev->default_net_poll = NULL;
}
if (likely(!napi->dev_poll)) {
if (napi->vector) {
vmk_NetPollVectorUnSet(napi->net_poll);
napi->vector = 0;
}
if (napi->net_poll) {
vmk_NetPollCleanup(napi->net_poll);
napi->net_poll = NULL;
}
}
list_del_init(&napi->dev_list);
}
/*
*----------------------------------------------------------------------------
*
* netdev_poll_cleanup --
*
* Cleanup all napi structures associated with the device.
*
* Results:
* None.
*
* Side effects:
* None.
*
*----------------------------------------------------------------------------
*/
static void
netdev_poll_cleanup(struct net_device *dev)
{
VMK_ASSERT(dev);
struct list_head *ele, *next;
struct napi_struct *napi;
/*
* Cleanup all napi structs
*/
list_for_each_safe(ele, next, &dev->napi_list) {
napi = list_entry(ele, struct napi_struct, dev_list);
napi_poll_cleanup(napi);
}
if (dev->net_poll) {
vmk_NetPollCleanup(dev->net_poll);
dev->net_poll = NULL;
}
}
/**
* __napi_schedule - schedule for receive
* @napi: entry to schedule
*
* The entry's receive function will be scheduled to run
*
* RETURN VALUE:
* None
*/
/* _VMKLNX_CODECHECK_: __napi_schedule */
void
__napi_schedule(struct napi_struct *napi)
{
vmk_uint32 myVector = 0;
vmk_Bool inIntr = vmk_ContextIsInterruptHandler(&myVector);
VMK_ASSERT(napi);
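/*
 * Lazily record the interrupt vector that scheduled us so the net
 * poll stays associated with the vector currently raising it.
 */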
if (unlikely(napi->vector != myVector)) {
if (likely(inIntr)) {
vmk_NetPollVectorSet(napi->net_poll, myVector);
napi->vector = myVector;
}
}
vmk_NetPollActivate(napi->net_poll);
}
EXPORT_SYMBOL(__napi_schedule);
/**
* napi_disable_timeout - prevent NAPI from scheduling
* @napi: napi context
* @timeout: number of ticks to wait for outstanding processing,
* or -1 to wait indefinitely
*
* Stop NAPI from being scheduled on this context.
* Waits until any outstanding processing completes or the timeout
* expires.
*
* RETURN VALUE:
* VMK_TRUE if the wait timed out, VMK_FALSE otherwise
*/
static vmk_Bool
napi_disable_timeout(struct napi_struct *napi, int timeout)
{
VMK_ReturnStatus status;
vmk_NetPollState state;
vmk_Bool doTimeout = (timeout == -1) ? VMK_FALSE : VMK_TRUE;
vmk_Bool timedOut = VMK_TRUE;
VMK_ASSERT(napi);
while (timeout) {
set_bit(NAPI_STATE_DISABLE, &napi->state);
status = vmk_NetPollCheckState(napi->net_poll, &state);
VMK_ASSERT(status == VMK_OK);
/* If the poll isn't running/set to run, then we see if we can
* disable it from running in the future by blocking off
* NAPI_STATE_SCHED.
*/
if (state == VMK_NETPOLL_DISABLED &&
!test_and_set_bit(NAPI_STATE_SCHED, &napi->state)) {
vmk_NetPollCheckState(napi->net_poll, &state);
VMK_ASSERT(state == VMK_NETPOLL_DISABLED);
timedOut = VMK_FALSE;
break;
}
/**
* Give the flush a chance to run.
*/
schedule_timeout_interruptible(1);
if (doTimeout) {
timeout--;
}
}
if (!timedOut) {
set_bit(NAPI_STATE_UNUSED, &napi->state);
}
if (napi->vector) {
vmk_NetPollVectorUnSet(napi->net_poll);
napi->vector = 0;
}
clear_bit(NAPI_STATE_DISABLE, &napi->state);
return timedOut;
}
/**
* napi_disable - prevent NAPI from scheduling
* @napi: napi context
*
* Stop NAPI from being scheduled on this context.
* Waits till any outstanding processing completes.
*
* RETURN VALUE:
* None
*/
/* _VMKLNX_CODECHECK_: napi_disable */
void
napi_disable(struct napi_struct *napi)
{
napi_disable_timeout(napi, -1);
}
EXPORT_SYMBOL(napi_disable);
/**
* netif_napi_add - initialize a napi context
* @dev: network device
* @napi: napi context
* @poll: polling function
* @weight: default weight
*
* netif_napi_add() must be used to initialize a napi context prior to calling
* *any* of the other napi related functions.
*
* RETURN VALUE:
* None
*/
/* _VMKLNX_CODECHECK_: netif_napi_add */
void
netif_napi_add(struct net_device *dev,
struct napi_struct *napi,
int (*poll)(struct napi_struct *, int),
int weight)
{
struct net_lro_mgr *lro_mgr;
napi->poll = poll;
napi->weight = weight;
napi->dev = dev;
lro_mgr = &napi->lro_mgr;
lro_mgr->dev = dev;
lro_mgr->features = LRO_F_NAPI;
lro_mgr->ip_summed = CHECKSUM_UNNECESSARY;
lro_mgr->ip_summed_aggr = CHECKSUM_UNNECESSARY;
lro_mgr->max_desc = LRO_DEFAULT_MAX_DESC;
lro_mgr->lro_arr = napi->lro_desc;
lro_mgr->get_skb_header = vmklnx_net_lro_get_skb_header;
lro_mgr->get_frag_header = NULL;
lro_mgr->max_aggr = vmklnxLROMaxAggr;
lro_mgr->frag_align_pad = 0;
napi_poll_init(napi);
set_bit(NAPI_STATE_SCHED, &napi->state);
set_bit(NAPI_STATE_UNUSED, &napi->state);
}
EXPORT_SYMBOL(netif_napi_add);
/**
* netif_napi_del - remove a napi context
* @napi: napi context
*
* netif_napi_del() removes a napi context from the network device napi list
*
* RETURN VALUE:
* None
*/
/* _VMKLNX_CODECHECK_: netif_napi_del */
void
netif_napi_del(struct napi_struct *napi)
{
napi_poll_cleanup(napi);
}
EXPORT_SYMBOL(netif_napi_del);
/**
* napi_enable - enable NAPI scheduling
* @napi: napi context
*
* Resume NAPI from being scheduled on this context.
* Must be paired with napi_disable.
*
* RETURN VALUE:
* None
*/
/* _VMKLNX_CODECHECK_: napi_enable */
void
napi_enable(struct napi_struct *napi)
{
struct net_lro_mgr *lro_mgr;
int idx;
BUG_ON(!test_bit(NAPI_STATE_SCHED, &napi->state));
lro_mgr = &napi->lro_mgr;
for (idx = 0; idx < lro_mgr->max_desc; idx++) {
memset(&napi->lro_desc[idx], 0, sizeof(struct net_lro_desc));
}
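/*
 * Ensure the LRO descriptors above are observed as zeroed before the
 * SCHED bit clears; once it does, this context may be scheduled again.
 */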
smp_mb__before_clear_bit();
clear_bit(NAPI_STATE_SCHED, &napi->state);
clear_bit(NAPI_STATE_UNUSED, &napi->state);
}
EXPORT_SYMBOL(napi_enable);
/*
* Section: Skb helpers
*/
/*
*----------------------------------------------------------------------------
*
* skb_append_frags_to_pkt --
*
* Append skb frags to the packet handle associated with it.
*
* Results:
* VMK_OK on success; VMK_* otherwise.
*
* Side effects:
* Drops packet on the floor if unsuccessful.
*
*----------------------------------------------------------------------------
*/
static inline VMK_ReturnStatus
skb_append_frags_to_pkt(struct sk_buff *skb)
{
VMK_ReturnStatus status = VMK_OK;
int i;
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
status = vmk_PktAppendFrag(skb->pkt,
page_to_phys(frag->page) + frag->page_offset,
frag->size);
if (unlikely(status != VMK_OK)) {
return status;
}
/*
* The frags should not be coalesced with the first sg entry (flat buffer).
* If this happens let's just drop the packet instead of leaking.
*/
if (unlikely(vmk_PktFragsNb(skb->pkt) <= 1)) {
VMK_ASSERT(VMK_FALSE);
return VMK_FAILURE;
}
}
/*
* Let vmkernel know it needs to release those frags explicitly.
*/
vmk_PktSetPageFrags(skb->pkt);
return status;
}
/*
*----------------------------------------------------------------------------
*
* skb_append_fraglist_to_pkt --
*
* Append the skb frag list to the packet handle associated with it.
*
* Results:
* VMK_OK on success; VMK_* otherwise.
*
* Side effects:
* Drops packet on the floor if unsuccessful.
*
*----------------------------------------------------------------------------
*/
static inline VMK_ReturnStatus
skb_append_fraglist_to_pkt(struct sk_buff *skb)
{
VMK_ReturnStatus status = VMK_OK;
struct sk_buff *frag_skb = skb_shinfo(skb)->frag_list;
while (frag_skb) {
/*
* LRO may have pulled up the entire flat buffer when header-split
* mode is active.
*/
if (skb_headlen(frag_skb)) {
status = vmk_PktAppend(skb->pkt, frag_skb->pkt,
skb_headroom(frag_skb), skb_headlen(frag_skb));
if (unlikely(status != VMK_OK)) {
return status;
}
}
if (skb_shinfo(frag_skb)->nr_frags) {
int i;
for (i = 0; i < skb_shinfo(frag_skb)->nr_frags; i++) {
skb_frag_t *frag = &skb_shinfo(frag_skb)->frags[i];
status = vmk_PktAppendFrag(skb->pkt,
page_to_phys(frag->page) + frag->page_offset,
frag->size);
if (unlikely(status != VMK_OK)) {
return status;
}
}
}
frag_skb = frag_skb->next;
}
return status;
}
/*
*----------------------------------------------------------------------------
*
* skb_gen_pkt_frags --
*
* Append the skb frags and frag list to the packet handle associated with it.
*
* Results:
* VMK_OK on success; VMK_* otherwise.
*
* Side effects:
* Drops packet on the floor if unsuccessful.
*
*----------------------------------------------------------------------------
*/
static VMK_ReturnStatus
skb_gen_pkt_frags(struct sk_buff *skb)
{
VMK_ReturnStatus status;
status = vmk_PktAdjust(skb->pkt, skb_headroom(skb), skb_headlen(skb));
VMK_ASSERT(status == VMK_OK);
if (skb_shinfo(skb)->nr_frags) {
status = skb_append_frags_to_pkt(skb);
if (unlikely(status != VMK_OK)) {
return status;
}
}
/*
* Since we removed packet completion in vmklinux, we
* cannot support skb chaining anymore.
*/
if (skb_shinfo(skb)->frag_list) {
VMK_ASSERT(VMK_FALSE);
return VMK_NOT_SUPPORTED;
}
return status;
}
/*
*----------------------------------------------------------------------------
*
* do_init_skb_bits --
*
* Initialize a socket buffer.
*
* Results:
* None.
*
* Side effects:
* None.
*
*----------------------------------------------------------------------------
*/
static inline void
do_init_skb_bits(struct sk_buff *skb, kmem_cache_t *cache)
{
skb->qid = VMKNETDDI_QUEUEOPS_INVALID_QUEUEID;
skb->next = NULL;
skb->prev = NULL;
skb->head = NULL;
skb->data = NULL;
skb->tail = NULL;
skb->end = NULL;
skb->dev = NULL;
skb->pkt = NULL;
atomic_set(&skb->users, 1);
skb->cache = cache;
skb->mhead = 0;
skb->len = 0;
skb->data_len = 0;
skb->ip_summed = CHECKSUM_NONE;
skb->csum = 0;
skb->priority = 0;
skb->protocol = 0;
skb->truesize = 0;
skb->mac.raw = NULL;
skb->nh.raw = NULL;
skb->h.raw = NULL;
skb->napi = NULL;
skb->lro_ready = 0;
/* VLAN_RX_SKB_CB shares the same space so this is sufficient */
VLAN_TX_SKB_CB(skb)->magic = 0;
VLAN_TX_SKB_CB(skb)->vlan_tag = 0;
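/*
 * Two reference counts: dataref covers the linear data area, while
 * fragsref gates the release of the pkt handle and page frags (see
 * skb_release_data()).
 */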
atomic_set(&(skb_shinfo(skb)->dataref), 1);
atomic_set(&(skb_shinfo(skb)->fragsref), 1);
skb_shinfo(skb)->nr_frags = 0;
skb_shinfo(skb)->frag_list = NULL;
skb_shinfo(skb)->gso_size = 0;
skb_shinfo(skb)->gso_segs = 0;
skb_shinfo(skb)->gso_type = 0;
skb_shinfo(skb)->ip6_frag_id = 0;
get_LinSkb(skb)->flags = LIN_SKB_FLAGS_FRAGSOWNER_VMKLNX;
}
/*
*----------------------------------------------------------------------------
*
* do_bind_skb_to_pkt --
*
* Bind a socket buffer to a packet handle.
*
* Results:
* None.
*
* Side effects:
* None.
*
*----------------------------------------------------------------------------
*/
static inline void
do_bind_skb_to_pkt(struct sk_buff *skb, vmk_PktHandle *pkt, unsigned int size)
{
skb->pkt = pkt;
skb->head = (void *) vmk_PktFrameMappedPointerGet(pkt);
skb->end = skb->head + size;
skb->data = skb->head;
skb->tail = skb->head;
#ifdef VMX86_DEBUG
VMK_ASSERT(vmk_PktFrameMappedLenGet(pkt) >= size);
/*
* Linux guarantees physical contiguity of the pages backing the skbs
* returned by this routine, and drivers rely on that. We provide the
* same guarantee by backing the buffers returned from vmk_PktAlloc
* with a large-page, low-memory heap that is physically contiguous,
* so we simply double-check it here.
*/
{
vmk_Bool isFlat;
isFlat = vmk_PktIsFlatBuffer(pkt);
VMK_ASSERT(isFlat);
}
#endif // VMX86_DEBUG
}
/*
*----------------------------------------------------------------------------
*
* do_alloc_skb --
*
* Allocate a socket buffer.
*
* Results:
* A pointer to the allocated socket buffer.
*
* Side effects:
* None.
*
*----------------------------------------------------------------------------
*/
static struct sk_buff *
do_alloc_skb(kmem_cache_t *cache, gfp_t flags)
{
struct LinSkb *linSkb;
VMK_ASSERT(cache != NULL);
if (!cache) {
VMKLNX_WARN("No skb cache provided.");
return NULL;
}
linSkb = vmklnx_kmem_cache_alloc(cache, flags);
if (unlikely(linSkb == NULL)) {
return NULL;
}
do_init_skb_bits(&linSkb->skb, cache);
return &linSkb->skb;
}
/*
*----------------------------------------------------------------------------
*
* vmklnx_net_alloc_skb --
*
* Allocate a socket buffer for a specified size and bind it to a packet handle.
*
* Results:
* A pointer to the allocated socket buffer.
*
* Side effects:
* None.
*
*----------------------------------------------------------------------------
*/
struct sk_buff *
vmklnx_net_alloc_skb(struct kmem_cache_s *cache, unsigned int size, struct net_device *dev, gfp_t flags)
{
vmk_PktHandle *pkt;
struct sk_buff *skb;
skb = do_alloc_skb(cache, flags);
if (unlikely(skb == NULL)) {
goto done;
}
if (dev && dev->uplinkDev) {
/*
* Do a packet allocation aimed at the specified device.
* The packet will be allocated in memory that will be
* easy to DMA map to.
*/
vmk_PktAllocForUplink(size, dev->uplinkDev, &pkt);
} else {
/* Do a simple packet allocation. */
vmk_PktAlloc(size, &pkt);
}
if (unlikely(pkt == NULL)) {
do_free_skb(skb);
skb = NULL;
goto done;
}
do_bind_skb_to_pkt(skb, pkt, size);
done:
return skb;
}
EXPORT_SYMBOL(vmklnx_net_alloc_skb);
/*
*-----------------------------------------------------------------------------
*
* vmklnx_set_skb_frags_owner_vmkernel --
*
* Transfer frag ownership for the given skb to the VMkernel.
*
* Results:
* None.
*
* Side effects:
* Sets the skb frag ownership to VMkernel for the given skb.
*
*-----------------------------------------------------------------------------
*/
inline void
vmklnx_set_skb_frags_owner_vmkernel(struct sk_buff *skb)
{
get_LinSkb(skb)->flags &= ~LIN_SKB_FLAGS_FRAGSOWNER_VMKLNX;
get_LinSkb(skb)->flags |= LIN_SKB_FLAGS_FRAGSOWNER_VMKERNEL;
return;
}
EXPORT_SYMBOL(vmklnx_set_skb_frags_owner_vmkernel);
/*
*-----------------------------------------------------------------------------
*
* vmklnx_is_skb_frags_owner --
*
* Indicate whether the skb frags belong to vmklinux.
*
* We do not always want to call put_page() on skb frags. For
* instance, in the TX path the frags belong to the guest
* OS. However, in the RX path with packet split and others we
* need to call put_page() since the frags belong to vmklinux.
*
* Results:
* 1 if the frags belong to vmklinux, 0 otherwise.
*
* Side effects:
* None.
*
*-----------------------------------------------------------------------------
*/
int
vmklnx_is_skb_frags_owner(struct sk_buff *skb)
{
VMK_ASSERT(skb_shinfo(skb)->nr_frags);
return (get_LinSkb(skb)->flags & LIN_SKB_FLAGS_FRAGSOWNER_VMKLNX);
}
EXPORT_SYMBOL(vmklnx_is_skb_frags_owner);
/*
*----------------------------------------------------------------------------
*
* skb_release_data --
*
* Release the data associated with an skb.
*
* Results:
* None.
*
* Side effects:
* None.
*
*----------------------------------------------------------------------------
*/
void
skb_release_data(struct sk_buff *skb)
{
VMK_ASSERT((atomic_read(&skb_shinfo(skb)->dataref) & SKB_DATAREF_MASK) == 1);
if (atomic_dec_and_test(&(skb_shinfo(skb)->dataref))) {
if (unlikely(skb->mhead)) {
skb->mhead = 0;
vmklnx_kfree(vmklnxLowHeap, skb->head);
}
if (likely(atomic_dec_and_test(&(skb_shinfo(skb)->fragsref)))) {
if (skb->pkt) {
if ((in_irq() || irqs_disabled()) && !vmklnx_is_panic()) {
vmk_PktReleaseIRQ(skb->pkt);
} else {
vmk_NetPoll pollPriv;
struct napi_struct *napi;
/*
* Try to queue packets in NAPI's compPktList in order to
* release them in batch, but first thoroughly check if we
* got called from a napi context (PR #396873).
*/
if (vmk_NetPollGetCurrent(&pollPriv) == VMK_OK &&
(napi = (struct napi_struct *) vmk_NetPollGetPrivate(pollPriv)) != NULL &&
napi->net_poll_type == NETPOLL_DEFAULT) {
vmk_NetPollQueueCompPkt(pollPriv, skb->pkt);
} else {
vmk_PktRelease(skb->pkt);
}
}
}
if (skb_shinfo(skb)->nr_frags && vmklnx_is_skb_frags_owner(skb)) {
int i;
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
put_page(skb_shinfo(skb)->frags[i].page);
}
skb_shinfo(skb)->nr_frags = 0;
}
if (skb_shinfo(skb)->frag_list) {
struct sk_buff *frag_skb = skb_shinfo(skb)->frag_list;
struct sk_buff *next_skb;
while (frag_skb) {
next_skb = frag_skb->next;
kfree_skb(frag_skb);
frag_skb = next_skb;
}
skb_shinfo(skb)->frag_list = NULL;
}
}
}
}
/*
*----------------------------------------------------------------------------
*
* do_free_skb --
*
* Release socket buffer.
*
* Results:
* None.
*
* Side effects:
* None.
*
*----------------------------------------------------------------------------
*/
static void
do_free_skb(struct sk_buff *skb)
{
vmklnx_kmem_cache_free(skb->cache, get_LinSkb(skb));
}
/**
* __kfree_skb - private function
* @skb: buffer
*
* Free an sk_buff. Release anything attached to the buffer.
* Clean the state. This is an internal helper function. Users should
* always call kfree_skb.
*
* RETURN VALUE:
* None
*/
/* _VMKLNX_CODECHECK_: __kfree_skb */
void
__kfree_skb(struct sk_buff *skb)
{
if (unlikely(!atomic_dec_and_test(&skb->users))) {
return;
}
skb_release_data(skb);
do_free_skb(skb);
}
EXPORT_SYMBOL(__kfree_skb);
/*
*----------------------------------------------------------------------------
*
* skb_debug_info --
* Debug function to print contents of a socket buffer.
*
* Results:
* None.
*
* Side effects:
* None.
*----------------------------------------------------------------------------
*/
void
skb_debug_info(struct sk_buff *skb)
{
int f;
skb_frag_t *frag;
printk(KERN_ERR "skb\n"
" head <%p>\n"
" mhead <%u>\n"
" data <%p>\n"
" tail <%p>\n"
" end <%p>\n"
" data_len <%u>\n"
" nr_frags <%u>\n"
" dataref <%u>\n"
" gso_size <%u>\n",
skb->head, skb->mhead,
skb->data, skb->tail, skb->end,
skb->data_len,
skb_shinfo(skb)->nr_frags,
atomic_read(&(skb_shinfo(skb)->dataref)),
skb_shinfo(skb)->gso_size);
for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) {
frag = &skb_shinfo(skb)->frags[f];
printk(KERN_ERR "skb frag %d\n"
" page <0x%llx>\n"
" page_offset <%u>\n"
" size <%u>\n",
f, page_to_phys(frag->page),
frag->page_offset, frag->size);
}
}
/*
* Section: Transmit path
*/
/*
*----------------------------------------------------------------------------
*
* ipv6_set_hraw --
*
* Parse an IPv6 skb to find the appropriate value for initializing
* skb->h.raw. If skb->h.raw is initialized, also sets *protocol to
* the last nexthdr found.
*
* Results:
* None
*
* Side effects:
* None
*
*----------------------------------------------------------------------------
*/
static void
ipv6_set_hraw(struct sk_buff *skb, vmk_uint8 *protocol)
{
vmk_uint8 nextHdr = skb->nh.ipv6h->nexthdr;
vmk_uint8 *nextHdrPtr = (vmk_uint8 *) (skb->nh.ipv6h + 1);
if (nextHdrPtr > skb->end) {
// this happens if the source doesn't take care to map the entire header
return;
}
// take care of most common situation:
if ((nextHdr == IPPROTO_TCP)
|| (nextHdr == IPPROTO_UDP)
|| (nextHdr == IPPROTO_ICMPV6)) {
skb->h.raw = nextHdrPtr;
(*protocol) = nextHdr;
return;
}
/*
* This will be the value if "end" not found within
* linear region.
*/
VMK_ASSERT(skb->h.raw == NULL);
do {
switch (nextHdr) {
case IPPROTO_ROUTING:
case IPPROTO_HOPOPTS:
case IPPROTO_DSTOPTS:
// continue searching
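// These extension headers encode their length in 8-octet units,
// excluding the first 8 octets (RFC 2460), hence len * 8 + 8.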
nextHdr = *nextHdrPtr;
nextHdrPtr += nextHdrPtr[1] * 8 + 8;
break;
case IPPROTO_AH:
// continue searching
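// AH encodes its length in 4-octet units minus 2 (RFC 4302), so the
// total header length is len * 4 + 8.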
nextHdr = *nextHdrPtr;
nextHdrPtr += nextHdrPtr[1] * 4 + 8;
break;
/*
* We do NOT handle the IPPROTO_FRAGMENT case here. Thus,
* if any packet has a IPv6 fragment header, this function
* will return protocol == IPPROTO_FRAGMENT and *not*
* find the L4 protocol. As the returned protocol is only
* used for TSO and CSUM cases, and a fragment header is
* not allowed in either case, this behavior is desirable,
* as it allows handling this case in the caller.
*/
default:
// not recursing
skb->h.raw = nextHdrPtr;
(*protocol) = nextHdr;
return;
break;
}
} while (nextHdrPtr < skb->end);
}
/*
*----------------------------------------------------------------------------
*
* map_pkt_to_skb --
*
* Converts PktHandle to sk_buff before handing packet to linux driver.
*
* Results:
* Returns VMK_ReturnStatus
*
* Side effects:
* This is ugly: too many memory writes per packet. We should look at
* optimizing this, perhaps with an skb cache, instead of having to
* touch 20+ variables for each packet.
*
*----------------------------------------------------------------------------
*/
static VMK_ReturnStatus
map_pkt_to_skb(struct net_device *dev,
struct netdev_queue *queue,
vmk_PktHandle *pkt,
struct sk_buff **pskb)
{
VMK_ReturnStatus status;
struct sk_buff *skb;
int i;
vmk_uint16 sgInspected = 1; /* frag 0 is the flat linear buffer */
unsigned int headLen, bytesLeft;
vmk_uint32 frameLen;
VMK_ReturnStatus ret = VMK_OK;
vmk_PktFrag frag;
vmk_Bool must_vlantag, must_tso, must_csum, pkt_ipv4;
vmk_uint8 protocol, ipVersion;
vmk_uint32 ehLen;
vmk_uint32 ipHdrLength;
skb = do_alloc_skb(dev->skb_pool, GFP_ATOMIC);
if (unlikely(skb == NULL)) {
ret = VMK_NO_MEMORY;
goto done;
}
skb->pkt = pkt;
skb->queue_mapping = queue - dev->_tx;
VMK_ASSERT(dev);
VMK_ASSERT(pkt);
#ifdef VMX86_DEBUG
{
vmk_Bool consistent;
consistent = vmk_PktCheckInternalConsistency(pkt);
VMK_ASSERT(consistent);
}
#endif
VMK_ASSERT(vmk_PktFrameMappedLenGet(pkt) > 0);
skb->head = (void *) vmk_PktFrameMappedPointerGet(pkt);
frameLen = vmk_PktFrameLenGet(pkt);
skb->len = frameLen;
skb->dev = dev;
skb->data = skb->head;
headLen = min(vmk_PktFrameMappedLenGet(pkt), frameLen);
skb->end = skb->tail = skb->head + headLen;
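/*
 * Any payload beyond the mapped linear region is attached as page
 * frags in the loop near the end of this function.
 */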
skb->mac.raw = skb->data;
must_csum = vmk_PktIsMustCsum(pkt);
status = vmk_PktInetFrameLayoutGetComponents(pkt,
&ehLen,
&ipHdrLength,
&ipVersion,
&protocol);
if (status == VMK_OK) {
/*
* Pkt has inet layout attributes associated, populate
* the skb details from the layout components.
*/
skb->nh.raw = skb->mac.raw + ehLen;
skb->h.raw = skb->nh.raw + ipHdrLength;
if (ipVersion == 4) {
const int eth_p_ip_nbo = htons(ETH_P_IP);
pkt_ipv4 = VMK_TRUE;
skb->protocol = eth_p_ip_nbo;
} else {
pkt_ipv4 = VMK_FALSE;
if (ipVersion == 6) {
skb->protocol = ETH_P_IPV6_NBO;
}
}
VMKLNX_DEBUG(3, "inet layout %d %d %d protocol %x ipVers %d proto %x "
"ipv4 %d csum %d tso %d",
ehLen,
ipHdrLength,
vmk_PktInetFrameLayoutGetL4HdrLength(pkt, VMK_FALSE),
skb->protocol,
protocol,
pkt_ipv4,
ipVersion,
must_csum,
vmk_PktIsLargeTcpPacket(pkt));
} else {
struct ethhdr *eh;
eh = (struct ethhdr *) skb->head;
ehLen = eth_header_len(eh);
skb->nh.raw = skb->mac.raw + ehLen;
skb->protocol = eth_header_frame_type(eh);
sgInspected = 1;
if (eth_header_is_ipv4(eh)) {
if (headLen < ehLen + sizeof(*skb->nh.iph)) {
ret = VMK_FAILURE;
goto done;
}
skb->h.raw = skb->nh.raw + skb->nh.iph->ihl*4;
pkt_ipv4 = VMK_TRUE;
protocol = skb->nh.iph->protocol;
} else {
pkt_ipv4 = VMK_FALSE;
protocol = 0xff; // unused value.
if (skb->protocol == ETH_P_IPV6_NBO) {
ipv6_set_hraw(skb, &protocol);
VMKLNX_DEBUG(3, "ipv6 %ld offset %ld %d",
skb->h.raw - (vmk_uint8 *)(skb->data),
skb->h.raw - (vmk_uint8 *)(skb->nh.ipv6h), protocol);
}
}
}
VMKLNX_DEBUG(10, "head: %u bytes at VA 0x%p", headLen, skb->head);
/*
* See if the packet requires VLAN tagging
*/
must_vlantag = vmk_PktMustVlanTag(pkt);
if (must_vlantag) {
vmk_VlanID vlanID;
vmk_VlanPriority priority;
VMKLNX_DEBUG(2, "%s: tx vlan tag %u present with priority %u",
dev->name, vmk_PktVlanIDGet(pkt), vmk_PktPriorityGet(pkt));
vlanID = vmk_PktVlanIDGet(pkt);
priority = vmk_PktPriorityGet(pkt);
vlan_put_tag(skb, vlanID | (priority << VLAN_1PTAG_SHIFT));
}
/*
* See if the packet requires checksum offloading or TSO
*/
must_tso = vmk_PktIsLargeTcpPacket(pkt);
if (must_tso) {
vmk_uint32 tsoMss = vmk_PktGetLargeTcpPacketMss(pkt);
unsigned short inetHdrLen;
/*
* backends should check the tsoMss before setting MUST_TSO flag
*/
VMK_ASSERT(tsoMss);
if (!pkt_ipv4 &&
(skb->protocol != ntohs(ETH_P_IPV6))) {
static uint32_t throttle = 0;
VMKLNX_THROTTLED_WARN(throttle,
"%s: non-ip packet with TSO (proto=0x%x)",
dev->name,
skb->protocol);
ret = VMK_FAILURE;
goto done;
}
if (!skb->h.raw || (protocol != IPPROTO_TCP)) {
/*
* This check will also trigger for IPv6 packets that
* have a fragment header, as ipv6_set_hraw() sets protocol
* to IPPROTO_FRAGMENT.
*/
static uint32_t throttle = 0;
VMKLNX_THROTTLED_WARN(throttle,
"%s: non-tcp packet with TSO (ip%s, proto=0x%x, hraw=%p)",
dev->name,
pkt_ipv4 ? "v4" : "v6",
protocol, skb->h.raw);
ret = VMK_FAILURE;
goto done;
}
/*
* Perform some sanity checks on TSO frames, because buggy and/or
* malicious guests might generate invalid packets which may wedge
* the physical hardware if we let them through.
*/
inetHdrLen = (skb->h.raw + tcp_hdrlen(skb)) - skb->nh.raw;
// Reject if the frame doesn't require TSO in the first place
if (unlikely(frameLen - ehLen - inetHdrLen <= tsoMss)) {
static uint32_t throttle = 0;
VMKLNX_THROTTLED_WARN(throttle,
"%s: runt TSO packet (tsoMss=%d, frameLen=%d)",
dev->name, tsoMss, frameLen);
ret = VMK_FAILURE;
goto done;
}
// Reject if segmented frame will exceed MTU
if (unlikely(tsoMss + inetHdrLen > dev->mtu)) {
static uint32_t logThrottleCounter = 0;
VMKLNX_THROTTLED_WARN(logThrottleCounter,
"%s: oversized tsoMss: %d, mtu=%d",
dev->name, tsoMss, dev->mtu);
ret = VMK_FAILURE;
goto done;
}
skb_shinfo(skb)->gso_size = tsoMss;
skb_shinfo(skb)->gso_segs = (skb->len + tsoMss - 1) / tsoMss;
skb_shinfo(skb)->gso_type = pkt_ipv4 ? SKB_GSO_TCPV4 : SKB_GSO_TCPV6;
/*
* If congestion window has been reduced due to the
* previous TCP segment
*/
if (unlikely(skb->h.th->cwr == 1)) {
skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN;
}
} else {
/*
* We are dropping packets that are larger than the MTU of the NIC
* since they could potentially wedge the NIC or PSOD in the driver.
*/
if (unlikely(frameLen - ehLen > dev->mtu)) {
static uint32_t linuxTxWarnCounter;
VMKLNX_THROTTLED_WARN(linuxTxWarnCounter,
"%s: %d bytes packet couldn't be sent (mtu=%d)",
dev->name, frameLen, dev->mtu);
ret = VMK_FAILURE;
goto done;
}
}
if (must_csum || must_tso) {
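/*
 * With CHECKSUM_HW, skb->csum carries the offset of the checksum
 * field within the L4 header (16 for TCP, 6 for UDP) so the device
 * knows where to write the computed checksum.
 */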
switch (protocol) {
case IPPROTO_TCP:
skb->csum = 16;
skb->ip_summed = CHECKSUM_HW;
break;
case IPPROTO_UDP:
skb->csum = 6;
skb->ip_summed = CHECKSUM_HW;
break;
/*
* XXX add cases for other protos once we use NETIF_F_HW_CSUM
* in some device. I think the e1000 can do it, but the Intel
* driver doesn't advertise so.
*/
default:
VMKLNX_DEBUG(0, "%s: guest driver requested xsum offload on "
"unsupported type %d", dev->name, protocol);
ret = VMK_FAILURE;
goto done;
}
VMK_ASSERT(skb->h.raw);
} else {
skb->ip_summed = CHECKSUM_NONE; // XXX: for now
}
bytesLeft = frameLen - headLen;
for (i = sgInspected; bytesLeft > 0; i++) {
skb_frag_t *skb_frag;
if (unlikely(i - sgInspected >= MAX_SKB_FRAGS)) {
static uint32_t fragsThrottleCounter = 0;
VMKLNX_THROTTLED_INFO(fragsThrottleCounter,
"too many frags (> %u) bytesLeft %d",
MAX_SKB_FRAGS, bytesLeft);
#ifdef VMX86_DEBUG
VMK_ASSERT(VMK_FALSE);
#endif
ret = VMK_FAILURE;
goto done;
}
if (vmk_PktFragGet(pkt, &frag, i) != VMK_OK) {
ret = VMK_FAILURE;
goto done;
}
skb_frag = &skb_shinfo(skb)->frags[i - sgInspected];
/* Use skb_frag->page to store the page and skb_frag->page_offset
for the offset within that page. */
skb_frag->page = phys_to_page(frag.addr);
skb_frag->page_offset = offset_in_page(frag.addr);
skb_frag->size = min(frag.length, bytesLeft);
VMKLNX_DEBUG(10, "frag: %u bytes at MA 0x%llx",
skb_frag->size, page_to_phys(skb_frag->page) + skb_frag->page_offset);
skb->data_len += skb_frag->size;
bytesLeft -= skb_frag->size;
skb_shinfo(skb)->nr_frags++;
vmk_MAAssertIOAbility(frag.addr, frag.length);
}
/*
* Those frags are VMkernel's buffers. Nothing special to do in the
* Vmklinux layer for completion.
*/
vmklnx_set_skb_frags_owner_vmkernel(skb);
if (VMKLNX_STRESS_DEBUG_COUNTER(stressNetIfCorruptTx)) {
LinStress_CorruptSkbData(skb, 60, 0);
}
done:
if ((ret != VMK_OK) && (skb != NULL)) {
do_free_skb(skb);
skb = NULL;
}
*pskb = skb;
return ret;
}
/*
*----------------------------------------------------------------------------
*
* netdev_pick_tx_queue --
*
* Pick device tx subqueue for transmission. The upper layers must ensure
* that all packets in pktList are destined for the same queue.
*
* Results:
* pointer to netdev_queue
*
* Side effects:
* None.
*
*----------------------------------------------------------------------------
*/
static inline struct netdev_queue *
netdev_pick_tx_queue(struct net_device *dev, vmk_NetqueueQueueID vmkqid)
{
int queue_idx = 0;
vmknetddi_queueops_queueid_t qid = VMKNETDDI_QUEUEOPS_INVALID_QUEUEID;
VMK_ReturnStatus status;
if (!vmkqid) {
goto out;
}
status = marshall_from_vmknetq_id(vmkqid, &qid);
VMK_ASSERT(status == VMK_OK);
if (status == VMK_OK) {
queue_idx = VMKNETDDI_QUEUEOPS_QUEUEID_VAL(qid);
if (unlikely(queue_idx >= dev->real_num_tx_queues ||
queue_idx >= dev->num_tx_queues)) {
queue_idx = 0;
}
}
out:
VMK_ASSERT(queue_idx < dev->num_tx_queues);
VMK_ASSERT(queue_idx >= 0);
return &dev->_tx[queue_idx];
}
/*
*----------------------------------------------------------------------------
*
* netdev_tx --
*
* Transmit packets
*
* Results:
* VMK_ReturnStatus indicating the outcome.
*
* Side effects:
* None.
*
*----------------------------------------------------------------------------
*/
static VMK_ReturnStatus
netdev_tx(struct net_device *dev,
vmk_PktList pktList,
vmk_NetqueueQueueID vmkqid)
{
VMK_ReturnStatus ret = VMK_OK;
VMK_PKTLIST_STACK_DEF_INIT(freeList);
vmk_uint32 pktsCount;
vmk_PktHandle *pkt;
struct sk_buff *skb;
struct netdev_queue *queue;
queue = netdev_pick_tx_queue(dev, vmkqid);
VMK_ASSERT(queue);
if (VMKLNX_STRESS_DEBUG_COUNTER(stressNetIfFailTxAndStopQueue)) {
netif_tx_stop_queue(queue);
}
if (unlikely(test_bit(__LINK_STATE_BLOCKED, &dev->state)) ||
VMKLNX_STRESS_DEBUG_COUNTER(stressNetIfFailHardTx)) {
vmk_PktListAppend(freeList, pktList);
goto out;
}
spin_lock(&queue->_xmit_lock);
while (!vmk_PktListIsEmpty(pktList)) {
int xmit_status = -1;
VMK_ReturnStatus mapRet = VMK_OK;
VMK_ASSERT(dev->flags & IFF_UP);
/*
* Queue state can change even before the device is opened!
* Upper layers have no way of knowing about it until after
* the device is opened. All we can do is check for a stopped
* queue here and return the appropriate error.
*/
if (unlikely(netif_tx_queue_stopped(queue))) {
spin_unlock(&queue->_xmit_lock);
ret = VMK_BUSY;
goto out;
}
if (VMKLNX_STRESS_DEBUG_COUNTER(stressNetIfFailHardTx)) {
pkt = vmk_PktListPopFirstPkt(pktList);
VMK_ASSERT(pkt);
VMKLNX_DEBUG(1, "Failing Hard Transmit. pkt = %p, device = %s\n",
pkt, dev->name);
vmk_PktListAppendPkt(freeList, pkt);
continue;
}
pkt = vmk_PktListPopFirstPkt(pktList);
VMK_ASSERT(pkt);
mapRet = map_pkt_to_skb(dev, queue, pkt, &skb);
if (unlikely(mapRet != VMK_OK)) {
#if defined(VMX86_LOG)
static uint32_t logThrottleCounter = 0;
#endif
VMKLNX_THROTTLED_DEBUG(logThrottleCounter, 0,
"%s: Unable to map packet to skb (%s). Dropping",
dev->name, vmk_StatusToString(mapRet));
vmk_PktListAppendPkt(freeList, pkt);
continue;
}
VMKAPI_MODULE_CALL(dev->module_id, xmit_status,
*dev->hard_start_xmit, skb, dev);
if (unlikely(xmit_status != NETDEV_TX_OK)) {
spin_unlock(&queue->_xmit_lock);
VMKLNX_DEBUG(1, "hard_start_xmit failed (status %d; Q stopped %d. "
"Queuing packet. pkt=%p dev=%s\n",
xmit_status, netif_tx_queue_stopped(queue),
skb->pkt, dev->name);
/*
* Destroy the skb and its resources, but not the packet handle
* itself: the extra fragsref below keeps skb_release_data() from
* releasing the pkt so it can be requeued.
*/
atomic_inc(&(skb_shinfo(skb)->fragsref));
dev_kfree_skb_any(skb);
/*
* sticking pkt back this way may cause tx re-ordering,
* but this should be very rare.
*/
vmk_PktListAppendPkt(pktList, pkt);
if (xmit_status == NETDEV_TX_BUSY) {
ret = VMK_BUSY;
} else {
ret = VMK_FAILURE;
}
goto out;
}
dev->linnet_tx_packets++;
}
spin_unlock(&queue->_xmit_lock);
out:
/*
* Free whatever could not be txed
*/
pktsCount = vmk_PktListGetCount(freeList);
if (unlikely(pktsCount)) {
dev->linnet_tx_dropped += pktsCount;
vmk_PktListReleaseAllPkts(freeList);
}
return ret;
}
/*
* Section: Control operations and queue management
*/
void __netif_schedule(struct netdev_queue *queue)
{
//XXX: does nothing. scheduling is done by the vmkernel now.
}
EXPORT_SYMBOL(__netif_schedule);
/*
*----------------------------------------------------------------------------
*
* vmklnx_netif_start_tx_queue --
*
* Notify the uplink layer that this tx queue has been started.
*
* Results:
* None.
*
* Side effects:
* None.
*
*----------------------------------------------------------------------------
*/
void
vmklnx_netif_start_tx_queue(struct netdev_queue *queue)
{
struct net_device *dev = queue->dev;
u16 qidx = queue - dev->_tx;
VMK_ASSERT(qidx < dev->num_tx_queues);
if (dev->uplinkDev) {
struct tx_netqueue_info *txinfo = dev->tx_netqueue_info;
VMK_ASSERT(txinfo);
if (txinfo[qidx].valid) {
VMK_ASSERT(txinfo[qidx].vmkqid != VMK_NETQUEUE_INVALID_QUEUEID);
vmk_UplinkQueueStart(dev->uplinkDev, txinfo[qidx].vmkqid);
}
}
}
EXPORT_SYMBOL(vmklnx_netif_start_tx_queue);
/*
*----------------------------------------------------------------------------
*
* vmklnx_netif_stop_tx_queue --
*
* Notify the uplink layer that this tx queue has been stopped.
*
* Results:
* None.
*
* Side effects:
* None.
*
*----------------------------------------------------------------------------
*/
void
vmklnx_netif_stop_tx_queue(struct netdev_queue *queue)
{
struct net_device *dev = queue->dev;
u16 qidx = queue - dev->_tx;
VMK_ASSERT(qidx < dev->num_tx_queues);
if (dev->uplinkDev) {
struct tx_netqueue_info *txinfo = dev->tx_netqueue_info;
VMK_ASSERT(txinfo);
if (txinfo[qidx].valid) {
VMK_ASSERT(txinfo[qidx].vmkqid != VMK_NETQUEUE_INVALID_QUEUEID);
vmk_UplinkQueueStop(dev->uplinkDev, txinfo[qidx].vmkqid);
}
}
}
EXPORT_SYMBOL(vmklnx_netif_stop_tx_queue);
/*
*----------------------------------------------------------------------------
*
* vmklnx_netif_set_poll_cna --
*
* Change net poll routine to do CNA processing.
*
* Results:
* None.
*
* Side effects:
* None.
*
*----------------------------------------------------------------------------
*/
void
vmklnx_netif_set_poll_cna(struct napi_struct *napi)
{
if (napi->net_poll) {
vmk_NetPollProperties pollInit;
if (napi->net_poll_type == NETPOLL_BACKUP) {
pollInit.poll = netdev_poll;
pollInit.priv = napi->dev;
pollInit.deliveryCallback = LinuxCNADev_Poll;
} else {
pollInit.poll = napi_poll;
pollInit.priv = napi;
pollInit.deliveryCallback = LinuxCNA_Poll;
}
pollInit.features = VMK_NETPOLL_CUSTOM_DELIVERY_CALLBACK;
vmk_NetPollChangeCallback(napi->net_poll, &pollInit);
}
}
EXPORT_SYMBOL(vmklnx_netif_set_poll_cna);
/**
* dev_close - shutdown an interface.
* @dev: device to shutdown
*
* This function moves an active device into down state. The device's
* private close function is invoked.
*
* ESX Deviation Notes:
* netdev notifier chain is not called.
*
* RETURN VALUE:
* 0
*/
/* _VMKLNX_CODECHECK_: dev_close */
int
dev_close(struct net_device *dev)
{
unsigned int i;
ASSERT_RTNL();
#ifdef VMX86_DEBUG
{
VMK_ASSERT(test_bit(__LINK_STATE_START, &dev->state));
VMK_ASSERT(dev->flags & IFF_UP);
}
#endif
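/*
 * Wait for any in-flight transmitters to release the per-queue xmit
 * locks before telling the driver to stop.
 */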
for (i = 0; i < dev->num_tx_queues; i++) {
struct netdev_queue *queue = &dev->_tx[i];
spin_unlock_wait(&queue->_xmit_lock);
}
clear_bit(__LINK_STATE_START, &dev->state);
smp_mb__after_clear_bit(); /* Commit netif_running(). */
if (dev->stop) {
VMKLNX_DEBUG(0, "Calling device stop %p", dev->stop);
VMKAPI_MODULE_CALL_VOID(dev->module_id, dev->stop, dev);
VMKLNX_DEBUG(0, "Device stopped");
}
dev->flags &= ~IFF_UP;
return 0;
}
EXPORT_SYMBOL(dev_close);
/*
*----------------------------------------------------------------------------
*
* init_watchdog_timeo --
*
* Init watchdog timeout
*
* Results:
* None.
*
* Side effects:
* None.
*
*----------------------------------------------------------------------------
*/
static void
init_watchdog_timeo(struct net_device *dev)
{
if (dev->tx_timeout) {
if (dev->watchdog_timeo <= 0) {
dev->watchdog_timeo = WATCHDOG_DEF_TIMEO;
}
dev->watchdog_timeohit_period_start = jiffies;
dev->watchdog_timeohit_cnt = 0;
}
}
/**
* dev_open - prepare an interface for use.
* @dev: device to open
*
* Takes a device from down to up state. The device's private open
* function is invoked.
*
* ESX Deviation Notes:
* Device's notifier chain is not called.
* Device is put in promiscuous mode after it is opened unless it is
* a passthru device, in which case RX filters are pushed through the
* passthru APIs.
*
* Calling this function on an active interface is a nop. On a failure
* a negative errno code is returned.
*
* RETURN VALUE:
* 0 on success
* negative error code returned by the device on error
*
*/
/* _VMKLNX_CODECHECK_: dev_open */
int
dev_open(struct net_device *dev)
{
int ret = 0;
ASSERT_RTNL();
if (dev->flags & IFF_UP) {
return 0;
}
set_bit(__LINK_STATE_START, &dev->state);
if (dev->open) {
VMKAPI_MODULE_CALL(dev->module_id, ret, dev->open, dev);
if (ret == 0) {
VMKLNX_DEBUG(0, "%s opened successfully\n", dev->name);
dev->flags |= IFF_UP;
if (!(dev->features & NETIF_F_CNA)) {
init_watchdog_timeo(dev);
if (!dev->pt_ops) {
/*
* Regular uplinks are put in promiscuous mode.
*/
dev->flags |= IFF_PROMISC;
} else {
/*
* Passthru devices should not be in promiscuous mode:
*
* UPT: device is used only for one vNIC, vf_set_mc,
* vf_set_rx_mode and vf_set_multicast are used to
* program filtering.
*
* NPA: device has embedded l2 switching and adds filter
* for every unicast MAC address on the vSwitch.
* pf_add_mac_filter/pf_del_mac_filter and pf_mirror_all
* are used to program filtering.
*
* However, for NPA, device must be in all-multi mode.
*/
if (!(dev->features & NETIF_F_UPT)) {
dev->flags |= IFF_ALLMULTI;
}
}
VMKLNX_DEBUG(0, "%s set_multi %x %lx %p\n", dev->name, dev->flags, dev->features, dev->pt_ops);
if (dev->set_multicast_list) {
VMKAPI_MODULE_CALL_VOID(dev->module_id,
dev->set_multicast_list,
dev);
}
} else {
/* unblock the device */
clear_bit(__LINK_STATE_BLOCKED, &dev->state);
}
} else {
clear_bit(__LINK_STATE_START, &dev->state);
}
}
return ret;
}
EXPORT_SYMBOL(dev_open);
/*
*----------------------------------------------------------------------------
*
* vmklnx_free_netdev
*
* Internal implementation of free_netdev, frees net_device and associated
structures. The exposed version of free_netdev is an inline because it
* touches driver private data structs.
*
* Results:
* None.
*
* Side effects:
* None.
*
*----------------------------------------------------------------------------
*/
void
vmklnx_free_netdev(struct kmem_cache_s *pmCache, struct net_device *dev)
{
LinNetDev *linDev = get_LinNetDev(dev);
if (dev->skb_pool) {
dev->skb_pool = NULL;
}
kfree(dev->tx_netqueue_info);
kfree(dev->_tx);
kfree((char *)linDev - linDev->padded);
}
EXPORT_SYMBOL(vmklnx_free_netdev);
static void
netdev_init_one_queue(struct net_device *dev,
struct netdev_queue *queue,
void *_unused)
{
queue->dev = dev;
}
static void
netdev_init_queues(struct net_device *dev)
{
netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL);
}
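/*
*----------------------------------------------------------------------------
*
* vmklnx_alloc_netdev_mq --
*
*    Allocate a net_device with queue_count tx queues plus the driver's
*    private area, then run the supplied setup routine on it. Drivers
*    normally reach this through the alloc_netdev()/alloc_etherdev()
*    wrappers rather than calling it directly.
*
*    A minimal usage sketch (hypothetical driver code; struct my_priv is
*    an assumed example type, not defined here):
*
*       struct net_device *dev;
*
*       dev = vmklnx_alloc_netdev_mq(THIS_MODULE, sizeof(struct my_priv),
*                                    "vmnic%d", ether_setup, 4);
*       if (dev == NULL)
*          return -ENOMEM;
*       // fill in dev->open, dev->stop, ... then call register_netdev(dev)
*
* Results:
*    Pointer to the new net_device, or NULL if any allocation fails.
*
* Side effects:
*    Invokes the driver's setup callback on the new device.
*
*----------------------------------------------------------------------------
*/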
struct net_device *
vmklnx_alloc_netdev_mq(struct module *this_module,
int sizeof_priv,
const char *name,
void (*setup)(struct net_device *),
unsigned int queue_count)
{
int i;
LinNetDev *linDev;
struct netdev_queue *tx;
struct net_device *dev;
int alloc_size;
void *p;
struct tx_netqueue_info *tx_netqueue_info;
VMK_ASSERT(this_module->skb_cache);
VMK_ASSERT(this_module->moduleID != 0 && this_module->moduleID != VMK_INVALID_MODULE_ID);
BUG_ON(strlen(name) >= sizeof(dev->name));
alloc_size = sizeof(struct LinNetDev);
if (sizeof_priv) {
/* ensure 32-byte alignment of private area */
alloc_size = (alloc_size + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST;
alloc_size += sizeof_priv;
}
/* ensure 32-byte alignment of whole construct */
alloc_size += NETDEV_ALIGN_CONST;
p = kzalloc(alloc_size, GFP_KERNEL);
if (!p) {
printk(KERN_ERR "alloc_netdev: Unable to allocate device.\n");
return NULL;
}
linDev = (LinNetDev *)
(((long)p + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST);
linDev->padded = (char *)linDev - (char *)p;
tx = kzalloc(sizeof(struct netdev_queue) * queue_count, GFP_KERNEL);
if (!tx) {
printk(KERN_ERR "alloc_netdev: Unable to allocate "
"tx qdiscs.\n");
kfree(p);
return NULL;
}
alloc_size = sizeof (struct tx_netqueue_info) * queue_count;
tx_netqueue_info = kzalloc(alloc_size, GFP_KERNEL);
if (!tx_netqueue_info) {
printk(KERN_ERR "alloc_netdev: Unable to allocate tx_netqueue_info.\n");
kfree(tx);
kfree(p);
return NULL;
}
/* make default queue valid */
tx_netqueue_info[0].valid = VMK_TRUE;
tx_netqueue_info[0].vmkqid = VMK_NETQUEUE_DEFAULT_QUEUEID;
for (i = 1; i < queue_count; i++) {
tx_netqueue_info[i].valid = VMK_FALSE;
tx_netqueue_info[i].vmkqid = VMK_NETQUEUE_INVALID_QUEUEID;
}
dev = &linDev->linNetDev;
dev->skb_pool = this_module->skb_cache;
dev->_tx = tx;
dev->num_tx_queues = queue_count;
dev->real_num_tx_queues = queue_count;
dev->tx_netqueue_info = tx_netqueue_info;
if (sizeof_priv) {
dev->priv = ((char *)dev +
((sizeof(struct net_device) + NETDEV_ALIGN_CONST)
& ~NETDEV_ALIGN_CONST));
}
netdev_init_queues(dev);
dev->module_id = this_module->moduleID;
INIT_LIST_HEAD(&dev->napi_list);
spin_lock_init(&dev->napi_lock);
set_bit(__NETQUEUE_STATE, (void*)&dev->netq_state);
VMKAPI_MODULE_CALL_VOID(dev->module_id, setup, dev);
strcpy(dev->name, name);
return dev;
}
EXPORT_SYMBOL(vmklnx_alloc_netdev_mq);
#ifndef ARPHRD_ETHER
#define ARPHRD_ETHER 1 /* Ethernet 10Mbps. */
#endif
/**
* ether_setup - setup the given Ethernet network device
* @dev: network device
*
* Initializes fields of the given network device with Ethernet-generic
* values
*
* ESX Deviation Notes:
* This function does not initialize any function pointers in the
* given net_device
*
* RETURN VALUE:
* This function does not return a value
*/
/* _VMKLNX_CODECHECK_: ether_setup */
void
ether_setup(struct net_device *dev)
{
dev->type = ARPHRD_ETHER;
dev->hard_header_len = ETH_HLEN; /* XXX should this include 802.1pq? */
dev->mtu = ETH_DATA_LEN; /* eth_mtu */
dev->addr_len = ETH_ALEN;
/* XXX */
dev->tx_queue_len = 100; /* Ethernet wants good queues */
memset(dev->broadcast, 0xFF, ETH_ALEN);
dev->flags = IFF_BROADCAST|IFF_MULTICAST;
}
EXPORT_SYMBOL(ether_setup);
/**
* netif_device_attach - mark device as attached
* @dev: network device
*
* Mark device as attached from system and restart if needed.
*
* RETURN VALUE:
* None
*/
/* _VMKLNX_CODECHECK_: netif_device_attach */
void
netif_device_attach(struct net_device *dev)
{
if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) &&
netif_running(dev)) {
netif_tx_wake_all_queues(dev);
__netdev_watchdog_up(dev);
}
}
EXPORT_SYMBOL(netif_device_attach);
/**
* netif_device_detach - mark device as removed
* @dev: network device
*
* Mark device as removed from system and therefore no longer available.
*
* RETURN VALUE:
* None
*/
/* _VMKLNX_CODECHECK_: netif_device_detach */
void
netif_device_detach(struct net_device *dev)
{
if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) &&
netif_running(dev)) {
netif_tx_stop_all_queues(dev);
}
}
EXPORT_SYMBOL(netif_device_detach);
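/*
* A sketch of how the two calls above are typically paired in a driver's
* suspend/resume path (hypothetical code, for illustration only):
*
*    static int my_suspend(struct pci_dev *pdev, pm_message_t state)
*    {
*       struct net_device *dev = pci_get_drvdata(pdev);
*       netif_device_detach(dev);   // stop tx queues, mark not present
*       return 0;
*    }
*
*    static int my_resume(struct pci_dev *pdev)
*    {
*       struct net_device *dev = pci_get_drvdata(pdev);
*       netif_device_attach(dev);   // mark present, restart tx if running
*       return 0;
*    }
*/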
static void
__netdev_init_queue_locks_one(struct net_device *dev,
struct netdev_queue *queue,
void *_unused)
{
VMK_ReturnStatus status;
struct netdev_soft_queue *softq = &queue->softq;
spin_lock_init(&queue->_xmit_lock);
queue->xmit_lock_owner = -1;
queue->processing_tx = 0;
spin_lock_init(&softq->queue_lock);
softq->state = 0;
softq->outputList = (vmk_PktList) vmk_HeapAlloc(vmklnxLowHeap,
vmk_PktListSizeInBytes);
if (softq->outputList == NULL) {
VMK_ASSERT(VMK_FALSE);
return;
}
vmk_PktListInit(softq->outputList);
status = vmk_ConfigParamGetUint(maxNetifTxQueueLenConfigHandle,
&softq->outputListMaxSize);
VMK_ASSERT(status == VMK_OK);
}
/*
*----------------------------------------------------------------------------
*
* netdev_init_queue_locks --
*
* Init device queues locks.
*
* Results:
* None.
*
* Side effects:
* None.
*
*----------------------------------------------------------------------------
*/
static void
netdev_init_queue_locks(struct net_device *dev)
{
netdev_for_each_tx_queue(dev, __netdev_init_queue_locks_one, NULL);
}
static void
__netdev_destroy_queue_locks_one(struct net_device *dev,
struct netdev_queue *queue,
void *_unused)
{
struct netdev_soft_queue *softq = &queue->softq;
vmk_HeapFree(vmklnxLowHeap, softq->outputList);
}
/*
*----------------------------------------------------------------------------
*
* netdev_destroy_queue_locks --
*
* Destroy device queues locks.
*
* Results:
* None.
*
* Side effects:
* None.
*
*----------------------------------------------------------------------------
*/
static void
netdev_destroy_queue_locks(struct net_device *dev)
{
netdev_for_each_tx_queue(dev, __netdev_destroy_queue_locks_one, NULL);
}
/*
*----------------------------------------------------------------------------
*
* netdev_ioctl --
* Process an ioctl request for a given device.
*
* Results:
* VMK_ReturnStatus indicating the outcome.
*
* Side effects:
* None.
*
*----------------------------------------------------------------------------
*/
static VMK_ReturnStatus
netdev_ioctl(struct net_device *dev, uint32_t cmd, void *args, uint32_t *result,
vmk_IoctlCallerSize callerSize, vmk_Bool callerHasRtnlLock)
{
VMK_ReturnStatus ret = VMK_OK;
VMK_ASSERT(dev);
if (args && result) {
if (cmd == SIOCGIFHWADDR) {
struct ifreq *ifr = args;
memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr, 6);
ifr->ifr_hwaddr.sa_family = dev->type;
*result = 0;
return VMK_OK;
}
if (cmd == SIOCETHTOOL) {
struct ifreq *ifr = args;
if (callerHasRtnlLock == VMK_FALSE) {
rtnl_lock();
}
ret = vmklnx_ethtool_ioctl(dev, ifr, result, callerSize);
/* Some drivers call dev_close() when ethtool ops like .set_ringparam fail.
* The following check will update dev->gflags accordingly to avoid a second
* dev_close() when CloseNetDev() is called.
*/
if (ret && !(dev->flags & IFF_UP))
dev->gflags &= ~IFF_DEV_IS_OPEN;
if (callerHasRtnlLock == VMK_FALSE) {
rtnl_unlock();
}
return ret;
}
if (dev->do_ioctl) {
if (callerHasRtnlLock == VMK_FALSE) {
rtnl_lock();
}
VMKAPI_MODULE_CALL(dev->module_id, *result, dev->do_ioctl, dev,
args, cmd);
if (callerHasRtnlLock == VMK_FALSE) {
rtnl_unlock();
}
ret = VMK_OK;
} else {
ret = VMK_NOT_SUPPORTED;
}
} else {
VMKLNX_DEBUG(0, "net_device: %p, cmd: 0x%x, args: %p, result: %p",
dev, cmd, args, result);
ret = VMK_FAILURE;
}
return ret;
}
/*
*----------------------------------------------------------------------------
*
* link_state_work_cb --
*
* Periodic work function to check the status of the various physical NICs.
*
* Results:
* None.
*
* Side effects:
* None.
*
*----------------------------------------------------------------------------
*/
static void
link_state_work_cb(struct work_struct *work)
{
struct net_device *cur;
uint32_t result;
unsigned speed = 0, duplex = 0, linkState = 0;
VMK_ReturnStatus status;
unsigned newLinkStateTimerPeriod;
struct ethtool_cmd *cmd;
cmd = compat_alloc_user_space(sizeof(*cmd));
if (cmd == NULL) {
VMKLNX_WARN("Aborting link state watchdog due to compat_alloc_user_space() failure.");
goto reschedule_work;
}
/*
* Since the ethtool ioctls require the rtnl_lock,
* we should acquire the lock first before getting
* dev_base_lock. This is the order used by other
* code paths that require both locks.
*/
rtnl_lock();
write_lock(&dev_base_lock);
cur = dev_base;
while (cur) {
struct ifreq ifr;
vmk_Bool link_changed = VMK_FALSE;
memset(&ifr, 0, sizeof(ifr));
memcpy(ifr.ifr_name, cur->name, sizeof(ifr.ifr_name));
/* get link speed and duplexity */
put_user(ETHTOOL_GSET, &cmd->cmd);
ifr.ifr_data = (void *) cmd;
if (netdev_ioctl(cur, SIOCETHTOOL, &ifr, &result,
VMK_IOCTL_CALLER_64, VMK_TRUE) == VMK_OK) {
get_user(speed, &cmd->speed);
get_user(duplex, &cmd->duplex);
}
/* get link state */
put_user(ETHTOOL_GLINK, &cmd->cmd);
ifr.ifr_data = (void *) cmd;
if (netdev_ioctl(cur, SIOCETHTOOL, &ifr, &result,
VMK_IOCTL_CALLER_64, VMK_TRUE) == VMK_OK) {
struct ethtool_value value;
copy_from_user(&value, cmd, sizeof(struct ethtool_value));
linkState = value.data ? VMKLNX_UPLINK_LINK_UP :
VMKLNX_UPLINK_LINK_DOWN;
}
/* set speed, duplexity and link state if changed */
if (cur->link_state != linkState) {
cur->link_state = linkState;
link_changed = VMK_TRUE;
if (linkState == VMKLNX_UPLINK_LINK_DOWN) {
/* Tell people we are going down */
call_netdevice_notifiers(NETDEV_GOING_DOWN, cur);
} else {
call_netdevice_notifiers(NETDEV_UP, cur);
}
netif_toggled_clear(cur);
} else if (netif_carrier_ok(cur)) {
if (netif_toggled_test_and_clear(cur)) {
/* Tell people we had a link flap */
VMKLNX_DEBUG(0, "link flap on %s", cur->name);
call_netdevice_notifiers(NETDEV_GOING_DOWN, cur);
call_netdevice_notifiers(NETDEV_UP, cur);
}
}
if (netif_carrier_ok(cur)) {
if (cur->full_duplex != duplex) {
cur->full_duplex = duplex;
link_changed = VMK_TRUE;
}
if (cur->link_speed != speed) {
cur->link_speed = speed;
link_changed = VMK_TRUE;
}
}
if (link_changed) {
SetNICLinkStatus(cur);
}
cur = cur->next;
}
write_unlock(&dev_base_lock);
rtnl_unlock();
reschedule_work:
status = vmk_ConfigParamGetUint(linkStateTimerPeriodConfigHandle,
&newLinkStateTimerPeriod);
VMK_ASSERT(status == VMK_OK);
if (linkStateTimerPeriod != newLinkStateTimerPeriod) {
linkStateTimerPeriod = newLinkStateTimerPeriod;
}
schedule_delayed_work(&linkStateWork,
msecs_to_jiffies(linkStateTimerPeriod));
/* Periodic update of the LRO config option */
status = vmk_ConfigParamGetUint(vmklnxLROEnabledConfigHandle,
&vmklnxLROEnabled);
VMK_ASSERT(status == VMK_OK);
status = vmk_ConfigParamGetUint(vmklnxLROMaxAggrConfigHandle,
&vmklnxLROMaxAggr);
VMK_ASSERT(status == VMK_OK);
}
/*
*----------------------------------------------------------------------------
*
* netdev_watchdog --
*
* Device watchdog
*
* Results:
* None.
*
* Side effects:
* None.
*
*----------------------------------------------------------------------------
*/
static void
netdev_watchdog(struct net_device *dev)
{
int some_queue_stopped = 0;
netif_tx_lock(dev);
if (netif_device_present(dev) &&
/* don't bother if the device is being closed */
netif_running(dev) &&
/* only after the device is opened */
(dev->flags & IFF_UP) &&
netif_carrier_ok(dev)) {
unsigned int i;
for (i = 0; i < dev->real_num_tx_queues; i++) {
struct netdev_queue *txq;
txq = netdev_get_tx_queue(dev, i);
if (netif_tx_queue_stopped(txq)) {
some_queue_stopped = 1;
break;
}
}
if (some_queue_stopped &&
time_after(jiffies, (dev->trans_start +
dev->watchdog_timeo))) {
VMKLNX_WARN("NETDEV WATCHDOG: %s: transmit timed out", dev->name);
dev->watchdog_timeohit_stats++;
vmk_UplinkWatchdogTimeoutHit(dev->uplinkDev);
/* call driver to reset the device */
VMKAPI_MODULE_CALL_VOID(dev->module_id, dev->tx_timeout, dev);
WARN_ON_ONCE(1);
#ifdef VMX86_DEBUG
// PR 167776: Reset counter every hour or so. We'll panic
// only if we go beyond a certain number of watchdog timeouts
// in an hour.
if (time_after(jiffies,
dev->watchdog_timeohit_period_start + NETDEV_TICKS_PER_HOUR)) {
dev->watchdog_timeohit_cnt = 0;
dev->watchdog_timeohit_period_start = jiffies;
}
if (!VMKLNX_STRESS_DEBUG_OPTION(stressNetIfFailTxAndStopQueue)) {
dev->watchdog_timeohit_cnt++;
if (dev->watchdog_timeohit_cnt >= dev->watchdog_timeohit_cfg) {
dev->watchdog_timeohit_cnt = 0;
if (dev->watchdog_timeohit_panic == VMKLNX_UPLINK_WATCHDOG_PANIC_MOD_ENABLE) {
VMK_ASSERT_BUG(VMK_FALSE);
}
}
}
#endif
}
}
netif_tx_unlock(dev);
}
/*
*----------------------------------------------------------------------------
*
* watchdog_timer_cb --
*
* Watchdog timer callback for all registered devices.
*
* Results:
* None.
*
* Side effects:
* None.
*
*----------------------------------------------------------------------------
*/
static void
watchdog_work_cb(struct work_struct *work)
{
struct net_device *dev = NULL;
write_lock(&dev_base_lock);
for (dev = dev_base; dev; dev = dev->next) {
netdev_watchdog(dev);
}
write_unlock(&dev_base_lock);
schedule_delayed_work(&watchdogWork,
msecs_to_jiffies(WATCHDOG_DEF_TIMER));
}
/**
* __dev_get_by_name - find a device by its name
* @name: name to find
*
* Find an interface by name. The returned handle does not have the
* usage count incremented and the caller must be careful before using
* the handle. %NULL is returned if no matching device is found.
*
* RETURN VALUE:
* Pointer to device structure on success
* %NULL is returned if no matching device is found
*/
/* _VMKLNX_CODECHECK_: __dev_get_by_name */
struct net_device *
__dev_get_by_name(const char *name)
{
struct net_device *dev;
read_lock(&dev_base_lock);
dev = dev_base;
while (dev) {
if (!strncmp(dev->name, name, sizeof(dev->name))) {
break;
}
dev = dev->next;
}
read_unlock(&dev_base_lock);
return dev;
}
EXPORT_SYMBOL(__dev_get_by_name);
/**
* dev_get_by_name - find a device by its name
* @name: name to find
*
* Find an interface by name. The returned handle has the usage count
* incremented and the caller must use dev_put() to release it when it
* is no longer needed. %NULL is returned if no matching device is
* found.
*
* RETURN VALUE:
* Pointer to device structure on success
* %NULL is returned if no matching device is found
*/
/* _VMKLNX_CODECHECK_: dev_get_by_name */
struct net_device *
dev_get_by_name(const char *name)
{
struct net_device *dev;
dev = __dev_get_by_name(name);
if (dev) {
dev_hold(dev);
}
return dev;
}
EXPORT_SYMBOL(dev_get_by_name);
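/*
* Usage note for the lookup above: every successful dev_get_by_name()
* must be balanced by dev_put(), otherwise unregister_netdev() will spin
* waiting for the reference count to drop. A minimal sketch (hypothetical
* caller code):
*
*    struct net_device *dev = dev_get_by_name("vmnic0");
*    if (dev != NULL) {
*       // ... use dev ...
*       dev_put(dev);
*    }
*/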
/**
* dev_alloc_name - allocate a name for a device
* @dev: device
* @name: name format string
*
* Passed a format string - eg "lt%d" it will try and find a suitable
* id. It scans list of devices to build up a free map, then chooses
* the first empty slot. Returns the number of the unit assigned or
* a negative errno code.
*
* RETURN VALUE:
* Number of the unit assigned on success
* Negative errno code on error
*/
/* _VMKLNX_CODECHECK_: dev_alloc_name */
int
dev_alloc_name(struct net_device *dev, const char *name)
{
int i;
char buf[VMK_DEVICE_NAME_MAX_LENGTH];
const int max_netdevices = 8*PAGE_SIZE;
char *p;
p = strnchr(name, VMK_DEVICE_NAME_MAX_LENGTH - 1, '%');
if (p && (p[1] != 'd' || strchr(p+2, '%'))) {
return -EINVAL;
}
for (i = 0; i < max_netdevices; i++) {
snprintf(buf, sizeof(buf), name, i);
if (vmk_UplinkIsNameAvailable(buf)) {
strcpy(dev->name, buf);
return i;
}
}
return -ENFILE;
}
EXPORT_SYMBOL(dev_alloc_name);
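/*
* For example (hypothetical state): with the format "vmnic%d" and
* vmnic0/vmnic1 already registered as uplinks, dev_alloc_name() writes
* "vmnic2" into dev->name and returns 2. A name containing a '%' that is
* not part of a single "%d" conversion returns -EINVAL.
*/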
/*
*----------------------------------------------------------------------------
*
* set_device_pci_name --
*
* Set device's pci name
*
* Results:
* None.
*
* Side effects:
* None.
*
*----------------------------------------------------------------------------
*/
static void
set_device_pci_name(struct net_device *dev, struct pci_dev *pdev)
{
/* We normally have the pci device name, because
* esxcfg-init or esxcfg-init-eesx generates the pci device names.
*
* We just override it with the one named by the driver.
*/
VMK_ASSERT_ON_COMPILE(VMK_DEVICE_NAME_MAX_LENGTH >= IFNAMSIZ);
if (LinuxPCI_IsValidPCIBusDev(pdev)) {
LinuxPCIDevExt *pe = container_of(pdev, LinuxPCIDevExt, linuxDev);
vmk_PCISetDeviceName(pe->vmkDev, dev->name);
strncpy(pdev->name, dev->name, sizeof(pdev->name));
}
if (strnlen(dev->name, VMK_DEVICE_NAME_MAX_LENGTH) > (IFNAMSIZ - 1)) {
VMKLNX_WARN("Net device name length(%zd) exceeds IFNAMSIZ - 1(%d)",
strnlen(dev->name, VMK_DEVICE_NAME_MAX_LENGTH), IFNAMSIZ - 1);
}
}
/**
* register_netdevice - register a network device
* @dev: device to register
*
* Take a completed network device structure and add it to the kernel
* interfaces. 0 is returned on success. A negative errno code is returned
* on a failure to set up the device, or if the name is a duplicate.
*
* RETURN VALUE:
* 0 on success
* negative errno code on error
*/
/* _VMKLNX_CODECHECK_: register_netdevice */
int
register_netdevice(struct net_device *dev)
{
int ret = 0;
/*
* netif_napi_add can, unfortunately, be called before register_netdev.
* Fail register_netdev if the prior napi_add failed; it's most likely
* a low-memory condition, and we'd fail somewhere further down the
* line if we went on.
*/
if (dev->reg_state == NETREG_EARLY_NAPI_ADD_FAILED) {
VMKLNX_WARN("%s: early napi registration failed, bailing", dev->name);
ret = -EIO;
goto out;
}
netdev_init_queue_locks(dev);
dev->iflink = -1;
dev->vlan_group = NULL;
/* Call the driver's init function, if one is provided */
if (dev->init != 0) {
int rv = 0;
VMKAPI_MODULE_CALL(dev->module_id, rv, dev->init, dev);
if (rv != 0) {
ret = -EIO;
goto out;
}
}
if (netdev_poll_init(dev) != VMK_OK) {
ret = -ENOMEM;
goto err_uninit;
}
set_bit(__LINK_STATE_PRESENT, &dev->state);
write_lock(&dev_base_lock);
/* CNA devices don't belong to the same uplink namespace. */
if (dev->features & NETIF_F_CNA) {
if (LinuxCNA_RegisterNetDev(dev) != VMK_OK) {
ret = -EIO;
write_unlock(&dev_base_lock);
goto err_cna_reg;
}
} else {
dev->next = dev_base;
dev_base = dev;
}
write_unlock(&dev_base_lock);
dev_hold(dev);
dev->reg_state = NETREG_REGISTERED;
out:
return ret;
err_cna_reg:
netdev_poll_cleanup(dev);
err_uninit:
if (dev->uninit) {
VMKAPI_MODULE_CALL_VOID(dev->module_id, dev->uninit, dev);
}
goto out;
}
/**
* register_netdev - register a network device
* @dev: device to register
*
* Take a completed network device structure and add it to the kernel
* interfaces. 0 is returned on success. A negative errno code is returned
* on a failure to set up the device, or if the name is a duplicate.
*
* This is a wrapper around register_netdevice that expands the device name
* if you passed a format string to alloc_netdev.
*
* RETURN VALUE:
* 0 on success
* negative errno code on error
*/
/* _VMKLNX_CODECHECK_: register_netdev */
int
register_netdev(struct net_device *dev)
{
int err = 0;
rtnl_lock();
if (strchr(dev->name, '%')) {
err = dev_alloc_name(dev, dev->name);
} else if (dev->name[0]==0 || dev->name[0]==' ') {
err = dev_alloc_name(dev, "vmnic%d");
}
if (err >= 0) {
struct pci_dev *pdev = dev->pdev;
if (dev->useDriverNamingDevice) {
/* net_device already named; we need to update the PCI device name list */
set_device_pci_name(dev, pdev);
}
err = register_netdevice(dev);
}
rtnl_unlock();
if (dev->pdev == NULL) {
/*
* For pseudo network interfaces, we connect and open the
* uplink at this point. For Real PCI NIC's, they do
* this in pci_announce_device() and vmk_PCIPostInsert()
* respectively.
*/
if (LinNet_ConnectUplink(dev, NULL)
|| (vmk_UplinkOpen(dev->uplinkDev) != VMK_OK)) {
err = -EIO;
}
}
return err;
}
EXPORT_SYMBOL(register_netdev);
int
unregister_netdevice(struct net_device *dev)
{
struct net_device **cur;
VMK_ASSERT(atomic_read(&dev->refcnt) == 1);
if (dev->nicMajor > 0) {
vmkplxr_UnregisterChardev(dev->nicMajor, 0, dev->name);
}
if (dev->flags & IFF_UP) {
dev_close(dev);
}
VMK_ASSERT(dev->reg_state == NETREG_REGISTERED);
dev->reg_state = NETREG_UNREGISTERING;
if (dev->uninit) {
VMKAPI_MODULE_CALL_VOID(dev->module_id, dev->uninit, dev);
}
/* CNA devices don't belong to the same uplink namespace. */
if (dev->features & NETIF_F_CNA) {
LinuxCNA_UnRegisterNetDev(dev);
} else {
write_lock(&dev_base_lock);
cur = &dev_base;
while (*cur && *cur != dev) {
cur = &(*cur)->next;
}
if (*cur) {
*cur = (*cur)->next;
}
write_unlock(&dev_base_lock);
}
dev->reg_state = NETREG_UNREGISTERED;
netdev_poll_cleanup(dev);
VMK_ASSERT(dev->vlan_group == NULL);
if (dev->vlan_group) {
vmk_HeapFree(VMK_MODULE_HEAP_ID, dev->vlan_group);
dev->vlan_group = NULL;
}
netdev_destroy_queue_locks(dev);
/*
* Disassociate the pci_dev from this net device
*/
if (dev->pdev != NULL) {
dev->pdev->netdev = NULL;
dev->pdev = NULL;
}
dev_put(dev);
return 0;
}
/**
* unregister_netdev - remove device from the kernel
* @dev: device
*
* This function shuts down a device interface and removes it from the
* kernel tables.
*
* This is just a wrapper for unregister_netdevice. In general you want
* to use this and not unregister_netdevice.
*
* RETURN VALUE:
* None
*/
/* _VMKLNX_CODECHECK_: unregister_netdev */
void
unregister_netdev(struct net_device *dev)
{
unsigned long warning_time;
VMKLNX_DEBUG(0, "Unregistering %s", dev->name);
if (dev->pdev == NULL) {
/*
* Close and disconnect the uplink here if
* the device is a pseudo NIC. For real PCI
* NIC, the uplink is closed and disconnected
* via vmk_PCIDoPreRemove().
*/
vmk_UplinkClose(dev->uplinkDev);
}
/*
* Fixed PR366444 - Moved the 'refcnt' check here from within
* unregister_netdevice()
*
* We will be stuck in the while loop below if someone forgot
* to drop the reference count.
*/
warning_time = jiffies;
rtnl_lock();
while (atomic_read(&dev->refcnt) > 1) {
rtnl_unlock();
if ((jiffies - warning_time) > 10*HZ) {
VMKLNX_WARN("waiting for %s to become free. Usage count = %d",
dev->name, atomic_read(&dev->refcnt));
warning_time = jiffies;
}
current->state = TASK_INTERRUPTIBLE;
schedule_timeout(HZ/4);
current->state = TASK_RUNNING;
rtnl_lock();
}
unregister_netdevice(dev);
rtnl_unlock();
VMKLNX_DEBUG(0, "Done Unregistering %s", dev->name);
}
EXPORT_SYMBOL(unregister_netdev);
/*
* register_netdevice_notifier - register a network notifier block
* @nb: notifier
*
* Register a notifier to be called when network device events occur.
* When registered, all registration and up events are replayed
* to the new notifier to allow device to have a race free
* view of the network device list.
*
* RETURN VALUE:
* 0 on success, -1 on failure.
*/
/* _VMKLNX_CODECHECK_: register_netdevice_notifier */
int register_netdevice_notifier(struct notifier_block *nb)
{
return atomic_notifier_chain_register(&netdev_notifier_list, nb);
}
EXPORT_SYMBOL(register_netdevice_notifier);
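/*
* A minimal notifier sketch (hypothetical code, for illustration). The
* callback receives the events raised through call_netdevice_notifiers()
* below, e.g. NETDEV_UP and NETDEV_GOING_DOWN from link_state_work_cb().
*
*    static int my_netdev_event(struct notifier_block *nb,
*                               unsigned long event, void *data)
*    {
*       struct net_device *dev = (struct net_device *)data;
*       if (event == NETDEV_GOING_DOWN)
*          printk(KERN_INFO "%s: link going down\n", dev->name);
*       return NOTIFY_DONE;
*    }
*
*    static struct notifier_block my_nb = {
*       .notifier_call = my_netdev_event,
*    };
*
*    // register_netdevice_notifier(&my_nb);
*/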
/*
* unregister_netdevice_notifier - unregister a network notifier block
* @nb: notifier
*
* Unregister a previously registered notifier block.
*
* RETURN VALUE:
* 0 on success, -1 on failure.
*/
/* _VMKLNX_CODECHECK_: unregister_netdevice_notifier */
int unregister_netdevice_notifier(struct notifier_block *nb)
{
return atomic_notifier_chain_unregister(&netdev_notifier_list, nb);
}
EXPORT_SYMBOL(unregister_netdevice_notifier);
int call_netdevice_notifiers(unsigned long val, void *v)
{
return atomic_notifier_call_chain (&netdev_notifier_list, val,
(struct net_device *)v);
}
/*
*-----------------------------------------------------------------------------
*
* create_dev_name --
*
* create a unique name for a network device.
*
* Results:
* none
*
* Side effects:
* The supplied name buffer is set to a unique vmnic%d name.
*
*-----------------------------------------------------------------------------
*/
static void
create_dev_name(char *name, int length)
{
/*
* We use 32 as the starting number because we do not want to overlap with
* the names used in the init process. It is assumed that the first 32
* devices (vmnic0 - vmnic31) may be used during boot.
*/
#define NET_ANON_START_ID VMK_CONST64U(32)
static vmk_atomic64 nameCounter = NET_ANON_START_ID;
snprintf(name, length, "vmnic%"VMK_FMT64"u",
vmk_AtomicReadInc64(&nameCounter));
}
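/*
* Successive calls therefore yield "vmnic32", "vmnic33", and so on; the
* counter is atomic, so concurrent callers are guaranteed unique names.
*/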
/*
*-----------------------------------------------------------------------------
*
* netdev_name_adapter --
*
* Set the PCI adapter name, if not already set. If the PCI adapter
* already has a name and the name is registered as an uplink then
* create a new name for a new uplink port. Copy it to the net_device
* structure.
*
* Results:
* none
*
* Side effects:
* dev->name field is set.
*
*-----------------------------------------------------------------------------
*/
static void
netdev_name_adapter(struct net_device *dev, struct pci_dev *pdev)
{
LinuxPCIDevExt *pe;
char devName[VMK_DEVICE_NAME_MAX_LENGTH];
char *name = NULL;
if (pdev == NULL) {
// Pseudo devices may handle their own naming.
if (dev->name[0] != 0) {
return;
}
create_dev_name(dev->name, sizeof dev->name);
VMKLNX_INFO("Pseudo device %s", dev->name);
return;
}
pe = container_of(pdev, LinuxPCIDevExt, linuxDev);
/* Make sure a name exists */
devName[0] = '\0';
vmk_PCIGetDeviceName(pe->vmkDev, devName, sizeof devName);
/*
* If we do not have a name for the physical device, create one. Otherwise,
* if the uplink port has already been registered, we assume we are being
* called for a new port on the device and therefore create a new name,
* which we do not pass on to the physical device.
*/
if (devName[0] == '\0') {
create_dev_name(pdev->name, sizeof pdev->name);
vmk_PCISetDeviceName(pe->vmkDev, pdev->name);
name = pdev->name;
VMKLNX_INFO("%s at " PCI_DEVICE_BUS_ADDRESS, pdev->name,
pci_domain_nr(pdev->bus),
pdev->bus->number,
PCI_SLOT(pdev->devfn),
PCI_FUNC(pdev->devfn));
} else {
if (!vmk_UplinkIsNameAvailable(devName)) {
create_dev_name(pdev->name, sizeof pdev->name);
name = pdev->name;
} else {
name = devName;
/*
* If we already have a name for the physical device in vmkernel,
* copy the name into pdev->name.
*/
snprintf(pdev->name, sizeof(pdev->name), "%s", name);
}
}
/*
* Give the PCI device name to net_device
*/
snprintf(dev->name, sizeof (dev->name), "%s", name);
}
/*
*----------------------------------------------------------------------------
*
* netdev_query_capabilities --
*
* Checks the hardware device's capabilities and returns the information
* as a vmk_UplinkCapabilities bit vector
*
* Results:
* None.
*
* Side effects:
* None.
*
*----------------------------------------------------------------------------
*/
static vmk_UplinkCapabilities
netdev_query_capabilities(struct net_device *dev)
{
vmk_UplinkCapabilities capability = 0;
VMK_ReturnStatus status;
unsigned int permitHwIPv6Csum = 0;
unsigned int permitHwCsumForIPv6Csum = 0;
unsigned int permitHwTSO6 = 0;
unsigned int permitHwTSO = 0;
vmk_MA maxPhysAddr = vmk_MachMemMaxAddr();
status = vmk_ConfigParamGetUint(useHwIPv6CsumHandle, &permitHwIPv6Csum);
VMK_ASSERT(status == VMK_OK);
status = vmk_ConfigParamGetUint(useHwCsumForIPv6CsumHandle, &permitHwCsumForIPv6Csum);
VMK_ASSERT(status == VMK_OK);
status = vmk_ConfigParamGetUint(useHwTSOHandle, &permitHwTSO);
VMK_ASSERT(status == VMK_OK);
status = vmk_ConfigParamGetUint(useHwTSO6Handle, &permitHwTSO6);
VMK_ASSERT(status == VMK_OK);
VMKLNX_DEBUG(0, "Checking device: %s's capabilities", dev->name);
if (dev->features & NETIF_F_HW_VLAN_TX) {
VMKLNX_DEBUG(0, "device: %s has hw_vlan_tx capability", dev->name);
vmk_UplinkCapabilitySet(&capability, VMK_PORT_CLIENT_CAP_HW_TX_VLAN, VMK_TRUE);
}
if (dev->features & NETIF_F_HW_VLAN_RX) {
VMKLNX_DEBUG(0, "device: %s has hw_vlan_rx capability", dev->name);
vmk_UplinkCapabilitySet(&capability, VMK_PORT_CLIENT_CAP_HW_RX_VLAN, VMK_TRUE);
}
if (dev->features & NETIF_F_IP_CSUM) {
VMKLNX_DEBUG(0, "device: %s has IP CSUM capability", dev->name);
vmk_UplinkCapabilitySet(&capability, VMK_PORT_CLIENT_CAP_IP4_CSUM, VMK_TRUE);
}
if (permitHwIPv6Csum) {
if (dev->features & NETIF_F_IPV6_CSUM) {
VMKLNX_DEBUG(0, "device: %s has IPV6 CSUM capability", dev->name);
vmk_UplinkCapabilitySet(&capability, VMK_PORT_CLIENT_CAP_IP6_CSUM, VMK_TRUE);
} else {
/*
* When NETIF_F_IPV6_CSUM isn't available, then software
* CSUM for IP6 headers will be done. If software csum
* is included, there's no reason to also examine the pktLists
* for ip6 extension header offloads
*/
if (!(dev->features & NETIF_F_HW_CSUM)) {
vmk_UplinkCapabilitySet(&capability,
VMK_PORT_CLIENT_CAP_IP6_CSUM_EXT_HDRS,
VMK_TRUE);
}
}
}
if (dev->features & NETIF_F_HW_CSUM) {
VMKLNX_DEBUG(0, "device: %s has HW CSUM capability", dev->name);
// IP is the subset of HW we support.
vmk_UplinkCapabilitySet(&capability, VMK_PORT_CLIENT_CAP_IP4_CSUM, VMK_TRUE);
if (permitHwCsumForIPv6Csum) {
VMKLNX_DEBUG(0, "device: %s has HW CSUM => IPv6 CSUM capability", dev->name);
vmk_UplinkCapabilitySet(&capability, VMK_PORT_CLIENT_CAP_IP6_CSUM, VMK_TRUE);
}
}
if ((dev->features & NETIF_F_SG) &&
(MAX_SKB_FRAGS >= VMK_PKT_FRAGS_MAX_LENGTH)) {
VMKLNX_DEBUG(0, "device: %s has SG capability", dev->name);
vmk_UplinkCapabilitySet(&capability, VMK_PORT_CLIENT_CAP_SG, VMK_TRUE);
}
if (!(dev->features & NETIF_F_FRAG_CANT_SPAN_PAGES)) {
VMKLNX_DEBUG(0, "device: %s has Frag Span Pages capability", dev->name);
vmk_UplinkCapabilitySet(&capability, VMK_PORT_CLIENT_CAP_SG_SPAN_PAGES,
VMK_TRUE);
}
if ((dev->features & NETIF_F_HIGHDMA) ||
((dev->features & NETIF_F_DMA39) && maxPhysAddr <= DMA_BIT_MASK(39)) ||
((dev->features & NETIF_F_DMA40) && maxPhysAddr <= DMA_BIT_MASK(40)) ||
((dev->features & NETIF_F_DMA48) && maxPhysAddr <= DMA_BIT_MASK(48))) {
VMKLNX_DEBUG(0, "device: %s has high dma capability", dev->name);
vmk_UplinkCapabilitySet(&capability, VMK_PORT_CLIENT_CAP_HIGH_DMA, VMK_TRUE);
}
if (permitHwTSO && (dev->features & NETIF_F_TSO)) {
VMKLNX_DEBUG(0, "device: %s has TSO capability", dev->name);
vmk_UplinkCapabilitySet(&capability, VMK_PORT_CLIENT_CAP_TSO, VMK_TRUE);
}
if (permitHwTSO6) {
if (dev->features & NETIF_F_TSO6) {
VMKLNX_DEBUG(0, "device: %s has TSO6 capability", dev->name);
vmk_UplinkCapabilitySet(&capability, VMK_PORT_CLIENT_CAP_TSO6, VMK_TRUE);
} else {
/*
* When NETIF_F_TSO6 isn't available, then software TSO6
* will be done, but when software TSO6 is enabled, there's
* no reason to also review the pktLists for IP6 extension
* headers.
*/
vmk_UplinkCapabilitySet(&capability,
VMK_PORT_CLIENT_CAP_TSO6_EXT_HDRS,
VMK_TRUE);
}
}
if (dev->features & NETIF_F_UPT) {
VMKLNX_DEBUG(0, "device: %s has UPT capability", dev->name);
vmk_UplinkCapabilitySet(&capability, VMK_PORT_CLIENT_CAP_UPT, VMK_TRUE);
}
if (dev->pt_ops && !(dev->features & NETIF_F_UPT)) {
VMKLNX_DEBUG(0, "device: %s has NPA capability", dev->name);
vmk_UplinkCapabilitySet(&capability, VMK_PORT_CLIENT_CAP_NPA, VMK_TRUE);
}
if (dev->dcbnl_ops) {
VMKLNX_DEBUG(0, "device: %s has DCB capability", dev->name);
vmk_UplinkCapabilitySet(&capability, VMK_PORT_CLIENT_CAP_DCB, VMK_TRUE);
}
/*
* All devices have the RDONLY_INETHDRS capability. It's a property
* of a device driver: when VMK_TRUE, it means the device driver does
* NOT modify the inet headers. When VMK_FALSE, it means the device
* driver DOES modify the inet headers, and that private copies of
* the pktHandles need to be made for the safety of pktHandles
* without private writable buffers.
*/
if (dev->features & NETIF_F_RDONLYINETHDRS) {
VMKLNX_DEBUG(0, "device: %s has RDONLY_INETHDRS capability", dev->name);
vmk_UplinkCapabilitySet(&capability,
VMK_PORT_CLIENT_CAP_RDONLY_INETHDRS, VMK_TRUE);
} else {
VMKLNX_DEBUG(0, "device: %s does not have RDONLY_INETHDRS capability",
dev->name);
vmk_UplinkCapabilitySet(&capability,
VMK_PORT_CLIENT_CAP_RDONLY_INETHDRS, VMK_FALSE);
}
/*
* PR #324545: Artificially turn this feature on so that the VMkernel
* doesn't activate any unnecessary & wasteful SW workaround.
* The VMkernel shouldn't generate this kind of frames anyway.
*/
if (VMK_TRUE) {
VMKLNX_DEBUG(0, "device: %s has TSO256k capability", dev->name);
vmk_UplinkCapabilitySet(&capability, VMK_PORT_CLIENT_CAP_TSO256k, VMK_TRUE);
}
if (dev->features & NETIF_F_TSO) {
/*
* If a pNIC can do TSO, but not any of the following,
* our software path for any of these missing functions
* may end up trying to allocate very large buffers and
* not able to do it. We'd like to know about such
* devices during development.
* NB: we already know that some e1000 devices,
* e.g. 82544EI (e1000 XT), can do TSO but not High_DMA.
*/
VMK_ASSERT(dev->features & NETIF_F_SG);
VMK_ASSERT(!(dev->features & NETIF_F_FRAG_CANT_SPAN_PAGES));
if (!(dev->features & NETIF_F_SG) ||
(dev->features & NETIF_F_FRAG_CANT_SPAN_PAGES)) {
VMKLNX_WARN("%s: disabling hardware TSO because dev "
"has no hardware SG",
dev->name);
vmk_UplinkCapabilitySet(&capability, VMK_PORT_CLIENT_CAP_TSO, VMK_FALSE);
}
}
/*
* To support encapsulated offloads, the pNic must be able to
* parameterize the location of the header, csum, etc. Some
* nics can parameterize, some can't. Some nics use 8-bit
* offsets, some use 16-bits.
*
*/
if (dev->features & NETIF_F_OFFLOAD_16OFFSET) {
VMKLNX_DEBUG(0, "device: %s has TSO-CSUM offloads "
"with 16 bit offsets (8-bit also enabled)",
dev->name);
vmk_UplinkCapabilitySet(&capability,
VMK_PORT_CLIENT_CAP_OFFLOAD_16OFFSET,
VMK_TRUE);
if (!(dev->features & NETIF_F_OFFLOAD_8OFFSET)) {
VMKLNX_DEBUG(0, "device: %s lacks 8-bit offsets; enabling them anyway",
dev->name);
}
vmk_UplinkCapabilitySet(&capability,
VMK_PORT_CLIENT_CAP_OFFLOAD_8OFFSET,
VMK_TRUE);
} else if (dev->features & NETIF_F_OFFLOAD_8OFFSET) {
VMKLNX_DEBUG(0, "device: %s has TSO-CSUM with 8 bit offset capability",
dev->name);
vmk_UplinkCapabilitySet(&capability,
VMK_PORT_CLIENT_CAP_OFFLOAD_8OFFSET,
VMK_TRUE);
} else {
VMKLNX_DEBUG(0, "device: %s no TSO-CSUM offset capability",
dev->name);
/*
* By enabling 16OFFSET while leaving 8OFFSET disabled, the
* software version of the cap will be inserted.
*/
vmk_UplinkCapabilitySet(&capability,
VMK_PORT_CLIENT_CAP_OFFLOAD_16OFFSET,
VMK_TRUE);
}
if (!(dev->features & NETIF_F_NO_SCHED)) {
vmk_UplinkCapabilitySet(&capability,
VMK_PORT_CLIENT_CAP_SCHED,
VMK_TRUE);
VMKLNX_DEBUG(0, "device: %s is network scheduling compliant",
dev->name);
} else {
VMKLNX_DEBUG(0, "device: %s is not network scheduling compliant",
dev->name);
}
VMKLNX_DEBUG(0, "device %s vmnet cap is 0x%"VMK_FMT64"x",
dev->name, capability);
return capability;
}
/*
* Section: calltable functions, called through vmk_UplinkFunctions
*/
/*
*----------------------------------------------------------------------------
*
* IoctlNetDev --
*
* Handle an ioctl request from the VMKernel for the given device name.
*
* Results:
* VMK_ReturnStatus indicating the outcome.
*
* Side effects:
* None.
*
*----------------------------------------------------------------------------
*/
static VMK_ReturnStatus
IoctlNetDev(char *uplinkName, uint32_t cmd, void *args, uint32_t *result)
{
VMK_ReturnStatus status;
struct net_device *dev;
dev = dev_get_by_name(uplinkName);
if (!dev) {
return VMK_NOT_FOUND;
}
status = netdev_ioctl(dev, cmd, args, result, VMK_IOCTL_CALLER_64, VMK_FALSE);
dev_put(dev);
return status;
}
/*
*-----------------------------------------------------------------------------
*
* SetNICLinkStatus --
*
* Push new link status up to the vmkernel.
*
* Results:
* None.
*
* Side effects:
* May cause teaming failover events to be scheduled.
*
*-----------------------------------------------------------------------------
*/
void
SetNICLinkStatus(struct net_device *dev)
{
vmk_UplinkLinkInfo linkInfo;
linkInfo.linkState = dev->link_state;
linkInfo.linkSpeed = linkInfo.linkState ? dev->link_speed : 0;
linkInfo.fullDuplex = linkInfo.linkState ? dev->full_duplex : VMK_FALSE;
/* Test if the uplink is connected (for a pseudo device) */
if (dev->uplinkDev) {
vmk_UplinkUpdateLinkState(dev->uplinkDev, &linkInfo);
}
}
/*
*----------------------------------------------------------------------------
*
* DevStartTxImmediate --
*
* External entry point for transmitting packets. Packets are queued and
* then Tx-ed immediately.
*
*
* Results:
* VMK_ReturnStatus indicating the outcome.
*
* Side effects:
* None.
*
*----------------------------------------------------------------------------
*/
static VMK_ReturnStatus
DevStartTxImmediate(void *clientData, vmk_PktList pktList)
{
struct net_device *dev = (struct net_device *)clientData;
vmk_PktHandle *pkt = vmk_PktListGetFirstPkt(pktList);
vmk_NetqueueQueueID vmkqid;
VMK_ASSERT(pkt);
vmkqid = vmk_PktQueueIDGet(pkt);
#ifdef VMX86_DEBUG
{
VMK_PKTLIST_ITER_STACK_DEF(iter);
vmk_PktListIterStart(iter, pktList);
while (!vmk_PktListIterIsAtEnd(iter)) {
pkt = vmk_PktListIterGetPkt(iter);
VMK_ASSERT(vmk_PktQueueIDGet(pkt) == vmkqid);
vmk_PktListIterMove(iter);
}
}
#endif
return netdev_tx(dev, pktList, vmkqid);
}
/*
*----------------------------------------------------------------------------
*
* OpenNetDev --
*
* Handler for calling the device's open function. If successful, the device
* state is changed to indicate that the device has been opened.
*
* Results:
* Returns whatever the device's open function returns.
*
* Side effects:
* None.
*
*----------------------------------------------------------------------------
*/
static VMK_ReturnStatus
OpenNetDev(void *clientData)
{
struct net_device *dev = (struct net_device *)clientData;
int status = 0;
if (dev->open == NULL) {
VMKLNX_WARN("NULL open function for device %s", dev->name);
return 1;
}
rtnl_lock();
if ((dev->gflags & IFF_DEV_IS_OPEN) == 0) {
status = dev_open(dev);
if (status == 0) {
dev->gflags |= IFF_DEV_IS_OPEN;
}
}
rtnl_unlock();
return status == 0 ? VMK_OK : VMK_FAILURE;
}
/*
*----------------------------------------------------------------------------
*
* CloseNetDev --
*
* Handler for closing the device. If successful, the device state is
* modified to indicate that the device is now non-functional.
*
* Results:
* Returns whatever the stop function of the module owning the device
* returns.
*
* Side effects:
* None.
*
*----------------------------------------------------------------------------
*/
static VMK_ReturnStatus
CloseNetDev(void *clientData)
{
struct net_device *dev = (struct net_device *)clientData;
int status = 0;
VMK_ASSERT(dev->stop != NULL);
VMKLNX_DEBUG(0, "Stopping device %s", dev->name);
rtnl_lock();
if (dev->gflags & IFF_DEV_IS_OPEN ) {
status = dev_close(dev);
if (status == 0) {
dev->gflags &= ~IFF_DEV_IS_OPEN;
}
}
rtnl_unlock();
return status == 0 ? VMK_OK : VMK_FAILURE;
}
/*
*----------------------------------------------------------------------------
*
* BlockNetDev --
*
* Handler for blocking the device. If successful, the device state is
* modified to indicate that the device is now blocked.
*
* Results:
* VMK_OK always.
*
* Side effects:
* None.
*
*----------------------------------------------------------------------------
*/
static VMK_ReturnStatus
BlockNetDev(void *clientData)
{
struct net_device *dev = (struct net_device *)clientData;
struct napi_struct *napi;
if (test_and_set_bit(__LINK_STATE_BLOCKED, &dev->state)) {
VMKLNX_DEBUG(0, "%s is actually already blocked.", dev->name);
return VMK_OK;
}
// Disable napi so as to give a chance for all packets in the middle of
// rx processing to be handed off to the kernel
spin_lock(&dev->napi_lock);
list_for_each_entry(napi, &dev->napi_list, dev_list)
if (!(test_bit(NAPI_STATE_UNUSED, &napi->state))) {
while (1) {
if (!napi_disable_timeout(napi, 50)) {
// make sure we don't have packets stuck in the napi context
VMKLNX_DEBUG(0, "Flushing napi context (%d) pending packets for %s",
napi->napi_id, dev->name);
vmk_NetPollProcessRx(napi->net_poll);
napi_enable (napi);
break;
}
if (test_bit(NAPI_STATE_UNUSED, &napi->state)) {
break;
}
}
}
spin_unlock(&dev->napi_lock);
/* Emulate a case where it takes longer to complete the rx packets in flight */
if (VMKLNX_STRESS_DEBUG_COUNTER(stressNetBlockDevIsSluggish)) {
msleep(blockTotalSleepMsec);
}
VMKLNX_DEBUG(0, "%s is blocked.", dev->name);
return VMK_OK;
}
/*
*----------------------------------------------------------------------------
*
* UnblockNetDev --
*
* Handler for unblocking the device. If successful, the device state is
* modified to indicate that the device is now unblocked.
*
* Results:
* VMK_OK always.
*
* Side effects:
* None.
*
*----------------------------------------------------------------------------
*/
static VMK_ReturnStatus
UnblockNetDev(void *clientData)
{
struct net_device *dev = (struct net_device *)clientData;
if (!test_bit(__LINK_STATE_BLOCKED, &dev->state)) {
VMKLNX_DEBUG(0, "%s is actually already unblocked.", dev->name);
return VMK_OK;
}
smp_mb__before_clear_bit();
clear_bit(__LINK_STATE_BLOCKED, &dev->state);
VMKLNX_DEBUG(0, "%s is unblocked.", dev->name);
return VMK_OK;
}
/*
*-----------------------------------------------------------------------------
*
* LinNet_EnableHwVlan --
*
* Enable HW vlan on the netdev
* If enable is FALSE, hardware vlan is expected to be enabled already.
*
* Results:
* Return VMK_OK if there is VLan HW tx/rx acceleration support;
* Return VMK_VLAN_NO_HW_ACCEL otherwise.
*
* Side effects:
* hw vlan register is updated.
*
*-----------------------------------------------------------------------------
*/
VMK_ReturnStatus
LinNet_EnableHwVlan(struct net_device *dev)
{
struct vlan_group *grp = dev->vlan_group;
/*
* dev->vlan_group is only allocated after vlan_rx_register() has been
* called successfully. If dev->vlan_group is not NULL, it means
* vlan has already been enabled and no need to do it again
*/
if (grp != NULL) {
VMKLNX_DEBUG(1, "%s: HW VLAN already enabled", dev->name);
return VMK_OK;
}
VMK_ASSERT(dev->features & NETIF_F_HW_VLAN_RX);
/* call driver's vlan_rx_register handler to enable vlan */
VMK_ASSERT(dev->vlan_rx_register);
if (!dev->vlan_rx_register) {
VMKLNX_DEBUG(0, "%s: no vlan_rx_register handler", dev->name);
return VMK_VLAN_NO_HW_ACCEL;
}
grp = vmk_HeapAlloc(VMK_MODULE_HEAP_ID, sizeof (struct vlan_group));
if (grp == NULL) {
VMKLNX_DEBUG(0, "%s: failed to allocate vlan_group", dev->name);
return VMK_NO_MEMORY;
}
vmk_Memset(grp, 0, sizeof (struct vlan_group));
dev->vlan_group = grp;
VMKLNX_DEBUG(0, "%s: enabling vlan", dev->name);
VMKAPI_MODULE_CALL_VOID(dev->module_id, dev->vlan_rx_register, dev, grp);
return VMK_OK;
}
/*
*-----------------------------------------------------------------------------
*
* SetupVlanGroupDevice --
*
* Enable HW vlan and add new vlan id's based on the bitmap.
* If enable is FALSE, hardware vlan is expected to be enabled
* already. If bitmap is NULL, just do the enable.
*
* Results:
* Return VMK_OK if there is VLan HW tx/rx acceleration support;
* Return VMK_VLAN_NO_HW_ACCEL otherwise.
*
* Side effects:
* hw vlan register is updated.
*
*-----------------------------------------------------------------------------
*/
static VMK_ReturnStatus
SetupVlanGroupDevice(void *clientData, vmk_Bool enable, void *bitmap)
{
struct net_device *dev = (struct net_device *) clientData;
struct vlan_group *grp = dev->vlan_group;
VMK_ReturnStatus status;
rtnl_lock();
if (enable || grp == NULL) {
status = LinNet_EnableHwVlan(dev);
if (status != VMK_OK) {
goto end;
}
grp = dev->vlan_group;
}
/* if hw doesn't support rx vlan filter, bail out here */
if (!(dev->features & NETIF_F_HW_VLAN_FILTER)) {
status = VMK_OK;
goto end;
}
/* now compare bitmap with vlan_group and make up the difference */
if (bitmap) {
vmk_VlanID vid;
VMK_ASSERT(dev->vlan_rx_add_vid);
if (!dev->vlan_rx_add_vid) {
VMKLNX_DEBUG(0, "%s: driver has no vlan_rx_add_vid handler",
dev->name);
status = VMK_FAILURE;
goto end;
}
for (vid = 0; vid < VLAN_GROUP_ARRAY_LEN; vid++) {
if (test_bit(vid, bitmap) && grp->vlan_devices[vid] == NULL) {
grp->vlan_devices[vid] = dev;
VMKLNX_DEBUG(1, "%s: adding vlan id %d", dev->name, (int)vid);
VMKAPI_MODULE_CALL_VOID(dev->module_id, dev->vlan_rx_add_vid, dev,
vid);
}
}
}
status = VMK_OK;
end:
rtnl_unlock();
return status;
}
/*
*-----------------------------------------------------------------------------
*
* LinNet_RemoveVlanGroupDevice --
*
* Delete vlan id's based on bitmap and disable hw vlan.
* Either bitmap or disable should be set, but not both.
* If neither is set, there is no work to do (illegal?).
*
* Results:
* VMK_OK if successfully added/deleted.
* VMK_FAILURE otherwise.
*
* Side effects:
* HW vlan table is updated. HW may stop passing vlan traffic.
*
*-----------------------------------------------------------------------------
*/
VMK_ReturnStatus
LinNet_RemoveVlanGroupDevice(void *clientData, vmk_Bool disable, void *bitmap)
{
struct net_device *dev = (struct net_device *) clientData;
struct vlan_group *grp = dev->vlan_group;
VMK_ReturnStatus status;
VMK_ASSERT(dev->features & NETIF_F_HW_VLAN_RX);
rtnl_lock();
/* Unregister vid's if hardware supports vlan filter */
if (dev->features & NETIF_F_HW_VLAN_FILTER) {
vmk_VlanID vid;
VMK_ASSERT(dev->vlan_rx_kill_vid);
if (!dev->vlan_rx_kill_vid) {
VMKLNX_DEBUG(0, "%s: no vlan_rx_kill_vid handler", dev->name);
status = VMK_FAILURE;
goto end;
}
if (grp == NULL) {
VMKLNX_DEBUG(0, "%s: the vlan_group of this device is NULL",
dev->name);
status = VMK_FAILURE;
goto end;
}
for (vid = 0; vid < VLAN_GROUP_ARRAY_LEN; vid++) {
if (grp->vlan_devices[vid] == NULL) {
continue;
}
/* delete all if disable is true, else consult bitmap */
if (disable || (bitmap && !test_bit(vid, bitmap))) {
grp->vlan_devices[vid] = NULL;
VMKLNX_DEBUG(1, "%s: deleting vlan id %d", dev->name, (int)vid);
VMKAPI_MODULE_CALL_VOID(dev->module_id, dev->vlan_rx_kill_vid, dev,
vid);
}
}
}
if (disable) {
VMK_ASSERT(dev->vlan_rx_register);
if (!dev->vlan_rx_register) {
VMKLNX_DEBUG(0, "%s: no vlan_rx_register handler", dev->name);
status = VMK_VLAN_NO_HW_ACCEL;
goto end;
}
VMKLNX_DEBUG(0, "%s: disabling vlan", dev->name);
VMKAPI_MODULE_CALL_VOID(dev->module_id, dev->vlan_rx_register, dev, NULL);
VMK_ASSERT(grp);
if (grp) {
dev->vlan_group = NULL;
vmk_HeapFree(VMK_MODULE_HEAP_ID, grp);
}
}
status = VMK_OK;
end:
rtnl_unlock();
return status;
}
/*
*-----------------------------------------------------------------------------
*
* NICGetMTU --
*
* Returns the MTU value for the given NIC
*
* Results:
* MTU for the given device.
*
* Side effects:
* none.
*
*-----------------------------------------------------------------------------
*/
static VMK_ReturnStatus
NICGetMTU(void *device, vmk_uint32 *mtu)
{
struct net_device *dev = (struct net_device *) device;
*mtu = dev->mtu;
return VMK_OK;
}
/*
*-----------------------------------------------------------------------------
*
* NICSetMTU --
*
* Set new MTU for the given NIC
*
* Results:
* VMK_OK if the new_mtu is accepted by the device.
* VMK_FAILURE or VMK_NOT_SUPPORTED otherwise.
*
* Side effects:
* The device queue is stopped. For most devices the entire ring is
* reallocated, and the device is reset.
*
*-----------------------------------------------------------------------------
*/
static VMK_ReturnStatus
NICSetMTU(void *device, vmk_uint32 new_mtu)
{
int ret = 0;
struct net_device *dev = (struct net_device *) device;
if (!dev->change_mtu) { // 3Com doesn't even register change_mtu!
VMKLNX_DEBUG(0, "Changing MTU not supported by device.");
return VMK_NOT_SUPPORTED;
}
/* PRs 478842, 478939
* Update trans_start here so that netdev_watchdog will not mistake
* a stopped tx_queue as a sign of pNIc hang when change MTU is undergoing.
*/
rtnl_lock();
dev->trans_start = jiffies;
VMKAPI_MODULE_CALL(dev->module_id, ret, dev->change_mtu, dev, new_mtu);
/* Some drivers call dev_close() when change_mtu fails. The following check
* will update dev->gflags accordingly to avoid a second dev_close()
* when CloseNetDev() is called.
*/
if (ret && !(dev->flags & IFF_UP))
dev->gflags &= ~IFF_DEV_IS_OPEN;
rtnl_unlock();
if (ret == 0) {
VMKLNX_DEBUG(0, "%s: MTU changed to %d", dev->name, new_mtu);
} else {
VMKLNX_DEBUG(0, "%s: Failed to change MTU to %d", dev->name, new_mtu);
return VMK_FAILURE;
}
return VMK_OK;
}
/*
*----------------------------------------------------------------------------
*
* NICSetLinkStateDown --
* Set NIC hardware to link down state to inform link peer.
*
* Results:
* VMK_OK or failure code.
*
* Side effects:
* Device is closed and settings may be lost.
*
*----------------------------------------------------------------------------
*/
static VMK_ReturnStatus
NICSetLinkStateDown(struct net_device *dev)
{
struct ethtool_ops *ops;
if ((dev->gflags & IFF_DEV_IS_OPEN) == 0) {
return VMK_OK;
}
/* disable wol so link is down */
ops = dev->ethtool_ops;
if (ops && ops->set_wol) {
int error;
struct ethtool_wolinfo wolInfo[1];
vmk_LogMessage("Disable WOL on device %s", dev->name);
memset(wolInfo, 0, sizeof (wolInfo));
rtnl_lock();
VMKAPI_MODULE_CALL(dev->module_id, error, ops->set_wol, dev, wolInfo);
rtnl_unlock();
if (error != 0) {
vmk_LogMessage("Failed to disable wol on device %s", dev->name);
}
}
/* now close the device to take the link down */
return CloseNetDev((void *)dev);
}
/*
*----------------------------------------------------------------------------
*
* NICSetLinkStateUp --
* Set NIC hardware to link up state to inform link peer.
*
* Results:
* VMK_OK or failure code.
*
* Side effects:
* Device is opened.
*
*----------------------------------------------------------------------------
*/
static VMK_ReturnStatus
NICSetLinkStateUp(struct net_device *dev)
{
VMK_ReturnStatus status;
if (dev->gflags & IFF_DEV_IS_OPEN) {
return VMK_OK; /* nothing to do */
}
status = OpenNetDev((void *)dev);
if (status != VMK_OK) {
return status;
}
/* Now the link is up, unblock device and restore wol state */
if (UnblockNetDev((void *)dev) != VMK_OK) {
vmk_LogMessage("Failed to unblock device %s", dev->name);
}
/* hostd will reenable wol when it processes link up */
return VMK_OK;
}
/*
*----------------------------------------------------------------------------
*
* NICSetLinkStatus --
* Set NIC hardware speed and duplex.
*
* Results:
* VMK_OK or failure code.
*
* Side effects:
* None.
*
*----------------------------------------------------------------------------
*/
static VMK_ReturnStatus
NICSetLinkStatus(void *clientData, vmk_UplinkLinkInfo *linkInfo)
{
struct net_device *dev = (struct net_device *)clientData;
struct ethtool_cmd cmd;
uint32_t result;
VMK_ReturnStatus status;
if (linkInfo->linkState == VMK_LINK_STATE_DOWN) {
vmk_LogMessage("Taking down link on device %s", dev->name);
return NICSetLinkStateDown(dev);
}
status = NICSetLinkStateUp(dev);
if (status != VMK_OK) {
vmk_LogMessage("Failed to bring link up on device %s", dev->name);
return status;
}
/* get meaningful ethtool_cmd value first */
if (!dev->ethtool_ops || !dev->ethtool_ops->get_settings) {
return VMK_NOT_SUPPORTED;
}
memset(&cmd, 0, sizeof(struct ethtool_cmd));
cmd.cmd = ETHTOOL_GSET;
rtnl_lock();
VMKAPI_MODULE_CALL(dev->module_id, result, dev->ethtool_ops->get_settings,
dev, &cmd);
rtnl_unlock();
if (result)
return vmklnx_errno_to_vmk_return_status(result);
/* set link speed and duplexity according to linkInfo */
cmd.cmd = ETHTOOL_SSET;
if (linkInfo->linkState == VMK_LINK_STATE_DOWN) {
cmd.autoneg = 1;
cmd.speed = ~0;
cmd.duplex = ~0;
} else {
cmd.speed = linkInfo->linkSpeed;
cmd.duplex = linkInfo->fullDuplex;
if (cmd.speed != 0) {
cmd.autoneg = 0;
} else {
cmd.autoneg = 1;
cmd.advertising = cmd.supported &
(ADVERTISED_100baseT_Full |
ADVERTISED_100baseT_Half |
ADVERTISED_10baseT_Full |
ADVERTISED_10baseT_Half |
ADVERTISED_1000baseT_Full |
ADVERTISED_1000baseT_Half |
ADVERTISED_Autoneg |
ADVERTISED_2500baseX_Full |
ADVERTISED_10000baseT_Full);
}
}
/*
* We call ethtool_ops directly to bypass copy_from_user(),
* which doesn't handle in-kernel buffers (except for BH callers).
*
* See ethtool_set_settings()
*/
if (!dev->ethtool_ops || !dev->ethtool_ops->set_settings) {
return VMK_NOT_SUPPORTED;
}
rtnl_lock();
VMKAPI_MODULE_CALL(dev->module_id, result, dev->ethtool_ops->set_settings,
dev, &cmd);
rtnl_unlock();
return vmklnx_errno_to_vmk_return_status(result);
}
/*
*----------------------------------------------------------------------------
*
* NICResetDev --
*
* Handler for resetting the device. If successful, the device state is
* reset and the link state should go down and then up.
*
* Results:
* VMK_OK always.
*
* Side effects:
* Link state should bounce as seen from physical switch.
*
*----------------------------------------------------------------------------
*/
static VMK_ReturnStatus
NICResetDev(void *clientData)
{
struct net_device *dev = (struct net_device *)clientData;
netif_tx_lock(dev);
VMK_ASSERT(dev->tx_timeout != NULL);
VMKAPI_MODULE_CALL_VOID(dev->module_id, dev->tx_timeout, dev);
netif_tx_unlock(dev);
return VMK_OK;
}
static inline VMK_ReturnStatus
marshall_to_vmknetq_features(vmknetddi_queueops_features_t features,
vmk_NetqueueFeatures *vmkfeatures)
{
if (features & VMKNETDDI_QUEUEOPS_FEATURE_RXQUEUES) {
*vmkfeatures |= VMK_NETQUEUE_FEATURE_RXQUEUES;
}
if (features & VMKNETDDI_QUEUEOPS_FEATURE_TXQUEUES) {
*vmkfeatures |= VMK_NETQUEUE_FEATURE_TXQUEUES;
}
return VMK_OK;
}
static inline VMK_ReturnStatus
marshall_from_vmknetq_type(vmk_NetqueueQueueType vmkqtype,
vmknetddi_queueops_queue_t *qtype)
{
if (vmkqtype == VMK_NETQUEUE_QUEUE_TYPE_TX) {
*qtype = VMKNETDDI_QUEUEOPS_QUEUE_TYPE_TX;
} else if (vmkqtype == VMK_NETQUEUE_QUEUE_TYPE_RX) {
*qtype = VMKNETDDI_QUEUEOPS_QUEUE_TYPE_RX;
} else {
VMKLNX_DEBUG(0, "invalid vmkqueue type 0x%x", (uint32_t)vmkqtype);
return VMK_FAILURE;
}
return VMK_OK;
}
static inline VMK_ReturnStatus
marshall_to_vmknetq_id(vmknetddi_queueops_queueid_t qid,
vmk_NetqueueQueueID *vmkqid)
{
if ( !VMKNETDDI_QUEUEOPS_IS_TX_QUEUEID(qid) &&
!VMKNETDDI_QUEUEOPS_IS_RX_QUEUEID(qid) ) {
VMKLNX_WARN("invalid queue id 0x%x", qid);
return VMK_FAILURE;
}
vmk_NetqueueSetQueueIDUserVal(vmkqid, qid);
return VMK_OK;
}
static inline VMK_ReturnStatus
marshall_from_vmknetq_id(vmk_NetqueueQueueID vmkqid,
vmknetddi_queueops_queueid_t *qid)
{
VMK_DEBUG_ONLY(
vmk_NetqueueQueueType qtype = vmk_NetqueueQueueIDType(vmkqid);
if (unlikely((qtype != VMK_NETQUEUE_QUEUE_TYPE_TX) &&
(qtype != VMK_NETQUEUE_QUEUE_TYPE_RX))) {
VMKLNX_WARN("invalid vmk queue type 0x%"VMK_FMT64"x", vmkqid);
return VMK_FAILURE;
});
*qid = vmk_NetqueueQueueIDUserVal(vmkqid);
return VMK_OK;
}
static inline VMK_ReturnStatus
marshall_from_vmknetq_filter_type(vmk_NetqueueFilter *vmkfilter,
vmknetddi_queueops_filter_t *filter)
{
if (vmkfilter->class != VMK_NETQUEUE_FILTER_MACADDR &&
vmkfilter->class != VMK_NETQUEUE_FILTER_VLAN &&
vmkfilter->class != VMK_NETQUEUE_FILTER_VLANMACADDR) {
VMKLNX_DEBUG(0, "unsupported vmk filter class");
return VMK_NOT_SUPPORTED;
}
if (vmkfilter->class == VMK_NETQUEUE_FILTER_MACADDR) {
filter->class = VMKNETDDI_QUEUEOPS_FILTER_MACADDR;
memcpy(filter->u.macaddr, vmkfilter->u.macaddr, 6);
}
if (vmkfilter->class == VMK_NETQUEUE_FILTER_VLAN) {
filter->class = VMKNETDDI_QUEUEOPS_FILTER_VLAN;
filter->u.vlan_id = vmkfilter->u.vlan_id;
}
if (vmkfilter->class == VMK_NETQUEUE_FILTER_VLANMACADDR) {
filter->class = VMKNETDDI_QUEUEOPS_FILTER_VLANMACADDR;
memcpy(filter->u.vlanmac.macaddr, vmkfilter->u.vlanmac.macaddr, 6);
filter->u.vlanmac.vlan_id = vmkfilter->u.vlanmac.vlan_id;
}
return VMK_OK;
}
static inline VMK_ReturnStatus
marshall_to_vmknetq_supported_filter_class(vmknetddi_queueops_filter_class_t class,
vmk_NetqueueFilterClass *vmkclass)
{
*vmkclass = VMK_NETQUEUE_FILTER_NONE;
if (class & VMKNETDDI_QUEUEOPS_FILTER_MACADDR) {
*vmkclass |= VMK_NETQUEUE_FILTER_MACADDR;
}
if (class & VMKNETDDI_QUEUEOPS_FILTER_VLAN) {
*vmkclass |= VMK_NETQUEUE_FILTER_VLAN;
}
if (class & VMKNETDDI_QUEUEOPS_FILTER_VLANMACADDR) {
*vmkclass |= VMK_NETQUEUE_FILTER_VLANMACADDR;
}
return VMK_OK;
}
static inline VMK_ReturnStatus
marshall_to_vmknetq_filter_id(vmknetddi_queueops_filterid_t fid,
vmk_NetqueueFilterID *vmkfid)
{
return vmk_NetqueueMkFilterID(vmkfid, VMKNETDDI_QUEUEOPS_FILTERID_VAL(fid));
}
static VMK_ReturnStatus
marshall_from_vmknetq_filter_id(vmk_NetqueueFilterID vmkfid,
vmknetddi_queueops_filterid_t *fid)
{
*fid = VMKNETDDI_QUEUEOPS_MK_FILTERID(vmk_NetqueueFilterIDVal(vmkfid));
return VMK_OK;
}
static VMK_ReturnStatus
marshall_from_vmknetq_pri(vmk_NetqueuePriority vmkpri,
vmknetddi_queueops_tx_priority_t *pri)
{
*pri = (vmknetddi_queueops_tx_priority_t)vmkpri;
return VMK_OK;
}
static inline VMK_ReturnStatus
marshall_to_vmknetq_queue_features(vmknetddi_queueops_queue_features_t features,
vmk_NetqueueQueueFeatures *vmkfeatures)
{
if (features & VMKNETDDI_QUEUEOPS_QUEUE_FEAT_LRO) {
*vmkfeatures |= VMK_NETQUEUE_QUEUE_FEAT_LRO;
}
if (features & VMKNETDDI_QUEUEOPS_QUEUE_FEAT_PAIR) {
*vmkfeatures |= VMK_NETQUEUE_QUEUE_FEAT_PAIR;
}
return VMK_OK;
}
static inline VMK_ReturnStatus
marshall_from_vmknetq_queue_features(vmk_NetqueueQueueFeatures vmkfeatures,
vmknetddi_queueops_queue_features_t *features)
{
if (vmkfeatures & VMK_NETQUEUE_QUEUE_FEAT_LRO) {
*features |= VMKNETDDI_QUEUEOPS_QUEUE_FEAT_LRO;
}
if (vmkfeatures & VMK_NETQUEUE_QUEUE_FEAT_PAIR) {
*features |= VMKNETDDI_QUEUEOPS_QUEUE_FEAT_PAIR;
}
return VMK_OK;
}
static VMK_ReturnStatus
marshall_from_vmknetq_attr(vmk_NetqueueQueueAttr *vmkattr,
u16 nattr,
vmknetddi_queueops_queueattr_t *attr)
{
int i;
for (i = 0; i < nattr; i++) {
switch (vmkattr[i].type) {
case VMK_NETQUEUE_QUEUE_ATTR_PRIOR:
attr[i].type = VMKNETDDI_QUEUEOPS_QUEUE_ATTR_PRIOR;
marshall_from_vmknetq_pri(vmkattr[i].args.priority,
&attr[i].args.priority);
break;
case VMK_NETQUEUE_QUEUE_ATTR_FEAT:
attr[i].type = VMKNETDDI_QUEUEOPS_QUEUE_ATTR_FEAT;
marshall_from_vmknetq_queue_features(vmkattr[i].args.features,
&attr[i].args.features);
break;
default:
return VMK_FAILURE;
}
}
return VMK_OK;
}
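/*
 * Example (a sketch, hypothetical values): a two-attribute request as it
 * would be passed to marshall_from_vmknetq_attr():
 *
 *    vmk_NetqueueQueueAttr vmkattr[2];
 *    vmknetddi_queueops_queueattr_t attr[2];
 *
 *    vmkattr[0].type = VMK_NETQUEUE_QUEUE_ATTR_PRIOR;
 *    vmkattr[0].args.priority = somePriority;          // hypothetical value
 *    vmkattr[1].type = VMK_NETQUEUE_QUEUE_ATTR_FEAT;
 *    vmkattr[1].args.features = VMK_NETQUEUE_QUEUE_FEAT_LRO;
 *    marshall_from_vmknetq_attr(vmkattr, 2, attr);
 *
 * Any attribute type other than PRIOR/FEAT makes the whole call return
 * VMK_FAILURE.
 */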
/*
*----------------------------------------------------------------------------
*
* netqueue_op_get_version --
*
* Get driver Netqueue version
*
* Results:
* VMK_OK on success. VMK_NOT_SUPPORTED, if operation is not supported by
* device. VMK_FAILURE, if operation fails.
*
* Side effects:
* None.
*
*----------------------------------------------------------------------------
*/
static VMK_ReturnStatus
netqueue_op_get_version(void *clientData,
void *opArgs)
{
int result;
VMK_ReturnStatus ret;
vmknetddi_queueop_get_version_args_t args;
vmk_NetqueueOpGetVersionArgs *vmkargs = (vmk_NetqueueOpGetVersionArgs *)opArgs;
struct net_device *dev = (struct net_device *)clientData;
VMK_ASSERT(dev);
if (dev->netqueue_ops) {
VMKAPI_MODULE_CALL(dev->module_id, result, dev->netqueue_ops,
VMKNETDDI_QUEUEOPS_OP_GET_VERSION, &args);
if (result != 0) {
ret = VMK_FAILURE;
} else {
vmkargs->major = args.major;
vmkargs->minor = args.minor;
ret = VMK_OK;
}
} else {
VMKLNX_DEBUG(0, "!dev->netqueue_ops");
ret = VMK_NOT_SUPPORTED;
}
return ret;
}
/*
*----------------------------------------------------------------------------
*
* netqueue_op_get_features --
*
* Get driver Netqueue features
*
* Results:
* VMK_OK on success, VMK_FAILURE on error, VMK_NOT_SUPPORTED if
 *      Netqueue ops are not supported by the driver
*
* Side effects:
* None.
*
*----------------------------------------------------------------------------
*/
static VMK_ReturnStatus
netqueue_op_get_features(void *clientData,
void *opArgs)
{
int result;
VMK_ReturnStatus ret;
vmknetddi_queueop_get_features_args_t args;
vmk_NetqueueOpGetFeaturesArgs *vmkargs = (vmk_NetqueueOpGetFeaturesArgs *)opArgs;
struct net_device *dev = (struct net_device *)clientData;
VMK_ASSERT(dev);
args.netdev = dev;
args.features = VMKNETDDI_QUEUEOPS_FEATURE_NONE;
if (dev->netqueue_ops) {
VMKAPI_MODULE_CALL(dev->module_id, result, dev->netqueue_ops,
VMKNETDDI_QUEUEOPS_OP_GET_FEATURES, &args);
if (result != 0) {
ret = VMK_FAILURE;
} else {
ret = marshall_to_vmknetq_features(args.features, &vmkargs->features);
VMK_ASSERT(ret == VMK_OK);
}
} else {
VMKLNX_DEBUG(0, "!dev->netqueue_ops");
ret = VMK_NOT_SUPPORTED;
}
return ret;
}
/*
*----------------------------------------------------------------------------
*
* netqueue_op_get_queue_count --
*
 *      Get count of tx or rx queues supported by the driver
*
* Results:
* VMK_OK on success, VMK_FAILURE on error, VMK_NOT_SUPPORTED if
* not supported
*
* Side effects:
* None.
*
*----------------------------------------------------------------------------
*/
static VMK_ReturnStatus
netqueue_op_get_queue_count(void *clientData,
void *opArgs)
{
int result;
VMK_ReturnStatus ret;
vmknetddi_queueop_get_queue_count_args_t args;
vmk_NetqueueOpGetQueueCountArgs *vmkargs =
(vmk_NetqueueOpGetQueueCountArgs *)opArgs;
struct net_device *dev = (struct net_device *)clientData;
VMK_ASSERT(dev);
args.netdev = dev;
if (marshall_from_vmknetq_type(vmkargs->qtype, &args.type) != VMK_OK) {
return VMK_FAILURE;
}
if (dev->netqueue_ops) {
VMKAPI_MODULE_CALL(dev->module_id, result, dev->netqueue_ops,
VMKNETDDI_QUEUEOPS_OP_GET_QUEUE_COUNT, &args);
if (result != 0) {
ret = VMK_FAILURE;
} else {
vmkargs->count = args.count;
ret = VMK_OK;
}
} else {
VMKLNX_DEBUG(0, "!dev->netqueue_ops");
ret = VMK_NOT_SUPPORTED;
}
return ret;
}
/*
*----------------------------------------------------------------------------
*
* netqueue_op_get_filter_count --
*
 *      Get number of rx filters supported by the driver
*
* Results:
* VMK_OK on success, VMK_FAILURE on error, VMK_NOT_SUPPORTED if
* not supported
*
* Side effects:
* None.
*
*----------------------------------------------------------------------------
*/
static VMK_ReturnStatus
netqueue_op_get_filter_count(void *clientData,
void *opArgs)
{
int result;
VMK_ReturnStatus ret;
vmknetddi_queueop_get_filter_count_args_t args;
vmk_NetqueueOpGetFilterCountArgs *vmkargs = (vmk_NetqueueOpGetFilterCountArgs *)opArgs;
struct net_device *dev = (struct net_device *)clientData;
VMK_ASSERT(dev);
args.netdev = dev;
if (marshall_from_vmknetq_type(vmkargs->qtype, &args.type) != VMK_OK) {
return VMK_FAILURE;
}
if (dev->netqueue_ops) {
VMKAPI_MODULE_CALL(dev->module_id, result, dev->netqueue_ops,
VMKNETDDI_QUEUEOPS_OP_GET_FILTER_COUNT, &args);
if (result != 0) {
ret = VMK_FAILURE;
} else {
vmkargs->count = args.count;
ret = VMK_OK;
}
} else {
VMKLNX_DEBUG(0, "!dev->netqueue_ops");
ret = VMK_NOT_SUPPORTED;
}
return ret;
}
/*
*----------------------------------------------------------------------------
*
* dev_add_netqueue_qid --
*
* Record new netqueue qid
*
* Results:
* VMK_ReturnStatus
*
* Side effects:
* None.
*
*----------------------------------------------------------------------------
*/
static VMK_ReturnStatus
dev_add_netqueue_qid(struct net_device *dev,
u16 qidx,
vmk_NetqueueQueueID vmkqid)
{
VMK_ReturnStatus ret = VMK_OK;
struct tx_netqueue_info *txinfo = dev->tx_netqueue_info;
VMK_ASSERT(txinfo);
if (qidx < dev->num_tx_queues) {
VMK_ASSERT(txinfo[qidx].valid == VMK_FALSE);
txinfo[qidx].valid = VMK_TRUE;
txinfo[qidx].vmkqid = vmkqid;
} else {
ret = VMK_FAILURE;
}
return ret;
}
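/*
 * tx_netqueue_info is indexed by hardware tx queue number
 * (0..num_tx_queues-1). After a successful allocation of, say, tx queue 3
 * (a hypothetical index):
 *
 *    txinfo[3].valid  == VMK_TRUE
 *    txinfo[3].vmkqid == the vmk_NetqueueQueueID handed back to the stack
 */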
/*
*----------------------------------------------------------------------------
*
* dev_remove_netqueue_qid --
*
 *      Remove a previously recorded netqueue qid
*
* Results:
* None.
*
* Side effects:
* None.
*
*----------------------------------------------------------------------------
*/
static void
dev_remove_netqueue_qid(struct net_device *dev,
u32 qidx)
{
struct tx_netqueue_info *txinfo = dev->tx_netqueue_info;
VMK_ASSERT(txinfo);
VMK_ASSERT(txinfo[qidx].valid == VMK_TRUE);
txinfo[qidx].valid = VMK_FALSE;
txinfo[qidx].vmkqid = VMK_NETQUEUE_INVALID_QUEUEID;
}
/*
*----------------------------------------------------------------------------
*
* netqueue_op_alloc_queue --
*
* Call driver netqueue_op for allocating queue
*
* Results:
* VMK_OK on success, VMK_FAILURE on error, VMK_NOT_SUPPORTED if
* not supported
*
* Side effects:
* None.
*
*----------------------------------------------------------------------------
*/
static VMK_ReturnStatus
netqueue_op_alloc_queue(void *clientData,
void *opArgs)
{
int result;
VMK_ReturnStatus ret;
vmknetddi_queueop_alloc_queue_args_t args;
vmknetddi_queueop_free_queue_args_t freeargs;
vmk_NetqueueOpAllocQueueArgs *vmkargs = (vmk_NetqueueOpAllocQueueArgs *)opArgs;
struct net_device *dev = (struct net_device *)clientData;
vmk_NetqueueQueueType qtype = vmkargs->qtype;
VMK_ASSERT(dev);
args.netdev = dev;
args.napi = NULL;
args.queue_mapping = 0;
   if (qtype != VMK_NETQUEUE_QUEUE_TYPE_RX &&
       qtype != VMK_NETQUEUE_QUEUE_TYPE_TX) {
VMKLNX_DEBUG(0, "invalid vmkqueue type 0x%x", qtype);
return VMK_FAILURE;
}
ret = marshall_from_vmknetq_type(qtype, &args.type);
VMK_ASSERT(ret == VMK_OK);
if (ret != VMK_OK) {
return VMK_FAILURE;
}
if (dev->netqueue_ops) {
VMKAPI_MODULE_CALL(dev->module_id, result, dev->netqueue_ops,
VMKNETDDI_QUEUEOPS_OP_ALLOC_QUEUE, &args);
if (result != 0) {
ret = VMK_FAILURE;
} else {
if (qtype == VMK_NETQUEUE_QUEUE_TYPE_TX) {
VMK_ASSERT(VMKNETDDI_QUEUEOPS_QUEUEID_VAL(args.queueid) < dev->num_tx_queues);
if (args.queue_mapping) {
VMK_ASSERT(args.queue_mapping ==
VMKNETDDI_QUEUEOPS_QUEUEID_VAL(args.queueid));
}
} else {
VMK_ASSERT(qtype == VMK_NETQUEUE_QUEUE_TYPE_RX);
if (args.napi != NULL) {
vmkargs->net_poll = args.napi->net_poll;
}
}
ret = marshall_to_vmknetq_id(args.queueid, &vmkargs->qid);
VMK_ASSERT(ret == VMK_OK);
if (unlikely(ret != VMK_OK)) {
goto error_free;
}
if (qtype == VMK_NETQUEUE_QUEUE_TYPE_TX) {
u16 qidx = VMKNETDDI_QUEUEOPS_QUEUEID_VAL(args.queueid);
ret = dev_add_netqueue_qid(dev, qidx, vmkargs->qid);
if (ret != VMK_OK) {
VMKLNX_DEBUG(0, "%s: failed to add netqueue qidx=%d", dev->name, qidx);
goto error_free;
}
}
}
} else {
VMKLNX_DEBUG(0, "!dev->netqueue_ops");
ret = VMK_NOT_SUPPORTED;
}
out:
return ret;
error_free:
VMK_ASSERT(ret != VMK_OK);
freeargs.netdev = dev;
freeargs.queueid = args.queueid;
VMKAPI_MODULE_CALL(dev->module_id, result, dev->netqueue_ops,
VMKNETDDI_QUEUEOPS_OP_FREE_QUEUE, &freeargs);
goto out;
}
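/*
 * Driver-side contract (a sketch under an assumed adapter layout, not any
 * specific driver's code): the VMKNETDDI_QUEUEOPS_OP_ALLOC_QUEUE handler
 * is expected to fill in args->queueid and, for rx queues, args->napi:
 *
 *    args->queueid = VMKNETDDI_QUEUEOPS_MK_RX_QUEUEID(idx);
 *    args->napi = &adapter->rx_ring[idx].napi;   // "adapter" is hypothetical
 *    return 0;
 *
 * If anything fails after the driver call succeeds (e.g. an unmappable
 * queue id), the error_free path above releases the queue again via
 * OP_FREE_QUEUE, so vmklinux and the driver never disagree about queue
 * ownership.
 */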
/*
*----------------------------------------------------------------------------
*
* netqueue_op_alloc_queue_with_attr --
*
* Call driver netqueue_op for allocating queue with attributes
*
* Results:
* VMK_OK on success, VMK_FAILURE on error, VMK_NOT_SUPPORTED if
* not supported
*
* Side effects:
* None.
*
*----------------------------------------------------------------------------
*/
static VMK_ReturnStatus
netqueue_op_alloc_queue_with_attr(void *clientData,
void *opArgs)
{
int result;
VMK_ReturnStatus ret;
vmknetddi_queueop_alloc_queue_with_attr_args_t args;
vmknetddi_queueop_free_queue_args_t freeargs;
vmk_NetqueueOpAllocQueueArgs vmkallocargs;
vmk_NetqueueOpAllocQueueWithAttrArgs *vmkargs =
(vmk_NetqueueOpAllocQueueWithAttrArgs *)opArgs;
struct net_device *dev = (struct net_device *)clientData;
vmk_NetqueueQueueType qtype = vmkargs->qtype;
vmknetddi_queueops_queueattr_t attr[VMKNETDDI_QUEUEOPS_QUEUE_ATTR_NUM];
/* If alloc without attributes, just call normal alloc queue */
if (vmkargs->nattr == 0) {
memset(&vmkallocargs, 0, sizeof(vmkallocargs));
vmkallocargs.net_poll = NULL;
vmkallocargs.qtype = qtype;
vmkallocargs.qid = vmkargs->qid;
ret = netqueue_op_alloc_queue(clientData, &vmkallocargs);
if (ret == VMK_OK) {
vmkargs->net_poll = vmkallocargs.net_poll;
vmkargs->qid = vmkallocargs.qid;
return VMK_OK;
} else {
return VMK_FAILURE;
}
}
VMK_ASSERT(dev);
args.netdev = dev;
args.napi = NULL;
args.queue_mapping = 0;
if (vmkargs->nattr > VMKNETDDI_QUEUEOPS_QUEUE_ATTR_NUM) {
VMK_ASSERT(VMK_FALSE);
return VMK_LIMIT_EXCEEDED;
}
args.nattr = vmkargs->nattr;
   if (qtype != VMK_NETQUEUE_QUEUE_TYPE_RX &&
       qtype != VMK_NETQUEUE_QUEUE_TYPE_TX) {
VMKLNX_DEBUG(0, "invalid vmkqueue type 0x%x", qtype);
return VMK_FAILURE;
}
ret = marshall_from_vmknetq_type(qtype, &args.type);
VMK_ASSERT(ret == VMK_OK);
if (ret != VMK_OK) {
return VMK_FAILURE;
}
memset(attr, 0, sizeof(attr));
ret = marshall_from_vmknetq_attr(vmkargs->attr, vmkargs->nattr, attr);
VMK_ASSERT(ret == VMK_OK);
if (ret != VMK_OK) {
return VMK_FAILURE;
}
args.attr = attr;
if (dev->netqueue_ops) {
VMKAPI_MODULE_CALL(dev->module_id, result, dev->netqueue_ops,
VMKNETDDI_QUEUEOPS_OP_ALLOC_QUEUE_WITH_ATTR, &args);
if (result != 0) {
ret = VMK_FAILURE;
} else {
if (qtype == VMK_NETQUEUE_QUEUE_TYPE_TX) {
VMK_ASSERT(VMKNETDDI_QUEUEOPS_QUEUEID_VAL(args.queueid) < dev->num_tx_queues);
if (args.queue_mapping) {
VMK_ASSERT(args.queue_mapping ==
VMKNETDDI_QUEUEOPS_QUEUEID_VAL(args.queueid));
}
} else {
VMK_ASSERT(qtype == VMK_NETQUEUE_QUEUE_TYPE_RX);
if (args.napi != NULL) {
vmkargs->net_poll = args.napi->net_poll;
}
}
ret = marshall_to_vmknetq_id(args.queueid, &vmkargs->qid);
VMK_ASSERT(ret == VMK_OK);
if (unlikely(ret != VMK_OK)) {
VMKLNX_DEBUG(0, "invalid qid. freeing allocated queue");
goto error_free;
}
if (qtype == VMK_NETQUEUE_QUEUE_TYPE_TX) {
u16 qidx = VMKNETDDI_QUEUEOPS_QUEUEID_VAL(args.queueid);
ret = dev_add_netqueue_qid(dev, qidx, vmkargs->qid);
if (ret != VMK_OK) {
VMKLNX_DEBUG(0, "%s: failed to add netqueue qidx=%d", dev->name, qidx);
goto error_free;
}
}
}
} else {
VMKLNX_DEBUG(0, "!dev->netqueue_ops");
ret = VMK_NOT_SUPPORTED;
}
out:
return ret;
error_free:
VMK_ASSERT(ret != VMK_OK);
freeargs.netdev = dev;
freeargs.queueid = args.queueid;
VMKAPI_MODULE_CALL(dev->module_id, result, dev->netqueue_ops,
VMKNETDDI_QUEUEOPS_OP_FREE_QUEUE, &freeargs);
goto out;
}
/*
*----------------------------------------------------------------------------
*
* netqueue_op_free_queue --
*
* Free queue
*
* Results:
* VMK_OK on success, VMK_FAILURE on error, VMK_NOT_SUPPORTED if
* not supported
*
* Side effects:
* None.
*
*----------------------------------------------------------------------------
*/
static VMK_ReturnStatus
netqueue_op_free_queue(void *clientData,
void *opArgs)
{
int result;
VMK_ReturnStatus ret;
vmknetddi_queueop_free_queue_args_t args;
vmk_NetqueueOpFreeQueueArgs *vmkargs = (vmk_NetqueueOpFreeQueueArgs *)opArgs;
vmk_NetqueueQueueID vmkqid = vmkargs->qid;
vmk_NetqueueQueueType qtype = vmk_NetqueueQueueIDType(vmkqid);
struct net_device *dev = (struct net_device *)clientData;
VMK_ASSERT(dev);
args.netdev = dev;
ret = marshall_from_vmknetq_id(vmkqid, &args.queueid);
VMK_ASSERT(ret == VMK_OK);
if (ret != VMK_OK) {
return ret;
}
if (qtype == VMK_NETQUEUE_QUEUE_TYPE_TX) {
dev_remove_netqueue_qid(dev,
VMKNETDDI_QUEUEOPS_QUEUEID_VAL(args.queueid));
}
if (dev->netqueue_ops) {
VMKAPI_MODULE_CALL(dev->module_id, result, dev->netqueue_ops,
VMKNETDDI_QUEUEOPS_OP_FREE_QUEUE, &args);
if (result != 0) {
ret = VMK_FAILURE;
} else {
ret = VMK_OK;
}
} else {
VMKLNX_DEBUG(0, "!dev->netqueue_ops");
ret = VMK_NOT_SUPPORTED;
}
return ret;
}
/*
*----------------------------------------------------------------------------
*
* netqueue_op_get_queue_vector --
*
* Get interrupt vector for the queue
*
* Results:
* VMK_OK on success, VMK_FAILURE on error, VMK_NOT_SUPPORTED if
* not supported
*
* Side effects:
* None.
*
*----------------------------------------------------------------------------
*/
static VMK_ReturnStatus
netqueue_op_get_queue_vector(void *clientData,
void *opArgs)
{
int result;
VMK_ReturnStatus ret;
vmknetddi_queueop_get_queue_vector_args_t args;
vmk_NetqueueOpGetQueueVectorArgs *vmkargs = (vmk_NetqueueOpGetQueueVectorArgs *)opArgs;
struct net_device *dev = (struct net_device *)clientData;
VMK_ASSERT(dev);
args.netdev = dev;
ret = marshall_from_vmknetq_id(vmkargs->qid, &args.queueid);
VMK_ASSERT(ret == VMK_OK);
if (ret != VMK_OK) {
return ret;
}
if (dev->netqueue_ops) {
VMKAPI_MODULE_CALL(dev->module_id, result, dev->netqueue_ops,
VMKNETDDI_QUEUEOPS_OP_GET_QUEUE_VECTOR, &args);
if (result != 0) {
ret = VMK_FAILURE;
} else {
vmkargs->vector = args.vector;
ret = VMK_OK;
}
} else {
VMKLNX_DEBUG(0, "!dev->netqueue_ops");
ret = VMK_NOT_SUPPORTED;
}
return ret;
}
/*
*----------------------------------------------------------------------------
*
* netqueue_op_get_default_queue --
*
* Get default queue for tx/rx operations
*
* Results:
* VMK_OK on success, VMK_FAILURE on error, VMK_NOT_SUPPORTED if
* not supported
*
* Side effects:
* None.
*
*----------------------------------------------------------------------------
*/
static VMK_ReturnStatus
netqueue_op_get_default_queue(void *clientData,
void *opArgs)
{
int result;
VMK_ReturnStatus ret;
vmknetddi_queueop_get_default_queue_args_t args;
vmk_NetqueueOpGetDefaultQueueArgs *vmkargs =
(vmk_NetqueueOpGetDefaultQueueArgs *)opArgs;
struct net_device *dev = (struct net_device *)clientData;
vmk_NetqueueQueueType qtype = vmkargs->qtype;
VMK_ASSERT(dev);
args.netdev = dev;
args.napi = NULL;
args.queue_mapping = 0;
ret = marshall_from_vmknetq_type(qtype, &args.type);
VMK_ASSERT(ret == VMK_OK);
if (ret != VMK_OK) {
return ret;
}
if (dev->netqueue_ops) {
VMKAPI_MODULE_CALL(dev->module_id, result, dev->netqueue_ops,
VMKNETDDI_QUEUEOPS_OP_GET_DEFAULT_QUEUE, &args);
if (result != 0) {
ret = VMK_FAILURE;
} else {
if (qtype == VMK_NETQUEUE_QUEUE_TYPE_TX) {
VMK_ASSERT(VMKNETDDI_QUEUEOPS_QUEUEID_VAL(args.queueid) < dev->num_tx_queues);
} else {
VMK_ASSERT(qtype == VMK_NETQUEUE_QUEUE_TYPE_RX);
if (args.napi != NULL) {
vmkargs->net_poll = args.napi->net_poll;
}
}
ret = marshall_to_vmknetq_id(args.queueid, &vmkargs->qid);
VMK_ASSERT(ret == VMK_OK);
}
} else {
VMKLNX_DEBUG(0, "!dev->netqueue_ops");
ret = VMK_NOT_SUPPORTED;
}
return ret;
}
/*
*----------------------------------------------------------------------------
*
* netqueue_op_apply_rx_filter --
*
* Apply rx filter on queue
*
* Results:
* VMK_OK on success, VMK_FAILURE on error, VMK_NOT_SUPPORTED if
* not supported
*
* Side effects:
* None.
*
*----------------------------------------------------------------------------
*/
static VMK_ReturnStatus
netqueue_op_apply_rx_filter(void *clientData,
void *opArgs)
{
int result;
VMK_ReturnStatus ret;
vmknetddi_queueop_apply_rx_filter_args_t args;
vmk_NetqueueOpApplyRxFilterArgs *vmkargs =
(vmk_NetqueueOpApplyRxFilterArgs *)opArgs;
struct net_device *dev = (struct net_device *)clientData;
VMK_ASSERT(dev);
args.netdev = dev;
ret = marshall_from_vmknetq_id(vmkargs->qid, &args.queueid);
VMK_ASSERT(ret == VMK_OK);
if (ret != VMK_OK) {
return ret;
}
ret = marshall_from_vmknetq_filter_type(&vmkargs->filter, &args.filter);
VMK_ASSERT(ret == VMK_OK);
if (ret != VMK_OK) {
return ret;
}
if (dev->netqueue_ops) {
VMKAPI_MODULE_CALL(dev->module_id, result, dev->netqueue_ops,
VMKNETDDI_QUEUEOPS_OP_APPLY_RX_FILTER, &args);
if (result != 0) {
VMKLNX_DEBUG(0, "vmknetddi_queueops_apply_rx_filter returned %d", result);
ret = VMK_FAILURE;
} else {
ret = marshall_to_vmknetq_filter_id(args.filterid, &vmkargs->fid);
vmkargs->pairhwqid = args.pairtxqid;
VMK_ASSERT(ret == VMK_OK);
}
} else {
VMKLNX_DEBUG(0, "!dev->netqueue_ops");
ret = VMK_NOT_SUPPORTED;
}
return ret;
}
/*
*----------------------------------------------------------------------------
*
* netqueue_op_remove_rx_filter --
*
* Remove rx filter from queue
*
* Results:
* VMK_OK on success, VMK_FAILURE on error, VMK_NOT_SUPPORTED if
* not supported
*
* Side effects:
* None.
*
*----------------------------------------------------------------------------
*/
static VMK_ReturnStatus
netqueue_op_remove_rx_filter(void *clientData,
void *opArgs)
{
int result;
VMK_ReturnStatus ret;
vmknetddi_queueop_remove_rx_filter_args_t args;
vmk_NetqueueOpRemoveRxFilterArgs *vmkargs =
(vmk_NetqueueOpRemoveRxFilterArgs *)opArgs;
struct net_device *dev = (struct net_device *)clientData;
VMK_ASSERT(dev);
args.netdev = dev;
ret = marshall_from_vmknetq_id(vmkargs->qid, &args.queueid);
VMK_ASSERT(ret == VMK_OK);
if (ret != VMK_OK) {
return ret;
}
ret = marshall_from_vmknetq_filter_id(vmkargs->fid, &args.filterid);
VMK_ASSERT(ret == VMK_OK);
if (ret != VMK_OK) {
return ret;
}
if (dev->netqueue_ops) {
VMKAPI_MODULE_CALL(dev->module_id, result, dev->netqueue_ops,
VMKNETDDI_QUEUEOPS_OP_REMOVE_RX_FILTER, &args);
if (result != 0) {
VMKLNX_DEBUG(0, "vmknetddi_queueops_remove_rx_filter returned %d",
result);
ret = VMK_FAILURE;
} else {
ret = VMK_OK;
}
} else {
VMKLNX_DEBUG(0, "!dev->netqueue_ops");
ret = VMK_NOT_SUPPORTED;
}
return ret;
}
/*
*----------------------------------------------------------------------------
*
* netqueue_op_get_queue_stats --
*
* Get queue statistics
*
* Results:
 *      VMK_NOT_SUPPORTED always; queue statistics are not implemented here.
*
* Side effects:
* None.
*
*----------------------------------------------------------------------------
*/
static VMK_ReturnStatus
netqueue_op_get_queue_stats(void *clientData,
void *opArgs)
{
return VMK_NOT_SUPPORTED;
}
/*
*----------------------------------------------------------------------------
*
* netqueue_op_set_tx_priority --
*
* Set tx queue priority
*
* Results:
* VMK_OK on success, VMK_FAILURE on error, VMK_NOT_SUPPORTED if
* not supported
*
* Side effects:
* None.
*
*----------------------------------------------------------------------------
*/
static VMK_ReturnStatus
netqueue_op_set_tx_priority(void *clientData,
void *opArgs)
{
VMK_ReturnStatus ret;
vmk_NetqueueOpSetTxPriorityArgs *vmkargs = opArgs;
vmknetddi_queueop_set_tx_priority_args_t args;
struct net_device *dev = (struct net_device *)clientData;
args.netdev = dev;
ret = marshall_from_vmknetq_id(vmkargs->qid, &args.queueid);
VMK_ASSERT(ret == VMK_OK);
if (ret != VMK_OK) {
return ret;
}
ret = marshall_from_vmknetq_pri(vmkargs->priority, &args.priority);
VMK_ASSERT(ret == VMK_OK);
if (ret != VMK_OK) {
return ret;
}
if (dev->netqueue_ops) {
int result;
VMKAPI_MODULE_CALL(dev->module_id, result, dev->netqueue_ops,
VMKNETDDI_QUEUEOPS_OP_SET_TX_PRIORITY, &args);
if (result != 0) {
return VMK_FAILURE;
} else {
return VMK_OK;
}
} else {
VMKLNX_DEBUG(0, "!dev->netqueue_ops");
return VMK_NOT_SUPPORTED;
}
}
/*
*----------------------------------------------------------------------------
*
* netqueue_op_getset_state --
* Get and Set Netqueue Valid State
*
* Results:
 *      VMK_OK on success, VMK_NOT_SUPPORTED if the driver has no netqueue
 *      ops; the previous state is returned through opArgs.
*
* Side effects:
* None.
*
*----------------------------------------------------------------------------
*/
static VMK_ReturnStatus
netqueue_op_getset_state(void *clientData,
void *opArgs)
{
vmk_NetqueueOpGetSetQueueStateArgs *vmkargs =
(vmk_NetqueueOpGetSetQueueStateArgs *)opArgs;
struct net_device *dev = (struct net_device *)clientData;
VMK_ASSERT(dev);
if (dev->netqueue_ops) {
vmkargs->oldState = vmknetddi_queueops_getset_state(dev, vmkargs->newState);
return VMK_OK;
} else {
VMKLNX_DEBUG(0, "!dev->netqueue_ops");
return VMK_NOT_SUPPORTED;
}
}
/*
*----------------------------------------------------------------------------
*
* netqueue_op_enable_queue_feat --
* Enable queue's features
*
* Results:
* VMK_ReturnStatus
*
* Side effects:
* None.
*
*----------------------------------------------------------------------------
*/
static VMK_ReturnStatus
netqueue_op_enable_queue_feat(void *clientData,
void *opArgs)
{
VMK_ReturnStatus ret;
vmk_NetqueueOpEnableQueueFeatArgs *vmkargs = opArgs;
vmknetddi_queueop_enable_feat_args_t args;
struct net_device *dev = (struct net_device *)clientData;
args.netdev = dev;
args.features = 0;
ret = marshall_from_vmknetq_id(vmkargs->qid, &args.queueid);
VMK_ASSERT(ret == VMK_OK);
if (ret != VMK_OK) {
return ret;
}
marshall_from_vmknetq_queue_features(vmkargs->features,
&args.features);
if (dev->netqueue_ops) {
int result;
VMKAPI_MODULE_CALL(dev->module_id, result, dev->netqueue_ops,
VMKNETDDI_QUEUEOPS_OP_ENABLE_FEAT, &args);
if (result != 0) {
return VMK_FAILURE;
} else {
return VMK_OK;
}
} else {
VMKLNX_DEBUG(0, "!dev->netqueue_ops");
return VMK_NOT_SUPPORTED;
}
}
/*
*----------------------------------------------------------------------------
*
* netqueue_op_disable_queue_feat --
* Disable queue's features
*
* Results:
* VMK_ReturnStatus
*
* Side effects:
* None.
*
*----------------------------------------------------------------------------
*/
static VMK_ReturnStatus
netqueue_op_disable_queue_feat(void *clientData,
void *opArgs)
{
VMK_ReturnStatus ret;
vmk_NetqueueOpDisableQueueFeatArgs *vmkargs = opArgs;
vmknetddi_queueop_disable_feat_args_t args;
struct net_device *dev = (struct net_device *)clientData;
args.netdev = dev;
args.features = 0;
ret = marshall_from_vmknetq_id(vmkargs->qid, &args.queueid);
VMK_ASSERT(ret == VMK_OK);
if (ret != VMK_OK) {
return ret;
}
marshall_from_vmknetq_queue_features(vmkargs->features,
&args.features);
if (dev->netqueue_ops) {
int result;
VMKAPI_MODULE_CALL(dev->module_id, result, dev->netqueue_ops,
VMKNETDDI_QUEUEOPS_OP_DISABLE_FEAT, &args);
if (result != 0) {
return VMK_FAILURE;
} else {
return VMK_OK;
}
} else {
VMKLNX_DEBUG(0, "!dev->netqueue_ops");
return VMK_NOT_SUPPORTED;
}
}
/*
*----------------------------------------------------------------------------
*
* netqueue_op_get_queue_supported_feat --
 *      Get the queue features supported for the given queue type
*
* Results:
* VMK_ReturnStatus
*
* Side effects:
* None.
*
*----------------------------------------------------------------------------
*/
static VMK_ReturnStatus
netqueue_op_get_queue_supported_feat(void *clientData,
void *opArgs)
{
VMK_ReturnStatus ret;
vmk_NetqueueOpGetQueueSupFeatArgs *vmkargs = opArgs;
vmknetddi_queueop_get_sup_feat_args_t args;
struct net_device *dev = (struct net_device *)clientData;
args.netdev = dev;
ret = marshall_from_vmknetq_type(vmkargs->qtype, &args.type);
VMK_ASSERT(ret == VMK_OK);
if (ret != VMK_OK) {
return VMK_FAILURE;
}
if (dev->netqueue_ops) {
int result;
VMKAPI_MODULE_CALL(dev->module_id, result, dev->netqueue_ops,
VMKNETDDI_QUEUEOPS_OP_GET_SUPPORTED_FEAT, &args);
if (result != 0) {
return VMK_FAILURE;
} else {
marshall_to_vmknetq_queue_features(args.features,
&vmkargs->features);
return VMK_OK;
}
} else {
VMKLNX_DEBUG(0, "!dev->netqueue_ops");
return VMK_NOT_SUPPORTED;
}
}
/*
*----------------------------------------------------------------------------
*
* netqueue_op_get_queue_supported_filter_class --
 *      Get the filter classes supported for the given queue type
*
* Results:
* VMK_ReturnStatus
*
* Side effects:
* None.
*
*----------------------------------------------------------------------------
*/
static VMK_ReturnStatus
netqueue_op_get_queue_supported_filter_class(void *clientData,
void *opArgs)
{
VMK_ReturnStatus ret;
vmk_NetqueueOpGetQueueSupFilterArgs *vmkargs = opArgs;
vmknetddi_queueop_get_sup_filter_class_args_t args;
struct net_device *dev = (struct net_device *)clientData;
args.netdev = dev;
ret = marshall_from_vmknetq_type(vmkargs->qtype, &args.type);
VMK_ASSERT(ret == VMK_OK);
if (ret != VMK_OK) {
return VMK_FAILURE;
}
if (dev->netqueue_ops) {
int result;
VMKAPI_MODULE_CALL(dev->module_id, result, dev->netqueue_ops,
VMKNETDDI_QUEUEOPS_OP_GET_SUPPORTED_FILTER_CLASS,
&args);
if (result != 0) {
         /* Assume by default that only MAC address filters are supported */
vmkargs->class = VMK_NETQUEUE_FILTER_MACADDR;
} else {
marshall_to_vmknetq_supported_filter_class(args.class,
&vmkargs->class);
}
return VMK_OK;
} else {
VMKLNX_DEBUG(0, "!dev->netqueue_ops");
return VMK_NOT_SUPPORTED;
}
}
/*
*----------------------------------------------------------------------------
*
* LinNet_NetqueueSkbXmit --
*
* Transmit a skb on a pre-allocated Tx queue for a specific device
*
* Results:
* VMK_OK on success, VMK_FAILURE|VMK_BUSY on error
*
* Side effects:
* None.
*
*----------------------------------------------------------------------------
*/
VMK_ReturnStatus
LinNet_NetqueueSkbXmit(struct net_device *dev,
vmk_NetqueueQueueID vmkqid,
struct sk_buff *skb)
{
VMK_ReturnStatus status = VMK_OK;
struct netdev_queue *queue;
int xmit_status = -1;
queue = netdev_pick_tx_queue(dev, vmkqid);
VMK_ASSERT(queue != NULL);
skb->queue_mapping = queue - dev->_tx;
spin_lock(&queue->_xmit_lock);
queue->processing_tx = 1;
if (unlikely(netif_tx_queue_stopped(queue))) {
status = VMK_BUSY;
goto done;
}
VMKAPI_MODULE_CALL(dev->module_id, xmit_status,
*dev->hard_start_xmit, skb, dev);
/*
    * Map NETDEV_TX_OK and NETDEV_TX_BUSY to VMK_OK and VMK_BUSY. Other
    * NETDEV_TX_* values have no direct equivalent and are mapped to
    * VMK_FAILURE with a warning.
*/
if (xmit_status == NETDEV_TX_OK) {
status = VMK_OK;
} else if (xmit_status == NETDEV_TX_BUSY) {
status = VMK_BUSY;
} else if (xmit_status == NETDEV_TX_LOCKED) {
status = VMK_BUSY;
} else {
VMKLNX_WARN("Unknown NETDEV_TX status %d, map to VMK_FAILURE\n",
xmit_status);
status = VMK_FAILURE;
}
done:
queue->processing_tx = 0;
spin_unlock(&queue->_xmit_lock);
return status;
}
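/*
 * Summary of the NETDEV_TX_* to VMK_ReturnStatus mapping used above:
 *
 *    NETDEV_TX_OK     -> VMK_OK
 *    NETDEV_TX_BUSY   -> VMK_BUSY    (caller may retry/requeue)
 *    NETDEV_TX_LOCKED -> VMK_BUSY
 *    anything else    -> VMK_FAILURE (logged with a warning)
 */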
/*
*----------------------------------------------------------------------------
*
* LinNetNetqueueOpFunc --
* Netqueue ops handler for vmklinux
*
* Results:
* VMK_OK on success, VMK_FAILURE on error
*
* Side effects:
* None.
*
*----------------------------------------------------------------------------
*/
static VMK_ReturnStatus
LinNetNetqueueOpFunc(void *clientData,
vmk_NetqueueOp op,
void *opArgs)
{
switch (op) {
case VMK_NETQUEUE_OP_GET_VERSION:
return netqueue_op_get_version(clientData, opArgs);
case VMK_NETQUEUE_OP_GET_FEATURES:
return netqueue_op_get_features(clientData, opArgs);
case VMK_NETQUEUE_OP_QUEUE_COUNT:
return netqueue_op_get_queue_count(clientData, opArgs);
case VMK_NETQUEUE_OP_FILTER_COUNT:
return netqueue_op_get_filter_count(clientData, opArgs);
case VMK_NETQUEUE_OP_ALLOC_QUEUE:
return netqueue_op_alloc_queue(clientData, opArgs);
case VMK_NETQUEUE_OP_FREE_QUEUE:
return netqueue_op_free_queue(clientData, opArgs);
case VMK_NETQUEUE_OP_GET_QUEUE_VECTOR:
return netqueue_op_get_queue_vector(clientData, opArgs);
case VMK_NETQUEUE_OP_GET_DEFAULT_QUEUE:
return netqueue_op_get_default_queue(clientData, opArgs);
case VMK_NETQUEUE_OP_APPLY_RX_FILTER:
return netqueue_op_apply_rx_filter(clientData, opArgs);
case VMK_NETQUEUE_OP_REMOVE_RX_FILTER:
return netqueue_op_remove_rx_filter(clientData, opArgs);
case VMK_NETQUEUE_OP_GET_QUEUE_STATS:
return netqueue_op_get_queue_stats(clientData, opArgs);
case VMK_NETQUEUE_OP_SET_TX_PRIORITY:
return netqueue_op_set_tx_priority(clientData, opArgs);
case VMK_NETQUEUE_OP_GETSET_QUEUE_STATE:
return netqueue_op_getset_state(clientData, opArgs);
case VMK_NETQUEUE_OP_ALLOC_QUEUE_WITH_ATTR:
return netqueue_op_alloc_queue_with_attr(clientData, opArgs);
case VMK_NETQUEUE_OP_ENABLE_QUEUE_FEAT:
return netqueue_op_enable_queue_feat(clientData, opArgs);
case VMK_NETQUEUE_OP_DISABLE_QUEUE_FEAT:
return netqueue_op_disable_queue_feat(clientData, opArgs);
case VMK_NETQUEUE_OP_GET_QUEUE_SUPPORTED_FEAT:
return netqueue_op_get_queue_supported_feat(clientData, opArgs);
case VMK_NETQUEUE_OP_GET_QUEUE_SUPPORTED_FILTER_CLASS:
return netqueue_op_get_queue_supported_filter_class(clientData, opArgs);
default:
return VMK_FAILURE;
}
}
/*
*----------------------------------------------------------------------------
*
* LinNet_NetqueueOp --
*
* Submit a netqueue operation to a specific device
*
* Results:
* VMK_OK on success, VMK_FAILURE on error
*
* Side effects:
* None.
*
*----------------------------------------------------------------------------
*/
VMK_ReturnStatus
LinNet_NetqueueOp(struct net_device *dev,
vmk_NetqueueOp op,
void *opArgs)
{
return LinNetNetqueueOpFunc((void *) dev, op, opArgs);
}
/*
*-----------------------------------------------------------------------------
*
* LinNetPTOpFunc --
*
* This function dispatches the requested passthru control or
* eSwitch operation to the corresponding driver.
*
* Results:
* VMK_NOT_SUPPORTED if the uplink doesn't support PT/eSwitch or
* if the desired operation is not implemented. VMK_OK on
* success. Any other error code from the driver on failure.
*
* Side effects:
* Calls the uplink driver.
*
*-----------------------------------------------------------------------------
*/
static VMK_ReturnStatus
LinNetPTOpFunc(void *clientData, vmk_NetPTOP op, void *args)
{
struct net_device *dev = (struct net_device *)clientData;
VMK_ReturnStatus status;
VMK_ASSERT(dev);
if (!dev->pt_ops) {
return VMK_NOT_SUPPORTED;
}
if (op == VMK_NETPTOP_IS_SUPPORTED) {
return VMK_OK;
}
/*
* If _attempting_ to get a VF, let's increment the refCount.
*/
if (op == VMK_NETPTOP_VF_ACQUIRE) {
vmk_ModuleIncUseCount(dev->module_id);
}
rtnl_lock();
VMKAPI_MODULE_CALL(dev->module_id,
status,
(vmk_UplinkPTOpFunc) dev->pt_ops,
dev,
op,
args);
rtnl_unlock();
/*
* If we succeeded to acquire a VF, then don't do anything. If we
* failed, let's decrement the refCount. If we successfully
* released a VF, decrement the refCount.
*/
if ((op == VMK_NETPTOP_VF_ACQUIRE && status != VMK_OK) ||
(op == VMK_NETPTOP_VF_RELEASE && status == VMK_OK)) {
vmk_ModuleDecUseCount(dev->module_id);
}
return status;
}
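/*
 * Module use-count balance for the VF passthru ops handled above (a
 * restatement of the logic, not additional behavior):
 *
 *    op          status     net refcount effect
 *    VF_ACQUIRE  VMK_OK     +1 (held until the VF is released)
 *    VF_ACQUIRE  failure     0 (+1 before the call, -1 after)
 *    VF_RELEASE  VMK_OK     -1
 *    VF_RELEASE  failure     0 (VF is still held)
 */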
/*
*----------------------------------------------------------------------------
*
* GetMACAddr --
*
* Return the MAC address of the NIC.
*
* Results:
 *      VMK_OK always.
*
* Side effects:
* None
*
*----------------------------------------------------------------------------
*/
static VMK_ReturnStatus
GetMACAddr(void *clientData, vmk_uint8 *macAddr)
{
struct net_device *dev = (struct net_device *)clientData;
memcpy(macAddr, dev->dev_addr, 6);
return VMK_OK;
}
/*
*----------------------------------------------------------------------------
*
* GetDeviceName --
*
 *      Return the system name of the corresponding device
 *
 * Results:
 *      VMK_ReturnStatus from the underlying string copy.
*
* Side effects:
* When the dev->pdev is NULL, we return the dev->name (pseudo device name)
* instead
*
*----------------------------------------------------------------------------
*/
static VMK_ReturnStatus
GetDeviceName(void *device,
char *devName,
vmk_ByteCount devNameLen)
{
VMK_ReturnStatus status;
struct net_device *dev = device;
/* Check if the associated pdev is NULL (a pseudo device) */
if (dev->pdev) {
status = vmk_StringCopy(devName, dev->pdev->name, devNameLen);
} else {
status = vmk_StringCopy(devName, dev->name, devNameLen);
}
return status;
}
/*
*----------------------------------------------------------------------------
*
* GetDeviceStats --
*
* Return the stats of corresponding device.
*
 *      There are two kinds of statistics:
 *
 *      - General statistics: retrieved through the struct net_device_stats
 *        embedded in the struct net_device. These stats are common to all
 *        devices and copied into the fixed fields of vmk_PortClientStats.
 *
 *      - Driver-specific statistics: retrieved through the driver's ethtool
 *        callbacks; the counter names and values are formatted into the
 *        privateStats string.
 *
 * Results:
 *      VMK_OK, or VMK_FAILURE if the driver provides no general stats.
*
* Side effects:
* None
*
*----------------------------------------------------------------------------
*/
static VMK_ReturnStatus
GetDeviceStats(void *device, vmk_PortClientStats *stats)
{
struct net_device *dev = device;
struct net_device_stats *st = NULL;
struct ethtool_ops *ops = dev->ethtool_ops;
struct ethtool_stats stat;
u64 *data;
char *buf;
char *pbuf;
int idx = 0;
int pidx = 0;
if (dev->get_stats) {
VMKAPI_MODULE_CALL(dev->module_id, st, dev->get_stats, dev);
}
if (!st) {
return VMK_FAILURE;
} else {
VMK_ASSERT_ON_COMPILE(sizeof stats->rxPkt == sizeof st->rx_packets);
stats->rxPkt = st->rx_packets;
stats->txPkt = st->tx_packets;
stats->rxBytes = st->rx_bytes;
stats->txBytes = st->tx_bytes;
stats->rxErr = st->rx_errors;
stats->txErr = st->tx_errors;
stats->rxDrp = st->rx_dropped;
stats->txDrp = st->tx_dropped;
stats->mltCast = st->multicast;
stats->col = st->collisions;
stats->rxLgtErr = st->rx_length_errors;
stats->rxOvErr = st->rx_over_errors;
stats->rxCrcErr = st->rx_crc_errors;
stats->rxFrmErr = st->rx_frame_errors;
stats->rxFifoErr = st->rx_fifo_errors;
stats->rxMissErr = st->rx_missed_errors;
stats->txAbortErr = st->tx_aborted_errors;
stats->txCarErr = st->tx_carrier_errors;
stats->txFifoErr = st->tx_fifo_errors;
stats->txHeartErr = st->tx_heartbeat_errors;
stats->txWinErr = st->tx_window_errors;
stats->intRxPkt = dev->linnet_rx_packets;
stats->intTxPkt = dev->linnet_tx_packets;
stats->intRxDrp = dev->linnet_rx_dropped;
stats->intTxDrp = dev->linnet_tx_dropped;
}
if (!ops ||
!ops->get_ethtool_stats ||
(!ops->get_stats_count && !ops->get_sset_count) ||
!ops->get_strings) {
goto done;
}
rtnl_lock();
if (ops->get_stats_count) {
/* 2.6.18 network drivers method to retrieve the number of stats */
VMKAPI_MODULE_CALL(dev->module_id, stat.n_stats, ops->get_stats_count, dev);
} else {
/* 2.6.18+ network drivers method to retrieve the number of stats */
VMKAPI_MODULE_CALL(dev->module_id, stat.n_stats, ops->get_sset_count, dev, ETH_SS_STATS);
}
rtnl_unlock();
data = kmalloc(stat.n_stats * sizeof(u64), GFP_ATOMIC);
pbuf = buf = kmalloc(stat.n_stats * ETH_GSTRING_LEN, GFP_ATOMIC);
   if (!data) {
      kfree(buf);  /* kfree(NULL) is a no-op; don't leak buf if only data failed */
      goto done;
   }
if (!buf) {
kfree(data);
goto done;
}
rtnl_lock();
VMKAPI_MODULE_CALL_VOID(dev->module_id, ops->get_ethtool_stats, dev, &stat, data);
VMKAPI_MODULE_CALL_VOID(dev->module_id, ops->get_strings, dev, ETH_SS_STATS, (vmk_uint8 *)buf);
rtnl_unlock();
stats->privateStats[pidx++] = '\n';
for (; (pidx < sizeof stats->privateStats - 1) && (idx < stat.n_stats); idx++) {
char tmp[128];
      snprintf(tmp, 128, " %s : %llu\n", pbuf, data[idx]);
memcpy(stats->privateStats + pidx, tmp,
min(strlen(tmp), sizeof stats->privateStats - pidx - 1));
pidx += min(strlen(tmp), sizeof stats->privateStats - pidx - 1);
pbuf += ETH_GSTRING_LEN;
}
stats->privateStats[pidx] = '\0';
kfree(data);
kfree(buf);
done:
return VMK_OK;
}
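/*
 * The privateStats text assembled above is a newline-separated list of
 * " <name> : <value>" pairs, e.g. (hypothetical driver counter names):
 *
 *     rx_queue_0_packets : 12345
 *     tx_queue_0_packets : 6789
 *
 * and is silently truncated if the driver exports more counters than fit
 * in stats->privateStats.
 */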
/*
*----------------------------------------------------------------------------
*
* GetDriverInfo --
*
 *      Return information about the corresponding device's driver.
 *
 * Results:
 *      VMK_OK, or VMK_FAILURE if the driver provides no get_drvinfo.
*
* Side effects:
* None
*
*----------------------------------------------------------------------------
*/
static VMK_ReturnStatus
GetDriverInfo(void *device, vmk_UplinkDriverInfo *driverInfo)
{
struct net_device *dev = device;
struct ethtool_ops *ops = dev->ethtool_ops;
struct ethtool_drvinfo drv;
VMK_ReturnStatus status;
snprintf(driverInfo->moduleInterface,
sizeof driverInfo->moduleInterface, "vmklinux");
if (!ops || !ops->get_drvinfo) {
snprintf(driverInfo->driver,
sizeof driverInfo->driver, "(none)");
snprintf(driverInfo->version,
sizeof driverInfo->version, "(none)");
snprintf(driverInfo->firmwareVersion,
sizeof driverInfo->firmwareVersion, "(none)");
status = VMK_FAILURE;
} else {
memset(&drv, 0, sizeof(struct ethtool_drvinfo));
rtnl_lock();
VMKAPI_MODULE_CALL_VOID(dev->module_id, ops->get_drvinfo, dev, &drv);
rtnl_unlock();
memset(driverInfo->driver, 0, sizeof driverInfo->driver);
memset(driverInfo->version, 0, sizeof driverInfo->version);
memset(driverInfo->firmwareVersion, 0, sizeof driverInfo->firmwareVersion);
memcpy(driverInfo->driver, drv.driver,
min((size_t)(sizeof driverInfo->driver - 1), sizeof drv.driver));
memcpy(driverInfo->version, drv.version,
min((size_t)(sizeof driverInfo->version - 1), sizeof drv.version));
memcpy(driverInfo->firmwareVersion, drv.fw_version,
min((size_t)(sizeof driverInfo->firmwareVersion - 1), sizeof(drv.fw_version)));
status = VMK_OK;
}
return status;
}
/*
*----------------------------------------------------------------------------
*
* wolLinuxCapsToVmkCaps --
*
 *      translate Linux WOL capability bits into their VMK equivalents
 *
 * Results:
 *      vmk_UplinkWolCaps
*
* Side effects:
* None.
*
*----------------------------------------------------------------------------
*/
static vmk_UplinkWolCaps
wolLinuxCapsToVmkCaps(vmk_uint32 caps)
{
vmk_UplinkWolCaps vmkCaps = 0;
if (caps & WAKE_PHY) {
vmkCaps |= VMK_UPLINK_WAKE_ON_PHY;
}
if (caps & WAKE_UCAST) {
vmkCaps |= VMK_UPLINK_WAKE_ON_UCAST;
}
if (caps & WAKE_MCAST) {
vmkCaps |= VMK_UPLINK_WAKE_ON_MCAST;
}
if (caps & WAKE_BCAST) {
vmkCaps |= VMK_UPLINK_WAKE_ON_BCAST;
}
if (caps & WAKE_ARP) {
vmkCaps |= VMK_UPLINK_WAKE_ON_ARP;
}
if (caps & WAKE_MAGIC) {
vmkCaps |= VMK_UPLINK_WAKE_ON_MAGIC;
}
if (caps & WAKE_MAGICSECURE) {
vmkCaps |= VMK_UPLINK_WAKE_ON_MAGICSECURE;
}
return vmkCaps;
}
/*
*----------------------------------------------------------------------------
*
* GetWolState --
*
* use the ethtool interface to populate a vmk_UplinkWolState
*
* Results:
 *      VMK_ReturnStatus; the WOL state is returned through wolState.
*
* Side effects:
* None.
*
*----------------------------------------------------------------------------
*/
static VMK_ReturnStatus
GetWolState(void *device, vmk_UplinkWolState *wolState)
{
struct net_device *dev = device;
struct ethtool_ops *ops = dev->ethtool_ops;
if (!ops || !ops->get_wol) {
return VMK_NOT_SUPPORTED;
} else {
struct ethtool_wolinfo wolInfo[1];
VMK_ReturnStatus status = VMK_OK;
memset(wolInfo, 0, sizeof(wolInfo));
rtnl_lock();
VMKAPI_MODULE_CALL_VOID(dev->module_id, ops->get_wol, dev, wolInfo);
rtnl_unlock();
wolState->supported = wolLinuxCapsToVmkCaps(wolInfo->supported);
wolState->enabled = wolLinuxCapsToVmkCaps(wolInfo->wolopts);
if (strlen((char *)wolInfo->sopass) > 0) {
vmk_uint32 length = strlen((char *)wolInfo->sopass);
memset(wolState->secureONPassword, 0,
sizeof wolState->secureONPassword);
length++;
if (length > sizeof wolState->secureONPassword) {
status = VMK_LIMIT_EXCEEDED; // truncated
length = sizeof wolState->secureONPassword;
}
memcpy(wolState->secureONPassword, wolInfo->sopass, length);
}
return status;
}
}
/*
*----------------------------------------------------------------------------
*
* GetCoalesceParams --
*
* use the ethtool interface to get device coalescing properties
*
* Results:
* VMK_ReturnStatus
*
* Side effects:
* None.
*
*----------------------------------------------------------------------------
*/
static VMK_ReturnStatus
GetCoalesceParams(void *device,
vmk_UplinkCoalesceParams *coalesceParams)
{
struct net_device *dev = device;
struct ethtool_ops *ops = dev->ethtool_ops;
struct ethtool_coalesce coalesce;
VMK_ReturnStatus status;
if (!ops || !ops->get_coalesce) {
status = VMK_NOT_SUPPORTED;
} else {
int ret = -1;
memset(&coalesce, 0, sizeof(struct ethtool_coalesce));
rtnl_lock();
coalesce.cmd = ETHTOOL_GCOALESCE;
VMKAPI_MODULE_CALL(dev->module_id,
ret,
ops->get_coalesce,
dev,
&coalesce);
rtnl_unlock();
if (ret == 0) {
if (coalesce.rx_coalesce_usecs) {
coalesceParams->rxUsecs = coalesce.rx_coalesce_usecs;
}
if (coalesce.rx_max_coalesced_frames) {
coalesceParams->rxMaxFrames = coalesce.rx_max_coalesced_frames;
}
if (coalesce.tx_coalesce_usecs) {
coalesceParams->txUsecs = coalesce.tx_coalesce_usecs;
}
if (coalesce.tx_max_coalesced_frames) {
coalesceParams->txMaxFrames = coalesce.tx_max_coalesced_frames;
}
status = VMK_OK;
} else {
status = VMK_FAILURE;
}
}
return status;
}
/*
*----------------------------------------------------------------------------
*
* SetCoalesceParams --
*
* use the ethtool interface to set device coalescing properties
*
* Results:
* VMK_ReturnStatus
*
* Side effects:
* None.
*
*----------------------------------------------------------------------------
*/
static VMK_ReturnStatus
SetCoalesceParams(void *device,
vmk_UplinkCoalesceParams *coalesceParams)
{
struct net_device *dev = device;
struct ethtool_ops *ops = dev->ethtool_ops;
struct ethtool_coalesce coalesce;
VMK_ReturnStatus status;
if (!ops || !ops->set_coalesce) {
status = VMK_NOT_SUPPORTED;
} else {
int ret = -1;
memset(&coalesce, 0, sizeof(struct ethtool_coalesce));
// get first, then set
rtnl_lock();
coalesce.cmd = ETHTOOL_GCOALESCE;
VMKAPI_MODULE_CALL(dev->module_id,
ret,
ops->get_coalesce,
dev,
&coalesce);
if (ret == 0) {
if (coalesceParams->rxUsecs) {
coalesce.rx_coalesce_usecs = coalesceParams->rxUsecs;
}
if (coalesceParams->rxMaxFrames) {
coalesce.rx_max_coalesced_frames = coalesceParams->rxMaxFrames;
}
if (coalesceParams->txUsecs) {
coalesce.tx_coalesce_usecs = coalesceParams->txUsecs;
}
if (coalesceParams->txMaxFrames) {
coalesce.tx_max_coalesced_frames = coalesceParams->txMaxFrames;
}
coalesce.cmd = ETHTOOL_SCOALESCE;
VMKAPI_MODULE_CALL(dev->module_id,
ret,
ops->set_coalesce,
dev,
&coalesce);
}
rtnl_unlock();
if (ret == 0) {
status = VMK_OK;
} else {
status = VMK_FAILURE;
}
}
return status;
}
/*
*----------------------------------------------------------------------------
*
* wolVmkCapsToLinuxCaps --
*
* translate from VMK wol caps to linux caps
*
* Results:
* linux wol cap bits
*
* Side effects:
* None.
*
*----------------------------------------------------------------------------
*/
static vmk_uint32
wolVmkCapsToLinuxCaps(vmk_UplinkWolCaps vmkCaps)
{
vmk_uint32 caps = 0;
if (vmkCaps & VMK_UPLINK_WAKE_ON_PHY) {
caps |= WAKE_PHY;
}
if (vmkCaps & VMK_UPLINK_WAKE_ON_UCAST) {
caps |= WAKE_UCAST;
}
if (vmkCaps & VMK_UPLINK_WAKE_ON_MCAST) {
caps |= WAKE_MCAST;
}
if (vmkCaps & VMK_UPLINK_WAKE_ON_BCAST) {
caps |= WAKE_BCAST;
}
if (vmkCaps & VMK_UPLINK_WAKE_ON_ARP) {
caps |= WAKE_ARP;
}
if (vmkCaps & VMK_UPLINK_WAKE_ON_MAGIC) {
caps |= WAKE_MAGIC;
}
if (vmkCaps & VMK_UPLINK_WAKE_ON_MAGICSECURE) {
caps |= WAKE_MAGICSECURE;
}
return caps;
}
/*
*----------------------------------------------------------------------------
*
* SetWolState --
*
* set wol state via ethtool from a vmk_UplinkWolState struct
*
 * Results:
 *      VMK_OK on success, various other failures otherwise
*
* Side effects:
* can set state within the pNic
*
*----------------------------------------------------------------------------
*/
static VMK_ReturnStatus
SetWolState(void *device, vmk_UplinkWolState *wolState)
{
struct net_device *dev = device;
struct ethtool_ops *ops = dev->ethtool_ops;
VMK_ReturnStatus status = VMK_FAILURE;
if (!ops || !ops->set_wol) {
return VMK_NOT_SUPPORTED;
} else {
vmk_uint32 length;
      struct ethtool_wolinfo wolInfo[1];
      int error;
      memset(wolInfo, 0, sizeof(wolInfo)); /* don't pass uninitialized sopass to the driver */
      wolInfo->supported = wolVmkCapsToLinuxCaps(wolState->supported);
wolInfo->wolopts = wolVmkCapsToLinuxCaps(wolState->enabled);
length = strlen(wolState->secureONPassword);
if (length > 0) {
if (length > sizeof(wolInfo->sopass)) {
length = sizeof(wolInfo->sopass);
}
memcpy(wolInfo->sopass, wolState->secureONPassword, length);
}
rtnl_lock();
VMKAPI_MODULE_CALL(dev->module_id, error, ops->set_wol, dev, wolInfo);
rtnl_unlock();
if (error == 0) {
status = VMK_OK;
}
}
return status;
}
/*
*----------------------------------------------------------------------------
*
* GetNICState --
* For the given NIC, return device resource information such as its
* irq, memory range, flags and so on.
*
* Results:
* VMK_OK if successful. Other VMK_ReturnStatus codes returned on failure.
*
* Side effects:
* None.
*
*----------------------------------------------------------------------------
*/
static VMK_ReturnStatus
GetNICState(void *clientData, vmk_PortClientStates *states)
{
if (clientData && states) {
struct net_device *dev = (struct net_device *)clientData;
if (test_bit(__LINK_STATE_PRESENT, &dev->state)) {
*states |= VMK_PORT_CLIENT_STATE_PRESENT;
}
if (!test_bit(__LINK_STATE_XOFF, &dev->state)) {
*states |= VMK_PORT_CLIENT_STATE_QUEUE_OK;
}
if (!test_bit(__LINK_STATE_NOCARRIER, &dev->state)) {
*states |= VMK_PORT_CLIENT_STATE_LINK_OK;
}
if (test_bit(__LINK_STATE_START, &dev->state)) {
*states |= VMK_PORT_CLIENT_STATE_RUNNING;
}
if (dev->flags & IFF_UP) {
*states |= VMK_PORT_CLIENT_STATE_READY;
}
if (dev->flags & IFF_PROMISC) {
*states |= VMK_PORT_CLIENT_STATE_PROMISC;
}
if (dev->flags & IFF_BROADCAST) {
*states |= VMK_PORT_CLIENT_STATE_BROADCAST;
}
if (dev->flags & IFF_MULTICAST) {
*states |= VMK_PORT_CLIENT_STATE_MULTICAST;
}
return VMK_OK;
} else {
VMKLNX_DEBUG(0, "clientData: %p, states %p", clientData, states);
return VMK_FAILURE;
}
}
static VMK_ReturnStatus
GetNICMemResources(void *clientData, vmk_UplinkMemResources *resources)
{
if (clientData && resources) {
struct net_device *dev = (struct net_device *) clientData;
resources->baseAddr = (void *)dev->base_addr;
resources->memStart = (void *)dev->mem_start;
resources->memEnd = (void *)dev->mem_end;
resources->dma = dev->dma;
return VMK_OK;
} else {
VMKLNX_DEBUG(0, "clientData: %p, resources %p", clientData, resources);
return VMK_FAILURE;
}
}
static VMK_ReturnStatus
GetNICDeviceProperties(void *clientData, vmk_UplinkDeviceInfo *devInfo)
{
VMK_ReturnStatus status;
struct net_device *dev;
struct pci_dev *pdev;
vmk_PCIDevice vmkPciDev;
if (clientData == NULL || devInfo == NULL) {
VMKLNX_DEBUG(0, "clientData: %p, pciInfo %p", clientData, devInfo);
return VMK_FAILURE;
}
dev = (struct net_device *)clientData;
pdev = dev->pdev;
if (dev->features & NETIF_F_PSEUDO_REG) {
// If physical device but registered as a pseudo-device,
// get the actual pdev from dev->pdev_pseudo (saved by the
// NIC driver).
VMK_ASSERT(pdev == NULL);
pdev = (struct pci_dev *)dev->pdev_pseudo;
VMKLNX_WARN("PCI device registered as pseudo-device %u:%u:%u.%u",
pci_domain_nr(pdev->bus), pdev->bus->number,
PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
}
else if (pdev == NULL) {
/*
* Pseudo NICs don't have PCI properties
*/
status = VMK_NOT_SUPPORTED;
goto out;
}
/*
* Get the device info and the DMA constraints for the device
*/
status = vmk_PCIGetPCIDevice(pci_domain_nr(pdev->bus), pdev->bus->number,
PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
&vmkPciDev);
if (status != VMK_OK) {
VMK_ASSERT(status == VMK_OK);
VMKLNX_WARN("Unable to find vmk_PCIDevice for PCI device %u:%u:%u.%u %s",
pci_domain_nr(pdev->bus), pdev->bus->number,
PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
vmk_StatusToString(status));
status = VMK_FAILURE;
goto out;
}
status = vmk_PCIGetGenDevice(vmkPciDev, &devInfo->device);
if (status != VMK_OK) {
VMK_ASSERT(status == VMK_OK);
VMKLNX_WARN("Unable to get vmk_Device for PCI device %u:%u:%u.%u: %s",
pci_domain_nr(pdev->bus), pdev->bus->number,
PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
vmk_StatusToString(status));
status = VMK_FAILURE;
goto out;
}
// If it is a physical device being registered as a pseudo-device,
// return here prior to other setup.
if (dev->features & NETIF_F_PSEUDO_REG) {
return VMK_OK;
}
/* Most constraints don't apply so set them to zero. */
memset(&devInfo->constraints, 0, sizeof(devInfo->constraints));
devInfo->constraints.addressMask = pdev->dma_mask;
devInfo->constraints.sgMaxEntries = MAX_SKB_FRAGS + 1;
return VMK_OK;
out:
return status;
}
/*
*----------------------------------------------------------------------------
*
* GetNICPanicInfo --
* Fill in vmk_UplinkPanicInfo struct.
*
* Results:
* VMK_OK if properties filled in. VMK_FAILURE otherwise.
*
* Side effects:
* None.
*
*----------------------------------------------------------------------------
*/
static VMK_ReturnStatus
GetNICPanicInfo(void *clientData,
vmk_UplinkPanicInfo *intInfo)
{
if (clientData && intInfo) {
struct net_device* dev = (struct net_device*)clientData;
if (dev->pdev == NULL) {
/*
* Pseudo NIC does not support remote
* debugging.
*/
intInfo->vector = 0;
intInfo->clientData = NULL;
} else {
intInfo->vector = dev->pdev->irq;
intInfo->clientData = dev;
}
return VMK_OK;
} else {
VMKLNX_DEBUG(0, "clientData: %p, intInfo %p", clientData, intInfo);
return VMK_FAILURE;
}
}
/*
*----------------------------------------------------------------------------
*
* FlushRxBuffers --
*
* Called by the net debugger
*
* Results:
* None.
*
* Side effects:
* None.
*
*----------------------------------------------------------------------------
*/
static VMK_ReturnStatus
FlushRxBuffers(void* clientData)
{
struct net_device* dev = (struct net_device*)clientData;
struct napi_struct* napi = NULL;
vmk_NetPoll pollPriv;
VMKLNX_DEBUG(1, "client data, now net_device:%p", dev);
list_for_each_entry(napi, &dev->napi_list, dev_list) {
if (napi != NULL) {
VMKLNX_DEBUG(1, "Calling Pkt List Rx Process on napi:%p", napi);
VMK_ASSERT(napi->dev != NULL);
/*
* Bypass the vswitch to receive the packets when the system is in the
* panic/debug mode.
*/
if (vmk_NetPollGetCurrent(&pollPriv) != VMK_OK) {
if (debugPktList == NULL) {
debugPktList = (vmk_PktList) vmk_HeapAlloc(vmklnxLowHeap,
vmk_PktListSizeInBytes);
if (debugPktList == NULL) {
return VMK_NO_MEMORY;
}
vmk_PktListInit(debugPktList);
}
return vmk_NetDebugRxProcess(debugPktList);
} else {
vmk_NetPollProcessRx(napi->net_poll);
}
}
}
return VMK_OK;
}
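/*
 * Two delivery paths above: when no poll context is current (panic or
 * debugger time), packets are staged on debugPktList and pushed through
 * vmk_NetDebugRxProcess(), bypassing the vswitch entirely; otherwise the
 * per-napi poll, vmk_NetPollProcessRx(), drains them as usual.
 */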
/*
*----------------------------------------------------------------------------
*
* PanicPoll --
* Poll for rx packets.
*
* Results:
* result of napi->poll: the number of packets received and processed.
*
* Side effects:
* None.
*
*----------------------------------------------------------------------------
*/
static VMK_ReturnStatus
PanicPoll(void* clientData,
vmk_uint32 budget,
vmk_int32* workDone)
{
struct net_device* dev = (struct net_device*)clientData;
struct napi_struct* napi = NULL;
vmk_int32 ret = 0;
vmk_int32 modRet = 0;
VMKLNX_DEBUG(1, "data:%p budget:%u", dev, budget);
VMK_ASSERT(dev != NULL);
if (dev->poll_controller) {
// device supports NET_POLL interface
VMKAPI_MODULE_CALL_VOID(dev->module_id, dev->poll_controller, dev);
VMKLNX_DEBUG(1, "%s: poll_controller called\n", dev->name);
} else {
list_for_each_entry(napi, &dev->napi_list, dev_list) {
if ((napi != NULL) && (napi->poll != NULL)) {
set_bit(NAPI_STATE_SCHED, &napi->state);
VMKAPI_MODULE_CALL(napi->dev->module_id, modRet, napi->poll, napi,
budget);
ret += modRet;
VMKLNX_DEBUG(1, "poll:%p napi:%p budget:%u poll returned:%d",
napi->poll, napi, budget, ret);
}
}
if (workDone != NULL) {
*workDone = ret;
}
}
return VMK_OK;
}
static VMK_ReturnStatus
GetWatchdogTimeoHitCnt(void *device, vmk_int16 *hitcnt)
{
struct net_device *dev = device;
*hitcnt = dev->watchdog_timeohit_cfg;
return VMK_OK;
}
static VMK_ReturnStatus
SetWatchdogTimeoHitCnt(void *device, vmk_int16 hitcnt)
{
struct net_device *dev = device;
dev->watchdog_timeohit_cfg = hitcnt;
return VMK_OK;
}
static VMK_ReturnStatus
GetWatchdogTimeoStats(void *device, vmk_int16 *stats)
{
struct net_device *dev = device;
*stats = dev->watchdog_timeohit_stats;
return VMK_OK;
}
static VMK_ReturnStatus
GetWatchdogTimeoPanicMod(void *device, vmk_UplinkWatchdogPanicModState *state)
{
struct net_device *dev = device;
*state = dev->watchdog_timeohit_panic;
return VMK_OK;
}
static VMK_ReturnStatus
SetWatchdogTimeoPanicMod(void *device, vmk_UplinkWatchdogPanicModState state)
{
struct net_device *dev = device;
dev->watchdog_timeohit_panic = state;
return VMK_OK;
}
#define NET_DEVICE_MAKE_PROPERTIES_FUNCTIONS \
{ \
getStates: GetNICState, \
getMemResources: GetNICMemResources, \
getDeviceProperties:GetNICDeviceProperties, \
getPanicInfo: GetNICPanicInfo, \
getMACAddr: GetMACAddr, \
getName: GetDeviceName, \
getStats: GetDeviceStats, \
getDriverInfo: GetDriverInfo, \
getWolState: GetWolState, \
setWolState: SetWolState, \
getCoalesceParams: GetCoalesceParams, \
setCoalesceParams: SetCoalesceParams, \
}
#define NET_DEVICE_MAKE_WATCHDOG_FUNCTIONS \
{ \
getHitCnt: GetWatchdogTimeoHitCnt, \
setHitCnt: SetWatchdogTimeoHitCnt, \
getStats: GetWatchdogTimeoStats, \
getPanicMod: GetWatchdogTimeoPanicMod, \
setPanicMod: SetWatchdogTimeoPanicMod \
}
#define NET_DEVICE_MAKE_NETQUEUE_FUNCTIONS \
{ \
netqOpFunc: LinNetNetqueueOpFunc, \
netqXmit: NULL, \
}
#define NET_DEVICE_MAKE_PT_FUNCTIONS \
{ \
ptOpFunc: LinNetPTOpFunc \
}
#define NET_DEVICE_MAKE_VLAN_FUNCTIONS \
{ \
setupVlan: SetupVlanGroupDevice, \
removeVlan: LinNet_RemoveVlanGroupDevice \
}
#define NET_DEVICE_MAKE_MTU_FUNCTIONS \
{ \
getMTU: NICGetMTU, \
setMTU: NICSetMTU \
}
#define NET_DEVICE_MAKE_CORE_FUNCTIONS \
{ \
startTxImmediate: DevStartTxImmediate, \
open: OpenNetDev, \
close: CloseNetDev, \
panicPoll: PanicPoll, \
flushRxBuffers: FlushRxBuffers, \
ioctl: IoctlNetDev, \
block: BlockNetDev, \
unblock: UnblockNetDev, \
setLinkStatus: NICSetLinkStatus, \
reset: NICResetDev \
}
#define NET_DEVICE_MAKE_DCB_FUNCTIONS \
{ \
isDCBEnabled: NICDCBIsEnabled, \
enableDCB: NICDCBEnable, \
disableDCB: NICDCBDisable, \
getNumTCs: NICDCBGetNumTCs, \
getPG: NICDCBGetPriorityGroup, \
setPG: NICDCBSetPriorityGroup, \
getPFCCfg: NICDCBGetPFCCfg, \
setPFCCfg: NICDCBSetPFCCfg, \
isPFCEnabled: NICDCBIsPFCEnabled, \
enablePFC: NICDCBEnablePFC, \
disablePFC: NICDCBDisablePFC, \
getApps: NICDCBGetApplications, \
setApp: NICDCBSetApplication, \
getCaps: NICDCBGetCapabilities, \
applySettings: NICDCBApplySettings, \
getSettings: NICDCBGetSettings \
}
vmk_UplinkFunctions linNetFunctions = {
coreFns: NET_DEVICE_MAKE_CORE_FUNCTIONS,
mtuFns: NET_DEVICE_MAKE_MTU_FUNCTIONS,
vlanFns: NET_DEVICE_MAKE_VLAN_FUNCTIONS,
propFns: NET_DEVICE_MAKE_PROPERTIES_FUNCTIONS,
watchdogFns: NET_DEVICE_MAKE_WATCHDOG_FUNCTIONS,
netqueueFns: NET_DEVICE_MAKE_NETQUEUE_FUNCTIONS,
ptFns: NET_DEVICE_MAKE_PT_FUNCTIONS,
dcbFns: NET_DEVICE_MAKE_DCB_FUNCTIONS,
};
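/*
 * Note: the "field: value" form used in the initializer macros above is
 * the old GNU designated-initializer syntax; under C99 the equivalent
 * would be written as, e.g.:
 *
 *    vmk_UplinkFunctions linNetFunctions = {
 *       .coreFns = NET_DEVICE_MAKE_CORE_FUNCTIONS,
 *       .mtuFns  = NET_DEVICE_MAKE_MTU_FUNCTIONS,
 *    };
 */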
static VMK_ReturnStatus
NicCharOpsIoctl(vmk_CharDevFdAttr *attr,
unsigned int cmd,
vmk_uintptr_t userData,
vmk_IoctlCallerSize callerSize,
vmk_int32 *result)
{
struct net_device *dev;
vmkplxr_ChardevHandles *handles;
struct ifreq ifr;
VMK_ReturnStatus status;
if (copy_from_user(&ifr, (void *)userData, sizeof(ifr))) {
return VMK_INVALID_ADDRESS;
}
handles = (vmkplxr_ChardevHandles *) attr->clientDeviceData.ptr;
VMK_ASSERT(handles != NULL);
dev = handles->vmklinuxInfo.ptr;
VMK_ASSERT(dev != NULL);
status = netdev_ioctl(dev, cmd, &ifr, (uint32_t *) result, callerSize, VMK_FALSE);
if (status == VMK_OK) {
if (copy_to_user((void *)userData, &ifr, sizeof(ifr))) {
return VMK_INVALID_ADDRESS;
}
}
return status;
}
static VMK_ReturnStatus
NicCharOpsOpen(vmk_CharDevFdAttr *attr)
{
struct net_device *dev;
vmkplxr_ChardevHandles *handles;
handles = (vmkplxr_ChardevHandles *) attr->clientDeviceData.ptr;
VMK_ASSERT(handles != NULL);
dev = handles->vmklinuxInfo.ptr;
VMK_ASSERT(dev != NULL);
dev_hold(dev);
return VMK_OK;
}
static VMK_ReturnStatus
NicCharOpsClose(vmk_CharDevFdAttr *attr)
{
struct net_device *dev;
vmkplxr_ChardevHandles *handles;
handles = (vmkplxr_ChardevHandles *) attr->clientDeviceData.ptr;
VMK_ASSERT(handles != NULL);
dev = handles->vmklinuxInfo.ptr;
VMK_ASSERT(dev != NULL);
dev_put(dev);
return VMK_OK;
}
static vmk_CharDevOps nicCharOps = {
NicCharOpsOpen,   /* open */
NicCharOpsClose,  /* close */
NicCharOpsIoctl,  /* ioctl */
NULL,             /* remaining ops unused */
NULL,
NULL
};
static VMK_ReturnStatus
NicCharDataDestructor(vmk_AddrCookie charData)
{
/*
* The device-private data is in fact the struct net_device,
* which is destroyed separately from unregistration of the
* character device. So, do nothing here.
*/
return VMK_OK;
}
static int
register_nic_chrdev(struct net_device *dev)
{
VMK_ReturnStatus status;
int major = VMKPLXR_DYNAMIC_MAJOR;
int minor = 0;
vmk_AddrCookie devCookie;
if (dev->name) {
devCookie.ptr = dev;
status = vmkplxr_RegisterChardev(&major, &minor, dev->name,
&nicCharOps, devCookie,
NicCharDataDestructor,
dev->module_id);
if (status == VMK_OK) {
dev->nicMajor = major;
return 0;
} else if (status == VMK_BUSY) {
return -EBUSY;
}
} else {
printk("Device has no name\n");
}
return -EINVAL;
}
/*
*----------------------------------------------------------------------------
*
* LinNet_ConnectUplink --
*
* Registers the device with the vmkernel: connects the uplink,
* registers the backup and per-NAPI poll contexts, initializes
* link-state and watchdog bookkeeping, and registers the per-NIC
* character device.
*
* Results:
* 0 if successful, non-zero on failure.
*
* Side effects:
* The device becomes visible to the vmkernel networking stack.
*
*----------------------------------------------------------------------------
*/
int
LinNet_ConnectUplink(struct net_device *dev, struct pci_dev *pdev)
{
vmk_UplinkCapabilities capabilities = 0;
vmk_Name pollName;
vmk_ModuleID moduleID = VMK_INVALID_MODULE_ID;
vmk_UplinkConnectInfo connectInfo;
struct napi_struct *napi;
/*
* We should only make this call once per net_device
*/
VMK_ASSERT(dev->uplinkDev == NULL);
/*
* Driver should have made the association with
* the PCI device via the macro SET_NETDEV_DEV()
*/
VMK_ASSERT(dev->pdev == pdev);
/* CNA devices shouldn't go through this path. */
VMK_ASSERT(!(dev->features & NETIF_F_CNA));
/*
* A driver that names its own devices has already filled in the
* device name in net_device; otherwise derive one from the PCI device.
*/
if (!dev->useDriverNamingDevice) {
netdev_name_adapter(dev, pdev);
}
capabilities = netdev_query_capabilities(dev);
moduleID = dev->module_id;
VMK_ASSERT(moduleID != VMK_INVALID_MODULE_ID);
connectInfo.devName = dev->name;
connectInfo.clientData = dev;
connectInfo.moduleID = moduleID;
connectInfo.functions = &linNetFunctions;
connectInfo.cap = capabilities;
if (dev->features & NETIF_F_HIDDEN_UPLINK) {
connectInfo.flags = VMK_UPLINK_FLAG_HIDDEN;
} else {
connectInfo.flags = 0;
}
if (dev->features & NETIF_F_PSEUDO_REG) {
connectInfo.flags |= VMK_UPLINK_FLAG_PSEUDO_REG;
}
if (vmk_UplinkRegister((vmk_Uplink *)&dev->uplinkDev, &connectInfo) != VMK_OK) {
goto fail;
}
VMK_ASSERT(dev->net_poll);
(void) vmk_NameFormat(&pollName, "-backup");
vmk_NetPollRegisterUplink(dev->net_poll, dev->uplinkDev, pollName, VMK_FALSE);
list_for_each_entry(napi, &dev->napi_list, dev_list) {
vmk_Name pollName;
(void) vmk_NameFormat(&pollName, "-%d", napi->napi_id);
vmk_NetPollRegisterUplink(napi->net_poll, napi->dev->uplinkDev, pollName, VMK_TRUE);
}
dev->link_speed = -1;
dev->full_duplex = 0;
dev->link_state = VMKLNX_UPLINK_LINK_DOWN;
dev->watchdog_timeohit_cnt = 0;
dev->watchdog_timeohit_cfg = VMK_UPLINK_WATCHDOG_HIT_CNT_DEFAULT;
dev->watchdog_timeohit_stats = 0;
dev->watchdog_timeohit_panic = VMKLNX_UPLINK_WATCHDOG_PANIC_MOD_ENABLE;
dev->watchdog_timeohit_period_start = jiffies;
return register_nic_chrdev(dev);
fail:
return -1;
}
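/*
 * For reference, the driver-side sequence that reaches this function is
 * the usual Linux probe path; vmklinux calls LinNet_ConnectUplink() when
 * the driver registers its netdev. A minimal sketch of a hypothetical
 * driver probe (foo_probe, struct foo_priv, and the error handling are
 * illustrative, not from this file):
 *
 *    static int foo_probe(struct pci_dev *pdev,
 *                         const struct pci_device_id *id)
 *    {
 *       struct net_device *netdev = alloc_etherdev(sizeof(struct foo_priv));
 *       if (netdev == NULL)
 *          return -ENOMEM;
 *       SET_NETDEV_DEV(netdev, &pdev->dev);  // satisfies dev->pdev == pdev
 *       // ... hardware init, netdev ops and feature flags ...
 *       return register_netdev(netdev);
 *    }
 */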
/*
*----------------------------------------------------------------------------
*
* vmklnx_netdev_high_dma_workaround --
* Make a copy of an skb in low DMA memory.
*
* Results:
* If the copy succeeds, the original skb is released and the new
* copy is returned. On failure, NULL is returned and the original
* skb is left untouched.
*
* Side effects:
* The skb buffer passed to the function might be released.
*
*----------------------------------------------------------------------------
*/
struct sk_buff *
vmklnx_netdev_high_dma_workaround(struct sk_buff *base)
{
struct sk_buff *skb = skb_copy(base, GFP_ATOMIC);
if (skb) {
vmk_PktRelease(base->pkt);
}
return skb;
}
/*
*----------------------------------------------------------------------------
*
* vmklnx_netdev_high_dma_overflow --
* Check whether any of the skb's data lies at or above a DMA address
* limit expressed in gigabytes.
*
* Results:
* Returns TRUE if any part of the skb lies at or above the limit,
* FALSE otherwise.
*
* Side effects:
* None.
*
*----------------------------------------------------------------------------
*/
#define GB (1024LL * 1024 * 1024)
int
vmklnx_netdev_high_dma_overflow(struct sk_buff *skb,
short gb_limit)
{
uint64_t dma_addr;
uint64_t dma_addr_limit;
int idx_frags;
int nr_frags;
skb_frag_t *skb_frag;
vmk_PktFrag pkt_frag;
if (VMKLNX_STRESS_DEBUG_COUNTER(stressNetIfForceHighDMAOverflow)) {
return VMK_TRUE;
}
dma_addr_limit = (uint64_t) gb_limit * GB;
/* If no physical memory lies above the limit, overflow is impossible. */
if (dma_addr_limit > max_phys_addr) {
return VMK_FALSE;
}
if (vmk_PktFragGet(skb->pkt, &pkt_frag, 0) != VMK_OK) {
return VMK_FALSE;
}
/* Check the end of the linear data area against the limit. */
dma_addr = pkt_frag.addr + (skb->end - skb->head);
if (dma_addr >= dma_addr_limit) {
return VMK_TRUE;
}
/* Then check the last byte of each page fragment. */
nr_frags = skb_shinfo(skb)->nr_frags;
for (idx_frags = 0; idx_frags < nr_frags; idx_frags++) {
skb_frag = &skb_shinfo(skb)->frags[idx_frags];
dma_addr = page_to_phys(skb_frag->page) + skb_frag->page_offset + skb_frag->size;
if (dma_addr >= dma_addr_limit) {
return VMK_TRUE;
}
}
return VMK_FALSE;
}
EXPORT_SYMBOL(vmklnx_netdev_high_dma_overflow);
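/*
 * A minimal sketch of how the two helpers above are meant to pair up in
 * a driver's transmit path (the 4 GB limit and the drop handling are
 * illustrative assumptions, not a mandated pattern):
 *
 *    if (vmklnx_netdev_high_dma_overflow(skb, 4)) {
 *       struct sk_buff *low = vmklnx_netdev_high_dma_workaround(skb);
 *       if (low == NULL) {
 *          dev_kfree_skb_any(skb);    // original is untouched on failure
 *          return NETDEV_TX_OK;       // count/drop as the driver sees fit
 *       }
 *       skb = low;                    // original was released by the helper
 *    }
 *    // ... set up DMA mappings for skb as usual ...
 */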
/*
*----------------------------------------------------------------------------
*
* vmklnx_skb_real_size --
* Hides the size of "struct LinSkb" from callers so that the size is
* not baked into the binary-compatibility contract. LinSkb can then
* grow in future releases without breaking binary compatibility.
*
* Results:
* sizeof(struct LinSkb)
*
* Side effects:
* None.
*
*----------------------------------------------------------------------------
*/
size_t
vmklnx_skb_real_size(void)
{
return sizeof(struct LinSkb);
}
EXPORT_SYMBOL(vmklnx_skb_real_size);
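/*
 * A minimal sketch of the intended use (illustrative; the real callers
 * are the vmklinux skb allocation paths): size skb storage through the
 * accessor rather than sizeof(struct LinSkb), so that a later vmklinux
 * can grow LinSkb without recompiling its clients:
 *
 *    void *buf = kmalloc(vmklnx_skb_real_size(), GFP_ATOMIC);
 */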
static void
LinNetComputeEthCRCTableLE(void)
{
unsigned i, crc, j;
/*
 * Build the byte-wise lookup table for the reflected (little-endian)
 * Ethernet CRC-32 polynomial: one table entry per input byte value.
 */
for (i = 0; i < 256; i++) {
crc = i;
for (j = 0; j < 8; j++) {
crc = (crc >> 1) ^ ((crc & 0x1) ? eth_crc32_poly_le : 0);
}
eth_crc32_poly_tbl_le[i] = crc;
}
}
static uint32_t
LinNetComputeEthCRCLE(unsigned crc, const unsigned char *frame, uint32_t frameLen)
{
uint32_t i;
int j;
/*
 * Fold the frame into the CRC four bytes at a time, one table
 * lookup per byte.
 */
for (i = 0; i + 4 <= frameLen; i += 4) {
crc ^= *(unsigned *)&frame[i];
for (j = 0; j < 4; j++) {
crc = eth_crc32_poly_tbl_le[crc & 0xff] ^ (crc >> 8);
}
}
/* Consume the remaining zero to three tail bytes. */
while (i < frameLen) {
crc = eth_crc32_poly_tbl_le[(crc ^ frame[i++]) & 0xff] ^ (crc >> 8);
}
return crc;
}
/**
* crc32_le - Calculate bitwise little-endian Ethernet CRC
* @crc: seed value for computation
* @p: pointer to buffer over which CRC is run
* @len: length of buffer p
*
* Calculates bitwise little-endian Ethernet CRC from an
* initial seed value that could be 0 or a previous value if
* computing incrementally.
*
* RETURN VALUE:
* 32-bit CRC value.
*
*/
/* _VMKLNX_CODECHECK_: crc32_le */
uint32_t
crc32_le(uint32_t crc, unsigned char const *p, size_t len)
{
return LinNetComputeEthCRCLE(crc, p, len);
}
EXPORT_SYMBOL(crc32_le);
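/*
 * A minimal usage sketch. The seed/invert convention below is the
 * standard IEEE 802.3 one, not specific to this file: seed with ~0 and
 * invert the result to get an Ethernet FCS, or feed a previous return
 * value back in to compute the CRC incrementally:
 *
 *    uint32_t fcs = ~crc32_le(~0, frame, len);      // one shot
 *
 *    uint32_t crc = crc32_le(~0, frame, n);         // first chunk
 *    crc = crc32_le(crc, frame + n, len - n);       // rest of the frame
 *    fcs = ~crc;                                    // same result
 */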
/*
*----------------------------------------------------------------------------
*
* LinNet_Init --
*
* Initialize LinNet data structures.
*
* Results:
* None.
*
* Side effects:
* None.
*
*----------------------------------------------------------------------------
*/
void
LinNet_Init(void)
{
VMK_ReturnStatus status;
VMKLNX_CREATE_LOG();
LinStress_SetupStress();
LinNetComputeEthCRCTableLE();
/* set up link state timer */
status = vmk_ConfigParamOpen("Net", "LinkStatePollTimeout",
&linkStateTimerPeriodConfigHandle);
VMK_ASSERT(status == VMK_OK);
status = vmk_ConfigParamGetUint(linkStateTimerPeriodConfigHandle,
&linkStateTimerPeriod);
VMK_ASSERT(status == VMK_OK);
status = vmk_ConfigParamOpen("Net", "VmklnxLROEnabled",
&vmklnxLROEnabledConfigHandle);
VMK_ASSERT(status == VMK_OK);
status = vmk_ConfigParamGetUint(vmklnxLROEnabledConfigHandle,
&vmklnxLROEnabled);
VMK_ASSERT(status == VMK_OK);
status = vmk_ConfigParamOpen("Net", "VmklnxLROMaxAggr",
&vmklnxLROMaxAggrConfigHandle);
VMK_ASSERT(status == VMK_OK);
status = vmk_ConfigParamGetUint(vmklnxLROMaxAggrConfigHandle,
&vmklnxLROMaxAggr);
VMK_ASSERT(status == VMK_OK);
INIT_DELAYED_WORK(&linkStateWork, link_state_work_cb);
schedule_delayed_work(&linkStateWork,
msecs_to_jiffies(linkStateTimerPeriod));
INIT_DELAYED_WORK(&watchdogWork, watchdog_work_cb);
schedule_delayed_work(&watchdogWork,
msecs_to_jiffies(WATCHDOG_DEF_TIMER));
status = vmk_ConfigParamOpen("Net", "PortDisableTimeout",
&blockTotalSleepMsecHandle);
VMK_ASSERT(status == VMK_OK);
status = vmk_ConfigParamGetUint(blockTotalSleepMsecHandle, &blockTotalSleepMsec);
VMK_ASSERT(status == VMK_OK);
max_phys_addr = vmk_MachMemMaxAddr();
status = vmk_ConfigParamOpen("Net", "MaxNetifTxQueueLen",
&maxNetifTxQueueLenConfigHandle);
VMK_ASSERT(status == VMK_OK);
status = vmk_ConfigParamOpen("Net", "UseHwIPv6Csum",
&useHwIPv6CsumHandle);
VMK_ASSERT(status == VMK_OK);
status = vmk_ConfigParamOpen("Net", "UseHwCsumForIPv6Csum",
&useHwCsumForIPv6CsumHandle);
VMK_ASSERT(status == VMK_OK);
status = vmk_ConfigParamOpen("Net", "UseHwTSO", &useHwTSOHandle);
VMK_ASSERT(status == VMK_OK);
status = vmk_ConfigParamOpen("Net", "UseHwTSO6", &useHwTSO6Handle);
VMK_ASSERT(status == VMK_OK);
/* Open the stress options used for network fault injection. */
status = vmk_StressOptionOpen(VMK_STRESS_OPT_NET_GEN_TINY_ARP_RARP,
&stressNetGenTinyArpRarp);
VMK_ASSERT(status == VMK_OK);
status = vmk_StressOptionOpen(VMK_STRESS_OPT_NET_IF_CORRUPT_ETHERNET_HDR,
&stressNetIfCorruptEthHdr);
VMK_ASSERT(status == VMK_OK);
status = vmk_StressOptionOpen(VMK_STRESS_OPT_NET_IF_CORRUPT_RX_DATA,
&stressNetIfCorruptRxData);
VMK_ASSERT(status == VMK_OK);
status = vmk_StressOptionOpen(VMK_STRESS_OPT_NET_IF_CORRUPT_RX_TCP_UDP,
&stressNetIfCorruptRxTcpUdp);
VMK_ASSERT(status == VMK_OK);
status = vmk_StressOptionOpen(VMK_STRESS_OPT_NET_IF_CORRUPT_TX,
&stressNetIfCorruptTx);
VMK_ASSERT(status == VMK_OK);
status = vmk_StressOptionOpen(VMK_STRESS_OPT_NET_IF_FAIL_HARD_TX,
&stressNetIfFailHardTx);
VMK_ASSERT(status == VMK_OK);
status = vmk_StressOptionOpen(VMK_STRESS_OPT_NET_IF_FAIL_RX,
&stressNetIfFailRx);
VMK_ASSERT(status == VMK_OK);
status = vmk_StressOptionOpen(VMK_STRESS_OPT_NET_IF_FAIL_TX_AND_STOP_QUEUE,
&stressNetIfFailTxAndStopQueue);
VMK_ASSERT(status == VMK_OK);
status = vmk_StressOptionOpen(VMK_STRESS_OPT_NET_IF_FORCE_HIGH_DMA_OVERFLOW,
&stressNetIfForceHighDMAOverflow);
VMK_ASSERT(status == VMK_OK);
status = vmk_StressOptionOpen(VMK_STRESS_OPT_NET_IF_FORCE_RX_SW_CSUM,
&stressNetIfForceRxSWCsum);
VMK_ASSERT(status == VMK_OK);
status = vmk_StressOptionOpen(VMK_STRESS_OPT_NET_NAPI_FORCE_BACKUP_WORLDLET,
&stressNetNapiForceBackupWorldlet);
VMK_ASSERT(status == VMK_OK);
status = vmk_StressOptionOpen(VMK_STRESS_OPT_NET_BLOCK_DEV_IS_SLUGGISH,
&stressNetBlockDevIsSluggish);
VMK_ASSERT(status == VMK_OK);
}
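/*
 * The "Net" config parameters opened above surface as ESXi advanced
 * settings, so they can be inspected or tuned from the host shell. An
 * illustrative example (the value 500 is arbitrary; the option name
 * matches the vmk_ConfigParamOpen() call above):
 *
 *    esxcfg-advcfg -g /Net/LinkStatePollTimeout    # read current value
 *    esxcfg-advcfg -s 500 /Net/LinkStatePollTimeout
 */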
/*
*----------------------------------------------------------------------------
*
* LinNet_Cleanup --
*
* Cleanup function for linux_net. Release and cleanup all resources.
*
* Results:
* None.
*
* Side effects:
* None.
*
*----------------------------------------------------------------------------
*/
void LinNet_Cleanup(void)
{
VMK_ReturnStatus status;
LinStress_CleanupStress();
cancel_delayed_work_sync(&linkStateWork);
cancel_delayed_work_sync(&watchdogWork);
vmk_TimerRemoveSync(devWatchdogTimer);
status = vmk_ConfigParamClose(linkStateTimerPeriodConfigHandle);
VMK_ASSERT(status == VMK_OK);
status = vmk_ConfigParamClose(maxNetifTxQueueLenConfigHandle);
VMK_ASSERT(status == VMK_OK);
status = vmk_ConfigParamClose(useHwIPv6CsumHandle);
VMK_ASSERT(status == VMK_OK);
status = vmk_ConfigParamClose(useHwCsumForIPv6CsumHandle);
VMK_ASSERT(status == VMK_OK);
status = vmk_ConfigParamClose(useHwTSOHandle);
VMK_ASSERT(status == VMK_OK);
status = vmk_ConfigParamClose(useHwTSO6Handle);
VMK_ASSERT(status == VMK_OK);
status = vmk_ConfigParamClose(blockTotalSleepMsecHandle);
VMK_ASSERT(status == VMK_OK);
status = vmk_ConfigParamClose(vmklnxLROEnabledConfigHandle);
VMK_ASSERT(status == VMK_OK);
status = vmk_ConfigParamClose(vmklnxLROMaxAggrConfigHandle);
VMK_ASSERT(status == VMK_OK);
status = vmk_StressOptionClose(stressNetGenTinyArpRarp);
VMK_ASSERT(status == VMK_OK);
status = vmk_StressOptionClose(stressNetIfCorruptEthHdr);
VMK_ASSERT(status == VMK_OK);
status = vmk_StressOptionClose(stressNetIfCorruptRxData);
VMK_ASSERT(status == VMK_OK);
status = vmk_StressOptionClose(stressNetIfCorruptRxTcpUdp);
VMK_ASSERT(status == VMK_OK);
status = vmk_StressOptionClose(stressNetIfCorruptTx);
VMK_ASSERT(status == VMK_OK);
status = vmk_StressOptionClose(stressNetIfFailHardTx);
VMK_ASSERT(status == VMK_OK);
status = vmk_StressOptionClose(stressNetIfFailRx);
VMK_ASSERT(status == VMK_OK);
status = vmk_StressOptionClose(stressNetIfFailTxAndStopQueue);
VMK_ASSERT(status == VMK_OK);
status = vmk_StressOptionClose(stressNetIfForceHighDMAOverflow);
VMK_ASSERT(status == VMK_OK);
status = vmk_StressOptionClose(stressNetIfForceRxSWCsum);
VMK_ASSERT(status == VMK_OK);
status = vmk_StressOptionClose(stressNetNapiForceBackupWorldlet);
VMK_ASSERT(status == VMK_OK);
status = vmk_StressOptionClose(stressNetBlockDevIsSluggish);
VMK_ASSERT(status == VMK_OK);
VMKLNX_DESTROY_LOG();
}