/*-
 * Copyright (c) 2009-2012 Microsoft Corp.
 * Copyright (c) 2010-2012 Citrix Inc.
 * Copyright (c) 2012 NetApp Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * $FreeBSD$
 */

/**
 * HyperV vmbus network VSC (virtual services client) module
 *
 */


#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/socket.h>
#include <sys/lock.h>
#include <net/if.h>
#include <net/if_var.h>
#include <net/if_arp.h>
#include <machine/bus.h>
#include <machine/atomic.h>

#include <dev/hyperv/include/hyperv.h>
#include "hv_net_vsc.h"
#include "hv_rndis.h"
#include "hv_rndis_filter.h"


/*
 * Forward declarations
 */
static void hv_nv_on_channel_callback(void *context);
static int  hv_nv_init_send_buffer_with_net_vsp(struct hv_device *device);
static int  hv_nv_init_rx_buffer_with_net_vsp(struct hv_device *device);
static int  hv_nv_destroy_send_buffer(netvsc_dev *net_dev);
static int  hv_nv_destroy_rx_buffer(netvsc_dev *net_dev);
static int  hv_nv_connect_to_vsp(struct hv_device *device);
static void hv_nv_on_send_completion(struct hv_device *device,
				     hv_vm_packet_descriptor *pkt);
static void hv_nv_on_receive(struct hv_device *device,
			     hv_vm_packet_descriptor *pkt);
static void hv_nv_send_receive_completion(struct hv_device *device,
					  uint64_t tid);


/*
 *
 */
/*
 * Allocate and zero-fill a netvsc_dev for the given hv_device, link
 * it into the driver softc, and return it.  Returns NULL when the
 * allocation fails (M_NOWAIT: may be called in a non-sleepable path).
 */
static inline netvsc_dev *
hv_nv_alloc_net_device(struct hv_device *device)
{
	hn_softc_t *sc = device_get_softc(device->device);
	netvsc_dev *net_dev;

	net_dev = malloc(sizeof(netvsc_dev), M_DEVBUF, M_NOWAIT | M_ZERO);
	if (net_dev == NULL)
		return (NULL);

	net_dev->dev = device;
	net_dev->destroy = FALSE;
	sc->net_dev = net_dev;

	return (net_dev);
}

/*
 *
 */
/*
 * Return the netvsc_dev for outbound (send) use.
 *
 * Returns NULL if the device was never set up or is being destroyed;
 * no new sends are permitted once teardown has started.
 */
static inline netvsc_dev *
hv_nv_get_outbound_net_device(struct hv_device *device)
{
	hn_softc_t *sc = device_get_softc(device->device);
	netvsc_dev *net_dev = sc->net_dev;

	if ((net_dev != NULL) && net_dev->destroy) {
		return (NULL);
	}

	return (net_dev);
}

/*
 *
 */
/*
 * Return the netvsc_dev for inbound (receive/completion) use.
 *
 * Returns NULL if the device was never set up.  While the device is
 * being destroyed, inbound traffic is still permitted as long as
 * there are outstanding sends, so their completions can be drained.
 */
static inline netvsc_dev *
hv_nv_get_inbound_net_device(struct hv_device *device)
{
	hn_softc_t *sc = device_get_softc(device->device);
	netvsc_dev *net_dev = sc->net_dev;

	if (net_dev == NULL) {
		return (net_dev);
	}
	/*
	 * When the device is being destroyed; we only
	 * permit incoming packets if and only if there
	 * are outstanding sends.
	 */
	if (net_dev->destroy && net_dev->num_outstanding_sends == 0) {
		return (NULL);
	}

	return (net_dev);
}

/*
 * Net VSC initialize receive buffer with net VSP
 * 
 * Net VSP:  Network virtual services client, also known as the
 *     Hyper-V extensible switch and the synthetic data path.
 */
/*
 * Net VSC initialize receive buffer with net VSP
 *
 * Net VSP:  Network virtual services client, also known as the
 *     Hyper-V extensible switch and the synthetic data path.
 *
 * Allocates the receive buffer, establishes a GPADL for it, notifies
 * the VSP, and records the section layout the VSP reports back.
 * Returns 0 or an errno; on failure, partially acquired resources
 * are released via hv_nv_destroy_rx_buffer().
 */
static int 
hv_nv_init_rx_buffer_with_net_vsp(struct hv_device *device)
{
	netvsc_dev *net_dev;
	nvsp_msg *init_pkt;
	int ret = 0;

	net_dev = hv_nv_get_outbound_net_device(device);
	if (!net_dev) {
		return (ENODEV);
	}

	net_dev->rx_buf = contigmalloc(net_dev->rx_buf_size, M_DEVBUF,
	    M_ZERO, 0UL, BUS_SPACE_MAXADDR, PAGE_SIZE, 0);
	if (net_dev->rx_buf == NULL) {
		ret = ENOMEM;
		goto cleanup;
	}

	/*
	 * Establish the GPADL handle for this buffer on this channel.
	 * Note:  This call uses the vmbus connection rather than the
	 * channel to establish the gpadl handle. 
	 * GPADL:  Guest physical address descriptor list.
	 */
	ret = hv_vmbus_channel_establish_gpadl(
		device->channel, net_dev->rx_buf,
		net_dev->rx_buf_size, &net_dev->rx_buf_gpadl_handle);
	if (ret != 0) {
		goto cleanup;
	}

	/* Notify the NetVsp of the gpadl handle */
	init_pkt = &net_dev->channel_init_packet;

	memset(init_pkt, 0, sizeof(nvsp_msg));

	init_pkt->hdr.msg_type = nvsp_msg_1_type_send_rx_buf;
	init_pkt->msgs.vers_1_msgs.send_rx_buf.gpadl_handle =
	    net_dev->rx_buf_gpadl_handle;
	init_pkt->msgs.vers_1_msgs.send_rx_buf.id =
	    NETVSC_RECEIVE_BUFFER_ID;

	/* Send the gpadl notification request */

	ret = hv_vmbus_channel_send_packet(device->channel, init_pkt,
	    sizeof(nvsp_msg), (uint64_t)(uintptr_t)init_pkt,
	    HV_VMBUS_PACKET_TYPE_DATA_IN_BAND,
	    HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
	if (ret != 0) {
		goto cleanup;
	}

	/* The channel callback posts the sema when the VSP reply arrives */
	sema_wait(&net_dev->channel_init_sema);

	/* Check the response */
	if (init_pkt->msgs.vers_1_msgs.send_rx_buf_complete.status
	    != nvsp_status_success) {
		ret = EINVAL;
		goto cleanup;
	}

	net_dev->rx_section_count =
	    init_pkt->msgs.vers_1_msgs.send_rx_buf_complete.num_sections;

	net_dev->rx_sections = malloc(net_dev->rx_section_count *
	    sizeof(nvsp_1_rx_buf_section), M_DEVBUF, M_NOWAIT);
	if (net_dev->rx_sections == NULL) {
		/* Allocation failure is ENOMEM, not EINVAL */
		ret = ENOMEM;
		goto cleanup;
	}
	memcpy(net_dev->rx_sections, 
	    init_pkt->msgs.vers_1_msgs.send_rx_buf_complete.sections,
	    net_dev->rx_section_count * sizeof(nvsp_1_rx_buf_section));

	/*
	 * For first release, there should only be 1 section that represents
	 * the entire receive buffer
	 */
	if (net_dev->rx_section_count != 1
	    || net_dev->rx_sections->offset != 0) {
		ret = EINVAL;
		goto cleanup;
	}

	goto exit;

cleanup:
	hv_nv_destroy_rx_buffer(net_dev);
	
exit:
	return (ret);
}

/*
 * Net VSC initialize send buffer with net VSP
 */
/*
 * Net VSC initialize send buffer with net VSP
 *
 * Allocates the send buffer, establishes a GPADL for it, notifies
 * the VSP, and records the section size the VSP reports back.
 * Returns 0 or an errno; on failure, partially acquired resources
 * are released via hv_nv_destroy_send_buffer().
 */
static int 
hv_nv_init_send_buffer_with_net_vsp(struct hv_device *device)
{
	netvsc_dev *net_dev;
	nvsp_msg *init_pkt;
	int ret = 0;

	net_dev = hv_nv_get_outbound_net_device(device);
	if (!net_dev) {
		return (ENODEV);
	}

	net_dev->send_buf  = contigmalloc(net_dev->send_buf_size, M_DEVBUF,
	    M_ZERO, 0UL, BUS_SPACE_MAXADDR, PAGE_SIZE, 0);
	if (net_dev->send_buf == NULL) {
		ret = ENOMEM;
		goto cleanup;
	}

	/*
	 * Establish the gpadl handle for this buffer on this channel.
	 * Note:  This call uses the vmbus connection rather than the
	 * channel to establish the gpadl handle. 
	 */
	ret = hv_vmbus_channel_establish_gpadl(device->channel,
	    net_dev->send_buf, net_dev->send_buf_size,
	    &net_dev->send_buf_gpadl_handle);
	if (ret != 0) {
		goto cleanup;
	}

	/* Notify the NetVsp of the gpadl handle */

	init_pkt = &net_dev->channel_init_packet;

	memset(init_pkt, 0, sizeof(nvsp_msg));

	/*
	 * Use the send_send_buf union member (this previously wrote
	 * through the layout-compatible send_rx_buf member).
	 */
	init_pkt->hdr.msg_type = nvsp_msg_1_type_send_send_buf;
	init_pkt->msgs.vers_1_msgs.send_send_buf.gpadl_handle =
	    net_dev->send_buf_gpadl_handle;
	init_pkt->msgs.vers_1_msgs.send_send_buf.id =
	    NETVSC_SEND_BUFFER_ID;

	/* Send the gpadl notification request */

	ret = hv_vmbus_channel_send_packet(device->channel, init_pkt,
	    sizeof(nvsp_msg), (uint64_t)(uintptr_t)init_pkt,
	    HV_VMBUS_PACKET_TYPE_DATA_IN_BAND,
	    HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
	if (ret != 0) {
		goto cleanup;
	}

	/* The channel callback posts the sema when the VSP reply arrives */
	sema_wait(&net_dev->channel_init_sema);

	/* Check the response */
	if (init_pkt->msgs.vers_1_msgs.send_send_buf_complete.status
	    != nvsp_status_success) {
		ret = EINVAL;
		goto cleanup;
	}

	net_dev->send_section_size =
	    init_pkt->msgs.vers_1_msgs.send_send_buf_complete.section_size;

	goto exit;

cleanup:
	hv_nv_destroy_send_buffer(net_dev);
	
exit:
	return (ret);
}

/*
 * Net VSC destroy receive buffer
 */
/*
 * Net VSC destroy receive buffer
 *
 * Undo hv_nv_init_rx_buffer_with_net_vsp(): revoke the buffer from
 * the VSP (if it was accepted), tear down the GPADL, and free the
 * buffer and the section array.  Safe to call on partial setup.
 */
static int
hv_nv_destroy_rx_buffer(netvsc_dev *net_dev)
{
	nvsp_msg *revoke;
	int error = 0;

	/*
	 * A non-zero section count means the host completed our
	 * nvsp_msg_1_type_send_rx_buf request, so the buffer must be
	 * revoked before it can be torn down.
	 */
	if (net_dev->rx_section_count) {
		revoke = &net_dev->revoke_packet;
		memset(revoke, 0, sizeof(nvsp_msg));

		revoke->hdr.msg_type = nvsp_msg_1_type_revoke_rx_buf;
		revoke->msgs.vers_1_msgs.revoke_rx_buf.id =
		    NETVSC_RECEIVE_BUFFER_ID;

		error = hv_vmbus_channel_send_packet(net_dev->dev->channel,
		    revoke, sizeof(nvsp_msg), (uint64_t)(uintptr_t)revoke,
		    HV_VMBUS_PACKET_TYPE_DATA_IN_BAND, 0);
		/*
		 * On failure, return and accept the leak rather than
		 * continue and risk a bugcheck.
		 */
		if (error != 0)
			return (error);
	}

	/* Tear down the gpadl on the vsp end */
	if (net_dev->rx_buf_gpadl_handle) {
		error = hv_vmbus_channel_teardown_gpdal(net_dev->dev->channel,
		    net_dev->rx_buf_gpadl_handle);
		/* Same policy: prefer a leak over a bugcheck */
		if (error != 0)
			return (error);
		net_dev->rx_buf_gpadl_handle = 0;
	}

	/* Free the receive buffer itself */
	if (net_dev->rx_buf) {
		contigfree(net_dev->rx_buf, net_dev->rx_buf_size, M_DEVBUF);
		net_dev->rx_buf = NULL;
	}

	/* Free the section table reported by the VSP */
	if (net_dev->rx_sections) {
		free(net_dev->rx_sections, M_DEVBUF);
		net_dev->rx_sections = NULL;
		net_dev->rx_section_count = 0;
	}

	return (error);
}

/*
 * Net VSC destroy send buffer
 */
/*
 * Net VSC destroy send buffer
 *
 * Undo hv_nv_init_send_buffer_with_net_vsp(): revoke the buffer from
 * the VSP (if it was accepted), tear down the GPADL, and free the
 * buffer.  Safe to call on a partially set-up send buffer.
 */
static int
hv_nv_destroy_send_buffer(netvsc_dev *net_dev)
{
	nvsp_msg *revoke_pkt;
	int ret = 0;

	/*
	 * If we got a section size, it means we received a
	 * send_send_buf_complete msg
	 * (ie sent nvsp_msg_1_type_send_send_buf msg) therefore,
	 * we need to send a revoke msg here
	 */
	if (net_dev->send_section_size) {
		/* Send the revoke send buffer */
		revoke_pkt = &net_dev->revoke_packet;
		memset(revoke_pkt, 0, sizeof(nvsp_msg));

		revoke_pkt->hdr.msg_type =
		    nvsp_msg_1_type_revoke_send_buf;
		revoke_pkt->msgs.vers_1_msgs.revoke_send_buf.id =
		    NETVSC_SEND_BUFFER_ID;

		ret = hv_vmbus_channel_send_packet(net_dev->dev->channel,
		    revoke_pkt, sizeof(nvsp_msg),
		    (uint64_t)(uintptr_t)revoke_pkt,
		    HV_VMBUS_PACKET_TYPE_DATA_IN_BAND, 0);
		/*
		 * If we failed here, we might as well return and have a leak 
		 * rather than continue and a bugchk
		 */
		if (ret != 0) {
			return (ret);
		}
	}
		
	/* Tear down the gpadl on the vsp end */
	if (net_dev->send_buf_gpadl_handle) {
		ret = hv_vmbus_channel_teardown_gpdal(net_dev->dev->channel,
		    net_dev->send_buf_gpadl_handle);

		/*
		 * If we failed here, we might as well return and have a leak 
		 * rather than continue and a bugchk
		 */
		if (ret != 0) {
			return (ret);
		}
		net_dev->send_buf_gpadl_handle = 0;
	}

	if (net_dev->send_buf) {
		/* Free up the send buffer */
		contigfree(net_dev->send_buf, net_dev->send_buf_size, M_DEVBUF);
		net_dev->send_buf = NULL;
	}

	return (ret);
}


/*
 * Attempt to negotiate the caller-specified NVSP version
 *
 * For NVSP v2, Server 2008 R2 does not set
 * init_pkt->msgs.init_msgs.init_compl.negotiated_prot_vers
 * to the negotiated version, so we cannot rely on that.
 */
/*
 * Attempt to negotiate the caller-specified NVSP version
 *
 * For NVSP v2, Server 2008 R2 does not set
 * init_pkt->msgs.init_msgs.init_compl.negotiated_prot_vers
 * to the negotiated version, so we cannot rely on that.
 *
 * Returns 0 on success; the send error or EINVAL on failure.
 */
static int
hv_nv_negotiate_nvsp_protocol(struct hv_device *device, netvsc_dev *net_dev,
			      uint32_t nvsp_ver)
{
	nvsp_msg *init_pkt;
	int ret;

	init_pkt = &net_dev->channel_init_packet;
	memset(init_pkt, 0, sizeof(nvsp_msg));
	init_pkt->hdr.msg_type = nvsp_msg_type_init;

	/*
	 * Specify parameter as the only acceptable protocol version
	 */
	init_pkt->msgs.init_msgs.init.p1.protocol_version = nvsp_ver;
	init_pkt->msgs.init_msgs.init.protocol_version_2 = nvsp_ver;

	/* Send the init request */
	ret = hv_vmbus_channel_send_packet(device->channel, init_pkt,
	    sizeof(nvsp_msg), (uint64_t)(uintptr_t)init_pkt,
	    HV_VMBUS_PACKET_TYPE_DATA_IN_BAND,
	    HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
	if (ret != 0)
		return (ret);	/* was -1; propagate real errno instead */

	/* The channel callback posts the sema when the reply arrives */
	sema_wait(&net_dev->channel_init_sema);

	if (init_pkt->msgs.init_msgs.init_compl.status != nvsp_status_success)
		return (EINVAL);

	return (0);
}

/*
 * Send NDIS version 2 config packet containing MTU.
 *
 * Not valid for NDIS version 1.
 */
/*
 * Send NDIS version 2 config packet containing MTU.
 *
 * Not valid for NDIS version 1.
 *
 * Returns 0 on success, or a (positive, FreeBSD-style) errno value.
 * The previous Linux-style negative errnos were inconsistent with
 * the rest of this file; the sole caller only tests for non-zero.
 */
static int
hv_nv_send_ndis_config(struct hv_device *device, uint32_t mtu)
{
	netvsc_dev *net_dev;
	nvsp_msg *init_pkt;
	int ret;

	net_dev = hv_nv_get_outbound_net_device(device);
	if (!net_dev)
		return (ENODEV);

	/*
	 * Set up configuration packet, write MTU
	 * Indicate we are capable of handling VLAN tags
	 */
	init_pkt = &net_dev->channel_init_packet;
	memset(init_pkt, 0, sizeof(nvsp_msg));
	init_pkt->hdr.msg_type = nvsp_msg_2_type_send_ndis_config;
	init_pkt->msgs.vers_2_msgs.send_ndis_config.mtu = mtu;
	init_pkt->msgs.vers_2_msgs.send_ndis_config.capabilities.u1.u2.ieee8021q
	    = 1;

	/* Send the configuration packet; no completion is awaited */
	ret = hv_vmbus_channel_send_packet(device->channel, init_pkt,
	    sizeof(nvsp_msg), (uint64_t)(uintptr_t)init_pkt,
	    HV_VMBUS_PACKET_TYPE_DATA_IN_BAND, 0);
	if (ret != 0)
		return (EINVAL);

	return (0);
}

/*
 * Net VSC connect to VSP
 */
/*
 * Net VSC connect to VSP
 *
 * Negotiates the NVSP protocol version (preferring v2, falling back
 * to v1), optionally sends the MTU/VLAN config (v2+ only), sends the
 * NDIS version, then posts the receive and send buffers to the VSP.
 * Returns 0 on success or an errno value.
 */
static int
hv_nv_connect_to_vsp(struct hv_device *device)
{
	netvsc_dev *net_dev;
	nvsp_msg *init_pkt;
	uint32_t nvsp_vers;
	uint32_t ndis_version;
	int ret = 0;
	device_t dev = device->device;
	hn_softc_t *sc = device_get_softc(dev);
	struct ifnet *ifp = sc->hn_ifp;

	net_dev = hv_nv_get_outbound_net_device(device);
	if (!net_dev) {
		return (ENODEV);
	}

	/*
	 * Negotiate the NVSP version.  Try NVSP v2 first.
	 */
	nvsp_vers = NVSP_PROTOCOL_VERSION_2;
	ret = hv_nv_negotiate_nvsp_protocol(device, net_dev, nvsp_vers);
	if (ret != 0) {
		/* NVSP v2 failed, try NVSP v1 */
		nvsp_vers = NVSP_PROTOCOL_VERSION_1;
		ret = hv_nv_negotiate_nvsp_protocol(device, net_dev, nvsp_vers);
		if (ret != 0) {
			/* NVSP v1 failed, return bad status */
			return (ret);
		}
	}
	net_dev->nvsp_version = nvsp_vers;

	/*
	 * Set the MTU if supported by this NVSP protocol version
	 * This needs to be right after the NVSP init message per Haiyang
	 *
	 * Note: ret is deliberately overwritten below; a config failure
	 * here is treated as best-effort and does not abort the connect.
	 */
	if (nvsp_vers >= NVSP_PROTOCOL_VERSION_2)
		ret = hv_nv_send_ndis_config(device, ifp->if_mtu);

	/*
	 * Send the NDIS version
	 */
	init_pkt = &net_dev->channel_init_packet;

	memset(init_pkt, 0, sizeof(nvsp_msg));

	/*
	 * Updated to version 5.1, minimum, for VLAN per Haiyang
	 */
	ndis_version = NDIS_VERSION;

	/* NDIS version is packed as major:16 | minor:16 */
	init_pkt->hdr.msg_type = nvsp_msg_1_type_send_ndis_vers;
	init_pkt->msgs.vers_1_msgs.send_ndis_vers.ndis_major_vers =
	    (ndis_version & 0xFFFF0000) >> 16;
	init_pkt->msgs.vers_1_msgs.send_ndis_vers.ndis_minor_vers =
	    ndis_version & 0xFFFF;

	/* Send the init request */

	ret = hv_vmbus_channel_send_packet(device->channel, init_pkt,
	    sizeof(nvsp_msg), (uint64_t)(uintptr_t)init_pkt,
	    HV_VMBUS_PACKET_TYPE_DATA_IN_BAND, 0);
	if (ret != 0) {
		goto cleanup;
	}
	/*
	 * TODO:  BUGBUG - We have to wait for the above msg since the netvsp
	 * uses KMCL which acknowledges packet (completion packet) 
	 * since our Vmbus always set the
	 * HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED flag
	 */
	/* sema_wait(&NetVscChannel->channel_init_sema); */

	/* Post the big receive buffer to NetVSP */
	ret = hv_nv_init_rx_buffer_with_net_vsp(device);
	if (ret == 0)
		ret = hv_nv_init_send_buffer_with_net_vsp(device);

cleanup:
	return (ret);
}

/*
 * Net VSC disconnect from VSP
 */
/*
 * Net VSC disconnect from VSP
 *
 * Revoke and free both shared buffers.  Each destroy routine is safe
 * on partially initialized state, so this can run after any failure
 * point in hv_nv_connect_to_vsp().
 */
static void
hv_nv_disconnect_from_vsp(netvsc_dev *net_dev)
{
	hv_nv_destroy_rx_buffer(net_dev);
	hv_nv_destroy_send_buffer(net_dev);
}

/*
 * Net VSC on device add
 * 
 * Callback when the device belonging to this driver is added
 */
/*
 * Net VSC on device add
 * 
 * Callback when the device belonging to this driver is added
 *
 * Allocates the netvsc_dev, pre-allocates the receive packet
 * freelist, opens the vmbus channel, and connects to the VSP.
 * Returns the new netvsc_dev, or NULL on failure (all resources
 * released).  additional_info is currently unused.
 */
netvsc_dev *
hv_nv_on_device_add(struct hv_device *device, void *additional_info)
{
	netvsc_dev *net_dev;
	netvsc_packet *packet;
	netvsc_packet *next_packet;
	int i, ret = 0;

	net_dev = hv_nv_alloc_net_device(device);
	if (!net_dev)
		goto cleanup;

	/* Initialize the NetVSC channel extension */
	net_dev->rx_buf_size = NETVSC_RECEIVE_BUFFER_SIZE;
	/* Spin lock: the freelist is also touched from the channel callback */
	mtx_init(&net_dev->rx_pkt_list_lock, "HV-RPL", NULL,
	    MTX_SPIN | MTX_RECURSE);

	net_dev->send_buf_size = NETVSC_SEND_BUFFER_SIZE;

	/* Same effect as STAILQ_HEAD_INITIALIZER() static initializer */
	STAILQ_INIT(&net_dev->myrx_packet_list);

	/* 
	 * malloc a sufficient number of netvsc_packet buffers to hold
	 * a packet list.  Add them to the netvsc device packet queue.
	 * Each entry carries room for NETVSC_RECEIVE_SG_COUNT page buffers.
	 * NOTE(review): an allocation failure here just shortens the
	 * freelist rather than failing the attach — presumably deliberate.
	 */
	for (i=0; i < NETVSC_RECEIVE_PACKETLIST_COUNT; i++) {
		packet = malloc(sizeof(netvsc_packet) +
		    (NETVSC_RECEIVE_SG_COUNT * sizeof(hv_vmbus_page_buffer)),
		    M_DEVBUF, M_NOWAIT | M_ZERO);
		if (!packet) {
			break;
		}
		STAILQ_INSERT_TAIL(&net_dev->myrx_packet_list, packet,
		    mylist_entry);
	}

	/* Posted by the channel callback when init replies arrive */
	sema_init(&net_dev->channel_init_sema, 0, "netdev_sema");

	/*
	 * Open the channel
	 */
	ret = hv_vmbus_channel_open(device->channel,
	    NETVSC_DEVICE_RING_BUFFER_SIZE, NETVSC_DEVICE_RING_BUFFER_SIZE,
	    NULL, 0, hv_nv_on_channel_callback, device);
	if (ret != 0)
		goto cleanup;

	/*
	 * Connect with the NetVsp
	 */
	ret = hv_nv_connect_to_vsp(device);
	if (ret != 0)
		goto close;

	return (net_dev);

close:
	/* Now, we can close the channel safely */

	hv_vmbus_channel_close(device->channel);

cleanup:
	/*
	 * Free the packet buffers on the netvsc device packet queue.
	 * Release other resources.
	 */
	if (net_dev) {
		sema_destroy(&net_dev->channel_init_sema);

		packet = STAILQ_FIRST(&net_dev->myrx_packet_list);
		while (packet != NULL) {
			next_packet = STAILQ_NEXT(packet, mylist_entry);
			free(packet, M_DEVBUF);
			packet = next_packet;
		}
		/* Reset the list to initial state */
		STAILQ_INIT(&net_dev->myrx_packet_list);

		mtx_destroy(&net_dev->rx_pkt_list_lock);

		free(net_dev, M_DEVBUF);
	}

	return (NULL);
}

/*
 * Net VSC on device remove
 */
/*
 * Net VSC on device remove
 *
 * Stops outbound traffic, waits for in-flight sends to drain,
 * disconnects from the VSP, closes the channel and frees all
 * resources.  Always returns 0.
 */
int
hv_nv_on_device_remove(struct hv_device *device, boolean_t destroy_channel)
{
	netvsc_packet *net_vsc_pkt;
	netvsc_packet *next_net_vsc_pkt;
	hn_softc_t *sc = device_get_softc(device->device);
	netvsc_dev *net_dev = sc->net_dev;
	
	/* Stop outbound traffic ie sends and receives completions */
	mtx_lock(&device->channel->inbound_lock);
	net_dev->destroy = TRUE;
	mtx_unlock(&device->channel->inbound_lock);

	/* Wait for all send completions */
	while (net_dev->num_outstanding_sends) {
		DELAY(100);
	}

	hv_nv_disconnect_from_vsp(net_dev);

	/* At this point, no one should be accessing net_dev except in here */

	/* Now, we can close the channel safely */

	if (!destroy_channel) {
		device->channel->state =
		    HV_CHANNEL_CLOSING_NONDESTRUCTIVE_STATE;
	}

	hv_vmbus_channel_close(device->channel);

	/* Release the receive packet freelist */
	net_vsc_pkt = STAILQ_FIRST(&net_dev->myrx_packet_list);
	while (net_vsc_pkt != NULL) {
		next_net_vsc_pkt = STAILQ_NEXT(net_vsc_pkt, mylist_entry);
		free(net_vsc_pkt, M_DEVBUF);
		net_vsc_pkt = next_net_vsc_pkt;
	}

	/* Reset the list to initial state */
	STAILQ_INIT(&net_dev->myrx_packet_list);

	mtx_destroy(&net_dev->rx_pkt_list_lock);
	sema_destroy(&net_dev->channel_init_sema);
	free(net_dev, M_DEVBUF);

	return (0);
}

/*
 * Net VSC on send completion
 */
/*
 * Net VSC on send completion
 *
 * Dispatch a completion packet from the host: init/buffer-setup
 * replies wake the waiter blocked on channel_init_sema; RNDIS send
 * completions invoke the upper layer's callback and decrement the
 * outstanding-send count.
 */
static void 
hv_nv_on_send_completion(struct hv_device *device, hv_vm_packet_descriptor *pkt)
{
	netvsc_dev *net_dev;
	nvsp_msg *msg;
	netvsc_packet *net_pkt;

	net_dev = hv_nv_get_inbound_net_device(device);
	if (net_dev == NULL)
		return;

	/* The nvsp message follows the descriptor header */
	msg = (nvsp_msg *)((unsigned long)pkt + (pkt->data_offset8 << 3));

	switch (msg->hdr.msg_type) {
	case nvsp_msg_type_init_complete:
	case nvsp_msg_1_type_send_rx_buf_complete:
	case nvsp_msg_1_type_send_send_buf_complete:
		/* Stash the reply where the init path can read it, wake it */
		memcpy(&net_dev->channel_init_packet, msg, sizeof(nvsp_msg));
		sema_post(&net_dev->channel_init_sema);
		break;
	case nvsp_msg_1_type_send_rndis_pkt_complete:
		/* The transaction id carries the original send context */
		net_pkt = (netvsc_packet *)(unsigned long)pkt->transaction_id;

		/* Notify the layer above us */
		net_pkt->compl.send.on_send_completion(
		    net_pkt->compl.send.send_completion_context);

		atomic_subtract_int(&net_dev->num_outstanding_sends, 1);
		break;
	default:
		break;
	}
}

/*
 * Net VSC on send
 * Sends a packet on the specified Hyper-V device.
 * Returns 0 on success, non-zero on failure.
 */
/*
 * Net VSC on send
 * Sends a packet on the specified Hyper-V device.
 * Returns 0 on success, non-zero on failure.
 */
int
hv_nv_on_send(struct hv_device *device, netvsc_packet *pkt)
{
	netvsc_dev *net_dev;
	nvsp_msg msg;
	int error;

	net_dev = hv_nv_get_outbound_net_device(device);
	if (net_dev == NULL)
		return (ENODEV);

	msg.hdr.msg_type = nvsp_msg_1_type_send_rndis_pkt;
	/* Channel type: 0 == RMC_DATA, 1 == RMC_CONTROL */
	msg.msgs.vers_1_msgs.send_rndis_pkt.chan_type =
	    pkt->is_data_pkt ? 0 : 1;

	/* Not using send buffer section */
	msg.msgs.vers_1_msgs.send_rndis_pkt.send_buf_section_idx =
	    0xFFFFFFFF;
	msg.msgs.vers_1_msgs.send_rndis_pkt.send_buf_section_size = 0;

	/* With page buffers, use the scatter/gather send variant */
	if (pkt->page_buf_count != 0) {
		error = hv_vmbus_channel_send_packet_pagebuffer(
		    device->channel, pkt->page_buffers, pkt->page_buf_count,
		    &msg, sizeof(nvsp_msg), (uint64_t)(uintptr_t)pkt);
	} else {
		error = hv_vmbus_channel_send_packet(device->channel,
		    &msg, sizeof(nvsp_msg), (uint64_t)(uintptr_t)pkt,
		    HV_VMBUS_PACKET_TYPE_DATA_IN_BAND,
		    HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
	}

	/* Record outstanding send only if send_packet() succeeded */
	if (error == 0)
		atomic_add_int(&net_dev->num_outstanding_sends, 1);

	return (error);
}

/*
 * Net VSC on receive
 *
 * In the FreeBSD Hyper-V virtual world, this function deals exclusively
 * with virtual addresses.
 */
/*
 * Net VSC on receive
 *
 * In the FreeBSD Hyper-V virtual world, this function deals exclusively
 * with virtual addresses.
 *
 * Handles a transfer-page packet from the host: grabs one freelist
 * entry per RNDIS range plus one to represent the transfer-page packet
 * itself, hands each range to the RNDIS filter, and sends a receive
 * completion per range (the final one triggers the host completion,
 * see hv_nv_on_receive_completion()).
 */
static void 
hv_nv_on_receive(struct hv_device *device, hv_vm_packet_descriptor *pkt)
{
	netvsc_dev *net_dev;
	hv_vm_transfer_page_packet_header *vm_xfer_page_pkt;
	nvsp_msg *nvsp_msg_pkt;
	netvsc_packet *net_vsc_pkt = NULL;
	unsigned long start;
	xfer_page_packet *xfer_page_pkt = NULL;
	STAILQ_HEAD(PKT_LIST, netvsc_packet_) mylist_head =
	    STAILQ_HEAD_INITIALIZER(mylist_head);
	int count = 0;
	int i = 0;

	net_dev = hv_nv_get_inbound_net_device(device);
	if (!net_dev)
		return;

	/*
	 * All inbound packets other than send completion should be
	 * xfer page packet.
	 */
	if (pkt->type != HV_VMBUS_PACKET_TYPE_DATA_USING_TRANSFER_PAGES)
		return;

	/* The nvsp message follows the descriptor header */
	nvsp_msg_pkt = (nvsp_msg *)((unsigned long)pkt
		+ (pkt->data_offset8 << 3));

	/* Make sure this is a valid nvsp packet */
	if (nvsp_msg_pkt->hdr.msg_type != nvsp_msg_1_type_send_rndis_pkt)
		return;
	
	vm_xfer_page_pkt = (hv_vm_transfer_page_packet_header *)pkt;

	/* Only packets targeting our receive buffer are expected */
	if (vm_xfer_page_pkt->transfer_page_set_id
		!= NETVSC_RECEIVE_BUFFER_ID) {
		return;
	}

	STAILQ_INIT(&mylist_head);

	/*
	 * Grab free packets (range count + 1) to represent this xfer page
	 * packet.  +1 to represent the xfer page packet itself.  We grab it
	 * here so that we know exactly how many we can fulfill.
	 */
	mtx_lock_spin(&net_dev->rx_pkt_list_lock);
	while (!STAILQ_EMPTY(&net_dev->myrx_packet_list)) {	
		net_vsc_pkt = STAILQ_FIRST(&net_dev->myrx_packet_list);
		STAILQ_REMOVE_HEAD(&net_dev->myrx_packet_list, mylist_entry);

		STAILQ_INSERT_TAIL(&mylist_head, net_vsc_pkt, mylist_entry);

		if (++count == vm_xfer_page_pkt->range_count + 1)
			break;
	}

	mtx_unlock_spin(&net_dev->rx_pkt_list_lock);

	/*
	 * We need at least 2 netvsc pkts (1 to represent the xfer page
	 * and at least 1 for the range) i.e. we can handle some of the
	 * xfer page packet ranges...
	 */
	if (count < 2) {
		/* Return netvsc packet to the freelist */
		mtx_lock_spin(&net_dev->rx_pkt_list_lock);
		for (i=count; i != 0; i--) {
			net_vsc_pkt = STAILQ_FIRST(&mylist_head);
			STAILQ_REMOVE_HEAD(&mylist_head, mylist_entry);

			STAILQ_INSERT_TAIL(&net_dev->myrx_packet_list,
			    net_vsc_pkt, mylist_entry);
		}
		mtx_unlock_spin(&net_dev->rx_pkt_list_lock);

		/* Complete to the host anyway so it can reuse the pages */
		hv_nv_send_receive_completion(device,
		    vm_xfer_page_pkt->d.transaction_id);

		return;
	}

	/* Take the first packet in the list */
	xfer_page_pkt = (xfer_page_packet *)STAILQ_FIRST(&mylist_head);
	STAILQ_REMOVE_HEAD(&mylist_head, mylist_entry);

	/* This is how many data packets we can supply */
	xfer_page_pkt->count = count - 1;

	/* Each range represents 1 RNDIS pkt that contains 1 Ethernet frame */
	for (i=0; i < (count - 1); i++) {
		net_vsc_pkt = STAILQ_FIRST(&mylist_head);
		STAILQ_REMOVE_HEAD(&mylist_head, mylist_entry);

		/*
		 * Initialize the netvsc packet
		 */
		net_vsc_pkt->xfer_page_pkt = xfer_page_pkt;
		net_vsc_pkt->compl.rx.rx_completion_context = net_vsc_pkt;
		net_vsc_pkt->device = device;
		/* Save this so that we can send it back */
		net_vsc_pkt->compl.rx.rx_completion_tid =
		    vm_xfer_page_pkt->d.transaction_id;

		net_vsc_pkt->tot_data_buf_len =
		    vm_xfer_page_pkt->ranges[i].byte_count;
		net_vsc_pkt->page_buf_count = 1;

		net_vsc_pkt->page_buffers[0].length =
		    vm_xfer_page_pkt->ranges[i].byte_count;

		/* The virtual address of the packet in the receive buffer */
		start = ((unsigned long)net_dev->rx_buf +
		    vm_xfer_page_pkt->ranges[i].byte_offset);
		start = ((unsigned long)start) & ~(PAGE_SIZE - 1);

		/* Page number of the virtual page containing packet start */
		net_vsc_pkt->page_buffers[0].pfn = start >> PAGE_SHIFT;

		/* Calculate the page relative offset */
		net_vsc_pkt->page_buffers[0].offset =
		    vm_xfer_page_pkt->ranges[i].byte_offset & (PAGE_SIZE - 1);

		/*
		 * In this implementation, we are dealing with virtual
		 * addresses exclusively.  Since we aren't using physical
		 * addresses at all, we don't care if a packet crosses a
		 * page boundary.  For this reason, the original code to
		 * check for and handle page crossings has been removed.
		 */

		/*
		 * Pass it to the upper layer.  The receive completion call
		 * has been moved into this function.
		 */
		hv_rf_on_receive(device, net_vsc_pkt);

		/*
		 * Moved completion call back here so that all received 
		 * messages (not just data messages) will trigger a response
		 * message back to the host.
		 */
		hv_nv_on_receive_completion(net_vsc_pkt);
	}
}

/*
 * Net VSC send receive completion
 */
/*
 * Net VSC send receive completion
 *
 * Post a receive-completion packet (status success) back to the host
 * for the transfer-page transaction identified by tid.  If the ring
 * is full (EAGAIN), retry up to 3 more times with a 100us pause
 * between attempts; other failures are not retried.
 */
static void
hv_nv_send_receive_completion(struct hv_device *device, uint64_t tid)
{
	nvsp_msg comp_msg;
	int attempt;
	int error;

	comp_msg.hdr.msg_type = nvsp_msg_1_type_send_rndis_pkt_complete;

	/* Pass in the status */
	comp_msg.msgs.vers_1_msgs.send_rndis_pkt_complete.status =
	    nvsp_status_success;

	/* 4 attempts total: initial send plus up to 3 retries */
	for (attempt = 0; attempt < 4; attempt++) {
		error = hv_vmbus_channel_send_packet(device->channel,
		    &comp_msg, sizeof(nvsp_msg), tid,
		    HV_VMBUS_PACKET_TYPE_COMPLETION, 0);
		if (error != EAGAIN)
			break;
		/* Ring full; pause before retrying, but not after the last */
		if (attempt < 3)
			DELAY(100);
	}
}

/*
 * Net VSC on receive completion
 *
 * Send a receive completion packet to RNDIS device (ie NetVsp)
 */
/*
 * Net VSC on receive completion
 *
 * Send a receive completion packet to RNDIS device (ie NetVsp)
 *
 * Returns the netvsc packet to the freelist; when the last packet
 * belonging to a transfer-page packet is returned, also returns the
 * xfer-page packet itself and sends the host completion.
 */
void
hv_nv_on_receive_completion(void *context)
{
	netvsc_packet *packet = (netvsc_packet *)context;
	struct hv_device *device = (struct hv_device *)packet->device;
	netvsc_dev    *net_dev;
	uint64_t       tid = 0;
	boolean_t send_rx_completion = FALSE;

	/*
	 * Even though it seems logical to do a hv_nv_get_outbound_net_device()
	 * here to send out receive completion, we are using
	 * hv_nv_get_inbound_net_device() since we may have disabled
	 * outbound traffic already.
	 */
	net_dev = hv_nv_get_inbound_net_device(device);
	if (net_dev == NULL)
		return;
	
	/*
	 * Overloading use of the lock: it protects the xfer_page_pkt
	 * refcount below in addition to the freelist itself.
	 */
	mtx_lock_spin(&net_dev->rx_pkt_list_lock);

	packet->xfer_page_pkt->count--;

	/*
	 * Last one in the line that represent 1 xfer page packet.
	 * Return the xfer page packet itself to the free list.
	 */
	if (packet->xfer_page_pkt->count == 0) {
		send_rx_completion = TRUE;
		tid = packet->compl.rx.rx_completion_tid;
		STAILQ_INSERT_TAIL(&net_dev->myrx_packet_list,
		    (netvsc_packet *)(packet->xfer_page_pkt), mylist_entry);
	}

	/* Put the packet back on the free list */
	STAILQ_INSERT_TAIL(&net_dev->myrx_packet_list, packet, mylist_entry);
	mtx_unlock_spin(&net_dev->rx_pkt_list_lock);

	/* Send a receive completion for the xfer page packet */
	if (send_rx_completion)
		hv_nv_send_receive_completion(device, tid);
}

/*
 * Net VSC on channel callback
 */
/*
 * Net VSC on channel callback
 *
 * Drain the channel's inbound ring: dispatch completion packets and
 * transfer-page (receive) packets until the ring is empty.  The
 * receive buffer starts at 2KB and is grown on demand when the
 * channel reports ENOBUFS for a larger packet.
 */
static void
hv_nv_on_channel_callback(void *context)
{
	/* Initial receive buffer size; grown on demand below */
	const int net_pkt_size = 2048;
	struct hv_device *device = (struct hv_device *)context;
	netvsc_dev *net_dev;
	uint32_t bytes_rxed;
	uint64_t request_id;
	hv_vm_packet_descriptor *desc;
	uint8_t *buffer;
	int     bufferlen = net_pkt_size;
	int     ret = 0;

	buffer = malloc(net_pkt_size, M_DEVBUF, M_NOWAIT);
	if (buffer == NULL)
		return;

	net_dev = hv_nv_get_inbound_net_device(device);
	if (net_dev == NULL)
		goto out;

	do {
		ret = hv_vmbus_channel_recv_packet_raw(device->channel,
		    buffer, bufferlen, &bytes_rxed, &request_id);
		if (ret == 0) {
			if (bytes_rxed == 0) {
				/* Ring buffer drained; we are done */
				break;
			}
			desc = (hv_vm_packet_descriptor *)buffer;
			switch (desc->type) {
			case HV_VMBUS_PACKET_TYPE_COMPLETION:
				hv_nv_on_send_completion(device, desc);
				break;
			case HV_VMBUS_PACKET_TYPE_DATA_USING_TRANSFER_PAGES:
				hv_nv_on_receive(device, desc);
				break;
			default:
				break;
			}
		} else if (ret == ENOBUFS) {
			/* Packet too big for buffer: grow it and retry */
			free(buffer, M_DEVBUF);
			buffer = malloc(bytes_rxed, M_DEVBUF, M_NOWAIT);
			if (buffer == NULL) {
				break;
			}
			bufferlen = bytes_rxed;
		} else {
			/*
			 * Unexpected error: previously this looped forever;
			 * bail out instead.
			 */
			break;
		}
	} while (1);

out:
	if (buffer != NULL)
		free(buffer, M_DEVBUF);
}