本实验只需要实现 int e1000_transmit(struct mbuf *m) 和 static void e1000_recv(void) 这两个函数。
背景知识
- 相关函数,结构体
//kernel/net.h
// A packet buffer. The data lives in `buf`; `head` and `len` describe the
// part of it currently in use, and `next` lets buffers be linked in a chain.
// The e1000 driver hands `head` to the card as the DMA address of the packet.
struct mbuf {
struct mbuf *next; // the next mbuf in the chain
char *head; // the current start position of the buffer
unsigned int len; // the length of the buffer
char buf[MBUF_SIZE]; // the backing store
};
// an Ethernet packet header (start of the packet).
// Declared packed so the compiler inserts no padding between the fields.
struct eth {
uint8 dhost[ETHADDR_LEN]; // presumably the destination host address (per field name)
uint8 shost[ETHADDR_LEN]; // presumably the source host address (per field name)
uint16 type; // NOTE(review): presumably the EtherType of the payload -- confirm byte order against users
} __attribute__((packed));
// an IP packet header (comes after an Ethernet header).
struct ip {
uint8 ip_vhl; // version << 4 | header length >> 2
uint8 ip_tos; // type of service
uint16 ip_len; // total length
uint16 ip_id; // identification
uint16 ip_off; // fragment offset field
uint8 ip_ttl; // time to live
uint8 ip_p; // protocol
uint16 ip_sum; // checksum
uint32 ip_src, ip_dst; // presumably source and destination addresses (per field names)
};
// a UDP packet header (comes after an IP header).
// All four fields are 16 bits wide.
struct udp {
uint16 sport; // source port
uint16 dport; // destination port
uint16 ulen; // length, including udp header, not including IP header
uint16 sum; // checksum
};
// an ARP packet (comes after an Ethernet header).
// Declared packed so the compiler inserts no padding between the fields
// (the uint32 fields follow ETHADDR_LEN-byte char arrays).
struct arp {
uint16 hrd; // format of hardware address
uint16 pro; // format of protocol address
uint8 hln; // length of hardware address
uint8 pln; // length of protocol address
uint16 op; // operation
// The remaining fields hold the hardware (Ethernet) and IP addresses
// of the sender and of the target.
char sha[ETHADDR_LEN]; // sender hardware address
uint32 sip; // sender IP address
char tha[ETHADDR_LEN]; // target hardware address
uint32 tip; // target IP address
} __attribute__((packed));
//kernel/e1000_dev.h
// Transmit descriptor format: the element type of the TX ring (tx_ring)
// that the driver fills in and the card reads.
struct tx_desc
{
uint64 addr; // address of the buffer holding the data to send
uint16 length; // number of bytes to send from that buffer
uint8 cso; // NOTE(review): presumably "checksum offset" per the E1000 manual -- confirm
uint8 cmd; // command bits, e.g. E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS
uint8 status; // status bits; E1000_TXD_STAT_DD marks a descriptor the card has finished
uint8 css; // NOTE(review): presumably "checksum start" per the E1000 manual -- confirm
uint16 special; // not used by the driver code shown in this file
};
/* Receive Descriptor bit definitions [E1000 3.2.3.1] */
/* These are masks for the `status` byte of struct rx_desc. */
#define E1000_RXD_STAT_DD 0x01 /* Descriptor Done */
#define E1000_RXD_STAT_EOP 0x02 /* End of Packet */
// [E1000 3.2.3]
// Receive descriptor format: the element type of the RX ring (rx_ring).
// The driver supplies `addr`; `length` and `status` are read back by
// e1000_recv() after the card has stored a packet.
struct rx_desc
{
uint64 addr; /* Address of the descriptor's data buffer */
uint16 length; /* Length of data DMAed into data buffer */
uint16 csum; /* Packet checksum */
uint8 status; /* Descriptor status */
uint8 errors; /* Descriptor Errors */
uint16 special;
};
//kernel/e1000.c
// called by pci_init().
// xregs is the memory address at which the
// e1000's registers are mapped.
//
// Configures the E1000 to read and write RAM directly via DMA.
//
// Because packets may arrive faster than the driver can process them, this
// function gives the E1000 several buffers into which it can write packets.
// The E1000 needs an array of descriptors (in memory) to describe those
// buffers; each entry is a struct holding the memory address of one packet
// buffer. The descriptor array is organized as the receive ring (rx ring),
// also called the receive queue.
//
// The packet buffers are mbufs allocated with mbufalloc(). There is also a
// transmit ring (tx ring) into which the driver places packets it wants sent.
void
e1000_init(uint32 *xregs)
{
int i;
initlock(&e1000_lock, "e1000");
regs = xregs;
// Reset the device
regs[E1000_IMS] = 0; // disable interrupts
regs[E1000_CTL] |= E1000_CTL_RST;
regs[E1000_IMS] = 0; // redisable interrupts
__sync_synchronize();
// [E1000 14.5] Transmit initialization
memset(tx_ring, 0, sizeof(tx_ring));
// Mark every TX descriptor as done so that e1000_transmit() treats all
// slots as free, and start with no stashed mbufs to release.
for (i = 0; i < TX_RING_SIZE; i++) {
tx_ring[i].status = E1000_TXD_STAT_DD;
tx_mbufs[i] = 0;
}
// NOTE(review): xregs is a uint32 pointer, so if regs has the same type
// only the low 32 bits of the ring address are stored here -- confirm.
regs[E1000_TDBAL] = (uint64) tx_ring;
// The ring's size in bytes must be a multiple of 128, otherwise give up.
if(sizeof(tx_ring) % 128 != 0)
panic("e1000");
regs[E1000_TDLEN] = sizeof(tx_ring);
regs[E1000_TDH] = regs[E1000_TDT] = 0;
// [E1000 14.4] Receive initialization
memset(rx_ring, 0, sizeof(rx_ring));
// Give every RX descriptor a freshly allocated mbuf to receive into.
for (i = 0; i < RX_RING_SIZE; i++) {
rx_mbufs[i] = mbufalloc(0);
if (!rx_mbufs[i])
panic("e1000");
rx_ring[i].addr = (uint64) rx_mbufs[i]->head;
}
regs[E1000_RDBAL] = (uint64) rx_ring;
if(sizeof(rx_ring) % 128 != 0)
panic("e1000");
regs[E1000_RDH] = 0;
// The tail starts at the last slot; e1000_recv() looks at (RDT + 1) for
// the next packet, i.e. slot 0 first.
regs[E1000_RDT] = RX_RING_SIZE - 1;
regs[E1000_RDLEN] = sizeof(rx_ring);
// filter by qemu's MAC address, 52:54:00:12:34:56
regs[E1000_RA] = 0x12005452;
// NOTE(review): bit 31 is presumably the "address valid" flag -- confirm
// against the E1000 manual.
regs[E1000_RA+1] = 0x5634 | (1<<31);
// multicast table
// Zero all 4096/32 = 128 words of the table.
for (int i = 0; i < 4096/32; i++)
regs[E1000_MTA + i] = 0;
// transmitter control bits.
regs[E1000_TCTL] = E1000_TCTL_EN | // enable
E1000_TCTL_PSP | // pad short packets
(0x10 << E1000_TCTL_CT_SHIFT) | // collision stuff
(0x40 << E1000_TCTL_COLD_SHIFT);
regs[E1000_TIPG] = 10 | (8<<10) | (6<<20); // inter-pkt gap
// receiver control bits.
regs[E1000_RCTL] = E1000_RCTL_EN | // enable receiver
E1000_RCTL_BAM | // enable broadcast
E1000_RCTL_SZ_2048 | // 2048-byte rx buffers
E1000_RCTL_SECRC; // strip CRC
// ask e1000 for receive interrupts.
regs[E1000_RDTR] = 0; // interrupt after every received packet (no timer)
regs[E1000_RADV] = 0; // interrupt after every packet (no timer)
regs[E1000_IMS] = (1 << 7); // RXDW -- Receiver Descriptor Write Back
}
实验过程
1. 发送函数:当网络栈需要发送一个 packet 时,它会调用 e1000_transmit(),参数 mbuf 中保存着待发送的 packet。该函数需要把指向数据包数据的指针写入 tx ring 的描述符中。此外还必须保证每个 mbuf 最终都被释放,但只能在 E1000 完成该数据包的发送之后释放(E1000 会在描述符中设置 E1000_TXD_STAT_DD 位来表示发送完成)。
2. 接收函数:E1000 每从以太网收到一个 packet,会先通过 DMA 把数据包写入下一个接收环描述符所指向的 mbuf,然后产生一个中断。e1000_recv() 必须扫描 rx ring,并调用 net_rx() 把每个新 packet 的 mbuf 递交给网络栈。之后还需要分配一个新的 mbuf,并把它的地址写入对应的 rx_ring 描述符,这样当 E1000 再次到达 rx ring 中的该位置时,就能找到一个空的 buffer 来 DMA 下一个 packet。
// Queue one Ethernet frame for transmission by the e1000.
//
// m holds the frame to send. Its data pointer and length are written into
// the TX descriptor at the ring's current tail index (E1000_TDT), and m is
// stashed in tx_mbufs[] so that it can be freed after the card has sent it;
// the free happens the next time the same slot is reused.
//
// Returns 0 if the frame was queued. Returns -1 if the tail descriptor has
// not been completed by the card yet (the ring is full); in that case m is
// neither stored nor freed here.
int
e1000_transmit(struct mbuf *m)
{
  // Serialize senders: several processes may try to transmit at once.
  acquire(&e1000_lock);

  // The tail register holds the index of the next descriptor to fill in.
  uint32 ind = regs[E1000_TDT];
  struct tx_desc *desc = &tx_ring[ind];

  // DD not set means the previous frame in this slot is still in flight,
  // i.e. every descriptor in the ring is in use.
  if(!(desc->status & E1000_TXD_STAT_DD)) {
    release(&e1000_lock);
    return -1;
  }

  // Release the mbuf of the frame previously sent from this slot, if any.
  if(tx_mbufs[ind]) {
    mbuffree(tx_mbufs[ind]);
    tx_mbufs[ind] = 0;
  }

  // Describe the new frame to the card.
  desc->addr = (uint64)m->head;
  desc->length = m->len;
  // EOP: this buffer holds one complete packet.
  // RS: ask the card to set E1000_TXD_STAT_DD in status once it is sent.
  desc->cmd = E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS;
  // Clear the stale DD bit so that this slot only looks finished again
  // after the card has reported completion for *this* frame; otherwise a
  // wrap-around could free or overwrite an mbuf that is still queued.
  desc->status = 0;

  // Remember the mbuf so it can be freed when this slot is reused.
  tx_mbufs[ind] = m;

  // Make sure the descriptor contents are written before the tail update
  // that hands the descriptor over to the card.
  __sync_synchronize();

  // Advance the tail by one, wrapping around the ring.
  regs[E1000_TDT] = (ind + 1) % TX_RING_SIZE;

  release(&e1000_lock);
  return 0;
}
// Drain the RX ring: hand every packet the e1000 has finished receiving to
// the network stack via net_rx(), and give each consumed slot a fresh mbuf
// so the card can receive into it again.
//
// Several packets may have arrived by the time this runs, so the loop keeps
// going until it reaches a descriptor the card has not completed yet.
// Panics if a replacement mbuf cannot be allocated, matching how
// e1000_init() treats the same allocation failure.
static void
e1000_recv(void)
{
  for(;;) {
    // The next packet to look at is the slot just after the tail.
    uint32 ind = (regs[E1000_RDT] + 1) % RX_RING_SIZE;
    struct rx_desc *desc = &rx_ring[ind];

    // DD not set: the card has not filled this slot, nothing more to do.
    if(!(desc->status & E1000_RXD_STAT_DD)) {
      return;
    }

    // Record how many bytes the card stored, then pass the packet up.
    // Per the original author's note, the upper layer is responsible for
    // freeing the mbuf after net_rx().
    rx_mbufs[ind]->len = desc->length;
    net_rx(rx_mbufs[ind]);

    // Install a fresh buffer for the next time the card reaches this slot.
    rx_mbufs[ind] = mbufalloc(0);
    if(!rx_mbufs[ind])
      panic("e1000_recv: mbufalloc");
    desc->addr = (uint64)rx_mbufs[ind]->head;
    desc->status = 0;

    // Make sure the descriptor is updated before the tail write that
    // returns the slot to the card.
    __sync_synchronize();
    regs[E1000_RDT] = ind;
  }
}