ngx_ff_module.c 提供了供 nginx 调用的 socket 相关 API,即对 socket/bind/connect/recv/send 等接口的重新实现。
/*
 * Yield a pointer to the definition of `func` that dlsym(RTLD_NEXT, ...)
 * finds. The lookup runs only while real_<func> is still unset; afterwards
 * the cached pointer in real_<func> is reused.
 */
#define SYSCALL(func)                                   \
    ({                                                  \
        if (unlikely(real_##func == NULL)) {            \
            real_##func = dlsym(RTLD_NEXT, #func);      \
        }                                               \
        real_##func;                                    \
    })
extern intptr_t ngx_max_sockets;
/*
 * Shift a descriptor assigned by fstack upwards by ngx_max_sockets (the
 * maximum kernel socket value), so that kernel descriptors and fstack
 * descriptors fall into different ranges and can be told apart by value.
 * The separation is done here, at the application layer, because fstack's
 * own 'fd_reserve' mechanism reportedly does not work well.
 */
static inline int convert_fstack_fd(int sockfd) {
    const intptr_t shifted = sockfd + ngx_max_sockets;

    return (int)shifted;
}
/*
 * Undo convert_fstack_fd(): map a shifted descriptor back to the value
 * fstack originally assigned.
 *
 * Descriptors below ngx_max_sockets are returned unchanged. The boundary
 * value itself is restored: convert_fstack_fd(0) yields exactly
 * ngx_max_sockets and is_fstack_fd() classifies that value as fstack-owned,
 * so it has to map back to 0 rather than pass through untouched.
 */
static inline int restore_fstack_fd(int sockfd) {
    if (sockfd < ngx_max_sockets) {
        return sockfd;
    }

    return sockfd - ngx_max_sockets;
}
/*
 * Report whether `sockfd` lies in the fstack descriptor range.
 * Returns 1 when it does, 0 otherwise; always 0 while `inited` is still 0.
 */
int is_fstack_fd(int sockfd) {
    if (unlikely(inited == 0)) {
        return 0;
    }

    return (sockfd < ngx_max_sockets) ? 0 : 1;
}
/*
 * Initialise fstack for this process.
 *
 * Builds the argument vector
 *     nginx --conf=<conf> --proc-id=<proc_id> --proc-type=<primary|secondary>
 * and hands it to ff_init().
 *
 * conf:      configuration file path passed as --conf.
 * proc_id:   value passed as --proc-id.
 * proc_type: 1 selects "primary"; any other value selects "secondary".
 *
 * Returns the ff_init() result. Returns -1 when `conf` is NULL, when an
 * argument buffer cannot be allocated, when `conf` does not fit into a
 * PATH_MAX-sized argument, or when ngx_max_sockets plus fstack's maximum
 * descriptor would exceed INT_MAX.
 */
int
ff_mod_init(const char *conf, int proc_id, int proc_type) {
    enum { FF_MOD_ARGC = 4 };
    char **ff_argv;
    int rc = -1;
    int i, len;

    if (conf == NULL) {
        return -1;
    }

    /* calloc() leaves every slot NULL, so cleanup can free() unconditionally. */
    ff_argv = calloc(FF_MOD_ARGC, sizeof(*ff_argv));
    if (ff_argv == NULL) {
        return -1;
    }

    for (i = 0; i < FF_MOD_ARGC; i++) {
        ff_argv[i] = malloc(PATH_MAX);
        if (ff_argv[i] == NULL) {
            goto cleanup;
        }
    }

    snprintf(ff_argv[0], PATH_MAX, "nginx");

    /* Only this argument carries caller-supplied text; refuse to truncate it. */
    len = snprintf(ff_argv[1], PATH_MAX, "--conf=%s", conf);
    if (len < 0 || len >= PATH_MAX) {
        goto cleanup;
    }

    snprintf(ff_argv[2], PATH_MAX, "--proc-id=%d", proc_id);
    snprintf(ff_argv[3], PATH_MAX, "--proc-type=%s",
        proc_type == 1 ? "primary" : "secondary");

    rc = ff_init(FF_MOD_ARGC, ff_argv);
    if (rc == 0) {
        /* Ensure that a converted socket cannot exceed the maximum 'int'. */
        if (ngx_max_sockets + (unsigned)ff_getmaxfd() > INT_MAX) {
            rc = -1;
        }

        /* Set whenever ff_init() succeeded, even if the range check failed. */
        inited = 1;
    }

cleanup:
    for (i = 0; i < FF_MOD_ARGC; i++) {
        free(ff_argv[i]);
    }
    free(ff_argv);

    return rc;
}
/*
 * Decide whether a socket of the given kind is one fstack supports.
 *
 * The creation flags SOCK_CLOEXEC, SOCK_NONBLOCK and SOCK_FSTACK are masked
 * out of `type` before it is compared. Returns 1 for AF_INET/AF_INET6
 * sockets of type SOCK_STREAM or SOCK_DGRAM, 0 for everything else.
 * `protocol` does not take part in the decision.
 */
int
fstack_territory(int domain, int type, int protocol)
{
    const int creation_flags = SOCK_CLOEXEC | SOCK_NONBLOCK | SOCK_FSTACK;
    const int base_type = type & ~creation_flags;
    const int inet_domain = (domain == AF_INET || domain == AF_INET6);
    const int known_type = (base_type == SOCK_STREAM || base_type == SOCK_DGRAM);

    (void)protocol;

    return (inet_domain && known_type) ? 1 : 0;
}
/*
 * socket() override.
 *
 * The request goes to the next socket() found via SYSCALL unless all of the
 * following hold: the module is initialised, fstack_territory() accepts the
 * domain/type, and the caller set SOCK_FSTACK in `type`. In that case the
 * socket is created with ff_socket() (SOCK_FSTACK removed from `type`) and
 * the resulting descriptor is returned in converted form; -1 is passed
 * through unchanged.
 */
int
socket(int domain, int type, int protocol)
{
    int fd;

    if (unlikely(inited == 0)
        || unlikely(fstack_territory(domain, type, protocol) == 0)
        || unlikely((type & SOCK_FSTACK) == 0))
    {
        return SYSCALL(socket)(domain, type, protocol);
    }

    fd = ff_socket(domain, type & ~SOCK_FSTACK, protocol);

    return (fd == -1) ? fd : convert_fstack_fd(fd);
}
/*
 * bind() override: fstack descriptors are restored and passed to ff_bind();
 * every other descriptor goes to the bind() resolved by SYSCALL.
 */
int
bind(int sockfd, const struct sockaddr *addr, socklen_t addrlen)
{
    if (!is_fstack_fd(sockfd)) {
        return SYSCALL(bind)(sockfd, addr, addrlen);
    }

    return ff_bind(restore_fstack_fd(sockfd),
        (struct linux_sockaddr *)addr, addrlen);
}
/*
 * connect() override: fstack descriptors are restored and passed to
 * ff_connect(); every other descriptor goes to the connect() resolved by
 * SYSCALL.
 */
int
connect(int sockfd, const struct sockaddr *addr, socklen_t addrlen)
{
    if (!is_fstack_fd(sockfd)) {
        return SYSCALL(connect)(sockfd, addr, addrlen);
    }

    return ff_connect(restore_fstack_fd(sockfd),
        (struct linux_sockaddr *)addr, addrlen);
}
/*
 * getpeername() override: fstack descriptors are restored and passed to
 * ff_getpeername(); every other descriptor goes to the getpeername()
 * resolved by SYSCALL.
 */
int
getpeername(int sockfd, struct sockaddr * name,
    socklen_t *namelen)
{
    if (!is_fstack_fd(sockfd)) {
        return SYSCALL(getpeername)(sockfd, name, namelen);
    }

    return ff_getpeername(restore_fstack_fd(sockfd),
        (struct linux_sockaddr *)name, namelen);
}
/*
 * getsockname() override: fstack descriptors are restored and passed to
 * ff_getsockname(); every other descriptor goes to the getsockname()
 * resolved by SYSCALL.
 */
int
getsockname(int sockfd, struct sockaddr *name,
    socklen_t *namelen)
{
    if (!is_fstack_fd(sockfd)) {
        return SYSCALL(getsockname)(sockfd, name, namelen);
    }

    return ff_getsockname(restore_fstack_fd(sockfd),
        (struct linux_sockaddr *)name, namelen);
}
/*
 * send() override: fstack descriptors are restored and passed to ff_send();
 * every other descriptor goes to the send() resolved by SYSCALL.
 */
ssize_t
send(int sockfd, const void *buf, size_t len, int flags)
{
    if (!is_fstack_fd(sockfd)) {
        return SYSCALL(send)(sockfd, buf, len, flags);
    }

    return ff_send(restore_fstack_fd(sockfd), buf, len, flags);
}
/*
 * sendto() override: fstack descriptors are restored and passed to
 * ff_sendto(); every other descriptor goes to the sendto() resolved by
 * SYSCALL.
 */
ssize_t
sendto(int sockfd, const void *buf, size_t len, int flags,
    const struct sockaddr *dest_addr, socklen_t addrlen)
{
    if (!is_fstack_fd(sockfd)) {
        return SYSCALL(sendto)(sockfd, buf, len, flags, dest_addr, addrlen);
    }

    return ff_sendto(restore_fstack_fd(sockfd), buf, len, flags,
        (struct linux_sockaddr *)dest_addr, addrlen);
}
/*
 * sendmsg() override: fstack descriptors are restored and passed to
 * ff_sendmsg(); every other descriptor goes to the sendmsg() resolved by
 * SYSCALL.
 */
ssize_t
sendmsg(int sockfd, const struct msghdr *msg, int flags)
{
    if (!is_fstack_fd(sockfd)) {
        return SYSCALL(sendmsg)(sockfd, msg, flags);
    }

    return ff_sendmsg(restore_fstack_fd(sockfd), msg, flags);
}
/*
 * recvmsg() override: fstack descriptors are restored and passed to
 * ff_recvmsg(); every other descriptor goes to the recvmsg() resolved by
 * SYSCALL.
 */
ssize_t recvmsg(int sockfd, struct msghdr *msg, int flags)
{
    if (!is_fstack_fd(sockfd)) {
        return SYSCALL(recvmsg)(sockfd, msg, flags);
    }

    return ff_recvmsg(restore_fstack_fd(sockfd), msg, flags);
}
/*
 * recv() override: fstack descriptors are restored and passed to ff_recv();
 * every other descriptor goes to the recv() resolved by SYSCALL.
 */
ssize_t
recv(int sockfd, void *buf, size_t len, int flags)
{
    if (!is_fstack_fd(sockfd)) {
        return SYSCALL(recv)(sockfd, buf, len, flags);
    }

    return ff_recv(restore_fstack_fd(sockfd), buf, len, flags);
}
/*
 * Buffer-size-carrying variant of recv(); presumably the symbol a fortified
 * build emits in place of recv() (TODO confirm against the libc in use).
 *
 * `buflen` is accepted but not used: the "n > buflen" check is deliberately
 * not performed here, and the call is forwarded to recv() as-is.
 */
ssize_t
__recv_chk (int fd, void *buf, size_t n, size_t buflen, int flags)
{
    ssize_t received = recv(fd, buf, n, flags);

    return received;
}
/*
 * listen() override: fstack descriptors are restored and passed to
 * ff_listen(); every other descriptor goes to the listen() resolved by
 * SYSCALL.
 */
int
listen(int sockfd, int backlog)
{
    if (!is_fstack_fd(sockfd)) {
        return SYSCALL(listen)(sockfd, backlog);
    }

    return ff_listen(restore_fstack_fd(sockfd), backlog);
}
/*
 * getsockopt() override: fstack descriptors are restored and passed to
 * ff_getsockopt(); every other descriptor goes to the getsockopt() resolved
 * by SYSCALL.
 */
int
getsockopt(int sockfd, int level, int optname,
    void *optval, socklen_t *optlen)
{
    if (!is_fstack_fd(sockfd)) {
        return SYSCALL(getsockopt)(sockfd, level, optname, optval, optlen);
    }

    return ff_getsockopt(restore_fstack_fd(sockfd), level, optname,
        optval, optlen);
}
/*
 * setsockopt() override: fstack descriptors are restored and passed to
 * ff_setsockopt(); every other descriptor goes to the setsockopt() resolved
 * by SYSCALL.
 */
int
setsockopt (int sockfd, int level, int optname,
    const void *optval, socklen_t optlen)
{
    if (!is_fstack_fd(sockfd)) {
        return SYSCALL(setsockopt)(sockfd, level, optname, optval, optlen);
    }

    return ff_setsockopt(restore_fstack_fd(sockfd), level, optname,
        optval, optlen);
}
/*
 * accept() override.
 *
 * For an fstack listening descriptor the connection is accepted with
 * ff_accept() and the new descriptor is returned in converted form (-1 is
 * passed through). Every other descriptor goes to the accept() resolved by
 * SYSCALL.
 */
int
accept(int sockfd, struct sockaddr *addr, socklen_t *addrlen)
{
    int newfd;

    if (!is_fstack_fd(sockfd)) {
        return SYSCALL(accept)(sockfd, addr, addrlen);
    }

    newfd = ff_accept(restore_fstack_fd(sockfd),
        (struct linux_sockaddr *)addr, addrlen);

    return (newfd == -1) ? newfd : convert_fstack_fd(newfd);
}
/*
 * accept4() override.
 *
 * For an fstack listening descriptor the connection is accepted with
 * ff_accept() and the new descriptor is returned in converted form (-1 is
 * passed through). Every other descriptor goes to the accept4() resolved by
 * SYSCALL.
 *
 * NOTE(review): `flags` is only forwarded on the non-fstack path; on the
 * fstack path it is not applied to the accepted socket.
 */
int
accept4(int sockfd, struct sockaddr *addr, socklen_t *addrlen, int flags)
{
    int newfd;

    if (!is_fstack_fd(sockfd)) {
        return SYSCALL(accept4)(sockfd, addr, addrlen, flags);
    }

    newfd = ff_accept(restore_fstack_fd(sockfd),
        (struct linux_sockaddr *)addr, addrlen);

    return (newfd == -1) ? newfd : convert_fstack_fd(newfd);
}
/*
 * close() override: fstack descriptors are restored and passed to
 * ff_close(); every other descriptor goes to the close() resolved by
 * SYSCALL.
 */
int
close(int sockfd)
{
    if (!is_fstack_fd(sockfd)) {
        return SYSCALL(close)(sockfd);
    }

    return ff_close(restore_fstack_fd(sockfd));
}
/*
 * shutdown() override: fstack descriptors are restored and passed to
 * ff_shutdown(); every other descriptor goes to the shutdown() resolved by
 * SYSCALL.
 */
int
shutdown(int sockfd, int how)
{
    if (!is_fstack_fd(sockfd)) {
        return SYSCALL(shutdown)(sockfd, how);
    }

    return ff_shutdown(restore_fstack_fd(sockfd), how);
}
/*
 * writev() override: fstack descriptors are restored and passed to
 * ff_writev(); every other descriptor goes to the writev() resolved by
 * SYSCALL.
 */
ssize_t
writev(int sockfd, const struct iovec *iov, int iovcnt)
{
    if (!is_fstack_fd(sockfd)) {
        return SYSCALL(writev)(sockfd, iov, iovcnt);
    }

    return ff_writev(restore_fstack_fd(sockfd), iov, iovcnt);
}
/*
 * readv() override: fstack descriptors are restored and passed to
 * ff_readv(); every other descriptor goes to the readv() resolved by
 * SYSCALL.
 */
ssize_t
readv(int sockfd, const struct iovec *iov, int iovcnt)
{
    if (!is_fstack_fd(sockfd)) {
        return SYSCALL(readv)(sockfd, iov, iovcnt);
    }

    return ff_readv(restore_fstack_fd(sockfd), iov, iovcnt);
}
/*
 * read() override: fstack descriptors are restored and passed to ff_read();
 * every other descriptor goes to the read() resolved by SYSCALL.
 */
ssize_t
read(int sockfd, void *buf, size_t count)
{
    if (!is_fstack_fd(sockfd)) {
        return SYSCALL(read)(sockfd, buf, count);
    }

    return ff_read(restore_fstack_fd(sockfd), buf, count);
}
/*
 * write() override: fstack descriptors are restored and passed to
 * ff_write(); every other descriptor goes to the write() resolved by
 * SYSCALL.
 */
ssize_t
write(int sockfd, const void *buf, size_t count)
{
    if (!is_fstack_fd(sockfd)) {
        return SYSCALL(write)(sockfd, buf, count);
    }

    return ff_write(restore_fstack_fd(sockfd), buf, count);
}
/*
 * ioctl() override: fstack descriptors are restored and passed to
 * ff_ioctl(); every other descriptor goes to the ioctl() resolved by
 * SYSCALL. Exactly one pointer argument `p` is forwarded in either case.
 */
int
ioctl(int sockfd, int request, void *p)
{
    if (!is_fstack_fd(sockfd)) {
        return SYSCALL(ioctl)(sockfd, request, p);
    }

    return ff_ioctl(restore_fstack_fd(sockfd), request, p);
}
/*
 * kqueue() override: always served by ff_kqueue(). The descriptor is
 * returned exactly as ff_kqueue() produced it, without conversion.
 */
int
kqueue(void)
{
    int kq = ff_kqueue();

    return kq;
}
/*
 * kevent() override: always served by ff_kevent().
 *
 * Before the call, every change whose filter is EVFILT_READ, EVFILT_WRITE
 * or EVFILT_VNODE has its `ident` passed through restore_fstack_fd(), so
 * fstack sees its own descriptor numbers. Changes with any other filter are
 * left untouched. `kq` is passed on as given.
 *
 * NOTE(review): the restore is written back into the caller's `changelist`
 * through a cast that drops `const`, so the caller's array is modified and
 * submitting the same array twice would restore the idents twice.
 */
int
kevent(int kq, const struct kevent *changelist, int nchanges,
    struct kevent *eventlist, int nevents, const struct timespec *timeout)
{
    struct kevent *changes = (struct kevent *)changelist;
    int idx;

    for (idx = 0; idx < nchanges; idx++) {
        struct kevent *change = &changes[idx];

        if (change->filter == EVFILT_READ
            || change->filter == EVFILT_WRITE
            || change->filter == EVFILT_VNODE)
        {
            change->ident = restore_fstack_fd(change->ident);
        }
    }

    return ff_kevent(kq, changelist, nchanges, eventlist, nevents, timeout);
}
/*
 * gettimeofday() override: served by ff_gettimeofday() once the module is
 * initialised; while `inited` is still 0 the call goes to the
 * gettimeofday() resolved by SYSCALL.
 */
int
gettimeofday(struct timeval *tv, struct timezone *tz)
{
    return unlikely(inited == 0)
        ? SYSCALL(gettimeofday)(tv, tz)
        : ff_gettimeofday(tv, tz);
}
上述重新实现的 socket 相关接口所调用的 ff_* API,均在 ff_api.symlist 中列出:
ff_hardclock
ff_freebsd_init
ff_socket
ff_setsockopt
ff_getsockopt
ff_ioctl
ff_close
ff_read
ff_readv
ff_write
ff_writev
ff_send
ff_sendto
ff_sendmsg
ff_recv
ff_recvfrom
ff_recvmsg
ff_select
ff_fcntl
ff_socketpair
ff_poll
ff_accept
ff_listen
ff_bind
ff_connect
ff_getpeername
ff_getsockname
ff_shutdown
ff_sysctl
ff_kqueue
ff_kevent
ff_kevent_do_each
ff_veth_attach
ff_veth_detach
ff_veth_process_packet
ff_veth_softc_to_hostc
ff_mbuf_gethdr
ff_mbuf_get
ff_mbuf_free
ff_mbuf_copydata
ff_mbuf_tx_offload
ff_route_ctl
ff_rtioctl
ff_gettimeofday
ff_fdisused
ff_getmaxfd
ff_ngctl
ff_ioctl_freebsd
ff_getsockopt_freebsd
ff_setsockopt_freebsd
ff_dup
ff_dup2
ff_next_mbuf
ff_mbuf_mtod
ff_rte_frm_extcl
ff_mbuf_set_vlan_info
为何需要文件 ff_api.symlist(参考lib/Makefile中实现逻辑):
c源码文件:
FF_HOST_SRCS+= \
ff_host_interface.c \
ff_config.c \
ff_ini_parser.c \
ff_dpdk_if.c \
ff_dpdk_pcap.c \
ff_epoll.c \
ff_init.c
ifdef FF_KNI
FF_HOST_SRCS+= \
ff_dpdk_kni.c
endif
c源码文件变量 ASM_SRCS SRCS HOST_SRCS:
ASM_SRCS = ${CRYPTO_ASM_SRCS}
SRCS= ${FF_SRCS} ${CRYPTO_SRCS} ${KERN_SRCS} ${LIBKERN_SRCS} ${MACHINE_SRCS}
SRCS+= ${MSRCS} ${NET_SRCS} ${NETGRAPH_SRCS} ${NETINET_SRCS} ${NETINET6_SRCS}
SRCS+= ${NETIPSEC_SRCS} ${NETIPFW_SRCS} ${OPENCRYPTO_SRCS} ${VM_SRCS}
# If witness is enabled.
# SRCS+= ${KERN_WITNESS_SRCS}
# Extra FreeBSD kernel module srcs.
SRCS+= ${KMOD_SRCS}
HOST_SRCS = ${FF_HOST_SRCS}
根据c源码生成 *.o:
# Extra FreeBSD kernel module srcs.
SRCS+= ${KMOD_SRCS}
HOST_SRCS = ${FF_HOST_SRCS}
ASM_OBJS+= $(patsubst %.S,%.o,${ASM_SRCS})
OBJS+= $(patsubst %.c,%.o,${SRCS})
HOST_OBJS+= $(patsubst %.c,%.o,${HOST_SRCS})
利用 *.o 生成静态库 libfstack.a(并把 ff_api.symlist 中列出的函数设置为全局可见):
all: libfstack.a
#
# The library is built by first incrementally linking all the object
# to resolve internal references. Then, all symbols are made local.
# Then, only the symbols that are part of the API are made
# externally available.
#
libfstack.a: machine_includes ff_api.symlist ${MHEADERS} ${MSRCS} ${HOST_OBJS} ${ASM_OBJS} ${OBJS}
#把 libfstack.ro 中除未定义(U)符号以外的所有符号名写入 libfstack_localize_list.tmp
${LD} -d -r -o $*.ro ${ASM_OBJS} ${OBJS}
nm $*.ro | grep -v ' U ' | cut -d ' ' -f 3 > $*_localize_list.tmp
#把 libfstack_localize_list.tmp 中函数设置为本地可见
objcopy --localize-symbols=$*_localize_list.tmp $*.ro
rm $*_localize_list.tmp
#把 ff_api.symlist 中列出的函数重新设置为全局可见(这样库外部的代码才能调用这些函数)
objcopy --globalize-symbols=ff_api.symlist $*.ro
rm -f $@
#打包为静态库 libfstack.a
ar -cqs $@ $*.ro ${HOST_OBJS}
rm -f $*.ro
${HOST_OBJS}: %.o: %.c
${HOST_C}
${ASM_OBJS}: %.o: %.S ${IMACROS_FILE}
${NORMAL_S}
${OBJS}: %.o: %.c ${IMACROS_FILE}
${NORMAL_C}
objcopy-命令参考:
假如我们有一个静态库,想对其中的函数做以下修改:把某个函数的作用域从全局改为本地、把某个函数的作用域从本地改为全局,或者给某个函数重命名,可以按如下步骤操作:
ar -x xxx.a //释放其中的.o文件
objdump -t xxx.o //查看导出符号表,可以看到导出的函数和引用的函数
objcopy --localize-symbol function_1 xxx.o xxx_new.o //把函数设置为本地
objcopy --globalize-symbol function_2 xxx.o xxx_new.o //把函数设置为全局可见
objcopy --redefine-sym old_func=new_func xxx.o xxx_new.o //重命名函数名
ar cqs xxx.a xxx_new.o //打包为静态库