Linux 内核提供了多种通用的进程间通信(IPC)机制以适应不同的应用场景,比如 FIFO、pipe、signal、socket、shared memory。为了实现进程间通信,应用程序必须陷入内核态以便交换数据。
Linux 提供的进程间通信接口可以满足基本的编程需求,但是为了克服其中的某些缺点、满足应用程序或者操作系统特有的需求,也可以在内核态实现自定义的进程间通信方式,比如 kdbus、binder 等。
以socket和binder为例,看看通用的进程间通信的实现机制以及如何自己实现一个进程间通信的接口。
典型的socket通信图:
进程间通信的流程和网络通信基本一致。下面是一段比较常见的进程间通信代码:
Server:
#include <stdio.h>

#include <sys/types.h>
#include <sys/socket.h>
#include <sys/un.h>

#include <errno.h>
#include <string.h>

#include <unistd.h>

#define MY_SOCK_ADDR "/tmp/my_socket"

/*
 * Minimal AF_UNIX stream server.
 *
 * Binds a listening socket to MY_SOCK_ADDR, accepts a single client,
 * reads one message from it, prints the message and exits.
 *
 * Returns 0 on success and -1 if any step fails.
 */
int main(void)
{
    int fd;
    int new_fd;
    int ret = 0;

    struct sockaddr_un my_addr, client_addr;
    socklen_t len;

    char buffer[1024];
    ssize_t size;

    /* create a socket for inter-process communication */
    fd = socket(AF_UNIX, SOCK_STREAM, 0);
    if (fd < 0) {
        fprintf(stderr, "create socket error(%s)\n", strerror(errno));
        return -1;
    }

    /* initialize socket address; snprintf bounds the copy to sun_path */
    memset(&my_addr, 0, sizeof(my_addr));
    my_addr.sun_family = AF_UNIX;
    snprintf(my_addr.sun_path, sizeof(my_addr.sun_path), "%s", MY_SOCK_ADDR);

    /*
     * bind address; any existing file at the socket path is removed first
     * because bind() fails with EADDRINUSE if the path already exists
     */
    unlink(MY_SOCK_ADDR);
    ret = bind(fd, (const struct sockaddr *)&my_addr, sizeof(my_addr));
    if (ret < 0) {
        fprintf(stderr, "bind address failed(%s)\n", strerror(errno));
        goto exit;
    }

    /* listen; SOMAXCONN instead of a non-portable negative backlog */
    ret = listen(fd, SOMAXCONN);
    if (ret < 0) {
        fprintf(stderr, "listen failed(%s)\n", strerror(errno));
        goto exit;
    }

    /*
     * accept incoming connections; the length is a value-result argument
     * and must hold the size of the address buffer before the call
     */
    len = sizeof(client_addr);
    new_fd = accept(fd, (struct sockaddr *)&client_addr, &len);
    if (new_fd < 0) {
        fprintf(stderr, "accept error.(%s)\n", strerror(errno));
        ret = -1;
        goto exit;
    }

    /* keep one byte free so the data can always be NUL-terminated */
    size = read(new_fd, buffer, sizeof(buffer) - 1);
    if (size < 0) {
        fprintf(stderr, "read error(%s)\n", strerror(errno));
        ret = -1;
        close(new_fd);
        goto exit;
    }
    buffer[size] = '\0';

    printf("received \"%s\" from client\n", buffer);
    close(new_fd);

exit:
    close(fd);
    unlink(MY_SOCK_ADDR);

    return ret;
}
Client:
#include <stdio.h>

#include <sys/types.h>
#include <sys/socket.h>
#include <sys/un.h>

#include <errno.h>
#include <string.h>

#include <unistd.h>

#define SERVER_SOCK_ADDR "/tmp/my_socket"

/*
 * Minimal AF_UNIX stream client.
 *
 * Connects to the server socket at SERVER_SOCK_ADDR and sends the string
 * "Hello server" including its terminating NUL byte.
 *
 * Returns 0 on success and -1 if any step fails.
 */
int main()
{
    static const char greeting[] = "Hello server";
    struct sockaddr_un server_addr;
    int status = -1;
    int sock;

    /* Endpoint used to talk to the server. */
    sock = socket(AF_UNIX, SOCK_STREAM, 0);
    if (sock < 0) {
        fprintf(stderr, "create socket error(%s)\n", strerror(errno));
        return -1;
    }

    /* Describe where the server is listening. */
    memset(&server_addr, 0, sizeof server_addr);
    server_addr.sun_family = AF_UNIX;
    strcpy(server_addr.sun_path, SERVER_SOCK_ADDR);

    if (connect(sock, (struct sockaddr *)&server_addr, sizeof server_addr) < 0) {
        fprintf(stderr, "connect to server failed(%s)\n", strerror(errno));
        goto out;
    }

    /* sizeof counts the trailing NUL, so the terminator is sent as well. */
    if (write(sock, greeting, sizeof greeting) < 0) {
        fprintf(stderr, "write to server failed(%s)\n", strerror(errno));
        goto out;
    }

    status = 0;

out:
    close(sock);
    return status;
}
socket 的用户态实现位于 libc.so 中,基本上只是对系统调用的封装,调用后即陷入内核态。下面来看 socket 在内核态的实现。
socket和文件系统之间的关系可以参考下面的链接。
http://linuxeco.com/?p=1
1 /** 2 * struct socket - general BSD socket 3 * @state: socket state (%SS_CONNECTED, etc) 4 * @type: socket type (%SOCK_STREAM, etc) 5 * @flags: socket flags (%SOCK_ASYNC_NOSPACE, etc) 6 * @ops: protocol specific socket operations 7 * @file: File back pointer for gc 8 * @sk: internal networking protocol agnostic socket representation 9 * @wq: wait queue for several uses 10 */ 11 struct socket { 12 socket_state state; 13 14 kmemcheck_bitfield_begin(type); 15 short type; 16 kmemcheck_bitfield_end(type); 17 18 unsigned long flags; 19 20 struct socket_wq __rcu *wq; 21 22 struct file *file; 23 struct sock *sk; 24 const struct proto_ops *ops; 25 };
1 /** 2 * struct sock - network layer representation of sockets 3 * @__sk_common: shared layout with inet_timewait_sock 4 * @sk_shutdown: mask of %SEND_SHUTDOWN and/or %RCV_SHUTDOWN 5 * @sk_userlocks: %SO_SNDBUF and %SO_RCVBUF settings 6 * @sk_lock: synchronizer 7 * @sk_rcvbuf: size of receive buffer in bytes 8 * @sk_wq: sock wait queue and async head 9 * @sk_rx_dst: receive input route used by early demux 10 * @sk_dst_cache: destination cache 11 * @sk_dst_lock: destination cache lock 12 * @sk_policy: flow policy 13 * @sk_receive_queue: incoming packets 14 * @sk_wmem_alloc: transmit queue bytes committed 15 * @sk_write_queue: Packet sending queue 16 * @sk_async_wait_queue: DMA copied packets 17 * @sk_omem_alloc: "o" is "option" or "other" 18 * @sk_wmem_queued: persistent queue size 19 * @sk_forward_alloc: space allocated forward 20 * @sk_napi_id: id of the last napi context to receive data for sk 21 * @sk_ll_usec: usecs to busypoll when there is no data 22 * @sk_allocation: allocation mode 23 * @sk_pacing_rate: Pacing rate (if supported by transport/packet scheduler) 24 * @sk_max_pacing_rate: Maximum pacing rate (%SO_MAX_PACING_RATE) 25 * @sk_sndbuf: size of send buffer in bytes 26 * @sk_flags: %SO_LINGER (l_onoff), %SO_BROADCAST, %SO_KEEPALIVE, 27 * %SO_OOBINLINE settings, %SO_TIMESTAMPING settings 28 * @sk_no_check: %SO_NO_CHECK setting, whether or not checkup packets 29 * @sk_route_caps: route capabilities (e.g. %NETIF_F_TSO) 30 * @sk_route_nocaps: forbidden route capabilities (e.g NETIF_F_GSO_MASK) 31 * @sk_gso_type: GSO type (e.g. 
%SKB_GSO_TCPV4) 32 * @sk_gso_max_size: Maximum GSO segment size to build 33 * @sk_gso_max_segs: Maximum number of GSO segments 34 * @sk_lingertime: %SO_LINGER l_linger setting 35 * @sk_backlog: always used with the per-socket spinlock held 36 * @sk_callback_lock: used with the callbacks in the end of this struct 37 * @sk_error_queue: rarely used 38 * @sk_prot_creator: sk_prot of original sock creator (see ipv6_setsockopt, 39 * IPV6_ADDRFORM for instance) 40 * @sk_err: last error 41 * @sk_err_soft: errors that don't cause failure but are the cause of a 42 * persistent failure not just 'timed out' 43 * @sk_drops: raw/udp drops counter 44 * @sk_ack_backlog: current listen backlog 45 * @sk_max_ack_backlog: listen backlog set in listen() 46 * @sk_priority: %SO_PRIORITY setting 47 * @sk_cgrp_prioidx: socket group's priority map index 48 * @sk_type: socket type (%SOCK_STREAM, etc) 49 * @sk_protocol: which protocol this socket belongs in this network family 50 * @sk_peer_pid: &struct pid for this socket's peer 51 * @sk_peer_cred: %SO_PEERCRED setting 52 * @sk_rcvlowat: %SO_RCVLOWAT setting 53 * @sk_rcvtimeo: %SO_RCVTIMEO setting 54 * @sk_sndtimeo: %SO_SNDTIMEO setting 55 * @sk_rxhash: flow hash received from netif layer 56 * @sk_filter: socket filtering instructions 57 * @sk_protinfo: private area, net family specific, when not using slab 58 * @sk_timer: sock cleanup timer 59 * @sk_stamp: time stamp of last packet received 60 * @sk_socket: Identd and reporting IO signals 61 * @sk_user_data: RPC layer private data 62 * @sk_frag: cached page frag 63 * @sk_peek_off: current peek_offset value 64 * @sk_send_head: front of stuff to transmit 65 * @sk_security: used by security modules 66 * @sk_mark: generic packet mark 67 * @sk_classid: this socket's cgroup classid 68 * @sk_cgrp: this socket's cgroup-specific proto data 69 * @sk_write_pending: a write to stream socket waits to start 70 * @sk_state_change: callback to indicate change in the state of the sock 71 * @sk_data_ready: 
callback to indicate there is data to be processed 72 * @sk_write_space: callback to indicate there is bf sending space available 73 * @sk_error_report: callback to indicate errors (e.g. %MSG_ERRQUEUE) 74 * @sk_backlog_rcv: callback to process the backlog 75 * @sk_destruct: called at sock freeing time, i.e. when all refcnt == 0 76 */ 77 struct sock { 78 /* 79 * Now struct inet_timewait_sock also uses sock_common, so please just 80 * don't add nothing before this first member (__sk_common) --acme 81 */ 82 struct sock_common __sk_common; 83 #define sk_node __sk_common.skc_node 84 #define sk_nulls_node __sk_common.skc_nulls_node 85 #define sk_refcnt __sk_common.skc_refcnt 86 #define sk_tx_queue_mapping __sk_common.skc_tx_queue_mapping 87 88 #define sk_dontcopy_begin __sk_common.skc_dontcopy_begin 89 #define sk_dontcopy_end __sk_common.skc_dontcopy_end 90 #define sk_hash __sk_common.skc_hash 91 #define sk_portpair __sk_common.skc_portpair 92 #define sk_num __sk_common.skc_num 93 #define sk_dport __sk_common.skc_dport 94 #define sk_addrpair __sk_common.skc_addrpair 95 #define sk_daddr __sk_common.skc_daddr 96 #define sk_rcv_saddr __sk_common.skc_rcv_saddr 97 #define sk_family __sk_common.skc_family 98 #define sk_state __sk_common.skc_state 99 #define sk_reuse __sk_common.skc_reuse 100 #define sk_reuseport __sk_common.skc_reuseport 101 #define sk_bound_dev_if __sk_common.skc_bound_dev_if 102 #define sk_bind_node __sk_common.skc_bind_node 103 #define sk_prot __sk_common.skc_prot 104 #define sk_net __sk_common.skc_net 105 #define sk_v6_daddr __sk_common.skc_v6_daddr 106 #define sk_v6_rcv_saddr __sk_common.skc_v6_rcv_saddr 107 108 socket_lock_t sk_lock; 109 struct sk_buff_head sk_receive_queue; 110 /* 111 * The backlog queue is special, it is always used with 112 * the per-socket spinlock held and requires low latency 113 * access. Therefore we special case it's implementation. 
114 * Note : rmem_alloc is in this structure to fill a hole 115 * on 64bit arches, not because its logically part of 116 * backlog. 117 */ 118 struct { 119 atomic_t rmem_alloc; 120 int len; 121 struct sk_buff *head; 122 struct sk_buff *tail; 123 } sk_backlog; 124 #define sk_rmem_alloc sk_backlog.rmem_alloc 125 int sk_forward_alloc; 126 #ifdef CONFIG_RPS 127 __u32 sk_rxhash; 128 #endif 129 #ifdef CONFIG_NET_RX_BUSY_POLL 130 unsigned int sk_napi_id; 131 unsigned int sk_ll_usec; 132 #endif 133 atomic_t sk_drops; 134 int sk_rcvbuf; 135 136 struct sk_filter __rcu *sk_filter; 137 struct socket_wq __rcu *sk_wq; 138 139 #ifdef CONFIG_NET_DMA 140 struct sk_buff_head sk_async_wait_queue; 141 #endif 142 143 #ifdef CONFIG_XFRM 144 struct xfrm_policy *sk_policy[2]; 145 #endif 146 unsigned long sk_flags; 147 struct dst_entry *sk_rx_dst; 148 struct dst_entry __rcu *sk_dst_cache; 149 spinlock_t sk_dst_lock; 150 atomic_t sk_wmem_alloc; 151 atomic_t sk_omem_alloc; 152 int sk_sndbuf; 153 struct sk_buff_head sk_write_queue; 154 kmemcheck_bitfield_begin(flags); 155 unsigned int sk_shutdown : 2, 156 sk_no_check : 2, 157 sk_userlocks : 4, 158 sk_protocol : 8, 159 #define SK_PROTOCOL_MAX U8_MAX 160 sk_type : 16; 161 kmemcheck_bitfield_end(flags); 162 int sk_wmem_queued; 163 gfp_t sk_allocation; 164 u32 sk_pacing_rate; /* bytes per second */ 165 u32 sk_max_pacing_rate; 166 netdev_features_t sk_route_caps; 167 netdev_features_t sk_route_nocaps; 168 int sk_gso_type; 169 unsigned int sk_gso_max_size; 170 u16 sk_gso_max_segs; 171 int sk_rcvlowat; 172 unsigned long sk_lingertime; 173 struct sk_buff_head sk_error_queue; 174 struct proto *sk_prot_creator; 175 rwlock_t sk_callback_lock; 176 int sk_err, 177 sk_err_soft; 178 unsigned short sk_ack_backlog; 179 unsigned short sk_max_ack_backlog; 180 __u32 sk_priority; 181 #if IS_ENABLED(CONFIG_CGROUP_NET_PRIO) 182 __u32 sk_cgrp_prioidx; 183 #endif 184 struct pid *sk_peer_pid; 185 const struct cred *sk_peer_cred; 186 long sk_rcvtimeo; 187 long 
sk_sndtimeo; 188 void *sk_protinfo; 189 struct timer_list sk_timer; 190 ktime_t sk_stamp; 191 struct socket *sk_socket; 192 void *sk_user_data; 193 struct page_frag sk_frag; 194 struct sk_buff *sk_send_head; 195 __s32 sk_peek_off; 196 int sk_write_pending; 197 #ifdef CONFIG_SECURITY 198 void *sk_security; 199 #endif 200 __u32 sk_mark; 201 u32 sk_classid; 202 struct cg_proto *sk_cgrp; 203 void (*sk_state_change)(struct sock *sk); 204 void (*sk_data_ready)(struct sock *sk, int bytes); 205 void (*sk_write_space)(struct sock *sk); 206 void (*sk_error_report)(struct sock *sk); 207 int (*sk_backlog_rcv)(struct sock *sk, 208 struct sk_buff *skb); 209 void (*sk_destruct)(struct sock *sk); 210 };
/* The AF_UNIX socket */
1 /* The AF_UNIX socket */ 2 struct unix_sock { 3 /* WARNING: sk has to be the first member */ 4 struct sock sk; 5 struct unix_address *addr; 6 struct path path; 7 struct mutex readlock; 8 struct sock *peer; 9 struct list_head link; 10 atomic_long_t inflight; 11 spinlock_t lock; 12 unsigned char recursion_level; 13 unsigned long gc_flags; 14 #define UNIX_GC_CANDIDATE 0 15 #define UNIX_GC_MAYBE_CYCLE 1 16 struct socket_wq peer_wq; 17 wait_queue_t peer_wake; 18 };
1 /** 2 * struct sk_buff - socket buffer 3 * @next: Next buffer in list 4 * @prev: Previous buffer in list 5 * @tstamp: Time we arrived 6 * @sk: Socket we are owned by 7 * @dev: Device we arrived on/are leaving by 8 * @cb: Control buffer. Free for use by every layer. Put private vars here 9 * @_skb_refdst: destination entry (with norefcount bit) 10 * @sp: the security path, used for xfrm 11 * @len: Length of actual data 12 * @data_len: Data length 13 * @mac_len: Length of link layer header 14 * @hdr_len: writable header length of cloned skb 15 * @csum: Checksum (must include start/offset pair) 16 * @csum_start: Offset from skb->head where checksumming should start 17 * @csum_offset: Offset from csum_start where checksum should be stored 18 * @priority: Packet queueing priority 19 * @local_df: allow local fragmentation 20 * @cloned: Head may be cloned (check refcnt to be sure) 21 * @ip_summed: Driver fed us an IP checksum 22 * @nohdr: Payload reference only, must not modify header 23 * @nfctinfo: Relationship of this skb to the connection 24 * @pkt_type: Packet class 25 * @fclone: skbuff clone status 26 * @ipvs_property: skbuff is owned by ipvs 27 * @peeked: this packet has been seen already, so stats have been 28 * done for it, don't do them again 29 * @nf_trace: netfilter packet trace flag 30 * @protocol: Packet protocol from driver 31 * @destructor: Destruct function 32 * @nfct: Associated connection, if any 33 * @nf_bridge: Saved data about a bridged frame - see br_netfilter.c 34 * @skb_iif: ifindex of device we arrived on 35 * @tc_index: Traffic control index 36 * @tc_verd: traffic control verdict 37 * @rxhash: the packet hash computed on receive 38 * @queue_mapping: Queue mapping for multiqueue devices 39 * @ndisc_nodetype: router type (from link layer) 40 * @ooo_okay: allow the mapping of a socket to a queue to be changed 41 * @l4_rxhash: indicate rxhash is a canonical 4-tuple hash over transport 42 * ports. 
43 * @wifi_acked_valid: wifi_acked was set 44 * @wifi_acked: whether frame was acked on wifi or not 45 * @no_fcs: Request NIC to treat last 4 bytes as Ethernet FCS 46 * @dma_cookie: a cookie to one of several possible DMA operations 47 * done by skb DMA functions 48 * @napi_id: id of the NAPI struct this skb came from 49 * @secmark: security marking 50 * @mark: Generic packet mark 51 * @dropcount: total number of sk_receive_queue overflows 52 * @vlan_proto: vlan encapsulation protocol 53 * @vlan_tci: vlan tag control information 54 * @inner_protocol: Protocol (encapsulation) 55 * @inner_transport_header: Inner transport layer header (encapsulation) 56 * @inner_network_header: Network layer header (encapsulation) 57 * @inner_mac_header: Link layer header (encapsulation) 58 * @transport_header: Transport layer header 59 * @network_header: Network layer header 60 * @mac_header: Link layer header 61 * @tail: Tail pointer 62 * @end: End pointer 63 * @head: Head of buffer 64 * @data: Data head pointer 65 * @truesize: Buffer size 66 * @users: User count - see {datagram,tcp}.c 67 */ 68 69 struct sk_buff { 70 /* These two members must be first. */ 71 struct sk_buff *next; 72 struct sk_buff *prev; 73 74 ktime_t tstamp; 75 76 struct sock *sk; 77 struct net_device *dev; 78 79 /* 80 * This is the control buffer. It is free to use for every 81 * layer. Please put your private variables there. If you 82 * want to keep them across layers you have to do a skb_clone() 83 * first. This is owned by whoever has the skb queued ATM. 
84 */ 85 char cb[48] __aligned(8); 86 87 unsigned long _skb_refdst; 88 #ifdef CONFIG_XFRM 89 struct sec_path *sp; 90 #endif 91 unsigned int len, 92 data_len; 93 __u16 mac_len, 94 hdr_len; 95 union { 96 __wsum csum; 97 struct { 98 __u16 csum_start; 99 __u16 csum_offset; 100 }; 101 }; 102 __u32 priority; 103 kmemcheck_bitfield_begin(flags1); 104 __u8 local_df:1, 105 cloned:1, 106 ip_summed:2, 107 nohdr:1, 108 nfctinfo:3; 109 __u8 pkt_type:3, 110 fclone:2, 111 ipvs_property:1, 112 peeked:1, 113 nf_trace:1; 114 kmemcheck_bitfield_end(flags1); 115 __be16 protocol; 116 117 void (*destructor)(struct sk_buff *skb); 118 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) 119 struct nf_conntrack *nfct; 120 #endif 121 #ifdef CONFIG_BRIDGE_NETFILTER 122 struct nf_bridge_info *nf_bridge; 123 #endif 124 125 int skb_iif; 126 127 __u32 rxhash; 128 129 __be16 vlan_proto; 130 __u16 vlan_tci; 131 132 #ifdef CONFIG_NET_SCHED 133 __u16 tc_index; /* traffic control index */ 134 #ifdef CONFIG_NET_CLS_ACT 135 __u16 tc_verd; /* traffic control verdict */ 136 #endif 137 #endif 138 139 __u16 queue_mapping; 140 kmemcheck_bitfield_begin(flags2); 141 #ifdef CONFIG_IPV6_NDISC_NODETYPE 142 __u8 ndisc_nodetype:2; 143 #endif 144 __u8 pfmemalloc:1; 145 __u8 ooo_okay:1; 146 __u8 l4_rxhash:1; 147 __u8 wifi_acked_valid:1; 148 __u8 wifi_acked:1; 149 __u8 no_fcs:1; 150 __u8 head_frag:1; 151 /* Encapsulation protocol and NIC drivers should use 152 * this flag to indicate to each other if the skb contains 153 * encapsulated packet or not and maybe use the inner packet 154 * headers if needed 155 */ 156 __u8 encapsulation:1; 157 /* 6/8 bit hole (depending on ndisc_nodetype presence) */ 158 kmemcheck_bitfield_end(flags2); 159 160 #if defined CONFIG_NET_DMA || defined CONFIG_NET_RX_BUSY_POLL 161 union { 162 unsigned int napi_id; 163 dma_cookie_t dma_cookie; 164 }; 165 #endif 166 #ifdef CONFIG_NETWORK_SECMARK 167 __u32 secmark; 168 #endif 169 union { 170 __u32 mark; 171 __u32 dropcount; 172 
__u32 reserved_tailroom; 173 }; 174 175 __be16 inner_protocol; 176 __u16 inner_transport_header; 177 __u16 inner_network_header; 178 __u16 inner_mac_header; 179 __u16 transport_header; 180 __u16 network_header; 181 __u16 mac_header; 182 /* These elements must be at the end, see alloc_skb() for details. */ 183 sk_buff_data_t tail; 184 sk_buff_data_t end; 185 unsigned char *head, 186 *data; 187 unsigned int truesize; 188 atomic_t users; 189 };