35 struct net { 36 atomic_t count; /* To decided when the network 37 * namespace should be freed. 38 */ 39 #ifdef NETNS_REFCNT_DEBUG 40 atomic_t use_count; /* To track references we 41 * destroy on demand 42 */ 43 #endif 44 struct list_head list; /* list of network namespaces */ 45 struct list_head cleanup_list; /* namespaces on death row */ 46 struct list_head exit_list; /* Use only net_mutex */ 47 48 struct proc_dir_entry *proc_net; 49 struct proc_dir_entry *proc_net_stat; 50 51 #ifdef CONFIG_SYSCTL 52 struct ctl_table_set sysctls; 53 #endif 54 55 struct net_device *loopback_dev; /* The loopback */ 56 57 struct list_head dev_base_head; 58 struct hlist_head *dev_name_head; 59 struct hlist_head *dev_index_head; 60 61 /* core fib_rules */ 62 struct list_head rules_ops; 63 spinlock_t rules_mod_lock; 64 65 struct sock *rtnl; /* rtnetlink socket */ 66 struct sock *genl_sock; 67 68 struct netns_core core; 69 struct netns_mib mib; 70 struct netns_packet packet; 71 struct netns_unix unx; 72 struct netns_ipv4 ipv4; 73 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) 74 struct netns_ipv6 ipv6; 75 #endif 76 #if defined(CONFIG_IP_DCCP) || defined(CONFIG_IP_DCCP_MODULE) 77 struct netns_dccp dccp; 78 #endif 79 #ifdef CONFIG_NETFILTER 80 struct netns_xt xt; 81 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) 82 struct netns_ct ct; 83 #endif 84 struct sock *nfnl; 85 struct sock *nfnl_stash; 86 #endif 87 #ifdef CONFIG_XFRM 88 struct netns_xfrm xfrm; 89 #endif 90 #ifdef CONFIG_WEXT_CORE 91 struct sk_buff_head wext_nlevents; 92 #endif 93 struct net_generic *gen; 94 };
The following patchset was built against the latest net-2.6.24 tree, and should be safe to apply assuming no issues are found during the review. In the interest of keeping the patchset to a reviewable size, just the core of the network stack has been covered. The 10,000 foot overview: We want to make it look to user space like the kernel implements multiple network stacks. To implement this, some of the currently global variables in the network stack need to have one instance per network namespace, or the global data structure needs to have a network namespace field. Currently control enters the network stack in one of 4 major ways: through operations on a socket, through a packet coming in from a network device, through miscellaneous syscalls from a process, and through operations on a virtual filesystem. So the current design calls for placing a pointer to struct net (the network namespace structure) on network devices, sockets, processes, and on filesystems, so that it is always clear in the context of which network namespace an operation should be done. Packets do not contain a pointer to a network namespace structure. Instead their network namespace is derived from which network device or which socket they are passing through. On the input path we only need to look at the network namespace to determine which routing tables to use, and which sockets the packet can be destined for. Similarly, on the output path we only need to consult the network namespace for the output routing tables, which point to which network devices we can use. So while there are accesses to the network namespace as we process each packet, they are in well-contained spots that occur rarely. Where the network namespace appears most is on the control, setup, and cleanup code paths in the network stack that we change rarely. 
There we currently don't have anything except a global context, so modifications are necessary; but since the network namespace parameter is explicit rather than implicit, it should not require much thought to use. The implementation strategy follows the classic global lock reduction pattern. First, all of the interfaces at a given level in the network stack are made to filter out traffic from anything except the initial network namespace, and then those interfaces are allowed to see packets from any network namespace. Then some subset of those interfaces is taught to handle packets from all namespaces, after the more specific protocol layers below them have been made to filter those packets. What this means is that we start out with large, intrusive, stupid patches and end up with small patches that enable small bits of functionality in the secondary network namespaces. Eric - To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html