这里接着第一篇继续梳理p2p server的流程。上一篇中说到,创建以太坊节点Node只是组装器,分为上下两层:上层是实现相同接口的service(如eth),下层是p2p server,具体负责的就是node发现、peer连接,以及处理上层service的消息收发工作,是ethereum基础的基础。
还是老样子,先有个框架印象,然后再逐步梳理。在最初的node.go里面的start函数,调用了running.Start()开始了p2p server的流程,这个函数并列执行了4个函数,也就是4个流程。
流程1 比较简单,就是创建本地的node(这里的node不同于最初的node:最初的node是逻辑上的ethereum节点,这里的node可以说是物理链路上的一个节点),配置节点protoHandshake结构(version, name, ID(即公钥),caps(即支持的上层协议及版本)),并且打开了一个db,这个db会关联到后面的k-桶,用来持久化k-桶中5分钟没变的node。
流程2,启动tcp监听,然后就go srv.listenLoop(),持续监听消息,收到消息的话,会尝试连接这个node, srv.SetupConn(fd, inboundConn, nil), 注意这里第三个参数是nil,后续会用此判断是被动接收到的消息,在流程3中,还会调用到这个函数,就会传入要连接的node,即主动发起的连接
流程3,创建一个拨号客户端,主动拨号node,尝试将流程4中发现的node 转为peer
流程4,node的发现过程,这里面涉及到k-桶的所有逻辑(创建,距离计算,发现,刷新,4种消息处理等等)
这样,创建本地node,然后发现网络中其他node,主动拨号node,尝试转为peer,被动接收tcp消息,也尝试转为peer,如果成功转为peer了,就会执行peer.run(), 进而执行protocol.run。如果看过前一篇内容的话,有提到service在构造的时候,定义了Run, NodeInfo,PeerInfo. 这里protocol.run, 就开始执行了所有支持的protocol的run, 也就跟上层业务联系起来了。
好了,有了上面的一个整体流程印象,下面就分步来看4个流程的代码。还是不会很细致,会省略很多代码,以梳理框架为主。
流程1:setupLocalNode
// setupLocalNode builds the devp2p handshake descriptor and the local
// node record (flow 1 of the server start-up).
func (srv *Server) setupLocalNode() error {
	// Create the devp2p handshake.
	// Build the protoHandshake struct: protocol version, client name,
	// node ID (the public key with its first byte stripped) and, below,
	// the capability list derived from the configured protocols.
	pubkey := crypto.FromECDSAPub(&srv.PrivateKey.PublicKey)
	srv.ourHandshake = &protoHandshake{Version: baseProtocolVersion, Name: srv.Name, ID: pubkey[1:]}
	for _, p := range srv.Protocols {
		srv.ourHandshake.Caps = append(srv.ourHandshake.Caps, p.cap())
	}
	sort.Sort(capsByNameAndVersion(srv.ourHandshake.Caps))
	// Open the node database (in-memory or persistent, depending on
	// srv.Config.NodeDatabase); it later backs Kademlia-table persistence.
	// Create the local node.
	db, err := enode.OpenDB(srv.Config.NodeDatabase)
	if err != nil {
		return err
	}
	srv.nodedb = db
	srv.localnode = enode.NewLocalNode(db, srv.PrivateKey)
	srv.localnode.SetFallbackIP(net.IP{127, 0, 0, 1})
	srv.localnode.Set(capsByNameAndVersion(srv.ourHandshake.Caps))
	// TODO: check conflicts
	// Copy protocol-specific attributes into the local node record.
	for _, p := range srv.Protocols {
		for _, e := range p.Attributes {
			srv.localnode.Set(e)
		}
	}
	......
	return nil
}
这里很简单,前面说过了,配置srv中的ourHandshake,nodedb,localnode等
流程2:setupListening
// setupListening starts the TCP listener and the accept loop
// (flow 2 of the server start-up).
func (srv *Server) setupListening() error {
	// Launch the TCP listener.
	listener, err := net.Listen("tcp", srv.ListenAddr)
	if err != nil {
		return err
	}
	// Record the actual listen address (the port may have been picked
	// by the OS) and publish it in the local node record.
	laddr := listener.Addr().(*net.TCPAddr)
	srv.ListenAddr = laddr.String()
	srv.listener = listener
	srv.localnode.Set(enr.TCP(laddr.Port))
	// Keep accepting inbound connections in a background goroutine.
	srv.loopWG.Add(1)
	go srv.listenLoop()
	......
	return nil
}
----------------------------------------------------------------------------------
// listenLoop runs in its own goroutine and accepts
// inbound connections.
func (srv *Server) listenLoop() {
	defer srv.loopWG.Done()
	srv.log.Debug("TCP listener up", "addr", srv.listener.Addr())
	// Bound the number of in-flight (pending) handshakes; defaults to
	// defaultMaxPendingPeers unless MaxPendingPeers is configured.
	tokens := defaultMaxPendingPeers
	if srv.MaxPendingPeers > 0 {
		tokens = srv.MaxPendingPeers
	}
	// Fill the slot channel with one token per allowed pending handshake.
	slots := make(chan struct{}, tokens)
	for i := 0; i < tokens; i++ {
		slots <- struct{}{}
	}
	for {
		// Wait for a handshake slot before accepting.
		<-slots
		var (
			fd  net.Conn
			err error
		)
		for {
			// Block until a new inbound connection arrives.
			fd, err = srv.listener.Accept()
			......
			break
		}
		// Whitelist check, when one is configured.
		// Reject connections that do not match NetRestrict.
		if srv.NetRestrict != nil {
			if tcp, ok := fd.RemoteAddr().(*net.TCPAddr); ok && !srv.NetRestrict.Contains(tcp.IP) {
				srv.log.Debug("Rejected conn (not whitelisted in NetRestrict)", "addr", fd.RemoteAddr())
				fd.Close()
				slots <- struct{}{} // return the slot before moving on
				continue
			}
		}
		var ip net.IP
		if tcp, ok := fd.RemoteAddr().(*net.TCPAddr); ok {
			ip = tcp.IP
		}
		// Wrap the connection for traffic metering.
		fd = newMeteredConn(fd, true, ip)
		srv.log.Trace("Accepted connection", "addr", fd.RemoteAddr())
		// Run the handshakes asynchronously. The nil third argument marks
		// this as a passively accepted (inbound) connection, which also
		// selects the receiver side of the RLPx encryption handshake.
		go func() {
			srv.SetupConn(fd, inboundConn, nil)
			slots <- struct{}{} // free the slot once the handshakes finish
		}()
	}
}
----------------------------------------------------------------------------------
// SetupConn runs the handshakes and attempts to add the connection
// as a peer. It returns when the connection has been added as a peer
// or the handshakes have failed.
func (srv *Server) SetupConn(fd net.Conn, flags connFlag, dialDest *enode.Node) error {
	// Wrap the raw socket in a conn with a fresh transport and a
	// continuation channel used by the run loop's checkpoints.
	wrapped := &conn{
		fd:        fd,
		transport: srv.newTransport(fd),
		flags:     flags,
		cont:      make(chan error),
	}
	if err := srv.setupConn(wrapped, flags, dialDest); err != nil {
		// Close the connection on any handshake failure before reporting.
		wrapped.close(err)
		srv.log.Trace("Setting up connection failed", "addr", fd.RemoteAddr(), "err", err)
		return err
	}
	return nil
}
----------------------------------------------------------------------------------
这里是node转peer的流程,会被2处调用,一处是这里的被动接收消息,还有一处是后面主动拨号后进行连接
// setupConn is the node→peer path. It is reached from two places: the
// inbound accept loop above (dialDest == nil) and the dialer after an
// outbound dial (dialDest != nil).
func (srv *Server) setupConn(c *conn, flags connFlag, dialDest *enode.Node) error {
	// Prevent leftover pending conns from entering the handshake.
	srv.lock.Lock()
	running := srv.running
	srv.lock.Unlock()
	if !running {
		return errServerStopped
	}
	// This is the third-argument check mentioned earlier: a non-nil
	// dialDest means we dialed out, so the remote public key must be
	// recoverable from the destination node record.
	// If dialing, figure out the remote public key.
	var dialPubkey *ecdsa.PublicKey
	if dialDest != nil {
		dialPubkey = new(ecdsa.PublicKey)
		if err := dialDest.Load((*enode.Secp256k1)(dialPubkey)); err != nil {
			return errors.New("dial destination doesn't have a secp256k1 public key")
		}
	}
	// RLPx doEncHandshake: key exchange and encrypted channel setup.
	// Whether dialPubkey is nil decides the receiver vs initiator role.
	// Run the encryption handshake.
	remotePubkey, err := c.doEncHandshake(srv.PrivateKey, dialPubkey)
	if err != nil {
		srv.log.Trace("Failed RLPx handshake", "addr", c.fd.RemoteAddr(), "conn", c.flags, "err", err)
		return err
	}
	if dialDest != nil {
		// For dialed connections, check that the remote public key matches.
		if dialPubkey.X.Cmp(remotePubkey.X) != 0 || dialPubkey.Y.Cmp(remotePubkey.Y) != 0 {
			return DiscUnexpectedIdentity
		}
		c.node = dialDest
	} else {
		c.node = nodeFromConn(remotePubkey, c.fd)
	}
	if conn, ok := c.fd.(*meteredConn); ok {
		conn.handshakeDone(c.node.ID())
	}
	clog := srv.log.New("id", c.node.ID(), "addr", c.fd.RemoteAddr(), "conn", c.flags)
	// Synchronous checkpoint: hand the conn to the run loop over the
	// posthandshake channel and wait for its verdict.
	err = srv.checkpoint(c, srv.posthandshake)
	if err != nil {
		clog.Trace("Rejected peer before protocol handshake", "err", err)
		return err
	}
	// Second handshake: negotiate the protocol versions on both sides,
	// Snappy compression support, and so on.
	// Run the protocol handshake
	phs, err := c.doProtoHandshake(srv.ourHandshake)
	if err != nil {
		clog.Trace("Failed proto handshake", "err", err)
		return err
	}
	......
	// Notify the addpeer channel; on success this formally enters the
	// peer.run flow.
	c.caps, c.name = phs.Caps, phs.Name
	err = srv.checkpoint(c, srv.addpeer)
	if err != nil {
		clog.Trace("Rejected peer", "err", err)
		return err
	}
	// If the checks completed successfully, runPeer has now been
	// launched by run.
	clog.Trace("connection set up", "inbound", dialDest == nil)
	return nil
}
流程2,也如上面总结的,持续监听tcp,收到消息后,尝试连接peer,走了一个公共流程,2步握手,交换秘钥,建立通道,最后通知addpeer通道,进行peer.run,进而处理service的protocol.run
流程3:dialer := newDialState(...)srv.run(dialer)
// newDialState creates the dialer state used by Server.run to schedule
// dial (and, indirectly, discovery) tasks.
func newDialState(self enode.ID, static []*enode.Node, bootnodes []*enode.Node, ntab discoverTable, maxdyn int, netrestrict *netutil.Netlist) *dialstate {
	d := &dialstate{
		self:        self,
		ntab:        ntab,
		maxDynDials: maxdyn,
		netrestrict: netrestrict,
		hist:        new(dialHistory),
		static:      make(map[enode.ID]*dialTask),
		dialing:     make(map[enode.ID]connFlag),
		bootnodes:   make([]*enode.Node, len(bootnodes)),
		randomNodes: make([]*enode.Node, maxdyn/2),
	}
	// Keep a private copy of the bootnode list.
	copy(d.bootnodes, bootnodes)
	// Register every statically configured node as a standing dial task.
	for _, node := range static {
		d.addStatic(node)
	}
	return d
}
----------------------------------------------------------------------------------
// run is the server's main loop: it schedules dial/discovery tasks and
// arbitrates the posthandshake and addpeer checkpoints (flow 3).
func (srv *Server) run(dialstate dialer) {
	srv.log.Info("Started P2P networking", "self", srv.localnode.Node())
	defer srv.loopWG.Done()
	defer srv.nodedb.Close()
	var (
		peers        = make(map[enode.ID]*Peer)
		inboundCount = 0
		trusted      = make(map[enode.ID]bool, len(srv.TrustedNodes))
		taskdone     = make(chan task, maxActiveDialTasks)
		runningTasks []task
		queuedTasks  []task // tasks that can't run yet
	)
	// Put trusted nodes into a map to speed up checks.
	// Trusted peers are loaded on startup or added via AddTrustedPeer RPC.
	for _, n := range srv.TrustedNodes {
		trusted[n.ID()] = true
	}
	// Helper: remove t from runningTasks.
	// removes t from runningTasks
	delTask := func(t task) {
		for i := range runningTasks {
			if runningTasks[i] == t {
				runningTasks = append(runningTasks[:i], runningTasks[i+1:]...)
				break
			}
		}
	}
	// Helper: launch tasks (their Do methods) until the active-task cap
	// is hit; there are two task kinds, dialTask and discoverTask.
	// starts until max number of active tasks is satisfied
	startTasks := func(ts []task) (rest []task) {
		i := 0
		for ; len(runningTasks) < maxActiveDialTasks && i < len(ts); i++ {
			t := ts[i]
			srv.log.Trace("New dial task", "task", t)
			go func() { t.Do(srv); taskdone <- t }()
			runningTasks = append(runningTasks, t)
		}
		return ts[i:]
	}
	// Helper: the task scheduler.
	scheduleTasks := func() {
		// Drain the queue first (up to the active-task cap); whatever
		// cannot start yet goes back into the queue.
		// Start from queue first.
		queuedTasks = append(queuedTasks[:0], startTasks(queuedTasks)...)
		// Query dialer for new tasks and start as many as possible now.
		// Ask the dialer for fresh tasks and start as many as possible;
		// the rest is queued. newTasks is the important call here.
		if len(runningTasks) < maxActiveDialTasks {
			nt := dialstate.newTasks(len(runningTasks)+len(queuedTasks), peers, time.Now())
			queuedTasks = append(queuedTasks, startTasks(nt)...)
		}
	}
running:
	for {
		// Kick off scheduling on every loop iteration.
		scheduleTasks()
		select {
		......
		// Sent synchronously from flow 2 once the encryption handshake
		// has completed.
		case c := <-srv.posthandshake:
			// A connection has passed the encryption handshake so
			// the remote identity is known (but hasn't been verified yet).
			if trusted[c.node.ID()] {
				// Ensure that the trusted flag is set before checking against MaxPeers.
				c.flags |= trustedConn
			}
			// TODO: track in-progress inbound node IDs (pre-Peer) to avoid dialing them.
			select {
			case c.cont <- srv.encHandshakeChecks(peers, inboundCount, c):
			case <-srv.quit:
				break running
			}
		// The crucial addpeer channel from flow 2; dialTask.Do sends on
		// it as well after an outbound handshake.
		case c := <-srv.addpeer:
			// At this point the connection is past the protocol handshake.
			// Its capabilities are known and the remote identity is verified.
			err := srv.protoHandshakeChecks(peers, inboundCount, c)
			if err == nil {
				// The handshakes are done and it passed all checks.
				p := newPeer(c, srv.Protocols)
				// If message events are enabled, pass the peerFeed
				// to the peer
				if srv.EnableMsgEvents {
					p.events = &srv.peerFeed
				}
				name := truncateName(c.name)
				srv.log.Debug("Adding p2p peer", "name", name, "addr", c.fd.RemoteAddr(), "peers", len(peers)+1)
				go srv.runPeer(p)
				peers[c.node.ID()] = p
				if p.Inbound() {
					inboundCount++
				}
			}
			// The dialer logic relies on the assumption that
			// dial tasks complete after the peer has been added or
			// discarded. Unblock the task last.
			select {
			case c.cont <- err:
			case <-srv.quit:
				break running
			}
		......
	}
流程3还可以更细致地进入dial.go去看两个任务:dialTask尝试拨号;如果节点不够,会执行discoverTask,它将k-桶中的node加入到拨号队列,尝试将node转为peer。
流程4: setupDiscovery()
// setupDiscovery starts UDP-based node discovery (flow 4), which drives
// all of the Kademlia-table logic.
func (srv *Server) setupDiscovery() error {
	// Nothing to do when both discovery variants are disabled.
	if srv.NoDiscovery && !srv.DiscoveryV5 {
		return nil
	}
	// Resolve the listen address as a UDP address.
	addr, err := net.ResolveUDPAddr("udp", srv.ListenAddr)
	if err != nil {
		return err
	}
	// Start listening for UDP packets.
	conn, err := net.ListenUDP("udp", addr)
	if err != nil {
		return err
	}
	realaddr := conn.LocalAddr().(*net.UDPAddr)
	srv.log.Debug("UDP listener up", "addr", realaddr)
	if srv.NAT != nil {
		if !realaddr.IP.IsLoopback() {
			// Map the discovery port on the NAT device in the background.
			go nat.Map(srv.NAT, srv.quit, "udp", realaddr.Port, realaddr.Port, "ethereum discovery")
		}
	}
	srv.localnode.SetFallbackUDP(realaddr.Port)
	// Discovery V4
	var unhandled chan discover.ReadPacket
	var sconn *sharedUDPConn
	if !srv.NoDiscovery {
		if srv.DiscoveryV5 {
			// Share the socket: packets v4 can't handle go to v5.
			unhandled = make(chan discover.ReadPacket, 100)
			sconn = &sharedUDPConn{conn, unhandled}
		}
		cfg := discover.Config{
			PrivateKey:  srv.PrivateKey,
			NetRestrict: srv.NetRestrict,
			Bootnodes:   srv.BootstrapNodes,
			Unhandled:   unhandled,
		}
		// Start the discovery protocol; this covers the entire Kademlia
		// table lifecycle (creation, refresh, message handling).
		ntab, err := discover.ListenUDP(conn, srv.localnode, cfg)
		if err != nil {
			return err
		}
		srv.ntab = ntab
	}
	......
	return nil
}
----------------------------------------------------------------------------------
// ListenUDP returns a new table that listens for UDP packets on laddr.
func ListenUDP(c conn, ln *enode.LocalNode, cfg Config) (*Table, error) {
	// newUDP wires up both the transport and the table; only the
	// table is exposed to the caller here.
	table, _, err := newUDP(c, ln, cfg)
	if err != nil {
		return nil, err
	}
	return table, nil
}
// newUDP sets up the discovery-v4 UDP transport together with its
// Kademlia table and starts both background loops.
func newUDP(c conn, ln *enode.LocalNode, cfg Config) (*Table, *udp, error) {
	// Build the transport around the shared connection. Note: named t
	// rather than udp so the type name is not shadowed.
	t := &udp{
		conn:            c,
		priv:            cfg.PrivateKey,
		netrestrict:     cfg.NetRestrict,
		localNode:       ln,
		db:              ln.Database(),
		closing:         make(chan struct{}),
		gotreply:        make(chan reply),
		addReplyMatcher: make(chan *replyMatcher),
	}
	// Create and maintain the Kademlia table, handing it the local
	// node's database for persistence.
	tab, err := newTable(t, ln.Database(), cfg.Bootnodes)
	if err != nil {
		return nil, nil, err
	}
	t.tab = tab
	t.wg.Add(2)
	go t.loop()
	// readLoop keeps receiving UDP packets and dispatches the discovery
	// protocol's message types.
	go t.readLoop(cfg.Unhandled)
	return t.tab, t, nil
}
流程4进入了kad协议的实现,即k-桶的维护。先详细看下newTable。
// newTable allocates the Kademlia routing table, seeds it from the
// bootnodes and the node database, and starts the refresh loop.
func newTable(t transport, db *enode.DB, bootnodes []*enode.Node) (*Table, error) {
	table := &Table{
		net:        t,
		db:         db,
		refreshReq: make(chan chan struct{}),
		initDone:   make(chan struct{}),
		closeReq:   make(chan struct{}),
		closed:     make(chan struct{}),
		rand:       mrand.New(mrand.NewSource(0)),
		ips:        netutil.DistinctNetSet{Subnet: tableSubnet, Limit: tableIPLimit},
	}
	// Record the bootnodes as the fallback set (tab.nursery) used by
	// later refreshes.
	if err := table.setFallbackNodes(bootnodes); err != nil {
		return nil, err
	}
	// Initialize every bucket with its own per-bucket IP limit set.
	for i := range table.buckets {
		table.buckets[i] = &bucket{
			ips: netutil.DistinctNetSet{Subnet: bucketSubnet, Limit: bucketIPLimit},
		}
	}
	table.seedRand()
	// Load previously known nodes from the database plus the nursery
	// configured above.
	table.loadSeedNodes()
	// loop runs the timers that periodically refresh the table.
	go table.loop()
	return table, nil
}
这里借用一张图说明一下后续的流程
上图参考https://blog.csdn.net/wangdenghui2005/article/details/86692624 这里面图形化的梳理了p2p流程
时间问题,梳理告一段落,至此,脑子中有了一个整体的框架流程,后续还需要继续努力,细看代码