查看nsqd模块的入口文件apps/nsqd/nsqd.go可知,nsqd模块从开始到结束经历了5步:
1、初始化nsqd实例nsqd.New();
2、读取元数据nsqd.LoadMetadata();
3、保存元数据nsqd.PersistMetadata();
4、主方法nsqd.Main();
5、程序终止nsqd.Exit()。
初始化
初始化时,若未指定--data-path,则默认使用当前工作目录(os.Getwd()),并锁定该目录;之后验证各配置参数是否有效:
// New builds an NSQD instance from opts, locks its data directory and
// validates the supplied options.
//
// Any validation failure is logged and terminates the process with
// os.Exit(1); on success the configured instance is returned. Note that opts
// is modified in place: Logger, StatsdPrefix and TLSRequired may be rewritten.
func New(opts *Options) *NSQD {
	// Where data is stored; fall back to the current working directory
	// (like the shell's pwd) when --data-path was not given.
	dataPath := opts.DataPath
	if dataPath == "" {
		dataPath, _ = os.Getwd()
	}

	// Default logger writes to stderr with date, time and microseconds.
	if opts.Logger == nil {
		const logFlags = log.Ldate | log.Ltime | log.Lmicroseconds
		opts.Logger = log.New(os.Stderr, opts.LogPrefix, logFlags)
	}

	startedAt := time.Now()
	httpClient := http_api.NewClient(nil, opts.HTTPClientConnectTimeout, opts.HTTPClientRequestTimeout)
	n := &NSQD{
		startTime:            startedAt,
		topicMap:             make(map[string]*Topic),
		exitChan:             make(chan int),
		notifyChan:           make(chan interface{}),
		optsNotificationChan: make(chan struct{}, 1),
		ci:                   clusterinfo.New(opts.Logger, httpClient),
		dl:                   dirlock.New(dataPath),
	}

	// Publish the options and an empty error value on the instance.
	n.swapOpts(opts)
	n.errValue.Store(errStore{})

	// Lock the data directory; a failure is reported as the directory being
	// in use, possibly by another nsqd.
	// NOTE(review): the article's author traced Lock() to an implementation
	// that simply returns nil yet still saw this failure at runtime;
	// presumably dirlock has more than one implementation - confirm.
	if err := n.dl.Lock(); err != nil {
		n.logf("FATAL: --data-path=%s in use (possibly by another instance of nsqd)", dataPath)
		os.Exit(1)
	}

	// Validate the configuration options.
	if lvl := opts.MaxDeflateLevel; lvl < 1 || lvl > 9 {
		n.logf("FATAL: --max-deflate-level must be [1,9]")
		os.Exit(1)
	}
	if id := opts.ID; id < 0 || id >= 1024 {
		n.logf("FATAL: --node-id must be [0,1024)")
		os.Exit(1)
	}

	// Expand "%s" in the statsd prefix to a key built from the broadcast
	// address and the HTTP port, and make sure the prefix ends with a dot.
	if opts.StatsdPrefix != "" {
		_, port, err := net.SplitHostPort(opts.HTTPAddress)
		if err != nil {
			n.logf("ERROR: failed to parse HTTP address (%s) - %s", opts.HTTPAddress, err)
			os.Exit(1)
		}
		hostKey := statsd.HostKey(net.JoinHostPort(opts.BroadcastAddress, port))
		expanded := strings.Replace(opts.StatsdPrefix, "%s", hostKey, -1)
		if last := expanded[len(expanded)-1]; last != '.' {
			expanded += "."
		}
		opts.StatsdPrefix = expanded
	}

	// Setting a client auth policy turns TLS on if it was not required yet.
	if opts.TLSRequired == TLSNotRequired && opts.TLSClientAuthPolicy != "" {
		opts.TLSRequired = TLSRequired
	}

	tlsConfig, err := buildTLSConfig(opts)
	switch {
	case err != nil:
		n.logf("FATAL: failed to build TLS config - %s", err)
		os.Exit(1)
	case tlsConfig == nil && opts.TLSRequired != TLSNotRequired:
		n.logf("FATAL: cannot require TLS client connections without TLS key and cert")
		os.Exit(1)
	}
	n.tlsConfig = tlsConfig

	n.logf(version.String("nsqd"))
	n.logf("ID: %d", opts.ID)
	return n
}
读取元数据nsqd.LoadMetadata()
从--data-path指定的目录中读取元数据。元数据保存了两份,方便回滚时使用。元数据在文件中是以JSON形式保存的,读取后进行解析,并验证数据中的topic和channel名称是否有效。元数据的结构为:
// meta is the decode target for the persisted metadata JSON: a list of
// topics, each carrying its own list of channels. The file also contains a
// top-level "version" key; there is no field for it here, so it is ignored
// when decoding.
type meta struct {
	// Topics holds one entry per persisted topic.
	Topics []struct {
		// Name is the topic name.
		Name string `json:"name"`
		// Paused records whether the topic was paused when persisted.
		Paused bool `json:"paused"`
		// Channels holds one entry per persisted channel of this topic.
		Channels []struct {
			// Name is the channel name.
			Name string `json:"name"`
			// Paused records whether the channel was paused when persisted.
			Paused bool `json:"paused"`
		} `json:"channels"`
	} `json:"topics"`
}
查看元数据:
$ tail -200 nsqd.856.dat
{"topics":[{"channels":[],"name":"test","paused":false},{"channels":[],"name":"test1","paused":false}],"version":"0.3.8"}
这里查看的是备份文件中的元数据,其中856是当前nsqd的node-id(即opts.ID,对应命令行参数--node-id),默认为856,可在配置文件中或命令行中修改。
// LoadMetadata reads the persisted topic/channel metadata and applies it to
// n: every valid topic and channel listed is looked up through GetTopic /
// GetChannel and paused if it was recorded as paused.
//
// Two files are consulted: the current metadata file and the older file whose
// name includes the nsqd ID, which is maintained in parallel to allow
// roll-back. It returns nil when neither file has content (fresh start), and
// an error when a file cannot be read, when both files have content that
// differs, or when the JSON cannot be parsed. Invalid topic or channel names
// are logged and skipped rather than treated as errors.
func (n *NSQD) LoadMetadata() error {
	// Mark loading as in progress for the duration of this call; the flag is
	// cleared again on return.
	atomic.StoreInt32(&n.isLoading, 1)
	defer atomic.StoreInt32(&n.isLoading, 0)

	// Example file content:
	//   $ tail -200 nsqd.856.dat
	//   {"topics":[{"channels":[],"name":"test","paused":false},{"channels":[],"name":"test1","paused":false}],"version":"0.3.8"}
	fn := newMetadataFile(n.getOpts())
	// old metadata filename with ID, maintained in parallel to enable roll-back
	fnID := oldMetadataFile(n.getOpts())

	// Read both metadata files.
	data, err := readOrEmpty(fn)
	if err != nil {
		return err
	}
	dataID, errID := readOrEmpty(fnID)
	if errID != nil {
		return errID
	}

	// Neither file has content: nothing to restore.
	if data == nil && dataID == nil {
		return nil // fresh start
	}

	// Both files have content: they must agree, otherwise refuse to guess
	// which one is authoritative.
	if data != nil && dataID != nil && !bytes.Equal(data, dataID) {
		return fmt.Errorf("metadata in %s and %s do not match (delete one)", fn, fnID)
	}

	if data == nil {
		// only old metadata file exists, use it
		fn = fnID
		data = dataID
	}

	var m meta
	err = json.Unmarshal(data, &m)
	if err != nil {
		return fmt.Errorf("failed to parse metadata in %s - %s", fn, err)
	}

	for _, t := range m.Topics {
		// Skip topics whose name does not pass validation.
		if !protocol.IsValidTopicName(t.Name) {
			n.logf("WARNING: skipping creation of invalid topic %s", t.Name)
			continue
		}
		// Obtain the Topic object for this name (presumably created on
		// demand by GetTopic - confirm in nsqd/topic.go).
		topic := n.GetTopic(t.Name)
		// Restore the paused state recorded in the metadata.
		if t.Paused {
			topic.Pause()
		}
		for _, c := range t.Channels {
			if !protocol.IsValidChannelName(c.Name) {
				n.logf("WARNING: skipping creation of invalid channel %s", c.Name)
				continue
			}
			// Same treatment as for the topic above.
			channel := topic.GetChannel(c.Name)
			if c.Paused {
				channel.Pause()
			}
		}
	}
	return nil
}
保存元数据nsqd.PersistMetadata()
将上面读取到的元数据写入文件,刚读取了,立马重新写入文件中,不理解为什么要这么做,写入的数据只是将读取的数据简单过滤了一下,将标记为ephemeral的数据过滤,可能是在读取后还没来得及写入时,元数据有变动,目前还没发现。
// PersistMetadata writes the names and paused state of all non-ephemeral
// topics and channels, plus the binary version, to the metadata file as JSON.
//
// The data is first written to a randomly named temp file and then renamed
// over the real file. The older, ID-named metadata file is kept in step: on
// non-Windows systems as a symlink to the new file, on Windows as a full copy.
// The first error encountered is returned.
//
// n.topicMap is read here without taking n's lock; Exit calls this method
// while holding n.Lock(), so callers are presumably expected to hold it.
func (n *NSQD) PersistMetadata() error {
	// persist metadata about what topics/channels we have, across restarts
	fileName := newMetadataFile(n.getOpts())
	// old metadata filename with ID, maintained in parallel to enable roll-back
	fileNameID := oldMetadataFile(n.getOpts())
	n.logf("NSQ: persisting topic/channel metadata to %s", fileName)
	js := make(map[string]interface{})
	// Non-nil empty slice so that an empty list is encoded as [] rather
	// than null.
	topics := []interface{}{}
	// Collect the topics and channels that should be persisted.
	for _, topic := range n.topicMap {
		// Ephemeral topics are not persisted.
		if topic.ephemeral {
			continue
		}
		topicData := make(map[string]interface{})
		topicData["name"] = topic.name
		topicData["paused"] = topic.IsPaused()
		channels := []interface{}{}
		// Hold the topic lock while walking its channel map.
		topic.Lock()
		for _, channel := range topic.channelMap {
			channel.Lock()
			// Ephemeral channels are not persisted; release the lock before
			// moving on to the next channel.
			if channel.ephemeral {
				channel.Unlock()
				continue
			}
			channelData := make(map[string]interface{})
			channelData["name"] = channel.name
			channelData["paused"] = channel.IsPaused()
			channels = append(channels, channelData)
			channel.Unlock()
		}
		topic.Unlock()
		topicData["channels"] = channels
		topics = append(topics, topicData)
	}
	js["version"] = version.Binary
	js["topics"] = topics
	data, err := json.Marshal(&js)
	if err != nil {
		return err
	}
	tmpFileName := fmt.Sprintf("%s.%d.tmp", fileName, rand.Int())
	// Write to a temp file first.
	err = writeSyncFile(tmpFileName, data)
	if err != nil {
		return err
	}
	// Rename the temp file into place; an existing fileName is replaced.
	err = os.Rename(tmpFileName, fileName)
	if err != nil {
		return err
	}
	// technically should fsync DataPath here
	// If the old ID-named file is already a symlink, there is nothing more
	// to do.
	stat, err := os.Lstat(fileNameID)
	if err == nil && stat.Mode()&os.ModeSymlink != 0 {
		return nil
	}
	// if no symlink (yet), race condition:
	// crash right here may cause next startup to see metadata conflict and abort
	tmpFileNameID := fmt.Sprintf("%s.%d.tmp", fileNameID, rand.Int())
	// On anything but Windows, create a symlink to the new file.
	if runtime.GOOS != "windows" {
		// The symlink is created under a temp name and renamed into place
		// below.
		err = os.Symlink(fileName, tmpFileNameID)
	} else {
		// on Windows need Administrator privs to Symlink
		// instead write copy every time
		err = writeSyncFile(tmpFileNameID, data)
	}
	if err != nil {
		return err
	}
	err = os.Rename(tmpFileNameID, fileNameID)
	if err != nil {
		return err
	}
	// technically should fsync DataPath here
	return nil
}
主方法nsqd.Main()
Main()方法和nsqlookupd/nsqlookupd.go中的Main()方法逻辑差不多,区别在于这里添加了HTTPS的处理,为了降低阅读量,暂时放弃HTTPS相关的逻辑,以后再看这块。
程序终止nsqd.Exit()
程序终止,做一些清理工作,网络监听、channel这些,该关闭的关闭,该删除的删除,并将元数据和内存中未消费的消息写入本地磁盘
// Exit shuts the instance down: it closes the TCP, HTTP and HTTPS listeners
// that exist, persists the metadata and closes every topic while holding n's
// lock, then closes exitChan, waits on the wait group and finally releases
// the data directory lock.
//
// A failure to persist metadata is logged and does not abort the shutdown.
func (n *NSQD) Exit() {
	// Close whichever listeners were opened.
	if l := n.tcpListener; l != nil {
		l.Close()
	}
	if l := n.httpListener; l != nil {
		l.Close()
	}
	if l := n.httpsListener; l != nil {
		l.Close()
	}

	n.Lock()
	// Write the metadata to local disk.
	if err := n.PersistMetadata(); err != nil {
		n.logf("ERROR: failed to persist metadata - %s", err)
	}
	n.logf("NSQ: closing topics")
	for _, t := range n.topicMap {
		// Per the article, closing a topic closes its channels and writes
		// unconsumed in-memory messages to local disk (not visible here).
		t.Close()
	}
	n.Unlock()

	// Closing exitChan is followed by waiting on the wait group; the data
	// directory lock is released only after that wait returns.
	close(n.exitChan)
	n.waitGroup.Wait()

	n.dl.Unlock()
}