摘要
当业务量发生变化时,需要对上游服务进行扩缩容,或者因服务器硬件故障需要更换服务器。如果网关是通过静态配置来维护上游服务信息,在微服务架构模式下,其带来的维护成本可想而知。再者,如果不能及时更新这些信息,也会对业务带来一定的影响,人为误操作带来的影响同样不可忽视,所以网关非常有必要通过服务注册中心动态获取最新的服务实例信息。架构图如下所示:
服务启动时将自身的一些信息,比如服务名、IP、端口等信息上报到注册中心;各个服务与注册中心使用一定机制(例如心跳)通信,如果注册中心与服务长时间无法通信,就会注销该实例;当服务下线时,会删除注册中心的实例信息;
网关会准实时地从注册中心获取服务实例信息;
当用户通过网关请求服务时,网关从注册中心获取的实例列表中选择一个进行代理;
常见的注册中心:Eureka, Etcd, Consul, Nacos, Zookeeper 等
备注
具体的服务发现实现流程,以及开放的 control API “/dump”,请参考文章:
APISIX源码解析-服务发现-discover【dns】
discover【kubernetes】
关键属性
discovery:
  kubernetes:
    service:
      # apiserver schema, options [http, https]
      schema: https #default https
      # apiserver host, options [ipv4, ipv6, domain, environment variable]
      host: ${KUBERNETES_SERVICE_HOST} #default ${KUBERNETES_SERVICE_HOST}
      # apiserver port, options [port number, environment variable]
      port: ${KUBERNETES_SERVICE_PORT} #default ${KUBERNETES_SERVICE_PORT}
    client:
      # serviceaccount token or token_file
      token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
      #token: |-
      # eyJhbGciOiJSUzI1NiIsImtpZCI6Ikx5ME1DNWdnbmhQNkZCNlZYMXBsT3pYU3BBS2swYzBPSkN3ZnBESGpkUEEif
      # 6Ikx5ME1DNWdnbmhQNkZCNlZYMXBsT3pYU3BBS2swYzBPSkN3ZnBESGpkUEEifeyJhbGciOiJSUzI1NiIsImtpZCI
    # kubernetes discovery plugin support use namespace_selector
    # you can use one of [equal, not_equal, match, not_match] filter namespace
    namespace_selector:
      # only save endpoints with namespace equal default
      equal: default
      # only save endpoints with namespace not equal default
      #not_equal: default
      # only save endpoints with namespace match one of [default, ^my-[a-z]+$]
      #match:
      #- default
      #- ^my-[a-z]+$
      # only save endpoints with namespace not match one of [default, ^my-[a-z]+$]
      #not_match:
      #- default
      #- ^my-[a-z]+$
    # kubernetes discovery plugin support use label_selector
    # for the expression of label_selector, please refer to https://kubernetes.io/docs/concepts/overview/working-with-objects/labels
    label_selector: |-
      first="a",second="b"
如果 Kubernetes 服务发现模块运行在 Pod 内, 你可以使用最简配置:
discovery:
  kubernetes: { }
如果 Kubernetes 服务发现模块运行在 Pod 外, 你需要新建或选取指定的 ServiceAccount, 获取其 Token 值, 然后使用如下配置:
discovery:
  kubernetes:
    service:
      schema: https
      host: # enter apiserver host value here
      port: # enter apiServer port value here
    client:
      token: # enter serviceaccount token value here
      #token_file: # enter file path here
源码实现
kubernetes.init.lua
--- Look up the upstream nodes for a discovery service name.
-- The name is split on its last ":" into an endpoint key and a port part
-- (expected form, per the pattern comment: "namespace/name:port_name").
-- The version stored under "<endpoint key>#version" in the shared dict is
-- passed to the LRU cache together with the service name.
--
-- Example of the returned value, as given by the original author:
--   {
--     { host = "10.5.10.109", port = 3306, weight = 50 },
--     { host = "10.5.10.110", port = 3306, weight = 50 },
--   }
--
-- @param service_name upstream service name to resolve
-- @return whatever endpoint_lrucache yields for this service, or nil when
--         the name does not match the pattern or no version is stored
function _M.nodes(service_name)
    -- namespace/name:port_name
    local captures = ngx.re.match(service_name, "^(.*):(.*)$", "jo")
    if not captures then
        core.log.info("get unexpected upstream service_name: ", service_name)
        return nil
    end

    local key, port = captures[1], captures[2]

    -- get_stale is used for the lookup; only its first result is kept
    local version = endpoint_dict:get_stale(key .. "#version")
    if version then
        -- endpoint_lrucache: service_name -> endpoint[port], keyed by version
        return endpoint_lrucache(service_name, version,
                                 create_endpoint_lrucache, key, port)
    end

    core.log.info("get empty endpoint version from discovery DICT ", key)
    return nil
end
--- Per-worker initialisation of the Kubernetes discovery module.
-- Every worker binds the `kubernetes` shared dict. Only the process whose
-- type is "privileged agent" continues: it builds the apiserver settings,
-- creates the Endpoints informer, wires its callbacks and starts the
-- list-watch timer loop.
--
-- Raises (via error) when the shared dict is missing, or when
-- get_apiserver / informer_factory.new report an error.
function _M.init_worker()
    endpoint_dict = ngx.shared.kubernetes
    if not endpoint_dict then
        error("failed to get lua_shared_dict: kubernetes, please check your APISIX version")
    end

    if process.type() ~= "privileged agent" then
        return
    end

    local discovery_conf = local_conf.discovery.kubernetes

    default_weight = discovery_conf.default_weight

    -- build the apiserver configuration; error() does not return, so no
    -- explicit return is needed after it
    local apiserver, err = get_apiserver(discovery_conf)
    if err then
        error(err)
    end

    -- create the informer for Endpoints objects
    local endpoints_informer
    endpoints_informer, err = informer_factory.new("", "v1",
                                                   "Endpoints", "endpoints", "")
    if err then
        error(err)
    end

    -- apply the namespace_selector configuration
    setup_namespace_selector(discovery_conf, endpoints_informer)

    -- apply the label_selector configuration
    setup_label_selector(discovery_conf, endpoints_informer)

    -- informer event callbacks; "added" and "modified" share one handler
    endpoints_informer.on_added = on_endpoint_modified
    endpoints_informer.on_modified = on_endpoint_modified
    endpoints_informer.on_deleted = on_endpoint_deleted
    endpoints_informer.pre_list = pre_list
    endpoints_informer.post_list = post_list

    local timer_runner

    -- Schedule the next list_watch run. A failed ngx.timer.at would otherwise
    -- end the loop without any trace, so it is logged (unless the worker is
    -- shutting down, where the failure is expected).
    local function schedule(delay)
        local ok, timer_err = ngx.timer.at(delay, timer_runner)
        if not ok and not ngx.worker.exiting() then
            core.log.error("failed to create list_watch timer, kind: ",
                           endpoints_informer.kind, ", err: ", timer_err)
        end
    end

    timer_runner = function(premature)
        if premature then
            return
        end

        -- run one list_watch cycle; pcall keeps a runtime error from
        -- breaking the loop
        local ok, status = pcall(endpoints_informer.list_watch, endpoints_informer, apiserver)

        local retry_interval = 0
        if not ok then
            core.log.error("list_watch failed, kind: ", endpoints_informer.kind,
                           ", reason: ", "RuntimeException", ", message : ", status)
            retry_interval = 40
        elseif not status then
            retry_interval = 40
        end

        -- restart immediately after a clean cycle, or after 40s when the
        -- previous cycle failed
        schedule(retry_interval)
    end

    schedule(0)
end
lua informer
--- Run the LIST phase of an informer against the apiserver.
-- Sends one GET for informer.path, feeds every returned item to
-- informer:on_added(item, "list"), records the resourceVersion and, when the
-- response carries a non-empty `continue` token, requests the next page.
--
-- @param httpc     HTTP client used for the request
-- @param apiserver table providing host, port and token for the headers
-- @param informer  informer table (path, list_kind, callbacks, state)
-- @return true on success; otherwise false, a reason string and a message
local function list(httpc, apiserver, informer)
    -- evaluate the query once so the log line shows exactly what was sent
    local query = list_query(informer)

    local response, err = httpc:request({
        path = informer.path,
        query = query,
        headers = {
            ["Host"] = apiserver.host .. ":" .. apiserver.port,
            ["Authorization"] = "Bearer " .. apiserver.token,
            ["Accept"] = "application/json",
            ["Connection"] = "keep-alive"
        }
    })

    core.log.info("--raw=", informer.path, "?", query)

    if not response then
        return false, "RequestError", err or ""
    end

    if response.status ~= 200 then
        return false, response.reason, response:read_body() or ""
    end

    local body
    body, err = response:read_body()
    if err then
        return false, "ReadBodyError", err
    end

    local data = core.json.decode(body)
    if not data or data.kind ~= informer.list_kind then
        return false, "UnexpectedBody", body
    end

    informer.version = data.metadata.resourceVersion

    if informer.on_added then
        for _, item in ipairs(data.items or {}) do
            informer:on_added(item, "list")
        end
    end

    informer.continue = data.metadata.continue

    -- More pages pending (the apiserver limits a single response): fetch the
    -- next one and propagate its result, so a failure on a later page is not
    -- reported as success.
    if informer.continue and informer.continue ~= "" then
        return list(httpc, apiserver, informer)
    end

    return true
end
--- Run the WATCH phase of an informer against the apiserver.
-- Issues up to 8 consecutive watch requests. Each request streams its body
-- in chunks; every chunk, prefixed with any leftover from the previous one,
-- is handed to split_event together with dispatch_event.
--
-- @param httpc     HTTP client used for the requests
-- @param apiserver table providing host, port and token for the headers
-- @param informer  informer table (path, overtime, callbacks, state)
-- @return true when all rounds finish or split_event reports "ResourceGone";
--         otherwise false, a reason string and a message
local function watch(httpc, apiserver, informer)
    -- number of watch requests per call
    local watch_times = 8

    for _ = 1, watch_times do
        -- randomised watch duration, stored on the informer before the query
        -- is built
        local watch_seconds = 1800 + math.random(9, 999)
        informer.overtime = watch_seconds

        -- the HTTP read timeout is 120s longer than the watch duration
        local http_seconds = watch_seconds + 120
        -- connect timeout, send timeout, read timeout (milliseconds)
        httpc:set_timeouts(2000, 3000, http_seconds * 1000)

        -- evaluate the query once so the log line shows exactly what was sent
        local query = watch_query(informer)

        local response, err = httpc:request({
            path = informer.path,
            query = query,
            headers = {
                ["Host"] = apiserver.host .. ":" .. apiserver.port,
                ["Authorization"] = "Bearer " .. apiserver.token,
                ["Accept"] = "application/json",
                ["Connection"] = "keep-alive"
            }
        })

        core.log.info("--raw=", informer.path, "?", query)

        -- test the response itself (as list() does) so a missing response is
        -- never indexed below, even if no error string came with it
        if not response then
            return false, "RequestError", err or ""
        end

        if response.status ~= 200 then
            return false, response.reason, response:read_body() or ""
        end

        local ok
        local remainder_body
        local body
        local reason

        -- keep reading chunks until the stream ends or the read times out
        while true do
            body, err = response.body_reader()
            if err then
                return false, "ReadBodyError", err
            end

            -- nil chunk: this watch request is finished, start the next round
            if not body then
                break
            end

            -- prepend the unconsumed tail of the previous chunk
            if remainder_body and #remainder_body > 0 then
                body = remainder_body .. body
            end

            ok, remainder_body, reason, err = split_event(body, dispatch_event, informer)
            if not ok then
                -- "ResourceGone" is returned as success rather than as a failure
                if reason == "ResourceGone" then
                    return true
                end
                return false, reason, err
            end
        end
    end

    return true
end
内存存储格式
-- Excerpt: how one Endpoints object is written into the shared dict.
endpoint_dict = ngx.shared.kubernetes
-- dict key is "<namespace>/<name>" taken from the object's metadata
local endpoint_key = endpoint.metadata.namespace .. "/" .. endpoint.metadata.name
-- endpoint_content layout (as described by the original author):
-- port -> nodes -> node {
--     host = address.ip,
--     port = port.port,
--     weight = default_weight
-- }
local endpoint_content = core.json.encode(endpoint_buffer, true)
-- the CRC32 of the encoded content is used as the version value
local endpoint_version = ngx.crc32_long(endpoint_content)
local _, err
-- the version is stored under "<endpoint_key>#version"
_, err = endpoint_dict:safe_set(endpoint_key .. "#version", endpoint_version)
if err then
core.log.error("set endpoint version into discovery DICT failed, ", err)
return
end
-- the encoded content is stored under the endpoint key itself
_, err = endpoint_dict:safe_set(endpoint_key, endpoint_content)
if err then
core.log.error("set endpoint into discovery DICT failed, ", err)
-- drop the version written above so it does not remain without content
endpoint_dict:delete(endpoint_key .. "#version")
end