K8S和Airflow
KubernetesExecutor 根据配置信息,找到任务实例(TaskInstance)指定的 YAML 模板,结合相关参数组装出运行 Pod 所需的信息,然后提交到 K8s 环境中执行,并等待执行结果。
KubernetesExecutor
class KubernetesExecutor(BaseExecutor, LoggingMixin):
    """Executor for Kubernetes."""

    def __init__(self):
        """
        Build the executor from ``KubeConfig`` and create its shared queues.

        The Kubernetes client, the pod scheduler and the scheduler job id are
        only declared here (as ``None``); ``start()`` assigns them.
        """
        # Executor settings; the parallelism value is forwarded to the base class below.
        kube_config = KubeConfig()
        self.kube_config = kube_config

        # Handles that stay unset until start() is called.
        self.kube_scheduler: Optional[AirflowKubernetesScheduler] = None
        self.kube_client: Optional[client.CoreV1Api] = None
        self.scheduler_job_id: Optional[str] = None

        # Both queues are created by the same multiprocessing manager:
        # task_queue carries work to run, result_queue carries state updates back.
        manager = multiprocessing.Manager()
        self._manager = manager
        self.task_queue: 'Queue[KubernetesJobType]' = manager.Queue()
        self.result_queue: 'Queue[KubernetesResultsType]' = manager.Queue()

        super().__init__(parallelism=kube_config.parallelism)
@provide_session
def clear_not_launched_queued_tasks(self, session=None) -> None:
    """
    Reset queued task instances for which no pod can be found.

    Every ``TaskInstance`` in the ``QUEUED`` state is looked up in Kubernetes
    through a label selector built from its dag_id, task_id, execution_date and
    this executor's ``scheduler_job_id``. When that lookup returns no pods, the
    task instance's state is updated to ``State.NONE``.

    :param session: database session used for the query and the update
        (presumably injected by ``provide_session`` when omitted -- confirm)
    :raises AirflowException: if ``self.kube_client`` has not been set
    """
    self.log.debug("Clearing tasks that have not been launched")
    if not self.kube_client:
        raise AirflowException(NOT_STARTED_MESSAGE)

    queued_tis = session.query(TaskInstance).filter(TaskInstance.state == State.QUEUED).all()
    self.log.info('When executor started up, found %s queued task instances', len(queued_tis))

    for ti in queued_tis:
        self.log.debug("Checking task %s", ti)

        # Label values are made label-safe before being joined into the selector.
        label_values = (
            pod_generator.make_safe_label_value(ti.dag_id),
            pod_generator.make_safe_label_value(ti.task_id),
            pod_generator.datetime_to_label_safe_datestring(ti.execution_date),
            pod_generator.make_safe_label_value(str(self.scheduler_job_id)),
        )
        selector = "dag_id={},task_id={},execution_date={},airflow-worker={}".format(*label_values)

        # Configured client request args are layered on top of the selector.
        list_kwargs = {'label_selector': selector}
        extra_request_args = self.kube_config.kube_client_request_args
        if extra_request_args:
            list_kwargs.update(extra_request_args)

        found = self.kube_client.list_namespaced_pod(self.kube_config.kube_namespace, **list_kwargs)
        if found.items:
            # At least one pod matches this task instance; leave its state alone.
            continue

        self.log.info(
            'TaskInstance: %s found in queued state but was not launched, rescheduling', ti
        )
        matching_rows = session.query(TaskInstance).filter(
            TaskInstance.dag_id == ti.dag_id,
            TaskInstance.task_id == ti.task_id,
            TaskInstance.execution_date == ti.execution_date,
        )
        matching_rows.update({TaskInstance.state: State.NONE})
def start(self) -> None:
    """
    Start the executor.

    Records ``self.job_id`` as the scheduler job id, obtains a Kubernetes
    client, creates the ``AirflowKubernetesScheduler`` that shares this
    executor's queues, and finally calls ``clear_not_launched_queued_tasks``.

    :raises AirflowException: if ``self.job_id`` is falsy
    """
    self.log.info('Start Kubernetes executor')

    job_id = self.job_id
    if not job_id:
        raise AirflowException("Could not get scheduler_job_id")
    self.scheduler_job_id = job_id
    self.log.debug('Start with scheduler_job_id: %s', job_id)

    # Obtain the kube client first; the scheduler below is built on top of it.
    kube_client = get_kube_client()
    self.kube_client = kube_client
    self.kube_scheduler = AirflowKubernetesScheduler(
        self.kube_config,
        self.task_queue,
        self.result_queue,
        kube_client,
        job_id,
    )

    self.clear_not_launched_queued_tasks()
def execute_async(
    self,
    key: TaskInstanceKey,
    command: CommandType,
    queue: Optional[str] = None,
    executor_config: Optional[Any] = None,
) -> None:
    """
    Execute a task asynchronously by placing it on the task queue.

    :param key: key of the task instance to run
    :param command: command to run for the task
    :param queue: not used by this method
    :param executor_config: optional per-task configuration; it is passed to
        ``PodGenerator.from_obj`` and, when truthy, its
        ``"pod_template_override"`` entry selects the pod template file
    :raises AirflowException: if ``self.task_queue`` is falsy
    """
    self.log.info('Add task %s with command %s with executor_config %s', key, command, executor_config)

    # Pod-level configuration derived from executor_config.
    pod_from_config = PodGenerator.from_obj(executor_config)
    # Optional pod template override; None when no executor_config was given.
    template_override = executor_config.get("pod_template_override", None) if executor_config else None

    if not self.task_queue:
        raise AirflowException(NOT_STARTED_MESSAGE)

    # Record <TaskInstanceKey -> (state, scheduler_job_id)> in the event buffer
    # before the job is handed over to the queue.
    self.event_buffer[key] = (State.QUEUED, self.scheduler_job_id)
    job = (key, command, pod_from_config, template_override)
    self.task_queue.put(job)
def sync(self) -> None:
"""
Synchronize task state.

Drains ``result_queue`` and applies each reported state change, then takes up
to ``kube_config.worker_pods_creation_batch_size`` tasks from ``task_queue``
and hands each one to ``kube_scheduler.run_next``.

NOTE(review): this excerpt is abridged -- the ``...`` lines stand for code
that is not shown here, and the trailing ``try`` statements are cut off.
"""
# (code omitted in this excerpt)
...
# Resource version carried by the most recent result; stays None if there were no results.
last_resource_version = None
while True: # pylint: disable=too-many-nested-blocks
try:
# Non-blocking fetch of one result from result_queue; raises Empty when there is none.
results = self.result_queue.get_nowait()
try:
key, state, pod_id, namespace, resource_version = results
last_resource_version = resource_version
self.log.info('Changing state of %s to %s', results, state)
try:
self._change_state(key, state, pod_id, namespace)
except Exception as e: # pylint: disable=broad-except
# The state change failed: log it and put the result back on the queue.
self.log.exception(
"Exception: %s when attempting to change state of %s to %s, re-queueing.",
e,
results,
state,
)
self.result_queue.put(results)
finally:
# Always mark the fetched result as processed.
self.result_queue.task_done()
except Empty:
break # queue drained: leave the loop
resource_instance = ResourceVersion()
# Keep the existing resource version when no result supplied a newer one.
resource_instance.resource_version = last_resource_version or resource_instance.resource_version
# pylint: disable=too-many-nested-blocks
for _ in range(self.kube_config.worker_pods_creation_batch_size):
try:
task = self.task_queue.get_nowait()
try:
# Run the task.
self.kube_scheduler.run_next(task)
except ApiException as e:
# (error handling omitted in this excerpt)
...
@staticmethod
def construct_pod(  # pylint: disable=too-many-arguments
    dag_id: str,
    task_id: str,
    pod_id: str,
    try_number: int,
    kube_image: str,
    date: datetime.datetime,
    args: List[str],
    pod_override_object: Optional[k8s.V1Pod],
    base_worker_pod: k8s.V1Pod,
    namespace: str,
    scheduler_job_id: int,
) -> k8s.V1Pod:
    """
    Construct a pod by gathering and consolidating the configuration from 3 places:
        - airflow.cfg
        - executor_config
        - dynamic arguments

    :param dag_id: DAG id, stored label-safe in the pod's labels and annotations
    :param task_id: task id, stored label-safe in the pod's labels and annotations
    :param pod_id: base value passed to ``PodGenerator.make_unique_pod_id`` for the pod name
    :param try_number: try number, stored as a string label and annotation
    :param kube_image: image used when the override pod does not provide one
    :param date: execution date of the task
    :param args: arguments for the ``base`` container
    :param pod_override_object: optional pod coming from ``executor_config``
    :param base_worker_pod: pod coming from the pod template file
    :param namespace: namespace set on the pod metadata
    :param scheduler_job_id: scheduler job id, stored as the ``airflow-worker`` label
    :return: the result of reconciling the three pods
    """
    # Use the image of the override pod's first container when it can be read
    # and is non-empty; in every other case fall back to kube_image.
    try:
        image = pod_override_object.spec.containers[0].image or kube_image  # type: ignore
    except Exception:  # pylint: disable=W0703
        image = kube_image

    safe_task_id = make_safe_label_value(task_id)
    safe_dag_id = make_safe_label_value(dag_id)
    worker_label = make_safe_label_value(str(scheduler_job_id))

    annotations = {
        'dag_id': safe_dag_id,
        'task_id': safe_task_id,
        'execution_date': date.isoformat(),
        'try_number': str(try_number),
    }
    pod_name = PodGenerator.make_unique_pod_id(pod_id)
    labels = {
        'airflow-worker': worker_label,
        'dag_id': safe_dag_id,
        'task_id': safe_task_id,
        'execution_date': datetime_to_label_safe_datestring(date),
        'try_number': str(try_number),
        'airflow_version': airflow_version.replace('+', '-'),
        'kubernetes_executor': 'True',
    }
    metadata = k8s.V1ObjectMeta(
        namespace=namespace,
        annotations=annotations,
        name=pod_name,
        labels=labels,
    )

    # Single "base" container carrying the task arguments and a marker env var.
    base_container = k8s.V1Container(
        name="base",
        args=args,
        image=image,
        env=[k8s.V1EnvVar(name="AIRFLOW_IS_K8S_EXECUTOR_POD", value="True")],
    )
    dynamic_pod = k8s.V1Pod(
        metadata=metadata,
        spec=k8s.V1PodSpec(containers=[base_container]),
    )

    # Reconcile the pods starting with the first chronologically:
    # pod from the pod_template_file -> pod from the executor_config arg -> pod built here.
    # NOTE(review): presumably later pods take precedence -- confirm against
    # PodGenerator.reconcile_pods.
    pods_in_order = [base_worker_pod, pod_override_object, dynamic_pod]
    return reduce(PodGenerator.reconcile_pods, pods_in_order)
def run_pod_async(self, pod: V1Pod, **kwargs):
    """
    Run a pod asynchronously.

    The pod is passed through ``pod_mutation_hook``, serialized, and submitted
    with ``create_namespaced_pod`` in the namespace named by its metadata.

    :param pod: pod to create
    :param kwargs: extra keyword arguments forwarded to ``create_namespaced_pod``
    :return: the response returned by ``create_namespaced_pod``
    :raises Exception: any error from the create call is logged together with
        the pod JSON and then re-raised
    """
    pod_mutation_hook(pod)

    kube_api = self._client
    # Convert the pod object into a plain structure that can be serialized.
    sanitized_pod = kube_api.api_client.sanitize_for_serialization(pod)
    json_pod = json.dumps(sanitized_pod, indent=2)
    self.log.debug('Pod Creation Request: \n%s', json_pod)

    try:
        # Create the pod in Kubernetes.
        resp = kube_api.create_namespaced_pod(
            body=sanitized_pod,
            namespace=pod.metadata.namespace,
            **kwargs,
        )
        self.log.debug('Pod Creation Response: %s', resp)
    except Exception as e:
        self.log.exception('Exception when attempting to create Namespaced Pod: %s', json_pod)
        raise e
    else:
        return resp
参考资料
Airflow 源码解析和性能优化
https://gitbook.cn/books/5db9939362adeb3006d193f9/index.html
思路非常清晰,本文主要参考了这篇文章。它是一篇付费文章,但物超所值,感兴趣的可以购买,其中还包含关于 Airflow 优化的内容。
airflow源码解读
https://www.zhihu.com/column/c_208839854
带注解的airflow代码,分支:v1-9-stable