1. Install Docker
Reference: https://my.oschina.net/ilovetao/blog/3034502
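If you just want a quick start, Docker's official convenience script also works (a minimal sketch, assuming a recent mainstream Linux distribution; see the linked post for distro-specific steps):

curl -fsSL https://get.docker.com -o get-docker.sh
sudo sh get-docker.sh
sudo systemctl start docker
sudo docker run hello-world    # verify the installation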
2. Install Airflow with Docker, following:
https://github.com/puckel/docker-airflow
For SequentialExecutor, run:
docker run -d -p 8080:8080 puckel/docker-airflow webserver
For LocalExecutor, run:
docker-compose -f docker-compose-LocalExecutor.yml up -d
For CeleryExecutor, run:
docker-compose -f docker-compose-CeleryExecutor.yml up -d
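Whichever executor you pick, it is worth confirming the containers actually came up before going further (a quick sanity check, nothing Airflow-specific):

docker-compose -f docker-compose-CeleryExecutor.yml ps    # every service should show "Up"
curl -s http://localhost:8080 >/dev/null && echo "webserver reachable"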
Configuring airflow.cfg:
In the Docker setup, environment variables take the place of airflow.cfg entries, following the naming convention AIRFLOW__{SECTION}__{KEY}.
For example, the airflow.cfg option:
sql_alchemy_conn
(in the [core] section) becomes the Docker environment variable:
AIRFLOW__CORE__SQL_ALCHEMY_CONN
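So an override that would normally live in airflow.cfg can instead be set per service in the compose file (a sketch; the connection string here is a placeholder, not my real one):

    webserver:
        environment:
            # [core] sql_alchemy_conn in airflow.cfg
            - AIRFLOW__CORE__SQL_ALCHEMY_CONN=postgresql+psycopg2://airflow:airflow@postgres/airflow
            # [core] load_examples in airflow.cfg
            - AIRFLOW__CORE__LOAD_EXAMPLES=False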
Airflow's extra Python dependencies:
Put a requirements.txt in the same directory as the Dockerfile or the yml file:
$(pwd)/requirements.txt
The entrypoint pip-installs it at container startup whenever it is mounted at /requirements.txt (see entrypoint.sh below).
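For example, a minimal requirements.txt for this setup might pin the MySQL driver that the mysql:// connection string in my entrypoint.sh needs (the package choices and versions are my own illustration, not part of the upstream image):

# requirements.txt, installed by entrypoint.sh via `pip install --user -r /requirements.txt`
mysqlclient==1.4.2
redis==3.2.0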
What if you still want to use airflow.cfg?
I use CeleryExecutor mode, and simply share the file into the containers via volumes:
edit docker-compose-CeleryExecutor.yml and add the mount shown below to the volumes section of the webserver, scheduler, and worker services.
The dags directory is handled the same way.
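The added mounts look like this (taken directly from my final compose file below):

        volumes:
            - ./dags:/usr/local/airflow/dags
            - ./airflow.cfg:/usr/local/airflow/airflow.cfg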
Finally, these are my modified files.
File: docker-compose.yml
version: '2.1'
services:
    redis:
        image: 'redis:3.2.7'
        # command: redis-server --requirepass redispass

    postgres:
        image: postgres:9.6
        environment:
            - POSTGRES_USER=airflow
            - POSTGRES_PASSWORD=airflow
            - POSTGRES_DB=airflow
        # Uncomment these lines to persist data on the local filesystem.
        #     - PGDATA=/var/lib/postgresql/data/pgdata
        # volumes:
        #     - ./pgdata:/var/lib/postgresql/data/pgdata

    webserver:
        image: puckel/docker-airflow:1.10.2
        restart: always
        depends_on:
            - postgres
            - redis
        environment:
            - LOAD_EX=n
            - FERNET_KEY=46BKJoQYlPPOexq0OhDZnIlNepKFf87WFwLbfzqDDho=
            - EXECUTOR=Celery
            # - REDIS_PASSWORD=redispass
        volumes:
            - ./dags:/usr/local/airflow/dags
            - ./requirements.txt:/requirements.txt
            - ./entrypoint.sh:/entrypoint.sh
            - ./airflow.cfg:/usr/local/airflow/airflow.cfg
            # Uncomment to include custom plugins
            # - ./plugins:/usr/local/airflow/plugins
        ports:
            - "8080:8080"
        command:
            - webserver
        healthcheck:
            test: ["CMD-SHELL", "[ -f /usr/local/airflow/airflow-webserver.pid ]"]
            interval: 30s
            timeout: 30s
            retries: 3

    flower:
        image: puckel/docker-airflow:1.10.2
        restart: always
        depends_on:
            - redis
        environment:
            - EXECUTOR=Celery
            # - REDIS_PASSWORD=redispass
        ports:
            - "5555:5555"
        command: flower

    scheduler:
        image: puckel/docker-airflow:1.10.2
        restart: always
        depends_on:
            - webserver
        volumes:
            - ./dags:/usr/local/airflow/dags
            - ./requirements.txt:/requirements.txt
            - ./entrypoint.sh:/entrypoint.sh
            - ./airflow.cfg:/usr/local/airflow/airflow.cfg
            # Uncomment to include custom plugins
            # - ./plugins:/usr/local/airflow/plugins
        environment:
            - LOAD_EX=n
            - FERNET_KEY=46BKJoQYlPPOexq0OhDZnIlNepKFf87WFwLbfzqDDho=
            - EXECUTOR=Celery
            # - REDIS_PASSWORD=redispass
        command: scheduler

    worker:
        image: puckel/docker-airflow:1.10.2
        restart: always
        depends_on:
            - scheduler
        volumes:
            - ./dags:/usr/local/airflow/dags
            - ./requirements.txt:/requirements.txt
            - ./entrypoint.sh:/entrypoint.sh
            - ./airflow.cfg:/usr/local/airflow/airflow.cfg
            # Uncomment to include custom plugins
            # - ./plugins:/usr/local/airflow/plugins
        environment:
            - FERNET_KEY=46BKJoQYlPPOexq0OhDZnIlNepKFf87WFwLbfzqDDho=
            - EXECUTOR=Celery
            # - REDIS_PASSWORD=redispass
        command: worker
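With both files in place, the whole stack comes up with one command, and Celery workers can be scaled out (the scale syntax follows the upstream README; the worker count is arbitrary):

docker-compose up -d
docker-compose scale worker=3    # optional: run additional Celery workers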
File: entrypoint.sh
#!/usr/bin/env bash

TRY_LOOP="20"

: "${REDIS_HOST:="redis"}"
: "${REDIS_PORT:="6379"}"
: "${REDIS_PASSWORD:=""}"

: "${POSTGRES_HOST:="postgres"}"
: "${POSTGRES_PORT:="5432"}"
: "${POSTGRES_USER:="airflow"}"
: "${POSTGRES_PASSWORD:="airflow"}"
: "${POSTGRES_DB:="airflow"}"

# Defaults and back-compat
: "${AIRFLOW__CORE__FERNET_KEY:=${FERNET_KEY:=$(python -c "from cryptography.fernet import Fernet; FERNET_KEY = Fernet.generate_key().decode(); print(FERNET_KEY)")}}"
: "${AIRFLOW__CORE__EXECUTOR:=${EXECUTOR:-Sequential}Executor}"

export \
  AIRFLOW__CELERY__BROKER_URL \
  AIRFLOW__CELERY__RESULT_BACKEND \
  AIRFLOW__CORE__EXECUTOR \
  AIRFLOW__CORE__FERNET_KEY \
  AIRFLOW__CORE__LOAD_EXAMPLES \
  AIRFLOW__CORE__SQL_ALCHEMY_CONN

# Load DAG examples (default: Yes)
if [[ -z "$AIRFLOW__CORE__LOAD_EXAMPLES" && "${LOAD_EX:=n}" == n ]]
then
  AIRFLOW__CORE__LOAD_EXAMPLES=False
fi

# Install custom python packages if requirements.txt is present
if [ -e "/requirements.txt" ]; then
  $(which pip) install --user -r /requirements.txt
fi

if [ -n "$REDIS_PASSWORD" ]; then
  REDIS_PREFIX=:${REDIS_PASSWORD}@
else
  REDIS_PREFIX=
fi

# Block until a TCP port accepts connections, or give up after TRY_LOOP attempts.
wait_for_port() {
  local name="$1" host="$2" port="$3"
  local j=0
  while ! nc -z "$host" "$port" >/dev/null 2>&1 < /dev/null; do
    j=$((j+1))
    if [ $j -ge $TRY_LOOP ]; then
      echo >&2 "$(date) - $host:$port still not reachable, giving up"
      exit 1
    fi
    echo "$(date) - waiting for $name... $j/$TRY_LOOP"
    sleep 5
  done
}

if [ "$AIRFLOW__CORE__EXECUTOR" != "SequentialExecutor" ]; then
  # My change: point the metadata DB and Celery result backend at an external
  # MySQL instance instead of the bundled Postgres container.
  AIRFLOW__CORE__SQL_ALCHEMY_CONN="mysql://airflow:123@10.6.107.66/airflowdb2"
  AIRFLOW__CELERY__RESULT_BACKEND="db+mysql://airflow:123@10.6.107.66/airflowdb2"
  wait_for_port "Postgres" "$POSTGRES_HOST" "$POSTGRES_PORT"
fi

if [ "$AIRFLOW__CORE__EXECUTOR" = "CeleryExecutor" ]; then
  AIRFLOW__CELERY__BROKER_URL="redis://$REDIS_PREFIX$REDIS_HOST:$REDIS_PORT/1"
  wait_for_port "Redis" "$REDIS_HOST" "$REDIS_PORT"
fi

case "$1" in
  webserver)
    airflow initdb
    if [ "$AIRFLOW__CORE__EXECUTOR" = "LocalExecutor" ]; then
      # With the "Local" executor it should all run in one container.
      airflow scheduler &
    fi
    exec airflow webserver
    ;;
  worker|scheduler)
    # Give the webserver time to run initdb.
    sleep 10
    exec airflow "$@"
    ;;
  flower)
    sleep 10
    exec airflow "$@"
    ;;
  version)
    exec airflow "$@"
    ;;
  *)
    # The command is something like bash, not an airflow subcommand. Just run it in the right environment.
    exec "$@"
    ;;
esac
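One caveat from my setup: since the script is mounted over the image's own entrypoint, it must stay executable on the host, and the hardcoded mysql:// connection only works if a MySQL driver was installed via requirements.txt (see above). A quick check once the stack is up:

chmod +x entrypoint.sh                               # the mounted script must be executable
docker-compose exec webserver airflow list_dags      # confirms the metadata DB connection works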