我创建了一个 DAG,其中包含若干个运行简单 bash 命令的 SubDAG。几乎从一开始,就有许多任务卡在 running(运行中)或 no status(无状态)状态,无法继续执行。过一段时间后,被卡住的 DAG 实例越来越多,最终只剩下一个实例真正在运行。我该怎么做才能避免这种情况?
这是我的 DAG:
from airflow import DAG
from airflow.operators.bash_operator import BashOperator
from airflow.operators.subdag_operator import SubDagOperator
from datetime import datetime, timedelta
# Shared settings handed to every DAG in this file through ``default_args``.
# Further optional keys ('queue', 'pool', 'priority_weight', 'end_date') were
# left disabled by the author and are intentionally not set here.
default_args = dict(
    owner='airflow',
    depends_on_past=False,
    start_date=datetime(2016, 1, 1),
    email=['airflow@airflow.com'],
    # No e-mail is sent on failure or on retry.
    email_on_failure=False,
    email_on_retry=False,
    # One retry, attempted five minutes after the failed try.
    retries=1,
    retry_delay=timedelta(minutes=5),
)
# Top-level DAG: runs on the '@hourly' schedule with max_active_runs set to 30.
dag = DAG(
    dag_id='performance',
    schedule_interval='@hourly',
    max_active_runs=30,
    default_args=default_args,
)
# Sub-DAG attached to the top-level DAG through the 'adapterSubDagOperator'
# task; its dag_id is '<parent dag_id>.<task_id>'.
adapterSubDag = DAG('performance.adapterSubDagOperator',
                    default_args=default_args,
                    # Keep the sub-DAG on the same schedule as the top-level
                    # DAG instead of falling back to DAG's default interval.
                    schedule_interval=dag.schedule_interval)

# Ten tasks (bash_0 .. bash_9) with no dependencies between them, each
# launching the same test jar.
for index in range(10):
    bash = BashOperator(
        task_id='bash_{}'.format(index),
        # The command must stay on one line: a hard wrap inside the literal
        # is a syntax error and would also split the 'presidio-core' path.
        bash_command='java -jar /home/presidio/dev-projects/presidio-core/presidio-workflows/tests/resources/jars/test.jar',
        dag=adapterSubDag)

# Task in the top-level DAG that runs the adapter sub-DAG.
adapterSubdagOperator = SubDagOperator(
    subdag=adapterSubDag,
    task_id='adapterSubDagOperator',
    dag=dag
)
# Sub-DAG attached to the top-level DAG through the 'presidioSubDagOperator'
# task; it is the parent of the input, ade and output sub-DAGs.
presidioSubDag = DAG('performance.presidioSubDagOperator',
                     default_args=default_args,
                     # Keep the sub-DAG on the same schedule as the top-level
                     # DAG instead of falling back to DAG's default interval.
                     schedule_interval=dag.schedule_interval)

# Task in the top-level DAG that runs the presidio sub-DAG.
presidioSubdagOperator = SubDagOperator(
    subdag=presidioSubDag,
    task_id='presidioSubDagOperator',
    dag=dag
)
# Nested sub-DAG attached to presidioSubDag through the 'inputSubDagOperator'
# task; its dag_id is '<parent dag_id>.<task_id>'.
inputSubDag = DAG('performance.presidioSubDagOperator.inputSubDagOperator',
                  default_args=default_args,
                  # Keep the sub-DAG on the same schedule as the top-level
                  # DAG instead of falling back to DAG's default interval.
                  schedule_interval=dag.schedule_interval)

# Ten tasks (bash_0 .. bash_9) with no dependencies between them, each
# launching the same test jar.
for index in range(10):
    bash = BashOperator(
        task_id='bash_{}'.format(index),
        # The command must stay on one line: a hard wrap inside the literal
        # is a syntax error and would also split the 'presidio-core' path.
        bash_command='java -jar /home/presidio/dev-projects/presidio-core/presidio-workflows/tests/resources/jars/test.jar',
        dag=inputSubDag)

# Task in presidioSubDag that runs the input sub-DAG.
inputSubdagOperator = SubDagOperator(
    subdag=inputSubDag,
    task_id='inputSubDagOperator',
    dag=presidioSubDag
)
# Nested sub-DAG attached to presidioSubDag through the 'adeSubDagOperator'
# task; its dag_id is '<parent dag_id>.<task_id>'.
adeSubDag = DAG('performance.presidioSubDagOperator.adeSubDagOperator',
                default_args=default_args,
                # Keep the sub-DAG on the same schedule as the top-level
                # DAG instead of falling back to DAG's default interval.
                schedule_interval=dag.schedule_interval)

# Ten tasks (bash_0 .. bash_9) with no dependencies between them, each
# launching the same test jar.
for index in range(10):
    bash = BashOperator(
        task_id='bash_{}'.format(index),
        # The command must stay on one line: a hard wrap inside the literal
        # is a syntax error and would also split the 'presidio-core' path.
        bash_command='java -jar /home/presidio/dev-projects/presidio-core/presidio-workflows/tests/resources/jars/test.jar',
        dag=adeSubDag)

# Task in presidioSubDag that runs the ade sub-DAG.
adeSubdagOperator = SubDagOperator(
    subdag=adeSubDag,
    task_id='adeSubDagOperator',
    dag=presidioSubDag
)
# Nested sub-DAG attached to presidioSubDag through the 'outputSubDagOperator'
# task; its dag_id is '<parent dag_id>.<task_id>'.
# The assignment and the DAG(...) call must be one statement: a bare
# 'outputSubDag =' followed by a newline is a syntax error.
outputSubDag = DAG('performance.presidioSubDagOperator.outputSubDagOperator',
                   default_args=default_args,
                   # Keep the sub-DAG on the same schedule as the top-level
                   # DAG instead of falling back to DAG's default interval.
                   schedule_interval=dag.schedule_interval)

# Ten tasks (bash_0 .. bash_9) with no dependencies between them, each
# launching the same test jar.
for index in range(10):
    bash = BashOperator(
        task_id='bash_{}'.format(index),
        bash_command='java -jar /home/presidio/dev-projects/presidio-core/presidio-workflows/tests/resources/jars/test.jar',
        dag=outputSubDag)

# Task in presidioSubDag that runs the output sub-DAG.
outputSubdagOperator = SubDagOperator(
    subdag=outputSubDag,
    task_id='outputSubDagOperator',
    dag=presidioSubDag
)
# Ordering inside presidioSubDag: input -> ade -> output.
inputSubdagOperator.set_downstream(adeSubdagOperator)
adeSubdagOperator.set_downstream(outputSubdagOperator)

# Ordering inside the top-level DAG: adapter -> presidio.
adapterSubdagOperator.set_downstream(presidioSubdagOperator)