Is there any way to make a user-defined macro in Airflow which is itself computed from other macros?
from airflow impo
I would vote for writing an Airflow plugin to inject your pre-defined macros. With this method, you can use your pre-defined macros in any operator without declaring anything.
Below are some custom macros that we're using.
Example usage: {{ macros.dagtz_next_execution_date(ti) }}
from airflow.plugins_manager import AirflowPlugin
from datetime import datetime, timedelta
from airflow.utils.db import provide_session
from airflow.models import DagRun
import pendulum
@provide_session
def _get_dag_run(ti, session=None):
    """Look up the DagRun that matches the given TaskInstance.

    Args:
        ti (TaskInstance): the task instance whose DagRun is wanted
        session (optional): database session used for the query; callers in
            this module omit it, so it is presumably supplied by the
            ``provide_session`` decorator

    Returns:
        DagRun or None: the first DagRun whose ``dag_id`` equals the task's
        DAG id and whose ``execution_date`` equals ``ti.execution_date``;
        ``None`` when the task has no ``dag`` attribute or nothing matches
    """
    task = ti.task
    if not hasattr(task, 'dag'):
        return None

    matching_runs = session.query(DagRun).filter_by(
        dag_id=task.dag.dag_id,
        execution_date=ti.execution_date,
    )
    found = matching_runs.first()
    # Expunge before committing, presumably so the returned object is
    # detached from the session and stays usable by the caller.
    session.expunge_all()
    session.commit()
    return found
def ds_add_no_dash(ds, days):
    """
    Add or subtract days from a YYYYMMDD date string.

    :param ds: anchor date in ``YYYYMMDD`` format to add to
    :type ds: str
    :param days: number of days to add to the ds, you can use negative
        values; a falsy value (``0`` or ``None``) leaves the date unchanged
    :type days: int
    :return: the shifted date, formatted as ``YYYYMMDD``
    :rtype: str
    :raises ValueError: if ``ds`` does not match the ``%Y%m%d`` format

    >>> ds_add_no_dash('20150101', 5)
    '20150106'
    >>> ds_add_no_dash('20150106', -5)
    '20150101'
    """
    anchor = datetime.strptime(ds, '%Y%m%d')
    # Skip the arithmetic entirely for a falsy ``days`` so that ``None`` is
    # tolerated exactly as before.
    shifted = anchor + timedelta(days) if days else anchor
    # date.isoformat() yields YYYY-MM-DD; dropping the dashes gives YYYYMMDD.
    return shifted.date().isoformat().replace('-', '')
def dagtz_execution_date(ti):
    """Return the TaskInstance execution date expressed in the DAG timezone.

    Args:
        ti (TaskInstance): the TaskInstance object

    Returns:
        pendulum obj: ``ti.execution_date`` wrapped by ``pendulum.instance``
        and converted to ``ti.task.dag.timezone``
    """
    return pendulum.instance(ti.execution_date).in_timezone(ti.task.dag.timezone)
def dagtz_next_execution_date(ti):
    """Return the TaskInstance next execution date expressed in the DAG timezone.

    Args:
        ti (TaskInstance): the TaskInstance object

    Returns:
        pendulum obj: next execution_date in pendulum object (in DAG tz)
    """
    # A manually triggered run is not on the schedule, so a "next" schedule
    # date is meaningless for it; the execution date itself is used instead,
    # mirroring triggered_date == execution_date for such runs.
    run = _get_dag_run(ti)
    if not run or not run.external_trigger:
        upcoming = ti.task.dag.following_schedule(ti.execution_date)
    else:
        upcoming = ti.execution_date
    return pendulum.instance(upcoming).in_timezone(ti.task.dag.timezone)
def dagtz_next_ds(ti):
    """Return the next execution date (in DAG timezone) as a YYYY-MM-DD string.

    Args:
        ti (TaskInstance): the TaskInstance object

    Returns:
        str: result of ``dagtz_next_execution_date(ti)`` formatted with
        ``'%Y-%m-%d'``
    """
    return dagtz_next_execution_date(ti).strftime('%Y-%m-%d')
def dagtz_next_ds_nodash(ti):
    """Return the next execution date (in DAG timezone) as a YYYYMMDD string.

    Args:
        ti (TaskInstance): the TaskInstance object

    Returns:
        str: the ``dagtz_next_ds(ti)`` string with its dashes removed
    """
    return dagtz_next_ds(ti).replace('-', '')
def dagtz_prev_execution_date(ti):
    """Return the TaskInstance previous execution date expressed in the DAG timezone.

    Args:
        ti (TaskInstance): the TaskInstance object

    Returns:
        pendulum obj: previous execution_date in pendulum object (in DAG tz)
    """
    # A manually triggered run is not on the schedule, so a "previous"
    # schedule date is meaningless for it; the execution date itself is used
    # instead, mirroring triggered_date == execution_date for such runs.
    run = _get_dag_run(ti)
    if not run or not run.external_trigger:
        earlier = ti.task.dag.previous_schedule(ti.execution_date)
    else:
        earlier = ti.execution_date
    return pendulum.instance(earlier).in_timezone(ti.task.dag.timezone)
def dagtz_prev_ds(ti):
    """Return the previous execution date (in DAG timezone) as a YYYY-MM-DD string.

    Args:
        ti (TaskInstance): the TaskInstance object

    Returns:
        str: result of ``dagtz_prev_execution_date(ti)`` formatted with
        ``'%Y-%m-%d'``
    """
    return dagtz_prev_execution_date(ti).strftime('%Y-%m-%d')
def dagtz_prev_ds_nodash(ti):
    """Return the previous execution date (in DAG timezone) as a YYYYMMDD string.

    Args:
        ti (TaskInstance): the TaskInstance object

    Returns:
        str: the ``dagtz_prev_ds(ti)`` string with its dashes removed
    """
    return dagtz_prev_ds(ti).replace('-', '')
# Plugin class that bundles the macro functions defined above
class AirflowTestPlugin(AirflowPlugin):
    """Airflow plugin that registers this module's custom date macros."""

    # Identifier under which the plugin is registered.
    name = "custom_macros"
    # Functions exposed as macros, one per line.
    macros = [
        dagtz_execution_date,
        ds_add_no_dash,
        dagtz_next_execution_date,
        dagtz_next_ds,
        dagtz_next_ds_nodash,
        dagtz_prev_execution_date,
        dagtz_prev_ds,
        dagtz_prev_ds_nodash,
    ]
`user_defined_macros` are not processed as templates by default. If you want to keep a template in a `user_defined_macro` (or if you use a template in a `params` variable), you can always re-run the templating function manually:
class DoubleTemplatedBashOperator(BashOperator):
    """BashOperator variant that renders its templates again before executing."""

    def pre_execute(self, context):
        """Invoke template rendering on the task instance one more time.

        Args:
            context (dict): task context; its ``'ti'`` entry is the object
                whose ``render_templates()`` is called
        """
        task_instance = context['ti']
        task_instance.render_templates()
And this will work for templates that don't also reference other parameters or UDMs. This way, you can have "two-deep" templates.
Or put your UDM directly in the `BashOperator`'s command instead (the easiest solution):
# The template calls dag.following_schedule(execution_date) inline, so no
# user-defined macro is involved.
BashOperator(
    task_id='bash_op',
    bash_command='echo "{{ dag.following_schedule(execution_date) }}"',
    dag=dag,
)
Here are some solutions:
Subclass BashOperator to add some values to the context:
class NextExecutionDateAwareBashOperator(BashOperator):
def render_template(self, attr, content, context):
    """Render a template after adding ``next_execution_date`` to the context.

    Args:
        attr: forwarded unchanged to the parent implementation
        content: forwarded unchanged to the parent implementation
        context (dict): template context; must contain ``'dag'`` and
            ``'execution_date'``. It is mutated in place: the key
            ``'next_execution_date'`` is set to
            ``dag.following_schedule(execution_date)``.

    Returns:
        whatever the parent class's ``render_template`` returns

    Raises:
        KeyError: if ``'dag'`` or ``'execution_date'`` is missing from
            ``context``
    """
    dag = context['dag']
    execution_date = context['execution_date']
    context['next_execution_date'] = dag.following_schedule(execution_date)
    # Delegate to the same hook this method overrides: ``render_template``
    # (singular). The plural ``render_templates`` is a different method and
    # does not take (attr, content, context).
    # In python 2 the equivalent call is:
    # super(NextExecutionDateAwareBashOperator, self).render_template(attr, content, context)
    return super().render_template(attr, content, context)
The good part with this approach: you can capture some repeated code in your custom operator.
The bad part: you have to write a custom operator to add values to the context, before templated fields are rendered.
Macros are not necessarily values. They can be functions.
In your dag :
def compute_next_execution_date(dag, execution_date):
    """Return the schedule date that follows ``execution_date`` for ``dag``.

    Args:
        dag: object providing ``following_schedule``
        execution_date: date handed to ``dag.following_schedule``

    Returns:
        the value returned by ``dag.following_schedule(execution_date)``
    """
    following = dag.following_schedule(execution_date)
    return following
# Register the function object itself (not a precomputed value) as the
# 'next_execution_date' macro of this DAG.
dag = DAG(
    'simple',
    schedule_interval='0 21 * * *',
    user_defined_macros={
        'next_execution_date': compute_next_execution_date,
    },
)
# The template calls the macro with ``dag`` and ``execution_date``, so the
# value is computed when the command is rendered.
task = BashOperator(
    task_id='bash_op',
    bash_command='echo "{{ next_execution_date(dag, execution_date) }}"',
    dag=dag,
)
The good part: you can define reusable functions to process values available at runtime (XCom values, job instance properties, task instance properties, etc...), and make your function result available to render a template.
The bad part (but not that annoying): you have to import such a function as a user defined macro in every dag where needed.
This solution is the simplest (as mentioned in Ardan's answer), and probably the right one in your case.
# Simplest variant: call dag.following_schedule(execution_date) directly
# inside the template string.
BashOperator(
    task_id='bash_op',
    bash_command='echo "{{ dag.following_schedule(execution_date) }}"',
    dag=dag,
)
Ideal for simple calls like this one. There are also some other objects directly available as macros (like `task`, `task_instance`, etc.); even some standard modules are available (like `macros.time`, ...).