feat: add eventlog for spark
This commit is contained in:
41
pet-project/dag_spark_create_dm_pg.py
Normal file
41
pet-project/dag_spark_create_dm_pg.py
Normal file
@@ -0,0 +1,41 @@
|
||||
from airflow import DAG
|
||||
from airflow.providers.apache.spark.operators.spark_submit import SparkSubmitOperator
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
|
||||
default_args = {
|
||||
"owner": "anazarenko",
|
||||
"depends_on_past": False,
|
||||
"start_date": datetime(2025, 1, 1),
|
||||
"retries": 1,
|
||||
"retry_delay": timedelta(minutes=5),
|
||||
}
|
||||
|
||||
with DAG(
|
||||
dag_id="spark_create_dm_pg",
|
||||
start_date=datetime(2026, 1, 1),
|
||||
schedule=None,
|
||||
catchup=False,
|
||||
default_args=default_args,
|
||||
tags=["spark", "postgres"],
|
||||
) as dag:
|
||||
|
||||
spark_task = SparkSubmitOperator(
|
||||
task_id="run_spark_job",
|
||||
application="/opt/airflow/dag/anazarenko/spark/create_dm_pg.py",
|
||||
conn_id="spark_default",
|
||||
name="airflow-spark",
|
||||
|
||||
application_args=[
|
||||
"--input", "s3a://anazarenko-bucket/input/",
|
||||
"--output", "s3a://anazarenko-bucket/output/",
|
||||
],
|
||||
conf={
|
||||
"spark.eventlog.enabled": "true",
|
||||
"spark.eventlog.dir": "file:/opt/spark-events",
|
||||
"spark.eventlog.compress": "true"
|
||||
},
|
||||
#packages="org.postgresql:postgresql:42.7.3",
|
||||
verbose=True,
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user