# Source code for mozetl.shield.utils
from datetime import date, timedelta
from moztelemetry.dataset import Dataset
def shield_etl_boilerplate(transform_func, s3_path):
    """Build an ETL job function for ``shield-study`` telemetry pings.

    The returned job fetches the pings for one submission date, hands them to
    ``transform_func`` and optionally writes the result as parquet.

    :param transform_func: callable invoked as
        ``transform_func(sqlContext, pings)``; whatever it returns is what the
        job writes and returns (presumably a Spark DataFrame, since
        ``.repartition(...).write`` is called on it -- confirm with callers).
    :param s3_path: base output location; a
        ``/submission_date=<submission_date>`` suffix is appended per run.
    :return: the ``etl_job`` closure described below.
    """

    def etl_job(sc, sqlContext, submission_date=None, save=True):
        """Fetch, transform and optionally persist one day of pings.

        :param sc: passed to ``Dataset.records`` to load the pings.
        :param sqlContext: forwarded unchanged to ``transform_func``.
        :param submission_date: ``YYYYMMDD`` string; defaults to yesterday.
        :param save: when true, write the transformed result to parquet.
        :return: the value produced by ``transform_func``.
        """
        if submission_date is None:
            # NOTE(review): date.today() uses the local clock of the machine
            # running the job -- confirm that matches the submissionDate
            # partitioning of the telemetry source.
            yesterday = date.today() - timedelta(1)
            submission_date = yesterday.strftime("%Y%m%d")

        ping_filter = dict(
            docType="shield-study",
            submissionDate=submission_date,
            appName="Firefox",
        )
        telemetry = Dataset.from_source("telemetry")
        raw_pings = telemetry.where(**ping_filter).records(sc)

        result = transform_func(sqlContext, raw_pings)
        if not save:
            return result

        partition_suffix = "/submission_date={}".format(submission_date)
        destination = s3_path + partition_suffix
        # Collapse to a single partition before writing; any data already at
        # the destination is replaced ("overwrite" mode).
        writer = result.repartition(1).write.mode("overwrite")
        writer.parquet(destination)
        return result

    return etl_job