Data Engineering/Manual maintenance/Refined flags script

From Wikitech
from datetime import datetime, timedelta

# First and last hourly partitions to process (naive datetimes, hour precision).
from_dt = datetime(year=2021, month=7, day=19, hour=1)
to_dt = datetime(year=2021, month=8, day=9, hour=0)


def get_date_parts(dt):
    """Return the ``(year, month, day, hour)`` attributes of ``dt`` as a tuple."""
    wanted = ('year', 'month', 'day', 'hour')
    return tuple(getattr(dt, attr) for attr in wanted)


# Print, for each hourly partition starting at from_dt, the shell commands
# that copy the codfw _REFINED flag into the matching eqiad partition.
# The commands are only printed here, not executed.
dataset_root = '/wmf/data/event/mediawiki_page_move'

# Whole hours between the two bounds, +1 so the final hour is included.
total_hours = int((to_dt - from_dt).total_seconds() / 60 / 60) + 1

for n in range(total_hours):
    year, month, day, hour = get_date_parts(from_dt + timedelta(hours=n))

    time_partitions = f'year={year}/month={month}/day={day}/hour={hour}'
    from_path = f'{dataset_root}/datacenter=codfw/{time_partitions}'
    to_path = f'{dataset_root}/datacenter=eqiad/{time_partitions}'

    # make parent directories (the destination partition directory)
    print(f'hdfs dfs -mkdir -p {to_path}/')

    # copy flags
    print(f'hdfs dfs -cp {from_path}/_REFINED {to_path}/_REFINED')