from datetime import datetime, timedelta
# First and last hourly partitions to generate commands for.
from_dt = datetime(year=2021, month=7, day=19, hour=1)
to_dt = datetime(year=2021, month=8, day=9, hour=0)
def get_date_parts(dt: datetime) -> tuple[int, int, int, int]:
    """Return the (year, month, day, hour) components of *dt*.

    The values are the plain integer attributes of the datetime, so they
    carry no zero padding when formatted into a string.
    """
    return dt.year, dt.month, dt.day, dt.hour
# For every hourly partition from from_dt through to_dt (inclusive), print the
# shell commands that create the eqiad partition directory and copy the
# _REFINED flag into it from the matching codfw partition. The commands are
# only printed here, not executed.
base_path = '/wmf/data/event/mediawiki_page_move'
# Whole hours between the endpoints. Floor division of timedeltas stays in
# exact integer arithmetic; the + 1 makes the range include to_dt itself, and
# a to_dt earlier than from_dt yields no commands at all.
hour_count = (to_dt - from_dt) // timedelta(hours=1) + 1
for n in range(hour_count):
    year, month, day, hour = get_date_parts(from_dt + timedelta(hours=n))
    time_partitions = f'year={year}/month={month}/day={day}/hour={hour}'
    from_path = f'{base_path}/datacenter=codfw/{time_partitions}'
    to_path = f'{base_path}/datacenter=eqiad/{time_partitions}'
    # make parent directories
    print(f'hdfs dfs -mkdir -p {to_path}/')
    # copy flags
    print(f'hdfs dfs -cp {from_path}/_REFINED {to_path}/_REFINED')