# https://wikitech.wikimedia.org/wiki/Analytics/Cluster/Oozie
# SSH to stat1002
ssh stat1002.eqiad.wmnet
# Check out the changes required, for example...
# Each checkout runs in a subshell from an absolute path, so the steps work
# from any directory and leave the current directory untouched. (Chaining
# relative `cd refinery/...` and `cd ..` ends up in ~/refinery, from where a
# second `cd refinery/...` fails.)
#
# refinery-source: fetch the change from the repository this clone tracks
# (analytics/refinery/source), not mediawiki/core.
# NOTE(review): confirm the change ref belongs to analytics/refinery/source.
(
  cd ~/refinery/refinery-source \
    && git fetch https://gerrit.wikimedia.org/r/analytics/refinery/source refs/changes/59/278859/23 \
    && git checkout FETCH_HEAD
)
# refinery: holds the oozie job definitions uploaded to HDFS later on.
(
  cd ~/refinery/refinery \
    && git fetch https://gerrit.wikimedia.org/r/analytics/refinery refs/changes/07/296407/4 \
    && git checkout FETCH_HEAD
)
# If the source needs testing then build it!
# Runs in a subshell from an absolute path, so it works from any directory
# and leaves the current directory untouched. Unit tests are skipped
# (-DskipTests) to keep the build short.
(cd ~/refinery/refinery-source && mvn clean package -DskipTests)
# Run the job using spark-submit (client mode).
# NOTE: update the params (jar version, date, graphite settings) before running.
# No `cd` is needed here: the job jar is referenced by absolute path.
job_jar="$HOME/refinery/refinery-source/refinery-job/target/refinery-job-0.0.36-SNAPSHOT.jar"
spark-submit \
  --class org.wikimedia.analytics.refinery.job.WikidataArticlePlaceholderMetrics \
  --master yarn \
  --deploy-mode client \
  --jars /usr/lib/hive/lib/datanucleus-api-jdo-3.2.6.jar,/usr/lib/hive/lib/datanucleus-core-3.2.10.jar,/usr/lib/hive/lib/datanucleus-rdbms-3.2.9.jar \
  --files /usr/lib/hive/conf/hive-site.xml \
  "$job_jar" \
  --year 2016 \
  --month 11 \
  --day 11 \
  --graphite-namespace daily.wikidata.articleplaceholder \
  --graphite-host graphite-in.eqiad.wmnet
# Replace the oozie definitions and the job jar in HDFS with the local copies.
# NOTE: update hdfs_dir and jar_name to match your user and build.
hdfs_dir=/user/addshore
jar_name=refinery-job-0.0.36-SNAPSHOT.jar
# -f: do not report an error when the target does not exist yet (first upload).
hdfs dfs -rm -r -f "${hdfs_dir}/oozie"
hdfs dfs -put ~/refinery/refinery/oozie "$hdfs_dir"
hdfs dfs -rm -f "${hdfs_dir}/${jar_name}"
hdfs dfs -put ~/refinery/refinery-source/refinery-job/target/"$jar_name" "$hdfs_dir"
# List the target directory explicitly to confirm both uploads landed.
hdfs dfs -ls "$hdfs_dir"
# Run the job using oozie.
# Start with -dryrun to validate the coordinator without submitting anything;
# once the output looks right, set oozie_action=-run and repeat the command.
oozie_action=-dryrun
# Last entry of the HDFS listing of deployed refinery directories
# (presumably the most recent deploy — verify). The glob is quoted so that
# hdfs, not the local shell, expands it.
refinery_dir=$(hdfs dfs -ls -d '/wmf/refinery/2016*' | tail -n 1 | awk '{print $NF}')
oozie job \
  "-Drefinery_directory=hdfs://analytics-hadoop${refinery_dir}" \
  -Doozie_directory=/user/addshore/oozie \
  -Dstart_time=2016-11-11T00:00Z \
  -Dstop_time=2016-11-13T00:00Z \
  -Dgraphite_namespace=daily.wikidata.articleplaceholder \
  -Dspark_job_jar=hdfs://analytics-hadoop/user/addshore/refinery-job-0.0.36-SNAPSHOT.jar \
  -config ~/refinery/refinery/oozie/wikidata/articleplaceholder_metrics/coordinator.properties \
  "$oozie_action"
# Submitted coordinators are listed at:
# https://hue.wikimedia.org/oozie/list_oozie_coordinators/
# Hue can be a bit slow to make things appear
# If something is bad you can kill a job with (oozie job -kill OOZIE_ID)