User:Addshore/Notes/2016/Oozie

From Wikitech
# https://wikitech.wikimedia.org/wiki/Analytics/Cluster/Oozie

# Log in to stat1002; the commands in the following sections are typed in
# that remote session.
stat_host=stat1002.eqiad.wmnet
ssh "${stat_host}"

# Check out the changes required, for example...
# Each checkout runs in a subshell from an absolute path, so the caller's
# working directory is left untouched, and the && chain stops a failed cd or
# fetch from checking anything out in the wrong repository.

# refinery-source working copy.
# NOTE(review): the change is fetched from the analytics/refinery/source
# project rather than mediawiki/core, which is an unrelated repository --
# confirm the change number against Gerrit.
(
  cd "$HOME/refinery/refinery-source" &&
    git fetch https://gerrit.wikimedia.org/r/analytics/refinery/source refs/changes/59/278859/23 &&
    git checkout FETCH_HEAD
)

# refinery working copy.
(
  cd "$HOME/refinery/refinery" &&
    git fetch https://gerrit.wikimedia.org/r/analytics/refinery refs/changes/07/296407/4 &&
    git checkout FETCH_HEAD
)

# If the source needs testing then build it!
# The absolute path makes this work from any directory; the && keeps mvn from
# running somewhere else if the cd fails, and the subshell leaves the current
# directory unchanged. -DskipTests builds the package without running the
# tests.
(cd "$HOME/refinery/refinery-source" && mvn clean package -DskipTests)

# Run the job using spark submit (client mode) (NOTE: update the params)
# The jar built from refinery-source; update the version when it changes.
refinery_job_jar="$HOME/refinery/refinery-source/refinery-job/target/refinery-job-0.0.36-SNAPSHOT.jar"
# Directory holding the datanucleus jars passed to --jars.
hive_lib=/usr/lib/hive/lib

# cd with an absolute path so this works from any directory (a relative cd
# fails when the shell is already inside refinery-source), and only submit
# if the cd succeeded.
cd "$HOME/refinery/refinery-source" &&
  spark-submit \
    --class org.wikimedia.analytics.refinery.job.WikidataArticlePlaceholderMetrics \
    --master yarn \
    --deploy-mode client \
    --jars "${hive_lib}/datanucleus-api-jdo-3.2.6.jar,${hive_lib}/datanucleus-core-3.2.10.jar,${hive_lib}/datanucleus-rdbms-3.2.9.jar" \
    --files /usr/lib/hive/conf/hive-site.xml \
    "${refinery_job_jar}" \
    --year 2016 \
    --month 11 \
    --day 11 \
    --graphite-namespace daily.wikidata.articleplaceholder \
    --graphite-host graphite-in.eqiad.wmnet

# Remove old stuff and add new stuff to hdfs
# -f makes the removals succeed quietly when the target does not exist yet
# (e.g. on the very first upload), so a missing old copy is not an error.
hdfs dfs -rm -r -f /user/addshore/oozie
hdfs dfs -put "$HOME/refinery/refinery/oozie" /user/addshore
hdfs dfs -rm -f /user/addshore/refinery-job-0.0.36-SNAPSHOT.jar
hdfs dfs -put "$HOME/refinery/refinery-source/refinery-job/target/refinery-job-0.0.36-SNAPSHOT.jar" /user/addshore
# With no path, -ls lists the current user's HDFS home directory; use it to
# check that both uploads are present.
hdfs dfs -ls

# Run the job using oozie
# Start with -dryrun to check the job definition without submitting it; once
# the dry run looks right, set oozie_action=-run and repeat for a real run.
oozie_action=-dryrun

# Last /wmf/refinery/2016* entry in the HDFS listing (this relies on the
# listing being sorted by name, so the last entry is the newest dated one).
# The glob is quoted so that it is passed to hdfs as-is instead of being
# expanded by the local shell.
refinery_path=$(hdfs dfs -ls -d '/wmf/refinery/2016*' | tail -n 1 | awk '{print $NF}')

# ${refinery_path:?...} aborts the command with a message instead of silently
# submitting refinery_directory=hdfs://analytics-hadoop when nothing matched.
oozie job \
  -Drefinery_directory="hdfs://analytics-hadoop${refinery_path:?no refinery directory found under /wmf/refinery}" \
  -Doozie_directory=/user/addshore/oozie \
  -Dstart_time=2016-11-11T00:00Z \
  -Dstop_time=2016-11-13T00:00Z \
  -Dgraphite_namespace=daily.wikidata.articleplaceholder \
  -Dspark_job_jar=hdfs://analytics-hadoop/user/addshore/refinery-job-0.0.36-SNAPSHOT.jar \
  -config "$HOME/refinery/refinery/oozie/wikidata/articleplaceholder_metrics/coordinator.properties" \
  "${oozie_action}"

# Submitted coordinators are listed at:
# https://hue.wikimedia.org/oozie/list_oozie_coordinators/
# Hue can be a bit slow to make things appear
# If something is bad you can kill a job with (oozie job -kill OOZIE_ID)