User:Addshore/Notes/2016/Oozie
Appearance
# Notes: testing a refinery Spark/Oozie job from a personal HDFS directory.
# Reference: https://wikitech.wikimedia.org/wiki/Analytics/Cluster/Oozie
#
# These are copy-paste snippets rather than an unattended script: everything
# after the ssh line is meant to be run in the resulting remote session.

# SSH to stat1002
ssh stat1002.eqiad.wmnet

# Check out the changes required, for example...
# Absolute paths are used so each snippet works regardless of the current
# directory (the checkouts live under ~/refinery, as the jar/oozie paths
# further down show).
# NOTE(review): the first fetch URL points at mediawiki/core although the
# working copy is refinery-source -- confirm the Gerrit project and change ref
# before running.
cd ~/refinery/refinery-source
git fetch https://gerrit.wikimedia.org/r/mediawiki/core refs/changes/59/278859/23 && git checkout FETCH_HEAD
cd ~/refinery/refinery
git fetch https://gerrit.wikimedia.org/r/analytics/refinery refs/changes/07/296407/4 && git checkout FETCH_HEAD

# If the source needs testing then build it!
cd ~/refinery/refinery-source
mvn clean package -DskipTests

# Run the job using spark submit (client mode) (NOTE: update the params)
cd ~/refinery/refinery-source
spark-submit \
  --class org.wikimedia.analytics.refinery.job.WikidataArticlePlaceholderMetrics \
  --master yarn \
  --deploy-mode client \
  --jars /usr/lib/hive/lib/datanucleus-api-jdo-3.2.6.jar,/usr/lib/hive/lib/datanucleus-core-3.2.10.jar,/usr/lib/hive/lib/datanucleus-rdbms-3.2.9.jar \
  --files /usr/lib/hive/conf/hive-site.xml \
  ~/refinery/refinery-source/refinery-job/target/refinery-job-0.0.36-SNAPSHOT.jar \
  --year 2016 \
  --month 11 \
  --day 11 \
  --graphite-namespace daily.wikidata.articleplaceholder \
  --graphite-host graphite-in.eqiad.wmnet

# Remove old stuff and add new stuff to hdfs
hdfs dfs -rm -r /user/addshore/oozie
hdfs dfs -put ~/refinery/refinery/oozie /user/addshore
hdfs dfs -rm /user/addshore/refinery-job-0.0.36-SNAPSHOT.jar
hdfs dfs -put ~/refinery/refinery-source/refinery-job/target/refinery-job-0.0.36-SNAPSHOT.jar /user/addshore
hdfs dfs -ls

# Run the job using oozie
# The command below ends in -run, which really submits the coordinator.
# Replace -run with -dryrun to do a dry run first.
# refinery_directory is built from the last /wmf/refinery/2016* entry that
# `hdfs dfs -ls -d` prints; the glob is quoted so the local shell passes it
# through to hdfs untouched.
oozie job \
  -Drefinery_directory="hdfs://analytics-hadoop$(hdfs dfs -ls -d '/wmf/refinery/2016*' | tail -n 1 | awk '{print $NF}')" \
  -Doozie_directory=/user/addshore/oozie \
  -Dstart_time=2016-11-11T00:00Z \
  -Dstop_time=2016-11-13T00:00Z \
  -Dgraphite_namespace=daily.wikidata.articleplaceholder \
  -Dspark_job_jar=hdfs://analytics-hadoop/user/addshore/refinery-job-0.0.36-SNAPSHOT.jar \
  -config ~/refinery/refinery/oozie/wikidata/articleplaceholder_metrics/coordinator.properties \
  -run

# Submitted coordinators are listed at:
# https://hue.wikimedia.org/oozie/list_oozie_coordinators/
# Hue can be a bit slow to make things appear
# If something is bad you can kill a job with (oozie job -kill OOZIE_ID)