User:Elukey/Analytics/Druid

From Wikitech

Tunnels for Coordinator/Overlord

# Coordinator UI
ssh -L 8081:localhost:8081 druid1004.eqiad.wmnet -N

# Overlord UI
ssh -L 8090:localhost:8090 druid1004.eqiad.wmnet -N

Useful API commands

# Show datasources

elukey@druid1001:~$ curl druid1001.eqiad.wmnet:8082/druid/v2/datasources
{"dimensions":[],"metrics":[]}elukey@druid1003:~$ curl druid1001.eqiad.wmnet:8082/druid/v2/datasources
["unique_devices_per_project_family_monthly","geowiki_archive_monthly",
 "virtualpageviews_hourly","webrequest_sampled_128","pageviews_hourly",
 "unique_devices_per_project_family_daily","unique_devices_per_domain_daily",
 "mediawiki-history-beta","tbayer_popups","mediawiki_geoeditors_monthly",
 "netflow","pageviews_daily","banner_activity_minutely",
 "unique_devices_per_domain_monthly"]

Example of indexing commands for Webrequest:

# webrequest Druid hourly
sudo -u hdfs oozie job --oozie $OOZIE_URL \
    -Drefinery_directory=hdfs://analytics-hadoop$(hdfs dfs -ls -d /wmf/refinery/$(date +%Y)* | tail -n 1 | awk '{print $NF}') \
    -Dqueue_name=production \
    -Doozie_launcher_queue_name=production \
    -Dstart_time=2018-06-01T00:00Z \
    -config /srv/deployment/analytics/refinery/oozie/webrequest/druid/hourly/coordinator.properties \
    -run

# webrequest Druid daily
sudo -u hdfs oozie job --oozie $OOZIE_URL \
    -Drefinery_directory=hdfs://analytics-hadoop$(hdfs dfs -ls -d /wmf/refinery/$(date +%Y)* | tail -n 1 | awk '{print $NF}') \
    -Dqueue_name=production \
    -Doozie_launcher_queue_name=production \
    -Dstart_time=2018-05-25T00:00Z \
    -config /srv/deployment/analytics/refinery/oozie/webrequest/druid/daily/coordinator.properties \
    -run

Example of queries

curl -L -H'Content-Type: application/json' -XPOST --data-binary '
{
  "queryType": "topN",
  "dataSource": "webrequest",
  "granularity": "hour",
  "dimension": "uri_path",
  "metric": "events",
  "threshold": 5,
  "aggregations": [
      { "type": "longSum", "name": "events", "fieldName": "events" }
    ],
  "intervals": [ "2018-05-18T00:00:00.000/2018-05-18T01:00:00.000" ]
}
' http://d-1.analytics.eqiad.wmflabs:8082/druid/v2/

elukey@stat1004:~$ cat query
{"query":"select as_number FROM webrequest_sampled_128 LIMIT 5"}
elukey@stat1004:~$ curl -XPOST -H'Content-Type: application/json' http://druid1001.eqiad.wmnet:8082/druid/v2/sql/ -d @query