Puppet/PQL
PQL is a simple query syntax for the PuppetDB api. its much nicer to use then the older query syntax and is also supported by pypuppetdb.
Examples
PQL can be quite useful for debugging or collecting data on puppet resources. the following lists a few working PQL examples additional examples may be found on puppetdb1002:/home/jbond/pql
The boiler plate python code is as follows, in the examples below i will mostly just add the actual PQL code unless there is something a bit more complicated
from pypuppetdb import connect
db = connect()
pql = """ ...the query... """
results = db.pql(pql)
for result in results:
print(result)
you can also use the following curl command to get the raw json response, assumes the pql query is in a file call ./pql.txt
. Uses the example below
$ curl -sX GET http://localhost:8080/pdb/query/v4 --data-urlencode "query=$(<pql.txt)" | jq .
[
{
"parameters": {
"key": "AAAAE2VjZHNhLXNoYTItbmlzdHAyNTYAAAAIbmlzdHAyNTYAAABBBK2t99nf1hGiPs3whPlgx4GTCeIIM55C5zrLIaaINE9rTEikbGHSDj1/Kv8B9vC6n9J9h4yLHLRhV3eNLcVsuck=",
"type": "ecdsa-sha2-nistp256",
"ensure": "present",
"host_aliases": [
"ganeti2028",
"10.192.0.127",
"2620:0:860:101:10:192:0:127"
]
},
"title": "ganeti2028.codfw.wmnet"
}
]
All nodes with Package['confd']
nodes {
resources { type = 'Package' and title = 'confd' }
}
All exported resources
resources[parameters,title] {
exported = true
}
All exported sshkey resources
resources[parameters,title] {
exported = true and type = 'Sshkey' and
}
All exported sshkey resources for a specific host
resources[parameters,title] {
exported = true and type = 'Sshkey' and
nodes { certname = "ganeti2028.codfw.wmnet" }
}
All instances of a resource with a specific paramater value
from pypuppetdb import connect
db = connect()
pql = """
resources{
type = 'Systemd::Timer::Job' and
parameters.monitoring_enabled = true
}
"""
resources = db.pql(pql)
for resource in resources:
print(f'{resource} ({resource.parameters["monitoring_contact_groups"]})')
A list files matching a specific path
The following was used in T309268 to get a list of all managed sudo files. This list was then used to see if we have any unmanaged files in the infrastructure. For the curious a similar script was used using the older puppetdb syntax to audit apt sources apt sources
The following was used
from pypuppetdb import connect
found_files = set()
db = connect()
pql = """
resources[parameters,title] {
type = 'File' and (parameters.path ~ '^\/etc\/sudoers\.d\/' or title ~ '^\/etc\/sudoers\.d\/')
}
"""
resources = db.pql(pql)
for resource in resources:
if 'path' in resource['parameters']:
path = resource['parameters']['path']
else:
path = resource['title']
found_files.add(basename(path))
print('|'.join(found_files))
List all teams and the roles they own
#!/usr/bin/python3
from collections import defaultdict
from pypuppetdb import connect
db = connect()
pql = """
resources [parameters, tags]{
type = 'Class' and
title = 'Profile::Contacts'
}
"""
owners = defaultdict(set)
resources = db.pql(pql)
for resource in resources:
try:
owner = resource['parameters']['role_contacts'][0]
except IndexError:
owner = 'Unknown'
role = [r for r in resource['tags'] if r.startswith('role::')][0]
owners[owner].add(role)
for owner, roles in dict(sorted(owners.items())).items():
print(f'= {owner} =')
for role in sorted(roles):
print(f'[] {role.lstrip("role::")}')
Produce a list of all files changed during a specific window
The following script was used to audit files that where unexpectedly altered when we [ https://gerrit.wikimedia.org/r/c/operations/puppet/+/809095 changed the default file owner to root]
from collections import defaultdict
from pypuppetdb import connect
resources = defaultdict(list)
fixed = [
'/etc/ferm',
# as files where fixed add them here
]
fixed_prefix = (
'/srv/phab/phabricator/scripts',
# when all files in a folder are fixed add it here
db = connect()
pql = """
reports {
start_time >= "2022-09-07 11:33:25" and start_time <= "2022-09-07 12:03:25"
order by start_time
}
"""
reports = list(db.pql(pql))
for report in reports:
if report.node in reports:
continue
for event in report.events():
if event.item['type'] != 'File':
continue
if event.item['property'] not in ['group', 'owner']:
continue
if event.item['title'] in fixed:
continue
if event.item['title'].startswith(fixed_prefix):
continue
try:
item = '|'.join([
str(event.item['source_file']), # is sometimes none
event.item['title'],
event.item['property'],
event.item['old'],
event.item['new'],
])
print(f'{report.node}:{item}')
resources[report.node] = item
except:
print(event.item)
raise
All changed resources using a specific change revision
For the above we could also query all reports with a specific sha hash. however if a second change is merged within the 30 then this may not align as expected as such its better to query for the specific window. however i include it here as it could be useful for something
reports {
configuration_version ~ "7acc980cf4"
order by start_time
}
List of hosts on specific version
used to generate the list for the reboot tasks
#!/usr/bin/env python3
import logging
import re
# this is dprecate we should switch to packaging.version.parse but thats third party
from distutils.version import LooseVersion
from pypuppetdb import connect
TARGET_VERSION = '5.10.149'
def main():
kernel_fact_matcher = re.compile(r'Debian\s+(?P<version>\d+\.\d+\.\d+)')
db_matcher = re.compile(r'^(?:db(?:store|proxy)?|es|pc)\d{4}\.')
wmcs_matcher = re.compile(r'^(?:cloud|labs)')
min_kernel_version = LooseVersion(TARGET_VERSION)
target_major = TARGET_VERSION.rsplit('.', 1)[0]
db = connect()
pql = fr"""
inventory {{ facts.kernelmajversion = "{target_major}" }}
"""
results = db.pql(pql)
hosts = {
'database': [],
'wmcs': [],
'prod': [],
}
for result in sorted(results, key=lambda x: x.node):
match = kernel_fact_matcher.search(result.facts['kernel_details']['version'])
if match is None:
logging.warning(
'%s: unable to parse kernel version (%s)',
result.node, facts['kernel_details']['version']
)
continue
current_version = LooseVersion(match['version'])
upgraded = 'x' if current_version >= min_kernel_version else ''
line = f"[{upgraded}] {result.node} ({result.facts['os']['distro']['codename']})"
if db_matcher.match(result.node):
hosts['database'].append(line)
continue
if wmcs_matcher.match(result.node):
hosts['wmcs'].append(line)
continue
hosts['prod'].append(line)
for team, hosts in hosts.items():
print(f"{team} hosts\n{'='*40})")
print("\n".join(hosts) + "\n\n")
if __name__ == '__main__':
raise SystemExit(main())