User:Tucoxn/afdstats.cgi
- This is based on Scottywong's script
- !/usr/bin/env python
- -*- coding: utf-8 -*-
- TO DO:
- Improve voteregex to catch unsigned votes
- Add CSS classes to the table, to make it look nicer, as well as to add green/red backgrounds for votes which were correct/incorrect, and for the diagonals of the vote matrix
- Add query string for start timestamp
import MySQLdb import sys import os import traceback import cgi import urllib import re import datetime import time import htmllib
# --- Module-level setup ------------------------------------------------------
# NOTE(review): in the pasted source these statements were fused onto two lines
# with no separators (invalid Python); reformatted one statement per line with
# behavior unchanged.  Regex patterns are now raw strings to avoid invalid
# escape-sequence warnings; the pattern text itself is untouched.

starttime = time.time()  # wall-clock start; elapsed time is printed in the page footer

# Matches one bolded vote ('''Keep''' ...) up to its signature terminator.
# NOTE(review): the alternation contains an empty branch "(?:)", which makes the
# terminator optional — likely a casualty of markup stripping (the original
# comment mentions extra terminators); preserved as-is.  May need to add
# "{{unsigned" handling to reduce errors from unsigned votes.
voteregex = re.compile(r"'{3}?.*?'{3}?.*?(?:(?:\{\{unsigned.*?\}\})|(?:)|(?:\[\[User.*?\]\].*?\(UTC\)))", re.IGNORECASE)
# Extracts the username from a [[User:...]] or [[User talk:...]] wikilink.
userregex = re.compile(r"\[\[User.*?:(.*?)(?:\||(?:\]\]))", re.IGNORECASE)
# Extracts the closer's bolded result from "The result ... was '''X'''".
resultregex = re.compile(r"The result (?:of the debate )?was(?:.*?)(?:'{3}?)(.*?)(?:'{3}?)", re.IGNORECASE)
# Captures a full signature timestamp, e.g. "02:15, 7 March 2010 (UTC)".
timeregex = re.compile(r"(\d{2}:\d{2}, .*?) \(UTC\)")
# Splits a signature timestamp into (day, month-name, year).
timeparseregex = re.compile(r"\d{2}:\d{2}, (\d{1,2}) ([A-Za-z]*) (\d{4})")
# Splits the display form "Month D, YYYY" back into its parts (see datefmt).
timeunparseregex = re.compile(r"([A-Za-z]*) (\d{1,2}), (\d{4})")
# Splits an API ISO timestamp date "YYYY-MM-DD".
timestampparseregex = re.compile(r"(\d{4})-(\d{2})-(\d{2})")
# Deletion-review banner templates and their parameters.
drvregex = re.compile(r"(?:(?:\{\{delrev xfd)|(?:\{\{delrevafd)|(?:\{\{delrevxfd))(.*?)\}\}", re.IGNORECASE)
drvdateregex = re.compile(r"\|date=(\d{4} \w*? \d{1,2})", re.IGNORECASE)
drvpageregex = re.compile(r"\|page=(.*?)(?:\||$)", re.IGNORECASE)
# Struck-through text is removed before parsing so retracted votes are ignored.
strikethroughregex = re.compile(r"<(s|strike|del)>.*?</(s|strike|del)>", re.IGNORECASE | re.DOTALL)

# Maps zero-padded month numbers (as found in API timestamps) to month names.
monthmap = {"01": "January", "02": "February", "03": "March", "04": "April",
            "05": "May", "06": "June", "07": "July", "08": "August",
            "09": "September", "10": "October", "11": "November", "12": "December"}

username = ""        # target user, from the "name" query parameter
maxsearch = 50       # how many AfDs to analyze (query parameter "max")
maxlimit = 250       # hard cap on maxsearch unless the magic "key" is supplied
startdate = ""       # extra SQL timestamp clause built from "startdate"
altusername = ""     # alternate signature name (query parameter "altname")
matchstats = [0, 0, 0]  # [vote matched result, did not match, result was no-consensus]
nomsonly = False     # when True, only count AfDs the user nominated

# stats holds both per-vote totals (keyed by the full vote name, e.g. "Keep")
# and the vote-vs-result matrix (keyed by vote code + result code, e.g. "knc").
stats = {}
statsresults = ["k", "d", "sk", "sd", "m", "r", "t", "u", "nc"]
votetypes = ["Keep", "Delete", "Speedy Keep", "Speedy Delete",
             "Merge", "Redirect", "Transwiki", "Userfy"]
statsvotes = statsresults[:-1]  # a vote can never be "nc" (no consensus)
for v in statsvotes:
    for r in statsresults:
        stats[v + r] = 0
for v in votetypes:
    stats[v] = 0
tablelist = []  # one (page, vote, time, result, is_nom, drv_links) tuple per AfD
# NOTE(review): this block is a flattened paste — the original indentation and
# the HTML inside the print-string literals were stripped, so several string
# literals below are split across lines and the block is not valid Python as
# shown.  All code tokens are preserved byte-for-byte; only comments added.
def main():
# CGI entry point: parse the query string, query the Toolserver enwiki replica
# for the user's AfD pages, then emit the complete HTML stats page to stdout.
global username
global maxsearch
global maxlimit
global startdate
global altusername
global nomsonly
tehdate = ""
try:
errors = False
# Toolserver replica of enwiki; credentials come from ~/.my.cnf.
db = MySQLdb.connect(db='enwiki_p', host="enwiki-p.rrdb.toolserver.org", read_default_file=os.path.expanduser("~/.my.cnf"))
cursor = db.cursor()
form = cgi.FieldStorage()
if "name" not in form:
print "No name entered."
errors = True
else:
try:
# "max" caps how many AfDs to analyze; clamped to maxlimit (250) unless
# the magic "key" parameter below bypasses the clamp.
if "max" in form:
try:
maxsearch = min(maxlimit, int(form['max'].value))
except:
maxsearch = 50
if "key" in form:
if form['key'].value == "huggadugga":
maxsearch = int(form['max'].value)
# "startdate" (YYYYMMDD) restricts results to revisions at or before that
# day; silently ignored when malformed or outside 2000..2015.
if "startdate" in form:
try:
tehdate = str(form['startdate'].value)
if len(tehdate) != 8 or int(tehdate) < 20000000 or int(tehdate) > 20150000:
pass
else:
startdate = " AND rev_timestamp<=" + str(form['startdate'].value) + "235959"
except:
pass
if "nomsonly" in form:
if form['nomsonly'].value.lower() in ['1', 'true', 'yes']:
nomsonly = True
# Alternate signature name, for users whose signature text does not
# contain their actual username.
if "altname" in form:
altusername = urllib.unquote(form.getvalue('altname'))
username = form['name'].value.replace("_", " ").replace("+", " ")
username = urllib.unquote(username)
username = username[0].capitalize() + username[1:]
# Hard-coded IP block — presumably an abuser; TODO confirm still wanted.
if os.environ["HTTP_X_FORWARDED_FOR"].startswith("89.151.116.5"):
sys.exit(0)
# Append a usage-log record (pseudo-XML, one line per request).
f = open("/home/snottywong/afdstatslog.txt", "a")
f.write("<log><ip>" + os.environ["HTTP_X_FORWARDED_FOR"] + "</ip><username>" + username + "</username><max>" + str(maxsearch) + "</max><timestamp>" + datetime.datetime.today().strftime("%m/%d/%y %H:%M:%S") + "</timestamp>" + ("<startdate>" + tehdate + "</startdate>" if startdate else "") + ("<altname>" + altusername + "</altname>" if altusername else "") + ("<nomsonly>true</nomsonly>" if nomsonly else "") + "</log>\n")
f.close()
#cursor.execute(u'SELECT user_id FROM user WHERE user_name=%s;', (username)) #<--Stupid
#userid = cursor.fetchall()[0][0]
except:
#print sys.exc_info()[0]
#print "
"
#print traceback.print_exc(file=sys.stdout)
#print "
"
print "Username not found."
errors = True
if not errors:
cursor = db.cursor()
# Nominations only: pages whose first revision (rev_parent_id=0) was made
# by this user; otherwise any AfD page the user ever edited.  NOTE(review):
# startdate is our own validated string, not user text, so the
# concatenation is safe despite appearances.
if nomsonly:
cursor.execute(u'SELECT page_title FROM revision JOIN page ON rev_page=page_id WHERE rev_user_text=%s AND page_namespace=4 AND page_title LIKE "Articles_for_deletion%%" AND NOT page_title LIKE "Articles_for_deletion/Log/%%" AND rev_parent_id=0' + startdate + ' ORDER BY rev_timestamp DESC;', (username))
else:
cursor.execute(u'SELECT DISTINCT page_title FROM revision JOIN page ON rev_page=page_id WHERE rev_user_text=%s AND page_namespace=4 AND page_title LIKE "Articles_for_deletion%%" AND NOT page_title LIKE "Articles_for_deletion/Log/%%"' + startdate + ' ORDER BY rev_timestamp DESC;', (username))
results = cursor.fetchall()
#results = tuple(reversed(results))
db.close()
# Static page header.
print """<!doctype html>
<HTML>
<HEAD>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>
<LINK href="/~snottywong/greyscale.css" rel="stylesheet" type="text/css">
<LINK href="/~snottywong/menubar3.css" rel="stylesheet" type="text/css">
<TITLE>AfD Stats</TITLE>
</HEAD>
<BODY id="no">
<script type="text/javascript" src="/~snottywong/menubar.js"></script>
<a href="http://toolserver.org/~snottywong/afdstats.html">←New search</a> """
# NOTE(review): the next three lines are the remains of a commented-out debug
# print and a page-heading print whose HTML was stripped; preserved verbatim.
#print "print "
Debugging: <XMP>" + username + " " + altusername + "</XMP>
"
AfD statistics for User:" + username + "
if len(results) == 0:
print "No AfD's found. Try a different date range. Also, note that if the user's username does not appear in the wikitext of their signature, you may need to specify an alternate name.
\n"
else:
print "These statistics were compiled by an automated process, and may contain errors or omissions due to the wide variety of styles with which people cast votes at AfD. Any result fields which contain \"UNDETERMINED\" were not able to be parsed, and should be examined manually.\n"
print "Vote totals
datestr = ""
if startdate:
tehdate = str(form["startdate"].value)
datestr = " from " + tehdate[4:6] + "/" + tehdate[6:8] + "/" + tehdate[:4] + " and earlier"
print "Total number of unique AfD pages edited by " + username + datestr + ": " + str(len(results)) + "
\n"
print "Analyzed the last " + str(min(maxsearch, len(results))) + " votes by this user.
\n"
# Heavy lifting: fetch pages, classify votes, fill stats/tablelist, print.
analyze(results[:min(maxsearch, len(results))])
printtable()
elapsed = time.time() - starttime
print "Elapsed time: " + str(round(elapsed, 2)) + " seconds.
\n"
print datetime.datetime.today().strftime("%m/%d/%y %H:%M:%S") + "
"
except:
# Last-resort handler: dump the traceback into the page.
print sys.exc_info()[0]
print "
"
print traceback.print_exc(file=sys.stdout)
print "
Unspecified error.
"
pass
# NOTE(review): flattened paste — original indentation lost, and the remains of
# commented-out debug prints are split across lines.  Code tokens untouched.
def analyze(pages):
# Fetch each AfD page's wikitext, locate this user's vote (or implicit
# nomination), and record it into the module-level tablelist and stats.
# pages: sequence of 1-tuples of page titles, as returned by the DB query.
if len(pages) <= 50:
alldata = APIgetlotsofpages(pages)
else:
alldata = {}
# The API caps title lists at 50 per request, so fetch in batches of 50.
for i in range(0, len(pages), 50):
newdata = APIgetlotsofpages(pages[i:min(i+50, len(pages))])
alldata = dict(alldata.items() + newdata.items())
for entry in pages:
try:
page = entry[0]
data = unescape(alldata["Wikipedia:" + page.replace("_", " ")])
data = strikethroughregex.sub("", data) #remove all struck-through text, so that it is ignored
# Votes live after the first section heading ("=="); the closer's result
# statement lives before it.
votes = voteregex.findall(data[data.find("=="):])
result = findresults(data[:max(data.find("=="), data.find("(UTC)"))])
dupvotes = []
deletionreviews = findDRV(data[:data.find("==")], page)
for vote in votes:
try:
# Attribute the vote to the last [[User...]] link in it (the signature).
votermatch = userregex.match(vote[vote.rfind("[[User"):])
if votermatch == None:
continue
else:
voter = votermatch.group(1).strip()
#print "<XMP>" + page + " " + voter + " " + altusername + " " + str(altusername.lower()==voter.lower()) + "</XMP>
"
if voter.lower() == username.lower() or voter.lower() == altusername.lower():
# The vote word is the text between the opening ''' and next quote.
votetype = parsevote(vote[3:vote.find("'", 3)])
if votetype == None:
continue
if votetype == "UNDETERMINED":
continue
timematch = timeregex.search(vote)
if timematch == None:
votetime = ""
else:
votetime = parsetime(timematch.group(1))
# Tuple layout: (page, vote, time, result, is_nomination, DRV links).
dupvotes.append((page, votetype, votetime, result, 0, deletionreviews))
except:
#print sys.exc_info()[0]
#print "
"
#print traceback.print_exc(file=sys.stdout)
continue
# No explicit vote found: if this user created the page they are the
# nominator, which counts as an implicit Delete vote.
if len(dupvotes) < 1:
firsteditor = APIfirsteditor(page)
if firsteditor:
if firsteditor[0].lower() == username.lower(): #user is nominator
tablelist.append((page, "Delete", firsteditor[1], result, 1, deletionreviews))
updatestats("Delete", result)
elif len(dupvotes) > 1:
#ch = choosevote(dupvotes) - not doing this anymore, just take the last vote found as it is probably the correct one (i.e. if someone changed their vote)
ch = len(dupvotes) - 1
tablelist.append(dupvotes[ch])
updatestats(dupvotes[ch][1], dupvotes[ch][3])
else:
tablelist.append(dupvotes[0])
updatestats(dupvotes[0][1], dupvotes[0][3])
except:
#print sys.exc_info()[0]
#print "
"
#print traceback.print_exc(file=sys.stdout)
continue
def parsevote(v):
    """Classify the bolded vote text into a canonical vote name.

    Returns one of the canonical vote strings ("Keep", "Delete", ...),
    None for non-votes ("comment"/"note"), or "UNDETERMINED" when the
    text matches nothing known.  Matching is case-insensitive and
    substring-based, so e.g. "Strong Delete" classifies as "Delete".
    """
    lowered = v.lower()
    # Comments and notes are not votes at all.
    if "comment" in lowered or "note" in lowered:
        return None
    # Order matters: "speedy keep" must be tested before "keep", etc.
    classification = (
        ("merge", "Merge"),
        ("redirect", "Redirect"),
        ("speedy keep", "Speedy Keep"),
        ("speedy delete", "Speedy Delete"),
        ("keep", "Keep"),
        ("delete", "Delete"),
        ("transwiki", "Transwiki"),
        ("userfy", "Userfy"),
        ("userfied", "Userfy"),
        ("incubat", "Userfy"),
    )
    for needle, label in classification:
        if needle in lowered:
            return label
    return "UNDETERMINED"
def findresults(thepage):
    """Extract and normalize the closer's result from an AfD page header.

    Returns a canonical result string ("Keep", "Delete", ..., "No Consensus"),
    "Not closed yet" when no closing statement is found and the page carries
    no archive banner, or "UNDETERMINED" when a close exists but the bolded
    result text cannot be classified.
    """
    found = resultregex.search(thepage)
    if found is None:
        # No "The result was '''X'''" line; check for old-style archive banners
        # that indicate the debate did close in a format we cannot parse.
        archive_markers = (
            "The following discussion is an archived debate of the proposed deletion of the article below",
            "This page is an archive of the proposed deletion of the article below.",
            "This page is no longer live.",
        )
        if any(marker in thepage for marker in archive_markers):
            return "UNDETERMINED"
        return "Not closed yet"
    text = found.group(1).lower()
    # Order matters: compound phrases ("no consensus", "speedy keep") must be
    # tested before their substrings ("keep", "delete").
    if "no consensus" in text:
        return "No Consensus"
    if "merge" in text:
        return "Merge"
    if "redirect" in text:
        return "Redirect"
    if ("speedy keep" in text or "speedily kept" in text or "speedily keep" in text
            or "snow keep" in text or "snowball keep" in text or "speedy close" in text):
        return "Speedy Keep"
    if ("speedy delete" in text or "speedily deleted" in text
            or "snow delete" in text or "snowball delete" in text):
        return "Speedy Delete"
    if "keep" in text:
        return "Keep"
    if "delete" in text:
        return "Delete"
    if "transwiki" in text:
        return "Transwiki"
    if "userfy" in text or "userfied" in text or "incubat" in text:
        return "Userfy"
    if "withdraw" in text:
        # A withdrawn nomination is treated as a speedy keep.
        return "Speedy Keep"
    return "UNDETERMINED"
def findDRV(thepage, pagename):
    """Scan the AfD header for deletion-review banners ({{delrev xfd}} etc.)
    and return HTML links to the matching Deletion review log sections.

    pagename is the AfD page title, used as the anchor when the banner has no
    explicit |page= parameter.  Returns "" when there are no banners or on any
    parse error (DRV links are cosmetic, so failures are swallowed).
    """
    # NOTE(review): a garbled commented-out debug print in the pasted source
    # left a bare '"' line inside the except block, which was a syntax error;
    # the dead debug lines were removed.
    try:
        drvs = ""
        drvcounter = 0
        for drv in drvregex.finditer(thepage):
            drvdate = drvdateregex.search(drv.group(1))
            if drvdate:
                drvcounter += 1
                name = drvpageregex.search(drv.group(1))
                if name:
                    nametext = urllib.quote(name.group(1))
                else:
                    # Fall back to the AfD title with its namespace prefix removed.
                    nametext = urllib.quote(pagename.replace("Articles_for_deletion/", "", 1))
                drvs += '<a href="http://en.wikipedia.org/wiki/Wikipedia:Deletion_review/Log/' + drvdate.group(1).strip().replace(" ", "_") + '#' + nametext + '">[' + str(drvcounter) + ']</a>'
        return drvs
    except:
        return ""
def parsetime(t):
    """Convert a signature timestamp ("HH:MM, D Month YYYY") into the
    display form "Month D, YYYY"; returns "" when t does not parse."""
    parsed = timeparseregex.search(t)
    if parsed is None:
        return ""
    day, month, year = parsed.group(1), parsed.group(2), parsed.group(3)
    return month + " " + day + ", " + year
def updatestats(v, r):
    """Update the module-level stats counters for one vote.

    v is the user's canonical vote name, r the AfD's canonical result name.
    Increments the per-vote total stats[v]; additionally increments the
    vote-vs-result matrix cell stats[vote_code + result_code] when the result
    is also countable.  Unknown vote names are ignored entirely; unknown
    results still count toward the per-vote total.
    """
    # Canonical name -> matrix code, shared by both axes.
    codes = {
        "Keep": "k",
        "Delete": "d",
        "Speedy Keep": "sk",
        "Speedy Delete": "sd",
        "Merge": "m",
        "Redirect": "r",
        "Transwiki": "t",
        "Userfy": "u",
    }
    if v not in codes:
        return
    stats[v] += 1
    # Results have one extra legal value the vote axis lacks: No Consensus.
    rcode = codes.get(r, "nc" if r == "No Consensus" else None)
    if rcode is None:
        return
    stats[codes[v] + rcode] += 1
# NOTE(review): the HTML markup inside these return strings (presumably the
# colored <td class=...> wrappers described in the page text) appears to have
# been stripped when this file was pasted, leaving every branch returning the
# identical '' + r + drv + '' text.  The branch structure still matters: it
# decides which matchstats bucket is incremented.  Tokens preserved verbatim.
def match(v, r, drv):
# Render the result cell for one AfD row and tally it into matchstats.
# v: user's canonical vote, r: canonical result, drv: pre-built DRV link HTML.
# matchstats buckets: [0]=vote matched result (including near-matches),
# [1]=did not match, [2]=result was No Consensus.
if r == "No Consensus":
matchstats[2] += 1
return '' + r + drv + ''
# Unclosed/unparseable AfDs are displayed but counted in no bucket.
elif r == "Not closed yet":return '' + r + drv + ''
elif r == "UNDETERMINED":return '' + r + drv + ''
elif v == r:
matchstats[0] += 1
return '' + r + drv + ''
# Near-matches count as matches: speedy variants pair with their plain
# counterparts, and delete/redirect/merge pair with each other.
elif v == "Speedy Keep" and r == "Keep":
matchstats[0] += 1
return '' + r + drv + ''
elif r == "Speedy Keep" and v == "Keep":
matchstats[0] += 1
return '' + r + drv + ''
elif v == "Speedy Delete" and r == "Delete":
matchstats[0] += 1
return '' + r + drv + ''
elif r == "Speedy Delete" and v == "Delete":
matchstats[0] += 1
return '' + r + drv + ''
elif r == "Redirect" and v == "Delete":
matchstats[0] += 1
return '' + r + drv + ''
elif r == "Delete" and v == "Redirect":
matchstats[0] += 1
return '' + r + drv + ''
elif r == "Merge" and v == "Redirect":
matchstats[0] += 1
return '' + r + drv + ''
elif r == "Redirect" and v == "Merge":
matchstats[0] += 1
return '' + r + drv + ''
else:
matchstats[1] += 1
return '' + r + drv + ''
# NOTE(review): every branch returns '' — the HTML (presumably <td class=...>
# cell-opening markup coloring the voting matrix) was stripped from the string
# literals in the paste.  The original branch structure is preserved verbatim;
# it mirrors the near-match pairs used in match() above.
def matrixmatch(v, r):
# Return the cell markup for matrix position (vote code v, result code r).
# First half: cell has a nonzero count; second half: cell is empty/zero,
# which presumably selected a muted style — TODO confirm against the
# original HTML.
if stats[v+r]:
if r=="nc":
return ''
elif v == r:return ''
elif v=="sk" and r=="k":return ''
elif v=="k" and r=="sk":return ''
elif v=="d" and r=="sd":return ''
elif v=="sd" and r=="d":return ''
elif v=="d" and r=="r":return ''
elif v=="r" and r=="d":return ''
elif v=="m" and r=="r":return ''
elif v=="r" and r=="m":return ''
else:return ''
else:
if r=="nc":
return ''
elif v == r:return ''
elif v=="sk" and r=="k":return ''
elif v=="k" and r=="sk":return ''
elif v=="d" and r=="sd":return ''
elif v=="sd" and r=="d":return ''
elif v=="d" and r=="r":return ''
elif v=="r" and r=="d":return ''
elif v=="m" and r=="r":return ''
elif v=="r" and r=="m":return ''
else:return ''
def APIget(p):
    """Fetch the current wikitext of [[Wikipedia:<p>]] via the MediaWiki API.

    Returns the page text, or None on any failure (network error, missing
    page, unexpected response shape).  Best-effort by design: the caller
    treats None as "page unavailable".
    """
    # NOTE(review): garbled commented-out debug prints in the pasted source
    # left bare '"' lines in the except block (syntax errors); removed.
    try:
        u = urllib.urlopen("http://en.wikipedia.org/w/api.php?action=query&prop=revisions&titles=Wikipedia:" + urllib.quote(p) + "&rvprop=content&format=xml")
        xml = u.read()
        u.close()
        # The revision body is serialized inside <rev ... xml:space="preserve">.
        text = re.search(r'<rev.*?xml:space="preserve">(.*?)</rev>', xml, re.DOTALL).group(1)
        return text
    except:
        return None
def APIgetlotsofpages(rawpagelist):
    """Fetch wikitext for up to 50 AfD pages in a single API request.

    rawpagelist: sequence of 1-tuples of page titles (database rows).
    Returns {unescaped page title: wikitext}, silently skipping redirects and
    pages whose XML cannot be parsed, or None when the request itself fails.
    """
    # NOTE(review): in the pasted source the accumulator line read "p =" with
    # no value (markup stripped) and the except blocks contained bare '"'
    # lines from garbled debug prints; restored to the obvious "" accumulator
    # and removed the dead debug lines.
    try:
        p = ""
        for page in rawpagelist:
            p += urllib.quote("Wikipedia:" + page[0].replace("_", " ") + "|")
        # p[:-3] drops the trailing url-encoded "|" separator ("%7C" is 3 chars).
        u = urllib.urlopen("http://en.wikipedia.org/w/api.php?action=query&prop=revisions%7Cinfo&rvprop=content&format=xml&titles=" + p[:-3])
        xml = u.read()
        u.close()
        pagelist = re.findall(r'<page.*?>.*?</page>', xml, re.DOTALL)
        pagedict = {}
        for i in pagelist:
            try:
                pagename = re.search(r'<page.*?title=\"(.*?)\"', i).group(1)
                text = re.search(r'<rev.*?xml:space="preserve">(.*?)</rev>', i, re.DOTALL).group(1)
                if re.search('<page.*?redirect=\"\".*?>', i):  # AfD page is a redirect
                    continue
                pagedict[unescape(pagename)] = text
            except:
                # Skip malformed <page> entries; best-effort batch fetch.
                continue
        return pagedict
    except:
        return None
def APIfirsteditor(p):
    """Return (username, "Month D, YYYY") for the first revision of
    [[Wikipedia:<p>]] — i.e. the AfD's nominator and nomination date.

    Returns None on any failure; the caller treats None as "nominator
    unknown".
    """
    # NOTE(review): a garbled commented-out debug print left a bare '"' line
    # in the except block (syntax error); the dead debug lines were removed.
    try:
        # rvdir=newer + rvlimit=1 yields exactly the earliest revision.
        u = urllib.urlopen("http://en.wikipedia.org/w/api.php?action=query&prop=revisions&titles=Wikipedia:" + urllib.quote(p) + "&rvlimit=1&rvprop=timestamp|user&rvdir=newer&format=xml")
        xml = u.read()
        u.close()
        s = re.search("<rev user=\"(?P<user>.*?)\" timestamp=\"(?P<timestamp>.*?)\" />", xml)
        user = s.group("user")
        timestamp = timestampparseregex.search(s.group("timestamp"))
        # ISO "YYYY-MM-DD" -> display form "Month D, YYYY".
        timestamptext = monthmap[timestamp.group(2)] + " " + timestamp.group(3).lstrip("0") + ", " + timestamp.group(1)
        return (user, timestamptext)
    except:
        return None
def link(p):
    """Render an HTML anchor to the AfD page p.

    The visible label is the title with underscores despaced and the
    22-character "Articles_for_deletion/" prefix removed, truncated with an
    ellipsis past 64 characters.
    """
    label = cgi.escape(p.replace("_", " ")[22:])
    if len(label) > 64:
        label = label[:61] + "..."
    return '<a href="http://en.wikipedia.org/wiki/Wikipedia:' + urllib.quote(p) + '">' + label + '</a>'
def unescape(s):
    """Decode HTML character entities in s using htmllib (Python 2 only).

    Feeds s through a formatter-less HTMLParser and returns the collected
    plain text.
    """
    # NOTE(review): in the pasted source all four statements were fused onto a
    # single line with no separators (invalid Python); reformatted only.
    p = htmllib.HTMLParser(None)
    p.save_bgn()
    p.feed(s)
    return p.save_end()
def datefmt(datestr):
    """Convert a display date "Month D, YYYY" into compact "YYYYMMDD" form
    (used to build the "Next N AfD's" pagination URL); "" on any failure."""
    try:
        parts = timeunparseregex.search(datestr)
        if parts is None:
            return ""
        # Reverse-lookup the month number from its name; IndexError on an
        # unknown month name is swallowed by the except below.
        month = [num for num, name in monthmap.items() if name == parts.group(1)][0]
        day = parts.group(2).zfill(2)
        return parts.group(3) + month + day
    except:
        return ""
# NOTE(review): this is the most heavily garbled block in the paste — the HTML
# table markup inside the string literals was converted to markdown-style
# table rows interleaved with the code, several print/printstr statements are
# fused onto single lines, and the statement initializing printstr is missing
# entirely.  Not valid Python as shown; tokens preserved byte-for-byte, only
# comments added.
def printtable():
# Emit the three output sections: vote totals, the vote-vs-result matrix,
# and the per-AfD table accumulated in printstr.
totalvotes = 0
for i in votetypes:
totalvotes += stats[i]
if totalvotes > 0:
# Section 1: per-vote-type totals with percentages.
print "- \n"
for i in votetypes:
print "
- " + i + " votes: " + str(stats[i]) + " (" + str(round((100.0*stats[i]) / totalvotes, 1)) + "%) " print "
\n" print """
Voting matrix
This table compares the user's votes to the way the AfD eventually closed. The only AfD's included in this matrix are those that have already closed, where both the vote and result could be reliably determined. Results are across the top, and the user's votes down the side. Green cells indicate "matches", meaning that the user's vote matched (or closely resembled) the way the AfD eventually closed, whereas red cells indicate that the vote and the end result did not match.
for vv in statsvotes:print "\n"
for rr in statsresults:print matrixmatch(vv, rr) + str(stats[vv+rr]) + "" print "" print "</tbody>" print "
| Results | ||||||||||
|---|---|---|---|---|---|---|---|---|---|---|
| " + i.upper() + " | ||||||||||
| Votes | ||||||||||
| " + vv.upper() + " | ||||||||||
"
print """
Abbreviation key:
K = Keep
D = Delete
SK = Speedy Keep
SD = Speedy Delete
M = Merge
R = Redirect
T = Transwiki
U = Userfy
NC = No Consensus
Individual AfD's
\n"
# Pagination link to the next batch, anchored at the last row's date.
if len(tablelist) > 0 and tablelist[-1][2]:
printstr += '<a href="http://toolserver.org/~snottywong/cgi-bin/afdstats.cgi?name=' + username.replace(" ", "_") + '&max=' + str(maxsearch) + '&startdate=' + datefmt(tablelist[-1][2]) + '&altname=' + altusername + '">Next ' + str(maxsearch) + " AfD's →</a>
"
# Section 3: one table row per analyzed AfD; i[4]==1 marks a nomination.
printstr += """if i[4] == 1:printstr += "\n"
else:printstr += "\n"
printstr += match(i[1], i[3], i[5]) + "\n"printstr += "\n" printstr += "</tbody>\n
| Page | Date | Vote | Result |
|---|---|---|---|
| " + link(i[0]) + " | " + i[2] + " | " + i[1] + " (Nom) | " + i[1] + " |
\n" printstr += '
"
# Match-rate summary computed from the matchstats buckets filled by match().
if sum(matchstats) > 0:
print "Number of AfD's where vote matched result (green cells): " + str(matchstats[0]) + " (" + str(round((100.0*matchstats[0]) / sum(matchstats), 1)) + "%)
"
print "Number of AfD's where vote didn't match result (red cells): " + str(matchstats[1]) + " (" + str(round((100.0*matchstats[1]) / sum(matchstats), 1)) + "%)
"
print "Number of AfD's where result was \"No Consensus\" (yellow cells): " + str(matchstats[2]) + " (" + str(round((100.0*matchstats[2]) / sum(matchstats), 1)) + "%)
\n"
print printstr
else:
print "
No votes found."
# Script entry point: emit the page, then the footer links.
main()
# NOTE(review): the two footer print statements and the closing
# </BODY></HTML> fragment below are split/garbled from the paste (the final
# line is the tail of a print-string whose opening was lost); tokens
# preserved verbatim.
print 'Bugs, suggestions, questions? Contact the author at <a href="http://en.wikipedia.org/wiki/User_talk:Snottywong">User talk:Snottywong</a>
'
print '<a href="http://toolserver.org/~snottywong/afdstats.html">←New search</a>'
</BODY>\n</HTML>"