import re
import time
import traceback
from xml.sax.saxutils import escape

import wikipedia

# Generate ../links.html: a table with one row per wiki page giving the number
# of single-bracketed links, the characters they occupy, the page length, and
# the link density in links per million characters. Rows are sorted by density,
# densest first. If anything goes wrong while collecting the data, the
# traceback is written into the page instead of the rows.
html = open("../links.html", "w")
try:
    html.write("""<html>
<head>
<title>M:Robe 500i Development Wiki - Link Statistics</title>
<link rel="stylesheet" href="style.css?"/>
</head>
<body>
<h1>Link Statistics</h1>
""")
    html.write("<p>Updated every 6 hours. Last updated %s.</p>" % time.strftime("%a, %d %b %Y %H:%M:%S +0000", time.gmtime()))
    html.write("""<table width="100%" border="1">
<tr><th>Page</th><th>Links</th><th>Link Text</th><th>Total Text</th><th>Links per Total Text (ppm)</th></tr>
""")

    # Extra entity so escaped values are also safe inside double-quoted
    # HTML attributes (escape() alone only handles &, < and >).
    quote_entities = {'"': "&quot;"}

    try:
        site = wikipedia.getSite()
        allpages = list(site.allpages(includeredirects=False))
        # Sanity check: a suspiciously short page list is reported as an
        # error rather than published as if it were a complete report.
        if len(allpages) < 10:
            # A real exception class is required here: raising a plain string
            # is itself a TypeError on Python >= 2.6 and hides this message.
            raise RuntimeError("Less than 10 allpages")
        # NOTE(review): presumably bulk-loads the page texts so the page.get()
        # calls below do not each hit the network -- confirm against the
        # wikipedia module.
        wikipedia.getall(site, allpages)

        # Matches a single-bracketed span "[...]" that contains no brackets
        # and is not preceded by "[", so double-bracketed "[[...]]" spans are
        # not counted.
        R = re.compile(r"(?<!\[)\[[^[\]]+\]")

        rows = []
        for page in allpages:
            text = page.get()
            links = R.findall(text)
            links_len = sum(len(link) for link in links)
            text_len = len(text)
            # An empty page has no links; report a density of 0 instead of
            # letting a ZeroDivisionError abort the whole report.
            if text_len:
                ppm = int(1e6 * len(links) / text_len)
            else:
                ppm = 0
            urlname = escape(page.urlname(), quote_entities)
            title = escape(page.title(), quote_entities)
            rows.append((urlname, title, urlname, len(links),
                         links_len, text_len, ppm))

        # Densest pages first; the sort is stable, so pages with equal
        # density keep the order in which they were collected.
        rows.sort(key=lambda row: row[-1], reverse=True)
        for row in rows:
            html.write("<tr>"
                       "<td><a href=\"http://mrobefan.elwiki.com/%s\">%s</a> "
                       "[<a href=\"http://mrobefan.elwiki.com/%s?action=history\">history</a>]</td>"
                       "<td>%d</td>"
                       "<td>%d</td>"
                       "<td>%d</td>"
                       "<td>%d</td>"
                       "</tr>\n" % row)
    except Exception:
        # Top-level boundary: publish the failure in the page itself.
        # KeyboardInterrupt/SystemExit are deliberately not caught here.
        html.write('<h2 class="error">Exception: <pre>')
        # Escape the traceback so frames such as "in <module>" are shown as
        # text rather than being parsed as HTML tags.
        html.write(escape(traceback.format_exc()))
        html.write('</pre>Please notify Cat ASAP.</h2>\n')

    html.write("""</table>
</body>
</html>
""")
finally:
    # Always release the file handle, even if a write above fails.
    html.close()

