#!/usr/bin/python
"""Chart the Bloglines subscriber count found in Apache access logs.

Every access-log line that records a Bloglines fetch of
/~nelson/weblog/index.rss carries an "N subscribers" figure.  This script
collects one figure per day from the current and rotated logs, prints a
"date count" line per day, and writes a line chart to bloglines.png.
"""

import gzip
import re
import sys
import time


def swizzleDate(d):
    """Convert a date such as '25/Apr/2004' into ISO form '2004-04-25'.

    Raises ValueError (from time.strptime) if d is not in %d/%b/%Y form.
    """
    t = time.strptime(d, "%d/%b/%Y")
    return time.strftime("%Y-%m-%d", t)


# Import-time sanity check of the date conversion.
assert '2004-04-25' == swizzleDate('25/Apr/2004')

# Group 1: the date that follows '[' (day/month/year).
# Group 2: the number directly before " subscribers".
# Only lines that request the feed and mention "Bloglines" can match.
parseRE = re.compile(r'\[(\d+/\w+/\d+).*GET /~nelson/weblog/index\.rss.*Bloglines.*?(\d+) subscribers')

# ISO date string -> subscriber count (kept as the string captured from the log).
data = {}


def parse(fp):
    """Record the subscriber count of every matching line of fp in data.

    fp is any iterable of log lines.  Lines that do not match parseRE are
    ignored.  When several lines share a date, the last one seen wins.
    """
    for l in fp:
        m = parseRE.search(l)
        if m:
            date, count = m.groups()
            data[swizzleDate(date)] = count


def logPaths():
    """Return the access-log paths to scan, ordered oldest to newest.

    parse() lets the last line seen for a date win, so the logs must be
    read in chronological order for the latest figure of a day to be kept
    when that day is split across two rotated files.
    """
    paths = ['/var/log/apache/access.log.%d.gz' % n for n in range(38, 1, -1)]
    paths.append('/var/log/apache/access.log.1')
    paths.append('/var/log/apache/access.log')
    return paths


def parseLogFile(path):
    """Open the log at path (gzip if it ends in '.gz'), parse it, close it."""
    if path.endswith('.gz'):
        # NOTE: on Python 3 gzip.open() yields bytes, which parseRE (a str
        # pattern) cannot search; compressed logs are handled on Python 2 only.
        fp = gzip.open(path)
    else:
        fp = open(path)
    # try/finally rather than "with": GzipFile is only a context manager
    # from Python 2.7 on.
    try:
        parse(fp)
    finally:
        fp.close()


def main():
    """Parse all logs, print the per-day counts and draw bloglines.png."""
    # Imported here, before any parsing, so a missing chart library fails
    # fast while the parsing helpers stay importable without it.
    import gdchart

    # psyco is only an optional accelerator; run without it when absent.
    try:
        import psyco
    except ImportError:
        pass
    else:
        psyco.full()

    for path in logPaths():
        parseLogFile(path)

    dates = sorted(data)
    for d in dates:
        print("%s %s" % (d, data[d]))

    gdchart.option(
        bg_color=0xffffff,
        grid=gdchart.GDC_TICK_NONE,
        format=gdchart.GDC_PNG,
        title='Bloglines readers over time',
        set_color=[0x000000]
    )

    # Axis labels are the date reduced to 'YYYYMM'.
    graphDates = [(d[:4] + d[5:7]) for d in dates]
    graphData = [int(data[d]) for d in dates]
    gdchart.chart(gdchart.GDC_LINE, (350, 225), 'bloglines.png',
                  graphDates, graphData)


if __name__ == '__main__':
    main()