#!/usr/bin/python
"""Extract information about RSS subscribership from hosted RSS readers
by parsing their User-Agent data."""

__author__ = "Nelson Minar "
__url__ = "http://www.nelson.monkey.org/~nelson/weblog/"

# I place this code in the public domain. Use it for what you want.
# No warranty or guarantee, of course.

import re
import unittest
import sys
import gzip

# Regexps for each aggregator.
# Each entry is an (aggregator name, pattern) pair.
# Regexps capture data in named groups: readers, and views.
#
# NOTE(review): the group names had been stripped from the patterns
# ("(?P\d+)", which `re` rejects); they are restored here from the comment
# above. Yahoo's "users" count is taken to be its `readers` figure -- confirm.
# NOTE(review): the patterns contain layout whitespace, so they presumably
# are meant to be compiled with re.VERBOSE -- confirm against the caller.
regexps = (
    ('Yahoo',
     r''' YahooFeedSeeker.*; \s*users\s*(?P<readers>\d+); \s*views\s*(?P<views>\d+) '''),
    ('LiveJournal',
     r''' LiveJournal\.com.*; \s*(?P<readers>\d+)\s*readers '''),
    ('NewsIsFree',
     r''' NIF.* users:(?P<readers>\d+) '''),
    ('BlogLines',
     r''' Bloglines.* ;\s*(?P<readers>\d+)\s*subscribers '''),
    ('NewsGator',
     r''' NewsGatorOnline.* ;\s*(?P<readers>\d+)\s*subscribers '''),
)

# Basic regexp prepended to list above, used for all user agents.
# NOTE(review): the definition below is cut off in the available source and
# its named groups have lost their names, so it cannot be completed without
# guessing. The surviving fragment is kept verbatim, commented out, until the
# remainder of the pattern is recovered.
# baseRE = r''' \[ (?P\d+/\S+/\d\d\d\d) : (?P