I fail at attachment. Vincent Povirk On Wed, Aug 20, 2008 at 3:14 PM, Vincent Povirk <madewokherd+8cd9@xxxxxxxxx> wrote: > On Wed, Aug 20, 2008 at 8:54 AM, Anthony Kozar > <mailing-lists-1001@xxxxxxxxxxxxxxxx> wrote: >> Thanks Bryan -- I applaud your efforts in making the Blanxx card listings >> more useful. Unfortunately, these sorts of "manual" solutions to >> organization tend to not be maintained over time (or require a great deal >> more effort to maintain). As you probably know, this sort of thing screams >> "database"! > > Nope, it screams "Crazed robot!". > > Or, uh.. maybe not.. > > Anyway, before I read this, I had the idea of throwing in a few more > lists that would naturally be generated by a robot. Basically, certain > cards (particularly goals and new rules) tend to mention other cards. > Generally these cards will only make sense in a deck that contains the > cards they mention. So I want: > * For each card that is mentioned in another card's text, a list of > cards whose text it is mentioned in. > * A list of cards that do not have any links to other cards, in either > direction. I.e. they can be added to any Fluxx deck. > * For each standard deck (Fluxx 1.0, Fluxx 2.x, etc.), a list of cards > that can be added based on the cards in that deck. > > For right now, I have a python program that can download the Fluxx > Blanxx list from the wiki and generate a list by author. I have > attached this in case it is useful (and boldly replaced the page > contents with the bot-generated list). > > There's more individual-situation-specific logic in the script than > I'd like (and I did have to modify the pages a bit to actually get a > nice list), and it cannot make any automated changes (this is by > design). > > If someone beats me to providing the information I want with a > database, that's fine with me. > > Vincent Povirk >
#!/usr/bin/env python import urllib2 import xml.sax import xml.sax.handler card_types = ['Rule', 'Keeper', 'Goal', 'Action', 'Ungoal', 'Creeper'] class WikiContentHandler(xml.sax.handler.ContentHandler): def startDocument(self): self.contents = {} self.title = '' self.content = [] self.inpage = False self.inrev = False def endDocument(self): pass def startElement(self, name, attrs): if name == 'page': self.inpage = True self.title = attrs['title'] elif name == 'rev' and self.inpage: self.inrev = True self.content[:] = () def endElement(self, name): if name == 'page': self.inpage = False self.contents[self.title] = ''.join(self.content) elif name == 'rev' and self.inpage: self.inrev = False def characters(self, content): if self.inrev: self.content.append(content) def ignorableWhitespace(self, whitespace): if self.inrev: self.content.append(whitespace) def get_wiki_contents(host, pages): url = 'http://%s/w/api.php?action=query&prop=revisions&titles=%s&rvprop=content&format=xml' % (host, '|'.join(pages)) parser = xml.sax.make_parser() handler = WikiContentHandler() parser.setContentHandler(handler) parser.parse(urllib2.urlopen(url)) return handler.contents author_accounts = {} def get_author(line): # FIXME: hack title = None for x in ['Backwards', 'Reverse', 'Reverse Pivot', 'Russian Roulette']: if x in line: title = x if 'submitted by: ' in line: dummy, author = line.split('submitted by: ') if '[[User:' in author: # [[User:username|Author]] Extra Information author, dummy = author.split(']]', 1) dummy, author = author.split('[[User:', 1) if '|' in author: account, author = author.split('|', 1) else: account = author author_accounts[author] = account if ' (' in author: # Author (Extra Information) author, dummy = author.split(' (', 1) if author.endswith('"') and ' "' in author: # Ross Andrews "not one of my favorites" author, dummy = author.split(' "', 1) if ' via ' in author: # Mark "Daigohji" Mascaro via Neil Raynar author, dummy = author.split(' via ', 1) if ' who ' in author: # Hal Haag who borrowed it from June Swords author, dummy = author.split(' who ', 1) if author.endswith(' ???'): # Author ??? author = author[:-4] if author == 'unknown': author = 'Unknown' return author, title elif 'Author: ' in line: dummy, author = line.split('Author: ') author = author.rstrip('-> \t') return author, title return None, None def get_blanxx_cards(): result = {} print 'Getting wiki contents..' pages = get_wiki_contents('rabbits.continuation.org', ('Fluxx_Blanxx_%ss' % card_type for card_type in card_types)) dups = set() print 'Parsing wiki contents..' for page in pages: card_type = page[13:-1] title = None text = None anchor = None author = None for line in pages[page].splitlines(): line = line.strip() if line.startswith('*') and "'''" in line: #add the previous entry if one exists if title: if author is None: author = 'Unknown' print "WARNING: Author of %s (%s) unknown" % (title, card_type) result[title] = [card_type, anchor, text, author] title = text = anchor = author = None dummy, title, rest = line.split("'''", 2) dummy, anchor, rest = rest.split('"', 2) if ' - ' in rest: dummy, text = rest.split(' - ', 1) elif '</span>' in rest: dummy, text = rest.split('</span>', 1) else: text = '' if title in result: print 'WARNING: There are multiple cards named %s' % title # FIXME: this is a shameful hack; I would argue, though, # that two different cards should never have the same name, # Toast notwithstanding while title in result: title = '%s ' % title else: new_author, new_title = get_author(line) if new_title is not None: # FIXME: hack for Backwards, Reverse, or Reverse Pivot if new_author is not None: if author is not None: result[title] = [card_type, anchor, text, author] title = new_title author = new_author anchor = title.replace(' ', '_') elif new_author is not None: if author is not None: author = '%s and %s' % (author, new_author) else: author = new_author if title: if author is None: author = 'Unknown' print "WARNING: Author of %s (%s) unknown" % (title, card_type) result[title] = [card_type, anchor, text, author] return result blanxx_cards = get_blanxx_cards() def card_link(title): card_type, anchor, text, author = blanxx_cards[title] title = title.rstrip(' ') if title.endswith(', The'): title = 'The %s' % title[:-4] title = title.replace(',', '') return '[[Fluxx_Blanxx_%ss#%s|%s]]' % (card_type, anchor, title) #generate a list of cards by author print "Generating byauthor.txt" def do_byauthor(): authors = {} for title in blanxx_cards: card_type, anchor, text, author = blanxx_cards[title] while ' and ' in author: # special case: First Author and Second Author author, second_author = author.rsplit(' and ', 1) if second_author not in authors: authors[second_author] = [] authors[second_author].append(title) if author not in authors: authors[author] = [] authors[author].append(title) author_names = list(authors) #sort authors by last name suffixes = set(['iii', 'ii', 'sr.']) def lastname(string): string = string.lower() while ' ' in string: string, lastname = string.rsplit(' ', 1) if lastname in suffixes: continue return lastname return string author_names.sort(key=lastname) #create file f = open('byauthor.txt', 'w') try: for author in author_names: # group cards by type bytype = {} for cardtitle in authors[author]: type = blanxx_cards[cardtitle][0] if type not in bytype: bytype[type] = [] bytype[type].append(cardtitle) if author in author_accounts: f.write('== [[User:%s|%s]] ==\n' % (author_accounts[author], author)) else: f.write('== %s ==\n' % author) for type in card_types: if type in bytype: if type == 'Rule': printed_type = 'New Rule' else: printed_type = type def lower(string): return string.lower() f.write(("* '''%s''': %s\n" % (printed_type, ', '.join(card_link(x) for x in sorted(bytype[type], key=lower)))).encode('utf8')) f.write('\n\n') finally: f.close() do_byauthor()