I fail at attachment.
Vincent Povirk
On Wed, Aug 20, 2008 at 3:14 PM, Vincent Povirk
<madewokherd+8cd9@xxxxxxxxx> wrote:
> On Wed, Aug 20, 2008 at 8:54 AM, Anthony Kozar
> <mailing-lists-1001@xxxxxxxxxxxxxxxx> wrote:
>> Thanks Bryan -- I applaud your efforts in making the Blanxx card listings
>> more useful. Unfortunately, these sorts of "manual" solutions to
>> organization tend to not be maintained over time (or require a great deal
>> more effort to maintain). As you probably know, this sort of thing screams
>> "database"!
>
> Nope, it screams "Crazed robot!".
>
> Or, uh.. maybe not..
>
> Anyway, before I read this, I had the idea of throwing in a few more
> lists that would naturally be generated by a robot. Basically, certain
> cards (particularly goals and new rules) tend to mention other cards.
> Generally these cards will only make sense in a deck that contains the
> cards they mention. So I want:
> * For each card that is mentioned in another card's text, a list of
> cards whose text it is mentioned in.
> * A list of cards that do not have any links to other cards, in either
> direction. I.e. they can be added to any Fluxx deck.
> * For each standard deck (Fluxx 1.0, Fluxx 2.x, etc.), a list of cards
> that can be added based on the cards in that deck.
>
> For right now, I have a Python program that can download the Fluxx
> Blanxx list from the wiki and generate a list by author. I have
> attached this in case it is useful (and boldly replaced the page
> contents with the bot-generated list).
>
> There's more individual-situation-specific logic in the script than
> I'd like (and I did have to modify the pages a bit to actually get a
> nice list), and it cannot make any automated changes (this is by
> design).
>
> If someone beats me to providing the information I want with a
> database, that's fine with me.
>
> Vincent Povirk
>
#!/usr/bin/env python
import urllib2
import xml.sax
import xml.sax.handler
# Card categories handled by this script. Each entry names a wiki page
# ('Fluxx_Blanxx_<type>s') to download, and the order of this list is the
# order in which the categories are written for each author in byauthor.txt.
card_types = ['Rule', 'Keeper', 'Goal', 'Action', 'Ungoal', 'Creeper']
class WikiContentHandler(xml.sax.handler.ContentHandler):
    """SAX handler that collects revision text per page from wiki API XML.

    After parsing, ``contents`` maps the ``title`` attribute of every
    ``<page>`` element to the character data of the last ``<rev>`` element
    found inside that page. A page without any ``<rev>`` element maps to
    the empty string.
    """

    def startDocument(self):
        # (Re)initialise all state so one handler can parse several documents.
        self.contents = {}
        self.title = ''
        self.content = []
        self.inpage = False
        self.inrev = False

    def endDocument(self):
        pass

    def startElement(self, name, attrs):
        if name == 'page':
            self.inpage = True
            self.title = attrs['title']
            # Start every page with an empty buffer; otherwise a page that
            # has no <rev> child would be stored with the text left over
            # from the previous page.
            del self.content[:]
        elif name == 'rev' and self.inpage:
            self.inrev = True
            # Only the text of the most recent revision element is kept.
            del self.content[:]

    def endElement(self, name):
        if name == 'page':
            self.inpage = False
            self.contents[self.title] = ''.join(self.content)
        elif name == 'rev' and self.inpage:
            self.inrev = False

    def characters(self, content):
        # The parser may deliver one text node in several chunks, so the
        # pieces are accumulated and joined when the page ends.
        if self.inrev:
            self.content.append(content)

    def ignorableWhitespace(self, whitespace):
        if self.inrev:
            self.content.append(whitespace)
def get_wiki_contents(host, pages):
    """Fetch the current text of several wiki pages in one API request.

    host  -- host name of the wiki; the API is requested at /w/api.php
    pages -- iterable of page titles; they are joined with '|' in the URL

    Returns the ``contents`` dict built by WikiContentHandler, mapping page
    title to page text. Errors from opening the URL or parsing the XML
    propagate to the caller.
    """
    url = 'http://%s/w/api.php?action=query&prop=revisions&titles=%s&rvprop=content&format=xml' % (host, '|'.join(pages))
    handler = WikiContentHandler()
    parser = xml.sax.make_parser()
    parser.setContentHandler(handler)
    response = urllib2.urlopen(url)
    try:
        parser.parse(response)
    finally:
        # Release the connection even if parsing fails part-way through.
        response.close()
    return handler.contents
# Maps an author's display name to their wiki user account; filled in by
# get_author() whenever it sees a [[User:account|Name]] link.
author_accounts = {}


def get_author(line):
    """Extract author information from one line of a card listing.

    Returns a tuple ``(author, title)``:

    * ``author`` is the cleaned-up author name, or None if the line has
      neither a 'submitted by: ' nor an 'Author: ' marker.
    * ``title`` is one of a few special-cased card names if it occurs in
      the line (and the line names an author), otherwise None.

    As a side effect, a ``[[User:account|Name]]`` link records
    ``author_accounts[Name] = account``.
    """
    # FIXME: hack
    # The last matching name wins, so 'Reverse Pivot' overrides 'Reverse'.
    title = None
    for special_title in ['Backwards', 'Reverse', 'Reverse Pivot', 'Russian Roulette']:
        if special_title in line:
            title = special_title

    if 'submitted by: ' in line:
        # Split once only, so a line repeating the marker cannot raise.
        author = line.split('submitted by: ', 1)[1]
        if '[[User:' in author:  # [[User:username|Author]] Extra Information
            # Take what follows '[[User:' up to the closing ']]' (or to the
            # end of the line if the link is never closed).
            author = author.split('[[User:', 1)[1].split(']]', 1)[0]
            if '|' in author:
                account, author = author.split('|', 1)
            else:
                account = author
            author_accounts[author] = account
        if ' (' in author:  # Author (Extra Information)
            author = author.split(' (', 1)[0]
        if author.endswith('"') and ' "' in author:  # Ross Andrews "not one of my favorites"
            author = author.split(' "', 1)[0]
        if ' via ' in author:  # Mark "Daigohji" Mascaro via Neil Raynar
            author = author.split(' via ', 1)[0]
        if ' who ' in author:  # Hal Haag who borrowed it from June Swords
            author = author.split(' who ', 1)[0]
        if author.endswith(' ???'):  # Author ???
            author = author[:-4]
        if author == 'unknown':
            author = 'Unknown'
        return author, title

    if 'Author: ' in line:
        author = line.split('Author: ', 1)[1]
        # Drop trailing '-', '>', spaces and tabs (e.g. a closing '-->').
        return author.rstrip('-> \t'), title

    return None, None
def get_blanxx_cards():
    """Download the Fluxx Blanxx card pages from the wiki and parse them.

    Returns a dict mapping card title to a four-item list
    ``[card_type, anchor, text, author]``. Progress messages and warnings
    are printed to standard output.

    NOTE(review): the nesting of this function was reconstructed from a copy
    of the script that had lost its indentation -- confirm against the
    original attachment, especially the Backwards/Reverse branch.
    """
    cards = {}

    def remember(name, kind, link_anchor, description, credit):
        # Store a finished entry; an entry without an author is credited to
        # 'Unknown' and reported.
        if credit is None:
            credit = 'Unknown'
            print("WARNING: Author of %s (%s) unknown" % (name, kind))
        cards[name] = [kind, link_anchor, description, credit]

    print('Getting wiki contents..')
    page_names = ('Fluxx_Blanxx_%ss' % kind for kind in card_types)
    pages = get_wiki_contents('rabbits.continuation.org', page_names)
    print('Parsing wiki contents..')
    for page, body in pages.items():
        # Drop the 13-character 'Fluxx_Blanxx_' prefix and the plural 's'.
        card_type = page[13:-1]
        title = text = anchor = author = None
        for raw_line in body.splitlines():
            line = raw_line.strip()
            is_card_header = line.startswith('*') and "'''" in line

            if not is_card_header:
                new_author, new_title = get_author(line)
                if new_title is None:
                    # Ordinary credit line: accumulate co-authors.
                    if new_author is not None:
                        if author is None:
                            author = new_author
                        else:
                            author = '%s and %s' % (author, new_author)
                elif new_author is not None:
                    # FIXME: hack for Backwards, Reverse, or Reverse Pivot
                    # The credit line names a card of its own: store what
                    # has been collected so far, then continue under the
                    # new name.
                    if author is not None:
                        cards[title] = [card_type, anchor, text, author]
                    title = new_title
                    author = new_author
                    anchor = title.replace(' ', '_')
                continue

            #add the previous entry if one exists
            if title:
                remember(title, card_type, anchor, text, author)
            author = None
            # Header shape: * '''Title''' ... "anchor" ... - text
            _, title, remainder = line.split("'''", 2)
            _, anchor, remainder = remainder.split('"', 2)
            if ' - ' in remainder:
                text = remainder.split(' - ', 1)[1]
            elif '</span>' in remainder:
                text = remainder.split('</span>', 1)[1]
            else:
                text = ''
            if title in cards:
                print('WARNING: There are multiple cards named %s' % title)
                # FIXME: this is a shameful hack; I would argue, though,
                # that two different cards should never have the same name,
                # Toast notwithstanding
                # Trailing spaces keep the dict keys distinct.
                while title in cards:
                    title += ' '

        # The last entry on the page has no following header to flush it.
        if title:
            remember(title, card_type, anchor, text, author)
    return cards
# All known cards, keyed by title; fetched from the wiki at import time.
blanxx_cards = get_blanxx_cards()


def card_link(title):
    """Return wiki link markup pointing at the given card's listing.

    ``title`` must be a key of ``blanxx_cards``. The link targets the
    card's anchor on its 'Fluxx_Blanxx_<type>s' page; the displayed label is
    the title with trailing spaces removed, a trailing ', The' moved to the
    front, and commas removed.
    """
    card_type, anchor = blanxx_cards[title][:2]
    # Trailing spaces are only there to keep duplicate names apart as keys.
    label = title.rstrip(' ')
    if label.endswith(', The'):
        # Cutting four characters leaves the comma; it is removed below.
        label = 'The %s' % label[:-4]
    # NOTE(review): indentation was lost in the copy this was taken from;
    # this assumes commas are stripped from every label -- confirm.
    label = label.replace(',', '')
    return '[[Fluxx_Blanxx_%ss#%s|%s]]' % (card_type, anchor, label)
#generate a list of cards by author
print("Generating byauthor.txt")


def do_byauthor():
    """Write byauthor.txt, a wiki-markup list of cards grouped by author.

    Reads the module-level ``blanxx_cards``, ``author_accounts`` and
    ``card_types``. Authors are ordered by last name; under each author the
    cards are grouped by type (in ``card_types`` order) and sorted
    case-insensitively by title. A card credited to 'A and B' is listed
    under both A and B. All output is written UTF-8 encoded.
    """
    # author name -> titles of the cards credited to that author
    authors = {}
    for title in blanxx_cards:
        author = blanxx_cards[title][3]
        while ' and ' in author:
            # special case: First Author and Second Author
            author, second_author = author.rsplit(' and ', 1)
            authors.setdefault(second_author, []).append(title)
        authors.setdefault(author, []).append(title)

    suffixes = set(['iii', 'ii', 'sr.'])

    def lastname(name):
        # Sort key: the last word of the lower-cased name, skipping
        # generational suffixes such as 'III' or 'Sr.'.
        remaining = name.lower()
        while ' ' in remaining:
            remaining, word = remaining.rsplit(' ', 1)
            if word not in suffixes:
                return word
        return remaining

    def lowercase(string):
        return string.lower()

    #sort authors by last name
    author_names = sorted(authors, key=lastname)

    #create file
    f = open('byauthor.txt', 'w')
    try:
        for author in author_names:
            # group cards by type
            bytype = {}
            for cardtitle in authors[author]:
                bytype.setdefault(blanxx_cards[cardtitle][0], []).append(cardtitle)

            if author in author_accounts:
                heading = '== [[User:%s|%s]] ==\n' % (author_accounts[author], author)
            else:
                heading = '== %s ==\n' % author
            # Encode the heading like the card lines below; writing it
            # unencoded fails for author names with non-ASCII characters.
            f.write(heading.encode('utf8'))

            for card_type in card_types:
                if card_type not in bytype:
                    continue
                if card_type == 'Rule':
                    printed_type = 'New Rule'
                else:
                    printed_type = card_type
                links = ', '.join(card_link(x) for x in sorted(bytype[card_type], key=lowercase))
                f.write(("* '''%s''': %s\n" % (printed_type, links)).encode('utf8'))
            f.write('\n\n')
    finally:
        f.close()


do_byauthor()