#!/usr/bin/env python
r"""
mtvapi1.py
by Javantea
Oct 27, 2008
Based on pwmap2.py

Maps artists/videos using MTV API

Each artist has a list of related artists and a list of videos.
This is a perfect system to map.

This model is wrong:
white_stripes -- white_stripes-videos
             \-- white_stripes-related -- raconteurs

Runs unchanged on Python 2.6+ and Python 3.
"""

import os
import sys
from os import path
from time import sleep
from xml.etree import ElementTree

try:  # Python 2
    from urllib2 import URLError, urlopen
except ImportError:  # Python 3
    from urllib.error import URLError
    from urllib.request import urlopen

# We use urlopen and file depending on protocol.
# http://api.mtvnservices.com/
API_ROOT = 'http://api.mtvnservices.com'
ATOM_NS = 'http://www.w3.org/2005/Atom'

# Feed titles for "related" pages carry this suffix after the artist name.
RELATED_SUFFIX = ' Related Artists'
POINT_COLOR = '#99ccff'
# How many self links of a feed are followed when recursing.
MAX_RECURSE_LINKS = 4


def _fetch_to_cache(url, filename):
    """Download API_ROOT + url into filename; return True on success.

    A URLError is reported on stdout and turned into a False return so the
    caller can skip this artist.  Other errors (e.g. a failed write)
    propagate.
    """
    try:
        data = urlopen(API_ROOT + url).read()
    except URLError as e:
        print('Warning: Could not grab artist data %s : %s' % (url, e))
        return False
    # Create the cache directory on first use instead of failing the write.
    cache_dir = path.dirname(filename)
    if cache_dir and not path.isdir(cache_dir):
        os.makedirs(cache_dir)
    # "with" guarantees the handle is closed even if the write fails.
    with open(filename, 'wb') as cache_file:
        cache_file.write(data)
    # Pause after every real download so the API is not hammered.
    sleep(1.0)
    return True


def _read_entries(root):
    """Return a list of (title_text, self_url) pairs, one per Atom entry.

    title_text is None for an entry whose <title> is empty or absent;
    self_url is None when the entry has no rel="self" link with an href.
    Pairing is done per entry, so a missing or extra link cannot shift the
    titles against the links.
    """
    title_tag = '{%s}title' % ATOM_NS
    link_tag = '{%s}link' % ATOM_NS
    entries = []
    for entry in root.findall('{%s}entry' % ATOM_NS):
        title = entry.find(title_tag)
        name = title.text if title is not None else None
        self_url = None
        # There may be several links per entry; we want the first "self" one.
        for link in entry.findall(link_tag):
            if link.get('rel') == 'self' and link.get('href'):
                self_url = link.get('href')
                break
        entries.append((name, self_url))
    return entries


def _add_point(state1, name, url):
    """Append a new point for name to state1 and return its index."""
    index = state1.i
    state1.p.append({'name': name, 'color': POINT_COLOR, 'url': url,
                     'conns': []})
    state1.names[name] = index
    state1.i = index + 1
    return index


def parseMTVUrl(state1, url='/1/artist/browse/w', level=1):
    """Parse one MTV API feed and add its artists to state1.

    The feed is read from the cache file 'data/<url with / as _>.xml'; when
    that file does not exist it is downloaded first.  If the download fails
    with a URLError a warning is printed and nothing is added.

    state1 -- object with attributes p (list of point dicts), i (next point
              index) and names (dict: artist name -> index into p); all
              three are updated in place.
    url    -- API path beginning with '/', relative to API_ROOT.
    level  -- remaining recursion depth; while it is > 0 the 'related' feed
              of the first MAX_RECURSE_LINKS self links is parsed as well.

    Returns None.
    """
    filename = 'data/' + url.replace('/', '_')[1:] + '.xml'
    if not path.exists(filename) and not _fetch_to_cache(url, filename):
        # Skip over adding this artist's related artists.
        return

    # Use XML
    # Thanks to Yahoo Developer for the idea:
    # http://developer.yahoo.com/python/python-xml.html
    root = ElementTree.parse(filename).getroot()

    # The feed-level title names the artist this page belongs to.
    feed_titles = [t.text for t in root.findall('{%s}title' % ATOM_NS)]
    if len(feed_titles) == 1 and feed_titles[0] is not None:
        artist_name = feed_titles[0]
        if artist_name.endswith(RELATED_SUFFIX):
            artist_name = artist_name[:-len(RELATED_SUFFIX)]
    else:
        artist_name = ''
        print('Missing artist_name: %s' % (feed_titles,))

    if artist_name in state1.names:
        artist_i = state1.names[artist_name]
    else:
        artist_i = _add_point(state1, artist_name, API_ROOT + url)

    entries = _read_entries(root)
    # The feed may start with an untitled entry; drop it together with its
    # link so it is neither mapped nor followed.  An empty feed is fine.
    if entries and entries[0][0] is None:
        entries.pop(0)

    for name, self_url in entries:
        # Untitled entries (<title />) can also show up further down.
        if name is None:
            continue
        # NOTE(review): a connection is only recorded for artists seen for
        # the first time; links to already-known artists are dropped.  Kept
        # as in the original -- confirm whether that is intended.
        if name in state1.names:
            continue
        new_i = _add_point(state1, name, self_url)
        state1.p[artist_i]['conns'].append(state1.p[new_i])

    if level > 0:
        # Recursive Call... Each level gets smaller by one.
        self_urls = [link for _, link in entries if link]
        for self_url in self_urls[:MAX_RECURSE_LINKS]:
            parseMTVUrl(state1,
                        self_url.replace(API_ROOT, '') + 'related',
                        level - 1)


class up1:
    """Plain attribute holder for the map state (p, i, names set by main)."""
    pass


def main():
    """Map the artist feed given as argv[1] (default: White Stripes)."""
    url = '/1/artist/white_stripes/related'
    if len(sys.argv) > 1:
        url = sys.argv[1]

    state1 = up1()
    state1.p = []
    state1.i = 0
    state1.names = {}
    parseMTVUrl(state1, url, 1)

    for point in state1.p:
        # If they don't have any connections, they're an outlier.
        if not point['conns']:
            continue
        print('p %s' % point['name'])
        for conn in point['conns']:
            print(' c %s' % conn['name'])


if __name__ == '__main__':
    main()