Python code for exploring `foaf` files
Backlink: KatieRivard
Hack hack hack, hackhackhack, hackhack. This is mostly raw first- or second-pass code, with no planning time involved. Read accordingly.
The program takes input in the form of a text file piped from foafing, a wget-esque program I wrote. The first line of the file is the URL, followed by the contents of the file at that location. Dumping the first two lines gets rid of the URL label and the XML version declaration heading most foaf files. Probably shouldn't dump that last one, but it hasn't broken anything seriously yet, and this is just poking.
sinkUtils manages the list structure: triples are held in a list of tuples, from which you can ask for everything that has the subject "ham" or the like.
ksort holds my mergeSort program, which may or may not be used in this version.
foafing holds a wget clone used to download a new file; also happens to be the program where you get the text input to this script.
#system libs
import string
import sys
import os
import copy

#local files
import rdfxml
import ksort
import sinkUtils as sinku
import foafing

# Overall flow of this script:
#   1. read the dump named on the command line (URL line + file contents),
#   2. parse it as RDF into a sinkUtils Sink,
#   3. report a few counts and the people the first named node "knows",
#   4. download the foaf file referenced by the first remaining friend into
#      a fresh "out#.txt".
#
# Every print is written as a single-argument call so the same source parses
# under both Python 2 (which this was written for) and Python 3.  The format
# strings reproduce what the Python 2 print statement emitted for the
# comma-separated originals (a space between items, except after an item
# that ends in a tab).

usestr="Usage: \n\t$ ./foafproc2.py file"

if len(sys.argv) < 2:
    print(usestr)
    sys.exit()

fname = sys.argv[1]
print("Reading from %s" % (fname,))

# dump the two garbage lines @beginning of file(this should
# be fixed at some point to be more reactive)
# 'with' closes the input file even if a read fails.
with open(fname, 'r') as f:
    f.readline()
    f.readline()
    s = f.read()

# constants located in sinkUtils for tuple referencing
SUBJECT = sinku.SUBJECT
PREDICATE = sinku.PREDICATE
OBJECT = sinku.OBJECT


# "subtracts" two lists; ie, find all items in lsta not in lstb.
## Like subtraction, ORDER MATTERS.
def listSub(lsta, lstb):
    """Return the items of lsta that do not occur in lstb.

    The order of lsta is preserved and duplicates in lsta are kept.
    Membership is tested with `in` against lstb.
    """
    return [item for item in lsta if item not in lstb]


# slices just one attribute for each list item
def vertSlice(sink, spo):
    """Return element `spo` of every entry in sink.result, as a list.

    `spo` is an index into each entry; this script passes SUBJECT or OBJECT.
    """
    return [entry[spo] for entry in sink.result]


######################
# Process file
######################

# actual parsing
ans = rdfxml.parseRDF(s, base=None, sink=sinku.Sink())

# random facts: # of nodes, # entries, # names
entryKeys = ans.withObject("_:id")
print("%s nodes in file, %s unnamed"
      % (ans.countNodes(), len(entryKeys.result)))
print("%s entries in file" % (len(ans.result),))
names = ans.withPredicate("name")
print("%s \"name\" entries" % (len(names.result),))

# Without at least one "name" entry there is no base node to start from;
# stop with a message instead of an IndexError traceback.
if not names.result:
    sys.exit("No \"name\" entries found in %s; cannot pick a base node"
             % (fname,))

# get people "known" to this person
# baseNode is first Name in file(cross fingers, no mistakes yet)
baseNode = names.result[0][SUBJECT]
baseNodeEntries = ans.withSubject(baseNode)
baseKnows = baseNodeEntries.withPredicate("knows")
# NOTE(review): copy.copy is shallow.  Unless Sink customises copying,
# frendz.result may be the very same list object as baseKnows.result, in
# which case the removals below also shrink baseKnows.  baseKnows is not
# read again after the removals start, so this is harmless here -- confirm
# before reusing baseKnows further down.
frendz = copy.copy(baseKnows)
print("%s \"knows\" entries in base node" % (len(baseKnows.result),))

# rationalize with "names" set and eliminate non-named known persons(?)
## this just so happens to eliminate the "seeAlso" entries which were
## not actually people's foaf files, which is convenient but probably
## not trustworthy.
# Both slices are taken once, before anything is removed from frendz.
namedSubjects = vertSlice(names, SUBJECT)
knownObjects = vertSlice(baseKnows, OBJECT)

print("Disagreements:")
for subject in listSub(namedSubjects, knownObjects):
    print("\t%s \t\tin \"names\"" % (subject,))
for obj in listSub(knownObjects, namedSubjects):
    print("\t%s \t\tin base node \"knows\"" % (obj,))
    # Iterate over a snapshot: if withObject hands back a list tied to
    # frendz.result, removing while iterating would skip entries.
    for entry in list(frendz.withObject(obj).result):
        frendz.result.remove(entry)

print("")
print("Friends:")
frendz.write()

# get foaf entry for top name in friends list
if not frendz.result:
    sys.exit("No named friends left for the base node; nothing to download")
name = frendz.result[0][OBJECT]
foafs = ans.withExactSubject(name).withPredicate("seeAlso")
foafs.write()
if not foafs.result:
    sys.exit("First friend has no \"seeAlso\" entry; nothing to download")

# get uri from foaf entry and dl foaf file
# [1:-1] drops the first and last character of the object -- presumably the
# angle brackets around the URI; TODO confirm against sinkUtils' output.
foafuri = foafs.result[0][OBJECT][1:-1]
print(foafuri)
# Download before creating the output file so that a failed fetch does not
# leave an empty out#.txt behind.
foafstr = foafing.getFoaf(foafuri)

# get an unused file of format "out#.txt"
n = 1
while os.access("out%d.txt" % n, os.F_OK):
    n += 1
# 'with' closes the output file even if the write fails.
with open("out%d.txt" % n, 'w') as fofile:
    fofile.write(foafstr)