Python code for exploring `foaf` files

Backlink: KatieRivard

Hack hack hack, hackhackhack, hackhack. This is mostly raw first- or second-pass code, with no planning time involved. Read accordingly.

The program takes one command-line argument: the name of a text file holding output saved from foafing, a wget-esque program I wrote. The first line of that file is the URL, followed by the contents of the file at that location. Dumping the first two lines gets rid of the URL label and the HTML version tag heading most foaf files. Probably shouldn't dump the second of those lines, but it hasn't broken anything seriously yet, and this is just poking.

sinkUtils manages the list structure: triples are held in a list of tuples, from which you can ask for everything that has the subject "ham" or the like.

ksort holds my mergeSort program, which may or may not be used in this version.

foafing holds a wget clone used to download a new file; also happens to be the program where you get the text input to this script.

#system libs
import string
import sys
import os
import copy

#local files
import rdfxml
import ksort
import sinkUtils as sinku
import foafing

usestr="Usage: \n\t$ ./foafproc2.py file"

if len(sys.argv) < 2:
    print usestr
    sys.exit()
    
fname = sys.argv[1]
print "Reading from", fname

f = open(fname, 'r')
# dump the two garbage lines @beginning of file(this should
#  be fixed at some point to be more reactive)
f.readline(), f.readline()
s = f.read()
f.close()

# Local aliases for the tuple-index constants defined in sinkUtils, so a
# triple can be indexed as entry[SUBJECT], entry[PREDICATE], entry[OBJECT].
SUBJECT, PREDICATE, OBJECT = sinku.SUBJECT, sinku.PREDICATE, sinku.OBJECT

# "subtracts" two lists; ie, find all items in lsta not in lstb.
## Like subtraction, ORDER MATTERS.
def listSub(lsta, lstb):
    """Return a new list of the items of lsta that do not occur in lstb.

    The order of lsta is kept, and an item repeated in lsta is repeated in
    the result.  Neither argument is modified.
    """
    return [item for item in lsta if item not in lstb]

# slices just one attribute for each list item
def vertSlice(sink, spo):
    """Return a list holding element number spo of every entry in sink.result.

    sink -- any object with an iterable "result" attribute of indexable entries
    spo  -- the index to pick from each entry (e.g. SUBJECT, PREDICATE, OBJECT)
    """
    return [entry[spo] for entry in sink.result]

######################
# Process file
######################

# actual parsing
ans = rdfxml.parseRDF(s, base=None, sink=sinku.Sink())

#random facts: #of nodes, #entries, #names
entryKeys = ans.withObject("_:id")
print ans.countNodes(), "nodes in file,", len(entryKeys.result), "unnamed"
print len(ans.result), "entries in file"
names = ans.withPredicate("name")
print len(names.result), "\"name\" entries"

# get people "known" to this person
baseNode = names.result[0][SUBJECT] # baseNode is first Name in file(cross fingers, no mistakes yet)
baseNodeEntries = ans.withSubject(baseNode)
baseKnows = baseNodeEntries.withPredicate("knows")
frendz = copy.copy(baseKnows)

print len(baseKnows.result), "\"knows\" entries in base node"

# rationalize with "names" set and eliminate non-named known persons(?)
## this just so happens to eliminate the "seeAlso" entries which were
## not actually people's foaf files, which is convenient but probably
## not trustworthy.
print "Disagreements:"
for i in listSub(vertSlice(names, SUBJECT), vertSlice(baseKnows, OBJECT)):
    print "\t", i, "\t\tin \"names\""
for i in listSub(vertSlice(baseKnows, OBJECT), vertSlice(names, SUBJECT)):
    print "\t", i, "\t\tin base node \"knows\""
    for j in frendz.withObject(i).result:
        frendz.result.remove(j)

print
print "Friends:"
frendz.write()

# get foaf entry for top name in friends list
foafs = None
for i in frendz.result:
    name = i[OBJECT]
    foafs = ans.withExactSubject(name).withPredicate("seeAlso")
    foafs.write()
    break

# get an unused file of format "out#.txt"
n = 1
while os.access("out%d.txt" % n, os.F_OK):
    n += 1
fofile = file("out%d.txt" % n, 'w')

# get uri from foaf entry and dl foaf file into "out" file
foafuri = foafs.result[0][OBJECT][1:-1]
print foafuri
foafstr = foafing.getFoaf(foafuri)
fofile.write(foafstr)

fofile.close()