cs215 ยป

Hopefully the code is commented well enough to be self-explanatory.

```
#############
# first, construct the weighted graph
#
# a version of make_link that increments that
# value of the link if we make multiple nodes
#
# For the marvel graph, this will be used to count
# how many times character1 is in the same comic
# as character2
def make_link(G, node1, node2):
if node1 not in G:
G[node1] = {}
if node2 not in G[node1]:
(G[node1])[node2] = 0
(G[node1])[node2] += 1
if node2 not in G:
G[node2] = {}
if node1 not in G[node2]:
(G[node2])[node1] = 0
(G[node2])[node1] += 1
return G
# Reads the graph in CSV format. Each line is in edge
# Keeps a list of characters
tsv = csv.reader(open("uniq_edges.tsv"), delimiter='\t')
# loop through the raw data creating a bipartite graph
marvelG = {}
characters = set()
for (char, comic) in tsv:
if char not in characters:
characters.add(char)
make_link(marvelG, char, comic)
# now, loop through the bipartite graph
# creating a graph of connected characters
# making a link (which increments the weight)
# everytime we see two characters in the same
# comic together
charG = {}
for char1 in characters:
for book in marvelG[char1]:
for char2 in marvelG[book]:
# don't want to double count
# so make this check
if char1 < char2:
make_link(charG, char1, char2)
# loop through charG and change the weights
# in charG to be inverse
for char1 in charG:
char1 = charG[char1]
for char2 in char1:
char1[char2] = 1.0 / char1[char2]
################
# now find shortest paths
#
# here is an implementation of breadth first search
# which will be used to find the shortest path by hops
from collections import deque
def bfs(G, node):
final_dist = {node:(0, node, None)}
# breadth first is a queue (fifo)
# and so using deque is more efficient
# then a list
open_list = deque([node])
while len(open_list) > 0:
node = open_list.popleft()
dist, _, _ = final_dist[node]
for neighbor in G[node]:
if neighbor in final_dist:
continue
final_dist[neighbor] = (dist + 1, neighbor, node)
open_list.append(neighbor)
return final_dist
# this is a standard implementation of dijkstras
# with one exception - the distance at each node
# is the sum of the edge weights along with the number
# of hops taken to get there. This means
# that if two paths have the same distance, the one
# with the least number of hops will be selected
from heap import *
# when following an edge to a new node, need to
# add the distance and increment the hop-count
# this function accomplishes that
def add_dist(dista, tupleb):
# adds dista to the distance of tupleb
# and increments the hop-count of tupleb
return (dista + tupleb[0], 1 + tupleb[1])
def dijkstra(graph, node):
# (0, 0) is the initial weight - the value is zero and the hops are zero
first_entry = ((0, 0), node, None) #distance, node, parent/previous
heap = [first_entry]
# location is a dictionary that tracks the location of each entry
location = {first_entry:0}
# dist_so_far is the frontier. A mapping of nodes we've explored
# and the shortest paths we've seen so far to get there
dist_so_far = {node:first_entry}
# this is nodes we've fully explored
final_dist = {}
while len(dist_so_far) > 0:
# find the closest un-explored node
w = heappopmin(heap, location)
# a stupid little optimization
node = w[1]
dist = w[0]
# lock it down!
del dist_so_far[node]
# final_dist[node] = (dist, node, w.parent)
final_dist[node] = w
# look at its neighbors
for x in graph[node]:
# but only those that haven't been locked down
if x not in final_dist:
# for the marvel graph, dist is tuple
# and graph[node][x] is just a number
new_dist = add_dist(graph[node][x], dist)
new_entry = (new_dist, x, node)
if x not in dist_so_far:
# we haven't see this yet
# so add to the heap and the dictionary
dist_so_far[x] = new_entry
insert_heap(heap, new_entry, location)
# this comparision uses the fact that
# tuples are comparable
elif new_entry < dist_so_far[x]:
# the new distance is less then the
# best known
# and then add a new entry
# for this node
decrease_val(heap, location, dist_so_far[x], new_entry)
dist_so_far[x] = new_entry
return final_dist
# given a `dist` object (a mapping of a node to the shortest distance
# to that node and its parent) and a `target` node, return the path
# needed to get to the target
def get_parent(pair): return pair[2]
def find_path(dist, target):
node = target
path = [target]
while True:
prev = get_parent(dist[node])
if prev is None:
# We've rached our target, so return
# the path
return path
path.append(prev)
node = prev
# a list to store my answers in
answers = [] #store a tuple ((char1, char2), (char_path, hop_dist))
# the characters that the problem asks us to look at
chars = ['SPIDER-MAN/PETER PAR',
'GREEN GOBLIN/NORMAN ',
'WOLVERINE/LOGAN ',
'PROFESSOR X/CHARLES ',
'CAPTAIN AMERICA']
for char1 in chars:
# calculate the distance to each other character
char_dist = dijkstra(charG, char1)
# and calculate the hops required
hop_dist = bfs(charG, char1)
for char2 in char_dist:
if char1 == char2:
continue
char_path = find_path(char_dist, char2)
hop_path = find_path(hop_dist, char2)
# if the weighted path is longer then the hop path, we need
# to save it
if len(char_path) > len(hop_path):
answers.append(((char1, char2), (char_path, hop_path)))
# and now we print out the answer
print len(answers)
```