Notebook

In [1]:

import sys
sys.path.append('.')
sys.path.append('..')
from problem_loader import ProblemLoader
from helpers import obfuscate
from collections import namedtuple

# Download location of each problem's input file, keyed by problem name.
# NOTE(review): this is a signed URL whose query string carries an `Expires`
# timestamp, so it presumably stops working after that time -- confirm and
# refresh the link if the download fails.
data_urls = {
    'problem1': 'https://d18ky98rnyall9.cloudfront.net/_dcf1d02570e57d23ab526b1e33ba6f12_dijkstraData.txt?Expires=1623542400&Signature=Uw4b5BkLSsE2gX7900tYHPA-9oLQ1r1MiC76kkAVKkYiqjKsL26WwOzThW9xmES6dAilHGVEQ1oE3l2WDW8c9Qy8opwN799Dut-54931i1lZPkcQq96dDFWRLsgwAiWjN3u9X7MAQxJpDiI47YUyAhHjY5fyc6l5nhZZyeJ2UGA_&Key-Pair-Id=APKAJLTNE6QMUY6HBC5A'
}

Problem 1¶

The file contains an adjacency list representation of an undirected weighted graph with 200 vertices labeled 1 to 200. Each row starts with a vertex label, followed by one tab-separated "neighbor,length" pair for every vertex adjacent to that vertex, giving the neighbor's label and the length of the connecting edge. For example, the 6th row has 6 as the first entry indicating that this row corresponds to the vertex labeled $6$. The next entry of this row "141,8200" indicates that there is an edge between $vertex_6$ and $vertex_{141}$ that has length $8200$. The rest of the pairs of this row indicate the other vertices adjacent to $vertex_6$ and the lengths of the corresponding edges.

Your task is to run Dijkstra's shortest-path algorithm on this graph, using $1$ (the first vertex) as the source vertex, and to compute the shortest-path distances between 1 and every other vertex of the graph. If there is no path between a $vertex_v$ and $vertex_1$, we'll define the shortest-path distance between $1$ and $v$ to be $1000000$.

You should report the shortest-path distances to the following ten vertices, in order: $$7,37,59,82,99,115,133,165,188,197$$
You should encode the distances as a comma-separated string of integers. So if you find that all ten of these vertices except 115 are at distance 1000 away from vertex 1 and 115 is 2000 distance away, then your answer should be $$1000,1000,1000,1000,1000,2000,1000,1000,1000,1000$$ Remember the order of reporting DOES MATTER, and the string should be in the same order in which the above ten vertices are given. The string should not contain any spaces. Please type your answer in the space provided.

IMPLEMENTATION NOTES:¶

This graph is small enough that the straightforward $O(mn)$ time implementation of Dijkstra's algorithm should work fine.

OPTIONAL:¶

For those of you seeking an additional challenge, try implementing the heap-based version. Note this requires a heap that supports deletions, and you'll probably need to maintain some kind of mapping between vertices and their positions in the heap.

In [2]:

from collections import defaultdict

# One outgoing edge of a vertex: destination vertex label and edge length.
Edge = namedtuple('Edge', ['to', 'weight'])

def process_weighted_adjacencies(data):
    """Parse a weighted adjacency list into ``{vertex: [Edge, ...]}``.

    Every non-blank line holds tab-separated fields: the vertex label first,
    then zero or more ``dest,weight`` pairs.  Labels and weights are parsed
    as integers.

    Args:
        data: the raw file content, either UTF-8 encoded ``bytes`` or ``str``.

    Returns:
        A plain ``dict`` mapping each vertex label to the list of its
        ``Edge`` tuples, in file order.  A vertex whose row lists no edges
        maps to an empty list.

    Raises:
        ValueError: if a label, destination or weight is not an integer, or
            a pair is not of the form ``dest,weight``.
    """
    if isinstance(data, (bytes, bytearray)):
        data = data.decode('utf-8')

    res = defaultdict(list)
    for line in data.split('\n'):
        # strip() drops the trailing tab / carriage return the data rows may
        # carry; empty fields (e.g. from doubled tabs) are ignored.
        fields = [f for f in line.strip().split('\t') if f.strip()]
        if not fields:
            continue                      # blank or whitespace-only line
        vertex = int(fields[0])
        adjacent = res[vertex]            # registers isolated vertices too
        for pair in fields[1:]:
            dest, w = pair.split(',')
            adjacent.append(Edge(to=int(dest), weight=int(w)))
    return dict(res)

In [3]:

# Load the problem 1 graph through ProblemLoader, handing it
# process_weighted_adjacencies as the preprocessor for the raw data.
# NOTE(review): `fname` presumably names a local cache file for the
# download -- confirm against problem_loader.
values = ProblemLoader(
    data_urls['problem1'], 
    fname="edges.p", 
    preprocessor=process_weighted_adjacencies
).fetch()
# Sanity check: show the parsed adjacency list of vertex 2.
print(values[2])

[Edge(to=42, weight=1689), Edge(to=127, weight=9365), Edge(to=5, weight=8026), Edge(to=170, weight=9342), Edge(to=131, weight=7005), Edge(to=172, weight=1438), Edge(to=34, weight=315), Edge(to=30, weight=2455), Edge(to=26, weight=2328), Edge(to=6, weight=8847), Edge(to=11, weight=1873), Edge(to=17, weight=5409), Edge(to=157, weight=8643), Edge(to=159, weight=1397), Edge(to=142, weight=7731), Edge(to=182, weight=7908), Edge(to=93, weight=8177)]

In [4]:

from dataclasses import dataclass, field
from typing import Any
from heapq import heappush, heappop, heapify

@dataclass(order=True)
class PrioritizedItem:
    """Heap entry that is ordered (and compared for equality) by priority only."""
    priority: int
    # The payload is excluded from comparisons, so two entries with the same
    # priority compare equal even when they carry different items.
    item: Any=field(compare=False)

class PriorityQueue():
    """Min-priority queue of unique, hashable tasks with re-prioritisation.

    ``pq`` is a binary heap of ``PrioritizedItem`` entries and
    ``entry_finder`` maps each queued task to its *live* entry.  Changing a
    task's priority pushes a fresh entry and leaves the old one in the heap
    as a stale entry (lazy deletion); an entry is stale when
    ``entry_finder`` no longer points at that exact object.  Entries are
    matched by identity, never by ``==``, because ``PrioritizedItem``
    equality looks at the priority only.

    Invariant kept by every public method: the heap is either empty or has a
    live entry on top, so ``len(queue.pq) > 0`` is true exactly when a task
    can still be extracted.
    """
    def __init__(self):
        self.pq = []              # heap of PrioritizedItem (may hold stale entries below the top)
        self.entry_finder = {}    # task -> its live PrioritizedItem

    def _discard_stale_top(self):
        """Pop entries off the top of the heap until the top one is live."""
        while self.pq and self.entry_finder.get(self.pq[0].item) is not self.pq[0]:
            heappop(self.pq)

    def add_with_priority(self, task, priority=0):
        """Queue ``task`` with ``priority``.

        If ``task`` is already queued its previous entry becomes stale, i.e.
        the task is simply re-prioritised.
        """
        entry = PrioritizedItem(priority=priority, item=task)
        self.entry_finder[task] = entry
        heappush(self.pq, entry)
        # The superseded entry (if any) may have been sitting on top.
        self._discard_stale_top()

    def decrease_priority(self, task, priority=0):
        """Replace the priority of an already queued ``task``.

        Raises:
            KeyError: if ``task`` is not currently in the queue.
        """
        if task not in self.entry_finder:
            raise KeyError(task)
        self.add_with_priority(task, priority)

    def extract_min(self):
        """Remove and return the task with the smallest priority.

        Raises:
            KeyError: if the queue holds no task.
        """
        self._discard_stale_top()
        if not self.pq:
            raise KeyError('pop from an empty priority queue')
        entry = heappop(self.pq)
        del self.entry_finder[entry.item]
        # Restore the invariant so callers can test `len(self.pq) > 0`.
        self._discard_stale_top()
        return entry.item

In [5]:

def dijkstra(graph=None, start=None, target=None, max_distance=1000000):
    """Run Dijkstra's shortest-path algorithm from ``start``.

    Args:
        graph: mapping ``vertex -> iterable of edges``; each edge exposes
            ``.to`` (destination vertex) and ``.weight`` (edge length).
            ``None`` is treated as an empty graph.
        start: source vertex.
        target: vertex to stop at, or ``None`` to process the whole graph.
        max_distance: distance reported for vertices without a path from
            ``start``.

    Returns:
        With a ``target``: the tuple ``(distance, path)`` where ``path``
        lists the vertices after ``start`` up to and including ``target``
        (empty when there is no path).  A target that is not a vertex of
        ``graph`` yields ``(max_distance, [])`` unless it is ``start``
        itself, which yields ``(0, [])``.
        With ``target=None``: the tuple ``(distances, predecessors)`` of
        dicts covering every vertex.
    """
    if graph is None:
        graph = {}
    X, A, B, Q = preprocess_graph(graph, start, max_distance)
    done = set(X)                      # processed vertices; set gives O(1) membership tests

    while len(Q.pq) > 0:
        u = Q.extract_min()            # closest vertex not processed yet
        if target is not None and u == target:
            return A[target], get_path(start=start, target=target, prev=B)
        done.add(u)
        for edge in graph[u]:
            v = edge.to
            if v in done:
                continue               # v's distance is already final
            d = A[u] + edge.weight
            if d < A[v]:
                A[v] = d
                B[v] = u
                Q.decrease_priority(v, d)

    if target is None:
        return A, B
    # The target was never extracted, so it was never queued.
    return A.get(target, max_distance), []

def preprocess_graph(graph, start, max_distance):
    """Create the initial bookkeeping structures for a Dijkstra run.

    Returns the tuple ``(processed, distances, predecessors, queue)``:
    an empty list of processed vertices, the distance table (0 for
    ``start``, ``max_distance`` for every other key of ``graph``), the
    predecessor table (``None`` for every key other than ``start``) and a
    PriorityQueue holding every key of ``graph`` at its initial distance.
    Keys are converted with ``int`` before use.
    """
    processed = []
    distances = {start: 0}
    predecessors = {}
    queue = PriorityQueue()

    for vertex in map(int, graph):
        if vertex != start:
            # Not reached yet: unknown distance, no predecessor.
            distances[vertex] = max_distance
            predecessors[vertex] = None
        queue.add_with_priority(vertex, distances[vertex])

    return processed, distances, predecessors, queue


def get_path(start, target, prev):
    """Rebuild the path to ``target`` from a predecessor table.

    Args:
        start: source vertex; it is not included in the result.
        target: vertex the path should end at.
        prev: dict mapping a vertex to its predecessor on the path
            (``None`` or a missing key means "no predecessor").

    Returns:
        The vertices visited after ``start`` up to and including
        ``target``, in travel order.  The list is empty when ``target`` has
        no predecessor (it is unreachable, unknown, or the start itself).
    """
    path = []
    u = target
    if prev.get(u) is None:            # nothing to do unless the vertex was reached
        return path
    while u is not None and u != start:
        path.append(u)                 # collected target-first ...
        u = prev.get(u)                # ... walking back towards the source
    path.reverse()                     # single O(n) reversal instead of insert(0, ...) per step
    return path

In [6]:

# Shortest-path distance from vertex 1 to each vertex requested by the
# problem statement, collected as strings in the required order.
distances = []
# Debugging alternative: iterate over every vertex except the source.
#for v in list(filter(lambda x: x != 1, list(values.keys()))):
for v in [7,37,59,82,99,115,133,165,188,197]:
    # d is the distance to v, p the path returned alongside it (unused here).
    d, p = dijkstra(values, 1, v)
    # Debugging aid: dump the adjacency lists along the path to vertex 59.
    #if v == 59:
    #    print('\n'.join([str(v) + ': ' + str(values[v]) for v in p]))
    distances.append(str(d))

# The comma-separated answer string is passed to the `obfuscate` helper.
# NOTE(review): presumably so the answer is not shown in clear text -- confirm in helpers.
obfuscate(','.join(distances))

In [7]:

### test case
# Small undirected graph with 8 vertices.  Every row is
# "<vertex>\t<neighbor>,<length>\t..."; the rows are spelled with explicit
# "\t" escapes so the leading vertex label of each row cannot get lost.
v2_rows = [
    "1\t2,3\t3,2",
    "2\t1,3\t4,3\t4,7\t6,1",
    "3\t1,2\t5,2\t6,2\t7,2",
    "4\t2,3\t2,7\t8,3\t7,7",
    "5\t3,2\t8,8",
    "6\t2,1\t3,2",
    "7\t3,2\t4,7",
    "8\t4,3\t5,8",
]
v2 = process_weighted_adjacencies("\n".join(v2_rows).encode('utf-8'))
v2_result = dijkstra(v2, 1, 8)
# Shortest route is 1 -> 2 -> 4 -> 8 with length 3 + 3 + 3.
assert v2_result == (9, [2, 4, 8])
v2_result

Out[7]:

(9, [2, 4, 8])