python - Why does my code keep throwing a KeyError?


Keywords:python 


Question: 

I can't, for the life of me, figure out why my code is throwing a KeyError. I feel like the values should be there - I added them all in the first for loop and the lists I added them to aren't empty (I've checked with print statements). So why does line 54 keep relentlessly throwing a KeyError? I'm sure that I've just overlooked something, but after working on this all day, I'm pretty stuck.

The functions used (graph(), and shortest_path(), etc.) are here.

edgeData has the following structure:

{string value: [list of string values that are groupings of the values in the dictionary's keys]}

tagUsers has the following structure:

{string value: [grouped lists of string values found in edgeData]}

Thanks in advance.

from collections import defaultdict, deque
import json

class Graph(object):
    def __init__(self):
        self.nodes = set()
        self.edges = defaultdict(list)
        self.distances = {}

    def add_node(self, value):
        self.nodes.add(value)

    def add_edge(self, from_node, to_node, distance):
        self.edges[from_node].append(to_node)
        self.edges[to_node].append(from_node)
        self.distances[(from_node, to_node)] = distance


def dijkstra(graph, initial):
    visited = {initial: 0}
    path = {}

    nodes = set(graph.nodes)

    while nodes:
        min_node = None
        for node in nodes:
            if node in visited:
                if min_node is None:
                    min_node = node
                elif visited[node] < visited[min_node]:
                    min_node = node
        if min_node is None:
            break

        nodes.remove(min_node)
        current_weight = visited[min_node]

        for edge in graph.edges[min_node]:
            try:
                weight = current_weight + graph.distances[(min_node, edge)]
            except:
                continue
            if edge not in visited or weight < visited[edge]:
                visited[edge] = weight
                path[edge] = min_node

    return visited, path


def shortest_path(graph, origin, destination):
    visited, paths = dijkstra(graph, origin)
    full_path = deque()
    _destination = paths[destination]

    while _destination != origin:
        full_path.appendleft(_destination)
        _destination = paths[_destination]

    full_path.appendleft(origin)
    full_path.append(destination)

    return visited[destination]
if __name__ == '__main__':
    edgeData = {'a': ['c', 'd'], 'b': ['d'], 'c': ['d'], 'd': ['a', 'b']}
    tagUsers = {'hashtag1': ['a', 'c', 'd'], 'hashtag2': ['b'], 'hashtag3': ['b', 'd']}
    shortestpaths = {}

    graph = Graph()

    users = []


    # calls function, builds graph with data in edgeData
    for key, value in edgeData.items():
        users.append(key)
        graph.add_node(key)

        for each in value:
            graph.add_edge(key, each, 1)

    # determines how many users used each hashtag
    hashtags = {}
    for key, value in tagUsers.items():
        tmpTags = key
        count = len(value)
        hashtags[key] = count

    # normally determines which hashtag was used the most
    # Here, it's pre-set
    topTag = ['hashtag1']

    # calculates the shortest path from each user to another user
    # that uses the most-used hashtag
    count = 0
    if count < 1:
        for key, value in edgeData.items():
            tmpDict = {}
            for tag in topTag:
                shortest = 10000
                for k, v in tagUsers.items():
                    if k == tag:
                        for each in v:
                            flag = False
                            if key != each
                                flag = True
                                tmpShort = shortest_path(graph, key, each)
                                if tmpShort < shortest:
                                    shortest = tmpShort
                if flag:
                    tmpDict[tag] = shortest
            shortestpaths[key] = tmpDict
            count += 1

The goal is for the data in shortestpaths to contain, for each user, the shortest distance to another user who uses the top hashtags

The function calls are referencing this code, courtesy of mdsrosa on github.

Specifically, the error gets thrown in shortest_path() at `_destination = paths[_destination]


1 Answer: 

Adding some logging to shortest_path shows the problem:

def shortest_path(graph, origin, destination):
    print 'shortest_path     Origin:%s  Destination:%s' % (origin, destination)
    visited, paths = dijkstra(graph, origin)
    full_path = deque()
    print 'paths: %s' % paths
    _destination = paths[destination]

results in:

shortest_path     Origin:a  Destination:a
paths: {'c': 'a', 'b': 'd', 'd': 'a'}
Traceback (most recent call last):
  File "e.py", line 43, in <module>
    tmpShort = dj.shortest_path(graph, key, each)
  File "E:\kiribati\dijkstra.py", line 61, in shortest_path
    _destination = paths[destination]
KeyError: 'a'

You need to handle the edge case where your origin and destination are the same

One option would be to add the check if key == each before calling shortest_path

    for k, v in tagUsers.items():
        if k == tag:
            for each in v:
                if key == each:
                    continue
                tmpShort = dj.shortest_path(graph, key, each)
                if tmpShort < shortest:
                    shortest = tmpShort
    tmpDict[tag] = shortest

Also change your loop variables from k, v, key, value, each to something that describes the actual data