#!/usr/bin/env python # coding: utf-8 # # Voting Strengths # # Chapter 10 of [Real World Algorithms](https://mitpress.mit.edu/books/real-world-algorithms). # # --- # # > Panos Louridas
# > Athens University of Economics and Business # ## The Schulze Method # # To start with the Schulze method, we need a way to input ballots. # # We assume that the ballots are saved in a file, one ballot per line. In each line, that is, ballot, the candidates are listed in decreasing preference. # # We'll use the file [ballots.csv](ballots.csv) as an example. The file is in [Comma-separated Values (CSV)](https://en.wikipedia.org/wiki/Comma-separated_values) format. # # So, the first line is: # ``` # D,B,A,C # ``` # which means that the first preference of the voter is candidate D, then B, then A, then C. # Although seemingly simple, CSV is a treacherous format. # # There are many details than one would think at first sight; for example, what happens if a field in the line contains a comma, could we have different delimiters, etc. # # For that reason, you should always use a ready-made library for handling CVS files. # # Our ballots file is simple, but there is no reason not to use [Python's CSV library](https://docs.python.org/3/library/csv.html) anyway. # # We'll get all the ballots and we'll put them into a list. # In[1]: import csv import pprint with open('ballots.csv') as ballots_file: reader = csv.reader(ballots_file) ballots = list(reader) pprint.pprint(ballots, width=30) # The first step in the Schulze method is to calculate the pairwise preferences of the voters regarding the candidates. # # That is an array $P$, such that element $P[c_j, c_k]$ shows how many voters prefer candidate $c_j$ to candidate $c_k$. # # As our candidates are given by characters, we'll assign a number, starting from zero, to each of the candidates, so that we'll be able to use integer-based indices. # In[2]: from collections import defaultdict candidates = { 'A': 0, 'B': 1, 'C': 2, 'D': 3 } def calc_pairwise_prefs(ballots, candidates): # Initialize p to 0. p = [ [0 for j in candidates.keys() ] for i in candidates.keys() ] # Take each ballot in turn. for ballot in ballots: # Take each candidate in the ballot. for i, c_i in enumerate(ballot): # Take all following candidates in the ballot. for c_j in ballot[i+1:]: # Add to the score of c_i vs c_j. p[candidates[c_i]][candidates[c_j]] += 1 return p p = calc_pairwise_prefs(ballots, candidates) pprint.pprint(p, width=20) # The second step in the Schulze method is to create an election graph. # # This will be represented by an adjacency matrix. # # If for two candidates $c_i$ and $c_j$ the number $P[c_i, c_j]$ of voters that prefer $c_i$ over $c_j$ is greater than the number of voters $P[c_j, c_i]$ that prefer $c_j$ over $c_i$, we add the link $c_i \rightarrow c_j$ and we assign the number $P[c_i, c_j] - P[c_j, c_i]$ as the weight of the link $c_i \rightarrow c_j$. # # We'll assign the value $-1$ for all other pairs (or $-\infty$, but as $-1$ is not a valid weight, it will also do). # In[3]: def create_election_graph(p): n = len(p) g = [ [-1 for j in range(n) ] for i in range(n) ] for i in range(n): for j in range(n): if p[i][j] > p[j][i]: g[i][j] = p[i][j] - p[j][i] return g # We can then see the adjacency matrix for our election example: # In[4]: g = create_election_graph(p) pprint.pprint(g, width=20) # With the adjacency matrix available, we can implement the calculation of the strongest paths. # # The function `calc_strongest_paths(p, candidates)` will take as input the adjacency matrix and the candidates and will return: # * `s`, a matrix of size $n \times n$ such that `s[i][j]` is the strongest path between nodes `i` and `j`. # * `pred`, a matrix of size $n \times n$ such that `pred[i][j]` is the predecessor of node `i` in the strongest path to node `j`. # # The algorithm finds the strongest paths iteratively, by allowing to use one additional node as intermediate node in the paths in each iteration. # In[5]: def calc_strongest_paths(p): n = len(p) # Initialize strongest paths array. s = [ [ -1 for j in range(n) ] for i in range(n) ] # Initialize predecessors array. pred = [ [ -1 for j in range(n) ] for i in range(n) ] # Initially the strength of the path s[i][j] is simply # the difference in the weights between p[i][j] # and p[j][i]. for i in range(n): for j in range(n): if p[i][j] > p[j][i]: s[i][j] = p[i][j] - p[j][i] pred[i][j] = i # For each k, i, j, such that the path from i to j # can be strengthened by taking the detour from i to k # and k to j adjust the path and the predecessor. # This can happen at most n times. for k in range(n): for i in range(n): if i != k: for j in range(n): if j != i: if s[i][j] < min(s[i][k], s[k][j]): s[i][j] = min(s[i][k], s[k][j]) pred[i][j] = pred[k][j] return (s, pred) # We now apply `calc_strongest_paths(p)` to our example: # In[6]: s, pred = calc_strongest_paths(p) print('strongest paths') pprint.pprint(s, width=30) print('predecessors') pprint.pprint(pred, width=30) # The final step in the Schulze algorithm is finding, for each candidate the candidates that are less popular. # # That is a matter of comparing `s[i][j]` and `s[j][i]`. # # We implement the logic in `calc_results(s)`. # In[7]: def calc_results(s): n = len(s) wins = [ [] for i in range(n) ] for i in range(n): for j in range(n): if i != j: if s[i][j] > s[j][i]: wins[i].append(j) return wins # Finally, we can find the winner of the election: # In[8]: wins = calc_results(s) print(wins) # * Candidate `A` wins over `C`. # * Candidate `B` wins over `A`, `C`. # * Candidate `D` wins over `A`, `B`, `C`. # * Candidate `D` wins the election. # ## The Schulze Method: An Alternative # # We can implement the Schulze method with an alternative implementation, in which instead of an adjacency matrix we use a dictionary to represent the preferences. # # The logic is entirely the same. # We implement `calc_pairwise_prefs(ballots)` to return a dictionary `p` such that `p[(c_i, c_j)]` shows how many voters prefer candidate `c_i` to candidate `c_j`. # # The keys to the dictionary are the tuples `(c_i, c_j)`. # # Note that we do not need to work with indices instead of the actual voters. # # We use a `defaultdict(int)`, so the dictionary will return 0 if `(c_i, c_j)` is not a key. # # Essentially this is like initializing the preferences matrix to zero. # In[9]: from collections import defaultdict def calc_pairwise_prefs(ballots): p = defaultdict(int) for ballot in ballots: for i, c_i in enumerate(ballot): for c_j in ballot[i+1:]: p[(c_i, c_j)] += 1 return p p = calc_pairwise_prefs(ballots) pprint.pprint(p) # The printout of the preferences dictionary is less elegant than the printout of the preferences matrix that we had before. # # We can fix that by writing a short helper function that will output our dictionaries in matrix format. # In[10]: p = calc_pairwise_prefs(ballots) import itertools candidates = ['A', 'B', 'C', 'D'] def print_matrix(candidates, matrix, col_width=5): print(' ', end="") num_candidates = len(candidates) for candidate in candidates: print(f'{candidate:^{col_width}}', end="") i = 0 for c1, c2 in itertools.product(candidates, repeat=2): if i % num_candidates == 0: print() print(f'{candidates[i // num_candidates]:<2}', end="") print(f'{matrix[(c1, c2)]:^{col_width}}', end="") i += 1 print() print_matrix(candidates, p, 5) # We then create the election graph. # # We use again a dictionary to store the graph. The keys of the dictionary are node tuples and the values are differences in preferences. # # Note that not all tuples are actually stored in the dictionary. We store explicitly only the tuples with a positive difference in preferences. # # We use a `defaultdict(lambda:-1)`, which will return -1 for any other (non-existing) key, so for all other couples. # In[11]: def create_election_graph(p): g = defaultdict(lambda:-1) for (c_i, c_j), pref in p.items(): if pref > p[(c_j, c_i)]: g[(c_i, c_j)] = pref - p[(c_j, c_i)] return g # In this way we save space. # # We can still use `print_matrix(candidates, g, 5)` to print the dictionary in matrix format. # # Only those entries that are not equal to -1 are actually stored in the dictionary. # In[12]: g = create_election_graph(p) print_matrix(candidates, g, 5) # We'll use again `defaultdict`s to implement `calc_strongest_paths(p, candidates)`. # # We need to pass `candidates` to the function as we no longer use numerical indices, but the actual candidates. # In[13]: def calc_strongest_paths(p, candidates): # Initialize strongest paths dict. s = defaultdict(lambda:-1) # Initialize predecessors dict. pred = defaultdict(lambda:-1) # Initially the strength of the path from c_i to c_j is simply # the difference in the weights between s[(c_i, c_j)] # and s[(c_j, c_i)]. for (c_i, c_j), pref in p.items(): if pref > p[(c_j, c_i)]: s[(c_i, c_j)] = pref - p[(c_j, c_i)] pred[(c_i, c_j)] = c_i # For each c_k, c_i, c_j, such that the path from c_i to c_j # can be strengthened by taking the detour from c_i to c_k # and then to c_k and c_j adjust the path and the predecessor. # This can happen at most as many times as there are candidates. for c_k in candidates: for c_i in candidates: if c_i != c_k: for c_j in candidates: if c_j != c_i: if s[(c_i, c_j)] < min(s[(c_i, c_k)], s[(c_k, c_j)]): s[(c_i, c_j)] = min(s[(c_i, c_k)], s[(c_k, c_j)]) pred[(c_i, c_j)] = pred[(c_k, c_j)] return (s, pred) # We now apply `calc_strongest_paths(p, candidates)` to our example: # In[14]: s, pred = calc_strongest_paths(p, candidates) print('strongest paths') print_matrix(candidates, s, 5) print('predecessors') print_matrix(candidates, pred, 5) # Finally, we calculate the results. # # We do as before, but we return a dictionary instead. # # The keys are the candidates. # # The value of a key is a list containing the candidates that lose to the particular candidate indicated by the key. # In[15]: def calc_results(s): wins = defaultdict(list) for (c_i, c_j), v in s.items(): if s[(c_i, c_j)] > s[(c_j, c_i)]: wins[c_i].append(c_j) return wins # So, here are the results again: # In[16]: wins = calc_results(s) pprint.pprint(wins) # ## Floyd-Warshall All Pairs Shortest Paths # # The strongest paths is a variation of the Floyd-Warshall all pairs shortest paths algorithm. # # As with the strongest paths, it finds shortest paths by using more and more nodes as intermediaries. # In[23]: import sys MAX_INT = sys.maxsize def floyd_warshall(w): n = len(w) # Initialize distances matrix. dist = [ [ MAX_INT for j in range(n) ] for i in range(n) ] # Initialize predecessors matrix. pred = [ [ -1 for j in range(n) ] for i in range(n) ] # Initially the length of the path from i to j is simply # the weight between w[i][j], if it exists, and then # i is the predecessor of j. for i in range(n): for j in range(n): if w[i][j] != 0: dist[i][j] = w[i][j] pred[i][j] = i # For each k, i, j, such that the path from i to j # can be shortened by taking the detour from i to k # and k to j adjust the path and the predecessor. # This can happen at most n times. for k in range(n): for i in range(n): if i != k: for j in range(n): if j != i: if (dist[i][k] != MAX_INT and dist[k][j] != MAX_INT and dist[i][j] > dist[i][k] + dist[k][j]): dist[i][j] = dist[i][k] + dist[k][j] pred[i][j] = pred[k][j] return (dist, pred) # We'll use the algorithm on the familiar [traffic_grid_graph.txt](traffic_grid_graph.txt) algorithm. # # # Here is the function that reads the graph: # In[18]: def read_graph(filename, directed=False): graph = {} with open(filename) as input_file: for line in input_file: parts = line.split() if len(parts) != 3: continue # not a valid line, ignore [n1, n2, w] = [ int (x) for x in parts ] if n1 not in graph: graph[n1] = [] if n2 not in graph: graph[n2] = [] graph[n1].append((n2, w)) if not directed: graph[n2].append((n1, w)) return graph # We go ahead and read it: # In[19]: g = read_graph('traffic_grid_graph.txt') pprint.pprint(g) # Our implementation of the Floyd-Warshall algorithms requires an adjacency matrix as input. # # So, we'll use a function that converts the graph from an adjacency list representation to an adjacency matrix one. # In[20]: def adjlist_to_matrix(g): m = [ [ MAX_INT for j in g.keys() ] for i in g.keys() ] for u in g.keys(): m[u][u] = 0 for u in g.keys(): for (v, w) in g[u]: m[u][v] = w return m # We do the conversion, and then we run the Floyd-Warshall algorithm. # In[21]: m = adjlist_to_matrix(g) dist, pred = floyd_warshall(m) for s in sorted(g.keys()): print('starting node:', s) print(pred[s]) print(dist[s]) # You may have noticed than the distance of a node to itself has been set to `MAX_INT`. # # If that bothers us, and we like it to fix it to zero, that's easy to do: # In[22]: for i in range(len(dist)): dist[i][i] = 0 for s in sorted(g.keys()): print('starting node:', s) print(pred[s]) print(dist[s])