#!/usr/bin/env python
# coding: utf-8
# # Voting Strengths
#
# Chapter 10 of [Real World Algorithms](https://mitpress.mit.edu/books/real-world-algorithms).
#
# ---
#
# > Panos Louridas
# > Athens University of Economics and Business
# ## The Schulze Method
#
# To start with the Schulze method, we need a way to input ballots.
#
# We assume that the ballots are saved in a file, one ballot per line. In each line, that is, ballot, the candidates are listed in decreasing preference.
#
# We'll use the file [ballots.csv](ballots.csv) as an example. The file is in [Comma-separated Values (CSV)](https://en.wikipedia.org/wiki/Comma-separated_values) format.
#
# So, the first line is:
# ```
# D,B,A,C
# ```
# which means that the first preference of the voter is candidate D, then B, then A, then C.
# Although seemingly simple, CSV is a treacherous format.
#
# There are many details than one would think at first sight; for example, what happens if a field in the line contains a comma, could we have different delimiters, etc.
#
# For that reason, you should always use a ready-made library for handling CVS files.
#
# Our ballots file is simple, but there is no reason not to use [Python's CSV library](https://docs.python.org/3/library/csv.html) anyway.
#
# We'll get all the ballots and we'll put them into a list.
# In[1]:
import csv
import pprint
with open('ballots.csv') as ballots_file:
reader = csv.reader(ballots_file)
ballots = list(reader)
pprint.pprint(ballots, width=30)
# The first step in the Schulze method is to calculate the pairwise preferences of the voters regarding the candidates.
#
# That is an array $P$, such that element $P[c_j, c_k]$ shows how many voters prefer candidate $c_j$ to candidate $c_k$.
#
# As our candidates are given by characters, we'll assign a number, starting from zero, to each of the candidates, so that we'll be able to use integer-based indices.
# In[2]:
from collections import defaultdict
candidates = {
'A': 0,
'B': 1,
'C': 2,
'D': 3
}
def calc_pairwise_prefs(ballots, candidates):
# Initialize p to 0.
p = [ [0 for j in candidates.keys() ] for i in candidates.keys() ]
# Take each ballot in turn.
for ballot in ballots:
# Take each candidate in the ballot.
for i, c_i in enumerate(ballot):
# Take all following candidates in the ballot.
for c_j in ballot[i+1:]:
# Add to the score of c_i vs c_j.
p[candidates[c_i]][candidates[c_j]] += 1
return p
p = calc_pairwise_prefs(ballots, candidates)
pprint.pprint(p, width=20)
# The second step in the Schulze method is to create an election graph.
#
# This will be represented by an adjacency matrix.
#
# If for two candidates $c_i$ and $c_j$ the number $P[c_i, c_j]$ of voters that prefer $c_i$ over $c_j$ is greater than the number of voters $P[c_j, c_i]$ that prefer $c_j$ over $c_i$, we add the link $c_i \rightarrow c_j$ and we assign the number $P[c_i, c_j] - P[c_j, c_i]$ as the weight of the link $c_i \rightarrow c_j$.
#
# We'll assign the value $-1$ for all other pairs (or $-\infty$, but as $-1$ is not a valid weight, it will also do).
# In[3]:
def create_election_graph(p):
n = len(p)
g = [ [-1 for j in range(n) ] for i in range(n) ]
for i in range(n):
for j in range(n):
if p[i][j] > p[j][i]:
g[i][j] = p[i][j] - p[j][i]
return g
# We can then see the adjacency matrix for our election example:
# In[4]:
g = create_election_graph(p)
pprint.pprint(g, width=20)
# With the adjacency matrix available, we can implement the calculation of the strongest paths.
#
# The function `calc_strongest_paths(p, candidates)` will take as input the adjacency matrix and the candidates and will return:
# * `s`, a matrix of size $n \times n$ such that `s[i][j]` is the strongest path between nodes `i` and `j`.
# * `pred`, a matrix of size $n \times n$ such that `pred[i][j]` is the predecessor of node `i` in the strongest path to node `j`.
#
# The algorithm finds the strongest paths iteratively, by allowing to use one additional node as intermediate node in the paths in each iteration.
# In[5]:
def calc_strongest_paths(p):
n = len(p)
# Initialize strongest paths array.
s = [ [ -1 for j in range(n) ] for i in range(n) ]
# Initialize predecessors array.
pred = [ [ -1 for j in range(n) ] for i in range(n) ]
# Initially the strength of the path s[i][j] is simply
# the difference in the weights between p[i][j]
# and p[j][i].
for i in range(n):
for j in range(n):
if p[i][j] > p[j][i]:
s[i][j] = p[i][j] - p[j][i]
pred[i][j] = i
# For each k, i, j, such that the path from i to j
# can be strengthened by taking the detour from i to k
# and k to j adjust the path and the predecessor.
# This can happen at most n times.
for k in range(n):
for i in range(n):
if i != k:
for j in range(n):
if j != i:
if s[i][j] < min(s[i][k], s[k][j]):
s[i][j] = min(s[i][k], s[k][j])
pred[i][j] = pred[k][j]
return (s, pred)
# We now apply `calc_strongest_paths(p)` to our example:
# In[6]:
s, pred = calc_strongest_paths(p)
print('strongest paths')
pprint.pprint(s, width=30)
print('predecessors')
pprint.pprint(pred, width=30)
# The final step in the Schulze algorithm is finding, for each candidate the candidates that are less popular.
#
# That is a matter of comparing `s[i][j]` and `s[j][i]`.
#
# We implement the logic in `calc_results(s)`.
# In[7]:
def calc_results(s):
n = len(s)
wins = [ [] for i in range(n) ]
for i in range(n):
for j in range(n):
if i != j:
if s[i][j] > s[j][i]:
wins[i].append(j)
return wins
# Finally, we can find the winner of the election:
# In[8]:
wins = calc_results(s)
print(wins)
# * Candidate `A` wins over `C`.
# * Candidate `B` wins over `A`, `C`.
# * Candidate `D` wins over `A`, `B`, `C`.
# * Candidate `D` wins the election.
# ## The Schulze Method: An Alternative
#
# We can implement the Schulze method with an alternative implementation, in which instead of an adjacency matrix we use a dictionary to represent the preferences.
#
# The logic is entirely the same.
# We implement `calc_pairwise_prefs(ballots)` to return a dictionary `p` such that `p[(c_i, c_j)]` shows how many voters prefer candidate `c_i` to candidate `c_j`.
#
# The keys to the dictionary are the tuples `(c_i, c_j)`.
#
# Note that we do not need to work with indices instead of the actual voters.
#
# We use a `defaultdict(int)`, so the dictionary will return 0 if `(c_i, c_j)` is not a key.
#
# Essentially this is like initializing the preferences matrix to zero.
# In[9]:
from collections import defaultdict
def calc_pairwise_prefs(ballots):
p = defaultdict(int)
for ballot in ballots:
for i, c_i in enumerate(ballot):
for c_j in ballot[i+1:]:
p[(c_i, c_j)] += 1
return p
p = calc_pairwise_prefs(ballots)
pprint.pprint(p)
# The printout of the preferences dictionary is less elegant than the printout of the preferences matrix that we had before.
#
# We can fix that by writing a short helper function that will output our dictionaries in matrix format.
# In[10]:
p = calc_pairwise_prefs(ballots)
import itertools
candidates = ['A', 'B', 'C', 'D']
def print_matrix(candidates, matrix, col_width=5):
print(' ', end="")
num_candidates = len(candidates)
for candidate in candidates:
print(f'{candidate:^{col_width}}', end="")
i = 0
for c1, c2 in itertools.product(candidates, repeat=2):
if i % num_candidates == 0:
print()
print(f'{candidates[i // num_candidates]:<2}', end="")
print(f'{matrix[(c1, c2)]:^{col_width}}', end="")
i += 1
print()
print_matrix(candidates, p, 5)
# We then create the election graph.
#
# We use again a dictionary to store the graph. The keys of the dictionary are node tuples and the values are differences in preferences.
#
# Note that not all tuples are actually stored in the dictionary. We store explicitly only the tuples with a positive difference in preferences.
#
# We use a `defaultdict(lambda:-1)`, which will return -1 for any other (non-existing) key, so for all other couples.
# In[11]:
def create_election_graph(p):
g = defaultdict(lambda:-1)
for (c_i, c_j), pref in p.items():
if pref > p[(c_j, c_i)]:
g[(c_i, c_j)] = pref - p[(c_j, c_i)]
return g
# In this way we save space.
#
# We can still use `print_matrix(candidates, g, 5)` to print the dictionary in matrix format.
#
# Only those entries that are not equal to -1 are actually stored in the dictionary.
# In[12]:
g = create_election_graph(p)
print_matrix(candidates, g, 5)
# We'll use again `defaultdict`s to implement `calc_strongest_paths(p, candidates)`.
#
# We need to pass `candidates` to the function as we no longer use numerical indices, but the actual candidates.
# In[13]:
def calc_strongest_paths(p, candidates):
# Initialize strongest paths dict.
s = defaultdict(lambda:-1)
# Initialize predecessors dict.
pred = defaultdict(lambda:-1)
# Initially the strength of the path from c_i to c_j is simply
# the difference in the weights between s[(c_i, c_j)]
# and s[(c_j, c_i)].
for (c_i, c_j), pref in p.items():
if pref > p[(c_j, c_i)]:
s[(c_i, c_j)] = pref - p[(c_j, c_i)]
pred[(c_i, c_j)] = c_i
# For each c_k, c_i, c_j, such that the path from c_i to c_j
# can be strengthened by taking the detour from c_i to c_k
# and then to c_k and c_j adjust the path and the predecessor.
# This can happen at most as many times as there are candidates.
for c_k in candidates:
for c_i in candidates:
if c_i != c_k:
for c_j in candidates:
if c_j != c_i:
if s[(c_i, c_j)] < min(s[(c_i, c_k)], s[(c_k, c_j)]):
s[(c_i, c_j)] = min(s[(c_i, c_k)], s[(c_k, c_j)])
pred[(c_i, c_j)] = pred[(c_k, c_j)]
return (s, pred)
# We now apply `calc_strongest_paths(p, candidates)` to our example:
# In[14]:
s, pred = calc_strongest_paths(p, candidates)
print('strongest paths')
print_matrix(candidates, s, 5)
print('predecessors')
print_matrix(candidates, pred, 5)
# Finally, we calculate the results.
#
# We do as before, but we return a dictionary instead.
#
# The keys are the candidates.
#
# The value of a key is a list containing the candidates that lose to the particular candidate indicated by the key.
# In[15]:
def calc_results(s):
wins = defaultdict(list)
for (c_i, c_j), v in s.items():
if s[(c_i, c_j)] > s[(c_j, c_i)]:
wins[c_i].append(c_j)
return wins
# So, here are the results again:
# In[16]:
wins = calc_results(s)
pprint.pprint(wins)
# ## Floyd-Warshall All Pairs Shortest Paths
#
# The strongest paths is a variation of the Floyd-Warshall all pairs shortest paths algorithm.
#
# As with the strongest paths, it finds shortest paths by using more and more nodes as intermediaries.
# In[23]:
import sys
MAX_INT = sys.maxsize
def floyd_warshall(w):
n = len(w)
# Initialize distances matrix.
dist = [ [ MAX_INT for j in range(n) ] for i in range(n) ]
# Initialize predecessors matrix.
pred = [ [ -1 for j in range(n) ] for i in range(n) ]
# Initially the length of the path from i to j is simply
# the weight between w[i][j], if it exists, and then
# i is the predecessor of j.
for i in range(n):
for j in range(n):
if w[i][j] != 0:
dist[i][j] = w[i][j]
pred[i][j] = i
# For each k, i, j, such that the path from i to j
# can be shortened by taking the detour from i to k
# and k to j adjust the path and the predecessor.
# This can happen at most n times.
for k in range(n):
for i in range(n):
if i != k:
for j in range(n):
if j != i:
if (dist[i][k] != MAX_INT and
dist[k][j] != MAX_INT and
dist[i][j] > dist[i][k] + dist[k][j]):
dist[i][j] = dist[i][k] + dist[k][j]
pred[i][j] = pred[k][j]
return (dist, pred)
# We'll use the algorithm on the familiar [traffic_grid_graph.txt](traffic_grid_graph.txt) algorithm.
#
#
# Here is the function that reads the graph:
# In[18]:
def read_graph(filename, directed=False):
graph = {}
with open(filename) as input_file:
for line in input_file:
parts = line.split()
if len(parts) != 3:
continue # not a valid line, ignore
[n1, n2, w] = [ int (x) for x in parts ]
if n1 not in graph:
graph[n1] = []
if n2 not in graph:
graph[n2] = []
graph[n1].append((n2, w))
if not directed:
graph[n2].append((n1, w))
return graph
# We go ahead and read it:
# In[19]:
g = read_graph('traffic_grid_graph.txt')
pprint.pprint(g)
# Our implementation of the Floyd-Warshall algorithms requires an adjacency matrix as input.
#
# So, we'll use a function that converts the graph from an adjacency list representation to an adjacency matrix one.
# In[20]:
def adjlist_to_matrix(g):
m = [ [ MAX_INT for j in g.keys() ] for i in g.keys() ]
for u in g.keys():
m[u][u] = 0
for u in g.keys():
for (v, w) in g[u]:
m[u][v] = w
return m
# We do the conversion, and then we run the Floyd-Warshall algorithm.
# In[21]:
m = adjlist_to_matrix(g)
dist, pred = floyd_warshall(m)
for s in sorted(g.keys()):
print('starting node:', s)
print(pred[s])
print(dist[s])
# You may have noticed than the distance of a node to itself has been set to `MAX_INT`.
#
# If that bothers us, and we like it to fix it to zero, that's easy to do:
# In[22]:
for i in range(len(dist)):
dist[i][i] = 0
for s in sorted(g.keys()):
print('starting node:', s)
print(pred[s])
print(dist[s])