In [18]:
import numpy as np
from scipy.spatial.distance import squareform

distance_matrix = np.array([[0.0, .25, .25, .25],
[.25, 0.0, .25, .25],
[.25, .25, 0.0, .25],
[.25, .25, .25, 0.0]])

In [19]:
distance_matrix

Out[19]:
array([[ 0.  ,  0.25,  0.25,  0.25],
[ 0.25,  0.  ,  0.25,  0.25],
[ 0.25,  0.25,  0.  ,  0.25],
[ 0.25,  0.25,  0.25,  0.  ]])
In [20]:
compressed_distance_matrix = squareform(distance_matrix)

In [21]:
Z

Out[21]:
array([[ 0.  ,  1.  ,  0.25,  2.  ],
[ 2.  ,  4.  ,  0.25,  3.  ],
[ 3.  ,  5.  ,  0.25,  4.  ]])
In [28]:
# Num desired clusters
k = 2

# Count entries in the matrix
initial_count = len(distance_matrix[0])

# Fill an array with indices from the matrix
c = [[i] for i in range(initial_count)]
# Now extend with empty entries for merging into
c.extend([[]]*(initial_count))

In [38]:
# Loop the linkage matrix, making merges
# into new clusters until we have k clusters.
for i, block in enumerate(Z):
j = i + initial_count
a = int(block[0])
b = int(block[1])

# don't cluster randomly.
if block[2] >= 1:
break

# merge a and b into j
c[j] = c[a]
c[j].extend(c[b])
# empty a and b
c[a] = c[b] = []

# count clusters and see if we have
# reached our target number
cluster_count = sum([min(len(x),1) for x in c])
if cluster_count <= k:
break

In [37]:
clusters = [x for x in c if x]
clusters

Out[37]:
[[3], [2, 0, 1]]
In [ ]:
# I have 2 clusters of size 1 and 3, but I'd like 2 and 2
# More generally I want to cluster evenly where possible for larger arrays.
# If  I have a small k it will then start combining those clusters (also evenly).