In :
import numpy as np
from fastcluster import linkage
from scipy.spatial.distance import squareform

distance_matrix = np.array([[0.0, .25, .25, .25],
[.25, 0.0, .25, .25],
[.25, .25, 0.0, .25],
[.25, .25, .25, 0.0]])

In :
distance_matrix

Out:
array([[ 0.  ,  0.25,  0.25,  0.25],
[ 0.25,  0.  ,  0.25,  0.25],
[ 0.25,  0.25,  0.  ,  0.25],
[ 0.25,  0.25,  0.25,  0.  ]])
In :
compressed_distance_matrix = squareform(distance_matrix)
Z = linkage(compressed_distance_matrix, method="complete", metric="cosine")

In :
Z

Out:
array([[ 0.  ,  1.  ,  0.25,  2.  ],
[ 2.  ,  4.  ,  0.25,  3.  ],
[ 3.  ,  5.  ,  0.25,  4.  ]])
In :
# Num desired clusters
k = 2

# Count entries in the matrix
initial_count = len(distance_matrix)

# Fill an array with indices from the matrix
c = [[i] for i in range(initial_count)]
# Now extend with empty entries for merging into
c.extend([[]]*(initial_count))

In :
# Loop the linkage matrix, making merges
# into new clusters until we have k clusters.
for i, block in enumerate(Z):
j = i + initial_count
a = int(block)
b = int(block)

# don't cluster randomly.
if block >= 1:
break

# merge a and b into j
c[j] = c[a]
c[j].extend(c[b])
# empty a and b
c[a] = c[b] = []

# count clusters and see if we have
# reached our target number
cluster_count = sum([min(len(x),1) for x in c])
if cluster_count <= k:
break

In :
clusters = [x for x in c if x]
clusters

Out:
[, [2, 0, 1]]
In [ ]:
# I have 2 clusters of size 1 and 3, but I'd like 2 and 2
# More generally I want to cluster evenly where possible for larger arrays.
# If  I have a small k it will then start combining those clusters (also evenly).