In [7]:
#voc = [1, 1, 1, 1, 1, 1, 1, 1] ## complete tree
voc = [4, 3, 2, 1] ## single path tree
voc_size = len(voc)

count = voc + [1e5 for _ in range(voc_size + 1)]
binary = [0 for _ in range(voc_size * 2 + 1)]
parent_node = [0 for _ in range(voc_size * 2 + 1)]
print count
[4, 3, 2, 1, 100000.0, 100000.0, 100000.0, 100000.0, 100000.0]
In [8]:
pos1, pos2 = voc_size - 1, voc_size
for i in range(0, voc_size-1):
    if pos1 >= 0:
        if count[pos1] < count[pos2]:
            min1i = pos1
            pos1 -= 1
        else:
            min1i = pos2
            pos2 += 1
    else:
        min1i = pos2
        pos2 += 1
        
    if pos1 >= 0:
        if count[pos1] < count[pos2]:
            min2i = pos1
            pos1 -= 1
        else:
            min2i = pos2
            pos2 += 1
    else:
        min2i = pos2
        pos2 += 1
    count[voc_size + i] = count[min1i] + count[min2i]
    parent_node[min1i] = voc_size + i
    parent_node[min2i] = voc_size + i
    ## implicitly binary[min1i] = 0
    binary[min2i] = 1
In [9]:
print count
print binary
[4, 3, 2, 1, 3, 6, 10, 100000.0, 100000.0]
[0, 1, 1, 0, 0, 1, 0, 0, 0]

So it proves that only 2 voc_size - 1 nodes will be needed to construct a tree, even the tree is complete*

In [10]:
point = [0 for _ in range(40)]
code = [0 for _ in range(40)]
In [12]:
for a in range(voc_size):
    b = a
    i = 0
    while True:
        code[i] = binary[b]
        point[i] = b
        i += 1
        b = parent_node[b]
        if (b == voc_size * 2 - 2):
            break
In [13]:
print code
print point
[0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
[3, 4, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
In [ ]: