1

这段代码不是我写的。我试图从中画出一棵霍夫曼树,但我想要父节点的值并将它们放在一个带有二进制代码的列表中。我怎样才能做到这一点?

import heapq
from collections import defaultdict


def encode(frequency):
    heap = [[weight, [symbol, '']] for symbol, weight in frequency.items()]
    heapq.heapify(heap)
    while len(heap) > 1:
        lo = heapq.heappop(heap)
        hi = heapq.heappop(heap)
        for pair in lo[1:]:
            pair[1] = '0' + pair[1]
        for pair in hi[1:]:
            pair[1] = '1' + pair[1]
        heapq.heappush(heap, [lo[0] + hi[0]] + lo[1:] + hi[1:])
    return sorted(heapq.heappop(heap)[1:], key=lambda p: (len(p[-1]), p))


data="ALL PEOPLE SEEM TO NEED DATA PROCESSING."

frequency = defaultdict(int)
for symbol in data:
    frequency[symbol] += 1

huff = encode(frequency)
print ("Symbol".ljust(10) + "Weight".ljust(10) + "Huffman Code")
for p in huff:
    print(p[0].ljust(10) + str(frequency[p[0]]).ljust(10) + p[1])

使用 Turtle 链接到 Huffman 树的图像:https ://imgur.com/a/Ql0QIDD

4

1 回答 1

1

我从中编写了一个完整的霍夫曼编码器,其中包括编码的二进制文件:

您可以通过取消注释打印来看到一棵树,但如果您需要更多内容,请告诉我。

from heapq import heappush, heappop, heapify
from collections import defaultdict
from functools import reduce

def encode(symb2freq):
    heap = [[wt, [sym, ""]] for sym, wt in symb2freq.items()]
    heapify(heap)
    while len(heap) > 1:
        lo = heappop(heap)
        hi = heappop(heap)
        for pair in lo[1:]:
            pair[1] = '0' + pair[1]
        for pair in hi[1:]:
            pair[1] = '1' + pair[1]
        heappush(heap, [lo[0] + hi[0]] + lo[1:] + hi[1:])
    return dict(sorted(heappop(heap)[1:], key=lambda p: (p, len(p[-1]))))

# recreates the original message from your huffman code table 
# uncomment print(a) to see how it works
def decompressHuffmanCode(a, bit):
    # print(a)
    return ('', a[1] + s[a[0]+bit[0]]) if (a[0]+bit[0] in s) else (a[0]+bit[0], a[1])

data="ALL PEOPLE SEEM TO NEED DATA PROCESSING."

# Create symbol to frequency table
symb2freq = defaultdict(int)
for ch in data:
    symb2freq[ch] += 1
enstr=encode(symb2freq)

# Create Huffman code table from frequency table
s=dict((v,k) for k,v in dict(enstr).items())

# Create compressible binary. We add 1 to the front, and remove it when read from disk
compressed_binary = '1' + ''.join([enstr[item] for item in data])

# Read compressible binary so we can uncompress it. We strip the first bit.
read_compressed_binary = compressed_binary[1:]

# Recreate the compressed message from read_compressed_binary
remainder,bytestr = reduce(decompressHuffmanCode, read_compressed_binary, ('', ''))
print(bytestr)
于 2019-11-16T07:53:50.753 回答