Module dsa.huffman
a collection of Huffman functions
Expand source code
""" a collection of Huffman functions """
import heapq
class Node:
""" binary node implementation """
def __init__(self, left, right, value=None):
self.left = left
self.right = right
self.value = value
def __lt__(self, other):
return False
def __repr__(self):
if self.value is None:
return "none"
else:
return self.value
def character_frequency(s: str):
""" takes a string a returns a dictionary on character frequency """
d = {}
for c in s:
if c not in d:
d[c] = 1
else:
d[c] += 1
return d
def build_frequency_table(s: str):
""" accepts a string to encode and returns a heap of the characters """
frequency_dictionary = character_frequency(s)
# add to priority queue
h = []
for item in frequency_dictionary.items():
heapq.heappush(h, (item[1], Node(None, None, item[0])))
return h
def build_huffman_tree(heap):
""" accepts a heap and returns a Huffman Tree """
while len(heap) > 1:
n1 = heapq.heappop(heap)
n2 = heapq.heappop(heap)
node = Node(n1[1], n2[1])
heapq.heappush(heap, (n1[0] + n2[0], node))
return heap[0][1]
def build_huffman_dictionary(node, bit_string: str=""):
""" given a Huffman Node, build a Huffman Dictionary """
d = {}
if node.left is None and node.right is None:
return {node.value: bit_string}
d.update(build_huffman_dictionary(node.left, bit_string + '0'))
d.update(build_huffman_dictionary(node.right, bit_string + '1'))
return d
def huffman_encode(st, hd):
s = ""
for c in st:
s += hd[c]
return s
def huffman_decode(encoded_data, tree):
root = tree
s = ""
for bit in encoded_data:
if int(bit) == 0:
tree = tree.left
else:
tree = tree.right
if tree.left is None and tree.right is None:
s += tree.value
tree = root
return s
def bitstring_to_bytes(s):
return bytes(int(s[i : i + 8], 2) for i in range(0, len(s), 8))
def bytes_to_bitstring(ba, bitlength=8):
s = ""
for b in ba[:-1]:
byte = f"{b:08b}"
s += byte
byte = f"{ba[-1]:b}".zfill(bitlength)
s += byte
return s
Functions
def bitstring_to_bytes(s)
-
Expand source code
def bitstring_to_bytes(s): return bytes(int(s[i : i + 8], 2) for i in range(0, len(s), 8))
def build_frequency_table(s: str)
-
accepts a string to encode and returns a heap of the characters
Expand source code
def build_frequency_table(s: str): """ accepts a string to encode and returns a heap of the characters """ frequency_dictionary = character_frequency(s) # add to priority queue h = [] for item in frequency_dictionary.items(): heapq.heappush(h, (item[1], Node(None, None, item[0]))) return h
def build_huffman_dictionary(node, bit_string: str = '')
-
given a Huffman Node, build a Huffman Dictionary
Expand source code
def build_huffman_dictionary(node, bit_string: str=""): """ given a Huffman Node, build a Huffman Dictionary """ d = {} if node.left is None and node.right is None: return {node.value: bit_string} d.update(build_huffman_dictionary(node.left, bit_string + '0')) d.update(build_huffman_dictionary(node.right, bit_string + '1')) return d
def build_huffman_tree(heap)
-
accepts a heap and returns a Huffman Tree
Expand source code
def build_huffman_tree(heap): """ accepts a heap and returns a Huffman Tree """ while len(heap) > 1: n1 = heapq.heappop(heap) n2 = heapq.heappop(heap) node = Node(n1[1], n2[1]) heapq.heappush(heap, (n1[0] + n2[0], node)) return heap[0][1]
def bytes_to_bitstring(ba, bitlength=8)
-
Expand source code
def bytes_to_bitstring(ba, bitlength=8): s = "" for b in ba[:-1]: byte = f"{b:08b}" s += byte byte = f"{ba[-1]:b}".zfill(bitlength) s += byte return s
def character_frequency(s: str)
-
takes a string a returns a dictionary on character frequency
Expand source code
def character_frequency(s: str): """ takes a string a returns a dictionary on character frequency """ d = {} for c in s: if c not in d: d[c] = 1 else: d[c] += 1 return d
def huffman_decode(encoded_data, tree)
-
Expand source code
def huffman_decode(encoded_data, tree): root = tree s = "" for bit in encoded_data: if int(bit) == 0: tree = tree.left else: tree = tree.right if tree.left is None and tree.right is None: s += tree.value tree = root return s
def huffman_encode(st, hd)
-
Expand source code
def huffman_encode(st, hd): s = "" for c in st: s += hd[c] return s
Classes
class Node (left, right, value=None)
-
binary node implementation
Expand source code
class Node: """ binary node implementation """ def __init__(self, left, right, value=None): self.left = left self.right = right self.value = value def __lt__(self, other): return False def __repr__(self): if self.value is None: return "none" else: return self.value