Source code for tcmpr.algorithms.lzw.decompressor

"""
This module implements the LZW decoding algorithm,
which is used to decompress input files with
the ".lzw" extension.
"""
import os


class Step:
    """Small mutable holder for a single "step" value.

    The stored value starts at 0 and is only changed through
    :meth:`set_step`; no validation is applied to what is stored.
    """

    def __init__(self) -> None:
        self._step = 0

    def set_step(self, step) -> None:
        """Replace the stored value with ``step``."""
        self._step = step

    def get_step(self):
        """Return the stored value (0 if :meth:`set_step` was never called)."""
        return self._step
def decompress_lzw(input_file):
    """
    Function to decompress input file with extension ".lzw"

    The file is read as: a 2-byte big-endian header whose value (2 or 4)
    is the width in bytes of each code, followed by the codes themselves,
    each stored big-endian and unsigned at that width. The codes are decoded
    with ``decode_codes`` and the resulting text is written next to the
    input file, under the input's name with its last extension removed.

    Args:
        input_file: Path of the compressed file to read.

    Returns:
        Path of the decompressed file that was written.

    Raises:
        ValueError: If the header is missing or is not 2 or 4, or if the
            payload length is not a whole number of codes.
    """
    output_filename = os.path.splitext(os.path.basename(input_file))[0]
    output_file = os.path.join(os.path.dirname(input_file), output_filename)

    with open(input_file, 'rb') as f:
        data = f.read()

    length = len(data)
    if length < 2:
        raise ValueError(
            "{!r} is too short to contain the 2-byte code-width header"
            .format(input_file)
        )

    # Codes are stored at a fixed width announced by the header, so they
    # must be read back in chunks of exactly that many bytes.
    step = int.from_bytes(data[0:2], byteorder='big', signed=False)
    if step not in (2, 4):
        # Without this check an unknown header would leave the stride at an
        # unusable value and fail later with an unrelated-looking error.
        raise ValueError(
            "{!r} has an unsupported code-width header: {} (expected 2 or 4)"
            .format(input_file, step)
        )

    if (length - 2) % step:
        # A short trailing chunk would otherwise be decoded as a different,
        # smaller code and silently corrupt the output.
        raise ValueError(
            "{!r} is truncated: payload of {} bytes is not a multiple of "
            "the {}-byte code width".format(input_file, length - 2, step)
        )

    codes = [
        int.from_bytes(data[i:i + step], byteorder='big', signed=False)
        for i in range(2, length, step)
    ]

    encoded_str = decode_codes(codes)

    # NOTE(review): written in text mode with the platform default encoding
    # and newline handling -- presumably mirrors how the compressor read the
    # original file; confirm against the compressor.
    with open(output_file, 'w') as out:
        out.write(encoded_str)

    return output_file
def decode_codes(codes):
    """
    Decode a sequence of LZW codes back into the original string.

    The dictionary starts with the 256 single-character entries
    ``chr(0)`` .. ``chr(255)`` and grows by one entry for every code after
    the first, exactly as the encoder's dictionary grew.

    Args:
        codes: Iterable of non-negative integer codes.

    Returns:
        The decoded string (empty if ``codes`` is empty).

    Raises:
        ValueError: If a code cannot occur in a valid stream, i.e. it is
            not in the dictionary and is not the very next code to be
            assigned (this includes an unknown first code).
    """
    # Initialize dictionary
    dictionary = {c: chr(c) for c in range(256)}
    max_code = 255

    previous = None
    entries = []
    for key in codes:
        entry = dictionary.get(key)
        if entry is None:
            # The only legitimate unknown code is the one the encoder
            # created in the step it is emitted from: it is always the next
            # free code and expands to the previous entry plus its own first
            # character. Anything else means the stream is corrupt.
            if previous is None or key != max_code + 1:
                raise ValueError(
                    "invalid LZW code {!r} (next assignable code is {})"
                    .format(key, max_code + 1)
                )
            entry = previous + previous[0]
        entries.append(entry)

        # Every code after the first defines a new dictionary entry.
        if previous is not None:
            max_code += 1
            dictionary[max_code] = previous + entry[0]

        previous = entry
    return ''.join(entries)