Skip to main content

Data Structures 13 - Huffman Tree

Code

#include <stdio.h>
#include <stdlib.h>
#define MAXSIZE 100

// Define a node structure
typedef struct NODE {
char c; // Character
int weight; // Weight
int binary; // Binary encoding
int flag; // Flag bit
struct NODE *lchild, *rchild, *parent; // Left child, right child, parent node pointers
} NODE, *PNODE;

// Define a stack structure
typedef struct {
int valuse[MAXSIZE]; // Stack values
int top; // Stack top pointer
} STACK;

// Initialize the stack
void initStack(STACK &stack) {
stack.top = 0;
}

// Push operation
void push(STACK &stack, int value) {
stack.valuse[stack.top++] = value;
}

// Pop operation
int pop(STACK &stack) {
if (stack.top == 0)
return -1;
stack.top--;
return stack.valuse[stack.top];
}

// Initialize a node
void initNode(PNODE node) {
node->lchild = NULL;
node->parent = NULL;
node->rchild = NULL;
node->flag = 0;
node->weight = 0;
node->c = -1;
node->binary = 0;
}

// Create a node
PNODE createNode(int weight) {
PNODE node = (PNODE)malloc(sizeof(NODE));
if (node) {
initNode(node);
node->weight = weight;
}
return node;
}

// Character encoding
STACK charEncode(char c, PNODE childrenNodes, int lenOfNodes) {
STACK stack;
initStack(stack);
for (int i = 0; i < lenOfNodes; i++) {
if (c == childrenNodes[i].c) {
PNODE tmp = &childrenNodes[i];
while (tmp->parent != NULL) {
push(stack, tmp->binary);
tmp = tmp->parent;
}
break;
}
}
return stack;
}

// String encoding
char *strEncode(char *str, PNODE childrenNodes, int lenOfNodes) {
char *result = (char *)malloc(sizeof(char) * MAXSIZE * lenOfNodes);
int len = 0;
while (*str != '\0') {
STACK stack = charEncode(*str, childrenNodes, lenOfNodes);
while (stack.top > 0) {
result[len++] = pop(stack) + '0';
}
str++;
}
result[len] = '\0';
return result;
}

// Get the node with minimum weight
PNODE getMinWeightNode(PNODE nodes, int lenOfNodes) {
PNODE node;
int min = 0, i;
while (min < lenOfNodes) {
if (nodes[min].flag == 0) {
break;
}
min++;
}
if (min == lenOfNodes) {
return NULL;
}
for (i = min + 1; i < lenOfNodes; i++) {
if (nodes[i].flag == 0 && nodes[i].weight < nodes[min].weight) {
min = i;
continue;
}
}
nodes[min].flag = 1;
return &nodes[min];
}

// Create the Huffman tree
PNODE createHuffmanTree(PNODE nodes, int lenOfNodes, PNODE childNode) {
PNODE minWeightNode, parentNode;
minWeightNode = getMinWeightNode(nodes, lenOfNodes);
if (!minWeightNode)
return childNode;
if (!childNode) {
parentNode = minWeightNode;
} else {
parentNode = createNode(childNode->weight + minWeightNode->weight);
if (childNode->weight < minWeightNode->weight) {
parentNode->lchild = childNode;
parentNode->rchild = minWeightNode;
} else {
parentNode->rchild = childNode;
parentNode->lchild = minWeightNode;
}
parentNode->lchild->binary = 0;
parentNode->rchild->binary = 1;
childNode->parent = minWeightNode->parent = parentNode;
}
createHuffmanTree(nodes, lenOfNodes, parentNode);
}

// Convert a character to lowercase
char charTolowercase(char c) {
if (c >= 'A' && c <= 'Z') {
c += 'a' - 'A';
}
return c;
}

// Read characters from a file, count weights, and build the Huffman tree
PNODE readFromSource(const char *filePath, char *buff, PNODE childrenNodes, int &lenOfNodes) {
int lenOfStr = 0, i;
char c;
FILE *file = fopen(filePath, "rb");
if (file == NULL) {
puts("Can't find source file!");
exit(0);
}
c = fgetc(file);
while (!feof(file)) {
c = charTolowercase(c);
initNode(&childrenNodes[lenOfNodes]);
buff[lenOfStr++] = c;
for (i = 0; i < lenOfNodes; i++) {
if (childrenNodes[i].c == c) {
childrenNodes[i].weight++;
break;
}
}
if (i == lenOfNodes) {
childrenNodes[lenOfNodes].c = c;
childrenNodes[lenOfNodes++].weight++;
}
c = fgetc(file);
}
buff[lenOfStr] = '\0';
fclose(file);
return createHuffmanTree(childrenNodes, lenOfNodes, NULL);
}

// Write encoding results to a file
void writeResult(const char *filePath, char *result) {
FILE *fp = fopen(filePath, "wb");
if (fputs(result, fp) >= 0) {
printf("Result generated successfully\r\n");
}
fclose(fp);
}

// String decoding
char *strDecode(const char *str, PNODE TreeRoot) {
const char *tmp = str;
char *result = (char *)malloc(sizeof(char) * MAXSIZE);
int len = 0;
while (*tmp != '\0') {
PNODE tmpNode = TreeRoot;
while (tmpNode->lchild && tmpNode->rchild) {
tmpNode = *tmp == '0' ? tmpNode->lchild : tmpNode->rchild;
tmp++;
}
result[len++] = tmpNode->c;
}
result[len] = '\0';
return result;
}

int main() {
char buff[MAXSIZE];
NODE childrenNodes[MAXSIZE];
int len = 0;
PNODE root = readFromSource("source.txt", buff, childrenNodes, len);
writeResult("result.txt", strEncode(buff, childrenNodes, len));
printf("%s", strDecode("11111111111110111111111110011101111111111011001110011111111101111111101111111011111101111101100111101010", root));
return 0;
}

Code Summary

This code implements an example of Huffman encoding. Huffman encoding is a lossless data compression algorithm. The main features of the code include:

  1. Node definition and initialization:

    • Defines a node structure NODE containing character, weight, binary encoding, flag bit, and left/right child and parent node pointers.
    • Defines and implements a stack structure STACK with its initialization, push, and pop operations.
  2. Node operations:

    • The initNode function initializes a node.
    • The createNode function creates a new node and initializes its weight.
  3. Huffman tree construction:

    • The getMinWeightNode function gets the node with the minimum weight from the node array.
    • The createHuffmanTree function recursively creates the Huffman tree.
  4. Character encoding and decoding:

    • The charEncode function encodes a single character into Huffman encoding.
    • The strEncode function encodes a string into Huffman encoding.
    • The strDecode function decodes Huffman encoding back to the original string.
  5. File operations:

    • The readFromSource function reads characters from a file, counts their weights, and builds the Huffman tree.
    • The writeResult function writes the encoded string to a file.
  6. Main function:

    • The main function reads data from source.txt, generates the Huffman tree, writes the encoding result to result.txt, and finally decodes and prints an example encoded string.

This code demonstrates in detail the entire process of using Huffman encoding for data compression and decompression.

Running Result

No source file