#pragma once
template
struct HuffmanTreeNode{
T data; //数据
unsigned int count; //权值
char code[128]; //结点的哈弗曼编码
HuffmanTreeNode *leftChild; //左子女
HuffmanTreeNode *rightChild; //右子女
HuffmanTreeNode *parent; //父节点
HuffmanTreeNode():leftChild(NULL),rightChild(NULL),parent(NULL){} //构造函数
bool operator <=(HuffmanTreeNode &R){return count <= R.count;} //重载<=和>运算符
bool operator >(HuffmanTreeNode &R){return count > R.count;}
};
template
class HuffmanTree
{
public:
HuffmanTree();
HuffmanTree(HuffmanTree &t);
~HuffmanTree(void);
void printData();
int getWPL(); //获取WPL //打开文件
void creat(char str[],char data[],unsigned int count[]); //哈弗曼的建立
void creat(char data[],unsigned int count[]); //重载
template
friend void findKey(HuffmanTreeNode *subTree,T key,char code[]){ //寻找结点的code
static T firstNodeData = subTree->data;
if(subTree != NULL){
if(subTree->data != key){
findKey(subTree->leftChild,key,code);
findKey(subTree->rightChild,key,code);
}
else{
int i = 0;
for(;subTree->code[i] != 0;i++)
code[i] = subTree->code[i];
code[i] = 0;
}
}
}
HuffmanTreeNode *root;
protected:
private:
int WPL;
void census(unsigned int count[],char data[],char str[],unsigned int &size); //建立哈弗曼树时统计各字符出现的次数
void deleteTree(HuffmanTreeNode *subTree); //删除subTree结点的所有子结点
void encode(HuffmanTreeNode *subTree,char code[] ,char m = 0); //编码
void calculateWPL(HuffmanTreeNode *subTree,int i = 0); //计算WPL
void printCode(HuffmanTreeNode *subTree); //输出哈弗曼编码的子函数
void printData(HuffmanTreeNode *subTree); //输出所有结点data和权值的子函数
};
#pragma once
#include "StdAfx.h"
#include "HuffmanTree.h"
#include "MinHeap.cpp"
#include
template
HuffmanTree::HuffmanTree(){
WPL = 0;
root = new HuffmanTreeNode;
};
template
HuffmanTree::~HuffmanTree(){
deleteTree(root); //删除哈弗曼树
}
template
void HuffmanTree::creat(char str[],char data[],unsigned int count[]){
unsigned int size; //字符的个数
for(unsigned int i = 0; i < 128; i++) //count的初始化
count[i] = 0;
census(count,data,str,size);
minHeap> hp;
HuffmanTreeNode *parent, *first, *second;
HuffmanTreeNode *work;
for(unsigned int i = 0; i < size; i++){ //把每个元素都插入最小堆中
work = new HuffmanTreeNode;
work->data = data[i];
work->count = count[i];
work->leftChild = NULL;
work->rightChild = NULL;
work->parent = NULL;
hp.insert(*work);
}
for(unsigned int i = 0; i < size-1; i++){
parent = new HuffmanTreeNode;
first = new HuffmanTreeNode;
second = new HuffmanTreeNode;
hp.removeMin(*first); //每次从最小堆中取出两个最小的数
hp.removeMin(*second);
parent->leftChild = first; //parent作为他们的父节点
parent->rightChild = second;
parent->count = first->count + second->count;
parent->data = '#'; //parent不是根结点所以把它的data设为'#'
first->parent = parent;
second->parent = parent;
hp.insert(*parent); //再把parent插入最小堆中,进行循环判断
}
root = parent; //最后一个parent就是根结点
char code[128];
for(unsigned int i = 0;i < 128; i++)
code[i] = 0;
encode(root,code); //对结点进行哈弗曼编码(空的地方取0)
};
template
void HuffmanTree::creat(char data[],unsigned int count[]){
unsigned int size = 0;
for(unsigned int i = 0; count[i] != 0; i++)
size++;
minHeap> hp;
HuffmanTreeNode *parent, *first, *second;
HuffmanTreeNode *work;
for(unsigned int i = 0; i < size; i++){ //把每个元素都插入最小堆中
work = new HuffmanTreeNode;
work->data = data[i];
work->count = count[i];
work->leftChild = NULL;
work->rightChild = NULL;
work->parent = NULL;
hp.insert(*work);
}
for(unsigned int i = 0; i < size-1; i++){
parent = new HuffmanTreeNode;
first = new HuffmanTreeNode;
second = new HuffmanTreeNode;
hp.removeMin(*first); //每次从最小堆中取出两个最小的数
hp.removeMin(*second);
parent->leftChild = first; //parent作为他们的父节点
parent->rightChild = second;
parent->count = first->count + second->count;
parent->data = '#'; //parent不是根结点所以把它的data设为'#'
first->parent = parent;
second->parent = parent;
hp.insert(*parent); //再把parent插入最小堆中,进行循环判断
}
root = parent; //最后一个parent就是根结点
char code[128];
for(unsigned int i = 0;i < 128; i++)
code[i] = 0;
encode(root,code); //对结点进行哈弗曼编码(空的地方取0)
}
template
void HuffmanTree::deleteTree(HuffmanTreeNode *subTree){
if(subTree != NULL){
deleteTree(subTree->leftChild); //后序遍历来删除结点
deleteTree(subTree->rightChild);
delete subTree;
}
}
template
void HuffmanTree::census(unsigned int count[],char data[],char str[],unsigned int &size){
//用于统计字符出现的次数
for(int i = 0; str[i] != 0;i++){
if(str[i] == '#') //当出现的是已出现过的字符时就执行下次循环
continue;
static int n = 0;
count[n]++; //第一次出现时加一
data[n] = str[i];
for(int j = 0; str[j] != 0;j++){
if(str[i] == str[j] && i != j){
count[n]++; //每次出现重复的时候加一并且
str[j] = '#'; //表明已出现过
}
}
data[++n] = 0;
size = n;
}
}
template
void HuffmanTree::encode(HuffmanTreeNode *subTree,char code[] ,char m = 0){
if(subTree != NULL){
int j;
for(j = 0;code[j] != 0;j++){
subTree->code[j] = code[j];
}
for(j = 0;code[j] != 0;j++){
}
subTree->code[j] = m;
subTree->code[j+1] = 0;
encode(subTree->leftChild,subTree->code,'0');
encode(subTree->rightChild,subTree->code,'1');
}
}
template
void HuffmanTree::calculateWPL(HuffmanTreeNode *subTree,int i = 0){
if(subTree != NULL){
if(subTree->data != '#')
WPL += i*subTree->count; //i为当前层数,count为结点权值
calculateWPL(subTree->leftChild,++i); //前序遍历
i--;
calculateWPL(subTree->rightChild,++i);
i--;
}
}
template
int HuffmanTree::getWPL(){
return WPL;
}
#pragma once
#define DafaultSize 50
template
class minHeap{
public:
minHeap(int size = DafaultSize);
~minHeap();
bool insert(const E& x);
bool removeMin(E& x);
private:
E *heap;
int currentSize;
int maxHeapSize;
void siftDown(int start, int m);
void siftUp(int start);
};
#pragma once
#include "StdAfx.h"
#include "MinHeap.h"
template
minHeap::minHeap(int size){
maxHeapSize = (DafaultSize
minHeap::~minHeap(){
delete heap;
}
template
void minHeap::siftDown(int start, int m){
int i = start; //初始位置
int j = 2*i+1; //j是i的左子女位置
E temp = heap[i];
while(j <= m){
if(j heap[j+1]) //左子女大于右子女时,j指向右子女
j++;
if(temp <= heap[j])
break;
else{ //大则向上移
heap[i] = heap[j];
i = j;
j = 2*j+1;
}
}
heap[i] = temp;
};
template
void minHeap::siftUp(int start){
int j = start;
int i = (j-1)/2; //i的左子女是j
E temp = heap[j];
while(j>0){
if(heap[i] <= temp) //如果父节点大
break;
else{ //如果父节点小,上滑
heap[j] = heap[i];
j = i;
i = (i-1)/2;
}
}
heap[j] = temp;
};
template
bool minHeap::insert(const E& x){
if(currentSize == maxHeapSize){ //堆满时
// cout<<"堆已满"<
bool minHeap::removeMin(E& x){
if(currentSize == 0){ //堆空时
// cout<<"堆为空!!"<#pragma once
#include "HuffmanTree.cpp"
class sZip
{
public:
sZip(void);
~sZip(void);
void zip(char str1[],char str2[],HuffmanTreeNode* subTree,int &size,char data[]);
void unzip(char str1[],char str2[]);
private:
bool codeEqual(char code1[],char code2[][128],int &num);
void getHuffCode(HuffmanTreeNode* subTree,char code[][128],char data[]);
void strBinary(char str[],int &size);
void binaryStr(char str1[]);
};
#include "StdAfx.h"
#include "sZip.h"
#include
using namespace std;
sZip::sZip(void){
}
sZip::~sZip(void)
{
}
void sZip::zip(char str1[],char str2[],HuffmanTreeNode* subTree,int &size,char data[]){
char code[128][128];
getHuffCode(subTree,code,data); //获取subTree的哈弗曼编码
unsigned int i = 0;
unsigned int n = -1;
for(; str1[i] != 0 ;i++){ //遍历str1,把里面的字符转化为哈弗曼编码存在str2中
int j = 0;
while(data[j] != str1[i]){
j++;
if(data[j] == 0)
break;
}
if(data[j] == str1[i]){
for(int count = 0;code[j][count] != 0;count++)
str2[++n] = code[j][count];
str2[n+1]=0;
}
}
strBinary(str2,size); //把str2的每一个字符变成每一个bit,8个bit合成一个字符
}
void sZip::unzip(char str1[],char str2[]){
unsigned int count[128];
for(int i = 0;i < 127;i++)
count[i] = 0;
char code[128][128];
char data[128];
for(int i = 0;i < 128;i++)
code[i][0] = 0;
int i = 0;
for(;str1[i] != '#';i++)
data[i] = str1[i];
data[i] = 0;
int j = i+2;
i = -1;
while(1){
if(str1[j] != '#' && str1[j+1] == '#') //个位
count[++i] = str1[j]-48;
else if(str1[j+1] != '#' && str1[j+2] == '#'){ //十位
count[++i] = int(str1[j]-48)*10+int(str1[j+1]-48);
j++;
}
else if(str1[j+1] != '#' && str1[j+2] != '#' && str1[j+3] == '#'){ //百位
count[++i] = int(str1[j]-48)*100+int(str1[j+1]-48)*10+int(str1[j+2]-48);
j = j+2;
}
else if(str1[j+1] != '#' && str1[j+2] != '#' && str1[j+3] != '#'&& str1[j+4] =='#'){ //千位
count[++i] = int(str1[j]-48)*1000+int(str1[j+1]-48)*100+int(str1[j+2]-48)*10+int(str1[j+3]-48);
}
else if(str1[j] == '#' && str1[j+1] == '#') //读完
break;
else
cout<<"读取错误"< tree;
tree.creat(data,count);
getHuffCode(tree.root,code,data);
j = j+2;
i = -1;
char tempchar[100000];
for(;str1[j] != 0;j++)
tempchar[++i] = str1[j];
tempchar[i+1] = 0;
binaryStr(tempchar); //把压缩文件转化为二进制编码
char tempcode[128];
j = -1;
int num;
int n = -1;
i = 0;
for(;tempchar[i] != 0;i++){ //遍历tempchar,把它从哈弗曼编码转化为对应的字符
tempcode[++n] = tempchar[i];
tempcode[n+1] = 0;
if(codeEqual(tempcode,code,num)){
str2[++j] = data[num];
for(n = 0;tempcode[n] != 0;n++) //重置tempcode
tempcode[n] = 0;
n = -1;
}
else
continue;
}
str2[++j] = 0;
}
void sZip::getHuffCode(HuffmanTreeNode* subTree,char code[][128],char data[]){
for(int i = 0;data[i] != 0;i++)
findKey(subTree,data[i],code[i]);
}
void sZip::strBinary(char str[],int &size){
char str2[100000];
char bit[8];
int n = 0;
int count = 0;
while(str[n] == '1' || str[n] == '0'){
for(int i = 0;i < 8 ;i++){
if(str[n] == 0){
str[n] = '0';
str[n+1] = 0;
}
bit[i] = str[n];
n++;
}
str2[count] = 0;
for(int i = 0;i < 8 ;i++){
if(bit[i] == '1'){
char temp = 1;
str2[count] = (str2[count]<<1)|temp; //给它最后一位加上一即:左移一位,然后和00000001求或
}
else
str2[count] = (str2[count]<<1);
}
count++;
}
for(int i = 0;i <= count;i++)
str[i] = str2[i];
for(int i = count;str[i] != 0;i++)
str[i] = 0;
size = count;
}
void sZip::binaryStr(char str1[]){
char str2[100000];
int i = -1;
int n = 0;
while(str1[n] != 0){
int temp[8];
int tempchar = abs(str1[n]);
for(int count = 0;count < 8;count++){
temp[count] = tempchar%2;
tempchar /= 2;
}
if(str1[n]<0 && str1[n] > -128){ //当为负数时,二进制为正数第一位为1(符号位)的反码最后一位加一(补码)
temp[7] = 1;
for(int count = 0;count < 7;count++){ //求反码
if(temp[count] == 0)
temp[count] = 1;
else
temp[count] = 0;
}
int x = 1; //进位数
for(int count = 0;count < 8;count++){ //末位加一
if(temp[count] == 0){
if(x == 1){
temp[count] = 1;
x = 0;
}
}
else
if(x == 1)
temp[count] = 0;
}
}
for(int count = 7;count != -1;count--)
str2[++i] = char(temp[count]+48);
n++;
}
str2[++i] = 0;
for(int count = 0;count <= i;count++)
str1[count] = str2[count];
}
bool sZip::codeEqual(char code1[],char code2 [][128],int &num){
unsigned int size1 = 0;
unsigned int size2 = 0;
for(int i = 0;code1[i] != 0;i++)
size1++;
for(int i = 0;code2[i][0] != 0;i++)
size2++;
for(int i = 0;i < size2;i++){
int j = 0;
int size3 = 0;
for(int n = 0;code2[i][n] != 0;n++)
size3++;
for(;j < size1;j++){
if(code1[j] != code2[i][j])
break; //有一位不等于就直接跳出
}
if(j == size1 && j == size3){
num = i;
return true;
}
}
return false;
}
本文标题:利用哈弗曼编码进行压缩
本文链接:http://cxhlcq.com/article/pscdhp.html