LZW 编解码算法实现与分析

2021/4/18 12:55:10

编程Tag： string 字符算法 Code Dictionary int 编解码 lzw bf

本文主要是介绍LZW 编解码算法实现与分析，对大家解决编程问题具有一定的参考价值，需要的程序猿们随着小编来一起学习吧！

本文以解释代码为主，弄清代码结构及思路

文章目录

一、实验目的
二、实验原理
- 1.编码
- 2.解码
三、实验步骤
- 1.编码
- 2.解码
四、代码实现及注释
五、实验结果及分析
总结

一、实验目的

掌握词典编码的基本原理，用C/C++语言编程实现LZW解码器并分析编解码算法。

二、实验原理

1.编码

（1）编码0-255用来存储Ascii码为[0,255]的字符，放在字典里。
（2）编码从256开始，将出现过的字符计入字典
（3）核心思想：利用字符的可重用性，每当往结果输出一个编码，就将一个新的string存入dictionary

2.解码

初始化字典，建立字典、查字典的方法可以实现文件的解压缩，但是解压缩过程并不简单等同于压缩过程。解压缩完成后压缩文件时建立的字典和解压缩建立的字典完全一样。

三、实验步骤

1.编码

步骤1：将词典初始化为包含所有可能的单字符，当前前缀P初始化为空。
步骤2：当前字符C=字符流中的下一个字符。
步骤3：判断P＋C是否在词典中
（1）如果“是”，则用C扩展P，即让P=P＋C，返回到步骤2。
（2）如果“否”，则输出与当前前缀P相对应的码字W；将P＋C添加到词典中；令P=C，并返回到步骤2
在这里插入图片描述

2.解码

步骤1：在开始译码时词典包含所有可能的前缀根。
3
步骤2：令CW：=码字流中的第一个码字。
步骤3：输出当前缀-符串string.CW到码字流。
步骤4：先前码字PW：=当前码字CW。
步骤5：当前码字CW：=码字流的下一个码字。
步骤6：判断当前缀-符串string.CW 是否在词典中。
（1）如果”是”，则把当前缀-符串string.CW输出到字符流；当前前缀P：=先前缀-符串string.PW；当前字符C：=当前前缀-符串string.CW的第一个字符；把缀-符串P+C添加到词典。
（2）如果”否”，则当前前缀P：=先前缀-符串string.PW；当前字符C：=当前缀-符串string.CW的第一个字符；输出缀-符串P+C到字符流,然后把它添加到词典中。
步骤7：判断码字流中是否还有码字要译。
（1）如果”是”，就返回步骤4。
（2）如果”否”，结束

四、代码实现及注释

头文件：

typedef struct {//自定义的结构体变量，用于作为文件流
	FILE* fp;
	unsigned char mask;
	int rack;
}BITFILE;

BITFILE* OpenBitFileInput(char* filename);//结构体变量用于存储输入的文件
BITFILE* OpenBitFileOutput(char* filename);//结构体变量用于存储输出的文件
void CloseBitFileInput(BITFILE* bf); //关闭比特流文件
void CloseBitFileOutput(BITFILE* bf);//关闭比特流文件
int BitInput(BITFILE* bf);
unsigned long BitsInput(BITFILE* bf, int count);//输入比特流
void BitOutput(BITFILE* bf, int bit);
void BitsOutput(BITFILE* bf, unsigned long code, int count);//将字符对应的编码映射成固定长度的编码

源文件：

struct {
	int suffix;
	int parent, firstchild, nextsibling;
} dictionary[MAX_CODE + 1];
int next_code;
int d_stack[MAX_CODE]; // stack for decoding a phrase

#define input(f) ((int)BitsInput( f, 16))
#define output(f, x) BitsOutput( f, (unsigned long)(x), 16)

int DecodeString(int start, int code);
void InitDictionary(void);
void PrintDictionary(void) {
	//打印出自定义的字典
	int n;
	int count;
	for (n = 256; n < next_code; n++) {
		count = DecodeString(0, n);
		printf("%4d->", n);
		while (0 < count--) printf("%c", (char)(d_stack[count]));
		printf("\n");
	}
}

int DecodeString(int start, int code) {
	//从码解出字符串到d_stack这个栈中
	int count;
	count = start;
	while (0 <= code) {
		d_stack[count] = dictionary[code].suffix;
		code = dictionary[code].parent;
		count++;
	}
	return count;
}
void InitDictionary(void) {
	//将字典进行初始化，以树的逻辑数据结构进行结构体的设置，和数组的存储结构
	int i;

	for (i = 0; i < 256; i++) {
		dictionary[i].suffix = i;
		dictionary[i].parent = -1;
		dictionary[i].firstchild = -1;
		dictionary[i].nextsibling = i + 1;
	}
	dictionary[255].nextsibling = -1;
	next_code = 256;
}
/*
 * Input: string represented by string_code in dictionary,
 * Output: the index of character+string in the dictionary
 * 		index = -1 if not found
 */
int InDictionary(int character, int string_code) {
	//查询 加上新来的character的字符串 是否在字典里
	int sibling;
	if (0 > string_code) return character;
	sibling = dictionary[string_code].firstchild;
	while (-1 < sibling) {
		if (character == dictionary[sibling].suffix) return sibling;
		sibling = dictionary[sibling].nextsibling;
	}
	return -1;
}

void AddToDictionary(int character, int string_code) {
	//将一个新组成的字符串加入字典
	int firstsibling, nextsibling;
	if (0 > string_code) return;
	dictionary[next_code].suffix = character;
	dictionary[next_code].parent = string_code;
	dictionary[next_code].nextsibling = -1;
	dictionary[next_code].firstchild = -1;
	firstsibling = dictionary[string_code].firstchild;
	if (-1 < firstsibling) {	// the parent has child
		nextsibling = firstsibling;
		while (-1 < dictionary[nextsibling].nextsibling)
			nextsibling = dictionary[nextsibling].nextsibling;
		dictionary[nextsibling].nextsibling = next_code;
	}
	else {// no child before, modify it to be the first
		dictionary[string_code].firstchild = next_code;
	}
	next_code++;
}

void LZWEncode(FILE* fp, BITFILE* bf) {
	int character;
	int string_code;//字符或字符串所对应的词典编码
	int index;
	unsigned long file_length;

	fseek(fp, 0, SEEK_END);
	file_length = ftell(fp);
	fseek(fp, 0, SEEK_SET);
	BitsOutput(bf, file_length, 4 * 8);
	InitDictionary();//初始化词典
	string_code = -1;
	while (EOF != (character = fgetc(fp))) {//从文件里读取字符
		index = InDictionary(character, string_code);
		if (0 <= index) {	// string+character in dictionary
			string_code = index;
		}
		else {	
			output(bf, string_code);// string+character not in dictionary
			if (MAX_CODE > next_code) {	//词典里还有空间时
				AddToDictionary(character, string_code);// add string+character to dictionary
			}
			string_code = character;//将当前字符的代号放入已存在的代号串之后
		}
	}
	output(bf, string_code);// 将字符串对应的代号写入到二进制文件中
}

void LZWDecode(BITFILE* bf, FILE* fp) {
	int character;//字符代号
	int new_code, last_code;
	int phrase_length;
	unsigned long file_length;//文件长度

	file_length = BitsInput(bf, 4 * 8);
	if (-1 == file_length) file_length = 0;
	/*需填充*/
	InitDictionary();//初始化字典
	last_code = -1;
	while (0 < file_length) {
		new_code = input(bf);//读入一个字符
		if (new_code >= next_code) { // 该字符代号大于字典里的字符代号时
			d_stack[0] = character;
			phrase_length = DecodeString(1, last_code); //解出字符，存入d_stack栈里
		}
		else {//该字符代号小于字典里的字符代号时
			phrase_length = DecodeString(0, new_code);//解出字符，存入d_stack栈里
		}
		character = d_stack[phrase_length - 1];
		while (0 < phrase_length) {//输出字符串到文本文件中
			phrase_length--;
			fputc(d_stack[phrase_length], fp);
			file_length--;
		}
		if (MAX_CODE > next_code) {//词典中还有空间时
			AddToDictionary(character, last_code);//将字符加入词典
		}
		last_code = new_code;//更新字典条数last_code为最新的new_code
	}
}

int main(int argc, char** argv) {
	FILE* fp;//输入的文件指针
	BITFILE* bf; //输出的二进制文件流指针

	if (4 > argc) {//输入参数错误时
		fprintf(stdout, "usage: \n%s <o> <ifile> <ofile>\n", argv[0]);
		fprintf(stdout, "\t<o>: E or D reffers encode or decode\n");
		fprintf(stdout, "\t<ifile>: input file name\n");
		fprintf(stdout, "\t<ofile>: output file name\n");
		return -1;
	}
	if ('E' == argv[1][0]) { // do encoding
		fp = fopen(argv[2], "rb");//读入文件
		bf = OpenBitFileOutput(argv[3]);//创建输出文件
		if (NULL != fp && NULL != bf) {
			LZWEncode(fp, bf);//编码
			fclose(fp);//关闭文件指针
			CloseBitFileOutput(bf);//关闭文件流指针
			fprintf(stdout, "encoding done\n");
		}
	}
	else if ('D' == argv[1][0]) {	// do decoding
		bf = OpenBitFileInput(argv[2]);//读入文件流
		fp = fopen(argv[3], "wb");//创建输出文件
		if (NULL != fp && NULL != bf) {
			LZWDecode(bf, fp);//解码
			fclose(fp);//关闭文件指针
			CloseBitFileInput(bf);//关闭文件流指针
			fprintf(stdout, "decoding done\n");
		}
	}
	else {	// otherwise
		fprintf(stderr, "not supported operation\n");
	}
	return 0;
}

五、实验结果及分析

在这里插入图片描述

经过十多个类型文件的比较之后，得出结论：

文本文件压缩效果较好，图像视频文件压缩效果较差。

LZW编码原理是基于重复字符，所以对于数据中重复率较低的文本文件，需要创建新字符串存入词典，也会导致压缩效率低。

但该编码还是主要用于图像数据的压缩。对于简单图像和平滑且噪声小的信号源具有较高的压缩比，并且有较高的压缩和解压缩速度。

压缩yuv文件时没有任何变化。

总结

lzw压缩是基于表查寻算法把文件压缩成小文件的无损压缩方法。

这篇关于LZW 编解码算法实现与分析的文章就介绍到这儿，希望我们推荐的文章对大家有所帮助，也希望大家多多支持为之网！

LZW 编解码算法实现与分析

文章目录

一、实验目的

二、实验原理

1.编码

2.解码

三、实验步骤

1.编码

2.解码

四、代码实现及注释

五、实验结果及分析

总结

相关编程文章