LZ78 压缩算法的 Java 实现

举报
简简单单OnlineZuozuo 发表于 2022/02/18 23:15:05
【摘要】 文章目录:LZ78 压缩算法的 Java 实现;1、压缩算法的实现;2、解压缩算法的实现;3、测试和使用;4、Python 版本的实现代码。LZ78 压缩算法的 Java 实现 ...

LZ78 压缩算法的 Java 实现


1、压缩算法的实现

通过多路搜索树提高检索速度

package com.wretchant.lz78;

import java.util.*;

/**
 * Character-keyed multiway lookup tree (trie) for whole words.
 *
 * <p>Each edge is labelled with one character; a node whose {@code wordEnd}
 * flag is set marks the end of a word that was inserted.
 */
class Trie {
    private final TrieNode root = new TrieNode();

    public Trie() {
        // A fresh TrieNode already starts with wordEnd == false; set it
        // explicitly so the empty word is visibly "not stored".
        root.wordEnd = false;
    }

    /**
     * Stores {@code word}, creating any missing nodes along its path.
     *
     * @param word the word to add
     */
    public void insert(String word) {
        TrieNode cursor = root;
        for (char symbol : word.toCharArray()) {
            // Reuse the existing child for this character or create one.
            cursor = cursor.childdren.computeIfAbsent(symbol, key -> new TrieNode());
        }
        cursor.wordEnd = true;
    }

    /**
     * Tells whether exactly {@code word} was inserted before.
     *
     * @param word the word to look up
     * @return {@code true} only if the full path exists and ends on a node
     *         flagged as a word end; a mere prefix of a stored word yields
     *         {@code false}
     */
    public boolean search(String word) {
        TrieNode cursor = root;
        for (char symbol : word.toCharArray()) {
            cursor = cursor.childdren.get(symbol);
            if (cursor == null) {
                return false;
            }
        }
        return cursor.wordEnd;
    }

}

/**
 * Single node of the trie: outgoing edges keyed by character, plus a flag
 * telling whether a stored word ends at this node.
 */
class TrieNode {
    // Child nodes, one per outgoing character edge. The field name is read by
    // Trie, so its spelling is kept as is.
    Map<Character, TrieNode> childdren = new HashMap<>();
    // True when an inserted word terminates at this node.
    boolean wordEnd;

    public TrieNode() {
        // Fields are set by their initializers: an empty child map and
        // wordEnd == false.
    }
}

/**
 * One entry of the code table: an index paired with a character.
 *
 * <p>Instances are immutable value holders; both components are returned
 * exactly as they were passed to the constructor.
 */
class Output {
    private final Integer index;
    private final Character character;

    /**
     * @param index     the index component of this entry
     * @param character the character component of this entry
     */
    Output(Integer index, Character character) {
        this.index = index;
        this.character = character;
    }

    /** @return the index given at construction */
    public Integer getIndex() {
        return this.index;
    }

    /** @return the character given at construction */
    public Character getCharacter() {
        return this.character;
    }
}

/**
 * LZ78 encoder: turns a message into a list of
 * {@code (dictionary index, next character)} tokens.
 */
class LZencode {
    /** Encoding strategy: maps a message to its code table. */
    @FunctionalInterface
    interface Encode {
        /**
         * @param message the text to compress
         * @return the emitted tokens, in order
         */
        List<Output> encode(String message);
    }

    /**
     * Builds a multiway search tree containing every phrase in {@code keys}.
     *
     * @param keys the phrases to insert
     * @return a new trie holding exactly those phrases
     */
    static Trie buildTree(Set<String> keys) {
        Trie trie = new Trie();
        keys.forEach(trie::insert);
        return trie;
    }

    /**
     * LZ78 compression.
     *
     * <p>Each token is {@code (index of the longest known phrase, character
     * that follows it)}; index 0 stands for the empty phrase. After a token is
     * emitted, "phrase + character" becomes the next dictionary entry, numbered
     * from 1 in emission order.
     *
     * <p>Matching always stops at least one character before the end of the
     * message, so the last token carries a real character of the input too.
     * Earlier revisions emitted a made-up {@code ' '} when the message ended on
     * a known phrase, which made the decoder append a spurious space and made
     * inputs such as {@code "AA"} and {@code "AA "} encode identically.
     */
    public static final Encode ENCODE = message -> {
        // Code table produced by the compression.
        List<Output> outputs = new ArrayList<>();
        // Phrase -> 1-based dictionary index.
        Map<String, Integer> treeDict = new HashMap<>();
        // A single trie mirrors treeDict and grows with it; rebuilding it from
        // the whole dictionary for every token is unnecessary work.
        Trie trie = buildTree(treeDict.keySet());
        int mLen = message.length();
        int i = 0;

        while (i < mLen) {
            // Invariant: message[i, j) is a known phrase whose index is
            // prefixIndex (the empty phrase has index 0).
            int prefixIndex = 0;
            int j = i;
            // j never passes mLen - 1, so message.charAt(j) is always available
            // as the token's character.
            while (j < mLen - 1) {
                String candidate = message.substring(i, j + 1);
                // Use the multiway tree for the membership test.
                if (!trie.search(candidate)) {
                    break;
                }
                prefixIndex = treeDict.get(candidate);
                j++;
            }

            String phrase = message.substring(i, j + 1);
            outputs.add(new Output(prefixIndex, message.charAt(j)));
            // The phrase can already be known only for the final token; keep
            // its existing index in that case.
            if (treeDict.putIfAbsent(phrase, treeDict.size() + 1) == null) {
                trie.insert(phrase);
            }
            i = j + 1;
        }
        return outputs;
    };


}


  
 
  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11
  • 12
  • 13
  • 14
  • 15
  • 16
  • 17
  • 18
  • 19
  • 20
  • 21
  • 22
  • 23
  • 24
  • 25
  • 26
  • 27
  • 28
  • 29
  • 30
  • 31
  • 32
  • 33
  • 34
  • 35
  • 36
  • 37
  • 38
  • 39
  • 40
  • 41
  • 42
  • 43
  • 44
  • 45
  • 46
  • 47
  • 48
  • 49
  • 50
  • 51
  • 52
  • 53
  • 54
  • 55
  • 56
  • 57
  • 58
  • 59
  • 60
  • 61
  • 62
  • 63
  • 64
  • 65
  • 66
  • 67
  • 68
  • 69
  • 70
  • 71
  • 72
  • 73
  • 74
  • 75
  • 76
  • 77
  • 78
  • 79
  • 80
  • 81
  • 82
  • 83
  • 84
  • 85
  • 86
  • 87
  • 88
  • 89
  • 90
  • 91
  • 92
  • 93
  • 94
  • 95
  • 96
  • 97
  • 98
  • 99
  • 100
  • 101
  • 102
  • 103
  • 104
  • 105
  • 106
  • 107
  • 108
  • 109
  • 110
  • 111
  • 112
  • 113
  • 114
  • 115
  • 116
  • 117
  • 118
  • 119
  • 120
  • 121
  • 122
  • 123
  • 124
  • 125
  • 126
  • 127
  • 128
  • 129
  • 130
  • 131
  • 132

2、解压缩算法的实现

package com.wretchant.lz78;

import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
 * LZ78 decoder: rebuilds the original text from a code table of
 * {@code (dictionary index, character)} tokens.
 */
public class LZdecode {


    /** Decoding strategy: maps a code table back to text. */
    @FunctionalInterface
    interface Decode {
        /**
         @param outputs the code table
         @return the decoded string
         */
        String decode(List<Output> outputs);
    }

    /**
     * Decodes according to the code table.
     *
     * <p>Every token yields the term "phrase at its index + its character",
     * where index 0 means "no phrase". The term is appended to the result and
     * stored as the next dictionary entry, numbered from 1 in token order.
     *
     * @throws IllegalArgumentException if a token refers to a non-zero index
     *         that has not been defined by an earlier token; previously such a
     *         token was silently decoded as the literal text "null"
     */
    public static final Decode DECODE = (List<Output> outputs) -> {
        StringBuilder unpacked = new StringBuilder();
        // 1-based dictionary index -> phrase.
        Map<Integer, String> treeDict = new HashMap<>();

        for (Output output : outputs) {
            int index = output.getIndex();
            Character character = output.getCharacter();
            String term;
            if (index == 0) {
                // No prefix: the term is just the character itself.
                term = character.toString();
            } else {
                String prefix = treeDict.get(index);
                if (prefix == null) {
                    // treeDict gains one entry per token, so size() + 1 is the
                    // 1-based position of the offending token.
                    throw new IllegalArgumentException(
                            "Token " + (treeDict.size() + 1)
                                    + " references unknown dictionary index " + index);
                }
                term = prefix + character;
            }
            unpacked.append(term);
            treeDict.put(treeDict.size() + 1, term);
        }

        return unpacked.toString();
    };
}


  
 
  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11
  • 12
  • 13
  • 14
  • 15
  • 16
  • 17
  • 18
  • 19
  • 20
  • 21
  • 22
  • 23
  • 24
  • 25
  • 26
  • 27
  • 28
  • 29
  • 30
  • 31
  • 32
  • 33
  • 34
  • 35
  • 36
  • 37
  • 38
  • 39
  • 40
  • 41
  • 42
  • 43

3、测试和使用

package com.wretchant.lz78;

import java.io.InputStream;
import java.util.List;
import java.util.Scanner;
import java.util.function.ToIntFunction;

/**
 * Command-line demo: reads one line of text, encodes it and prints the
 * resulting code table.
 */
public class LZpack {

    /**
     * Prints every entry of a code table to standard output, one per line.
     * Always returns 1.
     */
    public static final ToIntFunction<List<Output>> DICT_PRINT = outputs -> {
        for (Output entry : outputs) {
            System.out.println("index :" + entry.getIndex() + " char :" + entry.getCharacter());
        }
        return 1;
    };

    /**
     * Prompts for a line on standard input, encodes it and prints the tokens.
     *
     * @param args unused
     */
    public static void main(String[] args) {

        Scanner scanner = new Scanner(System.in);
        System.out.println("Please input text ");
        String input = scanner.nextLine();

        DICT_PRINT.applyAsInt(LZencode.ENCODE.encode(input));
    }
}


  
 
  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11
  • 12
  • 13
  • 14
  • 15
  • 16
  • 17
  • 18
  • 19
  • 20
  • 21
  • 22
  • 23
  • 24
  • 25
  • 26
  • 27
  • 28

测试结果如下
在这里插入图片描述

4、Python 版本的实现代码

def compress(message):
    """Lazily LZ78-encode ``message``.

    Yields ``(index, symbol)`` tuples. ``index`` is the 1-based number of the
    longest phrase already in the dictionary (0 when there is none) and
    ``symbol`` is the element that follows it. When the message ends on a
    phrase that is already known, the last tuple carries ``''`` as its symbol.
    """
    phrases = {}
    total = len(message)
    pos = 0
    while pos < total:
        head = message[pos]

        # Unknown single symbol: emit it as a literal and remember it.
        if head not in phrases:
            yield (0, head)
            phrases[head] = len(phrases) + 1
            pos += 1
            continue

        # Known symbol that is also the last one: emit its index alone.
        if pos == total - 1:
            yield (phrases.get(head), '')
            pos += 1
            continue

        # Known symbol with more input behind it: grow the phrase until it is
        # no longer in the dictionary or the input runs out.
        end = pos + 1
        while end < total:
            candidate = message[pos:end + 1]
            if candidate not in phrases:
                yield (phrases.get(message[pos:end]), message[end])
                phrases[candidate] = len(phrases) + 1
                pos = end + 1
                break
            if end == total - 1:
                # Input exhausted while still matching a known phrase.
                yield (phrases.get(candidate), '')
                pos = end + 1
            end += 1


def uncompress(packed):
    """Rebuild the text from an iterable of ``(index, symbol)`` tuples.

    Index 0 means the term is the symbol alone; any other index means the term
    is the dictionary phrase with that number followed by the symbol. Every
    term is appended to the result and stored as the next dictionary phrase,
    numbered from 1.
    """
    phrases = {}
    pieces = []
    for index, ch in packed:
        term = ch if index == 0 else phrases.get(index) + ch
        pieces.append(term)
        phrases[len(phrases) + 1] = term
    return ''.join(pieces)


if __name__ == '__main__':
    # Round-trip a few sample messages and print whether each one survives
    # compress -> uncompress unchanged.
    for sample in ['ABBCBCABABCAABCAAB', 'BABAABRRRA', 'AAAAAAAAA']:
        restored = uncompress(compress(sample))
        print(restored == sample)

  
 
  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11
  • 12
  • 13
  • 14
  • 15
  • 16
  • 17
  • 18
  • 19
  • 20
  • 21
  • 22
  • 23
  • 24
  • 25
  • 26
  • 27
  • 28
  • 29
  • 30
  • 31
  • 32
  • 33
  • 34
  • 35
  • 36
  • 37
  • 38
  • 39
  • 40
  • 41
  • 42
  • 43
  • 44
  • 45

文章来源: wretchant.blog.csdn.net,作者:简简单单OnlineZuozuo,版权归原作者所有,如需转载,请联系作者。

原文链接:wretchant.blog.csdn.net/article/details/116411274

【版权声明】本文为华为云社区用户转载文章,如果您发现本社区中有涉嫌抄袭的内容,欢迎发送邮件进行举报,并提供相关证据,一经查实,本社区将立刻删除涉嫌侵权内容,举报邮箱: cloudbbs@huaweicloud.com
  • 点赞
  • 收藏
  • 关注作者

评论(0)

0/1000
抱歉,系统识别当前为高风险访问,暂不支持该操作

全部回复

上滑加载中

设置昵称

在此一键设置昵称,即可参与社区互动!

*长度不超过10个汉字或20个英文字符,设置后3个月内不可修改。

*长度不超过10个汉字或20个英文字符,设置后3个月内不可修改。