How to compress a string?

女生的网名这么多〃 提交于 2019-11-28 23:16:03
olliej

Many compression algorithms are well documented, a couple even have js implementations:

  • GZip A common (reasonably) good compression algorithm, I know there's a JS impl, i'm just hunting the URL

  • LZW Another question points to an LZW implementation in JS

  • Arithmetic coding (i did this, but the model it uses is stupid so doesn't achieve the best compression rates it could)

Sounds like you might benefit from single and double RLE compression.

A primer on this can be seen here:

http://pp19dd.com/2011/10/query-string-limits-encoding-hundreds-of-checkboxes-with-rle/#demo

The library should be flexible enough to modify your compression pattern to something more preferable. The writeup explains how this works; might be a good start to optimize your SVG case.

You could try Huffman compression. Number of different chars is 20-30, and if the string is long, compression should be effective.

The following solution returns a compressed Base64 encoded string.

Create a file called zip.js with the code below and then see usage below that.

// Apply LZW-compression to a string and return base64 compressed string.
export function zip (s) {
  try {
    var dict = {}
    var data = (s + '').split('')
    var out = []
    var currChar
    var phrase = data[0]
    var code = 256
    for (var i = 1; i < data.length; i++) {
      currChar = data[i]
      if (dict[phrase + currChar] != null) {
        phrase += currChar
      } else {
        out.push(phrase.length > 1 ? dict[phrase] : phrase.charCodeAt(0))
        dict[phrase + currChar] = code
        code++
        phrase = currChar
      }
    }
    out.push(phrase.length > 1 ? dict[phrase] : phrase.charCodeAt(0))
    for (var j = 0; j < out.length; j++) {
      out[j] = String.fromCharCode(out[j])
    }
    return utoa(out.join(''))
  } catch (e) {
    console.log('Failed to zip string return empty string', e)
    return ''
  }
}

// Decompress an LZW-encoded base64 string
export function unzip (base64ZippedString) {
  try {
    var s = atou(base64ZippedString)
    var dict = {}
    var data = (s + '').split('')
    var currChar = data[0]
    var oldPhrase = currChar
    var out = [currChar]
    var code = 256
    var phrase
    for (var i = 1; i < data.length; i++) {
      var currCode = data[i].charCodeAt(0)
      if (currCode < 256) {
        phrase = data[i]
      } else {
        phrase = dict[currCode] ? dict[currCode] : oldPhrase + currChar
      }
      out.push(phrase)
      currChar = phrase.charAt(0)
      dict[code] = oldPhrase + currChar
      code++
      oldPhrase = phrase
    }
    return out.join('')
  } catch (e) {
    console.log('Failed to unzip string return empty string', e)
    return ''
  }
}

// ucs-2 string to base64 encoded ascii
function utoa (str) {
  return window.btoa(unescape(encodeURIComponent(str)))
}
// base64 encoded ascii to ucs-2 string
function atou (str) {
  return decodeURIComponent(escape(window.atob(str)))
}

Usage:

import { zip, unzip } from './zip'

// Zip a string
const str = 'zip it'
const base64CompressedString = zip(str)

// Zip an object
const obj = { a: 123, b: 'zipit' }
const base64CompressedString = zip(JSON.stringify(obj))

// Unzip the base64 compressed string back to an object.
const originalObject = JSON.parse(unzip(base64CompressedString))

BTW... if you're concerned about escape/unescape being depreciated consider a polyfill

LZW algorithm from here and base64 encoding from here

易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!