@ wrote... (3 years, 3 months ago)

I just figured out how to compress any file to a fixed size. I can't even begin to tell you how revolutionary this is.

This will change the world overnight:

  • you'll be able to download a 4k movie in a Tweet
  • upload an image of your hard drive in milliseconds
  • compression speed is O(n), as fast as your disk can read
  • written in Python, zero external dependencies
  • any file compresses to ~130 bytes, yes, you read that right

Because I believe in Open Source I'm releasing this to the world. Pull requests for decompress() are welcome.

#!/usr/bin/env python3

# usage example
# echo ab > ab.txt
# ./o1compress.py -c ab.txt
# ./o1compress.py -d ab.txt.X

import sys
import hashlib
import itertools
import argparse

def compress(data):
    """
    "compress" data into a (length, digest) pair

    data is a bytes-like object. the result is a tuple holding the
    number of bytes in data and the sha512 digest of data as a hex
    string (always 128 hex digits, whatever the input size).
    """
    size = len(data)
    digest = hashlib.sha512(data).hexdigest()

    return size, digest


def decompress(length, hash):
    """
    try every combination of bytes of given length, return
    the bytes when we've found the matching hash

    length  number of bytes in the original data, must be >= 0
    hash    sha512 digest of the original data as a hex string, upper
            or lower case. the name shadows the builtin hash() but is
            kept so existing keyword callers keep working.

    returns the first candidate (candidates are generated by
    itertools.product over range(256)) whose sha512 digest matches.

    raises ValueError if length is negative or hash is not a 128 digit
    hex string, Exception if no byte string of that length matches.

    NOTE: this is a brute force search over 256 ** length candidates,
    so the run time grows exponentially with length. timings recorded
    by the original author:

    length  time
    1       .04s mostly overhead
    2       .07s
    3       2.6s
    4       195s
    5       did not finish
    """

    # validate up front: a bad length or digest can never match, so
    # fail now instead of after an exhaustive (and hopeless) search
    if length < 0:
        raise ValueError("error with input, length can't be negative")

    try:
        # comparing raw digests makes the check case-insensitive and
        # avoids building a hex string for every candidate
        target = bytes.fromhex(hash)
    except (TypeError, ValueError) as err:
        raise ValueError("error with input, hash is not valid hex") from err

    if len(target) != hashlib.sha512().digest_size:
        raise ValueError("error with input, hash is not a sha512 digest")

    sha512 = hashlib.sha512  # hoisted out of the hot loop

    for candidate in itertools.product(range(256), repeat=length):
        data = bytes(candidate)

        if sha512(data).digest() == target:
            return data

    raise Exception("error with input, couldn't decompress")


def test():
    """
    round-trip a few one and two byte inputs through compress() and
    decompress() and print "tests passed" when they all survive

    raises AssertionError on the first input that does not round-trip.
    """
    for text in ['a', 'ab', 'ba', 'a\n', '\na']:
        data = text.encode()

        # raise explicitly instead of using assert: assert statements
        # are removed under "python -O", which would turn this
        # self-test into a no-op that still reports success
        if decompress(*compress(data)) != data:
            raise AssertionError("data doesn't match")

    print("tests passed")


def parse_cmdline():
    """
    parse sys.argv and return the argparse namespace

    the namespace has two attributes:
    action    one of 'test', 'compress' or 'decompress'
    filename  list of positional file names (may be empty for 'test')

    exactly one of -t, -c, -d must be given. -c and -d also need at
    least one file name. any violation prints a usage message and
    exits through argparse's normal error path.
    """

    parser = argparse.ArgumentParser(description='O(1) compressor')

    # the three switches share one dest, so without a group the last
    # one on the command line would silently win
    actions = parser.add_mutually_exclusive_group(required=True)
    actions.add_argument('-t', dest='action', action='store_const',
                         const='test',
                         help='run a quick test, no input file')
    actions.add_argument('-c', dest='action', action='store_const',
                         const='compress',
                         help='compress a file')
    actions.add_argument('-d', dest='action', action='store_const',
                         const='decompress',
                         help='decompress a file')
    parser.add_argument('filename', type=str, nargs='*',
                        help='file to compress/decompress')

    args = parser.parse_args()

    # filename stays optional for -t, so check it here for the actions
    # that actually read a file
    if args.action != 'test' and not args.filename:
        parser.error(f"no file given to {args.action}")

    return args


def main():
    """
    command line entry point, dispatches on the parsed action

    test        run the built-in round-trip test
    compress    read the first file name given and write
                "<length> <sha512 hex>" to <file name>.X
    decompress  read such a file, brute force the original bytes
                and print them

    exits with an error message when compress/decompress is requested
    without a file name, or when the file to decompress does not hold
    a "<length> <hash>" pair.
    """
    args = parse_cmdline()

    if args.action == 'test':
        test()
        return

    if args.action not in ('compress', 'decompress'):
        # no action selected: nothing to do
        return

    if not args.filename:
        # guard the [0] below, which would otherwise raise IndexError
        sys.exit(f"error: no file given to {args.action}")

    path = args.filename[0]

    if args.action == 'compress':
        with open(path, 'rb') as f:
            data = f.read()

        length, digest = compress(data)

        with open(path + '.X', 'wb') as f:
            f.write(f"{length} {digest}".encode())

        return

    with open(path, 'rb') as f:
        packed = f.read()

    try:
        # UnicodeDecodeError, a wrong field count and a non-numeric
        # length are all ValueError subclasses or ValueError itself
        length, digest = packed.decode().split()
        length = int(length)
    except ValueError:
        sys.exit(f"error: {path} is not a valid compressed file")

    data = decompress(length, digest)

    # NOTE: this prints the repr of the bytes object (e.g. b'ab\n'),
    # not the raw bytes
    print(data)

# run the command line interface only when executed as a script,
# not when the module is imported
if __name__ == "__main__":
    main()

OMG I'm gonna be so rich….

Category: tech, Tags: compsci, sarcasm
Comments: 0
Click here to add a comment