I just figured out how to compress any file to a fixed size. I can't even begin to tell you how revolutionary this is.
This will change the world overnight:
- you'll be able to download a 4k movie in a Tweet™
- upload an image of your hard drive in milliseconds
- compression speed is O(n), as fast as your disk can read
- written in Python, zero external dependencies
- any file compresses to ~130 bytes, yes, you read that right
Because I believe in Open Source, I'm releasing this to the world. Pull requests for decompress()
are welcome.
#!/usr/bin/env python3
# usage example
# echo ab > ab.txt
# ./o1compress.py -c ab.txt
# ./o1compress.py -d ab.txt.X
import sys
import hashlib
import itertools
import argparse
def compress(data):
    """
    Reduce *data* to a ``(length, digest)`` pair.

    The first element is ``len(data)``; the second is the SHA-512 hash of
    *data* rendered as a hexadecimal string.
    """
    size = len(data)
    digest = hashlib.sha512(data).hexdigest()
    return size, digest
def decompress(length, hash):
    """
    Recover the original bytes from a ``(length, hash)`` pair by brute force.

    Every byte string of exactly *length* bytes is generated (in the order
    produced by ``itertools.product(range(256), repeat=length)``) and hashed
    with SHA-512; the first candidate whose digest equals *hash* is returned.

    Args:
        length: number of bytes in the original data; must not be negative.
        hash: SHA-512 digest of the original data as a hexadecimal string.
            Letter case does not matter.  (The name shadows the builtin
            ``hash`` but is kept because it is part of the signature.)

    Returns:
        The matching ``bytes`` object.

    Raises:
        ValueError: if *length* is negative, or *hash* is not a valid
            hexadecimal SHA-512 digest.  These inputs are rejected before
            the search starts because no candidate could ever match.
        Exception: if no byte string of the given length has the given hash.

    The search visits up to 256**length candidates, so run time grows
    exponentially with *length*.  Timings noted by the original author:

        length  time
        1       .04s  (mostly overhead)
        2       .07s
        3       2.6s
        4       195s
        5       (no measurement)

    TODO: improve speed of this function, length > 3 is problematic
    """
    if length < 0:
        raise ValueError("length must not be negative")

    # Decode the expected digest once so that each candidate is compared as
    # raw bytes; this also makes the comparison independent of letter case.
    try:
        target = bytes.fromhex(hash)
    except ValueError:
        raise ValueError("hash is not a valid hexadecimal string") from None
    if len(target) != hashlib.sha512().digest_size:
        raise ValueError("hash is not a SHA-512 digest")

    for candidate in itertools.product(range(256), repeat=length):
        data = bytes(candidate)
        if hashlib.sha512(data).digest() == target:
            return data
    raise Exception("error with input, couldn't decompress")
def test():
    """
    Round-trip a few short samples through compress() and decompress().

    Prints "tests passed" when every sample survives the round trip.

    Raises:
        AssertionError: if a decompressed sample differs from its input.
    """
    samples = ['a', 'ab', 'ba', 'a\n', '\na']
    for text in samples:
        data = text.encode()
        # Raise explicitly instead of using an `assert` statement: asserts
        # are removed when Python runs with -O, which would let this
        # self-test report success without checking anything.
        if decompress(*compress(data)) != data:
            raise AssertionError("data doesn't match")
    print("tests passed")
def parse_cmdline(argv=None):
    """
    Parse the command line of the compressor.

    Args:
        argv: list of argument strings to parse.  Defaults to ``None``, in
            which case argparse reads the process command line.

    Returns:
        argparse.Namespace with two attributes:
            action: 'test', 'compress' or 'decompress'
            filename: list of positional file names (may be empty for
                the 'test' action)

    Exits with an argparse usage error (SystemExit) when no action flag is
    given, when more than one of -t/-c/-d is given, or when -c/-d is used
    without a file name.
    """
    parser = argparse.ArgumentParser(description='O(1) compressor')

    # Exactly one action must be chosen; previously the last flag silently
    # won and a missing flag made the program do nothing.
    actions = parser.add_mutually_exclusive_group(required=True)
    actions.add_argument('-t', dest='action', action='store_const',
                         const='test',
                         help='run a quick test, no input file')
    actions.add_argument('-c', dest='action', action='store_const',
                         const='compress',
                         help='compress a file')
    actions.add_argument('-d', dest='action', action='store_const',
                         const='decompress',
                         help='decompress a file')
    parser.add_argument('filename', type=str, nargs='*',
                        help='file to compress/decompress')

    args = parser.parse_args(argv)
    # The positional stays optional so that `-t` works on its own, which
    # means the file-name requirement has to be enforced here.
    if args.action != 'test' and not args.filename:
        parser.error(f"a filename is required to {args.action}")
    return args
def main():
    """
    Command-line entry point: run the action selected by parse_cmdline().

    - 'test': run the built-in round-trip test.
    - 'compress': read the first named file and write "<length> <hash>"
      (as returned by compress()) to a new file named "<filename>.X".
    - 'decompress': read a "<length> <hash>" file, recover the data with
      decompress() and print the resulting bytes object.

    Any other action value (no flag given) does nothing.

    Raises:
        SystemExit: with an error message when compress/decompress is
            requested without a file name, or when the file to decompress
            does not contain a "<length> <hash>" pair.
    """
    args = parse_cmdline()

    if args.action == 'test':
        test()
        return
    if args.action not in ('compress', 'decompress'):
        return

    # Fail with a readable message instead of an IndexError on filename[0].
    if not args.filename:
        raise SystemExit(f"error: a filename is required to {args.action}")
    path = args.filename[0]

    if args.action == 'compress':
        with open(path, 'rb') as f:
            data = f.read()
        length, digest = compress(data)
        with open(path + '.X', 'wb') as f:
            f.write(f"{length} {digest}".encode())
        return

    with open(path, 'rb') as f:
        raw = f.read()
    try:
        # Undecodable bytes, a wrong number of fields and a non-integer
        # length all surface as ValueError (UnicodeDecodeError is a
        # subclass of it).
        length_text, digest = raw.decode().split()
        length = int(length_text)
    except ValueError:
        raise SystemExit(
            f"error: {path} is not a valid compressed file") from None
    # Printed as a bytes object, i.e. in its repr form (b'...').
    print(decompress(length, digest))
# Run the command-line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()
OMG I'm gonna be so rich….