#!/usr/bin/env python3
# Demo program for zlib; it compresses or decompresses files, but *doesn't*
# delete the original.  This doesn't support all of gzip's options.
#
# The 'gzip' module in the standard library provides a more complete
# implementation of gzip-format files.

import zlib, sys, os
# Bit masks tested against the flag byte of a gzip header.
FTEXT = 1        # text-flag bit (not referenced elsewhere in this file)
FHCRC = 2        # a 16-bit header CRC is present
FEXTRA = 4       # an extra field is present
FNAME = 8        # a null-terminated original filename is present
FCOMMENT = 16    # a null-terminated comment is present

def write32(output, value):
    """Write *value* to *output* as a 32-bit little-endian integer.

    output -- binary file-like object with a write() method.
    value  -- integer to store; only its low 32 bits are written, so
              larger values are truncated and negative values come out
              in two's-complement form.
    """
    # Mask first so to_bytes() never overflows, then emit all four bytes
    # in a single write instead of four one-byte writes.
    output.write((value & 0xFFFFFFFF).to_bytes(4, 'little'))
def read32(input):
    """Read a 32-bit little-endian unsigned integer from *input*.

    input -- binary file-like object with a read() method.

    Returns the decoded value as an int.  Raises EOFError when fewer
    than four bytes are available, rather than failing inside ord()
    with an unrelated-looking TypeError.
    """
    raw = input.read(4)
    if len(raw) != 4:
        raise EOFError('Unexpected end of file while reading a 32-bit value')
    return int.from_bytes(raw, 'little')

def compress(filename, input, output):
    """Compress *input* into *output* as a single-member gzip stream.

    filename -- path of the file being compressed.  It is stat()ed for
                its modification time, and its base name is stored in
                the header as the original filename.
    input    -- binary file object the uncompressed data is read from.
    output   -- binary file object that receives the gzip stream.
    """
    output.write(b'\037\213\010')        # Write the header, ...
    output.write(bytes([FNAME]))         # ... flag byte ...

    statval = os.stat(filename)          # ... modification time ...
    mtime = statval[8]                   # index 8 is st_mtime in whole seconds
    write32(output, mtime)
    output.write(b'\002')                # ... slowest compression alg. ...
    output.write(b'\377')                # ... OS (=unknown) ...
    # Store only the base name: the header field holds the original file
    # name without any leading directory components.
    bfilename = os.path.basename(filename).encode(sys.getfilesystemencoding())
    output.write(bfilename + b'\000')    # ... original filename ...

    crcval = zlib.crc32(b'')
    size = 0
    # A negative wbits value asks zlib for a raw deflate stream, since the
    # gzip header and trailer are written by hand here.
    compobj = zlib.compressobj(9, zlib.DEFLATED, -zlib.MAX_WBITS,
                               zlib.DEF_MEM_LEVEL, 0)
    while True:
        data = input.read(1024)
        if not data:
            break
        crcval = zlib.crc32(data, crcval)
        size += len(data)
        output.write(compobj.compress(data))
    output.write(compobj.flush())
    write32(output, crcval)              # ... the CRC ...
    # Count the bytes actually read instead of trusting the stat() size,
    # which can disagree with what was compressed; write32() keeps only
    # the low 32 bits.
    write32(output, size)                # and the file size.
def _read_byte(input):
    """Return the next byte of *input* as an int; raise EOFError at end of file."""
    byte = input.read(1)
    if not byte:
        raise EOFError('Unexpected end of file in gzip header')
    return byte[0]


def _skip_zero_terminated(input):
    """Read and discard bytes from *input* up to and including a NUL byte."""
    # _read_byte() raises at end of file, so a truncated header cannot
    # turn this into an endless loop.
    while _read_byte(input) != 0:
        pass


def decompress(input, output):
    """Decompress the gzip stream in *input*, writing the data to *output*.

    input  -- seekable binary file object positioned at the start of a
              gzip file (the trailer is re-read by seeking from the end).
    output -- binary file object that receives the decompressed data.

    Prints a message and exits with status 1 when the magic number or
    the compression method is not the expected one.  Raises EOFError
    when the header ends prematurely.  A CRC or length mismatch is only
    reported with a printed message.
    """
    magic = input.read(2)
    if magic != b'\037\213':
        print('Not a gzipped file')
        sys.exit(1)
    if _read_byte(input) != 8:
        print('Unknown compression method')
        sys.exit(1)
    flag = _read_byte(input)
    input.read(4+1+1)                   # Discard modification time,
                                        # extra flags, and OS byte.
    if flag & FEXTRA:
        # Read & discard the extra field, if present; its length is a
        # 16-bit little-endian value.
        xlen = _read_byte(input)
        xlen += 256 * _read_byte(input)
        input.read(xlen)
    if flag & FNAME:
        # Read and discard a null-terminated string containing the filename
        _skip_zero_terminated(input)
    if flag & FCOMMENT:
        # Read and discard a null-terminated string containing a comment
        _skip_zero_terminated(input)
    if flag & FHCRC:
        input.read(2)                   # Read & discard the 16-bit header CRC

    # Negative wbits: the payload is a raw deflate stream without a zlib
    # header or trailer.
    decompobj = zlib.decompressobj(-zlib.MAX_WBITS)
    crcval = zlib.crc32(b'')
    length = 0
    while True:
        data = input.read(1024)
        if not data:
            break
        decompdata = decompobj.decompress(data)
        output.write(decompdata)
        length += len(decompdata)
        crcval = zlib.crc32(decompdata, crcval)

    decompdata = decompobj.flush()
    output.write(decompdata)
    length += len(decompdata)
    crcval = zlib.crc32(decompdata, crcval)

    # We've read to the end of the file, so we have to rewind in order
    # to reread the 8 bytes containing the CRC and the file size.  The
    # decompressor is smart and knows when to stop, so feeding it
    # extra data is harmless.
    input.seek(-8, os.SEEK_END)
    crc32 = read32(input)
    isize = read32(input)
    if crc32 != crcval:
        print('CRC check failed.')
    # The stored size is only 32 bits wide, so compare modulo 2**32.
    if isize != (length & 0xFFFFFFFF):
        print('Incorrect length of data produced')
def main():
    """Command-line entry point: compress or decompress one file.

    Expects exactly one argument in sys.argv.  A name ending in '.gz'
    is decompressed into the same name without that suffix; any other
    name is compressed into '<name>.gz'.  The original file is left in
    place.  With the wrong number of arguments, a usage message is
    printed and the process exits with status 2.
    """
    if len(sys.argv) != 2:
        print('Usage: minigzip.py <filename>')
        print('  The file will be compressed or decompressed.')
        sys.exit(2)                     # non-zero: the invocation was wrong

    filename = sys.argv[1]
    if filename.endswith('.gz'):
        compressing = False
        outputname = filename[:-3]
    else:
        compressing = True
        outputname = filename + '.gz'

    # The with-statement closes both files even when compress() or
    # decompress() raises or calls sys.exit().
    with open(filename, 'rb') as infile, open(outputname, 'wb') as outfile:
        if compressing:
            compress(filename, infile, outfile)
        else:
            decompress(infile, outfile)
# Run the command-line tool only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()