Source code for pyminifier.compression

# -*- coding: utf-8 -*-

__doc__ = """\
compression.py - A module providing functions to turn a python script into a
self-executing archive in a few different formats...

**gz_pack format:**

    - Typically provides better compression than bzip2 (for Python scripts).
    - Scripts compressed via this method can still be imported as modules.
    - The resulting binary data is base64-encoded which isn't optimal compression.

**bz2_pack format:**

    - In some cases may provide better compression than gzip.
    - Scripts compressed via this method can still be imported as modules.
    - The resulting binary data is base64-encoded which isn't optimal compression.

**lzma_pack format:**

    - In some cases may provide better compression than bzip2.
    - Scripts compressed via this method can still be imported as modules.
    - The resulting binary data is base64-encoded which isn't optimal compression.

The gz_pack, bz2_pack, and lzma_pack formats only work on individual .py
files.  To pack a number of files at once using this method use the
``--destdir`` command line option:

.. code-block:: shell

    $ pyminifier --gzip --destdir=/tmp/minified *.py

**zip_pack format:**

    - Provides the best compression of Python scripts.
    - Resulting script cannot be imported as a module.
    - Any required modules that are local (implied path) will be automatically
      included in the archive.
"""

# Import standard library modules
import os
import shutil
import tempfile

# Import our own supporting modules
from . import analyze, minification, obfuscate, token_utils


def bz2_pack(source):
    """
    Returns "source" as a bzip2-compressed, self-extracting python script.

    The returned text consists of, in order:

        * The first line of *source* if it matches ``analyze.shebang`` (a
          shebang ending in plain ``python`` is rewritten to end in
          ``python3``).
        * An ``import bz2, base64`` line.
        * A single ``exec()`` call that base64-decodes and bzip2-decompresses
          the embedded copy of *source* (encoded as UTF-8).

    .. note::

        This method uses up more space than the zip_pack method but it has the
        advantage in that the resulting .py file can still be imported into a
        python program.
    """
    import base64
    import bz2
    # Preserve shebangs (don't care about encodings for this)
    first_line = source.split("\n")[0]
    header = ""
    if analyze.shebang.match(first_line):
        stripped = first_line.rstrip()
        if stripped.endswith("python"):  # Make it python3
            first_line = stripped + "3"  # #!/usr/bin/env python3
        header = first_line + "\n"
    # The whole of *source* (shebang included) goes into the payload
    compressed_source = bz2.compress(source.encode("utf-8"))
    payload = base64.b64encode(compressed_source).decode("utf-8")
    return (
        header
        + "import bz2, base64\n"
        + "exec(bz2.decompress(base64.b64decode('" + payload + "')))\n"
    )


def gz_pack(source):
    """
    Returns "source" as a gzip-compressed, self-extracting python script.

    The returned text consists of, in order:

        * The first line of *source* if it matches ``analyze.shebang`` (a
          shebang ending in plain ``python`` is rewritten to end in
          ``python3``).
        * An ``import zlib, base64`` line.
        * A single ``exec()`` call that base64-decodes and zlib-decompresses
          the embedded copy of *source* (encoded as UTF-8).

    .. note::

        This method uses up more space than the zip_pack method but it has the
        advantage in that the resulting .py file can still be imported into a
        python program.
    """
    import base64
    import zlib
    # Preserve shebangs (don't care about encodings for this)
    first_line = source.split("\n")[0]
    header = ""
    if analyze.shebang.match(first_line):
        stripped = first_line.rstrip()
        if stripped.endswith("python"):  # Make it python3
            first_line = stripped + "3"  # #!/usr/bin/env python3
        header = first_line + "\n"
    # The whole of *source* (shebang included) goes into the payload
    compressed_source = zlib.compress(source.encode("utf-8"))
    payload = base64.b64encode(compressed_source).decode("utf-8")
    return (
        header
        + "import zlib, base64\n"
        + "exec(zlib.decompress(base64.b64decode('" + payload + "')))\n"
    )


def lzma_pack(source):
    """
    Returns "source" as a lzma-compressed, self-extracting python script.

    The returned text consists of, in order:

        * The first line of *source* if it matches ``analyze.shebang`` (a
          shebang ending in plain ``python`` is rewritten to end in
          ``python3``).
        * An ``import lzma, base64`` line.
        * A single ``exec()`` call that base64-decodes and lzma-decompresses
          the embedded copy of *source* (encoded as UTF-8).

    .. note::

        This method uses up more space than the zip_pack method but it has the
        advantage in that the resulting .py file can still be imported into a
        python program.
    """
    import base64
    import lzma
    # Preserve shebangs (don't care about encodings for this)
    first_line = source.split("\n")[0]
    header = ""
    if analyze.shebang.match(first_line):
        stripped = first_line.rstrip()
        if stripped.endswith("python"):  # Make it python3
            first_line = stripped + "3"  # #!/usr/bin/env python3
        header = first_line + "\n"
    # The whole of *source* (shebang included) goes into the payload
    compressed_source = lzma.compress(source.encode("utf-8"))
    payload = base64.b64encode(compressed_source).decode("utf-8")
    return (
        header
        + "import lzma, base64\n"
        + "exec(lzma.decompress(base64.b64decode('" + payload + "')))\n"
    )


def prepend(line, path):
    """
    Prepends *line* to the _beginning_ of the file at the given *path*.

    *line* may be a ``str`` (it will be encoded as UTF-8) or ``bytes``.  If
    *line* doesn't end in a newline one will be appended to the end of it.

    The new content is assembled in a temporary file which then replaces the
    file at *path*.  The permission bits of the original file are carried over
    to the replacement.  If anything goes wrong (e.g. *path* can't be read) the
    temporary file is removed and the exception is re-raised.
    """
    if isinstance(line, str):
        line = line.encode("utf-8")
    if not line.endswith(b"\n"):
        line += b"\n"
    # mkstemp() creates the file and hands back an open descriptor, so there
    # is no window in which another process could claim the temporary name.
    fd, temp_name = tempfile.mkstemp()
    try:
        with os.fdopen(fd, "wb") as temp, open(path, "rb") as original:
            temp.write(line)
            shutil.copyfileobj(original, temp)
        # mkstemp() files are created owner-only; keep the original's mode
        shutil.copymode(path, temp_name)
        # Now replace the original with the modified version
        shutil.move(temp_name, path)
    finally:
        # After a successful move the temporary name no longer exists; it is
        # only still present if something above failed.
        if os.path.exists(temp_name):
            os.remove(temp_name)


def get_name_generator(options):
    """
    Returns the name generator to use for obfuscation, or ``None`` if none of
    the obfuscation-related attributes of *options* are set.

    The generator is built by ``obfuscate.obfuscation_machine()`` from
    ``options.use_nonlatin`` and ``options.replacement_length``.
    """
    obfuscation_flags = (
        "obfuscate",
        "obf_classes",
        "obf_functions",
        "obf_variables",
        "obf_builtins",
        "obf_import_methods",
    )
    # The flags are checked lazily, in order, stopping at the first truthy one
    if not any(getattr(options, flag) for flag in obfuscation_flags):
        return None  # Tells the caller that no obfuscation is wanted
    # One generator is put together for use by all obfuscation functions:
    length = int(options.replacement_length)
    return obfuscate.obfuscation_machine(
        use_unicode=options.use_nonlatin,
        identifier_length=length)


def zip_pack(filepath, options):
    """
    Creates a zip archive containing the script at *filepath* along with all
    imported modules that are local to *filepath* as a self-extracting python
    script.  A shebang will be prepended to the beginning of the resulting
    zip archive and the file is made executable (mode 0755).

    The archive is written to ``options.pyz`` using ZIP_DEFLATED compression.

    *options* is expected to be the same options parsed from pyminifier.py
    on the command line.  This function reads ``options.pyz`` and
    ``options.nominify`` directly; the obfuscation-related attributes are read
    via :func:`get_name_generator`.

    The script at *filepath* is stored as ``__main__.py`` unless a
    ``__main__.py`` already exists next to it, in which case that file is
    stored as-is and the script keeps its own name.  Each additional local
    module that is included has ``RESULT_FOOTER`` appended to it.

    A short summary (destination, included modules and the size of the
    archive as a percentage of the combined original sizes) is printed to
    stdout.

    .. note::

        * The file resulting from this method cannot be imported as a module into
          another python program (command line execution only).
        * Any required local (implied path) modules will be automatically
          included (well, it does its best).
        * The result will be saved as a .pyz file (which is an extension
          I invented for this format).
    """
    import zipfile

    # Hopefully some day we'll be able to use ZIP_LZMA too as the compression
    # format to save even more space...
    compression_format = zipfile.ZIP_DEFLATED
    # Record the filesize for later comparison (size reduction stats)
    cumulative_size = os.path.getsize(filepath)
    dest = options.pyz
    included_modules = []
    # The context manager makes sure the archive gets closed even if one of
    # the steps below raises.
    with zipfile.ZipFile(dest, "w", compression_format) as z:
        # Take care of minifying our primary script first:
        with open(filepath, encoding="utf-8") as f:
            source = f.read()

        primary_tokens = token_utils.listified_tokenizer(source)
        # Preserve shebangs (don't care about encodings for this)
        shebang = analyze.get_shebang(primary_tokens)
        if not shebang:
            # We *must* have a shebang for this to work so make a
            # conservative default:
            shebang = "#!/usr/bin/env python"
        if shebang.rstrip().endswith("python"):  # Make it python3 (to be safe)
            shebang = shebang.rstrip()
            shebang += "3\n"  # #!/usr/bin/env python3
        if not options.nominify:  # Minify unless told not to
            source = minification.minify(primary_tokens, options)

        # None here means no obfuscation was requested
        name_generator = get_name_generator(options)

        if name_generator:
            tokens = token_utils.listified_tokenizer(source)
            obfuscate.obfuscate(
                filepath,
                tokens,
                options,
                name_generator=name_generator,
            )
            source = token_utils.untokenize(tokens)

        # Need the path where the script lives for the next steps:
        path, filename = os.path.split(filepath)
        if not path:
            path = os.getcwd()
        if filename == "__main__.py":
            z.writestr(filename, source)
        else:
            main_py = os.path.join(path, "__main__.py")

            if os.path.exists(main_py):
                # There's an existing __main__.py, use it (stored unmodified)
                z.write(main_py, "__main__.py")
                z.writestr(filename, source)
            else:
                # No __main__.py so we rename our main script to be the
                # __main__.py.  This is so it will still execute as a zip
                z.writestr("__main__.py", source)

        # Now write any required modules into the zip as well
        local_modules = analyze.enumerate_local_modules(primary_tokens, path)

        # Passed to every obfuscate() call in the loop below.
        # NOTE(review): presumably shared so replacements stay consistent
        # across modules -- confirm against obfuscate.obfuscate().
        table = [{}]

        # Imported once here rather than on every loop iteration
        from . import RESULT_FOOTER

        # NOTE: local_modules may grow while we iterate over it (see below)
        for module in local_modules:
            module = module.replace(".", "/")
            module = "%s.py" % module

            # Avoid writing the filepath itself again
            if module == filename:
                continue

            if module == "__main__.py":
                continue

            module_path = os.path.join(path, module)
            # Add the filesize to our total
            cumulative_size += os.path.getsize(module_path)
            # Also record that we've added it to the archive
            included_modules.append(module)
            # Minify these files too
            with open(module_path, encoding="utf-8") as f:
                source = f.read()
            tokens = token_utils.listified_tokenizer(source)
            maybe_more_modules = analyze.enumerate_local_modules(tokens, path)
            for mod in maybe_more_modules:
                if mod not in local_modules:
                    # Extends the loop we're in so that modules imported by
                    # this module get picked up too
                    local_modules.append(mod)
            if not options.nominify:
                source = minification.minify(tokens, options)
            # Have to re-tokenize for obfuscation (it's quick):
            tokens = token_utils.listified_tokenizer(source)
            # Perform obfuscation if any of the related options were set
            if name_generator:
                obfuscate.obfuscate(
                    module,
                    tokens,
                    options,
                    name_generator=name_generator,
                    table=table
                )
            # Convert back to text
            result = token_utils.untokenize(tokens)
            result += "{}\n".format(RESULT_FOOTER)
            z.writestr(module, result)

    # Finish up by writing the shebang to the beginning of the zip
    prepend(shebang, dest)
    os.chmod(dest, 0o755)  # Make it executable (since we added the shebang)
    pyz_filesize = os.path.getsize(dest)
    if cumulative_size:
        percent_saved = round(
            float(pyz_filesize) / float(cumulative_size) * 100, 2)
    else:
        # Nothing but empty input files; avoid dividing by zero
        percent_saved = 0.0
    print("%s saved as compressed executable zip: %s" % (filepath, dest))
    print("The following modules were automatically included (as automagic "
          "dependencies):\n")
    for module in included_modules:
        print("\t%s" % module)
    print("\nOverall size reduction: %s%% of original size" % percent_saved)