# -*- coding: utf-8 -*-
__doc__ = """\
compression.py - A module providing functions to turn a python script into a
self-executing archive in a few different formats...
**gz_pack format:**
- Typically provides better compression than bzip2 (for Python scripts).
- Scripts compressed via this method can still be imported as modules.
- The resulting binary data is base64-encoded which isn't optimal compression.
**bz2_pack format:**
- In some cases may provide better compression than gzip.
- Scripts compressed via this method can still be imported as modules.
- The resulting binary data is base64-encoded which isn't optimal compression.
**lzma_pack format:**
- In some cases may provide better compression than bzip2.
- Scripts compressed via this method can still be imported as modules.
- The resulting binary data is base64-encoded which isn't optimal compression.
The gz_pack, bz2_pack, and lzma_pack formats only work on individual .py
files. To pack a number of files at once using this method use the
``--destdir`` command line option:
.. code-block:: shell
$ pyminifier --gzip --destdir=/tmp/minified *.py
**zip_pack format:**
- Provides the best compression of Python scripts.
- Resulting script cannot be imported as a module.
- Any required modules that are local (implied path) will be automatically
included in the archive.
"""
# Import standard library modules
import os
import shutil
import tempfile
# Import our own supporting modules
from . import analyze, minification, obfuscate, token_utils
def bz2_pack(source):
    """
    Return *source* as a bzip2-compressed, self-extracting python script.

    The text is UTF-8 encoded, compressed with ``bz2`` and embedded as a
    base64 literal which the generated script decodes, decompresses and
    passes to ``exec``.  When the first line of *source* matches
    ``analyze.shebang`` it is kept as the first line of the result; a
    shebang ending in ``python`` is rewritten to end in ``python3``.

    .. note::

        This method uses up more space than the zip_pack method but it has
        the advantage in that the resulting .py file can still be imported
        into a python program.
    """
    import base64
    import bz2

    header = ""
    # Preserve shebangs (don't care about encodings for this)
    shebang_line = source.partition("\n")[0]
    if analyze.shebang.match(shebang_line):
        stripped = shebang_line.rstrip()
        if stripped.endswith("python"):
            # Make it python3: #!/usr/bin/env python3
            shebang_line = stripped + "3"
        header = shebang_line + "\n"
    payload = base64.b64encode(bz2.compress(source.encode("utf-8")))
    return "".join((
        header,
        "import bz2, base64\n",
        "exec(bz2.decompress(base64.b64decode('",
        payload.decode("utf-8"),
        "')))\n",
    ))
def gz_pack(source):
    """
    Return *source* as a zlib-compressed, self-extracting python script.

    The text is UTF-8 encoded, compressed with ``zlib`` and embedded as a
    base64 literal which the generated script decodes, decompresses and
    passes to ``exec``.  When the first line of *source* matches
    ``analyze.shebang`` it is kept as the first line of the result; a
    shebang ending in ``python`` is rewritten to end in ``python3``.

    .. note::

        This method uses up more space than the zip_pack method but it has
        the advantage in that the resulting .py file can still be imported
        into a python program.
    """
    import base64
    import zlib

    header = ""
    # Preserve shebangs (don't care about encodings for this)
    shebang_line = source.partition("\n")[0]
    if analyze.shebang.match(shebang_line):
        stripped = shebang_line.rstrip()
        if stripped.endswith("python"):
            # Make it python3: #!/usr/bin/env python3
            shebang_line = stripped + "3"
        header = shebang_line + "\n"
    payload = base64.b64encode(zlib.compress(source.encode("utf-8")))
    return "".join((
        header,
        "import zlib, base64\n",
        "exec(zlib.decompress(base64.b64decode('",
        payload.decode("utf-8"),
        "')))\n",
    ))
def lzma_pack(source):
    """
    Return *source* as an lzma-compressed, self-extracting python script.

    The text is UTF-8 encoded, compressed with ``lzma`` and embedded as a
    base64 literal which the generated script decodes, decompresses and
    passes to ``exec``.  When the first line of *source* matches
    ``analyze.shebang`` it is kept as the first line of the result; a
    shebang ending in ``python`` is rewritten to end in ``python3``.

    .. note::

        This method uses up more space than the zip_pack method but it has
        the advantage in that the resulting .py file can still be imported
        into a python program.
    """
    import base64
    import lzma

    header = ""
    # Preserve shebangs (don't care about encodings for this)
    shebang_line = source.partition("\n")[0]
    if analyze.shebang.match(shebang_line):
        stripped = shebang_line.rstrip()
        if stripped.endswith("python"):
            # Make it python3: #!/usr/bin/env python3
            shebang_line = stripped + "3"
        header = shebang_line + "\n"
    payload = base64.b64encode(lzma.compress(source.encode("utf-8")))
    return "".join((
        header,
        "import lzma, base64\n",
        "exec(lzma.decompress(base64.b64decode('",
        payload.decode("utf-8"),
        "')))\n",
    ))
def prepend(line, path):
    """
    Insert *line* at the _beginning_ of the file at the given *path*.

    *line* may be ``str`` (it is encoded as UTF-8) or ``bytes``.  If *line*
    doesn't end in a newline one will be appended to the end of it.

    The new content is assembled in a temporary file which then replaces
    *path*, so *path* is never left half-written by this function.  The
    permission bits of the original file are copied onto the replacement.

    Raises ``OSError`` if *path* cannot be read or replaced; in that case the
    temporary file is removed again and *path* is left untouched.
    """
    if isinstance(line, str):
        line = line.encode("utf-8")
    if not line.endswith(b"\n"):
        line += b"\n"
    # delete=False keeps the securely created file around after it is closed
    # so that it can be moved into place.  (Closing a self-deleting temp file
    # and re-creating one under the same name would leave a window in which
    # the name could be claimed by something else.)
    temp = tempfile.NamedTemporaryFile("wb", delete=False)
    try:
        with temp:
            temp.write(line)
            with open(path, "rb") as original:
                # Stream the old content instead of loading it all at once
                shutil.copyfileobj(original, temp)
        # Temp files are created owner-only; keep the original's mode instead
        shutil.copymode(path, temp.name)
        # Now replace the original with the modified version
        shutil.move(temp.name, path)
    except BaseException:
        # Don't leave a stray temp file behind when anything above failed
        try:
            os.unlink(temp.name)
        except OSError:
            pass
        raise
def get_name_generator(options):
    """
    Return the name generator shared by the obfuscation functions, or None.

    A generator is created with ``obfuscate.obfuscation_machine`` only when
    at least one of the obfuscation-related attributes of *options* is
    truthy; otherwise ``None`` is returned so that callers can tell that no
    obfuscation is needed.
    """
    obfuscation_flags = (
        "obfuscate",
        "obf_classes",
        "obf_functions",
        "obf_variables",
        "obf_builtins",
        "obf_import_methods",
    )
    # The generator expression reads the attributes in order and stops at the
    # first truthy one.
    if not any(getattr(options, flag) for flag in obfuscation_flags):
        return None
    length = int(options.replacement_length)
    return obfuscate.obfuscation_machine(
        use_unicode=options.use_nonlatin,
        identifier_length=length,
    )
def zip_pack(filepath, options):
    """
    Create a self-executing zip archive from the script at *filepath*.

    The script (minified unless ``options.nominify`` is set, and obfuscated
    when ``get_name_generator(options)`` returns a generator) is stored in
    the archive together with the local modules reported by
    ``analyze.enumerate_local_modules``.  The archive is written to the path
    in ``options.pyz``; afterwards a shebang line is prepended to it and it
    is made executable (mode 0o755), so that the result can be run directly
    from the command line.

    The archive is always written with ``zipfile.ZIP_DEFLATED`` compression.

    *options* is expected to be the same options parsed from pyminifier.py
    on the command line.

    A summary (destination, included modules, size ratio) is printed to
    stdout.  Nothing is returned.

    .. note::

        * The file resulting from this method cannot be imported as a module
          into another python program (command line execution only).
        * Any required local (implied path) modules will be automatically
          included (well, it does its best).
        * The result is saved to the path in ``options.pyz`` (intended to be
          a .pyz file).
    """
    import zipfile

    from . import RESULT_FOOTER

    # Hopefully some day we'll be able to use ZIP_LZMA too as the compression
    # format to save even more space...
    compression_format = zipfile.ZIP_DEFLATED
    # Running total of the input file sizes, for the size statistics below
    cumulative_size = os.path.getsize(filepath)
    dest = options.pyz
    included_modules = []
    # The context manager closes the archive even if a step below raises.
    with zipfile.ZipFile(dest, "w", compression_format) as z:
        # Take care of minifying our primary script first:
        with open(filepath, encoding="utf-8") as f:
            source = f.read()
        primary_tokens = token_utils.listified_tokenizer(source)
        # Preserve shebangs (don't care about encodings for this)
        shebang = analyze.get_shebang(primary_tokens)
        if not shebang:
            # We *must* have a shebang for this to work so make a
            # conservative default:
            shebang = "#!/usr/bin/env python"
        if shebang.rstrip().endswith("python"):
            # Make it python3 (to be safe): #!/usr/bin/env python3
            shebang = shebang.rstrip() + "3\n"
        if not options.nominify:
            # Minify as long as we don't have this option set
            source = minification.minify(primary_tokens, options)
        # None means that no obfuscation was requested
        name_generator = get_name_generator(options)
        if name_generator:
            tokens = token_utils.listified_tokenizer(source)
            obfuscate.obfuscate(
                filepath,
                tokens,
                options,
                name_generator=name_generator,
            )
            source = token_utils.untokenize(tokens)
        # Need the path where the script lives for the next steps:
        path, filename = os.path.split(filepath)
        if not path:
            path = os.getcwd()
        if filename == "__main__.py":
            z.writestr(filename, source)
        else:
            main_py = os.path.join(path, "__main__.py")
            if os.path.exists(main_py):
                # There's an existing __main__.py, use it (as-is) and store
                # our script next to it under its own name
                z.write(main_py, "__main__.py")
                z.writestr(filename, source)
            else:
                # No __main__.py so we rename our main script to be the
                # __main__.py.  This is so it will still execute as a zip
                z.writestr("__main__.py", source)
        # Now write any required modules into the zip as well
        local_modules = analyze.enumerate_local_modules(primary_tokens, path)
        if name_generator:
            table = [{}]
        # NOTE: local_modules grows while it is being iterated (see below);
        # that is intentional and how transitive imports get picked up.
        for module in local_modules:
            module_file = "%s.py" % module.replace(".", "/")
            # Avoid writing the filepath itself again
            if module_file == filename:
                continue
            if module_file == "__main__.py":
                continue
            module_path = os.path.join(path, module_file)
            # Add the filesize to our total
            cumulative_size += os.path.getsize(module_path)
            # Also record that we've added it to the archive
            included_modules.append(module_file)
            # Minify these files too
            with open(module_path, encoding="utf-8") as f:
                source = f.read()
            tokens = token_utils.listified_tokenizer(source)
            maybe_more_modules = analyze.enumerate_local_modules(tokens, path)
            for mod in maybe_more_modules:
                if mod not in local_modules:
                    local_modules.append(mod)  # Extend the current loop
            if not options.nominify:
                # Perform minification
                source = minification.minify(tokens, options)
            # Have to re-tokenize for obfuscation (it's quick):
            tokens = token_utils.listified_tokenizer(source)
            # Perform obfuscation if any of the related options were set
            if name_generator:
                obfuscate.obfuscate(
                    module_file,
                    tokens,
                    options,
                    name_generator=name_generator,
                    table=table
                )
            # Convert back to text
            result = token_utils.untokenize(tokens)
            result += "{}\n".format(RESULT_FOOTER)
            z.writestr(module_file, result)
    # Finish up by writing the shebang to the beginning of the zip
    prepend(shebang, dest)
    os.chmod(dest, 0o755)  # Make it executable (since we added the shebang)
    pyz_filesize = os.path.getsize(dest)
    if cumulative_size:
        percent_saved = round(
            float(pyz_filesize) / float(cumulative_size) * 100, 2)
    else:
        # The ratio is undefined for empty input; report 0 rather than
        # crash after the archive has already been written.
        percent_saved = 0.0
    print("%s saved as compressed executable zip: %s" % (filepath, dest))
    print("The following modules were automatically included (as automagic "
          "dependencies):\n")
    for module in included_modules:
        print("\t%s" % module)
    print("\nOverall size reduction: %s%% of original size" % percent_saved)