35

I am writing a C++ extension for Python and I am using distutils to compile the project. As the project grows, rebuilding it takes longer and longer. Is there a way to speed up the build process?

I read that parallel builds (as with make -j) are not possible with distutils. Are there any alternatives to distutils which might be faster?

I also noticed that distutils recompiles all object files every time I call python setup.py build, even when I only changed one source file. Should this be the case, or am I doing something wrong here?

In case it helps, here are some of the files which I am trying to compile: https://gist.github.com/2923577

Thanks!


4 Answers

39
  1. Try building with the environment variable CC="ccache gcc"; this speeds up the build significantly when the sources have not changed. (Curiously, distutils uses CC for C++ source files as well.) Install the ccache package, of course.

  2. Since you have a single extension which is assembled from multiple compiled object files, you can monkey-patch distutils to compile those in parallel (they are independent). Put the following into your setup.py (adjust N=2 as you wish; a combined usage sketch follows after this list):

    # monkey-patch for parallel compilation
    def parallelCCompile(self, sources, output_dir=None, macros=None, include_dirs=None, debug=0, extra_preargs=None, extra_postargs=None, depends=None):
        # those lines are copied from distutils.ccompiler.CCompiler directly
        macros, objects, extra_postargs, pp_opts, build = self._setup_compile(output_dir, macros, include_dirs, sources, depends, extra_postargs)
        cc_args = self._get_cc_args(pp_opts, debug, extra_preargs)
        # parallel code
        N=2 # number of parallel compilations
        import multiprocessing.pool
        def _single_compile(obj):
            try: src, ext = build[obj]
            except KeyError: return
            self._compile(obj, src, ext, cc_args, extra_postargs, pp_opts)
        # convert to list, imap is evaluated on-demand
        list(multiprocessing.pool.ThreadPool(N).imap(_single_compile, objects))
        return objects
    import distutils.ccompiler
    distutils.ccompiler.CCompiler.compile=parallelCCompile
    
  3. For the sake of completeness, if you have multiple extensions, you can use the following solution:

    import os
    import multiprocessing
    try:
        from concurrent.futures import ThreadPoolExecutor as Pool
    except ImportError:
        from multiprocessing.pool import ThreadPool as LegacyPool
    
        # To ensure the with statement works. Required for some older 2.7.x releases
        class Pool(LegacyPool):
            def __enter__(self):
                return self
    
            def __exit__(self, *args):
                self.close()
                self.join()
    
    def build_extensions(self):
        """Function to monkey-patch
        distutils.command.build_ext.build_ext.build_extensions
    
        """
        self.check_extensions_list(self.extensions)
    
        try:
            num_jobs = os.cpu_count()
        except AttributeError:
            num_jobs = multiprocessing.cpu_count()
    
        with Pool(num_jobs) as pool:
            pool.map(self.build_extension, self.extensions)
    
    def compile(
        self, sources, output_dir=None, macros=None, include_dirs=None,
        debug=0, extra_preargs=None, extra_postargs=None, depends=None,
    ):
        """Function to monkey-patch distutils.ccompiler.CCompiler"""
        macros, objects, extra_postargs, pp_opts, build = self._setup_compile(
            output_dir, macros, include_dirs, sources, depends, extra_postargs
        )
        cc_args = self._get_cc_args(pp_opts, debug, extra_preargs)
    
        for obj in objects:
            try:
                src, ext = build[obj]
            except KeyError:
                continue
            self._compile(obj, src, ext, cc_args, extra_postargs, pp_opts)
    
        # Return *all* object filenames, not just the ones we just built.
        return objects
    
    
    from distutils.ccompiler import CCompiler
    from distutils.command.build_ext import build_ext
    build_ext.build_extensions = build_extensions
    CCompiler.compile = compile
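
To tie items 1 and 2 together, here is a minimal sketch of what the resulting setup.py could look like; the extension name and source files are placeholders, not taken from the question's gist:

    # setup.py -- a sketch, assuming the parallelCCompile patch from
    # item 2 (or the variant from item 3) is pasted above this point
    import os

    # Item 1: route compilation through ccache (Linux/gcc assumed).
    os.environ.setdefault('CC', 'ccache gcc')

    from distutils.core import setup, Extension

    setup(
        name='demo',                      # placeholder name
        ext_modules=[Extension(
            'demo',
            sources=['a.cpp', 'b.cpp'],   # placeholder sources
        )],
    )

Build as usual with python setup.py build: unchanged sources are served from the ccache, and the rest compile in parallel.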
    
Answered 2012-11-01T12:11:31.980
7

I got this working on Windows with clcache, derived from eudoxos's answer:

# Python modules
import datetime
import distutils
import distutils.ccompiler
import distutils.sysconfig
import multiprocessing
import multiprocessing.pool
import os
import sys

from distutils.core import setup
from distutils.core import Extension
from distutils.errors import CompileError
from distutils.errors import DistutilsExecError

now = datetime.datetime.now

ON_LINUX = "linux" in sys.platform

N_JOBS = 4

#------------------------------------------------------------------------------
# Enable ccache to speed up builds

if ON_LINUX:
    os.environ['CC'] = 'ccache gcc'

# Windows
else:

    # Using clcache.exe, see: https://github.com/frerich/clcache

    # Insert path to clcache.exe into the path.

    prefix = os.path.dirname(os.path.abspath(__file__))
    path = os.path.join(prefix, "bin")

    print "Adding %s to the system path." % path
    os.environ['PATH'] = '%s;%s' % (path, os.environ['PATH'])

    clcache_exe = os.path.join(path, "clcache.exe")

#------------------------------------------------------------------------------
# Parallel Compile
#
# Reference:
#
# http://stackoverflow.com/questions/11013851/speeding-up-build-process-with-distutils
#

def linux_parallel_cpp_compile(
        self,
        sources,
        output_dir=None,
        macros=None,
        include_dirs=None,
        debug=0,
        extra_preargs=None,
        extra_postargs=None,
        depends=None):

    # Copied from distutils.ccompiler.CCompiler

    macros, objects, extra_postargs, pp_opts, build = self._setup_compile(
        output_dir, macros, include_dirs, sources, depends, extra_postargs)

    cc_args = self._get_cc_args(pp_opts, debug, extra_preargs)

    def _single_compile(obj):

        try:
            src, ext = build[obj]
        except KeyError:
            return

        self._compile(obj, src, ext, cc_args, extra_postargs, pp_opts)

    # convert to list, imap is evaluated on-demand

    list(multiprocessing.pool.ThreadPool(N_JOBS).imap(
        _single_compile, objects))

    return objects


def windows_parallel_cpp_compile(
        self,
        sources,
        output_dir=None,
        macros=None,
        include_dirs=None,
        debug=0,
        extra_preargs=None,
        extra_postargs=None,
        depends=None):

    # Copied from distutils.msvc9compiler.MSVCCompiler

    if not self.initialized:
        self.initialize()

    macros, objects, extra_postargs, pp_opts, build = self._setup_compile(
        output_dir, macros, include_dirs, sources, depends, extra_postargs)

    compile_opts = extra_preargs or []
    compile_opts.append('/c')

    if debug:
        compile_opts.extend(self.compile_options_debug)
    else:
        compile_opts.extend(self.compile_options)

    def _single_compile(obj):

        try:
            src, ext = build[obj]
        except KeyError:
            return

        input_opt = "/Tp" + src
        output_opt = "/Fo" + obj
        try:
            self.spawn(
                [clcache_exe]
                + compile_opts
                + pp_opts
                + [input_opt, output_opt]
                + extra_postargs)

        except DistutilsExecError, msg:
            raise CompileError(msg)

    # convert to list, imap is evaluated on-demand

    list(multiprocessing.pool.ThreadPool(N_JOBS).imap(
        _single_compile, objects))

    return objects

#------------------------------------------------------------------------------
# Only enable parallel compile on Python 2.7

if sys.version_info[:2] == (2, 7):

    if ON_LINUX:
        distutils.ccompiler.CCompiler.compile = linux_parallel_cpp_compile

    else:
        import distutils.msvccompiler
        import distutils.msvc9compiler

        distutils.msvccompiler.MSVCCompiler.compile = windows_parallel_cpp_compile
        distutils.msvc9compiler.MSVCCompiler.compile = windows_parallel_cpp_compile

# ... call setup() as usual
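
# For illustration only: a hypothetical setup() call (name, version
# and sources are placeholders, not from the original post).
setup(
    name='example_ext',
    version='0.1',
    ext_modules=[
        Extension('example_ext', sources=['src/a.cpp', 'src/b.cpp']),
    ],
)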
Answered 2013-09-24T21:44:36.473
4

If you have Numpy 1.10 available, you can do this very easily. Just add:

    try:
        from numpy.distutils.ccompiler import CCompiler_compile
        import distutils.ccompiler
        distutils.ccompiler.CCompiler.compile = CCompiler_compile
    except ImportError:
        print("Numpy not found, parallel compile not available")

Then build with -j N, or set the environment variable NPY_NUM_BUILD_JOBS.
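
As a hedged aside, the job count can also be pinned from inside setup.py rather than on the command line; NPY_NUM_BUILD_JOBS is read by numpy.distutils when the patched compile() runs (the value 4 below is just an example):

    import os

    # Set before setup() runs so numpy.distutils picks it up;
    # equivalent to NPY_NUM_BUILD_JOBS=4 python setup.py build.
    os.environ.setdefault('NPY_NUM_BUILD_JOBS', '4')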

Answered 2019-04-13T22:54:08.297
1

In the limited sample you provided at the link, it is quite obvious that you have some misunderstandings about a few features of the language. For example, gsminterface.h has a lot of namespace-level static, which is probably unintended. Every translation unit that includes that header will compile its own version of every symbol declared in it. Side effects of this are not only compile time, but also code bloat (larger binaries) and link time, as the linker needs to process all those symbols.

There are also many questions about your build process that you have not answered, for example whether you clean every time before you recompile. If you are doing that, you may want to consider ccache, a tool that caches the result of the build process, so that if you run make clean; make target, only the preprocessor will run for any translation unit that has not changed. Note that as long as you keep most of the code in headers, this will not offer much of an advantage, since a change in a header modifies all translation units that include it. (I do not know your build system, so I cannot tell you whether python setup.py build will clean or not.)

Otherwise the project does not look that big, and I would be surprised if compilation took more than a few seconds.

Answered 2012-06-13T12:02:04.170