tools/docs: sphinx-build-wrapper: allow building PDF files in parallel
Use the POSIX jobserver when available, or -j<number>, to run PDF builds in parallel, restoring PDF build performance. Yet, running builds in parallel while debugging problems is a bad idea, so, when the wrapper is called directly from the command line, it serializes the PDF build unless "-j" is explicitly requested. With this change, a PDF doc build now takes around 5 minutes on a Ryzen 9 machine with 32 CPU threads: # Explicitly parallelize both Sphinx and LaTeX pdf builds $ make cleandocs; time scripts/sphinx-build-wrapper pdfdocs -j 33 real 5m17.901s user 15m1.499s sys 2m31.482s # Use POSIX jobserver to parallelize both sphinx-build and LaTeX $ make cleandocs; time make pdfdocs real 5m22.369s user 15m9.076s sys 2m31.419s # Serializes PDF build, while keeping Sphinx parallelized. # It is equivalent to passing -jauto via command line $ make cleandocs; time scripts/sphinx-build-wrapper pdfdocs real 11m20.901s user 13m2.910s sys 1m44.553s Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org> Message-ID: <42eef319f9af6f9feb12bcd74ca6392c8119929d.1758196090.git.mchehab+huawei@kernel.org> Signed-off-by: Jonathan Corbet <corbet@lwn.net> pull/1354/merge
parent
2f99b85e22
commit
08e14bc17e
|
|
@ -51,6 +51,8 @@ import shutil
|
||||||
import subprocess
|
import subprocess
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
|
from concurrent import futures
|
||||||
|
|
||||||
from lib.python_version import PythonVersion
|
from lib.python_version import PythonVersion
|
||||||
from lib.latex_fonts import LatexFontChecker
|
from lib.latex_fonts import LatexFontChecker
|
||||||
|
|
||||||
|
|
@ -291,6 +293,87 @@ class SphinxBuilder:
|
||||||
except (OSError, IOError) as e:
|
except (OSError, IOError) as e:
|
||||||
print(f"Warning: Failed to copy CSS: {e}", file=sys.stderr)
|
print(f"Warning: Failed to copy CSS: {e}", file=sys.stderr)
|
||||||
|
|
||||||
|
def build_pdf_file(self, latex_cmd, from_dir, path):
    """
    Build a single PDF file using latex_cmd.

    Args:
        latex_cmd: LaTeX command, as a list of arguments. ``path`` is
            appended to it as the last argument.
        from_dir: directory used as working directory for the command.
        path: path of the .tex file to be processed.

    Returns:
        True if the command ran and exited with status zero; False if it
        exited with a non-zero status or could not be started at all.
    """
    try:
        subprocess.run(latex_cmd + [path],
                       cwd=from_dir, check=True, env=self.env)
    except subprocess.CalledProcessError:
        return False
    except OSError as e:
        #
        # Raised when the command can't be executed (binary not installed,
        # missing working directory, permission problems). Report it as a
        # failed build instead of letting it escape from the worker thread.
        #
        print(f"Warning: Failed to run LaTeX command: {e}", file=sys.stderr)
        return False

    return True
|
||||||
|
|
||||||
|
def pdf_parallel_build(self, tex_suffix, latex_cmd, tex_files, n_jobs):
|
||||||
|
"""Build PDF files in parallel if possible"""
|
||||||
|
builds = {}
|
||||||
|
build_failed = False
|
||||||
|
max_len = 0
|
||||||
|
has_tex = False
|
||||||
|
|
||||||
|
#
|
||||||
|
# LaTeX PDF error code is almost useless for us:
|
||||||
|
# any warning makes it non-zero. For kernel doc builds it always return
|
||||||
|
# non-zero even when build succeeds. So, let's do the best next thing:
|
||||||
|
# Ignore build errors. At the end, check if all PDF files were built,
|
||||||
|
# printing a summary with the built ones and returning 0 if all of
|
||||||
|
# them were actually built.
|
||||||
|
#
|
||||||
|
with futures.ThreadPoolExecutor(max_workers=n_jobs) as executor:
|
||||||
|
jobs = {}
|
||||||
|
|
||||||
|
for from_dir, pdf_dir, entry in tex_files:
|
||||||
|
name = entry.name
|
||||||
|
|
||||||
|
if not name.endswith(tex_suffix):
|
||||||
|
continue
|
||||||
|
|
||||||
|
name = name[:-len(tex_suffix)]
|
||||||
|
|
||||||
|
max_len = max(max_len, len(name))
|
||||||
|
|
||||||
|
has_tex = True
|
||||||
|
|
||||||
|
future = executor.submit(self.build_pdf_file, latex_cmd,
|
||||||
|
from_dir, entry.path)
|
||||||
|
jobs[future] = (from_dir, pdf_dir, name)
|
||||||
|
|
||||||
|
for future in futures.as_completed(jobs):
|
||||||
|
from_dir, pdf_dir, name = jobs[future]
|
||||||
|
|
||||||
|
pdf_name = name + ".pdf"
|
||||||
|
pdf_from = os.path.join(from_dir, pdf_name)
|
||||||
|
|
||||||
|
try:
|
||||||
|
success = future.result()
|
||||||
|
|
||||||
|
if success and os.path.exists(pdf_from):
|
||||||
|
pdf_to = os.path.join(pdf_dir, pdf_name)
|
||||||
|
|
||||||
|
os.rename(pdf_from, pdf_to)
|
||||||
|
|
||||||
|
#
|
||||||
|
# if verbose, get the name of built PDF file
|
||||||
|
#
|
||||||
|
if self.verbose:
|
||||||
|
builds[name] = os.path.relpath(pdf_to, self.builddir)
|
||||||
|
else:
|
||||||
|
builds[name] = "FAILED"
|
||||||
|
build_failed = True
|
||||||
|
except futures.Error as e:
|
||||||
|
builds[name] = f"FAILED ({repr(e)})"
|
||||||
|
build_failed = True
|
||||||
|
|
||||||
|
#
|
||||||
|
# Handle case where no .tex files were found
|
||||||
|
#
|
||||||
|
if not has_tex:
|
||||||
|
name = "Sphinx LaTeX builder"
|
||||||
|
max_len = max(max_len, len(name))
|
||||||
|
builds[name] = "FAILED (no .tex file was generated)"
|
||||||
|
build_failed = True
|
||||||
|
|
||||||
|
return builds, build_failed, max_len
|
||||||
|
|
||||||
def handle_pdf(self, output_dirs, deny_vf):
|
def handle_pdf(self, output_dirs, deny_vf):
|
||||||
"""
|
"""
|
||||||
Extra steps for PDF output.
|
Extra steps for PDF output.
|
||||||
|
|
@ -301,6 +384,8 @@ class SphinxBuilder:
|
||||||
"""
|
"""
|
||||||
builds = {}
|
builds = {}
|
||||||
max_len = 0
|
max_len = 0
|
||||||
|
tex_suffix = ".tex"
|
||||||
|
tex_files = []
|
||||||
|
|
||||||
#
|
#
|
||||||
# Since early 2024, Fedora and openSUSE tumbleweed have started
|
# Since early 2024, Fedora and openSUSE tumbleweed have started
|
||||||
|
|
@ -327,74 +412,83 @@ class SphinxBuilder:
|
||||||
|
|
||||||
latex_cmd.extend(shlex.split(self.latexopts))
|
latex_cmd.extend(shlex.split(self.latexopts))
|
||||||
|
|
||||||
tex_suffix = ".tex"
|
# Get a list of tex files to process
|
||||||
|
|
||||||
#
|
|
||||||
# Process each .tex file
|
|
||||||
#
|
|
||||||
|
|
||||||
has_tex = False
|
|
||||||
build_failed = False
|
|
||||||
with os.scandir(from_dir) as it:
|
with os.scandir(from_dir) as it:
|
||||||
for entry in it:
|
for entry in it:
|
||||||
if not entry.name.endswith(tex_suffix):
|
if entry.name.endswith(tex_suffix):
|
||||||
continue
|
tex_files.append((from_dir, pdf_dir, entry))
|
||||||
|
|
||||||
name = entry.name[:-len(tex_suffix)]
|
#
|
||||||
has_tex = True
|
# When using make, this won't be used, as the number of jobs comes
|
||||||
|
# from POSIX jobserver. So, this covers the case where build comes
|
||||||
|
# from command line. In that case, serialize by default, except if
|
||||||
|
# the user explicitly sets the number of jobs.
|
||||||
|
#
|
||||||
|
n_jobs = 1
|
||||||
|
|
||||||
#
|
# n_jobs is either an integer or "auto". Only use it if it is a number
|
||||||
# LaTeX PDF error code is almost useless for us:
|
if self.n_jobs:
|
||||||
# any warning makes it non-zero. For kernel doc builds it
|
try:
|
||||||
# always return non-zero even when build succeeds.
|
n_jobs = int(self.n_jobs)
|
||||||
# So, let's do the best next thing: check if all PDF
|
except ValueError:
|
||||||
# files were built. If they're, print a summary and
|
pass
|
||||||
# return 0 at the end of this function
|
|
||||||
#
|
|
||||||
try:
|
|
||||||
subprocess.run(latex_cmd + [entry.path],
|
|
||||||
cwd=from_dir, check=True, env=self.env)
|
|
||||||
except subprocess.CalledProcessError:
|
|
||||||
pass
|
|
||||||
|
|
||||||
pdf_name = name + ".pdf"
|
#
|
||||||
pdf_from = os.path.join(from_dir, pdf_name)
|
# When using make, jobserver.claim is the number of jobs that were
|
||||||
pdf_to = os.path.join(pdf_dir, pdf_name)
|
# used with "-j" and that aren't used by other make targets
|
||||||
|
#
|
||||||
|
with JobserverExec() as jobserver:
|
||||||
|
n_jobs = 1
|
||||||
|
|
||||||
if os.path.exists(pdf_from):
|
#
|
||||||
os.rename(pdf_from, pdf_to)
|
# Handle the case when a parameter is passed via command line,
|
||||||
builds[name] = os.path.relpath(pdf_to, self.builddir)
|
# using it as default, if jobserver doesn't claim anything
|
||||||
else:
|
#
|
||||||
builds[name] = "FAILED"
|
if self.n_jobs:
|
||||||
build_failed = True
|
try:
|
||||||
|
n_jobs = int(self.n_jobs)
|
||||||
|
except ValueError:
|
||||||
|
pass
|
||||||
|
|
||||||
name = entry.name.removesuffix(".tex")
|
if jobserver.claim:
|
||||||
max_len = max(max_len, len(name))
|
n_jobs = jobserver.claim
|
||||||
|
|
||||||
if not has_tex:
|
builds, build_failed, max_len = self.pdf_parallel_build(tex_suffix,
|
||||||
name = os.path.basename(from_dir)
|
latex_cmd,
|
||||||
max_len = max(max_len, len(name))
|
tex_files,
|
||||||
builds[name] = "FAILED (no .tex)"
|
n_jobs)
|
||||||
build_failed = True
|
|
||||||
|
|
||||||
msg = "Summary"
|
#
|
||||||
msg += "\n" + "=" * len(msg)
|
# In verbose mode, print a summary with the build results per file.
|
||||||
print()
|
# Otherwise, print a single line with all failures, if any.
|
||||||
print(msg)
|
# In both cases, return code 1 indicates build failures.
|
||||||
|
#
|
||||||
|
if self.verbose:
|
||||||
|
msg = "Summary"
|
||||||
|
msg += "\n" + "=" * len(msg)
|
||||||
|
print()
|
||||||
|
print(msg)
|
||||||
|
|
||||||
for pdf_name, pdf_file in builds.items():
|
for pdf_name, pdf_file in builds.items():
|
||||||
print(f"{pdf_name:<{max_len}}: {pdf_file}")
|
print(f"{pdf_name:<{max_len}}: {pdf_file}")
|
||||||
|
|
||||||
print()
|
print()
|
||||||
|
if build_failed:
|
||||||
|
msg = LatexFontChecker().check()
|
||||||
|
if msg:
|
||||||
|
print(msg)
|
||||||
|
|
||||||
|
sys.exit("Error: not all PDF files were created.")
|
||||||
|
|
||||||
|
elif build_failed:
|
||||||
|
n_failures = len(builds)
|
||||||
|
failures = ", ".join(builds.keys())
|
||||||
|
|
||||||
if build_failed:
|
|
||||||
msg = LatexFontChecker().check()
|
msg = LatexFontChecker().check()
|
||||||
if msg:
|
if msg:
|
||||||
print(msg)
|
print(msg)
|
||||||
|
|
||||||
sys.exit("PDF build failed: not all PDF files were created.")
|
sys.exit(f"Error: Can't build {n_failures} PDF file(s): {failures}")
|
||||||
else:
|
|
||||||
print("All PDF files were built.")
|
|
||||||
|
|
||||||
def handle_info(self, output_dirs):
|
def handle_info(self, output_dirs):
|
||||||
"""
|
"""
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue