summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
author Sean Whitton <spwhitton@spwhitton.name> 2022-04-15 14:31:00 -0700
committer Sean Whitton <spwhitton@spwhitton.name> 2022-04-15 14:31:00 -0700
commit 9cc69dd57404de2c1499fef851e2a030af311978 (patch)
tree 69a99959025152197481cb82bdea90626e874b85
parent 783fdf51b7c403109f297ce77dc79051803f7dc3 (diff)
parent f970cfcdc7f034800014cf59149036098dcf842f (diff)
download ocrmypdf-9cc69dd57404de2c1499fef851e2a030af311978.tar.gz
Merge tag 'v13.4.3+dfsg' into debian
DFSG-clean upstream version v13.4.3
-rw-r--r-- .docker/Dockerfile 2
-rw-r--r-- .github/workflows/build.yml 16
-rw-r--r-- .pre-commit-config.yaml 12
-rw-r--r-- docs/jbig2.rst 3
-rw-r--r-- docs/plugins.rst 10
-rw-r--r-- docs/release_notes.rst 45
-rw-r--r-- misc/webservice.py 2
-rw-r--r-- setup.cfg 4
-rw-r--r-- src/ocrmypdf/_exec/ghostscript.py 21
-rw-r--r-- src/ocrmypdf/_exec/unpaper.py 4
-rw-r--r-- src/ocrmypdf/_pipeline.py 13
-rw-r--r-- src/ocrmypdf/_validation.py 2
-rw-r--r-- src/ocrmypdf/cli.py 8
-rw-r--r-- src/ocrmypdf/pdfinfo/info.py 8
-rw-r--r-- src/ocrmypdf/pluginspec.py 8
-rw-r--r-- src/ocrmypdf/subprocess/_windows.py 4
-rw-r--r-- tests/conftest.py 3
-rw-r--r-- tests/plugins/tesseract_cache.py 23
-rw-r--r-- tests/test_completion.py 6
-rw-r--r-- tests/test_concurrency.py 1
-rw-r--r-- tests/test_main.py 6
-rw-r--r-- tests/test_pdfinfo.py 8
-rw-r--r-- tests/test_rotation.py 8
-rw-r--r-- tests/test_stdio.py 6
24 files changed, 160 insertions, 63 deletions
diff --git a/.docker/Dockerfile b/.docker/Dockerfile
index 36c913a1..c7614a8b 100644
--- a/.docker/Dockerfile
+++ b/.docker/Dockerfile
@@ -1,7 +1,7 @@
# OCRmyPDF
#
-FROM ubuntu:21.04 as base
+FROM debian:bookworm-slim as base
ENV LANG=C.UTF-8
ENV TZ=UTC
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index b9bdc84f..d5e0915b 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -21,17 +21,19 @@ jobs:
matrix:
include:
- os: ubuntu-18.04
- python: 3.7
+ python: "3.7"
- os: ubuntu-20.04
- python: 3.8
+ python: "3.8"
- os: ubuntu-20.04
- python: 3.9
+ python: "3.9"
+ - os: ubuntu-20.04
+ python: "3.10"
- os: ubuntu-latest
- python: 3.9
+ python: "3.9"
- os: ubuntu-latest
python: "pypy-3.7"
- os: ubuntu-latest
- python: 3.9
+ python: "3.9"
tesseract5: true
env:
@@ -120,7 +122,7 @@ jobs:
strategy:
matrix:
os: [macos-latest]
- python: ["3.9"]
+ python: ["3.9", "3.10"]
env:
OS: ${{ matrix.os }}
@@ -175,7 +177,7 @@ jobs:
strategy:
matrix:
os: [windows-latest]
- python: ["3.9"]
+ python: ["3.9", "3.10"]
env:
OS: ${{ matrix.os }}
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index aae14891..f913fc40 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,6 +1,6 @@
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
- rev: v4.0.1
+ rev: v4.1.0
hooks:
- id: check-case-conflict
- id: check-merge-conflict
@@ -8,26 +8,26 @@ repos:
- id: check-yaml
- id: debug-statements
- repo: https://github.com/pycqa/isort
- rev: 5.9.3
+ rev: 5.10.1
hooks:
- id: isort
args: ["--profile", "black"]
- repo: https://github.com/psf/black
- rev: 21.9b0
+ rev: 22.3.0
hooks:
- id: black
language_version: python
- repo: https://github.com/asottile/setup-cfg-fmt
- rev: v1.19.0
+ rev: v1.20.1
hooks:
- id: setup-cfg-fmt
- repo: https://github.com/asottile/pyupgrade
- rev: v2.29.0
+ rev: v2.31.1
hooks:
- id: pyupgrade
args: ["--py37-plus"]
- repo: https://github.com/pre-commit/mirrors-mypy
- rev: v0.910-1
+ rev: v0.942
hooks:
- id: mypy
additional_dependencies:
diff --git a/docs/jbig2.rst b/docs/jbig2.rst
index c49f0e9c..2e7b7746 100644
--- a/docs/jbig2.rst
+++ b/docs/jbig2.rst
@@ -32,6 +32,9 @@ For all other Linux, you must build a JBIG2 encoder from source:
.. _jbig2-lossy:
+Dependencies include libtoolize and libleptonica, which on Ubuntu systems
+are packaged as libtool and libleptonica-dev.
+
Lossy mode JBIG2
================
diff --git a/docs/plugins.rst b/docs/plugins.rst
index c8d3e5ae..1d8f3f70 100644
--- a/docs/plugins.rst
+++ b/docs/plugins.rst
@@ -152,6 +152,16 @@ hooks. As such, you cannot "chain" a series of plugin filters together in this
way. Instead, a single hook implementation should be responsible for any such
chaining operations.
+Examples
+========
+
+* OCRmyPDF's test suite contains several plugins that are used to simulate certain
+ test conditions.
+* `ocrmypdf-papermerge <https://github.com/papermerge/OCRmyPDF_papermerge>`_ is
+ a production plugin that integrates OCRmyPDF and the Papermerge document
+ management system.
+
+
Custom command line arguments
-----------------------------
diff --git a/docs/release_notes.rst b/docs/release_notes.rst
index 04ee67f9..458d5ace 100644
--- a/docs/release_notes.rst
+++ b/docs/release_notes.rst
@@ -10,16 +10,53 @@ that is, output messages may be improved at any release level, so parsing them
may be unreliable. Use the API to depend on precise behavior.
The public API may be useful in scripts that launch OCRmyPDF processes or that
-wish to use some of its features for working with PDFs..
+wish to use some of its features for working with PDFs.
+
+The most recent release of OCRmyPDF is |OCRmyPDF PyPI|. Any newer versions
+referred to in these notes may exist on the main branch but have not been
+tagged yet.
+
+.. |OCRmyPDF PyPI| image:: https://img.shields.io/pypi/v/ocrmypdf.svg
+
+v13.4.3
+=======
+
+- Fix error on pytest.skip() with older versions of pytest.
+- Documentation updates.
+
+v13.4.2
+=======
+
+- Worked around a
+ `major regression in Ghostscript 9.56.0 <https://bugs.ghostscript.com/show_bug.cgi?id=705187>`__
+ where **all OCR text is stripped out of the PDF**. It simply removes all text,
+ even text generated by software other than OCRmyPDF. Fortunately, we can ask
+ Ghostscript 9.56.0 to use its old behavior that worked correctly for our purposes.
+ Users must avoid the combination (Ghostscript 9.56.0, ocrmypdf <13.4.2) since
+ older versions of OCRmyPDF have no way of detecting that this particular
+ version of Ghostscript removes all OCR text.
+- Marked pdfminer 20220319 as supported.
+- Fixed some deprecation warnings from recent versions of Pillow and pytest.
+- Test suite now covers Python 3.10 (Python 3.10 worked fine before, but was not
+ being tested).
+- Docker image now uses debian:bookworm-slim as the base image to fix the Docker
+ image build.
+
+v13.4.1
+=======
+
+- Temporarily make threads rather than processes the default executor worker, due
+ to a persistent deadlock issue when processes are used. Add a new command line
+ argument ``--no-use-threads`` to disable this.
v13.4.0
=======
- Fixed test failures when using pikepdf 5.0.0.
- Various improvements to the optimizer. In particular, we now recognize PDF images
- that are encoded with both flate and DCT (JPEG), and also produce PDF with images
- compressed with flate and DCT, since this often yields file size improvements
- compared to plain DCT.
+ that are encoded with both deflate (PNG) and DCT (JPEG), and also produce PDF
+ with images compressed with deflate and DCT, since this often yields file size
+ improvements compared to plain DCT.
v13.3.0
=======
diff --git a/misc/webservice.py b/misc/webservice.py
index e005b7c2..0e2079e8 100644
--- a/misc/webservice.py
+++ b/misc/webservice.py
@@ -59,7 +59,7 @@ def do_ocrmypdf(file):
return Response("--sidecar not supported", 501, mimetype='text/plain')
ocrmypdf_args = ["ocrmypdf", *cmd_args, up_file, down_file]
- proc = run(ocrmypdf_args, stdout=PIPE, stderr=PIPE, encoding="utf-8")
+ proc = run(ocrmypdf_args, capture_output=True, encoding="utf-8")
if proc.returncode != 0:
stderr = proc.stderr
return Response(stderr, 400, mimetype='text/plain')
diff --git a/setup.cfg b/setup.cfg
index c2208da6..ed06abdc 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -49,8 +49,8 @@ install_requires =
coloredlogs>=14.0 # strictly optional
img2pdf>=0.3.0,<0.5 # pure Python
packaging>=20
- pdfminer.six!=20200720,>=20191110,<=20211012
- pikepdf>=4.0.0,!=5.0.0
+ pdfminer.six!=20200720,>=20191110,<=20220319
+ pikepdf!=5.0.0,>=4.0.0
pluggy>=0.13.0,<2
reportlab>=3.5.66
tqdm>=4
diff --git a/src/ocrmypdf/_exec/ghostscript.py b/src/ocrmypdf/_exec/ghostscript.py
index 5d977a80..0ccadc45 100644
--- a/src/ocrmypdf/_exec/ghostscript.py
+++ b/src/ocrmypdf/_exec/ghostscript.py
@@ -24,6 +24,13 @@ from ocrmypdf.exceptions import MissingDependencyError, SubprocessOutputError
from ocrmypdf.helpers import Resolution
from ocrmypdf.subprocess import get_version, run, run_polling_stderr
+# Remove this workaround when we require Pillow >= 10
+try:
+ Transpose = Image.Transpose # type: ignore
+except AttributeError:
+ # Pillow 9 shim
+ Transpose = Image # type: ignore
+
log = logging.getLogger(__name__)
missing_gs_error = """
@@ -132,11 +139,11 @@ def rasterize_pdf(
# rotation is a clockwise angle and Image.ROTATE_* is
# counterclockwise so this cancels out the rotation
if rotation == 90:
- im = im.transpose(Image.ROTATE_90)
+ im = im.transpose(Transpose.ROTATE_90)
elif rotation == 180:
- im = im.transpose(Image.ROTATE_180)
+ im = im.transpose(Transpose.ROTATE_180)
elif rotation == 270:
- im = im.transpose(Image.ROTATE_270)
+ im = im.transpose(Transpose.ROTATE_270)
if rotation % 180 == 90:
page_dpi = page_dpi.flip_axis()
im.save(fspath(output_file), dpi=page_dpi)
@@ -210,14 +217,18 @@ def generate_pdfa(
# Older versions of Ghostscript expect a leading slash in
# sColorConversionStrategy, newer ones should not have it. See Ghostscript
# git commit fe1c025d.
- strategy = ('/' + strategy) if version() < '9.19' else strategy
+ gs_version = version()
+ strategy = ('/' + strategy) if gs_version < '9.19' else strategy
- if version() == '9.23':
+ if gs_version == '9.23':
# 9.23: added JPEG passthrough as a new feature, but with a bug that
# incorrectly formats some images. Fixed as of 9.24. So we disable this
# feature for 9.23.
# https://bugs.ghostscript.com/show_bug.cgi?id=699216
compression_args.append('-dPassThroughJPEGImages=false')
+ elif gs_version == '9.56.0':
+ # 9.56.0 strips all text (including OCR text) from the PDF; ask for the old behavior
+ compression_args.append('-dNEWPDF=false')
# nb no need to specify ProcessColorModel when ColorConversionStrategy
# is set; see:
diff --git a/src/ocrmypdf/_exec/unpaper.py b/src/ocrmypdf/_exec/unpaper.py
index 38132ac9..69eb58ee 100644
--- a/src/ocrmypdf/_exec/unpaper.py
+++ b/src/ocrmypdf/_exec/unpaper.py
@@ -18,7 +18,7 @@ from decimal import Decimal
from pathlib import Path
from subprocess import PIPE, STDOUT
from tempfile import TemporaryDirectory
-from typing import List, Optional, Tuple, Union
+from typing import Iterator, List, Optional, Tuple, Union
from PIL import Image
@@ -76,7 +76,7 @@ def _convert_image(im: Image.Image) -> Tuple[Image.Image, bool, str]:
@contextmanager
-def _setup_unpaper_io(input_file: Path) -> Tuple[Path, Path, Path]:
+def _setup_unpaper_io(input_file: Path) -> Iterator[Tuple[Path, Path, Path]]:
with Image.open(input_file) as im:
if im.width * im.height >= UNPAPER_IMAGE_PIXEL_LIMIT:
raise UnpaperImageTooLargeError(w=im.width, h=im.height)
diff --git a/src/ocrmypdf/_pipeline.py b/src/ocrmypdf/_pipeline.py
index 447471cc..f0c690ca 100644
--- a/src/ocrmypdf/_pipeline.py
+++ b/src/ocrmypdf/_pipeline.py
@@ -18,7 +18,7 @@ from typing import Dict, Iterable, Optional
import img2pdf
import pikepdf
from pikepdf.models.metadata import encode_pdf_date
-from PIL import Image, ImageDraw
+from PIL import Image, ImageColor, ImageDraw
from ocrmypdf._concurrent import Executor
from ocrmypdf._exec import unpaper
@@ -38,6 +38,13 @@ from ocrmypdf.optimize import optimize
from ocrmypdf.pdfa import generate_pdfa_ps
from ocrmypdf.pdfinfo import Colorspace, Encoding, PdfInfo
+# Remove this workaround when we require Pillow >= 10
+try:
+ BICUBIC = Image.Resampling.BICUBIC # type: ignore
+except AttributeError:
+ # Pillow 9 shim
+ BICUBIC = Image.BICUBIC # type: ignore
+
log = logging.getLogger(__name__)
VECTOR_PAGE_DPI = 400
@@ -484,7 +491,9 @@ def preprocess_deskew(input_file: Path, page_context: PageContext):
# According to Pillow docs, .rotate() will automatically use Image.NEAREST
# resampling if image is mode '1' or 'P'
deskewed = im.rotate(
- deskew_angle_degrees, resample=Image.BICUBIC, fillcolor='white'
+ deskew_angle_degrees,
+ resample=BICUBIC,
+ fillcolor=ImageColor.getcolor('white', mode=im.mode),
)
deskewed.save(output_file, dpi=dpi)
diff --git a/src/ocrmypdf/_validation.py b/src/ocrmypdf/_validation.py
index a69514f1..613e1c9f 100644
--- a/src/ocrmypdf/_validation.py
+++ b/src/ocrmypdf/_validation.py
@@ -41,7 +41,7 @@ log = logging.getLogger(__name__)
def check_platform():
- if os.name == 'nt' and sys.maxsize <= 2 ** 32: # pragma: no cover
+ if os.name == 'nt' and sys.maxsize <= 2**32: # pragma: no cover
# 32-bit interpreter on Windows
log.error(
"You are running OCRmyPDF in a 32-bit (x86) Python interpreter."
diff --git a/src/ocrmypdf/cli.py b/src/ocrmypdf/cli.py
index db12cdb7..322f1239 100644
--- a/src/ocrmypdf/cli.py
+++ b/src/ocrmypdf/cli.py
@@ -229,7 +229,13 @@ after installing the ocrmypdf-doc package.
help=argparse.SUPPRESS,
)
jobcontrol.add_argument(
- '--use-threads', action='store_true', help=argparse.SUPPRESS
+ '--use-threads', action='store_true', default=True, help=argparse.SUPPRESS
+ )
+ jobcontrol.add_argument(
+ '--no-use-threads',
+ action='store_false',
+ dest='use_threads',
+ help=argparse.SUPPRESS,
)
metadata = parser.add_argument_group(
diff --git a/src/ocrmypdf/pdfinfo/info.py b/src/ocrmypdf/pdfinfo/info.py
index 399b2579..c63bfa03 100644
--- a/src/ocrmypdf/pdfinfo/info.py
+++ b/src/ocrmypdf/pdfinfo/info.py
@@ -647,8 +647,8 @@ def _pdf_pageinfo_concurrent(
max_workers,
check_pages,
detailed_analysis=False,
-) -> List[Optional['PageInfo']]:
- pages = [None] * len(pdf.pages)
+) -> Sequence[Optional['PageInfo']]:
+ pages: Sequence[Optional['PageInfo']] = [None] * len(pdf.pages)
def update_pageinfo(result, pbar):
page = result
@@ -925,11 +925,11 @@ class PdfInfo:
@property
def min_version(self) -> str:
# The minimum PDF is the maximum version that any particular page needs
- return max(page.min_version for page in self.pages)
+ return max(page.min_version for page in self.pages if page)
@property
def has_userunit(self) -> bool:
- return any(page.userunit != 1.0 for page in self.pages)
+ return any(page.userunit != 1.0 for page in self.pages if page)
@property
def has_acroform(self) -> bool:
diff --git a/src/ocrmypdf/pluginspec.py b/src/ocrmypdf/pluginspec.py
index bec8bcba..9db079e2 100644
--- a/src/ocrmypdf/pluginspec.py
+++ b/src/ocrmypdf/pluginspec.py
@@ -132,6 +132,7 @@ def get_progressbar_class():
Here is how OCRmyPDF will use the progress bar:
Example:
+
pbar_class = pm.hook.get_progressbar_class()
with pbar_class(**tqdm_kwargs) as pbar:
...
@@ -235,9 +236,9 @@ def filter_page_image(page: 'PageContext', image_filename: Path) -> Path:
``image_filename``. The hook may overwrite ``image_filename`` with a new file.
The output image should preserve the same physical unit dimensions, that is
- (width * dpi_x, height * dpi_y). That is, if the image is resized, the DPI
+ ``(width * dpi_x, height * dpi_y)``. That is, if the image is resized, the DPI
must be adjusted by the reciprocal. If this is not preserved, the PDF page
- will be resized and the OCR layer misaligned. OCRmyPDF does not nothing
+ will be resized and the OCR layer misaligned. OCRmyPDF does nothing
to enforce these constraints; it is up to the plugin to do sensible things.
OCRmyPDF will create the PDF page based on the image format used (unless the
@@ -399,8 +400,7 @@ def get_ocr_engine() -> OcrEngine:
"""Returns an OcrEngine to use for processing this file.
The OcrEngine may be instantiated multiple times, by both the main process
- and child process. As such, it must be obtain store any state in ``options``
- or some common location.
+ and child process.
Note:
This is a :ref:`firstresult hook<firstresult>`.
diff --git a/src/ocrmypdf/subprocess/_windows.py b/src/ocrmypdf/subprocess/_windows.py
index aa9dfe56..d6a9d63b 100644
--- a/src/ocrmypdf/subprocess/_windows.py
+++ b/src/ocrmypdf/subprocess/_windows.py
@@ -4,6 +4,9 @@
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+# type: ignore
+# Non-Windows mypy now breaks when trying to typecheck winreg
+
import logging
import os
import shutil
@@ -17,6 +20,7 @@ try:
except ModuleNotFoundError as e:
raise ModuleNotFoundError("This module is for Windows only") from e
+
log = logging.getLogger(__name__)
T = TypeVar('T')
diff --git a/tests/conftest.py b/tests/conftest.py
index 2f25a857..62adaae2 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -134,8 +134,7 @@ def run_ocrmypdf(
p = run(
p_args,
- stdout=PIPE,
- stderr=PIPE,
+ capture_output=True,
text=text,
check=False,
)
diff --git a/tests/plugins/tesseract_cache.py b/tests/plugins/tesseract_cache.py
index 73e900bf..dd33383f 100644
--- a/tests/plugins/tesseract_cache.py
+++ b/tests/plugins/tesseract_cache.py
@@ -50,6 +50,7 @@ import logging
import platform
import re
import shutil
+import threading
from functools import partial
from pathlib import Path
from subprocess import PIPE, CalledProcessError, CompletedProcess
@@ -176,26 +177,40 @@ def cached_run(options, run_args, **run_kwargs):
class CacheOcrEngine(TesseractOcrEngine):
+ # Concurrent threads (with --use-threads) might try to use different parts
+ # of the OcrEngine, so we need a lock to protect the state of the patched
+ # module whenever it's patched. Should refactor ocrmypdf._exec.tesseract so that
+ # it does not need to be patched at all for testing.
+ lock = threading.Lock()
+
@staticmethod
def get_orientation(input_file, options):
- with patch('ocrmypdf._exec.tesseract.run', new=partial(cached_run, options)):
+ with CacheOcrEngine.lock, patch(
+ 'ocrmypdf._exec.tesseract.run', new=partial(cached_run, options)
+ ):
return TesseractOcrEngine.get_orientation(input_file, options)
@staticmethod
def get_deskew(input_file, options) -> float:
- with patch('ocrmypdf._exec.tesseract.run', new=partial(cached_run, options)):
+ with CacheOcrEngine.lock, patch(
+ 'ocrmypdf._exec.tesseract.run', new=partial(cached_run, options)
+ ):
return TesseractOcrEngine.get_deskew(input_file, options)
@staticmethod
def generate_hocr(input_file, output_hocr, output_text, options):
- with patch('ocrmypdf._exec.tesseract.run', new=partial(cached_run, options)):
+ with CacheOcrEngine.lock, patch(
+ 'ocrmypdf._exec.tesseract.run', new=partial(cached_run, options)
+ ):
TesseractOcrEngine.generate_hocr(
input_file, output_hocr, output_text, options
)
@staticmethod
def generate_pdf(input_file, output_pdf, output_text, options):
- with patch('ocrmypdf._exec.tesseract.run', new=partial(cached_run, options)):
+ with CacheOcrEngine.lock, patch(
+ 'ocrmypdf._exec.tesseract.run', new=partial(cached_run, options)
+ ):
TesseractOcrEngine.generate_pdf(
input_file, output_pdf, output_text, options
)
diff --git a/tests/test_completion.py b/tests/test_completion.py
index 20d716ff..01a02b31 100644
--- a/tests/test_completion.py
+++ b/tests/test_completion.py
@@ -24,8 +24,7 @@ def test_fish():
['fish', '-n', 'misc/completion/ocrmypdf.fish'],
check=True,
encoding='utf-8',
- stdout=PIPE,
- stderr=PIPE,
+ capture_output=True,
)
assert proc.stderr == '', proc.stderr
except FileNotFoundError:
@@ -41,8 +40,7 @@ def test_bash():
['bash', '-n', 'misc/completion/ocrmypdf.bash'],
check=True,
encoding='utf-8',
- stdout=PIPE,
- stderr=PIPE,
+ capture_output=True,
)
assert proc.stderr == '', proc.stderr
except FileNotFoundError:
diff --git a/tests/test_concurrency.py b/tests/test_concurrency.py
index 8d7675bf..fb149a2e 100644
--- a/tests/test_concurrency.py
+++ b/tests/test_concurrency.py
@@ -13,6 +13,7 @@ from ocrmypdf import ExitCode
from .conftest import run_ocrmypdf_api
+@pytest.mark.skipif(True, reason="--use-threads is currently default")
@pytest.mark.skipif(os.name == 'nt', reason="Windows doesn't have SIGKILL")
def test_simulate_oom_killer(resources, no_outpdf):
exitcode = run_ocrmypdf_api(
diff --git a/tests/test_main.py b/tests/test_main.py
index 910b2120..a60c76f8 100644
--- a/tests/test_main.py
+++ b/tests/test_main.py
@@ -630,8 +630,7 @@ def test_compression_preserved(ocrmypdf_exec, resources, image, outpdf):
]
p = run(
p_args,
- stdout=PIPE,
- stderr=PIPE,
+ capture_output=True,
stdin=input_stream,
text=True,
check=False,
@@ -691,8 +690,7 @@ def test_compression_changed(ocrmypdf_exec, resources, image, compression, outpd
]
p = run(
p_args,
- stdout=PIPE,
- stderr=PIPE,
+ capture_output=True,
stdin=input_stream,
text=True,
check=False,
diff --git a/tests/test_pdfinfo.py b/tests/test_pdfinfo.py
index 1a21e0e0..56052d1e 100644
--- a/tests/test_pdfinfo.py
+++ b/tests/test_pdfinfo.py
@@ -173,17 +173,15 @@ def test_stack_abuse():
p = pikepdf.Pdf.new()
stream = pikepdf.Stream(p, b'q ' * 35)
- with pytest.warns(None) as record:
+ with pytest.warns(UserWarning, match="overflowed"):
pdfinfo.info._interpret_contents(stream)
- assert 'overflowed' in str(record[0].message)
stream = pikepdf.Stream(p, b'q Q Q Q Q')
- with pytest.warns(None) as record:
+ with pytest.warns(UserWarning, match="underflowed") as record:
pdfinfo.info._interpret_contents(stream)
- assert 'underflowed' in str(record[0].message)
stream = pikepdf.Stream(p, b'q ' * 135)
- with pytest.warns(None):
+ with pytest.warns(UserWarning):
with pytest.raises(RuntimeError):
pdfinfo.info._interpret_contents(stream)
diff --git a/tests/test_rotation.py b/tests/test_rotation.py
index 75dcb6f8..12eb04e4 100644
--- a/tests/test_rotation.py
+++ b/tests/test_rotation.py
@@ -25,6 +25,12 @@ from .conftest import check_ocrmypdf, run_ocrmypdf
# pylintx: disable=unused-variable
+# Remove this workaround when we require Pillow >= 10
+try:
+ Transpose = Image.Transpose # type: ignore
+except AttributeError:
+ # Pillow 9 shim
+ Transpose = Image # type: ignore
RENDERERS = ['hocr', 'sandwich']
@@ -222,7 +228,7 @@ def test_rotate_page_level(image_angle, page_angle, resources, outdir):
with Image.open(fspath(resources / 'typewriter.png')) as im:
if image_angle != 0:
ccw_angle = -image_angle % 360
- im = im.transpose(getattr(Image, f'ROTATE_{ccw_angle}'))
+ im = im.transpose(getattr(Transpose, f'ROTATE_{ccw_angle}'))
im.save(memimg, format='PNG')
memimg.seek(0)
mempdf = BytesIO()
diff --git a/tests/test_stdio.py b/tests/test_stdio.py
index 4c8bbacc..a57c713a 100644
--- a/tests/test_stdio.py
+++ b/tests/test_stdio.py
@@ -30,12 +30,12 @@ def test_stdin(ocrmypdf_exec, resources, outpdf):
'--plugin',
'tests/plugins/tesseract_noop.py',
]
- run(p_args, stdout=PIPE, stderr=PIPE, stdin=input_stream, check=True)
+ run(p_args, capture_output=True, stdin=input_stream, check=True)
def test_stdout(ocrmypdf_exec, resources, outpdf):
if 'COV_CORE_DATAFILE' in os.environ:
- pytest.skip(msg="Coverage uses stdout")
+ pytest.skip("Coverage uses stdout")
input_file = str(resources / 'francais.pdf')
output_file = str(outpdf)
@@ -72,7 +72,7 @@ def test_bad_locale(monkeypatch):
)
def test_dev_null(resources):
if 'COV_CORE_DATAFILE' in os.environ:
- pytest.skip(msg="Coverage uses stdout")
+ pytest.skip("Coverage uses stdout")
p = run_ocrmypdf(
resources / 'trivial.pdf',