diff options
author | Sean Whitton <spwhitton@spwhitton.name> | 2022-04-15 14:31:00 -0700 |
---|---|---|
committer | Sean Whitton <spwhitton@spwhitton.name> | 2022-04-15 14:31:00 -0700 |
commit | 9cc69dd57404de2c1499fef851e2a030af311978 (patch) | |
tree | 69a99959025152197481cb82bdea90626e874b85 | |
parent | 783fdf51b7c403109f297ce77dc79051803f7dc3 (diff) | |
parent | f970cfcdc7f034800014cf59149036098dcf842f (diff) | |
download | ocrmypdf-9cc69dd57404de2c1499fef851e2a030af311978.tar.gz |
Merge tag 'v13.4.3+dfsg' into debian
DFSG-clean upstream version v13.4.3
-rw-r--r-- | .docker/Dockerfile | 2 | ||||
-rw-r--r-- | .github/workflows/build.yml | 16 | ||||
-rw-r--r-- | .pre-commit-config.yaml | 12 | ||||
-rw-r--r-- | docs/jbig2.rst | 3 | ||||
-rw-r--r-- | docs/plugins.rst | 10 | ||||
-rw-r--r-- | docs/release_notes.rst | 45 | ||||
-rw-r--r-- | misc/webservice.py | 2 | ||||
-rw-r--r-- | setup.cfg | 4 | ||||
-rw-r--r-- | src/ocrmypdf/_exec/ghostscript.py | 21 | ||||
-rw-r--r-- | src/ocrmypdf/_exec/unpaper.py | 4 | ||||
-rw-r--r-- | src/ocrmypdf/_pipeline.py | 13 | ||||
-rw-r--r-- | src/ocrmypdf/_validation.py | 2 | ||||
-rw-r--r-- | src/ocrmypdf/cli.py | 8 | ||||
-rw-r--r-- | src/ocrmypdf/pdfinfo/info.py | 8 | ||||
-rw-r--r-- | src/ocrmypdf/pluginspec.py | 8 | ||||
-rw-r--r-- | src/ocrmypdf/subprocess/_windows.py | 4 | ||||
-rw-r--r-- | tests/conftest.py | 3 | ||||
-rw-r--r-- | tests/plugins/tesseract_cache.py | 23 | ||||
-rw-r--r-- | tests/test_completion.py | 6 | ||||
-rw-r--r-- | tests/test_concurrency.py | 1 | ||||
-rw-r--r-- | tests/test_main.py | 6 | ||||
-rw-r--r-- | tests/test_pdfinfo.py | 8 | ||||
-rw-r--r-- | tests/test_rotation.py | 8 | ||||
-rw-r--r-- | tests/test_stdio.py | 6 |
24 files changed, 160 insertions, 63 deletions
diff --git a/.docker/Dockerfile b/.docker/Dockerfile index 36c913a1..c7614a8b 100644 --- a/.docker/Dockerfile +++ b/.docker/Dockerfile @@ -1,7 +1,7 @@ # OCRmyPDF # -FROM ubuntu:21.04 as base +FROM debian:bookworm-slim as base ENV LANG=C.UTF-8 ENV TZ=UTC diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index b9bdc84f..d5e0915b 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -21,17 +21,19 @@ jobs: matrix: include: - os: ubuntu-18.04 - python: 3.7 + python: "3.7" - os: ubuntu-20.04 - python: 3.8 + python: "3.8" - os: ubuntu-20.04 - python: 3.9 + python: "3.9" + - os: ubuntu-20.04 + python: "3.10" - os: ubuntu-latest - python: 3.9 + python: "3.9" - os: ubuntu-latest python: "pypy-3.7" - os: ubuntu-latest - python: 3.9 + python: "3.9" tesseract5: true env: @@ -120,7 +122,7 @@ jobs: strategy: matrix: os: [macos-latest] - python: ["3.9"] + python: ["3.9", "3.10"] env: OS: ${{ matrix.os }} @@ -175,7 +177,7 @@ jobs: strategy: matrix: os: [windows-latest] - python: ["3.9"] + python: ["3.9", "3.10"] env: OS: ${{ matrix.os }} diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index aae14891..f913fc40 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,6 +1,6 @@ repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.0.1 + rev: v4.1.0 hooks: - id: check-case-conflict - id: check-merge-conflict @@ -8,26 +8,26 @@ repos: - id: check-yaml - id: debug-statements - repo: https://github.com/pycqa/isort - rev: 5.9.3 + rev: 5.10.1 hooks: - id: isort args: ["--profile", "black"] - repo: https://github.com/psf/black - rev: 21.9b0 + rev: 22.3.0 hooks: - id: black language_version: python - repo: https://github.com/asottile/setup-cfg-fmt - rev: v1.19.0 + rev: v1.20.1 hooks: - id: setup-cfg-fmt - repo: https://github.com/asottile/pyupgrade - rev: v2.29.0 + rev: v2.31.1 hooks: - id: pyupgrade args: ["--py37-plus"] - repo: https://github.com/pre-commit/mirrors-mypy - rev: v0.910-1 + rev: v0.942 hooks: - id: mypy additional_dependencies: diff --git a/docs/jbig2.rst b/docs/jbig2.rst index c49f0e9c..2e7b7746 100644 --- a/docs/jbig2.rst +++ b/docs/jbig2.rst @@ -32,6 +32,9 @@ For all other Linux, you must build a JBIG2 encoder from source: .. _jbig2-lossy: +Dependencies include libtoolize and libleptonica, which on Ubuntu systems +are packaged as libtool and libleptonica-dev. + Lossy mode JBIG2 ================ diff --git a/docs/plugins.rst b/docs/plugins.rst index c8d3e5ae..1d8f3f70 100644 --- a/docs/plugins.rst +++ b/docs/plugins.rst @@ -152,6 +152,16 @@ hooks. As such, you cannot "chain" a series of plugin filters together in this way. Instead, a single hook implementation should be responsible for any such chaining operations. +Examples +======== + +* OCRmyPDF's test suite contains several plugins that are used to simulate certain + test conditions. +* `ocrmypdf-papermerge <https://github.com/papermerge/OCRmyPDF_papermerge>`_ is + a production plugin that integrates OCRmyPDF and the Papermerge document + management system. + + Custom command line arguments ----------------------------- diff --git a/docs/release_notes.rst b/docs/release_notes.rst index 04ee67f9..458d5ace 100644 --- a/docs/release_notes.rst +++ b/docs/release_notes.rst @@ -10,16 +10,53 @@ that is, output messages may be improved at any release level, so parsing them may be unreliable. Use the API to depend on precise behavior. The public API may be useful in scripts that launch OCRmyPDF processes or that -wish to use some of its features for working with PDFs.. +wish to use some of its features for working with PDFs. + +The most recent release of OCRmyPDF is |OCRmyPDF PyPI|. Any newer versions +referred to in these notes may exist the main branch but have not been +tagged yet. + +.. |OCRmyPDF PyPI| image:: https://img.shields.io/pypi/v/ocrmypdf.svg + +v13.4.3 +======= + +- Fix error on pytest.skip() with older versions of pytest. +- Documentation updates. + +v13.4.2 +======= + +- Worked around a + `major regression in Ghostscript 9.56.0 <https://bugs.ghostscript.com/show_bug.cgi?id=705187>`__ + where **all OCR text is stripped out of the PDF**. It simply removes all text, + even generated by software other than OCRmyPDF. Fortunately, we can ask + Ghostscript 9.56.0 to use its old behavior that worked correctly for our purposes. + Users must avoid the combination (Ghostscript 9.56.0, ocrmypdf <13.4.2) since + older versions of OCRmyPDF have no way of detecting that this particular + version of Ghostscript removes all OCR text. +- Marked pdfminer 20220319 as supported. +- Fixed some deprecation warnings from recent versions of Pillow and pytest. +- Test suite now covers Python 3.10 (Python 3.10 worked fine before, but was not + being tested). +- Docker image now uses debian:bookworm-slim as the base image to fix the Docker + image build. + +v13.4.1 +======= + +- Temporarily make threads rather than processes the default executor worker, due + to a persistent deadlock issue when processes are used. Add a new command line + argument ``--no-use-threads`` to disable this. v13.4.0 ======= - Fixed test failures when using pikepdf 5.0.0. - Various improvements to the optimizer. In particular, we now recognize PDF images - that are encoded with both flate and DCT (JPEG), and also produce PDF with images - compressed with flate and DCT, since this often yields file size improvements - compared to plain DCT. + that are encoded with both deflate (PNG) and DCT (JPEG), and also produce PDF + with images compressed with deflate and DCT, since this often yields file size + improvements compared to plain DCT. v13.3.0 ======= diff --git a/misc/webservice.py b/misc/webservice.py index e005b7c2..0e2079e8 100644 --- a/misc/webservice.py +++ b/misc/webservice.py @@ -59,7 +59,7 @@ def do_ocrmypdf(file): return Response("--sidecar not supported", 501, mimetype='text/plain') ocrmypdf_args = ["ocrmypdf", *cmd_args, up_file, down_file] - proc = run(ocrmypdf_args, stdout=PIPE, stderr=PIPE, encoding="utf-8") + proc = run(ocrmypdf_args, capture_output=True, encoding="utf-8") if proc.returncode != 0: stderr = proc.stderr return Response(stderr, 400, mimetype='text/plain') @@ -49,8 +49,8 @@ install_requires = coloredlogs>=14.0 # strictly optional img2pdf>=0.3.0,<0.5 # pure Python packaging>=20 - pdfminer.six!=20200720,>=20191110,<=20211012 - pikepdf>=4.0.0,!=5.0.0 + pdfminer.six!=20200720,>=20191110,<=20220319 + pikepdf!=5.0.0,>=4.0.0 pluggy>=0.13.0,<2 reportlab>=3.5.66 tqdm>=4 diff --git a/src/ocrmypdf/_exec/ghostscript.py b/src/ocrmypdf/_exec/ghostscript.py index 5d977a80..0ccadc45 100644 --- a/src/ocrmypdf/_exec/ghostscript.py +++ b/src/ocrmypdf/_exec/ghostscript.py @@ -24,6 +24,13 @@ from ocrmypdf.exceptions import MissingDependencyError, SubprocessOutputError from ocrmypdf.helpers import Resolution from ocrmypdf.subprocess import get_version, run, run_polling_stderr +# Remove this workaround when we require Pillow >= 10 +try: + Transpose = Image.Transpose # type: ignore +except AttributeError: + # Pillow 9 shim + Transpose = Image # type: ignore + log = logging.getLogger(__name__) missing_gs_error = """ @@ -132,11 +139,11 @@ def rasterize_pdf( # rotation is a clockwise angle and Image.ROTATE_* is # counterclockwise so this cancels out the rotation if rotation == 90: - im = im.transpose(Image.ROTATE_90) + im = im.transpose(Transpose.ROTATE_90) elif rotation == 180: - im = im.transpose(Image.ROTATE_180) + im = im.transpose(Transpose.ROTATE_180) elif rotation == 270: - im = im.transpose(Image.ROTATE_270) + im = im.transpose(Transpose.ROTATE_270) if rotation % 180 == 90: page_dpi = page_dpi.flip_axis() im.save(fspath(output_file), dpi=page_dpi) @@ -210,14 +217,18 @@ def generate_pdfa( # Older versions of Ghostscript expect a leading slash in # sColorConversionStrategy, newer ones should not have it. See Ghostscript # git commit fe1c025d. - strategy = ('/' + strategy) if version() < '9.19' else strategy + gs_version = version() + strategy = ('/' + strategy) if gs_version < '9.19' else strategy - if version() == '9.23': + if gs_version == '9.23': # 9.23: added JPEG passthrough as a new feature, but with a bug that # incorrectly formats some images. Fixed as of 9.24. So we disable this # feature for 9.23. # https://bugs.ghostscript.com/show_bug.cgi?id=699216 compression_args.append('-dPassThroughJPEGImages=false') + elif gs_version == '9.56.0': + # 9.56.0 breaks our OCR...? + compression_args.append('-dNEWPDF=false') # nb no need to specify ProcessColorModel when ColorConversionStrategy # is set; see: diff --git a/src/ocrmypdf/_exec/unpaper.py b/src/ocrmypdf/_exec/unpaper.py index 38132ac9..69eb58ee 100644 --- a/src/ocrmypdf/_exec/unpaper.py +++ b/src/ocrmypdf/_exec/unpaper.py @@ -18,7 +18,7 @@ from decimal import Decimal from pathlib import Path from subprocess import PIPE, STDOUT from tempfile import TemporaryDirectory -from typing import List, Optional, Tuple, Union +from typing import Iterator, List, Optional, Tuple, Union from PIL import Image @@ -76,7 +76,7 @@ def _convert_image(im: Image.Image) -> Tuple[Image.Image, bool, str]: @contextmanager -def _setup_unpaper_io(input_file: Path) -> Tuple[Path, Path, Path]: +def _setup_unpaper_io(input_file: Path) -> Iterator[Tuple[Path, Path, Path]]: with Image.open(input_file) as im: if im.width * im.height >= UNPAPER_IMAGE_PIXEL_LIMIT: raise UnpaperImageTooLargeError(w=im.width, h=im.height) diff --git a/src/ocrmypdf/_pipeline.py b/src/ocrmypdf/_pipeline.py index 447471cc..f0c690ca 100644 --- a/src/ocrmypdf/_pipeline.py +++ b/src/ocrmypdf/_pipeline.py @@ -18,7 +18,7 @@ from typing import Dict, Iterable, Optional import img2pdf import pikepdf from pikepdf.models.metadata import encode_pdf_date -from PIL import Image, ImageDraw +from PIL import Image, ImageColor, ImageDraw from ocrmypdf._concurrent import Executor from ocrmypdf._exec import unpaper @@ -38,6 +38,13 @@ from ocrmypdf.optimize import optimize from ocrmypdf.pdfa import generate_pdfa_ps from ocrmypdf.pdfinfo import Colorspace, Encoding, PdfInfo +# Remove this workaround when we require Pillow >= 10 +try: + BICUBIC = Image.Resampling.BICUBIC # type: ignore +except AttributeError: + # Pillow 9 shim + BICUBIC = Image.BICUBIC # type: ignore + log = logging.getLogger(__name__) VECTOR_PAGE_DPI = 400 @@ -484,7 +491,9 @@ def preprocess_deskew(input_file: Path, page_context: PageContext): # According to Pillow docs, .rotate() will automatically use Image.NEAREST # resampling if image is mode '1' or 'P' deskewed = im.rotate( - deskew_angle_degrees, resample=Image.BICUBIC, fillcolor='white' + deskew_angle_degrees, + resample=BICUBIC, + fillcolor=ImageColor.getcolor('white', mode=im.mode), ) deskewed.save(output_file, dpi=dpi) diff --git a/src/ocrmypdf/_validation.py b/src/ocrmypdf/_validation.py index a69514f1..613e1c9f 100644 --- a/src/ocrmypdf/_validation.py +++ b/src/ocrmypdf/_validation.py @@ -41,7 +41,7 @@ log = logging.getLogger(__name__) def check_platform(): - if os.name == 'nt' and sys.maxsize <= 2 ** 32: # pragma: no cover + if os.name == 'nt' and sys.maxsize <= 2**32: # pragma: no cover # 32-bit interpreter on Windows log.error( "You are running OCRmyPDF in a 32-bit (x86) Python interpreter." diff --git a/src/ocrmypdf/cli.py b/src/ocrmypdf/cli.py index db12cdb7..322f1239 100644 --- a/src/ocrmypdf/cli.py +++ b/src/ocrmypdf/cli.py @@ -229,7 +229,13 @@ after installing the ocrmypdf-doc package. help=argparse.SUPPRESS, ) jobcontrol.add_argument( - '--use-threads', action='store_true', help=argparse.SUPPRESS + '--use-threads', action='store_true', default=True, help=argparse.SUPPRESS + ) + jobcontrol.add_argument( + '--no-use-threads', + action='store_false', + dest='use_threads', + help=argparse.SUPPRESS, ) metadata = parser.add_argument_group( diff --git a/src/ocrmypdf/pdfinfo/info.py b/src/ocrmypdf/pdfinfo/info.py index 399b2579..c63bfa03 100644 --- a/src/ocrmypdf/pdfinfo/info.py +++ b/src/ocrmypdf/pdfinfo/info.py @@ -647,8 +647,8 @@ def _pdf_pageinfo_concurrent( max_workers, check_pages, detailed_analysis=False, -) -> List[Optional['PageInfo']]: - pages = [None] * len(pdf.pages) +) -> Sequence[Optional['PageInfo']]: + pages: Sequence[Optional['PageInfo']] = [None] * len(pdf.pages) def update_pageinfo(result, pbar): page = result @@ -925,11 +925,11 @@ class PdfInfo: @property def min_version(self) -> str: # The minimum PDF is the maximum version that any particular page needs - return max(page.min_version for page in self.pages) + return max(page.min_version for page in self.pages if page) @property def has_userunit(self) -> bool: - return any(page.userunit != 1.0 for page in self.pages) + return any(page.userunit != 1.0 for page in self.pages if page) @property def has_acroform(self) -> bool: diff --git a/src/ocrmypdf/pluginspec.py b/src/ocrmypdf/pluginspec.py index bec8bcba..9db079e2 100644 --- a/src/ocrmypdf/pluginspec.py +++ b/src/ocrmypdf/pluginspec.py @@ -132,6 +132,7 @@ def get_progressbar_class(): Here is how OCRmyPDF will use the progress bar: Example: + pbar_class = pm.hook.get_progressbar_class() with pbar_class(**tqdm_kwargs) as pbar: ... @@ -235,9 +236,9 @@ def filter_page_image(page: 'PageContext', image_filename: Path) -> Path: ``image_filename``. The hook may overwrite ``image_filename`` with a new file. The output image should preserve the same physical unit dimensions, that is - (width * dpi_x, height * dpi_y). That is, if the image is resized, the DPI + ``(width * dpi_x, height * dpi_y)``. That is, if the image is resized, the DPI must be adjusted by the reciprocal. If this is not preserved, the PDF page - will be resized and the OCR layer misaligned. OCRmyPDF does not nothing + will be resized and the OCR layer misaligned. OCRmyPDF does nothing to enforce these constraints; it is up to the plugin to do sensible things. OCRmyPDF will create the PDF page based on the image format used (unless the @@ -399,8 +400,7 @@ def get_ocr_engine() -> OcrEngine: """Returns an OcrEngine to use for processing this file. The OcrEngine may be instantiated multiple times, by both the main process - and child process. As such, it must be obtain store any state in ``options`` - or some common location. + and child process. Note: This is a :ref:`firstresult hook<firstresult>`. diff --git a/src/ocrmypdf/subprocess/_windows.py b/src/ocrmypdf/subprocess/_windows.py index aa9dfe56..d6a9d63b 100644 --- a/src/ocrmypdf/subprocess/_windows.py +++ b/src/ocrmypdf/subprocess/_windows.py @@ -4,6 +4,9 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this # file, You can obtain one at http://mozilla.org/MPL/2.0/. +# type: ignore +# Non-Windows mypy now breaks when trying to typecheck winreg + import logging import os import shutil @@ -17,6 +20,7 @@ try: except ModuleNotFoundError as e: raise ModuleNotFoundError("This module is for Windows only") from e + log = logging.getLogger(__name__) T = TypeVar('T') diff --git a/tests/conftest.py b/tests/conftest.py index 2f25a857..62adaae2 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -134,8 +134,7 @@ def run_ocrmypdf( p = run( p_args, - stdout=PIPE, - stderr=PIPE, + capture_output=True, text=text, check=False, ) diff --git a/tests/plugins/tesseract_cache.py b/tests/plugins/tesseract_cache.py index 73e900bf..dd33383f 100644 --- a/tests/plugins/tesseract_cache.py +++ b/tests/plugins/tesseract_cache.py @@ -50,6 +50,7 @@ import logging import platform import re import shutil +import threading from functools import partial from pathlib import Path from subprocess import PIPE, CalledProcessError, CompletedProcess @@ -176,26 +177,40 @@ def cached_run(options, run_args, **run_kwargs): class CacheOcrEngine(TesseractOcrEngine): + # Concurrent threads (with --use-threads) might try to use different parts + # of the OcrEngine, so we need a lock to protect the state of patched + # module whenever it's patched. Should refactor ocrmypdf._exec.tesseract so that + # it does not to be patched at all for testing. + lock = threading.Lock() + @staticmethod def get_orientation(input_file, options): - with patch('ocrmypdf._exec.tesseract.run', new=partial(cached_run, options)): + with CacheOcrEngine.lock, patch( + 'ocrmypdf._exec.tesseract.run', new=partial(cached_run, options) + ): return TesseractOcrEngine.get_orientation(input_file, options) @staticmethod def get_deskew(input_file, options) -> float: - with patch('ocrmypdf._exec.tesseract.run', new=partial(cached_run, options)): + with CacheOcrEngine.lock, patch( + 'ocrmypdf._exec.tesseract.run', new=partial(cached_run, options) + ): return TesseractOcrEngine.get_deskew(input_file, options) @staticmethod def generate_hocr(input_file, output_hocr, output_text, options): - with patch('ocrmypdf._exec.tesseract.run', new=partial(cached_run, options)): + with CacheOcrEngine.lock, patch( + 'ocrmypdf._exec.tesseract.run', new=partial(cached_run, options) + ): TesseractOcrEngine.generate_hocr( input_file, output_hocr, output_text, options ) @staticmethod def generate_pdf(input_file, output_pdf, output_text, options): - with patch('ocrmypdf._exec.tesseract.run', new=partial(cached_run, options)): + with CacheOcrEngine.lock, patch( + 'ocrmypdf._exec.tesseract.run', new=partial(cached_run, options) + ): TesseractOcrEngine.generate_pdf( input_file, output_pdf, output_text, options ) diff --git a/tests/test_completion.py b/tests/test_completion.py index 20d716ff..01a02b31 100644 --- a/tests/test_completion.py +++ b/tests/test_completion.py @@ -24,8 +24,7 @@ def test_fish(): ['fish', '-n', 'misc/completion/ocrmypdf.fish'], check=True, encoding='utf-8', - stdout=PIPE, - stderr=PIPE, + capture_output=True, ) assert proc.stderr == '', proc.stderr except FileNotFoundError: @@ -41,8 +40,7 @@ def test_bash(): ['bash', '-n', 'misc/completion/ocrmypdf.bash'], check=True, encoding='utf-8', - stdout=PIPE, - stderr=PIPE, + capture_output=True, ) assert proc.stderr == '', proc.stderr except FileNotFoundError: diff --git a/tests/test_concurrency.py b/tests/test_concurrency.py index 8d7675bf..fb149a2e 100644 --- a/tests/test_concurrency.py +++ b/tests/test_concurrency.py @@ -13,6 +13,7 @@ from ocrmypdf import ExitCode from .conftest import run_ocrmypdf_api +@pytest.mark.skipif(True, reason="--use-threads is currently default") @pytest.mark.skipif(os.name == 'nt', reason="Windows doesn't have SIGKILL") def test_simulate_oom_killer(resources, no_outpdf): exitcode = run_ocrmypdf_api( diff --git a/tests/test_main.py b/tests/test_main.py index 910b2120..a60c76f8 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -630,8 +630,7 @@ def test_compression_preserved(ocrmypdf_exec, resources, image, outpdf): ] p = run( p_args, - stdout=PIPE, - stderr=PIPE, + capture_output=True, stdin=input_stream, text=True, check=False, @@ -691,8 +690,7 @@ def test_compression_changed(ocrmypdf_exec, resources, image, compression, outpd ] p = run( p_args, - stdout=PIPE, - stderr=PIPE, + capture_output=True, stdin=input_stream, text=True, check=False, diff --git a/tests/test_pdfinfo.py b/tests/test_pdfinfo.py index 1a21e0e0..56052d1e 100644 --- a/tests/test_pdfinfo.py +++ b/tests/test_pdfinfo.py @@ -173,17 +173,15 @@ def test_stack_abuse(): p = pikepdf.Pdf.new() stream = pikepdf.Stream(p, b'q ' * 35) - with pytest.warns(None) as record: + with pytest.warns(UserWarning, match="overflowed"): pdfinfo.info._interpret_contents(stream) - assert 'overflowed' in str(record[0].message) stream = pikepdf.Stream(p, b'q Q Q Q Q') - with pytest.warns(None) as record: + with pytest.warns(UserWarning, match="underflowed") as record: pdfinfo.info._interpret_contents(stream) - assert 'underflowed' in str(record[0].message) stream = pikepdf.Stream(p, b'q ' * 135) - with pytest.warns(None): + with pytest.warns(UserWarning): with pytest.raises(RuntimeError): pdfinfo.info._interpret_contents(stream) diff --git a/tests/test_rotation.py b/tests/test_rotation.py index 75dcb6f8..12eb04e4 100644 --- a/tests/test_rotation.py +++ b/tests/test_rotation.py @@ -25,6 +25,12 @@ from .conftest import check_ocrmypdf, run_ocrmypdf # pylintx: disable=unused-variable +# Remove this workaround when we require Pillow >= 10 +try: + Transpose = Image.Transpose # type: ignore +except AttributeError: + # Pillow 9 shim + Transpose = Image # type: ignore RENDERERS = ['hocr', 'sandwich'] @@ -222,7 +228,7 @@ def test_rotate_page_level(image_angle, page_angle, resources, outdir): with Image.open(fspath(resources / 'typewriter.png')) as im: if image_angle != 0: ccw_angle = -image_angle % 360 - im = im.transpose(getattr(Image, f'ROTATE_{ccw_angle}')) + im = im.transpose(getattr(Transpose, f'ROTATE_{ccw_angle}')) im.save(memimg, format='PNG') memimg.seek(0) mempdf = BytesIO() diff --git a/tests/test_stdio.py b/tests/test_stdio.py index 4c8bbacc..a57c713a 100644 --- a/tests/test_stdio.py +++ b/tests/test_stdio.py @@ -30,12 +30,12 @@ def test_stdin(ocrmypdf_exec, resources, outpdf): '--plugin', 'tests/plugins/tesseract_noop.py', ] - run(p_args, stdout=PIPE, stderr=PIPE, stdin=input_stream, check=True) + run(p_args, capture_output=True, stdin=input_stream, check=True) def test_stdout(ocrmypdf_exec, resources, outpdf): if 'COV_CORE_DATAFILE' in os.environ: - pytest.skip(msg="Coverage uses stdout") + pytest.skip("Coverage uses stdout") input_file = str(resources / 'francais.pdf') output_file = str(outpdf) @@ -72,7 +72,7 @@ def test_bad_locale(monkeypatch): ) def test_dev_null(resources): if 'COV_CORE_DATAFILE' in os.environ: - pytest.skip(msg="Coverage uses stdout") + pytest.skip("Coverage uses stdout") p = run_ocrmypdf( resources / 'trivial.pdf', |