Skip to content

Instantly share code, notes, and snippets.

@mara004
Last active November 10, 2024 23:49
Show Gist options
  • Save mara004/f2ec2a3227624aff5270a0c676877960 to your computer and use it in GitHub Desktop.
Save mara004/f2ec2a3227624aff5270a0c676877960 to your computer and use it in GitHub Desktop.
JPEG to PDF N-up with pypdfium2
# SPDX-FileCopyrightText: 2024 geisserml <[email protected]>
# SPDX-License-Identifier: Apache-2.0 OR BSD-3-Clause
import argparse
from pathlib import Path
from itertools import product
import pypdfium2 as pdfium
# Command-line interface: one or more input JPEG files plus the PDF to write.
parser = argparse.ArgumentParser(
    description="Tile JPEG images N-up onto the pages of a new PDF.",
)
parser.add_argument(
    "files", nargs="+", type=Path,
    help="input JPEG files, placed in the given order",
)
# Required: main() passes this value straight to the document's save() call,
# which needs a real destination, so a missing value is reported up front
# instead of after all pages have been built.
parser.add_argument(
    "--output", "-o", type=Path, required=True,
    help="path of the PDF file to write",
)
args = parser.parse_args()

# Grid layout of each output page.
ROWS, COLS = 2, 2
# Page size handed to new_page(); 595 x 842 is presumably A4 in PDF points (1/72 inch).
PAGE_W, PAGE_H = 595, 842
# Rotation applied to every image, in degrees (main() special-cases 90 and 270).
ROTATION = 90
# Horizontal alignment per column, indexed by column number: left, mid or right.
# NOTE: main() looks up H_ALIGNS[column], so this needs one entry per column.
H_ALIGNS = ["right", "left"]
# Vertical alignment per row, indexed by row number: bottom, mid or top.
V_ALIGNS = ["mid"] * ROWS
def main():
    """Lay out the input JPEG files N-up on new PDF pages and save the result.

    Every page is split into a ROWS x COLS grid of equally sized boxes that
    are filled row by row, row 0 being the one with the highest y position.
    Each image is scaled to fit its box while keeping its aspect ratio (for
    a ROTATION of 90 or 270 the box sides are swapped first), rotated by
    ROTATION degrees and aligned inside the box as requested by H_ALIGNS
    (per column) and V_ALIGNS (per row). The finished document is written
    to ``args.output``.
    """
    per_page = ROWS * COLS
    cell_w = PAGE_W / COLS
    cell_h = PAGE_H / ROWS
    page_count = -(len(args.files) // -per_page)  # ceiling division
    quarter_turn = ROTATION in (90, 270)

    doc = pdfium.PdfDocument.new()
    # A single iterator shared by all pages, so that every page continues
    # with the first file the previous page did not consume.
    pending = iter(args.files)

    for _ in range(page_count):
        page = doc.new_page(PAGE_W, PAGE_H)

        # The grid must be zip()'s first argument: zip() stops as soon as the
        # grid is exhausted, before pulling (and thereby losing) another file
        # from `pending`.
        for (row, col), jpeg_path in zip(product(range(ROWS), range(COLS)), pending):
            image = pdfium.PdfImage.new(doc)
            image.load_jpeg(jpeg_path)
            page.insert_obj(image)
            img_w, img_h = image.get_px_size()

            # Room available to the image before it is rotated.
            if quarter_turn:
                avail_w, avail_h = cell_h, cell_w
            else:
                avail_w, avail_h = cell_w, cell_h

            # Largest uniform scale at which the image still fits.
            factor = min(avail_w / img_w, avail_h / img_h)
            out_w = img_w * factor
            out_h = img_h * factor
            # Unused room along each axis, distributed by the alignment below.
            slack_x = avail_w - out_w
            slack_y = avail_h - out_h

            matrix = pdfium.PdfMatrix().scale(out_w, out_h)
            if ROTATION != 0:
                # Rotate about the image centre: move the centre to the
                # origin, turn, then shift back by half of the resulting size.
                matrix = matrix.translate(-out_w / 2, -out_h / 2).rotate(ROTATION)
                if quarter_turn:
                    # After a quarter turn, width and height (and the slack
                    # along them) have traded places on the page.
                    out_w, out_h = out_h, out_w
                    slack_x, slack_y = slack_y, slack_x
                matrix = matrix.translate(out_w / 2, out_h / 2)

            x_offsets = {"left": 0, "mid": slack_x / 2, "right": slack_x}
            y_offsets = {"bottom": 0, "mid": slack_y / 2, "top": slack_y}
            x = cell_w * col + x_offsets[H_ALIGNS[col]]
            y = PAGE_H - cell_h * (row + 1) + y_offsets[V_ALIGNS[row]]

            image.set_matrix(matrix.translate(x, y))

        page.gen_content()

    doc.save(args.output)
# Only build the PDF when executed as a script, not when the module is imported.
if __name__ == "__main__":
    main()
@mara004
Copy link
Author

mara004 commented Aug 31, 2024

Iterator patch to avoid in-loop file index calculation and if-check (click to expand)
diff --git a/tile_orig.py b/tile.py
index 5078ae2..7c82ce8 100644
--- a/tile_orig.py
+++ b/tile.py
@@ -3,9 +3,10 @@
 
 import math
 import argparse
-import itertools
-import pypdfium2 as pdfium 
 from pathlib import Path
+from itertools import product
+
+import pypdfium2 as pdfium 
 
 parser = argparse.ArgumentParser()
 parser.add_argument("files", nargs="+", type=Path)
@@ -26,20 +27,23 @@ def main():
     n_pages = math.ceil( len(args.files) / n_tiles )
     
     new_pdf = pdfium.PdfDocument.new()
+    # tricky: range() restarts when a new loop is opened, so we need to cast to a regular iterator
+    file_idx_iter = iter(range(len(args.files)))
     
     for i in range(n_pages):
         page = new_pdf.new_page(PAGE_W, PAGE_H)
         
-        for r, c in itertools.product(range(ROWS), range(COLS)):
+        # tricky: file_idx_iter must come after the rows/cols iterator to avoid discarding an iteration step
+        for (r, c), file_idx in zip(product(range(ROWS), range(COLS)), file_idx_iter):
             
-            file_idx = i*n_tiles + r*COLS + c
-            if not file_idx < len(args.files):
-                break
+            # print(file_idx, r, c)
+            # assert file_idx == i*n_tiles + r*COLS + c
+            # assert file_idx < len(args.files)
             
             img = pdfium.PdfImage.new(new_pdf)
             img.load_jpeg(args.files[file_idx])
             page.insert_obj(img)
-            px_w, px_h = img.get_size()
+            px_w, px_h = img.get_px_size()
             
             tile_w, tile_h = box_w, box_h
             if ROTATION in (90, 270):

Update: Now applied to the gist

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment