first-commit
ci / Validate workspace (push) Has been cancelled
landing-page-ci / Validate landing page (push) Has been cancelled
landing-page-deploy / Deploy landing page (push) Has been cancelled
github-metrics / Generate repository metrics SVG (push) Has been cancelled
refresh-contributors-wall / Refresh contributors wall cache bust (push) Waiting to run
ci / Validate workspace (push) Has been cancelled
landing-page-ci / Validate landing page (push) Has been cancelled
landing-page-deploy / Deploy landing page (push) Has been cancelled
github-metrics / Generate repository metrics SVG (push) Has been cancelled
refresh-contributors-wall / Refresh contributors wall cache bust (push) Waiting to run
This commit is contained in:
@@ -0,0 +1,2 @@
|
||||
__pycache__/
|
||||
*.pyc
|
||||
+134
@@ -0,0 +1,134 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Extract every shape on every slide of a .pptx into a JSON dump.
|
||||
|
||||
Usage:
|
||||
python extract_pptx.py <path/to/deck.pptx> # prints to stdout
|
||||
python extract_pptx.py <path/to/deck.pptx> -o dump.json
|
||||
|
||||
The dump captures the *actual* state of the export — text content, position,
|
||||
size, and per-run typography (font name, size, bold, italic, color). Use this
|
||||
as the ground truth for the fidelity audit; do not trust the export script's
|
||||
intent.
|
||||
|
||||
Coordinates are reported in inches (rounded to 3 decimals) so they're
|
||||
human-readable when comparing against rails like CONTENT_MAX_Y = 6.70".
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
try:
|
||||
from pptx import Presentation
|
||||
from pptx.util import Emu
|
||||
except ImportError:
|
||||
sys.stderr.write(
|
||||
"python-pptx is required. Install with: pip install python-pptx\n"
|
||||
)
|
||||
sys.exit(2)
|
||||
|
||||
|
||||
def emu_to_in(emu: int | None) -> float | None:
    """Convert a length in EMU to inches, rounded to 3 decimal places.

    The conversion divides by 914400 (EMU per inch). ``None`` is passed
    through unchanged so callers can feed in optional geometry directly.
    """
    return None if emu is None else round(emu / 914400, 3)
|
||||
|
||||
|
||||
def color_repr(color) -> str | None:
    """Best-effort conversion of a color object to a ``#``-prefixed string.

    Returns ``None`` when *color* is ``None``, when its ``type`` is ``None``
    (no explicit color set), when its ``rgb`` is ``None``, or when reading
    either attribute raises ``AttributeError``, ``ValueError`` or
    ``TypeError``. Otherwise returns ``"#"`` followed by the lowercased
    string form of ``color.rgb``.
    """
    if color is None:
        return None
    try:
        # A color format with no type carries no explicit color to report.
        if color.type is None:
            return None
        rgb = color.rgb
        return None if rgb is None else "#" + str(rgb).lower()
    except (AttributeError, ValueError, TypeError):
        # Deliberately best-effort: an unreadable color is reported as absent.
        return None
|
||||
|
||||
|
||||
def extract_runs(text_frame) -> list[dict]:
    """Flatten every run of every paragraph in *text_frame* into dicts.

    Each dict has the keys ``text``, ``font``, ``size_pt``, ``bold``,
    ``italic`` and ``color``. Size, bold and italic are ``None`` when the
    run's font leaves them unset.
    """

    def describe(run) -> dict:
        """Build the typography record for a single run."""
        font = run.font
        size = font.size
        bold = font.bold
        italic = font.italic
        return {
            "text": run.text,
            "font": font.name,
            "size_pt": None if size is None else float(size.pt),
            "bold": None if bold is None else bool(bold),
            "italic": None if italic is None else bool(italic),
            # Color is extracted unconditionally: a run may inherit its font
            # name/size from the theme while still setting its own color,
            # and color drift is one of the things the audit must catch.
            "color": color_repr(font.color),
        }

    return [
        describe(run)
        for paragraph in text_frame.paragraphs
        for run in paragraph.runs
    ]
|
||||
|
||||
|
||||
def extract_shape(shape) -> dict:
    """Describe one shape as a JSON-serializable dict.

    Always includes ``name``, ``shape_type`` and the four geometry values
    ``left_in`` / ``top_in`` / ``width_in`` / ``height_in`` (inches, or
    ``None`` when the shape does not report that value). ``bottom_in`` is
    added when both top and height are known, and ``right_in`` when both
    left and width are known. Shapes with a text frame additionally get
    ``text`` and ``runs``.
    """
    left, top = shape.left, shape.top
    width, height = shape.width, shape.height
    shape_type = shape.shape_type

    data = {
        "name": shape.name,
        "shape_type": str(shape_type) if shape_type is not None else None,
        "left_in": emu_to_in(left),
        "top_in": emu_to_in(top),
        "width_in": emu_to_in(width),
        "height_in": emu_to_in(height),
    }

    # Guard each edge on exactly the two values it is computed from. The
    # previous combined check never looked at ``width``, so a shape with a
    # position but no width raised TypeError on ``left + width``.
    if top is not None and height is not None:
        data["bottom_in"] = emu_to_in(top + height)
    if left is not None and width is not None:
        data["right_in"] = emu_to_in(left + width)

    if shape.has_text_frame:
        tf = shape.text_frame
        data["text"] = tf.text
        data["runs"] = extract_runs(tf)
    return data
|
||||
|
||||
|
||||
def extract_pptx(path: Path) -> dict:
    """Load the presentation at *path* and return its full shape dump.

    The result has four keys: ``source`` (the path as a string), ``canvas``
    (slide width/height in inches), ``slide_count`` and ``slides``. Each
    slide entry holds its 1-based ``index`` and the list of extracted
    ``shapes``.
    """
    deck = Presentation(str(path))
    slides = [
        {
            "index": number,
            "shapes": [extract_shape(shape) for shape in slide.shapes],
        }
        for number, slide in enumerate(deck.slides, start=1)
    ]
    return {
        "source": str(path),
        "canvas": {
            "width_in": emu_to_in(deck.slide_width),
            "height_in": emu_to_in(deck.slide_height),
        },
        "slide_count": len(slides),
        "slides": slides,
    }
|
||||
|
||||
|
||||
def main() -> int:
    """Command-line entry point: dump a .pptx as JSON.

    Parses ``path`` and the optional ``-o/--output`` argument, extracts the
    deck, and writes the JSON either to the output file (with a one-line
    summary on stderr) or to stdout.

    Returns:
        0 on success. A missing input file is reported through
        ``ArgumentParser.error``, which exits the process itself.
    """
    # __doc__ is None when docstrings are stripped (python -OO); fall back to
    # an empty description instead of crashing on None.split().
    summary = (__doc__ or "").split("\n\n")[0]
    ap = argparse.ArgumentParser(description=summary)
    ap.add_argument("path", type=Path, help=".pptx file to extract")
    ap.add_argument("-o", "--output", type=Path, help="write JSON to this path; default stdout")
    args = ap.parse_args()

    if not args.path.exists():
        ap.error(f"file not found: {args.path}")

    data = extract_pptx(args.path)
    payload = json.dumps(data, ensure_ascii=False, indent=2)
    if args.output:
        args.output.write_text(payload, encoding="utf-8")
        # Report encoded size: len(payload) counts characters, which
        # under-reports bytes as soon as the deck contains non-ASCII text
        # (ensure_ascii=False keeps such text unescaped).
        byte_count = len(payload.encode("utf-8"))
        sys.stderr.write(
            f"wrote {args.output} ({byte_count} bytes, {data['slide_count']} slides)\n"
        )
    else:
        sys.stdout.write(payload)
        sys.stdout.write("\n")
    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Run the CLI and hand its return code to the interpreter as exit status.
    sys.exit(main())
|
||||
+144
@@ -0,0 +1,144 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Verify a re-exported .pptx against footer-rail + canvas-bound invariants.
|
||||
|
||||
Usage:
|
||||
python verify_layout.py <path/to/deck.pptx>
|
||||
python verify_layout.py <path/to/deck.pptx> --content-max-y 6.70 --canvas-h 7.5
|
||||
|
||||
Exits 0 on no violations, 1 on any violation. Prints a single block of
|
||||
violations sorted by slide index, one per line:
|
||||
|
||||
slide 5  shape 'desc-row-B-1' bottom 7.214" crosses footer rail 6.7"
|
||||
slide 11 shape 'note-paragraph' bottom 7.642" exceeds canvas 7.5"
|
||||
|
||||
Use this as the gate for "this re-export is shippable". Don't claim the audit
|
||||
is fixed without running this script — the human eye misses 1–2 mm overflow
|
||||
at zoom-out, the script doesn't.
|
||||
|
||||
Footer / chrome shapes are exempt from the content rail. Two heuristics
|
||||
identify them, in this order:
|
||||
|
||||
1. **By name** — any shape whose name contains "footer", "foot", "chrome",
|
||||
"page", or "pagination" (case-insensitive). Use semantic names in your
|
||||
export script if you can.
|
||||
2. **By position** — any shape whose `top` is at or below the footer-zone
|
||||
threshold (default `--footer-zone-top 6.80`). This catches python-pptx's
|
||||
auto-generated names like "TextBox 3" when the export script didn't name
|
||||
them. The threshold sits ~0.05" above the typical FOOTER_TOP (6.85") so chrome rows pinned
|
||||
exactly at FOOTER_TOP are still recognized.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
try:
|
||||
from pptx import Presentation
|
||||
except ImportError:
|
||||
sys.stderr.write(
|
||||
"python-pptx is required. Install with: pip install python-pptx\n"
|
||||
)
|
||||
sys.exit(2)
|
||||
|
||||
|
||||
# Substrings that mark a shape name as footer/chrome (matched case-insensitively).
FOOTER_NAME_HINTS = ("footer", "foot", "chrome", "page", "pagination")
# Tolerance in inches; overflows smaller than this (~0.13mm) are ignored.
EPS_IN = 0.005


def is_footer_by_name(name: str) -> bool:
    """Return True when *name* contains any footer/chrome hint.

    Matching is a case-insensitive substring test against
    ``FOOTER_NAME_HINTS``. An empty or ``None`` name never matches.
    """
    lowered = name.lower() if name else ""
    for hint in FOOTER_NAME_HINTS:
        if hint in lowered:
            return True
    return False
|
||||
|
||||
|
||||
def emu_to_in(emu: int | None) -> float:
    """Convert a length in EMU to inches (unrounded).

    A missing (``None``) or zero value yields ``0.0``; anything else is
    divided by 914400 (EMU per inch).
    """
    if not emu:
        return 0.0
    return emu / 914400
|
||||
|
||||
|
||||
def verify(path: Path, content_max_y: float, canvas_w: float, canvas_h: float,
           footer_zone_top: float) -> list[str]:
    """Check the deck at *path* against canvas and footer-rail invariants.

    Args:
        path: .pptx file to open.
        content_max_y: content rail in inches; non-footer shapes must not
            extend below it.
        canvas_w: expected slide width in inches.
        canvas_h: expected slide height in inches.
        footer_zone_top: shapes whose top is at or below this value (within
            ``EPS_IN``) are treated as footer/chrome and skip the rail check.

    Returns:
        Human-readable violation messages: an optional canvas-mismatch line
        first, then per-shape lines in slide order. Empty when the deck is
        clean. All comparisons allow ``EPS_IN`` of slack.
    """
    deck = Presentation(str(path))
    found: list[str] = []

    file_w = emu_to_in(deck.slide_width)
    file_h = emu_to_in(deck.slide_height)
    width_off = abs(file_w - canvas_w) > EPS_IN
    height_off = abs(file_h - canvas_h) > EPS_IN
    if width_off or height_off:
        found.append(
            f'canvas mismatch: file is {file_w:.3f}" x {file_h:.3f}", '
            f'expected {canvas_w}" x {canvas_h}"'
        )

    for slide_no, slide in enumerate(deck.slides, start=1):
        for shape in slide.shapes:
            # Without a vertical extent there is nothing to measure.
            if shape.top is None or shape.height is None:
                continue

            top = emu_to_in(shape.top)
            left = emu_to_in(shape.left)
            bottom = top + emu_to_in(shape.height)
            right = left + emu_to_in(shape.width)
            name = shape.name or "<unnamed>"
            label = f"slide {slide_no:<2} shape '{name}'"

            # Off-canvas checks are a hard fail for every shape, footer or not.
            canvas_checks = (
                (bottom > canvas_h + EPS_IN,
                 f'{label} bottom {bottom:.3f}" exceeds canvas {canvas_h}"'),
                (right > canvas_w + EPS_IN,
                 f'{label} right {right:.3f}" exceeds canvas width {canvas_w}"'),
                (top < -EPS_IN,
                 f'{label} top {top:.3f}" is negative'),
                (left < -EPS_IN,
                 f'{label} left {left:.3f}" is negative'),
            )
            found.extend(message for failed, message in canvas_checks if failed)

            # The footer rail applies to content shapes only. A shape is
            # exempt when it is named like a footer or is pinned at or below
            # the footer-zone threshold.
            is_chrome = is_footer_by_name(name) or top >= footer_zone_top - EPS_IN
            if not is_chrome and bottom > content_max_y + EPS_IN:
                found.append(
                    f'{label} bottom {bottom:.3f}" crosses footer rail {content_max_y}"'
                )

    return found
|
||||
|
||||
|
||||
def main() -> int:
    """Command-line entry point: verify a .pptx and report violations.

    Parses the deck path plus the rail/canvas options, runs ``verify`` and
    writes the outcome to stderr.

    Returns:
        1 when any violation was found, 0 otherwise. A missing input file is
        reported through ``ArgumentParser.error``, which exits the process
        itself.
    """
    # __doc__ is None when docstrings are stripped (python -OO); fall back to
    # an empty description instead of crashing on None.split().
    summary = (__doc__ or "").split("\n\n")[0]
    ap = argparse.ArgumentParser(description=summary)
    ap.add_argument("path", type=Path, help=".pptx file to verify")
    ap.add_argument("--content-max-y", type=float, default=6.70,
                    help="content rail in inches; nothing in content area may cross (default 6.70)")
    ap.add_argument("--canvas-w", type=float, default=13.333,
                    help="expected canvas width in inches (default 13.333 = 16:9)")
    ap.add_argument("--canvas-h", type=float, default=7.5,
                    help="expected canvas height in inches (default 7.5 = 16:9)")
    # Help text corrected: 6.80 is 0.05" (not 0.10") above FOOTER_TOP=6.85".
    ap.add_argument("--footer-zone-top", type=float, default=6.80,
                    help="any shape with top >= this is treated as footer/chrome "
                         "(default 6.80; sits 0.05\" above the typical FOOTER_TOP=6.85\")")
    args = ap.parse_args()

    if not args.path.exists():
        ap.error(f"file not found: {args.path}")

    violations = verify(args.path, args.content_max_y, args.canvas_w, args.canvas_h,
                        args.footer_zone_top)
    if violations:
        sys.stderr.write("\n".join(violations) + "\n")
        sys.stderr.write(f"\n{len(violations)} violation(s) found in {args.path}\n")
        return 1
    sys.stderr.write(f"OK: 0 violations across all slides in {args.path}\n")
    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Run the CLI and hand its return code to the interpreter as exit status.
    sys.exit(main())
|
||||
Reference in New Issue
Block a user