open-design/skills/hatch-pet/scripts/inspect_frames.py
Zakaria a46764fb1b
Some checks failed
ci / Validate workspace (push) Has been cancelled
landing-page-ci / Validate landing page (push) Has been cancelled
landing-page-deploy / Deploy landing page (push) Has been cancelled
github-metrics / Generate repository metrics SVG (push) Has been cancelled
first-commit
2026-05-04 14:58:14 -04:00

247 lines
8.3 KiB
Python

#!/usr/bin/env python3
"""Inspect extracted Codex pet frames before atlas composition."""
from __future__ import annotations
import argparse
import json
import math
from pathlib import Path
from statistics import median
from PIL import Image
# Pixel size every extracted frame is compared against in inspect_state();
# a frame with any other size is reported as a row error.
CELL_WIDTH = 192
CELL_HEIGHT = 208
# Expected number of frame files per animation state. Each key is also used
# as the name of the sub-directory of the frames root that is scanned for
# that state's frames.
ROW_FRAME_COUNTS = {
    "idle": 6,
    "running-right": 8,
    "running-left": 8,
    "waving": 4,
    "jumping": 5,
    "failed": 8,
    "waiting": 6,
    "running": 6,
    "review": 6,
}
# File suffixes (compared lower-cased, leading dot included) that
# frame_files() accepts as frame images.
IMAGE_SUFFIXES = {".png", ".webp", ".jpg", ".jpeg"}
def alpha_nonzero_count(image: Image.Image) -> int:
    """Return how many pixels of ``image`` have a non-zero alpha value.

    An image whose mode is ``"L"`` is treated as being the alpha band itself;
    for any other mode the ``"A"`` channel is extracted first.
    """
    if image.mode == "L":
        band = image
    else:
        band = image.getchannel("A")
    histogram = band.histogram()
    # Bucket 0 holds the fully transparent pixels; everything after it is used.
    return sum(histogram[1:])
def edge_alpha_count(image: Image.Image, margin: int) -> int:
    """Count non-transparent pixels lying within ``margin`` pixels of the border.

    Every pixel is counted at most once: the top and bottom strips span the
    full width, while the left and right strips only cover the rows between
    them, so corner pixels are not counted twice.

    Args:
        image: Image with an ``"A"`` channel.
        margin: Thickness of the border band in pixels.

    Returns:
        Number of pixels in the border band whose alpha is non-zero. A
        margin of zero or less yields 0.
    """
    alpha = image.getchannel("A")
    width, height = alpha.size
    if margin <= 0:
        # No border band to examine (and avoids building inverted crop boxes).
        return 0
    if 2 * margin >= width or 2 * margin >= height:
        # Opposite strips meet or overlap, so every pixel is within `margin`
        # of some edge; count the whole band exactly once.
        return alpha_nonzero_count(alpha)
    inner_top = margin
    inner_bottom = height - margin
    strips = (
        (0, 0, width, margin),                                # top, full width
        (0, inner_bottom, width, height),                     # bottom, full width
        (0, inner_top, margin, inner_bottom),                 # left, corners excluded
        (width - margin, inner_top, width, inner_bottom),     # right, corners excluded
    )
    return sum(alpha_nonzero_count(alpha.crop(box)) for box in strips)
def color_distance(left: tuple[int, int, int], right: tuple[int, int, int]) -> float:
    """Return the Euclidean distance between two colours.

    Only the first three components of each argument are read.
    """
    delta_red = left[0] - right[0]
    delta_green = left[1] - right[1]
    delta_blue = left[2] - right[2]
    return math.sqrt(delta_red ** 2 + delta_green ** 2 + delta_blue ** 2)
def chroma_adjacent_count(
    image: Image.Image,
    chroma_key: tuple[int, int, int] | None,
    threshold: float,
) -> int:
    """Count visible pixels whose colour lies close to the chroma key.

    Args:
        image: Frame to scan; it is converted to RGBA before scanning.
        chroma_key: Key colour as ``(red, green, blue)``, or ``None`` to skip
            the scan entirely.
        threshold: Largest colour distance still considered "close".

    Returns:
        Number of pixels with alpha above 16 whose colour distance to
        ``chroma_key`` is at most ``threshold``; 0 when no key is given.
    """
    if chroma_key is None:
        return 0
    pixels = image.convert("RGBA").tobytes()
    matches = 0
    # The byte string is laid out as consecutive R, G, B, A quadruples.
    for offset in range(0, len(pixels), 4):
        if pixels[offset + 3] <= 16:
            # Nearly transparent pixels are ignored.
            continue
        rgb = (pixels[offset], pixels[offset + 1], pixels[offset + 2])
        if color_distance(rgb, chroma_key) <= threshold:
            matches += 1
    return matches
def frame_files(state_dir: Path) -> list[Path]:
    """Return the image files directly inside ``state_dir`` in sorted order.

    An entry qualifies when its lower-cased suffix is in ``IMAGE_SUFFIXES``.
    A missing or non-directory ``state_dir`` yields an empty list.
    """
    if not state_dir.is_dir():
        return []
    images = [
        entry
        for entry in state_dir.iterdir()
        if entry.suffix.lower() in IMAGE_SUFFIXES
    ]
    images.sort()
    return images
def load_manifest(frames_root: Path) -> dict[str, dict[str, object]]:
    """Load ``frames-manifest.json`` and index its rows by state name.

    Args:
        frames_root: Directory expected to contain ``frames-manifest.json``.

    Returns:
        Mapping from each row's ``"state"`` string to the row dict. Empty when
        the manifest file is absent, when its top-level value is not an
        object, or when ``"rows"`` is not a list. Rows that are not dicts or
        that lack a string ``"state"`` are skipped.

    Raises:
        json.JSONDecodeError: If the manifest exists but is not valid JSON.
    """
    manifest_path = frames_root / "frames-manifest.json"
    if not manifest_path.is_file():
        return {}
    manifest = json.loads(manifest_path.read_text(encoding="utf-8"))
    if not isinstance(manifest, dict):
        # Valid JSON, but not an object (e.g. a bare list): treat it like any
        # other malformed manifest instead of failing on ``.get``.
        return {}
    rows = manifest.get("rows", [])
    if not isinstance(rows, list):
        return {}
    return {
        row["state"]: row
        for row in rows
        if isinstance(row, dict) and isinstance(row.get("state"), str)
    }
def load_chroma_key(frames_root: Path) -> tuple[int, int, int] | None:
    """Read the chroma-key colour recorded in ``frames-manifest.json``.

    Args:
        frames_root: Directory expected to contain ``frames-manifest.json``.

    Returns:
        ``(red, green, blue)`` taken from ``manifest["chroma_key"]["rgb"]``, or
        ``None`` when the manifest file is absent, its top-level value is not
        an object, or the entry is not a list of exactly three integers.

    Raises:
        json.JSONDecodeError: If the manifest exists but is not valid JSON.
    """
    manifest_path = frames_root / "frames-manifest.json"
    if not manifest_path.is_file():
        return None
    manifest = json.loads(manifest_path.read_text(encoding="utf-8"))
    if not isinstance(manifest, dict):
        # Valid JSON, but not an object: there is nowhere to look up the key.
        return None
    chroma_key = manifest.get("chroma_key")
    if not isinstance(chroma_key, dict):
        return None
    rgb = chroma_key.get("rgb")
    if not isinstance(rgb, list) or len(rgb) != 3:
        return None
    # bool is a subclass of int, so JSON true/false must be rejected explicitly.
    if not all(isinstance(value, int) and not isinstance(value, bool) for value in rgb):
        return None
    return (rgb[0], rgb[1], rgb[2])
def inspect_state(
    frames_root: Path,
    state: str,
    expected_count: int,
    manifest_rows: dict[str, dict[str, object]],
    chroma_key: tuple[int, int, int] | None,
    args: argparse.Namespace,
) -> dict[str, object]:
    """Inspect the frame images of one animation state and collect findings.

    Args:
        frames_root: Directory holding one sub-directory per state.
        state: State name; also the sub-directory that is scanned.
        expected_count: Number of frame files the state should have. At most
            this many files are opened and measured.
        manifest_rows: Manifest rows keyed by state; the row's ``"method"``
            value is reported and checked against ``"components"``.
        chroma_key: Key colour forwarded to ``chroma_adjacent_count``.
        args: Parsed CLI options supplying the thresholds and the
            ``require_components`` switch.

    Returns:
        A report dict with the state name, expected and actual frame counts,
        extraction method, ``ok`` flag (true when no errors were recorded),
        error and warning messages, and one measurement dict per frame.
    """
    errors: list[str] = []
    warnings: list[str] = []
    reports: list[dict[str, object]] = []
    used_areas: list[int] = []

    paths = frame_files(frames_root / state)
    method = manifest_rows.get(state, {}).get("method")

    if len(paths) != expected_count:
        errors.append(f"expected {expected_count} frame files for {state}, found {len(paths)}")

    # A row extracted by anything other than "components" is an error in
    # strict mode and a warning otherwise.
    if method and method != "components":
        if args.require_components:
            errors.append(
                f"{state} used extraction method {method}; regenerate the row or inspect slot slicing"
            )
        else:
            warnings.append(
                f"{state} used extraction method {method}; component extraction is preferred"
            )

    for index, frame_path in enumerate(paths[:expected_count]):
        with Image.open(frame_path) as source:
            rgba = source.convert("RGBA")
        used = alpha_nonzero_count(rgba)
        bounds = rgba.getbbox()
        near_edge = edge_alpha_count(rgba, args.edge_margin)
        keyed = chroma_adjacent_count(rgba, chroma_key, args.chroma_adjacent_threshold)

        if bounds:
            bounds_list = list(bounds)
        else:
            bounds_list = None
        reports.append(
            {
                "index": index,
                "file": str(frame_path),
                "width": rgba.width,
                "height": rgba.height,
                "nontransparent_pixels": used,
                "bbox": bounds_list,
                "edge_pixels": near_edge,
                "chroma_adjacent_pixels": keyed,
            }
        )
        used_areas.append(used)

        if rgba.size != (CELL_WIDTH, CELL_HEIGHT):
            errors.append(
                f"{state} frame {index:02d} is {rgba.width}x{rgba.height}; expected {CELL_WIDTH}x{CELL_HEIGHT}"
            )
        if used < args.min_used_pixels:
            errors.append(
                f"{state} frame {index:02d} is empty or too sparse ({used} pixels)"
            )
        if near_edge > args.edge_pixel_threshold:
            warnings.append(
                f"{state} frame {index:02d} has {near_edge} non-transparent pixels near the cell edge"
            )
        if keyed > args.chroma_adjacent_pixel_threshold:
            errors.append(
                f"{state} frame {index:02d} has {keyed} non-transparent pixels close to the chroma key"
            )

    # Flag frames whose used area deviates strongly from the row's median.
    if used_areas:
        typical = median(used_areas)
        if typical > 0:
            small_cutoff = typical * args.small_outlier_ratio
            large_cutoff = typical * args.large_outlier_ratio
            for index, area in enumerate(used_areas[:expected_count]):
                if area < small_cutoff:
                    warnings.append(
                        f"{state} frame {index:02d} is much smaller than the row median ({area} vs {typical:.0f})"
                    )
                if area > large_cutoff:
                    warnings.append(
                        f"{state} frame {index:02d} is much larger than the row median ({area} vs {typical:.0f})"
                    )

    return {
        "state": state,
        "expected_frames": expected_count,
        "actual_frames": len(paths),
        "extraction_method": method,
        "ok": not errors,
        "errors": errors,
        "warnings": warnings,
        "frames": reports,
    }
def main() -> None:
    """Parse CLI options, inspect every expected state, and emit the report.

    The full report, including the per-state rows, is written as JSON to the
    ``--json-out`` path (parent directories are created as needed). A summary
    without the ``rows`` key is printed to stdout.

    Raises:
        SystemExit: Always; the exit code is 0 when no errors were collected
            and 1 otherwise.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    for required_flag in ("--frames-root", "--json-out"):
        parser.add_argument(required_flag, required=True)
    # (flag, value type, default) for every tunable threshold.
    tunables = (
        ("--min-used-pixels", int, 400),
        ("--edge-margin", int, 2),
        ("--edge-pixel-threshold", int, 24),
        ("--chroma-adjacent-threshold", float, 150.0),
        ("--chroma-adjacent-pixel-threshold", int, 800),
        ("--small-outlier-ratio", float, 0.35),
        ("--large-outlier-ratio", float, 2.75),
    )
    for flag, value_type, default in tunables:
        parser.add_argument(flag, type=value_type, default=default)
    parser.add_argument(
        "--require-components",
        action="store_true",
        help="Fail rows that fell back to equal-slot extraction.",
    )
    options = parser.parse_args()

    root = Path(options.frames_root).expanduser().resolve()
    manifest_rows = load_manifest(root)
    chroma_key = load_chroma_key(root)

    rows = []
    for state, count in ROW_FRAME_COUNTS.items():
        rows.append(inspect_state(root, state, count, manifest_rows, chroma_key, options))

    all_errors = []
    all_warnings = []
    for row in rows:
        all_errors.extend(row["errors"])
        all_warnings.extend(row["warnings"])

    summary = {
        "ok": not all_errors,
        "frames_root": str(root),
        "errors": all_errors,
        "warnings": all_warnings,
    }
    report = {**summary, "rows": rows}

    destination = Path(options.json_out).expanduser().resolve()
    destination.parent.mkdir(parents=True, exist_ok=True)
    destination.write_text(json.dumps(report, indent=2) + "\n", encoding="utf-8")
    print(json.dumps(summary, indent=2))
    raise SystemExit(0 if summary["ok"] else 1)


# Run the inspection only when executed as a script, not when imported.
if __name__ == "__main__":
    main()