open-design/scripts/import-prompt-templates.mjs
Zakaria a46764fb1b
Some checks failed
ci / Validate workspace (push) Has been cancelled
landing-page-ci / Validate landing page (push) Has been cancelled
landing-page-deploy / Deploy landing page (push) Has been cancelled
github-metrics / Generate repository metrics SVG (push) Has been cancelled
first-commit
2026-05-04 14:58:14 -04:00

404 lines
14 KiB
JavaScript
Executable File
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env node
/**
* Pulls down the upstream prompt corpora (CC BY 4.0) and emits curated
* JSON files under `prompt-templates/{image,video}/`. Re-run anytime to
* pick up new featured prompts.
*
* Usage:
* node scripts/import-prompt-templates.mjs
*
* Source READMEs:
* - https://github.com/YouMind-OpenLab/awesome-gpt-image-2 (CC BY 4.0)
* - https://github.com/YouMind-OpenLab/awesome-seedance-2-prompts (CC BY 4.0)
*
* Each upstream README is a structured catalog. Two patterns we care about:
*
* Featured block:
* ### No. N: <Title>
* <badges>
* #### 📖 Description
* <description paragraph>
* #### 📝 Prompt
* ```
* <prompt body>
* ```
* #### 🎬 Video (or 🖼️ Generated Images)
* <preview img / video link>
* #### 📌 Details
* - **Author:** [Name](url)
* - **Source:** [Twitter Post](url)
* - **Published:** ...
*
* All-Prompts block:
* ### <Title>
* <badges>
* > <description>
* #### 📝 Prompt
* ```
* <prompt body>
* ```
* <img src="<thumb>"> | <a href=...>
* **Author:** [Name](url) | **Source:** [Link](url) | **Published:** ...
*
* We pick the featured 6 from each repo (always good) plus a sampled slice
* of the All-Prompts head so the gallery has breadth across categories.
*
* All output JSON carries a `source` block so attribution stays intact.
*/
import { mkdir, writeFile, readdir, unlink, readFile } from 'node:fs/promises';
import path from 'node:path';
import { fileURLToPath } from 'node:url';
// All paths are derived from this script's own location (via
// import.meta.url) rather than the current working directory.
const __dirname = path.dirname(fileURLToPath(import.meta.url));
// Repository root: the parent of the directory that holds this script.
const ROOT = path.resolve(__dirname, '..');
// One output directory per surface, both under `prompt-templates/`.
const [OUT_IMAGE, OUT_VIDEO] = ['image', 'video'].map((surface) =>
  path.join(ROOT, 'prompt-templates', surface),
);
// Upstream corpora to import. Each record doubles as the parsing context
// (`ctx`) handed to the parsers further down.
const SOURCES = [
  {
    surface: 'image',
    repo: 'YouMind-OpenLab/awesome-gpt-image-2',
    license: 'CC-BY-4.0',
    readmeUrl:
      'https://raw.githubusercontent.com/YouMind-OpenLab/awesome-gpt-image-2/main/README.md',
    defaultModel: 'gpt-image-2',
    defaultAspect: '1:1',
    // Upper bound on entries taken from the "All Prompts" section, so the
    // committed dataset stays reviewable. Featured entries are not capped.
    sampleAllPrompts: 30,
  },
  {
    surface: 'video',
    repo: 'YouMind-OpenLab/awesome-seedance-2-prompts',
    license: 'CC-BY-4.0',
    readmeUrl:
      'https://raw.githubusercontent.com/YouMind-OpenLab/awesome-seedance-2-prompts/main/README.md',
    defaultModel: 'seedance-2.0',
    defaultAspect: '16:9',
    sampleAllPrompts: 30,
  },
];
/**
 * Download `url` and return the response body as text.
 *
 * @param {string} url - Address to fetch.
 * @returns {Promise<string>} The response body.
 * @throws {Error} If the response status is not OK; the message carries the
 *   URL and the numeric status.
 */
async function fetchText(url) {
  const response = await fetch(url);
  if (response.ok) {
    return response.text();
  }
  throw new Error(`failed ${url}: ${response.status}`);
}
/**
 * Turn an arbitrary title into a lowercase, hyphen-separated ASCII slug of
 * at most 64 characters.
 *
 * Accented Latin letters are folded to their base letter (NFKD decomposition
 * followed by removal of combining marks); every other run of characters
 * outside `a-z0-9` collapses to a single hyphen.
 *
 * @param {string} input - Raw title text.
 * @returns {string} The slug. Empty when `input` contains no ASCII letters
 *   or digits after folding.
 */
function slugify(input) {
  return (
    input
      .toLowerCase()
      .normalize('NFKD')
      .replace(/[\u0300-\u036f]/g, '')
      .replace(/[^a-z0-9]+/g, '-')
      .replace(/^-+/, '')
      .slice(0, 64)
      // Trim trailing hyphens AFTER truncating: cutting at 64 characters can
      // land right after a separator and would otherwise leave a dangling "-".
      .replace(/-+$/, '')
  );
}
/**
 * Return the text of one README section: everything after the first match of
 * `headerRe` up to (but not including) the next line that starts with "## ",
 * or to the end of the document when no such line follows.
 *
 * @param {string} md - Full markdown document.
 * @param {RegExp} headerRe - Pattern identifying the section header.
 * @returns {string} The section body, or '' when the header is not found.
 */
function sliceSection(md, headerRe) {
  const header = headerRe.exec(md);
  if (!header) return '';
  const rest = md.slice(header.index + header[0].length);
  const nextH2 = rest.search(/\n## /);
  return nextH2 === -1 ? rest : rest.slice(0, nextH2);
}
/**
 * Parse the "Featured" section of a README into template entries.
 *
 * Entries are delimited by "### No. N: Title" headings; the text between one
 * heading and the next (or the end of the block) is handed to
 * `parseEntryBody`. Bodies it rejects (returns a falsy value for) are skipped.
 *
 * @param {string} block - Markdown of the featured section.
 * @param {object} ctx - Source descriptor forwarded to `parseEntryBody`.
 * @returns {object[]} Parsed entries in document order.
 */
function parseFeaturedBlock(block, ctx) {
  const headings = Array.from(
    block.matchAll(/^### No\. \d+: (.+?)\s*$/gm),
    (hit) => ({
      start: hit.index,
      bodyStart: hit.index + hit[0].length,
      title: hit[1],
    }),
  );
  return headings.flatMap((heading, position) => {
    // An entry's body runs until the next heading, or to the end of the block.
    const bodyEnd = headings[position + 1]?.start ?? block.length;
    const entry = parseEntryBody(
      block.slice(heading.bodyStart, bodyEnd),
      heading.title,
      ctx,
      true,
    );
    return entry ? [entry] : [];
  });
}
/**
 * Parse the head of the "All Prompts" section into template entries.
 *
 * Headings are "### <Title>", optionally prefixed with "No. N:" (the prefix
 * is stripped from the title). Parsing stops once `ctx.sampleAllPrompts`
 * entries have been accepted; bodies rejected by `parseEntryBody` do not
 * count toward that cap.
 *
 * @param {string} block - Markdown of the "All Prompts" section.
 * @param {object} ctx - Source descriptor; `sampleAllPrompts` is the cap.
 * @returns {object[]} Up to `ctx.sampleAllPrompts` parsed entries, in order.
 */
function parseAllPromptsBlock(block, ctx) {
  const headings = [...block.matchAll(/^### (.+?)\s*$/gm)].map((hit) => ({
    start: hit.index,
    bodyStart: hit.index + hit[0].length,
    title: hit[1].replace(/^No\.\s*\d+:\s*/, '').trim(),
  }));
  const picked = [];
  for (const [position, heading] of headings.entries()) {
    // Written as a negated "<" so a missing cap yields no entries at all.
    if (!(picked.length < ctx.sampleAllPrompts)) break;
    const bodyEnd = headings[position + 1]?.start ?? block.length;
    const entry = parseEntryBody(
      block.slice(heading.bodyStart, bodyEnd),
      heading.title,
      ctx,
      false,
    );
    if (entry) picked.push(entry);
  }
  return picked;
}
/**
 * Build one template entry from the markdown that follows an entry heading.
 *
 * @param {string} body - Markdown between this heading and the next.
 * @param {string} title - Heading text (any "No. N:" prefix already removed).
 * @param {object} ctx - Source descriptor supplying `surface`,
 *   `defaultModel`, `defaultAspect`, `repo` and `license`.
 * @param {boolean} featured - Whether the entry came from the featured
 *   section. Currently unused; kept so existing callers stay valid.
 * @returns {object|null} The entry, or `null` when the body has no fenced
 *   "📝 Prompt" block, the prompt is shorter than 40 characters, or the
 *   title yields an empty slug.
 */
function parseEntryBody(body, title, ctx, featured) {
  const promptMatch = /#### 📝 Prompt\s*\n+```[a-zA-Z0-9_-]*\n([\s\S]*?)```/m.exec(
    body,
  );
  if (!promptMatch) return null;
  const prompt = promptMatch[1].trim();
  if (prompt.length < 40) return null;

  // The id becomes the output file name. A title with no ASCII letters or
  // digits slugifies to '', which would produce a file literally named
  // ".json" and make every such entry collide — skip those instead.
  const id = slugify(title);
  if (!id) return null;

  // Prefer the structured "#### 📖 Description" block; fall back to the
  // first blockquote line for entries that only carry a "> summary".
  const description = extractDescription(body) || extractBlockquoteSummary(body);
  const displayTitle = cleanTitle(title);

  return {
    id,
    surface: ctx.surface,
    title: displayTitle,
    summary: (description || displayTitle).slice(0, 200),
    category: inferCategory(title, ctx.surface),
    tags: inferTags(title, prompt, ctx.surface),
    model: ctx.defaultModel,
    aspect: ctx.defaultAspect,
    prompt,
    // `undefined` (rather than null) so JSON.stringify omits absent fields.
    previewImageUrl: extractFirstImage(body) ?? undefined,
    previewVideoUrl: extractVideoLink(body) ?? undefined,
    source: {
      repo: ctx.repo,
      license: ctx.license,
      author: extractAuthor(body) ?? undefined,
      url: extractSourceUrl(body) ?? undefined,
    },
  };
}
/**
 * Pull the paragraph under a "#### 📖 Description" heading.
 *
 * The paragraph must be followed by another "####" heading or a "---" rule;
 * internal whitespace (including line breaks) is collapsed to single spaces.
 *
 * @param {string} body - Markdown of one entry.
 * @returns {string} The description, or '' when the pattern does not match.
 */
function extractDescription(body) {
  const found = /#### 📖 Description\s*\n+([\s\S]*?)(?=\n+####|\n+---)/m.exec(body);
  if (!found) return '';
  return found[1].trim().replace(/\s+/g, ' ');
}
/**
 * Return the text of the first markdown blockquote line ("> ...") in `body`.
 *
 * @param {string} body - Markdown of one entry.
 * @returns {string} The quoted text without the ">" marker and surrounding
 *   whitespace, or '' when there is no blockquote line.
 */
function extractBlockquoteSummary(body) {
  const quoted = /^>\s*(.+?)\s*$/m.exec(body);
  return quoted === null ? '' : quoted[1].trim();
}
/**
 * Read the author's display name from a "**Author:** [Name](url)" fragment.
 * Works for both layouts: the featured bullet ("- **Author:** ...") and the
 * inline, pipe-separated line used by the "All Prompts" entries.
 *
 * @param {string} body - Markdown of one entry.
 * @returns {string|null} The trimmed link text, or null when absent.
 */
function extractAuthor(body) {
  const found = body.match(/\*\*Author:\*\*\s*\[([^\]]+)\]/);
  if (found === null) return null;
  const [, name] = found;
  return name.trim();
}
/**
 * Read the link target from a "**Source:** [label](url)" fragment.
 *
 * @param {string} body - Markdown of one entry.
 * @returns {string|null} The trimmed URL, or null when absent.
 */
function extractSourceUrl(body) {
  const found = body.match(/\*\*Source:\*\*\s*\[[^\]]+\]\(([^)]+)\)/);
  if (found === null) return null;
  const [, url] = found;
  return url.trim();
}
/**
 * Return the `src` of the first HTML `<img>` tag in `body`.
 * Markdown image syntax (`![alt](url)`) is not recognised.
 *
 * @param {string} body - Markdown/HTML of one entry.
 * @returns {string|null} The image URL, or null when no `<img src>` exists.
 */
function extractFirstImage(body) {
  const found = body.match(/<img[^>]*src=["']([^"']+)["']/);
  return found === null ? null : found[1];
}
/**
 * Find a playable video URL for an entry.
 *
 * Two strategies, tried in order:
 *  1. An explicit `href="....mp4"` link anywhere in the body (per the
 *     original author's notes, the featured entries link a GitHub release
 *     asset this way). The first such link is returned verbatim.
 *  2. A Cloudflare Stream thumbnail URL
 *     (`https://<host>.cloudflarestream.com/<id>/thumbnails/thumbnail.jpg`),
 *     from which `https://<host>/<id>/downloads/default.mp4` is rebuilt.
 *     NOTE(review): the original author states this endpoint redirects to a
 *     freshly signed CDN URL and works as a `<video src>`; that cannot be
 *     confirmed from this file.
 *
 * @param {string} body - Markdown/HTML of one entry.
 * @returns {string|null} The video URL, or null when neither pattern matches.
 */
function extractVideoLink(body) {
  const directMp4 = body.match(/href=["']([^"']+\.mp4)["']/);
  if (directMp4 !== null) {
    return directMp4[1];
  }

  const thumbnail = body.match(
    /https?:\/\/([a-z0-9-]+\.cloudflarestream\.com)\/([a-f0-9]{20,})\/thumbnails\/thumbnail\.jpg/i,
  );
  if (thumbnail === null) {
    return null;
  }
  const [, host, videoId] = thumbnail;
  return `https://${host}/${videoId}/downloads/default.mp4`;
}
/**
 * Tidy a heading for display on a card.
 *
 * Removes a trailing parenthetical — everything from the first "(" onward
 * when the title ends with ")" — then a single leading hyphen, then
 * surrounding whitespace. Category prefixes such as
 * "Profile / Avatar - ..." are left untouched.
 *
 * @param {string} raw - Heading text.
 * @returns {string} The cleaned title.
 */
function cleanTitle(raw) {
  const withoutParenthetical = raw.replace(/\s*\(.*\)\s*$/, '');
  const withoutLeadingDash = withoutParenthetical.replace(/^\s*[-]\s*/, '');
  return withoutLeadingDash.trim();
}
/**
 * Guess a gallery category from an entry title.
 *
 * Rules are tried top to bottom and the first match wins, so order matters.
 * Every keyword must start at a word boundary, and short stems that are
 * prefixes of unrelated words ("post", "ui", "app", "ad", "oil") must also
 * end at one. Without that anchoring "poster" was classified as a social
 * post, "guitar"/"quiet" as UI design and "shadow" as advertising.
 *
 * @param {string} title - Entry title (any casing).
 * @param {string} surface - 'image' selects the image rules; any other
 *   value selects the video rules.
 * @returns {string} Category label; 'Illustration' (image) or 'General'
 *   (video) when nothing matches.
 */
function inferCategory(title, surface) {
  const imageRules = [
    ['Profile / Avatar', /\b(?:profile|avatar|portrait)/],
    // `posts?\b` so that "poster" falls through to the Poster / Flyer rule.
    ['Social Media Post', /\b(?:social|posts?\b|carousel)/],
    ['Infographic', /\b(?:info[ -]?graphic|chart|diagram)/],
    ['YouTube Thumbnail', /\b(?:youtube|thumbnail)/],
    ['Comic / Storyboard', /\b(?:comic|storyboard|panel)/],
    ['Poster / Flyer', /\b(?:poster|flyer)/],
    ['App / Web Design', /\b(?:ui\b|apps?\b|web design|mockup|landing)/],
    ['Product Marketing', /\b(?:product|exploded|merch|packaging)/],
    ['Anime / Manga', /\b(?:anime|manga)/],
    ['Cinematic', /\b(?:cinematic|film)/],
    ['3D Render', /\b(?:3d|render|isometric)/],
    ['Sketch / Line Art', /\b(?:sketch|line art|pencil)/],
    ['Pixel Art', /\bpixel/],
    ['Painterly', /\b(?:oils?\b|water[- ]?color)/],
    ['Cyberpunk / Sci-Fi', /\b(?:cyberpunk|sci[- ]?fi|futuristic)/],
    ['Landscape', /\b(?:landscape|nature)/],
  ];
  const videoRules = [
    // NOTE: "film" here also captures "short film", so the 'Short Film /
    // Drama' rule is in practice reached through "drama" or "romance".
    ['Cinematic', /\b(?:cinematic|film|movie|noir)/],
    ['Anime', /\b(?:anime|manga)/],
    ['Advertising', /\b(?:ads?\b|advert|commercial|brand)/],
    ['UGC / Vlog', /\b(?:ugc|tutorial|vlog)/],
    ['Social / Meme', /\b(?:meme|tiktok|viral)/],
    ['Short Film / Drama', /\b(?:drama|short film|romance)/],
    ['Motion Graphics', /\b(?:intro|motion graphics|title sequence)/],
    ['VFX / Fantasy', /\b(?:vfx|fantasy|magic)/],
    ['Action', /\b(?:race|action|combat|fight)/],
  ];

  const isImage = surface === 'image';
  const lower = title.toLowerCase();
  const rules = isImage ? imageRules : videoRules;
  const hit = rules.find(([, pattern]) => pattern.test(lower));
  if (hit) return hit[0];
  return isImage ? 'Illustration' : 'General';
}
/**
 * Derive a short list of tags from an entry's title and prompt text.
 *
 * Each tag has a keyword pattern that is tested against the lowercased
 * "title prompt" text. Keywords must start at a word boundary, and stems
 * that are prefixes of unrelated words ("mage", "elf") must also end at
 * one. Without that anchoring "image" and "herself" produced the `fantasy`
 * tag and "purchase" produced `action`.
 *
 * @param {string} title - Entry title.
 * @param {string} prompt - Prompt body.
 * @param {string} surface - 'image' allows up to 4 tags; anything else 3.
 * @returns {string[]} Matching tags in table order, truncated to the limit.
 */
function inferTags(title, prompt, surface) {
  const blob = `${title} ${prompt}`.toLowerCase();
  const checks = [
    ['portrait', /\b(?:portrait|selfie|headshot)/],
    ['anime', /\b(?:anime|manga)/],
    ['cinematic', /\b(?:cinematic|filmic|grain|8k)/],
    ['cyberpunk', /\b(?:cyberpunk|neon)/],
    ['fantasy', /\b(?:fantasy|mages?\b|elf\b|dragon)/],
    ['3d-render', /\b(?:3d render|unreal engine|render)/],
    ['isometric', /\bisometric/],
    ['typography', /\b(?:typography|kerning|font|lettering)/],
    ['product', /\b(?:product|packaging|exploded)/],
    ['ugc', /\b(?:ugc|vlog|selfie cam)/],
    ['cinematic-romance', /\b(?:romance|pure love|romantic)/],
    ['action', /\b(?:chase|action|combat|race)/],
    ['food', /\b(?:food|coffee|kitchen)/],
    ['nature', /\b(?:forest|river|mountain|landscape)/],
  ];
  const lim = surface === 'image' ? 4 : 3;
  return checks
    .filter(([, re]) => re.test(blob))
    .map(([tag]) => tag)
    .slice(0, lim);
}
/**
 * Remove previously generated JSON files from `dir`.
 *
 * A `.json` file is kept only when it parses and its `source.repo` is set to
 * something other than `upstreamRepo` — i.e. a hand-authored template.
 * Files that cannot be read or parsed, and files without a `source.repo`,
 * are treated as generated and removed. Non-`.json` files are never touched.
 *
 * A missing directory is fine (nothing to clear). Any other failure — e.g.
 * `dir` is not a directory, or a file cannot be deleted — is propagated: a
 * stale generated file left behind would later be mistaken for a
 * hand-authored one and never refreshed.
 *
 * @param {string} dir - Directory to clean.
 * @param {string} upstreamRepo - `source.repo` value marking generated files.
 * @returns {Promise<void>}
 * @throws {Error} Any filesystem error other than `dir` not existing.
 */
async function clearDir(dir, upstreamRepo) {
  let files;
  try {
    files = await readdir(dir);
  } catch (err) {
    // Missing dir is fine — the caller creates it.
    if (err?.code === 'ENOENT') return;
    throw err;
  }
  for (const f of files) {
    if (!f.endsWith('.json')) continue;
    const filePath = path.join(dir, f);
    let keep = false;
    try {
      const parsed = JSON.parse(await readFile(filePath, 'utf8'));
      const repo = parsed?.source?.repo;
      if (repo && repo !== upstreamRepo) keep = true;
    } catch {
      // Unreadable or unparseable file — treat as generated and remove.
    }
    if (!keep) await unlink(filePath);
  }
}
/**
 * Write one pretty-printed `<id>.json` file per entry into `outDir`.
 *
 * The directory is created if needed and cleared of earlier generated output
 * via `clearDir`. Ids are then de-duplicated: an entry is skipped when a
 * `.json` file with its id is still on disk after clearing (a preserved
 * hand-authored template) or when an earlier entry in `entries` already used
 * the id — so with featured entries listed first, the featured one wins.
 *
 * @param {object[]} entries - Entries to persist; each needs an `id`.
 * @param {string} outDir - Destination directory.
 * @param {string} upstreamRepo - Forwarded to `clearDir`.
 * @returns {Promise<number>} Number of files written.
 */
async function writeAll(entries, outDir, upstreamRepo) {
  await mkdir(outDir, { recursive: true });
  await clearDir(outDir, upstreamRepo);

  // Ids already taken by files that survived the clear.
  const takenIds = new Set();
  try {
    for (const name of await readdir(outDir)) {
      if (name.endsWith('.json')) takenIds.add(name.replace(/\.json$/, ''));
    }
  } catch {
    // noop
  }

  // Keep the first occurrence of every id that is not taken yet.
  const fresh = entries.filter(({ id }) => {
    if (takenIds.has(id)) return false;
    takenIds.add(id);
    return true;
  });

  for (const entry of fresh) {
    const target = path.join(outDir, `${entry.id}.json`);
    await writeFile(target, `${JSON.stringify(entry, null, 2)}\n`, 'utf8');
  }
  return fresh.length;
}
/**
 * Import every configured source: fetch its README, parse the featured and
 * "All Prompts" sections, write the resulting templates, and log a summary
 * line per source plus a final total.
 *
 * A source that yields no entries is reported on stderr and sets the process
 * exit code to 1, but the remaining sources are still processed.
 *
 * @returns {Promise<void>}
 * @throws {Error} If a README cannot be fetched or a file cannot be written.
 */
async function main() {
  let totalImage = 0;
  let totalVideo = 0;
  for (const ctx of SOURCES) {
    const md = await fetchText(ctx.readmeUrl);
    // Try the known header spellings in turn; sliceSection returns '' on a
    // miss, so `||` falls through to the next candidate.
    const featuredBlock =
      sliceSection(md, /## 🔥 Featured Prompts/m) ||
      sliceSection(md, /## ⭐ Featured Prompts/m) ||
      sliceSection(md, /## Featured/m);
    const allPromptsBlock =
      sliceSection(md, /## (📋|🎬) All Prompts/m) ||
      sliceSection(md, /## All Prompts/m);
    const featured = parseFeaturedBlock(featuredBlock, ctx);
    const sampled = parseAllPromptsBlock(allPromptsBlock, ctx);
    // Featured first, so they win slug collisions in writeAll.
    const entries = [...featured, ...sampled];
    if (entries.length === 0) {
      console.error(`No entries parsed for ${ctx.repo}; check headers.`);
      process.exitCode = 1;
      continue;
    }
    const outDir = ctx.surface === 'image' ? OUT_IMAGE : OUT_VIDEO;
    const written = await writeAll(entries, outDir, ctx.repo);
    if (ctx.surface === 'image') totalImage += written;
    else totalVideo += written;
    // Separator added: the count and the output path used to be printed
    // fused together (e.g. "written=36prompt-templates/image").
    console.log(
      `[${ctx.repo}] featured=${featured.length} sampled=${sampled.length} written=${written} → ${path.relative(ROOT, outDir)}`,
    );
  }
  console.log(`\nDone. ${totalImage} image + ${totalVideo} video templates.`);
}
// Script entry point: run the import; on any unhandled failure print the
// error and exit with a non-zero status.
function reportFatal(failure) {
  console.error(failure);
  process.exit(1);
}
main().catch(reportFatal);