404 lines
14 KiB
JavaScript
Executable File
404 lines
14 KiB
JavaScript
Executable File
#!/usr/bin/env node
|
||
/**
 * Pulls down the upstream prompt corpora (CC BY 4.0) and emits curated
 * JSON files under `prompt-templates/{image,video}/`. Re-run anytime to
 * pick up new featured prompts.
 *
 * Usage:
 *   node scripts/import-prompt-templates.mjs
 *
 * Source READMEs:
 *   - https://github.com/YouMind-OpenLab/awesome-gpt-image-2 (CC BY 4.0)
 *   - https://github.com/YouMind-OpenLab/awesome-seedance-2-prompts (CC BY 4.0)
 *
 * Each upstream README is a structured catalog. Two patterns we care about:
 *
 *   Featured block:
 *     ### No. N: <Title>
 *     <badges>
 *     #### 📖 Description
 *     <description paragraph>
 *     #### 📝 Prompt
 *     ```
 *     <prompt body>
 *     ```
 *     #### 🎬 Video (or 🖼️ Generated Images)
 *     <preview img / video link>
 *     #### 📌 Details
 *     - **Author:** [Name](url)
 *     - **Source:** [Twitter Post](url)
 *     - **Published:** ...
 *
 *   All-Prompts block:
 *     ### <Title>
 *     <badges>
 *     > <description>
 *     #### 📝 Prompt
 *     ```
 *     <prompt body>
 *     ```
 *     <img src="<thumb>"> | <a href=...>
 *     **Author:** [Name](url) | **Source:** [Link](url) | **Published:** ...
 *
 * We pick the featured 6 from each repo (always good) plus a sampled slice
 * of the All-Prompts head so the gallery has breadth across categories.
 *
 * All output JSON carries a `source` block so attribution stays intact.
 */
|
||
|
||
import { mkdir, writeFile, readdir, unlink, readFile } from 'node:fs/promises';
|
||
import path from 'node:path';
|
||
import { fileURLToPath } from 'node:url';
|
||
|
||
// Directory containing this script (ES modules have no built-in __dirname).
const __dirname = path.dirname(fileURLToPath(import.meta.url));
// Project root: the parent of this script's directory.
const ROOT = path.resolve(__dirname, '..');
// Destination directories for the generated template JSON, one per surface.
const OUT_IMAGE = path.join(ROOT, 'prompt-templates', 'image');
const OUT_VIDEO = path.join(ROOT, 'prompt-templates', 'video');
|
||
|
||
// Upstream corpora to import, one descriptor per output surface. Each
// descriptor is passed around as `ctx` by the parsing and writing functions.
const SOURCES = [
  {
    // Which gallery the entries belong to; also selects the output directory.
    surface: 'image',
    // Recorded in every entry's `source` block for attribution.
    repo: 'YouMind-OpenLab/awesome-gpt-image-2',
    license: 'CC-BY-4.0',
    readmeUrl:
      'https://raw.githubusercontent.com/YouMind-OpenLab/awesome-gpt-image-2/main/README.md',
    // Copied verbatim onto every entry parsed from this README.
    defaultModel: 'gpt-image-2',
    defaultAspect: '1:1',
    // Cap how many entries we pull from the "All Prompts" tail to keep the
    // committed dataset reviewable. The featured block is always taken.
    sampleAllPrompts: 30,
  },
  {
    surface: 'video',
    repo: 'YouMind-OpenLab/awesome-seedance-2-prompts',
    license: 'CC-BY-4.0',
    readmeUrl:
      'https://raw.githubusercontent.com/YouMind-OpenLab/awesome-seedance-2-prompts/main/README.md',
    defaultModel: 'seedance-2.0',
    defaultAspect: '16:9',
    sampleAllPrompts: 30,
  },
];
|
||
|
||
/**
 * Download a URL with the global `fetch` and return the response body as text.
 *
 * @param {string} url - Address to request.
 * @returns {Promise<string>} The response body.
 * @throws {Error} When the response's `ok` flag is false; the message
 *   carries the URL and the HTTP status.
 */
async function fetchText(url) {
  const response = await fetch(url);
  if (response.ok) {
    return response.text();
  }
  throw new Error(`failed ${url}: ${response.status}`);
}
|
||
|
||
/**
 * Turn arbitrary text into a lowercase, dash-separated ASCII slug of at most
 * 64 characters.
 *
 * Letters with diacritics are decomposed (NFKD) and their combining marks
 * dropped, so "é" becomes "e". Every run of characters outside `[a-z0-9]`
 * collapses into a single dash. Text with no ASCII letters or digits yields
 * an empty string.
 *
 * @param {string} input - Text to convert.
 * @returns {string} The slug; never starts or ends with a dash.
 */
function slugify(input) {
  return input
    .toLowerCase()
    .normalize('NFKD')
    .replace(/[\u0300-\u036f]/g, '')
    .replace(/[^a-z0-9]+/g, '-')
    .replace(/^-+|-+$/g, '')
    .slice(0, 64)
    // Truncation can land right after a separator; trim again so the slug
    // (and the file name derived from it) never ends in a dash.
    .replace(/-+$/, '');
}
|
||
|
||
/**
 * Cut one section out of a markdown document.
 *
 * The section starts right after the first match of `headerRe` and runs up
 * to (but not including) the next line that begins with "## ", or to the end
 * of the document when no such line follows.
 *
 * @param {string} md - Full markdown text.
 * @param {RegExp} headerRe - Pattern locating the section heading.
 * @returns {string} The section text, or '' when the heading is not found.
 */
function sliceSection(md, headerRe) {
  const heading = headerRe.exec(md);
  if (heading === null) return '';
  const remainder = md.slice(heading.index + heading[0].length);
  const nextH2 = remainder.search(/\n## /);
  return nextH2 === -1 ? remainder : remainder.slice(0, nextH2);
}
|
||
|
||
/**
 * Parse every "### No. N: Title" entry found in a Featured section.
 *
 * The text between one such heading and the next (or the end of the block)
 * is handed to `parseEntryBody`; entries it rejects (returns a falsy value
 * for) are left out of the result.
 *
 * @param {string} block - Markdown of the Featured section.
 * @param {object} ctx - Source descriptor forwarded to `parseEntryBody`.
 * @returns {object[]} Parsed entries in document order.
 */
function parseFeaturedBlock(block, ctx) {
  const headings = [...block.matchAll(/^### No\. \d+: (.+?)\s*$/gm)].map((hit) => ({
    start: hit.index,
    bodyStart: hit.index + hit[0].length,
    title: hit[1],
  }));

  const entries = [];
  headings.forEach((heading, position) => {
    const following = headings[position + 1];
    const bodyEnd = following ? following.start : block.length;
    const parsed = parseEntryBody(block.slice(heading.bodyStart, bodyEnd), heading.title, ctx, true);
    if (parsed) entries.push(parsed);
  });
  return entries;
}
|
||
|
||
/**
 * Parse entries from the "All Prompts" section until `ctx.sampleAllPrompts`
 * entries have been accepted.
 *
 * Headings look like "### <Title>", optionally prefixed with "No. N:"; the
 * prefix is stripped from the title before it is handed to `parseEntryBody`.
 * Entries rejected by `parseEntryBody` do not count toward the cap.
 *
 * @param {string} block - Markdown of the All Prompts section.
 * @param {object} ctx - Source descriptor; `sampleAllPrompts` is the cap.
 * @returns {object[]} Accepted entries in document order.
 */
function parseAllPromptsBlock(block, ctx) {
  const headings = [...block.matchAll(/^### (.+?)\s*$/gm)].map((hit) => ({
    start: hit.index,
    bodyStart: hit.index + hit[0].length,
    title: hit[1].replace(/^No\.\s*\d+:\s*/, '').trim(),
  }));

  const entries = [];
  for (const [position, heading] of headings.entries()) {
    // Stop as soon as the cap is reached (or is not a comparable number).
    if (!(entries.length < ctx.sampleAllPrompts)) break;
    const following = headings[position + 1];
    const bodyEnd = following ? following.start : block.length;
    const parsed = parseEntryBody(block.slice(heading.bodyStart, bodyEnd), heading.title, ctx, false);
    if (parsed) entries.push(parsed);
  }
  return entries;
}
|
||
|
||
/**
 * Build one template record from the markdown that follows an entry heading.
 *
 * An entry is rejected (null) when it has no fenced block under
 * "#### 📝 Prompt", when the trimmed prompt is shorter than 40 characters,
 * or when the title produces an empty slug (e.g. a title with no ASCII
 * letters or digits). Such an entry would otherwise get `id: ''` and be
 * written to a file named ".json", colliding with every other such entry.
 *
 * @param {string} body - Markdown between this entry's heading and the next.
 * @param {string} title - Heading text for the entry.
 * @param {object} ctx - Source descriptor; reads `surface`, `defaultModel`,
 *   `defaultAspect`, `repo` and `license`.
 * @param {boolean} featured - Passed by both callers; not read here.
 * @returns {object|null} The template record, or null when rejected.
 */
function parseEntryBody(body, title, ctx, featured) {
  const promptMatch = /#### 📝 Prompt\s*\n+```[a-zA-Z0-9_-]*\n([\s\S]*?)```/m.exec(body);
  if (!promptMatch) return null;
  const prompt = promptMatch[1].trim();
  if (prompt.length < 40) return null;

  // The id doubles as the output file name, so it must not be empty.
  const id = slugify(title);
  if (id === '') return null;

  // The image README structures every entry — featured AND in-list —
  // with a "#### 📖 Description" block. The seedance README only does
  // that for featured; in-list entries fall back to a leading blockquote.
  // Try the structured form first regardless, then fall back.
  const description = extractDescription(body) || extractBlockquoteSummary(body);
  const author = extractAuthor(body);
  const sourceUrl = extractSourceUrl(body);
  const previewImage = extractFirstImage(body);
  const previewVideo = extractVideoLink(body);
  const displayTitle = cleanTitle(title);

  return {
    id,
    surface: ctx.surface,
    title: displayTitle,
    summary: (description || displayTitle).slice(0, 200),
    category: inferCategory(title, ctx.surface),
    tags: inferTags(title, prompt, ctx.surface),
    model: ctx.defaultModel,
    aspect: ctx.defaultAspect,
    prompt,
    // null → undefined so JSON.stringify leaves the key out entirely.
    previewImageUrl: previewImage ?? undefined,
    previewVideoUrl: previewVideo ?? undefined,
    source: {
      repo: ctx.repo,
      license: ctx.license,
      author: author ?? undefined,
      url: sourceUrl ?? undefined,
    },
  };
}
|
||
|
||
/**
 * Pull the paragraph under "#### 📖 Description" out of an entry body.
 *
 * The description runs until the next "####" heading or "---" rule; when
 * neither follows, nothing is matched. Whitespace runs (including line
 * breaks) are collapsed to single spaces.
 *
 * @param {string} body - Markdown of one entry.
 * @returns {string} The single-line description, or '' when not found.
 */
function extractDescription(body) {
  const found = /#### 📖 Description\s*\n+([\s\S]*?)(?=\n+####|\n+---)/m.exec(body);
  if (found === null) return '';
  const paragraph = found[1].trim();
  return paragraph.replace(/\s+/g, ' ');
}
|
||
|
||
/**
 * Return the text of the first markdown blockquote line ("> ...") in the
 * entry body, trimmed.
 *
 * @param {string} body - Markdown of one entry.
 * @returns {string} The quoted text, or '' when no such line exists.
 */
function extractBlockquoteSummary(body) {
  const quoted = /^>\s*(.+?)\s*$/m.exec(body);
  if (quoted === null) return '';
  return quoted[1].trim();
}
|
||
|
||
/**
 * Read the author's display name from a "**Author:** [Name](url)" fragment.
 * Both the featured list form ("- **Author:** ...") and the inline
 * all-prompts form ("**Author:** ... | ...") match.
 *
 * @param {string} body - Markdown of one entry.
 * @returns {string|null} The trimmed link text, or null when absent.
 */
function extractAuthor(body) {
  const credit = /\*\*Author:\*\*\s*\[([^\]]+)\]/.exec(body);
  if (credit === null) return null;
  return credit[1].trim();
}
|
||
|
||
/**
 * Read the link target from a "**Source:** [label](url)" fragment.
 *
 * @param {string} body - Markdown of one entry.
 * @returns {string|null} The trimmed URL, or null when absent.
 */
function extractSourceUrl(body) {
  const link = /\*\*Source:\*\*\s*\[[^\]]+\]\(([^)]+)\)/.exec(body);
  if (link === null) return null;
  return link[1].trim();
}
|
||
|
||
/**
 * Return the `src` attribute of the first `<img>` tag in the entry body.
 *
 * @param {string} body - Markdown/HTML of one entry.
 * @returns {string|null} The image URL, or null when there is no `<img>`
 *   tag with a quoted `src`.
 */
function extractFirstImage(body) {
  const tag = /<img[^>]*src=["']([^"']+)["']/.exec(body);
  return tag ? tag[1] : null;
}
|
||
|
||
/**
 * Find a playable mp4 URL for an entry.
 *
 * Two sources are tried in order:
 *   1. The first `href="...mp4"` attribute in the body, returned as-is.
 *      Per the original author's notes, featured entries link a GitHub
 *      release asset this way.
 *   2. A Cloudflare Stream thumbnail URL of the form
 *      `https://<host>.cloudflarestream.com/<hex id>/thumbnails/thumbnail.jpg`,
 *      rewritten to `https://<host>/<hex id>/downloads/default.mp4`.
 *      NOTE(review): the original comments say this endpoint redirects to a
 *      signed CDN URL and is usable directly as a <video src>; that cannot
 *      be confirmed from this file.
 *
 * @param {string} body - Markdown/HTML of one entry.
 * @returns {string|null} The video URL, or null when neither form is found.
 */
function extractVideoLink(body) {
  const directMp4 = /href=["']([^"']+\.mp4)["']/.exec(body);
  if (directMp4 !== null) {
    return directMp4[1];
  }

  const thumbnail =
    /https?:\/\/([a-z0-9-]+\.cloudflarestream\.com)\/([a-f0-9]{20,})\/thumbnails\/thumbnail\.jpg/i.exec(
      body,
    );
  if (thumbnail === null) {
    return null;
  }
  const [, host, videoId] = thumbnail;
  return `https://${host}/${videoId}/downloads/default.mp4`;
}
|
||
|
||
/**
 * Tidy a heading for display on a card.
 *
 * Removes a trailing parenthesised suffix (everything from the first "(" to
 * a ")" at the end of the string), then a leading "-" or "–" together with
 * the whitespace around it, then trims. A category prefix such as
 * "Profile / Avatar - ..." is NOT removed by this function.
 *
 * @param {string} raw - Heading text.
 * @returns {string} The cleaned title.
 */
function cleanTitle(raw) {
  const withoutSuffix = raw.replace(/\s*\(.*\)\s*$/, '');
  const withoutLeadingDash = withoutSuffix.replace(/^\s*[-–]\s*/, '');
  return withoutLeadingDash.trim();
}
|
||
|
||
// Ordered keyword → category rules for image titles; the first match wins.
// Short tokens ("post", "ui", "app", "oil") are anchored on word boundaries
// so they no longer fire inside unrelated words ("poster", "fruit", "happy",
// "foil"). In particular "poster" now reaches the Poster / Flyer rule instead
// of being captured by the earlier Social Media Post rule.
const IMAGE_CATEGORY_RULES = [
  [/profile|avatar|portrait/, 'Profile / Avatar'],
  [/social|\bposts?\b|carousel/, 'Social Media Post'],
  [/info[ -]?graphic|chart|diagram/, 'Infographic'],
  [/youtube|thumbnail/, 'YouTube Thumbnail'],
  [/comic|storyboard|panel/, 'Comic / Storyboard'],
  [/poster|flyer/, 'Poster / Flyer'],
  [/\bui\b|\bapps?\b|web design|mockup|landing/, 'App / Web Design'],
  [/product|exploded|merch|packaging/, 'Product Marketing'],
  [/anime|manga/, 'Anime / Manga'],
  [/cinematic|film/, 'Cinematic'],
  [/3d|render|isometric/, '3D Render'],
  [/sketch|line art|pencil/, 'Sketch / Line Art'],
  [/pixel/, 'Pixel Art'],
  [/\boils?\b|water[- ]?color/, 'Painterly'],
  [/cyberpunk|sci[- ]?fi|futuristic/, 'Cyberpunk / Sci-Fi'],
  [/landscape|nature/, 'Landscape'],
];

// Same idea for video titles. "ad" and "race" are word-anchored so that
// "shadow", "road" or "bracelet" no longer select Advertising / Action.
const VIDEO_CATEGORY_RULES = [
  [/cinematic|film|movie|noir/, 'Cinematic'],
  [/anime|manga/, 'Anime'],
  [/\bads?\b|advert|commercial|brand/, 'Advertising'],
  [/ugc|tutorial|vlog/, 'UGC / Vlog'],
  [/meme|tiktok|viral/, 'Social / Meme'],
  [/drama|short film|romance/, 'Short Film / Drama'],
  [/intro|motion graphics|title sequence/, 'Motion Graphics'],
  [/vfx|fantasy|magic/, 'VFX / Fantasy'],
  [/\brace[rs]?\b|action|combat|fight/, 'Action'],
];

/**
 * Guess a gallery category from an entry title by keyword matching.
 *
 * The title is lowercased and tested against an ordered rule list; the
 * first matching rule decides. Any surface other than 'image' uses the
 * video rules.
 *
 * @param {string} title - Entry heading text.
 * @param {string} surface - 'image' or anything else (treated as video).
 * @returns {string} The category label; 'Illustration' (image) or
 *   'General' (video) when no rule matches.
 */
function inferCategory(title, surface) {
  const lower = title.toLowerCase();
  const isImage = surface === 'image';
  const rules = isImage ? IMAGE_CATEGORY_RULES : VIDEO_CATEGORY_RULES;
  const hit = rules.find(([pattern]) => pattern.test(lower));
  if (hit) return hit[1];
  return isImage ? 'Illustration' : 'General';
}
|
||
|
||
/**
 * Derive a short list of tags from an entry's title and prompt text.
 *
 * Title and prompt are joined and lowercased, then tested against a fixed,
 * ordered list of keyword patterns. Tags keep that order and are capped at
 * 4 for the 'image' surface and 3 for anything else.
 *
 * Short keywords are anchored on word boundaries: unanchored, "mage" matched
 * every prompt containing "image" and "elf" matched "shelf"/"herself", which
 * tagged nearly everything as fantasy; "race", "action", "chase" and "river"
 * had the same problem ("bracelet", "interaction", "purchase", "driver").
 *
 * @param {string} title - Entry heading text.
 * @param {string} prompt - Prompt body.
 * @param {string} surface - 'image' or anything else (treated as video).
 * @returns {string[]} Matching tags, in rule order, truncated to the cap.
 */
function inferTags(title, prompt, surface) {
  const blob = `${title} ${prompt}`.toLowerCase();
  const checks = [
    ['portrait', /portrait|selfie|headshot/],
    ['anime', /anime|manga/],
    ['cinematic', /cinematic|filmic|grain|8k/],
    ['cyberpunk', /cyberpunk|neon/],
    ['fantasy', /fantasy|\bmages?\b|\bel(?:f|ves)\b|dragon/],
    ['3d-render', /3d render|unreal engine|render/],
    ['isometric', /isometric/],
    ['typography', /typography|kerning|font|lettering/],
    ['product', /product|packaging|exploded/],
    ['ugc', /ugc|vlog|selfie cam/],
    ['cinematic-romance', /romance|pure love|romantic/],
    ['action', /\bchase[sd]?\b|\baction\b|combat|\brace[rs]?\b/],
    ['food', /food|coffee|kitchen/],
    ['nature', /forest|\brivers?\b|mountain|landscape/],
  ];
  const limit = surface === 'image' ? 4 : 3;
  return checks
    .filter(([, pattern]) => pattern.test(blob))
    .map(([tag]) => tag)
    .slice(0, limit);
}
|
||
|
||
/**
 * Remove previously generated JSON files from `dir`.
 *
 * A `.json` file is kept only when it parses and carries a truthy
 * `source.repo` different from `upstreamRepo`; those are treated as
 * hand-authored templates and preserved so first-party prompts aren't wiped
 * on re-run. Every other `.json` file (including ones that cannot be read
 * or parsed, or that have no `source.repo`) is deleted. Files with other
 * extensions are left alone.
 *
 * A missing directory is not an error. Any other failure (for example a
 * failed `unlink`) is thrown rather than swallowed: stale files left behind
 * silently would later block fresh entries with the same id from being
 * written.
 *
 * @param {string} dir - Directory to clean.
 * @param {string} upstreamRepo - `source.repo` value marking generated files.
 * @returns {Promise<void>}
 * @throws {Error} On any filesystem error other than `dir` not existing.
 */
async function clearDir(dir, upstreamRepo) {
  let files;
  try {
    files = await readdir(dir);
  } catch (err) {
    // Missing dir is fine — the caller creates it.
    if (err?.code === 'ENOENT') return;
    throw err;
  }

  for (const f of files) {
    if (!f.endsWith('.json')) continue;
    const filePath = path.join(dir, f);
    let keep = false;
    try {
      const parsed = JSON.parse(await readFile(filePath, 'utf8'));
      const repo = parsed?.source?.repo;
      if (repo && repo !== upstreamRepo) keep = true;
    } catch {
      // Unreadable or unparseable file — treat as generated and remove.
    }
    if (!keep) await unlink(filePath);
  }
}
|
||
|
||
/**
 * Write each entry to `<outDir>/<entry.id>.json`, skipping duplicate ids.
 *
 * The directory is created if needed and cleaned with `clearDir` first.
 * Ids of `.json` files still present after that cleanup are reserved, so
 * those files are never overwritten. Among the incoming entries the first
 * one with a given id wins.
 *
 * @param {object[]} entries - Records to write; each needs an `id`.
 * @param {string} outDir - Destination directory.
 * @param {string} upstreamRepo - Forwarded to `clearDir`.
 * @returns {Promise<number>} How many files were written.
 */
async function writeAll(entries, outDir, upstreamRepo) {
  await mkdir(outDir, { recursive: true });
  await clearDir(outDir, upstreamRepo);

  // Start with the ids of files that survived the cleanup.
  const takenIds = new Set();
  try {
    const survivors = await readdir(outDir);
    survivors
      .filter((name) => name.endsWith('.json'))
      .forEach((name) => takenIds.add(name.replace(/\.json$/, '')));
  } catch {
    // noop
  }

  let written = 0;
  for (const entry of entries) {
    if (takenIds.has(entry.id)) continue;
    takenIds.add(entry.id);
    const target = path.join(outDir, `${entry.id}.json`);
    const payload = `${JSON.stringify(entry, null, 2)}\n`;
    await writeFile(target, payload, 'utf8');
    written += 1;
  }
  return written;
}
|
||
|
||
/**
 * Import every configured source: download its README, parse the Featured
 * and All Prompts sections, and write the resulting entries to the
 * surface's output directory.
 *
 * A source that yields no entries is reported on stderr and sets
 * `process.exitCode` to 1; the remaining sources are still processed.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const totals = { image: 0, video: 0 };

  for (const ctx of SOURCES) {
    const md = await fetchText(ctx.readmeUrl);

    // Try each heading spelling in turn and stop at the first non-empty section.
    const firstSection = (patterns) => {
      for (const pattern of patterns) {
        const section = sliceSection(md, pattern);
        if (section) return section;
      }
      return '';
    };

    const featuredBlock = firstSection([
      /## 🔥 Featured Prompts/m,
      /## ⭐ Featured Prompts/m,
      /## Featured/m,
    ]);
    const allPromptsBlock = firstSection([/## (📋|🎬) All Prompts/m, /## All Prompts/m]);

    const featured = parseFeaturedBlock(featuredBlock, ctx);
    const sampled = parseAllPromptsBlock(allPromptsBlock, ctx);
    // Featured entries go first so they win id collisions when written.
    const entries = featured.concat(sampled);
    if (entries.length === 0) {
      console.error(`No entries parsed for ${ctx.repo}; check headers.`);
      process.exitCode = 1;
      continue;
    }

    const isImage = ctx.surface === 'image';
    const outDir = isImage ? OUT_IMAGE : OUT_VIDEO;
    const written = await writeAll(entries, outDir, ctx.repo);
    if (isImage) {
      totals.image += written;
    } else {
      totals.video += written;
    }
    console.log(
      `[${ctx.repo}] featured=${featured.length} sampled=${sampled.length} written=${written} → ${path.relative(ROOT, outDir)}`,
    );
  }

  console.log(`\nDone. ${totals.image} image + ${totals.video} video templates.`);
}
|
||
|
||
// Script entry point: run the import; on any uncaught failure print the
// error and exit with status 1.
main().catch((error) => {
  console.error(error);
  process.exit(1);
});
|