first-commit
ci / Validate workspace (push) Has been cancelled
landing-page-ci / Validate landing page (push) Has been cancelled
landing-page-deploy / Deploy landing page (push) Has been cancelled
github-metrics / Generate repository metrics SVG (push) Has been cancelled
refresh-contributors-wall / Refresh contributors wall cache bust (push) Waiting to run

This commit is contained in:
Zakaria
2026-05-04 14:58:14 -04:00
commit a46764fb1b
1210 changed files with 233231 additions and 0 deletions
+403
View File
@@ -0,0 +1,403 @@
#!/usr/bin/env node
/**
* Pulls down the upstream prompt corpora (CC BY 4.0) and emits curated
* JSON files under `prompt-templates/{image,video}/`. Re-run anytime to
* pick up new featured prompts.
*
* Usage:
* node scripts/import-prompt-templates.mjs
*
* Source READMEs:
* - https://github.com/YouMind-OpenLab/awesome-gpt-image-2 (CC BY 4.0)
* - https://github.com/YouMind-OpenLab/awesome-seedance-2-prompts (CC BY 4.0)
*
* Each upstream README is a structured catalog. Two patterns we care about:
*
* Featured block:
* ### No. N: <Title>
* <badges>
* #### 📖 Description
* <description paragraph>
* #### 📝 Prompt
* ```
* <prompt body>
* ```
* #### 🎬 Video (or 🖼️ Generated Images)
* <preview img / video link>
* #### 📌 Details
* - **Author:** [Name](url)
* - **Source:** [Twitter Post](url)
* - **Published:** ...
*
* All-Prompts block:
* ### <Title>
* <badges>
* > <description>
* #### 📝 Prompt
* ```
* <prompt body>
* ```
* <img src="<thumb>"> | <a href=...>
* **Author:** [Name](url) | **Source:** [Link](url) | **Published:** ...
*
* We pick the featured 6 from each repo (always good) plus a sampled slice
* of the All-Prompts head so the gallery has breadth across categories.
*
* All output JSON carries a `source` block so attribution stays intact.
*/
import { mkdir, writeFile, readdir, unlink, readFile } from 'node:fs/promises';
import path from 'node:path';
import { fileURLToPath } from 'node:url';
// Every output path is anchored at the repository root (the parent of the
// directory that holds this script), so the working directory does not matter.
const __dirname = path.dirname(fileURLToPath(import.meta.url));
const ROOT = path.resolve(__dirname, '..');
const [OUT_IMAGE, OUT_VIDEO] = ['image', 'video'].map((surface) =>
  path.join(ROOT, 'prompt-templates', surface),
);

// One row per upstream catalog: [surface, repo name, default model, default aspect].
// Both catalogs live under the YouMind-OpenLab organisation and share a license.
const SOURCES = [
  ['image', 'awesome-gpt-image-2', 'gpt-image-2', '1:1'],
  ['video', 'awesome-seedance-2-prompts', 'seedance-2.0', '16:9'],
].map(([surface, repoName, defaultModel, defaultAspect]) => {
  const repo = `YouMind-OpenLab/${repoName}`;
  return {
    surface,
    repo,
    license: 'CC-BY-4.0',
    readmeUrl: `https://raw.githubusercontent.com/${repo}/main/README.md`,
    defaultModel,
    defaultAspect,
    // Cap how many entries we pull from the "All Prompts" tail to keep the
    // committed dataset reviewable. The featured block is always taken.
    sampleAllPrompts: 30,
  };
});
/**
 * Downloads `url` with the global `fetch` and resolves with the body as text.
 *
 * @param {string} url - Address to request.
 * @returns {Promise<string>} The response body.
 * @throws {Error} `failed <url>: <status>` when `response.ok` is false.
 */
async function fetchText(url) {
  const response = await fetch(url);
  if (response.ok) {
    return response.text();
  }
  throw new Error(`failed ${url}: ${response.status}`);
}
/**
 * Turns arbitrary text into a lowercase, hyphen-separated ASCII slug of at
 * most 64 characters.
 *
 * @param {string} input - Text to slugify (e.g. an entry title).
 * @returns {string} The slug; `''` when `input` contains no ASCII letters or
 *   digits after normalisation.
 */
function slugify(input) {
  return (
    input
      .toLowerCase()
      // Decompose accented characters so the combining marks can be dropped.
      .normalize('NFKD')
      .replace(/[\u0300-\u036f]/g, '')
      // Collapse every run of other characters into a single separator.
      .replace(/[^a-z0-9]+/g, '-')
      .replace(/^-+|-+$/g, '')
      .slice(0, 64)
      // Truncation can cut right after a separator; trim again so the slug
      // (and the file name built from it) never ends with "-".
      .replace(/-+$/, '')
  );
}
/**
 * Returns the markdown that follows the first match of `headerRe`, stopping
 * just before the newline that introduces the next "## " heading. When no
 * later H2 exists the rest of the document is returned.
 *
 * @param {string} md - Full markdown document.
 * @param {RegExp} headerRe - Pattern locating the section header.
 * @returns {string} The section text, or `''` when the header is not found.
 */
function sliceSection(md, headerRe) {
  const found = headerRe.exec(md);
  if (found === null) return '';
  const rest = md.slice(found.index + found[0].length);
  const nextH2 = rest.indexOf('\n## ');
  return nextH2 === -1 ? rest : rest.slice(0, nextH2);
}
/**
 * Parses the featured section: every "### No. N: Title" heading opens an
 * entry whose body runs to the next such heading (or the end of the block).
 *
 * @param {string} block - Markdown of the featured section.
 * @param {object} ctx - Source descriptor forwarded to `parseEntryBody`.
 * @returns {object[]} Entries for which `parseEntryBody` returned a value.
 */
function parseFeaturedBlock(block, ctx) {
  const headings = [...block.matchAll(/^### No\. \d+: (.+?)\s*$/gm)];
  return headings.flatMap((heading, position) => {
    const bodyStart = heading.index + heading[0].length;
    const following = headings[position + 1];
    const bodyEnd = following ? following.index : block.length;
    const entry = parseEntryBody(block.slice(bodyStart, bodyEnd), heading[1], ctx, true);
    return entry ? [entry] : [];
  });
}
/**
 * Parses the "All Prompts" section. Headings are "### <Title>", optionally
 * prefixed with "No. N:" (stripped here). Parsing stops once
 * `ctx.sampleAllPrompts` entries have been collected.
 *
 * @param {string} block - Markdown of the "All Prompts" section.
 * @param {object} ctx - Source descriptor; `sampleAllPrompts` caps the result
 *   and the whole object is forwarded to `parseEntryBody`.
 * @returns {object[]} Up to `ctx.sampleAllPrompts` parsed entries, in order.
 */
function parseAllPromptsBlock(block, ctx) {
  const headings = Array.from(block.matchAll(/^### (.+?)\s*$/gm), (hit) => ({
    start: hit.index,
    bodyStart: hit.index + hit[0].length,
    title: hit[1].replace(/^No\.\s*\d+:\s*/, '').trim(),
  }));

  const picked = [];
  for (const [position, heading] of headings.entries()) {
    // Stop as soon as the quota is reached (or is not a usable number).
    if (!(picked.length < ctx.sampleAllPrompts)) break;
    const bodyEnd = headings[position + 1]?.start ?? block.length;
    const entry = parseEntryBody(
      block.slice(heading.bodyStart, bodyEnd),
      heading.title,
      ctx,
      false,
    );
    if (entry) picked.push(entry);
  }
  return picked;
}
/**
 * Builds one template record from the markdown found under an entry heading.
 *
 * @param {string} body - Markdown between this entry's heading and the next.
 * @param {string} title - Heading text as supplied by the caller.
 * @param {object} ctx - Source descriptor; reads `surface`, `defaultModel`,
 *   `defaultAspect`, `repo` and `license`.
 * @param {boolean} featured - Whether the entry came from the featured
 *   section. Currently unused; kept for interface compatibility.
 * @returns {object|null} The record, or `null` when the entry is unusable:
 *   no fenced "📝 Prompt" block, a prompt shorter than 40 characters, or a
 *   title that produces an empty slug.
 */
function parseEntryBody(body, title, ctx, featured) {
  const promptMatch = /#### 📝 Prompt\s*\n+```[a-zA-Z0-9_-]*\n([\s\S]*?)```/m.exec(
    body,
  );
  if (!promptMatch) return null;
  const prompt = promptMatch[1].trim();
  if (prompt.length < 40) return null;

  // The id doubles as the output file name. A title without any ASCII
  // letters or digits slugifies to '' and would be written as ".json", so
  // treat it like any other unusable entry.
  const id = slugify(title);
  if (id === '') return null;

  // The image README structures every entry — featured AND in-list —
  // with a "#### 📖 Description" block. The seedance README only does
  // that for featured; in-list entries fall back to a leading blockquote.
  // Try the structured form first regardless, then fall back.
  const description =
    extractDescription(body) || extractBlockquoteSummary(body);
  const author = extractAuthor(body);
  const sourceUrl = extractSourceUrl(body);
  const previewImage = extractFirstImage(body);
  const previewVideo = extractVideoLink(body);
  const displayTitle = cleanTitle(title);

  return {
    id,
    surface: ctx.surface,
    title: displayTitle,
    summary: (description || displayTitle).slice(0, 200),
    category: inferCategory(title, ctx.surface),
    tags: inferTags(title, prompt, ctx.surface),
    model: ctx.defaultModel,
    aspect: ctx.defaultAspect,
    prompt,
    // `undefined` (rather than null) so JSON.stringify omits absent fields.
    previewImageUrl: previewImage ?? undefined,
    previewVideoUrl: previewVideo ?? undefined,
    source: {
      repo: ctx.repo,
      license: ctx.license,
      author: author ?? undefined,
      url: sourceUrl ?? undefined,
    },
  };
}
/**
 * Reads the paragraph under "#### 📖 Description", up to the next "####"
 * heading or "---" rule, with all whitespace runs collapsed to one space.
 *
 * @param {string} body - Entry markdown.
 * @returns {string} The description, or `''` when the section is missing or
 *   is not followed by a "####" heading / "---" rule.
 */
function extractDescription(body) {
  const found = body.match(/#### 📖 Description\s*\n+([\s\S]*?)(?=\n+####|\n+---)/m);
  if (!found) return '';
  return found[1].trim().replace(/\s+/g, ' ');
}
/**
 * Returns the text of the first markdown blockquote line ("> ...") in `body`.
 *
 * @param {string} body - Entry markdown.
 * @returns {string} The quoted text without the marker, or `''` if none.
 */
function extractBlockquoteSummary(body) {
  const quote = body.match(/^>\s*(.+?)\s*$/m);
  return quote ? quote[1].trim() : '';
}
/**
 * Pulls the author's display name out of a "**Author:** [Name](url)" marker.
 * Covers both the featured bullet form and the inline all-prompts form.
 *
 * @param {string} body - Entry markdown.
 * @returns {string|null} The trimmed link text, or `null` when absent.
 */
function extractAuthor(body) {
  const hit = /\*\*Author:\*\*\s*\[([^\]]+)\]/.exec(body);
  if (hit === null) return null;
  return hit[1].trim();
}
/**
 * Pulls the link target out of a "**Source:** [Label](url)" marker.
 *
 * @param {string} body - Entry markdown.
 * @returns {string|null} The trimmed URL, or `null` when no marker is found.
 */
function extractSourceUrl(body) {
  const hit = /\*\*Source:\*\*\s*\[[^\]]+\]\(([^)]+)\)/.exec(body);
  if (hit === null) return null;
  return hit[1].trim();
}
/**
 * Finds the `src` of the first HTML `<img>` tag in `body`.
 *
 * @param {string} body - Entry markdown (may embed raw HTML).
 * @returns {string|null} The attribute value, or `null` when there is no
 *   `<img>` tag with a quoted, non-empty `src`.
 */
function extractFirstImage(body) {
  return /<img[^>]*src=["']([^"']+)["']/.exec(body)?.[1] ?? null;
}
/**
 * Resolves a playable video URL for an entry.
 *
 * @param {string} body - Entry markdown (may embed raw HTML).
 * @returns {string|null} A direct `.mp4` link when one is present, otherwise
 *   a download URL derived from a Cloudflare Stream thumbnail, otherwise
 *   `null`.
 */
function extractVideoLink(body) {
  // Preferred: an explicit href ending in ".mp4" (the featured entries link
  // to release assets this way).
  const direct = body.match(/href=["']([^"']+\.mp4)["']/);
  if (direct) return direct[1];

  // Fallback: entries that only embed a Cloudflare Stream thumbnail. The
  // thumbnail URL carries the host and the video id, from which the
  // "/downloads/default.mp4" address is rebuilt.
  // NOTE(review): the original author reports this endpoint redirects to a
  // freshly signed CDN URL with permissive CORS and range support — not
  // verifiable from this file.
  const thumb = body.match(
    /https?:\/\/([a-z0-9-]+\.cloudflarestream\.com)\/([a-f0-9]{20,})\/thumbnails\/thumbnail\.jpg/i,
  );
  if (!thumb) return null;
  const [, host, videoId] = thumb;
  return `https://${host}/${videoId}/downloads/default.mp4`;
}
/**
 * Tidies a heading for display on cards: drops a trailing parenthetical
 * ("Title (v2)" → "Title"), removes a single leading "-" marker, and trims
 * surrounding whitespace.
 *
 * Note: a leading "Category - " prefix such as "Profile / Avatar - …" is
 * left as is; only a hyphen at the very start of the title is removed.
 *
 * @param {string} raw - Heading text.
 * @returns {string} The cleaned title.
 */
function cleanTitle(raw) {
  const withoutParenthetical = raw.replace(/\s*\(.*\)\s*$/, '');
  const withoutLeadingDash = withoutParenthetical.replace(/^\s*[-]\s*/, '');
  return withoutLeadingDash.trim();
}
// Ordered keyword rules: the first pattern that matches the lowercased title
// wins. Every keyword is anchored at a word start (`\b`) so that it cannot
// fire inside an unrelated word ("ad" in "shadow", "ui" in "fruit"). Short
// keywords that are also common word prefixes get a trailing `\b` too, so
// "post" no longer shadows "poster" and "app" does not match "apple".
const IMAGE_CATEGORY_RULES = [
  ['Profile / Avatar', /\b(?:profile|avatar|portrait)/],
  ['Social Media Post', /\b(?:social|posts?\b|carousel)/],
  ['Infographic', /\b(?:info[ -]?graphic|(?:flow)?chart|diagram)/],
  ['YouTube Thumbnail', /\b(?:youtube|thumbnail)/],
  ['Comic / Storyboard', /\b(?:comic|storyboard|panel)/],
  ['Poster / Flyer', /\b(?:poster|flyer)/],
  ['App / Web Design', /\b(?:ui\b|apps?\b|web design|mockup|landing)/],
  ['Product Marketing', /\b(?:product|exploded|merch|packaging)/],
  ['Anime / Manga', /\b(?:anime|manga)/],
  ['Cinematic', /\b(?:cinematic|film)/],
  ['3D Render', /\b(?:3d|render|isometric)/],
  ['Sketch / Line Art', /\b(?:sketch|line art|pencil)/],
  ['Pixel Art', /\bpixel/],
  ['Painterly', /\b(?:oil\b|water[- ]?color)/],
  ['Cyberpunk / Sci-Fi', /\b(?:cyberpunk|sci[- ]?fi|futuristic)/],
  ['Landscape', /\b(?:landscape|nature)/],
];

// NOTE: "short film" titles are claimed by the earlier Cinematic rule
// ("film"); the order is kept as in the original rule list.
const VIDEO_CATEGORY_RULES = [
  ['Cinematic', /\b(?:cinematic|film|movie|noir)/],
  ['Anime', /\b(?:anime|manga)/],
  ['Advertising', /\b(?:ads?\b|advert|commercial|brand)/],
  ['UGC / Vlog', /\b(?:ugc|tutorial|vlog)/],
  ['Social / Meme', /\b(?:meme|tiktok|viral)/],
  ['Short Film / Drama', /\b(?:drama|short film|romance)/],
  ['Motion Graphics', /\b(?:intro|motion graphics|title sequence)/],
  ['VFX / Fantasy', /\b(?:vfx|fantasy|magic)/],
  ['Action', /\b(?:race|action|combat|fight)/],
];

/**
 * Guesses a gallery category from an entry title.
 *
 * @param {string} title - Entry title (matched case-insensitively).
 * @param {string} surface - `'image'` selects the image rules; any other
 *   value selects the video rules.
 * @returns {string} The label of the first matching rule, or the surface
 *   fallback (`'Illustration'` for images, `'General'` otherwise).
 */
function inferCategory(title, surface) {
  const lower = title.toLowerCase();
  const isImage = surface === 'image';
  const rules = isImage ? IMAGE_CATEGORY_RULES : VIDEO_CATEGORY_RULES;
  const matched = rules.find(([, pattern]) => pattern.test(lower));
  if (matched) return matched[0];
  return isImage ? 'Illustration' : 'General';
}
// Ordered tag rules. Keywords are anchored at a word start (`\b`) so they do
// not fire inside unrelated words — previously "mage" matched "image" and
// "elf" matched "herself", tagging ordinary prompts as fantasy. "mage" and
// "elf" also get a trailing `\b` because they prefix other words.
const TAG_RULES = [
  ['portrait', /\b(?:portrait|selfie|headshot)/],
  ['anime', /\b(?:anime|manga)/],
  ['cinematic', /\b(?:cinematic|filmic|grain|8k)/],
  ['cyberpunk', /\b(?:cyberpunk|neon)/],
  ['fantasy', /\b(?:fantasy|mages?\b|elf\b|dragon)/],
  ['3d-render', /\b(?:3d render|unreal engine|render)/],
  ['isometric', /\bisometric/],
  ['typography', /\b(?:typography|kerning|font|lettering)/],
  ['product', /\b(?:product|packaging|exploded)/],
  ['ugc', /\b(?:ugc|vlog|selfie cam)/],
  ['cinematic-romance', /\b(?:romance|pure love|romantic)/],
  ['action', /\b(?:chase|action|combat|race)/],
  ['food', /\b(?:food|coffee|kitchen)/],
  ['nature', /\b(?:forest|river|mountain|landscape)/],
];

/**
 * Derives search tags from an entry's title and prompt text.
 *
 * @param {string} title - Entry title.
 * @param {string} prompt - Prompt body.
 * @param {string} surface - `'image'` allows up to 4 tags; any other value
 *   allows up to 3.
 * @returns {string[]} Matching tags in rule order, truncated to the limit.
 */
function inferTags(title, prompt, surface) {
  const blob = `${title} ${prompt}`.toLowerCase();
  const limit = surface === 'image' ? 4 : 3;
  return TAG_RULES.filter(([, pattern]) => pattern.test(blob))
    .map(([tag]) => tag)
    .slice(0, limit);
}
/**
 * Deletes previously generated `.json` files from `dir`.
 *
 * A file is kept only when it parses as JSON and its `source.repo` is set to
 * something other than `upstreamRepo` (i.e. a hand-authored template).
 * Files that cannot be read or parsed, or that carry no `source.repo`, are
 * treated as generated and removed. Non-`.json` files are never touched.
 *
 * Best effort: any failure while listing or deleting (including a missing
 * directory) silently ends the cleanup.
 *
 * @param {string} dir - Directory to clean.
 * @param {string} upstreamRepo - Repo slug identifying imported templates.
 * @returns {Promise<void>}
 */
async function clearDir(dir, upstreamRepo) {
  // Resolves true for files that must survive the cleanup.
  const isHandAuthored = async (filePath) => {
    try {
      const repo = JSON.parse(await readFile(filePath, 'utf8'))?.source?.repo;
      return Boolean(repo) && repo !== upstreamRepo;
    } catch {
      // Unparseable file — treat as generated and remove.
      return false;
    }
  };

  try {
    const jsonNames = (await readdir(dir)).filter((name) => name.endsWith('.json'));
    for (const name of jsonNames) {
      const filePath = path.join(dir, name);
      if (await isHandAuthored(filePath)) continue;
      await unlink(filePath);
    }
  } catch {
    // missing dir is fine — created below.
  }
}
/**
 * Writes each entry to `<outDir>/<id>.json` after clearing earlier output.
 *
 * Ids are de-duplicated: the first entry with a given id wins (featured
 * entries are parsed before "All Prompts"), and any `.json` file still on
 * disk after `clearDir` — a preserved hand-authored template — reserves its
 * id so it is never overwritten.
 *
 * @param {object[]} entries - Records to persist; each needs an `id`.
 * @param {string} outDir - Destination directory (created if missing).
 * @param {string} upstreamRepo - Repo slug handed to `clearDir`.
 * @returns {Promise<number>} Number of files written.
 */
async function writeAll(entries, outDir, upstreamRepo) {
  await mkdir(outDir, { recursive: true });
  await clearDir(outDir, upstreamRepo);

  // Ids already claimed by files that survived the cleanup.
  let reserved = [];
  try {
    reserved = (await readdir(outDir))
      .filter((name) => name.endsWith('.json'))
      .map((name) => name.slice(0, -'.json'.length));
  } catch {
    // noop
  }
  const taken = new Set(reserved);

  let written = 0;
  for (const entry of entries) {
    if (taken.has(entry.id)) continue;
    taken.add(entry.id);
    const payload = `${JSON.stringify(entry, null, 2)}\n`;
    await writeFile(path.join(outDir, `${entry.id}.json`), payload, 'utf8');
    written += 1;
  }
  return written;
}
/**
 * Imports every configured source: downloads its README, parses the featured
 * and "All Prompts" sections, writes the resulting templates, and logs a
 * per-source summary followed by the overall totals.
 *
 * A source that yields no entries is reported on stderr and sets
 * `process.exitCode = 1`; the remaining sources are still processed.
 *
 * @returns {Promise<void>}
 */
async function main() {
  let totalImage = 0;
  let totalVideo = 0;
  for (const ctx of SOURCES) {
    const md = await fetchText(ctx.readmeUrl);
    // Try the known header spellings from most to least specific.
    const featuredBlock =
      sliceSection(md, /## 🔥 Featured Prompts/m) ||
      sliceSection(md, /## ⭐ Featured Prompts/m) ||
      sliceSection(md, /## Featured/m);
    const allPromptsBlock =
      sliceSection(md, /## (📋|🎬) All Prompts/m) ||
      sliceSection(md, /## All Prompts/m);
    const featured = parseFeaturedBlock(featuredBlock, ctx);
    const sampled = parseAllPromptsBlock(allPromptsBlock, ctx);
    const entries = [...featured, ...sampled];
    if (entries.length === 0) {
      console.error(`No entries parsed for ${ctx.repo}; check headers.`);
      process.exitCode = 1;
      continue;
    }
    const outDir = ctx.surface === 'image' ? OUT_IMAGE : OUT_VIDEO;
    const written = await writeAll(entries, outDir, ctx.repo);
    if (ctx.surface === 'image') totalImage += written;
    else totalVideo += written;
    // Separate the count from the output path; they used to run together
    // ("written=36prompt-templates/image").
    console.log(
      `[${ctx.repo}] featured=${featured.length} sampled=${sampled.length} written=${written} → ${path.relative(ROOT, outDir)}`,
    );
  }
  console.log(`\nDone. ${totalImage} image + ${totalVideo} video templates.`);
}
// Script entry point: run the import and, on any unhandled failure, print
// the error and terminate with a non-zero exit status.
const exitWithError = (error) => {
  console.error(error);
  process.exit(1);
};
main().catch(exitWithError);