which
This commit is contained in:
BIN
IMG_8015.png
Normal file
BIN
IMG_8015.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 565 KiB |
BIN
IMG_8016.png
Normal file
BIN
IMG_8016.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 725 KiB |
@@ -1,7 +1,7 @@
|
|||||||
import { NextRequest, NextResponse } from 'next/server';
|
import { NextRequest, NextResponse } from 'next/server';
|
||||||
import { z } from 'zod';
|
import { z } from 'zod';
|
||||||
import { resolveDirectVideoUrl } from '@/lib/resolvers';
|
import { resolveDirectVideoUrl } from '@/lib/resolvers';
|
||||||
import { analyzeCookingThumbnails, analyzeFromTranscript, analyzeFromTranscriptAndImages, transcribeAudioBytes, generateRecipeTitle } from '@/lib/openai';
|
import { analyzeCookingThumbnails, analyzeFromTranscript, analyzeFromTranscriptAndImages, transcribeAudioBytes, generateRecipeTitle, generateDetailedInstructions } from '@/lib/openai';
|
||||||
import { downloadInstagramVideoBytes } from '@/lib/instaloader';
|
import { downloadInstagramVideoBytes } from '@/lib/instaloader';
|
||||||
import { downloadTikTokVideoBytes } from '@/lib/pyktok';
|
import { downloadTikTokVideoBytes } from '@/lib/pyktok';
|
||||||
import { extractThumbnailsFromVideoBytes } from '@/lib/video';
|
import { extractThumbnailsFromVideoBytes } from '@/lib/video';
|
||||||
@@ -74,13 +74,14 @@ export async function POST(req: NextRequest) {
|
|||||||
// 1) Convert to transcript first
|
// 1) Convert to transcript first
|
||||||
let analysis;
|
let analysis;
|
||||||
let transcript: string | null = null;
|
let transcript: string | null = null;
|
||||||
|
const MAX_FRAMES = Number(process.env.ANALYZE_MAX_FRAMES || '24');
|
||||||
try {
|
try {
|
||||||
const audio = await extractMp3FromVideoBytes(uploadBytes);
|
const audio = await extractMp3FromVideoBytes(uploadBytes);
|
||||||
if (audio && audio.byteLength > 0) {
|
if (audio && audio.byteLength > 0) {
|
||||||
transcript = await transcribeAudioBytes(audio);
|
transcript = await transcribeAudioBytes(audio);
|
||||||
if (transcript && transcript.trim().length > 0) {
|
if (transcript && transcript.trim().length > 0) {
|
||||||
// Extract frames at 2 fps and include alongside transcript in order
|
// Extract frames at 2 fps and include alongside transcript in order
|
||||||
const thumbsForCombined = await extractThumbnailsFromVideoBytes(uploadBytes, 0, 2);
|
const thumbsForCombined = await extractThumbnailsFromVideoBytes(uploadBytes, MAX_FRAMES > 0 ? MAX_FRAMES : 0, 2);
|
||||||
if (thumbsForCombined.length) {
|
if (thumbsForCombined.length) {
|
||||||
analysis = await analyzeFromTranscriptAndImages(transcript, thumbsForCombined, description);
|
analysis = await analyzeFromTranscriptAndImages(transcript, thumbsForCombined, description);
|
||||||
} else {
|
} else {
|
||||||
@@ -97,7 +98,7 @@ export async function POST(req: NextRequest) {
|
|||||||
|
|
||||||
// 2) If transcript failed/empty, fall back to thumbnails-based analysis
|
// 2) If transcript failed/empty, fall back to thumbnails-based analysis
|
||||||
if (!analysis) {
|
if (!analysis) {
|
||||||
const thumbs = await extractThumbnailsFromVideoBytes(uploadBytes, 0, 2);
|
const thumbs = await extractThumbnailsFromVideoBytes(uploadBytes, MAX_FRAMES > 0 ? MAX_FRAMES : 0, 2);
|
||||||
if (!thumbs.length) {
|
if (!thumbs.length) {
|
||||||
return NextResponse.json({ error: 'Could not extract thumbnails from video' }, { status: 400 });
|
return NextResponse.json({ error: 'Could not extract thumbnails from video' }, { status: 400 });
|
||||||
}
|
}
|
||||||
@@ -110,8 +111,14 @@ export async function POST(req: NextRequest) {
|
|||||||
? `data:image/jpeg;base64,${Buffer.from(firstThumbArr[0]).toString('base64')}`
|
? `data:image/jpeg;base64,${Buffer.from(firstThumbArr[0]).toString('base64')}`
|
||||||
: undefined;
|
: undefined;
|
||||||
|
|
||||||
// Generate a title
|
// Generate a title and detailed instructions
|
||||||
const title = await generateRecipeTitle({ description, transcript: transcript || '', analysis });
|
const title = await generateRecipeTitle({ description, transcript: transcript || '', analysis });
|
||||||
|
try {
|
||||||
|
const detailed = await generateDetailedInstructions({ description, transcript: transcript || '', analysis });
|
||||||
|
if (detailed) {
|
||||||
|
(analysis as any).detailed = detailed;
|
||||||
|
}
|
||||||
|
} catch {}
|
||||||
|
|
||||||
// Cleanup: delete temp downloaded video file if present
|
// Cleanup: delete temp downloaded video file if present
|
||||||
if (tempFilePath) {
|
if (tempFilePath) {
|
||||||
|
|||||||
@@ -66,20 +66,42 @@ export default function Editor({ id, title, description, transcript, analysis, f
|
|||||||
</ul>
|
</ul>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
{/* Prep section with numbering 1.x */}
|
||||||
<div>
|
<div>
|
||||||
<h2 className="text-xl font-semibold">Prep Steps</h2>
|
<h2 className="text-xl font-semibold">Prep</h2>
|
||||||
<ol className="list-decimal pl-6 mt-2 space-y-1">
|
<ol className="pl-6 mt-2 space-y-3">
|
||||||
{(analysis?.prep_steps || []).map((s: string, i: number) => (
|
{((analysis as any)?.detailed?.prep || (analysis?.prep_steps || [])).map((s: any, i: number) => (
|
||||||
<li key={i}>{s}</li>
|
<li key={i} className="marker:hidden">
|
||||||
|
<div className="text-amber-600 font-semibold">{`1.${i + 1}`}</div>
|
||||||
|
{typeof s === 'string' ? (
|
||||||
|
<p className="mt-1">{s}</p>
|
||||||
|
) : (
|
||||||
|
<div className="mt-1">
|
||||||
|
<div className="font-medium">{s.title}</div>
|
||||||
|
<p>{s.body}</p>
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
</li>
|
||||||
))}
|
))}
|
||||||
</ol>
|
</ol>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
{/* Cook section with numbering 2.x */}
|
||||||
<div>
|
<div>
|
||||||
<h2 className="text-xl font-semibold">Cooking Steps</h2>
|
<h2 className="text-xl font-semibold">Cook</h2>
|
||||||
<ol className="list-decimal pl-6 mt-2 space-y-1">
|
<ol className="pl-6 mt-2 space-y-3">
|
||||||
{(analysis?.cooking_steps || []).map((s: string, i: number) => (
|
{((analysis as any)?.detailed?.cook || (analysis?.cooking_steps || [])).map((s: any, i: number) => (
|
||||||
<li key={i}>{s}</li>
|
<li key={i} className="marker:hidden">
|
||||||
|
<div className="text-amber-600 font-semibold">{`2.${i + 1}`}</div>
|
||||||
|
{typeof s === 'string' ? (
|
||||||
|
<p className="mt-1">{s}</p>
|
||||||
|
) : (
|
||||||
|
<div className="mt-1">
|
||||||
|
<div className="font-medium">{s.title}</div>
|
||||||
|
<p>{s.body}</p>
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
</li>
|
||||||
))}
|
))}
|
||||||
</ol>
|
</ol>
|
||||||
</div>
|
</div>
|
||||||
|
|||||||
@@ -7,6 +7,10 @@ export type RecipeAnalysis = {
|
|||||||
ingredients: Array<{ name: string; quantity: string | null; unit: string | null; notes?: string | null }>;
|
ingredients: Array<{ name: string; quantity: string | null; unit: string | null; notes?: string | null }>;
|
||||||
prep_steps: string[];
|
prep_steps: string[];
|
||||||
cooking_steps: string[];
|
cooking_steps: string[];
|
||||||
|
detailed?: {
|
||||||
|
prep: Array<{ title: string; body: string }>;
|
||||||
|
cook: Array<{ title: string; body: string }>;
|
||||||
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
export async function uploadImageToOpenAI(bytes: Uint8Array, filename = 'image.jpg') {
|
export async function uploadImageToOpenAI(bytes: Uint8Array, filename = 'image.jpg') {
|
||||||
@@ -26,7 +30,7 @@ Return STRICT JSON with keys: ingredients, prep_steps, cooking_steps.
|
|||||||
Do not invent details not visible or clearly inferable. If unknown, use null.`;
|
Do not invent details not visible or clearly inferable. If unknown, use null.`;
|
||||||
|
|
||||||
const resp = await openai.responses.create({
|
const resp = await openai.responses.create({
|
||||||
model: process.env.OPENAI_MODEL || 'gpt-4o',
|
model: process.env.OPENAI_IMAGE_MODEL || process.env.OPENAI_MODEL || 'gpt-4o-mini',
|
||||||
input: [
|
input: [
|
||||||
{
|
{
|
||||||
role: 'system',
|
role: 'system',
|
||||||
@@ -62,12 +66,15 @@ Do not invent details not visible or clearly inferable. If unknown, use null.`;
|
|||||||
}
|
}
|
||||||
|
|
||||||
export async function analyzeCookingThumbnails(images: Uint8Array[], description?: string): Promise<RecipeAnalysis> {
|
export async function analyzeCookingThumbnails(images: Uint8Array[], description?: string): Promise<RecipeAnalysis> {
|
||||||
const system = `You are a culinary expert. Analyze ONLY the attached images (video thumbnails).
|
const system = `You are a culinary expert. Analyze the attached images (ordered video thumbnails) and optional description.
|
||||||
Return STRICT JSON with keys: ingredients, prep_steps, cooking_steps.
|
Return STRICT JSON with keys: ingredients, prep_steps, cooking_steps.
|
||||||
- ingredients: array of { name, quantity (string|null), unit (string|null), notes (string|null) }
|
- ingredients: array of { name, quantity (string|null), unit (string|null), notes (string|null) }
|
||||||
- prep_steps: array of strings
|
- prep_steps: array of strings
|
||||||
- cooking_steps: array of strings
|
- cooking_steps: array of strings
|
||||||
Do not invent details not visible or clearly inferable. If unknown, use null.`;
|
Filling gaps: When exact details are not stated, infer reasonable approximations from visuals and common practice while preserving the spirit of the original recipe.
|
||||||
|
- Provide approximate quantities using a '~' prefix or add '(approx)' in notes when inferred from images.
|
||||||
|
- If a likely temperature/time/equipment is needed, infer a sensible default and mark as '(approx)'.
|
||||||
|
If truly unknowable, use null.`;
|
||||||
|
|
||||||
const contentImages = images.map((bytes) => ({
|
const contentImages = images.map((bytes) => ({
|
||||||
type: 'input_image' as const,
|
type: 'input_image' as const,
|
||||||
@@ -76,7 +83,7 @@ Do not invent details not visible or clearly inferable. If unknown, use null.`;
|
|||||||
}));
|
}));
|
||||||
|
|
||||||
const resp = await openai.responses.create({
|
const resp = await openai.responses.create({
|
||||||
model: process.env.OPENAI_MODEL || 'gpt-4o',
|
model: process.env.OPENAI_IMAGE_MODEL || process.env.OPENAI_MODEL || 'gpt-4o-mini',
|
||||||
input: [
|
input: [
|
||||||
{ role: 'system', content: [{ type: 'input_text', text: system }] },
|
{ role: 'system', content: [{ type: 'input_text', text: system }] },
|
||||||
{
|
{
|
||||||
@@ -113,7 +120,7 @@ export async function transcribeAudioBytes(audioBytes: Uint8Array): Promise<stri
|
|||||||
}
|
}
|
||||||
|
|
||||||
export async function analyzeFromTranscript(transcript: string): Promise<RecipeAnalysis> {
|
export async function analyzeFromTranscript(transcript: string): Promise<RecipeAnalysis> {
|
||||||
const system = `You are a culinary expert. Analyze ONLY the provided transcript of a cooking video. \nReturn STRICT JSON with keys: ingredients, prep_steps, cooking_steps. \n- ingredients: array of { name, quantity (string|null), unit (string|null), notes (string|null) }\n- prep_steps: array of strings\n- cooking_steps: array of strings\nDo not invent details not present. If unknown, use null.`;
|
const system = `You are a culinary expert. Analyze the provided transcript of a cooking video. \nReturn STRICT JSON with keys: ingredients, prep_steps, cooking_steps. \n- ingredients: array of { name, quantity (string|null), unit (string|null), notes (string|null) }\n- prep_steps: array of strings\n- cooking_steps: array of strings\nFilling gaps: If specifics are missing but strongly implied by context, infer reasonable approximations and mark them with a '~' prefix or '(approx)' in notes. If truly unknowable, use null.`;
|
||||||
|
|
||||||
const resp = await openai.responses.create({
|
const resp = await openai.responses.create({
|
||||||
model: process.env.OPENAI_MODEL || 'gpt-4o',
|
model: process.env.OPENAI_MODEL || 'gpt-4o',
|
||||||
@@ -138,7 +145,7 @@ export async function analyzeFromTranscriptAndImages(
|
|||||||
images: Uint8Array[],
|
images: Uint8Array[],
|
||||||
description?: string,
|
description?: string,
|
||||||
): Promise<RecipeAnalysis> {
|
): Promise<RecipeAnalysis> {
|
||||||
const system = `You are a culinary expert. Analyze ONLY the provided transcript and ordered thumbnails from a cooking video.\nReturn STRICT JSON with keys: ingredients, prep_steps, cooking_steps.\n- ingredients: array of { name, quantity (string|null), unit (string|null), notes (string|null) }\n- prep_steps: array of strings\n- cooking_steps: array of strings\nIf unknown, use null. Consider the images in order.`;
|
const system = `You are a culinary expert. Analyze the provided transcript and ordered thumbnails from a cooking video.\nReturn STRICT JSON with keys: ingredients, prep_steps, cooking_steps.\n- ingredients: array of { name, quantity (string|null), unit (string|null), notes (string|null) }\n- prep_steps: array of strings\n- cooking_steps: array of strings\nFilling gaps: Combine transcript and visuals to infer missing details (quantities, temperatures, times, equipment) while preserving the spirit of the original recipe.\n- Use '~' or '(approx)' for estimated quantities/times when inferred; if truly unknowable, use null. Consider the images in order.`;
|
||||||
|
|
||||||
const contentImages = images.map((bytes) => ({
|
const contentImages = images.map((bytes) => ({
|
||||||
type: 'input_image' as const,
|
type: 'input_image' as const,
|
||||||
@@ -186,4 +193,24 @@ export async function generateRecipeTitle(input: { description?: string; transcr
|
|||||||
return text.trim().replace(/^"|"$/g, '').slice(0, 120);
|
return text.trim().replace(/^"|"$/g, '').slice(0, 120);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
export async function generateDetailedInstructions(input: { description?: string; transcript?: string; analysis: RecipeAnalysis }): Promise<RecipeAnalysis['detailed']> {
|
||||||
|
const prompt = `Create detailed, step-by-step cooking instructions in two sections: Prep and Cook.\nGuidelines:\n- Use clear, concise language with decisive verbs.\n- Include temperatures, times, quantities, and pan/surface sizes when inferable.\n- When specifics are missing, infer sensible approximations that fit the recipe's style and mark them '(approx)'.\n- Each step should have a short Title (3-10 words) and a Body (1-3 sentences).\n- Keep steps atomic; prefer 6-12 steps per section when appropriate.\nReturn STRICT JSON: { "prep": [{"title":"...","body":"..."}], "cook": [{"title":"...","body":"..."}] }\n\nContext:\nDescription: ${input.description || ''}\n\nTranscript: ${input.transcript || ''}\n\nIngredients: ${(input.analysis?.ingredients || []).map(i => i.name).join(', ')}\n\nExisting prep steps: ${(input.analysis?.prep_steps || []).join(' | ')}\nExisting cooking steps: ${(input.analysis?.cooking_steps || []).join(' | ')}`;
|
||||||
|
|
||||||
|
const resp = await openai.responses.create({
|
||||||
|
model: process.env.OPENAI_MODEL || 'gpt-4o-mini',
|
||||||
|
input: [{ role: 'user', content: [{ type: 'input_text', text: prompt }] }],
|
||||||
|
temperature: 0.3,
|
||||||
|
});
|
||||||
|
const text = (resp as any).output_text || (resp as any).content?.[0]?.text || '';
|
||||||
|
let json: any;
|
||||||
|
try {
|
||||||
|
json = JSON.parse(text);
|
||||||
|
} catch {
|
||||||
|
const m = text.match(/\{[\s\S]*\}/);
|
||||||
|
if (m) json = JSON.parse(m[0]);
|
||||||
|
}
|
||||||
|
if (!json || !('prep' in json) || !('cook' in json)) return undefined as any;
|
||||||
|
return json as RecipeAnalysis['detailed'];
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -10,6 +10,7 @@ export async function extractThumbnailsFromVideoBytes(
|
|||||||
videoBytes: Uint8Array,
|
videoBytes: Uint8Array,
|
||||||
maxFrames: number | null = 8,
|
maxFrames: number | null = 8,
|
||||||
fps = 2,
|
fps = 2,
|
||||||
|
maxWidth = 640,
|
||||||
): Promise<Uint8Array[]> {
|
): Promise<Uint8Array[]> {
|
||||||
const tmpBase = await mkdtemp(join(tmpdir(), 'recipe-ai-'));
|
const tmpBase = await mkdtemp(join(tmpdir(), 'recipe-ai-'));
|
||||||
const inputPath = join(tmpBase, 'input.mp4');
|
const inputPath = join(tmpBase, 'input.mp4');
|
||||||
@@ -23,7 +24,7 @@ export async function extractThumbnailsFromVideoBytes(
|
|||||||
'-loglevel', 'error',
|
'-loglevel', 'error',
|
||||||
'-y',
|
'-y',
|
||||||
'-i', inputPath,
|
'-i', inputPath,
|
||||||
'-vf', `fps=${fps}`,
|
'-vf', `fps=${fps},scale=${maxWidth}:-1:force_original_aspect_ratio=decrease`,
|
||||||
'-q:v', '2',
|
'-q:v', '2',
|
||||||
];
|
];
|
||||||
if (maxFrames && maxFrames > 0) {
|
if (maxFrames && maxFrames > 0) {
|
||||||
|
|||||||
Binary file not shown.
Reference in New Issue
Block a user