diff --git a/IMG_8015.png b/IMG_8015.png new file mode 100644 index 0000000..23ceeeb Binary files /dev/null and b/IMG_8015.png differ diff --git a/IMG_8016.png b/IMG_8016.png new file mode 100644 index 0000000..52b6ae8 Binary files /dev/null and b/IMG_8016.png differ diff --git a/src/app/api/analyze/route.ts b/src/app/api/analyze/route.ts index aec6ea5..955d4ea 100644 --- a/src/app/api/analyze/route.ts +++ b/src/app/api/analyze/route.ts @@ -1,7 +1,7 @@ import { NextRequest, NextResponse } from 'next/server'; import { z } from 'zod'; import { resolveDirectVideoUrl } from '@/lib/resolvers'; -import { analyzeCookingThumbnails, analyzeFromTranscript, analyzeFromTranscriptAndImages, transcribeAudioBytes, generateRecipeTitle } from '@/lib/openai'; +import { analyzeCookingThumbnails, analyzeFromTranscript, analyzeFromTranscriptAndImages, transcribeAudioBytes, generateRecipeTitle, generateDetailedInstructions } from '@/lib/openai'; import { downloadInstagramVideoBytes } from '@/lib/instaloader'; import { downloadTikTokVideoBytes } from '@/lib/pyktok'; import { extractThumbnailsFromVideoBytes } from '@/lib/video'; @@ -74,13 +74,14 @@ export async function POST(req: NextRequest) { // 1) Convert to transcript first let analysis; let transcript: string | null = null; + const MAX_FRAMES = Number(process.env.ANALYZE_MAX_FRAMES || '24'); try { const audio = await extractMp3FromVideoBytes(uploadBytes); if (audio && audio.byteLength > 0) { transcript = await transcribeAudioBytes(audio); if (transcript && transcript.trim().length > 0) { // Extract frames at 2 fps and include alongside transcript in order - const thumbsForCombined = await extractThumbnailsFromVideoBytes(uploadBytes, 0, 2); + const thumbsForCombined = await extractThumbnailsFromVideoBytes(uploadBytes, MAX_FRAMES > 0 ? MAX_FRAMES : 0, 2); if (thumbsForCombined.length) { analysis = await analyzeFromTranscriptAndImages(transcript, thumbsForCombined, description); } else { @@ -97,7 +98,7 @@ export async function POST(req: NextRequest) { // 2) If transcript failed/empty, fall back to thumbnails-based analysis if (!analysis) { - const thumbs = await extractThumbnailsFromVideoBytes(uploadBytes, 0, 2); + const thumbs = await extractThumbnailsFromVideoBytes(uploadBytes, MAX_FRAMES > 0 ? MAX_FRAMES : 0, 2); if (!thumbs.length) { return NextResponse.json({ error: 'Could not extract thumbnails from video' }, { status: 400 }); } @@ -110,8 +111,14 @@ export async function POST(req: NextRequest) { ? `data:image/jpeg;base64,${Buffer.from(firstThumbArr[0]).toString('base64')}` : undefined; - // Generate a title + // Generate a title and detailed instructions const title = await generateRecipeTitle({ description, transcript: transcript || '', analysis }); + try { + const detailed = await generateDetailedInstructions({ description, transcript: transcript || '', analysis }); + if (detailed) { + (analysis as any).detailed = detailed; + } + } catch {} // Cleanup: delete temp downloaded video file if present if (tempFilePath) { diff --git a/src/app/recipes/[id]/Editor.tsx b/src/app/recipes/[id]/Editor.tsx index 98e075c..e6d65d6 100644 --- a/src/app/recipes/[id]/Editor.tsx +++ b/src/app/recipes/[id]/Editor.tsx @@ -66,20 +66,42 @@ export default function Editor({ id, title, description, transcript, analysis, f + {/* Prep section with numbering 1.x */}
-

Prep Steps

-
    - {(analysis?.prep_steps || []).map((s: string, i: number) => ( -
  1. {s}
  2. +

    Prep

    +
      + {((analysis as any)?.detailed?.prep || (analysis?.prep_steps || [])).map((s: any, i: number) => ( +
    1. +
      {`1.${i + 1}`}
      + {typeof s === 'string' ? ( +

      {s}

      + ) : ( +
      +
      {s.title}
      +

      {s.body}

      +
      + )} +
    2. ))}
+ {/* Cook section with numbering 2.x */}
-

Cooking Steps

-
    - {(analysis?.cooking_steps || []).map((s: string, i: number) => ( -
  1. {s}
  2. +

    Cook

    +
      + {((analysis as any)?.detailed?.cook || (analysis?.cooking_steps || [])).map((s: any, i: number) => ( +
    1. +
      {`2.${i + 1}`}
      + {typeof s === 'string' ? ( +

      {s}

      + ) : ( +
      +
      {s.title}
      +

      {s.body}

      +
      + )} +
    2. ))}
diff --git a/src/lib/openai.ts b/src/lib/openai.ts index a108c89..d786067 100644 --- a/src/lib/openai.ts +++ b/src/lib/openai.ts @@ -7,6 +7,10 @@ export type RecipeAnalysis = { ingredients: Array<{ name: string; quantity: string | null; unit: string | null; notes?: string | null }>; prep_steps: string[]; cooking_steps: string[]; + detailed?: { + prep: Array<{ title: string; body: string }>; + cook: Array<{ title: string; body: string }>; + }; }; export async function uploadImageToOpenAI(bytes: Uint8Array, filename = 'image.jpg') { @@ -26,7 +30,7 @@ Return STRICT JSON with keys: ingredients, prep_steps, cooking_steps. Do not invent details not visible or clearly inferable. If unknown, use null.`; const resp = await openai.responses.create({ - model: process.env.OPENAI_MODEL || 'gpt-4o', + model: process.env.OPENAI_IMAGE_MODEL || process.env.OPENAI_MODEL || 'gpt-4o-mini', input: [ { role: 'system', @@ -62,12 +66,15 @@ Do not invent details not visible or clearly inferable. If unknown, use null.`; } export async function analyzeCookingThumbnails(images: Uint8Array[], description?: string): Promise { - const system = `You are a culinary expert. Analyze ONLY the attached images (video thumbnails). -Return STRICT JSON with keys: ingredients, prep_steps, cooking_steps. + const system = `You are a culinary expert. Analyze the attached images (ordered video thumbnails) and optional description. +Return STRICT JSON with keys: ingredients, prep_steps, cooking_steps. - ingredients: array of { name, quantity (string|null), unit (string|null), notes (string|null) } - prep_steps: array of strings - cooking_steps: array of strings -Do not invent details not visible or clearly inferable. If unknown, use null.`; +Filling gaps: When exact details are not stated, infer reasonable approximations from visuals and common practice while preserving the spirit of the original recipe. +- Provide approximate quantities using a '~' prefix or add '(approx)' in notes when inferred from images. +- If a likely temperature/time/equipment is needed, infer a sensible default and mark as '(approx)'. +If truly unknowable, use null.`; const contentImages = images.map((bytes) => ({ type: 'input_image' as const, @@ -76,7 +83,7 @@ Do not invent details not visible or clearly inferable. If unknown, use null.`; })); const resp = await openai.responses.create({ - model: process.env.OPENAI_MODEL || 'gpt-4o', + model: process.env.OPENAI_IMAGE_MODEL || process.env.OPENAI_MODEL || 'gpt-4o-mini', input: [ { role: 'system', content: [{ type: 'input_text', text: system }] }, { @@ -113,7 +120,7 @@ export async function transcribeAudioBytes(audioBytes: Uint8Array): Promise { - const system = `You are a culinary expert. Analyze ONLY the provided transcript of a cooking video. \nReturn STRICT JSON with keys: ingredients, prep_steps, cooking_steps. \n- ingredients: array of { name, quantity (string|null), unit (string|null), notes (string|null) }\n- prep_steps: array of strings\n- cooking_steps: array of strings\nDo not invent details not present. If unknown, use null.`; + const system = `You are a culinary expert. Analyze the provided transcript of a cooking video. \nReturn STRICT JSON with keys: ingredients, prep_steps, cooking_steps. \n- ingredients: array of { name, quantity (string|null), unit (string|null), notes (string|null) }\n- prep_steps: array of strings\n- cooking_steps: array of strings\nFilling gaps: If specifics are missing but strongly implied by context, infer reasonable approximations and mark them with a '~' prefix or '(approx)' in notes. If truly unknowable, use null.`; const resp = await openai.responses.create({ model: process.env.OPENAI_MODEL || 'gpt-4o', @@ -138,7 +145,7 @@ export async function analyzeFromTranscriptAndImages( images: Uint8Array[], description?: string, ): Promise { - const system = `You are a culinary expert. Analyze ONLY the provided transcript and ordered thumbnails from a cooking video.\nReturn STRICT JSON with keys: ingredients, prep_steps, cooking_steps.\n- ingredients: array of { name, quantity (string|null), unit (string|null), notes (string|null) }\n- prep_steps: array of strings\n- cooking_steps: array of strings\nIf unknown, use null. Consider the images in order.`; + const system = `You are a culinary expert. Analyze the provided transcript and ordered thumbnails from a cooking video.\nReturn STRICT JSON with keys: ingredients, prep_steps, cooking_steps.\n- ingredients: array of { name, quantity (string|null), unit (string|null), notes (string|null) }\n- prep_steps: array of strings\n- cooking_steps: array of strings\nFilling gaps: Combine transcript and visuals to infer missing details (quantities, temperatures, times, equipment) while preserving the spirit of the original recipe.\n- Use '~' or '(approx)' for estimated quantities/times when inferred; if truly unknowable, use null. Consider the images in order.`; const contentImages = images.map((bytes) => ({ type: 'input_image' as const, @@ -186,4 +193,24 @@ export async function generateRecipeTitle(input: { description?: string; transcr return text.trim().replace(/^"|"$/g, '').slice(0, 120); } +export async function generateDetailedInstructions(input: { description?: string; transcript?: string; analysis: RecipeAnalysis }): Promise { + const prompt = `Create detailed, step-by-step cooking instructions in two sections: Prep and Cook.\nGuidelines:\n- Use clear, concise language with decisive verbs.\n- Include temperatures, times, quantities, and pan/surface sizes when inferable.\n- When specifics are missing, infer sensible approximations that fit the recipe's style and mark them '(approx)'.\n- Each step should have a short Title (3-10 words) and a Body (1-3 sentences).\n- Keep steps atomic; prefer 6-12 steps per section when appropriate.\nReturn STRICT JSON: { "prep": [{"title":"...","body":"..."}], "cook": [{"title":"...","body":"..."}] }\n\nContext:\nDescription: ${input.description || ''}\n\nTranscript: ${input.transcript || ''}\n\nIngredients: ${(input.analysis?.ingredients || []).map(i => i.name).join(', ')}\n\nExisting prep steps: ${(input.analysis?.prep_steps || []).join(' | ')}\nExisting cooking steps: ${(input.analysis?.cooking_steps || []).join(' | ')}`; + + const resp = await openai.responses.create({ + model: process.env.OPENAI_MODEL || 'gpt-4o-mini', + input: [{ role: 'user', content: [{ type: 'input_text', text: prompt }] }], + temperature: 0.3, + }); + const text = (resp as any).output_text || (resp as any).content?.[0]?.text || ''; + let json: any; + try { + json = JSON.parse(text); + } catch { + const m = text.match(/\{[\s\S]*\}/); + if (m) json = JSON.parse(m[0]); + } + if (!json || !('prep' in json) || !('cook' in json)) return undefined as any; + return json as RecipeAnalysis['detailed']; +} + diff --git a/src/lib/video.ts b/src/lib/video.ts index 1eddacb..7c40927 100644 --- a/src/lib/video.ts +++ b/src/lib/video.ts @@ -10,6 +10,7 @@ export async function extractThumbnailsFromVideoBytes( videoBytes: Uint8Array, maxFrames: number | null = 8, fps = 2, + maxWidth = 640, ): Promise { const tmpBase = await mkdtemp(join(tmpdir(), 'recipe-ai-')); const inputPath = join(tmpBase, 'input.mp4'); @@ -23,7 +24,7 @@ export async function extractThumbnailsFromVideoBytes( '-loglevel', 'error', '-y', '-i', inputPath, - '-vf', `fps=${fps}`, + '-vf', `fps=${fps},scale=${maxWidth}:-1:force_original_aspect_ratio=decrease`, '-q:v', '2', ]; if (maxFrames && maxFrames > 0) { diff --git a/tools/instaloader/downloads/DLNigR4JYM7.mp4 b/tools/instaloader/downloads/DLNigR4JYM7.mp4 deleted file mode 100644 index 5b28380..0000000 Binary files a/tools/instaloader/downloads/DLNigR4JYM7.mp4 and /dev/null differ