Files
Recipe-AI/tools/instaloader/download_ig.py
2025-10-28 14:33:24 -04:00

138 lines
5.1 KiB
Python

#!/usr/bin/env python3
import argparse
import os
import shutil
import requests
import json
import re
import sys
from pathlib import Path
# Instaloader is a hard dependency of this script: if importing it fails for
# any reason, print an install hint plus the underlying error and exit 1.
try:
    import instaloader
except Exception as import_error:
    print(
        f"Error: instaloader not installed. Run: pip install -r requirements.txt\n{import_error}",
        file=sys.stderr,
    )
    raise SystemExit(1)
def extract_shortcode_from_url(url: str) -> "str | None":
    """Return the post shortcode embedded in an Instagram URL, or None.

    Recognised path forms (matched case-insensitively):
      * ``instagram.com/p/<code>``
      * ``instagram.com/reel/<code>`` and ``instagram.com/reels/<code>``
      * ``instagram.com/tv/<code>``
      * any of the above preceded by a single extra path segment, e.g.
        ``instagram.com/<username>/reel/<code>``

    The shortcode ends at the next ``/``, ``?`` or ``#``, so query strings
    and fragments are never included in the result.

    Args:
        url: The URL to inspect. ``None`` or an empty string yields ``None``.

    Returns:
        The shortcode, or ``None`` when the URL matches no recognised form.
    """
    if not url:
        return None
    match = re.search(
        r"instagram\.com/(?:[^/?#]+/)?(?:p|reels?|tv)/([^/?#]+)",
        url,
        re.I,
    )
    return match.group(1) if match else None
# Request settings for fetching the video file directly from its URL.
_DIRECT_DOWNLOAD_HEADERS = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123 Safari/537.36',
    'accept': '*/*',
    'referer': 'https://www.instagram.com/',
}
_DIRECT_DOWNLOAD_TIMEOUT = 60
_DIRECT_DOWNLOAD_CHUNK_SIZE = 1024 * 256


def _build_quiet_loader():
    """Create an Instaloader that skips comments, metadata and thumbnails and stays silent."""
    loader = instaloader.Instaloader(
        download_comments=False,
        post_metadata_txt_pattern='',
        download_video_thumbnails=False,
        save_metadata=False,
        compress_json=False,
        quiet=True,
    )
    # Silence logging further to keep stdout clean for the final JSON line only.
    # Best effort: a hook that cannot be replaced is simply left as it is.
    for hook_name in ('log', 'log_progress', 'error', 'warning'):
        try:
            setattr(loader.context, hook_name, lambda *_a, **_k: None)
        except Exception:
            pass
    return loader


def _login_if_possible(loader, username, password, sessionfile):
    """Authenticate the loader with a session file and/or password; failures only warn."""
    if not username:
        return
    if sessionfile:
        try:
            loader.load_session_from_file(username, sessionfile)
            return
        except Exception as e:
            print(f"Warning: failed to load session from {sessionfile}: {e}", file=sys.stderr)
    # Reached when no session file was given or when loading it failed.
    if password:
        try:
            loader.login(username, password)
        except Exception as e:
            print(f"Warning: login failed: {e}", file=sys.stderr)


def _find_video_url(post):
    """Return the URL of the post's first video (own or sidecar), or None if there is none."""
    try:
        if getattr(post, 'is_video', False) and getattr(post, 'video_url', None):
            return post.video_url
        # Sidecar posts may contain one or more videos; take the first.
        for node in post.get_sidecar_nodes():
            if getattr(node, 'is_video', False) and getattr(node, 'video_url', None):
                return node.video_url
    except Exception:
        # Any lookup failure just means the direct-download path is skipped.
        return None
    return None


def _fetch_video(video_url, dest_path):
    """Stream video_url to dest_path via a sibling '.part' file; raises on any failure."""
    part_path = dest_path.with_name(dest_path.name + '.part')
    try:
        with requests.get(
            video_url,
            stream=True,
            headers=_DIRECT_DOWNLOAD_HEADERS,
            timeout=_DIRECT_DOWNLOAD_TIMEOUT,
        ) as r:
            r.raise_for_status()
            with open(part_path, 'wb') as f:
                for chunk in r.iter_content(chunk_size=_DIRECT_DOWNLOAD_CHUNK_SIZE):
                    if chunk:
                        f.write(chunk)
        # Rename only after the whole body was written, so dest_path never
        # holds a truncated video.
        os.replace(part_path, dest_path)
    except Exception:
        try:
            os.remove(part_path)
        except OSError:
            pass
        raise


def _download_via_instaloader(loader, post, work_dir, dest_path):
    """Download the post into work_dir and move its first .mp4 to dest_path.

    Returns True when a video was moved into place, False when the download
    produced no .mp4 file.
    """
    work_dir.mkdir(parents=True, exist_ok=True)
    # Pass the Path object rather than a str.
    # NOTE(review): Instaloader appears to sanitise '/' in *string* targets
    # (it treats them as names), which would put the files somewhere other
    # than work_dir - confirm against the installed Instaloader version.
    loader.download_post(post, work_dir)
    mp4s = sorted(work_dir.glob('*.mp4'))
    if not mp4s:
        return False
    # Remove any previous result first so the move cannot fail on an existing file.
    if dest_path.exists():
        dest_path.unlink()
    shutil.move(mp4s[0].as_posix(), dest_path.as_posix())
    return True


def _print_result(dest_path, description):
    """Print the single JSON result line that callers read from stdout."""
    print(json.dumps({"video_path": dest_path.as_posix(), "description": description}, ensure_ascii=False))


def main():
    """Download the first video of an Instagram post/reel and report it as JSON.

    Command-line arguments:
        --url          Instagram post/reel URL (required).
        --out          Output directory (default: downloads); created if missing.
        --username     Instagram username (default: env IG_USERNAME).
        --password     Instagram password (default: env IG_PASSWORD).
        --sessionfile  Instaloader session file path (default: env IG_SESSIONFILE).

    On success the video is stored as ``<out>/<shortcode>.mp4`` and exactly one
    line is written to stdout: a JSON object with the keys ``video_path`` and
    ``description`` (the post caption, or an empty string). All diagnostics go
    to stderr.

    Strategy: first try to stream the video straight from its URL; if that is
    not possible or fails, fall back to Instaloader's own downloader, which
    writes into ``<out>/<shortcode>/``, and move the first ``.mp4`` up.

    Exit codes:
        2  the shortcode could not be parsed from --url
        3  the post could not be resolved, or the fallback download failed
        4  the fallback download produced no video file
    """
    parser = argparse.ArgumentParser(description="Download Instagram post media using Instaloader")
    parser.add_argument('--url', required=True, help='Instagram post/reel URL')
    parser.add_argument('--out', default='downloads', help='Output directory (default: downloads)')
    parser.add_argument('--username', default=os.getenv('IG_USERNAME'), help='Instagram username (env IG_USERNAME)')
    parser.add_argument('--password', default=os.getenv('IG_PASSWORD'), help='Instagram password (env IG_PASSWORD)')
    parser.add_argument('--sessionfile', default=os.getenv('IG_SESSIONFILE'), help='Instaloader session file path (env IG_SESSIONFILE)')
    args = parser.parse_args()

    shortcode = extract_shortcode_from_url(args.url)
    if not shortcode:
        print('Error: Could not parse shortcode from URL', file=sys.stderr)
        sys.exit(2)

    out_dir = Path(args.out).resolve()
    out_dir.mkdir(parents=True, exist_ok=True)

    loader = _build_quiet_loader()
    # Login is optional; a failed login only produces a warning.
    _login_if_possible(loader, args.username, args.password, args.sessionfile)

    # Resolve the post
    try:
        post = instaloader.Post.from_shortcode(loader.context, shortcode)
    except Exception as e:
        print(f"Error: resolve failed: {e}", file=sys.stderr)
        sys.exit(3)

    dest_path = (out_dir / f"{shortcode}.mp4").resolve()
    try:
        description = getattr(post, 'caption', '') or ''
    except Exception:
        description = ''

    # Prefer downloading just the first video directly into out_dir as <shortcode>.mp4
    video_url = _find_video_url(post)
    if video_url:
        try:
            _fetch_video(video_url, dest_path)
        except Exception as e:
            # Not fatal: report it and fall back to the full Instaloader download.
            print(f"Warning: direct download failed, falling back to Instaloader: {e}", file=sys.stderr)
        else:
            _print_result(dest_path, description)
            return

    # Fallback: use Instaloader's downloader into a subdir, then move the first mp4 up
    try:
        found_video = _download_via_instaloader(loader, post, out_dir / shortcode, dest_path)
    except Exception as e:
        print(f"Error: download failed: {e}", file=sys.stderr)
        sys.exit(3)
    if not found_video:
        print('No video found (post may be image-only)', file=sys.stderr)
        sys.exit(4)
    _print_result(dest_path, description)
# Script entry point: run the CLI only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()