Version One

This commit is contained in:
2025-10-28 14:33:24 -04:00
parent e0831295f6
commit 00fa383638
41 changed files with 8835 additions and 1 deletions

View File

@@ -0,0 +1,37 @@
# Instaloader Helper
A small wrapper around Instaloader to download a single Instagram post/reel.
Reference project: instaloader/instaloader (MIT) — https://github.com/instaloader/instaloader
## Setup
```bash
cd tools/instaloader
python -m venv .venv
source .venv/bin/activate
pip install -r requirements.txt
```
## Login options
- Env vars: `IG_USERNAME`, `IG_PASSWORD`
- Or a session file: `IG_SESSIONFILE` together with `--username`. Create it via:
```bash
instaloader --login YOUR_USERNAME
# After successful login, pass the created session file path via IG_SESSIONFILE or --sessionfile
```
## Usage
```bash
# In the virtualenv
python download_ig.py --url https://www.instagram.com/p/DPujDRJDNov/ --out ./downloads
# or provide username/password via env
IG_USERNAME=... IG_PASSWORD=... python download_ig.py --url https://www.instagram.com/p/.../ --out ./downloads
# or use a session file
IG_SESSIONFILE=/path/to/sessionfile python download_ig.py --url https://www.instagram.com/reel/.../ --username YOUR_USERNAME
```
- On success, prints a single JSON object to stdout: `{"video_path": "<path to the downloaded .mp4>", "description": "<post caption>"}`.
- Exits with non-zero code if the post is image-only or access is restricted.

View File

@@ -0,0 +1,137 @@
#!/usr/bin/env python3
import argparse
import os
import shutil
import requests
import json
import re
import sys
from pathlib import Path
try:
import instaloader
except Exception as e:
print(f"Error: instaloader not installed. Run: pip install -r requirements.txt\n{e}", file=sys.stderr)
sys.exit(1)
def extract_shortcode_from_url(url: str):
    """Return the post shortcode embedded in an Instagram URL.

    Recognised permalink shapes (host match is case-insensitive):
    ``/p/<code>``, ``/reel/<code>``, ``/reels/<code>`` and ``/tv/<code>``,
    each optionally preceded by a single ``/<username>/`` path segment.

    Args:
        url: The URL to inspect. ``None`` or an empty string is tolerated.

    Returns:
        The shortcode string, or ``None`` when the URL has no recognisable
        post/reel path.
    """
    if not url:
        return None
    # The username prefix is matched lazily (``??``) so that every URL the
    # plain ``/p/`` and ``/reel/`` forms already matched keeps yielding the
    # same shortcode; the prefix is only tried when the bare form fails.
    m = re.search(
        r"instagram\.com/(?:[^/?#]+/)??(?:p|reels?|tv)/([^/?#]+)",
        url,
        re.I,
    )
    return m.group(1) if m else None
def _fetch_video(video_url, dest_path):
    """Stream ``video_url`` to ``dest_path``, publishing the file only when complete.

    The body is written to a sibling ``<name>.part`` file and renamed onto
    ``dest_path`` after the transfer finishes, so an interrupted download
    never leaves a truncated file at the final location.

    Raises:
        Exception: whatever ``requests`` or the filesystem raised; the
            partial file is removed before re-raising.
    """
    part_path = dest_path.with_name(dest_path.name + '.part')
    try:
        with requests.get(video_url, stream=True, headers={
            'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123 Safari/537.36',
            'accept': '*/*',
            'referer': 'https://www.instagram.com/',
        }, timeout=60) as r:
            r.raise_for_status()
            with open(part_path, 'wb') as f:
                for chunk in r.iter_content(chunk_size=1024 * 256):
                    if chunk:
                        f.write(chunk)
        # Rename only after the file handle is closed.
        os.replace(part_path, dest_path)
    except Exception:
        try:
            part_path.unlink()
        except OSError:
            pass
        raise


def main():
    """Download the first video of an Instagram post/reel and report it as JSON.

    On success, prints one JSON object to stdout with the keys
    ``video_path`` (the saved ``<out>/<shortcode>.mp4``) and ``description``
    (the post caption, or an empty string). Diagnostics go to stderr.

    Exit codes used by this function:
        2 - the shortcode could not be parsed from ``--url``
        3 - the post could not be resolved, or the fallback download failed
        4 - the fallback download produced no ``.mp4``
    """
    parser = argparse.ArgumentParser(description="Download Instagram post media using Instaloader")
    parser.add_argument('--url', required=True, help='Instagram post/reel URL')
    parser.add_argument('--out', default='downloads', help='Output directory (default: downloads)')
    parser.add_argument('--username', default=os.getenv('IG_USERNAME'), help='Instagram username (env IG_USERNAME)')
    parser.add_argument('--password', default=os.getenv('IG_PASSWORD'), help='Instagram password (env IG_PASSWORD)')
    parser.add_argument('--sessionfile', default=os.getenv('IG_SESSIONFILE'), help='Instaloader session file path (env IG_SESSIONFILE)')
    args = parser.parse_args()

    shortcode = extract_shortcode_from_url(args.url)
    if not shortcode:
        print('Error: Could not parse shortcode from URL', file=sys.stderr)
        sys.exit(2)

    out_dir = Path(args.out).resolve()
    out_dir.mkdir(parents=True, exist_ok=True)

    loader = instaloader.Instaloader(
        download_comments=False,
        post_metadata_txt_pattern='',
        download_video_thumbnails=False,
        save_metadata=False,
        compress_json=False,
        quiet=True,
    )
    # Silence Instaloader's own logging so stdout carries only the final JSON line.
    try:
        loader.context.log = lambda *_a, **_k: None  # type: ignore[attr-defined]
        loader.context.log_progress = lambda *_a, **_k: None  # type: ignore[attr-defined]
        loader.context.error = lambda *_a, **_k: None  # type: ignore[attr-defined]
        loader.context.warning = lambda *_a, **_k: None  # type: ignore[attr-defined]
    except Exception:
        pass

    # Login is best-effort: a failure is reported, then we continue anonymously.
    if args.sessionfile and args.username:
        try:
            loader.load_session_from_file(args.username, args.sessionfile)
        except Exception as e:
            print(f"Warning: failed to load session from {args.sessionfile}: {e}", file=sys.stderr)
    elif args.username and args.password:
        try:
            loader.login(args.username, args.password)
        except Exception as e:
            print(f"Warning: login failed: {e}", file=sys.stderr)

    # Resolve the post
    try:
        post = instaloader.Post.from_shortcode(loader.context, shortcode)
    except Exception as e:
        print(f"Error: resolve failed: {e}", file=sys.stderr)
        sys.exit(3)

    # Prefer downloading just the first video directly into out_dir as <shortcode>.mp4
    video_url = None
    try:
        if getattr(post, 'is_video', False) and getattr(post, 'video_url', None):
            video_url = post.video_url
        else:
            # Sidecar posts may contain one or more videos
            for node in post.get_sidecar_nodes():
                if getattr(node, 'is_video', False) and getattr(node, 'video_url', None):
                    video_url = node.video_url
                    break
    except Exception:
        # Any lookup failure just means we use the Instaloader fallback below.
        video_url = None

    dest_path = (out_dir / f"{shortcode}.mp4").resolve()

    description = ''
    try:
        description = getattr(post, 'caption', '') or ''
    except Exception:
        description = ''

    if video_url:
        try:
            _fetch_video(video_url, dest_path)
        except Exception as e:
            # Report on stderr, then fall back to the full Instaloader download.
            print(f"Warning: direct download failed, falling back to Instaloader: {e}", file=sys.stderr)
        else:
            print(json.dumps({"video_path": dest_path.as_posix(), "description": description}, ensure_ascii=False))
            return

    # Fallback: Use Instaloader's downloader into a subdir, then move the first mp4 up
    try:
        target = out_dir / shortcode
        target.mkdir(parents=True, exist_ok=True)
        # NOTE(review): Instaloader appears to sanitise *string* targets
        # (path separators get replaced), which would put the files outside
        # `target`. Passing the Path object avoids that; confirm against the
        # pinned Instaloader version.
        loader.download_post(post, target)
        mp4s = sorted(target.glob('*.mp4'))
        if not mp4s:
            print('No video found (post may be image-only)', file=sys.stderr)
            # SystemExit is not an Exception subclass, so the handler below
            # does not intercept this exit code.
            sys.exit(4)
        src_path = mp4s[0]
        if dest_path.exists():
            dest_path.unlink()
        shutil.move(src_path.as_posix(), dest_path.as_posix())
        print(json.dumps({"video_path": dest_path.as_posix(), "description": description}, ensure_ascii=False))
    except Exception as e:
        print(f"Error: download failed: {e}", file=sys.stderr)
        sys.exit(3)


if __name__ == '__main__':
    main()

Binary file not shown.

View File

@@ -0,0 +1,3 @@
instaloader==4.14.2

30
tools/pyktok/README.md Normal file
View File

@@ -0,0 +1,30 @@
# Pyktok Helper
A small wrapper around Pyktok to download a single TikTok video.
Reference: Pyktok on PyPI — https://pypi.org/project/pyktok/
## Setup
```bash
cd tools/pyktok
python -m venv .venv
source .venv/bin/activate
pip install -r requirements.txt
# Install browser drivers required by playwright
python -m playwright install
# On some systems you may also need OS deps:
# python -m playwright install-deps
```
## Usage
```bash
# In the virtualenv
python download_tt.py --url "https://www.tiktok.com/@user/video/123..." --out ./downloads
# Prints a JSON object {"video_path": "<absolute path to the .mp4>", "description": "..."} on success
```
Notes:
- Pyktok may require a logged-in browser session for certain videos. See Pyktok docs for `specify_browser` usage if needed.
- This script emits only a single JSON object (`video_path`, `description`) on stdout for easy consumption by Node; errors go to stderr.

View File

@@ -0,0 +1,75 @@
#!/usr/bin/env python3
import argparse
import os
import sys
import json
from pathlib import Path
try:
import pyktok as pyk # type: ignore
except Exception as e:
print(
f"Error: pyktok not installed. Run: pip install -r requirements.txt\n{e}",
file=sys.stderr,
)
sys.exit(1)
_DESCRIPTION_KEYS = ('desc', 'description', 'title')


def _find_description(obj):
    """Depth-first search of nested dicts/lists for the first non-empty caption.

    A value qualifies when its key (case-insensitive) is one of
    ``_DESCRIPTION_KEYS`` and the value is a non-empty string.

    Returns:
        The matching string, or ``None`` if nothing qualifies.
    """
    if isinstance(obj, dict):
        for k, v in obj.items():
            if isinstance(k, str) and k.lower() in _DESCRIPTION_KEYS and isinstance(v, str) and v:
                return v
            found = _find_description(v)
            if found:
                return found
    elif isinstance(obj, list):
        for it in obj:
            found = _find_description(it)
            if found:
                return found
    return None


def _snapshot_mp4s(directory):
    """Map each ``*.mp4`` in ``directory`` to its ``(mtime_ns, size)`` signature."""
    snapshot = {}
    for p in directory.glob('*.mp4'):
        try:
            st = p.stat()
        except OSError:
            # File vanished between glob and stat; ignore it.
            continue
        snapshot[p] = (st.st_mtime_ns, st.st_size)
    return snapshot


def main():
    """Download one TikTok video with Pyktok and report it as JSON.

    On success, prints one JSON object to stdout with the keys
    ``video_path`` (the ``.mp4`` written into ``--out``) and ``description``
    (best-effort caption, or an empty string). Diagnostics go to stderr.

    Exit codes used by this function:
        3 - Pyktok raised while downloading
        4 - the download produced no new or updated ``.mp4``
    """
    parser = argparse.ArgumentParser(description="Download TikTok video using Pyktok")
    parser.add_argument('--url', required=True, help='TikTok share URL')
    parser.add_argument('--out', default='downloads', help='Output directory (default: downloads)')
    args = parser.parse_args()

    out_dir = Path(args.out).resolve()
    out_dir.mkdir(parents=True, exist_ok=True)
    # Change CWD to output dir so pyktok saves files here
    os.chdir(out_dir.as_posix())

    # Save video and a metadata CSV in the output directory
    meta_csv = out_dir / 'tiktok_metadata.csv'
    # Record what is already present so a leftover file from an earlier run
    # is never mistaken for this download.
    before = _snapshot_mp4s(out_dir)
    try:
        pyk.save_tiktok(args.url, True, meta_csv.as_posix())
    except Exception as e:
        print(f"Error: download failed: {e}", file=sys.stderr)
        sys.exit(3)

    # Keep only mp4s that are new or were rewritten by this run.
    after = _snapshot_mp4s(out_dir)
    fresh = [p for p, sig in after.items() if before.get(p) != sig]
    if not fresh:
        print('No video found', file=sys.stderr)
        sys.exit(4)
    video_path = max(fresh, key=lambda p: after[p][0])

    # Attempt to retrieve description via JSON (best-effort)
    description = ''
    try:
        data = pyk.alt_get_tiktok_json(args.url)
        found = _find_description(data)
        if isinstance(found, str):
            description = found
    except Exception as e:
        print(f"Warning: could not retrieve description: {e}", file=sys.stderr)
        description = ''

    print(json.dumps({"video_path": video_path.as_posix(), "description": description}, ensure_ascii=False))


if __name__ == '__main__':
    main()

View File

@@ -0,0 +1,5 @@
pyktok==0.0.31
playwright>=1.46.0
requests>=2.31.0