Version One
This commit is contained in:
37
tools/instaloader/README.md
Normal file
37
tools/instaloader/README.md
Normal file
@@ -0,0 +1,37 @@
|
||||
# Instaloader Helper
|
||||
|
||||
A small wrapper around Instaloader to download a single Instagram post/reel.
|
||||
|
||||
Reference project: instaloader/instaloader (MIT) — https://github.com/instaloader/instaloader
|
||||
|
||||
## Setup
|
||||
|
||||
```bash
|
||||
cd tools/instaloader
|
||||
python -m venv .venv
|
||||
source .venv/bin/activate
|
||||
pip install -r requirements.txt
|
||||
```
|
||||
|
||||
## Login options
|
||||
- Env vars: `IG_USERNAME`, `IG_PASSWORD`
|
||||
- Or a session file: `IG_SESSIONFILE` together with `--username`. Create it via:
|
||||
```bash
|
||||
instaloader --login YOUR_USERNAME
|
||||
# After successful login, pass the created session file path via IG_SESSIONFILE or --sessionfile
|
||||
```
|
||||
|
||||
## Usage
|
||||
```bash
|
||||
# In the virtualenv
|
||||
python download_ig.py --url https://www.instagram.com/p/DPujDRJDNov/ --out ./downloads
|
||||
# or provide username/password via env
|
||||
IG_USERNAME=... IG_PASSWORD=... python download_ig.py --url https://www.instagram.com/p/.../ --out ./downloads
|
||||
# or use a session file
|
||||
IG_SESSIONFILE=/path/to/sessionfile python download_ig.py --url https://www.instagram.com/reel/.../ --username YOUR_USERNAME
|
||||
```
|
||||
|
||||
- On success, prints a JSON object to stdout with the saved `.mp4` path and the post caption: `{"video_path": "...", "description": "..."}`.
|
||||
- Exits with a non-zero code if the URL cannot be parsed, the post cannot be resolved or downloaded, or no video is found (e.g. the post is image-only or access is restricted).
|
||||
|
||||
|
||||
137
tools/instaloader/download_ig.py
Normal file
137
tools/instaloader/download_ig.py
Normal file
@@ -0,0 +1,137 @@
|
||||
#!/usr/bin/env python3
|
||||
import argparse
|
||||
import os
|
||||
import shutil
|
||||
import requests
|
||||
import json
|
||||
import re
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
try:
|
||||
import instaloader
|
||||
except Exception as e:
|
||||
print(f"Error: instaloader not installed. Run: pip install -r requirements.txt\n{e}", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
def extract_shortcode_from_url(url: str) -> "str | None":
    """Return the post shortcode embedded in an Instagram URL, or None.

    Recognised path forms (case-insensitive):
      * ``instagram.com/p/<code>``
      * ``instagram.com/reel/<code>`` and ``instagram.com/reels/<code>``
      * ``instagram.com/tv/<code>``
      * any of the above preceded by one extra path segment, e.g.
        ``instagram.com/<username>/p/<code>``

    The shortcode ends at the next ``/``, ``?`` or ``#``.
    """
    # The optional leading segment is lazy (``??``) so that URLs without a
    # username prefix are matched exactly as before.
    m = re.search(
        r"instagram\.com/(?:[^/?#]+/)??(?:p|reels?|tv)/([^/?#]+)",
        url,
        re.I,
    )
    return m.group(1) if m else None
|
||||
|
||||
|
||||
def main() -> None:
    """Download the first video of an Instagram post/reel and report it as JSON.

    Command-line arguments:
      --url          Instagram post/reel URL (required).
      --out          Output directory (default: ``downloads``).
      --username     Instagram username (default: env ``IG_USERNAME``).
      --password     Instagram password (default: env ``IG_PASSWORD``).
      --sessionfile  Instaloader session file (default: env ``IG_SESSIONFILE``).

    On success a single JSON object ``{"video_path", "description"}`` is
    printed to stdout; all diagnostics go to stderr.

    Exit codes:
      2  the shortcode could not be parsed from the URL
      3  the post could not be resolved, or the fallback download failed
      4  the fallback download produced no ``.mp4`` file
    """
    parser = argparse.ArgumentParser(description="Download Instagram post media using Instaloader")
    parser.add_argument('--url', required=True, help='Instagram post/reel URL')
    parser.add_argument('--out', default='downloads', help='Output directory (default: downloads)')
    parser.add_argument('--username', default=os.getenv('IG_USERNAME'), help='Instagram username (env IG_USERNAME)')
    parser.add_argument('--password', default=os.getenv('IG_PASSWORD'), help='Instagram password (env IG_PASSWORD)')
    parser.add_argument('--sessionfile', default=os.getenv('IG_SESSIONFILE'), help='Instaloader session file path (env IG_SESSIONFILE)')
    args = parser.parse_args()

    shortcode = extract_shortcode_from_url(args.url)
    if not shortcode:
        print('Error: Could not parse shortcode from URL', file=sys.stderr)
        sys.exit(2)

    out_dir = Path(args.out).resolve()
    out_dir.mkdir(parents=True, exist_ok=True)

    loader = instaloader.Instaloader(
        download_comments=False,
        post_metadata_txt_pattern='',
        download_video_thumbnails=False,
        save_metadata=False,
        compress_json=False,
        quiet=True,
    )
    # Silence logging further to keep stdout clean for the final JSON only.
    # Best-effort: if the context object rejects the assignment we carry on.
    try:
        loader.context.log = lambda *_a, **_k: None  # type: ignore[attr-defined]
        loader.context.log_progress = lambda *_a, **_k: None  # type: ignore[attr-defined]
        loader.context.error = lambda *_a, **_k: None  # type: ignore[attr-defined]
        loader.context.warning = lambda *_a, **_k: None  # type: ignore[attr-defined]
    except Exception:
        pass

    # Login if possible. A failed login is only a warning: the download is
    # still attempted without authentication.
    if args.sessionfile and args.username:
        try:
            loader.load_session_from_file(args.username, args.sessionfile)
        except Exception as e:
            print(f"Warning: failed to load session from {args.sessionfile}: {e}", file=sys.stderr)
    elif args.username and args.password:
        try:
            loader.login(args.username, args.password)
        except Exception as e:
            print(f"Warning: login failed: {e}", file=sys.stderr)

    # Resolve the post
    try:
        post = instaloader.Post.from_shortcode(loader.context, shortcode)
    except Exception as e:
        print(f"Error: resolve failed: {e}", file=sys.stderr)
        sys.exit(3)

    # Prefer downloading just the first video directly into out_dir as <shortcode>.mp4
    video_url = None
    try:
        if getattr(post, 'is_video', False) and getattr(post, 'video_url', None):
            video_url = post.video_url
        else:
            # Sidecar posts may contain one or more videos
            for node in post.get_sidecar_nodes():
                if getattr(node, 'is_video', False) and getattr(node, 'video_url', None):
                    video_url = node.video_url
                    break
    except Exception:
        # Best-effort lookup; the fallback below handles the post without a direct URL.
        video_url = None

    dest_path = (out_dir / f"{shortcode}.mp4").resolve()

    description = ''
    try:
        description = getattr(post, 'caption', '') or ''
    except Exception:
        description = ''

    if video_url:
        # Stream into a sibling ".part" file and promote it only once the
        # transfer has completed, so a failed download never leaves a
        # truncated file at the final path.
        part_path = dest_path.with_name(dest_path.name + '.part')
        try:
            with requests.get(video_url, stream=True, headers={
                'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123 Safari/537.36',
                'accept': '*/*',
                'referer': 'https://www.instagram.com/',
            }, timeout=60) as r:
                r.raise_for_status()
                with open(part_path, 'wb') as f:
                    for chunk in r.iter_content(chunk_size=1024 * 256):
                        if chunk:
                            f.write(chunk)
            os.replace(part_path, dest_path)
        except Exception as e:
            # Fall back to full download if direct fetch fails
            print(f"Warning: direct download failed, falling back to Instaloader: {e}", file=sys.stderr)
            try:
                part_path.unlink()
            except OSError:
                pass
        else:
            print(json.dumps({"video_path": dest_path.as_posix(), "description": description}, ensure_ascii=False))
            return

    # Fallback: use Instaloader's downloader into a scratch subdir, then move the first mp4 up
    target = out_dir / shortcode
    # Only remove the scratch dir afterwards if this run created it.
    created_target = not target.exists()
    try:
        target.mkdir(parents=True, exist_ok=True)
        loader.download_post(post, target.as_posix())
        mp4s = sorted(target.glob('*.mp4'))
        if not mp4s:
            print('No video found (post may be image-only)', file=sys.stderr)
            sys.exit(4)
        src_path = mp4s[0]
        if dest_path.exists():
            dest_path.unlink()
        shutil.move(src_path.as_posix(), dest_path.as_posix())
        print(json.dumps({"video_path": dest_path.as_posix(), "description": description}, ensure_ascii=False))
    except Exception as e:
        print(f"Error: download failed: {e}", file=sys.stderr)
        sys.exit(3)
    finally:
        if created_target:
            shutil.rmtree(target, ignore_errors=True)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
|
||||
|
||||
BIN
tools/instaloader/downloads/DLNigR4JYM7.mp4
Normal file
BIN
tools/instaloader/downloads/DLNigR4JYM7.mp4
Normal file
Binary file not shown.
3
tools/instaloader/requirements.txt
Normal file
3
tools/instaloader/requirements.txt
Normal file
@@ -0,0 +1,3 @@
|
||||
instaloader==4.14.2
|
||||
|
||||
|
||||
30
tools/pyktok/README.md
Normal file
30
tools/pyktok/README.md
Normal file
@@ -0,0 +1,30 @@
|
||||
# Pyktok Helper
|
||||
|
||||
A small wrapper around Pyktok to download a single TikTok video.
|
||||
|
||||
Reference: Pyktok on PyPI — https://pypi.org/project/pyktok/
|
||||
|
||||
## Setup
|
||||
|
||||
```bash
|
||||
cd tools/pyktok
|
||||
python -m venv .venv
|
||||
source .venv/bin/activate
|
||||
pip install -r requirements.txt
|
||||
# Install the browser binaries required by Playwright
|
||||
python -m playwright install
|
||||
# On some systems you may also need OS deps:
|
||||
# python -m playwright install-deps
|
||||
```
|
||||
|
||||
## Usage
|
||||
|
||||
```bash
|
||||
# In the virtualenv
|
||||
python download_tt.py --url "https://www.tiktok.com/@user/video/123..." --out ./downloads
|
||||
# On success, prints a JSON object with the absolute .mp4 path ("video_path") and the video "description"
|
||||
```
|
||||
|
||||
Notes:
|
||||
- Pyktok may require a logged-in browser session for certain videos. See Pyktok docs for `specify_browser` usage if needed.
|
||||
- On success this script prints a single JSON object (`video_path`, `description`) on stdout for easy consumption by Node.
|
||||
75
tools/pyktok/download_tt.py
Normal file
75
tools/pyktok/download_tt.py
Normal file
@@ -0,0 +1,75 @@
|
||||
#!/usr/bin/env python3
|
||||
import argparse
|
||||
import os
|
||||
import sys
|
||||
import json
|
||||
from pathlib import Path
|
||||
|
||||
try:
|
||||
import pyktok as pyk # type: ignore
|
||||
except Exception as e:
|
||||
print(
|
||||
f"Error: pyktok not installed. Run: pip install -r requirements.txt\n{e}",
|
||||
file=sys.stderr,
|
||||
)
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
def _find_description(obj):
    """Depth-first search of nested dicts/lists for a description string.

    Returns the first string value stored under a key named ``desc``,
    ``description`` or ``title`` (case-insensitive), or None if there is none.
    """
    if isinstance(obj, dict):
        for key, value in obj.items():
            if isinstance(key, str) and key.lower() in ('desc', 'description', 'title') and isinstance(value, str):
                return value
            found = _find_description(value)
            if found:
                return found
    elif isinstance(obj, list):
        for item in obj:
            found = _find_description(item)
            if found:
                return found
    return None


def main() -> None:
    """Download a TikTok video with Pyktok and report it as JSON.

    Command-line arguments:
      --url  TikTok share URL (required).
      --out  Output directory (default: ``downloads``).

    On success a single JSON object ``{"video_path", "description"}`` is
    printed to stdout; all diagnostics go to stderr.

    Exit codes:
      3  Pyktok raised while downloading
      4  no ``.mp4`` file was created or updated by this run
    """
    import contextlib

    parser = argparse.ArgumentParser(description="Download TikTok video using Pyktok")
    parser.add_argument('--url', required=True, help='TikTok share URL')
    parser.add_argument('--out', default='downloads', help='Output directory (default: downloads)')
    args = parser.parse_args()

    out_dir = Path(args.out).resolve()
    out_dir.mkdir(parents=True, exist_ok=True)

    # Change CWD to output dir so pyktok saves files here
    os.chdir(out_dir.as_posix())

    # Snapshot the videos already present so a stale file from an earlier
    # run is never reported as the result of this download.
    before = {p: p.stat().st_mtime_ns for p in out_dir.glob('*.mp4')}

    # Save video and a metadata CSV in the output directory
    meta_csv = out_dir / 'tiktok_metadata.csv'
    try:
        # Anything the library prints goes to stderr so stdout carries only
        # the final JSON object.
        with contextlib.redirect_stdout(sys.stderr):
            pyk.save_tiktok(args.url, True, meta_csv.as_posix())
    except Exception as e:
        print(f"Error: download failed: {e}", file=sys.stderr)
        sys.exit(3)

    # Keep only mp4 files that are new or were rewritten by this run
    fresh = [p for p in out_dir.glob('*.mp4') if before.get(p) != p.stat().st_mtime_ns]
    if not fresh:
        print('No video found', file=sys.stderr)
        sys.exit(4)
    video_path = max(fresh, key=lambda p: p.stat().st_mtime)

    # Attempt to retrieve description via JSON (best-effort; an empty
    # description is reported if the lookup fails)
    description = ''
    try:
        with contextlib.redirect_stdout(sys.stderr):
            data = pyk.alt_get_tiktok_json(args.url)
        found = _find_description(data)
        if isinstance(found, str):
            description = found
    except Exception as e:
        print(f"Warning: could not fetch description: {e}", file=sys.stderr)
        description = ''

    print(json.dumps({"video_path": video_path.as_posix(), "description": description}, ensure_ascii=False))
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
|
||||
|
||||
5
tools/pyktok/requirements.txt
Normal file
5
tools/pyktok/requirements.txt
Normal file
@@ -0,0 +1,5 @@
|
||||
pyktok==0.0.31
|
||||
playwright>=1.46.0
|
||||
requests>=2.31.0
|
||||
|
||||
|
||||
Reference in New Issue
Block a user