#!/usr/bin/env python3
"""
Video Subtitle GIF Generator

Searches for text in video subtitles and creates GIF clips for each match.

Dependencies:
    - FFmpeg and FFprobe (required): Install from https://ffmpeg.org/
    - srt: Install with 'pip install srt'. Needed to parse SRT subtitles and
      to write the per-clip subtitle file that is burned into each GIF.
    - webvtt-py (optional): Install with 'pip install webvtt-py' for VTT subtitle support
    - colorama (optional): Install with 'pip install colorama' for colored output

Usage:
    python video_subtitle_gif.py VIDEO_PATH SEARCH_TEXT [OPTIONS]

Example:
    python video_subtitle_gif.py movie.mp4 "hello world" --fps 15 --width 640
"""

import argparse
import hashlib
import json
import os
import re
import subprocess
import sys
import tempfile
import urllib.request
import urllib.parse
from datetime import timedelta
from typing import List, Dict, Optional

try:
    from colorama import Fore, Style, init as colorama_init
    colorama_init(autoreset=True)
    COLORS_ENABLED = True
except ImportError:
    # Fallback if colorama is not installed: every color code becomes ''.
    COLORS_ENABLED = False

    class Fore:
        RED = GREEN = YELLOW = BLUE = CYAN = MAGENTA = WHITE = ''

    class Style:
        BRIGHT = RESET_ALL = ''

# Constants
ERROR_PREVIEW_LENGTH = 200  # Characters to show from error messages
SUBTITLE_PREVIEW_LENGTH = 60  # Characters to show from subtitle text
# Encoding fallback order. 'utf-8-sig' goes first because it strips a leading
# BOM and decodes BOM-less UTF-8 exactly like 'utf-8'; 'latin-1' accepts any
# byte sequence and therefore acts as the last resort.
DEFAULT_ENCODINGS = ['utf-8-sig', 'utf-8', 'latin-1']
# Bitmap subtitle codecs: these cannot be converted to text by FFmpeg.
IMAGE_SUBTITLE_CODECS = frozenset({
    'hdmv_pgs_subtitle', 'pgssub', 'dvd_subtitle', 'dvdsub',
    'dvb_subtitle', 'xsub',
})
# Text subtitle codecs mapped to a file extension parse_subtitles() understands.
TEXT_SUBTITLE_EXTENSIONS = {
    'srt': '.srt', 'subrip': '.srt',
    'ass': '.ass', 'ssa': '.ass',
    'vtt': '.vtt', 'webvtt': '.vtt',
}

try:
    import srt
except ImportError:
    srt = None

try:
    import webvtt
except ImportError:
    webvtt = None

try:
    import pgsrip
except ImportError:
    pgsrip = None

try:
    import pytesseract
except ImportError:
    pytesseract = None

try:
    from PIL import Image
except ImportError:
    Image = None


class SubtitleError(Exception):
    """Base exception for subtitle-related errors"""


class FFmpegError(Exception):
    """Exception for FFmpeg-related errors"""


class ValidationError(Exception):
    """Exception for input validation errors"""


def parse_arguments() -> argparse.Namespace:
    """Parse command-line arguments using argparse"""
    parser = argparse.ArgumentParser(
        description="Search video subtitles and create GIFs for matches"
    )
    parser.add_argument("video_path", help="Path to video file")
    parser.add_argument("search_text", help="Text to search in subtitles")
    parser.add_argument(
        "--output-prefix",
        default="output",
        help="Prefix for output GIF files (default: output)"
    )
    parser.add_argument(
        "--fps",
        type=int,
        default=10,
        help="GIF frames per second (default: 10, range: 1-60)"
    )
    parser.add_argument(
        "--width",
        type=int,
        default=480,
        help="GIF width in pixels (default: 480, must be positive)"
    )
    parser.add_argument(
        "--context-before",
        type=float,
        default=0.5,
        help="Extra seconds before subtitle (default: 0.5, can be negative to trim)"
    )
    parser.add_argument(
        "--context-after",
        type=float,
        default=0.5,
        help="Extra seconds after subtitle (default: 0.5, can be negative to trim)"
    )
    parser.add_argument(
        "--include-surrounding-subtitles",
        action="store_true",
        help="Include subtitles from surrounding lines in the output"
    )
    return parser.parse_args()


def check_ffmpeg_available() -> None:
    """
    Check if FFmpeg and FFprobe are available in PATH.

    Raises FFmpegError if a tool is missing, exits non-zero, or hangs.
    """
    for tool in ['ffmpeg', 'ffprobe']:
        try:
            subprocess.run(
                [tool, '-version'],
                capture_output=True,
                check=True,
                timeout=5
            )
        except FileNotFoundError as e:
            raise FFmpegError(
                f"{tool} not found. Please install FFmpeg from https://ffmpeg.org/"
            ) from e
        except subprocess.CalledProcessError as e:
            raise FFmpegError(f"{tool} is installed but not working correctly") from e
        except subprocess.TimeoutExpired as e:
            raise FFmpegError(f"{tool} is not responding") from e


def validate_inputs(args: argparse.Namespace) -> None:
    """
    Validate all input arguments.

    Checks that the video path is an existing regular file, the search text
    is not blank, fps is within 1-60 and width is positive.

    Raises ValidationError if any validation fails.
    """
    # Validate video file
    if not os.path.exists(args.video_path):
        raise ValidationError(f"Video file not found: {args.video_path}")
    if not os.path.isfile(args.video_path):
        raise ValidationError(f"Path is not a file: {args.video_path}")

    # Validate search text
    if not args.search_text or not args.search_text.strip():
        raise ValidationError("Search text cannot be empty")

    # Validate numeric arguments
    if not 1 <= args.fps <= 60:
        raise ValidationError(f"FPS must be between 1 and 60, got: {args.fps}")
    if args.width < 1:
        raise ValidationError(f"Width must be positive, got: {args.width}")


def find_or_extract_subtitles(video_path: str) -> Optional[str]:
    """
    Find an external subtitle file or extract embedded subtitles.

    Looks next to the video for '<video name>.srt/.ass/.vtt' first, then
    falls back to extract_embedded_subtitles(). Nothing is downloaded.

    Returns path to subtitle file or None if not found.
    """
    video_dir = os.path.dirname(os.path.abspath(video_path))
    video_name = os.path.splitext(os.path.basename(video_path))[0]

    # Check for external subtitle files
    for ext in ['.srt', '.ass', '.vtt']:
        subtitle_path = os.path.join(video_dir, video_name + ext)
        if os.path.exists(subtitle_path):
            print(f"{Fore.GREEN}✅ Found external subtitle: {subtitle_path}{Style.RESET_ALL}")
            return subtitle_path

    # Try extracting embedded subtitles
    print(f"{Fore.YELLOW}🔍 No external subtitles found. Checking for embedded subtitles...{Style.RESET_ALL}")
    embedded_subs = extract_embedded_subtitles(video_path, video_dir, video_name)
    if embedded_subs:
        return embedded_subs

    # Do not try downloading from OpenSubtitles
    print(f"{Fore.YELLOW}🌐 Trying to grab subtitles from the internet is too janky, just do it manually. {Style.RESET_ALL}")
    return None


def _probe_subtitle_streams(video_path: str) -> List[Dict]:
    """Return ffprobe's JSON description of every subtitle stream in the video."""
    probe_cmd = [
        'ffprobe',
        '-v', 'error',
        '-select_streams', 's',
        # One 'stream=' section listing both fields, then the tag section.
        '-show_entries', 'stream=index,codec_name:stream_tags=language,title',
        '-of', 'json',
        video_path
    ]
    result = subprocess.run(
        probe_cmd,
        capture_output=True,
        text=True,
        check=True
    )
    return json.loads(result.stdout).get('streams', [])


def _select_subtitle_stream(streams: List[Dict]) -> int:
    """Print the streams and return the position of the first English one (else 0)."""
    for i, stream in enumerate(streams):
        stream_index = stream.get('index')
        codec_name = stream.get('codec_name', 'unknown')
        tags = stream.get('tags', {})
        language = tags.get('language', 'unknown').lower()
        title = tags.get('title', '')

        print(f"{Fore.CYAN}   📝 Stream {i}: index={stream_index}, codec={codec_name}, language={language}, title={title}{Style.RESET_ALL}")

        if language in ['en', 'eng', 'english'] or 'english' in title.lower():
            print(f"{Fore.GREEN}   ✅ Selected English subtitle stream {i} (codec: {codec_name}){Style.RESET_ALL}")
            return i

    print(f"{Fore.YELLOW}   ⚠️  No English subtitle found, using first stream 0{Style.RESET_ALL}")
    return 0


def _run_subtitle_extraction(video_path: str, stream_position: int,
                             subtitle_codec: str, output_path: str) -> subprocess.CompletedProcess:
    """Run FFmpeg to write one subtitle stream to output_path with the given codec."""
    extract_cmd = [
        'ffmpeg',
        '-v', 'error',
        '-i', video_path,
        # '0:s:N' addresses the N-th *subtitle* stream, which matches the
        # position in ffprobe's '-select_streams s' listing.
        '-map', f'0:s:{stream_position}',
        '-c:s', subtitle_codec,
        '-y',
        output_path
    ]
    return subprocess.run(
        extract_cmd,
        capture_output=True,
        text=True
    )


def extract_embedded_subtitles(video_path: str, output_dir: str, base_name: str) -> Optional[str]:
    """
    Extract embedded subtitles using FFmpeg.

    Prefers the first stream tagged as English (language tag or title),
    otherwise uses the first subtitle stream. The stream is converted to SRT;
    if that fails and the codec is a known text format it is copied as-is to
    a file with an extension parse_subtitles() supports.

    Args:
        video_path: Video file to read.
        output_dir: Directory the extracted subtitle file is written to.
        base_name: File name (without extension) for the extracted subtitle.

    Returns path to extracted subtitle file, or None when there are no
    subtitle streams, the stream is image-based, or extraction fails.
    """
    try:
        streams = _probe_subtitle_streams(video_path)

        if not streams:
            print(f"{Fore.YELLOW}❌ No embedded subtitles found{Style.RESET_ALL}")
            return None

        print(f"{Fore.CYAN}📺 Found {len(streams)} subtitle stream(s):{Style.RESET_ALL}")
        selected_stream = _select_subtitle_stream(streams)
        selected_codec = streams[selected_stream].get('codec_name', 'unknown')

        # Bitmap subtitles can never be converted to text, so bail out before
        # spending an FFmpeg run (and possibly leaving a stray file behind).
        if selected_codec in IMAGE_SUBTITLE_CODECS:
            print(f"{Fore.YELLOW}   ⚠️  Found image-based subtitles ({selected_codec}){Style.RESET_ALL}")
            return None

        # First attempt: convert to SRT
        output_path = os.path.join(output_dir, f"{base_name}.srt")
        result = _run_subtitle_extraction(video_path, selected_stream, 'srt', output_path)

        if result.returncode != 0:
            print(f"{Fore.YELLOW}   ⚠️  SRT conversion failed, trying alternative extraction...{Style.RESET_ALL}")
            print(f"{Fore.RED}   ⚠️  Error: {result.stderr[:ERROR_PREVIEW_LENGTH]}{Style.RESET_ALL}")

            extension = TEXT_SUBTITLE_EXTENSIONS.get(selected_codec)
            if extension is None:
                print(f"{Fore.YELLOW}   ⚠️  Detected image-based subtitle format: {selected_codec}{Style.RESET_ALL}")
                return None

            # Second attempt: stream copy (no conversion) into a file whose
            # extension both FFmpeg and parse_subtitles() recognise.
            output_path = os.path.join(output_dir, base_name + extension)
            result = _run_subtitle_extraction(video_path, selected_stream, 'copy', output_path)
            if result.returncode != 0:
                print(f"{Fore.RED}   ❌ Failed to extract subtitle: {result.stderr[:ERROR_PREVIEW_LENGTH]}{Style.RESET_ALL}")
                return None

        print(f"{Fore.GREEN}✅ Extracted embedded subtitles to: {output_path}{Style.RESET_ALL}")
        return output_path

    except subprocess.CalledProcessError as e:
        print(f"{Fore.RED}❌ Failed to extract subtitles: {e}{Style.RESET_ALL}")
        return None
    except (OSError, ValueError) as e:
        # OSError: ffmpeg/ffprobe could not be started; ValueError: bad JSON.
        print(f"{Fore.RED}❌ Error during subtitle extraction: {e}{Style.RESET_ALL}")
        return None


def parse_subtitles(subtitle_path: str) -> List[Dict]:
    """
    Parse subtitle file and return list of subtitle entries.

    Each entry: {"index": int, "start": float, "end": float, "text": str}
    with start/end in seconds. The parser is chosen from the file extension.

    Raises SubtitleError if format is unsupported.
    """
    ext = os.path.splitext(subtitle_path)[1].lower()

    if ext == '.srt':
        return parse_srt(subtitle_path)
    elif ext == '.vtt':
        return parse_vtt(subtitle_path)
    elif ext == '.ass':
        return parse_ass(subtitle_path)
    else:
        raise SubtitleError(f"Unsupported subtitle format: {ext}")


def _read_subtitle_text(subtitle_path: str, format_name: str) -> str:
    """Read the whole file, trying DEFAULT_ENCODINGS in order; SubtitleError on failure."""
    for encoding in DEFAULT_ENCODINGS:
        try:
            with open(subtitle_path, 'r', encoding=encoding) as f:
                # Decode everything up front so a late decode error cannot
                # leave half-processed results behind for the next attempt.
                return f.read()
        except (UnicodeDecodeError, LookupError):
            continue
        except OSError as e:
            raise SubtitleError(f"Error parsing {format_name} file: {e}") from e
    raise SubtitleError(
        f"Could not decode subtitle file with any encoding: {', '.join(DEFAULT_ENCODINGS)}"
    )


def parse_srt(subtitle_path: str) -> List[Dict]:
    """
    Parse SRT subtitle file using srt library.

    Raises SubtitleError if library is missing or parsing fails.
    """
    if srt is None:
        raise SubtitleError("'srt' library not installed. Install with: pip install srt")

    content = _read_subtitle_text(subtitle_path, "SRT")
    try:
        subtitles = list(srt.parse(content))
    except Exception as e:
        raise SubtitleError(f"Error parsing SRT file: {e}") from e

    return [
        {
            "index": sub.index,
            "start": sub.start.total_seconds(),
            "end": sub.end.total_seconds(),
            "text": sub.content
        }
        for sub in subtitles
    ]


def parse_vtt(subtitle_path: str) -> List[Dict]:
    """
    Parse WebVTT subtitle file.

    Entries are numbered from 1 in file order.

    Raises SubtitleError if library is missing or parsing fails.
    """
    if webvtt is None:
        raise SubtitleError("'webvtt-py' library not installed. Install with: pip install webvtt-py")

    try:
        vtt = webvtt.read(subtitle_path)
    except Exception as e:
        raise SubtitleError(f"Error parsing VTT file: {e}") from e

    return [
        {
            "index": i,
            # WebVTT timestamps are in format HH:MM:SS.mmm
            "start": parse_vtt_timestamp(caption.start),
            "end": parse_vtt_timestamp(caption.end),
            "text": caption.text
        }
        for i, caption in enumerate(vtt, 1)
    ]


def parse_vtt_timestamp(timestamp_str: str) -> float:
    """Convert VTT timestamp (HH:MM:SS.mmm or MM:SS.mmm) to seconds"""
    parts = timestamp_str.split(':')
    if len(parts) == 3:
        h, m, s = parts
        return int(h) * 3600 + int(m) * 60 + float(s)
    else:
        m, s = parts
        return int(m) * 60 + float(s)


def parse_ass(subtitle_path: str) -> List[Dict]:
    """
    Parse ASS/SSA subtitle file.

    Only 'Dialogue:' lines inside the [Events] section are read, assuming the
    standard field order Layer,Start,End,Style,Name,MarginL,MarginR,MarginV,
    Effect,Text. Override tags ({...}) are removed and '\\N' becomes a newline.

    Raises SubtitleError if the file cannot be decoded or parsed.
    """
    content = _read_subtitle_text(subtitle_path, "ASS")

    entries = []
    in_events = False
    try:
        for raw_line in content.split('\n'):
            line = raw_line.strip()

            # Any '[Section]' header switches sections; only [Events] counts.
            if line.startswith('[') and line.endswith(']'):
                in_events = line == '[Events]'
                continue

            if not (in_events and line.startswith('Dialogue:')):
                continue

            # maxsplit=9 keeps commas inside the Text field intact.
            parts = line.split(',', 9)
            if len(parts) < 10:
                continue

            text = re.sub(r'\{[^}]*\}', '', parts[9])  # Remove ASS formatting tags
            text = text.replace('\\N', '\n')
            entries.append({
                "index": len(entries) + 1,
                "start": parse_ass_timestamp(parts[1]),
                "end": parse_ass_timestamp(parts[2]),
                "text": text
            })
    except Exception as e:
        raise SubtitleError(f"Error parsing ASS file: {e}") from e

    return entries


def parse_ass_timestamp(timestamp_str: str) -> float:
    """Convert ASS timestamp (H:MM:SS.cc) to seconds"""
    h, m, s = timestamp_str.split(':')
    return int(h) * 3600 + int(m) * 60 + float(s)


def search_subtitles(subtitle_entries: List[Dict], search_text: str) -> List[Dict]:
    """
    Search subtitle entries for case-insensitive substring matches.

    An entry matches when the search text occurs in its text as written, or
    in its text with every run of whitespace (including line breaks)
    collapsed to one space, so a phrase split across two subtitle lines is
    still found. Prints a numbered preview of the matches.

    Returns list of matching entries, in their original order.
    """
    search_lower = search_text.lower()

    matches = []
    for entry in subtitle_entries:
        text_lower = entry['text'].lower()
        if search_lower in text_lower or search_lower in ' '.join(text_lower.split()):
            matches.append(entry)

    print(f"\n{Fore.GREEN}{Style.BRIGHT}✨ Found {len(matches)} matching subtitle(s):{Style.RESET_ALL}")
    for i, match in enumerate(matches, 1):
        flat_text = match['text'].replace('\n', ' ')
        preview_text = flat_text[:SUBTITLE_PREVIEW_LENGTH]
        # Only show an ellipsis when something was actually cut off.
        if len(flat_text) > SUBTITLE_PREVIEW_LENGTH:
            preview_text += '...'
        print(f"{Fore.CYAN}  {i}. [{format_timestamp(match['start'])} - {format_timestamp(match['end'])}]: {Fore.WHITE}{preview_text}{Style.RESET_ALL}")

    return matches


def format_timestamp(seconds: float) -> str:
    """
    Convert seconds to HH:MM:SS.mmm format.

    The value is rounded to whole milliseconds before being split into
    fields, so rounding carries over correctly (59.9996 -> 00:01:00.000).
    Negative values are rendered with a leading '-'.
    """
    total_ms = int(round(abs(seconds) * 1000))
    hours, remainder = divmod(total_ms, 3_600_000)
    minutes, remainder = divmod(remainder, 60_000)
    secs, millis = divmod(remainder, 1000)
    sign = '-' if seconds < 0 and total_ms else ''
    return f"{sign}{hours:02d}:{minutes:02d}:{secs:02d}.{millis:03d}"


def create_single_subtitle_file(match: Dict, subtitle_entries: List[Dict], match_index: int,
                                context_before: float, context_after: float,
                                clip_start_time: float, include_surrounding: bool) -> str:
    """
    Create a temporary SRT file with the matched subtitle and optionally surrounding subtitles.

    Timestamps are shifted so that clip_start_time becomes 0 and are clamped
    to the clip, which ends at match['end'] + context_after. Entries left
    with no positive duration inside the clip are dropped.

    Returns path to temporary file (caller is responsible for cleanup).
    Raises SubtitleError if the 'srt' library is not installed.

    Args:
        match: The matched subtitle entry
        subtitle_entries: All subtitle entries (needed for surrounding context)
        match_index: Index of the match in subtitle_entries (currently unused)
        context_before: Seconds before the subtitle (currently unused; the
            caller has already folded it into clip_start_time)
        context_after: Seconds after the subtitle
        clip_start_time: Start time of the video clip
        include_surrounding: Whether to include subtitles from surrounding lines
    """
    if srt is None:
        raise SubtitleError("'srt' library not installed. Install with: pip install srt")

    clip_duration = match['end'] + context_after - clip_start_time
    candidates = subtitle_entries if include_surrounding else [match]

    subtitles_to_include = []
    for entry in candidates:
        # Shift to clip-relative time and clamp to [0, clip_duration].
        adjusted_start = max(0, entry['start'] - clip_start_time)
        adjusted_end = min(entry['end'] - clip_start_time, clip_duration)

        # Entries outside the clip end up with a non-positive duration here.
        if adjusted_end > adjusted_start and adjusted_end > 0:
            subtitles_to_include.append(srt.Subtitle(
                index=len(subtitles_to_include) + 1,
                start=timedelta(seconds=adjusted_start),
                end=timedelta(seconds=adjusted_end),
                content=entry['text']
            ))

    # delete=False: FFmpeg has to open the file by name after we close it.
    with tempfile.NamedTemporaryFile(mode='w', suffix='.srt', delete=False, encoding='utf-8') as temp_file:
        temp_file.write(srt.compose(subtitles_to_include))
        return temp_file.name


def escape_ffmpeg_filter_option(value: str) -> str:
    """Escape a filter option value (first escaping level: backslash, quote, colon)."""
    return re.sub(r"([\\':])", r"\\\1", value)


def escape_ffmpeg_filtergraph(text: str) -> str:
    """Escape text for a filtergraph description (second level: \\ ' [ ] , ;)."""
    return re.sub(r"([\\'\[\],;])", r"\\\1", text)


def _remove_quietly(path: Optional[str]) -> None:
    """Best-effort removal of a temporary file; a missing path is not an error."""
    if not path:
        return
    try:
        os.unlink(path)
    except OSError:
        pass


def generate_gifs(video_path: str, subtitle_path: str, matches: List[Dict],
                  subtitle_entries: List[Dict], args: argparse.Namespace) -> int:
    """
    Generate a GIF for each matching subtitle.

    For every match the clip range is the subtitle's time span widened by
    args.context_before/context_after. The clip is first re-encoded to a
    temporary video, then converted to '<output_prefix>_<n>.gif' with the
    subtitle burned in. Matches with a non-positive duration are skipped and
    FFmpeg failures are reported without aborting the remaining matches.

    Args:
        video_path: Source video.
        subtitle_path: Path of the parsed subtitle file (not used here).
        matches: Entries to render, as returned by search_subtitles().
        subtitle_entries: All subtitle entries, for surrounding context.
        args: Parsed options (output_prefix, fps, width, context_before,
            context_after, include_surrounding_subtitles).

    Returns the number of GIFs actually created.
    Raises SubtitleError if the 'srt' library is not installed.
    """
    output_prefix = args.output_prefix
    fps = args.fps
    width = args.width
    context_before = args.context_before
    context_after = args.context_after
    include_surrounding = args.include_surrounding_subtitles

    created = 0
    for i, match in enumerate(matches, 1):
        # Calculate clip timestamps with context padding
        start_time = max(0, match['start'] - context_before)
        end_time = match['end'] + context_after
        duration = end_time - start_time

        # Negative context values trim the clip and can consume all of it.
        if duration <= 0:
            print(f"\n{Fore.YELLOW}⚠️  Skipping match {i}: negative duration ({duration:.3f}s){Style.RESET_ALL}")
            print(f"{Fore.YELLOW}   💡 Subtitle: {format_timestamp(match['start'])} - {format_timestamp(match['end'])}{Style.RESET_ALL}")
            print(f"{Fore.YELLOW}   💡 After trimming: {format_timestamp(start_time)} - {format_timestamp(end_time)}{Style.RESET_ALL}")
            print(f"{Fore.YELLOW}   💡 Try reducing --context-before or --context-after values{Style.RESET_ALL}")
            continue

        output_gif = f"{output_prefix}_{i}.gif"

        print(f"\n{Fore.MAGENTA}{Style.BRIGHT}🎬 Generating {output_gif}...{Style.RESET_ALL}")
        print(f"{Fore.CYAN}   ⏱️  Time range: {format_timestamp(start_time)} - {format_timestamp(end_time)}{Style.RESET_ALL}")

        # Position of this match in subtitle_entries (0 if it cannot be found)
        match_index = next(
            (idx for idx, entry in enumerate(subtitle_entries)
             if entry['start'] == match['start'] and entry['text'] == match['text']),
            0
        )

        temp_subtitle_path = None
        temp_clip_path = None
        try:
            # Both temp files are created inside the try so that neither
            # leaks if creating the other one fails.
            temp_subtitle_path = create_single_subtitle_file(
                match, subtitle_entries, match_index, context_before, context_after,
                start_time, include_surrounding
            )
            with tempfile.NamedTemporaryFile(suffix='.mkv', delete=False) as temp_clip:
                temp_clip_path = temp_clip.name

            # Step 1: Extract clip with precise seeking
            # Note: -ss after -i is accurate but slower; -t gives the duration.
            print(f"{Fore.YELLOW}   ✂️  Extracting clip...{Style.RESET_ALL}")
            extract_cmd = [
                'ffmpeg',
                '-i', video_path,
                '-ss', str(start_time),  # Seek after input for accuracy
                '-t', str(duration),     # Duration of clip
                '-c:v', 'libx264',       # Re-encode for precise cutting
                '-preset', 'ultrafast',  # Fast encoding
                # A GIF has no audio, and embedded (possibly bitmap) subtitle
                # streams must not be pulled into the intermediate file.
                '-an',
                '-sn',
                '-y',
                temp_clip_path
            ]
            subprocess.run(
                extract_cmd,
                capture_output=True,
                text=True,
                check=True
            )

            # Step 2: Convert clip to GIF with subtitles
            print(f"{Fore.YELLOW}   🎨 Converting to GIF...{Style.RESET_ALL}")

            # The filter string is parsed twice by FFmpeg (filtergraph, then
            # filter options), so values are escaped once per level. Windows
            # backslashes become forward slashes first.
            subtitle_options = (
                f"filename={escape_ffmpeg_filter_option(temp_subtitle_path.replace(chr(92), '/'))}"
                f":force_style={escape_ffmpeg_filter_option('FontSize=24,Bold=1')}"
            )
            filter_string = (
                # Subtitles are relative to the clip start (timestamp 0)
                f"[0:v]subtitles={escape_ffmpeg_filtergraph(subtitle_options)},"
                f"fps={fps},scale={width}:-1:flags=lanczos[sub];"
                # Split for palette generation
                "[sub]split[s0][s1];"
                "[s0]palettegen[p];"
                "[s1][p]paletteuse[out]"
            )

            gif_cmd = [
                'ffmpeg',
                '-i', temp_clip_path,
                '-filter_complex', filter_string,
                '-map', '[out]',
                '-loop', '0',
                '-y',
                output_gif
            ]
            subprocess.run(
                gif_cmd,
                capture_output=True,
                text=True,
                check=True
            )

            size_mb = os.path.getsize(output_gif) / (1024 * 1024)
            print(f"{Fore.GREEN}   ✅ Created: {output_gif} ({size_mb:.2f} MB){Style.RESET_ALL}")
            created += 1

        except subprocess.CalledProcessError as e:
            print(f"{Fore.RED}   ❌ Error creating GIF: {e}{Style.RESET_ALL}")
            print(f"{Fore.RED}   FFmpeg stderr: {e.stderr}{Style.RESET_ALL}")

        finally:
            # Clean up temporary files
            _remove_quietly(temp_subtitle_path)
            _remove_quietly(temp_clip_path)

    return created


def main():
    """
    Main entry point for the script.

    Exit codes: 0 on success or when nothing matched, 1 on any error or when
    no GIF could be created, 130 when interrupted with Ctrl-C.
    """
    try:
        # Parse arguments first so --help and usage errors work even when
        # FFmpeg is not installed.
        args = parse_arguments()
        validate_inputs(args)

        # Check dependencies
        check_ffmpeg_available()

        # Find or extract subtitles
        subtitle_file = find_or_extract_subtitles(args.video_path)
        if not subtitle_file:
            print(f"{Fore.RED}❌ Error: No subtitles found{Style.RESET_ALL}")
            print(f"{Fore.YELLOW}💡 Please download subtitles manually and place them next to the video file{Style.RESET_ALL}")
            sys.exit(1)

        # Parse subtitles
        subtitle_entries = parse_subtitles(subtitle_file)

        # Search for matches
        matches = search_subtitles(subtitle_entries, args.search_text)
        if not matches:
            print(f"\n{Fore.YELLOW}🔍 No matches found for '{args.search_text}'{Style.RESET_ALL}")
            sys.exit(0)

        # Generate GIFs
        print(f"\n{Fore.MAGENTA}{Style.BRIGHT}🎬 Generating GIFs...{Style.RESET_ALL}")
        created = generate_gifs(args.video_path, subtitle_file, matches, subtitle_entries, args)

        # Report what was really produced, not how many matches there were.
        if created == len(matches):
            print(f"\n{Fore.GREEN}{Style.BRIGHT}🎉 Successfully created {created} GIF(s)!{Style.RESET_ALL}")
        elif created:
            print(f"\n{Fore.YELLOW}{Style.BRIGHT}⚠️  Created {created} of {len(matches)} GIF(s); see messages above{Style.RESET_ALL}")
        else:
            print(f"\n{Fore.RED}❌ No GIFs were created; see messages above{Style.RESET_ALL}")
            sys.exit(1)

    except ValidationError as e:
        print(f"{Fore.RED}❌ Validation error: {e}{Style.RESET_ALL}")
        sys.exit(1)
    except FFmpegError as e:
        print(f"{Fore.RED}❌ FFmpeg error: {e}{Style.RESET_ALL}")
        sys.exit(1)
    except SubtitleError as e:
        print(f"{Fore.RED}❌ Subtitle error: {e}{Style.RESET_ALL}")
        sys.exit(1)
    except KeyboardInterrupt:
        print(f"\n\n{Fore.YELLOW}⚠️  Operation cancelled by user{Style.RESET_ALL}")
        sys.exit(130)
    except Exception as e:
        print(f"{Fore.RED}❌ Unexpected error: {e}{Style.RESET_ALL}")
        import traceback
        traceback.print_exc()
        sys.exit(1)


if __name__ == '__main__':
    main()