import re
import sys
import os
import glob

# A single cue timestamp. SRT separates the milliseconds with a comma, WebVTT
# with a dot, and WebVTT may omit the hours field ("mm:ss.ttt").
_TIMESTAMP = r'(?:\d+:)?\d{2}:\d{2}[.,]\d{3}'

# A cue timing: "<start> --> <end>".
_TIMING = _TIMESTAMP + r'[ \t]+-->[ \t]+' + _TIMESTAMP

# The "WEBVTT" signature line at the very start of a WebVTT file.
_VTT_HEADER_RE = re.compile(r'\AWEBVTT(?:[ \t][^\n]*)?(?:\n|\Z)')

# A digit-only line sitting directly above a cue timing, i.e. an SRT sequence
# number (or numeric WebVTT cue identifier). Requiring the timing underneath
# keeps digit-only caption text such as "42" from being deleted.
_CUE_NUMBER_RE = re.compile(
    r'^\d+[ \t]*\n(?=[ \t]*' + _TIMING + r')', re.MULTILINE
)

# A cue timing plus anything after it on the same line (WebVTT cue settings
# such as "align:start position:0%").
_TIMING_LINE_RE = re.compile(_TIMING + r'[^\n]*')

# A run of blank / whitespace-only lines.
_BLANK_RUN_RE = re.compile(r'\n\s*\n')


def _strip_subtitle_markup(content):
    """Return *content* with SRT/WebVTT structure removed, leaving caption text."""
    content = _VTT_HEADER_RE.sub('', content, count=1)
    # Cue numbers must go first: they are recognised by the timing below them.
    content = _CUE_NUMBER_RE.sub('', content)
    content = _TIMING_LINE_RE.sub('', content)
    # Collapse the gaps left behind so each caption line follows the previous.
    return _BLANK_RUN_RE.sub('\n', content).strip()


def srt_vtt_to_text(file_path):
    """Convert one SRT or WebVTT subtitle file to a plain-text transcript.

    The transcript is written next to the source file, using the same name
    with the extension replaced by ``.txt``. Cue numbers, cue timings (with
    any trailing cue settings) and the ``WEBVTT`` signature line are removed,
    and blank lines are collapsed.

    Args:
        file_path: Path of the ``.srt`` / ``.vtt`` file to convert.

    Returns:
        None. Success or failure is reported with ``print``; errors are not
        propagated to the caller.
    """
    txt_file = os.path.splitext(file_path)[0] + '.txt'

    try:
        # utf-8-sig drops a leading byte-order mark if present; otherwise the
        # BOM would stick to the first line and hide it from the patterns.
        with open(file_path, 'r', encoding='utf-8-sig') as file:
            content = file.read()

        content = _strip_subtitle_markup(content)

        with open(txt_file, 'w', encoding='utf-8') as file:
            file.write(content)

        print(f"Successfully converted {file_path} to {txt_file}")
    except Exception as e:
        # Deliberately broad: this is the per-file boundary of a batch run, so
        # one unreadable or undecodable file must not abort the remaining ones.
        print(f"An error occurred with {file_path}: {e}")

def main(directory):
    """Convert every ``.srt`` and ``.vtt`` file found under *directory*.

    The directory tree is searched recursively and each matching file is
    passed to ``srt_vtt_to_text``. All ``.srt`` files are processed before
    the ``.vtt`` files.

    Args:
        directory: Root directory to search.

    Returns:
        None. If *directory* is not an existing directory, a message is
        printed and nothing is converted.
    """
    if not os.path.isdir(directory):
        print(f"Directory {directory} does not exist.")
        return

    # Escape the root so glob metacharacters in the directory name itself
    # ("[", "]", "*", "?") are matched literally; unescaped, a folder such as
    # "Season [01]" is read as a character class and yields no files.
    root = glob.escape(directory)

    srt_files = glob.glob(os.path.join(root, '**', '*.srt'), recursive=True)
    vtt_files = glob.glob(os.path.join(root, '**', '*.vtt'), recursive=True)

    for file_path in srt_files + vtt_files:
        srt_vtt_to_text(file_path)

if __name__ == "__main__":
    # Exactly one command-line argument is expected: the directory to scan.
    if len(sys.argv) == 2:
        main(sys.argv[1])
    else:
        print("Usage: python srt_to_text.py <directory>")

