import asyncio
import os
import uuid

import edge_tts
from fastapi.responses import FileResponse
from langdetect import detect
from pydub import AudioSegment, effects
from pydub.silence import detect_leading_silence
from pydub.utils import which

from utils import remove_file, split_text_by_language, get_sentiment_logic, apply_smart_tashkeel

# Warn loudly at import time when ffmpeg is absent: pydub shells out to
# ffmpeg for mp3 decode/encode, so merging segments will fail without it.
if which("ffmpeg") is None:
    banner = "!" * 50
    warning = [
        "",
        banner,
        "WARNING: ffmpeg NOT FOUND!",
        "pydub needs ffmpeg to merge audio seamlessly.",
        "Please install ffmpeg and add it to your PATH.",
        banner + "\n",
    ]
    print("\n".join(warning))

def trim_silence(audio, threshold=-80.0):
    """Return *audio* with leading and trailing silence stripped.

    Anything quieter than *threshold* dBFS counts as silence. The
    trailing run is found by measuring leading silence on the reversed
    clip, then both runs are sliced off in a single step.
    """
    leading = detect_leading_silence(audio, silence_threshold=threshold)
    trailing = detect_leading_silence(audio.reverse(), silence_threshold=threshold)
    return audio[leading:len(audio) - trailing]

async def generate_segment_audio(text, voice, rate, pitch, volume, output_path):
    """Synthesize *text* with edge-tts and save the mp3 to *output_path*.

    Parameters
    ----------
    text : str
        The (already preprocessed) text to speak.
    voice : str
        An edge-tts voice short name, e.g. "en-US-AvaMultilingualNeural".
    rate, pitch, volume : str
        edge-tts prosody strings (e.g. "+10%", "+2Hz", "+0%").
    output_path : str
        Destination file path for the generated mp3.

    Returns the output path so parallel callers gathering many segments
    can recover where each one landed.
    """
    communicate = edge_tts.Communicate(text, voice, rate=rate, pitch=pitch, volume=volume)
    await communicate.save(output_path)
    return output_path

# Voice used whenever the caller does not pin an explicit voice.
_DEFAULT_MULTILINGUAL_VOICE = "en-US-AvaMultilingualNeural"


def _detect_language(text):
    """Best-effort language detection, defaulting to Arabic ("ar").

    langdetect raises on empty or ambiguous input; the previous code used
    a bare ``except:`` which also swallowed KeyboardInterrupt/SystemExit,
    so only ``Exception`` is caught here.
    """
    try:
        return detect(text)
    except Exception:
        return "ar"


def _resolve_prosody(text, lang, rate, pitch, volume):
    """Resolve "Auto" rate/pitch/volume to sentiment-derived values.

    Non-"Auto" caller values pass through unchanged. Sentiment is always
    computed on the raw text (matching the original behavior).
    """
    s_rate, s_pitch, s_volume = get_sentiment_logic(text, lang)
    return (
        s_rate if rate == "Auto" else rate,
        s_pitch if pitch == "Auto" else pitch,
        s_volume if volume == "Auto" else volume,
    )


async def _synthesize_single(text, voice, rate, pitch, volume, tashkeel, output_path):
    """Generate one utterance with a single voice and save it to *output_path*.

    Applies optional tashkeel preprocessing, detects the language for
    sentiment-based prosody, then synthesizes with edge-tts.
    """
    processed_text = apply_smart_tashkeel(text) if tashkeel else text
    lang = _detect_language(text)
    f_rate, f_pitch, f_volume = _resolve_prosody(text, lang, rate, pitch, volume)
    communicate = edge_tts.Communicate(
        processed_text, voice, rate=f_rate, pitch=f_pitch, volume=f_volume
    )
    await communicate.save(output_path)


async def tts_service(background_tasks, text, voice, rate, pitch, volume, tashkeel, return_path=False):
    """Generate speech for *text* and return it as a FileResponse.

    Modes:
      * Explicit voice (``voice`` not "Auto"/"") — one edge-tts call.
      * Auto voice, single-language text — one call with the default
        multilingual voice.
      * Auto voice, mixed-language text — each language run is synthesized
        in parallel, then the segments are trimmed, normalized, and merged
        with a short crossfade via pydub.

    ``rate``/``pitch``/``volume`` may each be "Auto" to derive the value
    from sentiment analysis. When ``return_path`` is true, returns
    ``(output_path, temp_files)`` and leaves cleanup to the caller;
    otherwise cleanup is scheduled on ``background_tasks`` and a
    ``FileResponse`` is returned. On failure, temp files are removed and
    either the exception propagates (``return_path=True``) or an
    ``{"error": ...}`` dict is returned.
    """
    file_id = str(uuid.uuid4())
    output_path = f"speech_{file_id}.mp3"
    temp_files = []

    try:
        if voice not in ("Auto", ""):
            # Single custom voice mode.
            await _synthesize_single(text, voice, rate, pitch, volume, tashkeel, output_path)
        else:
            # Multi-language mode.
            segments = split_text_by_language(text)

            if len(segments) <= 1:
                # One language only — no merging needed.
                await _synthesize_single(
                    text, _DEFAULT_MULTILINGUAL_VOICE, rate, pitch, volume, tashkeel, output_path
                )
            else:
                # Parallel processing: kick off every segment at once.
                tasks = []
                for i, (seg_text, seg_lang) in enumerate(segments):
                    # Tashkeel only applies to Arabic runs.
                    processed_seg = (
                        apply_smart_tashkeel(seg_text)
                        if seg_lang == "ar" and tashkeel
                        else seg_text
                    )
                    f_rate, f_pitch, f_volume = _resolve_prosody(
                        seg_text, seg_lang, rate, pitch, volume
                    )
                    temp_seg_path = f"temp_{file_id}_{i}.mp3"
                    temp_files.append(temp_seg_path)
                    tasks.append(
                        generate_segment_audio(
                            processed_seg,
                            _DEFAULT_MULTILINGUAL_VOICE,
                            f_rate,
                            f_pitch,
                            f_volume,
                            temp_seg_path,
                        )
                    )

                # Wait for all segments to be generated simultaneously.
                await asyncio.gather(*tasks)

                # Merge segments in order; skip any that came out empty.
                combined_audio = None
                for temp_path in temp_files:
                    if os.path.exists(temp_path) and os.path.getsize(temp_path) > 0:
                        segment_audio = AudioSegment.from_file(temp_path, format="mp3")
                        normalized_segment = effects.normalize(trim_silence(segment_audio))
                        if combined_audio is None:
                            combined_audio = normalized_segment
                        else:
                            # Short crossfade hides the seam between voices.
                            combined_audio = combined_audio.append(
                                normalized_segment, crossfade=100
                            )

                if combined_audio and len(combined_audio) > 0:
                    combined_audio.export(output_path, format="mp3")

        # Last-resort fallback: if nothing usable was produced, synthesize
        # the raw text with a known-good Arabic voice.
        if not os.path.exists(output_path) or os.path.getsize(output_path) == 0:
            communicate = edge_tts.Communicate(text, "ar-EG-SalmaNeural")
            await communicate.save(output_path)

        if return_path:
            return output_path, temp_files

        background_tasks.add_task(remove_file, output_path)
        for f in temp_files:
            background_tasks.add_task(remove_file, f)

        return FileResponse(path=output_path, media_type="audio/mpeg", filename="speech.mp3")
    except Exception as e:
        # Best-effort cleanup of anything we created before failing.
        if os.path.exists(output_path):
            os.remove(output_path)
        for f in temp_files:
            if os.path.exists(f):
                os.remove(f)
        if return_path:
            raise  # bare raise preserves the original traceback
        return {"error": str(e)}
