import re
import asyncio
import logging
from concurrent.futures import ThreadPoolExecutor
from typing import List, Tuple


# Module-level logger named after this module; CatchPhraseReportService uses it
# to report phrase-matching failures and timeouts.
logger = logging.getLogger(__name__)


class CatchPhraseReportService:
    """Report which catch phrases occur in a call transcript.

    All methods are static. Matching is case-insensitive (text and phrase are
    both lower-cased) and each phrase check is dispatched to a shared thread
    pool through ``loop.run_in_executor``.
    """

    # Shared worker pool for the synchronous phrase checks; created once when
    # the class body is executed.
    _executor = ThreadPoolExecutor(max_workers=16)

    @staticmethod
    def _phrase_present_sync(
        text: str,
        phrase: str,
        use_word_boundaries: bool = False
    ) -> Tuple[str, bool]:
        """Return ``(phrase, found)`` for a case-insensitive search of *text*.

        Args:
            text: Text to search. Empty/``None`` yields ``found=False``.
            phrase: Literal phrase to look for (never treated as a regex).
                Empty/``None`` yields ``found=False``.
            use_word_boundaries: When true, the phrase must not be directly
                preceded or followed by a word character, so ``"cat"`` does
                not match inside ``"concatenate"``.

        Returns:
            The original *phrase* paired with a boolean presence flag. Any
            unexpected failure is logged and reported as not found.
        """
        if not text or not phrase:
            return phrase, False

        try:
            haystack = text.lower()
            needle = phrase.lower()

            if not use_word_boundaries:
                # A plain substring test is equivalent to searching for the
                # escaped literal and needs no regex.
                return phrase, needle in haystack

            # Lookarounds instead of \b: \b requires a word character on one
            # side, so it can never match phrases that begin or end with
            # punctuation (e.g. "c++", "$100"). For phrases that begin and end
            # with word characters the two forms are equivalent.
            pattern = rf"(?<!\w){re.escape(needle)}(?!\w)"
            return phrase, re.search(pattern, haystack) is not None

        except Exception as exc:
            # Deliberate best-effort: a bad input (e.g. non-string phrase)
            # must not abort the whole report.
            logger.warning("Phrase preparation failed %s", exc)
            return phrase, False

    @staticmethod
    async def count_phrase_async(
        text: str,
        phrase: str,
        semaphore: asyncio.Semaphore,
        timeout: int = 3,
        use_word_boundaries: bool = False
    ) -> Tuple[str, bool]:
        """Check one phrase on the thread pool, bounded by *semaphore*.

        Args:
            text: Text to search.
            phrase: Literal phrase to look for.
            semaphore: Limits how many checks are in flight at once.
            timeout: Seconds to wait for the worker result.
            use_word_boundaries: Forwarded to ``_phrase_present_sync``.

        Returns:
            ``(phrase, found)``. On timeout or any other error the failure is
            logged and ``(phrase, False)`` is returned (``False == 0``, so
            callers that compared against the previous ``0`` still work).
        """
        async with semaphore:
            loop = asyncio.get_running_loop()

            try:
                # run_in_executor forwards positional args to the callable,
                # so no lambda/closure is needed.
                # NOTE: the timeout only stops waiting; a worker thread that
                # has already started is not interrupted.
                return await asyncio.wait_for(
                    loop.run_in_executor(
                        CatchPhraseReportService._executor,
                        CatchPhraseReportService._phrase_present_sync,
                        text,
                        phrase,
                        use_word_boundaries,
                    ),
                    timeout=timeout
                )
            except asyncio.TimeoutError:
                logger.warning("Phrase counting timed out for phrase '%s'", phrase)
                return phrase, False
            except Exception as e:
                logger.error("Unexpected error counting phrase '%s': %s", phrase, e)
                return phrase, False

    @staticmethod
    async def count_phrases_in_text(
        text: str,
        phrases: List,
        max_concurrency: int = 12,
        use_word_boundaries: bool = False
    ) -> list[str]:
        """Return the phrases that occur in *text*, checked concurrently.

        Args:
            text: Text to search. Empty/``None`` yields ``[]``.
            phrases: Objects exposing a ``.phrase`` string attribute.
                Empty/``None`` yields ``[]``.
            max_concurrency: Maximum number of phrase checks in flight.
            use_word_boundaries: Require whole-phrase matches.

        Returns:
            The ``.phrase`` values that were found, in the order of *phrases*
            (duplicates in the input are preserved).
        """
        if not text or not phrases:
            return []

        semaphore = asyncio.Semaphore(max_concurrency)

        tasks = [
            CatchPhraseReportService.count_phrase_async(
                text,
                p.phrase,
                semaphore,
                use_word_boundaries=use_word_boundaries
            )
            for p in phrases
        ]

        # gather preserves the order of the submitted tasks.
        results = await asyncio.gather(*tasks)

        return [phrase for phrase, found in results if found]

    @staticmethod
    async def build_call_phrase_report_async(call, phrases, use_word_boundaries: bool = False):
        """Return the phrases found in a call's transcript.

        Reads ``call.twilio_recording_text`` and falls back to
        ``call.transcript`` when the former is empty. Returns ``[]`` when
        neither holds any text.

        Args:
            call: Object exposing ``twilio_recording_text`` and ``transcript``.
            phrases: Objects exposing a ``.phrase`` string attribute.
            use_word_boundaries: Require whole-phrase matches.
        """
        # `or` short-circuits, so call.transcript is only read as a fallback.
        transcript = call.twilio_recording_text or call.transcript
        if not transcript:
            return []

        return await CatchPhraseReportService.count_phrases_in_text(
            transcript,
            phrases,
            use_word_boundaries=use_word_boundaries
        )
