import asyncio
import json
import os
import subprocess
import time
from concurrent.futures import ThreadPoolExecutor
from functools import partial
from http.server import SimpleHTTPRequestHandler, ThreadingHTTPServer
from pathlib import Path
from threading import Thread

# Restrict CUDA to device "1" and log the value. This runs before the
# faster_whisper import below; presumably the variable has to be in place
# before any CUDA-backed library initialises — TODO confirm.
os.environ.update(CUDA_VISIBLE_DEVICES="1")
print("CUDA_VISIBLE_DEVICES={}".format(os.environ["CUDA_VISIBLE_DEVICES"]), flush=True)

import websockets
from faster_whisper import WhisperModel

# Filesystem layout of the demo. Everything lives under BASE:
#   WORK       - directory served over HTTP by serve_page()
#   CHUNKS     - uploaded audio and the WAV files ffmpeg derives from it
#   TRANS      - one plain-text transcript per upload
#   MODEL_PATH - Whisper model directory, resolved to an absolute path
BASE = Path("/opt/ai-avatar-demo")
WORK = BASE.joinpath("work")
CHUNKS = WORK.joinpath("gate602_m1_audio_chunks")
TRANS = WORK.joinpath("gate602_m1_transcripts")
MODEL_PATH = BASE.joinpath("models", "current-stt-large-v3").resolve()

# Create the output directories up front so request handling can write into
# them without checking; an already-existing directory is not an error.
CHUNKS.mkdir(exist_ok=True, parents=True)
TRANS.mkdir(exist_ok=True, parents=True)

# Log the resolved location so the model directory actually used is visible.
print("RESOLVED_MODEL_PATH={}".format(MODEL_PATH), flush=True)


def serve_page():
    """Serve the files under WORK over HTTP on 0.0.0.0:8766 until the process exits.

    Blocks forever in ``serve_forever()``; main() runs it in a daemon thread.

    The handler is bound to WORK through its ``directory`` argument instead of
    calling ``os.chdir()``: the working directory is process-wide state, and
    changing it from a background thread would silently affect every other
    thread that resolves a relative path.
    """
    # NOTE(review): CHUNKS and TRANS live under WORK, so uploaded audio and
    # transcripts are downloadable by anyone who can reach this port.
    handler = partial(SimpleHTTPRequestHandler, directory=str(WORK))
    # The context manager closes the listening socket if serve_forever() exits.
    with ThreadingHTTPServer(("0.0.0.0", 8766), handler) as httpd:
        print("HTTP_PAGE_SERVER=0.0.0.0:8766", flush=True)
        httpd.serve_forever()


# Load the Whisper model once at import time; handle() reuses this instance
# for every request. The START/DONE markers bracket the load in the log.
# NOTE(review): device_index=0 is presumably relative to the devices left
# visible by CUDA_VISIBLE_DEVICES (set to "1" near the top of this file), i.e.
# it would select physical GPU 1 — confirm against the deployment host.
print("LOADING_MODEL_START", flush=True)
model = WhisperModel(
    os.fspath(MODEL_PATH),
    local_files_only=True,
    compute_type="float16",
    device_index=0,
    device="cuda",
)
print("LOADING_MODEL_DONE", flush=True)


def ext_from_mime(mime_type: str) -> str:
    """Return the file extension to use for audio uploaded with *mime_type*.

    Args:
        mime_type: MIME type string reported by the client, e.g.
            ``"audio/webm;codecs=opus"``.

    Returns:
        ``"ogg"`` if the type mentions ogg (in any letter case), otherwise
        ``"webm"``. Unknown, empty, or non-string values also fall back to
        ``"webm"`` rather than raising, because the value comes straight from
        client-supplied JSON.
    """
    # MIME type names are case-insensitive, so compare on a lowered copy.
    normalized = mime_type.lower() if isinstance(mime_type, str) else ""
    return "ogg" if "ogg" in normalized else "webm"


# Single worker thread for the blocking save -> ffmpeg -> Whisper pipeline.
# One worker keeps uploads strictly one-at-a-time (exactly as when this work
# ran inline in the coroutine) while leaving the event loop free to service
# other sockets during a transcription.
_STT_EXECUTOR = ThreadPoolExecutor(max_workers=1, thread_name_prefix="stt")


def _parse_meta(text):
    """Return the decoded dict if *text* is a JSON ``{"type": "meta"}`` object, else None."""
    try:
        data = json.loads(text)
    except (ValueError, RecursionError):
        # Not JSON (json.JSONDecodeError is a ValueError) or absurdly nested:
        # treat it like any other unrecognised text frame.
        return None
    if isinstance(data, dict) and data.get("type") == "meta":
        return data
    return None


def _transcribe_upload(audio_bytes, mime, ts):
    """Save one upload, convert it to 16 kHz mono WAV and transcribe it.

    Blocking; meant to run on ``_STT_EXECUTOR``, never on the event loop.

    Args:
        audio_bytes: The raw uploaded audio (bytes-like).
        mime: MIME type string used to choose the source file extension.
        ts: Integer timestamp used in the generated file names.

    Returns:
        The text reply for the client: ``"AUDIO_TOO_SMALL size=N"``,
        ``"FFMPEG_FAILED: ..."`` or ``"TRANSCRIPT: ..."``.
    """
    ext = ext_from_mime(mime)
    src = CHUNKS / f"gate602_m1_{ts}.{ext}"
    wav = CHUNKS / f"gate602_m1_{ts}.wav"
    transcript_file = TRANS / f"gate602_m1_{ts}.txt"

    # The upload is written before the size check, so rejected uploads are
    # still kept on disk.
    src.write_bytes(audio_bytes)
    print(f"AUDIO_SRC={src} MIME={mime} SIZE={len(audio_bytes)}", flush=True)

    if len(audio_bytes) < 2000:
        reply = f"AUDIO_TOO_SMALL size={len(audio_bytes)}"
        print(reply, flush=True)
        return reply

    cmd = [
        "ffmpeg",
        "-y",
        "-hide_banner",
        "-loglevel",
        "error",
        "-i",
        str(src),
        "-ar",
        "16000",
        "-ac",
        "1",
        str(wav),
    ]
    proc = subprocess.run(
        cmd,
        stdin=subprocess.DEVNULL,  # keep ffmpeg from reading the server's stdin
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
    )
    print(f"FFMPEG_EXIT={proc.returncode}", flush=True)

    if proc.returncode != 0:
        err = (proc.stderr or "")[-1200:]
        print("FFMPEG_STDERR_LAST1200=" + err.replace("\n", "\\n"), flush=True)
        return "FFMPEG_FAILED: " + err[:500]

    # The segments are consumed here, inside the worker thread.
    segments, _info = model.transcribe(str(wav), language=None, beam_size=1)
    stripped = (seg.text.strip() for seg in segments)
    text = " ".join(piece for piece in stripped if piece).strip()
    transcript_file.write_text(text, encoding="utf-8")
    print(f"TRANSCRIPT_FILE={transcript_file}", flush=True)
    print(f"TRANSCRIPT_TEXT={text}", flush=True)
    return "TRANSCRIPT: " + (text if text else "[NO_TEXT_DETECTED]")


async def handle(ws):
    """Receive one audio upload over *ws*, transcribe it and send back the result.

    Protocol, as implemented below:
      * binary frames are appended to the audio buffer;
      * the text frame ``"__END__"`` ends the upload;
      * a JSON text frame whose ``"type"`` is ``"meta"`` is merged into the
        metadata (``"mimeType"`` selects the source file extension);
      * any other text frame is ignored.

    Exactly one text reply is sent: ``AUDIO_TOO_SMALL ...``,
    ``FFMPEG_FAILED: ...``, ``TRANSCRIPT: ...`` or, on an unexpected error,
    ``SERVER_EXCEPTION: ...``. Exceptions are logged and reported to the
    client, not propagated.
    """
    # NOTE(review): the timestamp has one-second resolution, so two uploads
    # that start in the same second share file names and the later one
    # overwrites the earlier one's files on disk.
    ts = int(time.time())
    meta = {"mimeType": "audio/webm"}
    audio_bytes = bytearray()

    try:
        async for message in ws:
            if isinstance(message, bytes):
                audio_bytes.extend(message)
                continue
            if not isinstance(message, str):
                continue
            if message == "__END__":
                break
            update = _parse_meta(message)
            if update is not None:
                meta.update(update)
                print(f"AUDIO_META={meta}", flush=True)

        # The client controls the metadata; fall back to the default when the
        # MIME type is missing or not a string (e.g. JSON null).
        mime = meta.get("mimeType")
        if not isinstance(mime, str):
            mime = "audio/webm"

        # ffmpeg and Whisper decoding are blocking; run them off the event
        # loop so other connections are still serviced meanwhile.
        loop = asyncio.get_running_loop()
        reply = await loop.run_in_executor(
            _STT_EXECUTOR, _transcribe_upload, audio_bytes, mime, ts
        )
        await ws.send(reply)

    except Exception as e:
        print(f"HANDLE_EXCEPTION={type(e).__name__}: {e}", flush=True)
        try:
            await ws.send(f"SERVER_EXCEPTION: {type(e).__name__}: {e}")
        except Exception as send_err:
            # Best effort only: the peer is typically gone already.
            print(
                f"HANDLE_EXCEPTION_REPLY_FAILED={type(send_err).__name__}: {send_err}",
                flush=True,
            )


async def main():
    """Start the HTTP page server thread and run the WebSocket STT server forever.

    The page server runs in a daemon thread, so it does not keep the process
    alive on its own. The WebSocket server listens on 0.0.0.0:8765 and
    accepts messages up to 30,000,000 bytes.
    """
    page_thread = Thread(target=serve_page, daemon=True)
    page_thread.start()

    print("STT_WS_SERVER=0.0.0.0:8765", flush=True)
    ws_server = websockets.serve(handle, "0.0.0.0", 8765, max_size=30_000_000)
    async with ws_server:
        # This future is never resolved: awaiting it keeps the server open
        # until the task is cancelled or the process is stopped.
        await asyncio.Future()


# Script entry point: main() is started only when this file is executed
# directly, not when it is imported.
if __name__ == "__main__":
    asyncio.run(main())
