from pathlib import Path
import soundfile as sf
import torch
import qwen_tts
import numpy as np
from transformers import AutoConfig, AutoModel
from qwen_tts.core.models.configuration_qwen3_tts import Qwen3TTSConfig
from qwen_tts.core.models.modeling_qwen3_tts import Qwen3TTSForConditionalGeneration

# Base directory of the demo; every other path below is derived from it.
ROOT = Path('/opt/ai-avatar-demo')
# Directory handed to ``from_pretrained`` when the TTS model is loaded.
MODEL_ROOT = ROOT.joinpath('models/qwen3-tts-12hz-1b7-base')
# Reference recording passed as ``ref_audio`` to the voice-clone call.
REF_AUDIO = ROOT.joinpath('work/MARSsc.wav')
# UTF-8 text file whose stripped contents are passed as ``ref_text``.
REF_TEXT_FILE = ROOT.joinpath('work/MARSsc_ref_text.txt')
# Where the unmodified generated audio is written (and later read back).
RAW_WAV = ROOT.joinpath('work/gate602_m2_marssc_clone_validation_v2_raw.wav')
# Where the lengthened version of the generated audio is written.
OUT_WAV = ROOT.joinpath('work/gate602_m2_marssc_clone_validation_v2.wav')
# Sentence to synthesize, and the language label sent along with it.
TARGET_TEXT = '我是Mars 今天2026/06/02 一切都正在更好當中!'
LANGUAGE = 'Chinese'

def _as_numpy(wave):
    """Return *wave* as a NumPy-compatible object.

    Objects exposing ``detach`` (torch tensors) are moved to the CPU and
    converted with ``.numpy()``; anything else is returned unchanged.
    """
    if not hasattr(wave, 'detach'):
        return wave
    tensor = wave.detach()
    if tensor.dtype == torch.bfloat16:
        # NumPy has no bfloat16 dtype, so ``Tensor.numpy()`` raises on it.
        # The model is loaded in bfloat16 on CUDA, so upcast defensively.
        tensor = tensor.to(torch.float32)
    return tensor.cpu().numpy()


# Register the Qwen3-TTS config and model classes with the transformers
# ``AutoConfig`` / ``AutoModel`` factories under the 'qwen3_tts' model type.
AutoConfig.register('qwen3_tts', Qwen3TTSConfig)
AutoModel.register(Qwen3TTSConfig, Qwen3TTSForConditionalGeneration)

# Transcript accompanying the reference audio, with surrounding whitespace removed.
ref_text = REF_TEXT_FILE.read_text(encoding='utf-8').strip()

Model = getattr(qwen_tts, 'Qwen3TTSModel', None)
if Model is None:
    # Fail with an actionable message instead of the opaque
    # "'NoneType' object has no attribute 'from_pretrained'".
    raise AttributeError(
        "module 'qwen_tts' does not expose 'Qwen3TTSModel'; "
        "check the installed qwen_tts version"
    )

# Use the first CUDA device in bfloat16 when available, otherwise CPU in float32.
use_cuda = torch.cuda.is_available()
model = Model.from_pretrained(
    str(MODEL_ROOT),
    device_map='cuda:0' if use_cuda else 'cpu',
    dtype=torch.bfloat16 if use_cuda else torch.float32,
)

wavs, sample_rate = model.generate_voice_clone(
    text=TARGET_TEXT,
    language=LANGUAGE,
    ref_audio=str(REF_AUDIO),
    ref_text=ref_text,
    non_streaming_mode=True,
)

# The result may be a single waveform or a list/tuple of waveforms; a sequence
# is joined end-to-end into one array before being written.
if isinstance(wavs, (list, tuple)):
    if not wavs:
        raise ValueError('generate_voice_clone returned no audio segments')
    audio_data = np.concatenate([_as_numpy(w) for w in wavs])
else:
    audio_data = _as_numpy(wavs)
sf.write(str(RAW_WAV), audio_data, sample_rate)

# Lengthen the clip by a fixed amount of extra playing time.
# NOTE: this is plain linear-interpolation resampling written back at the
# original sample rate, so the pitch drops together with the tempo; it is not
# a pitch-preserving time stretch.
EXTRA_SECONDS = 0.5

x, sr = sf.read(str(RAW_WAV))
if x.ndim > 1:
    # Multi-channel input: keep only the first channel.
    x = x[:, 0]
old_n = len(x)
if old_n == 0:
    # np.interp cannot interpolate over an empty sample grid; report the real
    # cause instead of its generic error.
    raise ValueError(f'{RAW_WAV} contains no samples; nothing to stretch')
new_n = int(old_n + EXTRA_SECONDS * sr)

# Sample the original waveform at new_n evenly spaced positions that span the
# same first-to-last range, interpolating linearly between neighbours.
old_idx = np.arange(old_n, dtype=np.float64)
new_idx = np.linspace(0, old_n - 1, new_n)
y = np.interp(new_idx, old_idx, x).astype(np.float32)
sf.write(str(OUT_WAV), y, sr)

# Summary of sample counts, durations in seconds, and the output location.
print(f'RAW_SAMPLES={old_n}')
print(f'NEW_SAMPLES={new_n}')
print(f'RAW_DUR={old_n/sr:.3f}')
print(f'NEW_DUR={new_n/sr:.3f}')
print(OUT_WAV)
