#!/usr/bin/env python3
"""
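Language Learning Assistant v5.1 (The Final Polish)

Flask application that transcribes recorded speech with Whisper, sends the
transcript to a chat model served by LM Studio, and voices the reply with
Coqui XTTS.

- Creates a high-quality, neutral English voice sample on startup.
- Uses this sample as the speaker reference for all "English Chat" responses
  to ensure consistent, natural-sounding audio (this fixed the last known
  audio bug).
- This is the complete, polished, and fully functional application.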
"""
import os
import base64
import uuid
import subprocess
import json
import re
import requests
import soundfile as sf
import numpy as np

from flask import Flask, render_template, request, jsonify, send_from_directory
from pathlib import Path

# The final, complete set of imports needed
import torch.serialization
from TTS.tts.configs.xtts_config import XttsConfig
from TTS.tts.models.xtts import XttsAudioConfig
from TTS.config.shared_configs import BaseDatasetConfig
from TTS.tts.models.xtts import XttsArgs
from TTS.api import TTS
import whisper

# PyTorch security fix: register the XTTS configuration classes as safe
# globals for torch's serialization layer. This is the complete allowlist and
# runs before the TTS model is loaded further down.
torch.serialization.add_safe_globals(
    [
        XttsConfig,
        XttsAudioConfig,
        BaseDatasetConfig,
        XttsArgs,
    ]
)

# --- Configuration ---
app = Flask(__name__)

# Scratch directory for uploads, converted recordings and generated speech.
# It is created relative to the current working directory first and only then
# resolved to an absolute path.
Path("audio_files").mkdir(parents=True, exist_ok=True)
AUDIO_DIR = Path("audio_files").resolve()

# Chat-completions endpoint and model identifier used by language_llm_logic.
LM_STUDIO_URL = "http://192.168.5.30:1234/v1/chat/completions"
MODEL_NAME = "google/gemma-3-4b"

# --- Load AI Models on Start ---
# Both models are loaded once, at import time; the functions below reuse these
# module-level instances for every request.
_WHISPER_MODEL_NAME = "base"
_XTTS_MODEL_NAME = "tts_models/multilingual/multi-dataset/xtts_v2"

print("Loading Whisper model...")
whisper_model = whisper.load_model(_WHISPER_MODEL_NAME)
print("Whisper model loaded.")

print("Loading Coqui TTS model...")
tts_model = TTS(model_name=_XTTS_MODEL_NAME)
print("Coqui TTS model loaded successfully.")

# --- Generate a high-quality English reference voice on startup ---
# text_to_speech_with_coqui uses this clip as the speaker reference for
# "english_chat" replies. ENGLISH_VOICE_SAMPLE_PATH is set to None when the
# clip cannot be created.
ENGLISH_VOICE_SAMPLE_PATH = AUDIO_DIR / "english_voice_sample.wav"
if ENGLISH_VOICE_SAMPLE_PATH.is_file() and ENGLISH_VOICE_SAMPLE_PATH.stat().st_size > 0:
    print("English voice sample already exists.")
else:
    print("Creating a high-quality English voice sample...")
    # Synthesize into a scratch file and move it into place only on success.
    # Otherwise an interrupted or failed run would leave a truncated sample
    # that every later startup would accept as "already exists".
    _english_sample_tmp_path = AUDIO_DIR / "english_voice_sample.partial.wav"
    try:
        # We use a known good speaker from the training data to generate the sample
        tts_model.tts_to_file(
            text="Hello, I am your English language partner. How can I help you today?",
            file_path=str(_english_sample_tmp_path),
            language="en",
            speaker="Ana Florence",  # A high-quality voice from the model's training data
        )
        os.replace(_english_sample_tmp_path, ENGLISH_VOICE_SAMPLE_PATH)
        print(f"English voice sample created at {ENGLISH_VOICE_SAMPLE_PATH}")
    except Exception as e:
        # Best effort: the application still starts without the sample.
        print(f"Could not create English voice sample: {e}")
        try:
            _english_sample_tmp_path.unlink()
        except OSError:
            # Nothing was written, or the leftover cannot be removed; either
            # way the next startup overwrites the scratch file.
            pass
        ENGLISH_VOICE_SAMPLE_PATH = None


def transcribe_audio_with_conversion(audio_bytes: bytes) -> tuple[str, str]:
    """Convert an uploaded recording to 16 kHz mono WAV and transcribe it.

    The raw bytes are written to a scratch file in ``AUDIO_DIR``, converted
    with ``ffmpeg`` and passed to the Whisper model.

    Args:
        audio_bytes: Raw audio in any container/codec that ffmpeg can read.

    Returns:
        ``(transcript, wav_path)`` on success. The converted WAV is left on
        disk so the caller can reuse it (``process_audio`` hands it to
        ``text_to_speech_with_coqui``, which deletes it). On empty input or a
        failed conversion the result is ``("Error: ...", "")``.

    Raises:
        Any exception other than a failed ffmpeg run (for example ffmpeg not
        being installed, or Whisper failing) propagates to the caller.
    """
    if not audio_bytes:
        print("Received 0 bytes...")
        return "Error: Received empty audio.", ""
    print(f"Received {len(audio_bytes)} bytes...")

    job_id = str(uuid.uuid4())
    input_path = AUDIO_DIR / f"input_{job_id}.dat"
    output_path = AUDIO_DIR / f"output_{job_id}.wav"
    keep_output = False
    try:
        input_path.write_bytes(audio_bytes)
        command = [
            "ffmpeg", "-i", str(input_path),
            "-ar", "16000", "-ac", "1", "-c:a", "pcm_s16le",
            str(output_path),
        ]
        # stdin is detached so ffmpeg never reads from the server's terminal.
        subprocess.run(command, check=True, capture_output=True, stdin=subprocess.DEVNULL)
        transcript = whisper_model.transcribe(str(output_path), fp16=False)["text"]
        keep_output = True
        return transcript, str(output_path)
    except subprocess.CalledProcessError as e:
        # ffmpeg output is not guaranteed to be valid UTF-8.
        stderr_text = (e.stderr or b"").decode(errors="replace")
        print(f"!!! FFMPEG conversion failed !!!\nFFMPEG stderr: {stderr_text}")
        return "Error: Failed to convert audio format.", ""
    finally:
        input_path.unlink(missing_ok=True)
        if not keep_output:
            # Do not leak a partial or unused WAV when conversion or
            # transcription failed.
            output_path.unlink(missing_ok=True)


def _extract_json_object(raw_text):
    """Return the first JSON object embedded in *raw_text*, or ``None`` if there is none."""
    if not isinstance(raw_text, str):
        return None
    start = raw_text.find("{")
    if start == -1:
        return None
    try:
        # raw_decode stops at the end of the first complete JSON value, so
        # trailing prose or code fences after the object (even ones that
        # contain braces) do not break parsing.
        parsed, _end = json.JSONDecoder().raw_decode(raw_text[start:])
    except json.JSONDecodeError:
        return None
    return parsed if isinstance(parsed, dict) else None


def language_llm_logic(text_to_process: str, mode: str) -> dict:
    """Ask the LM Studio chat model to answer or translate the transcript.

    Args:
        text_to_process: The user's transcript, or one of the ``"Error: ..."``
            sentinels returned by ``transcribe_audio_with_conversion``.
        mode: ``"spanish_immersion"`` (reply in Spanish plus an English
            translation), ``"english_chat"`` (reply in English); any other
            value selects translate-and-flashcards.

    Returns:
        The JSON object produced by the model; the prompts ask for
        ``"spanish_text"`` plus, depending on the mode, ``"english_recap"``
        and ``"flashcards"``. When the transcript is an error sentinel, the
        request fails, or no JSON object can be parsed, a dict with a
        user-facing message in ``"spanish_text"``, an empty ``"flashcards"``
        list and an empty ``"english_recap"`` is returned instead. This
        function does not raise for request or parsing failures.
    """
    error_response = {"spanish_text": "Error processing request.", "flashcards": [], "english_recap": ""}

    # Only the upstream sentinels start with "Error:"; a transcript that merely
    # contains that text somewhere must still be processed.
    if text_to_process.startswith("Error:"):
        return {"spanish_text": "Could not process audio. Please try again.", "flashcards": [], "english_recap": ""}

    if mode == "spanish_immersion":
        system_prompt = "You are a friendly Spanish conversational partner. The user will speak to you in English. Your task is to respond naturally in Spanish. After your Spanish response, provide a literal English translation of YOUR response. You MUST return a single, valid JSON object."
        user_prompt = f'Respond to the user in Spanish, then provide an English translation of your response. Format the output ONLY as JSON: {{"spanish_text": "<your_spanish_response>", "english_recap": "<english_translation_of_your_response>"}}\n\nThe user said: "{text_to_process}"'
    elif mode == "english_chat":
        system_prompt = "You are a helpful conversational AI assistant. Respond to the user's query in clear, concise English. You MUST return your response as a single, valid JSON object."
        user_prompt = f'Respond to the following in English. Respond ONLY with a JSON object in the format: {{"spanish_text": "<your_english_response>"}}\n\nThe user said: "{text_to_process}"'
    else:
        system_prompt = "You are an expert English-to-Spanish translator and language teacher. Provide the Spanish translation, provide an EXACT copy of the original English text for the recap, and identify 2-3 key words for flashcards. You MUST return your response as a single, valid JSON object."
        user_prompt = f'Translate the text, provide an exact recap, and generate flashcards. Respond ONLY with a JSON object in the format: {{"spanish_text": "<your_spanish_translation>", "english_recap": "<exact_original_english_text>", "flashcards": [{{"english": "word1", "spanish": "translation1"}}]}}\n\nThe text is: "{text_to_process}"'

    # Conversational modes get a higher temperature than translation. The
    # isinstance guard keeps a non-string mode (e.g. JSON null) from raising.
    is_conversational = isinstance(mode, str) and ("chat" in mode or "immersion" in mode)
    headers = {"Content-Type": "application/json"}
    payload = {
        "model": MODEL_NAME,
        "messages": [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
        "temperature": 0.7 if is_conversational else 0.2,
    }
    try:
        print(f"Sending request to LM Studio in mode '{mode}'...")
        response = requests.post(LM_STUDIO_URL, headers=headers, json=payload, timeout=90)
        response.raise_for_status()
        raw_text = response.json()["choices"][0]["message"]["content"]
        print(f"LM Studio raw response: {raw_text}")
        decision = _extract_json_object(raw_text)
        if decision is None:
            print("LM Studio response did not contain a valid JSON object.")
            return error_response
        return decision
    except Exception as e:
        # Deliberate best effort: any network or response-shape problem turns
        # into the generic error payload instead of failing the request.
        print(f"An error occurred during LLM processing: {e}")
        return error_response


def text_to_speech_with_coqui(text_to_speak: str, speaker_wav_path: str, mode: str) -> str:
    """Synthesize *text_to_speak* with the XTTS model and return the WAV path.

    Args:
        text_to_speak: Text to voice. Empty or whitespace-only text produces
            no audio.
        speaker_wav_path: Path to the user's converted recording, used as the
            speaker reference outside English chat. The file is always deleted
            before this function returns. May be ``""``.
        mode: ``"english_chat"`` synthesizes English with the pre-generated
            English voice sample; every other mode synthesizes Spanish with
            the user's own recording as the reference.

    Returns:
        The path of the generated file inside ``AUDIO_DIR`` as a string, or
        ``""`` when there is nothing to say, no speaker reference is
        available, or synthesis fails. Failures are logged, never raised.
    """
    try:
        if not text_to_speak or not text_to_speak.strip():
            return ""

        if mode == "english_chat":
            lang_code = "en"
            # The startup sample may be missing (ENGLISH_VOICE_SAMPLE_PATH is
            # None when its creation failed); fall back to the user's own
            # recording rather than passing no reference at all.
            if ENGLISH_VOICE_SAMPLE_PATH:
                speaker_to_use = str(ENGLISH_VOICE_SAMPLE_PATH)
            else:
                speaker_to_use = speaker_wav_path
        else:
            lang_code = "es"
            speaker_to_use = speaker_wav_path

        if not speaker_to_use:
            print("Error generating TTS audio: no speaker reference audio is available.")
            return ""

        output_path = AUDIO_DIR / f"tts_output_{uuid.uuid4()}.wav"
        print(f"Generating audio in language '{lang_code}'...")
        tts_model.tts_to_file(
            text=text_to_speak,
            file_path=str(output_path),
            language=lang_code,
            speaker_wav=speaker_to_use,
        )
        print(f"Successfully generated TTS audio at {output_path}")
        return str(output_path)
    except Exception as e:
        print(f"Error generating TTS audio: {e}")
        return ""
    finally:
        # The per-request recording is single-use; remove it on every path.
        # A failed cleanup is logged so it cannot replace the return value
        # with an exception.
        if speaker_wav_path and Path(speaker_wav_path).exists():
            try:
                os.remove(speaker_wav_path)
                print(f"Cleaned up speaker reference file: {speaker_wav_path}")
            except OSError as cleanup_error:
                print(f"Could not remove speaker reference file {speaker_wav_path}: {cleanup_error}")


@app.route('/process_audio', methods=['POST'])
def process_audio():
    """Run one recording through transcription, the LLM and text-to-speech.

    Expects a JSON body with ``audio_data`` (base64-encoded audio) and an
    optional ``mode`` (defaults to ``"learn_translate"``).

    Returns:
        200 with ``english_text``, ``spanish_text``, ``flashcards``,
        ``english_recap`` and ``audio_url`` (``None`` when no speech was
        generated); 400 with ``{"error": ...}`` when the request body is
        malformed; 500 with ``{"error": ...}`` when processing fails.
    """
    # Malformed client input is a 400, not a server error.
    data = request.get_json(silent=True)
    audio_b64 = data.get('audio_data') if isinstance(data, dict) else None
    if not isinstance(audio_b64, str):
        return jsonify({"error": "Request body must be a JSON object with a base64 'audio_data' string."}), 400

    mode = data.get('mode')
    if not isinstance(mode, str) or not mode:
        # Covers both a missing key and an explicit JSON null.
        mode = 'learn_translate'

    try:
        audio_bytes = base64.b64decode(audio_b64)
    except ValueError as e:  # binascii.Error is a ValueError subclass
        print(f"Rejected request with invalid base64 audio: {e}")
        return jsonify({"error": "'audio_data' is not valid base64."}), 400

    user_voice_path = ""
    try:
        english_transcript, user_voice_path = transcribe_audio_with_conversion(audio_bytes)
        llm_result = language_llm_logic(english_transcript, mode)
        response_text = llm_result.get("spanish_text", "")
        flashcards = llm_result.get("flashcards", [])
        english_recap = llm_result.get("english_recap", "")
        tts_audio_path_str = text_to_speech_with_coqui(response_text, user_voice_path, mode)
        audio_url = f"/audio/{Path(tts_audio_path_str).name}" if tts_audio_path_str else None
        return jsonify({
            "english_text": english_transcript,
            "spanish_text": response_text,
            "flashcards": flashcards,
            "english_recap": english_recap,
            "audio_url": audio_url,
        })
    except Exception as e:
        print(f"An error occurred in process_audio: {e}")
        # text_to_speech_with_coqui normally deletes the reference recording;
        # remove it here in case the failure happened before that step ran.
        if user_voice_path:
            try:
                os.remove(user_voice_path)
            except OSError:
                pass  # already removed, or not removable; nothing more to do
        return jsonify({"error": str(e)}), 500


@app.route('/')
def index():
    """Render the ``index.html`` template for the root URL."""
    page = render_template('index.html')
    return page

@app.route('/audio/<filename>')
def serve_audio(filename):
    """Send the file called *filename* from ``AUDIO_DIR`` to the client."""
    audio_response = send_from_directory(AUDIO_DIR, filename)
    return audio_response

if __name__ == '__main__':
    # The server listens on every interface, and the Werkzeug debugger allows
    # arbitrary code execution from the browser, so debug mode must be opted
    # into explicitly (FLASK_DEBUG=1) instead of being always on. Leaving it
    # off also avoids the reloader importing this module, and therefore
    # loading both models, a second time.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").strip().lower() in {"1", "true", "yes", "on"}
    app.run(host='0.0.0.0', port=5001, threaded=True, debug=debug_enabled)
