"""Tests for the code-switching voice run splitter."""

from app.services.voice.voice_runs import VoiceRunSplitter, strip_voice_tags


def test_basic_mixed_sentence():
    """An inline `<en>…</en>` span yields vi / en / vi runs with tags removed."""
    expected_runs = [
        ("vi", "Con thích "),
        ("en", "dinosaur"),
        ("vi", " màu xanh."),
    ]
    splitter = VoiceRunSplitter()
    runs = splitter.split("Con thích <en>dinosaur</en> màu xanh.")
    assert runs == expected_runs


def test_span_carries_across_sentences():
    """An `<en>` span left open by one `split` call continues into the next."""
    splitter = VoiceRunSplitter()

    # Sentence 1 opens `<en>` and ends (on a period) before any closing tag.
    first_runs = splitter.split("Nó nói <en>the quick fox.")
    assert first_runs == [("vi", "Nó nói "), ("en", "the quick fox.")]

    # Sentence 2 goes to the SAME splitter: its leading text is expected to
    # still be "en" until `</en>` appears.
    second_runs = splitter.split("jumps over</en> con chó.")
    assert second_runs == [("en", "jumps over"), ("vi", " con chó.")]


def test_unclosed_tag_stays_english():
    """Text after an `<en>` that is never closed is emitted as an en run."""
    splitter = VoiceRunSplitter()
    runs = splitter.split("Học từ <en>apple banana")
    expected_runs = [("vi", "Học từ "), ("en", "apple banana")]
    assert runs == expected_runs


def test_orphan_close_tag_is_ignored():
    """A `</en>` with no preceding open tag is dropped; one vi run remains."""
    splitter = VoiceRunSplitter()
    runs = splitter.split("Xin chào</en> bạn.")
    only_run = ("vi", "Xin chào bạn.")
    assert runs == [only_run]


def test_nested_open_is_idempotent():
    """A doubled `<en><en>` is closed by a single `</en>`."""
    expected_runs = [
        ("vi", "Tôi "),
        ("en", "hello"),
        ("vi", " world."),
    ]
    splitter = VoiceRunSplitter()
    runs = splitter.split("Tôi <en><en>hello</en> world.")
    assert runs == expected_runs


def test_whole_english_sentence():
    """A sentence fully wrapped in `<en>…</en>` yields exactly one en run."""
    splitter = VoiceRunSplitter()
    runs = splitter.split("<en>How are you?</en>")
    only_run = ("en", "How are you?")
    assert runs == [only_run]


def test_fresh_splitter_resets_state():
    """An open span in one splitter instance must not affect a new instance."""
    # Feed the first instance an `<en>` that is never closed; its output is
    # irrelevant here, only the state it is left in.
    stale_splitter = VoiceRunSplitter()
    stale_splitter.split("dở dang <en>open")

    # A separately constructed instance is expected to start outside any span.
    fresh_splitter = VoiceRunSplitter()
    runs = fresh_splitter.split("Xin chào.")
    assert runs == [("vi", "Xin chào.")]


def test_tag_whitespace_tolerant():
    """Tags with inner spaces (`< en >`, `< / en >`) are still recognised."""
    expected_runs = [
        ("vi", "a "),
        ("en", "hi"),
        ("vi", " b"),
    ]
    splitter = VoiceRunSplitter()
    runs = splitter.split("a < en >hi< / en > b")
    assert runs == expected_runs


def test_strip_voice_tags():
    """`strip_voice_tags` removes `<en>` markup and leaves tag-free text as is."""
    # (input, expected output) pairs, checked in order.
    cases = (
        ("a <en>b</en> c", "a b c"),
        ("no tags", "no tags"),
    )
    for tagged_text, plain_text in cases:
        assert strip_voice_tags(tagged_text) == plain_text
