(** Module for representing syllables and their components *)

open Base

(** Type representing a syllable with its phonological components *)
type t = {
  onset : string;  (** Initial consonants *)
  medial : string;  (** Medial segment (between onset and nucleus) *)
  nucleus : string;  (** Vowel core of the syllable *)
  coda : string;  (** Final consonants *)
  tone : string;  (** Tonal information *)
  spelling : string;  (** Orthographic representation *)
  start_idx : int;  (** Start position in the word *)
  end_idx : int;  (** End position in the word *)
  stressed : bool;  (** Whether this syllable is stressed *)
}

(** The empty syllable: every string component is [""], both indices are
    [0], and [stressed] is [false]. *)
let empty =
  {
    onset = "";
    medial = "";
    nucleus = "";
    coda = "";
    tone = "";
    spelling = "";
    start_idx = 0;
    end_idx = 0;
    stressed = false;
  }

(** [create ?onset ... ()] builds a syllable from the given components.
    Omitted string components default to [""], omitted indices to [0] and
    [stressed] to [false], so [create ()] has the same contents as
    {!empty}. *)
let create ?(onset = "") ?(medial = "") ?(nucleus = "") ?(coda = "")
    ?(tone = "") ?(spelling = "") ?(start_idx = 0) ?(end_idx = 0)
    ?(stressed = false) () =
  { onset; medial; nucleus; coda; tone; spelling; start_idx; end_idx; stressed }

(** [all t] is the complete syllable string: onset, medial, nucleus, coda
    and tone concatenated in that order. [spelling] is not included. *)
let all t = t.onset ^ t.medial ^ t.nucleus ^ t.coda ^ t.tone

(** [rhyme t] is the rhyme of the syllable: medial, nucleus and coda
    concatenated in that order. *)
let rhyme t = t.medial ^ t.nucleus ^ t.coda

(** [is_long t] is [true] when the nucleus contains the length marker
    ["ː"]. *)
let is_long t = String.is_substring t.nucleus ~substring:"ː"

(** [finalize t ~end_idx ~stress_idx] returns [t] with [end_idx] replaced by
    the given value and [stressed] recomputed.

    The syllable is marked stressed when [stress_idx] is non-negative and
    lies within [t.start_idx .. end_idx] (both bounds inclusive, using the
    {e new} [end_idx]). A negative [stress_idx] always yields
    [stressed = false]. *)
let finalize t ~end_idx ~stress_idx =
  let stressed =
    stress_idx >= 0 && stress_idx >= t.start_idx && stress_idx <= end_idx
  in
  { t with end_idx; stressed }

(** [pretty_print t] renders the syllable as a multi-line ASCII box
    (drawn with [+], [-] and [|]) holding one row each for the whole syllable,
    onset, medial, nucleus, coda and tone, followed by a ["* stressed"] row
    only when [t.stressed] is set.

    The interior of every row is exactly ten characters wide: shorter content
    is padded with spaces and longer content is truncated. Width is measured
    in UTF-8 code points rather than bytes so that IPA symbols and the ["σ"]
    label line up with the border and are never cut in the middle of a
    multi-byte sequence. Combining marks still count as one column each, so
    rows containing them may appear slightly narrow in a terminal. *)
let pretty_print t =
  let box_width = 10 in
  (* A byte starts a code point unless it is a 10xxxxxx continuation byte. *)
  let is_utf8_lead c = Char.to_int c land 0xC0 <> 0x80 in
  (* Longest prefix of [s] holding at most [n] code points; always cuts on a
     code-point boundary. *)
  let utf8_prefix s n =
    let rec scan i seen =
      if i >= String.length s then s
      else if is_utf8_lead s.[i] then
        if seen = n then String.prefix s i else scan (i + 1) (seen + 1)
      else scan (i + 1) seen
    in
    scan 0 0
  in
  let pad s =
    let width = String.count s ~f:is_utf8_lead in
    if width >= box_width then utf8_prefix s box_width
    else s ^ String.make (box_width - width) ' '
  in
  let border = "+" ^ String.make box_width '-' ^ "+" in
  let make_row label content = "|" ^ pad (label ^ content) ^ "|" in
  (* Build the optional row as a list so that an unstressed syllable does not
     leave an empty line inside the box. *)
  let stress_row = if t.stressed then [ make_row "* " "stressed" ] else [] in
  String.concat ~sep:"\n"
    ([
       border;
       make_row "σ: " (all t);
       make_row "O: " t.onset;
       make_row "M: " t.medial;
       make_row "N: " t.nucleus;
       make_row "C: " t.coda;
       make_row "T: " t.tone;
     ]
    @ stress_row
    @ [ border ])

(** [to_string t] is a single-line, record-like rendering of the
    phonological components and the stress flag. String fields are printed
    with OCaml escaping ([%S]). [spelling], [start_idx] and [end_idx] are not
    included. *)
let to_string t =
  Printf.sprintf
    "{onset=%S; medial=%S; nucleus=%S; coda=%S; tone=%S; stressed=%b}" t.onset
    t.medial t.nucleus t.coda t.tone t.stressed

(** [append_onset t s] returns [t] with [s] appended to its onset. *)
let append_onset t s = { t with onset = t.onset ^ s }

(** [append_nucleus t s] returns [t] with [s] appended to its nucleus. *)
let append_nucleus t s = { t with nucleus = t.nucleus ^ s }

(** [append_coda t s] returns [t] with [s] appended to its coda. *)
let append_coda t s = { t with coda = t.coda ^ s }

(** [append_tone t s] returns [t] with [s] appended to its tone. *)
let append_tone t s = { t with tone = t.tone ^ s }