summaryrefslogtreecommitdiff
path: root/sorsyl/lib/syllable.ml
diff options
context:
space:
mode:
authorpolwex <polwex@sortug.com>2025-06-22 13:46:57 +0700
committerpolwex <polwex@sortug.com>2025-06-22 13:46:57 +0700
commit5dd49048bb65de3d572d43ba2f1b01435c71a35a (patch)
tree7f8e629ae511c3947a80f99906542f3fd2de0a9f /sorsyl/lib/syllable.ml
parent5f495c1d4ee624f9d24f03e50700e7d9a9305b73 (diff)
Diffstat (limited to 'sorsyl/lib/syllable.ml')
-rw-r--r--sorsyl/lib/syllable.ml97
1 files changed, 97 insertions, 0 deletions
diff --git a/sorsyl/lib/syllable.ml b/sorsyl/lib/syllable.ml
new file mode 100644
index 0000000..ef72736
--- /dev/null
+++ b/sorsyl/lib/syllable.ml
@@ -0,0 +1,97 @@
+(** Module for representing syllables and their components *)
+
+open Base
+
+(** A syllable split into its phonological components, together with its
+    position in the source word and a stress flag. All segment fields are plain
+    strings; an absent component is represented by [""]. *)
+type t = {
+ onset : string; (** Initial consonants; first part of [all] *)
+ medial : string; (** Medial consonants (between onset and nucleus); first part of [rhyme] *)
+ nucleus : string; (** Vowel core of the syllable; [is_long] looks for "ː" here *)
+ coda : string; (** Final consonants; last part of [rhyme] *)
+ tone : string; (** Tonal information; included in [all] but not in [rhyme] *)
+ spelling : string; (** Orthographic representation; not read by any function in this file *)
+ start_idx : int; (** Start position in the word (lower bound of the stress check in [finalize]) *)
+ end_idx : int; (** End position in the word; overwritten by [finalize] *)
+ stressed : bool; (** Whether this syllable is stressed; computed by [finalize] *)
+}
+
+(** The blank syllable: every string component is [""], both indices are [0]
+    and the syllable is unstressed. This is a constant, not a constructor. *)
+let empty : t =
+  {
+    onset = ""; medial = ""; nucleus = ""; coda = ""; tone = "";
+    spelling = "";
+    start_idx = 0; end_idx = 0;
+    stressed = false;
+  }
+
+(** Build a syllable from optional components.
+
+    Every labelled argument may be omitted: strings default to [""], indices
+    to [0] and [stressed] to [false]. The final [unit] argument lets the
+    optional arguments be erased at the call site. *)
+let create
+    ?(onset = "")
+    ?(medial = "")
+    ?(nucleus = "")
+    ?(coda = "")
+    ?(tone = "")
+    ?(spelling = "")
+    ?(start_idx = 0)
+    ?(end_idx = 0)
+    ?(stressed = false)
+    ()
+  : t =
+  {
+    onset;
+    medial;
+    nucleus;
+    coda;
+    tone;
+    spelling;
+    start_idx;
+    end_idx;
+    stressed;
+  }
+
+(** [all t] is the full segmental string of [t]: onset, medial, nucleus, coda
+    and tone concatenated in that order. [spelling] is not included. *)
+let all { onset; medial; nucleus; coda; tone; _ } =
+  String.concat [ onset; medial; nucleus; coda; tone ]
+
+(** [rhyme t] is the medial, nucleus and coda of [t] concatenated in that
+    order; onset and tone are left out. *)
+let rhyme { medial; nucleus; coda; _ } =
+  String.concat [ medial; nucleus; coda ]
+
+(** [is_long t] is [true] when the nucleus of [t] contains the length
+    marker "ː" anywhere in it. *)
+let is_long { nucleus; _ } =
+  let length_marker = "ː" in
+  String.is_substring nucleus ~substring:length_marker
+
+(** [finalize t ~end_idx ~stress_idx] closes off a syllable.
+
+    Returns [t] with [end_idx] replaced by the given value and [stressed]
+    recomputed: the syllable is stressed when [stress_idx] is non-negative and
+    lies in the inclusive range [t.start_idx .. end_idx]. A negative
+    [stress_idx] always yields an unstressed syllable. *)
+let finalize t ~end_idx ~stress_idx =
+  let has_marker = stress_idx >= 0 in
+  let within_span = t.start_idx <= stress_idx && stress_idx <= end_idx in
+  { t with end_idx; stressed = has_marker && within_span }
+
+(** Render a syllable as a small ASCII box, one component per row.
+
+    The result is returned as a string; nothing is printed. Rows are, in
+    order: the whole syllable ([all t]), onset, medial, nucleus, coda and tone,
+    followed by a ["* stressed"] row only when [t.stressed] is [true]. Each row
+    is padded with spaces, or truncated, to exactly [box_width] characters,
+    where a character is one UTF-8 code point (so multi-byte IPA symbols do not
+    skew the right border and are never cut in half). *)
+let pretty_print t =
+  let box_width = 10 in
+  (* A byte begins a code point unless it is a UTF-8 continuation byte
+     (0x80..0xBF). *)
+  let starts_code_point c =
+    let code = Char.to_int c in
+    code < 0x80 || code > 0xBF
+  in
+  let pad s =
+    let chars = String.count s ~f:starts_code_point in
+    if chars >= box_width then begin
+      (* Find the byte offset at which code point number [box_width]
+         (0-based) begins; everything before it is exactly [box_width] code
+         points. If the string ends first, keep all of it. *)
+      let rec cut_at i seen =
+        if i >= String.length s then i
+        else if starts_code_point s.[i] then
+          if seen = box_width then i else cut_at (i + 1) (seen + 1)
+        else cut_at (i + 1) seen
+      in
+      String.prefix s (cut_at 0 0)
+    end
+    else s ^ String.make (box_width - chars) ' '
+  in
+
+  let border = "+" ^ String.make box_width '-' ^ "+" in
+  let make_row label content =
+    Printf.sprintf "|%s|" (pad (label ^ content))
+  in
+  (* Built as a list so an unstressed syllable contributes no row at all,
+     rather than an empty string that would show up as a blank line. *)
+  let stress_row = if t.stressed then [ make_row "* " "stressed" ] else [] in
+
+  String.concat ~sep:"\n"
+    ([
+       border;
+       make_row "σ: " (all t);
+       make_row "O: " t.onset;
+       make_row "M: " t.medial;
+       make_row "N: " t.nucleus;
+       make_row "C: " t.coda;
+       make_row "T: " t.tone;
+     ]
+     @ stress_row
+     @ [ border ])
+
+(** Debug-style rendering of a syllable.
+
+    Shows onset, medial, nucleus, coda and tone as quoted, escaped string
+    literals ([%S]) and the stress flag as a boolean. [spelling], [start_idx]
+    and [end_idx] are not part of the output. *)
+let to_string { onset; medial; nucleus; coda; tone; stressed; _ } =
+  Printf.sprintf
+    "{onset=%S; medial=%S; nucleus=%S; coda=%S; tone=%S; stressed=%b}"
+    onset medial nucleus coda tone stressed
+
+(** [append_onset t s] is [t] with [s] added to the end of its onset; all
+    other fields are unchanged. *)
+let append_onset t suffix =
+  let onset = String.concat [ t.onset; suffix ] in
+  { t with onset }
+
+(** [append_nucleus t s] is [t] with [s] added to the end of its nucleus; all
+    other fields are unchanged. *)
+let append_nucleus t suffix =
+  let nucleus = String.concat [ t.nucleus; suffix ] in
+  { t with nucleus }
+
+(** [append_coda t s] is [t] with [s] added to the end of its coda; all other
+    fields are unchanged. *)
+let append_coda t suffix =
+  let coda = String.concat [ t.coda; suffix ] in
+  { t with coda }
+
+(** [append_tone t s] is [t] with [s] added to the end of its tone; all other
+    fields are unchanged. *)
+let append_tone t suffix = { t with tone = String.concat [ t.tone; suffix ] } \ No newline at end of file