summaryrefslogtreecommitdiff
path: root/sorsyl/lib/syllable.ml
blob: ef72736748e909c4627e6c200eb7f51a4102423c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
(** Module for representing syllables and their components *)

open Base

(** A single syllable, split into its phonological components plus
    bookkeeping about where it sits in the word it was taken from.

    Every component is a plain string; {!empty} and the defaults of
    {!create} use [""] for a component that is not present. *)
type t = {
  onset : string;      (** Initial consonants *)
  medial : string;     (** Medial consonants (between onset and nucleus) *)
  nucleus : string;    (** Vowel core of the syllable; {!is_long} looks for the
                           length mark ["ː"] in this field *)
  coda : string;       (** Final consonants *)
  tone : string;       (** Tonal information *)
  spelling : string;   (** Orthographic representation *)
  start_idx : int;     (** Start position in the word. NOTE(review): the unit
                           (byte offset vs. segment index) is not fixed by this
                           module - confirm against the caller. *)
  end_idx : int;       (** End position in the word; {!finalize} treats the
                           range [start_idx..end_idx] as inclusive at both ends *)
  stressed : bool;     (** Whether this syllable is stressed *)
}

(** The blank syllable: every textual component is the empty string, both
    indices are [0], and it is not stressed. Useful as the starting point for
    incremental construction with the [append_*] functions. *)
let empty =
  let blank = "" in
  {
    onset = blank;
    medial = blank;
    nucleus = blank;
    coda = blank;
    tone = blank;
    spelling = blank;
    start_idx = 0;
    end_idx = 0;
    stressed = false;
  }

(** [create ?onset ?medial ?nucleus ?coda ?tone ?spelling ?start_idx ?end_idx
    ?stressed ()] builds a syllable from the given components.

    Every argument is optional: string components default to [""], both
    indices default to [0], and [stressed] defaults to [false]. The trailing
    [unit] argument lets the optional arguments be omitted. *)
let create
    ?(onset = "")
    ?(medial = "")
    ?(nucleus = "")
    ?(coda = "")
    ?(tone = "")
    ?(spelling = "")
    ?(start_idx = 0)
    ?(end_idx = 0)
    ?(stressed = false)
    () =
  {
    onset;
    medial;
    nucleus;
    coda;
    tone;
    spelling;
    start_idx;
    end_idx;
    stressed;
  }

(** [all t] is the full phonological form of [t]: onset, medial, nucleus,
    coda and tone concatenated in that order. [spelling] is not included. *)
let all { onset; medial; nucleus; coda; tone; _ } =
  onset ^ medial ^ nucleus ^ coda ^ tone

(** [rhyme t] is the medial, nucleus and coda of [t] concatenated in that
    order, i.e. the syllable without its onset and tone. *)
let rhyme { medial; nucleus; coda; _ } = medial ^ nucleus ^ coda

(** [is_long t] is [true] when the nucleus of [t] contains the length
    mark ["ː"] anywhere in it. *)
let is_long { nucleus; _ } =
  let length_mark = "ː" in
  String.is_substring nucleus ~substring:length_mark

(** [finalize t ~end_idx ~stress_idx] closes off [t] by recording [end_idx]
    and deciding whether the syllable is stressed.

    The syllable is stressed when [stress_idx] is non-negative and lies in
    [t.start_idx .. end_idx], both ends inclusive. A negative [stress_idx]
    therefore never marks a syllable as stressed. Any previous value of
    [t.stressed] is overwritten. *)
let finalize t ~end_idx ~stress_idx =
  let within_span = t.start_idx <= stress_idx && stress_idx <= end_idx in
  { t with end_idx; stressed = stress_idx >= 0 && within_span }

(** [pretty_print t] renders [t] as a fixed-width ASCII box ([+], [-], [|]),
    one row per component: the whole syllable, onset, medial, nucleus, coda
    and tone.

    The box interior is 10 columns wide. Each row is its label followed by
    the component text, padded with spaces or truncated to that width. Width
    is measured in UTF-8 code points rather than bytes, so multi-byte
    characters (the ["σ"] label, IPA symbols) neither push the right border
    out of line nor get cut in the middle of an encoded character. Combining
    marks are still counted as one column each.

    A ["* stressed"] row is emitted only when [t.stressed] is set; an
    unstressed syllable gets no extra (blank) line inside the box.

    @return the box as newline-separated lines, without a trailing newline. *)
let pretty_print t =
  let box_width = 10 in
  (* UTF-8 continuation bytes have the bit pattern 10xxxxxx. *)
  let is_continuation = function
    | '\x80' .. '\xBF' -> true
    | _ -> false
  in
  (* Every byte that is not a continuation byte starts one code point. *)
  let code_points s = String.count s ~f:(fun c -> not (is_continuation c)) in
  (* Byte offset just past the first [n] code points of [s]. *)
  let prefix_bytes s n =
    let len = String.length s in
    let rec scan i seen =
      if i >= len then len
      else if is_continuation s.[i] then scan (i + 1) seen
      else if seen = n then i
      else scan (i + 1) (seen + 1)
    in
    scan 0 0
  in
  let pad s =
    let width = code_points s in
    if width >= box_width then String.prefix s (prefix_bytes s box_width)
    else s ^ String.make (box_width - width) ' '
  in
  let border = "+" ^ String.make box_width '-' ^ "+" in
  let make_row label content = "|" ^ pad (label ^ content) ^ "|" in
  let component_rows =
    [
      make_row "σ: " (all t);
      make_row "O: " t.onset;
      make_row "M: " t.medial;
      make_row "N: " t.nucleus;
      make_row "C: " t.coda;
      make_row "T: " t.tone;
    ]
  in
  (* An empty list, not an empty string, so that no blank line is joined in. *)
  let stress_row = if t.stressed then [ make_row "* " "stressed" ] else [] in
  (* The result starts and ends with '+', so no stripping is needed. *)
  String.concat ~sep:"\n" ((border :: component_rows) @ stress_row @ [ border ])

(** [to_string t] is a one-line, record-like debug rendering of [t].

    Only the phonological components and the stress flag are shown; string
    fields are printed with [%S], i.e. quoted and escaped as OCaml string
    literals. [spelling], [start_idx] and [end_idx] are not part of the
    output. *)
let to_string { onset; medial; nucleus; coda; tone; stressed; _ } =
  Printf.sprintf
    "{onset=%S; medial=%S; nucleus=%S; coda=%S; tone=%S; stressed=%b}"
    onset medial nucleus coda tone stressed

(** [append_onset t s] is [t] with [s] added to the end of its onset; all
    other fields are unchanged. *)
let append_onset ({ onset; _ } as t) s =
  let onset = onset ^ s in
  { t with onset }

(** [append_nucleus t s] is [t] with [s] added to the end of its nucleus; all
    other fields are unchanged. *)
let append_nucleus ({ nucleus; _ } as t) s =
  let nucleus = nucleus ^ s in
  { t with nucleus }

(** [append_coda t s] is [t] with [s] added to the end of its coda; all other
    fields are unchanged. *)
let append_coda ({ coda; _ } as t) s =
  let coda = coda ^ s in
  { t with coda }

(** [append_tone t s] is [t] with [s] added to the end of its tone; all other
    fields are unchanged. *)
let append_tone ({ tone; _ } as t) s =
  let tone = tone ^ s in
  { t with tone }