(** Module for representing syllables and their components *)
open Base
(** A syllable, split into its phonological components and annotated with its
    position inside the enclosing word. *)
type t =
  { onset : string  (** Initial consonants *)
  ; medial : string  (** Medial consonants, sitting between onset and nucleus *)
  ; nucleus : string  (** Vowel core of the syllable *)
  ; coda : string  (** Final consonants *)
  ; tone : string  (** Tonal information *)
  ; spelling : string  (** Orthographic representation *)
  ; start_idx : int  (** Start position in the word *)
  ; end_idx : int  (** End position in the word *)
  ; stressed : bool  (** Whether this syllable is stressed *)
  }
(** The empty syllable: every textual component is the empty string, both
    indices are [0] and the syllable is not stressed. *)
let empty =
  let blank = "" in
  { onset = blank
  ; medial = blank
  ; nucleus = blank
  ; coda = blank
  ; tone = blank
  ; spelling = blank
  ; start_idx = 0
  ; end_idx = 0
  ; stressed = false
  }
(** [create ?onset ?medial ?nucleus ?coda ?tone ?spelling ?start_idx ?end_idx
    ?stressed ()] builds a syllable from the supplied components.

    Every argument is optional: omitted string components default to [""],
    omitted indices to [0] and [stressed] to [false]. The trailing [unit]
    argument lets the optional arguments be erased at the call site. *)
let create
    ?(onset = "")
    ?(medial = "")
    ?(nucleus = "")
    ?(coda = "")
    ?(tone = "")
    ?(spelling = "")
    ?(start_idx = 0)
    ?(end_idx = 0)
    ?(stressed = false)
    ()
  =
  { onset
  ; medial
  ; nucleus
  ; coda
  ; tone
  ; spelling
  ; start_idx
  ; end_idx
  ; stressed
  }
(** [all t] is the full syllable text: onset, medial, nucleus, coda and tone
    concatenated in that order. *)
let all { onset; medial; nucleus; coda; tone; _ } =
  onset ^ medial ^ nucleus ^ coda ^ tone
(** [rhyme t] is the rhyme of the syllable: medial, nucleus and coda
    concatenated in that order (onset and tone are left out). *)
let rhyme { medial; nucleus; coda; _ } = medial ^ nucleus ^ coda
(** [is_long t] is [true] when the nucleus contains the length marker ["ː"]
    anywhere in it. *)
let is_long { nucleus; _ } =
  let length_marker = "ː" in
  String.is_substring nucleus ~substring:length_marker
(** [finalize t ~end_idx ~stress_idx] closes the syllable at [end_idx] and
    decides whether it is stressed.

    The syllable is stressed when [stress_idx] is non-negative and lies in the
    inclusive range [t.start_idx .. end_idx]. A negative [stress_idx] always
    yields an unstressed syllable (presumably meaning "no stress marker" —
    confirm against callers).

    @return [t] with [end_idx] and [stressed] replaced; other fields are kept. *)
let finalize t ~end_idx ~stress_idx =
  let has_marker = stress_idx >= 0 in
  let within_span = t.start_idx <= stress_idx && stress_idx <= end_idx in
  { t with end_idx; stressed = has_marker && within_span }
(** Pretty print a syllable as a fixed-width ASCII box, one row per component.

    Rows are, in order: the whole syllable ([σ]), onset ([O]), medial ([M]),
    nucleus ([N]), coda ([C]) and tone ([T]). A final [* stressed] row is
    emitted only when [t.stressed] is true; no blank line is produced
    otherwise.

    Each row is padded or truncated to [box_width] UTF-8 code points rather
    than bytes, so multi-byte symbols (the [σ] label, ["ː"], IPA letters)
    neither shift the right border nor get cut in the middle of a character.
    Combining marks are still counted as one column each, so rows containing
    them may look slightly wider than they render.

    @return the box as newline-separated rows, without a trailing newline. *)
let pretty_print t =
  let box_width = 10 in
  (* A byte opens a new UTF-8 code point unless it is a continuation byte,
     i.e. unless it falls in the range 0x80-0xBF. *)
  let starts_code_point c =
    let code = Char.to_int c in
    code < 0x80 || code >= 0xC0
  in
  (* Byte offset just past the first [limit] code points of [s]. *)
  let byte_length_of_prefix s ~limit =
    let n = String.length s in
    let rec go i seen =
      if i >= n then n
      else if starts_code_point s.[i] then (
        if seen >= limit then i else go (i + 1) (seen + 1))
      else go (i + 1) seen
    in
    go 0 0
  in
  let pad s =
    let width = String.count s ~f:starts_code_point in
    if width >= box_width then
      String.prefix s (byte_length_of_prefix s ~limit:box_width)
    else s ^ String.make (box_width - width) ' '
  in
  let border = "+" ^ String.make box_width '-' ^ "+" in
  let make_row label content = "|" ^ pad (label ^ content) ^ "|" in
  (* Built as a list so that an unstressed syllable adds no row at all. *)
  let stress_row = if t.stressed then [ make_row "* " "stressed" ] else [] in
  String.concat ~sep:"\n"
    ([ border
     ; make_row "σ: " (all t)
     ; make_row "O: " t.onset
     ; make_row "M: " t.medial
     ; make_row "N: " t.nucleus
     ; make_row "C: " t.coda
     ; make_row "T: " t.tone
     ]
     @ stress_row
     @ [ border ])
(** [to_string t] renders the phonological components and the stress flag in a
    record-like form. String fields are printed with [%S], i.e. quoted and
    escaped as OCaml string literals. [spelling], [start_idx] and [end_idx]
    are not part of the output. *)
let to_string { onset; medial; nucleus; coda; tone; stressed; _ } =
  Printf.sprintf
    "{onset=%S; medial=%S; nucleus=%S; coda=%S; tone=%S; stressed=%b}"
    onset
    medial
    nucleus
    coda
    tone
    stressed
(** [append_onset t s] is [t] with [s] added at the end of its onset. *)
let append_onset ({ onset; _ } as t) s = { t with onset = onset ^ s }
(** [append_nucleus t s] is [t] with [s] added at the end of its nucleus. *)
let append_nucleus ({ nucleus; _ } as t) s = { t with nucleus = nucleus ^ s }
(** [append_coda t s] is [t] with [s] added at the end of its coda. *)
let append_coda ({ coda; _ } as t) s = { t with coda = coda ^ s }
(** [append_tone t s] is [t] with [s] added at the end of its tone. *)
let append_tone ({ tone; _ } as t) s = { t with tone = tone ^ s }