summaryrefslogtreecommitdiff
path: root/sorsyl/lib/sonority.ml
blob: 90bfa55429567648975be804f69429e934dd3399 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
(** Sonority module for determining the sonority of phonetic segments.

    This module provides functionality to determine the sonority of IPA
    (International Phonetic Alphabet) segments on a scale of 1 to 9, where:
    - 9: Low vowels (most sonorous)
    - 8: High vowels
    - 7: Glides/approximants
    - 6: Liquids
    - 5: Nasals
    - 4: Voiced fricatives
    - 3: Voiceless fricatives
    - 2: Voiced stops
    - 1: Voiceless stops (least sonorous) *)

(** Binary decision tree used to compute sonority values.

    Evaluating a tree against a segment walks from the root: at each [Node]
    the [test] predicate is applied to the segment and evaluation continues
    in [t_branch] or [f_branch]; reaching a [Leaf] yields the result. *)
type bool_tree =
  | Leaf of int  (** Terminal node carrying the resulting sonority value *)
  | Node of {
      test : Feature.segment -> bool;  (** Predicate applied to the segment *)
      t_branch : bool_tree;  (** Subtree followed when [test] returns [true] *)
      f_branch : bool_tree;  (** Subtree followed when [test] returns [false] *)
    }

(** Core implementation: IPA table loading, construction of the sonority
    decision tree, and the lookups built on top of it. *)
module Sonority = struct
  (** [init data_dir] hands the path of ["ipa_all.csv"] inside [data_dir] to
      [Ipa_table.load_csv] and returns whatever that call returns. *)
  let init data_dir =
    Ipa_table.load_csv (Filename.concat data_dir "ipa_all.csv")

  (** [build_tree ()] constructs a fresh sonority decision tree.

      The leaves carry the values of the 1-9 scale described at the top of
      this file:
      - syllabic segments: 9 when [(High, Minus)] holds, 8 otherwise;
      - non-syllabic segments for which [(Consonantal, Minus)] holds: 7;
      - remaining sonorants: 6 when [(Nasal, Minus)] holds, 5 otherwise;
      - remaining continuants: 4 when voiced, 3 otherwise;
      - everything else: 2 when voiced, 1 otherwise. *)
  let build_tree () =
    let open Feature in
    (* Feature predicates; each one is created once and shared by the nodes
       that need it. *)
    let syl_plus = test (Syllabic, Plus) in
    let high_minus = test (High, Minus) in
    let cons_minus = test (Consonantal, Minus) in
    let son_plus = test (Sonorant, Plus) in
    let nasal_minus = test (Nasal, Minus) in
    let cont_plus = test (Continuant, Plus) in
    let voiced_plus = test (Voiced, Plus) in

    (* Shorthand for an inner node: [yes] is taken when [pred] holds. *)
    let decide pred ~yes ~no =
      Node { test = pred; t_branch = yes; f_branch = no }
    in

    (* Vowels: non-high (9) versus high (8). *)
    let vowels = decide high_minus ~yes:(Leaf 9) ~no:(Leaf 8) in

    (* Obstruents: continuancy picks fricatives or stops, voicing picks the
       value inside each group. *)
    let fricatives = decide voiced_plus ~yes:(Leaf 4) ~no:(Leaf 3) in
    let stops = decide voiced_plus ~yes:(Leaf 2) ~no:(Leaf 1) in
    let obstruents = decide cont_plus ~yes:fricatives ~no:stops in

    (* Consonantal sonorants: liquids (6) versus nasals (5). *)
    let sonorants = decide nasal_minus ~yes:(Leaf 6) ~no:(Leaf 5) in
    let consonants = decide son_plus ~yes:sonorants ~no:obstruents in

    (* Non-syllabic segments: glides (7) versus true consonants. *)
    let non_vowels = decide cons_minus ~yes:(Leaf 7) ~no:consonants in

    (* Root: syllabicity separates vowels from everything else. *)
    decide syl_plus ~yes:vowels ~no:non_vowels

  (** [eval_tree tree segment] walks [tree] from the root, choosing at each
      node the branch selected by that node's test on [segment], and returns
      the value stored in the leaf that is reached. *)
  let rec eval_tree tree segment =
    match tree with
    | Node { test; t_branch; f_branch } ->
        let next = if test segment then t_branch else f_branch in
        eval_tree next segment
    | Leaf value -> value

  (** The shared decision tree; it is built on first use and then reused. *)
  let sonority_tree = lazy (build_tree ())

  (** [sonority_from_features segment] evaluates the shared decision tree
      against [segment] and returns the resulting sonority value. *)
  let sonority_from_features segment =
    let tree = Lazy.force sonority_tree in
    eval_tree tree segment

  (** [sonority ipa] looks [ipa] up with [Ipa_table.lookup_segment] and
      returns the sonority of the segment that is found.

      @raise Failure if the lookup returns [None]. *)
  let sonority ipa =
    match Ipa_table.lookup_segment ipa with
    | None ->
        let message = Printf.sprintf "Unknown IPA segment: %s" ipa in
        failwith message
    | Some features -> sonority_from_features features
end

(** Public interface *)

(** [init data_dir] loads the IPA data found in [data_dir]; it simply
    delegates to [Sonority.init]. *)
let init data_dir = Sonority.init data_dir

(** [sonority ipa] is the sonority value (1-9) of the IPA segment [ipa]; it
    delegates to [Sonority.sonority].

    @raise Failure if [ipa] is not a known segment. *)
let sonority ipa = Sonority.sonority ipa

(** [sonority_from_features segment] is the sonority value computed directly
    from the feature specification [segment]; it delegates to
    [Sonority.sonority_from_features]. *)
let sonority_from_features segment = Sonority.sonority_from_features segment