diff options
Diffstat (limited to 'sorsyl/lib/sonority.ml')
-rw-r--r-- | sorsyl/lib/sonority.ml | 250 |
1 files changed, 114 insertions, 136 deletions
diff --git a/sorsyl/lib/sonority.ml b/sorsyl/lib/sonority.ml index 90bfa55..c47d4a0 100644 --- a/sorsyl/lib/sonority.ml +++ b/sorsyl/lib/sonority.ml @@ -1,16 +1,4 @@ -(** Sonority module for determining the sonority of phonetic segments. - - This module provides functionality to determine the sonority of IPA - (International Phonetic Alphabet) segments on a scale of 1 to 9, where: - - 9: Low vowels (most sonorous) - - 8: High vowels - - 7: Glides/approximants - - 6: Liquids - - 5: Nasals - - 4: Voiced fricatives - - 3: Voiceless fricatives - - 2: Voiced stops - - 1: Voiceless stops (least sonorous) *) +(** Functional sonority module without global state *) (** Decision tree for computing sonority values *) type bool_tree = @@ -21,140 +9,130 @@ type bool_tree = f_branch : bool_tree; (** Branch to follow if test is false *) } -(** Main Sonority module functionality *) -module Sonority = struct - (** Initialize the module by loading IPA data *) - let init data_dir = - let csv_file = Filename.concat data_dir "ipa_all.csv" in - Ipa_table.load_csv csv_file - - (** Build the decision tree for sonority calculation *) - let build_tree () = - let open Feature in - let plusSyl = test (Syllabic, Plus) in - let minusHi = test (High, Minus) in - let minusCons = test (Consonantal, Minus) in - let plusSon = test (Sonorant, Plus) in - let minusNas = test (Nasal, Minus) in - let plusCont = test (Continuant, Plus) in - let plusVoi = test (Voiced, Plus) in - - (* Build the tree bottom-up *) - let minusHi_branch = - Node - { - test = minusHi; - t_branch = Leaf 9; - (* -hi vowels = low vowels *) - f_branch = Leaf 8; - (* +hi vowels = high vowels *) - } - in - - let plusVoi1_branch = - Node - { - test = plusVoi; - t_branch = Leaf 4; - (* +voi +cont = voiced fricatives *) - f_branch = Leaf 3; - (* -voi +cont = voiceless fricatives *) - } - in - - let plusVoi2_branch = - Node - { - test = plusVoi; - t_branch = Leaf 2; - (* +voi -cont = voiced stops *) - f_branch = Leaf 1; - (* -voi -cont = voiceless stops *) - } - in - - let plusCont_branch = - Node - { - test = plusCont; - t_branch = plusVoi1_branch; - (* +cont = fricatives *) - f_branch = plusVoi2_branch; - (* -cont = stops *) - } - in - - let minusNas_branch = - Node - { - test = minusNas; - t_branch = Leaf 6; - (* -nas +son = liquids *) - f_branch = Leaf 5; - (* +nas +son = nasals *) - } - in - - let plusSon_branch = - Node - { - test = plusSon; - t_branch = minusNas_branch; - (* +son = sonorants *) - f_branch = plusCont_branch; - (* -son = obstruents *) - } - in - - let minusCons_branch = - Node - { - test = minusCons; - t_branch = Leaf 7; - (* -cons = glides *) - f_branch = plusSon_branch; - (* +cons = true consonants *) - } - in +type t = { ipa_table : Ipa_table.t; decision_tree : bool_tree } +(** Type representing a sonority calculator *) + +(** Build the decision tree for sonority calculation *) +let build_tree () = + let open Feature in + let plusSyl = test (Syllabic, Plus) in + let minusHi = test (High, Minus) in + let minusCons = test (Consonantal, Minus) in + let plusSon = test (Sonorant, Plus) in + let minusNas = test (Nasal, Minus) in + let plusCont = test (Continuant, Plus) in + let plusVoi = test (Voiced, Plus) in + + (* Build the tree bottom-up, matching the Python original exactly *) + let minusHi_branch = + Node + { + test = minusHi; + t_branch = Leaf 9; + (* -hi vowels = low vowels *) + f_branch = Leaf 8; + (* +hi vowels = high vowels *) + } + in + let plusVoi1_branch = Node { - test = plusSyl; - t_branch = minusHi_branch; - (* +syl = vowels *) - f_branch = minusCons_branch; - (* -syl = non-vowels *) + test = plusVoi; + t_branch = Leaf 4; + (* +voi +cont = voiced fricatives *) + f_branch = Leaf 3; + (* -voi +cont = voiceless fricatives *) } + in - (** Evaluate the decision tree for a segment *) - let rec eval_tree tree segment = - match tree with - | Leaf value -> value - | Node { test; t_branch; f_branch } -> - if test segment then eval_tree t_branch segment - else eval_tree f_branch segment + let plusVoi2_branch = + Node + { + test = plusVoi; + t_branch = Leaf 2; + (* +voi -cont = voiced stops *) + f_branch = Leaf 1; + (* -voi -cont = voiceless stops *) + } + in - (** The main decision tree instance *) - let sonority_tree = lazy (build_tree ()) + let plusCont_branch = + Node + { + test = plusCont; + t_branch = plusVoi1_branch; + (* +cont = fricatives *) + f_branch = plusVoi2_branch; + (* -cont = stops *) + } + in - (** Get sonority value from feature specifications *) - let sonority_from_features segment = - eval_tree (Lazy.force sonority_tree) segment + let minusNas_branch = + Node + { + test = minusNas; + t_branch = Leaf 6; + (* -nas +son = liquids *) + f_branch = Leaf 5; + (* +nas +son = nasals *) + } + in + + let plusSon_branch = + Node + { + test = plusSon; + t_branch = minusNas_branch; + (* +son = sonorants *) + f_branch = plusCont_branch; + (* -son = obstruents *) + } + in - (** Get sonority value from an IPA character *) - let sonority ipa = - match Ipa_table.lookup_segment ipa with - | Some features -> sonority_from_features features - | None -> failwith (Printf.sprintf "Unknown IPA segment: %s" ipa) -end + let minusCons_branch = + Node + { + test = minusCons; + t_branch = Leaf 7; + (* -cons = glides *) + f_branch = plusSon_branch; + (* +cons = true consonants *) + } + in + + Node + { + test = plusSyl; + t_branch = minusHi_branch; + (* +syl = vowels *) + f_branch = minusCons_branch; + (* -syl = non-vowels *) + } -(** Public interface *) +(** Create a sonority calculator from data directory *) +let create (data_dir : string) : t = + let ipa_table = Ipa_table.load_csv data_dir in + let decision_tree = build_tree () in + { ipa_table; decision_tree } -(** Initialize the sonority module with the data directory *) -let init = Sonority.init +(** Traverse the decision tree to get sonority value *) +let rec traverse_tree (tree : bool_tree) (segment : Feature.segment) : int = + match tree with + | Leaf value -> value + | Node { test; t_branch; f_branch } -> + if test segment then traverse_tree t_branch segment + else traverse_tree f_branch segment (** Get the sonority value (1-9) for an IPA character *) -let sonority = Sonority.sonority +let sonority (calc : t) (ipa : string) : int = + match Ipa_table.lookup_segment calc.ipa_table.table ipa with + | Some features -> traverse_tree calc.decision_tree features + | None -> failwith (Printf.sprintf "Unknown IPA segment: %s" ipa) (** Get the sonority value from a feature specification *) -let sonority_from_features = Sonority.sonority_from_features +let sonority_from_features (calc : t) (segment : Feature.segment) : int = + traverse_tree calc.decision_tree segment + +(** Get the underlying IPA table *) +let get_ipa_table (calc : t) : Ipa_table.t = calc.ipa_table |