diff options
author | polwex <polwex@sortug.com> | 2025-06-22 01:46:49 +0700 |
---|---|---|
committer | polwex <polwex@sortug.com> | 2025-06-22 01:46:49 +0700 |
commit | c9fbdb681b77698bdf8a503cb9d13b6f0b53fd93 (patch) | |
tree | cbf7db884b7f91d33b92a12dae410ffd265635d8 /sorsyl/lib |
init
Diffstat (limited to 'sorsyl/lib')
-rw-r--r-- | sorsyl/lib/dune | 3 | ||||
-rw-r--r-- | sorsyl/lib/feature.ml | 93 | ||||
-rw-r--r-- | sorsyl/lib/ipa_table.ml | 76 | ||||
-rw-r--r-- | sorsyl/lib/sonority.ml | 89 | ||||
-rw-r--r-- | sorsyl/lib/sonority.mli | 44 |
5 files changed, 305 insertions, 0 deletions
diff --git a/sorsyl/lib/dune b/sorsyl/lib/dune
new file mode 100644
index 0000000..148997f
--- /dev/null
+++ b/sorsyl/lib/dune
@@ -0,0 +1,3 @@
+(library
+ (name sorsyl)
+ (libraries csv base stdio))
diff --git a/sorsyl/lib/feature.ml b/sorsyl/lib/feature.ml
new file mode 100644
index 0000000..280977b
--- /dev/null
+++ b/sorsyl/lib/feature.ml
@@ -0,0 +1,93 @@
+(** Phonological features and segments.
+
+    A segment is modelled as a list of feature specifications, each pairing a
+    feature with its value ([Plus], [Minus] or [Zero]). *)
+
+(** Value of a feature in a segment; {!value_of_string} parses it from
+    ["+"], ["-"] and ["0"] respectively. *)
+type feature_value = Plus | Minus | Zero
+
+(** A phonological feature; {!feature_of_string} maps a short name such as
+    ["syl"] to each constructor. *)
+type feature =
+  | Syllabic
+  | Sonorant
+  | Consonantal
+  | Continuant
+  | DelayedRelease
+  | Lateral
+  | Nasal
+  | Strident
+  | Voiced
+  | SpreadGlottis
+  | ConstrictedGlottis
+  | Anterior
+  | Coronal
+  | Distributed
+  | Labial
+  | High
+  | Low
+  | Back
+  | Rounded
+  | Velaric
+  | Tense
+  | Long
+  | HighTone
+  | HighReg (* column "hireg"; presumably "high register" - to confirm *)
+
+type feature_spec = feature * feature_value
+(** A feature specification: a [(feature, value)] pair, feature first. *)
+
+type segment = feature_spec list
+(** A segment: the list of its feature specifications. *)
+
+(** [value_of_string s] converts ["+"], ["-"] or ["0"] to a {!feature_value}.
+    @raise Failure on any other string. *)
+let value_of_string = function
+  | "+" -> Plus
+  | "-" -> Minus
+  | "0" -> Zero
+  | s -> failwith (Printf.sprintf "Invalid feature value: %s" s)
+
+(** [feature_of_string name] converts a short feature name (["syl"], ["son"],
+    ..., ["hireg"]) to the corresponding {!feature}.
+    @raise Failure if [name] is not one of the names matched below. *)
+let feature_of_string = function
+  | "syl" -> Syllabic
+  | "son" -> Sonorant
+  | "cons" -> Consonantal
+  | "cont" -> Continuant
+  | "delrel" -> DelayedRelease
+  | "lat" -> Lateral
+  | "nas" -> Nasal
+  | "strid" -> Strident
+  | "voi" -> Voiced
+  | "sg" -> SpreadGlottis
+  | "cg" -> ConstrictedGlottis
+  | "ant" -> Anterior
+  | "cor" -> Coronal
+  | "distr" -> Distributed
+  | "lab" -> Labial
+  | "hi" -> High
+  | "lo" -> Low
+  | "back" -> Back
+  | "round" -> Rounded
+  | "velaric" -> Velaric
+  | "tense" -> Tense
+  | "long" -> Long
+  | "hitone" -> HighTone
+  | "hireg" -> HighReg
+  | s -> failwith (Printf.sprintf "not a valid feature: %s" s)
+
+(** Original name of {!feature_of_string}, kept so that existing callers keep
+    compiling. Despite its name it parses a string into a feature. *)
+let string_of_feature = feature_of_string
+
+(** [has_feature (feature, value) segment] is [true] iff [segment] contains
+    exactly that specification; a feature that is absent from [segment], or
+    present with a different value, gives [false]. *)
+let has_feature (feature, value) segment = List.mem (feature, value) segment
+
+(** [test spec] is the predicate [has_feature spec], used as the test of a
+    decision-tree node. *)
+let test feature_spec segment = has_feature feature_spec segment
diff --git a/sorsyl/lib/ipa_table.ml b/sorsyl/lib/ipa_table.ml
new file mode 100644
index 0000000..bee027a
--- /dev/null
+++ b/sorsyl/lib/ipa_table.ml
@@ -0,0 +1,76 @@
+(** In-memory table from IPA segments to their feature specifications, filled
+    from a CSV file by {!load_csv}. *)
+
+(** Decision tree for computing sonority values.
+
+    NOTE(review): nothing in this module uses this type, and [sonority.ml]
+    declares an identical one; it is kept here only so that any existing
+    reference to [Ipa_table.bool_tree] still compiles. *)
+type bool_tree =
+  | Leaf of int  (** Terminal node with sonority value *)
+  | Node of {
+      test : Feature.segment -> bool;  (** Test function *)
+      t_branch : bool_tree;  (** Branch to follow if test is true *)
+      f_branch : bool_tree;  (** Branch to follow if test is false *)
+    }
+
+type ipa_entry = { ipa : string; features : Feature.segment }
+(** One parsed data row of the CSV file: an IPA string and its features. *)
+
+(** Storage for the loaded IPA data, keyed by IPA string. *)
+let ipa_table : (string, Feature.segment) Hashtbl.t = Hashtbl.create 1000
+
+(** Features expected after the leading IPA cell of a CSV row, in column
+    order. *)
+let feature_columns =
+  Feature.
+    [
+      Syllabic; Sonorant; Consonantal; Continuant; DelayedRelease; Lateral;
+      Nasal; Strident; Voiced; SpreadGlottis; ConstrictedGlottis; Anterior;
+      Coronal; Distributed; Labial; High; Low; Back; Rounded; Velaric; Tense;
+      Long; HighTone; HighReg;
+    ]
+
+(** [parse_row row] parses one CSV row into an {!ipa_entry}.
+
+    The result is [None] for an empty row, for the header row (first cell
+    ["ipa"]) and for a row whose number of value cells differs from the
+    number of {!feature_columns}.
+    @raise Failure if a value cell is not ["+"], ["-"] or ["0"]. *)
+let parse_row (row : string list) : ipa_entry option =
+  (* Pair each column's feature with its parsed cell, keeping column order.
+     Cells are parsed left to right, so an invalid cell raises before a
+     length mismatch further right is noticed. *)
+  let rec zip columns cells acc =
+    match (columns, cells) with
+    | [], [] -> Some (List.rev acc)
+    | feature :: columns, cell :: cells ->
+        zip columns cells ((feature, Feature.value_of_string cell) :: acc)
+    | [], _ :: _ | _ :: _, [] -> None
+  in
+  match row with
+  | [] | "ipa" :: _ -> None
+  | ipa :: cells ->
+      zip feature_columns cells []
+      |> Option.map (fun features -> { ipa; features })
+
+(** [load_csv filename] reads [filename] as CSV and stores every row accepted
+    by {!parse_row} in {!ipa_table}. A row whose IPA string is already present
+    replaces the earlier binding, so loading a file twice does not pile up
+    shadowed entries. The channel is closed whether or not loading succeeds.
+    @raise Sys_error if [filename] cannot be opened.
+    @raise Failure if a row contains an invalid feature value. *)
+let load_csv filename =
+  let ic = open_in filename in
+  Fun.protect
+    ~finally:(fun () -> close_in_noerr ic)
+    (fun () ->
+      Csv.of_channel ic
+      |> Csv.iter ~f:(fun row ->
+             match parse_row row with
+             | Some { ipa; features } -> Hashtbl.replace ipa_table ipa features
+             | None -> ()))
+
+(** [lookup_segment ipa] is the feature list stored for [ipa], or [None] when
+    the table has no entry for it. *)
+let lookup_segment ipa = Hashtbl.find_opt ipa_table ipa
diff --git a/sorsyl/lib/sonority.ml b/sorsyl/lib/sonority.ml
new file mode 100644
index 0000000..90bfa55
--- /dev/null
+++ b/sorsyl/lib/sonority.ml
@@ -0,0 +1,89 @@
+(** Sonority module for determining the sonority of phonetic segments.
+
+    This module provides functionality to determine the sonority of IPA
+    (International Phonetic Alphabet) segments on a scale of 1 to 9, where:
+    - 9: Low vowels (most sonorous)
+    - 8: High vowels
+    - 7: Glides/approximants
+    - 6: Liquids
+    - 5: Nasals
+    - 4: Voiced fricatives
+    - 3: Voiceless fricatives
+    - 2: Voiced stops
+    - 1: Voiceless stops (least sonorous) *)
+
+(** Decision tree for computing sonority values *)
+type bool_tree =
+  | Leaf of int  (** Terminal node with sonority value *)
+  | Node of {
+      test : Feature.segment -> bool;  (** Test function *)
+      t_branch : bool_tree;  (** Branch to follow if test is true *)
+      f_branch : bool_tree;  (** Branch to follow if test is false *)
+    }
+
+(** Main Sonority module functionality *)
+module Sonority = struct
+  (** [init data_dir] loads the file [ipa_all.csv] found in [data_dir] into
+      the IPA table. *)
+  let init data_dir =
+    Ipa_table.load_csv (Filename.concat data_dir "ipa_all.csv")
+
+  (** Build the decision tree for sonority calculation.
+
+      Every node tests for one exact feature specification, so a segment that
+      lacks the feature, or carries it with another value, takes the false
+      branch. *)
+  let build_tree () =
+    let node spec t_branch f_branch =
+      Node { test = Feature.test spec; t_branch; f_branch }
+    in
+    let open Feature in
+    (* +syl: -hi vowels score 9, all other vowels 8 *)
+    let vowels = node (High, Minus) (Leaf 9) (Leaf 8) in
+    (* -son +cont: voiced (4) and voiceless (3) fricatives *)
+    let fricatives = node (Voiced, Plus) (Leaf 4) (Leaf 3) in
+    (* -son -cont: voiced (2) and voiceless (1) stops *)
+    let stops = node (Voiced, Plus) (Leaf 2) (Leaf 1) in
+    let obstruents = node (Continuant, Plus) fricatives stops in
+    (* +son: -nas gives liquids (6), otherwise nasals (5) *)
+    let sonorants = node (Nasal, Minus) (Leaf 6) (Leaf 5) in
+    let consonants = node (Sonorant, Plus) sonorants obstruents in
+    (* -syl: -cons gives glides (7), otherwise a true consonant *)
+    let non_vowels = node (Consonantal, Minus) (Leaf 7) consonants in
+    node (Syllabic, Plus) vowels non_vowels
+
+  (** [eval_tree tree segment] walks [tree] from the root, taking [t_branch]
+      when a node's test holds for [segment] and [f_branch] otherwise, and
+      returns the value of the leaf it reaches. *)
+  let rec eval_tree tree segment =
+    match tree with
+    | Leaf value -> value
+    | Node { test; t_branch; f_branch } ->
+        eval_tree (if test segment then t_branch else f_branch) segment
+
+  (** The decision tree, built on first use. *)
+  let sonority_tree = lazy (build_tree ())
+
+  (** [sonority_from_features segment] is the sonority value (1-9) that the
+      decision tree assigns to [segment]. *)
+  let sonority_from_features segment =
+    eval_tree (Lazy.force sonority_tree) segment
+
+  (** [sonority ipa] looks [ipa] up in the IPA table and returns its sonority.
+      @raise Failure if [ipa] is not in the table. *)
+  let sonority ipa =
+    match Ipa_table.lookup_segment ipa with
+    | Some features -> sonority_from_features features
+    | None -> failwith (Printf.sprintf "Unknown IPA segment: %s" ipa)
+end
+
+(** Public interface *)
+
+(** Initialize the sonority module with the data directory *)
+let init = Sonority.init
+
+(** Get the sonority value (1-9) for an IPA segment *)
+let sonority = Sonority.sonority
+
+(** Get the sonority value from a feature specification *)
+let sonority_from_features = Sonority.sonority_from_features
diff --git a/sorsyl/lib/sonority.mli b/sorsyl/lib/sonority.mli
new file mode 100644
index 0000000..3e9166e
--- /dev/null
+++ b/sorsyl/lib/sonority.mli
@@ -0,0 +1,44 @@
+(** Sonority module for determining the sonority of phonetic segments.
+
+    This module provides functionality to determine the sonority of IPA
+    (International Phonetic Alphabet) segments on a scale of 1 to 9, where:
+    - 9: Low vowels (most sonorous)
+    - 8: High vowels
+    - 7: Glides/approximants
+    - 6: Liquids
+    - 5: Nasals
+    - 4: Voiced fricatives
+    - 3: Voiceless fricatives
+    - 2: Voiced stops
+    - 1: Voiceless stops (least sonorous)
+
+    Example usage:
+    {[
+      (* Initialize the module with the data directory *)
+      Sonority.init "./data";;
+
+      (* Get sonority values for IPA segments *)
+      Sonority.sonority "a";; (* Returns 9 - low vowel *)
+      Sonority.sonority "p";; (* Returns 1 - voiceless stop *)
+      Sonority.sonority "l";; (* Returns 6 - liquid *)
+    ]} *)
+
+val init : string -> unit
+(** Initialize the sonority module with the data directory. This must be called
+    before {!sonority} can find any segment.
+    @param data_dir The directory containing the ipa_all.csv file
+    @raise Sys_error if the CSV file cannot be found or read
+    @raise Failure if a row of the CSV file holds an invalid feature value *)
+
+val sonority : string -> int
+(** Get the sonority value (1-9) for an IPA segment.
+    @param ipa The IPA character/segment to analyze
+    @return The sonority value between 1 and 9
+    @raise Failure if the IPA segment is not recognized *)
+
+val sonority_from_features : Feature.segment -> int
+(** Get the sonority value from a feature specification. This is useful when you
+    already have the phonological features of a segment and don't need to look
+    it up by IPA symbol. It does not need {!init} to have been called.
+    @param segment The list of feature specifications
+    @return The sonority value between 1 and 9 *)