summaryrefslogtreecommitdiff
path: root/sorsyl/lib
diff options
context:
space:
mode:
Diffstat (limited to 'sorsyl/lib')
-rw-r--r--sorsyl/lib/dune3
-rw-r--r--sorsyl/lib/feature.ml80
-rw-r--r--sorsyl/lib/ipa_table.ml85
-rw-r--r--sorsyl/lib/sonority.ml160
-rw-r--r--sorsyl/lib/sonority.mli47
5 files changed, 375 insertions, 0 deletions
diff --git a/sorsyl/lib/dune b/sorsyl/lib/dune
new file mode 100644
index 0000000..148997f
--- /dev/null
+++ b/sorsyl/lib/dune
@@ -0,0 +1,3 @@
+(library
+ (name sorsyl)
+ (libraries csv base stdio))
diff --git a/sorsyl/lib/feature.ml b/sorsyl/lib/feature.ml
new file mode 100644
index 0000000..280977b
--- /dev/null
+++ b/sorsyl/lib/feature.ml
@@ -0,0 +1,80 @@
+(** Module for handling phonological features and segments *)
+
+(* module Feature = struct *)
+
+(** Type representing a phonological feature value. [value_of_string] (below)
+    produces [Plus] for ["+"], [Minus] for ["-"] and [Zero] for ["0"]. *)
+type feature_value = Plus | Minus | Zero
+
+(** Type representing a phonological feature. Each constructor is the target
+    of exactly one abbreviated name accepted by [string_of_feature] (below),
+    e.g. ["syl"] for [Syllabic] or ["delrel"] for [DelayedRelease]. *)
+type feature =
+ | Syllabic
+ | Sonorant
+ | Consonantal
+ | Continuant
+ | DelayedRelease
+ | Lateral
+ | Nasal
+ | Strident
+ | Voiced
+ | SpreadGlottis
+ | ConstrictedGlottis
+ | Anterior
+ | Coronal
+ | Distributed
+ | Labial
+ | High
+ | Low
+ | Back
+ | Rounded
+ | Velaric
+ | Tense
+ | Long
+ | HighTone
+ | HighReg (* parsed from "hireg"; presumably "high register" - TODO confirm *)
+
+type feature_spec = feature * feature_value
+(** Type representing a feature specification as a (feature, value) pair:
+    the feature comes first, its value second. *)
+
+type segment = feature_spec list
+(** A segment is a list of feature specifications, i.e. an association list
+    from features to their values. *)
+
+(** [value_of_string s] parses the textual feature value [s]: ["+"] gives
+    [Plus], ["-"] gives [Minus] and ["0"] gives [Zero].
+    @raise Failure for any other string; the message contains [s]. *)
+let value_of_string s =
+  match s with
+  | "0" -> Zero
+  | "-" -> Minus
+  | "+" -> Plus
+  | other -> failwith (Printf.sprintf "Invalid feature value: %s" other)
+
+(** [string_of_feature name] converts the abbreviated feature name [name]
+    (for example ["syl"] or ["cons"]) into the corresponding [feature]
+    constructor.
+
+    Note the direction: despite its name, this function parses a string INTO
+    a feature; it does not print a feature. The name is kept because other
+    modules call it.
+
+    @raise Failure if [name] is not one of the abbreviations listed below;
+    the message contains [name]. *)
+let string_of_feature = function
+  | "syl" -> Syllabic
+  | "son" -> Sonorant
+  | "cons" -> Consonantal
+  | "cont" -> Continuant
+  | "delrel" -> DelayedRelease
+  | "lat" -> Lateral
+  | "nas" -> Nasal
+  | "strid" -> Strident
+  | "voi" -> Voiced
+  | "sg" -> SpreadGlottis
+  | "cg" -> ConstrictedGlottis
+  | "ant" -> Anterior
+  | "cor" -> Coronal
+  | "distr" -> Distributed
+  | "lab" -> Labial
+  | "hi" -> High
+  | "lo" -> Low
+  | "back" -> Back
+  | "round" -> Rounded
+  | "velaric" -> Velaric
+  | "tense" -> Tense
+  | "long" -> Long
+  | "hitone" -> HighTone
+  | "hireg" -> HighReg
+  (* Report the offending input, as [value_of_string] does for values. *)
+  | unknown -> failwith (Printf.sprintf "not a valid feature: %s" unknown)
+
+(** [has_feature (feat, value) segment] is [true] when [segment] contains a
+    pair whose first component equals [feat] and whose second component
+    equals [value], and [false] otherwise (including for an empty segment). *)
+let has_feature (feat, value) segment =
+  let matches (f, v) = f = feat && v = value in
+  List.exists matches segment
+
+(** [test feature_spec] is a predicate on segments, suitable as the test of
+    a decision-tree node: applied to a segment it answers whether that
+    segment contains [feature_spec] (see [has_feature]). *)
+let test feature_spec segment = segment |> has_feature feature_spec
+(* end *)
diff --git a/sorsyl/lib/ipa_table.ml b/sorsyl/lib/ipa_table.ml
new file mode 100644
index 0000000..bee027a
--- /dev/null
+++ b/sorsyl/lib/ipa_table.ml
@@ -0,0 +1,85 @@
+(* Segments are represented by [Feature.segment]: a list of (feature, value)
+   pairs, i.e. an association list that can be queried with [List.assoc]. *)
+
+(* NOTE(review): sonority.ml declares an identical [bool_tree] type, and no
+   code visible in this file refers to this one. It looks like a leftover;
+   confirm nothing else depends on [Ipa_table.bool_tree] before removing. *)
+
+(** Decision tree for computing sonority values *)
+type bool_tree =
+ | Leaf of int (** Terminal node with sonority value *)
+ | Node of {
+ test : Feature.segment -> bool; (** Test function *)
+ t_branch : bool_tree; (** Branch to follow if test is true *)
+ f_branch : bool_tree; (** Branch to follow if test is false *)
+ }
+
+type ipa_entry = { ipa : string; features : Feature.segment }
+(** Type representing a row from the IPA CSV file: [ipa] is the row's first
+    column and [features] holds the feature specifications parsed from the
+    remaining columns (see [parse_row]). *)
+
+(** Storage for loaded IPA data: maps an IPA string to its feature list.
+    The table starts empty, is filled by [load_csv] and is read by
+    [lookup_segment]. [1000] is the initial size passed to [Hashtbl.create]. *)
+let ipa_table : (string, Feature.segment) Hashtbl.t = Hashtbl.create 1000
+
+(** [parse_row row] converts one CSV row into an [ipa_entry].
+
+    The first cell is the IPA string; the remaining cells are feature values
+    in the fixed column order listed in [feature_names].
+
+    @return [None] for an empty row, for the header row (first cell ["ipa"])
+    and for rows whose number of value cells differs from the number of
+    feature names; [Some entry] otherwise.
+    @raise Failure if a value cell is rejected by [Feature.value_of_string]. *)
+let parse_row (row : string list) : ipa_entry option =
+  (* Column order expected after the leading IPA column. *)
+  let feature_names =
+    [
+      "syl";
+      "son";
+      "cons";
+      "cont";
+      "delrel";
+      "lat";
+      "nas";
+      "strid";
+      "voi";
+      "sg";
+      "cg";
+      "ant";
+      "cor";
+      "distr";
+      "lab";
+      "hi";
+      "lo";
+      "back";
+      "round";
+      "velaric";
+      "tense";
+      "long";
+      "hitone";
+      "hireg";
+    ]
+  in
+  (* Walk names and values in lock-step, converting each pair as it is
+     reached; a length mismatch is only detected once the shorter list runs
+     out. *)
+  let rec pair_up names values acc =
+    match (names, values) with
+    | [], [] -> Some (List.rev acc)
+    | [], _ :: _ | _ :: _, [] -> None (* Mismatched lengths *)
+    | name :: more_names, value :: more_values ->
+      let fval = Feature.value_of_string value in
+      let fname = Feature.string_of_feature name in
+      pair_up more_names more_values ((fname, fval) :: acc)
+  in
+  match row with
+  | [] -> None
+  | "ipa" :: _ -> None (* Skip the header row *)
+  | ipa :: values -> (
+    match pair_up feature_names values [] with
+    | None -> None
+    | Some features -> Some { ipa; features })
+
+(** [load_csv filename] reads the CSV file [filename] and stores every row
+    accepted by [parse_row] in [ipa_table], keyed by its IPA string. Rows
+    for which [parse_row] returns [None] are skipped.
+
+    An entry for an IPA string that is already present is overwritten, so
+    loading the same file twice (or a file with repeated symbols) leaves a
+    single binding per symbol holding the most recently read features.
+
+    The input channel is closed whether or not reading succeeds.
+
+    @raise Sys_error if [filename] cannot be opened.
+    @raise Failure if a row contains a feature value rejected by
+    [Feature.value_of_string]. *)
+let load_csv filename =
+  let ic = open_in filename in
+  Fun.protect
+    ~finally:(fun () -> close_in_noerr ic)
+    (fun () ->
+      Csv.of_channel ic
+      |> Csv.iter ~f:(fun row ->
+             match parse_row row with
+             (* [replace] rather than [add]: [add] would keep the previous
+                binding hidden underneath the new one. *)
+             | Some entry -> Hashtbl.replace ipa_table entry.ipa entry.features
+             | None -> ()))
+
+(** [lookup_segment ipa] is [Some features] when [ipa] has an entry in
+    [ipa_table], and [None] otherwise. *)
+let lookup_segment ipa = ipa |> Hashtbl.find_opt ipa_table
diff --git a/sorsyl/lib/sonority.ml b/sorsyl/lib/sonority.ml
new file mode 100644
index 0000000..90bfa55
--- /dev/null
+++ b/sorsyl/lib/sonority.ml
@@ -0,0 +1,160 @@
+(** Sonority module for determining the sonority of phonetic segments.
+
+ This module provides functionality to determine the sonority of IPA
+ (International Phonetic Alphabet) segments on a scale of 1 to 9, where:
+ - 9: Non-high vowels: any [+syl] segment marked [-hi] (most sonorous)
+ - 8: High vowels: any other [+syl] segment
+ - 7: Glides/approximants
+ - 6: Liquids
+ - 5: Nasals
+ - 4: Voiced fricatives
+ - 3: Voiceless fricatives
+ - 2: Voiced stops
+ - 1: Voiceless stops (least sonorous) *)
+
+(** Decision tree for computing sonority values. A [Leaf] carries the final
+    sonority value; a [Node] applies [test] to the segment and continues in
+    [t_branch] or [f_branch] (see [Sonority.eval_tree] below). *)
+type bool_tree =
+ | Leaf of int (** Terminal node with sonority value *)
+ | Node of {
+ test : Feature.segment -> bool; (** Test function *)
+ t_branch : bool_tree; (** Branch to follow if test is true *)
+ f_branch : bool_tree; (** Branch to follow if test is false *)
+ }
+
+(** Main Sonority module functionality *)
+module Sonority = struct
+  (** [init data_dir] loads the IPA table from the file [ipa_all.csv] located
+      in [data_dir]. *)
+  let init data_dir =
+    Filename.concat data_dir "ipa_all.csv" |> Ipa_table.load_csv
+
+  (** [build_tree ()] constructs the decision tree used for sonority
+      calculation. Each node asks whether the segment contains one
+      (feature, value) pair; each leaf is a sonority value from 1 to 9. *)
+  let build_tree () =
+    let open Feature in
+    (* [branch spec ~yes ~no] is a node that follows [yes] when the segment
+       contains [spec] and [no] otherwise. *)
+    let branch spec ~yes ~no =
+      Node { test = test spec; t_branch = yes; f_branch = no }
+    in
+    (* +syl: 9 when marked -hi, 8 otherwise. *)
+    let vowels = branch (High, Minus) ~yes:(Leaf 9) ~no:(Leaf 8) in
+    (* -son +cont: 4 when +voi (voiced fricatives), 3 otherwise. *)
+    let fricatives = branch (Voiced, Plus) ~yes:(Leaf 4) ~no:(Leaf 3) in
+    (* -son, not +cont: 2 when +voi (voiced stops), 1 otherwise. *)
+    let stops = branch (Voiced, Plus) ~yes:(Leaf 2) ~no:(Leaf 1) in
+    (* Obstruents split on +cont into fricatives and stops. *)
+    let obstruents = branch (Continuant, Plus) ~yes:fricatives ~no:stops in
+    (* +son: 6 when marked -nas (liquids), 5 otherwise (nasals). *)
+    let sonorants = branch (Nasal, Minus) ~yes:(Leaf 6) ~no:(Leaf 5) in
+    (* Consonants split on +son into sonorants and obstruents. *)
+    let consonants = branch (Sonorant, Plus) ~yes:sonorants ~no:obstruents in
+    (* Not +syl: 7 when marked -cons (glides), otherwise a true consonant. *)
+    let non_syllabics =
+      branch (Consonantal, Minus) ~yes:(Leaf 7) ~no:consonants
+    in
+    (* Root: +syl segments are vowels, everything else is non-syllabic. *)
+    branch (Syllabic, Plus) ~yes:vowels ~no:non_syllabics
+
+  (** [eval_tree tree segment] walks [tree] from the root, applying each
+      node's test to [segment], and returns the value of the leaf reached. *)
+  let rec eval_tree tree segment =
+    match tree with
+    | Leaf value -> value
+    | Node { test; t_branch; f_branch } ->
+      let next = if test segment then t_branch else f_branch in
+      eval_tree next segment
+
+  (** The main decision tree instance, built on first use. *)
+  let sonority_tree = lazy (build_tree ())
+
+  (** [sonority_from_features segment] evaluates the decision tree on
+      [segment] and returns the resulting sonority value. *)
+  let sonority_from_features segment =
+    let tree = Lazy.force sonority_tree in
+    eval_tree tree segment
+
+  (** [sonority ipa] looks [ipa] up in the IPA table and returns the sonority
+      of its features.
+      @raise Failure if [ipa] has no entry in the table. *)
+  let sonority ipa =
+    match Ipa_table.lookup_segment ipa with
+    | None -> failwith (Printf.sprintf "Unknown IPA segment: %s" ipa)
+    | Some features -> sonority_from_features features
+end
+
+(** Public interface: thin wrappers around the [Sonority] submodule. *)
+
+(** [init data_dir] initializes the sonority module from [data_dir]. *)
+let init data_dir = Sonority.init data_dir
+
+(** [sonority ipa] is the sonority value (1-9) of the IPA character [ipa]. *)
+let sonority ipa = Sonority.sonority ipa
+
+(** [sonority_from_features segment] is the sonority value of a feature
+    specification. *)
+let sonority_from_features segment = Sonority.sonority_from_features segment
diff --git a/sorsyl/lib/sonority.mli b/sorsyl/lib/sonority.mli
new file mode 100644
index 0000000..3e9166e
--- /dev/null
+++ b/sorsyl/lib/sonority.mli
@@ -0,0 +1,47 @@
+(** Sonority module for determining the sonority of phonetic segments.
+
+ This module provides functionality to determine the sonority of IPA
+ (International Phonetic Alphabet) segments on a scale of 1 to 9, where:
+ - 9: Non-high vowels: any [+syl] segment marked [-hi] (most sonorous)
+ - 8: High vowels: any other [+syl] segment
+ - 7: Glides/approximants
+ - 6: Liquids
+ - 5: Nasals
+ - 4: Voiced fricatives
+ - 3: Voiceless fricatives
+ - 2: Voiced stops
+ - 1: Voiceless stops (least sonorous)
+
+ Example usage:
+ {[
+ (* Initialize the module with the data directory *)
+ Sonority.init "./data";;
+
+ (* Get sonority values for IPA segments *)
+ Sonority.sonority "a";; (* Returns 9 - low vowel *)
+ Sonority.sonority "p";; (* Returns 1 - voiceless stop *)
+ Sonority.sonority "l";; (* Returns 6 - liquid *)
+ ]} *)
+
+val init : string -> unit
+(** Initialize the sonority module with the data directory. This must be called
+    before {!sonority}: until the table is loaded no IPA segment is known.
+    @param data_dir The directory containing the ipa_all.csv file
+    @raise Sys_error if the CSV file cannot be found or read
+    @raise Failure
+      if a data row holds a feature value other than "+", "-" or "0" *)
+
+val sonority : string -> int
+(** Get the sonority value (1-9) for an IPA character.
+    @param ipa The IPA character/segment to analyze
+    @return The sonority value between 1 and 9
+    @raise Failure
+      if the IPA segment is not recognized, i.e. it has no entry in the table
+      loaded by {!init} (this includes every segment when {!init} has not
+      been called) *)
+
+val sonority_from_features : Feature.segment -> int
+(** Get the sonority value from a feature specification. This is useful when you
+    already have the phonological features of a segment and don't need to look
+    it up by IPA symbol. The IPA table is not consulted, so this works without
+    calling {!init}. A feature that is absent from [segment] makes the
+    corresponding test fail, i.e. the false branch of the decision is taken.
+    @param segment The list of feature specifications
+    @return The sonority value between 1 and 9 *)