blob: bee027aa902ff893cb7570d0f982aba1cb1032b3 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
|
(* A segment is represented by [Feature.segment]: a set of feature
   specifications stored as an association list of (feature, value) pairs.
   Use [List.assoc] to look up the value of a feature. *)
(** Decision tree for computing sonority values.

    A [Node] carries a predicate over segments and one subtree for each
    outcome of that predicate; a [Leaf] carries the sonority value itself. *)
type bool_tree =
  | Leaf of int  (** Terminal node with sonority value *)
  | Node of {
      test : Feature.segment -> bool;  (** Test function *)
      t_branch : bool_tree;  (** Branch to follow if test is true *)
      f_branch : bool_tree;  (** Branch to follow if test is false *)
    }
(** Type representing a row from the IPA CSV file: an IPA string paired with
    the feature specifications read from the rest of the row. *)
type ipa_entry = {
  ipa : string;  (** Contents of the first column of the row *)
  features : Feature.segment;  (** Features built from the remaining columns *)
}
(** Module-level, mutable storage for the loaded IPA data.

    Keys are IPA strings and values are the features of the corresponding
    segment. The table starts out empty, with an initial size hint of 1000. *)
let ipa_table = (Hashtbl.create 1000 : (string, Feature.segment) Hashtbl.t)
(** [parse_row row] converts one CSV row into an {!ipa_entry}.

    The first cell is the IPA string; the remaining cells are feature values,
    expected in the fixed column order listed in the body (24 columns).

    @return
      [None] for an empty row, for the header row (first cell equal to
      ["ipa"]), and for a row whose number of value cells differs from the
      number of feature columns; [Some entry] otherwise.

    Cells are converted one by one before a length mismatch can be detected,
    so any exception raised by [Feature.value_of_string] or
    [Feature.string_of_feature] propagates to the caller. *)
let parse_row (row : string list) : ipa_entry option =
  (* Column order of the value cells that follow the IPA string. *)
  let columns =
    [
      "syl";
      "son";
      "cons";
      "cont";
      "delrel";
      "lat";
      "nas";
      "strid";
      "voi";
      "sg";
      "cg";
      "ant";
      "cor";
      "distr";
      "lab";
      "hi";
      "lo";
      "back";
      "round";
      "velaric";
      "tense";
      "long";
      "hitone";
      "hireg";
    ]
  in
  (* Walk the column names and the cells in lockstep, accumulating the
     converted pairs in reverse. Both lists must run out at the same time. *)
  let rec pair_up collected = function
    | [], [] -> Some (List.rev collected)
    | column :: more_columns, cell :: more_cells ->
        (* The value is converted before the name, as the two conversions
           are opaque here and their order is kept fixed. *)
        let fval = Feature.value_of_string cell in
        let fname = Feature.string_of_feature column in
        pair_up ((fname, fval) :: collected) (more_columns, more_cells)
    | [], _ :: _ | _ :: _, [] -> None (* Mismatched lengths *)
  in
  match row with
  | [] -> None
  | "ipa" :: _ -> None (* Skip the header row *)
  | ipa :: cells -> (
      match pair_up [] (columns, cells) with
      | None -> None
      | Some features -> Some { ipa; features })
(** [load_csv filename] loads IPA data from the CSV file [filename] into
    {!ipa_table}.

    Every row that {!parse_row} accepts is stored under its IPA string; rows
    it rejects (empty rows, the header, rows of the wrong width) are skipped.
    If an IPA string is already present in the table (a repeated row, or the
    file being loaded a second time), its features are overwritten rather
    than stacked as a second, shadowed binding. {!lookup_segment} therefore
    returns the same result as before, and the table does not grow on reload.

    The input channel is closed on every exit path, including when reading
    or parsing raises; such exceptions propagate to the caller.

    @raise Sys_error if [filename] cannot be opened. *)
let load_csv filename =
  let ic = open_in filename in
  Fun.protect
    (* [close_in_noerr] so that cleanup can never mask the exception that
       caused the early exit. *)
    ~finally:(fun () -> close_in_noerr ic)
    (fun () ->
      (* Building the CSV reader happens inside the protected region, so a
         failure there cannot leak the channel either. *)
      let csv = Csv.of_channel ic in
      Csv.iter
        ~f:(fun row ->
          match parse_row row with
          | Some entry -> Hashtbl.replace ipa_table entry.ipa entry.features
          | None -> ())
        csv)
(** [lookup_segment ipa] looks up the features stored for the IPA string
    [ipa] in {!ipa_table}.

    @return
      [Some features] when [ipa] has a binding in the table, [None]
      otherwise. *)
let lookup_segment (ipa : string) : Feature.segment option =
  Hashtbl.find_opt ipa_table ipa
|