summaryrefslogtreecommitdiff
path: root/sorsyl/test/test_table.ml
diff options
context:
space:
mode:
Diffstat (limited to 'sorsyl/test/test_table.ml')
-rw-r--r--sorsyl/test/test_table.ml208
1 files changed, 208 insertions, 0 deletions
diff --git a/sorsyl/test/test_table.ml b/sorsyl/test/test_table.ml
new file mode 100644
index 0000000..89cb4a2
--- /dev/null
+++ b/sorsyl/test/test_table.ml
@@ -0,0 +1,208 @@
+open Sorsyl
+
+(* let es = *)
+(* String.split_on_char ' ' *)
+(* "la ˌinteɾnˌaθjonˌaliθaθjˈon del kˌoɾaθˈon i el ðˌikθjonˈaɾjo" *)
+
+(* let de = *)
+(* [ *)
+(* "kɔmˈpjuːtɐ"; *)
+(* "teleˈfoːn"; *)
+(* "fɑˈmiːliːjə"; *)
+(* "ˈʔɑːpɔtekə"; *)
+(* "ˈʃoːkolɑdə"; *)
+(* "ˈtoːmɑtən"; *)
+(* "ˈbananə"; *)
+(* "ˈpoːlitsae"; *)
+(* "ˈmuːzɔøm"; *)
+(* "ˈbʏçɐ̯ae"; *)
+(* "mediˈt͡siːn"; *)
+(* "ˈpɾoːfɛszoɾ"; *)
+(* "ʔeleˈfɑnt"; *)
+(* "dokumɛnt"; *)
+(* "ˈʔɪntɛɾnət"; *)
+(* "ˈʔʊnʔivɛɾziˈtɛːt"; *)
+(* "ˈkaɾtɔffɛln"; *)
+(* "ˈmatemɑtik"; *)
+(* "gəˈbʊɾtstɑk"; *)
+(* "ˈvœʁtɐˌbuːx"; *)
+(* "ˈbɪbli̯oːtək"; *)
+(* "demɔkɾɑˈtiːjə"; *)
+(* "fotɔgɾɑˈfiːjə"; *)
+(* "tɛçnoloˈgiːjə"; *)
+(* "bioloˈgiːjə"; *)
+(* "psʏçoloˈgiːjə"; *)
+(* "filozɔfiːjə"; *)
+(* "ˈʃoːkolɑdə"; *)
+(* "ˈmaɾmelɑdə"; *)
+(* "ˈzɛkɾetɛɾɪn"; *)
+(* "ʔɛntˈʃʊldɪgʊŋ"; *)
+(* "ˈkɾaŋkɛnvɛɾzɪçɐ̯ʊŋ"; *)
+(* "gəˈbʊɾtstagspaɾty"; *)
+(* "kɔmmunikɑˈtsĭoːn"; *)
+(* "ʔɔɾgɑnizɑˈtsĭoːn"; *)
+(* "bʏɾgɛɾˈməɪstɐ̯"; *)
+(* "zeɛnsvʏɾdɪkˈkəiːt"; *)
+(* "ˈmiːneɾalvaszɐ̯"; *)
+(* "ˈtsuːzammenaɾˈbəiːt"; *)
+(* "mœglɪçˈkəiːtən"; *)
+(* "gəlegɛnˈəiːtən"; *)
+(* "naxˈmɪtˌtɑːk"; *)
+(* "ʔɪnfɔɾmɑˈtsĭoːn"; *)
+(* "televiˈzĭoːn"; *)
+(* "gəʃvɪndɪkkaetsbɛgɾentsʊŋ"; *)
+(* "ˈkɾaŋkɛnaozaofɛntalt"; *)
+(* "ʔaɾbaetslozɪkˈkəiːt"; *)
+(* "fɛːɐ̯ˈʔantvɔɾtlɪçˈkəiːt"; *)
+(* "zeɛnsvʏɾdɪkˈkəiːtən"; *)
+(* "ˈzɛlpstvɛɾstɛntlɪç"; *)
+(* "ˈvaenaxtsgɛʃɛŋkə"; *)
+(* "gəˈbʊɾtstaksgɛʃɛŋk"; *)
+(* "tuɾɪstenɪnfɔɾmɑˈtsĭoːn"; *)
+(* "ˈʔʊnˌʔiːvɛɾzitɛtspɾofɛszoɾ"; *)
+(* "ˈleːbɛnsmɪtɛlgɛʃɛft"; *)
+(* "ˈfɑɾɾatvɛɾlae"; *)
+(* "ˈbʊndɛstakzapgeɔɾdnətɐ̯"; *)
+(* "ˈʃtɾaeçɔltsʃɛçtɛlçən"; *)
+(* "ˈfɾɔøntʃaftsbetsiːʊŋən"; *)
+(* "ˈɾɛçtsʃʊtsvɛɾzɪçɐ̯ʊŋ"; *)
+(* "nɑɾʊŋsmɪtelʊnvɛɾtɾɛglɪçˈkəiːt"; *)
+(* ] *)
+
+(* let en1 = *)
+(* [ *)
+(* "ˈæpəɫ"; *)
+(* "ˈðɪs"; *)
+(* "ˈɪs"; *)
+(* "ˈeɪ"; *)
+(* "ɫɪŋˈɡwɪstɪks"; *)
+(* "kəˈtæstɹəfi"; *)
+(* "wɪˈθaʊt"; *)
+(* "ˈpɹɛsədənt"; *)
+(* "ˈtu"; *)
+(* "ədˈmɪt"; *)
+(* "ˈænd"; *)
+(* "ˈɪts"; *)
+(* "ˌɪnstəˈɡeɪʃən"; *)
+(* "ˈʃʊd"; *)
+(* "ˈbi"; *)
+(* "ˈpənɪʃt"; *)
+(* ] *)
+
+(* IPA transcriptions fed to [Ipa_table.ipa_segs] by [test_segs]. *)
+(* Most entries carry primary and/or secondary stress marks. *)
+(* Several entries occur more than once, so they are processed repeatedly. *)
+(* NOTE(review): presumably English words, judging by the name; confirm. *)
+let en2 =
+ [
+ "d͡ʒɹ̩mən";
+ "ˈpɹɛzənt";
+ "ˈɑɹtɪkəɫ";
+ "pɹəˈvaɪdz";
+ "ˌɹiəˈnæɫəsəs";
+ "kˈɔːɹɑːnəl";
+ "kɝˈoʊnəɫ";
+ "ˈɑptɪks";
+ "ˈɛksəɫəns";
+ "əbˈstɹuənts";
+ "ˈdʒɝmən";
+ "ˈɛŋɡɫɪʃ";
+ "ˈkɑmənɫi";
+ "əˈsumd";
+ "ˈdʒɪps";
+ "ˈpɫæstɝ";
+ "ˈɛŋɡɫɪʃ";
+ (* "ˈɫæps"; *)
+ "ˈɑɹɡjud";
+ "bɪˈɫoʊ";
+ "ˌɛkstɹəsɪˈɫæbɪk";
+ "ˈkɑnsənənts";
+ "ˌdɛɹəˈveɪʃənəɫ";
+ "ˈsteɪdʒ";
+ "ˌɛkstɹəsɪɫəˈbɪsɪti";
+ "ˈaɪðɝ";
+ "ˈɫæŋɡwɪdʒɪz";
+ "ɪɡˈzɪsts";
+ "ˈɛvədəns";
+ "ˈkɑmənɫi";
+ "pɹɪˈzɛntɪd";
+ "səˈpɔɹt";
+ "ˌɛkstɹəsɪˈɫæbɪk";
+ "ˈkɑnsənənts";
+ "kəmˈpætəbəɫ";
+ "ˈfʊɫi";
+ "ˈsɝfəs";
+ "ˌɑptəˈmæɫəti";
+ "ˌθiɝˈɛtɪk";
+ "ˈtɹitmənt";
+ "kənˈstɹeɪnts";
+ "ɹɪˈfɝɪŋ";
+ "ˈfʊɫi";
+ "səˈɫæbəˌfaɪd";
+ "ˈaʊtˌpʊt";
+ "ˌɹɛpɹəzɛnˈteɪʃənz";
+ "pɹəˈpoʊzd";
+ ]
+
+(* let zh = [ "/t͡ɕi⁵¹ ti⁵¹ pʰi⁵¹/" ] *)
+
+(* let ws = *)
+(* [ *)
+(* ( "/nuˌmɑ.noʊ.ʌl.tɹə.maɪ.kɹoʊˈskɑ.pɪkˌsɪ.lɪ.koʊ.vɑl.keɪ.noʊ.koʊ.niˈoʊ.sɪs/", *)
+(* "/əˈbæn.dn̩.əd.li/", *)
+(* " /əˈbæn.dn̩.əd.li/", *)
+(* "/əˈbæn.dn̩.mn̩t/", *)
+(* "/-ˌbiːə-/", *)
+(* "/əˈbluː.ʃn̩/xx" ); *)
+(* ] *)
+
+(** Data loaded by [Ipa_table.load_csv] from the first existing directory
+    among [./data], [../data] and [../../../data], probed in that order.
+
+    This is a value, not a function: the lookup, the "Getting data" message
+    and the load all happen once, when this module is initialised.
+
+    @raise Failure
+      with message "Cannot find data directory" when none of the candidate
+      directories exists; the current working directory is reported on
+      stderr first. *)
+let get_data =
+  Printf.printf "Getting data\n";
+  (* Ordered by preference; the first path that exists wins. *)
+  let candidates = [ "./data"; "../data"; "../../../data" ] in
+  let data_dir =
+    match List.find_opt Sys.file_exists candidates with
+    | Some dir -> dir
+    | None ->
+        Printf.eprintf "Current directory: %s\n" (Sys.getcwd ());
+        failwith "Cannot find data directory"
+  in
+  Ipa_table.load_csv data_dir
+
+(** [test_fts table] looks up the symbol ["s"] with [Ipa_table.fts] and, when
+    a segment comes back, prints it rendered by [Feature.string_of_segment].
+    A [None] result is accepted silently; nothing is asserted. *)
+let test_fts table =
+  match Ipa_table.fts table "s" with
+  | Some seg ->
+      let rendered = Feature.string_of_segment seg in
+      Printf.printf "fts\n %s\n" rendered
+  | None -> ()
+(* let expected = None in *)
+(* let results = Ipa_table.fts *)
+(* assert (result = expected); *)
+(* Printf.printf "test_fts: PASSED\n" *)
+
+(** [test_segs data] runs [Ipa_table.ipa_segs data] on every word of [en2]
+    and prints, for each word, the word itself followed by a line holding its
+    segments, each segment preceded by ["-"].
+
+    Nothing is asserted: this is a smoke test that only checks segmentation
+    runs to completion. Any exception raised by [Ipa_table.ipa_segs]
+    propagates to the caller.
+
+    TODO: compare the segments against expected values once they exist. *)
+let test_segs data =
+  (* The per-word results are only printed, so iterate instead of building
+     a list that is thrown away. *)
+  Base.List.iter en2 ~f:(fun word ->
+      (* Segment before printing, keeping the original order of effects. *)
+      let segs = Ipa_table.ipa_segs data word in
+      Printf.printf "%s\n" word;
+      (* Prefix every segment with "-" and join once: same text as folding
+         with "%s-%s" from "", without the quadratic re-copying. *)
+      let joined =
+        Base.String.concat ~sep:""
+          (Base.List.map segs ~f:(fun seg -> "-" ^ seg))
+      in
+      Printf.printf "%s\n" joined);
+  (* Report under the name of this test, not test_fts. *)
+  Printf.printf "test_segs: PASSED\n"
+
+(** Test entry point: takes the loaded data from [get_data], then runs
+    [test_fts] on its [table] field and [test_segs] on the whole value,
+    framed by banner lines on stdout. *)
+let () =
+  let rule = "===================================\n" in
+  let data = get_data in
+  Printf.printf "Running IPA Table module tests...\n";
+  Printf.printf "%s" rule;
+  test_fts data.table;
+  test_segs data;
+  Printf.printf "%s" rule;
+  Printf.printf "All IPA Table tests passed!\n"