diff options
Diffstat (limited to 'sorsyl/test/test_table.ml')
-rw-r--r-- | sorsyl/test/test_table.ml | 208 |
1 files changed, 208 insertions, 0 deletions
(* Smoke tests for the IPA table module of [Sorsyl].

   These tests only print what the library returns; they do not yet assert
   on expected values. They need the project's [data] directory to be
   reachable from the working directory (see [get_data]). *)

open Sorsyl

(* Disabled fixture: Spanish sample sentence. *)
(* let es = *)
(*   String.split_on_char ' ' *)
(*     "la ˌinteɾnˌaθjonˌaliθaθjˈon del kˌoɾaθˈon i el ðˌikθjonˈaɾjo" *)

(* Disabled fixture: German sample words. *)
(* let de = *)
(*   [ *)
(*     "kɔmˈpjuːtɐ"; *)
(*     "teleˈfoːn"; *)
(*     "fɑˈmiːliːjə"; *)
(*     "ˈʔɑːpɔtekə"; *)
(*     "ˈʃoːkolɑdə"; *)
(*     "ˈtoːmɑtən"; *)
(*     "ˈbananə"; *)
(*     "ˈpoːlitsae"; *)
(*     "ˈmuːzɔøm"; *)
(*     "ˈbʏçɐ̯ae"; *)
(*     "mediˈt͡siːn"; *)
(*     "ˈpɾoːfɛszoɾ"; *)
(*     "ʔeleˈfɑnt"; *)
(*     "dokumɛnt"; *)
(*     "ˈʔɪntɛɾnət"; *)
(*     "ˈʔʊnʔivɛɾziˈtɛːt"; *)
(*     "ˈkaɾtɔffɛln"; *)
(*     "ˈmatemɑtik"; *)
(*     "gəˈbʊɾtstɑk"; *)
(*     "ˈvœʁtɐˌbuːx"; *)
(*     "ˈbɪbli̯oːtək"; *)
(*     "demɔkɾɑˈtiːjə"; *)
(*     "fotɔgɾɑˈfiːjə"; *)
(*     "tɛçnoloˈgiːjə"; *)
(*     "bioloˈgiːjə"; *)
(*     "psʏçoloˈgiːjə"; *)
(*     "filozɔfiːjə"; *)
(*     "ˈʃoːkolɑdə"; *)
(*     "ˈmaɾmelɑdə"; *)
(*     "ˈzɛkɾetɛɾɪn"; *)
(*     "ʔɛntˈʃʊldɪgʊŋ"; *)
(*     "ˈkɾaŋkɛnvɛɾzɪçɐ̯ʊŋ"; *)
(*     "gəˈbʊɾtstagspaɾty"; *)
(*     "kɔmmunikɑˈtsĭoːn"; *)
(*     "ʔɔɾgɑnizɑˈtsĭoːn"; *)
(*     "bʏɾgɛɾˈməɪstɐ̯"; *)
(*     "zeɛnsvʏɾdɪkˈkəiːt"; *)
(*     "ˈmiːneɾalvaszɐ̯"; *)
(*     "ˈtsuːzammenaɾˈbəiːt"; *)
(*     "mœglɪçˈkəiːtən"; *)
(*     "gəlegɛnˈəiːtən"; *)
(*     "naxˈmɪtˌtɑːk"; *)
(*     "ʔɪnfɔɾmɑˈtsĭoːn"; *)
(*     "televiˈzĭoːn"; *)
(*     "gəʃvɪndɪkkaetsbɛgɾentsʊŋ"; *)
(*     "ˈkɾaŋkɛnaozaofɛntalt"; *)
(*     "ʔaɾbaetslozɪkˈkəiːt"; *)
(*     "fɛːɐ̯ˈʔantvɔɾtlɪçˈkəiːt"; *)
(*     "zeɛnsvʏɾdɪkˈkəiːtən"; *)
(*     "ˈzɛlpstvɛɾstɛntlɪç"; *)
(*     "ˈvaenaxtsgɛʃɛŋkə"; *)
(*     "gəˈbʊɾtstaksgɛʃɛŋk"; *)
(*     "tuɾɪstenɪnfɔɾmɑˈtsĭoːn"; *)
(*     "ˈʔʊnˌʔiːvɛɾzitɛtspɾofɛszoɾ"; *)
(*     "ˈleːbɛnsmɪtɛlgɛʃɛft"; *)
(*     "ˈfɑɾɾatvɛɾlae"; *)
(*     "ˈbʊndɛstakzapgeɔɾdnətɐ̯"; *)
(*     "ˈʃtɾaeçɔltsʃɛçtɛlçən"; *)
(*     "ˈfɾɔøntʃaftsbetsiːʊŋən"; *)
(*     "ˈɾɛçtsʃʊtsvɛɾzɪçɐ̯ʊŋ"; *)
(*     "nɑɾʊŋsmɪtelʊnvɛɾtɾɛglɪçˈkəiːt"; *)
(*   ] *)

(* Disabled fixture: first English sample. *)
(* let en1 = *)
(*   [ *)
(*     "ˈæpəɫ"; *)
(*     "ˈðɪs"; *)
(*     "ˈɪs"; *)
(*     "ˈeɪ"; *)
(*     "ɫɪŋˈɡwɪstɪks"; *)
(*     "kəˈtæstɹəfi"; *)
(*     "wɪˈθaʊt"; *)
(*     "ˈpɹɛsədənt"; *)
(*     "ˈtu"; *)
(*     "ədˈmɪt"; *)
(*     "ˈænd"; *)
(*     "ˈɪts"; *)
(*     "ˌɪnstəˈɡeɪʃən"; *)
(*     "ˈʃʊd"; *)
(*     "ˈbi"; *)
(*     "ˈpənɪʃt"; *)
(*   ] *)

(** English IPA transcriptions fed to [test_segs]. Repeated entries are kept
    exactly as in the original fixture. *)
let en2 =
  [
    "d͡ʒɹ̩mən";
    "ˈpɹɛzənt";
    "ˈɑɹtɪkəɫ";
    "pɹəˈvaɪdz";
    "ˌɹiəˈnæɫəsəs";
    "kˈɔːɹɑːnəl";
    "kɝˈoʊnəɫ";
    "ˈɑptɪks";
    "ˈɛksəɫəns";
    "əbˈstɹuənts";
    "ˈdʒɝmən";
    "ˈɛŋɡɫɪʃ";
    "ˈkɑmənɫi";
    "əˈsumd";
    "ˈdʒɪps";
    "ˈpɫæstɝ";
    "ˈɛŋɡɫɪʃ";
    (* "ˈɫæps"; *)
    "ˈɑɹɡjud";
    "bɪˈɫoʊ";
    "ˌɛkstɹəsɪˈɫæbɪk";
    "ˈkɑnsənənts";
    "ˌdɛɹəˈveɪʃənəɫ";
    "ˈsteɪdʒ";
    "ˌɛkstɹəsɪɫəˈbɪsɪti";
    "ˈaɪðɝ";
    "ˈɫæŋɡwɪdʒɪz";
    "ɪɡˈzɪsts";
    "ˈɛvədəns";
    "ˈkɑmənɫi";
    "pɹɪˈzɛntɪd";
    "səˈpɔɹt";
    "ˌɛkstɹəsɪˈɫæbɪk";
    "ˈkɑnsənənts";
    "kəmˈpætəbəɫ";
    "ˈfʊɫi";
    "ˈsɝfəs";
    "ˌɑptəˈmæɫəti";
    "ˌθiɝˈɛtɪk";
    "ˈtɹitmənt";
    "kənˈstɹeɪnts";
    "ɹɪˈfɝɪŋ";
    "ˈfʊɫi";
    "səˈɫæbəˌfaɪd";
    "ˈaʊtˌpʊt";
    "ˌɹɛpɹəzɛnˈteɪʃənz";
    "pɹəˈpoʊzd";
  ]

(* Disabled fixture: Chinese sample. *)
(* let zh = [ "/t͡ɕi⁵¹ ti⁵¹ pʰi⁵¹/" ] *)

(* Disabled fixture: slash-delimited transcriptions. *)
(* let ws = *)
(*   [ *)
(*     ( "/nuˌmɑ.noʊ.ʌl.tɹə.maɪ.kɹoʊˈskɑ.pɪkˌsɪ.lɪ.koʊ.vɑl.keɪ.noʊ.koʊ.niˈoʊ.sɪs/", *)
(*       "/əˈbæn.dn̩.əd.li/", *)
(*       " /əˈbæn.dn̩.əd.li/", *)
(*       "/əˈbæn.dn̩.mn̩t/", *)
(*       "/-ˌbiːə-/", *)
(*       "/əˈbluː.ʃn̩/xx" ); *)
(*   ] *)

(** [get_data ()] locates the [data] directory and loads it with
    [Ipa_table.load_csv].

    The candidate paths are probed in order and the first one that exists is
    used, so the test can be started from several working directories.

    @raise Failure if none of the candidate paths exists; the current working
    directory is printed to stderr first to help diagnose the problem. *)
let get_data () =
  Printf.printf "Getting data\n";
  let candidates = [ "./data"; "../data"; "../../../data" ] in
  let data_dir =
    match Base.List.find candidates ~f:Sys.file_exists with
    | Some dir -> dir
    | None ->
        Printf.eprintf "Current directory: %s\n" (Sys.getcwd ());
        failwith "Cannot find data directory"
  in
  Ipa_table.load_csv data_dir

(** [test_fts table] looks up the symbol ["s"] with [Ipa_table.fts] and prints
    the returned segment, if any.

    NOTE(review): a [None] result is silently accepted, so this check cannot
    fail; decide whether a missing ["s"] should be an error.
    TODO: assert on the expected feature bundle instead of only printing. *)
let test_fts table =
  match Ipa_table.fts table "s" with
  | None -> ()
  | Some seg -> Printf.printf "fts\n %s\n" (Feature.string_of_segment seg)

(** [test_segs data] runs [Ipa_table.ipa_segs] on every word of [en2] and
    prints the word followed by a line with its segments.

    Each segment is printed with a leading ['-'] (so a line looks like
    [-a-b-c]); this matches the format the test has always produced.
    TODO: compare the segmentations against expected values. *)
let test_segs data =
  Base.List.iter en2 ~f:(fun word ->
      let segs = Ipa_table.ipa_segs data word in
      Printf.printf "%s\n" word;
      (* Print segments one by one rather than rebuilding an accumulator
         string with sprintf for every element. *)
      Base.List.iter segs ~f:(fun seg -> Printf.printf "-%s" seg);
      Printf.printf "\n");
  Printf.printf "test_segs: PASSED\n"

(** Run all tests *)
let () =
  let data = get_data () in
  Printf.printf "Running IPA Table module tests...\n";
  Printf.printf "===================================\n";
  test_fts data.table;
  test_segs data;
  Printf.printf "===================================\n";
  Printf.printf "All IPA Table tests passed!\n"