summary refs log tree commit diff
path: root/desk/lib/cjk.hoon
diff options
context:
space:
mode:
Diffstat (limited to 'desk/lib/cjk.hoon')
-rw-r--r-- desk/lib/cjk.hoon 107
1 files changed, 107 insertions, 0 deletions
diff --git a/desk/lib/cjk.hoon b/desk/lib/cjk.hoon
new file mode 100644
index 0000000..2d34896
--- /dev/null
+++ b/desk/lib/cjk.hoon
@@ -0,0 +1,107 @@
+/+ sr=sortug
+/* raw %json /data/unihan/json
+|%
+::  +generic: schema-less JSON decoder
+::
+::    Recursively converts a $json value with the dejs:format parsers:
+::      %n  ->  number as integer, via +ni
+::      %s  ->  cord, via +so
+::      %o  ->  map whose values are decoded by +generic
+::      %a  ->  list whose elements are decoded by +generic
+::    Anything else (%b booleans, null) becomes the empty cord ''.
+::
+++  generic
+  |=  jon=json
+  =,  dejs:format
+  ::  JSON null is the bare atom ~ and has no head to inspect, so
+  ::  answer with the fallback before any -.jon test is made
+  ?~  jon  ''
+  ?:  ?=(%n -.jon)  (ni jon)
+  ?:  ?=(%s -.jon)  (so jon)
+  ?:  ?=(%o -.jon)  ((om generic) jon)
+  ::  open question carried over from the original: +ar (list) or
+  ::  +as (set) for arrays
+  ?:  ?=(%a -.jon)  ((ar generic) jon)
+  ''
+::  +dejs: decode a JSON object of objects into a map of maps
+::
+::    The outer +om requires the document to be an object; the inner
+::    +om requires every entry to be an object too and decodes each of
+::    its values with +generic.  Crashes on any other shape.
+::    The commented-out table that follows lists per-field parsers
+::    that are not currently applied.
+::
+++  dejs
+  =,  dejs:format
+  (om (om generic))
+ :: ?: .= k 'kAccountingNumeric' (ni v)
+ :: ?: .= k 'kGB0' (ni v)
+ :: ?: .= k 'kGB1' (ni v)
+ :: ?: .= k 'kGradeLevel' (ni v)
+ :: ?: .= k 'kFrequency' (so v)
+ :: ?: .= k 'kBigFive' (so v)
+ :: ?: .= k 'kCCCII' (so v)
+ :: ?: .= k 'kCNS1986' (so v)
+ :: ?: .= k 'kCNS1992' (so v)
+ :: ?: .= k 'kCangjie' (so v)
+ :: ?: .= k 'kCowles' (so v)
+ :: ?: .= k 'kDaeJaweon' (so v)
+ :: ?: .= k 'kDefinition' (so v)
+ :: ?: .= k 'kEACC' (so v)
+ :: ?: .= k 'kFenn' (so v)
+ :: ?: .= k 'kFennIndex' (so v)
+ :: ?: .= k 'kFourCornerCode' (so v)
+ :: ?: .= k 'kHKGlyph' (so v)
+ :: ?: .= k 'kHanYu' (so v)
+ :: ?: .= k 'kIICore' (so v)
+ :: ?: .= k 'kIRGDaeJaweon' (so v)
+ :: ?: .= k 'kIRGDaiKanwaZiten' (so v)
+ :: ?: .= k 'kIRGHanyuDaZidian' (so v)
+ :: ?: .= k 'kIRGKangXi' (so v)
+ :: ?: .= k 'kIRG_GSource' (so v)
+ :: ?: .= k 'kIRG_HSource' (so v)
+ :: ?: .= k 'kIRG_JSource' (so v)
+ :: ?: .= k 'kIRG_KPSource' (so v)
+ :: ?: .= k 'kIRG_KSource' (so v)
+ :: ?: .= k 'kIRG_TSource' (so v)
+ :: ?: .= k 'kJis0' (so v)
+ :: ?: .= k 'kKPS0' (so v)
+ :: ?: .= k 'kKSC0' (so v)
+ :: ?: .= k 'kKangXi' (so v)
+ :: ?: .= k 'kKorean' (so v)
+ :: ?: .= k 'kKoreanName' (so v)
+ :: ?: .= k 'kMainlandTelegraph' (so v)
+ :: ?: .= k 'kMandarin' (so v)
+ :: ?: .= k 'kMatthews' (so v)
+ :: ?: .= k 'kMeyerWempe' (so v)
+ :: ?: .= k 'kMorohashi' (so v)
+ :: ?: .= k 'kPhonetic' (so v)
+ :: ?: .= k 'kRSAdobe_Japan1_6' (so v)
+ :: ?: .= k 'kRSKangXi' (so v)
+ :: ?: .= k 'kRSUnicode' (so v)
+ :: ?: .= k 'kSBGY' (so v)
+ :: ?: .= k 'kSemanticVariant' (so v)
+ :: ?: .= k 'kSpecializedSemanticVariant' (so v)
+ :: ?: .= k 'kTGH' (so v)
+ :: ?: .= k 'kXHC1983' (so v)
+ :: ?: .= k 'kXerox' (so v)
+ :: ?: .= k 'kZVariant' (so v)
+ :: ?: .= k 'kCantonese' ((as so) v)
+ :: ?: .= k 'kHangul' ((om so) v)
+ :: ?: .= k 'kHanyuPinyin' ((om (as so)) v)
+ :: ?: .= k 'kJapaneseKun' ((as so) v)
+ :: ?: .= k 'kJapaneseOn' ((as so) v)
+ :: ?: .= k 'kLau' ((as ni) v)
+ :: ?: .= k 'kNelson' ((as ni) v)
+ :: ?: .= k 'kTaiwanTelegraph' ((as ni) v)
+ :: ?: .= k 'kTotalStrokes' ((as ni) v) ''
+::  +get-map: the imported .raw JSON (/data/unihan/json) run through +dejs
+::
+::    This is a plain arm with no caching hint, so the decode is
+::    recomputed on every reference.
+::
+++  get-map
+  %-  dejs
+  raw
+::  +is-cjk: is .char inside U+4E00..U+9FFF (inclusive)?
+::
+::    Decodes the UTF-8 cord to UTF-32 with +taft and range-checks the
+::    result.  Only this one range is tested.
+::
+++  is-cjk
+  |=  char=@t
+  ^-  ?
+  =/  cp  (taft char)
+  &((gte cp 0x4e00) (lte cp 0x9fff))
+
+::  +has: does any element of .t occupy exactly three bytes?
+::
+::    Produces %.y at the first element whose byte width is 3, and
+::    %.n for an empty list or when no element qualifies.
+::    NOTE(review): presumably a "contains CJK" test for tapes whose
+::    elements are whole UTF-8 characters; a byte-wise tape such as
+::    the output of +trip can never match.  confirm against callers.
+::
+++  has
+  |=  t=tape
+  ^-  ?
+  %+  lien  t
+  |=  c=@
+  =(3 (met 3 c))
+::  +romanize: collect the Mandarin reading of each character in .t
+::
+::    Splits .t into single UTF-8 characters, looks each one up in the
+::    map produced by +get-map, and keeps the 'kMandarin' value of every
+::    character that has one.  A character is skipped when it has no
+::    entry, no 'kMandarin' field, or a value that is not an atom.
+::    The result is typed as a tape, but each element is one whole
+::    reading rather than a single byte.
+::
+++  romanize
+  |=  t=@t
+  ^-  tape
+  =/  dict  get-map
+  ::  split on codepoint boundaries: decode to UTF-32, then re-encode
+  ::  each codepoint on its own.  the earlier (rip [3 size] t), with
+  ::  size = bytes / 3, used the character count as the chunk width and
+  ::  so only split correctly when .t held exactly three 3-byte chars
+  =/  chars=(list @t)  (turn (tuba (trip t)) tuft)
+  |-
+  ?~  chars  ~
+  =/  entry  (~(get by dict) i.chars)
+  ?~  entry  $(chars t.chars)
+  =/  mand  (~(get by u.entry) 'kMandarin')
+  ?~  mand  $(chars t.chars)
+  =/  reading  ((soft @t) u.mand)
+  ?~  reading  $(chars t.chars)
+  [u.reading $(chars t.chars)]
+--