diff options
Diffstat (limited to 'desk/lib/cjk.hoon')
-rw-r--r-- | desk/lib/cjk.hoon | 107 |
1 files changed, 107 insertions, 0 deletions
::  lib/cjk: helpers for CJK text backed by the Unihan database.
::
::    provenance: diff --git a/desk/lib/cjk.hoon b/desk/lib/cjk.hoon
::    (new file, index 0000000..2d34896)
::
/+  sr=sortug
/*  raw  %json  /data/unihan/json
|%
::  +generic: schema-less json decoder
::
::    numbers go through +ni, strings through +so, objects and arrays
::    are decoded recursively with +om and +ar.  any other value
::    falls through to the empty cord.
::
++  generic
  |=  jon=json
  =,  dejs:format
  ?:  ?=(%n -.jon)  (ni jon)
  ?:  ?=(%s -.jon)  (so jon)
  ?:  ?=(%o -.jon)  ((om generic) jon)
  ::  only question here is whether to do ar or as
  ?:  ?=(%a -.jon)  ((ar generic) jon)
  ''
::  +dejs: decode the top-level json object
::
::    every value of the outer object must itself be an object (crash
::    otherwise); its fields are decoded with +generic.
::    NOTE(review): presumably the outer keys are single characters
::    and the inner keys are Unihan field names, as +romanize uses
::    them that way -- confirm against /data/unihan/json.
::
++  dejs
  =,  dejs:format
  %-  om
  |=  jon=json
  ?.  ?=(%o -.jon)  !!
  (~(run by p.jon) generic)
  ::  commented-out draft of a typed, per-field decoder, kept for
  ::  reference:
  ::
  ::  ?:  .=  k  'kAccountingNumeric'  (ni v)
  ::  ?:  .=  k  'kGB0'  (ni v)
  ::  ?:  .=  k  'kGB1'  (ni v)
  ::  ?:  .=  k  'kGradeLevel'  (ni v)
  ::  ?:  .=  k  'kFrequency'  (so v)
  ::  ?:  .=  k  'kBigFive'  (so v)
  ::  ?:  .=  k  'kCCCII'  (so v)
  ::  ?:  .=  k  'kCNS1986'  (so v)
  ::  ?:  .=  k  'kCNS1992'  (so v)
  ::  ?:  .=  k  'kCangjie'  (so v)
  ::  ?:  .=  k  'kCowles'  (so v)
  ::  ?:  .=  k  'kDaeJaweon'  (so v)
  ::  ?:  .=  k  'kDefinition'  (so v)
  ::  ?:  .=  k  'kEACC'  (so v)
  ::  ?:  .=  k  'kFenn'  (so v)
  ::  ?:  .=  k  'kFennIndex'  (so v)
  ::  ?:  .=  k  'kFourCornerCode'  (so v)
  ::  ?:  .=  k  'kHKGlyph'  (so v)
  ::  ?:  .=  k  'kHanYu'  (so v)
  ::  ?:  .=  k  'kIICore'  (so v)
  ::  ?:  .=  k  'kIRGDaeJaweon'  (so v)
  ::  ?:  .=  k  'kIRGDaiKanwaZiten'  (so v)
  ::  ?:  .=  k  'kIRGHanyuDaZidian'  (so v)
  ::  ?:  .=  k  'kIRGKangXi'  (so v)
  ::  ?:  .=  k  'kIRG_GSource'  (so v)
  ::  ?:  .=  k  'kIRG_HSource'  (so v)
  ::  ?:  .=  k  'kIRG_JSource'  (so v)
  ::  ?:  .=  k  'kIRG_KPSource'  (so v)
  ::  ?:  .=  k  'kIRG_KSource'  (so v)
  ::  ?:  .=  k  'kIRG_TSource'  (so v)
  ::  ?:  .=  k  'kJis0'  (so v)
  ::  ?:  .=  k  'kKPS0'  (so v)
  ::  ?:  .=  k  'kKSC0'  (so v)
  ::  ?:  .=  k  'kKangXi'  (so v)
  ::  ?:  .=  k  'kKorean'  (so v)
  ::  ?:  .=  k  'kKoreanName'  (so v)
  ::  ?:  .=  k  'kMainlandTelegraph'  (so v)
  ::  ?:  .=  k  'kMandarin'  (so v)
  ::  ?:  .=  k  'kMatthews'  (so v)
  ::  ?:  .=  k  'kMeyerWempe'  (so v)
  ::  ?:  .=  k  'kMorohashi'  (so v)
  ::  ?:  .=  k  'kPhonetic'  (so v)
  ::  ?:  .=  k  'kRSAdobe_Japan1_6'  (so v)
  ::  ?:  .=  k  'kRSKangXi'  (so v)
  ::  ?:  .=  k  'kRSUnicode'  (so v)
  ::  ?:  .=  k  'kSBGY'  (so v)
  ::  ?:  .=  k  'kSemanticVariant'  (so v)
  ::  ?:  .=  k  'kSpecializedSemanticVariant'  (so v)
  ::  ?:  .=  k  'kTGH'  (so v)
  ::  ?:  .=  k  'kXHC1983'  (so v)
  ::  ?:  .=  k  'kXerox'  (so v)
  ::  ?:  .=  k  'kZVariant'  (so v)
  ::  ?:  .=  k  'kCantonese'  ((as so) v)
  ::  ?:  .=  k  'kHangul'  ((om so) v)
  ::  ?:  .=  k  'kHanyuPinyin'  ((om (as so)) v)
  ::  ?:  .=  k  'kJapaneseKun'  ((as so) v)
  ::  ?:  .=  k  'kJapaneseOn'  ((as so) v)
  ::  ?:  .=  k  'kLau'  ((as ni) v)
  ::  ?:  .=  k  'kNelson'  ((as ni) v)
  ::  ?:  .=  k  'kTaiwanTelegraph'  ((as ni) v)
  ::  ?:  .=  k  'kTotalStrokes'  ((as ni) v)  ''
::  +get-map: the imported json file decoded with +dejs
::
::    this arm is re-evaluated on every reference, so the decode is
::    wrapped in a ~+ memoization hint; the product is unchanged.
::
++  get-map
  ~+
  (dejs raw)
::  +is-cjk: is .char in the range U+4E00..U+9FFF (inclusive)?
::
::    .char is converted from utf-8 to utf-32 with +taft before the
::    comparison.
::
++  is-cjk
  |=  char=@t
  ^-  ?
  =/  start  0x4e00
  =/  end  0x9fff
  =/  codepoint  (taft char)
  ?&  (gte codepoint start)
      (lte codepoint end)
  ==
::  +has: does any element of .t occupy exactly three bytes?
::
::    NOTE(review): this only fires when the list elements are whole
::    multi-byte characters rather than single bytes -- confirm how
::    callers build .t.
::
++  has
  |=  t=tape
  ^-  ?
  |-
  ?~  t  .n
  ?:  .=(3 (met 3 i.t))  .y
  $(t t.t)
::  +romanize: look up the 'kMandarin' entry of each character of .t
::
::    produces the entries in input order, one list element per
::    character that has one.  characters missing from the dictionary,
::    lacking a 'kMandarin' field, or whose field is not an atom are
::    skipped.
::
++  romanize
  |=  t=@t
  ^-  tape
  =/  dict  get-map
  ::  split on codepoints (utf-8 -> utf-32 -> one cord per character).
  ::  the previous (rip [3 size] t), with size = byte-length / 3, cut
  ::  the cord into size-byte chunks instead of 3-byte characters, and
  ::  never terminated when size was 0 (inputs shorter than 3 bytes).
  ::
  =/  l=(list @t)  (turn (rip 5 (taft t)) tuft)
  |-
  ?~  l  ~
  =/  data  (~(get by dict) i.l)
  ?~  data  $(l t.l)
  =/  mand  (~(get by u.data) 'kMandarin')
  ?~  mand  $(l t.l)
  ::  the field value is untyped; only accept it if it is an atom
  ::
  =/  cord  ((soft @t) u.mand)
  ?~  cord  $(l t.l)
  :-  u.cord  $(l t.l)
--