::  path: root/desk/lib/cjk.hoon
::  blob: 2d34896a8b6c3897829fa8e4db067ce11ab9ce28
::
/+  sr=sortug
/*  raw  %json  /data/unihan/json  
|%
::  generic: schema-less json decoder
::
::    dispatches on the json tag using the dejs:format reparsers:
::      %n  ->  (ni jon)
::      %s  ->  (so jon)
::      %o  ->  map whose values are decoded recursively
::      %a  ->  list whose items are decoded recursively
::    every other value (booleans, null) produces the empty cord ''.
::
++  generic
  |=  jon=json
  =,  dejs:format
  ::  match on the whole value instead of on -.jon: null json is the
  ::  atom ~, which has no head, so a head test cannot reach the
  ::  '' fallback for it
  ?+  jon  ''
    [%n *]  (ni jon)
    [%s *]  (so jon)
    [%o *]  ((om generic) jon)
    [%a *]  ((ar generic) jon)  ::  open question: ar or as here
  ==
::  dejs: decoder for the top-level json object
::
::    built with om from dejs:format: every value of the outer object
::    must itself be an object (anything else crashes), and each of its
::    values is decoded with generic.
::
++  dejs
  =,  dejs:format
  %-  om
  |=  entry=json
  ::  assert the object shape; this also lets p.entry be read as a map
  ?>  ?=(%o -.entry)
  (~(run by p.entry) generic)
  ::
  ::  disabled per-key dispatch, kept for reference (it refers to a
  ::  key k and value v that are not bound anywhere in this arm)
  ::
  :: ?:  .=  k   'kAccountingNumeric'           (ni v)
  :: ?:  .=  k   'kGB0'                         (ni v)
  :: ?:  .=  k   'kGB1'                         (ni v)
  :: ?:  .=  k   'kGradeLevel'                  (ni v)
  :: ?:  .=  k   'kFrequency'                   (so v)
  :: ?:  .=  k   'kBigFive'                     (so v)
  :: ?:  .=  k   'kCCCII'                       (so v)
  :: ?:  .=  k   'kCNS1986'                     (so v)
  :: ?:  .=  k   'kCNS1992'                     (so v)
  :: ?:  .=  k   'kCangjie'                     (so v)
  :: ?:  .=  k   'kCowles'                      (so v)
  :: ?:  .=  k   'kDaeJaweon'                   (so v)
  :: ?:  .=  k   'kDefinition'                  (so v)
  :: ?:  .=  k   'kEACC'                        (so v)
  :: ?:  .=  k   'kFenn'                        (so v)
  :: ?:  .=  k   'kFennIndex'                   (so v)
  :: ?:  .=  k   'kFourCornerCode'              (so v)
  :: ?:  .=  k   'kHKGlyph'                     (so v)
  :: ?:  .=  k   'kHanYu'                       (so v)
  :: ?:  .=  k   'kIICore'                      (so v)
  :: ?:  .=  k   'kIRGDaeJaweon'                (so v)
  :: ?:  .=  k   'kIRGDaiKanwaZiten'            (so v)
  :: ?:  .=  k   'kIRGHanyuDaZidian'            (so v)
  :: ?:  .=  k   'kIRGKangXi'                   (so v)
  :: ?:  .=  k   'kIRG_GSource'                 (so v)
  :: ?:  .=  k   'kIRG_HSource'                 (so v)
  :: ?:  .=  k   'kIRG_JSource'                 (so v)
  :: ?:  .=  k   'kIRG_KPSource'                (so v)
  :: ?:  .=  k   'kIRG_KSource'                 (so v)
  :: ?:  .=  k   'kIRG_TSource'                 (so v)
  :: ?:  .=  k   'kJis0'                        (so v)
  :: ?:  .=  k   'kKPS0'                        (so v)
  :: ?:  .=  k   'kKSC0'                        (so v)
  :: ?:  .=  k   'kKangXi'                      (so v)
  :: ?:  .=  k   'kKorean'                      (so v)
  :: ?:  .=  k   'kKoreanName'                  (so v)
  :: ?:  .=  k   'kMainlandTelegraph'           (so v)
  :: ?:  .=  k   'kMandarin'                    (so v)
  :: ?:  .=  k   'kMatthews'                    (so v)
  :: ?:  .=  k   'kMeyerWempe'                  (so v)
  :: ?:  .=  k   'kMorohashi'                   (so v)
  :: ?:  .=  k   'kPhonetic'                    (so v)
  :: ?:  .=  k   'kRSAdobe_Japan1_6'            (so v)
  :: ?:  .=  k   'kRSKangXi'                    (so v)
  :: ?:  .=  k   'kRSUnicode'                   (so v)
  :: ?:  .=  k   'kSBGY'                        (so v)
  :: ?:  .=  k   'kSemanticVariant'             (so v)
  :: ?:  .=  k   'kSpecializedSemanticVariant'  (so v)
  :: ?:  .=  k   'kTGH'                         (so v)
  :: ?:  .=  k   'kXHC1983'                     (so v)
  :: ?:  .=  k   'kXerox'                       (so v)
  :: ?:  .=  k   'kZVariant'                    (so v)
  :: ?:  .=  k   'kCantonese'                 ((as so) v)
  :: ?:  .=  k   'kHangul'                    ((om so) v)
  :: ?:  .=  k   'kHanyuPinyin'               ((om (as so)) v)
  :: ?:  .=  k   'kJapaneseKun'               ((as so) v)
  :: ?:  .=  k   'kJapaneseOn'                ((as so) v)
  :: ?:  .=  k   'kLau'                       ((as ni) v)
  :: ?:  .=  k   'kNelson'                    ((as ni) v)
  :: ?:  .=  k   'kTaiwanTelegraph'           ((as ni) v)
  :: ?:  .=  k   'kTotalStrokes'              ((as ni) v)  ''
::  get-map: the imported json (raw) run through dejs
::
++  get-map
  %-  dejs
  raw
::  is-cjk: is the UTF-32 value of char within 0x4e00..0x9fff?
::
::    char is converted with taft and compared as a single number;
::    both bounds are inclusive.
::
++  is-cjk
  |=  char=@t
  ^-  ?
  =/  cp  (taft char)
  ::  below the range: reject early; otherwise only the upper bound
  ::  remains to be checked
  ?:  (lth cp 0x4e00)  |
  (lte cp 0x9fff)

::  has: does any element of t occupy exactly three bytes?
::
::    produces %.n for an empty list.
::    NOTE(review): a conventional tape holds one byte per element, in
::    which case (met 3 c) can never be 3; presumably callers pass
::    elements that are whole 3-byte characters - TODO confirm.
::
++  has
  |=  t=tape
  ^-  ?
  %+  lien  t
  |=  c=@
  =(3 (met 3 c))
::  romanize: collect the 'kMandarin' entry of every character in t
::
::    t is split into single characters; each one is looked up in the
::    map produced by get-map (which decodes raw on every call).
::    characters with no entry, no 'kMandarin' field, or a field that
::    is not an atom are skipped.  the result is typed as a tape, but
::    each element is the whole cord stored under 'kMandarin'.
::
++  romanize
  |=  t=@t
  ^-  tape
  =/  dict  get-map
  ::  split per character by going through UTF-32: one 32-bit block
  ::  per character, each converted back to a UTF-8 cord.  slicing the
  ::  cord with a byte step derived from its length only isolates
  ::  characters for one particular input length, and a step of zero
  ::  never consumes any input.
  =/  chars=(list @t)  (turn (rip 5 (taft t)) tuft)
  |-
  ?~  chars  ~
  =/  data  (~(get by dict) i.chars)
  ?~  data  $(chars t.chars)
  =/  mand  (~(get by u.data) 'kMandarin')
  ?~  mand  $(chars t.chars)
  ::  generic can yield maps and lists as well; keep atoms only
  =/  reading  ((soft @t) u.mand)
  ?~  reading  $(chars t.chars)
  [u.reading $(chars t.chars)]
--