blob: d79de55b01220545f8d907bca09b48f0313ebec6 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
|
import type { AsyncRes, Result } from "sortug";
import { detectLang } from "./iso";
// Base URL of the NLP service; `run` appends "/spacy" to it when posting.
const ENDPOINT = "http://localhost:8102";
/**
 * Posts `text` to the `/spacy` route of the NLP service and returns the
 * decoded JSON response.
 *
 * @param text - Text to analyse; sent as the `string` field of the JSON body.
 * @param langg - Optional language value sent as the `lang` field. When it is
 *   omitted (or empty) the value of `detectLang(text)` is used instead.
 * @returns `{ ok }` holding the decoded response body when the server answers
 *   with a success status, or `{ error }` with a message when the request
 *   fails, the server answers with a non-2xx status, or the body cannot be
 *   decoded as JSON. Errors are reported through the result, not thrown.
 */
export async function run(text: string, langg?: string): AsyncRes<SpacyRes> {
  try {
    const lang = langg || detectLang(text);
    const res = await fetch(`${ENDPOINT}/spacy`, {
      method: "POST",
      headers: {
        "Content-type": "application/json",
        // NOTE(review): the key is asserted non-null but never validated here;
        // confirm SORTUG_NLP_API_KEY is always set where this runs.
        "X-API-KEY": Bun.env.SORTUG_NLP_API_KEY!,
      },
      body: JSON.stringify({ string: text, lang }),
    });
    // fetch() only rejects on network failure; without this guard an HTTP
    // error payload would be handed back to the caller as a successful result.
    if (!res.ok) {
      return { error: `spacy request failed: ${res.status} ${res.statusText}` };
    }
    const j = await res.json();
    console.log("spacy", j);
    return { ok: j };
  } catch (e) {
    return { error: `${e}` };
  }
}
// Larger response shape: a whole-document record (`doc`) plus per-sentence
// segments (`segs`).
// NOTE(review): nothing in the visible code produces or consumes this type
// (`run` is typed to resolve with SpacyRes) — confirm where it is used.
export type SpacyResBig = {
doc: {
// Text of the document.
text: string;
// Untyped list; presumably named-entity records — TODO confirm and type it.
ents: any[];
// One start/end pair per sentence; the unit of the offsets is not shown here.
sents: Array<{ start: number; end: number }>;
// Tokens of the document.
tokens: Token[];
};
// Per-sentence breakdown, same element type as SpacyRes.segments.
segs: Sentence[];
};
// Response shape that `run` is typed to resolve with on success.
export type SpacyRes = {
// presumably the text that was submitted — TODO confirm against the service.
input: string;
// Sentence-level breakdown of the analysed text.
segments: Sentence[];
};
// One sentence of an analysed text together with its tokens and arcs.
export type Sentence = {
// Text of the sentence.
text: string;
// Start/end positions of the sentence.
// NOTE(review): unit (character vs. token offsets) is not visible here — confirm.
start: number;
end: number;
// presumably the root token of the sentence's dependency parse — confirm.
root: Token;
// presumably the subject token of the sentence — confirm.
subj: Token;
// Arcs attached to this sentence (see Arc).
arcs: Arc[];
// Words of this sentence, carrying the extra tree fields declared on Word.
words: Word[];
};
// A labelled link between two positions, identified by `start` and `end`.
// NOTE(review): whether start/end are token ids or offsets is not shown here.
export type Arc = {
start: number;
end: number;
label: string; // deprel label
// presumably the direction of the arc — allowed values are not shown; confirm.
dir: string;
};
// A single token and its string-valued annotations.
export type Token = {
// Numeric identifier of the token; `isChild` compares it with a target id.
id: number;
// presumably the id of this token's syntactic head — confirm.
head: number;
// Start/end positions of the token; the unit is not shown here.
start: number;
end: number;
// Annotation fields, all plain strings. Judging by the names these are the
// dependency label, lemma, morphology, coarse POS and fine-grained tag —
// TODO confirm against the service.
dep: string;
lemma: string;
morph: string;
pos: string;
tag: string;
// Surface text of the token.
text: string;
};
/**
 * A {@link Token} extended with fields describing its position in the tree.
 */
export interface Word extends Token {
  // Ids of this word's ancestors; `isChild` looks a token id up in this list.
  ancestors: number[];
  // Previously typed `[]` (the empty tuple), which only admits an empty array.
  // Presumably a list of child token ids, mirroring `ancestors` — verify the
  // element type against the service payload.
  children: number[];
  // presumably counts of left/right children — confirm.
  n_lefts: number;
  n_rights: number;
  // presumably ids (or positions) of the leftmost/rightmost token of this
  // word's subtree — confirm.
  left_edge: number;
  right_edge: number;
  // String-to-string map; presumably `morph` broken out into feature/value pairs.
  morph_map: Record<string, string>;
}
/**
 * Tells whether `w` is the token identified by `topId` or sits beneath it.
 *
 * @param w - Word to test.
 * @param topId - Id of the candidate top token.
 * @returns `true` when `w.id` equals `topId` or `topId` appears in
 *   `w.ancestors`; `false` otherwise.
 */
export function isChild(w: Word, topId: number): boolean {
  // The word counts as a child of itself.
  if (w.id === topId) {
    return true;
  }
  const { ancestors } = w;
  return ancestors.includes(topId);
}
|