diff options
Diffstat (limited to 'src/nlp/spacy.ts')
| -rw-r--r-- | src/nlp/spacy.ts | 79 |
1 files changed, 79 insertions, 0 deletions
import type { AsyncRes, Result } from "sortug";
import { detectLang } from "./iso";

/** Base URL of the NLP service that `run` posts to. */
const ENDPOINT = "http://localhost:8102";

/**
 * Sends `text` to the `/spacy` route of the NLP service and returns the parsed
 * JSON response.
 *
 * @param text  Text to analyse; sent as the `string` field of the request body.
 * @param langg Optional language code. When omitted (or empty) the language is
 *              obtained from `detectLang(text)`.
 * @returns `{ ok: response }` on success, or `{ error: message }` when the
 *          request throws, the body is not valid JSON, or the server answers
 *          with a non-2xx status. This function never rejects.
 */
export async function run(text: string, langg?: string): AsyncRes<SpacyRes> {
  try {
    const lang = langg || detectLang(text);

    const headers: Record<string, string> = {
      "Content-type": "application/json",
    };
    // Only attach the key when it is actually configured, rather than
    // asserting it exists and sending a bogus header value.
    const apiKey = Bun.env.SORTUG_NLP_API_KEY;
    if (apiKey !== undefined) headers["X-API-KEY"] = apiKey;

    const res = await fetch(ENDPOINT + "/spacy", {
      method: "POST",
      headers,
      body: JSON.stringify({ string: text, lang }),
    });

    // A non-2xx reply is a failure even if its body happens to be JSON;
    // do not hand it back to the caller as a successful result.
    if (!res.ok) {
      return { error: `spacy request failed: ${res.status} ${res.statusText}` };
    }

    // NOTE(review): the payload is trusted to match SpacyRes; there is no
    // runtime validation here.
    const parsed = (await res.json()) as SpacyRes;
    console.log("spacy", parsed);
    return { ok: parsed };
  } catch (e) {
    return { error: `${e}` };
  }
}

/**
 * Document-level response shape. Not referenced by `run` in this file.
 * NOTE(review): presumably an alternative/larger server payload; confirm usage.
 */
export type SpacyResBig = {
  doc: {
    text: string;
    ents: any[];
    sents: Array<{ start: number; end: number }>;
    tokens: Token[];
  };
  segs: Sentence[];
};

/** Response shape returned by `run`. */
export type SpacyRes = {
  input: string;
  segments: Sentence[];
};

/** One segment of the response, with its tokens and dependency arcs. */
export type Sentence = {
  text: string;
  start: number;
  end: number;
  root: Token;
  subj: Token;
  arcs: Arc[];
  words: Word[];
};

/** A labelled link between two positions within a sentence. */
export type Arc = {
  start: number;
  end: number;
  label: string; // deprel label
  dir: string;
};

/**
 * A single token.
 * NOTE(review): field names look like spaCy token attributes (dep, lemma,
 * morph, pos, tag); confirm exact semantics against the service.
 */
export type Token = {
  id: number;
  head: number;
  start: number;
  end: number;
  dep: string;
  lemma: string;
  morph: string;
  pos: string;
  tag: string;
  text: string;
};

/** A token enriched with tree-position information. */
export interface Word extends Token {
  /** Ids compared against in `isChild`. */
  ancestors: number[];
  // NOTE(review): `[]` is the empty-tuple type, so this can only ever hold an
  // empty array. Probably meant to be an array type (e.g. `number[]`); confirm
  // what the service sends before changing it.
  children: [];
  n_lefts: number;
  n_rights: number;
  left_edge: number;
  right_edge: number;
  morph_map: Record<string, string>;
}

/**
 * Tells whether `w` belongs to the subtree identified by `topId`.
 *
 * @param w     Word to test.
 * @param topId Id to look for.
 * @returns `true` when `w` itself has id `topId` or when `topId` appears in
 *          `w.ancestors`; `false` otherwise.
 */
export function isChild(w: Word, topId: number): boolean {
  return w.id === topId || w.ancestors.includes(topId);
}
