summary | refs | log | tree | commit | diff
path: root/src/nlp/spacy.ts
diff options
context:
space:
mode:
Diffstat (limited to 'src/nlp/spacy.ts')
-rw-r--r-- src/nlp/spacy.ts | 79
1 files changed, 79 insertions, 0 deletions
diff --git a/src/nlp/spacy.ts b/src/nlp/spacy.ts
new file mode 100644
index 0000000..d79de55
--- /dev/null
+++ b/src/nlp/spacy.ts
@@ -0,0 +1,79 @@
+import type { AsyncRes, Result } from "sortug";
+import { detectLang } from "./iso";
+// Base URL of the NLP HTTP service; `run` appends the "/spacy" route to it.
+const ENDPOINT = "http://localhost:8102";
+
+/**
+ * Sends `text` to the "/spacy" route of the NLP service and returns the parsed
+ * JSON response.
+ *
+ * @param text - Text to analyse; sent as the `string` field of the JSON body.
+ * @param langg - Optional language value sent as the `lang` field. When it is
+ *   omitted or empty, the value comes from `detectLang(text)` instead.
+ * @returns `{ ok: <response JSON> }` on success, or `{ error: <message> }` when
+ *   the request or JSON parsing throws, or when the server replies with a
+ *   non-2xx status. Failures are reported through `error` rather than thrown.
+ */
+export async function run(text: string, langg?: string): AsyncRes<SpacyRes> {
+  try {
+    // `||` (not `??`) on purpose: an empty string also falls back to detection,
+    // exactly like the truthiness check this replaces.
+    const lang = langg || detectLang(text);
+    const res = await fetch(ENDPOINT + "/spacy", {
+      method: "POST",
+      headers: {
+        "Content-type": "application/json",
+        // NOTE(review): the non-null assertion only satisfies the type checker;
+        // nothing here verifies that the variable is actually set.
+        "X-API-KEY": Bun.env.SORTUG_NLP_API_KEY!,
+      },
+      body: JSON.stringify({ string: text, lang }),
+    });
+    // fetch resolves for any HTTP status, so without this check an error
+    // payload (4xx/5xx with a JSON body) would be handed back as `ok`.
+    if (!res.ok) {
+      return { error: `spacy request failed: ${res.status} ${res.statusText}` };
+    }
+    const j = await res.json();
+    console.log("spacy", j);
+    return { ok: j };
+  } catch (e) {
+    return { error: `${e}` };
+  }
+}
+
+/**
+ * Larger response shape: a whole-document view (`doc`) plus a list of
+ * sentences (`segs`). Not referenced by any function visible in this section.
+ * NOTE(review): presumably a more verbose server response format — confirm.
+ */
+export type SpacyResBig = {
+ doc: {
+ text: string;
+ ents: any[]; // NOTE(review): untyped; presumably named entities — confirm shape
+ sents: Array<{ start: number; end: number }>;
+ tokens: Token[];
+ };
+ segs: Sentence[];
+};
+/** Payload type that `run` declares for its successful result. */
+export type SpacyRes = {
+ input: string; // NOTE(review): presumably the text that was submitted — confirm
+ segments: Sentence[];
+};
+/**
+ * One sentence/segment of a response: its text and span, two distinguished
+ * tokens (`root`, `subj`), a list of arcs and a list of words.
+ * NOTE(review): `start`/`end` are presumably offsets into the input text, and
+ * `root`/`subj` presumably the syntactic root and subject — confirm with the
+ * server implementation.
+ */
+export type Sentence = {
+ text: string;
+ start: number;
+ end: number;
+ root: Token;
+ subj: Token;
+ arcs: Arc[];
+ words: Word[];
+};
+/**
+ * A labelled link between two positions, as listed in `Sentence.arcs`.
+ * NOTE(review): `start`/`end` are presumably word indices and `dir` the arrow
+ * direction (e.g. "left"/"right") — confirm against the server output.
+ */
+export type Arc = {
+ start: number;
+ end: number;
+ label: string; // deprel label
+ dir: string;
+};
+/**
+ * Base token record; `Word` extends it with tree-navigation fields.
+ * NOTE(review): the field names mirror spaCy token attributes (dep, lemma,
+ * morph, pos, tag); `head` is presumably the id of the governing token and
+ * `start`/`end` presumably character offsets — confirm.
+ */
+export type Token = {
+ id: number;
+ head: number;
+ start: number;
+ end: number;
+ dep: string;
+ lemma: string;
+ morph: string;
+ pos: string;
+ tag: string;
+ text: string;
+};
+
+/**
+ * A `Token` enriched with tree-navigation data. `isChild` reads `id` and
+ * `ancestors` to test membership in a subtree.
+ */
+export interface Word extends Token {
+  /** Ids compared against a token id by `isChild`. */
+  ancestors: number[];
+  /**
+   * Was declared as `[]`, the empty-tuple type, which can never hold an element.
+   * NOTE(review): element type assumed to be ids like `ancestors` — confirm
+   * against the server output.
+   */
+  children: number[];
+  n_lefts: number;
+  n_rights: number;
+  left_edge: number;
+  right_edge: number;
+  morph_map: Record<string, string>;
+}
+
+/**
+ * Reports whether `w` is the word with id `topId` itself, or lists `topId`
+ * among its `ancestors`.
+ *
+ * @param w - Word to test.
+ * @param topId - Id to look for.
+ * @returns `true` when `w.id` equals `topId` or `w.ancestors` contains it.
+ */
+export function isChild(w: Word, topId: number): boolean {
+  if (w.id === topId) {
+    return true;
+  }
+  return w.ancestors.includes(topId);
+}