// path: src/nlp/spacy.ts · blob: d79de55b01220545f8d907bca09b48f0313ebec6
import type { AsyncRes, Result } from "sortug";
import { detectLang } from "./iso";
// Base URL of the NLP HTTP service; `run` appends "/spacy" to it.
const ENDPOINT = "http://localhost:8102";

/**
 * Sends `text` to the `/spacy` endpoint and returns the decoded JSON reply.
 *
 * @param text - Text to analyze; sent as the `string` field of the JSON body.
 * @param langg - Language to send with the request. When omitted or empty,
 *   the language is obtained from `detectLang(text)`.
 * @returns `{ ok }` holding the parsed response body on a 2xx reply, or
 *   `{ error }` holding a message when the request fails, the server answers
 *   with a non-2xx status, or the body is not valid JSON. Never rejects.
 */
export async function run(text: string, langg?: string): AsyncRes<SpacyRes> {
  try {
    // `||` (not `??`) on purpose: an empty string also falls back to detection.
    const lang = langg || detectLang(text);
    const res = await fetch(`${ENDPOINT}/spacy`, {
      method: "POST",
      headers: {
        "Content-type": "application/json",
        // NOTE(review): the key is asserted non-null, not checked; a missing
        // env var is only noticed by whatever the server does with the header.
        "X-API-KEY": Bun.env.SORTUG_NLP_API_KEY!,
      },
      body: JSON.stringify({ string: text, lang }),
    });
    if (!res.ok) {
      // fetch resolves for HTTP error statuses, so without this guard an
      // error payload would be handed back to the caller as a successful result.
      const detail = await res.text().catch(() => "");
      const suffix = detail ? `: ${detail}` : "";
      return {
        error: `spacy request failed (${res.status} ${res.statusText})${suffix}`,
      };
    }
    // NOTE(review): the payload is trusted to match SpacyRes; nothing validates it here.
    const j = await res.json();
    console.log("spacy", j);
    return { ok: j };
  } catch (e) {
    return { error: `${e}` };
  }
}

/**
 * Response shape with a document-level `doc` object plus a list of `segs`.
 * NOTE(review): nothing in the visible part of this file references this
 * type — presumably a more verbose server payload; confirm before using it.
 */
export type SpacyResBig = {
  doc: {
    text: string;
    // NOTE(review): elements are untyped (`any`); tighten once the entity payload shape is known.
    ents: any[];
    // presumably sentence boundaries within `text` — TODO confirm the unit (characters vs. tokens)
    sents: Array<{ start: number; end: number }>;
    tokens: Token[];
  };
  segs: Sentence[];
};
/** Success payload type that `run` declares for the `/spacy` endpoint reply. */
export type SpacyRes = {
  // presumably the text that was submitted for analysis — TODO confirm against the server
  input: string;
  segments: Sentence[];
};
/**
 * Element type of `SpacyRes.segments` and `SpacyResBig.segs`.
 * NOTE(review): the field meanings noted below are inferred from their names;
 * verify them against the server that produces this payload.
 */
export type Sentence = {
  text: string;
  // presumably the span this sentence occupies in the input — TODO confirm the unit
  start: number;
  end: number;
  // presumably the syntactic root and the subject of the sentence — TODO confirm
  root: Token;
  subj: Token;
  arcs: Arc[];
  words: Word[];
};
/** A labelled link with a direction field; element type of `Sentence.arcs`. */
export type Arc = {
  // presumably positions of the two connected words — TODO confirm what they index into
  start: number;
  end: number;
  label: string; // deprel label
  // NOTE(review): the set of allowed values is not visible here — confirm with the server
  dir: string;
};
/**
 * Per-token annotation record; also the base shape that `Word` extends.
 * NOTE(review): the field names look like spaCy token attributes, but the exact
 * semantics are not established in this file — confirm against the server.
 */
export type Token = {
  id: number;
  // presumably the `id` of this token's syntactic head — TODO confirm
  head: number;
  start: number;
  end: number;
  dep: string;
  lemma: string;
  // morphology as one string; presumably the same data `Word.morph_map` exposes by key
  morph: string;
  pos: string;
  tag: string;
  text: string;
};

/**
 * A `Token` extended with tree-position fields; element type of `Sentence.words`.
 */
export interface Word extends Token {
  // Ids in the same space as `Token.id`: `isChild` tests a candidate id against both.
  ancestors: number[];
  // Previously typed as `[]`, the empty-tuple type, which only admits an empty
  // array and rejects any element access.
  // NOTE(review): element type assumed to be token ids, mirroring `ancestors` —
  // confirm against the server payload.
  children: number[];
  n_lefts: number;
  n_rights: number;
  // presumably token ids/positions bounding this word's subtree — TODO confirm
  left_edge: number;
  right_edge: number;
  morph_map: Record<string, string>;
}

/**
 * Reports whether `w` is the token with id `topId` or lists `topId` among
 * its ancestors.
 *
 * @param w - Word to test.
 * @param topId - Id of the candidate top token.
 * @returns `true` when `w.id` equals `topId` or `w.ancestors` contains it.
 */
export function isChild(w: Word, topId: number): boolean {
  // A word counts as belonging to its own subtree.
  if (w.id === topId) {
    return true;
  }
  return w.ancestors.includes(topId);
}