module List = struct include List let rec take lst n = match (lst, n) with | ([], _) -> [] | (_, 0) -> [] | (x :: xs, n) -> x :: take xs (n - 1) end let convert_headings text = text |> Str.global_replace (Str.regexp "^#### \\(.*\\)$") "
\\1
"
|> Str.global_replace (Str.regexp "`\\([^`]*\\)`") "\\1
"
let convert_links text =
text
|> Str.global_replace
(Str.regexp "\\[\\([^]]*\\)\\](\\([^)]*\\))")
"\\1"
let convert_lists text =
let lines = String.split_on_char '\n' text in
let process_line line =
match line with
| line when Str.string_match (Str.regexp "^-\\s*\\(.*\\)$") line 0 ->
"" :: acc) true rest else if trimmed = "" then if in_quote then process_lines ("" :: acc) false rest else process_lines (line :: acc) false rest else if in_quote then process_lines (line :: acc) true rest else process_lines (line :: acc) false rest in lines |> process_lines [] false |> String.concat "\n" let convert_paragraphs text = let lines = String.split_on_char '\n' text in let is_block_element line = Str.string_match (Str.regexp "^<\\(h[1-6]\\|ul\\|ol\\|blockquote\\|pre\\)>") line 0 in let wrap_paragraphs lines = let rec aux acc current_p lines = match lines with | [] when current_p <> "" -> List.rev (("
" ^ current_p ^ "
") :: acc) | [] -> List.rev acc | line :: rest when is_block_element line -> if current_p <> "" then aux (line :: ("" ^ current_p ^ "
") :: acc) "" rest else aux (line :: acc) "" rest | line :: rest when String.trim line = "" -> if current_p <> "" then aux (("" ^ current_p ^ "
") :: acc) "" rest else aux acc "" rest | line :: rest -> let sep = if current_p = "" then "" else " " in aux acc (current_p ^ sep ^ String.trim line) rest in aux [] "" lines in lines |> wrap_paragraphs |> String.concat "\n" let to_html markdown = markdown |> convert_headings |> convert_emphasis |> convert_code |> convert_links |> convert_lists |> wrap_lists |> convert_blockquotes |> convert_paragraphs |> String.trim let extract_text markdown = markdown |> Str.global_replace (Str.regexp "\\[([^]]*)\\]\\([^)]*\\)") "\\1" |> Str.global_replace (Str.regexp "\\*\\*\\([^*]*\\)\\*\\*") "\\1" |> Str.global_replace (Str.regexp "\\*\\([^*]*\\)\\*") "\\1" |> Str.global_replace (Str.regexp "__\\([^_]*\\)__") "\\1" |> Str.global_replace (Str.regexp "_\\([^_]*\\)_") "\\1" |> Str.global_replace (Str.regexp "~~\\([^~]*\\)~~") "\\1" |> Str.global_replace (Str.regexp "`\\([^`]*\\)`") "\\1" |> Str.global_replace (Str.regexp "```[^`]*```") "" |> Str.global_replace (Str.regexp "^#+ .*$") "\n" |> Str.global_replace (Str.regexp "^#* .*$") "\n" |> Str.global_replace (Str.regexp "> \\|>") "" |> Str.global_replace (Str.regexp "\\[\\|\\]\\|\\(\\|\\)") "" |> Str.global_replace (Str.regexp "-\\|\\+\\|\\*\\s+") "" |> Str.global_replace (Str.regexp "^\\d+\\.\\s+") "" |> Str.global_replace (Str.regexp "\\\\") "" |> String.trim let summarize text ~words:n = let words = Str.split (Str.regexp "[ \n\r\t]+") text in let truncated = List.take words n in let dots = if List.length words > n then "..." else "" in String.concat " " truncated ^ dots