module List = struct include List let rec take lst n = match (lst, n) with | ([], _) -> [] | (_, 0) -> [] | (x :: xs, n) -> x :: take xs (n - 1) end let convert_headings text = text |> Str.global_replace (Str.regexp "^#### \\(.*\\)$") "

\\1

" |> Str.global_replace (Str.regexp "^### \\(.*\\)$") "

\\1

" |> Str.global_replace (Str.regexp "^## \\(.*\\)$") "

\\1

" |> Str.global_replace (Str.regexp "^# \\(.*\\)$") "

\\1

" let convert_emphasis text = text |> Str.global_replace (Str.regexp "\\*\\*\\([^*]*\\)\\*\\*") "\\1" |> Str.global_replace (Str.regexp "__\\([^_]*\\)__") "\\1" |> Str.global_replace (Str.regexp "\\*\\([^*]*\\)\\*") "\\1" |> Str.global_replace (Str.regexp "_\\([^_]*\\)_") "\\1" let convert_code text = text |> Str.global_replace (Str.regexp "```\\([^`]*\\)```") "
\\1
" |> Str.global_replace (Str.regexp "`\\([^`]*\\)`") "\\1" let convert_links text = text |> Str.global_replace (Str.regexp "\\[\\([^]]*\\)\\](\\([^)]*\\))") "\\1" let convert_lists text = let lines = String.split_on_char '\n' text in let process_line line = match line with | line when Str.string_match (Str.regexp "^-\\s*\\(.*\\)$") line 0 -> "
  • " ^ Str.matched_group 1 line ^ "
  • " | line when Str.string_match (Str.regexp "^\\+\\s*\\(.*\\)$") line 0 -> "
  • " ^ Str.matched_group 1 line ^ "
  • " | line when Str.string_match (Str.regexp "^\\*\\s*\\(.*\\)$") line 0 -> "
  • " ^ Str.matched_group 1 line ^ "
  • " | line when Str.string_match (Str.regexp "^\\d+\\.\\s*\\(.*\\)$") line 0 -> "
  • " ^ Str.matched_group 1 line ^ "
  • " | _ -> line in let wrap_consecutive_items lines = let rec aux acc current_list lines = match (current_list, lines) with | ([], []) -> List.rev acc | (hd :: tl, []) -> List.rev [ ""; ] @ acc | ([], line :: rest) -> if Str.string_match (Str.regexp "^
  • ") line 0 then aux acc [line] rest else aux (line :: acc) [] rest | (items, line :: rest) -> if Str.string_match (Str.regexp "^
  • ") line 0 then aux acc (line :: current_list) rest else aux (line :: ("") :: acc) [] rest in aux [] [] lines in lines |> List.map process_line |> wrap_consecutive_items |> String.concat "\n" let wrap_lists text = text |> Str.global_replace (Str.regexp "
  • .*
  • \\(\n
  • .*
  • \\)*") "" let convert_blockquotes text = let lines = String.split_on_char '\n' text in let rec process_lines acc in_quote lines = match lines with | [] when in_quote -> List.rev ("" :: acc) | [] -> List.rev acc | line :: rest -> let trimmed = String.trim line in if Str.string_match (Str.regexp "^>\\s*\\(.*\\)$") trimmed 0 then let content = Str.matched_group 1 trimmed in if in_quote then process_lines (content :: acc) true rest else process_lines (content :: "
    " :: acc) true rest else if trimmed = "" then if in_quote then process_lines ("
    " :: acc) false rest else process_lines (line :: acc) false rest else if in_quote then process_lines (line :: acc) true rest else process_lines (line :: acc) false rest in lines |> process_lines [] false |> String.concat "\n" let convert_paragraphs text = let lines = String.split_on_char '\n' text in let is_block_element line = Str.string_match (Str.regexp "^<\\(h[1-6]\\|ul\\|ol\\|blockquote\\|pre\\)>") line 0 in let wrap_paragraphs lines = let rec aux acc current_p lines = match lines with | [] when current_p <> "" -> List.rev (("

    " ^ current_p ^ "

    ") :: acc) | [] -> List.rev acc | line :: rest when is_block_element line -> if current_p <> "" then aux (line :: ("

    " ^ current_p ^ "

    ") :: acc) "" rest else aux (line :: acc) "" rest | line :: rest when String.trim line = "" -> if current_p <> "" then aux (("

    " ^ current_p ^ "

    ") :: acc) "" rest else aux acc "" rest | line :: rest -> let sep = if current_p = "" then "" else " " in aux acc (current_p ^ sep ^ String.trim line) rest in aux [] "" lines in lines |> wrap_paragraphs |> String.concat "\n" let to_html markdown = markdown |> convert_headings |> convert_emphasis |> convert_code |> convert_links |> convert_lists |> wrap_lists |> convert_blockquotes |> convert_paragraphs |> String.trim let extract_text markdown = markdown |> Str.global_replace (Str.regexp "\\[([^]]*)\\]\\([^)]*\\)") "\\1" |> Str.global_replace (Str.regexp "\\*\\*\\([^*]*\\)\\*\\*") "\\1" |> Str.global_replace (Str.regexp "\\*\\([^*]*\\)\\*") "\\1" |> Str.global_replace (Str.regexp "__\\([^_]*\\)__") "\\1" |> Str.global_replace (Str.regexp "_\\([^_]*\\)_") "\\1" |> Str.global_replace (Str.regexp "~~\\([^~]*\\)~~") "\\1" |> Str.global_replace (Str.regexp "`\\([^`]*\\)`") "\\1" |> Str.global_replace (Str.regexp "```[^`]*```") "" |> Str.global_replace (Str.regexp "^#+ .*$") "\n" |> Str.global_replace (Str.regexp "^#* .*$") "\n" |> Str.global_replace (Str.regexp "> \\|>") "" |> Str.global_replace (Str.regexp "\\[\\|\\]\\|\\(\\|\\)") "" |> Str.global_replace (Str.regexp "-\\|\\+\\|\\*\\s+") "" |> Str.global_replace (Str.regexp "^\\d+\\.\\s+") "" |> Str.global_replace (Str.regexp "\\\\") "" |> String.trim let summarize text ~words:n = let words = Str.split (Str.regexp "[ \n\r\t]+") text in let truncated = List.take words n in let dots = if List.length words > n then "..." else "" in String.concat " " truncated ^ dots