import {decode} from 'html-entities'
import * as R from 'rambdax'
import * as regexp from './regexp'

/**
 * Default tag names treated as block-level by `tagStripper`: each matching
 * open/close tag is replaced with a single space so that the text on either
 * side of it does not run together.
 */
// prettier-ignore
const BLOCK_TAGS = [
  'address', 'article', 'aside', 'blockquote', 'canvas', 'dd', 'div', 'dl',
  'dt', 'fieldset', 'figcaption', 'figure', 'footer', 'form', 'h1', 'h2',
  'h3', 'h4', 'h5', 'h6', 'header', 'hr', 'li', 'main', 'nav', 'noscript',
  'ol', 'p', 'pre', 'section', 'table', 'tfoot', 'ul', 'video',
]
// Default tag names that `tagStripper` removes together with their contents.
const SCRIPT_TAGS = ['script', 'style']

/** Options accepted by `tagStripper`. Every field is optional. */
interface TagStripperProps {
  // Tags removed together with everything up to their closing tag.
  // Defaults to SCRIPT_TAGS.
  scriptTags?: string[]
  // Tags whose open/close markers are replaced by a space, keeping contents.
  // Defaults to BLOCK_TAGS.
  blockTags?: string[]
  // Tags that are left in the output instead of being stripped.
  // Defaults to none.
  allowTags?: string[]
  // Attribute names that may remain on the tags left in the output.
  // Defaults to none.
  allowAttrs?: string[]
  // Whether HTML entities are decoded as the final step. Defaults to true.
  decodeEntities?: boolean
}

/**
 * Memoize a function of a single string argument.
 *
 * The cache lives in the closure created by `memo(fn)`, so each memoized
 * function has its own cache that persists across calls. A `Map` is used
 * rather than a plain object so that inputs such as "constructor" or
 * "toString" are not mistaken for inherited `Object.prototype` members.
 *
 * Results are wrapped in a holder object so that a cached `undefined` or
 * `null` result is still recognized as a hit.
 *
 * Note: the cache is unbounded; it grows by one entry per distinct input.
 *
 * @param fn - Function to memoize; expected to be pure.
 * @returns A function that calls `fn` at most once per distinct input.
 */
const memo = <T>(fn: (s: string) => T): ((s: string) => T) => {
  const cache = new Map<string, {value: T}>()
  return (s: string): T => {
    let entry = cache.get(s)
    if (entry === undefined) {
      entry = {value: fn(s)}
      cache.set(s, entry)
    }
    return entry.value
  }
}

/**
 * Make a tag stripper. This is not a security mechanism, it's purely
 * a convenience for avoiding unwanted tags or entities in rendered text.
 *
 * The returned function applies these steps in order:
 *  1. remove `scriptTags` elements together with their contents,
 *  2. replace `blockTags` open/close tags with a single space,
 *  3. remove every other tag, keeping its contents,
 *  4. remove `name=value` attributes from whatever tags remain,
 *  5. optionally decode HTML entities.
 *
 * Tags listed in `allowTags` survive steps 1-3 and attributes listed in
 * `allowAttrs` survive step 4. Both lists are matched case-insensitively,
 * in line with the case-insensitive regexes used to find tags and attributes.
 *
 * @param props - Stripper options; see `TagStripperProps` for defaults.
 * @returns A function mapping an HTML string to its stripped form.
 */
export const tagStripper = ({
  scriptTags = SCRIPT_TAGS,
  blockTags = BLOCK_TAGS,
  allowTags = [],
  allowAttrs = [],
  decodeEntities = true,
}: TagStripperProps = {}): ((input: string) => string) => {
  // Opening script-like tag, plus (when present) everything through the
  // matching closing tag.
  const scriptTagsRe = regexp.tag('gisux')`
    <
    (${scriptTags.join('|')})\b
    [^>]*>
    (?:.*?</${regexp.backref(1)}\s*>)?`
  // Opening or closing block-level tag.
  const blockTagsRe = regexp.tag('gisux')`
    </?
    (${blockTags.join('|')})\b
    [^>]*>`
  // Any remaining opening or closing tag; group 1 is the tag name.
  const otherTagsRe = regexp.tag('gisux')`
    </?
    (\w[^>\s]*)
    [^>]*>`
  // Any opening tag still present (i.e. an allowed one).
  const attrTagsRe = regexp.tag('gisux')`
    <\w[^>]*>`
  // A single name=value attribute inside a tag; group 1 is the name. The
  // unquoted-value branch stops at ">" so that removing the last attribute
  // of a tag (e.g. <a class=x>) does not also remove the tag's closing ">".
  const attrsRe = regexp.tag('gisux')`
    \s+
    ([^=\s]+)=(?:"[^"]*"|'[^']*'|[^\s>]*)`
  // Build a replacer that keeps the whole match when its first capture group
  // (a tag or attribute name) is in `allow`, and otherwise substitutes `r`.
  const except = (allow: string[], r: string) => {
    const allowed = new Set(allow.map(name => name.toLowerCase()))
    return (match: string, p1: string | undefined) =>
      p1 && allowed.has(p1.toLowerCase()) ? match : r
  }
  return R.pipe(
    // Kill script/style tags and contents
    R.replace(scriptTagsRe, except(allowTags, '')),
    // Kill block tags, preserving contents, adding space
    R.replace(blockTagsRe, except(allowTags, ' ')),
    // Kill other tags, preserving contents
    R.replace(otherTagsRe, except(allowTags, '')),
    // If any tags allowed, kill attrs
    R.replace(attrTagsRe, R.replace(attrsRe, except(allowAttrs, ''))),
    // Decode HTML entities
    decodeEntities ? decode : R.identity,
  )
}

/**
 * Strip all tags, for situations where we get tags unexpectedly in text. Also
 * used for some server-side rendering of HTML to avoid unmatched tags that
 * would break the DOM tree.
 *
 * Built from `tagStripper()` with its default options (no tags or attributes
 * allowed, entities decoded) and wrapped in `memo`.
 */
export const stripTags = memo(tagStripper())

// Matches an opening <script> tag and, when a closing tag follows, everything
// up to and including that closing tag.
const SCRIPT_ELEMENT_RE = /<script\b[^>]*>(?:.*?<\/script\s*>)?/gisu

/**
 * Remove every script element from the given HTML so that scripts can be run
 * consistently on the client via the Html component.
 */
export const stripScripts = memo((html: string) =>
  html.replace(SCRIPT_ELEMENT_RE, ''),
)

/**
 * Simplify HTML for constrained display, such as sub-heading in the Header
 * block, or content details. There shouldn't be linebreaks or other block
 * elements here, but we do allow simple formatting and links.
 *
 * Only the listed inline tags are kept, and only `href` and `target` are
 * allowed as attributes on them.
 */
export const simplifyHtml = tagStripper({
  allowTags: ['a', 'b', 'em', 'i', 'strong', 'sub', 'sup'],
  allowAttrs: ['href', 'target'],
  // Decoding entities is only needed when displaying as text, not when
  // displaying as HTML because then the browser will do the decode.
  decodeEntities: false,
})

/**
 * Simplify HTML for TOC.
 *
 * Line-break (`<br>`) tags are first replaced with " | ", then everything
 * except simple inline formatting tags is stripped. Unlike `simplifyHtml`,
 * links are not allowed and no attributes are allowed (`allowAttrs` is left
 * at its empty default). The result is wrapped in `memo`.
 */
export const simplifyTocHtml = memo(
  R.pipe(
    // Turn line breaks into a visible separator before tags are stripped.
    R.replace(regexp.tag('gisux')`<br[^>]*>`, ' | '),
    tagStripper({
      allowTags: ['b', 'em', 'i', 'strong', 'sub', 'sup'],
      // Decoding entities is only needed when displaying as text, not when
      // displaying as HTML because then the browser will do the decode.
      decodeEntities: false,
    }),
  ),
)

/**
 * Normalize whitespace: trim both ends and collapse every internal run of
 * whitespace (spaces, tabs, newlines) into a single space.
 *
 * The regex is global so that all runs are collapsed, not just the first.
 *
 * @param s - Text to normalize.
 * @returns The text with single spaces between words and no outer whitespace.
 */
export const normalizeSpace = (s: string): string =>
  s.trim().replace(/\s+/g, ' ')
