//

// Pure JS DOM parser and cleaner
// Removes every element and attribute that is not on the allow-lists (tags.js / attrs.js)
import { createAST } from "./ast.js";
import { allowedTags, voidTags, inlineTags } from "./tags.js";
import { allowedAttrs } from "./attrs.js";

// 1. Convert the source html to dom
/**
 * Parse an HTML source string into a DOM document.
 *
 * @param {string} source - Raw HTML markup.
 * @returns {Document} The result of `DOMParser#parseFromString` for the
 *   "text/html" MIME type.
 */
function fromHtmlString(source) {
  const parser = new DOMParser();
  return parser.parseFromString(source, "text/html");
}

// 2. Sanitize the DOM
// Walk the DOM and remove disallowed tags & attributes
// Normalize the DOM so adjoining text nodes are merged

// Numeric node-type codes, compared against `node.nodeType` while walking the DOM.
// ELEMENT_NODE, TEXT_NODE, DOCUMENT_NODE and DOCUMENT_TYPE_NODE are the ones the
// walkers in this file branch on; the remaining codes are declared but not
// referenced elsewhere in this file.
const ELEMENT_NODE = 1;
const TEXT_NODE = 3;
const CDATA_SECTION_NODE = 4;
const PROCESSING_INSTRUCTION_NODE = 7;
const COMMENT_NODE = 8;
const DOCUMENT_NODE = 9;
const DOCUMENT_TYPE_NODE = 10;
const DOCUMENT_FRAGMENT_NODE = 11;

/**
 * Normalise a link target so that only http(s) URLs survive.
 *
 * - An absolute http/https URL is returned unchanged.
 * - An absolute URL with any other scheme (javascript:, mailto:, ...) yields "".
 * - Anything that is not an absolute URL is resolved against `baseUrl`; the
 *   resolved URL is returned only if it is http/https.
 * - A missing href (null/undefined) or an unresolvable value yields "".
 *
 * @param {string|null|undefined} initUrl - Raw href value taken from the document.
 * @param {string|undefined} baseUrl - URL the document was loaded from.
 * @returns {string} The safe href, or "" when the link must be dropped.
 */
function rewriteHref(initUrl, baseUrl) {
  // A missing href must not be stringified: `new URL(null, base)` would
  // otherwise resolve to "<base>/null".
  if (initUrl == null) return "";

  const isWebUrl = (parsed) =>
    parsed.protocol === "http:" || parsed.protocol === "https:";

  // The URL constructor throws on anything that is not an absolute URL; that
  // is the expected signal to fall back to resolving against the base.
  let absolute = null;
  try {
    absolute = new URL(initUrl);
  } catch {
    absolute = null;
  }
  if (absolute) return isWebUrl(absolute) ? initUrl : "";

  try {
    const resolved = new URL(initUrl, baseUrl);
    // Apply the same scheme allow-list as the absolute branch, so a non-web
    // base (ftp:, file:, ...) cannot smuggle a non-web link through.
    return isWebUrl(resolved) ? resolved.toString() : "";
  } catch {
    return "";
  }
}

/**
 * Strip everything that is not explicitly allowed from a parsed document.
 *
 * Elements whose `nodeName` is not in `allowedTags` are removed; on the
 * remaining elements, attributes not in `allowedAttrs` are removed. The
 * `href` of an `<a>` is passed through `rewriteHref`. Nodes that are neither
 * element, text, doctype nor document are removed. The document is mutated
 * in place and normalized afterwards.
 *
 * @param {Document} dom - Document to sanitize (mutated).
 * @param {string} url - Source URL, used as the base for relative links.
 * @returns {void}
 */
function sanitizeDom(dom, url) {
  // -1 as whatToShow selects every node type.
  const walker = document.createTreeWalker(dom, -1, null, false);
  // Removal is deferred so the walker is never left on a detached node.
  const removeNodes = [];
  while (walker.nextNode()) {
    const node = walker.currentNode;
    switch (node.nodeType) {
      case ELEMENT_NODE: {
        const tagName = node.nodeName;
        if (!allowedTags.includes(tagName)) {
          removeNodes.push(node);
          break;
        }
        for (const name of node.getAttributeNames()) {
          if (!allowedAttrs.includes(name)) {
            node.removeAttribute(name);
          } else if (tagName === "A" && name === "href") {
            // Only touch href when the anchor actually has one; rewriting on
            // every allowed attribute used to add a bogus href to anchors
            // that had none.
            node.setAttribute("href", rewriteHref(node.getAttribute("href"), url));
          }
        }
        break;
      }
      case TEXT_NODE:
      case DOCUMENT_TYPE_NODE:
      case DOCUMENT_NODE:
        break;
      default:
        removeNodes.push(node);
    }
  }
  // Remove all the disallowed nodes collected above.
  for (const node of removeNodes) node.remove();
  // Merge text nodes that became adjacent after the removals.
  dom.normalize();
}

// 3. Clean
// Removes Whitespace only text except PRE
// Remove Block Nodes which are empty
// Trim whitespace on elements

// Collapse every run of tab/newline/CR/form-feed/space into a single space.
// Despite the name, leading and trailing whitespace is not stripped; it is
// only reduced to one space.
const trim = (str) => {
  const whitespaceRun = /[\t\n\r\f ]+/g;
  return str.replace(whitespaceRun, " ");
};
// Delete every tab/newline/CR/form-feed/space character from the string.
const collapse = (str) => {
  const whitespaceRun = /[\t\n\r\f ]+/g;
  return str.replace(whitespaceRun, "");
};
// Collapse whitespace runs to single spaces, then drop the one space that may
// remain at the very start and/or very end of the string.
const trimAll = (str) => {
  let collapsed = str.replace(/[\t\n\r\f ]+/g, " ");
  if (collapsed.startsWith(" ")) {
    collapsed = collapsed.slice(1);
  }
  if (collapsed.endsWith(" ")) {
    collapsed = collapsed.slice(0, -1);
  }
  return collapsed;
};
// True when the string holds nothing but tab/newline/CR/form-feed/space
// characters (the empty string included).
const whitespace = (str) => {
  const hasVisibleChar = /[^\t\n\r\f ]/.test(str);
  return !hasVisibleChar;
};
// Classify a tag name for the cleaning pass. PRE is checked first, then
// membership in `voidTags`, then `inlineTags`; everything else is a block tag.
const tagClass = (tagName) => {
  let kind = "BLOCK_TAG";
  if (tagName === "PRE") {
    kind = "PRE_TAG";
  } else if (voidTags.includes(tagName)) {
    kind = "VOID_TAG";
  } else if (inlineTags.includes(tagName)) {
    kind = "INLINE_TAG";
  }
  return kind;
};

/**
 * Debug helper: collect every node under `doc` (including `doc` itself) in
 * document order, log the list to the console and return it.
 *
 * @param {Node} doc - Root node to walk.
 * @returns {Node[]} The visited nodes, root first.
 */
function getAllNodes(doc) {
  // -1 as whatToShow selects every node type.
  const treeWalker = document.createTreeWalker(doc, -1, null, false);
  const nodeList = [];

  // A fresh walker starts on the root, so the root is collected first.
  for (
    let current = treeWalker.currentNode;
    current;
    current = treeWalker.nextNode()
  ) {
    nodeList.push(current);
  }

  console.log(nodeList);
  // Return the collected nodes; the exhausted walker that used to be returned
  // carried none of the gathered information.
  return nodeList;
}

/**
 * Tidy whitespace in a sanitized document, in place.
 *
 * - Text nodes outside of PRE have each whitespace run collapsed to one space.
 * - Block elements lose their whitespace-only text children; a block element
 *   left without any child is removed.
 * - PRE elements and everything inside them are left untouched.
 * - Inline and void elements are not modified themselves.
 *
 * Fixes over the previous version: the PRE case no longer falls through into
 * the block handling with the *next sibling* as the node. That threw when PRE
 * had no following node, deleted a text node that directly followed PRE, and
 * treated whatever followed PRE as a block. Children are also removed from a
 * snapshot rather than from the live `childNodes` list.
 *
 * @param {Document} dom - Document to clean (mutated).
 * @returns {void}
 */
function cleanDom(dom) {
  // -1 as whatToShow selects every node type.
  const walker = document.createTreeWalker(dom, -1, null, false);
  // Removal of empty blocks is deferred so the walker never sits on a detached node.
  const removeNodes = [];

  // Move the walker to the first node after the current node's subtree,
  // climbing towards the root as needed. Returns null when nothing is left.
  const skipSubtree = () => {
    do {
      const sibling = walker.nextSibling();
      if (sibling) return sibling;
    } while (walker.parentNode());
    return null;
  };

  // Remove whitespace-only text children of a block element. Text children
  // that survive are collapsed later, when the walker reaches them.
  const dropBlankTextChildren = (block) => {
    // Iterate a snapshot: `childNodes` is live and removing during iteration
    // would skip the sibling that follows each removed node.
    for (const child of Array.from(block.childNodes)) {
      if (child.nodeType === TEXT_NODE && whitespace(child.nodeValue)) {
        child.remove();
      }
    }
  };

  let node = walker.nextNode();
  while (node) {
    let descend = true;
    if (node.nodeType === ELEMENT_NODE) {
      switch (tagClass(node.tagName)) {
        case "PRE_TAG":
          // Preformatted content must keep its whitespace verbatim.
          descend = false;
          break;
        case "BLOCK_TAG":
          dropBlankTextChildren(node);
          if (!node.hasChildNodes()) removeNodes.push(node);
          // TODO: unwrapping attribute-less DIVs was sketched here in the
          // original but never implemented.
          break;
        default:
          // INLINE_TAG and VOID_TAG elements are left as they are.
          break;
      }
    } else if (node.nodeType === TEXT_NODE) {
      node.nodeValue = trim(node.nodeValue);
    }
    node = descend ? walker.nextNode() : skipSubtree();
  }

  for (const stale of removeNodes) stale.remove();
  // Merge text nodes that became adjacent after the removals.
  dom.normalize();
}

// Simplify the Dom

/**
 * Depth-first traversal over every descendant of `node`. The visit itself is
 * currently a no-op (the only visit action is a commented-out log line).
 *
 * @param {Node} node - Node whose descendants are traversed.
 * @param {number} depth - Depth of `node`; each level of children gets depth + 1.
 * @returns {void}
 */
function walkTheDOM(node, depth) {
  for (let child = node.firstChild; child; child = child.nextSibling) {
    walkTheDOM(child, depth + 1);
  }
}

/**
 * Placeholder simplification pass: walks the DOM from depth 0 and returns a
 * list that is always empty, because nothing is collected during the walk.
 *
 * @param {Node} dom - Root node handed to `walkTheDOM`.
 * @returns {Array} A new empty array.
 */
function simplify(dom) {
  walkTheDOM(dom, 0);
  const collected = [];
  return collected;
}

// 4. Convert dom body to HTML
/**
 * Serialize the children of `dom.body` to a markup string.
 *
 * The body element itself is serialized with `XMLSerializer`, then its own
 * opening tag (which carries the xmlns attribute the serializer adds) and
 * closing tag are cut away.
 *
 * @param {Document} dom - Document whose body is serialized.
 * @returns {string} Markup between the first ">" and the last "</".
 */
function fromDomBodytoHtml(dom) {
  const serializer = new XMLSerializer();
  const markup = serializer.serializeToString(dom.body);
  const innerStart = markup.indexOf(">") + 1;
  const innerEnd = markup.lastIndexOf("</");
  return markup.slice(innerStart, innerEnd);
}

// Get plain text (with Markdown-like markers) from the DOM body
// Heading tag names, H1 through H6.
const hList = ["H1", "H2", "H3", "H4", "H5", "H6"];
// Wrap a piece of text in newlines. The originals used "/n", a typo for the
// "\n" escape that the rest of the text conversion uses.
const tagLI = (tag) => `\n${tag}\n`;
const tagPRE = (tag) => `\n${tag}\n`;
const tagDIV = (tag) => `\n${tag}\n`;

/**
 * Surround an element's content with literal text.
 *
 * @param {Element} node - Element to decorate (mutated).
 * @param {string|null} [pre=null] - Text inserted as the first child; skipped when falsy.
 * @param {string|null} [post=null] - Text appended as the last child; skipped when falsy.
 * @returns {void}
 */
function addTextNode(node, pre = null, post = null) {
  const makeText = (value) => document.createTextNode(value);
  if (pre) {
    node.prepend(makeText(pre));
  }
  if (post) {
    node.appendChild(makeText(post));
  }
}

/**
 * Render `dom.body` as plain text with Markdown-like markers.
 *
 * Works on a deep clone of the body, so the document itself is not modified.
 * Every element in the clone is visited and, depending on its tag, literal
 * text is added before and/or after its content:
 *   H1-H6 -> "\n#... " before; "\n" after when the next sibling is a P,
 *            otherwise "\n\n"
 *   P     -> "\n\n" after
 *   TABLE -> "\n" before and after
 *   UL/OL -> "\n" before when it is the last element child of an LI,
 *            otherwise "\n\n" before and after
 *   LI    -> "\n* " before
 *   EM/I  -> "_", B/STRONG -> "**", DEL/S/STRIKE -> "~" on both sides
 *
 * @param {Document} dom - Document whose body is converted.
 * @returns {string} `textContent` of the decorated clone.
 */
function fromDomBodytoText(dom) {
  const clone = dom.body.cloneNode(true);
  // whatToShow = 1: the walker yields elements only, so the text nodes added
  // below are never visited themselves.
  const walker = document.createTreeWalker(clone, 1, null, false);
  const headings = ["H1", "H2", "H3", "H4", "H5", "H6"];

  for (let el = walker.nextNode(); el; el = walker.nextNode()) {
    const tag = el.tagName;

    if (headings.includes(tag)) {
      // The digit in the tag name gives the number of "#" characters.
      const level = Number(el.nodeName.charAt(1));
      const marker = `\n${"#".repeat(level)} `;
      const following = el.nextSibling;
      const followedByParagraph = Boolean(following) && following.tagName === "P";
      addTextNode(el, marker, followedByParagraph ? "\n" : "\n\n");
    } else if (tag === "P") {
      addTextNode(el, null, "\n\n");
    } else if (tag === "TABLE") {
      addTextNode(el, "\n", "\n");
    } else if (tag === "UL" || tag === "OL") {
      const owner = el.parentNode;
      const closesListItem =
        owner.nodeName === "LI" && owner.lastElementChild === el;
      if (closesListItem) {
        addTextNode(el, "\n", null);
      } else {
        addTextNode(el, "\n\n", "\n\n");
      }
    } else if (tag === "LI") {
      addTextNode(el, "\n* ", null);
    } else if (tag === "EM" || tag === "I") {
      addTextNode(el, "_", "_");
    } else if (tag === "B" || tag === "STRONG") {
      addTextNode(el, "**", "**");
    } else if (tag === "DEL" || tag === "S" || tag === "STRIKE") {
      addTextNode(el, "~", "~");
    }
  }

  return clone.textContent;
}

// 5. Create structured data
/**
 * Run the full pipeline on an HTML string: parse, sanitize, clean, simplify,
 * then build the structured result.
 *
 * @param {string} sourceHtml - Raw HTML markup.
 * @param {string} [sourceUrl] - URL the markup came from; used as the link
 *   base during sanitizing and echoed in the result.
 * @returns {{title: string, date: string, url: string, html: string, text: string, json: string}}
 *   `title` is the document title, `date` is today's date in the runtime's
 *   locale format and `url` is `sourceUrl`. When the document has a body,
 *   `text` and `html` hold its text and markup renderings, and `json`
 *   currently holds the unmodified `sourceHtml` (the AST is built but its
 *   serialization is disabled). Every missing value is "".
 */
function pure(sourceHtml, sourceUrl) {
  // Parse, then run the in-place passes in order.
  const dom = fromHtmlString(sourceHtml);
  sanitizeDom(dom, sourceUrl);
  cleanDom(dom);
  simplify(dom);

  const data = {
    title: dom.title || "",
    date: new Date().toLocaleDateString(),
    url: sourceUrl || "",
    html: "",
    text: "",
    json: "",
  };

  // http-equiv pages or markup without a body leave html/text/json empty.
  if (!dom.body) {
    return data;
  }

  // The AST is still built, but its result is not used for now.
  createAST(dom.body);
  data.json = sourceHtml;
  data.text = fromDomBodytoText(dom);
  data.html = fromDomBodytoHtml(dom);
  return data;
}

export { pure };
