美团编程题：编写一个算法解析以下符号，转换为json树的结构

Question

美团编程题：编写一个算法解析以下符号，转换为json树的结构

sisterAn opened this issue 4 years ago · comments

// 编写一个算法解析以下符号，转换为json树的结构
let str = `<xml><div><p><a/></p><p></p></div></xml>`

Kq · Answer 1 · Tue Sep 22 2020 11:29:34 GMT+0800 (China Standard Time)

interface TNode {
  name: string;
  children: TNode[] | null;
}

const startTagReg: RegExp = /\<(\w+)\>/; // 匹配开始标签
const endTagReg: RegExp = /\<\/(\w+)\>/; // 匹配结束标签
const closeSelfTagReg: RegExp = /\<(\w+)\/\>/; // 匹配自闭合标签
const textNodeReg: RegExp = /\>(.*?)\<\//; // 匹配文本内容
const tagReg: RegExp = /\<\/?(\w+)\/?\>/g; // 全局匹配标签

let matchedTags: string[] = str.match(tagReg); // 在字符串中匹配到的标签数组

let htmlTree: TNode = null; // 保存生成的节点树
let nodeStacks: TNode[] = []; // 保存遍历过程中的节点栈
let currentNode: TNode | undefined = undefined;

// 根据标签创建新节点
function createNode(tag: string): TNode {
  const tagName = tag.replace(tagReg, "$1");
  return {
    name: tagName,
    children: null
  };
}
// 将节点插入到当前操作栈的节点中
function insertNode(node: TNode) {
  if (htmlTree === null) {
    htmlTree = node;
  } else {
    if (currentNode.children === null) {
      currentNode.children = [node];
    } else {
      currentNode.children.push(node);
    }
  }
}

for (let tag of matchedTags) {
  if (startTagReg.test(tag)) {
    // 创建新节点
    const node = createNode(tag);
    // 向树插入新节点
    insertNode(node);
    // 压入栈，并进入该栈
    nodeStacks.push(node);
    currentNode = nodeStacks[nodeStacks.length - 1];
  } else if (endTagReg.test(tag)) {
    // 找栈中的相对应的节点索引
    let index = nodeStacks
      .reverse()
      .findIndex(el => el.name === tag.replace(tagReg, "$1"));
    index = nodeStacks.length - index;
    nodeStacks.reverse();
    // 删除索引外的栈
    nodeStacks.splice(index - 1);
    // 设置删除后的最后一项为当前节点
    currentNode = nodeStacks[nodeStacks.length - 1];
  } else if (closeSelfTagReg.test(tag)) {
    // 创建新节点
    const node = createNode(tag);
    // 插入新节点
    insertNode(node);
    // 自闭合不需要进栈出栈
  }
}

console.log(matchedTags);
console.log(htmlTree);

{
  "name": "xml",
  "children": [
    {
      "name": "div",
      "children": [
        {
          "name": "p",
          "children": [
            {
              "name": "a",
              "children": null
            }
          ]
        },
        {
          "name": "p",
          "children": null
        }
      ]
    }
  ]
}

早上在地铁看到这题给我整懵了，趁快要吃饭的时间赶紧记录下思考过程，结合前些天看源码，就想到利用模拟进栈出栈的思路，并保留最后可操作节点供插入新值。开始标签就创建节点，并进入新栈；结束标签就找最后一个对应开始标签栈，其余移除栈；自闭合标签不需要进栈出栈，直接更新节点就好。

Fenger Ding · Answer 2 · Wed Sep 23 2020 11:38:54 GMT+0800 (China Standard Time)

比较简陋的版本，假设给的字符串一定是标准的，没有容错处理，有不好的地方望指出。
思路：类似于常见的括号处理的题，

遇到起始标签则入栈，
遇到结束标签则查找最近的起始标签，新建node，将首尾间的所有node存为children，并将中间的所有node和标签出栈；
遇到单闭合标签则直接生成node，入栈。

后续可以逐渐完善，增加各种错误处理

function createNode(name) {
  return {
    name: name,
    children: null
  }
}

function parse(str) {
  const stack = [];
  let i = 0, j = 0;
  while (j < str.length) {
  while (str[i] !== '<') {
    i++;
  }
  i++;
  j = i;

  while (str[j] !== '>') {
    j++;
  }

  let ss = str.substring(i, j); // 去除<>之后的部分
  if (ss.startsWith('/')) {
    const name = ss.substring(1, ss.length);
    if (stack.length > 0) {
      let k = stack.length - 1;
      while (k >= 0) {
        if (stack[k] === name) {
          let node = createNode(stack[k]);
          let length = stack.length-1;
          while (length > k) {
            if (node.children === null) {
              node.children = []
            }
            node.children.push(stack[length]);
            stack.pop();
            length--;
          }
          stack.pop();
          stack.push(node)
          break;
        }
        k--;
      }
    } else {
      return null;
    }
  } else if (ss.endsWith('/')) {
    const name = ss.substring(0, ss.length - 1)
    let node = createNode(name)
    stack.push(node)
  } else {
    stack.push(ss)
  }

  i = j;
  j++;
  }
  return stack[0];
}

瓶子君 · Answer 3 · Wed Sep 23 2020 13:06:03 GMT+0800 (China Standard Time)

interface TNode {
  name: string;
  children: TNode[] | null;
}

const startTagReg: RegExp = /\<(\w+)\>/; // 匹配开始标签
const endTagReg: RegExp = /\<\/(\w+)\>/; // 匹配结束标签
const closeSelfTagReg: RegExp = /\<(\w+)\/\>/; // 匹配自闭合标签
const textNodeReg: RegExp = /\>(.*?)\<\//; // 匹配文本内容
const tagReg: RegExp = /\<\/?(\w+)\/?\>/g; // 全局匹配标签

let matchedTags: string[] = str.match(tagReg); // 在字符串中匹配到的标签数组

let htmlTree: TNode = null; // 保存生成的节点树
let nodeStacks: TNode[] = []; // 保存遍历过程中的节点栈
let currentNode: TNode | undefined = undefined;

// 根据标签创建新节点
function createNode(tag: string): TNode {
  const tagName = tag.replace(tagReg, "$1");
  return {
    name: tagName,
    children: null
  };
}
// 将节点插入到当前操作栈的节点中
function insertNode(node: TNode) {
  if (htmlTree === null) {
    htmlTree = node;
  } else {
    if (currentNode.children === null) {
      currentNode.children = [node];
    } else {
      currentNode.children.push(node);
    }
  }
}

for (let tag of matchedTags) {
  if (startTagReg.test(tag)) {
    // 创建新节点
    const node = createNode(tag);
    // 向树插入新节点
    insertNode(node);
    // 压入栈，并进入该栈
    nodeStacks.push(node);
    currentNode = nodeStacks[nodeStacks.length - 1];
  } else if (endTagReg.test(tag)) {
    // 找栈中的相对应的节点索引
    let index = nodeStacks
      .reverse()
      .findIndex(el => el.name === tag.replace(tagReg, "$1"));
    index = nodeStacks.length - index;
    nodeStacks.reverse();
    // 删除索引外的栈
    nodeStacks.splice(index - 1);
    // 设置删除后的最后一项为当前节点
    currentNode = nodeStacks[nodeStacks.length - 1];
  } else if (closeSelfTagReg.test(tag)) {
    // 创建新节点
    const node = createNode(tag);
    // 插入新节点
    insertNode(node);
    // 自闭合不需要进栈出栈
  }
}

console.log(matchedTags);
console.log(htmlTree);

{
  "name": "xml",
  "children": [
    {
      "name": "div",
      "children": [
        {
          "name": "p",
          "children": [
            {
              "name": "a",
              "children": null
            }
          ]
        },
        {
          "name": "p",
          "children": null
        }
      ]
    }
  ]
}

早上在地铁看到这题给我整懵了，趁快要吃饭的时间赶紧记录下思考过程，结合前些天看源码，就想到利用模拟进栈出栈的思路，并保留最后可操作节点供插入新值。开始标签就创建节点，并进入新栈；结束标签就找最后一个对应开始标签栈，其余移除栈；自闭合标签不需要进栈出栈，直接更新节点就好。

@oxyzhg 这部分主要是用来判断什么的？直接出栈不就可以吗？

 // 找栈中的相对应的节点索引
let index = nodeStacks
  .reverse()
  .findIndex(el => el.name === tag.replace(tagReg, "$1"));
index = nodeStacks.length - index;
nodeStacks.reverse();
// 删除索引外的栈
nodeStacks.splice(index - 1);
// 设置删除后的最后一项为当前节点
currentNode = nodeStacks[nodeStacks.length - 1];

Kq · Answer 4 · Wed Sep 23 2020 13:12:08 GMT+0800 (China Standard Time)

@sisterAn 正常的话就是直接出栈的哈，这里当时考虑了栈尾元素不一定是当前闭合标签相匹配，可能想多了。

LOVEwitch · Answer 5 · Wed Sep 23 2020 18:02:01 GMT+0800 (China Standard Time)

比较简陋的版本，假设给的字符串一定是标准的，没有容错处理，有不好的地方望指出。
思路：类似于常见的括号处理的题，用一个栈保存所有形如

的起始标签，遇到

则查找最近的
，
新建name为p的node，将首尾间的所有node存为p的children，并将中间的所有node和p标签出栈；
遇到

这种标签则直接生成node，入栈。
后续可以逐渐完善，增加各种错误处理
javascript let str =

`
function createNode(name) {
return {
name: name,
children: null
}
}

function parse(str) {
const stack = []
let i = 0, j = 0;
while (j < str.length) {
while (str[i] !== '<') {
i++;
}
i++;
j = i;
while (str[j] !== '>') {
  j++;
}

let ss = str.substring(i, j); // 去除<>之后的部分
if (ss.startsWith('/')) {
  const name = ss.substring(1, ss.length);
  if (stack.length > 0) {
    let k = stack.length - 1;
    while (k >= 0) {
      if (stack[k] === name) {
        let node = createNode(stack[k]);
        let length = stack.length-1;
        while (length > k) {
          if (node.children === null) {
            node.children = []
          }
          node.children.push(stack[length]);
          stack.pop();
          length--;
        }
        stack.pop();
        stack.push(node)
        break;
      }
      k--;
    }
  } else {
    return null;
  }
} else if (ss.endsWith('/')) {
  const name = ss.substring(0, ss.length - 1)
  let node = createNode(name)
  stack.push(node)
} else {
  stack.push(ss)
}

i = j;
j++;
}
return stack[0];
}
`

let index = nodeStacks .reverse() .findIndex(el => el.name === tag.replace(tagReg, "$1")); index = nodeStacks.length - index; nodeStacks.reverse();这段可以用lastIndexOf代替IndexOf吧，就不用来回reverse了

mingju0421 · Answer 6 · Wed Nov 11 2020 16:30:54 GMT+0800 (China Standard Time)

const DOM解析 = str => {
    let 标签开始 = /^<(\w+)>/
    let 标签结束 = /^<\/(\w+)>/
    let 自闭合标签 = /^<(\w+)\/>/
    let DOMList = []
    while (str) {
        let 临时标签
        if (标签开始.test(str)) {
            let dom = {}
            临时标签 = str.match(标签开始)
            dom.name = 临时标签[1]
            str = str.slice(临时标签[0].length)
            DOMList.push(dom)
        } else if (标签结束.test(str)) {
            临时标签 = str.match(标签结束)
            str = str.slice(临时标签[0].length)
            let dom = DOMList.pop()
            if (!str) return dom
            let parentDOM = DOMList[DOMList.length - 1]
            parentDOM.children && parentDOM.children.push(dom) || (parentDOM.children = [dom])
        } else {
            let dom = {}
            临时标签 = str.match(自闭合标签)
            dom.name = 临时标签[1]
            str = str.slice(临时标签[0].length)
            let parentDOM = DOMList[DOMList.length - 1]
            parentDOM.children && parentDOM.children.push(dom) || (parentDOM.children = [dom])
        }
    }
}
const str = '<div><ul><li><a></a><input/></li><li><a></a><input/></li><li><a></a><input/></li><li><a></a><input/></li></ul></div>'

console.log(JSON.stringify(DOM解析(str)))

xllpiupiu · Answer 7 · Tue Oct 05 2021 19:13:10 GMT+0800 (China Standard Time)

function domParse(str) {

    const startTagReg = /\<(\w+)\>/;//匹配开始标签
    const endTagReg = /\<\/(\w+)\>/;//匹配结束标签
    const closeSelfTagReg = /\<(\w+)\/\>/;//匹配自闭合标签
    const textNodeReg = /\>(.*?)\<\//;//匹配文本内容
    const tagReg = /\<\/?(\w+)\/?\>/g;//全局匹配

    let htmlTree = null;
    let nodeStacks = [];
    let currentNode = null;
    //根据标签创建新的节点
    class CreateNode {
        constructor(tag) {
            const tagName = tag.replace(tagReg, "$1");//去掉<>/
            this.name = tagName
            this.children = null
        }
    }
    //将节点插入到当前操作栈的节点中
    function insertNode(node) {
        if (htmlTree === null) {
            htmlTree = node;
        } else {
            if (currentNode.children === null) {
                currentNode.children = [node];
            } else {
                currentNode.children.push(node);
            }
        }
    }
    const matchedTags = str.match(tagReg);//字符串中匹配到的标签数组
    console.log('matchedTags: ', matchedTags);
    for (let tag of matchedTags) {
        if (startTagReg.test(tag)) {
            //创建新节点
            const node = new CreateNode(tag);
            //向树插入新节点
            insertNode(node);
            //压入栈
            nodeStacks.push(node);
            //更新当前节点
            currentNode = nodeStacks[nodeStacks.length - 1];
        } else if (endTagReg.test(tag)) {
            nodeStacks.pop();
            //更新当前节点
            currentNode = nodeStacks[nodeStacks.length - 1]
        } else if (closeSelfTagReg.test(tag)) {
            const node =new CreateNode(tag)
            insertNode(node);
        }
    }
    return htmlTree;
}

XfLoops · Answer 8 · Tue Oct 26 2021 23:00:17 GMT+0800 (China Standard Time)

// 编写一个算法解析以下符号，转换为json树的结构
let str = `<xml><div><p><a/><a/></p><p></p></div><div><a/></div></xml>`

function parseXML(str) {
    const openTagRegex = /^<(\w+)>/
    const closeTagRegex = /^<\/(\w+)>/
    const selfTagRegex = /^<(\w+)\/>/
    const regs = [openTagRegex, closeTagRegex, selfTagRegex]
        
    const matchTag = (str) => {
        let newStr, tag, reg, match
        for(const _reg of regs) {
            if (_reg.test(str)) {
                const [_match, _tag] = _reg.exec(str)
                match = _match
                tag = _tag
                reg = _reg
                break
            }
        }
        newStr = match ? str.replace(match, '') : str
        return [reg, tag, newStr]
    }
    
    const createNode = (tag) => ({name: tag, children: []})
    const stack = []

    while(str) {
        const [reg, tag, newStr] = matchTag(str)
        switch (reg) {
            case openTagRegex:
                stack.push(createNode(tag))
                break
            case closeTagRegex:
                if (stack.length > 1) {
                    const top = stack.pop()
                    stack[stack.length - 1].children.push(top)
                }
                break
            case selfTagRegex:
                stack[stack.length - 1].children.push(createNode(tag))
                break
            default:
                throw new Error(`invalid tag string`)
        }
        str = newStr
    }
    return stack.pop()
}