我如何在 JavaScript 的正则表达式中转义 <br> 和 h2 标签

how I can escape <br> and h2 tag in regex in javascript?

本文关键字:br 标签 h2 转义 JavaScript 正则表达式      更新时间:2023-09-26

我在jquery插件中使用了一些正则表达式代码,它将我的html代码替换为空字符串。

// First replace() drops the leading text through a '#'; the second drops
// whatever /'?.*$/ matches (an optional apostrophe plus the rest of the
// final line, anchored at the end of the string).
// NOTE(review): the returned string is not assigned anywhere, so `hash`
// itself is left unchanged by this statement (assuming hash is a string).
hash.replace(/^.*#/, '').replace(/'?.*$/, '');

在这里我应该怎么做,才能让它不把我的 HTML 代码替换为空字符串?例如,我有如下 html 代码:

"Jquery is a scripting language.<br>
Most widely used language"

此文本包含换行标签(<br>),因此它被替换成了空字符串。

请尝试以下代码:

 // Removes every substring of the form <...> (i.e. any tag) and assigns the
 // result back to hash, because replace() returns a new string.
 hash = hash.replace(/(<([^>]+)>)/ig, '');

仅供参考:replace 方法不会修改原始字符串,除非您把它返回的新值重新赋给该变量。

与其使用正则表达式,我强烈建议使用 DOM API 来解析和删除 HTML 元素,使用可能保留的元素白名单:

/**
 * Removes HTML elements from a string of markup, keeping only the
 * elements named in a whitelist. A removed element is "unwrapped": its
 * child nodes are moved into its parent, so the text it contained
 * survives.
 *
 * @param {Object} [opts] - Options; every own enumerable key overrides
 *   the default of the same name.
 * @param {string} [opts.html=null] - Text/HTML from which the HTML
 *   elements should be removed.
 * @param {string[]} [opts.allowedHTML=['h2', 'br']] - Names of the
 *   elements permitted to remain, written either as 'br' or as '<br>'.
 * @returns {string|undefined} The remaining markup with leading and
 *   trailing white-space trimmed, or undefined when opts.html is empty
 *   or not a string.
 */
function stripHTML(opts) {
  // The default settings for the function; can be overridden by the
  // caller through opts.
  var settings = {
      'html': null,
      'allowedHTML': ['h2', 'br']
    },
    // Detached element that holds the supplied markup so that the
    // browser parses it for us:
    temp = document.createElement('div'),
    // Assigned later, once we know there is markup to process:
    allowedHTML,
    elementNodes;
  // Copy the caller's options over the defaults; falling back to an
  // empty object-literal prevents an error when opts is omitted.
  Object.keys(opts || {}).forEach(function(key) {
    settings[key] = opts[key];
  });
  // Only a non-empty String is processed; anything else falls through
  // and the function returns undefined.
  if (settings.html && 'string' === typeof settings.html) {
    // NOTE(review): attributes on whitelisted elements are never
    // inspected or removed below, so this is not a sanitiser for
    // untrusted markup - confirm how callers use the result.
    temp.innerHTML = settings.html;
    // Copy every descendant element into a real Array up front, so the
    // list we iterate is unaffected by the removals performed below.
    elementNodes = Array.from(temp.querySelectorAll('*'));
    // Normalise the whitelist to bare names (['h1', 'span'] rather than
    // ['<h1>', '<span>']) by dropping every '<' and '>'.
    allowedHTML = settings.allowedHTML.map(function(el) {
      return el.replace(/<|>/g, '');
    });
    elementNodes.forEach(function(node) {
      // Read the parent afresh each time: unwrapping an ancestor earlier
      // in the loop re-parents its descendants.
      var parent = node.parentNode;
      // Compare against the *normalised* whitelist, so that entries
      // supplied as '<br>' are honoured as well as 'br'.
      if (allowedHTML.indexOf(node.localName) === -1) {
        // Move each child in front of the node, preserving order...
        while (node.firstChild) {
          parent.insertBefore(node.firstChild, node);
        }
        // ...then drop the now-empty node itself.
        parent.removeChild(node);
      }
    });
    return temp.innerHTML.trim();
  }
}
// Demo: with the default whitelist ('h2', 'br') the <br> element is kept.
console.log(stripHTML({
  'html': "jQuery is a JavaScript library.<br>And is the most widely-used such library (at this time)"
}));
// => jQuery is a JavaScript library.<br>And is the most widely-used such library (at this time)

/**
 * Removes HTML elements from a string of markup, keeping only the
 * whitelisted ones; a removed element's children are moved into its
 * parent so their text is preserved.
 *
 * @param {Object} [opts] - Options; own keys override the defaults.
 * @param {string} [opts.html=null] - Markup to process.
 * @param {string[]} [opts.allowedHTML=['h2', 'br']] - Element names to
 *   keep, written as 'br' or '<br>'.
 * @returns {string|undefined} Trimmed result, or undefined when
 *   opts.html is empty or not a string.
 */
function stripHTML(opts) {
  var settings = {
      'html': null,
      'allowedHTML': ['h2', 'br']
    },
    temp = document.createElement('div'),
    allowedHTML,
    elementNodes;
  // `opts || {}` lets the function be called without arguments.
  Object.keys(opts || {}).forEach(function(key) {
    settings[key] = opts[key];
  });
  if (settings.html && 'string' === typeof settings.html) {
    // Let the browser parse the markup inside a detached container.
    temp.innerHTML = settings.html;
    // Array copy, so the removals below do not disturb the iteration.
    elementNodes = Array.from(temp.querySelectorAll('*'));

    // Whitelist as bare names: '<br>' becomes 'br'.
    allowedHTML = settings.allowedHTML.map(function(el) {
      return el.replace(/<|>/g, '');
    });

    elementNodes.forEach(function(node) {
      var parent = node.parentNode;
      // Test the normalised list, so '<br>'-style entries match too.
      if (allowedHTML.indexOf(node.localName) === -1) {

        // Unwrap: hoist the children, then remove the empty element.
        while (node.firstChild) {
          parent.insertBefore(node.firstChild, node);
        }

        parent.removeChild(node);
      }
    });

    return temp.innerHTML.trim();
  }
}
// Demo: default whitelist ('h2', 'br'); the trailing space of the input is
// removed by the trim() inside stripHTML.
console.log(stripHTML({
  'html': "jQuery is a JavaScript library.<br>And is the most widely-used such library (at this time). "
}));

JS Fiddle 演示。

上面的函数允许将 allowedHTML 设为空数组,这会使函数删除所有 HTML 标签(基于有限的测试):

// Demo: an empty whitelist means no element is allowed to remain.
console.log(stripHTML({
  'html': "jQuery is a JavaScript library.<br>And is the most widely-used such library (at this time). ",
  'allowedHTML': []
}));
// => jQuery is a JavaScript library.And is the most widely-used such library (at this time).

/**
 * Strips every non-whitelisted HTML element from a markup string while
 * preserving the content those elements wrapped.
 *
 * @param {Object} [opts] - Options; own keys override the defaults.
 * @param {string} [opts.html=null] - Markup to process.
 * @param {string[]} [opts.allowedHTML=['h2', 'br']] - Element names to
 *   keep ('br' or '<br>'); pass [] to remove every element.
 * @returns {string|undefined} Trimmed result, or undefined when
 *   opts.html is empty or not a string.
 */
function stripHTML(opts) {
  var settings = {
      'html': null,
      'allowedHTML': ['h2', 'br']
    },
    temp = document.createElement('div'),
    allowedHTML,
    elementNodes;
  // Merge caller options over the defaults (no-op when opts is omitted).
  Object.keys(opts || {}).forEach(function(key) {
    settings[key] = opts[key];
  });
  if (settings.html && 'string' === typeof settings.html) {
    // The detached <div> does the HTML parsing.
    temp.innerHTML = settings.html;
    // Copied into an Array so later DOM removals cannot affect the loop.
    elementNodes = Array.from(temp.querySelectorAll('*'));

    // Strip '<' and '>' so 'br' and '<br>' are equivalent entries.
    allowedHTML = settings.allowedHTML.map(function(el) {
      return el.replace(/<|>/g, '');
    });

    elementNodes.forEach(function(node) {
      var parent = node.parentNode;
      // Use the normalised names computed above, not the raw option.
      if (allowedHTML.indexOf(node.localName) === -1) {

        // Re-home the children ahead of the node, then delete the node.
        while (node.firstChild) {
          parent.insertBefore(node.firstChild, node);
        }

        parent.removeChild(node);
      }
    });

    return temp.innerHTML.trim();
  }
}
// Demo: empty whitelist, so the <br> is stripped along with everything else.
console.log(stripHTML({
  'html': "jQuery is a JavaScript library.<br>And is the most widely-used such library (at this time).",
  'allowedHTML': []
}));

JS Fiddle 演示。

它似乎还能可靠地处理无效的 HTML(在浏览器本身能够处理的范围内),例如未闭合的元素或"重叠"的元素(先打开的元素的结束标签出现在后打开的元素的结束标签之前):

// Demo: invalid markup - the </div> closes before the <h1> opened inside it.
// Neither 'div' nor 'h1' is in the default whitelist, so both are removed.
console.log(stripHTML({
  'html': "<div><h1>jQuery</div> is a JavaScript library.</h1><br>And is the most widely-used such library (at this time). "
}));
// => jQuery is a JavaScript library.<br>And is the most widely-used such library (at this time).

/**
 * Returns the supplied markup with all non-whitelisted elements
 * unwrapped (element removed, its children kept in place).
 *
 * @param {Object} [opts] - Options; own keys override the defaults.
 * @param {string} [opts.html=null] - Markup to process.
 * @param {string[]} [opts.allowedHTML=['h2', 'br']] - Element names to
 *   keep, written as 'br' or '<br>'.
 * @returns {string|undefined} Trimmed result, or undefined when
 *   opts.html is empty or not a string.
 */
function stripHTML(opts) {
  var settings = {
      'html': null,
      'allowedHTML': ['h2', 'br']
    },
    temp = document.createElement('div'),
    allowedHTML,
    elementNodes;
  // A missing opts is treated as "no overrides".
  Object.keys(opts || {}).forEach(function(key) {
    settings[key] = opts[key];
  });
  if (settings.html && 'string' === typeof settings.html) {
    // Parsing is delegated to the browser, which also repairs invalid
    // markup in whatever way it normally would.
    temp.innerHTML = settings.html;
    // Array copy: safe to remove nodes while walking it.
    elementNodes = Array.from(temp.querySelectorAll('*'));
    // Bare element names, with any '<' / '>' removed.
    allowedHTML = settings.allowedHTML.map(function(el) {
      return el.replace(/<|>/g, '');
    });
    elementNodes.forEach(function(node) {
      var parent = node.parentNode;
      // Check the normalised whitelist so '<h2>' behaves like 'h2'.
      if (allowedHTML.indexOf(node.localName) === -1) {
        // Keep the content: move the children out before removing.
        while (node.firstChild) {
          parent.insertBefore(node.firstChild, node);
        }
        parent.removeChild(node);
      }
    });
    return temp.innerHTML.trim();
  }
}
// Demo: overlapping <div>/<h1> tags, processed with the default whitelist.
console.log(stripHTML({
  'html': "<div><h1>jQuery</div> is a JavaScript library.</h1><br>And is the most widely-used such library (at this time). "
}));

JS Fiddle 演示。

它似乎也可以管理(荒谬的)嵌套:

// Demo: deeply nested elements (plus one stray </span> after the full stop);
// with the default whitelist only the two <br> elements may remain.
console.log(stripHTML({
  'html': "<div>jQuery <h1>is <br>a <span><strong><em><span>JavaScript</span></em> library</strong></span>.</span><br>And is the most widely-used such library (at this time).</h1></div> "
}));

/**
 * Unwraps every element of the given markup whose name is not on the
 * whitelist, leaving the element's content where it was.
 *
 * @param {Object} [opts] - Options; own keys override the defaults.
 * @param {string} [opts.html=null] - Markup to process.
 * @param {string[]} [opts.allowedHTML=['h2', 'br']] - Element names to
 *   keep, written as 'br' or '<br>'.
 * @returns {string|undefined} Trimmed result, or undefined when
 *   opts.html is empty or not a string.
 */
function stripHTML(opts) {
  var config = {
    'html': null,
    'allowedHTML': ['h2', 'br']
  };
  var sandbox = document.createElement('div');
  var optionNames = Object.keys(opts || {});
  var permitted;
  var elements;
  var element;
  var holder;
  var i;

  // Caller-supplied values win over the defaults.
  for (i = 0; i < optionNames.length; i += 1) {
    config[optionNames[i]] = opts[optionNames[i]];
  }

  // Nothing to do unless we were handed a non-empty string.
  if (!config.html || typeof config.html !== 'string') {
    return;
  }

  // The detached <div> parses the markup for us.
  sandbox.innerHTML = config.html;
  elements = Array.from(sandbox.querySelectorAll('*'));

  // Whitelist entries reduced to bare names ('<br>' -> 'br').
  permitted = config.allowedHTML.map(function(name) {
    return name.replace(/<|>/g, '');
  });

  for (i = 0; i < elements.length; i += 1) {
    element = elements[i];
    holder = element.parentNode;
    if (permitted.indexOf(element.localName) !== -1) {
      continue;
    }
    // Lift the children out in order, then discard the empty shell.
    while (element.firstChild) {
      holder.insertBefore(element.firstChild, element);
    }
    holder.removeChild(element);
  }

  return sandbox.innerHTML.trim();
}
// Demo: the nested-markup input, run through the default whitelist.
console.log(stripHTML({
  'html': "<div>jQuery <h1>is <br>a <span><strong><em><span>JavaScript</span></em> library</strong></span>.</span><br>And is the most widely-used such library (at this time).</h1></div> "
}));

JS Fiddle 演示。

但是,我不能保证它能够有效应对有人在传给 stripHTML 函数的 html 字符串中插入 <script> 元素的情况,例如:

// Demo: markup containing a <script> element. The closing tag is built from
// two string pieces - presumably so that an enclosing inline <script> block
// is not terminated early; confirm against where this snippet is hosted.
console.log(stripHTML({
  'html': "<script>alert('Will this work?'); console.log('Maybe not?');</" + "script>"
}));
// => alert('Will this work?'); console.log('Maybe not?');
// The script did not run in the author's (limited) testing, and neither
// the inserted nor the resulting string is passed to eval(), so it should
// be safe. This is not a guarantee, so please test your own edge cases.

/**
 * Removes all non-whitelisted elements from a markup string, keeping
 * the text and child nodes those elements contained.
 *
 * @param {Object} [opts] - Options; own keys override the defaults.
 * @param {string} [opts.html=null] - Markup to process.
 * @param {string[]} [opts.allowedHTML=['h2', 'br']] - Element names to
 *   keep, written as 'br' or '<br>'.
 * @returns {string|undefined} Trimmed result, or undefined when
 *   opts.html is empty or not a string.
 */
function stripHTML(opts) {
  var settings = {
      'html': null,
      'allowedHTML': ['h2', 'br']
    },
    temp = document.createElement('div'),
    allowedHTML,
    elementNodes;
  // Apply the caller's overrides, if any were given.
  Object.keys(opts || {}).forEach(function(key) {
    settings[key] = opts[key];
  });
  if (settings.html && 'string' === typeof settings.html) {
    // NOTE(review): the markup is parsed via innerHTML and attributes of
    // whitelisted elements are left untouched; do not treat the result
    // as sanitised without checking the caller's threat model.
    temp.innerHTML = settings.html;
    // Array copy of the descendants; removals below cannot disturb it.
    elementNodes = Array.from(temp.querySelectorAll('*'));
    // '<br>' and 'br' are made equivalent by dropping '<' and '>'.
    allowedHTML = settings.allowedHTML.map(function(el) {
      return el.replace(/<|>/g, '');
    });
    elementNodes.forEach(function(node) {
      var parent = node.parentNode;
      // The normalised list is the one that must be consulted here.
      if (allowedHTML.indexOf(node.localName) === -1) {
        // Move the children out in order, then remove the element.
        while (node.firstChild) {
          parent.insertBefore(node.firstChild, node);
        }
        parent.removeChild(node);
      }
    });
    return temp.innerHTML.trim();
  }
}
// Demo: <script> element in the input; 'script' is not in the default whitelist.
console.log(stripHTML({
  'html': "<script>alert('Will this work?'); console.log('Maybe not?');</"+"script>"
}));

JS Fiddle 演示。

引用:

  • Array.from() .
  • Array.prototype.forEach() .
  • Array.prototype.indexOf() .
  • Array.prototype.map() .
  • document.createElement() .
  • document.querySelectorAll() .
  • Element.localName .
  • JavaScript 正则表达式指南。
  • Node.firstChild .
  • Element.innerHTML .
  • Node.insertBefore() .
  • Node.parentNode .
  • Node.removeChild() .
  • Object.keys() .
  • String.prototype.replace() .
  • typeof运算符。
  • while (...) 语句。