在Javascript中解析BBCode

Parsing BBCode in Javascript

本文关键字：中的 BBCode Javascript 更新时间：2023-09-26

我正在使用这个脚本(http://coursesweb.net/javascript/convert-bbcode-html-javascript_cs)来解析BBCode。我已经扩展了它能够处理的BBCode标签，但是当开始标签后面紧跟着换行符时，我遇到了一个问题，例如：

  [code]
     code....
  [/code]

如果代码写成“内联”形式（标签和内容在同一行），则不会出现问题：[code]code....[/code]

用于匹配这些标记中内容的正则表达式是(.*?)，我知道它不匹配换行符。我也尝试过用([^\r\n])来匹配换行符，但这同样不起作用。

我想这是一个简单的问题，但我对regex没有什么经验，所以任何帮助都将不胜感激

编辑：这是我正在使用的正则表达式的完整列表

  // Regex source fragments keyed by placeholder name. A placeholder such as
  // {TEXT} or {URL1} in a BBCode pattern is replaced by the fragment stored
  // under its name (digits stripped). Every fragment is wrapped in exactly one
  // capturing group, so each placeholder contributes one $n to the match.
  //
  // The values are string literals, so a backslash meant for the RegExp has to
  // be written as "\\". Single-backslash sequences (in LINK and EMAIL) are
  // identity escapes that the string literal drops before the RegExp sees
  // them; they are kept as written rather than tightened here.
  var tokens = {
    'URL' : '((?:(?:[a-z][a-z\\d+\\-.]*:\\/{2}(?:(?:[a-z0-9\\-._~\\!$&\'*+,;=:@|]+|%[\\dA-F]{2})+|[0-9.]+|\\[[a-z0-9.]+:[a-z0-9.]+:[a-z0-9.:]+\\])(?::\\d*)?(?:\\/(?:[a-z0-9\\-._~\\!$&\'*+,;=:@|]+|%[\\dA-F]{2})*)*(?:\\?(?:[a-z0-9\\-._~\\!$&\'*+,;=:@\\/?|]+|%[\\dA-F]{2})*)?(?:#(?:[a-z0-9\\-._~\\!$&\'*+,;=:@\\/?|]+|%[\\dA-F]{2})*)?)|(?:www\\.(?:[a-z0-9\\-._~\\!$&\'*+,;=:@|]+|%[\\dA-F]{2})+(?::\\d*)?(?:\\/(?:[a-z0-9\\-._~\\!$&\'*+,;=:@|]+|%[\\dA-F]{2})*)*(?:\\?(?:[a-z0-9\\-._~\\!$&\'*+,;=:@\\/?|]+|%[\\dA-F]{2})*)?(?:#(?:[a-z0-9\\-._~\\!$&\'*+,;=:@\\/?|]+|%[\\dA-F]{2})*)?)))',
    'LINK' : '([a-z0-9\-\./]+[^"\' ]*)',
    'EMAIL' : '((?:[\\w\!\#$\%\&\'\*\+\-\/\=\?\^\`{\|\}\~]+\.)*(?:[\\w\!\#$\%\'\*\+\-\/\=\?\^\`{\|\}\~]|&)+@(?:(?:(?:(?:(?:[a-z0-9]{1}[a-z0-9\-]{0,62}[a-z0-9]{1})|[a-z])\.)+[a-z]{2,6})|(?:\\d{1,3}\.){3}\\d{1,3}(?:\:\\d{1,5})?))',
    // "." does not match line terminators, so TEXT never spans several lines.
    'TEXT' : '(.*?)',
    'SIMPLETEXT' : '([a-zA-Z0-9-+.,_ ]+)',
    'INTTEXT' : '([a-zA-Z0-9-+,_. ]+)',
    'IDENTIFIER' : '([a-zA-Z0-9-_]+)',
    'COLOR' : '([a-z]+|#[0-9abcdef]+)',
    'NUMBER'  : '([0-9]+)',
    // Exactly one character that is neither CR nor LF.
    'ALL'  : '([^\r\n])',
  };

编辑2：用于匹配的完整JS

// Finds placeholders such as {TEXT} or {URL1}: an opening brace, one or more
// letters/underscores, optional trailing digits, a closing brace.
// Flags: g = report every placeholder in the string, i = ignore letter case.
var token_match = /{[A-Z_]+[0-9]*}/ig;

/**
 * Turn a BBCode pattern such as "[b]{TEXT}[/b]" into a RegExp.
 *
 * Every placeholder found by `token_match` whose name (braces and digits
 * removed) is an own key of `tokens` is replaced by that regex fragment; all
 * text around the placeholders is passed through `preg_quote` (defined outside
 * this block; presumably escapes regex metacharacters - confirm).
 * Placeholders with an unknown name are left in place and quoted as text.
 *
 * @param {string} str BBCode pattern, optionally containing {TOKEN} placeholders.
 * @returns {RegExp} Flags are 'g' when the pattern holds no placeholder at
 *   all and 'gi' otherwise (kept as in the original implementation).
 */
var _getRegEx = function(str) {
  // String.prototype.match returns null, not an empty array, when nothing
  // matches; normalise so a placeholder-free pattern does not throw.
  var matches = str.match(token_match) || [];
  var replacement = '';
  var rest = str;          // part of the pattern not yet copied to `replacement`
  var i, token, at;

  if (matches.length === 0) {
    return new RegExp(preg_quote(str), 'g');        // no tokens so return the escaped string
  }

  for (i = 0; i < matches.length; i += 1) {
    // Remove {, } and numbers from the placeholder so it can match the
    // keys in tokens
    token = matches[i].replace(/[{}0-9]/g, '');
    // Own-property check: names like "constructor" must not resolve to
    // members inherited from Object.prototype.
    if (Object.prototype.hasOwnProperty.call(tokens, token) && tokens[token]) {
      at = rest.indexOf(matches[i]);
      // Escape everything before the placeholder, then splice in its fragment
      replacement += preg_quote(rest.slice(0, at)) + tokens[token];
      // Drop what was consumed so the next placeholder is searched for in
      // the remainder only
      rest = rest.slice(at + matches[i].length);
    }
  }

  replacement += preg_quote(rest);
  return new RegExp(replacement, 'gi');
};

/**
 * Turn a template such as '<a href="{URL}">{TEXT}</a>' into a replacement
 * string for String.prototype.replace ('<a href="$1">$2</a>').
 *
 * Each distinct placeholder text (e.g. "{TEXT1}") gets the next free index the
 * first time it is seen and reuses that index on later occurrences. Only
 * placeholders whose name (braces and digits removed) is an own key of
 * `tokens` are numbered; any other placeholder is left in the output verbatim
 * and does not consume an index, because it adds no capture group to the
 * RegExp built by _getRegEx either.
 *
 * @param {string} str Template, optionally containing {TOKEN} placeholders.
 * @returns {string} The template with known placeholders replaced by $n.
 */
var _getTpls = function(str) {
  // String.prototype.match returns null, not an empty array, when nothing
  // matches; normalise so a placeholder-free template is returned unchanged.
  var matches = str.match(token_match) || [];
  var replacement = '';
  var positions = {};      // placeholder text -> its $n index
  var next_position = 0;
  var rest = str;          // part of the template not yet copied to `replacement`
  var i, token, at;

  for (i = 0; i < matches.length; i += 1) {
    // Remove {, } and numbers from the placeholder so it can match the
    // keys in tokens
    token = matches[i].replace(/[{}0-9]/g, '');
    // Own-property check: names like "constructor" must not resolve to
    // members inherited from Object.prototype.
    if (!(Object.prototype.hasOwnProperty.call(tokens, token) && tokens[token])) {
      continue;
    }
    // figure out what $# to use ($1, $2)
    if (!positions[matches[i]]) {
      next_position += 1;
      positions[matches[i]] = next_position;
    }
    at = rest.indexOf(matches[i]);
    replacement += rest.slice(0, at) + '$' + positions[matches[i]];
    rest = rest.slice(at + matches[i].length);
  }

  return replacement + rest;
};

这对我来说很有用：（为了避免混淆，也更新了这个）

\[code\]([\s\S]*?)\[\/code\]

请参阅regexpal并输入以下内容：

[code]
    code....
[/code]
[code]code.... [/code]

更新：修复了以下regex，这在Chrome控制台中对我有效：

// [\s\S] matches any character including line terminators, so the lazy group
// captures everything between [code] and [/code] even across a newline.
/\[code\]([\s\S]*?)\[\/code\]/g.exec("[code]hello world \n[/code]");

在JavaScript的正则表达式中，“.”不匹配换行符，因此(.*?)无法跨行匹配。您必须改用本SO答案中描述的[\s\S]技巧。可能像这样：

/\[code\][\s\S]*\[\/code\]/

此外，RegExps可能不是解析语法的最佳选择。它极其复杂。我建议解析字符串并构建一个抽象语法树，然后从中呈现HTML。