JS Regex 抛出“最大调用堆栈大小超出”错误
2021-07-15
4258
我有一个很长的段落，需要按单词边界匹配其中的短语，并把匹配到的内容替换为所需的值。
需要替换的短语有许多不同的模式，因此我不得不写很多行 new RegExp，逐条进行替换。
// Question snippet: applies one case-insensitive, word-bounded replacement per
// phrase by chaining a separate `new RegExp(...)` call for every phrase.
var paragraph = "german gateway is located at ... Leonardo DA VINci and other some word superman";
// NOTE(review): the value returned by this chain is not assigned anywhere, so
// the console.log below prints `paragraph` as it was originally declared.
paragraph
.replace( new RegExp("\\b"+ "german gateway" +"\\b", "ig"), "German gateway")
.replace( new RegExp("\\b"+ "Leonardo DA vinci" +"\\b", "ig"), "Leonardo da Vinci")
.replace( new RegExp("\\b"+ "some word" +"\\b", "ig"), "some other word")
.replace( new RegExp
// ...the chain continues like this for at least a few thousand rows.
console.log(paragraph);
//示例输出
German gateway is located at ... Leonardo da Vinci and other some other word superman
但是太多的新RegExp会导致js运行出错。
Uncaught RangeError: Maximum call stack size exceeded
有没有办法避免大量调用 new RegExp，同时保留我想要的正则表达式规则？
2个回答
使用一个对象并遍历它的属性。如果需要保证替换顺序，可以改用对象数组，每条替换规则对应一个对象。
const paragraph = "german gateway is located at ... Leonardo DA VINci and other some word superman";

// Maps each phrase to search for (matched case-insensitively, on word
// boundaries) to the literal text that replaces it.
const replacements = {
  "german gateway": "German gateway",
  "Leonardo DA vinci": "Leonardo da Vinci",
  "some word": "some other word"
  /* ... */
};

/**
 * Escapes every character that has a special meaning in a regular expression
 * so that `text` can be embedded in a pattern and matched literally.
 *
 * @param {string} text - Literal text to embed in a pattern.
 * @returns {string} `text` with each special character preceded by a backslash.
 */
const escapeRegExp = (text) => text.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");

/**
 * Applies every phrase -> replacement pair of `map` to `text`, one after the
 * other, in the property order of `map`.
 *
 * @param {string} text - The text to process.
 * @param {Object<string, string>} map - Phrases (keys) and their replacements (values).
 * @returns {string} The text with every whole-word, case-insensitive occurrence
 *   of each phrase replaced.
 */
const applyReplacements = (text, map) =>
  Object.entries(map).reduce((current, [phrase, replacement]) => {
    // The phrase is data, not a pattern: escape it so characters such as "."
    // or "(" cannot change what is matched or make the pattern invalid.
    const rx = new RegExp("\\b" + escapeRegExp(phrase) + "\\b", "ig");
    // A function replacer inserts the replacement verbatim; a plain string
    // would have sequences such as "$&" or "$1" interpreted by replace().
    return current.replace(rx, () => replacement);
  }, text);

const result = applyReplacements(paragraph, replacements);
console.log(result);
并且您一定要转义传递给
RegExp()
构造函数的字符串中的变量部分。
Andreas
2021-07-15
您可以使用一个“关联”数组，以要搜索的短语为键、以替换内容为值；根据搜索词（键）构建一个正则表达式 trie，然后只调用一次
.replace
，用这一个正则表达式查找所有短语，并把一个回调函数作为
.replace
的替换参数传入。
首先,运行
npm install regex-trie
,然后使用
let paragraph = "german gateway is located at ... Leonardo DA VINci and other some word superman";
// Phrase to search for (matched case-insensitively) -> replacement text.
const phrases = {"german gateway":"German gateway","Leonardo DA vinci":"Leonardo da Vinci","some word":"some other word"};
const RegexTrie = require('regex-trie');

// Index the replacements by lower-cased phrase once, so that every match is
// resolved with a single Map lookup instead of a scan over all phrases.
// When two phrases differ only by case, the first one listed wins.
const replacementsByPhrase = new Map();
Object.keys(phrases).forEach((phrase) => {
  const folded = phrase.toLowerCase();
  if (!replacementsByPhrase.has(folded)) {
    replacementsByPhrase.set(folded, phrases[phrase]);
  }
});

/**
 * Returns the replacement registered for a matched phrase, ignoring case.
 * Falls back to the matched text itself, so a match without a registered
 * phrase is left untouched instead of being replaced by "undefined".
 *
 * @param {string} match - The text matched by the combined pattern.
 * @returns {string} The replacement text.
 */
const getValue = (match) => {
  const folded = match.toLowerCase();
  return replacementsByPhrase.has(folded) ? replacementsByPhrase.get(folded) : match;
};

// Collapse all phrases into one trie-based pattern so the text is scanned once.
const trie = new RegexTrie();
trie.add(Object.keys(phrases));
// toRegExp() returns undefined when no phrase was added; skip the replacement
// in that case instead of failing on `.source`.
const pattern = trie.toRegExp();
if (pattern !== undefined) {
  const regex = new RegExp(`\\b${pattern.source}\\b`, 'ig');
  paragraph = paragraph.replace(regex, (m) => getValue(m));
}
console.log(paragraph);
// => German gateway is located at ... Leonardo da Vinci and other some other word superman
以下是在
browserify
的帮助下捆绑的 JavaScript 代码片段:
// Browserify module-loader prelude (minified). `r(e, n, t)` receives the module
// table `e` (id -> [module function, dependency-name map]), the module cache `n`
// and the list of entry ids `t`. Its inner `o(i)` loads module `i` on first use:
// it creates `{exports: {}}` in the cache and calls the module function with a
// local `require` that resolves names through that module's dependency map. An
// id missing from the table is handed to an outer `require` when one exists,
// otherwise an Error with code "MODULE_NOT_FOUND" is thrown. Every entry id is
// loaded and `o` is returned. The object literal opened at the end of this line
// is the module table, starting with module 1.
(function(){function r(e,n,t){function o(i,f){if(!n[i]){if(!e[i]){var c="function"==typeof require&&require;if(!f&&c)return c(i,!0);if(u)return u(i,!0);var a=new Error("Cannot find module '"+i+"'");throw a.code="MODULE_NOT_FOUND",a}var p=n[i]={exports:{}};e[i][0].call(p.exports,function(r){var n=e[i][1][r];return o(n||r)},p,p.exports,r,e,n,t)}return n[i].exports}for(var u="function"==typeof require&&require,i=0;i<t.length;i++)o(t[i]);return o}return r})()({1:[function(require,module,exports){
let paragraph = "german gateway is located at ... Leonardo DA VINci and other some word superman";
// Phrase to search for (matched case-insensitively) -> replacement text.
const phrases = {"german gateway":"German gateway","Leonardo DA vinci":"Leonardo da Vinci","some word":"some other word"};
const RegexTrie = require('regex-trie');

// Index the replacements by lower-cased phrase once, so that every match is
// resolved with a single Map lookup instead of a scan over all phrases.
// When two phrases differ only by case, the first one listed wins.
const replacementsByPhrase = new Map();
Object.keys(phrases).forEach((phrase) => {
  const folded = phrase.toLowerCase();
  if (!replacementsByPhrase.has(folded)) {
    replacementsByPhrase.set(folded, phrases[phrase]);
  }
});

/**
 * Returns the replacement registered for a matched phrase, ignoring case.
 * Falls back to the matched text itself, so a match without a registered
 * phrase is left untouched instead of being replaced by "undefined".
 *
 * @param {string} match - The text matched by the combined pattern.
 * @returns {string} The replacement text.
 */
const getValue = (match) => {
  const folded = match.toLowerCase();
  return replacementsByPhrase.has(folded) ? replacementsByPhrase.get(folded) : match;
};

// Collapse all phrases into one trie-based pattern so the text is scanned once.
const trie = new RegexTrie();
trie.add(Object.keys(phrases));
// toRegExp() returns undefined when no phrase was added; skip the replacement
// in that case instead of failing on `.source`.
const pattern = trie.toRegExp();
if (pattern !== undefined) {
  const regex = new RegExp(`\\b${pattern.source}\\b`, 'ig');
  paragraph = paragraph.replace(regex, (m) => getValue(m));
}
console.log(paragraph);
},{"regex-trie":3}],2:[function(require,module,exports){
(function (global){(function (){
/*! http://mths.be/jsesc v0.5.0 by @mathias */
;(function(root) {
// Environment detection. Each `free*` variable holds the detected object when
// it is available and a falsy value otherwise.
// Detect free variables `exports`
var freeExports = typeof exports == 'object' && exports;
// Detect free variable `module`
// (only kept when `module.exports` is the same object as `exports`)
var freeModule = typeof module == 'object' && module &&
module.exports == freeExports && module;
// Detect free variable `global`, from Node.js or Browserified code,
// and use it as `root`
var freeGlobal = typeof global == 'object' && global;
// Only adopt it as `root` when it refers to itself through its own `global`
// or `window` property.
if (freeGlobal.global === freeGlobal || freeGlobal.window === freeGlobal) {
root = freeGlobal;
}
/*--------------------------------------------------------------------------*/
var object = {};
var hasOwnProperty = object.hasOwnProperty;
/**
 * Invokes `callback(key, value)` for every enumerable property that `object`
 * owns directly; inherited properties are skipped.
 */
var forOwn = function(object, callback) {
  for (var name in object) {
    if (!hasOwnProperty.call(object, name)) {
      // Inherited through the prototype chain: not ours to report.
      continue;
    }
    callback(name, object[name]);
  }
};
/**
 * Copies the own enumerable properties of `source` onto `destination` and
 * returns `destination`. A falsy `source` leaves `destination` untouched.
 */
var extend = function(destination, source) {
  if (source) {
    forOwn(source, function(name, value) {
      destination[name] = value;
    });
  }
  return destination;
};
/**
 * Calls `callback(element)` for each index from 0 up to the length `array`
 * had when the loop started; the callback receives the element only.
 */
var forEach = function(array, callback) {
  for (var i = 0, count = array.length; i < count; i++) {
    callback(array[i]);
  }
};
var toString = object.toString;
/**
 * Reports whether `value` is an array, judged by its `[object Array]` tag.
 */
var isArray = function(value) {
  var tag = toString.call(value);
  return tag == '[object Array]';
};
/**
 * Reports whether `value` carries the `[object Object]` tag.
 */
var isObject = function(value) {
  // Deliberately a very simple check; it is good enough for what is needed here.
  var tag = toString.call(value);
  return tag == '[object Object]';
};
/**
 * Reports whether `value` is a string primitive or carries the
 * `[object String]` tag (e.g. a `String` wrapper object).
 */
var isString = function(value) {
  if (typeof value == 'string') {
    return true;
  }
  return toString.call(value) == '[object String]';
};
/**
 * Reports whether `value` is callable, either by `typeof` or by carrying the
 * `[object Function]` tag.
 */
var isFunction = function(value) {
  // `typeof` alone would do in a perfect world, but Chrome 1–12 report
  // `typeof /x/ == 'object'` and IE 6–8 report `typeof alert == 'object'`
  // (and similar for other host objects), hence the tag check as a fallback.
  if (typeof value == 'function') {
    return true;
  }
  return toString.call(value) == '[object Function]';
};
/*--------------------------------------------------------------------------*/
// Lookup table from a character to its short ("single character") escape
// sequence, e.g. a line feed maps to the two characters `\n`.
// http://mathiasbynens.be/notes/javascript-escapes#single
var singleEscapes = {
'"': '\\"',
'\'': '\\\'',
'\\': '\\\\',
'\b': '\\b',
'\f': '\\f',
'\n': '\\n',
'\r': '\\r',
'\t': '\\t'
// `\v` is omitted intentionally, because in IE < 9, '\v' == 'v'.
// '\v': '\\x0B'
};
// Matches exactly the characters that are keys of `singleEscapes`
// (inside a character class `\b` means backspace, not a word boundary).
var regexSingleEscape = /["'\\\b\f\n\r\t]/;
// Matches a single decimal digit.
var regexDigit = /[0-9]/;
// Matches the printable ASCII characters other than `"`, `'` and `\`:
// space, `!`, `#` to `&`, `(` to `[`, and `]` to `~`.
var regexWhitelist = /[ !#-&\(-\[\]-~]/;
/**
 * Serialises `argument` to a string in which characters are replaced by
 * escape sequences.
 *
 * Strings are escaped code unit by code unit. Arrays and plain objects are
 * rendered as `[...]` / `{...}` literals whose elements, keys and values are
 * escaped recursively. Any other value is converted with `String(argument)`,
 * or with `JSON.stringify` (falling back to `'null'`) in JSON mode.
 *
 * @param {*} argument - The value to escape.
 * @param {Object} [options] - Overrides for the defaults listed below:
 *   `escapeEverything` (false), `quotes` ('single'; any value other than
 *   'single' or 'double' is treated as 'single'), `wrap` (false), `es6`
 *   (false), `json` (false; when truthy the defaults for `quotes` and `wrap`
 *   become 'double' and true), `compact` (true), `indent` ('\t') and the
 *   internal `__indent__` ('') used to carry the current indentation into
 *   recursive calls.
 * @returns {string} The escaped representation.
 */
var jsesc = function(argument, options) {
// Handle options
var defaults = {
'escapeEverything': false,
'quotes': 'single',
'wrap': false,
'es6': false,
'json': false,
'compact': true,
'indent': '\t',
'__indent__': ''
};
var json = options && options.json;
if (json) {
// JSON mode only changes the defaults; explicit options still win because
// they are merged over `defaults` below.
defaults.quotes = 'double';
defaults.wrap = true;
}
// `extend` copies into and returns `defaults`, so from here on `options` is
// this call's own object and the caller's object is not modified.
options = extend(defaults, options);
if (options.quotes != 'single' && options.quotes != 'double') {
options.quotes = 'single';
}
var quote = options.quotes == 'double' ? '"' : '\'';
var compact = options.compact;
var indent = options.indent;
var oldIndent;
var newLine = compact ? '' : '\n';
var result;
var isEmpty = true;
if (json && argument && isFunction(argument.toJSON)) {
argument = argument.toJSON();
}
if (!isString(argument)) {
if (isArray(argument)) {
result = [];
// Nested strings are always quoted, and nested levels are indented one
// step further than the current one.
options.wrap = true;
oldIndent = options.__indent__;
indent += oldIndent;
options.__indent__ = indent;
forEach(argument, function(value) {
isEmpty = false;
result.push(
(compact ? '' : indent) +
jsesc(value, options)
);
});
if (isEmpty) {
return '[]';
}
return '[' + newLine + result.join(',' + newLine) + newLine +
(compact ? '' : oldIndent) + ']';
} else if (!isObject(argument)) {
if (json) {
// For some values (e.g. `undefined`, `function` objects),
// `JSON.stringify(value)` returns `undefined` (which isn’t valid
// JSON) instead of `'null'`.
return JSON.stringify(argument) || 'null';
}
return String(argument);
} else { // it’s an object
result = [];
options.wrap = true;
oldIndent = options.__indent__;
indent += oldIndent;
options.__indent__ = indent;
// Only own enumerable properties are emitted; both the key and the value
// are escaped recursively.
forOwn(argument, function(key, value) {
isEmpty = false;
result.push(
(compact ? '' : indent) +
jsesc(key, options) + ':' +
(compact ? '' : ' ') +
jsesc(value, options)
);
});
if (isEmpty) {
return '{}';
}
return '{' + newLine + result.join(',' + newLine) + newLine +
(compact ? '' : oldIndent) + '}';
}
}
var string = argument;
// Loop over each code unit in the string and escape it
var index = -1;
var length = string.length;
var first;
var second;
var codePoint;
result = '';
while (++index < length) {
var character = string.charAt(index);
if (options.es6) {
// In ES6 mode a valid surrogate pair is emitted as one `\u{...}` code point
// escape and both of its code units are consumed.
first = string.charCodeAt(index);
if ( // check if it’s the start of a surrogate pair
first >= 0xD800 && first <= 0xDBFF && // high surrogate
length > index + 1 // there is a next code unit
) {
second = string.charCodeAt(index + 1);
if (second >= 0xDC00 && second <= 0xDFFF) { // low surrogate
// http://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae
codePoint = (first - 0xD800) * 0x400 + second - 0xDC00 + 0x10000;
result += '\\u{' + codePoint.toString(16).toUpperCase() + '}';
index++;
continue;
}
}
}
if (!options.escapeEverything) {
if (regexWhitelist.test(character)) {
// It’s a printable ASCII character that is not `"`, `'` or `\`,
// so don’t escape it.
result += character;
continue;
}
// A quote character is only escaped when it is the one used for wrapping.
if (character == '"') {
result += quote == character ? '\\"' : character;
continue;
}
if (character == '\'') {
result += quote == character ? '\\\'' : character;
continue;
}
}
// The short `\0` form is used only outside JSON mode and only when the next
// character is not a digit.
if (
character == '\0' &&
!json &&
!regexDigit.test(string.charAt(index + 1))
) {
result += '\\0';
continue;
}
if (regexSingleEscape.test(character)) {
// no need for a `hasOwnProperty` check here
result += singleEscapes[character];
continue;
}
// Everything else becomes a hexadecimal escape: `\xHH` when the code unit
// fits in two hex digits and JSON mode is off, `\uHHHH` otherwise.
var charCode = character.charCodeAt(0);
var hexadecimal = charCode.toString(16).toUpperCase();
var longhand = hexadecimal.length > 2 || json;
var escaped = '\\' + (longhand ? 'u' : 'x') +
('0000' + hexadecimal).slice(longhand ? -4 : -2);
result += escaped;
continue;
}
if (options.wrap) {
result = quote + result + quote;
}
return result;
};
// Version string exposed as a property of the function.
jsesc.version = '0.5.0';
/*--------------------------------------------------------------------------*/
// Expose `jsesc` through the first mechanism that is available: an AMD
// `define`, then the CommonJS-style `module.exports` / `exports` detected
// earlier, and finally a `jsesc` property on `root`.
// Some AMD build optimizers, like r.js, check for specific condition patterns
// like the following:
if (
typeof define == 'function' &&
typeof define.amd == 'object' &&
define.amd
) {
define(function() {
return jsesc;
});
// NOTE(review): the `nodeType` test presumably rejects a DOM element that
// happens to be reachable as `exports` — confirm.
} else if (freeExports && !freeExports.nodeType) {
if (freeModule) { // in Node.js or RingoJS v0.8.0+
freeModule.exports = jsesc;
} else { // in Narwhal or RingoJS v0.7.0-
freeExports.jsesc = jsesc;
}
} else { // in Rhino or a web browser
root.jsesc = jsesc;
}
}(this));
}).call(this)}).call(this,typeof global !== "undefined" ? global : typeof self !== "undefined" ? self : typeof window !== "undefined" ? window : {})
},{}],3:[function(require,module,exports){
var jsesc = require('jsesc');
/**
* @module regex-trie
*/
var RegexTrie = (function () {
    "use strict";

    var has_own = Object.prototype.hasOwnProperty;

    // Characters with a special meaning in a regular expression; each one is
    // escaped with a leading backslash.
    var SPECIAL_CHARS = /[\\$()*+\-.?\[\]^{|}]/g;

    // ASCII control characters. They are written as `\xHH` rather than with a
    // short escape: a backslash in front of the raw character followed by a
    // second escaping pass would yield `\\t` (a backslash and a "t"), `\b`
    // would be read as a word boundary, and `\0` followed by a digit would be
    // read as an octal escape.
    var CONTROL_CHARS = /[\x00-\x1F]/g;

    // DEL and every character outside ASCII; these are escaped by `jsesc`.
    var NON_ASCII_CHARS = /[^\x00-\x7E]/g;

    // Returns the `\xHH` escape for the first code unit of `chr`.
    var hex_escape = function (chr) {
        var hex = chr.charCodeAt(0).toString(16).toUpperCase();
        return '\\x' + ('00' + hex).slice(-2);
    };

    /**
     * The `RegexTrie` class builds a regular expression from a set of phrases
     * added to it. It produces a non-optimised `RegExp` and only represents
     * literal characters: regex syntax, control characters and non-ASCII
     * characters in a phrase are escaped.
     *
     * May be called with or without `new`.
     *
     * @class RegexTrie
     * @constructor
     */
    var RegexTrie = function () {
        if ( ! (this instanceof RegexTrie) ) {
            return new RegexTrie();
        }

        this._num_phrases_in_trie = 0;
        // Nested plain objects keyed by single characters; a node that
        // completes a phrase carries `end: true`.
        this._trie = {};

        return this;
    };

    /**
     * Adds a phrase to the trie. Arrays (including nested arrays) are added
     * element by element. Only non-empty strings and numbers are accepted;
     * anything else, and phrases that are already present, are silently
     * ignored.
     *
     * @method add()
     * @param phrase_to_add {array|string|number}
     * @return {RegexTrie} this instance
     * @chainable
     */
    RegexTrie.prototype.add = function (phrase_to_add) {
        if ( Array.isArray(phrase_to_add) ) {
            phrase_to_add.forEach(function (phrase) {
                this.add(phrase);
            }, this);
            // The array itself is not a phrase; its elements are done.
            return this;
        }

        phrase_to_add = this._coerce_to_string(phrase_to_add);

        if ( ! this._is_phrase_valid(phrase_to_add) ) {
            return this;
        }

        // Has this phrase already been added?
        if ( this.contains(phrase_to_add) ) {
            return this;
        }

        var node = this._trie;

        phrase_to_add.split('').forEach(function (chr) {
            if ( ! has_own.call(node, chr) ) {
                node[chr] = {};
            }
            node = node[chr];
        });

        // Set the end marker (so we know this was a complete word)
        node.end = true;
        this._num_phrases_in_trie++;

        return this;
    };

    /**
     * Builds a `RegExp` (without flags) that matches any phrase in the trie.
     *
     * @method toRegExp()
     * @return {RegExp|undefined} `undefined` when no phrase has been added
     */
    RegexTrie.prototype.toRegExp = function () {
        if ( this._num_phrases_in_trie === 0 ) return;

        return new RegExp(this.toString());
    };

    /**
     * Builds the pattern source that matches any phrase in the trie.
     *
     * @method toString()
     * @return {string|undefined} `undefined` when no phrase has been added
     */
    RegexTrie.prototype.toString = function () {
        if ( this._num_phrases_in_trie === 0 ) return;

        var self = this;

        // Depth-first walk: every child contributes its escaped character
        // followed by the pattern for its own sub-trie.
        var walk_trie = function (node) {
            var keys = Object.keys(node),
                alt_group = [],
                char_class = [],
                end = false; // marks the end of a phrase

            keys.forEach(function (key) {
                if ( key === 'end' ) {
                    end = true;
                    return;
                }

                var walk_result = self._quotemeta(key) + walk_trie(node[key]);

                // With more than one key on the node (the `end` marker
                // counts) the result joins the alternative group, otherwise
                // it is the node's only continuation.
                if ( keys.length > 1 ) {
                    alt_group.push(walk_result);
                }
                else {
                    char_class.push(walk_result);
                }
            });

            return self._to_regex(alt_group, char_class, end);
        };

        return walk_trie(this._trie);
    };

    /**
     * Combines the patterns collected for one trie node.
     *
     * @method _to_regex()
     * @param alt_group {array} patterns of the children of a multi-key node
     * @param char_class {array} pattern of the child of a single-key node
     * @param end {boolean} whether a phrase ends at this node, which makes
     *   everything that follows optional
     * @return {string}
     */
    RegexTrie.prototype._to_regex = function (alt_group, char_class, end) {
        var is_single_char = function (el) {
                return el.length === 1;
            },
            result = "";

        if ( alt_group.length === 1 ) {
            // A lone element is used as is.
            result = alt_group[0];
        }
        else if ( alt_group.length > 1 ) {
            if ( alt_group.every(is_single_char) ) {
                // Alternatives that are all one character long are flattened
                // into a character class.
                result = '[' + alt_group.join('') + ']';
            }
            else {
                // Otherwise build a non-capturing alternative group.
                result = '(?:' + alt_group.join('|') + ')';
            }
        }
        else if ( char_class.length > 0 ) {
            result = char_class[0];
        }

        if ( end && result ) {
            // A phrase ends here, so the continuation is optional.
            result = ( result.length === 1 ) ? result + '?'
                                             : '(?:' + result + ')?';
        }

        return result;
    };

    /**
     * Tells whether `phrase_to_fetch` was added as a complete phrase.
     * Prefixes of added phrases and anything that is not a non-empty string
     * yield `false`.
     *
     * @method contains()
     * @param phrase_to_fetch {string}
     * @return {boolean}
     */
    RegexTrie.prototype.contains = function (phrase_to_fetch) {
        if ( ! this._is_phrase_valid(phrase_to_fetch) ) {
            return false;
        }

        var node = this._trie;

        for ( var i = 0; i < phrase_to_fetch.length; i++ ) {
            var chr = phrase_to_fetch.charAt(i);

            if ( ! has_own.call(node, chr) ) {
                return false;
            }
            node = node[chr];
        }

        return has_own.call(node, 'end') && node.end === true;
    };

    /**
     * Converts numbers other than `NaN` to their string form; every other
     * value is returned unchanged.
     *
     * @method _coerce_to_string()
     */
    RegexTrie.prototype._coerce_to_string = function (phrase) {
        if ( typeof phrase === 'number' && ! isNaN(phrase) ) {
            phrase = phrase.toString();
        }

        return phrase;
    };

    /**
     * A phrase is valid when it is a non-empty string.
     *
     * @method _is_phrase_valid()
     * @return {boolean}
     */
    RegexTrie.prototype._is_phrase_valid = function (phrase) {
        return ( typeof phrase === 'string' && phrase.length > 0 );
    };

    /**
     * Escapes `phrase` so that it matches literally inside a pattern: regex
     * syntax gets a leading backslash, control characters become `\xHH`, and
     * DEL / non-ASCII characters are escaped by `jsesc`. Invalid phrases are
     * returned unchanged.
     *
     * @method _quotemeta()
     * @param phrase {string}
     * @return {string}
     */
    RegexTrie.prototype._quotemeta = function (phrase) {
        if ( ! this._is_phrase_valid(phrase) ) {
            return phrase;
        }

        return phrase
            .replace(SPECIAL_CHARS, '\\$&')
            .replace(CONTROL_CHARS, hex_escape)
            // Call jsesc with the character only, so that the offset that
            // `replace` passes to its callback is not taken for `options`.
            .replace(NON_ASCII_CHARS, function (chr) {
                return jsesc(chr);
            });
    };

    return RegexTrie;
})();
module.exports = RegexTrie;
},{"jsesc":2}]},{},[1]);
Wiktor Stribiżew
2021-07-15