根据在线测试工具有效的RegEx,在浏览器中读取文件时没有得到任何匹配

Valid RegEx according to online test tools, not getting any matches when reading file in browser

本文关键字:文件 读取 任何匹 浏览器 测试工具 在线 有效 RegEx      更新时间:2023-09-26

我用几个在线工具和社区的帮助设计了这个正则表达式:

https://regex101.com/r/hJ4pD5/1

('s[A-Z]'.).+?(?=('s[A-Z]'.)|('W?(Answer:)'W?))

目标是提取问题的所有备选方案。根据regexr和regex101,这是一个有效的Javascript regex,与测试数据(pastebin)配合良好:

1. Question goes here:
A. Answer one
B. Answer two 
C. Answer three D. Not indented Answer
Answer: B is correct

预期匹配项应为:

"A.回答一","B.回答二","C.回答三","D.不缩进回答"

但是当我在代码中实现它时,它的性能不是很好,没有找到匹配项。

(尝试粘贴垃圾箱数据)

/**
 * Created by Schwusch on 01/08/2016.
 */
$(document).ready(start);
var questionsRaw;
var questionsFormatted = [];
var questionIndex = 0;
function readSingleFile(e) {
    var file = e.target.files[0];
    if (!file) {
        return;
    }
    var reader = new FileReader();
    reader.onload = function(e) {
        var contents = e.target.result;
        displayContents(contents);
    };
    reader.readAsText(file);
}
/* REGEX MAGIC -------------------------------------------------*/
function displayContents(contents) {
    questionsRaw = contents.split('---');
    $.each(questionsRaw, function(index, question ) {
        var answer = question.split("Answer:")[1];
        var splittedQuestion = question.split("A.")[0];
        var alternatives = question.match(/('s[A-Z]'.).+?(?=('s[A-Z]'.)|('W?(Answer:)'W?))/g);
        questionsFormatted.push({
            question: splittedQuestion,
            alternatives: alternatives,
            answer: answer
        });
    });
/* END REGEX MAGIC -------------------------------------------------*/
    var element = document.getElementById('file-content');
    element.innerHTML = questionsFormatted[questionIndex].question;
    for (var i = 0; i < questionsFormatted[questionIndex].alternatives.length ; i++) {
        $('#alternatives').append('<button type="button" class="list-group-item">' + questionsFormatted[questionIndex].alternatives[i] + '</button>');
    }
}
function start() {
    document.getElementById('file-input')
        .addEventListener('change', readSingleFile, false);
    $(window).keydown(function(e) {
        e = e || event;
        switch(e.keyCode) {
            case 37: // left
                previousQuestion();
                return false;
            case 38: // up
                showQuestion();
                return false;
            case 39: // right
                nextQuestion();
                return false;
            case 40: // down
                showAnswer();
                return false;
        }
    });
    $(document).on('change', ':file', function() {
        var input = $(this),
            numFiles = input.get(0).files ? input.get(0).files.length : 1,
            label = input.val().replace(/''/g, '/').replace(/.*'//, '');
        input.trigger('fileselect', [numFiles, label]);
    });
    $(':file').on('fileselect', function(event, numFiles, label) {
        var element = document.getElementById('filechoose');
        element.innerHTML = label;
    });
}
function showAnswer() {
    var element = document.getElementById('file-content');
    element.innerHTML = questionsFormatted[questionIndex].answer;
}
function showQuestion() {
    var element = document.getElementById('file-content');
    element.innerHTML = questionsFormatted[questionIndex].question;
}
function nextQuestion() {
    if (questionIndex < questionsFormatted.length - 1) questionIndex++ ;
    else questionIndex = 0;
    var element = document.getElementById('file-content');
    element.innerHTML = questionsFormatted[questionIndex].question;
    $( ".list-group-item" ).remove();
    for (var i = 0; i < questionsFormatted[questionIndex].alternatives.length ; i++) {
        $('#alternatives').append('<button type="button" class="list-group-item">' + questionsFormatted[questionIndex].alternatives[i] + '</button>');
    }
}
function previousQuestion() {
    if (questionIndex > 0) questionIndex-- ;
    else questionIndex = questionsFormatted.length - 1;
    var element = document.getElementById('file-content');
    element.innerHTML = questionsFormatted[questionIndex].question;
    $( ".list-group-item" ).remove();
    for (var i = 0; i < questionsFormatted[questionIndex].alternatives.length ; i++) {
        $('#alternatives').append('<button type="button" class="list-group-item">' + questionsFormatted[questionIndex].alternatives[i] + '</button>');
    }
}
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>Question tool</title>
    <script src="https://code.jquery.com/jquery-3.1.0.js"
            integrity="sha256-slogkvB1K3VOkzAI8QITxV3VzpOnkeNVsKvtkYLMjfk=" crossorigin="anonymous"></script>
    <!-- Latest compiled and minified CSS -->
    <link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.7/css/bootstrap.min.css"
          integrity="sha384-BVYiiSIFeK1dGmJRAkycuHAHRg32OmUcww7on3RYdg4Va+PmSTsz/K68vbdEjh4u" crossorigin="anonymous">
    <!-- Optional theme -->
    <link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.7/css/bootstrap-theme.min.css"
          integrity="sha384-rHyoN1iRsVXV4nD0JutlnGaslCJuC7uwjduW9SVrLvRYooPp2bWYgmgJQIXwl/Sp" crossorigin="anonymous">
    <script src="script.js"></script>
    <style>
        /* Move down content */
        body {
            padding-top: 20px;
            padding-bottom: 20px;
        }
    </style>
</head>
<body>
<div>
    <div class="container">
        <div class="jumbotron">
            <h3>Question Tool</h3>
            <label class="btn btn-default btn-file" id="filechoose">
                Choose File
                <input type="file" id="file-input" style="display: none;"/>
            </label>
            <div class="btn-group btn-group-justified" role="group" aria-label="...">
                <div class="btn-group" role="group">
                    <button type="button" class="btn btn-lg btn-primary" onclick="showAnswer()" role="button">
                        <span class="glyphicon glyphicon-arrow-down" aria-hidden="true"></span>Show Answer
                    </button>
                </div>
                <div class="btn-group" role="group">
                    <button type="button" class="btn btn-lg btn-success" onclick="showQuestion()" role="button">
                        <span class="glyphicon glyphicon-arrow-up" aria-hidden="true"></span>Show Question
                    </button>
                </div>
                <div class="btn-group" role="group">
                    <button type="button" class="btn btn-lg btn-danger" onclick="previousQuestion()" role="button">
                        <span class="glyphicon glyphicon-arrow-left" aria-hidden="true"></span>Previous Question
                    </button>
                </div>
                <div class="btn-group" role="group">
                    <button type="button" class="btn btn-lg btn-info" onclick="nextQuestion()" role="button">
                        <span class="glyphicon glyphicon-arrow-right" aria-hidden="true"></span>Next Question
                    </button>
                </div>
            </div>
            <div id="file-content" class="well"></div>
            <div id="alternatives" class="list-group">
            </div>
        </div>
    </div>
</div>
</body>
</html>

为什么它在在线测试中有效,而在浏览器中无效?

它不适用于您的原因是,与您在regex101.com上测试时使用的文本相反,您加载的文件使用'r'n作为换行序列,而不仅仅是'n

再加上默认情况下.元字符与'r不匹配,并且JavaScript不支持可能改变这种行为的s修饰符,则匹配次数更少或根本不匹配。

更具体地说:在正则表达式中,部分.+?在遇到'r时将停止匹配字符。首先,它这样做是因为它向前看,发现它可以将'r's'W匹配,但下一个'n[A-Z]Answer:A不匹配。因此,它随后回溯并尝试继续使用.+?部分,但这也失败了,因为'r与上面解释的不匹配。因此,匹配过程再次从正则表达式的开头开始,以找到潜在的下一个匹配。由于同样的原因,这种做法一次又一次地失败了。

要解决此问题,请更改两件事:

  • 在中间的's之后添加一个+,这样它不仅与'n匹配,而且与前面的'r匹配。

  • 'W?更改为'W*,因此它也可以匹配前面的'r

这应该有效:

/('s[A-Z]'.).+?(?=('s+[A-Z]'.)|('W*(Answer:)'W?))/g

虽然这解决了问题,但我也建议将此正则表达式进一步简化为:

/'s[A-Z]'..+?(?='s+[A-Z]'.|'W*Answer:)/g

值得注意的是,末尾的'W?没有什么意义:它要么与'W匹配,要么不匹配,在这两种情况下你都接受它