Grammalecte  Check-in [5bf409e44a]

Overview
Comment: [core] spellchecking for error messages
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk | core
Files: files | file ages | folders
SHA3-256: 5bf409e44afcfdbdd20f55444fcd38e7bb0f1ab82ef0aed44acc6d6a9ecd017b
User & Date: olr on 2020-11-23 20:44:24
Other Links: manifest | tags
Context
2020-11-24
21:27
[fr] corrections des messages check-in: b568531f02 user: olr tags: fr, trunk
2020-11-23
20:44
[core] spellchecking for error messages check-in: 5bf409e44a user: olr tags: core, trunk
20:43
[fr] phonet_simil: danse dance check-in: 07bd6248b1 user: olr tags: fr, trunk
Changes

Modified gc_core/py/lang_core/tests_core.py from [9e599f13ec] to [36a2155509].

    60     60   
    61     61       @classmethod
    62     62       def setUpClass (cls):
    63     63           gc_engine.load()
    64     64           cls._zError = re.compile(r"\{\{.*?\}\}")
    65     65           cls._zRuleEnd = re.compile(r"_a\d+_\d+$")
    66     66           cls._aTestedRules = set()
           67  +        cls._oSpellChecker = gc_engine.getSpellChecker()
    67     68   
    68     69       def test_parse (self):
    69     70           zOption = re.compile("^__([a-zA-Z0-9]+)__ ")
    70     71           spHere, _ = os.path.split(__file__)
    71     72           spfParsingTest = os.path.join(spHere, "gc_test.txt")
    72     73           if not os.path.exists(spfParsingTest):
    73     74               print(f"No file <gc_test.txt> in <{spHere}>")
................................................................................
   140    141           for dErr in sorted(aErrs, key=lambda d: d["nStart"]):
   141    142               sRes = sRes[:dErr["nStart"]] + "~" * (dErr["nEnd"] - dErr["nStart"]) + sRes[dErr["nEnd"]:]
   142    143               sListErr += "    * {sLineId} / {sRuleId}  at  {nStart}:{nEnd}\n".format(**dErr)
   143    144               lAllSugg.append("|".join(dErr["aSuggestions"]))
   144    145               self._aTestedRules.add(dErr["sRuleId"].rstrip("0123456789"))
   145    146               # test messages
   146    147               if False:
   147         -                aMsgErrs = gc_engine.parse(purgeMessage(dErr["sMessage"]))
   148         -                if aMsgErrs or "<start>" in dErr["sMessage"] or "<end>" in dErr["sMessage"]:
   149         -                    aSelectedErrs = [ dMsgErr  for dMsgErr in sorted(aMsgErrs, key=lambda d: d["nStart"])  if self._zRuleEnd.sub("", dMsgErr["sRuleId"]) != self._zRuleEnd.sub("", dErr["sRuleId"]) ]
   150         -                    if aSelectedErrs:
   151         -                        print("\n# Error in: <" + dErr["sMessage"] + ">\n    " + dErr["sLineId"] + " / " + dErr["sRuleId"])
   152         -                        for dMsgErr in aSelectedErrs:
   153         -                            print("        error: {sLineId} / {sRuleId}  at  {nStart}:{nEnd}".format(**dMsgErr))
          148  +                aGramErrs = gc_engine.parse(purgeMessage(dErr["sMessage"]))
          149  +                aGramErrs = [ dMsgErr  for dMsgErr in sorted(aGramErrs, key=lambda d: d["nStart"])  if self._zRuleEnd.sub("", dMsgErr["sRuleId"]) != self._zRuleEnd.sub("", dErr["sRuleId"]) ]
          150  +                aSpellErrs = self._oSpellChecker.parseParagraph(re.sub("‹\\w+›", lambda m: " " * len(m.group(0)), dErr["sMessage"]))
          151  +                if aGramErrs or aSpellErrs or "<start>" in dErr["sMessage"] or "<end>" in dErr["sMessage"]:
          152  +                    print("\n# Error in: <" + dErr["sMessage"] + ">\n    " + dErr["sLineId"] + " / " + dErr["sRuleId"])
          153  +                    for dMsgErr in aGramErrs:
          154  +                        print("        error: {sLineId} / {sRuleId}  at  {nStart}:{nEnd}".format(**dMsgErr))
          155  +                    for dMsgErr in aSpellErrs:
          156  +                        print("        spelling mistake: <{sValue}>  at {nStart}:{nEnd}".format(**dMsgErr))
   154    157           return sRes, sListErr, "|||".join(lAllSugg)
   155    158   
   156    159       def _getExpectedErrors (self, sLine):
   157    160           sRes = " " * len(sLine)
   158    161           for i, m in enumerate(self._zError.finditer(sLine)):
   159    162               nStart = m.start() - (4 * i)
   160    163               nEnd = m.end() - (4 * (i+1))