Grammalecte  Check-in [7a62b9fd73]

Overview
Comment: [cli] add spellchecker suggestions (patch from Stéphane Veyret)
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk | cli
Files: files | file ages | folders
SHA3-256: 7a62b9fd735fe6575944584330ec4d37cfe868880c461ea063fa93c69ac80a6f
User & Date: olr on 2017-11-12 18:25:15
Original Comment: [cli] add spellchecker suggestions
Other Links: manifest | tags
Context
2017-11-12
18:33
[cli] change options name check-in: 1254fa8cec user: olr tags: cli, trunk
18:25
[cli] add spellchecker suggestions (patch from Stéphane Veyret) check-in: 7a62b9fd73 user: olr tags: cli, trunk
10:59
[fx] CSS protection for hyphenation check-in: 25276d751a user: olr tags: fx, trunk
Changes

Modified cli.py from [68793c6981] to [b74639032d].

     1         -#!python3
            1  +#!/usr/bin/env python3
     2      2   
     3      3   import sys
     4      4   import os.path
     5      5   import argparse
     6      6   import json
     7      7   
     8      8   import grammalecte.fr as gce
................................................................................
    40     40       if sys.platform == "win32":
    41     41           # Apparently, the console transforms «’» in «'».
    42     42           # So we reverse it to avoid many useless warnings.
    43     43           sText = sText.replace("'", "’")
    44     44       return sText
    45     45   
    46     46   
    47         -def _getErrors (sText, oTokenizer, oDict, bContext=False, bDebug=False):
           47  +def _getErrors (sText, oTokenizer, oDict, bContext=False, bSpellSuggestions=False, bDebug=False):
    48     48       "returns a tuple: (grammar errors, spelling errors)"
    49     49       aGrammErrs = gce.parse(sText, "FR", bDebug=bDebug, bContext=bContext)
    50     50       aSpellErrs = []
    51     51       for dToken in oTokenizer.genTokens(sText):
    52     52           if dToken['sType'] == "WORD" and not oDict.isValidToken(dToken['sValue']):
           53  +            if bSpellSuggestions:
           54  +                dToken['aSuggestions'] = oDict.suggest(dToken['sValue'])
    53     55               aSpellErrs.append(dToken)
    54     56       return aGrammErrs, aSpellErrs
    55     57   
    56     58   
    57         -def generateText (sText, oTokenizer, oDict, bDebug=False, bEmptyIfNoErrors=False, nWidth=100):
    58         -    aGrammErrs, aSpellErrs = _getErrors(sText, oTokenizer, oDict, False, bDebug)
           59  +def generateText (sText, oTokenizer, oDict, bDebug=False, bEmptyIfNoErrors=False, bSpellSuggestions=False, nWidth=100):
           60  +    aGrammErrs, aSpellErrs = _getErrors(sText, oTokenizer, oDict, False, bSpellSuggestions, bDebug)
    59     61       if bEmptyIfNoErrors and not aGrammErrs and not aSpellErrs:
    60     62           return ""
    61     63       return txt.generateParagraph(sText, aGrammErrs, aSpellErrs, nWidth)
    62     64   
    63     65   
    64         -def generateJSON (iIndex, sText, oTokenizer, oDict, bContext=False, bDebug=False, bEmptyIfNoErrors=False, lLineSet=None, bReturnText=False):
    65         -    aGrammErrs, aSpellErrs = _getErrors(sText, oTokenizer, oDict, bContext, bDebug)
           66  +def generateJSON (iIndex, sText, oTokenizer, oDict, bContext=False, bDebug=False, bEmptyIfNoErrors=False, bSpellSuggestions=False, lLineSet=None, bReturnText=False):
           67  +    aGrammErrs, aSpellErrs = _getErrors(sText, oTokenizer, oDict, bContext, bSpellSuggestions, bDebug)
    66     68       aGrammErrs = list(aGrammErrs)
    67     69       if bEmptyIfNoErrors and not aGrammErrs and not aSpellErrs:
    68     70           return ""
    69     71       if lLineSet:
    70     72           aGrammErrs, aSpellErrs = txt.convertToXY(aGrammErrs, aSpellErrs, lLineSet)
    71     73           return json.dumps({ "lGrammarErrors": aGrammErrs, "lSpellingErrors": aSpellErrs }, ensure_ascii=False)
    72     74       if bReturnText:
................................................................................
   110    112       xParser.add_argument("-ff", "--file_to_file", help="parse file (UTF-8 required!) and create a result file (*.res.txt)", type=str)
   111    113       xParser.add_argument("-owe", "--only_when_errors", help="display results only when there are errors", action="store_true")
   112    114       xParser.add_argument("-j", "--json", help="generate list of errors in JSON (only with option --file or --file_to_file)", action="store_true")
   113    115       xParser.add_argument("-cl", "--concat_lines", help="concatenate lines not separated by an empty paragraph (only with option --file or --file_to_file)", action="store_true")
   114    116       xParser.add_argument("-tf", "--textformatter", help="auto-format text according to typographical rules (unavailable with option --concat_lines)", action="store_true")
   115    117       xParser.add_argument("-tfo", "--textformatteronly", help="auto-format text and disable grammar checking (only with option --file or --file_to_file)", action="store_true")
   116    118       xParser.add_argument("-ctx", "--context", help="return errors with context (only with option --json)", action="store_true")
          119  +    xParser.add_argument("-as", "--add_suggestions", help="add suggestions for spelling errors (only with option --file or --file_to_file)", action="store_true")
   117    120       xParser.add_argument("-w", "--width", help="width in characters (40 < width < 200; default: 100)", type=int, choices=range(40,201,10), default=100)
   118    121       xParser.add_argument("-lo", "--list_options", help="list options", action="store_true")
   119    122       xParser.add_argument("-lr", "--list_rules", nargs="?", help="list rules [regex pattern as filter]", const="*")
          123  +    xParser.add_argument("-ls", "--list_suggestions", help="list suggestions", type=str)
   120    124       xParser.add_argument("-on", "--opt_on", nargs="+", help="activate options")
   121    125       xParser.add_argument("-off", "--opt_off", nargs="+", help="deactivate options")
   122    126       xParser.add_argument("-roff", "--rule_off", nargs="+", help="deactivate rules")
   123    127       xParser.add_argument("-d", "--debug", help="debugging mode (only in interactive mode)", action="store_true")
   124    128       xArgs = xParser.parse_args()
   125    129   
   126    130       gce.load()
................................................................................
   134    138   
   135    139       if xArgs.list_options or xArgs.list_rules:
   136    140           if xArgs.list_options:
   137    141               gce.displayOptions("fr")
   138    142           if xArgs.list_rules:
   139    143               gce.displayRules(None  if xArgs.list_rules == "*"  else xArgs.list_rules)
   140    144           exit()
          145  +
          146  +    if xArgs.list_suggestions:
          147  +        lSugg = oDict.suggest(xArgs.list_suggestions)
          148  +        if xArgs.json:
          149  +            sText = json.dumps({ "aSuggestions": lSugg }, ensure_ascii=False)
          150  +        else:
          151  +            sText = "Suggestions : " + " | ".join(lSugg)
          152  +        echo(sText)
          153  +        exit()
   141    154   
   142    155       if not xArgs.json:
   143    156           xArgs.context = False
   144    157   
   145    158       gce.setOptions({"html": True, "latex": True})
   146    159       if xArgs.opt_on:
   147    160           gce.setOptions({ opt:True  for opt in xArgs.opt_on  if opt in gce.getOptions() })
................................................................................
   164    177               for i, sText in enumerate(readfile(sFile), 1):
   165    178                   if xArgs.textformatter or xArgs.textformatteronly:
   166    179                       sText = oTF.formatText(sText)
   167    180                   if xArgs.textformatteronly:
   168    181                       output(sText, hDst)
   169    182                   else:
   170    183                       if xArgs.json:
   171         -                        sText = generateJSON(i, sText, oTokenizer, oDict, bContext=xArgs.context, bDebug=False, bEmptyIfNoErrors=xArgs.only_when_errors, bReturnText=xArgs.textformatter)
          184  +                        sText = generateJSON(i, sText, oTokenizer, oDict, bContext=xArgs.context, bDebug=False, bEmptyIfNoErrors=xArgs.only_when_errors, bSpellSuggestions=xArgs.add_suggestions, bReturnText=xArgs.textformatter)
   172    185                       else:
   173         -                        sText = generateText(sText, oTokenizer, oDict, bDebug=False, bEmptyIfNoErrors=xArgs.only_when_errors, nWidth=xArgs.width)
          186  +                        sText = generateText(sText, oTokenizer, oDict, bDebug=False, bEmptyIfNoErrors=xArgs.only_when_errors, bSpellSuggestions=xArgs.add_suggestions, nWidth=xArgs.width)
   174    187                       if sText:
   175    188                           if xArgs.json and bComma:
   176    189                               output(",\n", hDst)
   177    190                           output(sText, hDst)
   178    191                           bComma = True
   179    192                   if hDst:
   180    193                       echo("§ %d\r" % i, end="", flush=True)
   181    194           else:
   182    195               # concaténation des lignes non séparées par une ligne vide
   183    196               for i, lLine in enumerate(readfileAndConcatLines(sFile), 1):
   184    197                   sText, lLineSet = txt.createParagraphWithLines(lLine)
   185    198                   if xArgs.json:
   186         -                    sText = generateJSON(i, sText, oTokenizer, oDict, bContext=xArgs.context, bDebug=False, bEmptyIfNoErrors=xArgs.only_when_errors, lLineSet=lLineSet)
          199  +                    sText = generateJSON(i, sText, oTokenizer, oDict, bContext=xArgs.context, bDebug=False, bEmptyIfNoErrors=xArgs.only_when_errors, bSpellSuggestions=xArgs.add_suggestions, lLineSet=lLineSet)
   187    200                   else:
   188         -                    sText = generateText(sText, oTokenizer, oDict, bDebug=False, bEmptyIfNoErrors=xArgs.only_when_errors, nWidth=xArgs.width)
          201  +                    sText = generateText(sText, oTokenizer, oDict, bDebug=False, bEmptyIfNoErrors=xArgs.only_when_errors, bSpellSuggestions=xArgs.add_suggestions, nWidth=xArgs.width)
   189    202                   if sText:
   190    203                       if xArgs.json and bComma:
   191    204                           output(",\n", hDst)
   192    205                       output(sText, hDst)
   193    206                       bComma = True
   194    207                   if hDst:
   195    208                       echo("§ %d\r" % i, end="", flush=True)

Modified gc_core/py/text.py from [72d4931466] to [133d154e72].

    66     66                       sErrLine += " " * (nEnd - len(sErrLine))
    67     67                   sErrLine = sErrLine[:nStart] + "°" * (nEnd - nStart) + sErrLine[nEnd:]
    68     68               else:
    69     69                   break
    70     70           if sErrLine:
    71     71               sText += sErrLine + "\n"
    72     72           if nGrammErr:
    73         -            for dErr in lGrammErrs[:nGrammErr]:
    74         -                sMsg, *others = getReadableError(dErr).split("\n")
    75         -                sText += "\n".join(textwrap.wrap(sMsg, nWidth, subsequent_indent="  ")) + "\n"
    76         -                for arg in others:
    77         -                    sText += "\n".join(textwrap.wrap(arg, nWidth, subsequent_indent="    ")) + "\n"
    78         -            sText += "\n"
           73  +            sText += getReadableErrors(lGrammErrs[:nGrammErr], nWidth)
    79     74               del lGrammErrs[0:nGrammErr]
    80     75           if nSpellErr:
           76  +            sText += getReadableErrors(lSpellErrs[:nSpellErr], nWidth, True)
    81     77               del lSpellErrs[0:nSpellErr]
    82     78           nOffset += ln
    83     79       return sText
    84     80   
    85     81   
    86         -def getReadableError (dErr):
           82  +def getReadableErrors (lErrs, nWidth, bSpell=False):
           83  +    "Returns lErrs errors as readable errors"
           84  +    sErrors = ""
           85  +    for dErr in lErrs:
           86  +        if not bSpell or "aSuggestions" in dErr:
           87  +            sMsg, *others = getReadableError(dErr, bSpell).split("\n")
           88  +            sErrors += "\n".join(textwrap.wrap(sMsg, nWidth, subsequent_indent="  ")) + "\n"
           89  +            for arg in others:
           90  +                sErrors += "\n".join(textwrap.wrap(arg, nWidth, subsequent_indent="    ")) + "\n"
           91  +    if sErrors != "":
           92  +        sErrors += "\n"
           93  +    return sErrors
           94  +
           95  +
           96  +def getReadableError (dErr, bSpell=False):
    87     97       "Returns an error dErr as a readable error"
    88     98       try:
    89         -        s = u"* {nStart}:{nEnd}  # {sLineId} / {sRuleId}:\n".format(**dErr)
    90         -        s += "  " + dErr.get("sMessage", "# error : message not found")
           99  +        if bSpell:
          100  +            s = u"* {nStart}:{nEnd}  # {sValue}:".format(**dErr)
          101  +        else:
          102  +            s = u"* {nStart}:{nEnd}  # {sLineId} / {sRuleId}:\n".format(**dErr)
          103  +            s += "  " + dErr.get("sMessage", "# error : message not found")
    91    104           if dErr.get("aSuggestions", None):
    92    105               s += "\n  > Suggestions : " + " | ".join(dErr.get("aSuggestions", "# error : suggestions not found"))
    93    106           if dErr.get("URL", None):
    94    107               s += "\n  > URL: " + dErr["URL"]
    95    108           return s
    96    109       except KeyError:
    97    110           return u"* Non-compliant error: {}".format(dErr)