Grammalecte  Check-in [5e5ee6df40]

Overview
Comment:[build][core] backreferences for suggestions and messages
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | core | build | rg
Files: files | file ages | folders
SHA3-256: 5e5ee6df409fe87fbf3fa76f1a9f22b0cd94cbee8b6be4273f2d4f74b6a5b525
User & Date: olr on 2018-06-08 17:56:31
Other Links: branch diff | manifest | tags
Context
2018-06-09
09:46
[core] debug mode check-in: cb932c349b user: olr tags: core, rg
2018-06-08
17:56
[build][core] backreferences for suggestions and messages check-in: 5e5ee6df40 user: olr tags: build, core, rg
15:32
[build][core] disambiguation check-in: 9e87c7d854 user: olr tags: build, core, rg
Changes

Modified compile_rules_graph.py from [36585ad2c3] to [37d848c323].

318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
    print("  creating callables...")
    sPyCallables = "# generated code, do not edit\n"
    #sJSCallables = "// generated code, do not edit\nconst oEvalFunc = {\n"
    for sFuncName, sReturn in lFUNCTIONS:
        if sFuncName.startswith("g_c_"): # condition
            sParams = "lToken, nTokenOffset, sCountry, bCondMemo"
        elif sFuncName.startswith("g_m_"): # message
            sParams = "lToken"
        elif sFuncName.startswith("g_s_"): # suggestion
            sParams = "lToken"
        elif sFuncName.startswith("g_p_"): # preprocessor
            sParams = "lToken"
        elif sFuncName.startswith("g_d_"): # disambiguator
            sParams = "lToken, nTokenOffset"
        else:
            print("# Unknown function type in [" + sFuncName + "]")
            continue







|

|







318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
    print("  creating callables...")
    sPyCallables = "# generated code, do not edit\n"
    #sJSCallables = "// generated code, do not edit\nconst oEvalFunc = {\n"
    for sFuncName, sReturn in lFUNCTIONS:
        if sFuncName.startswith("g_c_"): # condition
            sParams = "lToken, nTokenOffset, sCountry, bCondMemo"
        elif sFuncName.startswith("g_m_"): # message
            sParams = "lToken, nTokenOffset"
        elif sFuncName.startswith("g_s_"): # suggestion
            sParams = "lToken, nTokenOffset"
        elif sFuncName.startswith("g_p_"): # preprocessor
            sParams = "lToken"
        elif sFuncName.startswith("g_d_"): # disambiguator
            sParams = "lToken, nTokenOffset"
        else:
            print("# Unknown function type in [" + sFuncName + "]")
            continue

Modified gc_core/py/lang_core/gc_engine.py from [16e9c944d3] to [28162284b3].

621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
...
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
...
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
...
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777







778
779
780
781
782
783
784
                            yield dGraph[dNode["<re_morph>"][sRegex]]
                    else:
                        if sNegPattern and any(re.search(sNegPattern, sMorph)  for sMorph in _oSpellChecker.getMorph(dToken["sValue"])):
                            continue
                        if any(re.search(sPattern, sMorph)  for sMorph in _oSpellChecker.getMorph(dToken["sValue"])):
                            yield dGraph[dNode["<re_morph>"][sRegex]]


    def parse (self, dPriority, sCountry="${country_default}", dOptions=None, bShowRuleId=False, bDebug=False, bContext=False):
        dErr = {}
        dPriority = {}  # Key = position; value = priority
        dOpt = _dOptions  if not dOptions  else dOptions
        lPointer = []
        bChange = False
        for dToken in self.lToken:
................................................................................
                            # grammar error
                            print("-")
                            nTokenErrorStart = nTokenOffset + eAct[0]
                            nTokenErrorEnd = nTokenOffset + eAct[1]
                            nErrorStart = self.nOffset + self.lToken[nTokenErrorStart]["nStart"]
                            nErrorEnd = self.nOffset + self.lToken[nTokenErrorEnd]["nEnd"]
                            if nErrorStart not in dErrs or eAct[2] > dPriority[nErrorStart]:
                                dErrs[nErrorStart] = self.createError(sWhat, nTokenErrorStart, nErrorStart, nErrorEnd, sLineId, sRuleId, True, eAct[3], eAct[4], bShowRuleId, "notype", bContext)
                                dPriority[nErrorStart] = eAct[2]
                        elif cActionType == "~":
                            # text processor
                            print("~")
                            self._tagAndPrepareTokenForRewriting(sWhat, nTokenOffset + eAct[0], nTokenOffset + eAct[1])
                            bChange = True
                        elif cActionType == "=":
................................................................................
                            print("# error: unknown action at " + sLineId)
                    elif cActionType == ">":
                        break
                except Exception as e:
                    raise Exception(str(e), sLineId)
        return bChange, dErrs

    def _createWriterError (self, sRepl, iFirstToken, nStart, nEnd, sLineId, sRuleId, bUppercase, sMsg, sURL, bShowRuleId, sOption, bContext):
        """Build the error report as a SingleProofreadingError struct for Writer (LO/OO).

        sRepl is "_" (no suggestion), "=" followed by the name of a callable found in the
        module globals (called with the token list), or a literal "|"-separated list of suggestions.
        sMsg is the message itself, or "=" followed by the name of a callable returning it.
        Suggestions are capitalized when bUppercase is set and the value of the token at
        iFirstToken starts with an uppercase letter.
        sOption and bContext are not used in the visible part of this method.
        Returns the filled struct.
        """
        xErr = SingleProofreadingError()
        #xErr = uno.createUnoStruct( "com.sun.star.linguistic2.SingleProofreadingError" )
        xErr.nErrorStart = nStart
        xErr.nErrorLength = nEnd - nStart       # the struct stores a length, not an end position
        xErr.nErrorType = PROOFREADING
        xErr.aRuleIdentifier = sRuleId
        # suggestions
        if sRepl[0:1] == "=":
            # computed suggestions: the text after "=" is the name of a callable stored in the module globals
            sSugg = globals()[sRepl[1:]](self.lToken)
            if sSugg:
                if bUppercase and self.lToken[iFirstToken]["sValue"][0:1].isupper():
                    xErr.aSuggestions = tuple(map(str.capitalize, sSugg.split("|")))
                else:
                    xErr.aSuggestions = tuple(sSugg.split("|"))
            else:
                # the callable returned nothing usable
                xErr.aSuggestions = ()
        elif sRepl == "_":
            # "_" explicitly means: no suggestion
            xErr.aSuggestions = ()
        else:
            # literal suggestions, separated by "|"
            if bUppercase and self.lToken[iFirstToken]["sValue"][0:1].isupper():
                xErr.aSuggestions = tuple(map(str.capitalize, sRepl.split("|")))
            else:
                xErr.aSuggestions = tuple(sRepl.split("|"))
        # Message
        sMessage = globals()[sMsg[1:]](self.lToken)  if sMsg[0:1] == "="  else sMsg
        xErr.aShortComment = sMessage   # sMessage.split("|")[0]     # in context menu
        xErr.aFullComment = sMessage   # sMessage.split("|")[-1]    # in dialog
        if bShowRuleId:
            # only the short comment receives the line id and the rule id
            xErr.aShortComment += "  " + sLineId + " # " + sRuleId
        # URL
        if sURL:
            # the URL is attached as a property named "FullCommentURL"
            p = PropertyValue()
................................................................................
            p.Name = "FullCommentURL"
            p.Value = sURL
            xErr.aProperties = (p,)
        else:
            xErr.aProperties = ()
        return xErr
                                                             
    def _createDictError (self, sRepl, iFirstToken, nStart, nEnd, sLineId, sRuleId, bUppercase, sMsg, sURL, bShowRuleId, sOption, bContext):
        "error as a dictionary"
        dErr = {}
        dErr["nStart"] = nStart
        dErr["nEnd"] = nEnd
        dErr["sLineId"] = sLineId
        dErr["sRuleId"] = sRuleId
        dErr["sType"] = sOption  if sOption  else "notype"
        # suggestions
        if sRepl[0:1] == "=":
            sugg = globals()[sRepl[1:]](self.lToken)
            if sugg:
                if bUppercase and self.lToken[iFirstToken]["sValue"][0:1].isupper():
                    dErr["aSuggestions"] = list(map(str.capitalize, sugg.split("|")))
                else:
                    dErr["aSuggestions"] = sugg.split("|")
            else:
                dErr["aSuggestions"] = []
        elif sRepl == "_":
            dErr["aSuggestions"] = []
        else:
            if bUppercase and self.lToken[iFirstToken]["sValue"][0:1].isupper():
                dErr["aSuggestions"] = list(map(str.capitalize, sRepl.split("|")))
            else:
                dErr["aSuggestions"] = sRepl.split("|")
        # Message
        dErr["sMessage"] = globals()[sMsg[1:]](self.lToken)  if sMsg[0:1] == "="  else sMsg
        if bShowRuleId:
            dErr["sMessage"] += "  " + sLineId + " # " + sRuleId
        # URL
        dErr["URL"] = sURL  if sURL  else ""
        # Context
        if bContext:
            dErr['sUnderlined'] = self.sSentence0[dErr["nStart"]:dErr["nEnd"]]
            dErr['sBefore'] = self.sSentence0[max(0,dErr["nStart"]-80):dErr["nStart"]]
            dErr['sAfter'] = self.sSentence0[dErr["nEnd"]:dErr["nEnd"]+80]
        return dErr








    def _tagAndPrepareTokenForRewriting (self, sWhat, nTokenRewriteStart, nTokenRewriteEnd, bUppercase=True):
        "text processor: rewrite tokens between <nTokenRewriteStart> and <nTokenRewriteEnd> position"
        if sWhat == "*":
            # purge text
            if nTokenRewriteEnd - nTokenRewriteStart == 0:
                self.lToken[nTokenRewriteStart]["bToRemove"] = True







<







 







|







 







|








|
|







|



|

|

|







 







|








|
|
|

|

|


|



|

|

|










>
>
>
>
>
>
>







621
622
623
624
625
626
627

628
629
630
631
632
633
634
...
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
...
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
...
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
                            yield dGraph[dNode["<re_morph>"][sRegex]]
                    else:
                        if sNegPattern and any(re.search(sNegPattern, sMorph)  for sMorph in _oSpellChecker.getMorph(dToken["sValue"])):
                            continue
                        if any(re.search(sPattern, sMorph)  for sMorph in _oSpellChecker.getMorph(dToken["sValue"])):
                            yield dGraph[dNode["<re_morph>"][sRegex]]


    def parse (self, dPriority, sCountry="${country_default}", dOptions=None, bShowRuleId=False, bDebug=False, bContext=False):
        dErr = {}
        dPriority = {}  # Key = position; value = priority
        dOpt = _dOptions  if not dOptions  else dOptions
        lPointer = []
        bChange = False
        for dToken in self.lToken:
................................................................................
                            # grammar error
                            print("-")
                            nTokenErrorStart = nTokenOffset + eAct[0]
                            nTokenErrorEnd = nTokenOffset + eAct[1]
                            nErrorStart = self.nOffset + self.lToken[nTokenErrorStart]["nStart"]
                            nErrorEnd = self.nOffset + self.lToken[nTokenErrorEnd]["nEnd"]
                            if nErrorStart not in dErrs or eAct[2] > dPriority[nErrorStart]:
                                dErrs[nErrorStart] = self.createError(sWhat, nTokenOffset, nTokenErrorStart, nErrorStart, nErrorEnd, sLineId, sRuleId, True, eAct[3], eAct[4], bShowRuleId, "notype", bContext)
                                dPriority[nErrorStart] = eAct[2]
                        elif cActionType == "~":
                            # text processor
                            print("~")
                            self._tagAndPrepareTokenForRewriting(sWhat, nTokenOffset + eAct[0], nTokenOffset + eAct[1])
                            bChange = True
                        elif cActionType == "=":
................................................................................
                            print("# error: unknown action at " + sLineId)
                    elif cActionType == ">":
                        break
                except Exception as e:
                    raise Exception(str(e), sLineId)
        return bChange, dErrs

    def _createWriterError (self, sSugg, nTokenOffset, iFirstToken, nStart, nEnd, sLineId, sRuleId, bUppercase, sMsg, sURL, bShowRuleId, sOption, bContext):
        """Build the error report as a SingleProofreadingError struct for Writer (LO/OO).

        sSugg is "_" (no suggestion), "=" followed by the name of a suggestion callable
        found in the module globals, or a literal "|"-separated list of suggestions in which
        backreferences are expanded by self._expand().
        sMsg is a literal message (backreferences expanded too), or "=" followed by the
        name of a message callable.
        nTokenOffset is forwarded to the callables and to self._expand(): the rule compiler
        declares suggestion and message callables with the parameters (lToken, nTokenOffset).
        Suggestions are capitalized when bUppercase is set and the value of the token at
        iFirstToken starts with an uppercase letter.
        sOption and bContext are not used by this method.
        Returns the filled struct.
        """
        xErr = SingleProofreadingError()
        #xErr = uno.createUnoStruct( "com.sun.star.linguistic2.SingleProofreadingError" )
        xErr.nErrorStart = nStart
        xErr.nErrorLength = nEnd - nStart       # the struct stores a length, not an end position
        xErr.nErrorType = PROOFREADING
        xErr.aRuleIdentifier = sRuleId
        # suggestions
        if sSugg[0:1] == "=":
            # computed suggestions: the callable needs the offset to locate the tokens it refers to
            sSugg = globals()[sSugg[1:]](self.lToken, nTokenOffset)
            if sSugg:
                if bUppercase and self.lToken[iFirstToken]["sValue"][0:1].isupper():
                    xErr.aSuggestions = tuple(map(str.capitalize, sSugg.split("|")))
                else:
                    xErr.aSuggestions = tuple(sSugg.split("|"))
            else:
                xErr.aSuggestions = ()
        elif sSugg == "_":
            # "_" explicitly means: no suggestion
            xErr.aSuggestions = ()
        else:
            # literal suggestions, separated by "|", with backreferences to expand
            if bUppercase and self.lToken[iFirstToken]["sValue"][0:1].isupper():
                xErr.aSuggestions = tuple(map(str.capitalize, self._expand(sSugg, nTokenOffset).split("|")))
            else:
                xErr.aSuggestions = tuple(self._expand(sSugg, nTokenOffset).split("|"))
        # Message
        sMessage = globals()[sMsg[1:]](self.lToken, nTokenOffset)  if sMsg[0:1] == "="  else self._expand(sMsg, nTokenOffset)
        xErr.aShortComment = sMessage   # sMessage.split("|")[0]     # in context menu
        xErr.aFullComment = sMessage   # sMessage.split("|")[-1]    # in dialog
        if bShowRuleId:
            # only the short comment receives the line id and the rule id
            xErr.aShortComment += "  " + sLineId + " # " + sRuleId
        # URL
        if sURL:
            # the URL is attached as a property named "FullCommentURL"
            p = PropertyValue()
            p.Name = "FullCommentURL"
            p.Value = sURL
            xErr.aProperties = (p,)
        else:
            xErr.aProperties = ()
        return xErr
                                                             
    def _createDictError (self, sSugg, nTokenOffset, iFirstToken, nStart, nEnd, sLineId, sRuleId, bUppercase, sMsg, sURL, bShowRuleId, sOption, bContext):
        "error as a dictionary"
        dErr = {}
        dErr["nStart"] = nStart
        dErr["nEnd"] = nEnd
        dErr["sLineId"] = sLineId
        dErr["sRuleId"] = sRuleId
        dErr["sType"] = sOption  if sOption  else "notype"
        # suggestions
        if sSugg[0:1] == "=":
            sSugg = globals()[sSugg[1:]](self.lToken)
            if sSugg:
                if bUppercase and self.lToken[iFirstToken]["sValue"][0:1].isupper():
                    dErr["aSuggestions"] = list(map(str.capitalize, sSugg.split("|")))
                else:
                    dErr["aSuggestions"] = sSugg.split("|")
            else:
                dErr["aSuggestions"] = []
        elif sSugg == "_":
            dErr["aSuggestions"] = []
        else:
            if bUppercase and self.lToken[iFirstToken]["sValue"][0:1].isupper():
                dErr["aSuggestions"] = list(map(str.capitalize, self._expand(sSugg, nTokenOffset).split("|")))
            else:
                dErr["aSuggestions"] = self._expand(sSugg, nTokenOffset).split("|")
        # Message
        dErr["sMessage"] = globals()[sMsg[1:]](self.lToken)  if sMsg[0:1] == "="  else self._expand(sMsg, nTokenOffset)
        if bShowRuleId:
            dErr["sMessage"] += "  " + sLineId + " # " + sRuleId
        # URL
        dErr["URL"] = sURL  if sURL  else ""
        # Context
        if bContext:
            dErr['sUnderlined'] = self.sSentence0[dErr["nStart"]:dErr["nEnd"]]
            dErr['sBefore'] = self.sSentence0[max(0,dErr["nStart"]-80):dErr["nStart"]]
            dErr['sAfter'] = self.sSentence0[dErr["nEnd"]:dErr["nEnd"]+80]
        return dErr

    def _expand (self, sMsg, nTokenOffset):
        print(sMsg)
        for m in re.finditer(r"\\([0-9]+)", sMsg):
            sMsg = sMsg.replace(m.group(0), self.lToken[int(m.group(1))+nTokenOffset]["sValue"])
        print(sMsg)
        return sMsg

    def _tagAndPrepareTokenForRewriting (self, sWhat, nTokenRewriteStart, nTokenRewriteEnd, bUppercase=True):
        "text processor: rewrite tokens between <nTokenRewriteStart> and <nTokenRewriteEnd> position"
        if sWhat == "*":
            # purge text
            if nTokenRewriteEnd - nTokenRewriteStart == 0:
                self.lToken[nTokenRewriteStart]["bToRemove"] = True

Modified gc_lang/fr/rules_graph.grx from [4b06e6712a] to [7f8d7a1159].

71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
...
102
103
104
105
106
107
108
109
110
111
112
    il ne pense qu’ à sa gueule
        <<- ~4:7>> que|Z|a|perdu

TEST: il ne pense qu’à sa gueule.


__avoir_confiance_en__
    >avoir confiance (dans) [moi|toi|soi|lui|elle|nous|vous|eux|elles]
        <<-  -1>> en                                                                                # Avoir confiance en quelqu’un ou quelque chose.|http://grammalecte.net

TEST: Elle avait confiance {{dans}} lui.


__code_legacy__
    legacy code
    code legacy
        <<- -1:2>> code hérité|code reliquat                                                        # Anglicisme superflu.

TEST: c’est du {{legacy code}}.
TEST: ce {{code legacy}} est un cauchemar


__être_en_xxxx__
    [>être|>rester|>demeurer] an [désaccord|accord]
................................................................................
        <<- -2>> plaisir                                                                            # Faire plaisir : dans cette locution, “plaisir” doit être au singulier.
        <<- ~2>> *

TEST: Ça me fait {{plaisirs}}.


__test__
    je  ~préf[éè]r  [que|qu’]  @(?::Os|:M)¬:X  @:I
        <<- morph(\1, ":V") and morph(\4, ":Os|:M", ":X") -5>> SUBJONCTIF                  # SUBJONCTIF.

TEST: je préférerais qu’Isabelle {{est}} partie.







|
|







|







 







|
|

|
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
...
102
103
104
105
106
107
108
109
110
111
112
    il ne pense qu’ à sa gueule
        <<- ~4:7>> que|Z|a|perdu

TEST: il ne pense qu’à sa gueule.


__avoir_confiance_en__
    >avoir confiance dans [moi|toi|soi|lui|elle|nous|vous|eux|elles]
        <<-  -3>> en                                                                                # Avoir confiance en quelqu’un ou quelque chose.\3 \1 \2 \3|http://grammalecte.net

TEST: Elle avait confiance {{dans}} lui.


__code_legacy__
    legacy code
    code legacy
        <<- -1:2>> code hérité|code reliquat|\1-\2|\2-\1                                            # \1 \2. Anglicisme superflu.

TEST: c’est du {{legacy code}}.
TEST: ce {{code legacy}} est un cauchemar


__être_en_xxxx__
    [>être|>rester|>demeurer] an [désaccord|accord]
................................................................................
        <<- -2>> plaisir                                                                            # Faire plaisir : dans cette locution, “plaisir” doit être au singulier.
        <<- ~2>> *

TEST: Ça me fait {{plaisirs}}.


__test__
    je  ~co[mn]putes?  [que|qu’]  @(?::Os|:M)¬:X  @:I
        <<- morph(\4, ":Os|:M", ":X") -5>> \1|\5                                                    # SUBJONCTIF.

TEST: je conpute qu’Isabelle {{est}} partie.