Grammalecte  Check-in [3a5a4d302e]

Overview
Comment:[core][build] nTokenOffset necessary for text processor functions
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | core | build | rg
Files: files | file ages | folders
SHA3-256: 3a5a4d302e52d67c4e2f163eb1e682b9c6a59902f3f567cf2dbd1dd19e6077fa
User & Date: olr on 2018-06-27 07:43:53
Other Links: branch diff | manifest | tags
Context
2018-06-27
10:02
[core] gc engine: use expand for text processor too check-in: 816624027a user: olr tags: core, rg
07:43
[core][build] nTokenOffset necessary for text processor functions check-in: 3a5a4d302e user: olr tags: build, core, rg
07:42
[fr] mots composés avec -là (utilisation de slice) check-in: 3449bb65ee user: olr tags: fr, rg
Changes

Modified compile_rules_graph.py from [19fb543170] to [4e3eba14a0].

356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
        if sFuncName.startswith("_g_c_"): # condition
            sParams = "lToken, nTokenOffset, nLastToken, sCountry, bCondMemo, dTags, sSentence, sSentence0"
        elif sFuncName.startswith("g_m_"): # message
            sParams = "lToken, nTokenOffset"
        elif sFuncName.startswith("_g_s_"): # suggestion
            sParams = "lToken, nTokenOffset"
        elif sFuncName.startswith("_g_p_"): # preprocessor
            sParams = "lToken"
        elif sFuncName.startswith("_g_d_"): # disambiguator
            sParams = "lToken, nTokenOffset"
        else:
            print("# Unknown function type in [" + sFuncName + "]")
            continue
        sPyCallables += "def {} ({}):\n".format(sFuncName, sParams)
        sPyCallables += "    return " + sReturn + "\n"







|







356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
        if sFuncName.startswith("_g_c_"): # condition
            sParams = "lToken, nTokenOffset, nLastToken, sCountry, bCondMemo, dTags, sSentence, sSentence0"
        elif sFuncName.startswith("g_m_"): # message
            sParams = "lToken, nTokenOffset"
        elif sFuncName.startswith("_g_s_"): # suggestion
            sParams = "lToken, nTokenOffset"
        elif sFuncName.startswith("_g_p_"): # preprocessor
            sParams = "lToken, nTokenOffset"
        elif sFuncName.startswith("_g_d_"): # disambiguator
            sParams = "lToken, nTokenOffset"
        else:
            print("# Unknown function type in [" + sFuncName + "]")
            continue
        sPyCallables += "def {} ({}):\n".format(sFuncName, sParams)
        sPyCallables += "    return " + sReturn + "\n"

Modified gc_core/py/lang_core/gc_engine.py from [ca7f3318b7] to [f1fdc10ea4].

769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
...
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
...
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
...
918
919
920
921
922
923
924
925
926
927

928
929
930
931
932

933
934
935
936
937
938
939
                                        dError[nErrorStart] = self.createError(sWhat, nTokenOffset, nTokenErrorStart, nErrorStart, nErrorEnd, sLineId, sRuleId, True, eAct[3], eAct[4], bShowRuleId, "notype", bContext)
                                        dPriority[nErrorStart] = eAct[2]
                                        if bDebug:
                                            print("ERROR:", sRuleId, dError[nErrorStart])
                            elif cActionType == "~":
                                # text processor
                                nEndToken = (nTokenOffset + eAct[1])  if eAct[1]  else nLastToken
                                self._tagAndPrepareTokenForRewriting(sWhat, nTokenOffset + eAct[0], nEndToken, bDebug)
                                if bDebug:
                                    print("RW:", sRuleId)
                                bChange = True
                            elif cActionType == "=":
                                # disambiguation
                                globals()[sWhat](self.lToken, nTokenOffset)
                                if bDebug:
................................................................................
                                    print("DA:", sRuleId)
                            elif cActionType == ">":
                                # we do nothing, this test is just a condition to apply all following actions
                                if bDebug:
                                    print(">>>", sRuleId)
                                pass
                            elif cActionType == "/":
                                # tags
                                nTokenTag = nTokenOffset + eAct[0]
                                if sWhat not in self.dTags:
                                    self.dTags[sWhat] = (nTokenTag, nTokenTag)
                                elif nTokenTag > self.dTags[sWhat][1]:
                                    self.dTags[sWhat] = (self.dTags[sWhat][0], nTokenTag)
                                if bDebug:
                                    print("/", sRuleId)
................................................................................
    def _expand (self, sMsg, nTokenOffset):
        #print("*", sMsg)
        for m in re.finditer(r"\\([0-9]+)", sMsg):
            sMsg = sMsg.replace(m.group(0), self.lToken[int(m.group(1))+nTokenOffset]["sValue"])
        #print(">", sMsg)
        return sMsg

    def _tagAndPrepareTokenForRewriting (self, sWhat, nTokenRewriteStart, nTokenRewriteEnd, bUppercase=True, bDebug=False):
        "text processor: rewrite tokens between <nTokenRewriteStart> and <nTokenRewriteEnd> position"
        if bDebug:
            print("REWRITING:", nTokenRewriteStart, nTokenRewriteEnd)
        if sWhat == "*":
            # purge text
            if nTokenRewriteEnd - nTokenRewriteStart == 0:
                self.lToken[nTokenRewriteStart]["bToRemove"] = True
................................................................................
            if nTokenRewriteEnd - nTokenRewriteStart == 0:
                self.lToken[nTokenRewriteStart]["sNewValue"] = "_"
            else:
                for i in range(nTokenRewriteStart, nTokenRewriteEnd+1):
                    self.lToken[i]["sNewValue"] = "_"
        else:
            if sWhat.startswith("="):
                sWhat = globals()[sWhat[1:]](self.lToken)
            bUppercase = bUppercase and self.lToken[nTokenRewriteStart]["sValue"][0:1].isupper()
            if nTokenRewriteEnd - nTokenRewriteStart == 0:

                sWhat = sWhat + " " * (len(self.lToken[nTokenRewriteStart]["sValue"])-len(sWhat))
                if bUppercase:
                    sWhat = sWhat[0:1].upper() + sWhat[1:]
                self.lToken[nTokenRewriteStart]["sNewValue"] = sWhat
            else:

                lTokenValue = sWhat.split("|")
                if len(lTokenValue) != (nTokenRewriteEnd - nTokenRewriteStart + 1):
                    print("Error. Text processor: number of replacements != number of tokens.")
                    return
                for i, sValue in zip(range(nTokenRewriteStart, nTokenRewriteEnd+1), lTokenValue):
                    if bUppercase:
                        sValue = sValue[0:1].upper() + sValue[1:]







|







 







|







 







|







 







|


>





>







769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
...
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
...
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
...
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
                                        dError[nErrorStart] = self.createError(sWhat, nTokenOffset, nTokenErrorStart, nErrorStart, nErrorEnd, sLineId, sRuleId, True, eAct[3], eAct[4], bShowRuleId, "notype", bContext)
                                        dPriority[nErrorStart] = eAct[2]
                                        if bDebug:
                                            print("ERROR:", sRuleId, dError[nErrorStart])
                            elif cActionType == "~":
                                # text processor
                                nEndToken = (nTokenOffset + eAct[1])  if eAct[1]  else nLastToken
                                self._tagAndPrepareTokenForRewriting(sWhat, nTokenOffset + eAct[0], nEndToken, nTokenOffset, bDebug)
                                if bDebug:
                                    print("RW:", sRuleId)
                                bChange = True
                            elif cActionType == "=":
                                # disambiguation
                                globals()[sWhat](self.lToken, nTokenOffset)
                                if bDebug:
................................................................................
                                    print("DA:", sRuleId)
                            elif cActionType == ">":
                                # we do nothing, this test is just a condition to apply all following actions
                                if bDebug:
                                    print(">>>", sRuleId)
                                pass
                            elif cActionType == "/":
                                # sentence tags
                                nTokenTag = nTokenOffset + eAct[0]
                                if sWhat not in self.dTags:
                                    self.dTags[sWhat] = (nTokenTag, nTokenTag)
                                elif nTokenTag > self.dTags[sWhat][1]:
                                    self.dTags[sWhat] = (self.dTags[sWhat][0], nTokenTag)
                                if bDebug:
                                    print("/", sRuleId)
................................................................................
    def _expand (self, sMsg, nTokenOffset):
        #print("*", sMsg)
        for m in re.finditer(r"\\([0-9]+)", sMsg):
            sMsg = sMsg.replace(m.group(0), self.lToken[int(m.group(1))+nTokenOffset]["sValue"])
        #print(">", sMsg)
        return sMsg

    def _tagAndPrepareTokenForRewriting (self, sWhat, nTokenRewriteStart, nTokenRewriteEnd, nTokenOffset, bUppercase=True, bDebug=False):
        "text processor: rewrite tokens between <nTokenRewriteStart> and <nTokenRewriteEnd> position"
        if bDebug:
            print("REWRITING:", nTokenRewriteStart, nTokenRewriteEnd)
        if sWhat == "*":
            # purge text
            if nTokenRewriteEnd - nTokenRewriteStart == 0:
                self.lToken[nTokenRewriteStart]["bToRemove"] = True
................................................................................
            if nTokenRewriteEnd - nTokenRewriteStart == 0:
                self.lToken[nTokenRewriteStart]["sNewValue"] = "_"
            else:
                for i in range(nTokenRewriteStart, nTokenRewriteEnd+1):
                    self.lToken[i]["sNewValue"] = "_"
        else:
            if sWhat.startswith("="):
                sWhat = globals()[sWhat[1:]](self.lToken, nTokenOffset)
            bUppercase = bUppercase and self.lToken[nTokenRewriteStart]["sValue"][0:1].isupper()
            if nTokenRewriteEnd - nTokenRewriteStart == 0:
                # one token
                sWhat = sWhat + " " * (len(self.lToken[nTokenRewriteStart]["sValue"])-len(sWhat))
                if bUppercase:
                    sWhat = sWhat[0:1].upper() + sWhat[1:]
                self.lToken[nTokenRewriteStart]["sNewValue"] = sWhat
            else:
                # several tokens
                lTokenValue = sWhat.split("|")
                if len(lTokenValue) != (nTokenRewriteEnd - nTokenRewriteStart + 1):
                    print("Error. Text processor: number of replacements != number of tokens.")
                    return
                for i, sValue in zip(range(nTokenRewriteStart, nTokenRewriteEnd+1), lTokenValue):
                    if bUppercase:
                        sValue = sValue[0:1].upper() + sValue[1:]