Grammalecte  Check-in [22a9b5a923]

Overview
Comment:[build][core][fr] tags for sentence
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | fr | core | build | rg
Files: files | file ages | folders
SHA3-256: 22a9b5a923787cdcf4bf410058b538c31cf2bea1d9d37c67e79c83de34d27e06
User & Date: olr on 2018-06-22 11:11:33
Other Links: branch diff | manifest | tags
Context
2018-06-22
12:23
[build] check casing within rules condition check-in: 0d097b9fcd user: olr tags: build, rg
11:11
[build][core][fr] tags for sentence check-in: 22a9b5a923 user: olr tags: build, core, fr, rg
08:40
[build] code clarification for graph rules reader check-in: 34a15fd0fb user: olr tags: build, rg
Changes

Modified compile_rules_graph.py from [06e4c2c6a2] to [e586a73a20].

10
11
12
13
14
15
16

17
18
19
20
21
22
23
24
25
...
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
...
228
229
230
231
232
233
234



235
236
237
238
239
240
241
...
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
dACTIONS = {}
dFUNCTIONS = {}


def prepareFunction (s, bTokenValue=False):
    s = s.replace("__also__", "bCondMemo")
    s = s.replace("__else__", "not bCondMemo")

    s = re.sub(r"(select|exclude|define)[(][\\](\d+)", 'g_\\1(lToken[\\2+nTokenOffset]', s)
    s = re.sub(r"(morph|analyse|displayInfo)[(]\\(\d+)", 'g_\\1(lToken[\\2+nTokenOffset]', s)
    s = re.sub(r"(switchGender|has(?:Mas|Fem)Form)[(]\\(\d+)", '\\1(lToken[\\2+nTokenOffset]["sValue"]', s)
    s = re.sub(r"(morph|analyse)\(>1", 'g_\\1(lToken[nLastToken+1]', s)                     # next token
    s = re.sub(r"(morph|analyse)\(<1", 'g_\\1(lToken[nTokenOffset]', s)                     # previous token
    s = re.sub(r"before\(\s*", 'look(s[:m.start()], ', s)                                   # before(s)
    s = re.sub(r"after\(\s*", 'look(s[m.end():], ', s)                                      # after(s)
    s = re.sub(r"textarea\(\s*", 'look(s, ', s)                                             # textarea(s)
    s = re.sub(r"before_chk1\(\s*", 'look_chk1(dDA, s[:m.start()], 0, ', s)                 # before_chk1(s)
................................................................................
    # Option
    sOption = False
    m = re.match("/(\\w+)/", sAction) 
    if m:
        sOption = m.group(1)
        sAction = sAction[m.end():].strip()
    # valid action?
    m = re.search("(?P<action>[-~=])(?P<start>\\d+|)(?P<end>:\\d+|)>> ", sAction)
    if not m:
        print(" # Error. No action found at: ", sActionId)
        print("   ==", sAction, "==")
        return None
    # Condition
    sCondition = sAction[:m.start()].strip()
    if sCondition:
................................................................................
    elif cAction == "~":
        ## text processor
        if sAction[0:1] == "=":
            dFUNCTIONS["g_p_"+sActionId] = sAction[1:]
            sAction = "=g_p_"+sActionId
        elif sAction.startswith('"') and sAction.endswith('"'):
            sAction = sAction[1:-1]



        return [sOption, sCondition, cAction, sAction, iStartAction, iEndAction]
    elif cAction == "=":
        ## disambiguator
        if sAction[0:1] == "=":
            sAction = sAction[1:]
        if "define" in sAction and not re.search(r"define\(\\\d+ *, *\[.*\] *\)", sAction):
            print("# Error in action at line " + sActionId + ": second argument for <define> must be a list of strings")
................................................................................

    # creating file with all functions callable by rules
    print("  creating callables...")
    sPyCallables = "# generated code, do not edit\n"
    #sJSCallables = "// generated code, do not edit\nconst oEvalFunc = {\n"
    for sFuncName, sReturn in dFUNCTIONS.items():
        if sFuncName.startswith("g_c_"): # condition
            sParams = "lToken, nTokenOffset, nLastToken, sCountry, bCondMemo"
        elif sFuncName.startswith("g_m_"): # message
            sParams = "lToken, nTokenOffset"
        elif sFuncName.startswith("g_s_"): # suggestion
            sParams = "lToken, nTokenOffset"
        elif sFuncName.startswith("g_p_"): # preprocessor
            sParams = "lToken"
        elif sFuncName.startswith("g_d_"): # disambiguator







>

|







 







|







 







>
>
>







 







|







10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
...
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
...
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
...
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
dACTIONS = {}
dFUNCTIONS = {}


def prepareFunction (s, bTokenValue=False):
    s = s.replace("__also__", "bCondMemo")
    s = s.replace("__else__", "not bCondMemo")
    s = re.sub(r"(morph|analyse|displayInfo)[(]\\(\d+)", 'g_\\1(lToken[\\2+nTokenOffset]', s)
    s = re.sub(r"(select|exclude|define)[(][\\](\d+)", 'g_\\1(lToken[\\2+nTokenOffset]', s)
    s = re.sub(r"(tag_before|tag_after)[(][\\](\d+)", 'g_\\1(lToken[\\2+nTokenOffset]', s)
    s = re.sub(r"(switchGender|has(?:Mas|Fem)Form)[(]\\(\d+)", '\\1(lToken[\\2+nTokenOffset]["sValue"]', s)
    s = re.sub(r"(morph|analyse)\(>1", 'g_\\1(lToken[nLastToken+1]', s)                     # next token
    s = re.sub(r"(morph|analyse)\(<1", 'g_\\1(lToken[nTokenOffset]', s)                     # previous token
    s = re.sub(r"before\(\s*", 'look(s[:m.start()], ', s)                                   # before(s)
    s = re.sub(r"after\(\s*", 'look(s[m.end():], ', s)                                      # after(s)
    s = re.sub(r"textarea\(\s*", 'look(s, ', s)                                             # textarea(s)
    s = re.sub(r"before_chk1\(\s*", 'look_chk1(dDA, s[:m.start()], 0, ', s)                 # before_chk1(s)
................................................................................
    # Option
    sOption = False
    m = re.match("/(\\w+)/", sAction) 
    if m:
        sOption = m.group(1)
        sAction = sAction[m.end():].strip()
    # valid action?
    m = re.search("(?P<action>[-~=/])(?P<start>\\d+|)(?P<end>:\\d+|)>> ", sAction)
    if not m:
        print(" # Error. No action found at: ", sActionId)
        print("   ==", sAction, "==")
        return None
    # Condition
    sCondition = sAction[:m.start()].strip()
    if sCondition:
................................................................................
    elif cAction == "~":
        ## text processor
        if sAction[0:1] == "=":
            dFUNCTIONS["g_p_"+sActionId] = sAction[1:]
            sAction = "=g_p_"+sActionId
        elif sAction.startswith('"') and sAction.endswith('"'):
            sAction = sAction[1:-1]
        return [sOption, sCondition, cAction, sAction, iStartAction, iEndAction]
    elif cAction == "/":
        ## tags
        return [sOption, sCondition, cAction, sAction, iStartAction, iEndAction]
    elif cAction == "=":
        ## disambiguator
        if sAction[0:1] == "=":
            sAction = sAction[1:]
        if "define" in sAction and not re.search(r"define\(\\\d+ *, *\[.*\] *\)", sAction):
            print("# Error in action at line " + sActionId + ": second argument for <define> must be a list of strings")
................................................................................

    # creating file with all functions callable by rules
    print("  creating callables...")
    sPyCallables = "# generated code, do not edit\n"
    #sJSCallables = "// generated code, do not edit\nconst oEvalFunc = {\n"
    for sFuncName, sReturn in dFUNCTIONS.items():
        if sFuncName.startswith("g_c_"): # condition
            sParams = "lToken, nTokenOffset, nLastToken, sCountry, bCondMemo, dTags"
        elif sFuncName.startswith("g_m_"): # message
            sParams = "lToken, nTokenOffset"
        elif sFuncName.startswith("g_s_"): # suggestion
            sParams = "lToken, nTokenOffset"
        elif sFuncName.startswith("g_p_"): # preprocessor
            sParams = "lToken"
        elif sFuncName.startswith("g_d_"): # disambiguator

Modified gc_core/py/lang_core/gc_engine.py from [d166a26ff3] to [3f95e0d88e].

586
587
588
589
590
591
592

593

594
595
596
597
598
599
600
...
702
703
704
705
706
707
708
709




710
711
712
713
714
715
716
717
718
...
734
735
736
737
738
739
740









741
742
743
744
745
746
747
...
932
933
934
935
936
937
938















939
940
941
942
943
944
945

    def __init__ (self, sSentence, sSentence0, nOffset):
        self.sSentence = sSentence
        self.sSentence0 = sSentence0
        self.nOffsetWithinParagraph = nOffset
        self.lToken = list(_oTokenizer.genTokens(sSentence, True))
        self.dTokenPos = { dToken["nStart"]: dToken  for dToken in self.lToken }

        self.createError = self._createWriterError  if _bWriterError  else self._createDictError


    def update (self, sSentence):
        self.sSentence = sSentence
        self.lToken = list(_oTokenizer.genTokens(sSentence, True))

    def _getNextMatchingNodes (self, dToken, dGraph, dNode, bDebug=False):
        "generator: return nodes where <dToken> “values” match <dNode> arcs"
................................................................................
            bCondMemo = None
            for sRuleId in dGraph[nextNodeKey]:
                try:
                    if bDebug:
                        print("ACTION:", sRuleId)
                        print(dRule[sRuleId])
                    sOption, sFuncCond, cActionType, sWhat, *eAct = dRule[sRuleId]
                    # action in lActions: [ condition, action type, replacement/suggestion/action[, iTokenStart, iTokenEnd[, nPriority, message, URL]] ]




                    if not sOption or dOptions.get(sOption, False):
                        bCondMemo = not sFuncCond or globals()[sFuncCond](self.lToken, nTokenOffset, nLastToken, sCountry, bCondMemo)
                        if bCondMemo:
                            if cActionType == "-":
                                # grammar error
                                nTokenErrorStart = nTokenOffset + eAct[0]
                                nTokenErrorEnd = (nTokenOffset + eAct[1])  if eAct[1]  else nLastToken
                                nErrorStart = self.nOffsetWithinParagraph + self.lToken[nTokenErrorStart]["nStart"]
                                nErrorEnd = self.nOffsetWithinParagraph + self.lToken[nTokenErrorEnd]["nEnd"]
................................................................................
                                if bDebug:
                                    print("=", sRuleId)
                            elif cActionType == ">":
                                # we do nothing, this test is just a condition to apply all following actions
                                if bDebug:
                                    print(">", sRuleId)
                                pass









                            else:
                                print("# error: unknown action at " + sLineId)
                        elif cActionType == ">":
                            if bDebug:
                                print(">!", sRuleId)
                            break
                except Exception as e:
................................................................................
            zNegPattern = re.compile(sNegPattern)
            if any(zNegPattern.search(sMorph)  for sMorph in lMorph):
                return False
    # search sPattern
    zPattern = re.compile(sPattern)
    return any(zPattern.search(sMorph)  for sMorph in lMorph)


















#### Disambiguator

def g_select (dToken, sPattern, lDefault=None):
    "select morphologies for <dToken> according to <sPattern>, always return True"
    lMorph = dToken["lMorph"]  if "lMorph" in dToken  else _oSpellChecker.getMorph(dToken["sValue"])







>

>







 







|
>
>
>
>

|







 







>
>
>
>
>
>
>
>
>







 







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
...
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
...
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
...
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975

    def __init__ (self, sSentence, sSentence0, nOffset):
        self.sSentence = sSentence
        self.sSentence0 = sSentence0
        self.nOffsetWithinParagraph = nOffset
        self.lToken = list(_oTokenizer.genTokens(sSentence, True))
        self.dTokenPos = { dToken["nStart"]: dToken  for dToken in self.lToken }
        self.dTags = {}
        self.createError = self._createWriterError  if _bWriterError  else self._createDictError


    def update (self, sSentence):
        self.sSentence = sSentence
        self.lToken = list(_oTokenizer.genTokens(sSentence, True))

    def _getNextMatchingNodes (self, dToken, dGraph, dNode, bDebug=False):
        "generator: return nodes where <dToken> “values” match <dNode> arcs"
................................................................................
            bCondMemo = None
            for sRuleId in dGraph[nextNodeKey]:
                try:
                    if bDebug:
                        print("ACTION:", sRuleId)
                        print(dRule[sRuleId])
                    sOption, sFuncCond, cActionType, sWhat, *eAct = dRule[sRuleId]
                    # Suggestion    [ option, condition, "-", replacement/suggestion/action, iTokenStart, iTokenEnd, nPriority, message, URL ]
                    # TextProcessor [ option, condition, "~", replacement/suggestion/action, iTokenStart, iTokenEnd ]
                    # Disambiguator [ option, condition, "=", replacement/suggestion/action ]
                    # Tag           [ option, condition, "/", replacement/suggestion/action, iTokenStart, iTokenEnd ]
                    # Test          [ option, condition, ">", "" ]
                    if not sOption or dOptions.get(sOption, False):
                        bCondMemo = not sFuncCond or globals()[sFuncCond](self.lToken, nTokenOffset, nLastToken, sCountry, bCondMemo, self.dTags)
                        if bCondMemo:
                            if cActionType == "-":
                                # grammar error
                                nTokenErrorStart = nTokenOffset + eAct[0]
                                nTokenErrorEnd = (nTokenOffset + eAct[1])  if eAct[1]  else nLastToken
                                nErrorStart = self.nOffsetWithinParagraph + self.lToken[nTokenErrorStart]["nStart"]
                                nErrorEnd = self.nOffsetWithinParagraph + self.lToken[nTokenErrorEnd]["nEnd"]
................................................................................
                                if bDebug:
                                    print("=", sRuleId)
                            elif cActionType == ">":
                                # we do nothing, this test is just a condition to apply all following actions
                                if bDebug:
                                    print(">", sRuleId)
                                pass
                            elif cActionType == "/":
                                # tags
                                nTokenTag = nTokenOffset + eAct[0]
                                if sWhat not in self.dTags:
                                    self.dTags[sWhat] = (nTokenTag, nTokenTag)
                                elif nTokenTag > self.dTags[sWhat][1]:
                                    self.dTags[sWhat] = (self.dTags[sWhat][0], nTokenTag)
                                if bDebug:
                                    print("/", sRuleId)
                            else:
                                print("# error: unknown action at " + sLineId)
                        elif cActionType == ">":
                            if bDebug:
                                print(">!", sRuleId)
                            break
                except Exception as e:
................................................................................
            zNegPattern = re.compile(sNegPattern)
            if any(zNegPattern.search(sMorph)  for sMorph in lMorph):
                return False
    # search sPattern
    zPattern = re.compile(sPattern)
    return any(zPattern.search(sMorph)  for sMorph in lMorph)


def g_tag_before (dToken, sTag, dTags):
    if sTag not in dTags:
        return False
    if dToken["nStart"] > dTags[sTag][0]:
        return True
    return False


def g_tag_after (dToken, sTag, dTags):
    if sTag not in dTags:
        return False
    if dToken["nStart"] < dTags[sTag][1]:
        return True
    return False


#### Disambiguator

def g_select (dToken, sPattern, lDefault=None):
    "select morphologies for <dToken> according to <sPattern>, always return True"
    lMorph = dToken["lMorph"]  if "lMorph" in dToken  else _oSpellChecker.getMorph(dToken["sValue"])

Modified gc_lang/fr/rules.grx from [25bf3d2ac0] to [130969b0ab].

4671
4672
4673
4674
4675
4676
4677


































4678
4679
4680
4681
4682
4683
4684
@@@@
@@@@
@@@@GRAPH: graphe1                                                                                  
@@@@
@@@@
@@@@
@@@@



































!!
!!
!!!! Locutions invariables                                                                          
!!
!!








>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







4671
4672
4673
4674
4675
4676
4677
4678
4679
4680
4681
4682
4683
4684
4685
4686
4687
4688
4689
4690
4691
4692
4693
4694
4695
4696
4697
4698
4699
4700
4701
4702
4703
4704
4705
4706
4707
4708
4709
4710
4711
4712
4713
4714
4715
4716
4717
4718
@@@@
@@@@
@@@@GRAPH: graphe1                                                                                  
@@@@
@@@@
@@@@
@@@@


__tag_sujets__
    [je|j’]
    moi qui
    moi [seul|seule]
        <<- />> 1s

    tu
    toi ?,¿ qui
    toi [seul|seule]
        <<- />> 2s

    nous
    nous ?,¿ qui
    nous-même
    nous-mêmes
    nous [seul|seuls|seules]
    et moi
    ni moi
    moi et
        <<- />> 1p

    vous
    vous ?,¿ qui
    vous-même
    vous-mêmes
    vous [seul|seule|seuls|seules]
    et toi
    ni toi
    toi et
        <<- />> 2p



!!
!!
!!!! Locutions invariables                                                                          
!!
!!