Grammalecte  Check-in [eecbc6012a]

Overview
Comment:[core] gc engine: use stored morphologies in tokens if they exist
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | core | rg
Files: files | file ages | folders
SHA3-256: eecbc6012a8ab29bd5ccc6039f93db809ffc08c46bf23080104b6603cd9fba42
User & Date: olr on 2018-07-26 08:31:29
Other Links: branch diff | manifest | tags
Context
2018-07-26
11:51
[fr] conversion: regex rules -> graph rules check-in: 663cc65183 user: olr tags: fr, rg
08:31
[core] gc engine: use stored morphologies in tokens if they exist check-in: eecbc6012a user: olr tags: core, rg
08:13
[fr] conversion: regex rules -> graph rules check-in: 59eec1ef7b user: olr tags: fr, rg
Changes

Modified gc_core/py/lang_core/gc_engine.py from [3ecd1c5c57] to [d9f8c3ac90].

657
658
659
660
661
662
663

664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679

680
681
682
683
684
685
686
687
688
689
....
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
                            print("  MATCH: >" + sLemma)
                        yield dGraph[dNode["<lemmas>"][sLemma]]
            # regex morph arcs
            if "<re_morph>" in dNode:
                for sRegex in dNode["<re_morph>"]:
                    if "¬" not in sRegex:
                        # no anti-pattern

                        if any(re.search(sRegex, sMorph)  for sMorph in _oSpellChecker.getMorph(dToken["sValue"])):
                            if bDebug:
                                print("  MATCH: @" + sRegex)
                            yield dGraph[dNode["<re_morph>"][sRegex]]
                    else:
                        # there is an anti-pattern
                        sPattern, sNegPattern = sRegex.split("¬", 1)
                        if sNegPattern == "*":
                            # all morphologies must match with <sPattern>
                            if sPattern:
                                lMorph = _oSpellChecker.getMorph(dToken["sValue"])
                                if lMorph and all(re.search(sPattern, sMorph)  for sMorph in lMorph):
                                    if bDebug:
                                        print("  MATCH: @" + sRegex)
                                    yield dGraph[dNode["<re_morph>"][sRegex]]
                        else:

                            if sNegPattern and any(re.search(sNegPattern, sMorph)  for sMorph in _oSpellChecker.getMorph(dToken["sValue"])):
                                continue
                            if not sPattern or any(re.search(sPattern, sMorph)  for sMorph in _oSpellChecker.getMorph(dToken["sValue"])):
                                if bDebug:
                                    print("  MATCH: @" + sRegex)
                                yield dGraph[dNode["<re_morph>"][sRegex]]
        # token tags
        if "tags" in dToken and "<tags>" in dNode:
            for sTag in dToken["tags"]:
                if sTag in dNode["<tags>"]:
................................................................................
    else:
        if nLeft is not None:
            lMorph = _oSpellChecker.getMorph(dToken["sValue"][slice(nLeft, nRight)])
            if bMemorizeMorph:
                dToken["lMorph"] = lMorph
        else:
            lMorph = _oSpellChecker.getMorph(dToken["sValue"])
        if not lMorph:
            return False
    # check negative condition
    if sNegPattern:
        if sNegPattern == "*":
            # all morph must match sPattern
            if not lMorph:
                return False
            zPattern = re.compile(sPattern)







>
|









|





>
|

|







 







|
|







657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
....
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
                            print("  MATCH: >" + sLemma)
                        yield dGraph[dNode["<lemmas>"][sLemma]]
            # regex morph arcs
            if "<re_morph>" in dNode:
                for sRegex in dNode["<re_morph>"]:
                    if "¬" not in sRegex:
                        # no anti-pattern
                        lMorph = dToken.get("lMorph", _oSpellChecker.getMorph(dToken["sValue"]))
                        if any(re.search(sRegex, sMorph)  for sMorph in lMorph):
                            if bDebug:
                                print("  MATCH: @" + sRegex)
                            yield dGraph[dNode["<re_morph>"][sRegex]]
                    else:
                        # there is an anti-pattern
                        sPattern, sNegPattern = sRegex.split("¬", 1)
                        if sNegPattern == "*":
                            # all morphologies must match with <sPattern>
                            if sPattern:
                                lMorph = dToken.get("lMorph", _oSpellChecker.getMorph(dToken["sValue"]))
                                if lMorph and all(re.search(sPattern, sMorph)  for sMorph in lMorph):
                                    if bDebug:
                                        print("  MATCH: @" + sRegex)
                                    yield dGraph[dNode["<re_morph>"][sRegex]]
                        else:
                            lMorph = dToken.get("lMorph", _oSpellChecker.getMorph(dToken["sValue"]))
                            if sNegPattern and any(re.search(sNegPattern, sMorph)  for sMorph in lMorph):
                                continue
                            if not sPattern or any(re.search(sPattern, sMorph)  for sMorph in lMorph):
                                if bDebug:
                                    print("  MATCH: @" + sRegex)
                                yield dGraph[dNode["<re_morph>"][sRegex]]
        # token tags
        if "tags" in dToken and "<tags>" in dNode:
            for sTag in dToken["tags"]:
                if sTag in dNode["<tags>"]:
................................................................................
    else:
        if nLeft is not None:
            lMorph = _oSpellChecker.getMorph(dToken["sValue"][slice(nLeft, nRight)])
            if bMemorizeMorph:
                dToken["lMorph"] = lMorph
        else:
            lMorph = _oSpellChecker.getMorph(dToken["sValue"])
    if not lMorph:
        return False
    # check negative condition
    if sNegPattern:
        if sNegPattern == "*":
            # all morph must match sPattern
            if not lMorph:
                return False
            zPattern = re.compile(sPattern)