Grammalecte  Check-in [da276e6368]

Overview
Comment:[core] gc engine: anti-pattern for value regex + restriction for universal arc
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | core | rg
Files: files | file ages | folders
SHA3-256: da276e63689e987543e988248fd635c16a89732ee5d65929cbefbf52afe1490a
User & Date: olr on 2018-06-22 13:26:05
Other Links: branch diff | manifest | tags
Context
2018-06-22
13:27
[build][bugs] fix functions rewriting + [fr] conversion: regex rules -> graph rules check-in: 521ae6bdfb user: olr tags: build, fr, rg
13:26
[core] gc engine: anti-pattern for value regex + restriction for universal arc check-in: da276e6368 user: olr tags: core, rg
12:51
[build][core] look before, look after (fix spaces) check-in: cdcc60d8eb user: olr tags: build, core, rg
Changes

Modified gc_core/py/lang_core/gc_engine.py from [16275f0b5d] to [3b5bf65d7f].

627
628
629
630
631
632
633

634
635
636
637
638
639


640
641
642
643









644
645
646
647
648
649
650
...
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
...
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
            for sLemma in _oSpellChecker.getLemma(dToken["sValue"]):
                if sLemma in dNode["<lemmas>"]:
                    if bDebug:
                        print("  MATCH: >" + sLemma)
                    yield dGraph[dNode["<lemmas>"][sLemma]]
        # universal arc
        if "*" in dNode:

            if bDebug:
                print("  MATCH: *")
            yield dGraph[dNode["*"]]
        # regex value arcs
        if "<re_value>" in dNode:
            for sRegex in dNode["<re_value>"]:


                if re.search(sRegex, dToken["sValue"]):
                    if bDebug:
                        print("  MATCH: ~" + sRegex)
                    yield dGraph[dNode["<re_value>"][sRegex]]









        # regex morph arcs
        if "<re_morph>" in dNode:
            for sRegex in dNode["<re_morph>"]:
                if "¬" not in sRegex:
                    # no anti-pattern
                    if any(re.search(sRegex, sMorph)  for sMorph in _oSpellChecker.getMorph(dToken["sValue"])):
                        if bDebug:
................................................................................
                            print("  MATCH: @" + sRegex)
                        yield dGraph[dNode["<re_morph>"][sRegex]]
                else:
                    # there is an anti-pattern
                    sPattern, sNegPattern = sRegex.split("¬", 1)
                    if sNegPattern == "*":
                        # all morphologies must match with <sPattern>
                        if all(re.search(sPattern, sMorph)  for sMorph in _oSpellChecker.getMorph(dToken["sValue"])):
                            if bDebug:
                                print("  MATCH: @" + sRegex)
                            yield dGraph[dNode["<re_morph>"][sRegex]]
                    else:
                        if sNegPattern and any(re.search(sNegPattern, sMorph)  for sMorph in _oSpellChecker.getMorph(dToken["sValue"])):
                            continue
                        if any(re.search(sPattern, sMorph)  for sMorph in _oSpellChecker.getMorph(dToken["sValue"])):
................................................................................
            if any(zNegPattern.search(sMorph)  for sMorph in lMorph):
                return False
    # search sPattern
    zPattern = re.compile(sPattern)
    return any(zPattern.search(sMorph)  for sMorph in lMorph)


def g_tag_before (dToken, sTag, dTags):
    if sTag not in dTags:
        return False
    if dToken["nStart"] > dTags[sTag][0]:
        return True
    return False


def g_tag_after (dToken, sTag, dTags):
    if sTag not in dTags:
        return False
    if dToken["nStart"] < dTags[sTag][1]:
        return True
    return False









>
|
|
|



>
>
|
|
|
|
>
>
>
>
>
>
>
>
>







 







|







 







|







|







627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
...
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
...
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
            for sLemma in _oSpellChecker.getLemma(dToken["sValue"]):
                if sLemma in dNode["<lemmas>"]:
                    if bDebug:
                        print("  MATCH: >" + sLemma)
                    yield dGraph[dNode["<lemmas>"][sLemma]]
        # universal arc
        if "*" in dNode:
            if dToken["sType"] != "PUNC":
                if bDebug:
                    print("  MATCH: *")
                yield dGraph[dNode["*"]]
        # regex value arcs
        if "<re_value>" in dNode:
            for sRegex in dNode["<re_value>"]:
                if "¬" not in sRegex:
                    # no anti-pattern
                    if re.search(sRegex, dToken["sValue"]):
                        if bDebug:
                            print("  MATCH: ~" + sRegex)
                        yield dGraph[dNode["<re_value>"][sRegex]]
                else:
                    # there is an anti-pattern
                    sPattern, sNegPattern = sRegex.split("¬", 1)
                    if sNegPattern and re.search(sNegPattern, dToken["sValue"]):
                        continue
                    if re.search(sPattern, dToken["sValue"]):
                        if bDebug:
                            print("  MATCH: ~" + sRegex)
                        yield dGraph[dNode["<re_value>"][sRegex]]
        # regex morph arcs
        if "<re_morph>" in dNode:
            for sRegex in dNode["<re_morph>"]:
                if "¬" not in sRegex:
                    # no anti-pattern
                    if any(re.search(sRegex, sMorph)  for sMorph in _oSpellChecker.getMorph(dToken["sValue"])):
                        if bDebug:
................................................................................
                            print("  MATCH: @" + sRegex)
                        yield dGraph[dNode["<re_morph>"][sRegex]]
                else:
                    # there is an anti-pattern
                    sPattern, sNegPattern = sRegex.split("¬", 1)
                    if sNegPattern == "*":
                        # all morphologies must match with <sPattern>
                        if sPattern and all(re.search(sPattern, sMorph)  for sMorph in _oSpellChecker.getMorph(dToken["sValue"])):
                            if bDebug:
                                print("  MATCH: @" + sRegex)
                            yield dGraph[dNode["<re_morph>"][sRegex]]
                    else:
                        if sNegPattern and any(re.search(sNegPattern, sMorph)  for sMorph in _oSpellChecker.getMorph(dToken["sValue"])):
                            continue
                        if any(re.search(sPattern, sMorph)  for sMorph in _oSpellChecker.getMorph(dToken["sValue"])):
................................................................................
            if any(zNegPattern.search(sMorph)  for sMorph in lMorph):
                return False
    # search sPattern
    zPattern = re.compile(sPattern)
    return any(zPattern.search(sMorph)  for sMorph in lMorph)


def g_tag_before (dToken, dTags, sTag):
    if sTag not in dTags:
        return False
    if dToken["nStart"] > dTags[sTag][0]:
        return True
    return False


def g_tag_after (dToken, dTags, sTag):
    if sTag not in dTags:
        return False
    if dToken["nStart"] < dTags[sTag][1]:
        return True
    return False