Grammalecte  Check-in [4134a01a49]

Overview
Comment: [core] darg: merge morph and morphex functions
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | core | rg
Files: files | file ages | folders
SHA3-256: 4134a01a49e64e156c546934f6174aa95416f4a9c2fb64aa5a8d47b914566355
User & Date: olr on 2018-06-06 09:30:40
Other Links: branch diff | manifest | tags
Context
2018-06-06
09:54
[graphspell] end of lemma is now a slash instead of a space check-in: c5c926760b user: olr tags: graphspell, rg
09:30
[core] darg: merge morph and morphex functions check-in: 4134a01a49 user: olr tags: core, rg
07:47
[core] darg: anti-patterns for morphologies check-in: 47ae72e7f9 user: olr tags: core, rg
Changes

Modified compile_rules_graph.py from [ca6fc181e8] to [184a3e89b8].

    19     19       s = re.sub(r"isStart0 *\(\)", 'before0(["<START>", ","])', s)
    20     20       s = re.sub(r"isRealStart0 *\(\)", 'before0(["<START>"])', s)
    21     21       s = re.sub(r"isEnd *\(\)", 'after(["<END>", ","])', s)
    22     22       s = re.sub(r"isRealEnd *\(\)", 'after(["<END>"])', s)
    23     23       s = re.sub(r"isEnd0 *\(\)", 'after0(["<END>", ","])', s)
    24     24       s = re.sub(r"isRealEnd0 *\(\)", 'after0(["<END>"])', s)
    25     25       s = re.sub(r"(select|exclude|define)[(][\\](\d+)", 'g_\\1(lToken[\\2+nTokenOffset]', s)
    26         -    s = re.sub(r"(morph|morphex|displayInfo)[(]\\(\d+)", 'g_\\1(lToken[\\2+nTokenOffset]', s)
           26  +    s = re.sub(r"(morph|displayInfo)[(]\\(\d+)", 'g_\\1(lToken[\\2+nTokenOffset]', s)
    27     27       s = re.sub(r"token\(\s*(\d)", 'nextToken(\\1', s)                                       # token(n)
    28     28       s = re.sub(r"token\(\s*-(\d)", 'prevToken(\\1', s)                                      # token(-n)
    29     29       s = re.sub(r"before\(\s*", 'look(s[:m.start()], ', s)                                   # before(s)
    30     30       s = re.sub(r"after\(\s*", 'look(s[m.end():], ', s)                                      # after(s)
    31     31       s = re.sub(r"textarea\(\s*", 'look(s, ', s)                                             # textarea(s)
    32     32       s = re.sub(r"before_chk1\(\s*", 'look_chk1(dDA, s[:m.start()], 0, ', s)                 # before_chk1(s)
    33     33       s = re.sub(r"after_chk1\(\s*", 'look_chk1(dDA, s[m.end():], m.end(), ', s)              # after_chk1(s)

Modified gc_core/py/lang_core/gc_engine.py from [5975c86814] to [f0f3202267].

   815    815               print("# Error. Jump failed: ", sWhat)
   816    816               traceback.print_exc()
   817    817               return
   818    818   
   819    819   
   820    820   #### Analyse tokens
   821    821   
   822         -def g_morph (dToken, sPattern, bStrict=True):
   823         -    "analyse a token, return True if <sPattern> in morphologies"
   824         -    if "lMorph" in dToken:
   825         -        lMorph = dToken["lMorph"]
   826         -    else:
   827         -        lMorph = _oSpellChecker.getMorph(dToken["sValue"])
   828         -        if not lMorph:
   829         -            return False
   830         -    zPattern = re.compile(sPattern)
   831         -    if bStrict:
   832         -        return all(zPattern.search(sMorph)  for sMorph in lMorph)
   833         -    return any(zPattern.search(sMorph)  for sMorph in lMorph)
   834         -
   835         -def g_morphex (dToken, sPattern, sNegPattern):
          822  +def g_morph (dToken, sPattern, sNegPattern=""):
   836    823       "analyse a token, return True if <sNegPattern> not in morphologies and <sPattern> in morphologies"
   837    824       if "lMorph" in dToken:
   838    825           lMorph = dToken["lMorph"]
   839    826       else:
   840    827           lMorph = _oSpellChecker.getMorph(dToken["sValue"])
   841    828           if not lMorph:
   842    829               return False
   843    830       # check negative condition
   844         -    zNegPattern = re.compile(sNegPattern)
   845         -    if any(zNegPattern.search(sMorph)  for sMorph in lMorph):
   846         -        return False
          831  +    if sNegPattern:
          832  +        if sNegPattern == "*":
          833  +            # all morph must match sPattern
          834  +            zPattern = re.compile(sPattern)
          835  +            return all(zPattern.search(sMorph)  for sMorph in lMorph)
          836  +        else:
          837  +            zNegPattern = re.compile(sNegPattern)
          838  +            if any(zNegPattern.search(sMorph)  for sMorph in lMorph):
          839  +                return False
   847    840       # search sPattern
   848    841       zPattern = re.compile(sPattern)
   849    842       return any(zPattern.search(sMorph)  for sMorph in lMorph)
   850    843   
   851         -def g_analyse (dToken, sPattern, bStrict=True):
   852         -    "analyse a token, return True if <sPattern> in morphologies (disambiguation off)"
   853         -    lMorph = _oSpellChecker.getMorph(dToken["sValue"])
   854         -    if not lMorph:
   855         -        return False
   856         -    zPattern = re.compile(sPattern)
   857         -    if bStrict:
   858         -        return all(zPattern.search(sMorph)  for sMorph in lMorph)
   859         -    return any(zPattern.search(sMorph)  for sMorph in lMorph)
   860    844   
   861         -
   862         -def g_analysex (dToken, sPattern, sNegPattern):
          845  +def g_analyse (dToken, sPattern, sNegPattern=""):
   863    846       "analyse a token, return True if <sNegPattern> not in morphologies and <sPattern> in morphologies (disambiguation off)"
   864    847       lMorph = _oSpellChecker.getMorph(dToken["sValue"])
   865    848       if not lMorph:
   866    849           return False
   867    850       # check negative condition
   868         -    zNegPattern = re.compile(sNegPattern)
   869         -    if any(zNegPattern.search(sMorph)  for sMorph in lMorph):
   870         -        return False
          851  +    if sNegPattern:
          852  +        if sNegPattern == "*":
          853  +            zPattern = re.compile(sPattern)
          854  +            return all(zPattern.search(sMorph)  for sMorph in lMorph)
          855  +        else:
          856  +            zNegPattern = re.compile(sNegPattern)
          857  +            if any(zNegPattern.search(sMorph)  for sMorph in lMorph):
          858  +                return False
   871    859       # search sPattern
   872    860       zPattern = re.compile(sPattern)
   873    861       return any(zPattern.search(sMorph)  for sMorph in lMorph)
   874    862   
   875    863   
   876    864   
   877    865   #### Disambiguator

Modified gc_lang/fr/rules_graph.grx from [1dc1051949] to [02356176f2].

    67     67           <<- -2>> plaisir                                                                            # Faire plaisir : dans cette locution, “plaisir” doit être au singulier.
    68     68   
    69     69   TEST: Ça me fait {{plaisirs}}.
    70     70   
    71     71   
    72     72   __test__
    73     73       je  ~préf[éè]r  [que|qu’]  @(?::Os|:M)¬:X  @:I
    74         -        <<- morph(\1, ":V", False) and morphex(\4, ":Os|:M", ":X") -5>> SUBJONCTIF                  # SUBJONCTIF.
           74  +        <<- morph(\1, ":V") and morph(\4, ":Os|:M", ":X") -5>> SUBJONCTIF                  # SUBJONCTIF.
    75     75   
    76     76   TEST: je préférerais qu’Isabelle {{est}} partie.