Grammalecte  Check-in [59d8df1fa2]

Overview
Comment:[core][bug] fix tokens merging
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | core | rg
Files: files | file ages | folders
SHA3-256: 59d8df1fa295536378d51f74f0e5bb0864f86b798c8412ad860cfc0168f2bac3
User & Date: olr on 2018-06-24 06:28:17
Other Links: branch diff | manifest | tags
Context
2018-06-24
11:39
[graphspell] code cleaning (pylint) check-in: 814d73b60e user: olr tags: graphspell, rg
06:28
[core][bug] fix tokens merging check-in: 59d8df1fa2 user: olr tags: core, rg
2018-06-23
16:28
[core] merge tokens check-in: c051cc6ca9 user: olr tags: core, rg
Changes

Modified gc_core/py/lang_core/gc_engine.py from [23dabd0b59] to [a4200b43e6].

   911    911                       if bUppercase:
   912    912                           sValue = sValue[0:1].upper() + sValue[1:]
   913    913                       self.lToken[i]["sNewValue"] = sValue
   914    914   
   915    915       def rewrite (self, bDebug=False):
   916    916           "rewrite the sentence, modify tokens, purge the token list"
   917    917           lNewToken = []
   918         -        nMergeUntil = -1
          918  +        nMergeUntil = 0
   919    919           dTokenMerger = None
   920    920           for dToken in self.lToken:
   921    921               bKeepToken = True
   922    922               if "bImmune" in dToken:
   923    923                   nErrorStart = self.nOffsetWithinParagraph + dToken["nStart"]
   924    924                   if nErrorStart in self.dError:
   925    925                       if bDebug:
   926    926                           print("immunity -> error removed:", self.dError[nErrorStart])
   927    927                       del self.dError[nErrorStart]
   928         -            if dToken["i"] <= nMergeUntil:
   929         -                dTokenMerger["sValue"] += " " * (dToken["i"]["nStart"] - dTokenMerger["nEnd"]) + dToken["i"]["sValue"]
   930         -                dTokenMerger["nEnd"] = dToken["i"]["nEnd"]
          928  +            if nMergeUntil and dToken["i"] <= nMergeUntil:
          929  +                dTokenMerger["sValue"] += " " * (dToken["nStart"] - dTokenMerger["nEnd"]) + dToken["sValue"]
          930  +                dTokenMerger["nEnd"] = dToken["nEnd"]
   931    931                   if bDebug:
   932    932                       print("Merged token:", dTokenMerger["sValue"])
   933    933                   bKeepToken = False
   934    934               if "nMergeUntil" in dToken:
   935         -                if not nMergeUntil: # this token should alerady been merged with a previous token
          935  +                if dToken["i"] > nMergeUntil: # this token is not already merged with a previous token
   936    936                       dTokenMerger = dToken
   937    937                   if dToken["nMergeUntil"] > nMergeUntil:
   938    938                       nMergeUntil = dToken["nMergeUntil"]  
   939    939                   del dToken["nMergeUntil"]
   940    940               elif "bToRemove" in dToken:
   941    941                   # remove useless token
   942    942                   self.sSentence = self.sSentence[:dToken["nStart"]] + " " * (dToken["nEnd"] - dToken["nStart"]) + self.sSentence[dToken["nEnd"]:]

Modified gc_lang/fr/rules.grx from [7874208805] to [0f58bfdc3c].

  4600   4600   @@@@
  4601   4601   @@@@
  4602   4602   @@@@
  4603   4603   
  4604   4604   __p_notre_père_qui_es_au_cieux__
  4605   4605       notre père qui [es|est] aux cieux
  4606   4606           <<- ~4>> !
  4607         -        <<- ~3:0>> *
         4607  +        <<- ~3:0>> _
  4608   4608   
  4609   4609   
  4610   4610   !!
  4611   4611   !!
  4612   4612   !!!! Formes verbales sans sujet                                                                     
  4613   4613   !!
  4614   4614   !!