Grammalecte: Check-in [c65b7e2b8b]


Overview
Comment: [graphspell][core][fr] code cleaning (pylint)
SHA3-256: c65b7e2b8b4b1507adbe01cd765e64b1b7329abb805b2d1c4df3e753ecbb5d1f
User & Date: olr 2019-05-15 11:55:44
Context
2019-05-15
16:12
[fx] position and size of panels check-in: eb18e7fd4b user: olr tags: fx, trunk
11:55
[graphspell][core][fr] code cleaning (pylint) check-in: c65b7e2b8b user: olr tags: core, fr, graphspell, trunk
10:47
[graphspell] import dictionary: include lang code in error message check-in: 2be0562a74 user: olr tags: graphspell, trunk
Changes

Changes to gc_core/py/lang_core/__init__.py.

            1  +"""
            2  +Grammalecte - core grammar checker engine
            3  +"""
     1      4   
     2      5   from .gc_engine import *

Changes to gc_core/py/lang_core/gc_engine.py.

   550    550                                   globals()[sWhat](self.lToken, nTokenOffset, nLastToken)
   551    551                                   if bDebug:
   552    552                                       echo("    DISAMBIGUATOR: ({})  [{}:{}]".format(sWhat, self.lToken[nTokenOffset+1]["sValue"], self.lToken[nLastToken]["sValue"]))
   553    553                               elif cActionType == ">":
   554    554                                   # we do nothing, this test is just a condition to apply all following actions
   555    555                                   if bDebug:
   556    556                                       echo("    COND_OK")
   557         -                                pass
   558    557                               elif cActionType == "/":
   559    558                                   # Tag
   560    559                                   nTokenStart = nTokenOffset + eAct[0]  if eAct[0] > 0  else nLastToken + eAct[0]
   561    560                                   nTokenEnd = nTokenOffset + eAct[1]  if eAct[1] > 0  else nLastToken + eAct[1]
   562    561                                   for i in range(nTokenStart, nTokenEnd+1):
   563    562                                       if "aTags" in self.lToken[i]:
   564    563                                           self.lToken[i]["aTags"].update(sWhat.split("|"))
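
The only change in this hunk is the removal of the trailing `pass` in the ">" branch: pylint reports W0107 (unnecessary-pass) whenever a `pass` sits in a block that already contains another statement, here the `if bDebug:` block. A minimal, self-contained illustration of the same fix (not Grammalecte code):

    def handleAction (cActionType, bDebug=False):
        "toy dispatcher mirroring the branch structure above"
        if cActionType == ">":
            # condition only: nothing to apply besides optional debug output
            if bDebug:
                print("    COND_OK")
            # no `pass` needed: the branch body is not empty
        elif cActionType == "/":
            print("    tagging tokens")

    handleAction(">", bDebug=True)   # prints "    COND_OK"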

Changes to gc_lang/fr/modules/tests.py.

     4      4   Grammar checker tests for French language
     5      5   """
     6      6   
     7      7   import unittest
     8      8   import os
     9      9   import re
    10     10   import time
    11         -
           11  +from contextlib import contextmanager
    12     12   
    13     13   from ..graphspell.ibdawg import IBDAWG
    14     14   from ..graphspell.echo import echo
    15     15   from . import gc_engine as gce
    16     16   from . import conj
    17     17   from . import phonet
    18     18   from . import mfsp
    19     19   
           20  +
           21  +@contextmanager
           22  +def timeblock (label, hDst):
           23  +    "performance counter (contextmanager)"
           24  +    start = time.perf_counter()
           25  +    try:
           26  +        yield
           27  +    finally:
           28  +        end = time.perf_counter()
           29  +        print('{} : {}'.format(label, end - start))
           30  +        if hDst:
           31  +            hDst.write("{:<12.6}".format(end-start))
           32  +
           33  +
           34  +def perf (sVersion, hDst=None):
           35  +    "performance tests"
           36  +    print("\nPerformance tests")
           37  +    gce.load()
           38  +    gce.parse("Texte sans importance… utile pour la compilation des règles avant le calcul des perfs.")
           39  +
           40  +    spHere, _ = os.path.split(__file__)
           41  +    with open(os.path.join(spHere, "perf.txt"), "r", encoding="utf-8") as hSrc:
           42  +        if hDst:
           43  +            hDst.write("{:<12}{:<20}".format(sVersion, time.strftime("%Y.%m.%d %H:%M")))
           44  +        for sText in ( s.strip() for s in hSrc if not s.startswith("#") and s.strip() ):
           45  +            with timeblock(sText[:sText.find(".")], hDst):
           46  +                gce.parse(sText)
           47  +        if hDst:
           48  +            hDst.write("\n")
           49  +
    20     50   
    21     51   def _fuckBackslashUTF8 (s):
    22     52       "fuck that shit"
    23     53       return s.replace("\u2019", "'").replace("\u2013", "–").replace("\u2014", "—")
    24     54   
    25     55   
    26     56   class TestDictionary (unittest.TestCase):
................................................................................
   213    243           sRes = " " * len(sLine)
   214    244           for i, m in enumerate(self._zError.finditer(sLine)):
   215    245               nStart = m.start() - (4 * i)
   216    246               nEnd = m.end() - (4 * (i+1))
   217    247               sRes = sRes[:nStart] + "~" * (nEnd - nStart) + sRes[nEnd:-4]
   218    248           return sRes
   219    249   
   220         -
   221         -from contextlib import contextmanager
   222         -@contextmanager
   223         -def timeblock (label, hDst):
   224         -    "performance counter (contextmanager)"
   225         -    start = time.perf_counter()
   226         -    try:
   227         -        yield
   228         -    finally:
   229         -        end = time.perf_counter()
   230         -        print('{} : {}'.format(label, end - start))
   231         -        if hDst:
   232         -            hDst.write("{:<12.6}".format(end-start))
   233         -
   234         -
   235         -def perf (sVersion, hDst=None):
   236         -    "performance tests"
   237         -    print("\nPerformance tests")
   238         -    gce.load()
   239         -    gce.parse("Texte sans importance… utile pour la compilation des règles avant le calcul des perfs.")
   240         -
   241         -    spHere, _ = os.path.split(__file__)
   242         -    with open(os.path.join(spHere, "perf.txt"), "r", encoding="utf-8") as hSrc:
   243         -        if hDst:
   244         -            hDst.write("{:<12}{:<20}".format(sVersion, time.strftime("%Y.%m.%d %H:%M")))
   245         -        for sText in ( s.strip() for s in hSrc if not s.startswith("#") and s.strip() ):
   246         -            with timeblock(sText[:sText.find(".")], hDst):
   247         -                gce.parse(sText)
   248         -        if hDst:
   249         -            hDst.write("\n")
   250         -
   251    250   
   252    251   def main():
   253    252       "start function"
   254    253       unittest.main()
   255    254   
   256    255   
   257    256   if __name__ == '__main__':
   258    257       main()
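
The `timeblock`/`perf` pair is unchanged; it simply moves from the bottom of the module to the top, so that `from contextlib import contextmanager` becomes a regular top-of-file import and pylint's wrong-import-position warning goes away. A self-contained sketch of the same timing pattern, with a dummy workload standing in for `gce.parse()`:

    import time
    from contextlib import contextmanager

    @contextmanager
    def timeblock (label, hDst=None):
        "measure wall-clock time spent inside the with-block"
        start = time.perf_counter()
        try:
            yield
        finally:
            end = time.perf_counter()
            print('{} : {}'.format(label, end - start))
            if hDst:
                hDst.write("{:<12.6}".format(end-start))

    with timeblock("sum of squares"):       # dummy workload instead of gce.parse(sText)
        sum(i*i for i in range(100000))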

Changes to graphspell/ibdawg.py.

   186    186           for i in range(1, self.nChar+1):
   187    187               self.dChar[self.lArcVal[i]] = i
   188    188           self.dCharVal = { v: k  for k, v in self.dChar.items() }
   189    189           self.nBytesOffset = 1 # version 3
   190    190   
   191    191       def _initJSON (self, oJSON):
   192    192           "initialize with a JSON text file"
          193  +        self.sByDic = ""  # init to prevent pylint whining
   193    194           self.__dict__.update(oJSON)
   194    195           self.byDic = binascii.unhexlify(self.sByDic)
   195    196           self.dCharVal = { v: k  for k, v in self.dChar.items() }
   196         -        self.a2grams = set(self.l2grams)  if hasattr(self, 'l2grams')  else None
          197  +        self.a2grams = set(getattr(self, 'l2grams'))  if hasattr(self, 'l2grams')  else None
   197    198   
   198    199       def getInfo (self):
   199    200           "return string about the IBDAWG"
   200    201           return  "  Language: {0.sLangName}   Lang code: {0.sLangCode}   Dictionary name: {0.sDicName}" \
   201    202                   "  Compression method: {0.nCompressionMethod:>2}   Date: {0.sDate}   Stemming: {0.cStemming}FX\n" \
   202    203                   "  Arcs values:  {0.nArcVal:>10,} = {0.nChar:>5,} characters,  {0.nAff:>6,} affixes,  {0.nTag:>6,} tags\n" \
   203    204                   "  Dictionary: {0.nEntry:>12,} entries,    {0.nNode:>11,} nodes,   {0.nArc:>11,} arcs\n" \
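
Both edits above exist only to satisfy static analysis: attributes injected through `self.__dict__.update(oJSON)` are invisible to pylint, so `sByDic` is pre-declared and the optional `l2grams` field is read with `getattr()` rather than a bare attribute access. A toy class (not IBDAWG) showing the same pattern:

    class JSONBacked:
        "toy example of a class whose attributes come from a JSON dict"
        def __init__ (self, dJSON):
            self.sByDic = ""        # declared up front so pylint sees the member
            self.__dict__.update(dJSON)
            self.a2grams = set(getattr(self, "l2grams"))  if hasattr(self, "l2grams")  else None

    o = JSONBacked({ "sByDic": "414243", "l2grams": ["ab", "bc"] })
    print(o.sByDic, o.a2grams)      # 414243 and {'ab', 'bc'} (set order may vary)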

Changes to graphspell/tokenizer.py.

    56     56           if bStartEndToken:
    57     57               yield { "i": 0, "sType": "INFO", "sValue": "<start>", "nStart": 0, "nEnd": 0, "lMorph": ["<start>"] }
    58     58           for i, m in enumerate(self.zToken.finditer(sText), 1):
    59     59               yield { "i": i, "sType": m.lastgroup, "sValue": m.group(), "nStart": m.start(), "nEnd": m.end() }
    60     60           if bStartEndToken:
    61     61               iEnd = len(sText)
    62     62               yield { "i": i+1, "sType": "INFO", "sValue": "<end>", "nStart": iEnd, "nEnd": iEnd, "lMorph": ["<end>"] }
           63  +
           64  +    def getTokenTypes (self):
           65  +        "returns list of token types as tuple (token name, regex)"
           66  +        return [ sRegex[4:-1].split(">")  for sRegex in _PATTERNS[self.sLang] ]
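
The new `getTokenTypes()` relies on every entry of `_PATTERNS[self.sLang]` being a named group of the form `(?P<NAME>regex)`: the `[4:-1]` slice strips the leading `(?P<` and the closing `)`, and splitting on `>` separates the group name from its body. A quick check on a made-up pattern of that shape (the real `_PATTERNS` entries are not shown in this diff):

    sRegex = r"(?P<NUM>\d+(?:[.,]\d+)?)"    # hypothetical pattern, same shape as _PATTERNS entries
    print(sRegex[4:-1].split(">"))          # ['NUM', '\\d+(?:[.,]\\d+)?']

Since `split(">")` has no maxsplit, a body that itself contains ">" would be broken into more than two pieces; the token regexes are presumably written to avoid that character outside the group header.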

Changes to pylintrc.

   280    280   #inlinevar-rgx=
   281    281   
   282    282   # Naming style matching correct method names
   283    283   method-naming-style=camelCase
   284    284   
   285    285   # Regular expression matching correct method names. Overrides method-naming-
   286    286   # style
   287         -#method-rgx=^test_
          287  +method-rgx=^test_|^_|^[a-zA-Z][a-zA-Z0-9]+$
   288    288   
   289    289   # Naming style matching correct module names
   290    290   module-naming-style=snake_case
   291    291   
   292    292   # Regular expression matching correct module names. Overrides module-naming-
   293    293   # style
   294    294   #module-rgx=
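
The uncommented `method-rgx` above now accepts three families of method names: `test_*` (unittest test methods), a leading underscore (private helpers such as `_fuckBackslashUTF8`), and plain camelCase/alphanumeric names of at least two characters. A quick check with `re.match`, which is a reasonable stand-in for how pylint applies an anchored name pattern:

    import re
    zMethodRgx = re.compile(r"^test_|^_|^[a-zA-Z][a-zA-Z0-9]+$")
    for sName in ("test_grammar", "_fuckBackslashUTF8", "getTokenTypes", "get_token_types", "x"):
        print("{:<20} {}".format(sName, bool(zMethodRgx.match(sName))))
    # the first three match; get_token_types (inner underscore) and x (single letter) do not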