Grammalecte  Check-in [63b8e1b23a]

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:[core][fr] code cleaning (pylint)
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk | fr | core
Files: files | file ages | folders
SHA3-256:63b8e1b23a899bd865b7878b3e6e10e5422ca5dd7fd676e24837c911a7b3c67b
User & Date: olr 2019-05-12 10:01:25
Context
2019-05-12
10:30
[fr] faux positifs check-in: 414cbe8c5a user: olr tags: fr, trunk
10:01
[core][fr] code cleaning (pylint) check-in: 63b8e1b23a user: olr tags: core, fr, trunk
10:00
[graphspell] code cleaning (pylint) check-in: c2f4d1d4ee user: olr tags: graphspell, trunk
Changes

Changes to gc_core/py/lang_core/gc_engine.py.

    60     60       global _sAppContext
    61     61       global _dOptions
    62     62       global _dOptionsColors
    63     63       global _oTokenizer
    64     64       try:
    65     65           _oSpellChecker = SpellChecker("${lang}", "${dic_main_filename_py}", "${dic_community_filename_py}", "${dic_personal_filename_py}")
    66     66           _sAppContext = sContext
    67         -        _dOptions = dict(gc_options.getOptions(sContext))   # duplication necessary, to be able to reset to default
           67  +        _dOptions = gc_options.getOptions(sContext).copy()   # duplication necessary, to be able to reset to default
    68     68           _dOptionsColors = gc_options.getOptionsColors(sContext, sColorType)
    69     69           _oTokenizer = _oSpellChecker.getTokenizer()
    70     70           _oSpellChecker.activateStorage()
    71     71       except:
    72     72           traceback.print_exc()
    73     73   
    74     74   
................................................................................
   164    164   def getOptions ():
   165    165       "return the dictionary of current options"
   166    166       return _dOptions
   167    167   
   168    168   
   169    169   def getDefaultOptions ():
   170    170       "return the dictionary of default options"
   171         -    return dict(gc_options.getOptions(_sAppContext))
          171  +    return gc_options.getOptions(_sAppContext).copy()
   172    172   
   173    173   
   174    174   def getOptionsLabels (sLang):
   175    175       "return options labels"
   176    176       return gc_options.getUI(sLang)
   177    177   
   178    178   
................................................................................
   182    182       echo("\n".join( [ k+":\t"+str(v)+"\t"+gc_options.getUI(sLang).get(k, ("?", ""))[0]  for k, v  in sorted(_dOptions.items()) ] ))
   183    183       echo("")
   184    184   
   185    185   
   186    186   def resetOptions ():
   187    187       "set options to default values"
   188    188       global _dOptions
   189         -    _dOptions = dict(gc_options.getOptions(_sAppContext))
          189  +    _dOptions = getDefaultOptions()
   190    190   
   191    191   
   192    192   #### Parsing
   193    193   
   194    194   _zEndOfSentence = re.compile(r'([.?!:;…]\W+(?=[A-ZÉÈÎÔ])|.$)')
   195    195   _zBeginOfParagraph = re.compile(r"^\W*")
   196    196   _zEndOfParagraph = re.compile(r"\W*$")
................................................................................
  1020   1020       bResult = any(zPattern.search(sMorph)  for sMorph in lMorph)
  1021   1021       if bResult and bSetMorph:
  1022   1022           dToken1["lMorph"] = lMorph
  1023   1023       return bResult
  1024   1024   
  1025   1025   
  1026   1026   def g_tag_before (dToken, dTags, sTag):
         1027  +    "returns True if <sTag> is present on tokens before <dToken>"
  1027   1028       if sTag not in dTags:
  1028   1029           return False
  1029   1030       if dToken["i"] > dTags[sTag][0]:
  1030   1031           return True
  1031   1032       return False
  1032   1033   
  1033   1034   
  1034   1035   def g_tag_after (dToken, dTags, sTag):
         1036  +    "returns True if <sTag> is present on tokens after <dToken>"
  1035   1037       if sTag not in dTags:
  1036   1038           return False
  1037   1039       if dToken["i"] < dTags[sTag][1]:
  1038   1040           return True
  1039   1041       return False
  1040   1042   
  1041   1043   
  1042   1044   def g_tag (dToken, sTag):
         1045  +    "returns True if <sTag> is present on token <dToken>"
  1043   1046       return "aTags" in dToken and sTag in dToken["aTags"]
  1044   1047   
  1045   1048   
  1046   1049   def g_space_between_tokens (dToken1, dToken2, nMin, nMax=None):
         1050  +    "checks if the space between tokens is >= <nMin> and <= <nMax>"
  1047   1051       nSpace = dToken2["nStart"] - dToken1["nEnd"]
  1048   1052       if nSpace < nMin:
  1049   1053           return False
  1050   1054       if nMax is not None and nSpace > nMax:
  1051   1055           return False
  1052   1056       return True
  1053   1057   
  1054   1058   
  1055   1059   def g_token (lToken, i):
         1060  +    "return token at index <i> in lToken (or the closest one)"
  1056   1061       if i < 0:
  1057   1062           return lToken[0]
  1058   1063       if i >= len(lToken):
  1059   1064           return lToken[-1]
  1060   1065       return lToken[i]
  1061   1066   
  1062   1067   
................................................................................
  1152   1157       "set morphologies of <dToken>, always return True"
  1153   1158       dToken["lMorph"] = lMorph
  1154   1159       #echo("DA:", dToken["sValue"], lMorph)
  1155   1160       return True
  1156   1161   
  1157   1162   
  1158   1163   def g_define_from (dToken, nLeft=None, nRight=None):
         1164  +    "set morphologies of <dToken> from a slice of its value, delimited by <nLeft> and <nRight>"
  1159   1165       if nLeft is not None:
  1160   1166           dToken["lMorph"] = _oSpellChecker.getMorph(dToken["sValue"][slice(nLeft, nRight)])
  1161   1167       else:
  1162   1168           dToken["lMorph"] = _oSpellChecker.getMorph(dToken["sValue"])
  1163   1169       return True
  1164   1170   
  1165   1171   

Changes to gc_lang/fr/modules/tests.py.

    20     20   
    21     21   def _fuckBackslashUTF8 (s):
    22     22       "fuck that shit"
    23     23       return s.replace("\u2019", "'").replace("\u2013", "–").replace("\u2014", "—")
    24     24   
    25     25   
    26     26   class TestDictionary (unittest.TestCase):
           27  +    "Test du correcteur orthographique"
    27     28   
    28     29       @classmethod
    29     30       def setUpClass (cls):
    30     31           cls.oDic = IBDAWG("${dic_main_filename_py}")
    31     32   
    32     33       def test_lookup (self):
    33     34           for sWord in ["branche", "Émilie"]:
................................................................................
    43     44   
    44     45       def test_isvalid_failed (self):
    45     46           for sWord in ["BranchE", "BRanche", "BRAnCHE", "émilie", "éMILIE", "émiLie"]:
    46     47               self.assertFalse(self.oDic.isValid(sWord), sWord)
    47     48   
    48     49   
    49     50   class TestConjugation (unittest.TestCase):
           51  +    "Tests des conjugaisons"
    50     52   
    51     53       @classmethod
    52     54       def setUpClass (cls):
    53     55           pass
    54     56   
    55     57       def test_isverb (self):
    56     58           for sVerb in ["avoir", "être", "aller", "manger", "courir", "venir", "faire", "finir"]:
................................................................................
    66     68       def test_getconj (self):
    67     69           for sVerb, sTense, sWho, sConj in [("aller", ":E", ":2s", "va"), ("avoir", ":Iq", ":1s", "avais"), ("être", ":Ip", ":2p", "êtes"),
    68     70                                              ("manger", ":Sp", ":3s", "mange"), ("finir", ":K", ":3p", "finiraient"), ("prendre", ":If", ":1p", "prendrons")]:
    69     71               self.assertEqual(conj.getConj(sVerb, sTense, sWho), sConj, sVerb)
    70     72   
    71     73   
    72     74   class TestPhonet (unittest.TestCase):
           75  +    "Tests des équivalences phonétiques"
    73     76   
    74     77       @classmethod
    75     78       def setUpClass (cls):
    76     79           cls.lSet = [
    77     80               ["ce", "se"],
    78     81               ["ces", "ses", "sais", "sait"],
    79     82               ["cet", "cette", "sept", "set", "sets"],
................................................................................
   102    105       def test_getsimil (self):
   103    106           for aSet in self.lSet:
   104    107               for sWord in aSet:
   105    108                   self.assertListEqual(phonet.getSimil(sWord), sorted(aSet))
   106    109   
   107    110   
   108    111   class TestMasFemSingPlur (unittest.TestCase):
          112  +    "Tests des masculins, féminins, singuliers et pluriels"
   109    113   
   110    114       @classmethod
   111    115       def setUpClass (cls):
   112    116           cls.lPlural = [
   113    117               ("travail", ["travaux"]),
   114    118               ("vœu", ["vœux"]),
   115    119               ("gentleman", ["gentlemans", "gentlemen"])
................................................................................
   117    121   
   118    122       def test_getplural (self):
   119    123           for sSing, lPlur in self.lPlural:
   120    124               self.assertListEqual(mfsp.getMiscPlural(sSing), lPlur)
   121    125   
   122    126   
   123    127   class TestGrammarChecking (unittest.TestCase):
          128  +    "Tests du correcteur grammatical"
   124    129   
   125    130       @classmethod
   126    131       def setUpClass (cls):
   127    132           gce.load()
   128    133           cls._zError = re.compile(r"\{\{.*?\}\}")
   129    134           cls._aTestedRules = set()
   130    135   
   131    136       def test_parse (self):
   132    137           zOption = re.compile("^__([a-zA-Z0-9]+)__ ")
   133         -        spHere, spfThisFile = os.path.split(__file__)
          138  +        spHere, _ = os.path.split(__file__)
   134    139           with open(os.path.join(spHere, "gc_test.txt"), "r", encoding="utf-8") as hSrc:
   135    140               nError = 0
   136    141               for sLine in ( s for s in hSrc if not s.startswith("#") and s.strip() ):
   137    142                   sLineNum = sLine[:10].strip()
   138    143                   sLine = sLine[10:].strip()
   139    144                   sOption = None
   140    145                   m = zOption.search(sLine)
................................................................................
   212    217               sRes = sRes[:nStart] + "~" * (nEnd - nStart) + sRes[nEnd:-4]
   213    218           return sRes
   214    219   
   215    220   
   216    221   from contextlib import contextmanager
   217    222   @contextmanager
   218    223   def timeblock (label, hDst):
          224  +    "performance counter (contextmanager)"
   219    225       start = time.perf_counter()
   220    226       try:
   221    227           yield
   222    228       finally:
   223    229           end = time.perf_counter()
   224    230           print('{} : {}'.format(label, end - start))
   225    231           if hDst:
................................................................................
   226    232               hDst.write("{:<12.6}".format(end-start))
   227    233   
   228    234   
   229    235   def perf (sVersion, hDst=None):
   230    236       "performance tests"
   231    237       print("\nPerformance tests")
   232    238       gce.load()
   233         -    aErrs = gce.parse("Texte sans importance… utile pour la compilation des règles avant le calcul des perfs.")
          239  +    gce.parse("Texte sans importance… utile pour la compilation des règles avant le calcul des perfs.")
   234    240   
   235         -    spHere, spfThisFile = os.path.split(__file__)
          241  +    spHere, _ = os.path.split(__file__)
   236    242       with open(os.path.join(spHere, "perf.txt"), "r", encoding="utf-8") as hSrc:
   237    243           if hDst:
   238    244               hDst.write("{:<12}{:<20}".format(sVersion, time.strftime("%Y.%m.%d %H:%M")))
   239    245           for sText in ( s.strip() for s in hSrc if not s.startswith("#") and s.strip() ):
   240    246               with timeblock(sText[:sText.find(".")], hDst):
   241         -                aErrs = gce.parse(sText)
          247  +                gce.parse(sText)
   242    248           if hDst:
   243    249               hDst.write("\n")
   244    250   
   245    251   
   246    252   def main():
          253  +    "start function"
   247    254       unittest.main()
   248    255   
   249    256   
   250    257   if __name__ == '__main__':
   251    258       main()

Changes to gc_lang/fr/modules/textformatter.py.

   183    183       ## missing apostrophes
   184    184       "ma_word":                  [("(?i)(qu|lorsqu|puisqu|quoiqu|presqu|jusqu|aujourd|entr|quelqu|prud) ", "\\1’")],
   185    185       "ma_1letter_lowercase":     [("\\b([ldjnmtscç]) (?=[aàeéêiîoôuyhAÀEÉÊIÎOÔUYH])", "\\1’")],
   186    186       "ma_1letter_uppercase":     [("\\b([LDJNMTSCÇ]) (?=[aàeéêiîoôuyhAÀEÉÊIÎOÔUYH])", "\\1’")]
   187    187   }
   188    188   
   189    189   
   190         -lOptRepl = [
   191         -    ("ts_units", True),
   192         -    ("start_of_paragraph", True),
   193         -    ("end_of_paragraph", True),
   194         -    ("between_words", True),
   195         -    ("before_punctuation", True),
   196         -    ("within_parenthesis", True),
   197         -    ("within_square_brackets", True),
   198         -    ("within_quotation_marks", True),
   199         -    ("nbsp_before_punctuation", True),
   200         -    ("nbsp_within_quotation_marks", True),
   201         -    ("nbsp_within_numbers", True),
   202         -    ("nnbsp_before_punctuation", False),
   203         -    ("nnbsp_within_quotation_marks", False),
   204         -    ("nnbsp_within_numbers", False),
   205         -    ("nbsp_titles", False),
   206         -    ("nbsp_before_symbol", True),
   207         -    ("nbsp_before_units", True),
   208         -    ("nbsp_repair", True),
   209         -    ("add_space_after_punctuation", True),
   210         -    ("add_space_around_hyphens", True),
   211         -    ("add_space_repair", True),
   212         -    ("erase_non_breaking_hyphens", False),
   213         -    ("ts_apostrophe", True),
   214         -    ("ts_ellipsis", True),
   215         -    ("ts_n_dash_middle", True),
   216         -    ("ts_m_dash_middle", False),
   217         -    ("ts_n_dash_start", False),
   218         -    ("ts_m_dash_start", True),
   219         -    ("ts_quotation_marks", True),
   220         -    ("ts_spell", True),
   221         -    ("ts_ligature_ffi_on", False),
   222         -    ("ts_ligature_ffl_on", False),
   223         -    ("ts_ligature_fi_on", False),
   224         -    ("ts_ligature_fl_on", False),
   225         -    ("ts_ligature_ff_on", False),
   226         -    ("ts_ligature_ft_on", False),
   227         -    ("ts_ligature_st_on", False),
   228         -    ("ts_ligature_fi_off", False),
   229         -    ("ts_ligature_fl_off", False),
   230         -    ("ts_ligature_ff_off", False),
   231         -    ("ts_ligature_ffi_off", False),
   232         -    ("ts_ligature_ffl_off", False),
   233         -    ("ts_ligature_ft_off", False),
   234         -    ("ts_ligature_st_off", False),
   235         -    ("ordinals_exponant", False),
   236         -    ("ordinals_no_exponant", True),
   237         -    ("etc", True),
   238         -    ("mh_interrogatives", True),
   239         -    ("mh_numbers", True),
   240         -    ("mh_frequent_words", True),
   241         -    ("ma_word", True),
   242         -    ("ma_1letter_lowercase", False),
   243         -    ("ma_1letter_uppercase", False),
   244         -]
          190  +dDefaultOptions = {
          191  +    "ts_units": True,
          192  +    "start_of_paragraph": True,
          193  +    "end_of_paragraph": True,
          194  +    "between_words": True,
          195  +    "before_punctuation": True,
          196  +    "within_parenthesis": True,
          197  +    "within_square_brackets": True,
          198  +    "within_quotation_marks": True,
          199  +    "nbsp_before_punctuation": True,
          200  +    "nbsp_within_quotation_marks": True,
          201  +    "nbsp_within_numbers": True,
          202  +    "nnbsp_before_punctuation": False,
          203  +    "nnbsp_within_quotation_marks": False,
          204  +    "nnbsp_within_numbers": False,
          205  +    "nbsp_titles": False,
          206  +    "nbsp_before_symbol": True,
          207  +    "nbsp_before_units": True,
          208  +    "nbsp_repair": True,
          209  +    "add_space_after_punctuation": True,
          210  +    "add_space_around_hyphens": True,
          211  +    "add_space_repair": True,
          212  +    "erase_non_breaking_hyphens": False,
          213  +    "ts_apostrophe": True,
          214  +    "ts_ellipsis": True,
          215  +    "ts_n_dash_middle": True,
          216  +    "ts_m_dash_middle": False,
          217  +    "ts_n_dash_start": False,
          218  +    "ts_m_dash_start": True,
          219  +    "ts_quotation_marks": True,
          220  +    "ts_spell": True,
          221  +    "ts_ligature_ffi_on": False,
          222  +    "ts_ligature_ffl_on": False,
          223  +    "ts_ligature_fi_on": False,
          224  +    "ts_ligature_fl_on": False,
          225  +    "ts_ligature_ff_on": False,
          226  +    "ts_ligature_ft_on": False,
          227  +    "ts_ligature_st_on": False,
          228  +    "ts_ligature_fi_off": False,
          229  +    "ts_ligature_fl_off": False,
          230  +    "ts_ligature_ff_off": False,
          231  +    "ts_ligature_ffi_off": False,
          232  +    "ts_ligature_ffl_off": False,
          233  +    "ts_ligature_ft_off": False,
          234  +    "ts_ligature_st_off": False,
          235  +    "ordinals_exponant": False,
          236  +    "ordinals_no_exponant": True,
          237  +    "etc": True,
          238  +    "mh_interrogatives": True,
          239  +    "mh_numbers": True,
          240  +    "mh_frequent_words": True,
          241  +    "ma_word": True,
          242  +    "ma_1letter_lowercase": False,
          243  +    "ma_1letter_uppercase": False
          244  +}
   245    245   
   246    246   
   247    247   class TextFormatter:
   248    248       "Text Formatter: purge typographic mistakes from text"
   249    249   
   250    250       def __init__ (self):
   251         -        for sOpt, lTup in dReplTable.items():
          251  +        for _, lTup in dReplTable.items():
   252    252               for i, t in enumerate(lTup):
   253    253                   lTup[i] = (re.compile(t[0]), t[1])
   254    254   
   255    255       def formatText (self, sText):
   256    256           "returns formatted text"
   257         -        for sOptName, bVal in lOptRepl:
          257  +        for sOptName, bVal in dDefaultOptions.items():
   258    258               if bVal:
   259    259                   for zRgx, sRep in dReplTable[sOptName]:
   260    260                       sText = zRgx.sub(sRep, sText)
   261    261           return sText
          262  +
          263  +    def getDefaultOptions (self):
          264  +        "returns default options"
          265  +        return dDefaultOptions.copy()

Changes to pylintrc.

   230    230   docstring-min-length=-1
   231    231   
   232    232   # Naming style matching correct function names
   233    233   function-naming-style=camelCase
   234    234   
   235    235   # Regular expression matching correct function names. Overrides function-
   236    236   # naming-style
   237         -function-rgx=^[a-z]\w+|^_*
          237  +function-rgx=^[a-z]\w+|^_
   238    238   
   239    239   # Good variable names which should always be accepted, separated by a comma
   240    240   good-names=i,
   241    241              i1,
   242    242              i2,
   243    243              j,
   244    244              k,
   245    245              s,
   246    246              s1,
   247    247              s2,
   248         -           sx,
   249    248              sf,
   250    249              sp,
   251    250              spf,
   252    251              c,
   253    252              c1,
   254    253              c2,
   255    254              n,
................................................................................
   281    280   #inlinevar-rgx=
   282    281   
   283    282   # Naming style matching correct method names
   284    283   method-naming-style=camelCase
   285    284   
   286    285   # Regular expression matching correct method names. Overrides method-naming-
   287    286   # style
   288         -#method-rgx=
          287  +#method-rgx=^test_
   289    288   
   290    289   # Naming style matching correct module names
   291    290   module-naming-style=snake_case
   292    291   
   293    292   # Regular expression matching correct module names. Overrides module-naming-
   294    293   # style
   295    294   #module-rgx=
................................................................................
   329    328   # tab).
   330    329   indent-string='    '
   331    330   
   332    331   # Maximum number of characters on a single line.
   333    332   max-line-length=200
   334    333   
   335    334   # Maximum number of lines in a module
   336         -max-module-lines=1000
          335  +max-module-lines=5000
   337    336   
   338    337   # List of optional constructs for which whitespace checking is disabled. `dict-
   339    338   # separator` is used to allow tabulation in dicts, etc.: {1  : 1,\n222: 2}.
   340    339   # `trailing-comma` allows a space between comma and closing bracket: (a, ).
   341    340   # `empty-line` allows space-only lines.
   342    341   no-space-check=trailing-comma,
   343    342                  dict-separator
................................................................................
   488    487   
   489    488   # List of member names, which should be excluded from the protected access
   490    489   # warning.
   491    490   exclude-protected=_asdict,
   492    491                     _fields,
   493    492                     _replace,
   494    493                     _source,
   495         -                  _make
          494  +                  _make,
          495  +                  _getTags,
          496  +                  _hasConjWithTags,
          497  +                  _getConjWithTags
   496    498   
   497    499   # List of valid names for the first argument in a class method.
   498    500   valid-classmethod-first-arg=cls
   499    501   
   500    502   # List of valid names for the first argument in a metaclass class method.
   501    503   valid-metaclass-classmethod-first-arg=mcs
   502    504