Grammalecte  Check-in [a85f64f6f8]

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:[graphspell][core][fr] code cleaning (pylint)
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk | fr | core | graphspell
Files: files | file ages | folders
SHA3-256:a85f64f6f87355970851429c257d916ea75897a4fbda83dc2c74b8e876e4d568
User & Date: olr 2019-05-10 20:52:12
Context
2019-05-11
07:17
[fx] gc panel: fix CSS nightmare check-in: a54db46fa5 user: olr tags: fx, trunk
2019-05-10
20:52
[graphspell][core][fr] code cleaning (pylint) check-in: a85f64f6f8 user: olr tags: core, fr, graphspell, trunk
19:44
[graphspell][core][fr] code cleaning (pylint) check-in: a186cf9261 user: olr tags: core, fr, graphspell, trunk
Changes

Changes to gc_core/py/grammar_checker.py.

53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
        "returns a tuple: (grammar errors, spelling errors)"
        aGrammErrs = self.gce.parse(sText, "FR", bDebug=bDebug, dOptions=dOptions, bContext=bContext)
        aSpellErrs = self.oSpellChecker.parseParagraph(sText, bSpellSugg)
        return aGrammErrs, aSpellErrs

    def generateText (self, sText, bEmptyIfNoErrors=False, bSpellSugg=False, nWidth=100, bDebug=False):
        "[todo]"
        pass

    def generateTextAsJSON (self, sText, bContext=False, bEmptyIfNoErrors=False, bSpellSugg=False, bReturnText=False, bDebug=False):
        "[todo]"
        pass

    def generateParagraph (self, sText, dOptions=None, bEmptyIfNoErrors=False, bSpellSugg=False, nWidth=100, bDebug=False):
        "parse text and return a readable text with underline errors"
        aGrammErrs, aSpellErrs = self.getParagraphErrors(sText, dOptions, False, bSpellSugg, bDebug)
        if bEmptyIfNoErrors and not aGrammErrs and not aSpellErrs:
            return ""
        return text.generateParagraph(sText, aGrammErrs, aSpellErrs, nWidth)







<



<







53
54
55
56
57
58
59

60
61
62

63
64
65
66
67
68
69
        "returns a tuple: (grammar errors, spelling errors)"
        aGrammErrs = self.gce.parse(sText, "FR", bDebug=bDebug, dOptions=dOptions, bContext=bContext)
        aSpellErrs = self.oSpellChecker.parseParagraph(sText, bSpellSugg)
        return aGrammErrs, aSpellErrs

    def generateText (self, sText, bEmptyIfNoErrors=False, bSpellSugg=False, nWidth=100, bDebug=False):
        "[todo]"


    def generateTextAsJSON (self, sText, bContext=False, bEmptyIfNoErrors=False, bSpellSugg=False, bReturnText=False, bDebug=False):
        "[todo]"


    def generateParagraph (self, sText, dOptions=None, bEmptyIfNoErrors=False, bSpellSugg=False, nWidth=100, bDebug=False):
        "parse text and return a readable text with underline errors"
        aGrammErrs, aSpellErrs = self.getParagraphErrors(sText, dOptions, False, bSpellSugg, bDebug)
        if bEmptyIfNoErrors and not aGrammErrs and not aSpellErrs:
            return ""
        return text.generateParagraph(sText, aGrammErrs, aSpellErrs, nWidth)

Changes to gc_lang/fr/modules/conj.py.

416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
            return "# erreur"

    def _getPronomSujet (self, sWho, bFem):
        try:
            if sWho == ":3s":
                if self._sRawInfo[5] == "r":
                    return "on"
                elif bFem:
                    return "elle"
            if sWho == ":3p" and bFem:
                return "elles"
            return _dProSuj[sWho]
        except:
            traceback.print_exc()
            return "# erreur"







|







416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
            return "# erreur"

    def _getPronomSujet (self, sWho, bFem):
        try:
            if sWho == ":3s":
                if self._sRawInfo[5] == "r":
                    return "on"
                if bFem:
                    return "elle"
            if sWho == ":3p" and bFem:
                return "elles"
            return _dProSuj[sWho]
        except:
            traceback.print_exc()
            return "# erreur"

Changes to gc_lang/fr/modules/conj_data.py.

cannot compute difference between binary files

Changes to gc_lang/fr/modules/conj_generator.py.

20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
def getVerbGroupChar (sVerb):
    "returns the group number of <sVerb> guessing on its ending"
    sVerb = sVerb.lower()
    if sVerb.endswith("er"):
        return "1"
    if sVerb.endswith("ir"):
        return "2"
    if sVerb == "être" or sVerb == "avoir":
        return "0"
    if sVerb.endswith("re"):
        return "3"
    return "4"


def getConjRules (sVerb, bVarPpas=True, nGroup=2):







|







20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
def getVerbGroupChar (sVerb):
    "returns the group number of <sVerb> guessing on its ending"
    sVerb = sVerb.lower()
    if sVerb.endswith("er"):
        return "1"
    if sVerb.endswith("ir"):
        return "2"
    if sVerb in ("être", "avoir"):
        return "0"
    if sVerb.endswith("re"):
        return "3"
    return "4"


def getConjRules (sVerb, bVarPpas=True, nGroup=2):

Changes to gc_lang/fr/modules/lexicographe.py.

196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
                aMorph.append( "{} : {}".format(sWord, self.formatTags(lMorph[0])) )
            else:
                aMorph.append( "{} :  inconnu du dictionnaire".format(sWord) )
            # suffixe d’un mot composé
            if m2:
                aMorph.append( "-{} : {}".format(m2.group(2), self._formatSuffix(m2.group(2).lower())) )
            # Verbes
            aVerb = set([ s[1:s.find("/")]  for s in lMorph  if ":V" in s ])
            return (aMorph, aVerb)
        except:
            traceback.print_exc()
            return (["#erreur"], None)

    def formatTags (self, sTags):
        "returns string: readable tags"







|







196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
                aMorph.append( "{} : {}".format(sWord, self.formatTags(lMorph[0])) )
            else:
                aMorph.append( "{} :  inconnu du dictionnaire".format(sWord) )
            # suffixe d’un mot composé
            if m2:
                aMorph.append( "-{} : {}".format(m2.group(2), self._formatSuffix(m2.group(2).lower())) )
            # Verbes
            aVerb = { s[1:s.find("/")]  for s in lMorph  if ":V" in s }
            return (aMorph, aVerb)
        except:
            traceback.print_exc()
            return (["#erreur"], None)

    def formatTags (self, sTags):
        "returns string: readable tags"

Changes to gc_lang/fr/modules/locutions_data.py.

cannot compute difference between binary files

Changes to gc_lang/fr/modules/mfsp_data.py.

cannot compute difference between binary files

Changes to gc_lang/fr/modules/phonet_data.py.

cannot compute difference between binary files

Changes to gc_lang/fr/modules/tests.py.

223
224
225
226
227
228
229

230
231
232
233
234
235
236
        end = time.perf_counter()
        print('{} : {}'.format(label, end - start))
        if hDst:
            hDst.write("{:<12.6}".format(end-start))


def perf (sVersion, hDst=None):

    print("\nPerformance tests")
    gce.load()
    aErrs = gce.parse("Texte sans importance… utile pour la compilation des règles avant le calcul des perfs.")

    spHere, spfThisFile = os.path.split(__file__)
    with open(os.path.join(spHere, "perf.txt"), "r", encoding="utf-8") as hSrc:
        if hDst:







>







223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
        end = time.perf_counter()
        print('{} : {}'.format(label, end - start))
        if hDst:
            hDst.write("{:<12.6}".format(end-start))


def perf (sVersion, hDst=None):
    "performance tests"
    print("\nPerformance tests")
    gce.load()
    aErrs = gce.parse("Texte sans importance… utile pour la compilation des règles avant le calcul des perfs.")

    spHere, spfThisFile = os.path.split(__file__)
    with open(os.path.join(spHere, "perf.txt"), "r", encoding="utf-8") as hSrc:
        if hDst:

Changes to gc_lang/fr/modules/textformatter.py.

1
2
3
4
5
6
7
8
9
...
243
244
245
246
247
248
249

250
251
252
253
254
255
256
257
258
259
260
261
#!python3

"""
Text formatter
"""

import re


................................................................................
    ("ma_word", True),
    ("ma_1letter_lowercase", False),
    ("ma_1letter_uppercase", False),
]


class TextFormatter:


    def __init__ (self):
        for sOpt, lTup in dReplTable.items():
            for i, t in enumerate(lTup):
                lTup[i] = (re.compile(t[0]), t[1])

    def formatText (self, sText, **args):
        for sOptName, bVal in lOptRepl:
            if bVal:
                for zRgx, sRep in dReplTable[sOptName]:
                    sText = zRgx.sub(sRep, sText)
        return sText
<
<







 







>






|







1
2
3
4
5
6
7
...
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260


"""
Text formatter
"""

import re


................................................................................
    ("ma_word", True),
    ("ma_1letter_lowercase", False),
    ("ma_1letter_uppercase", False),
]


class TextFormatter:
    "Text Formatter: purge typographic mistakes from text"

    def __init__ (self):
        for sOpt, lTup in dReplTable.items():
            for i, t in enumerate(lTup):
                lTup[i] = (re.compile(t[0]), t[1])

    def formatText (self, sText):
        for sOptName, bVal in lOptRepl:
            if bVal:
                for zRgx, sRep in dReplTable[sOptName]:
                    sText = zRgx.sub(sRep, sText)
        return sText

Changes to graphspell/char_player.py.

39
40
41
42
43
44
45

46
47
48
49
50
51
52


_xTransNumbersToExponent = str.maketrans({
    "0": "⁰", "1": "¹", "2": "²", "3": "³", "4": "⁴", "5": "⁵", "6": "⁶", "7": "⁷", "8": "⁸", "9": "⁹"
})

def numbersToExponent (sWord):

    return sWord.translate(_xTransNumbersToExponent)


aVowel = set("aáàâäāeéèêëēiíìîïīoóòôöōuúùûüūyýỳŷÿȳœæAÁÀÂÄĀEÉÈÊËĒIÍÌÎÏĪOÓÒÔÖŌUÚÙÛÜŪYÝỲŶŸȲŒÆ")
aConsonant = set("bcçdfghjklmnñpqrstvwxzBCÇDFGHJKLMNÑPQRSTVWXZ")
aDouble = set("bcdfjklmnprstzBCDFJKLMNPRSTZ")  # letters that may be used twice successively








>







39
40
41
42
43
44
45
46
47
48
49
50
51
52
53


_xTransNumbersToExponent = str.maketrans({
    "0": "⁰", "1": "¹", "2": "²", "3": "³", "4": "⁴", "5": "⁵", "6": "⁶", "7": "⁷", "8": "⁸", "9": "⁹"
})

def numbersToExponent (sWord):
    "convert numeral chars to exponant chars"
    return sWord.translate(_xTransNumbersToExponent)


aVowel = set("aáàâäāeéèêëēiíìîïīoóòôöōuúùûüūyýỳŷÿȳœæAÁÀÂÄĀEÉÈÊËĒIÍÌÎÏĪOÓÒÔÖŌUÚÙÛÜŪYÝỲŶŸȲŒÆ")
aConsonant = set("bcçdfghjklmnñpqrstvwxzBCÇDFGHJKLMNÑPQRSTVWXZ")
aDouble = set("bcdfjklmnprstzBCDFJKLMNPRSTZ")  # letters that may be used twice successively

Changes to graphspell/dawg.py.

189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
            nCommonPrefix += 1

        # Check the lUncheckedNodes for redundant nodes, proceeding from last
        # one down to the common prefix size. Then truncate the list at that point.
        self._minimize(nCommonPrefix)

        # add the suffix, starting from the correct node mid-way through the graph
        if len(self.lUncheckedNodes) == 0:
            oNode = self.oRoot
        else:
            oNode = self.lUncheckedNodes[-1][2]

        iChar = nCommonPrefix
        for c in aEntry[nCommonPrefix:]:
            oNextNode = DawgNode()







|







189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
            nCommonPrefix += 1

        # Check the lUncheckedNodes for redundant nodes, proceeding from last
        # one down to the common prefix size. Then truncate the list at that point.
        self._minimize(nCommonPrefix)

        # add the suffix, starting from the correct node mid-way through the graph
        if not self.lUncheckedNodes:
            oNode = self.oRoot
        else:
            oNode = self.lUncheckedNodes[-1][2]

        iChar = nCommonPrefix
        for c in aEntry[nCommonPrefix:]:
            oNextNode = DawgNode()

Changes to graphspell/spellchecker.py.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
...
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
"""
Spellchecker.
Useful to check several dictionaries at once.

To avoid iterating over a pile of dictionaries, it is assumed that 3 are enough:
- the main dictionary, bundled with the package
- the extended dictionary
- the community dictionary, added by an organization
- the personal dictionary, created by the user for its own convenience
"""

import importlib
import traceback

................................................................................
        lMorph = self.oMainDic.getMorph(sWord)
        if self.bCommunityDic:
            lMorph.extend(self.oCommunityDic.getMorph(sWord))
        if self.bPersonalDic:
            lMorph.extend(self.oPersonalDic.getMorph(sWord))
        if self.bStorage:
            self._dMorphologies[sWord] = lMorph
            self._dLemmas[sWord] = set([ s[1:s.find("/")]  for s in lMorph ])
        return lMorph

    def getLemma (self, sWord):
        "retrieves lemmas"
        if self.bStorage:
            if sWord not in self._dLemmas:
                self.getMorph(sWord)
            return self._dLemmas[sWord]
        return set([ s[1:s.find("/")]  for s in self.getMorph(sWord) ])

    def suggest (self, sWord, nSuggLimit=10):
        "generator: returns 1, 2 or 3 lists of suggestions"
        if self.dDefaultSugg:
            if sWord in self.dDefaultSugg:
                yield self.dDefaultSugg[sWord].split("|")
            elif sWord.istitle() and sWord.lower() in self.dDefaultSugg:






<







 







|








|







1
2
3
4
5
6

7
8
9
10
11
12
13
...
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
"""
Spellchecker.
Useful to check several dictionaries at once.

To avoid iterating over a pile of dictionaries, it is assumed that 3 are enough:
- the main dictionary, bundled with the package

- the community dictionary, added by an organization
- the personal dictionary, created by the user for its own convenience
"""

import importlib
import traceback

................................................................................
        lMorph = self.oMainDic.getMorph(sWord)
        if self.bCommunityDic:
            lMorph.extend(self.oCommunityDic.getMorph(sWord))
        if self.bPersonalDic:
            lMorph.extend(self.oPersonalDic.getMorph(sWord))
        if self.bStorage:
            self._dMorphologies[sWord] = lMorph
            self._dLemmas[sWord] = { s[1:s.find("/")]  for s in lMorph }
        return lMorph

    def getLemma (self, sWord):
        "retrieves lemmas"
        if self.bStorage:
            if sWord not in self._dLemmas:
                self.getMorph(sWord)
            return self._dLemmas[sWord]
        return { s[1:s.find("/")]  for s in self.getMorph(sWord) }

    def suggest (self, sWord, nSuggLimit=10):
        "generator: returns 1, 2 or 3 lists of suggestions"
        if self.dDefaultSugg:
            if sWord in self.dDefaultSugg:
                yield self.dDefaultSugg[sWord].split("|")
            elif sWord.istitle() and sWord.lower() in self.dDefaultSugg:

Changes to pylintrc.

465
466
467
468
469
470
471
472
473
474
475
476
477
478
479

# A regular expression matching the name of dummy variables (i.e. expectedly
# not used).
dummy-variables-rgx=_+$|(_[a-zA-Z0-9_]*[a-zA-Z0-9]+?$)

# Argument names that match this expression will be ignored. Default to name
# with leading underscore
ignored-argument-names=_.*|^sSentence|^dTags|^bCondMemo|^sCountry|^nLastToken|^sx?$|^m$|^dTokenPos

# Tells whether we should check for unused import in __init__ files.
init-import=no

# List of qualified module names which can have objects that can redefine
# builtins.
redefining-builtins-modules=six.moves,past.builtins,future.builtins,io,builtins







|







465
466
467
468
469
470
471
472
473
474
475
476
477
478
479

# A regular expression matching the name of dummy variables (i.e. expectedly
# not used).
dummy-variables-rgx=_+$|(_[a-zA-Z0-9_]*[a-zA-Z0-9]+?$)

# Argument names that match this expression will be ignored. Default to name
# with leading underscore
ignored-argument-names=_.*|^sSentence|^dTags|^bCondMemo|^sCountry|^nLastToken|^sx?$|^m$|^dTokenPos|^nTokenOffset|^lToken

# Tells whether we should check for unused import in __init__ files.
init-import=no

# List of qualified module names which can have objects that can redefine
# builtins.
redefining-builtins-modules=six.moves,past.builtins,future.builtins,io,builtins