Grammalecte  Check-in [c65b7e2b8b]

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:[graphspell][core][fr] code cleaning (pylint)
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk | fr | core | graphspell
Files: files | file ages | folders
SHA3-256:c65b7e2b8b4b1507adbe01cd765e64b1b7329abb805b2d1c4df3e753ecbb5d1f
User & Date: olr 2019-05-15 11:55:44
Context
2019-05-15
16:12
[fx] position and size of panels check-in: eb18e7fd4b user: olr tags: fx, trunk
11:55
[graphspell][core][fr] code cleaning (pylint) check-in: c65b7e2b8b user: olr tags: core, fr, graphspell, trunk
10:47
[graphspell] import dictionary: include lang code in error message check-in: 2be0562a74 user: olr tags: graphspell, trunk
Changes

Changes to gc_core/py/lang_core/__init__.py.




1
2




from .gc_engine import *
>
>
>


1
2
3
4
5
"""
Grammalecte - core grammar checker engine
"""

from .gc_engine import *

Changes to gc_core/py/lang_core/gc_engine.py.

550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
                                globals()[sWhat](self.lToken, nTokenOffset, nLastToken)
                                if bDebug:
                                    echo("    DISAMBIGUATOR: ({})  [{}:{}]".format(sWhat, self.lToken[nTokenOffset+1]["sValue"], self.lToken[nLastToken]["sValue"]))
                            elif cActionType == ">":
                                # we do nothing, this test is just a condition to apply all following actions
                                if bDebug:
                                    echo("    COND_OK")
                                pass
                            elif cActionType == "/":
                                # Tag
                                nTokenStart = nTokenOffset + eAct[0]  if eAct[0] > 0  else nLastToken + eAct[0]
                                nTokenEnd = nTokenOffset + eAct[1]  if eAct[1] > 0  else nLastToken + eAct[1]
                                for i in range(nTokenStart, nTokenEnd+1):
                                    if "aTags" in self.lToken[i]:
                                        self.lToken[i]["aTags"].update(sWhat.split("|"))







<







550
551
552
553
554
555
556

557
558
559
560
561
562
563
                                globals()[sWhat](self.lToken, nTokenOffset, nLastToken)
                                if bDebug:
                                    echo("    DISAMBIGUATOR: ({})  [{}:{}]".format(sWhat, self.lToken[nTokenOffset+1]["sValue"], self.lToken[nLastToken]["sValue"]))
                            elif cActionType == ">":
                                # we do nothing, this test is just a condition to apply all following actions
                                if bDebug:
                                    echo("    COND_OK")

                            elif cActionType == "/":
                                # Tag
                                nTokenStart = nTokenOffset + eAct[0]  if eAct[0] > 0  else nLastToken + eAct[0]
                                nTokenEnd = nTokenOffset + eAct[1]  if eAct[1] > 0  else nLastToken + eAct[1]
                                for i in range(nTokenStart, nTokenEnd+1):
                                    if "aTags" in self.lToken[i]:
                                        self.lToken[i]["aTags"].update(sWhat.split("|"))

Changes to gc_lang/fr/modules/tests.py.

4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19






























20
21
22
23
24
25
26
...
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
Grammar checker tests for French language
"""

import unittest
import os
import re
import time


from ..graphspell.ibdawg import IBDAWG
from ..graphspell.echo import echo
from . import gc_engine as gce
from . import conj
from . import phonet
from . import mfsp
































def _fuckBackslashUTF8 (s):
    "fuck that shit"
    return s.replace("\u2019", "'").replace("\u2013", "–").replace("\u2014", "—")


class TestDictionary (unittest.TestCase):
................................................................................
        sRes = " " * len(sLine)
        for i, m in enumerate(self._zError.finditer(sLine)):
            nStart = m.start() - (4 * i)
            nEnd = m.end() - (4 * (i+1))
            sRes = sRes[:nStart] + "~" * (nEnd - nStart) + sRes[nEnd:-4]
        return sRes


from contextlib import contextmanager
@contextmanager
def timeblock (label, hDst):
    "performance counter (contextmanager)"
    start = time.perf_counter()
    try:
        yield
    finally:
        end = time.perf_counter()
        print('{} : {}'.format(label, end - start))
        if hDst:
            hDst.write("{:<12.6}".format(end-start))


def perf (sVersion, hDst=None):
    "performance tests"
    print("\nPerformance tests")
    gce.load()
    gce.parse("Texte sans importance… utile pour la compilation des règles avant le calcul des perfs.")

    spHere, _ = os.path.split(__file__)
    with open(os.path.join(spHere, "perf.txt"), "r", encoding="utf-8") as hSrc:
        if hDst:
            hDst.write("{:<12}{:<20}".format(sVersion, time.strftime("%Y.%m.%d %H:%M")))
        for sText in ( s.strip() for s in hSrc if not s.startswith("#") and s.strip() ):
            with timeblock(sText[:sText.find(".")], hDst):
                gce.parse(sText)
        if hDst:
            hDst.write("\n")


def main():
    "start function"
    unittest.main()


if __name__ == '__main__':
    main()







|








>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







 







<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<








4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
...
243
244
245
246
247
248
249































250
251
252
253
254
255
256
257
Grammar checker tests for French language
"""

import unittest
import os
import re
import time
from contextlib import contextmanager

from ..graphspell.ibdawg import IBDAWG
from ..graphspell.echo import echo
from . import gc_engine as gce
from . import conj
from . import phonet
from . import mfsp


@contextmanager
def timeblock (label, hDst):
    "performance counter (contextmanager)"
    start = time.perf_counter()
    try:
        yield
    finally:
        end = time.perf_counter()
        print('{} : {}'.format(label, end - start))
        if hDst:
            hDst.write("{:<12.6}".format(end-start))


def perf (sVersion, hDst=None):
    "performance tests"
    print("\nPerformance tests")
    gce.load()
    gce.parse("Texte sans importance… utile pour la compilation des règles avant le calcul des perfs.")

    spHere, _ = os.path.split(__file__)
    with open(os.path.join(spHere, "perf.txt"), "r", encoding="utf-8") as hSrc:
        if hDst:
            hDst.write("{:<12}{:<20}".format(sVersion, time.strftime("%Y.%m.%d %H:%M")))
        for sText in ( s.strip() for s in hSrc if not s.startswith("#") and s.strip() ):
            with timeblock(sText[:sText.find(".")], hDst):
                gce.parse(sText)
        if hDst:
            hDst.write("\n")


def _fuckBackslashUTF8 (s):
    "fuck that shit"
    return s.replace("\u2019", "'").replace("\u2013", "–").replace("\u2014", "—")


class TestDictionary (unittest.TestCase):
................................................................................
        sRes = " " * len(sLine)
        for i, m in enumerate(self._zError.finditer(sLine)):
            nStart = m.start() - (4 * i)
            nEnd = m.end() - (4 * (i+1))
            sRes = sRes[:nStart] + "~" * (nEnd - nStart) + sRes[nEnd:-4]
        return sRes

































def main():
    "start function"
    unittest.main()


if __name__ == '__main__':
    main()

Changes to graphspell/ibdawg.py.

186
187
188
189
190
191
192

193
194
195
196
197
198
199
200
201
202
203
        for i in range(1, self.nChar+1):
            self.dChar[self.lArcVal[i]] = i
        self.dCharVal = { v: k  for k, v in self.dChar.items() }
        self.nBytesOffset = 1 # version 3

    def _initJSON (self, oJSON):
        "initialize with a JSON text file"

        self.__dict__.update(oJSON)
        self.byDic = binascii.unhexlify(self.sByDic)
        self.dCharVal = { v: k  for k, v in self.dChar.items() }
        self.a2grams = set(self.l2grams)  if hasattr(self, 'l2grams')  else None

    def getInfo (self):
        "return string about the IBDAWG"
        return  "  Language: {0.sLangName}   Lang code: {0.sLangCode}   Dictionary name: {0.sDicName}" \
                "  Compression method: {0.nCompressionMethod:>2}   Date: {0.sDate}   Stemming: {0.cStemming}FX\n" \
                "  Arcs values:  {0.nArcVal:>10,} = {0.nChar:>5,} characters,  {0.nAff:>6,} affixes,  {0.nTag:>6,} tags\n" \
                "  Dictionary: {0.nEntry:>12,} entries,    {0.nNode:>11,} nodes,   {0.nArc:>11,} arcs\n" \







>



|







186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
        for i in range(1, self.nChar+1):
            self.dChar[self.lArcVal[i]] = i
        self.dCharVal = { v: k  for k, v in self.dChar.items() }
        self.nBytesOffset = 1 # version 3

    def _initJSON (self, oJSON):
        "initialize with a JSON text file"
        self.sByDic = ""  # init to prevent pylint whining
        self.__dict__.update(oJSON)
        self.byDic = binascii.unhexlify(self.sByDic)
        self.dCharVal = { v: k  for k, v in self.dChar.items() }
        self.a2grams = set(getattr(self, 'l2grams'))  if hasattr(self, 'l2grams')  else None

    def getInfo (self):
        "return string about the IBDAWG"
        return  "  Language: {0.sLangName}   Lang code: {0.sLangCode}   Dictionary name: {0.sDicName}" \
                "  Compression method: {0.nCompressionMethod:>2}   Date: {0.sDate}   Stemming: {0.cStemming}FX\n" \
                "  Arcs values:  {0.nArcVal:>10,} = {0.nChar:>5,} characters,  {0.nAff:>6,} affixes,  {0.nTag:>6,} tags\n" \
                "  Dictionary: {0.nEntry:>12,} entries,    {0.nNode:>11,} nodes,   {0.nArc:>11,} arcs\n" \

Changes to graphspell/tokenizer.py.

56
57
58
59
60
61
62




        if bStartEndToken:
            yield { "i": 0, "sType": "INFO", "sValue": "<start>", "nStart": 0, "nEnd": 0, "lMorph": ["<start>"] }
        for i, m in enumerate(self.zToken.finditer(sText), 1):
            yield { "i": i, "sType": m.lastgroup, "sValue": m.group(), "nStart": m.start(), "nEnd": m.end() }
        if bStartEndToken:
            iEnd = len(sText)
            yield { "i": i+1, "sType": "INFO", "sValue": "<end>", "nStart": iEnd, "nEnd": iEnd, "lMorph": ["<end>"] }











>
>
>
>
56
57
58
59
60
61
62
63
64
65
66
        if bStartEndToken:
            yield { "i": 0, "sType": "INFO", "sValue": "<start>", "nStart": 0, "nEnd": 0, "lMorph": ["<start>"] }
        for i, m in enumerate(self.zToken.finditer(sText), 1):
            yield { "i": i, "sType": m.lastgroup, "sValue": m.group(), "nStart": m.start(), "nEnd": m.end() }
        if bStartEndToken:
            iEnd = len(sText)
            yield { "i": i+1, "sType": "INFO", "sValue": "<end>", "nStart": iEnd, "nEnd": iEnd, "lMorph": ["<end>"] }

    def getTokenTypes (self):
        "returns list of token types as tuple (token name, regex)"
        return [ sRegex[4:-1].split(">")  for sRegex in _PATTERNS[self.sLang] ]

Changes to pylintrc.

280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
#inlinevar-rgx=

# Naming style matching correct method names
method-naming-style=camelCase

# Regular expression matching correct method names. Overrides method-naming-
# style
#method-rgx=^test_

# Naming style matching correct module names
module-naming-style=snake_case

# Regular expression matching correct module names. Overrides module-naming-
# style
#module-rgx=







|







280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
#inlinevar-rgx=

# Naming style matching correct method names
method-naming-style=camelCase

# Regular expression matching correct method names. Overrides method-naming-
# style
method-rgx=^test_|^_|^[a-zA-Z][a-zA-Z0-9]+$

# Naming style matching correct module names
module-naming-style=snake_case

# Regular expression matching correct module names. Overrides module-naming-
# style
#module-rgx=