Grammalecte  Check-in [03448dc173]

Overview
Comment:[build] separate dictionary builder from make.py
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk | build
Files: files | file ages | folders
SHA3-256: 03448dc173fd9dfae33d9b65b3c00036b9ca3111be395a373966e080e1fc5659
User & Date: olr on 2017-06-23 12:19:55
Other Links: manifest | tags
Context
2017-06-23
12:55
[build] change arguments order for dictionary building check-in: 4bc364b3ac user: olr tags: build, trunk
12:19
[build] separate dictionary builder from make.py check-in: 03448dc173 user: olr tags: build, trunk
08:04
[fr] faux positif concernant les nombres suivant un déterminant pluriel check-in: 469f8c6d69 user: olr tags: fr, trunk
Changes

Modified gc_lang/fr/config.ini from [649530543c] to [5ccf0657e5].

12
13
14
15
16
17
18

19
20
21
22
23
24
25
description = Correcteur grammatical pour le français.
extras = README_fr.txt
logo = logo.png

# lexicon source
lexicon_src = lexicons/French.lex
# binary dictionary name

py_binary_dic = French.bdic
js_binary_dic = French.json
# Finite state automaton compression: 1, 2 (experimental) or 3 (experimental)
fsa_method = 1
# stemming method: S for suffixes only, A for prefixes and suffixes
stemming_method = S








>







12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
description = Correcteur grammatical pour le français.
extras = README_fr.txt
logo = logo.png

# lexicon source
lexicon_src = lexicons/French.lex
# binary dictionary name
dic_name = French
py_binary_dic = French.bdic
js_binary_dic = French.json
# Finite state automaton compression: 1, 2 (experimental) or 3 (experimental)
fsa_method = 1
# stemming method: S for suffixes only, A for prefixes and suffixes
stemming_method = S

Added lex_build.py version [e2774a881f].



























































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
#!python3

# Lexicon builder

from distutils import dir_util

import grammalecte.dawg as fsa
from grammalecte.ibdawg import IBDAWG


def build (spfSrc, sLangName, sDicName, cStemmingMethod, nCompressMethod, bJSON=False):
    "transform a text lexicon as a binary indexable dictionary"
    oDAWG = fsa.DAWG(spfSrc, sLangName, cStemmingMethod)
    dir_util.mkpath("grammalecte/_dictionaries")
    oDAWG.writeInfo("grammalecte/_dictionaries/" + sDicName + ".info.txt")
    oDAWG.createBinary("grammalecte/_dictionaries/" + sDicName + ".bdic", int(nCompressMethod))
    if bJSON:
        dir_util.mkpath("grammalecte-js/_dictionaries")
        oDic = IBDAWG(sDicName + ".bdic")
        #oDic.writeAsJSObject("gc_lang/"+sLang+"/modules-js/dictionary.js")
        oDic.writeAsJSObject("grammalecte-js/_dictionaries/" + sDicName + ".json")


def main ():
    print("todo")


if __name__ == '__main__':
    main()

Modified make.py from [418a36d3bb] to [993f7b57c8].

310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
                try:
                    build_data_module = importlib.import_module("gc_lang."+sLang+".build_data")
                except ImportError:
                    print("# Error. Couldn’t import file build_data.py in folder gc_lang/"+sLang)
            if build_data_module:
                build_data_module.before('gc_lang/'+sLang, dVars, xArgs.javascript)
            if xArgs.dict or not os.path.exists("grammalecte/_dictionaries"):
                import grammalecte.dawg as fsa
                from grammalecte.ibdawg import IBDAWG
                # fsa builder
                oDAWG = fsa.DAWG(dVars['lexicon_src'], dVars['lang_name'], dVars['stemming_method'])
                dir_util.mkpath("grammalecte/_dictionaries")
                oDAWG.writeInfo("grammalecte/_dictionaries/" + dVars['py_binary_dic'] + ".info.txt")
                oDAWG.createBinary("grammalecte/_dictionaries/" + dVars['py_binary_dic'], int(dVars['fsa_method']))
                if xArgs.javascript:
                    dir_util.mkpath("grammalecte-js/_dictionaries")
                    oDic = IBDAWG(dVars['py_binary_dic'])
                    #oDic.writeAsJSObject("gc_lang/"+sLang+"/modules-js/dictionary.js")
                    oDic.writeAsJSObject("grammalecte-js/_dictionaries/"+dVars['js_binary_dic'])
            if build_data_module:
                build_data_module.after('gc_lang/'+sLang, dVars, xArgs.javascript)

            # make
            sVersion = create(sLang, xConfig, xArgs.install, xArgs.javascript, )

            # tests







|
<
<
|
<
<
<
<
<
<
<
<







310
311
312
313
314
315
316
317


318








319
320
321
322
323
324
325
                try:
                    build_data_module = importlib.import_module("gc_lang."+sLang+".build_data")
                except ImportError:
                    print("# Error. Couldn’t import file build_data.py in folder gc_lang/"+sLang)
            if build_data_module:
                build_data_module.before('gc_lang/'+sLang, dVars, xArgs.javascript)
            if xArgs.dict or not os.path.exists("grammalecte/_dictionaries"):
                import lex_build


                lex_build.build(dVars['lexicon_src'], dVars['lang_name'], dVars['dic_name'], dVars['stemming_method'], int(dVars['fsa_method']), xArgs.javascript)








            if build_data_module:
                build_data_module.after('gc_lang/'+sLang, dVars, xArgs.javascript)

            # make
            sVersion = create(sLang, xConfig, xArgs.install, xArgs.javascript, )

            # tests