Grammalecte  Check-in [03448dc173]

Overview
Comment:[build] separate dictionary builder from make.py
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk | build
Files: files | file ages | folders
SHA3-256: 03448dc173fd9dfae33d9b65b3c00036b9ca3111be395a373966e080e1fc5659
User & Date: olr on 2017-06-23 12:19:55
Other Links: manifest | tags
Context
2017-06-23
12:55
[build] change arguments order for dictionary building check-in: 4bc364b3ac user: olr tags: build, trunk
12:19
[build] separate dictionary builder from make.py check-in: 03448dc173 user: olr tags: build, trunk
08:04
[fr] faux positif concernant les nombres suivant un déterminant pluriel check-in: 469f8c6d69 user: olr tags: fr, trunk
Changes

Modified gc_lang/fr/config.ini from [649530543c] to [5ccf0657e5].

    12     12   description = Correcteur grammatical pour le français.
    13     13   extras = README_fr.txt
    14     14   logo = logo.png
    15     15   
    16     16   # lexicon source
    17     17   lexicon_src = lexicons/French.lex
    18     18   # binary dictionary name
           19  +dic_name = French
    19     20   py_binary_dic = French.bdic
    20     21   js_binary_dic = French.json
    21     22   # Finite state automaton compression: 1, 2 (experimental) or 3 (experimental)
    22     23   fsa_method = 1
    23     24   # stemming method: S for suffixes only, A for prefixes and suffixes
    24     25   stemming_method = S
    25     26   

Added lex_build.py version [e2774a881f].

            1  +#!python3
            2  +
            3  +# Lexicon builder
            4  +
            5  +from distutils import dir_util
            6  +
            7  +import grammalecte.dawg as fsa
            8  +from grammalecte.ibdawg import IBDAWG
            9  +
           10  +
           11  +def build (spfSrc, sLangName, sDicName, cStemmingMethod, nCompressMethod, bJSON=False):
           12  +    "transform a text lexicon as a binary indexable dictionary"
           13  +    oDAWG = fsa.DAWG(spfSrc, sLangName, cStemmingMethod)
           14  +    dir_util.mkpath("grammalecte/_dictionaries")
           15  +    oDAWG.writeInfo("grammalecte/_dictionaries/" + sDicName + ".info.txt")
           16  +    oDAWG.createBinary("grammalecte/_dictionaries/" + sDicName + ".bdic", int(nCompressMethod))
           17  +    if bJSON:
           18  +        dir_util.mkpath("grammalecte-js/_dictionaries")
           19  +        oDic = IBDAWG(sDicName + ".bdic")
           20  +        #oDic.writeAsJSObject("gc_lang/"+sLang+"/modules-js/dictionary.js")
           21  +        oDic.writeAsJSObject("grammalecte-js/_dictionaries/" + sDicName + ".json")
           22  +
           23  +
           24  +def main ():
           25  +    print("todo")
           26  +
           27  +
           28  +if __name__ == '__main__':
           29  +    main()

Modified make.py from [418a36d3bb] to [993f7b57c8].

   310    310                   try:
   311    311                       build_data_module = importlib.import_module("gc_lang."+sLang+".build_data")
   312    312                   except ImportError:
   313    313                       print("# Error. Couldn’t import file build_data.py in folder gc_lang/"+sLang)
   314    314               if build_data_module:
   315    315                   build_data_module.before('gc_lang/'+sLang, dVars, xArgs.javascript)
   316    316               if xArgs.dict or not os.path.exists("grammalecte/_dictionaries"):
   317         -                import grammalecte.dawg as fsa
   318         -                from grammalecte.ibdawg import IBDAWG
   319         -                # fsa builder
   320         -                oDAWG = fsa.DAWG(dVars['lexicon_src'], dVars['lang_name'], dVars['stemming_method'])
   321         -                dir_util.mkpath("grammalecte/_dictionaries")
   322         -                oDAWG.writeInfo("grammalecte/_dictionaries/" + dVars['py_binary_dic'] + ".info.txt")
   323         -                oDAWG.createBinary("grammalecte/_dictionaries/" + dVars['py_binary_dic'], int(dVars['fsa_method']))
   324         -                if xArgs.javascript:
   325         -                    dir_util.mkpath("grammalecte-js/_dictionaries")
   326         -                    oDic = IBDAWG(dVars['py_binary_dic'])
   327         -                    #oDic.writeAsJSObject("gc_lang/"+sLang+"/modules-js/dictionary.js")
   328         -                    oDic.writeAsJSObject("grammalecte-js/_dictionaries/"+dVars['js_binary_dic'])
          317  +                import lex_build
          318  +                lex_build.build(dVars['lexicon_src'], dVars['lang_name'], dVars['dic_name'], dVars['stemming_method'], int(dVars['fsa_method']), xArgs.javascript)
   329    319               if build_data_module:
   330    320                   build_data_module.after('gc_lang/'+sLang, dVars, xArgs.javascript)
   331    321   
   332    322               # make
   333    323               sVersion = create(sLang, xConfig, xArgs.install, xArgs.javascript, )
   334    324   
   335    325               # tests