Grammalecte  Check-in [b71cbd8aad]

Overview
Comment:[build][graphspell] dawg builder: use data from lexicon when found
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | build | graphspell | comdic
Files: files | file ages | folders
SHA3-256: b71cbd8aad86f86d8b287442577c893998477f3d1f10b02ce7fa5d2ff67a4619
User & Date: olr on 2019-01-16 15:57:20
Other Links: branch diff | manifest | tags
Context
2019-01-22
10:12
[build][fx] Firefox Nightly has a new installation folder check-in: 3fc5e58719 user: olr tags: build, comdic, fx
2019-01-16
15:57
[build][graphspell] dawg builder: use data from lexicon when found check-in: b71cbd8aad user: olr tags: build, comdic, graphspell
2019-01-11
16:37
[build][fx][tb][lo] description field for dictionaries check-in: 831b79d96c user: olr tags: build, comdic, fx, lo, tb
Changes

Modified gc_lang/fr/dictionnaire/genfrdic.py from [321fa8c26f] to [fef905a8c3].

   522    522                   hDst.write(oFlex.__str__(oStatsLex))
   523    523   
   524    524       def writeGrammarCheckerLexicon (self, spfDst, version):
   525    525           echo(' * Lexique simplifié >> [ {} ] '.format(spfDst))
   526    526           with open(spfDst[:-4]+".lex", 'w', encoding='utf-8', newline="\n") as hDst:
   527    527               hDst.write(MPLHEADER)
   528    528               hDst.write("# Lexique simplifié pour Grammalecte v{}\n# Licence : MPL v2.0\n\n".format(version))
   529         -            hDst.write("## LangCode: fr\n")
   530         -            hDst.write("## LangName: Français\n")
   531         -            hDst.write("## DicName: fr.commun\n")
   532         -            hDst.write("## Description: Français commun (toutes variantes)\n")
   533         -            hDst.write("## Author: Olivier R.\n\n")
   534    529               hDst.write(Flexion.simpleHeader())
   535    530               for oFlex in self.lFlexions:
   536    531                   hDst.write(oFlex.getGrammarCheckerRepr())
   537    532   
   538    533       def createFiles (self, spDst, lDictVars, nMode, bSimplified):
   539    534           sDicName = PREFIX_DICT_PATH + self.sVersion
   540    535           spDic = spDst + '/' + sDicName

Modified graphspell/dawg.py from [70e7a3c81c] to [12eff6a146].

    18     18   import re
    19     19   import traceback
    20     20   
    21     21   from . import str_transform as st
    22     22   from .progressbar import ProgressBar
    23     23   
    24     24   
           25  +
           26  +dLexiconData = {}
    25     27   
    26     28   def readFile (spf):
    27     29       "generator: read file <spf> and return for each line a list of elements separated by a tabulation."
    28     30       print(" < Read lexicon: " + spf)
    29     31       if os.path.isfile(spf):
           32  +        dLexiconData.clear()
    30     33           with open(spf, "r", encoding="utf-8") as hSrc:
    31     34               for sLine in hSrc:
    32     35                   sLine = sLine.strip()
    33         -                if sLine and not sLine.startswith("#"):
           36  +                if sLine.startswith("##") :
           37  +                    m = re.match("## *(\\w+) *:(.*)$", sLine)
           38  +                    if m:
           39  +                        dLexiconData[m.group(1)] = m.group(2).strip()
           40  +                elif sLine and not sLine.startswith("#"):
    34     41                       yield sLine.split("\t")
           42  +        if dLexiconData:
           43  +            print("Data from dictionary:")
           44  +            print(dLexiconData)
    35     45       else:
    36     46           raise OSError("# Error. File not found or not loadable: " + spf)
    37     47   
    38     48   
    39     49   
    40     50   class DAWG:
    41     51       """DIRECTED ACYCLIC WORD GRAPH"""
................................................................................
   118    128                           + [ (dTag[tag]+nChar+nAff, dTagOccur[tag]) for tag in dTag ] )
   119    129   
   120    130           self.sFileName = src  if type(src) is str  else "[None]"
   121    131           self.sLangCode = sLangCode
   122    132           self.sLangName = sLangName
   123    133           self.sDicName = sDicName
   124    134           self.sDescription = sDescription
          135  +        if dLexiconData:
          136  +            self.sLangCode = dLexiconData.get("LangCode", self.sLangCode)
          137  +            self.sLangName = dLexiconData.get("LangName", self.sLangName)
          138  +            self.sDicName = dLexiconData.get("DicName", self.sDicName)
          139  +            self.sDescription = dLexiconData.get("Description", self.sDescription)
   125    140           self.nEntry = len(lWord)
   126    141           self.aPreviousEntry = []
   127    142           DawgNode.resetNextId()
   128    143           self.oRoot = DawgNode()
   129    144           self.lUncheckedNodes = []  # list of nodes that have not been checked for duplication.
   130    145           self.lMinimizedNodes = {}  # list of unique nodes that have been checked for duplication.
   131    146           self.lSortedNodes = []     # version 2 and 3

Modified lexicons/French.lex from [b69ae271b0] to [af55bd82c0].

     1      1   # This Source Code Form is subject to the terms of the Mozilla Public
     2      2   # License, v. 2.0. If a copy of the MPL was not distributed with this
     3      3   # file, You can obtain one at http://mozilla.org/MPL/2.0/.
     4      4   
     5      5   # Lexique simplifié pour Grammalecte v7.0
     6      6   # Licence : MPL v2.0
     7      7   
     8         -## LangCode: fr
     9         -## LangName: Français
    10         -## DicName: fr.commun
    11         -## Description: Français commun (toutes variantes)
    12         -## Author: Olivier R.
    13         -
    14      8   # :POS ;LEX ~SEM =FQ /DIC
    15      9   de	de	:G:D:e:i/*
    16     10   de	de	:G:R:Rv/*
    17     11   et	et	:G:Cc/*
    18     12   à	à	:G:R:Rv/*
    19     13   des	des	:G:D:e:p/*
    20     14   du	du	:G:D:m:s/*