Grammalecte  Check-in [3a75d57243]

Overview
Comment:[fr][build] merge genfrdic
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk | fr
Files: files | file ages | folders
SHA3-256: 3a75d57243bd954d2f685c70fc77c503095109200e4361ab69f39e6a7e720329
User & Date: olr on 2017-06-08 17:52:52
Other Links: manifest | tags
Context
2017-06-08
21:17
[fr] pt: taux d’absorption check-in: c7f6fd414a user: olr tags: fr, trunk
19:38
merge trunk check-in: ab9feb3d66 user: olr tags: fr_killtricks
17:52
[fr][build] merge genfrdic check-in: 3a75d57243 user: olr tags: fr, trunk
17:51
[fr][bug] calcul des occurrences des flexions existant dans plusieurs lemmes Closed-Leaf check-in: 435b1fde99 user: olr tags: fr, genfrdic
05:03
[fr] correction bug regex sur taux de qqch check-in: 80a7b8c83d user: olr tags: fr, trunk
Changes

Modified gc_lang/fr/build_data.py from [040b9153d1] to [9294fbef92].

24
25
26
27
28
29
30
31
32
33
34
35
36
37
38

    def __exit__ (self, etype, value, traceback):
        """Leave the context: switch back to the directory stored in self.savedPath.

        The three exception arguments are not inspected and nothing is returned,
        so an exception raised inside the with-block keeps propagating.
        """
        spPrevious = self.savedPath
        os.chdir(spPrevious)


def makeDictionaries (sp, sVersion):
    """Run genfrdic.py with the options -s -v <sVersion> from within <sp>/dictionnaire.

    The exit status of the command is not checked and nothing is returned.
    """
    spDictionary = sp + "/dictionnaire"
    with cd(spDictionary):
        sCommand = "genfrdic.py -s -v " + sVersion
        os.system(sCommand)


def makeConj (sp, bJS=False):
    print("> Conjugaisons ", end="")
    print("(Python et JavaScript)"  if bJS  else "(Python seulement)")
    dVerb = {}
    lVtyp = []; dVtyp = {}; nVtyp = 0







|







24
25
26
27
28
29
30
31
32
33
34
35
36
37
38

    def __exit__ (self, etype, value, traceback):
        """Restore the working directory remembered in self.savedPath.

        etype, value and traceback are ignored; the method returns None,
        which means exceptions from the with-block are not suppressed.
        """
        spSaved = self.savedPath
        os.chdir(spSaved)


def makeDictionaries (sp, sVersion):
    """Build the dictionaries by running genfrdic.py from within <sp>/dictionnaire.

    sp        -- folder containing the "dictionnaire" sub-folder
    sVersion  -- version string appended after the -v option

    The command "genfrdic.py -s -gl -v <sVersion>" is launched through os.system().
    Its exit status used to be discarded; a non-zero status is now reported on
    stdout so that a failed generation does not go unnoticed.  Nothing is returned
    and no exception is raised for a failed command.
    """
    with cd(sp+"/dictionnaire"):
        nStatus = os.system("genfrdic.py -s -gl -v "+sVersion)
    if nStatus != 0:
        print("# Erreur : genfrdic.py a renvoyé le code " + str(nStatus))


def makeConj (sp, bJS=False):
    print("> Conjugaisons ", end="")
    print("(Python et JavaScript)"  if bJS  else "(Python seulement)")
    dVerb = {}
    lVtyp = []; dVtyp = {}; nVtyp = 0

Modified gc_lang/fr/dictionnaire/genfrdic.py from [38f9af18d9] to [5036afecd5].

547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
...
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610

611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636

637
638
639
640
641
642
643
644
645

646
647
648
649
650
651
652
653
654

655
656
657
658
659
660
661
662
663
...
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
....
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
....
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
....
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166



1167
1168
1169
1170
1171
1172
1173
1174
....
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
....
1504
1505
1506
1507
1508
1509
1510

1511
1512
1513
1514
1515
1516
1517
....
1551
1552
1553
1554
1555
1556
1557






1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
            dVars['version'] = self.sVersion
            # Dictionaries files (.dic) (.aff)
            self.writeAffixes(spDic, dVars, nMode, bSimplified)
            self.writeDictionary(spDic, dVars, nMode, bSimplified)
        copyTemplate('orthographe', spDic, 'README_dict_fr.txt', dVars)
        createZipFiles(spDic, spDst, sDicName + '.zip')

    def createLibreOfficeExtension (self, spBuild, dTplVars, lDictVars, spGL):
        # LibreOffice extension
        echo(" * Dictionnaire >> extension pour LibreOffice")
        dTplVars['version'] = self.sVersion
        sExtensionName = EXT_PREFIX_OOO + self.sVersion
        spExt = spBuild + '/' + sExtensionName
        dir_util.mkpath(spExt+'/META-INF')
        dir_util.mkpath(spExt+'/ui')
................................................................................
        file_util.copy_file('césures/frhyph.tex', spExt+'/dictionaries')
        file_util.copy_file('césures/hyph-fr.tex', spExt+'/dictionaries')
        file_util.copy_file('césures/README_hyph_fr-3.0.txt', spExt+'/dictionaries')
        file_util.copy_file('césures/README_hyph_fr-2.9.txt', spExt+'/dictionaries')
        # zip
        createZipFiles(spExt, spBuild, sExtensionName + '.oxt')
        # copy to Grammalecte Project
        if spGL:
            echo("   extension copiée dans Grammalecte...")
            dir_util.copy_tree(spExt+'/dictionaries', spGL)
    
    def createMozillaExtensions (self, spBuild, dTplVars, lDictVars, spDestGL=""):
        """Build the Mozilla dictionary extension (.xpi) and optionally copy the dictionaries into Grammalecte.

        spBuild    -- build folder; the extension folder and the .xpi archive are created in it
        dTplVars   -- template variables; the 'version' key is overwritten with self.sVersion
        lDictVars  -- dictionary descriptions, each with the keys 'asciiName' and 'mozAsciiName'
        spDestGL   -- folder of the Grammalecte project receiving the .dic/.aff files;
                      when empty (the default) this copy is skipped, the same way
                      createLibreOfficeExtension skips the copy to its own destination
        """
        # Mozilla extension 1
        echo(" * Dictionnaire >> extension pour Mozilla")
        dTplVars['version'] = self.sVersion
        sExtensionName = EXT_PREFIX_MOZ + self.sVersion
        spExt = spBuild + '/' + sExtensionName
        dir_util.mkpath(spExt+'/dictionaries')
        copyTemplate('_templates/moz', spExt, 'install.rdf', dTplVars)
        spDict = spBuild + '/' + PREFIX_DICT_PATH + self.sVersion
        # the "classique" dictionary is shipped under the name fr-classic
        file_util.copy_file(spDict+'/fr-classique.dic', spExt+'/dictionaries/fr-classic.dic')
        file_util.copy_file(spDict+'/fr-classique.aff', spExt+'/dictionaries/fr-classic.aff')
        copyTemplate('orthographe', spExt, 'README_dict_fr.txt', dTplVars)
        createZipFiles(spExt, spBuild, sExtensionName + '.xpi')
        # Grammalecte: only when a destination folder is given
        if spDestGL:
            echo(" * Dictionnaire >> copie des dicos dans Grammalecte")
            for dVars in lDictVars:
                file_util.copy_file(spDict+'/'+dVars['asciiName']+'.dic', spDestGL+'/'+dVars['mozAsciiName']+"/"+dVars['mozAsciiName']+'.dic')
                file_util.copy_file(spDict+'/'+dVars['asciiName']+'.aff', spDestGL+'/'+dVars['mozAsciiName']+"/"+dVars['mozAsciiName']+'.aff')
    
    def createFileIfqForDB (self, spBuild):
        """Write the frequency indexes that changed into two files of <spBuild>.

        For every entry of self.lEntry whose fq differs from oldFq:
        dictIdxIfq-<version>.diff.txt receives "<iD> TAB <fq>" and
        dictIdxIfq-<version>.notes.txt receives "<lemma>/<flags> TAB <oldFq> > <fq>".
        """
        echo(" * Dictionnaire >> indices de fréquence pour la DB...")
        spfPrefix = spBuild+'/dictIdxIfq-'+self.sVersion
        hDiff = open(spfPrefix+'.diff.txt', 'w', encoding='utf-8', newline="\n")
        with hDiff:
            hNotes = open(spfPrefix+'.notes.txt', 'w', encoding='utf-8', newline="\n")
            with hNotes:
                for oEntry in self.lEntry:
                    if oEntry.fq != oEntry.oldFq:
                        sDiffLine = "{0.iD}\t{0.fq}\n".format(oEntry)
                        hDiff.write(sDiffLine)
                        sNoteLine = "{0.lemma}/{0.flags}\t{0.oldFq} > {0.fq}\n".format(oEntry)
                        hNotes.write(sNoteLine)
        
    def createLexiconPackages (self, spBuild, version, oStatsLex, spLexGL=""):
        """Write the lexicon files, zip the lexicon folder and optionally copy the GC lexicon into Grammalecte.

        spBuild    -- build folder; a sub-folder named LEX_PREFIX + version is created in it
        version    -- version string used in file names and given to the writers and the template
        oStatsLex  -- statistics object handed to writeLexicon()
        spLexGL    -- folder of the Grammalecte project receiving French.lex and
                      French.tagset.txt; when empty (the default) nothing is copied,
                      the same way createLibreOfficeExtension skips its own copy
        """
        sLexName = LEX_PREFIX + version
        spLex = spBuild + '/' + sLexName
        dir_util.mkpath(spLex)
        # write Dicollecte lexicon
        self.sortLexiconByFreq()
        self.writeLexicon(spLex + '/' + sLexName + '.txt', version, oStatsLex)
        self.writeGrammarCheckerLexicon(spBuild + '/' + sLexName + '.lex', version)
        copyTemplate('lexique', spLex, 'README_lexique.txt', {'version': version})
        # zip
        createZipFiles(spLex, spBuild, sLexName + '.zip')
        # copy GC lexicon to Grammalecte, only when a destination folder is given
        if spLexGL:
            file_util.copy_file(spBuild + '/' + sLexName + '.lex', spLexGL + '/French.lex')
            file_util.copy_file('lexique/French.tagset.txt', spLexGL)

    def createDictConj (self, spBuild, spCopy=""):
        """Write <spBuild>/dictConj.txt and optionally copy it to <spCopy>.

        The file receives getConjugation() of every entry of self.lEntry whose
        <po> field starts with "v".
        spCopy -- destination of the copy; when empty (the default) the file is
                  only generated, the same way createLibreOfficeExtension skips
                  its own copy when no destination is given
        """
        echo(" * Dictionnaire >> fichier de conjugaison...")
        with open(spBuild+'/dictConj.txt', 'w', encoding='utf-8', newline="\n") as hDst:
            for oEntry in self.lEntry:
                if oEntry.po.startswith("v"):
                    hDst.write(oEntry.getConjugation())
        if spCopy:
            echo("   Fichier de conjugaison copié dans Grammalecte...")
            file_util.copy_file(spBuild+'/dictConj.txt', spCopy)

    def createDictDecl (self, spBuild, spCopy=""):
        """Write <spBuild>/dictDecl.txt and optionally copy it to <spCopy>.

        The file receives getDeclination() of every entry of self.lEntry whose
        flags begin with one of the letters S, X, F, W, I, A and whose <po> field
        starts with "nom" or "adj".
        spCopy -- destination of the copy; when empty (the default) the file is
                  only generated, the same way createLibreOfficeExtension skips
                  its own copy when no destination is given
        """
        echo(" * Dictionnaire >> fichier de déclinaison...")
        with open(spBuild+'/dictDecl.txt', 'w', encoding='utf-8', newline="\n") as hDst:
            for oEntry in self.lEntry:
                if re.match("[SXFWIA]", oEntry.flags) and (oEntry.po.startswith("nom") or oEntry.po.startswith("adj")):
                    hDst.write(oEntry.getDeclination())
        if spCopy:
            echo("   Fichier de déclinaison copié dans Grammalecte...")
            file_util.copy_file(spBuild+'/dictDecl.txt', spCopy)

    def generateSpellVariants (self, nReq, spBuild):
        if nReq < 1: nReq = 1
        if nReq > 2: nReq = 2
        echo(" * Lexique >> variantes par suppression... n = " + str(nReq))
        with open(spBuild+'/dictSpellVariants-'+str(nReq)+'.txt', 'w', encoding='utf-8', newline="\n") as hDst:
            for oFlex in frozenset(self.lFlexions):
................................................................................
        if self.err:
            echo("\n## Erreur dans le dictionnaire : {}".format(self.err))
            echo("   dans : " + self.lemma)
                
    def __str__ (self):
        "Return '<lemma>/<flags> <morph>' where <morph> is the result of self.getMorph(2)."
        sMorph = self.getMorph(2)
        sPattern = "{0.lemma}/{0.flags} {1}"
        return sPattern.format(self, sMorph)

    def display (self):
        "Send the text returned by self.__str__() to echo()."
        sText = self.__str__()
        echo(sText)

    def check (self):
        sErr = ''
        if self.lemma == '':
            sErr += 'lemme vide'
        if not re.match(r"[a-zA-ZéÉôÔàâÂîÎïèÈêÊÜœŒæÆçÇ0-9µåÅΩ&αβγδεζηθικλμνξοπρστυφχψωΔℓΩ_]", self.lemma):
            sErr += 'premier caractère inconnu: ' + self.lemma[0]
        if re.search(r"\s$", self.lemma):
................................................................................
        # moyenne des formes fléchies sans équivalent ou -1
        self.nAKO = math.ceil(nOccur / nFlex)  if nFlex > 0  else -1
    
    def solveOccurMultipleFlexions (self, hDst, oStatsLex):
        sBlank = "           "
        if self.nAKO >= 0:
            for oFlex in self.lFlexions:
                if oFlex.nMulti > 0 and not oFlex.bFixed:
                    # on trie les entrées avec AKO et sans AKO
                    lEntWithAKO = []
                    lEntNoAKO = []
                    for oEntry in oFlex.lMulti:
                        if oEntry.nAKO >= 0:
                            lEntWithAKO.append(oEntry)
                        else:
................................................................................
                        if nDiff > 0:
                            # on peut passer à les formes fléchies à AKO
                            hDst.write(" * {0.sFlexion}\n".format(oFlex))
                            hDst.write("       moyenne connue\n")
                            for oFlexD in self.lFlexions:
                                if oFlex.sFlexion == oFlexD.sFlexion:
                                    hDst.write(sBlank + "{2:<30} {0.sMorph:<30}  {0.nOccur:>10}  >> {1:>10}\n".format(oFlexD, self.nAKO, self.getShortDescr()))
                                    oFlexD.setOccur(self.nAKO)
                            for oEntry in lEntWithAKO:
                                hDst.write("       moyenne connue\n")
                                for oFlexM in oEntry.lFlexions:
                                    if oFlex.sFlexion == oFlexM.sFlexion:
                                        hDst.write(sBlank + "{2:<30} {0.sMorph:<30}  {0.nOccur:>10}  >> {1:>10}\n".format(oFlexM, oEntry.nAKO, oEntry.getShortDescr()))
                                        oFlexM.setOccur(oEntry.nAKO)
                            # on répercute nDiff sur les flexions sans AKO
                            for oEntry in lEntNoAKO:
                                hDst.write("       sans moyenne connue\n")
                                for oFlexM in oEntry.lFlexions:
                                    if oFlex.sFlexion == oFlexM.sFlexion:
                                        nNewOccur = oFlexM.nOccur + math.ceil((nDiff / len(lEntNoAKO)) / oFlexM.nDup)
                                        hDst.write(sBlank + "{2:<30} {0.sMorph:<30}  {0.nOccur:>10}  +> {1:>10}\n".format(oFlexM, nNewOccur, oEntry.getShortDescr()))
                                        oFlexM.setOccur(nNewOccur)
                    else:
                        # Toutes les entrées sont avec AKO : on pondère
                        nFlexOccur = oStatsLex.getFlexionOccur(oFlex.sFlexion)
                        nTotAKO = self.nAKO
                        for oEnt in oFlex.lMulti:
                            nTotAKO += oEnt.nAKO
                        
                        hDst.write(" = {0.sFlexion}\n".format(oFlex))
                        hDst.write("       moyennes connues\n")
                        for oFlexD in self.lFlexions:
                            if oFlex.sFlexion == oFlexD.sFlexion:
                                nNewOccur = math.ceil((nFlexOccur * (self.nAKO / nTotAKO)) / oFlexD.nDup)  if nTotAKO  else 0
                                hDst.write(sBlank + "{2:<30} {0.sMorph:<30}  {0.nOccur:>10}  %> {1:>10}\n".format(oFlexD, nNewOccur, self.getShortDescr()))
                                oFlexD.setOccur(nNewOccur)
                        for oEntry in oFlex.lMulti:
                            for oFlexM in oEntry.lFlexions:
                                if oFlex.sFlexion == oFlexM.sFlexion:
                                    nNewOccur = math.ceil((nFlexOccur * (oEntry.nAKO / nTotAKO)) / oFlexM.nDup)  if nTotAKO  else 0
                                    hDst.write(sBlank + "{2:<30} {0.sMorph:<30}  {0.nOccur:>10}  %> {1:>10}\n".format(oFlexM, nNewOccur, oEntry.getShortDescr()))
                                    oFlexM.setOccur(nNewOccur)
        
    def calcFreq (self, nTot):
        """Update the frequency of the entry from its occurrences.

        fFreq becomes nOccur * 100 / nTot, the current fq is saved in oldFq,
        then fq is replaced with getIfq(fFreq).
        """
        fPercent = self.nOccur * 100 / nTot
        self.fFreq = fPercent
        # keep the previous index so that changes can be detected afterwards
        self.oldFq = self.fq
        self.fq = getIfq(fPercent)


................................................................................
class Flexion:
    def __init__ (self, oEntry, sFlex='', sMorph='', cDic=''):
        self.oEntry = oEntry
        self.sFlexion = sFlex
        self.sMorph = sMorph
        self.cDic    = cDic
        self.nOccur  = 0
        self.bFixed  = False
        self.nDup    = 0    # duplicates in the same entry
        self.nMulti  = 0    # duplicates with other entries
        self.lMulti  = []   # list of similar flexions
        self.fFreq   = 0
        self.cFq     = ''
        self.metagfx = ''   # métagraphe
        self.metaph2 = ''   # métaphone 2
        
    def setOccur (self, n):
        self.nOccur = n



        self.bFixed = True

    def calcOccur (self):
        self.nOccur = math.ceil((self.nOccur / (self.nMulti+1)) / self.nDup)
    
    def calcFreq (self, nTot):
        "Store nOccur * 100 / nTot in fFreq and getIfq(fFreq) in cFq."
        fPercent = self.nOccur * 100 / nTot
        self.fFreq = fPercent
        self.cFq = getIfq(fPercent)
................................................................................

    def __str__ (self, oStatsLex):
        sOccurs = ''
        for v in oStatsLex.dFlexions[self.sFlexion]:
            sOccurs += str(v) + "\t"
        return "{0.oEntry.iD}\t{0.sFlexion}\t{0.oEntry.sRadical}\t{0.sMorph}\t{0.metagfx}\t{0.metaph2}\t{0.oEntry.lx}\t{0.oEntry.se}\t{0.oEntry.et}\t{0.oEntry.di}{2}\t{1}{0.nOccur}\t{0.nDup}\t{0.nMulti}\t{0.fFreq:.15f}\t{0.cFq}\n".format(self, sOccurs, "/"+self.cDic if self.cDic != "*" else "")

    def display (self, oStatsLex=None):
        """Send a description of this flexion to echo().

        The former body called self.__str__() without the argument that the
        __str__ method of this class requires, so every call raised a TypeError.

        oStatsLex -- when given, it is forwarded to __str__ and the full row is
                     displayed; when omitted, a short line made of sFlexion,
                     sMorph and nOccur (tab-separated) is displayed instead.
        """
        if oStatsLex is not None:
            echo(self.__str__(oStatsLex))
        else:
            echo("{0.sFlexion}\t{0.sMorph}\t{0.nOccur}".format(self))

    @classmethod
    def simpleHeader (cls):
        "Return the constant comment line '# :POS ;LEX ~SEM =FQ /DIC' followed by a newline (cls is not used)."
        return "# :POS ;LEX ~SEM =FQ /DIC\n"

    def getGrammarCheckerRepr (self):
        return "{0.sFlexion}\t{0.oEntry.lemma}\t{1}\n".format(self, self._getSimpleTags())

................................................................................

    xParser = argparse.ArgumentParser()
    xParser.add_argument("-v", "--verdic", help="set dictionary version, i.e. 5.4", type=str, default="X.Y.z")
    xParser.add_argument("-m", "--mode", help="0: no tags,  1: Hunspell tags (default),  2: All tags", type=int, choices=[0, 1, 2], default=1)
    xParser.add_argument("-u", "--uncompress", help="do not use Hunspell compression", action="store_true")
    xParser.add_argument("-s", "--simplify", help="no virtual lemmas", action="store_true")
    xParser.add_argument("-sv", "--spellvariants", help="generate spell variants", action="store_true")

    xArgs = xParser.parse_args()

    if xArgs.simplify:
        xArgs.mode = 0
        xArgs.uncompress = True

    echo("Python: " + sys.version)
................................................................................
    oStatsLex.addLexFromFile('lexique/corpus_data/stats_frwikisource.txt', 'S', 'Wikisource')
    oStatsLex.addLexFromFile('lexique/corpus_data/stats_litterature.txt', 'L', 'Littérature')
    oStatsLex.write(spBuild+'/test_lex.txt')
    oFrenchDict.calculateStats(oStatsLex, spfStats)
    
    ### écriture des paquets
    echo("Création des paquets...")






    if not xArgs.uncompress:
        oFrenchDict.defineAbreviatedTags(xArgs.mode, spfStats)
    oFrenchDict.createFiles(spBuild, [dMODERNE, dTOUTESVAR, dCLASSIQUE, dREFORME1990], xArgs.mode, xArgs.simplify)
    oFrenchDict.createLibreOfficeExtension(spBuild, dMOZEXT, [dMODERNE, dTOUTESVAR, dCLASSIQUE, dREFORME1990], "../oxt/Dictionnaires/dictionaries")
    oFrenchDict.createMozillaExtensions(spBuild, dMOZEXT, [dMODERNE, dTOUTESVAR, dCLASSIQUE, dREFORME1990], "../xpi/data/dictionaries")
    oFrenchDict.createLexiconPackages(spBuild, xArgs.verdic, oStatsLex, "../../../lexicons")
    oFrenchDict.createFileIfqForDB(spBuild)
    oFrenchDict.createDictConj(spBuild, "../data")
    oFrenchDict.createDictDecl(spBuild, "../data")



# Call main() only when this file is executed as a script, not when it is imported.
if __name__ == '__main__':
    main()







|







 







|

|

|













>
|
|
|
|










|











>
|
|

|





>
|
|

|





>
|
|







 







<
<
<







 







|







 







|





|







|













|





|







 







|







|


>
>
>
|







 







<
<
<







 







>







 







>
>
>
>
>
>



|
|
|
|
|
|





547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
...
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
...
809
810
811
812
813
814
815



816
817
818
819
820
821
822
....
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
....
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
....
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
....
1194
1195
1196
1197
1198
1199
1200



1201
1202
1203
1204
1205
1206
1207
....
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519
....
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
            dVars['version'] = self.sVersion
            # Dictionaries files (.dic) (.aff)
            self.writeAffixes(spDic, dVars, nMode, bSimplified)
            self.writeDictionary(spDic, dVars, nMode, bSimplified)
        copyTemplate('orthographe', spDic, 'README_dict_fr.txt', dVars)
        createZipFiles(spDic, spDst, sDicName + '.zip')

    def createLibreOfficeExtension (self, spBuild, dTplVars, lDictVars, spDestGL=""):
        # LibreOffice extension
        echo(" * Dictionnaire >> extension pour LibreOffice")
        dTplVars['version'] = self.sVersion
        sExtensionName = EXT_PREFIX_OOO + self.sVersion
        spExt = spBuild + '/' + sExtensionName
        dir_util.mkpath(spExt+'/META-INF')
        dir_util.mkpath(spExt+'/ui')
................................................................................
        file_util.copy_file('césures/frhyph.tex', spExt+'/dictionaries')
        file_util.copy_file('césures/hyph-fr.tex', spExt+'/dictionaries')
        file_util.copy_file('césures/README_hyph_fr-3.0.txt', spExt+'/dictionaries')
        file_util.copy_file('césures/README_hyph_fr-2.9.txt', spExt+'/dictionaries')
        # zip
        createZipFiles(spExt, spBuild, sExtensionName + '.oxt')
        # copy to Grammalecte Project
        if spDestGL:
            echo("   extension copiée dans Grammalecte...")
            dir_util.copy_tree(spExt+'/dictionaries', spDestGL)
    
    def createMozillaExtensions (self, spBuild, dTplVars, lDictVars, spDestGL=""):
        """Build the Mozilla dictionary extension (.xpi); copy the dictionaries to <spDestGL> if it is not empty.

        dTplVars['version'] is overwritten with self.sVersion before the templates are copied.
        Each item of lDictVars must provide the keys 'asciiName' and 'mozAsciiName'.
        """
        echo(" * Dictionnaire >> extension pour Mozilla")
        dTplVars['version'] = self.sVersion
        sExtensionName = EXT_PREFIX_MOZ + self.sVersion
        spExt = spBuild + '/' + sExtensionName
        spDict = spBuild + '/' + PREFIX_DICT_PATH + self.sVersion
        # content of the extension
        dir_util.mkpath(spExt+'/dictionaries')
        copyTemplate('_templates/moz', spExt, 'install.rdf', dTplVars)
        file_util.copy_file(spDict+'/fr-classique.dic', spExt+'/dictionaries/fr-classic.dic')
        file_util.copy_file(spDict+'/fr-classique.aff', spExt+'/dictionaries/fr-classic.aff')
        copyTemplate('orthographe', spExt, 'README_dict_fr.txt', dTplVars)
        createZipFiles(spExt, spBuild, sExtensionName + '.xpi')
        if not spDestGL:
            return
        # Grammalecte: each dictionary goes in a folder bearing its Mozilla name
        echo(" * Dictionnaire >> copie des dicos dans Grammalecte")
        for dVars in lDictVars:
            spfSrc = spDict+'/'+dVars['asciiName']
            file_util.copy_file(spfSrc+'.dic', spDestGL+'/'+dVars['mozAsciiName']+"/"+dVars['mozAsciiName']+'.dic')
            file_util.copy_file(spfSrc+'.aff', spDestGL+'/'+dVars['mozAsciiName']+"/"+dVars['mozAsciiName']+'.aff')
    
    def createFileIfqForDB (self, spBuild):
        """List the entries whose frequency index changed (fq different from oldFq).

        Two files are written in <spBuild>: dictIdxIfq-<version>.diff.txt with
        "<iD> TAB <fq>" lines and dictIdxIfq-<version>.notes.txt with
        "<lemma>/<flags> TAB <oldFq> > <fq>" lines.
        """
        echo(" * Dictionnaire >> indices de fréquence pour la DB...")
        spfDiff = spBuild+'/dictIdxIfq-'+self.sVersion+'.diff.txt'
        with open(spfDiff, 'w', encoding='utf-8', newline="\n") as hDiff:
            spfNotes = spBuild+'/dictIdxIfq-'+self.sVersion+'.notes.txt'
            with open(spfNotes, 'w', encoding='utf-8', newline="\n") as hNotes:
                for oEntry in self.lEntry:
                    if oEntry.fq != oEntry.oldFq:
                        hDiff.write("{0.iD}\t{0.fq}\n".format(oEntry))
                        hNotes.write("{0.lemma}/{0.flags}\t{0.oldFq} > {0.fq}\n".format(oEntry))
        
    def createLexiconPackages (self, spBuild, version, oStatsLex, spDestGL=""):
        """Write the lexicon files and zip the lexicon folder; copy the GC lexicon to <spDestGL> if it is not empty.

        The lexicon folder is <spBuild>/<LEX_PREFIX + version>; the grammar checker
        lexicon (.lex) is written directly in <spBuild>.
        """
        sLexName = LEX_PREFIX + version
        spLex = spBuild + '/' + sLexName
        spfGCLexicon = spBuild + '/' + sLexName + '.lex'
        dir_util.mkpath(spLex)
        # Dicollecte lexicon, then grammar checker lexicon
        self.sortLexiconByFreq()
        self.writeLexicon(spLex + '/' + sLexName + '.txt', version, oStatsLex)
        self.writeGrammarCheckerLexicon(spfGCLexicon, version)
        copyTemplate('lexique', spLex, 'README_lexique.txt', {'version': version})
        createZipFiles(spLex, spBuild, sLexName + '.zip')
        if not spDestGL:
            return
        # copy GC lexicon to Grammalecte
        file_util.copy_file(spfGCLexicon, spDestGL + '/French.lex')
        file_util.copy_file('lexique/French.tagset.txt', spDestGL)

    def createDictConj (self, spBuild, spDestGL=""):
        """Write <spBuild>/dictConj.txt; copy it to <spDestGL> if it is not empty.

        The file receives getConjugation() of every entry whose <po> field starts with "v".
        """
        echo(" * Dictionnaire >> fichier de conjugaison...")
        spfConj = spBuild+'/dictConj.txt'
        with open(spfConj, 'w', encoding='utf-8', newline="\n") as hDst:
            hDst.writelines(oEntry.getConjugation()  for oEntry in self.lEntry  if oEntry.po.startswith("v"))
        if not spDestGL:
            return
        echo("   Fichier de conjugaison copié dans Grammalecte...")
        file_util.copy_file(spfConj, spDestGL)

    def createDictDecl (self, spBuild, spDestGL=""):
        """Write <spBuild>/dictDecl.txt; copy it to <spDestGL> if it is not empty.

        The file receives getDeclination() of every entry whose flags begin with one
        of the letters S, X, F, W, I, A and whose <po> field starts with "nom" or "adj".
        """
        echo(" * Dictionnaire >> fichier de déclinaison...")
        spfDecl = spBuild+'/dictDecl.txt'
        with open(spfDecl, 'w', encoding='utf-8', newline="\n") as hDst:
            for oEntry in self.lEntry:
                if not re.match("[SXFWIA]", oEntry.flags):
                    continue
                if oEntry.po.startswith(("nom", "adj")):
                    hDst.write(oEntry.getDeclination())
        if not spDestGL:
            return
        echo("   Fichier de déclinaison copié dans Grammalecte...")
        file_util.copy_file(spfDecl, spDestGL)

    def generateSpellVariants (self, nReq, spBuild):
        if nReq < 1: nReq = 1
        if nReq > 2: nReq = 2
        echo(" * Lexique >> variantes par suppression... n = " + str(nReq))
        with open(spBuild+'/dictSpellVariants-'+str(nReq)+'.txt', 'w', encoding='utf-8', newline="\n") as hDst:
            for oFlex in frozenset(self.lFlexions):
................................................................................
        if self.err:
            echo("\n## Erreur dans le dictionnaire : {}".format(self.err))
            echo("   dans : " + self.lemma)
                
    def __str__ (self):
        "Describe the entry as '<lemma>/<flags> <morph>', <morph> being self.getMorph(2)."
        sMorph = self.getMorph(2)
        sPattern = "{0.lemma}/{0.flags} {1}"
        return sPattern.format(self, sMorph)




    def check (self):
        sErr = ''
        if self.lemma == '':
            sErr += 'lemme vide'
        if not re.match(r"[a-zA-ZéÉôÔàâÂîÎïèÈêÊÜœŒæÆçÇ0-9µåÅΩ&αβγδεζηθικλμνξοπρστυφχψωΔℓΩ_]", self.lemma):
            sErr += 'premier caractère inconnu: ' + self.lemma[0]
        if re.search(r"\s$", self.lemma):
................................................................................
        # moyenne des formes fléchies sans équivalent ou -1
        self.nAKO = math.ceil(nOccur / nFlex)  if nFlex > 0  else -1
    
    def solveOccurMultipleFlexions (self, hDst, oStatsLex):
        sBlank = "           "
        if self.nAKO >= 0:
            for oFlex in self.lFlexions:
                if oFlex.nMulti > 0 and not oFlex.bBlocked:
                    # on trie les entrées avec AKO et sans AKO
                    lEntWithAKO = []
                    lEntNoAKO = []
                    for oEntry in oFlex.lMulti:
                        if oEntry.nAKO >= 0:
                            lEntWithAKO.append(oEntry)
                        else:
................................................................................
                        if nDiff > 0:
                            # on peut passer à les formes fléchies à AKO
                            hDst.write(" * {0.sFlexion}\n".format(oFlex))
                            hDst.write("       moyenne connue\n")
                            for oFlexD in self.lFlexions:
                                if oFlex.sFlexion == oFlexD.sFlexion:
                                    hDst.write(sBlank + "{2:<30} {0.sMorph:<30}  {0.nOccur:>10}  >> {1:>10}\n".format(oFlexD, self.nAKO, self.getShortDescr()))
                                    oFlexD.setOccurAndBlock(self.nAKO)
                            for oEntry in lEntWithAKO:
                                hDst.write("       moyenne connue\n")
                                for oFlexM in oEntry.lFlexions:
                                    if oFlex.sFlexion == oFlexM.sFlexion:
                                        hDst.write(sBlank + "{2:<30} {0.sMorph:<30}  {0.nOccur:>10}  >> {1:>10}\n".format(oFlexM, oEntry.nAKO, oEntry.getShortDescr()))
                                        oFlexM.setOccurAndBlock(oEntry.nAKO)
                            # on répercute nDiff sur les flexions sans AKO
                            for oEntry in lEntNoAKO:
                                hDst.write("       sans moyenne connue\n")
                                for oFlexM in oEntry.lFlexions:
                                    if oFlex.sFlexion == oFlexM.sFlexion:
                                        nNewOccur = oFlexM.nOccur + math.ceil((nDiff / len(lEntNoAKO)) / oFlexM.nDup)
                                        hDst.write(sBlank + "{2:<30} {0.sMorph:<30}  {0.nOccur:>10}  +> {1:>10}\n".format(oFlexM, nNewOccur, oEntry.getShortDescr()))
                                        oFlexM.setOccurAndBlock(nNewOccur)
                    else:
                        # Toutes les entrées sont avec AKO : on pondère
                        nFlexOccur = oStatsLex.getFlexionOccur(oFlex.sFlexion)
                        nTotAKO = self.nAKO
                        for oEnt in oFlex.lMulti:
                            nTotAKO += oEnt.nAKO
                        
                        hDst.write(" = {0.sFlexion}\n".format(oFlex))
                        hDst.write("       moyennes connues\n")
                        for oFlexD in self.lFlexions:
                            if oFlex.sFlexion == oFlexD.sFlexion:
                                nNewOccur = math.ceil((nFlexOccur * (self.nAKO / nTotAKO)) / oFlexD.nDup)  if nTotAKO  else 0
                                hDst.write(sBlank + "{2:<30} {0.sMorph:<30}  {0.nOccur:>10}  %> {1:>10}\n".format(oFlexD, nNewOccur, self.getShortDescr()))
                                oFlexD.setOccurAndBlock(nNewOccur)
                        for oEntry in oFlex.lMulti:
                            for oFlexM in oEntry.lFlexions:
                                if oFlex.sFlexion == oFlexM.sFlexion:
                                    nNewOccur = math.ceil((nFlexOccur * (oEntry.nAKO / nTotAKO)) / oFlexM.nDup)  if nTotAKO  else 0
                                    hDst.write(sBlank + "{2:<30} {0.sMorph:<30}  {0.nOccur:>10}  %> {1:>10}\n".format(oFlexM, nNewOccur, oEntry.getShortDescr()))
                                    oFlexM.setOccurAndBlock(nNewOccur)
        
    def calcFreq (self, nTot):
        """Compute the frequency of the entry as a percentage of <nTot>.

        fFreq receives nOccur * 100 / nTot; the current fq is first saved in
        oldFq and then replaced with getIfq(fFreq).
        """
        fPercent = self.nOccur * 100 / nTot
        self.fFreq = fPercent
        # the former index is kept for later comparison
        self.oldFq = self.fq
        self.fq = getIfq(fPercent)


................................................................................
class Flexion:
    def __init__ (self, oEntry, sFlex='', sMorph='', cDic=''):
        self.oEntry = oEntry
        self.sFlexion = sFlex
        self.sMorph = sMorph
        self.cDic    = cDic
        self.nOccur  = 0
        self.bBlocked  = False
        self.nDup    = 0    # duplicates in the same entry
        self.nMulti  = 0    # duplicates with other entries
        self.lMulti  = []   # list of similar flexions
        self.fFreq   = 0
        self.cFq     = ''
        self.metagfx = ''   # métagraphe
        self.metaph2 = ''   # métaphone 2
    
    def setOccur (self, n):
        "Replace the number of occurrences with <n>; unlike setOccurAndBlock, bBlocked is left untouched."
        self.nOccur = n

    def setOccurAndBlock (self, n):
        self.nOccur = n
        self.bBlocked = True

    def calcOccur (self):
        self.nOccur = math.ceil((self.nOccur / (self.nMulti+1)) / self.nDup)
    
    def calcFreq (self, nTot):
        "Compute fFreq as nOccur * 100 / nTot, then set cFq to getIfq(fFreq)."
        fPercent = self.nOccur * 100 / nTot
        self.fFreq = fPercent
        self.cFq = getIfq(fPercent)
................................................................................

    def __str__ (self, oStatsLex):
        sOccurs = ''
        for v in oStatsLex.dFlexions[self.sFlexion]:
            sOccurs += str(v) + "\t"
        return "{0.oEntry.iD}\t{0.sFlexion}\t{0.oEntry.sRadical}\t{0.sMorph}\t{0.metagfx}\t{0.metaph2}\t{0.oEntry.lx}\t{0.oEntry.se}\t{0.oEntry.et}\t{0.oEntry.di}{2}\t{1}{0.nOccur}\t{0.nDup}\t{0.nMulti}\t{0.fFreq:.15f}\t{0.cFq}\n".format(self, sOccurs, "/"+self.cDic if self.cDic != "*" else "")




    @classmethod
    def simpleHeader (cls):
        "Return the constant comment line '# :POS ;LEX ~SEM =FQ /DIC' followed by a newline (cls is not used)."
        return "# :POS ;LEX ~SEM =FQ /DIC\n"

    def getGrammarCheckerRepr (self):
        return "{0.sFlexion}\t{0.oEntry.lemma}\t{1}\n".format(self, self._getSimpleTags())

................................................................................

    xParser = argparse.ArgumentParser()
    xParser.add_argument("-v", "--verdic", help="set dictionary version, i.e. 5.4", type=str, default="X.Y.z")
    xParser.add_argument("-m", "--mode", help="0: no tags,  1: Hunspell tags (default),  2: All tags", type=int, choices=[0, 1, 2], default=1)
    xParser.add_argument("-u", "--uncompress", help="do not use Hunspell compression", action="store_true")
    xParser.add_argument("-s", "--simplify", help="no virtual lemmas", action="store_true")
    xParser.add_argument("-sv", "--spellvariants", help="generate spell variants", action="store_true")
    xParser.add_argument("-gl", "--grammalecte", help="copy generated files to Grammalecte folders", action="store_true")
    xArgs = xParser.parse_args()

    if xArgs.simplify:
        xArgs.mode = 0
        xArgs.uncompress = True

    echo("Python: " + sys.version)
................................................................................
    oStatsLex.addLexFromFile('lexique/corpus_data/stats_frwikisource.txt', 'S', 'Wikisource')
    oStatsLex.addLexFromFile('lexique/corpus_data/stats_litterature.txt', 'L', 'Littérature')
    oStatsLex.write(spBuild+'/test_lex.txt')
    oFrenchDict.calculateStats(oStatsLex, spfStats)
    
    ### écriture des paquets
    echo("Création des paquets...")

    spLexiconDestGL = "../../../lexicons"  if xArgs.grammalecte  else ""
    spLibreOfficeExtDestGL = "../oxt/Dictionnaires/dictionaries"  if xArgs.grammalecte  else ""
    spMozillaExtDestGL = "../xpi/data/dictionaries"  if xArgs.grammalecte  else ""
    spDataDestGL = "../data"  if xArgs.grammalecte  else ""

    if not xArgs.uncompress:
        oFrenchDict.defineAbreviatedTags(xArgs.mode, spfStats)
    oFrenchDict.createFiles(spBuild, [dMODERNE, dTOUTESVAR, dCLASSIQUE, dREFORME1990], xArgs.mode, xArgs.simplify)
    oFrenchDict.createLexiconPackages(spBuild, xArgs.verdic, oStatsLex, spLexiconDestGL)
    oFrenchDict.createFileIfqForDB(spBuild)
    oFrenchDict.createLibreOfficeExtension(spBuild, dMOZEXT, [dMODERNE, dTOUTESVAR, dCLASSIQUE, dREFORME1990], spLibreOfficeExtDestGL)
    oFrenchDict.createMozillaExtensions(spBuild, dMOZEXT, [dMODERNE, dTOUTESVAR, dCLASSIQUE, dREFORME1990], spMozillaExtDestGL)
    oFrenchDict.createDictConj(spBuild, spDataDestGL)
    oFrenchDict.createDictDecl(spBuild, spDataDestGL)



# Call main() only when this file is executed as a script, not when it is imported.
if __name__ == '__main__':
    main()