Grammalecte  Check-in [0b7150270a]

Overview
Comment: merge trunk
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | multid
Files: files | file ages | folders
SHA3-256: 0b7150270a659e28c3cd498edd1990327ad2f219db00ca9c9c4c8eadc3d4384e
User & Date: olr on 2018-02-19 09:44:11
Other Links: branch diff | manifest | tags
Context
2018-02-19
12:37
[lo] update: helpers check-in: 12ad381687 user: olr tags: lo, multid
09:44
merge trunk check-in: 0b7150270a user: olr tags: multid
09:11
[fr] version 0.6.2 check-in: 18027d1022 user: olr tags: fr, trunk
2018-02-18
16:28
[lo] UI for dictionaries options check-in: edf22c7d52 user: olr tags: lo, multid
Changes

Modified gc_lang/fr/build_data.py from [a0d5d064eb] to [1f69de4a2f].

   267    267   def makePhonetTable (sp, bJS=False):
   268    268       print("> Correspondances phonétiques ", end="")
   269    269       print("(Python et JavaScript)"  if bJS  else "(Python seulement)")
   270    270       
   271    271       import gc_lang.fr.modules.conj as conj
   272    272   
   273    273       try:
   274         -        oDict = ibdawg.IBDAWG("French.bdic")
          274  +        oDict = ibdawg.IBDAWG("fr.bdic")
   275    275       except:
   276    276           traceback.print_exc()
   277    277           return
   278    278   
   279    279       # set of homophonic words
   280    280       lSet = []
   281    281       for sLine in readFile(sp+"/data/phonet_simil.txt"):

Modified gc_lang/fr/config.ini from [08f47bce51] to [c7e11a6902].

     2      2   lang = fr
     3      3   lang_name = French
     4      4   locales = fr_FR fr_BE fr_CA fr_CH fr_LU fr_MC fr_BF fr_CI fr_SN fr_ML fr_NE fr_TG fr_BJ
     5      5   country_default = FR
     6      6   name = Grammalecte
     7      7   implname = grammalecte
     8      8   # always use 3 numbers for version: x.y.z
     9         -version = 0.6.1
            9  +version = 0.6.2
    10     10   author = Olivier R.
    11     11   provider = Dicollecte
    12     12   link = http://grammalecte.net
    13     13   description = Correcteur grammatical pour le français.
    14     14   extras = README_fr.txt
    15     15   logo = logo.png
    16     16   

Modified gc_lang/fr/dictionnaire/genfrdic.py from [5036afecd5] to [e42dad16b6].

   813    813       def __str__ (self):
   814    814           return "{0.lemma}/{0.flags} {1}".format(self, self.getMorph(2))
   815    815   
   816    816       def check (self):
   817    817           sErr = ''
   818    818           if self.lemma == '':
   819    819               sErr += 'lemme vide'
   820         -        if not re.match(r"[a-zA-ZéÉôÔàâÂîÎïèÈêÊÜœŒæÆçÇ0-9µåÅΩ&αβγδεζηθικλμνξοπρστυφχψωΔℓΩ_]", self.lemma):
          820  +        if not re.match(r"[a-zA-ZéÉôÔàâáÂîÎïèÈêÊÜœŒæÆçÇ0-9µåÅΩ&αβγδεζηθικλμνξοπρστυφχψωΔℓΩ_]", self.lemma):
   821    821               sErr += 'premier caractère inconnu: ' + self.lemma[0]
   822    822           if re.search(r"\s$", self.lemma):
   823    823               sErr += 'espace en fin de lemme'
   824    824           if re.match(r"v[0123]", self.po) and not re.match(r"[eas_][ix_][tx_][nx_][pqreuvx_][mx_][ex_z][ax_z]\b", self.po[2:]):
   825    825               sErr += 'verbe inconnu: ' + self.po
   826    826           if (re.match(r"S[*.]", self.flags) and re.search("[sxz]$", self.lemma)) or (re.match(r"X[*.]", self.flags) and not re.search("[ul]$", self.lemma)):
   827    827               sErr += 'drapeau inutile'

Modified gc_lang/fr/dictionnaire/orthographe/FRANCAIS_5.aff from [078b475b37] to [a2e4f22697].

    48     48   MAP tT
    49     49   MAP vV
    50     50   MAP wW
    51     51   MAP xX
    52     52   MAP zZ
    53     53   
    54     54   # Remplacements envisagés & barbarismes
    55         -REP 84
           55  +REP 82
    56     56   REP ^Ca$ Ça
    57     57   REP ^l l'
    58     58   REP ^d d'
    59     59   REP ^n n'
    60     60   REP ^s s'
    61     61   REP ^j j'
    62     62   REP ^m m'
................................................................................
   131    131   REP faisez$ faites
   132    132   REP puit puits
   133    133   REP sanctionnable punissable
   134    134   REP questionnable discutable
   135    135   REP antitartre détartrant
   136    136   REP email courriel
   137    137   REP construirent construisirent
   138         -REP cad$ c’est-à-dire
   139         -REP càd$ c’est-à-dire
   140    138   
   141    139   
   142    140   # Phonétique
   143    141   #PHONE 69
   144    142   #PHONE AN(DT)$                   @
   145    143   #PHONE AILL                      AY
   146    144   #PHONE AIS$                      E
................................................................................
   322    320   # Astuce de Hunspell pour contourner la non-normalisation de l’unicode dans OOo
   323    321   # http://www.openoffice.org/issues/show_bug.cgi?id=75769
   324    322   # La première colonne dresse une liste de caractères écrits avec des diacritiques combinants :
   325    323   # http://www.unicode.org/charts/    U0300 +
   326    324   # La seconde colonne établit l’équivalent en Latin-1 étendu :
   327    325   # Hunspell fait la modification pour vérifier l’orthographe. (Peut-être pas utile pour Mozilla)
   328    326   # Apostrophes: U+2019, U+02BC
   329         -ICONV 41
          327  +ICONV 42
   330    328   ICONV ’ '
   331    329   ICONV ʼ '
   332    330   ICONV ﬃ ffi
   333    331   ICONV ﬄ ffl
   334    332   ICONV ﬀ ff
   335    333   ICONV ﬅ ft
   336    334   ICONV ﬁ fi
   337    335   ICONV ﬂ fl
   338    336   ICONV ﬆ st
          337  +ICONV ſ s
   339    338   ICONV à à
   340    339   ICONV â â
   341    340   ICONV ä ä
   342    341   ICONV é é
   343    342   ICONV è è
   344    343   ICONV ê ê
   345    344   ICONV ë ë

Modified gc_lang/fr/perf_memo.txt from [cec037999d] to [15962af16c].

    19     19   0.5.12      2016.10.14 18:58    4.51895     1.0843      0.772805    0.22387     0.249411    0.261593    0.628802    0.339303    0.0570326   0.00805416  
    20     20   0.5.15      2017.01.22 11:44    4.85204     1.16134     0.770762    0.227874    0.244574    0.253305    0.58831     0.319987    0.0603996   0.00694786  
    21     21   0.5.15      2017.01.22 11:47    4.85593     1.15248     0.762924    0.22744     0.243461    0.254609    0.586741    0.317503    0.0588827   0.00701016  (unicode normalisation NFC)
    22     22   0.5.15      2017.01.31 12:06    4.88227     1.18008     0.782217    0.232617    0.247672    0.257628    0.596903    0.32169     0.0603505   0.00695196  
    23     23   0.5.15      2017.02.05 10:10    4.90222     1.18444     0.786696    0.233413    0.25071     0.260214    0.602112    0.325235    0.0609932   0.00706897  
    24     24   0.5.16      2017.05.12 07:41    4.92201     1.19269     0.80639     0.239147    0.257518    0.266523    0.62111     0.33359     0.0634668   0.00757178  
    25     25   0.6.1       2018.02.12 09:58    5.25924     1.2649      0.878442    0.257465    0.280558    0.293903    0.686887    0.391275    0.0672474   0.00824723  
           26  +0.6.2       2018.02.19 09:06    6.20116     1.44334     1.02936     0.272956    0.311561    0.362367    0.812705    0.419061    0.0773003   0.00845671  (spelling normalization)

Modified gc_lang/fr/webext/manifest.json from [d0c2c44fc6] to [92ab4049ef].

     1      1   {
     2      2     "manifest_version": 2,
     3      3     "name": "Grammalecte [fr]",
     4      4     "short_name": "Grammalecte [fr]",
     5         -  "version": "0.6.1",
            5  +  "version": "0.6.2",
     6      6   
     7      7     "applications": {
     8      8       "gecko": {
     9      9         "id": "French-GC@grammalecte.net",
    10     10         "strict_min_version": "56.0"
    11     11       }
    12     12     },

Modified graphspell-js/char_player.js from [c9b14a8774] to [c171c18615].

     2      2   // useful for suggestion mechanism
     3      3   
     4      4   ${map}
     5      5   
     6      6   
     7      7   var char_player = {
     8      8   
     9         -    _dTransChars: new Map([
            9  +    _xTransCharsForSpelling: new Map([
           10  +        ['ſ', 's'],  ['ﬃ', 'ffi'],  ['ﬄ', 'ffl'],  ['ﬀ', 'ff'],  ['ﬅ', 'ft'],  ['ﬁ', 'fi'],  ['ﬂ', 'fl'],  ['ﬆ', 'st']
           11  +    ]),
           12  +
           13  +    spellingNormalization: function (sWord) {
           14  +        let sNewWord = "";
           15  +        for (let c of sWord) {
           16  +            sNewWord += this._xTransCharsForSpelling.gl_get(c, c);
           17  +        }
           18  +        return sNewWord.normalize("NFC");
           19  +    },
           20  +
           21  +    _xTransCharsForSimplification: new Map([
    10     22           ['à', 'a'],  ['é', 'e'],  ['î', 'i'],  ['ô', 'o'],  ['û', 'u'],  ['ÿ', 'i'],  ['y', 'i'],
    11     23           ['â', 'a'],  ['è', 'e'],  ['ï', 'i'],  ['ö', 'o'],  ['ù', 'u'],  ['ŷ', 'i'],
    12     24           ['ä', 'a'],  ['ê', 'e'],  ['í', 'i'],  ['ó', 'o'],  ['ü', 'u'],  ['ý', 'i'],
    13     25           ['á', 'a'],  ['ë', 'e'],  ['ì', 'i'],  ['ò', 'o'],  ['ú', 'u'],  ['ỳ', 'i'],
    14     26           ['ā', 'a'],  ['ē', 'e'],  ['ī', 'i'],  ['ō', 'o'],  ['ū', 'u'],  ['ȳ', 'i'],
    15     27           ['ñ', 'n'],  ['k', 'q'],  ['w', 'v'],
    16     28           ['œ', 'oe'], ['æ', 'ae'], 
           29  +        ['ſ', 's'],  ['ﬃ', 'ffi'],  ['ﬄ', 'ffl'],  ['ﬀ', 'ff'],  ['ﬅ', 'ft'],  ['ﬁ', 'fi'],  ['ﬂ', 'fl'],  ['ﬆ', 'st']
    17     30       ]),
    18     31   
    19     32       simplifyWord: function (sWord) {
    20     33           // word simplication before calculating distance between words
    21     34           sWord = sWord.toLowerCase();
    22     35           let sNewWord = "";
    23     36           let i = 1;
    24     37           for (let c of sWord) {
    25         -            let cNew = this._dTransChars.gl_get(c, c);
           38  +            let cNew = this._xTransCharsForSimplification.gl_get(c, c);
    26     39               let cNext = sWord.slice(i, i+1)
    27         -            if (cNew != this._dTransChars.gl_get(cNext, cNext)) {
           40  +            if (cNew != this._xTransCharsForSimplification.gl_get(cNext, cNext)) {
    28     41                   sNewWord += cNew;
    29     42               }
    30     43               i++;
    31     44           }
    32     45           return sNewWord.replace(/eau/g, "o").replace(/au/g, "o").replace(/ai/g, "e").replace(/ei/g, "e").replace(/ph/g, "f");
    33     46       },
    34     47   

Modified graphspell-js/ibdawg.js from [08ad598b63] to [73e27f350e].

   206    206               "sByDic": this.sByDic    // binary word graph
   207    207           };
   208    208           return oJSON;
   209    209       }
   210    210   
   211    211       isValidToken (sToken) {
   212    212           // checks if sToken is valid (if there is hyphens in sToken, sToken is split, each part is checked)
          213  +        sToken = char_player.spellingNormalization(sToken)
   213    214           if (this.isValid(sToken)) {
   214    215               return true;
   215    216           }
   216    217           if (sToken.includes("-")) {
   217    218               if (sToken.gl_count("-") > 4) {
   218    219                   return true;
   219    220               }
................................................................................
   276    277               }
   277    278           }
   278    279           return Boolean(this._convBytesToInteger(this.byDic.slice(iAddr, iAddr+this.nBytesArc)) & this._finalNodeMask);
   279    280       }
   280    281   
   281    282       getMorph (sWord) {
   282    283           // retrieves morphologies list, different casing allowed
          284  +        sWord = char_player.spellingNormalization(sWord)
   283    285           let l = this.morph(sWord);
   284    286           if (sWord[0].gl_isUpperCase()) {
   285    287               l.push(...this.morph(sWord.toLowerCase()));
   286    288               if (sWord.gl_isUpperCase() && sWord.length > 1) {
   287    289                   l.push(...this.morph(sWord.gl_toCapitalize()));
   288    290               }
   289    291           }
   290    292           return l;
   291    293       }
   292    294   
   293    295       suggest (sWord, nSuggLimit=10) {
   294    296           // returns a array of suggestions for <sWord>
          297  +        sWord = char_player.spellingNormalization(sWord)
   295    298           let sPfx = "";
   296    299           let sSfx = "";
   297    300           [sPfx, sWord, sSfx] = char_player.cut(sWord);
   298    301           let nMaxSwitch = Math.max(Math.floor(sWord.length / 3), 1);
   299    302           let nMaxDel = Math.floor(sWord.length / 5);
   300    303           let nMaxHardRepl = Math.max(Math.floor((sWord.length - 5) / 4), 1);
   301    304           let oSuggResult = new SuggResult(sWord);

Modified graphspell/char_player.py from [82e97eae54] to [e841b9211a].

     1      1   # list of similar chars
     2      2   # useful for suggestion mechanism
     3      3   
     4      4   import re
            5  +import unicodedata
     5      6   
     6      7   
     7         -_xTransChars = str.maketrans({
            8  +_xTransCharsForSpelling = str.maketrans({
            9  +    'ſ': 's',  'ﬃ': 'ffi',  'ﬄ': 'ffl',  'ﬀ': 'ff',  'ﬅ': 'ft',  'ﬁ': 'fi',  'ﬂ': 'fl',  'ﬆ': 'st'
           10  +})
           11  +
           12  +def spellingNormalization (sWord):
           13  +    return unicodedata.normalize("NFC", sWord.translate(_xTransCharsForSpelling))
           14  +
           15  +
           16  +_xTransCharsForSimplification = str.maketrans({
     8     17       'à': 'a',  'é': 'e',  'î': 'i',  'ô': 'o',  'û': 'u',  'ÿ': 'i',  "y": "i",
     9     18       'â': 'a',  'è': 'e',  'ï': 'i',  'ö': 'o',  'ù': 'u',  'ŷ': 'i',
    10     19       'ä': 'a',  'ê': 'e',  'í': 'i',  'ó': 'o',  'ü': 'u',  'ý': 'i',
    11     20       'á': 'a',  'ë': 'e',  'ì': 'i',  'ò': 'o',  'ú': 'u',  'ỳ': 'i',
    12     21       'ā': 'a',  'ē': 'e',  'ī': 'i',  'ō': 'o',  'ū': 'u',  'ȳ': 'i',
    13     22       'ñ': 'n',  'k': 'q',  'w': 'v',
    14         -    'œ': 'oe',  'æ': 'ae', 
           23  +    'œ': 'oe',  'æ': 'ae',
           24  +    'ſ': 's',  'ﬃ': 'ffi',  'ﬄ': 'ffl',  'ﬀ': 'ff',  'ﬅ': 'ft',  'ﬁ': 'fi',  'ﬂ': 'fl',  'ﬆ': 'st', 
    15     25   })
    16     26   
    17     27   def simplifyWord (sWord):
    18     28       "word simplication before calculating distance between words"
    19         -    sWord = sWord.lower().translate(_xTransChars)
           29  +    sWord = sWord.lower().translate(_xTransCharsForSimplification)
    20     30       sNewWord = ""
    21     31       for i, c in enumerate(sWord, 1):
    22     32           if c != sWord[i:i+1]:
    23     33               sNewWord += c
    24     34       return sNewWord.replace("eau", "o").replace("au", "o").replace("ai", "e").replace("ei", "e").replace("ph", "f")
    25     35   
    26     36   

Modified graphspell/ibdawg.py from [3bf18d8144] to [c41b426a86].

   214    214                               "sByDic": self.byDic.hex()  if bBinaryDictAsHexString  else [ e  for e in self.byDic ]
   215    215                           }, ensure_ascii=False))
   216    216               if bInJSModule:
   217    217                   hDst.write(";\n\nexports.dictionary = dictionary;\n")
   218    218   
   219    219       def isValidToken (self, sToken):
   220    220           "checks if <sToken> is valid (if there is hyphens in <sToken>, <sToken> is split, each part is checked)"
          221  +        sToken = cp.spellingNormalization(sToken)
   221    222           if self.isValid(sToken):
   222    223               return True
   223    224           if "-" in sToken:
   224    225               if sToken.count("-") > 4:
   225    226                   return True
   226    227               return all(self.isValid(sWord)  for sWord in sToken.split("-"))
   227    228           return False
................................................................................
   256    257               iAddr = self._lookupArcNode(self.dChar[c], iAddr)
   257    258               if iAddr == None:
   258    259                   return False
   259    260           return bool(int.from_bytes(self.byDic[iAddr:iAddr+self.nBytesArc], byteorder='big') & self._finalNodeMask)
   260    261   
   261    262       def getMorph (self, sWord):
   262    263           "retrieves morphologies list, different casing allowed"
          264  +        sWord = cp.spellingNormalization(sWord)
   263    265           l = self.morph(sWord)
   264    266           if sWord[0:1].isupper():
   265    267               l.extend(self.morph(sWord.lower()))
   266    268               if sWord.isupper() and len(sWord) > 1:
   267    269                   l.extend(self.morph(sWord.capitalize()))
   268    270           return l
   269    271   
   270    272       #@timethis
   271    273       def suggest (self, sWord, nSuggLimit=10):
   272    274           "returns a set of suggestions for <sWord>"
          275  +        sWord = cp.spellingNormalization(sWord)
   273    276           sPfx, sWord, sSfx = cp.cut(sWord)
   274    277           nMaxSwitch = max(len(sWord) // 3, 1)
   275    278           nMaxDel = len(sWord) // 5
   276    279           nMaxHardRepl = max((len(sWord) - 5) // 4, 1)
   277    280           oSuggResult = SuggResult(sWord)
   278    281           self._suggest(oSuggResult, sWord, nMaxSwitch=nMaxSwitch, nMaxDel=nMaxDel, nMaxHardRepl=nMaxHardRepl)
   279    282           if sWord.istitle():
................................................................................
   326    329                   self._suggest(oSuggResult, "", nMaxSwitch, nMaxDel, nMaxHardRepl, nDeep+1, iAddr, sNewWord, True) # remove last char and go on
   327    330                   for sRepl in cp.dFinal1.get(sRemain, ()):
   328    331                       self._suggest(oSuggResult, sRepl, nMaxSwitch, nMaxDel, nMaxHardRepl, nDeep+1, iAddr, sNewWord, True)
   329    332   
   330    333       #@timethis
   331    334       def suggest2 (self, sWord, nMaxSugg=10):
   332    335           "returns a set of suggestions for <sWord>"
          336  +        sWord = cp.spellingNormalization(sWord)
   333    337           sPfx, sWord, sSfx = cp.cut(sWord)
   334    338           oSuggResult = SuggResult(sWord)
   335    339           self._suggest2(oSuggResult)
   336    340           aSugg = oSuggResult.getSuggestions()
   337    341           if sSfx or sPfx:
   338    342               # we add what we removed
   339    343               return list(map(lambda sSug: sPfx + sSug + sSfx, aSugg))
................................................................................
   382    386                       aTails.add(sTail + self.dCharVal[nVal])
   383    387                   if n and not aTails:
   384    388                       aTails.update(self._getTails(jAddr, sTail+self.dCharVal[nVal], n-1))
   385    389           return aTails
   386    390   
   387    391       def drawPath (self, sWord, iAddr=0):
   388    392           "show the path taken by <sWord> in the graph"
          393  +        sWord = cp.spellingNormalization(sWord)
   389    394           c1 = sWord[0:1]  if sWord  else " "
   390    395           iPos = -1
   391    396           n = 0
   392    397           print(c1 + ": ", end="")
   393    398           for c2, jAddr in self._getCharArcs(iAddr):
   394    399               print(c2, end="")
   395    400               if c2 == sWord[0:1]: