Overview
Comment: merge trunk
SHA3-256: 0b7150270a659e28c3cd498edd199032
User & Date: olr on 2018-02-19 09:44:11
Context
2018-02-19
  12:37  [lo] update: helpers              check-in: 12ad381687  user: olr  tags: lo, multid
  09:44  merge trunk                       check-in: 0b7150270a  user: olr  tags: multid
  09:11  [fr] version 0.6.2                check-in: 18027d1022  user: olr  tags: fr, trunk
2018-02-18
  16:28  [lo] UI for dictionaries options  check-in: edf22c7d52  user: olr  tags: lo, multid
Changes
Modified gc_lang/fr/build_data.py from [a0d5d064eb] to [1f69de4a2f].
 def makePhonetTable (sp, bJS=False):
     print("> Correspondances phonétiques ", end="")
     print("(Python et JavaScript)" if bJS else "(Python seulement)")

     import gc_lang.fr.modules.conj as conj

     try:
-        oDict = ibdawg.IBDAWG("French.bdic")
+        oDict = ibdawg.IBDAWG("fr.bdic")
     except:
         traceback.print_exc()
         return

     # set of homophonic words
     lSet = []
     for sLine in readFile(sp+"/data/phonet_simil.txt"):
Modified gc_lang/fr/config.ini from [08f47bce51] to [c7e11a6902].
 lang = fr
 lang_name = French
 locales = fr_FR fr_BE fr_CA fr_CH fr_LU fr_MC fr_BF fr_CI fr_SN fr_ML fr_NE fr_TG fr_BJ
 country_default = FR
 name = Grammalecte
 implname = grammalecte
 # always use 3 numbers for version: x.y.z
-version = 0.6.1
+version = 0.6.2
 author = Olivier R.
 provider = Dicollecte
 link = http://grammalecte.net
 description = Correcteur grammatical pour le français.
 extras = README_fr.txt
 logo = logo.png
Modified gc_lang/fr/dictionnaire/genfrdic.py from [5036afecd5] to [e42dad16b6].
     def __str__ (self):
         return "{0.lemma}/{0.flags} {1}".format(self, self.getMorph(2))

     def check (self):
         sErr = ''
         if self.lemma == '':
             sErr += 'lemme vide'
-        if not re.match(r"[a-zA-ZéÉôÔàâÂîÎïèÈêÊÜœŒæÆçÇ0-9µåÅΩ&αβγδεζηθικλμνξοπρστυφχψωΔℓΩ_]", self.lemma):
+        if not re.match(r"[a-zA-ZéÉôÔàâáÂîÎïèÈêÊÜœŒæÆçÇ0-9µåÅΩ&αβγδεζηθικλμνξοπρστυφχψωΔℓΩ_]", self.lemma):
             sErr += 'premier caractère inconnu: ' + self.lemma[0]
         if re.search(r"\s$", self.lemma):
             sErr += 'espace en fin de lemme'
         if re.match(r"v[0123]", self.po) and not re.match(r"[eas_][ix_][tx_][nx_][pqreuvx_][mx_][ex_z][ax_z]\b", self.po[2:]):
             sErr += 'verbe inconnu: ' + self.po
         if (re.match(r"S[*.]", self.flags) and re.search("[sxz]$", self.lemma)) or (re.match(r"X[*.]", self.flags) and not re.search("[ul]$", self.lemma)):
             sErr += 'drapeau inutile'
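A standalone illustration (not code from this repository) of what the modified test above does: re.match is anchored at the start of the lemma and the character class consumes a single character, so only the first character is validated here, and this check-in simply adds "á" to the set of accepted first characters:

    import re

    # the same character class as in check(), reused on its own for illustration
    sFirstCharClass = r"[a-zA-ZéÉôÔàâáÂîÎïèÈêÊÜœŒæÆçÇ0-9µåÅΩ&αβγδεζηθικλμνξοπρστυφχψωΔℓΩ_]"

    for sLemma in ("étable", "ça", "’argent"):
        if not re.match(sFirstCharClass, sLemma):
            print("premier caractère inconnu:", sLemma[0])   # only "’argent" is reported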
Modified gc_lang/fr/dictionnaire/orthographe/FRANCAIS_5.aff from [078b475b37] to [a2e4f22697].
 MAP tT
 MAP vV
 MAP wW
 MAP xX
 MAP zZ

 # Remplacements envisagés & barbarismes
-REP 84
+REP 82
 REP ^Ca$ Ça
 REP ^l l'
 REP ^d d'
 REP ^n n'
 REP ^s s'
 REP ^j j'
 REP ^m m'
................
 REP faisez$ faites
 REP puit puits
 REP sanctionnable punissable
 REP questionnable discutable
 REP antitartre détartrant
 REP email courriel
 REP construirent construisirent
-REP cad$ c’est-à-dire
-REP càd$ c’est-à-dire


 # Phonétique
 #PHONE 69
 #PHONE AN(DT)$ @
 #PHONE AILL AY
 #PHONE AIS$ E
................
 # Astuce de Hunspell pour contourner la non-normalisation de l’unicode dans OOo
 # http://www.openoffice.org/issues/show_bug.cgi?id=75769
 # La première colonne dresse une liste de caractères écrits avec des diacritiques combinants :
 # http://www.unicode.org/charts/ U0300 +
 # La seconde colonne établit l’équivalent en Latin-1 étendu :
 # Hunspell fait la modification pour vérifier l’orthographe. (Peut-être pas utile pour Mozilla)
 # Apostrophes: U+2019, U+02BC
-ICONV 41
+ICONV 42
 ICONV ’ '
 ICONV ʼ '
 ICONV ffi ffi
 ICONV ffl ffl
 ICONV ff ff
 ICONV ſt ft
 ICONV fi fi
 ICONV fl fl
 ICONV st st
+ICONV ſ s
 ICONV à à
 ICONV â â
 ICONV ä ä
 ICONV é é
 ICONV è è
 ICONV ê ê
 ICONV ë ë
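Note on the two count changes above: in the Hunspell .aff format, the number on a "REP n" or "ICONV n" line declares how many entry lines follow, so dropping the two "cad$/càd$" replacements lowers the REP header from 84 to 82, and adding "ICONV ſ s" (long s to plain s) raises the ICONV header from 41 to 42. A minimal sketch of a consistency check for those headers follows; it is illustrative only (checkAffTableCounts is a hypothetical helper, not part of this repository):

    import re
    import sys

    def checkAffTableCounts (spfAff):
        "compare declared table sizes (e.g. 'REP 82') with the number of entries that follow"
        dDeclared = {}   # table name -> count declared on the header line
        dFound = {}      # table name -> entry lines actually present
        with open(spfAff, "r", encoding="utf-8") as hSrc:
            for sLine in hSrc:
                m = re.match(r"(REP|ICONV|OCONV|MAP)[ \t]+(\S+)(?:[ \t]+(\S+))?", sLine)
                if not m:
                    continue
                sTable, sArg1, sArg2 = m.groups()
                if sArg2 is None and sArg1.isdigit():
                    dDeclared[sTable] = int(sArg1)              # header line: "REP 82", "ICONV 42"
                else:
                    dFound[sTable] = dFound.get(sTable, 0) + 1  # entry line: "REP email courriel"
        for sTable, nDeclared in sorted(dDeclared.items()):
            print("{}: declared {}, found {}".format(sTable, nDeclared, dFound.get(sTable, 0)))

    if __name__ == "__main__":
        checkAffTableCounts(sys.argv[1])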
Modified gc_lang/fr/perf_memo.txt from [cec037999d] to [15962af16c].
 0.5.12  2016.10.14 18:58  4.51895  1.0843   0.772805  0.22387   0.249411  0.261593  0.628802  0.339303  0.0570326  0.00805416
 0.5.15  2017.01.22 11:44  4.85204  1.16134  0.770762  0.227874  0.244574  0.253305  0.58831   0.319987  0.0603996  0.00694786
 0.5.15  2017.01.22 11:47  4.85593  1.15248  0.762924  0.22744   0.243461  0.254609  0.586741  0.317503  0.0588827  0.00701016  (unicode normalisation NFC)
 0.5.15  2017.01.31 12:06  4.88227  1.18008  0.782217  0.232617  0.247672  0.257628  0.596903  0.32169   0.0603505  0.00695196
 0.5.15  2017.02.05 10:10  4.90222  1.18444  0.786696  0.233413  0.25071   0.260214  0.602112  0.325235  0.0609932  0.00706897
 0.5.16  2017.05.12 07:41  4.92201  1.19269  0.80639   0.239147  0.257518  0.266523  0.62111   0.33359   0.0634668  0.00757178
 0.6.1   2018.02.12 09:58  5.25924  1.2649   0.878442  0.257465  0.280558  0.293903  0.686887  0.391275  0.0672474  0.00824723
+0.6.2   2018.02.19 09:06  6.20116  1.44334  1.02936   0.272956  0.311561  0.362367  0.812705  0.419061  0.0773003  0.00845671  (spelling normalization)
Modified gc_lang/fr/webext/manifest.json from [d0c2c44fc6] to [92ab4049ef].
 {
     "manifest_version": 2,
     "name": "Grammalecte [fr]",
     "short_name": "Grammalecte [fr]",
-    "version": "0.6.1",
+    "version": "0.6.2",

     "applications": {
         "gecko": {
             "id": "French-GC@grammalecte.net",
             "strict_min_version": "56.0"
         }
     },
Modified graphspell-js/char_player.js from [c9b14a8774] to [c171c18615].
 // useful for suggestion mechanism

 ${map}


 var char_player = {

-    _dTransChars: new Map([
+    _xTransCharsForSpelling: new Map([
+        ['ſ', 's'], ['ffi', 'ffi'], ['ffl', 'ffl'], ['ff', 'ff'], ['ſt', 'ft'], ['fi', 'fi'], ['fl', 'fl'], ['st', 'st']
+    ]),
+
+    spellingNormalization: function (sWord) {
+        let sNewWord = "";
+        for (let c of sWord) {
+            sNewWord += this._xTransCharsForSpelling.gl_get(c, c);
+        }
+        return sNewWord.normalize("NFC");
+    },
+
+    _xTransCharsForSimplification: new Map([
         ['à', 'a'], ['é', 'e'], ['î', 'i'], ['ô', 'o'], ['û', 'u'], ['ÿ', 'i'], ['y', 'i'],
         ['â', 'a'], ['è', 'e'], ['ï', 'i'], ['ö', 'o'], ['ù', 'u'], ['ŷ', 'i'],
         ['ä', 'a'], ['ê', 'e'], ['í', 'i'], ['ó', 'o'], ['ü', 'u'], ['ý', 'i'],
         ['á', 'a'], ['ë', 'e'], ['ì', 'i'], ['ò', 'o'], ['ú', 'u'], ['ỳ', 'i'],
         ['ā', 'a'], ['ē', 'e'], ['ī', 'i'], ['ō', 'o'], ['ū', 'u'], ['ȳ', 'i'],
         ['ñ', 'n'], ['k', 'q'], ['w', 'v'],
         ['œ', 'oe'], ['æ', 'ae'],
+        ['ſ', 's'], ['ffi', 'ffi'], ['ffl', 'ffl'], ['ff', 'ff'], ['ſt', 'ft'], ['fi', 'fi'], ['fl', 'fl'], ['st', 'st']
     ]),

     simplifyWord: function (sWord) {
         // word simplication before calculating distance between words
         sWord = sWord.toLowerCase();
         let sNewWord = "";
         let i = 1;
         for (let c of sWord) {
-            let cNew = this._dTransChars.gl_get(c, c);
+            let cNew = this._xTransCharsForSimplification.gl_get(c, c);
             let cNext = sWord.slice(i, i+1)
-            if (cNew != this._dTransChars.gl_get(cNext, cNext)) {
+            if (cNew != this._xTransCharsForSimplification.gl_get(cNext, cNext)) {
                 sNewWord += cNew;
             }
             i++;
         }
         return sNewWord.replace(/eau/g, "o").replace(/au/g, "o").replace(/ai/g, "e").replace(/ei/g, "e").replace(/ph/g, "f");
     },
Modified graphspell-js/ibdawg.js from [08ad598b63] to [73e27f350e].
206 206 "sByDic": this.sByDic // binary word graph 207 207 }; 208 208 return oJSON; 209 209 } 210 210 211 211 isValidToken (sToken) { 212 212 // checks if sToken is valid (if there is hyphens in sToken, sToken is split, each part is checked) 213 + sToken = char_player.spellingNormalization(sToken) 213 214 if (this.isValid(sToken)) { 214 215 return true; 215 216 } 216 217 if (sToken.includes("-")) { 217 218 if (sToken.gl_count("-") > 4) { 218 219 return true; 219 220 } ................................................................................ 276 277 } 277 278 } 278 279 return Boolean(this._convBytesToInteger(this.byDic.slice(iAddr, iAddr+this.nBytesArc)) & this._finalNodeMask); 279 280 } 280 281 281 282 getMorph (sWord) { 282 283 // retrieves morphologies list, different casing allowed 284 + sWord = char_player.spellingNormalization(sWord) 283 285 let l = this.morph(sWord); 284 286 if (sWord[0].gl_isUpperCase()) { 285 287 l.push(...this.morph(sWord.toLowerCase())); 286 288 if (sWord.gl_isUpperCase() && sWord.length > 1) { 287 289 l.push(...this.morph(sWord.gl_toCapitalize())); 288 290 } 289 291 } 290 292 return l; 291 293 } 292 294 293 295 suggest (sWord, nSuggLimit=10) { 294 296 // returns a array of suggestions for <sWord> 297 + sWord = char_player.spellingNormalization(sWord) 295 298 let sPfx = ""; 296 299 let sSfx = ""; 297 300 [sPfx, sWord, sSfx] = char_player.cut(sWord); 298 301 let nMaxSwitch = Math.max(Math.floor(sWord.length / 3), 1); 299 302 let nMaxDel = Math.floor(sWord.length / 5); 300 303 let nMaxHardRepl = Math.max(Math.floor((sWord.length - 5) / 4), 1); 301 304 let oSuggResult = new SuggResult(sWord);
Modified graphspell/char_player.py from [82e97eae54] to [e841b9211a].
 # list of similar chars
 # useful for suggestion mechanism

 import re
+import unicodedata


-_xTransChars = str.maketrans({
+_xTransCharsForSpelling = str.maketrans({
+    'ſ': 's', 'ffi': 'ffi', 'ffl': 'ffl', 'ff': 'ff', 'ſt': 'ft', 'fi': 'fi', 'fl': 'fl', 'st': 'st'
+})
+
+def spellingNormalization (sWord):
+    return unicodedata.normalize("NFC", sWord.translate(_xTransCharsForSpelling))
+
+
+_xTransCharsForSimplification = str.maketrans({
     'à': 'a', 'é': 'e', 'î': 'i', 'ô': 'o', 'û': 'u', 'ÿ': 'i', "y": "i",
     'â': 'a', 'è': 'e', 'ï': 'i', 'ö': 'o', 'ù': 'u', 'ŷ': 'i',
     'ä': 'a', 'ê': 'e', 'í': 'i', 'ó': 'o', 'ü': 'u', 'ý': 'i',
     'á': 'a', 'ë': 'e', 'ì': 'i', 'ò': 'o', 'ú': 'u', 'ỳ': 'i',
     'ā': 'a', 'ē': 'e', 'ī': 'i', 'ō': 'o', 'ū': 'u', 'ȳ': 'i',
     'ñ': 'n', 'k': 'q', 'w': 'v',
-    'œ': 'oe', 'æ': 'ae',
+    'œ': 'oe', 'æ': 'ae',
+    'ſ': 's', 'ffi': 'ffi', 'ffl': 'ffl', 'ff': 'ff', 'ſt': 'ft', 'fi': 'fi', 'fl': 'fl', 'st': 'st',
 })

 def simplifyWord (sWord):
     "word simplication before calculating distance between words"
-    sWord = sWord.lower().translate(_xTransChars)
+    sWord = sWord.lower().translate(_xTransCharsForSimplification)
     sNewWord = ""
     for i, c in enumerate(sWord, 1):
         if c != sWord[i:i+1]:
             sNewWord += c
     return sNewWord.replace("eau", "o").replace("au", "o").replace("ai", "e").replace("ei", "e").replace("ph", "f")
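The Python helper mirrors the JavaScript one above: the keys of the spelling table are the single-codepoint ligature characters (U+FB00 to U+FB06) and the long s "ſ", which simply render like their ASCII expansions, and str.translate replaces each of them with the corresponding letter sequence before NFC normalization collapses combining diacritics. Below is a self-contained sketch of the same idea for illustration; normalizeSpelling and _xLigatures are made-up names, the helper actually added by this check-in is spellingNormalization in graphspell/char_player.py:

    import unicodedata

    _xLigatures = str.maketrans({
        'ſ': 's', 'ﬃ': 'ffi', 'ﬄ': 'ffl', 'ﬀ': 'ff', 'ﬅ': 'ft', 'ﬁ': 'fi', 'ﬂ': 'fl', 'ﬆ': 'st'
    })

    def normalizeSpelling (sWord):
        "replace ligatures and long s, then apply Unicode NFC normalization"
        return unicodedata.normalize("NFC", sWord.translate(_xLigatures))

    print(normalizeSpelling("diﬃcile"))          # -> "difficile"
    print(normalizeSpelling("meſure"))           # -> "mesure"
    print(normalizeSpelling("cœur"))             # unchanged: "œ" is not touched here
    print(normalizeSpelling("e\u0301te\u0301"))  # combining accents -> precomposed "été"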
Modified graphspell/ibdawg.py from [3bf18d8144] to [c41b426a86].
214 214 "sByDic": self.byDic.hex() if bBinaryDictAsHexString else [ e for e in self.byDic ] 215 215 }, ensure_ascii=False)) 216 216 if bInJSModule: 217 217 hDst.write(";\n\nexports.dictionary = dictionary;\n") 218 218 219 219 def isValidToken (self, sToken): 220 220 "checks if <sToken> is valid (if there is hyphens in <sToken>, <sToken> is split, each part is checked)" 221 + sToken = cp.spellingNormalization(sToken) 221 222 if self.isValid(sToken): 222 223 return True 223 224 if "-" in sToken: 224 225 if sToken.count("-") > 4: 225 226 return True 226 227 return all(self.isValid(sWord) for sWord in sToken.split("-")) 227 228 return False ................................................................................ 256 257 iAddr = self._lookupArcNode(self.dChar[c], iAddr) 257 258 if iAddr == None: 258 259 return False 259 260 return bool(int.from_bytes(self.byDic[iAddr:iAddr+self.nBytesArc], byteorder='big') & self._finalNodeMask) 260 261 261 262 def getMorph (self, sWord): 262 263 "retrieves morphologies list, different casing allowed" 264 + sWord = cp.spellingNormalization(sWord) 263 265 l = self.morph(sWord) 264 266 if sWord[0:1].isupper(): 265 267 l.extend(self.morph(sWord.lower())) 266 268 if sWord.isupper() and len(sWord) > 1: 267 269 l.extend(self.morph(sWord.capitalize())) 268 270 return l 269 271 270 272 #@timethis 271 273 def suggest (self, sWord, nSuggLimit=10): 272 274 "returns a set of suggestions for <sWord>" 275 + sWord = cp.spellingNormalization(sWord) 273 276 sPfx, sWord, sSfx = cp.cut(sWord) 274 277 nMaxSwitch = max(len(sWord) // 3, 1) 275 278 nMaxDel = len(sWord) // 5 276 279 nMaxHardRepl = max((len(sWord) - 5) // 4, 1) 277 280 oSuggResult = SuggResult(sWord) 278 281 self._suggest(oSuggResult, sWord, nMaxSwitch=nMaxSwitch, nMaxDel=nMaxDel, nMaxHardRepl=nMaxHardRepl) 279 282 if sWord.istitle(): ................................................................................ 326 329 self._suggest(oSuggResult, "", nMaxSwitch, nMaxDel, nMaxHardRepl, nDeep+1, iAddr, sNewWord, True) # remove last char and go on 327 330 for sRepl in cp.dFinal1.get(sRemain, ()): 328 331 self._suggest(oSuggResult, sRepl, nMaxSwitch, nMaxDel, nMaxHardRepl, nDeep+1, iAddr, sNewWord, True) 329 332 330 333 #@timethis 331 334 def suggest2 (self, sWord, nMaxSugg=10): 332 335 "returns a set of suggestions for <sWord>" 336 + sWord = cp.spellingNormalization(sWord) 333 337 sPfx, sWord, sSfx = cp.cut(sWord) 334 338 oSuggResult = SuggResult(sWord) 335 339 self._suggest2(oSuggResult) 336 340 aSugg = oSuggResult.getSuggestions() 337 341 if sSfx or sPfx: 338 342 # we add what we removed 339 343 return list(map(lambda sSug: sPfx + sSug + sSfx, aSugg)) ................................................................................ 382 386 aTails.add(sTail + self.dCharVal[nVal]) 383 387 if n and not aTails: 384 388 aTails.update(self._getTails(jAddr, sTail+self.dCharVal[nVal], n-1)) 385 389 return aTails 386 390 387 391 def drawPath (self, sWord, iAddr=0): 388 392 "show the path taken by <sWord> in the graph" 393 + sWord = cp.spellingNormalization(sWord) 389 394 c1 = sWord[0:1] if sWord else " " 390 395 iPos = -1 391 396 n = 0 392 397 print(c1 + ": ", end="") 393 398 for c2, jAddr in self._getCharArcs(iAddr): 394 399 print(c2, end="") 395 400 if c2 == sWord[0:1]: