Grammalecte  Check-in [62304c0cd5]

Overview
Comment:[core] use spellchecker instead of ibdawg directly
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | core | multid
Files: files | file ages | folders
SHA3-256: 62304c0cd5d17eab2062f1c643570ae3ce324f1f84c70c44986d5b9c243fc2b4
User & Date: olr on 2018-02-13 14:26:26
Other Links: branch diff | manifest | tags
Context
2018-02-13
15:44
[core][cli][server][graphspell][fx] use spellchecker instead of ibdawg check-in: 18db5d65f0 user: olr tags: cli, core, fx, graphspell, multid, server
14:26
[core] use spellchecker instead of ibdawg directly check-in: 62304c0cd5 user: olr tags: core, multid
14:25
[graphspell][js] ibdawg: remove useless var in constructor check-in: 1b2e823757 user: olr tags: graphspell, multid
Changes

Modified compile_rules.py from [7fa2e820bf] to [9bd1433006].

    52     52       s = re.sub(r"textarea0\(\s*", 'look(sx, ', s)                                           # textarea0(s)
    53     53       s = re.sub(r"before0_chk1\(\s*", 'look_chk1(dDA, sx[:m.start()], 0, ', s)               # before0_chk1(s)
    54     54       s = re.sub(r"after0_chk1\(\s*", 'look_chk1(dDA, sx[m.end():], m.end(), ', s)            # after0_chk1(s)
    55     55       s = re.sub(r"textarea0_chk1\(\s*", 'look_chk1(dDA, sx, 0, ', s)                         # textarea0_chk1(s)
    56     56       s = re.sub(r"isEndOfNG\(\s*\)", 'isEndOfNG(dDA, s[m.end():], m.end())', s)              # isEndOfNG(s)
    57     57       s = re.sub(r"isNextNotCOD\(\s*\)", 'isNextNotCOD(dDA, s[m.end():], m.end())', s)        # isNextNotCOD(s)
    58     58       s = re.sub(r"isNextVerb\(\s*\)", 'isNextVerb(dDA, s[m.end():], m.end())', s)            # isNextVerb(s)
    59         -    s = re.sub(r"\bspell *[(]", '_oDict.isValid(', s)
           59  +    s = re.sub(r"\bspell *[(]", '_oSpellChecker.isValid(', s)
    60     60       s = re.sub(r"[\\](\d+)", 'm.group(\\1)', s)
    61     61       return s
    62     62   
    63     63   
    64     64   def uppercase (s, sLang):
    65     65       "(flag i is not enough): converts regex to uppercase regex: 'foo' becomes '[Ff][Oo][Oo]', but 'Bar' becomes 'B[Aa][Rr]'."
    66     66       sUp = ""

Modified gc_core/js/lang_core/gc_engine.js from [08b8dd1900] to [dcb651b444].

    34     34   }
    35     35   
    36     36   
    37     37   // data
    38     38   let _sAppContext = "";                                  // what software is running
    39     39   let _dOptions = null;
    40     40   let _aIgnoredRules = new Set();
    41         -let _oDict = null;
    42     41   let _oSpellChecker = null;
    43     42   let _dAnalyses = new Map();                             // cache for data from dictionary
    44     43   
    45     44   
    46     45   var gc_engine = {
    47     46   
    48     47       //// Informations
................................................................................
   319    318       },
   320    319   
   321    320       //// Initialization
   322    321   
   323    322       load: function (sContext="JavaScript", sPath="") {
   324    323           try {
   325    324               if (typeof(require) !== 'undefined') {
   326         -                //var ibdawg = require("resource://grammalecte/graphspell/ibdawg.js");
   327         -                //_oDict = new ibdawg.IBDAWG("${dic_filename}.json");
   328         -                console.log("<resource:>");
   329    325                   var spellchecker = require("resource://grammalecte/graphspell/spellchecker.js");
   330         -                _oSpellChecker = new spellchecker.Spellchecker("${lang}", "${dic_filename}.json");
          326  +                _oSpellChecker = new spellchecker.SpellChecker("${lang}", "", "${dic_filename}.json");
   331    327               } else {
   332         -                //_oDict = new IBDAWG("${dic_filename}.json", sPath);
   333         -                console.log("no <resource:>");
   334         -                _oSpellChecker = new Spellchecker("${lang}", "${dic_filename}.json", sPath);
          328  +                _oSpellChecker = new SpellChecker("${lang}", sPath, "${dic_filename}.json");
   335    329               }
   336    330               _sAppContext = sContext;
   337    331               _dOptions = gc_options.getOptions(sContext).gl_shallowCopy();     // duplication necessary, to be able to reset to default
   338    332           }
   339    333           catch (e) {
   340    334               helpers.logerror(e);
   341    335           }

Modified gc_core/py/lang_core/gc_engine.py from [9fc11201d4] to [e36972dc16].

     4      4   import re
     5      5   import sys
     6      6   import os
     7      7   import traceback
     8      8   #import unicodedata
     9      9   from itertools import chain
    10     10   
    11         -from ..graphspell.ibdawg import IBDAWG
           11  +from ..graphspell.spellchecker import SpellChecker
    12     12   from ..graphspell.echo import echo
    13     13   from . import gc_options
    14     14   
    15     15   
    16     16   __all__ = [ "lang", "locales", "pkg", "name", "version", "author", \
    17     17               "load", "parse", "getDictionary", \
    18     18               "setOption", "setOptions", "getOptions", "getDefaultOptions", "getOptionsLabels", "resetOptions", "displayOptions", \
................................................................................
    30     30   
    31     31   _rules = None                               # module gc_rules
    32     32   
    33     33   # data
    34     34   _sAppContext = ""                           # what software is running
    35     35   _dOptions = None
    36     36   _aIgnoredRules = set()
    37         -_oDict = None
           37  +_oSpellChecker = None
    38     38   _dAnalyses = {}                             # cache for data from dictionary
    39     39   
    40     40   
    41     41   
    42     42   #### Parsing
    43     43   
    44     44   def parse (sText, sCountry="${country_default}", bDebug=False, dOptions=None, bContext=False):
................................................................................
   284    284       #import lightproof_handler_${implname} as opt
   285    285       _createError = _createWriterError
   286    286   except ImportError:
   287    287       _createError = _createDictError
   288    288   
   289    289   
   290    290   def load (sContext="Python"):
   291         -    global _oDict
          291  +    global _oSpellChecker
   292    292       global _sAppContext
   293    293       global _dOptions
   294    294       try:
   295         -        _oDict = IBDAWG("${dic_filename}.bdic")
          295  +        _oSpellChecker = SpellChecker("${lang}", "${dic_filename}.bdic")
   296    296           _sAppContext = sContext
   297    297           _dOptions = dict(gc_options.getOptions(sContext))   # duplication necessary, to be able to reset to default
   298    298       except:
   299    299           traceback.print_exc()
   300    300   
   301    301   
   302    302   def setOption (sOpt, bVal):
................................................................................
   330    330   
   331    331   def resetOptions ():
   332    332       global _dOptions
   333    333       _dOptions = dict(gc_options.getOptions(_sAppContext))
   334    334   
   335    335   
   336    336   def getDictionary ():
   337         -    return _oDict
          337  +    return _oSpellChecker
   338    338   
   339    339   
   340    340   def _getRules (bParagraph):
   341    341       try:
   342    342           if not bParagraph:
   343    343               return _rules.lSentenceRules
   344    344           return _rules.lParagraphRules
................................................................................
   394    394       if tWord[0] in dDA:
   395    395           echo("DA: " + str(dDA[tWord[0]]))
   396    396       echo("FSA: " + str(_dAnalyses[tWord[1]]))
   397    397       return True
   398    398   
   399    399   
   400    400   def _storeMorphFromFSA (sWord):
   401         -    "retrieves morphologies list from _oDict -> _dAnalyses"
          401  +    "retrieves morphologies list from _oSpellChecker -> _dAnalyses"
   402    402       global _dAnalyses
   403         -    _dAnalyses[sWord] = _oDict.getMorph(sWord)
          403  +    _dAnalyses[sWord] = _oSpellChecker.getMorph(sWord)
   404    404       return True  if _dAnalyses[sWord]  else False
   405    405   
   406    406   
   407    407   def morph (dDA, tWord, sPattern, bStrict=True, bNoWord=False):
   408    408       "analyse a tuple (position, word), return True if sPattern in morphologies (disambiguation on)"
   409    409       if not tWord:
   410    410           return bNoWord

Modified gc_lang/fr/modules-js/gce_suggestions.js from [d955978cbc] to [0c31bc1a27].

   206    206           } else if (sGender == ":f") {
   207    207               return suggFemPlur(sFlex);
   208    208           }
   209    209       }
   210    210       let aSugg = new Set();
   211    211       if (!sFlex.includes("-")) {
   212    212           if (sFlex.endsWith("l")) {
   213         -            if (sFlex.endsWith("al") && sFlex.length > 2 && _oDict.isValid(sFlex.slice(0,-1)+"ux")) {
          213  +            if (sFlex.endsWith("al") && sFlex.length > 2 && _oSpellChecker.isValid(sFlex.slice(0,-1)+"ux")) {
   214    214                   aSugg.add(sFlex.slice(0,-1)+"ux");
   215    215               }
   216         -            if (sFlex.endsWith("ail") && sFlex.length > 3 && _oDict.isValid(sFlex.slice(0,-2)+"ux")) {
          216  +            if (sFlex.endsWith("ail") && sFlex.length > 3 && _oSpellChecker.isValid(sFlex.slice(0,-2)+"ux")) {
   217    217                   aSugg.add(sFlex.slice(0,-2)+"ux");
   218    218               }
   219    219           }
   220         -        if (_oDict.isValid(sFlex+"s")) {
          220  +        if (_oSpellChecker.isValid(sFlex+"s")) {
   221    221               aSugg.add(sFlex+"s");
   222    222           }
   223         -        if (_oDict.isValid(sFlex+"x")) {
          223  +        if (_oSpellChecker.isValid(sFlex+"x")) {
   224    224               aSugg.add(sFlex+"x");
   225    225           }
   226    226       }
   227    227       if (mfsp.hasMiscPlural(sFlex)) {
   228    228           mfsp.getMiscPlural(sFlex).forEach(function(x) { aSugg.add(x); });
   229    229       }
   230    230       if (aSugg.size > 0) {
................................................................................
   236    236   function suggSing (sFlex) {
   237    237       // returns singular forms assuming sFlex is plural
   238    238       if (sFlex.includes("-")) {
   239    239           return "";
   240    240       }
   241    241       let aSugg = new Set();
   242    242       if (sFlex.endsWith("ux")) {
   243         -        if (_oDict.isValid(sFlex.slice(0,-2)+"l")) {
          243  +        if (_oSpellChecker.isValid(sFlex.slice(0,-2)+"l")) {
   244    244               aSugg.add(sFlex.slice(0,-2)+"l");
   245    245           }
   246         -        if (_oDict.isValid(sFlex.slice(0,-2)+"il")) {
          246  +        if (_oSpellChecker.isValid(sFlex.slice(0,-2)+"il")) {
   247    247               aSugg.add(sFlex.slice(0,-2)+"il");
   248    248           }
   249    249       }
   250         -    if (_oDict.isValid(sFlex.slice(0,-1))) {
          250  +    if (_oSpellChecker.isValid(sFlex.slice(0,-1))) {
   251    251           aSugg.add(sFlex.slice(0,-1));
   252    252       }
   253    253       if (aSugg.size > 0) {
   254    254           return Array.from(aSugg).join("|");
   255    255       }
   256    256       return "";
   257    257   }

Modified gc_lang/fr/modules/gce_suggestions.py from [50fbeb414d] to [79835965e4].

   155    155           if sGender == ":m":
   156    156               return suggMasPlur(sFlex)
   157    157           elif sGender == ":f":
   158    158               return suggFemPlur(sFlex)
   159    159       aSugg = set()
   160    160       if "-" not in sFlex:
   161    161           if sFlex.endswith("l"):
   162         -            if sFlex.endswith("al") and len(sFlex) > 2 and _oDict.isValid(sFlex[:-1]+"ux"):
          162  +            if sFlex.endswith("al") and len(sFlex) > 2 and _oSpellChecker.isValid(sFlex[:-1]+"ux"):
   163    163                   aSugg.add(sFlex[:-1]+"ux")
   164         -            if sFlex.endswith("ail") and len(sFlex) > 3 and _oDict.isValid(sFlex[:-2]+"ux"):
          164  +            if sFlex.endswith("ail") and len(sFlex) > 3 and _oSpellChecker.isValid(sFlex[:-2]+"ux"):
   165    165                   aSugg.add(sFlex[:-2]+"ux")
   166         -        if _oDict.isValid(sFlex+"s"):
          166  +        if _oSpellChecker.isValid(sFlex+"s"):
   167    167               aSugg.add(sFlex+"s")
   168         -        if _oDict.isValid(sFlex+"x"):
          168  +        if _oSpellChecker.isValid(sFlex+"x"):
   169    169               aSugg.add(sFlex+"x")
   170    170       if mfsp.hasMiscPlural(sFlex):
   171    171           aSugg.update(mfsp.getMiscPlural(sFlex))
   172    172       if aSugg:
   173    173           return "|".join(aSugg)
   174    174       return ""
   175    175   
................................................................................
   176    176   
   177    177   def suggSing (sFlex):
   178    178       "returns singular forms assuming sFlex is plural"
   179    179       if "-" in sFlex:
   180    180           return ""
   181    181       aSugg = set()
   182    182       if sFlex.endswith("ux"):
   183         -        if _oDict.isValid(sFlex[:-2]+"l"):
          183  +        if _oSpellChecker.isValid(sFlex[:-2]+"l"):
   184    184               aSugg.add(sFlex[:-2]+"l")
   185         -        if _oDict.isValid(sFlex[:-2]+"il"):
          185  +        if _oSpellChecker.isValid(sFlex[:-2]+"il"):
   186    186               aSugg.add(sFlex[:-2]+"il")
   187         -    if _oDict.isValid(sFlex[:-1]):
          187  +    if _oSpellChecker.isValid(sFlex[:-1]):
   188    188           aSugg.add(sFlex[:-1])
   189    189       if aSugg:
   190    190           return "|".join(aSugg)
   191    191       return ""
   192    192   
   193    193   
   194    194   def suggMasSing (sFlex, bSuggSimil=False):