Grammalecte  Check-in [3f84923104]

Overview
Comment: [graphspell][py] data memorization
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | graphspell | rg
Files: files | file ages | folders
SHA3-256: 3f84923104b978670536719804e050dcf17b045e97f0283abe013e0c94d2ecc8
User & Date: olr on 2018-05-23 08:37:58
Other Links: branch diff | manifest | tags
Context
2018-05-23
08:46
[core][py] gc: use spellchecker storage check-in: 445405d362 user: olr tags: core, rg
08:37
[graphspell][py] data memorization check-in: 3f84923104 user: olr tags: graphspell, rg
2018-05-21
13:08
[build][core] small code cleaning check-in: 29461e1888 user: olr tags: build, core, rg
Changes

Modified graphspell/spellchecker.py from [cbd22d2c4d] to [b09975dd6b].

    32     32           self.oExtendedDic = self._loadDictionary(sfExtendedDic)
    33     33           self.oCommunityDic = self._loadDictionary(sfCommunityDic)
    34     34           self.oPersonalDic = self._loadDictionary(sfPersonalDic)
    35     35           self.bExtendedDic = bool(self.oExtendedDic)
    36     36           self.bCommunityDic = bool(self.oCommunityDic)
    37     37           self.bPersonalDic = bool(self.oPersonalDic)
    38     38           self.oTokenizer = None
           39  +        # storage
           40  +        self.bStorage = False
           41  +        self._dMorphologies = {}        # key: flexion, value: list of morphologies
           42  +        self._dLemmas = {}              # key: flexion, value: list of lemmas
    39     43   
    40     44       def _loadDictionary (self, source, bNecessary=False):
    41     45           "returns an IBDAWG object"
    42     46           if not source:
    43     47               return None
    44     48           try:
    45     49               return ibdawg.IBDAWG(source)
................................................................................
    95     99   
    96    100       def deactivateCommunityDictionary (self):
    97    101           self.bCommunityDic = False
    98    102   
    99    103       def deactivatePersonalDictionary (self):
   100    104           self.bPersonalDic = False
   101    105   
          106  +
          107  +    # Storage
          108  +
          109  +    def activateStorage (self):
          110  +        self.bStorage = True
          111  +
          112  +    def deactivateStorage (self):
          113  +        self.bStorage = False
          114  +
          115  +    def clearStorage (self):
          116  +        self._dLemmas.clear()
          117  +        self._dMorphologies.clear()
          118  +
   102    119   
   103    120       # parse text functions
   104    121   
   105    122       def parseParagraph (self, sText, bSpellSugg=False):
   106    123           if not self.oTokenizer:
   107    124               self.loadTokenizer()
   108    125           aSpellErrs = []
................................................................................
   167    184               return True
   168    185           if self.bPersonalDic and self.oPersonalDic.lookup(sWord):
   169    186               return True
   170    187           return False
   171    188   
   172    189       def getMorph (self, sWord):
   173    190           "retrieves morphologies list, different casing allowed"
   174         -        lResult = self.oMainDic.getMorph(sWord)
          191  +        if self.bStorage and sWord in self._dMorphologies:
          192  +            return self._dMorphologies[sWord]
          193  +        lMorph = self.oMainDic.getMorph(sWord)
   175    194           if self.bExtendedDic:
   176         -            lResult.extend(self.oExtendedDic.getMorph(sWord))
          195  +            lMorph.extend(self.oExtendedDic.getMorph(sWord))
   177    196           if self.bCommunityDic:
   178         -            lResult.extend(self.oCommunityDic.getMorph(sWord))
          197  +            lMorph.extend(self.oCommunityDic.getMorph(sWord))
   179    198           if self.bPersonalDic:
   180         -            lResult.extend(self.oPersonalDic.getMorph(sWord))
   181         -        return lResult
          199  +            lMorph.extend(self.oPersonalDic.getMorph(sWord))
          200  +        if self.bStorage:
          201  +            self._dMorphologies[sWord] = lMorph
          202  +            self._dLemmas[sWord] = set([ s[1:s.find(" ")]  for s in lMorph ])
          203  +        return lMorph
   182    204   
   183    205       def getLemma (self, sWord):
          206  +        "retrieves lemmas (Warning: if <self.bStorage> then lemmas are returned with the preceding sign “>”)"
          207  +        if self.bStorage:
          208  +            if sWord not in self._dLemmas:
          209  +                self.getMorph(sWord)
          210  +            return self._dLemmas[sWord]
   184    211           return set([ s[1:s.find(" ")]  for s in self.getMorph(sWord) ])
   185    212   
   186    213       def suggest (self, sWord, nSuggLimit=10):
   187    214           "generator: returns 1, 2 or 3 lists of suggestions"
   188    215           yield self.oMainDic.suggest(sWord, nSuggLimit)
   189    216           if self.bExtendedDic:
   190    217               yield self.oExtendedDic.suggest(sWord, nSuggLimit)