Grammalecte  Check-in [766f20e23c]

Overview
Comment:[core] str_transform: change functions names
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk | core
Files: files | file ages | folders
SHA3-256: 766f20e23c4c92bb43d777545a47444788a877166ecc97bfeb31414b3021a599
User & Date: olr on 2017-06-23 17:25:20
Other Links: manifest | tags
Context
2017-06-23
19:23
[build] use one dictionary name instead of two check-in: cfc69abb68 user: olr tags: build, trunk
17:25
[core] str_transform: change functions names check-in: 766f20e23c user: olr tags: core, trunk
17:11
[core] dawg: compressed lexicon check-in: e5f3698eb4 user: olr tags: build, new_feature, trunk
Changes

Modified gc_core/py/dawg.py from [7e6ed7295c] to [ddd6fe1cc6].

    14     14   import collections
    15     15   
    16     16   from . import str_transform as st
    17     17   from .progressbar import ProgressBar
    18     18   
    19     19   
    20     20   def readFile (spf):
    21         -    print("Read lexicon: " + spf)
           21  +    print(" < Read lexicon: " + spf)
    22     22       if os.path.isfile(spf):
    23     23           with open(spf, "r", encoding="utf-8") as hSrc:
    24     24               for sLine in hSrc:
    25     25                   sLine = sLine.strip()
    26     26                   if sLine and not sLine.startswith("#"):
    27     27                       yield sLine
    28     28       else:
................................................................................
    65     65                           continue
    66     66                       sFlex, sStem = sLine.split("\t")
    67     67                   else:
    68     68                       sFlex = sStem = sLine
    69     69                   #print(sFlex, sStem, sTag)
    70     70                   yield (sFlex, sStem, sTag)
    71     71                   if sTag2:
    72         -                    sFlex2 = st.getStemFromSuffixCode(sFlex, sSfxCode)
           72  +                    sFlex2 = st.changeWordWithSuffixCode(sFlex, sSfxCode)
    73     73                       #print(sFlex2, sStem, sTag2)
    74     74                       yield (sFlex2, sStem, sTag2)
    75     75       if nErr:
    76     76           print(" # Lines ignored: {:>10}".format(nErr))
    77     77   
    78     78   
    79     79   
................................................................................
   159    159           self.nChar = len(dChar)
   160    160           self.nAff = nAff
   161    161           self.lArcVal = lVal
   162    162           self.nArcVal = len(lVal)
   163    163           self.nTag = self.nArcVal - self.nChar - nAff
   164    164           self.cStemming = cStemming
   165    165           if cStemming == "A":
   166         -            self.funcStemming = st.getStemFromAffixCode
          166  +            self.funcStemming = st.changeWordWithAffixCode
   167    167           elif cStemming == "S":    
   168         -            self.funcStemming = st.getStemFromSuffixCode
          168  +            self.funcStemming = st.changeWordWithSuffixCode
   169    169           else:
   170    170               self.funcStemming = st.noStemming
   171    171           
   172    172           # build
   173    173           lWord.sort()
   174    174           oProgBar = ProgressBar(0, len(lWord))
   175    175           for word in lWord:

Modified gc_core/py/ibdawg.py from [9ce1ce821d] to [095d971150].

    40     40           self.nBytesNodeAddress = int(l[3])
    41     41           self.nEntries = int(l[4])
    42     42           self.nNode = int(l[5])
    43     43           self.nArc = int(l[6])
    44     44           self.nAff = int(l[7])
    45     45           self.cStemming = l[8]
    46     46           if self.cStemming == "S":
    47         -            self.funcStemming = st.getStemFromSuffixCode
           47  +            self.funcStemming = st.changeWordWithSuffixCode
    48     48           elif self.cStemming == "A":
    49         -            self.funcStemming = st.getStemFromAffixCode
           49  +            self.funcStemming = st.changeWordWithAffixCode
    50     50           else:
    51     51               self.funcStemming = st.noStemming
    52     52           self.nTag = self.nArcVal - self.nChar - self.nAff
    53     53           self.dChar = {}
    54     54           for i in range(1, self.nChar):
    55     55               self.dChar[self.lArcVal[i]] = i
    56     56               

Modified gc_core/py/str_transform.py from [e86906e5ce] to [7df400eceb].

    69     69       jSfx = 0
    70     70       for i in range(min(len(sFlex), len(sStem))):
    71     71           if sFlex[i] != sStem[i]:
    72     72               break
    73     73           jSfx += 1
    74     74       return chr(len(sFlex)-jSfx+48) + sStem[jSfx:]  
    75     75   
    76         -def getStemFromSuffixCode (sFlex, sSfxCode):
           76  +def changeWordWithSuffixCode (sWord, sSfxCode):
    77     77       if sSfxCode == "0":
    78         -        return sFlex
    79         -    return sFlex[:-(ord(sSfxCode[0])-48)] + sSfxCode[1:]  if sSfxCode[0] != '0'  else sFlex + sSfxCode[1:]
           78  +        return sWord
           79  +    return sWord[:-(ord(sSfxCode[0])-48)] + sSfxCode[1:]  if sSfxCode[0] != '0'  else sWord + sSfxCode[1:]
    80     80   
    81     81   
    82     82   # Prefix and suffix
    83     83   def defineAffixCode (sFlex, sStem):
    84     84       """ Returns a string defining how to get stem from flexion. Examples:
    85     85               "0" if stem = flexion
    86     86               "stem" if no common substring
................................................................................
   120    120                   if M[x][y] > longest:
   121    121                       longest = M[x][y]
   122    122                       x_longest = x
   123    123               else:
   124    124                   M[x][y] = 0
   125    125       return s1[x_longest-longest : x_longest]
   126    126   
   127         -def getStemFromAffixCode (sFlex, sAffCode):
          127  +def changeWordWithAffixCode (sWord, sAffCode):
   128    128       if sAffCode == "0":
   129         -        return sFlex
          129  +        return sWord
   130    130       if '/' not in sAffCode:
   131    131           return "# error #"
   132    132       sPfxCode, sSfxCode = sAffCode.split('/')
   133         -    sFlex = sPfxCode[1:] + sFlex[(ord(sPfxCode[0])-48):] 
   134         -    return sFlex[:-(ord(sSfxCode[0])-48)] + sSfxCode[1:]  if sSfxCode[0] != '0'  else sFlex + sSfxCode[1:]
          133  +    sWord = sPfxCode[1:] + sWord[(ord(sPfxCode[0])-48):] 
          134  +    return sWord[:-(ord(sSfxCode[0])-48)] + sSfxCode[1:]  if sSfxCode[0] != '0'  else sWord + sSfxCode[1:]
   135    135