Grammalecte  Diff

Differences From Artifact [182863e043]:

To Artifact [5a6ef5f70e]:


     1      1   #!python3
     2      2   
     3         -# RULE GRAPH BUILDER
     4         -#
            3  +"""
            4  +RULE GRAPH BUILDER
            5  +"""
            6  +
     5      7   # by Olivier R.
     6      8   # License: MPL 2
     7      9   
     8     10   
     9         -import json
    10         -import time
    11         -import traceback
    12         -
    13     11   from graphspell.progressbar import ProgressBar
    14         -
    15     12   
    16     13   
    17     14   class DARG:
    18     15       """DIRECT ACYCLIC RULE GRAPH"""
    19     16       # This code is inspired from Steve Hanov’s DAWG, 2011. (http://stevehanov.ca/blog/index.php?id=115)
    20     17   
    21     18       def __init__ (self, lRule, sLangCode):
................................................................................
    28     25           self.aPreviousRule = []
    29     26           Node.resetNextId()
    30     27           self.oRoot = Node()
    31     28           self.lUncheckedNodes = []  # list of nodes that have not been checked for duplication.
    32     29           self.lMinimizedNodes = {}  # list of unique nodes that have been checked for duplication.
    33     30           self.nNode = 0
    34     31           self.nArc = 0
    35         -        
           32  +
    36     33           # build
    37     34           lRule.sort()
    38     35           oProgBar = ProgressBar(0, len(lRule))
    39     36           for aRule in lRule:
    40     37               self.insert(aRule)
    41     38               oProgBar.increment(1)
    42     39           oProgBar.done()
................................................................................
    43     40           self.finish()
    44     41           self.countNodes()
    45     42           self.countArcs()
    46     43           self.displayInfo()
    47     44   
    48     45       # BUILD DARG
    49     46       def insert (self, aRule):
           47  +        "insert a new rule (tokens must be inserted in order)"
    50     48           if aRule < self.aPreviousRule:
    51         -            sys.exit("# Error: tokens must be inserted in order.")
    52         -    
           49  +            exit("# Error: tokens must be inserted in order.")
           50  +
    53     51           # find common prefix between word and previous word
    54     52           nCommonPrefix = 0
    55     53           for i in range(min(len(aRule), len(self.aPreviousRule))):
    56     54               if aRule[i] != self.aPreviousRule[i]:
    57     55                   break
    58     56               nCommonPrefix += 1
    59     57   
................................................................................
    68     66               oNode = self.lUncheckedNodes[-1][2]
    69     67   
    70     68           iToken = nCommonPrefix
    71     69           for sToken in aRule[nCommonPrefix:]:
    72     70               oNextNode = Node()
    73     71               oNode.dArcs[sToken] = oNextNode
    74     72               self.lUncheckedNodes.append((oNode, sToken, oNextNode))
    75         -            if iToken == (len(aRule) - 2): 
           73  +            if iToken == (len(aRule) - 2):
    76     74                   oNode.bFinal = True
    77     75               iToken += 1
    78     76               oNode = oNextNode
    79     77           oNode.bFinal = True
    80     78           self.aPreviousRule = aRule
    81     79   
    82     80       def finish (self):
................................................................................
    92     90                   oNode.dArcs[sToken] = self.lMinimizedNodes[oChildNode]
    93     91               else:
    94     92                   # add the state to the minimized nodes.
    95     93                   self.lMinimizedNodes[oChildNode] = oChildNode
    96     94               self.lUncheckedNodes.pop()
    97     95   
    98     96       def countNodes (self):
           97  +        "count nodes within the whole graph"
    99     98           self.nNode = len(self.lMinimizedNodes)
   100     99   
   101    100       def countArcs (self):
          101  +        "count arcs within the whole graph"
   102    102           self.nArc = 0
   103    103           for oNode in self.lMinimizedNodes:
   104    104               self.nArc += len(oNode.dArcs)
   105    105   
   106    106       def displayInfo (self):
          107  +        "display informations about the rule graph"
   107    108           print(" * {:<12} {:>16,}".format("Rules:", self.nRule))
   108    109           print(" * {:<12} {:>16,}".format("Nodes:", self.nNode))
   109    110           print(" * {:<12} {:>16,}".format("Arcs:", self.nArc))
   110    111   
   111    112       def createGraph (self):
          113  +        "create the graph as a dictionary"
   112    114           dGraph = { 0: self.oRoot.getNodeAsDict() }
   113    115           for oNode in self.lMinimizedNodes:
   114         -            sHashId = oNode.__hash__() 
          116  +            sHashId = oNode.__hash__()
   115    117               if sHashId not in dGraph:
   116    118                   dGraph[sHashId] = oNode.getNodeAsDict()
   117    119               else:
   118    120                   print("Error. Double node… same id: ", sHashId)
   119    121                   print(str(oNode.getNodeAsDict()))
   120    122           return dGraph
   121    123   
   122    124   
   123    125   
   124    126   class Node:
          127  +    """Node of the rule graph"""
          128  +
   125    129       NextId = 0
   126         -    
          130  +
   127    131       def __init__ (self):
   128    132           self.i = Node.NextId
   129    133           Node.NextId += 1
   130    134           self.bFinal = False
   131    135           self.dArcs = {}          # key: arc value; value: a node
   132    136   
   133    137       @classmethod
   134    138       def resetNextId (cls):
          139  +        "reset to 0 the node counter"
   135    140           cls.NextId = 0
   136    141   
   137    142       def __str__ (self):
   138    143           # Caution! this function is used for hashing and comparison!
   139    144           cFinal = "1"  if self.bFinal  else "0"
   140    145           l = [cFinal]
   141    146           for (key, oNode) in self.dArcs.items():
................................................................................
   146    151       def __hash__ (self):
   147    152           # Used as a key in a python dictionary.
   148    153           return self.__str__().__hash__()
   149    154   
   150    155       def __eq__ (self, other):
   151    156           # Used as a key in a python dictionary.
   152    157           # Nodes are equivalent if they have identical arcs, and each identical arc leads to identical states.
   153         -        return self.__str__() == other.__str__()        
          158  +        return self.__str__() == other.__str__()
   154    159   
   155    160       def getNodeAsDict (self):
   156    161           "returns the node as a dictionary structure"
   157    162           dNode = {}
   158    163           dReValue = {}
   159    164           dReMorph = {}
   160    165           dRule = {}