Overview
Comment: | [build] darg: regex for multi-token morphologies |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | build | mtok |
Files: | files | file ages | folders |
SHA3-256: | 1e120c280b156e231a4aaf70cece5e3f |
User & Date: | olr on 2021-03-10 19:39:58 |
Other Links: | branch diff | manifest | tags |
Context
2021-03-10
| ||
19:54 | [core] gc engine: regex for multi-tokens [fr] tests check-in: 3e55498f43 user: olr tags: core, fr, mtok | |
19:39 | [build] darg: regex for multi-token morphologies check-in: 1e120c280b user: olr tags: build, mtok | |
2021-03-09
| ||
17:23 | [fr] action du processeur de texte -> dans les règles par § uniquement check-in: 684b825f2a user: olr tags: fr, mtok | |
Changes
Modified darg.py from [f98928fa4d] to [6cee0c2543].
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
...
243
244
245
246
247
248
249
250
251
252
253
254
255
256
|
# Used as a key in a python dictionary.
# Nodes are equivalent if they have identical arcs, and each identical arc leads to identical states.
return self.__str__() == other.__str__()
def getNodeAsDict (self):
"returns the node as a dictionary structure"
dNode = {}
dReValue = {} # regex for token values
dReMorph = {} # regex for morph
dMorph = {} # simple search in morph
dLemma = {}
dPhonet = {}
dMeta = {}
dTag = {}
dRule = {}
for sArc, oNode in self.dArcs.items():
if sArc.startswith("@") and len(sArc) > 1:
dReMorph[sArc[1:]] = oNode.__hash__()
elif sArc.startswith("$") and len(sArc) > 1:
dMorph[sArc[1:]] = oNode.__hash__()
elif sArc.startswith("~") and len(sArc) > 1:
dReValue[sArc[1:]] = oNode.__hash__()
elif sArc.startswith(">") and len(sArc) > 1:
dLemma[sArc[1:]] = oNode.__hash__()
elif sArc.startswith("%") and len(sArc) > 1:
dPhonet[sArc[1:]] = oNode.__hash__()
elif sArc.startswith("*") and len(sArc) > 1:
................................................................................
dRule[sArc[1:]] = oNode.__hash__()
else:
dNode[sArc] = oNode.__hash__()
if dReValue:
dNode["<re_value>"] = dReValue
if dReMorph:
dNode["<re_morph>"] = dReMorph
if dMorph:
dNode["<morph>"] = dMorph
if dLemma:
dNode["<lemmas>"] = dLemma
if dPhonet:
dNode["<phonet>"] = dPhonet
if dTag:
|
|
|
|
>
>
>
>
>
|
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
...
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
|
# Used as a key in a python dictionary. # Nodes are equivalent if they have identical arcs, and each identical arc leads to identical states. return self.__str__() == other.__str__() def getNodeAsDict (self): "returns the node as a dictionary structure" dNode = {} dReValue = {} # regex for token values dReMorph = {} # regex for morph dMorph = {} # simple search in morph dReMultiMorph = {} # regex for morph in multi-tokens dLemma = {} dPhonet = {} dMeta = {} dTag = {} dRule = {} for sArc, oNode in self.dArcs.items(): if sArc.startswith("@") and len(sArc) > 1: dReMorph[sArc[1:]] = oNode.__hash__() elif sArc.startswith("$") and len(sArc) > 1: dMorph[sArc[1:]] = oNode.__hash__() elif sArc.startswith("&") and len(sArc) > 1: dReMultiMorph[sArc[1:]] = oNode.__hash__() elif sArc.startswith("~") and len(sArc) > 1: dReValue[sArc[1:]] = oNode.__hash__() elif sArc.startswith(">") and len(sArc) > 1: dLemma[sArc[1:]] = oNode.__hash__() elif sArc.startswith("%") and len(sArc) > 1: dPhonet[sArc[1:]] = oNode.__hash__() elif sArc.startswith("*") and len(sArc) > 1: ................................................................................ dRule[sArc[1:]] = oNode.__hash__() else: dNode[sArc] = oNode.__hash__() if dReValue: dNode["<re_value>"] = dReValue if dReMorph: dNode["<re_morph>"] = dReMorph if dReMultiMorph: dNode["<re_mmorph>"] = dReMultiMorph if dMorph: dNode["<morph>"] = dMorph if dLemma: dNode["<lemmas>"] = dLemma if dPhonet: dNode["<phonet>"] = dPhonet if dTag: |