Grammalecte  Check-in [9377402874]

Overview
Comment:[graphspell] lexicographer: better readbility for past participle
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk | graphspell
Files: files | file ages | folders
SHA3-256: 9377402874b68007e8d5f2c6c77387747356ad8c76cc69411d5eb80f48f810c3
User & Date: olr on 2020-11-19 18:40:57
Other Links: manifest | tags
Context
2020-11-19
23:48
[fr] ajustements check-in: 9b10c609d3 user: olr tags: fr, trunk
18:40
[graphspell] lexicographer: better readbility for past participle check-in: 9377402874 user: olr tags: graphspell, trunk
18:38
[fr] ajustements check-in: 38b9862aab user: olr tags: fr, trunk
Changes

Modified graphspell-js/lexgraph_fr.js from [d137f983fd] to [56d17d9958].

365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
...
401
402
403
404
405
406
407


408






409
410
411
412
413
414
415
416
417



418
419

420
421
422
423
424







425
426
427
428
429
430
431
            ['‰', "signe pour mille"],
        ]),

    _zPartDemForm: new RegExp("([a-zA-Zà-ö0-9À-Öø-ÿØ-ßĀ-ʯ]+)-(là|ci)$", "i"),
    _aPartDemExceptList: new Set(["celui", "celle", "ceux", "celles", "de", "jusque", "par", "marie-couche-toi"]),
    _zInterroVerb: new RegExp("([a-zA-Zà-ö0-9À-Öø-ÿØ-ßĀ-ʯ]+)(-(?:t-(?:ie?l|elle|on)|je|tu|ie?ls?|elles?|on|[nv]ous))$", "i"),
    _zImperatifVerb: new RegExp("([a-zA-Zà-ö0-9À-Öø-ÿØ-ßĀ-ʯ]+)(-(?:l(?:es?|a)-(?:moi|toi|lui|[nv]ous|leur)|y|en|[mts]['’ʼ‘‛´`′‵՚ꞌꞋ](?:y|en)|les?|la|[mt]oi|leur|lui))$", "i"),
    _zTag: new RegExp("[:;/][a-zA-Z0-9ÑÂĴĈŔÔṼŴ!][^:;/]*", "g"),

    split: function (sWord) {
        // returns an arry of strings (prefix, trimed_word, suffix)
        let sPrefix = "";
        let sSuffix = "";
        // préfixe élidé
        let m = /^([ldmtsnjcç]|lorsqu|presqu|jusqu|puisqu|quoiqu|quelqu|qu)['’ʼ‘‛´`′‵՚ꞌꞋ]([a-zA-Zà-öÀ-Ö0-9_ø-ÿØ-ßĀ-ʯfi-st-]+)/i.exec(sWord);
................................................................................
    },

    readableMorph: function (sMorph) {
        if (!sMorph) {
            return " mot inconnu";
        }
        let sRes = "";


        sMorph = sMorph.replace(/:V([0-3][ea_])[itpqnmr_eaxz]+/, ":V$1");






        let m;
        while ((m = this._zTag.exec(sMorph)) !== null) {
            if (this.dTag.has(m[0])) {
                sRes += this.dTag.get(m[0])[0];
            } else {
                sRes += " [" + m[0] + "]?";
            }
        }
        if (sRes.startsWith(" verbe") && !sRes.includes("infinitif")) {



            sRes += " [" + sMorph.slice(1, sMorph.indexOf("/")) + "]";
        }

        if (!sRes) {
            return " [" + sMorph + "]: étiquettes inconnues";
        }
        return sRes.gl_trimRight(",");
    },








    setLabelsOnToken (oToken) {
        // Token: .sType, .sValue, .nStart, .nEnd, .lMorph
        let m = null;
        try {
            switch (oToken.sType) {
                case 'PUNC':







|







 







>
>
|
>
>
>
>
>
>


<
|
<
<
|
<
|
>
>
>
|
|
>





>
>
>
>
>
>
>







365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
...
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418

419


420

421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
            ['‰', "signe pour mille"],
        ]),

    _zPartDemForm: new RegExp("([a-zA-Zà-ö0-9À-Öø-ÿØ-ßĀ-ʯ]+)-(là|ci)$", "i"),
    _aPartDemExceptList: new Set(["celui", "celle", "ceux", "celles", "de", "jusque", "par", "marie-couche-toi"]),
    _zInterroVerb: new RegExp("([a-zA-Zà-ö0-9À-Öø-ÿØ-ßĀ-ʯ]+)(-(?:t-(?:ie?l|elle|on)|je|tu|ie?ls?|elles?|on|[nv]ous))$", "i"),
    _zImperatifVerb: new RegExp("([a-zA-Zà-ö0-9À-Öø-ÿØ-ßĀ-ʯ]+)(-(?:l(?:es?|a)-(?:moi|toi|lui|[nv]ous|leur)|y|en|[mts]['’ʼ‘‛´`′‵՚ꞌꞋ](?:y|en)|les?|la|[mt]oi|leur|lui))$", "i"),
    _zTag: new RegExp("[:;/][a-zA-Z0-9É@*!][^:;/]*", "g"),

    split: function (sWord) {
        // returns an arry of strings (prefix, trimed_word, suffix)
        let sPrefix = "";
        let sSuffix = "";
        // préfixe élidé
        let m = /^([ldmtsnjcç]|lorsqu|presqu|jusqu|puisqu|quoiqu|quelqu|qu)['’ʼ‘‛´`′‵՚ꞌꞋ]([a-zA-Zà-öÀ-Ö0-9_ø-ÿØ-ßĀ-ʯfi-st-]+)/i.exec(sWord);
................................................................................
    },

    readableMorph: function (sMorph) {
        if (!sMorph) {
            return " mot inconnu";
        }
        let sRes = "";
        let sVType = "";
        if (sMorph.includes(":V")) {
            sMorph = sMorph.replace(/:V([0-3][ea_])[itpqnmr_eaxz]+/, ":V$1");
        }
        if (sMorph.includes(":Q")) {
            let nVerbTag = sMorph.indexOf(":V")
            sVType = sMorph.slice(nVerbTag, nVerbTag+4);
            sMorph = sMorph.replace(/:V[0123]./, "").replace(/:1[ŝś]/, "");
        }
        let m;
        while ((m = this._zTag.exec(sMorph)) !== null) {

            sRes += this._readableTag(m[0]);


        }

        if ((sRes.startsWith(" verbe") && !sRes.includes("infinitif")) || sRes.startsWith(" participe")) {
            if (sVType) {
                sRes += " [" + sMorph.slice(1, sMorph.indexOf("/")) + " : " + this._readableTag(sVType).gl_trimRight(",") + "]";
            } else {
                sRes += " [" + sMorph.slice(1, sMorph.indexOf("/")) + "]";
            }
        }
        if (!sRes) {
            return " [" + sMorph + "]: étiquettes inconnues";
        }
        return sRes.gl_trimRight(",");
    },

    _readableTag: function (sTag) {
        if (this.dTag.has(sTag)) {
            return this.dTag.get(sTag)[0];
        }
        return " [" + sTag + "]?";
    },

    setLabelsOnToken (oToken) {
        // Token: .sType, .sValue, .nStart, .nEnd, .lMorph
        let m = null;
        try {
            switch (oToken.sType) {
                case 'PUNC':

Modified graphspell/lexgraph_fr.py from [24bc88a28a] to [851691ac43].

369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
...
401
402
403
404
405
406
407


408




409
410
411




412
413
414
415
416
417
418
419






420
421
422
423
424
425
426
    '%': "signe de pourcentage",
    '‰': "signe pour mille"
}


_zElidedPrefix = re.compile("(?i)^([ldmtsnjcç]|lorsqu|presqu|jusqu|puisqu|quoiqu|quelqu|qu)[’'‘`ʼ]([\\w-]+)")
_zCompoundWord = re.compile("(?i)(\\w+)(-(?:(?:les?|la)-(?:moi|toi|lui|[nv]ous|leur)|t-(?:il|elle|on)|y|en|[mts]’(?:y|en)|les?|l[aà]|[mt]oi|leur|lui|je|tu|ils?|elles?|on|[nv]ous|ce))$")
_zTag = re.compile("[:;/][\\w*][^:;/]*")

def split (sWord):
    "split word in 3 parts: prefix, root, suffix"
    sPrefix = ""
    sSuffix = ""
    # préfixe élidé
    m = _zElidedPrefix.match(sWord)
................................................................................


def readableMorph (sMorph):
    "returns string: readable tags"
    if not sMorph:
        return "mot inconnu"
    sRes = ""


    sMorph = re.sub("(?<=V[0123][ea_])[itpqnmr_eaxz]+", "", sMorph)




    for m in _zTag.finditer(sMorph):
        if m.group(0) in _dTAGS:
            sRes += _dTAGS[m.group(0)][0]




        else:
            sRes += " [" + m.group(0) + "]?"
    if sRes.startswith(" verbe") and not sRes.endswith("infinitif"):
        sRes += " [" + sMorph[1:sMorph.find("/")] +"]"
    if not sRes:
        return " [" + sMorph + "]: étiquettes inconnues"
    return sRes.rstrip(",")








_zPartDemForm = re.compile("([\\w]+)-(là|ci)$")
_zInterroVerb = re.compile("([\\w]+)(-(?:t-(?:ie?l|elle|on)|je|tu|ie?ls?|elles?|on|[nv]ous))$")
_zImperatifVerb = re.compile("([\\w]+)(-(?:l(?:es?|a)-(?:moi|toi|lui|[nv]ous|leur)|y|en|[mts]['’ʼ‘‛´`′‵՚ꞌꞋ](?:y|en)|les?|la|[mt]oi|leur|lui))$")

def setLabelsOnToken (dToken):
    # Token: .sType, .sValue, .nStart, .nEnd, .lMorph







|







 







>
>
|
>
>
>
>

<
<
>
>
>
>

<
<
|




>
>
>
>
>
>







369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
...
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415


416
417
418
419
420


421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
    '%': "signe de pourcentage",
    '‰': "signe pour mille"
}


_zElidedPrefix = re.compile("(?i)^([ldmtsnjcç]|lorsqu|presqu|jusqu|puisqu|quoiqu|quelqu|qu)[’'‘`ʼ]([\\w-]+)")
_zCompoundWord = re.compile("(?i)(\\w+)(-(?:(?:les?|la)-(?:moi|toi|lui|[nv]ous|leur)|t-(?:il|elle|on)|y|en|[mts]’(?:y|en)|les?|l[aà]|[mt]oi|leur|lui|je|tu|ils?|elles?|on|[nv]ous|ce))$")
_zTag = re.compile("[:;/][\\w@*!][^:;/]*")

def split (sWord):
    "split word in 3 parts: prefix, root, suffix"
    sPrefix = ""
    sSuffix = ""
    # préfixe élidé
    m = _zElidedPrefix.match(sWord)
................................................................................


def readableMorph (sMorph):
    "returns string: readable tags"
    if not sMorph:
        return "mot inconnu"
    sRes = ""
    sVType = ""
    if ":V" in sMorph:
        sMorph = re.sub("(?<=V[0123][ea_])[itpqnmr_eaxz]+", "", sMorph)
    if ":Q" in sMorph:
        nVerbTag = sMorph.find(":V")
        sVType = sMorph[nVerbTag:nVerbTag+4]
        sMorph = sMorph[4:].replace(":1ŝ", "").replace(":1ś", "")
    for m in _zTag.finditer(sMorph):


        sRes += _readableTag(m.group(0))
    if sRes.startswith((" verbe", " participe")) and not sRes.endswith("infinitif"):
        if sVType:
            sRes += " [" + sMorph[1:sMorph.find("/")] + " : " + _readableTag(sVType).rstrip(",") + "]"
        else:


            sRes += " [" + sMorph[1:sMorph.find("/")] + "]"
    if not sRes:
        return " [" + sMorph + "]: étiquettes inconnues"
    return sRes.rstrip(",")

def _readableTag (sTag):
    "returns string: readable tag"
    if sTag in _dTAGS:
        return _dTAGS[sTag][0]
    return " [" + sTag + "]?"


_zPartDemForm = re.compile("([\\w]+)-(là|ci)$")
_zInterroVerb = re.compile("([\\w]+)(-(?:t-(?:ie?l|elle|on)|je|tu|ie?ls?|elles?|on|[nv]ous))$")
_zImperatifVerb = re.compile("([\\w]+)(-(?:l(?:es?|a)-(?:moi|toi|lui|[nv]ous|leur)|y|en|[mts]['’ʼ‘‛´`′‵՚ꞌꞋ](?:y|en)|les?|la|[mt]oi|leur|lui))$")

def setLabelsOnToken (dToken):
    # Token: .sType, .sValue, .nStart, .nEnd, .lMorph