Grammalecte  Check-in [f49fe68f6c]

Overview
Comment: [core] adjust sentence detection
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk | core
Files: files | file ages | folders
SHA3-256: f49fe68f6c994c0754db8ea31635dd3be1a0883f4b757cba2f9aafa3bab03812
User & Date: olr on 2020-03-20 16:30:27
Other Links: manifest | tags
Context
2020-03-21
10:16
[fr] test: untested rules check-in: ac727cfd6b user: olr tags: fr, trunk
2020-03-20
16:30
[core] adjust sentence detection check-in: f49fe68f6c user: olr tags: core, trunk
16:29
[fr] ajustements et faux positifs check-in: a654df5567 user: olr tags: fr, trunk
Changes

Modified gc_core/js/text.js from [650a548528] to [17a1dd3378].

5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
/* global require, exports, console */

"use strict";


var text = {

    _zEndOfSentence: new RegExp ('[.?!:;…]+[»”’)]?[   ]+[»”’]?(?=[«"“‘–—   ]*[A-ZÀÂÉÈÊÎÔÇ])', "g"),

    getSentenceBoundaries: function* (sText) {
        // generator: returns start and end of sentences found in <sText>
        let iStart = 0;
        let m;
        while ((m = this._zEndOfSentence.exec(sText)) !== null) {
            yield [iStart, this._zEndOfSentence.lastIndex];







|







5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
/* global require, exports, console */

"use strict";


var text = {

    _zEndOfSentence: new RegExp ('[.?!…]+[»”’)]?[   ]+[»”’]?(?=[«"“‘–—   ]*[A-ZÀÂÉÈÊÎÔÇ])|[:;][   ]+', "g"),

    getSentenceBoundaries: function* (sText) {
        // generator: returns start and end of sentences found in <sText>
        let iStart = 0;
        let m;
        while ((m = this._zEndOfSentence.exec(sText)) !== null) {
            yield [iStart, this._zEndOfSentence.lastIndex];

Modified gc_core/py/text.py from [df68ef0738] to [15f2da6650].

5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
"""

import re
import textwrap
from itertools import chain


_zEndOfSentence = re.compile(r'[.?!:;…]+[»”’)]?[   ]+[»”’]?(?=[«"“‘–—   ]*[A-ZÀÂÉÈÊÎÔÇ])')

def getSentenceBoundaries (sText):
    "generator: returns start and end of sentences found in <sText>"
    iStart = 0
    for m in _zEndOfSentence.finditer(sText):
        yield (iStart, m.end())
        iStart = m.end()







|







5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
"""

import re
import textwrap
from itertools import chain


_zEndOfSentence = re.compile(r'[.?!…]+[»”’)]?[   ]+[»”’]?(?=[«"“‘–—   ]*[A-ZÀÂÉÈÊÎÔÇ])|[:;][   ]+')

def getSentenceBoundaries (sText):
    "generator: returns start and end of sentences found in <sText>"
    iStart = 0
    for m in _zEndOfSentence.finditer(sText):
        yield (iStart, m.end())
        iStart = m.end()