Grammalecte  Check-in [3f34a1e2e7]

Overview
Comment:[graphspell] replace straight apostrophe in spellingNormalization()
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk | graphspell
Files: files | file ages | folders
SHA3-256: 3f34a1e2e7a7fe0ecdde83b18be97ee63f6d3bce0d096455ec304634b2e9e48c
User & Date: olr on 2020-09-30 16:10:44
Other Links: manifest | tags
Context
2020-09-30
16:19
[fr] update tests check-in: ec053526e5 user: olr tags: fr, trunk
16:10
[graphspell] replace straight apostrophe in spellingNormalization() check-in: 3f34a1e2e7 user: olr tags: graphspell, trunk
16:06
[graphspell] ignore underscore for spellchecking and morph check-in: c2a07ab8a2 user: olr tags: graphspell, trunk
Changes

Modified graphspell-js/ibdawg.js from [a4ceb45f56] to [5c5b2cad28].

273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
    }

    isValid (sWord) {
        // checks if sWord is valid (different casing tested if the first letter is a capital)
        if (!sWord) {
            return true;
        }
        if (sWord.includes("'")) { // ugly hack
            sWord = sWord.replace("'", "’");
        }
        if (this.lookup(sWord)) {
            return true;
        }
        if (sWord.charAt(0).gl_isUpperCase()) {
            if (sWord.length > 1) {
                if (sWord.gl_isTitle()) {
                    return !!this.lookup(sWord.toLowerCase());







<
<
<







273
274
275
276
277
278
279



280
281
282
283
284
285
286
    }

    isValid (sWord) {
        // checks if sWord is valid (different casing tested if the first letter is a capital)
        if (!sWord) {
            return true;
        }



        if (this.lookup(sWord)) {
            return true;
        }
        if (sWord.charAt(0).gl_isUpperCase()) {
            if (sWord.length > 1) {
                if (sWord.gl_isTitle()) {
                    return !!this.lookup(sWord.toLowerCase());

Modified graphspell-js/str_transform.js from [3bf8e3b480] to [a1596e1ca3].

22
23
24
25
26
27
28
29

30
31
32
33
34
35
36
        for (let i=0;  i <= sWord.length - n;  i++) {
            lNgrams.push(sWord.slice(i, i+n));
        }
        return lNgrams;
    },

    _xTransCharsForSpelling: new Map([
        ['ſ', 's'],  ['ffi', 'ffi'],  ['ffl', 'ffl'],  ['ff', 'ff'],  ['ſt', 'ft'],  ['fi', 'fi'],  ['fl', 'fl'],  ['st', 'st']

    ]),

    spellingNormalization: function (sWord) {
        let sNewWord = "";
        for (let c of sWord) {
            sNewWord += this._xTransCharsForSpelling.gl_get(c, c);
        }







|
>







22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
        for (let i=0;  i <= sWord.length - n;  i++) {
            lNgrams.push(sWord.slice(i, i+n));
        }
        return lNgrams;
    },

    _xTransCharsForSpelling: new Map([
        ['ſ', 's'],  ['ffi', 'ffi'],  ['ffl', 'ffl'],  ['ff', 'ff'],  ['ſt', 'ft'],  ['fi', 'fi'],  ['fl', 'fl'],  ['st', 'st'],
        ["'", '’']
    ]),

    spellingNormalization: function (sWord) {
        let sNewWord = "";
        for (let c of sWord) {
            sNewWord += this._xTransCharsForSpelling.gl_get(c, c);
        }

Modified graphspell/ibdawg.py from [43975eca5b] to [cae8eca48c].

275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
            return True
        return False

    def isValid (self, sWord):
        "checks if <sWord> is valid (different casing tested if the first letter is a capital)"
        if not sWord:
            return True
        if "'" in sWord: # ugly hack
            sWord = sWord.replace("'", "’")
        if self.lookup(sWord):
            return True
        if sWord[0:1].isupper():
            if len(sWord) > 1:
                if sWord.istitle():
                    return self.lookup(sWord.lower())
                if sWord.isupper():







<
<







275
276
277
278
279
280
281


282
283
284
285
286
287
288
            return True
        return False

    def isValid (self, sWord):
        "checks if <sWord> is valid (different casing tested if the first letter is a capital)"
        if not sWord:
            return True


        if self.lookup(sWord):
            return True
        if sWord[0:1].isupper():
            if len(sWord) > 1:
                if sWord.istitle():
                    return self.lookup(sWord.lower())
                if sWord.isupper():

Modified graphspell/str_transform.py from [98c57fa9ba] to [ee895b8347].

17
18
19
20
21
22
23
24

25
26
27
28
29
30
31
    return [ sWord[i:i+n]  for i in range(len(sWord)-n+1) ]



#### WORD NORMALIZATION

_xTransCharsForSpelling = str.maketrans({
    'ſ': 's',  'ffi': 'ffi',  'ffl': 'ffl',  'ff': 'ff',  'ſt': 'ft',  'fi': 'fi',  'fl': 'fl',  'st': 'st'

})

def spellingNormalization (sWord):
    "nomalization NFC and removing ligatures"
    return unicodedata.normalize("NFC", sWord.translate(_xTransCharsForSpelling))









|
>







17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
    return [ sWord[i:i+n]  for i in range(len(sWord)-n+1) ]



#### WORD NORMALIZATION

_xTransCharsForSpelling = str.maketrans({
    'ſ': 's',  'ffi': 'ffi',  'ffl': 'ffl',  'ff': 'ff',  'ſt': 'ft',  'fi': 'fi',  'fl': 'fl',  'st': 'st',
    "'": '’'
})

def spellingNormalization (sWord):
    "nomalization NFC and removing ligatures"
    return unicodedata.normalize("NFC", sWord.translate(_xTransCharsForSpelling))