Grammalecte  Check-in [f069a117e4]

Overview
Comment: [core][fr] fix text formatter
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk | fr | core
Files: files | file ages | folders
SHA3-256: f069a117e4f2ebe0f2513770d4e1d1a6c93664d002533eaf31f37cbee18af548
User & Date: olr on 2021-02-15 16:32:22
Other Links: manifest | tags
Context
2021-02-17
10:45
[core][fr][oxt] text formatter: another apostrophe check-in: 26cf74769b user: olr tags: core, fr, lo, trunk
2021-02-15
16:32
[core][fr] fix text formatter check-in: f069a117e4 user: olr tags: core, fr, trunk
13:46
[build][fr] include lemmas of words that are also verbal forms check-in: 69affb5433 user: olr tags: build, fr, trunk
Changes

Modified gc_lang/fr/modules/textformatter.py from [4ba47078d2] to [bc3ccf87ae].

76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
...
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
    "ts_n_dash_start":        [ ("^[-—][  ]", "– "),
                                ("^– ", "– "),
                                ("^[-–—](?=[\\w.…])", "– ") ],
    "ts_m_dash_start":        [ ("^[-–][  ]", "— "),
                                ("^— ", "— "),
                                ("^«[  ][—–-][  ]", "« — "),
                                ("^[-–—](?=[\\w.…])", "— ") ],
    "ts_quotation_marks":     [ ('"(\\w+)"', "“$1”"),
                                ("''(\\w+)''", "“$1”"),
                                ("'(\\w+)'", "“$1”"),
                                ("^(?:\"|'')(?=\\w)", "« "),
                                (" (?:\"|'')(?=\\w)", " « "),
                                ("\\((?:\"|'')(?=\\w)", "(« "),
                                ("(?<=\\w)(?:\"|'')$", " »"),
                                ("(?<=\\w)(?:\"|'')(?=[] ,.:;?!…)])", " »"),
                                ('(?<=[.!?…])" ', " » "),
                                ('(?<=[.!?…])"$', " »") ],
................................................................................
                                ("noeu", "nœu"), ("Noeu", "Nœu"),
                                ("soeur", "sœur"), ("Soeur", "Sœur"),
                                ("voeu", "vœu"), ("Voeu", "Vœu"),
                                ("aequo", "æquo"), ("Aequo", "Æquo"),
                                ("\\bCa\\b", "Ça"), (" ca\\b", " ça"),
                                ("\\bdej[aà]\\b", "déjà"), ("\\bplutot\\b", "plutôt"),
                                ("\\bmeme\\b", "même"), ("\\bmemes\\b", "mêmes"), ("\\bMeme\\b", "Même"),
                                ("\\b([cC]e(?:ux|lles?|lui))-la\\b", "$1-là"),
                                ("\\bmalgre\\b", "malgré"), ("\\bMalgre\\b", "Malgré"),
                                ("\\betre\\b", "être"), ("\\bEtre\\b", "Être"),
                                ("\\btres\\b", "très"), ("\\bTres\\b", "Très"),
                                ("\\bEtai([ts]|ent)\\b", "Étai$1"),
                                ("\\bE(tat|cole|crit|poque|tude|ducation|glise|conomi(?:qu|)e|videmment|lysée|tienne|thiopie|cosse|gypt(?:e|ien)|rythrée|pinal|vreux)", "É$1") ],
    "ts_ligature_ffi_on":       [("ffi", "ffi")],
    "ts_ligature_ffl_on":       [("ffl", "ffl")],
    "ts_ligature_fi_on":        [("fi", "fi")],
    "ts_ligature_fl_on":        [("fl", "fl")],
    "ts_ligature_ff_on":        [("ff", "ff")],
    "ts_ligature_ft_on":        [("ft", "ſt")],
    "ts_ligature_st_on":        [("st", "st")],







|
|
|







 







|



|
|







76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
...
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
    "ts_n_dash_start":        [ ("^[-—][  ]", "– "),
                                ("^– ", "– "),
                                ("^[-–—](?=[\\w.…])", "– ") ],
    "ts_m_dash_start":        [ ("^[-–][  ]", "— "),
                                ("^— ", "— "),
                                ("^«[  ][—–-][  ]", "« — "),
                                ("^[-–—](?=[\\w.…])", "— ") ],
    "ts_quotation_marks":     [ ('"(\\w+)"', "“\\1”"),
                                ("''(\\w+)''", "“\\1”"),
                                ("'(\\w+)'", "“\\1”"),
                                ("^(?:\"|'')(?=\\w)", "« "),
                                (" (?:\"|'')(?=\\w)", " « "),
                                ("\\((?:\"|'')(?=\\w)", "(« "),
                                ("(?<=\\w)(?:\"|'')$", " »"),
                                ("(?<=\\w)(?:\"|'')(?=[] ,.:;?!…)])", " »"),
                                ('(?<=[.!?…])" ', " » "),
                                ('(?<=[.!?…])"$', " »") ],
................................................................................
                                ("noeu", "nœu"), ("Noeu", "Nœu"),
                                ("soeur", "sœur"), ("Soeur", "Sœur"),
                                ("voeu", "vœu"), ("Voeu", "Vœu"),
                                ("aequo", "æquo"), ("Aequo", "Æquo"),
                                ("\\bCa\\b", "Ça"), (" ca\\b", " ça"),
                                ("\\bdej[aà]\\b", "déjà"), ("\\bplutot\\b", "plutôt"),
                                ("\\bmeme\\b", "même"), ("\\bmemes\\b", "mêmes"), ("\\bMeme\\b", "Même"),
                                ("\\b([cC]e(?:ux|lles?|lui))-la\\b", "\\1-là"),
                                ("\\bmalgre\\b", "malgré"), ("\\bMalgre\\b", "Malgré"),
                                ("\\betre\\b", "être"), ("\\bEtre\\b", "Être"),
                                ("\\btres\\b", "très"), ("\\bTres\\b", "Très"),
                                ("\\bEtai([ts]|ent)\\b", "Étai\\1"),
                                ("\\bE(tat|cole|crit|poque|tude|ducation|glise|conomi(?:qu|)e|videmment|lysée|tienne|thiopie|cosse|gypt(?:e|ien)|rythrée|pinal|vreux)", "É\\1") ],
    "ts_ligature_ffi_on":       [("ffi", "ffi")],
    "ts_ligature_ffl_on":       [("ffl", "ffl")],
    "ts_ligature_fi_on":        [("fi", "fi")],
    "ts_ligature_fl_on":        [("fl", "fl")],
    "ts_ligature_ff_on":        [("ff", "ff")],
    "ts_ligature_ft_on":        [("ft", "ſt")],
    "ts_ligature_st_on":        [("st", "st")],