Grammalecte  Check-in [36321ba3ed]

Overview
Comment:[fr][oxt] table de remplacement pour le formateur de texte
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk | fr
Files: files | file ages | folders
SHA3-256: 36321ba3ed787c60eede5ce7ea8a8194965b35b36aac1f5182a0c7af56893b78
User & Date: olr on 2017-06-27 13:57:43
Other Links: manifest | tags
Context
2017-06-27
16:40
[core] ibdawg: suggestion mechanism check-in: eaa97b4c18 user: olr tags: core, trunk
13:57
[fr][oxt] table de remplacement pour le formateur de texte check-in: 36321ba3ed user: olr tags: fr, trunk
13:38
[fr][oxt] formateur de texte: table de remplacement séparée check-in: 58cdb431b8 user: olr tags: fr, trunk
Changes

Added gc_lang/fr/oxt/TextFormatter/tf_tabrep.py version [89da85a6c2].

            1  +# Regular expressions for the text formatter of LO
            2  +# working with ICU (bag of bugs)
            3  +
            4  +
            5  +# ICU: & is $0 in replacement field
            6  +
            7  +# NOTE: A LOT OF REGEX COULD BE MERGED IF ICU ENGINE WAS NOT SO BUGGY
            8  +# "([;?!…])(?=[:alnum:])" => "$1 " doesn’t work properly
            9  +# "(?<=[:alnum:])([;?!…])" => " $1 " doesn’t work properly
           10  +
           11  +
           12  +
           13  +#
           14  +#                   String to replace                   replacement     regex?  case sensitive?
           15  +#
           16  +
           17  +dTableRepl = {
           18  +    # Restructuration
           19  +    "struct1": [
           20  +                    ("\\n",                             "\\n",          True,   True)   # end of line => end of paragraph
           21  +    ],
           22  +    "struct2": [
           23  +                    ("([:alpha:])- *\n([:alpha:])",     "$1$2",         True,   False)  # EOL
           24  +    ],
           25  +
           26  +    # espaces surnuméraires
           27  +    "ssp1": [
           28  +                    ("^[  ]+",                          "",             True,   True)
           29  +    ],
           30  +    "ssp2": [
           31  +                    ("  ",                              " ",            False,  True),  # espace + espace insécable -> espace
           32  +                    ("  ",                              " ",            False,  True),  # espace insécable + espace -> espace
           33  +                    ("  +",                             " ",            True,   True),  # espaces surnuméraires
           34  +                    ("  +",                             " ",            True,   True)   # espaces insécables surnuméraires
           35  +    ],
           36  +    "ssp3": [
           37  +                    ("[  ]+$",                          "",             True,   True)
           38  +    ],
           39  +    "ssp4": [
           40  +                    (" +(?=[.,…])",                     "",             True,   True)
           41  +    ],
           42  +    "ssp5": [
           43  +                    ("\\([  ]+",                        "(",            True,   True),
           44  +                    ("[  ]+\\)",                        ")",            True,   True)
           45  +    ],
           46  +    "ssp6": [
           47  +                    ("\\[[  ]+",                        "[",            True,   True),
           48  +                    ("[  ]+\\]",                        "]",            True,   True)
           49  +    ],
           50  +    "ssp7": [
           51  +                    ("“[  ]+",                          "“",            True,   True),
           52  +                    ("[  ]”",                           "”",            True,   True)
           53  +    ],
           54  +
           55  +    # espaces insécables
           56  +    "nbsp1": [
           57  +                    ("(?<=[:alnum:]):[   ]",            " : ",          True,   False),
           58  +                    ("(?<=[:alnum:]):$",                " :",           True,   False),
           59  +                    ("(?<=[:alnum:]);",                 " ;",           True,   False),
           60  +                    ("(?<=[:alnum:])[?][   ]",          " ? ",          True,   False),
           61  +                    ("(?<=[:alnum:])[?]$",              " ?",           True,   False),
           62  +                    ("(?<=[:alnum:])!",                 " !",           True,   False),
           63  +                    ("(?<=[]…)»}]):",                   " :",           True,   False),
           64  +                    ("(?<=[]…)»}]);",                   " ;",           True,   False),
           65  +                    ("(?<=[]…)»}])[?][   ]",            " ? ",          True,   False),
           66  +                    ("(?<=[]…)»}])[?]$",                " ?",           True,   False),
           67  +                    ("(?<=[]…)»}])!",                   " !",           True,   False),
           68  +                    ("[  ]+([:;?!])",                   " $1",          True,   False)
           69  +    ],
           70  +    "nnbsp1": [
           71  +                    ("(?<=[:alnum:]);",                 " ;",           True,   False),
           72  +                    ("(?<=[:alnum:])[?][   ]",          " ? ",          True,   False),
           73  +                    ("(?<=[:alnum:])[?]$",              " ?",           True,   False),
           74  +                    ("(?<=[:alnum:])!",                 " !",           True,   False),
           75  +                    ("(?<=[]…)»}]);",                   " ;",           True,   False),
           76  +                    ("(?<=[]…)»}])[?][   ]",            " ? ",          True,   False),
           77  +                    ("(?<=[]…)»}])[?]$",                " ?",           True,   False),
           78  +                    ("(?<=[]…)»}])!",                   " !",           True,   False),
           79  +                    ("[  ]+([;?!])",                    " $1",          True,   False),
           80  +                    ("(?<=[:alnum:]):[   ]",            " : ",          True,   False),
           81  +                    ("(?<=[:alnum:]):$",                " :",           True,   False),
           82  +                    ("(?<=[]…)»}]):",                   " :",           True,   False),
           83  +                    ("[  ]+:",                          " :",           True,   False)
           84  +    ],
           85  +    "nbsp1_fix": [
           86  +                    ("([[(])[   ]([!?:;])",             "$1$2",         True,   False),
           87  +                    ("(?<=http)[   ]://",               "://",          True,   False),
           88  +                    ("(?<=https)[   ]://",              "://",          True,   False),
           89  +                    ("(?<=ftp)[   ]://",                "://",          True,   False),
           90  +                    ("(?<=&)amp[   ];",                 "amp;",         True,   False),
           91  +                    ("(?<=&)nbsp[   ];",                "nbsp;",        True,   False),
           92  +                    ("(?<=&)lt[   ];",                  "lt;",          True,   False),
           93  +                    ("(?<=&)gt[   ];",                  "gt;",          True,   False),
           94  +                    ("(?<=&)apos[   ];",                "apos;",        True,   False),
           95  +                    ("(?<=&)quot[   ];",                "quot;",        True,   False),
           96  +                    ("(?<=&)thinsp[   ];",              "thinsp;",      True,   False)
           97  +    ],
           98  +    "nbsp2": [
           99  +                    ("«(?=[:alnum:])",                  "« ",           True,   False),
          100  +                    ("«[  ]+",                          "« ",           True,   False),
          101  +                    ("(?<=[:alnum:]|[.!?])»",           " »",           True,   False),
          102  +                    ("[  ]+»",                          " »",           True,   False)
          103  +    ],
          104  +    "nnbsp2": [
          105  +                    ("«(?=[:alnum:])",                  "« ",           True,   False),
          106  +                    ("«[  ]+",                          "« ",           True,   False),
          107  +                    ("(?<=[:alnum:]|[.!?])»",           " »",           True,   False),
          108  +                    ("[  ]+»",                          " »",           True,   False)
          109  +    ],
          110  +    "nbsp3": [
          111  +                    ("([:digit:])([%‰€$£¥˚℃])",         "$1 $2",        True,   True),
          112  +                    ("([:digit:]) ([%‰€$£¥˚℃])",        "$1 $2",        True,   True),
          113  +    ],
          114  +    "nbsp4": [
          115  +                    ("([:digit:])[  ]([:digit:])",      "$1 $2",        True,   True)
          116  +    ],
          117  +    "nnbsp4": [
          118  +                    ("([:digit:])[  ]([:digit:])",      "$1 $2",        True,   True)
          119  +    ],
          120  +    "nbsp5": [
          121  +                    ("(?<=[0-9⁰¹²³⁴⁵⁶⁷⁸⁹]) ?([kcmµnd]?(?:[slgJKΩΩℓ]|m[²³]?|Wh?|Hz|dB)|[%‰]|°C)\\b", " $1", True, True)
          122  +    ],
          123  +    "nbsp6": [
          124  +                    ("\\b(MM?\\.|Mlle|Mgr) ",           "$1 ",          True,   True)
          125  +    ],
          126  +
          127  +    # espaces manquants
          128  +    "space1": [
          129  +                    (";(?=[:alnum:])",                  "; ",           True,   True),
          130  +                    ("\\?(?=[A-ZÉÈÊÂÀÎ])",              "? ",           True,   True),
          131  +                    ("!(?=[:alnum:])",                  "! ",           True,   True),
          132  +                    ("…(?=[:alnum:])",                  "… ",           True,   True),
          133  +                    ("\\.(?=[A-ZÉÈÎ][:alpha:])",        ". ",           True,   True),
          134  +                    ("\\.(?=À)",                        ". ",           True,   True),
          135  +                    (",(?=[:alpha:])",                  ", ",           True,   True),
          136  +                    ("([:alpha:]),([0-9])",             "$1, $2",       True,   True),
          137  +                    (":(?=[:alpha:])",                  ": ",           True,   True)
          138  +    ],
          139  +    "space1_fix": [
          140  +                    ("(?<=DnT), w\\b",                  ",w",           True,   True),
          141  +                    ("(?<=DnT), A\\b",                  ",A",           True,   True)
          142  +    ],
          143  +    "space2": [
          144  +                    (" -(?=[:alpha:]|[\"«“'‘])",        " - ",          True,   False),
          145  +                    (" –(?=[:alpha:]|[\"«“'‘])",        " – ",          True,   False), # demi-cadratin
          146  +                    (" —(?=[:alpha:]|[\"«“'‘])",        " — ",          True,   False), # cadratin
          147  +                    ("(?<=[:alpha:])– ",                " – ",          True,   False),
          148  +                    ("(?<=[:alpha:])— ",                " — ",          True,   False),
          149  +                    ("(?<=[:alpha:])- ",                " - ",          True,   False),
          150  +                    ("(?<=[\"»”'’])– ",                 " – ",          True,   False),
          151  +                    ("(?<=[\"»”'’])— ",                 " — ",          True,   False),
          152  +                    ("(?<=[\"»”'’])- ",                 " - ",          True,   False)
          153  +    ],
          154  +
          155  +    # Suppressions
          156  +    "delete1": [
          157  +                    ("­",                               "",             False,  True)
          158  +    ],
          159  +
          160  +    # Signes typographiques
          161  +    "typo1": [
          162  +                    ("\\bl['´‘′`](?=[:alnum:])",        "l’",           True,   True),
          163  +                    ("\\bj['´‘′`](?=[:alnum:])",        "j’",           True,   True),
          164  +                    ("\\bm['´‘′`](?=[:alnum:])",        "m’",           True,   True),
          165  +                    ("\\bt['´‘′`](?=[:alnum:])",        "t’",           True,   True),
          166  +                    ("\\bs['´‘′`](?=[:alnum:])",        "s’",           True,   True),
          167  +                    ("\\bc['´‘′`](?=[:alnum:])",        "c’",           True,   True),
          168  +                    ("\\bd['´‘′`](?=[:alnum:])",        "d’",           True,   True),
          169  +                    ("\\bn['´‘′`](?=[:alnum:])",        "n’",           True,   True),
          170  +                    ("\\bç['´‘′`](?=[:alnum:])",        "ç’",           True,   True),
          171  +                    ("\\bL['´‘′`](?=[:alnum:])",        "L’",           True,   True),
          172  +                    ("\\bJ['´‘′`](?=[:alnum:])",        "J’",           True,   True),
          173  +                    ("\\bM['´‘′`](?=[:alnum:])",        "M’",           True,   True),
          174  +                    ("\\bT['´‘′`](?=[:alnum:])",        "T’",           True,   True),
          175  +                    ("\\bS['´‘′`](?=[:alnum:])",        "S’",           True,   True),
          176  +                    ("\\bC['´‘′`](?=[:alnum:])",        "C’",           True,   True),
          177  +                    ("\\bD['´‘′`](?=[:alnum:])",        "D’",           True,   True),
          178  +                    ("\\bN['´‘′`](?=[:alnum:])",        "N’",           True,   True),
          179  +                    ("\\bÇ['´‘′`](?=[:alnum:])",        "Ç’",           True,   True),
          180  +                    ("(qu|jusqu|lorsqu|puisqu|quoiqu|quelqu|presqu|entr|aujourd|prud)['´‘′`]", "$1’", True, False)
          181  +    ],
          182  +    "typo2": [
          183  +                    ("...",                             "…",            False,  True),
          184  +                    ("(?<=…)[.][.]",                    "…",            True,   True),
          185  +                    ("…[.](?![.])",                     "…",            True,   True)
          186  +    ],
          187  +    "typo3a": [     # cadratin
          188  +                    (" - ",                             " — ",          False,  True),
          189  +                    (" – ",                             " — ",          False,  True),
          190  +                    (" -,",                             " —,",          False,  True),
          191  +                    (" –,",                             " —,",          False,  True)
          192  +    ],
          193  +    "typo3b": [     # demi-cadratin
          194  +                    (" - ",                             " – ",          False,  True),
          195  +                    (" — ",                             " – ",          False,  True),
          196  +                    (" -,",                             " –,",          False,  True),
          197  +                    (" —,",                             " –,",          False,  True)
          198  +    ],
          199  +    "typo4a": [     # cadratin
          200  +                    ("^-[  ]",                          "— ",           True,   True),
          201  +                    ("^–[  ]",                          "— ",           True,   True),
          202  +                    ("^— ",                             "— ",           True,   True),
          203  +                    ("^«[  ][—–-][  ]",                 "« — ",         True,   True),
          204  +                    ("^[-–—](?=[:alnum:])",             "— ",           True,   False)
          205  +    ],
          206  +    "typo4b": [     # demi-cadratin
          207  +                    ("^-[  ]",                          "– ",           True,   True),
          208  +                    ("^—[  ]",                          "– ",           True,   True),
          209  +                    ("^– ",                             "– ",           True,   True),
          210  +                    ("^«[  ][—–-][  ]",                 "« – ",         True,   True),
          211  +                    ("^[-–—](?=[:alnum:])",             "– ",           True,   False)
          212  +    ],
          213  +    "typo5": [
          214  +                    ('"([:alpha:]+)"',                      "“$1”",         True,   False),
          215  +                    ("''([:alpha:]+)''",                    "“$1”",         True,   False),
          216  +                    ("'([:alpha:]+)'",                      "“$1”",         True,   False),
          217  +                    ('^"(?=[:alnum:])',                     "« ",           True,   False),
          218  +                    ("^''(?=[:alnum:])",                    "« ",           True,   False),
          219  +                    (' "(?=[:alnum:])',                     " « ",          True,   False),
          220  +                    (" ''(?=[:alnum:])",                    " « ",          True,   False),
          221  +                    ('\\("(?=[:alnum:])',                   "(« ",          True,   False),
          222  +                    ("\\(''(?=[:alnum:])",                  "(« ",          True,   False),
          223  +                    ('(?<=[:alnum:])"$',                    " »",           True,   False),
          224  +                    ("(?<=[:alnum:])''$",                   " »",           True,   False),
          225  +                    ('(?<=[:alnum:])"(?=[] ,.:;?!…)])',     " »",           True,   False),
          226  +                    ("(?<=[:alnum:])''(?=[] ,.:;?!…)])",    " »",           True,   False),
          227  +                    ('(?<=[.!?…])" ',                       " » ",          True,   False),
          228  +                    ('(?<=[.!?…])"$',                       " »",           True,   False)
          229  +    ],
          230  +    "typo6": [
          231  +                    ("\\bN\\.([ms])\\b",                    "N·$1",         True,   True),  # N·m et N·m-1, N·s
          232  +                    ("\\bW\\.h\\b",                         "W·h",          True,   True),
          233  +                    ("\\bPa\\.s\\b",                        "Pa·s",         True,   True),
          234  +                    ("\\bA\\.h\\b",                         "A·h",          True,   True),
          235  +                    ("\\bΩ\\.m\\b",                         "Ω·m",          True,   True),
          236  +                    ("\\bS\\.m\\b",                         "S·m",          True,   True),
          237  +                    ("\\bg\\.s(?=-1)\\b",                   "g·s",          True,   True),
          238  +                    ("\\bm\\.s(?=-[12])\\b",                "m·s",          True,   True),
          239  +                    ("\\bg\\.m(?=2|-3)\\b",                 "g·m",          True,   True),
          240  +                    ("\\bA\\.m(?=-1)\\b",                   "A·m",          True,   True),
          241  +                    ("\\bJ\\.K(?=-1)\\b",                   "J·K",          True,   True),
          242  +                    ("\\bW\\.m(?=-2)\\b",                   "W·m",          True,   True),
          243  +                    ("\\bcd\\.m(?=-2)\\b",                  "cd·m",         True,   True),
          244  +                    ("\\bC\\.kg(?=-1)\\b",                  "C·kg",         True,   True),
          245  +                    ("\\bH\\.m(?=-1)\\b",                   "H·m",          True,   True),
          246  +                    ("\\bJ\\.kg(?=-1)\\b",                  "J·kg",         True,   True),
          247  +                    ("\\bJ\\.m(?=-3)\\b",                   "J·m",          True,   True),
          248  +                    ("\\bm[2²]\\.s\\b",                     "m²·s",         True,   True),
          249  +                    ("\\bm[3³]\\.s(?=-1)\\b",               "m³·s",         True,   True),
          250  +                    #("\\bJ.kg-1.K-1\\b",                   "J·kg-1·K-1",   True,   True),
          251  +                    #("\\bW.m-1.K-1\\b",                    "W·m-1·K-1",    True,   True),
          252  +                    #("\\bW.m-2.K-1\\b",                    "W·m-2·K-1",    True,   True),
          253  +                    ("\\b(Y|Z|E|P|T|G|M|k|h|da|d|c|m|µ|n|p|f|a|z|y)Ω\\b", "$1Ω", True, True)
          254  +    ],
          255  +    "typo7": [
          256  +                    # ligatures: pas de majuscules
          257  +                    ("coeur",                               "cœur",         False,  True),
          258  +                    ("coel([aeio])",                        "cœl$1",        True,   True),
          259  +                    ("choeur",                              "chœur",        False,  True),
          260  +                    ("foet",                                "fœt",          False,  True),
          261  +                    ("oeil",                                "œil",          False,  True),
          262  +                    ("oeno",                                "œno",          False,  True),
          263  +                    ("oesoph",                              "œsoph",        False,  True),
          264  +                    ("oestro",                              "œstro",        False,  True),
          265  +                    ("oeuf",                                "œuf",          False,  True),
          266  +                    ("oeuvr",                               "œuvr",         False,  True),
          267  +                    ("moeur",                               "mœur",         False,  True),
          268  +                    ("noeu",                                "nœu",          False,  True),
          269  +                    ("soeur",                               "sœur",         False,  True),
          270  +                    ("voeu",                                "vœu",          False,  True),
          271  +                    ("aequo",                               "æquo",         False,  True),
          272  +                    # ligatures: majuscules
          273  +                    ("Coeur",                               "Cœur",         False,  True),
          274  +                    ("Coel([aeio])",                        "Cœl$1",        True,   True),
          275  +                    ("Choeur",                              "Chœur",        False,  True),
          276  +                    ("Foet",                                "Fœt",          False,  True),
          277  +                    ("Oeil",                                "Œil",          False,  True),
          278  +                    ("Oeno",                                "Œno",          False,  True),
          279  +                    ("Oesoph",                              "Œsoph",        False,  True),
          280  +                    ("Oestro",                              "Œstro",        False,  True),
          281  +                    ("Oeuf",                                "Œuf",          False,  True),
          282  +                    ("Oeuvr",                               "Œuvr",         False,  True),
          283  +                    ("Moeur",                               "Mœur",         False,  True),
          284  +                    ("Noeu",                                "Nœu",          False,  True),
          285  +                    ("Soeur",                               "Sœur",         False,  True),
          286  +                    ("Voeu",                                "Vœu",          False,  True),
          287  +                    ("Aequo",                               "Æquo",         False,  True),
          288  +                    # mots communs avec diacritiques manquants
          289  +                    ("\\bCa\\b",                            "Ça",           True,   True),
          290  +                    (" ca\\b",                              " ça",          True,   True),
          291  +                    ("\\bdej[aà]\\b",                       "déjà",         True,   True),
          292  +                    ("\\bDej[aà]\\b",                       "Déjà",         True,   True),
          293  +                    ("\\bplutot\\b",                        "plutôt",       True,   True),
          294  +                    ("\\bPlutot\\b",                        "Plutôt",       True,   True),
          295  +                    ("\\b([cC]e(?:ux|lles?|lui))-la\\b",    "$1-là",        True,   True),
          296  +                    ("\\bmalgre\\b",                        "malgré",       True,   True),
          297  +                    ("\\bMalgre\\b",                        "Malgré",       True,   True),
          298  +                    ("\\betre\\b",                          "être",         True,   True),
          299  +                    ("\\bEtre\\b",                          "Être",         True,   True),
          300  +                    ("\\btres\\b",                          "très",         True,   True),
          301  +                    ("\\bTres\\b",                          "Très",         True,   True),
          302  +                    ("\\bEtai([ts]|ent)\\b",                "Étai$1",       True,   True),
          303  +                    ("\\bE(tat|cole|crit|poque|tude|ducation|glise|conomi(?:qu|)e|videmment|lysée|tienne|thiopie|cosse|gypt(?:e|ien)|rythrée|pinal|vreux)", "É$1", True, True)
          304  +    ],
          305  +    # faire ligatures
          306  +    "typo_ffi_do": [
          307  +                    ("ffi",                                 "ffi",            False,  True)
          308  +    ],
          309  +    "typo_ffl_do": [
          310  +                    ("ffl",                                 "ffl",            False,  True)
          311  +    ],
          312  +    "typo_fi_do": [
          313  +                    ("fi",                                  "fi",            False,  True)
          314  +    ],
          315  +    "typo_fl_do": [
          316  +                    ("fl",                                  "fl",            False,  True)
          317  +    ],
          318  +    "typo_ff_do": [
          319  +                    ("ff",                                  "ff",            False,  True)
          320  +    ],
          321  +    "typo_ft_do": [
          322  +                    ("ft",                                  "ſt",            False,  True)
          323  +    ],
          324  +    "typo_st_do": [
          325  +                    ("st",                                  "st",            False,  True)
          326  +    ],
          327  +    # défaire ligatures
          328  +    "typo_fi_undo": [
          329  +                    ("fi",                                   "fi",           False,  True)
          330  +    ],
          331  +    "typo_fl_undo": [
          332  +                    ("fl",                                   "fl",           False,  True)
          333  +    ],
          334  +    "typo_ff_undo": [
          335  +                    ("ff",                                   "ff",           False,  True)
          336  +    ],
          337  +    "typo_ff_undo": [
          338  +                    ("ffi",                                   "ffi",          False,  True)
          339  +    ],
          340  +    "typo_ff_undo": [
          341  +                    ("ffl",                                   "ffl",          False,  True)
          342  +    ],
          343  +    "typo_ft_undo": [
          344  +                    ("ſt",                                   "ft",           False,  True)
          345  +    ],
          346  +    "typo_st_undo": [
          347  +                    ("st",                                   "st",           False,  True)
          348  +    ],
          349  +
          350  +    # Divers
          351  +    "misc1a": [
          352  +                    ("(?<=\\b[0-9][0-9][0-9][0-9])(i?[èe]me|è|e)\\b",           "ᵉ",    True, False),
          353  +                    ("(?<=\\b[0-9][0-9][0-9])(i?[èe]me|è|e)\\b",                "ᵉ",    True, False),
          354  +                    ("(?<=\\b[0-9][0-9])(i?[èe]me|è|e)\\b",                     "ᵉ",    True, False),
          355  +                    ("(?<=\\b[0-9])(i?[èe]me|è|e)\\b",                          "ᵉ",    True, False),
          356  +                    ("(?<=\\b[XVICL][XVICL][XVICL][XVICL])(i?[èe]me|è|e)\\b",   "ᵉ",    True, True),
          357  +                    ("(?<=\\b[XVICL][XVICL][XVICL])(i?[èe]me|è|e)\\b",          "ᵉ",    True, True),
          358  +                    ("(?<=\\b[XVICL][XVICL])(i?[èe]me|è|e)\\b",                 "ᵉ",    True, True),
          359  +                    ("(?<=\\b[XVICL])(i?[èe]me|è)\\b",                          "ᵉ",    True, True),
          360  +                    ("(?<=\\b(au|l[ea]|du) [XVICL])e\\b",                       "ᵉ",    True, True),
          361  +                    ("(?<=\\b[XVI])e(?= siècle)",                               "ᵉ",    True, True),
          362  +                    ("(?<=\\b[1I])er\\b",                                       "ᵉʳ",   True, True),
          363  +                    ("(?<=\\b[1I])re\\b",                                       "ʳᵉ",   True, True)
          364  +    ],
          365  +    "misc1b": [
          366  +                    ("(?<=\\b[0-9][0-9][0-9][0-9])(i?[èe]me|è|ᵉ)\\b",           "e",    True, False),
          367  +                    ("(?<=\\b[0-9][0-9][0-9])(i?[èe]me|è|ᵉ)\\b",                "e",    True, False),
          368  +                    ("(?<=\\b[0-9][0-9])(i?[èe]me|è|ᵉ)\\b",                     "e",    True, False),
          369  +                    ("(?<=\\b[0-9])(i?[èe]me|è|ᵉ)\\b",                          "e",    True, False),
          370  +                    ("(?<=\\b[XVICL][XVICL][XVICL][XVICL])(i?[èe]me|è|ᵉ)\\b",   "e",    True, True),
          371  +                    ("(?<=\\b[XVICL][XVICL][XVICL])(i?[èe]me|è|ᵉ)\\b",          "e",    True, True),
          372  +                    ("(?<=\\b[XVICL][XVICL])(i?[èe]me|è|ᵉ)\\b",                 "e",    True, True),
          373  +                    ("(?<=\\b[XVICL])(i?[èe]me|è|ᵉ)\\b",                        "e",    True, True),
          374  +                    ("(?<=\\b[1I])ᵉʳ\\b",                                       "er",   True, True),
          375  +                    ("(?<=\\b[1I])ʳᵉ\\b",                                       "er",   True, True)
          376  +    ],
          377  +    "misc2": [
          378  +                    ("etc(…|[.][.][.]?)",                       "etc.",         True,   True),
          379  +                    ("(?<!,) etc[.]",                           ", etc.",       True,   True)
          380  +    ],
          381  +    "misc3": [
          382  +                    ("[ -]t[’'](?=il\\b|elle|on\\b)",           "-t-",          True,   True),
          383  +                    (" t-(?=il|elle|on)",                       "-t-",          True,   True),
          384  +                    ("[ -]t[’'-](?=ils|elles)",                 "-",            True,   True),
          385  +                    ("(?<=[td])-t-(?=il|elle|on)",              "-",            True,   True),
          386  +                    ("(celles?|celui|ceux) (ci|là)\\b",         "$1-$2",        True,   False),
          387  +                    ("\\bdix (sept|huit|neuf)",                 "dix-$1",       True,   False),
          388  +                    ("quatre vingt",                            "quatre-vingt", False,  True),
          389  +                    ("(soixante|quatre-vingt) dix",             "$1-dix",       True,   False),
          390  +                    ("(vingt|trente|quarante|cinquante|soixante(?:-dix|)|quatre-vingt(?:-dix|)) (deux|trois|quatre|cinq|six|sept|huit|neuf)", "$1-$2", True, False),
          391  +                    ("(?<!-)\\b(ci) (joint|desso?us|contre|devant|avant|après|incluse|g[îi]t|gisent)", "$1-$2", True, False),
          392  +                    ("\\bvis à vis",                            "vis-à-vis",    False,  True),
          393  +                    ("\\bVis à vis",                            "Vis-à-vis",    False,  True),
          394  +                    ("week end",                                "week-end",     False,  True),
          395  +                    ("Week end",                                "Week-end",     False,  True),
          396  +                    ("(plus|moins) value",                      "$1-value",     True,   False)
          397  +    ],
          398  +    "misc5a": [
          399  +                    ("(qu|lorsqu|puisqu|quoiqu|presqu|jusqu|aujourd|entr|quelqu) ", "$1’", True, True),
          400  +    ],
          401  +    "misc5b": [
          402  +                    ("\\bj (?=[aàeéêiîoôuyhAÀEÉÊIÎOÔUYH])",     "j’",           True,   True),
          403  +                    ("\\bn (?=[aàeéêiîoôuyhAÀEÉÊIÎOÔUYH])",     "n’",           True,   True),
          404  +                    ("\\bm (?=[aàeéêiîoôuyhAÀEÉÊIÎOÔUYH])",     "m’",           True,   True),
          405  +                    ("\\bt (?=[aàeéêiîoôuyhAÀEÉÊIÎOÔUYH])",     "t’",           True,   True),
          406  +                    ("\\bs (?=[aàeéêiîoôuyhAÀEÉÊIÎOÔUYH])",     "s’",           True,   True),
          407  +                    ("\\bc (?=[aàeéêiîoôuyhAÀEÉÊIÎOÔUYH])",     "c’",           True,   True),
          408  +                    ("\\bç (?=[aàeéêiîoôuyhAÀEÉÊIÎOÔUYH])",     "ç’",           True,   True),
          409  +                    ("\\bl (?=[aàeéêiîoôuyhAÀEÉÊIÎOÔUYH])",     "l’",           True,   True),
          410  +                    ("\\bd (?=[aàeéêiîoôuyhAÀEÉÊIÎOÔUYH])",     "d’",           True,   True)
          411  +    ],
          412  +    "misc5c": [
          413  +                    ("\\bJ (?=[aàeéêiîoôuyhAÀEÉÊIÎOÔUYH])",     "J’",           True,   True),
          414  +                    ("\\bN (?=[aàeéêiîoôuyhAÀEÉÊIÎOÔUYH])",     "N’",           True,   True),
          415  +                    ("\\bM (?=[aàeéêiîoôuyhAÀEÉÊIÎOÔUYH])",     "M’",           True,   True),
          416  +                    ("\\bT (?=[aàeéêiîoôuyhAÀEÉÊIÎOÔUYH])",     "T’",           True,   True),
          417  +                    ("\\bS (?=[aàeéêiîoôuyhAÀEÉÊIÎOÔUYH])",     "S’",           True,   True),
          418  +                    ("\\bC (?=[aàeéêiîoôuyhAÀEÉÊIÎOÔUYH])",     "C’",           True,   True),
          419  +                    ("\\bÇ (?=[aàeéêiîoôuyhAÀEÉÊIÎOÔUYH])",     "Ç’",           True,   True),
          420  +                    ("\\bL (?=[aàeéêiîoôuyhAÀEÉÊIÎOÔUYH])",     "L’",           True,   True),
          421  +                    ("\\bD (?=[aàeéêiîoôuyhAÀEÉÊIÎOÔUYH])",     "D’",           True,   True)
          422  +    ]
          423  +}