22 lines
732 B
Python
22 lines
732 B
Python
import re
|
|
|
|
def add_slash_to_text_as_regex(text: str):
    """Convert literal text into a regex pattern that matches it.

    Every character that is neither a word character (``\\w``) nor
    whitespace is prefixed with a backslash, and each run of whitespace
    is replaced by the pattern ``\\s+`` so the result tolerates any
    amount/kind of whitespace at those positions.

    Args:
        text: The literal text to convert. May be ``None`` or empty.

    Returns:
        The regex pattern string, or ``text`` unchanged when it is
        ``None`` or empty.
    """
    if not text:
        return text

    # Escape in a single pass with a callable replacement: a callable's
    # return value is inserted verbatim, so a literal backslash becomes
    # "\\\\" (two characters) instead of collapsing back to one, and every
    # occurrence is escaped exactly once regardless of what else is in text.
    escaped = re.sub(r"[^\w\s]", lambda match: "\\" + match.group(), text)

    # Escaping only inserts backslashes before non-whitespace characters,
    # so whitespace runs are still intact here.
    return re.sub(r"\s+", r"\\s+", escaped)
|
|
|
|
|
|
def clean_text(text: str) -> str:
    """Normalize text: lowercase it and collapse spacing.

    The text is lowercased, literal ``\\uXXXX`` escape sequences (a
    backslash, ``u`` and four hex digits, e.g. ``\\u2004`` or ``\\u00a0``)
    are replaced by a space, leading/trailing whitespace is stripped, and
    runs of two or more spaces are collapsed to a single space.

    Args:
        text: The text to clean.

    Returns:
        The cleaned text.
    """
    text = text.lower()
    # Replace literal unicode escape sequences such as \u2004 or \u00a0
    # with a space. Only hex digits are accepted after "\u" so ordinary
    # text that merely starts with "\u" (e.g. "\users") is left intact;
    # the text is already lowercased, hence a-f only.
    text = re.sub(r"\\u[0-9a-f]{4}", ' ', text)
    text = re.sub(r"( ){2,}", ' ', text.strip())
    return text