|
@@ -29,7 +29,7 @@ def clean_text(text):
|
|
text = re.sub(r':\s*', ' ', text)
|
|
text = re.sub(r':\s*', ' ', text)
|
|
text = re.sub(r'&', ', ', text)
|
|
text = re.sub(r'&', ', ', text)
|
|
text = re.sub(r'/', ', ', text)
|
|
text = re.sub(r'/', ', ', text)
|
|
- text = re.sub(r'\.*\n\.*', '.', text)
|
|
|
|
|
|
+ text = re.sub(r'\.*\n\.*', ', ', text)
|
|
text = re.sub(r'[dD][rR](\.|\s*)*', 'Dr. ', text)
|
|
text = re.sub(r'[dD][rR](\.|\s*)*', 'Dr. ', text)
|
|
return text
|
|
return text
|
|
|
|
|