import re
def text_cleaner(text):
# Remove all emojis, which don't contribute anything.
emoji_pattern = re.compile("["
u"\U0001F600-\U0001F64F" # emoticons
u"\U0001F300-\U0001F5FF" # symbols & pictographs
u"\U0001F680-\U0001F6FF" # transport & map symbols
u"\U0001F1E0-\U0001F1FF" # flags (iOS)
"]+", flags=re.UNICODE)
text = re.sub(emoji_pattern, ' ', text)
rules = [
{r'>\s+': u'>'}, # remove spaces after a tag opens or closes
{r'\s+': u' '}, # replace consecutive spaces
{r'\s*
\s*': u'\n'}, # newline after a
{r'(div)\s*>\s*': u'\n'}, # newline after