Explorar el Código

Clean model input features before inference

Mohidul Islam hace 5 años
padre
commit
7b8c7f2488
Se han modificado 1 ficheros con 12 adiciones y 0 borrados
  1. 12 0
      nlu_job/nlu_utils.py

+ 12 - 0
nlu_job/nlu_utils.py

@@ -1,3 +1,4 @@
+import re
 from django.conf import settings
 from requests import post
 import json
@@ -23,8 +24,19 @@ def filter_with_last_ten_reviews(location_id, replies):
     return replies
 
 
def clean_text(text):
    """Normalize raw text before it is sent to the NLU model for parsing.

    The following rewrites are applied, in order:

    1. A colon and any whitespace after it become a single space.
    2. ``&`` becomes ``', '``.
    3. ``/`` becomes ``', '``.
    4. A newline, together with any dots directly around it, becomes a
       single ``'.'``.
    5. The standalone token "dr" (any letter case), together with any dots
       and whitespace following it, becomes ``'Dr. '``.

    Args:
        text: The input string to clean.

    Returns:
        The cleaned string.
    """
    text = re.sub(r':\s*', ' ', text)
    text = re.sub(r'&', ', ', text)
    text = re.sub(r'/', ', ', text)
    text = re.sub(r'\.*\n\.*', '.', text)
    # Word boundaries keep the title rewrite from firing inside ordinary
    # words such as "address", "children" or "drink"; without them those
    # words were corrupted into "adDr. ess", "chilDr. en", "Dr. ink".
    text = re.sub(r'\b[dD][rR]\b[.\s]*', 'Dr. ', text)
    return text
+
+
 def model_inference(text):
     url = nlu_server_url + '/model/parse'
+    text = clean_text(text)
     payload = {'text': text}
     headers = {'content-type': 'application/json'}