Prechádzať zdrojové kódy

Clean model input feature before inference

Mohidul Islam 5 rokov pred
rodič
commit
7b8c7f2488
1 zmenil súbory, kde vykonal 12 pridanie a 0 odobranie
  1. 12 0
      nlu_job/nlu_utils.py

+ 12 - 0
nlu_job/nlu_utils.py

@@ -1,3 +1,4 @@
+import re
 from django.conf import settings
 from requests import post
 import json
@@ -23,8 +24,19 @@ def filter_with_last_ten_reviews(location_id, replies):
     return replies
 
 
+def clean_text(text):
+    """Return `text` with separators, line breaks and the "Dr" title normalised."""
+    text = re.sub(r':\s*', ' ', text)  # colon + trailing whitespace -> one space
+    text = re.sub(r'&', ', ', text)  # ampersand -> comma separator
+    text = re.sub(r'/', ', ', text)  # slash -> comma separator
+    text = re.sub(r'\.*\n\.*', '.', text)  # newline + adjacent dots -> one full stop
+    text = re.sub(r'\b[dD][rR]\b[.\s]*', 'Dr. ', text)  # whole word only: keep "address"
+    return text
+
+
 def model_inference(text):
     url = nlu_server_url + '/model/parse'
+    text = clean_text(text)
     payload = {'text': text}
     headers = {'content-type': 'application/json'}