another attempt to fix unicode escape sequences being read out in response

2025-04-02 20:58:44 -05:00 · 2025-04-02 20:58:44 -05:00 · 0d255b7bb6
commit 0d255b7bb6
parent 5e06c03a15
1 changed file with 5 additions and 4 deletions
--- a/init.py
+++ b/init.py
@@ -98,15 +98,16 @@ class KagiSkill(FallbackSkill):
        pattern_refs = re.compile(r'<div[^>]*>.*$', re.DOTALL)
        pattern_sups = re.compile(r'<sup[^>]*>.*?</sup>', re.DOTALL)
        pattern_tags = re.compile(r'<.*?>')
-        text = re.sub(pattern_refs, '', html_text)
+        text = self.convert_to_ascii(html_text)
+        text = re.sub(pattern_refs, '', text)
        text = re.sub(pattern_sups, '', text)
        text = re.sub(pattern_tags, '', text)
-        return self.remove_diacritics(html.unescape(text))
+        return html.unescape(text)

    def clean_api_string(self, text):
        text = text.replace('*', '').replace('_', '')
        text = re.sub(r'\【\d+】', '', text)
-        return self.remove_diacritics(text)
+        return self.convert_to_ascii(text)

    def get_api_response(self, json_data):
        if not isinstance(json_data.get("data"), dict):
@@ -121,7 +122,7 @@ class KagiSkill(FallbackSkill):
            return None
        return output_value

-    def remove_diacritics(self, text):
+    def convert_to_ascii(self, text):
        normalized = unicodedata.normalize('NFD', text)
        ascii = ''.join(c for c in normalized if not unicodedata.combining(c))
        return ascii