another attempt to fix unicode escape sequences being read out in response

This commit is contained in:
Rudis Muiznieks 2025-04-02 20:58:44 -05:00
parent 5e06c03a15
commit 0d255b7bb6
Signed by: rudism
GPG key ID: CABF2F86EF7884F9

View file

@@ -98,15 +98,16 @@ class KagiSkill(FallbackSkill):
pattern_refs = re.compile(r'<div[^>]*>.*$', re.DOTALL)
pattern_sups = re.compile(r'<sup[^>]*>.*?</sup>', re.DOTALL)
pattern_tags = re.compile(r'<.*?>')
text = re.sub(pattern_refs, '', html_text)
text = self.convert_to_ascii(html_text)
text = re.sub(pattern_refs, '', text)
text = re.sub(pattern_sups, '', text)
text = re.sub(pattern_tags, '', text)
return self.remove_diacritics(html.unescape(text))
return html.unescape(text)
def clean_api_string(self, text):
text = text.replace('*', '').replace('_', '')
text = re.sub(r'\\d+】', '', text)
return self.remove_diacritics(text)
return self.convert_to_ascii(text)
def get_api_response(self, json_data):
if not isinstance(json_data.get("data"), dict):
@@ -121,7 +122,7 @@ class KagiSkill(FallbackSkill):
return None
return output_value
def remove_diacritics(self, text):
def convert_to_ascii(self, text):
normalized = unicodedata.normalize('NFD', text)
ascii = ''.join(c for c in normalized if not unicodedata.combining(c))
return ascii