Another attempt to fix Unicode escape sequences being read out in the response
This commit is contained in:
parent
5e06c03a15
commit
0d255b7bb6
1 changed file with 5 additions and 4 deletions
|
@@ -98,15 +98,16 @@ class KagiSkill(FallbackSkill):
|
|||
pattern_refs = re.compile(r'<div[^>]*>.*$', re.DOTALL)
|
||||
pattern_sups = re.compile(r'<sup[^>]*>.*?</sup>', re.DOTALL)
|
||||
pattern_tags = re.compile(r'<.*?>')
|
||||
text = re.sub(pattern_refs, '', html_text)
|
||||
text = self.convert_to_ascii(html_text)
|
||||
text = re.sub(pattern_refs, '', text)
|
||||
text = re.sub(pattern_sups, '', text)
|
||||
text = re.sub(pattern_tags, '', text)
|
||||
return self.remove_diacritics(html.unescape(text))
|
||||
return html.unescape(text)
|
||||
|
||||
def clean_api_string(self, text):
|
||||
text = text.replace('*', '').replace('_', '')
|
||||
text = re.sub(r'\【\d+】', '', text)
|
||||
return self.remove_diacritics(text)
|
||||
return self.convert_to_ascii(text)
|
||||
|
||||
def get_api_response(self, json_data):
|
||||
if not isinstance(json_data.get("data"), dict):
|
||||
|
@@ -121,7 +122,7 @@ class KagiSkill(FallbackSkill):
|
|||
return None
|
||||
return output_value
|
||||
|
||||
def remove_diacritics(self, text):
|
||||
def convert_to_ascii(self, text):
|
||||
normalized = unicodedata.normalize('NFD', text)
|
||||
ascii = ''.join(c for c in normalized if not unicodedata.combining(c))
|
||||
return ascii
|
||||
|
|
Loading…
Add table
Reference in a new issue