real fix for unicode escape sequences being read out

This commit is contained in:
Rudis Muiznieks 2025-04-02 21:12:07 -05:00
parent 48f5bda16f
commit c115baa705
Signed by: rudism
GPG key ID: CABF2F86EF7884F9
2 changed files with 4 additions and 10 deletions

View file

@ -16,7 +16,6 @@ import requests
import json
import html
import re
import unicodedata
from urllib.parse import quote_plus
class KagiSkill(FallbackSkill):
@ -94,11 +93,11 @@ class KagiSkill(FallbackSkill):
self.log.error(f'error fetching kagi api response: {str(e)}')
return False
def clean_session_string(self, html_text):
def clean_session_string(self, raw_text):
text = json.loads(raw_text)
pattern_refs = re.compile(r'<div[^>]*>.*$', re.DOTALL)
pattern_sups = re.compile(r'<sup[^>]*>.*?</sup>', re.DOTALL)
pattern_tags = re.compile(r'<.*?>')
text = self.convert_to_ascii(html_text)
text = re.sub(pattern_refs, '', text)
text = re.sub(pattern_sups, '', text)
text = re.sub(pattern_tags, '', text)
@ -107,7 +106,7 @@ class KagiSkill(FallbackSkill):
def clean_api_string(self, text):
    """Strip emphasis characters and numeric citation markers from text.

    Every ``*`` and ``_`` character is removed (e.g. markdown emphasis),
    then every citation marker of the form ``【12】`` is deleted.

    Args:
        text: The answer string to clean.

    Returns:
        The cleaned string. All other characters, including non-ASCII
        ones, are returned unchanged.
    """
    text = text.replace('*', '').replace('_', '')
    # Match the whole marker: literal opening bracket, one or more digits,
    # literal closing bracket. In a raw string a doubled backslash before
    # "d" would match a literal backslash instead of a digit class, so the
    # markers would never be removed.
    text = re.sub(r'【\d+】', '', text)
    return text
def get_api_response(self, json_data):
if not isinstance(json_data.get("data"), dict):
@ -121,8 +120,3 @@ class KagiSkill(FallbackSkill):
self.log.error(f"'output' was not a string in kagi api response (found {type(output_value).__name__})")
return None
return output_value
def convert_to_ascii(self, text):
    """Return *text* with combining marks (accents, diacritics) removed.

    The input is decomposed with Unicode NFD normalization so that
    accented characters split into a base character plus combining
    marks; the combining marks are then dropped.

    Note that the result is not guaranteed to be pure ASCII: characters
    that have no decomposition (for example ``ß``) pass through as-is.

    Args:
        text: The string to process.

    Returns:
        The NFD-normalized string without combining characters.
    """
    decomposed = unicodedata.normalize('NFD', text)
    kept = []
    for ch in decomposed:
        # unicodedata.combining() returns a non-zero class for combining marks.
        if unicodedata.combining(ch):
            continue
        kept.append(ch)
    return ''.join(kept)

View file

@ -4,7 +4,7 @@ import os
from os import walk, path
PYPI_NAME = "skill-ovos-fallback-kagi" # pip install PYPI_NAME
VERSION = "0.1.4"
VERSION = "0.1.5"
URL = f"https://code.sitosis.com/rudism/{PYPI_NAME}"
SKILL_CLAZZ = "KagiSkill" # needs to match __init__.py class name