`urllib.parse.urlparse` is not a reliable way to tell a URL apart from an XPath
Xithrius opened this issue · comments
https://github.com/Xithrius/Xythrion/blob/main/xythrion/bot/extensions/mapping/link_map.py#L183-L187
This needs to be replaced with more robust code, such as:
import re
from urllib.parse import urlparse
from enum import Enum
# One location step: an ASCII-letter element name with an optional numeric
# positional predicate, e.g. ``div`` or ``span[2]``.
_XPATH_STEP = r'[a-zA-Z]+(\[[0-9]+\])?'

# A leading ``/`` or ``//`` followed by a step, then any number of
# ``/``-separated steps. A ``/`` may appear without a following step, so
# trailing slashes are accepted.
VALID_XPATH_PATTERN = re.compile(
    r'(/|//)' + _XPATH_STEP + r'(/(' + _XPATH_STEP + r')?)*'
)
class DestinationType(Enum):
    """Kinds of destination string that ``validate_destination`` can report."""

    # The destination fully matched ``VALID_XPATH_PATTERN``.
    XPATH = 1

    # The destination parsed as a URL with both a scheme and a network location.
    URL = 2
def validate_destination(destination: str) -> DestinationType:
    """Classify *destination* as an XPath expression or a URL.

    The XPath check runs first: the whole string must match
    ``VALID_XPATH_PATTERN``. Otherwise the string is parsed as a URL and
    accepted only when both a scheme and a network location are present.

    Args:
        destination: The string to classify.

    Returns:
        ``DestinationType.XPATH`` or ``DestinationType.URL``.

    Raises:
        ValueError: If *destination* is neither a matching XPath nor a URL
            with both a scheme and a netloc.
    """
    if VALID_XPATH_PATTERN.fullmatch(destination):
        return DestinationType.XPATH

    # Parse the function's own argument; the snippet previously referenced an
    # undefined name here, so every non-XPath input failed with NameError.
    parsed_url = urlparse(destination)
    if parsed_url.scheme and parsed_url.netloc:
        return DestinationType.URL

    raise ValueError("Input string is neither a valid XPath nor a valid URL.")