`url.join(...)` drops parts of the path if `url` does not end with `/`
hf-kklein opened this issue · comments
Describe the bug
I'm joining two parts of a URL:
- One URL with the host and first part of the path
- One URL with the second part of the path and query params
Expected behaviour: The result path is concatenated from the two parts.
Actual behaviour: The result path is concatenated, but the last segment of the first URL's path is dropped if the first URL does not end with a "/".
To Reproduce
import pytest
from yarl import URL
# Base URLs whose path is /Foo/Bar, with and without a trailing slash, either
# parsed from a literal string or assembled with the ``/`` operator.
@pytest.mark.parametrize(
    "first_url_part",
    [
        pytest.param(URL("https://example.inv/Foo/Bar"), id="w/o trailing slash"),
        pytest.param(URL("https://example.inv/") / "Foo" / "Bar", id="dynamically without trailing slash"),
        pytest.param(URL("https://example.inv/Foo/Bar/"), id="with trailing slash"),
        pytest.param(URL("https://example.inv/") / "Foo" / "Bar/", id="dynamically with trailing slash"),
    ],
)
def test_url_join(first_url_part: URL) -> None:
    """Join a relative URL onto ``first_url_part`` and compare it with the expected URL.

    The expected result keeps the whole base path (``/Foo/Bar/``) in front of
    the relative path ``Baz/asd`` and carries the query string ``xyz=123``.
    """
    assert isinstance(first_url_part, URL)
    # Relative reference: no scheme or host, only a path and a query string.
    second_url_part = URL("Baz/asd?xyz=123")
    actual = first_url_part.join(second_url_part)
    assert actual == URL("https://example.inv/Foo/Bar/Baz/asd?xyz=123")
    # for 'w/o trailing slash' case the assertion is not fulfilled
    # Expected :URL('https://example.inv/Foo/Bar/Baz/asd?xyz=123')
    # Actual :URL('https://example.inv/Foo/Baz/asd?xyz=123') # <-- why is the '/Bar/' part missing?
Expected behavior
The last part of the first URL path should not be dropped.
Test Output
========================= 3 failed, 1 passed in 0.13s =========================
FAILED [ 25%]
unittests\urltest.py:4 (test_url_join[w/o trailing slash])
URL('https://example.inv/Foo/Baz/asd?xyz=123') != URL('https://example.inv/Foo/Bar/Baz/asd?xyz=123')
Expected :URL('https://example.inv/Foo/Bar/Baz/asd?xyz=123')
Actual :URL('https://example.inv/Foo/Baz/asd?xyz=123')
<Click to see difference>
first_url_part = URL('https://example.inv/Foo/Bar')
@pytest.mark.parametrize(
"first_url_part",
[
pytest.param(URL("https://example.inv/Foo/Bar"), id="w/o trailing slash"),
pytest.param(URL("https://example.inv/") / "Foo" / "Bar", id="dynamically without trailing slash"),
pytest.param(URL("https://example.inv/Foo/Bar/"), id="with trailing slash"),
pytest.param(URL("https://example.inv/") / "Foo" / "Bar/", id="dynamically with trailing slash"),
],
)
def test_url_join(first_url_part: URL):
assert isinstance(first_url_part, URL)
second_url_part = URL("Baz/asd?xyz=123")
actual = first_url_part.join(second_url_part)
> assert actual == URL("https://example.inv/Foo/Bar/Baz/asd?xyz=123")
E AssertionError: assert URL('https://example.inv/Foo/Baz/asd?xyz=123') == URL('https://example.inv/Foo/Bar/Baz/asd?xyz=123')
E + where URL('https://example.inv/Foo/Bar/Baz/asd?xyz=123') = URL('https://example.inv/Foo/Bar/Baz/asd?xyz=123')
urltest.py:18: AssertionError
FAILED [ 50%]
unittests\urltest.py:4 (test_url_join[dynamically without trailing slash])
URL('https://example.inv/Foo/Baz/asd?xyz=123') != URL('https://example.inv/Foo/Bar/Baz/asd?xyz=123')
Expected :URL('https://example.inv/Foo/Bar/Baz/asd?xyz=123')
Actual :URL('https://example.inv/Foo/Baz/asd?xyz=123')
<Click to see difference>
first_url_part = URL('https://example.inv/Foo/Bar')
@pytest.mark.parametrize(
"first_url_part",
[
pytest.param(URL("https://example.inv/Foo/Bar"), id="w/o trailing slash"),
pytest.param(URL("https://example.inv/") / "Foo" / "Bar", id="dynamically without trailing slash"),
pytest.param(URL("https://example.inv/Foo/Bar/"), id="with trailing slash"),
pytest.param(URL("https://example.inv/") / "Foo" / "Bar/", id="dynamically with trailing slash"),
],
)
def test_url_join(first_url_part: URL):
assert isinstance(first_url_part, URL)
second_url_part = URL("Baz/asd?xyz=123")
actual = first_url_part.join(second_url_part)
> assert actual == URL("https://example.inv/Foo/Bar/Baz/asd?xyz=123")
E AssertionError: assert URL('https://example.inv/Foo/Baz/asd?xyz=123') == URL('https://example.inv/Foo/Bar/Baz/asd?xyz=123')
E + where URL('https://example.inv/Foo/Bar/Baz/asd?xyz=123') = URL('https://example.inv/Foo/Bar/Baz/asd?xyz=123')
urltest.py:18: AssertionError
PASSED [ 75%]FAILED [100%]
unittests\urltest.py:4 (test_url_join[dynamically with trailing slash])
URL('https://example.inv/Foo/Baz/asd?xyz=123') != URL('https://example.inv/Foo/Bar/Baz/asd?xyz=123')
Expected :URL('https://example.inv/Foo/Bar/Baz/asd?xyz=123')
Actual :URL('https://example.inv/Foo/Baz/asd?xyz=123')
<Click to see difference>
first_url_part = URL('https://example.inv/Foo/Bar')
@pytest.mark.parametrize(
"first_url_part",
[
pytest.param(URL("https://example.inv/Foo/Bar"), id="w/o trailing slash"),
pytest.param(URL("https://example.inv/") / "Foo" / "Bar", id="dynamically without trailing slash"),
pytest.param(URL("https://example.inv/Foo/Bar/"), id="with trailing slash"),
pytest.param(URL("https://example.inv/") / "Foo" / "Bar/", id="dynamically with trailing slash"),
],
)
def test_url_join(first_url_part: URL):
assert isinstance(first_url_part, URL)
second_url_part = URL("Baz/asd?xyz=123")
actual = first_url_part.join(second_url_part)
> assert actual == URL("https://example.inv/Foo/Bar/Baz/asd?xyz=123")
E AssertionError: assert URL('https://example.inv/Foo/Baz/asd?xyz=123') == URL('https://example.inv/Foo/Bar/Baz/asd?xyz=123')
E + where URL('https://example.inv/Foo/Bar/Baz/asd?xyz=123') = URL('https://example.inv/Foo/Bar/Baz/asd?xyz=123')
urltest.py:18: AssertionError
Process finished with exit code 1
Python Version
3.11.0
multidict Version
multidict==6.0.4
yarl Version
yarl==1.9.2
OS
Windows
Additional context
No response
Code of Conduct
- I agree to follow the aio-libs Code of Conduct
Workaround:
actual = URL(str(first_url_part)+"/").join(second_url_part)
This looks like correct behavior to me at least.
Compare e.g.:
>>> urllib.parse.urljoin("https://example.inv/Foo/Bar", "Baz/asd?xyz=123")
'https://example.inv/Foo/Baz/asd?xyz=123'
Although some web servers or applications treat paths with and without a trailing slash the same, according to RFC 3986 they are not necessarily equivalent. Joining a relative reference onto a base URL whose path has no trailing slash therefore does behave differently: the last segment of the base path is replaced, similar to what happens in your browser with these relative references.