是否有一个内置函数来获取这样的url:../images.html
给定一个这样的基本url:http://www.example.com/faq/index.html
和一个目标url,比如http://www.example.com/images.html
我检查了 urlparse 模块。我想要的是 urljoin() 函数的对应物。
您可以使用 urlparse.urlparse 来解析出路径,并使用 os.path.relpath 的 posixpath 版本(posixpath.relpath)来计算相对路径。
(警告:这适用于 Linux,但可能不适用于 Windows):
import urlparse
import sys
import posixpath
def relurl(target, base):
    """Return ``target`` as a URL reference relative to ``base``.

    The reference is computed from the directory part of ``base``'s path.
    The target's params, query string and fragment are carried over to the
    result.

    :param target: URL to point at
    :param base: URL of the document the reference will be used from
    :return: a relative reference such as ``'../images.html'``
    :raises ValueError: if the two URLs have different network locations
    """
    base_parts = urlparse.urlparse(base)
    target_parts = urlparse.urlparse(target)
    if base_parts.netloc != target_parts.netloc:
        raise ValueError('target and base netlocs do not match')
    # The leading '.' makes both paths relative, so an empty URL path
    # (as in 'http://google.com') becomes '.' instead of '', which
    # relpath() would reject.
    base_dir = '.' + posixpath.dirname(base_parts.path)
    target_path = '.' + target_parts.path
    rel_path = posixpath.relpath(target_path, start=base_dir)
    # relpath() only knows about the path; re-attach the remaining parts so
    # that 'page.html?x=1#top' is not silently reduced to 'page.html'.
    return urlparse.urlunparse((
        '', '', rel_path,
        target_parts.params, target_parts.query, target_parts.fragment,
    ))
# Each case is (target URL, base URL, expected relative reference).
tests = [
    ('http://www.example.com/images.html',
     'http://www.example.com/faq/index.html',
     '../images.html'),
    ('http://google.com', 'http://google.com', '.'),
    ('http://google.com', 'http://google.com/', '.'),
    ('http://google.com/', 'http://google.com', '.'),
    ('http://google.com/', 'http://google.com/', '.'),
    ('http://google.com/index.html', 'http://google.com/', 'index.html'),
    ('http://google.com/index.html', 'http://google.com/index.html',
     'index.html'),
]

# Run every case and print one line per case: the error raised, a PASS
# marker, or the mismatching actual/expected pair.
for target, base, answer in tests:
    try:
        result = relurl(target, base)
    except ValueError as err:
        print('{t!r},{b!r} --> {e}'.format(t=target, b=base, e=err))
        continue
    if result != answer:
        print('{t!r},{b!r} --> {r!r} != {a!r}'.format(
            t=target, b=base, r=result, a=answer))
    else:
        print('{t!r},{b!r} --> PASS'.format(t=target, b=base))
想到的第一个解决方案是:
>>> os.path.relpath('/images.html', os.path.dirname('/faq/index.html'))
'../images.html'
当然,这需要 URL 解析 -> 域名比较 (!!) -> 如果是这种情况,则路径重写 -> 重新添加查询和片段。
import urlparse
import posixpath
def relative_url(destination, source):
    """Return ``destination`` as a URL reference relative to ``source``.

    When both URLs share the same scheme and network location, the result
    is a relative path from the directory of ``source`` to ``destination``,
    followed by the destination's query string and fragment.  Otherwise
    ``destination`` is returned unchanged.

    :param destination: URL to point at
    :param source: URL of the document the reference will be used from
    :return: a relative reference, or ``destination`` itself when the two
        URLs differ in scheme or network location
    :raises ValueError: from ``posixpath.relpath`` if ``destination`` has
        neither a network location nor a path
    """
    u_dest = urlparse.urlsplit(destination)
    u_src = urlparse.urlsplit(source)
    if u_dest[:2] != u_src[:2]:
        # Different scheme or network location: only the full URL is right.
        return destination
    # A URL with a host but no path ('http://example.com') refers to the
    # root.  Without this, relpath() rejects an empty destination path and
    # resolves an empty start directory against the working directory.
    root = '/' if u_dest.netloc else ''
    dest_path = u_dest.path or root
    src_dir = posixpath.dirname(u_src.path or root)
    rel_path = posixpath.relpath(dest_path, src_dir)
    return urlparse.urlunsplit(
        ('', '', rel_path, u_dest.query, u_dest.fragment))
然后
>>> relative_url('http://www.example.com/images.html', 'http://www.example.com/faq/index.html')
'../images.html'
>>> relative_url('http://www.example.com/images.html?my=query&string=here#fragment', 'http://www.example.com/faq/index.html')
'../images.html?my=query&string=here#fragment'
>>> relative_url('http://www.example.com/images.html', 'http://www2.example.com/faq/index.html')
'http://www.example.com/images.html'
>>> relative_url('https://www.example.com/images.html', 'http://www.example.com/faq/index.html')
'https://www.example.com/images.html'
编辑:现在使用 os.path 的 posixpath 实现,使其也可以在 Windows 下工作。
import itertools
import urlparse
def makeRelativeUrl(sourceUrl, targetUrl):
    '''
    Build the reference that leads from sourceUrl to targetUrl.

    The last path segment of sourceUrl is treated as the document and
    everything before it as its directory, so the result is relative to
    that directory.  Paths are compared segment by segment exactly as
    written; '.' and '..' segments in the inputs are not normalized.  The
    target's params, query string and fragment are kept.

    :param sourceUrl: a string, the URL the reference will be used from
    :param targetUrl: a string, the URL to reach
    :return: targetUrl relative to sourceUrl, or targetUrl unchanged if the
        scheme or net location differs
    '''
    parsedSource = urlparse.urlparse(sourceUrl)
    parsedTarget = urlparse.urlparse(targetUrl)
    if parsedSource[:2] != parsedTarget[:2]:
        # Different scheme or net location: a relative path cannot express it.
        return targetUrl

    # A URL with a host but an empty path refers to the root directory.
    root = '/' if parsedSource.netloc else ''
    sourceDirs = (parsedSource.path or root).split('/')[:-1]
    targetSegments = (parsedTarget.path or root).split('/')
    targetDirs = targetSegments[:-1]

    # Count the leading directories both paths have in common; stop at the
    # first difference so later coincidental matches are not skipped.
    shared = 0
    for sourceDir, targetDir in zip(sourceDirs, targetDirs):
        if sourceDir != targetDir:
            break
        shared += 1

    # Climb out of the unshared source directories, then descend into the
    # remaining target segments (directories plus the final name).
    segments = ['..'] * (len(sourceDirs) - shared) + targetSegments[shared:]
    relativePath = '/'.join(segments)
    firstSegment = segments[0]
    if not firstSegment or ':' in firstSegment:
        # An empty reference would mean "this document", and a first segment
        # containing ':' would be read as a scheme; './' avoids both.
        relativePath = './' + relativePath

    return urlparse.urlunparse((
        '', '', relativePath,
        parsedTarget.params, parsedTarget.query, parsedTarget.fragment,
    ))
if __name__ == '__main__':
    # "tests" :p -- for each (source, target) pair, print both URLs and the
    # reference computed from the first to the second.  Nothing is asserted;
    # the output is meant to be checked by eye.
    urlPairs = [
        ('http://coolwebsite.com/questions/bobobo/bo/bo/1663807/how-can-i-iterate-through-two-lists-in-parallel-in-python',
         'http://coolwebsite.com/questions/126524/iterate-a-list-with-indexes-in-python'),
        ('http://coolwebsite.com/questions/1663807/how-can-i-iterate-through-two-lists-in-parallel-in-python',
         'http://coolwebsite.com/questions/1663807/bananas'),
        ('http://coolwebsite.com/questions/1663807/fruits',
         'http://coolwebsite.com/questions/1663807/fruits/berries/bananas'),
    ]
    for url1, url2 in urlPairs:
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(url1)
        print(url2)
        print('second relative to first:')
        print(makeRelativeUrl(url1, url2))
运行“测试”以查看它是否有效:P