mirror of https://gitee.com/openkylin/wget.git
101 lines
2.8 KiB
Python
101 lines
2.8 KiB
Python
|
#!/usr/bin/env python3
|
||
|
from sys import exit
|
||
|
from test.http_test import HTTPTest
|
||
|
from misc.wget_file import WgetFile
|
||
|
|
||
|
"""
|
||
|
This test executes Wget in recursive mode with a rejected log being written.
|
||
|
"""
|
||
|
############# File Definitions ###############################################
|
||
|
# HTML body served as index.html, the entry point of the recursive crawl.
# Its single link leads to secondpage.html.  The "{{port}}" placeholder is
# kept verbatim in the string -- presumably the test framework substitutes
# the server's port before serving (confirm against the framework).
mainpage = """
<html>
<head>
<title>Main Page</title>
</head>
<body>
<p>
Recurse to a <a href="http://localhost:{{port}}/secondpage.html">second page</a>.
</p>
</body>
</html>
"""
|
||
|
|
||
|
# HTML body served as secondpage.html.  It links forward to thirdpage.html
# and back to index.html; the back link is the one that shows up as the
# BLACKLIST row in the expected rejected log.  "{{port}}" is kept verbatim
# (presumably replaced by the test framework -- confirm).
secondpage = """
<html>
<head>
<title>Second Page</title>
</head>
<body>
<p>
Recurse to a <a href="http://localhost:{{port}}/thirdpage.html">third page</a>.
Try the blacklisted <a href="http://localhost:{{port}}/index.html">main page</a>.
</p>
</body>
</html>
"""
|
||
|
|
||
|
# HTML body served as thirdpage.html.  It links to dummy.txt (disallowed by
# the robots.txt body defined in this file) and to a foreign host; these
# correspond to the ROBOTS and SPANNEDHOST rows of the expected rejected log.
# "{{port}}" is kept verbatim (presumably replaced by the test framework --
# confirm).
thirdpage = """
<html>
<head>
<title>Third Page</title>
</head>
<body>
<p>
Try a hidden <a href="http://localhost:{{port}}/dummy.txt">dummy file</a>.
Try to leave to <a href="http://no.such.domain/">another domain</a>.
</p>
</body>
</html>
"""
|
||
|
|
||
|
# Body of robots.txt: forbids every user agent from fetching /dummy.txt.
robots = """
User-agent: *
Disallow: /dummy.txt
"""
|
||
|
|
||
|
# Expected contents of log.csv: tab-separated, one header row followed by
# one row per rejected URL (BLACKLIST, ROBOTS, SPANNEDHOST).  Each row holds
# the reason, the parts of the rejected URL (U_*) and the parts of the URL
# of the page that linked to it (P_*).  The backslash right after the
# opening quotes suppresses the leading newline so the header is line one.
log = """\
REASON\tU_URL\tU_SCHEME\tU_HOST\tU_PORT\tU_PATH\tU_PARAMS\tU_QUERY\tU_FRAGMENT\tP_URL\tP_SCHEME\tP_HOST\tP_PORT\tP_PATH\tP_PARAMS\tP_QUERY\tP_FRAGMENT
BLACKLIST\thttp%3A//localhost%3A{{port}}/index.html\tSCHEME_HTTP\tlocalhost\t{{port}}\tindex.html\t\t\t\thttp%3A//localhost%3A{{port}}/secondpage.html\tSCHEME_HTTP\tlocalhost\t{{port}}\tsecondpage.html\t\t\t
ROBOTS\thttp%3A//localhost%3A{{port}}/dummy.txt\tSCHEME_HTTP\tlocalhost\t{{port}}\tdummy.txt\t\t\t\thttp%3A//localhost%3A{{port}}/thirdpage.html\tSCHEME_HTTP\tlocalhost\t{{port}}\tthirdpage.html\t\t\t
SPANNEDHOST\thttp%3A//no.such.domain/\tSCHEME_HTTP\tno.such.domain\t80\t\t\t\t\thttp%3A//localhost%3A{{port}}/thirdpage.html\tSCHEME_HTTP\tlocalhost\t{{port}}\tthirdpage.html\t\t\t
"""
|
||
|
|
||
|
# Body of dummy.txt; the literal text itself is arbitrary.
dummyfile = "Don't care."


# One WgetFile per file involved in the test, built from a file name and the
# content string defined earlier in this file.
index_html = WgetFile("index.html", mainpage)
secondpage_html = WgetFile("secondpage.html", secondpage)
thirdpage_html = WgetFile("thirdpage.html", thirdpage)
robots_txt = WgetFile("robots.txt", robots)
dummy_txt = WgetFile("dummy.txt", dummyfile)
log_csv = WgetFile("log.csv", log)
|
||
|
|
||
|
# Options for the Wget invocation: -nd (no directory hierarchy), -r
# (recursive retrieval) and --rejected-log, which names log.csv as the file
# that receives the rejected-URL log.
WGET_OPTIONS = "-nd -r --rejected-log log.csv"
# URL list for the run: a single entry, index.html.  The nesting (list of
# lists) is kept exactly as the test framework receives it.
WGET_URLS = [["index.html"]]
|
||
|
|
||
|
# Files handed to the test server (passed as "ServerFiles" below).
Files = [[index_html, secondpage_html, thirdpage_html, robots_txt, dummy_txt]]

# Exit status Wget is expected to return.
ExpectedReturnCode = 0
# Files expected after the run.  dummy_txt is served but deliberately not
# listed here, while log_csv -- which is not a server file -- is.
ExpectedDownloadedFiles = [
    index_html,
    secondpage_html,
    thirdpage_html,
    robots_txt,
    log_csv,
]
|
||
|
|
||
|
################ Pre and Post Test Hooks #####################################
|
||
|
# Hook dictionaries passed to HTTPTest: server setup, the Wget invocation,
# and the post-run expectations.
pre_test = {
    "ServerFiles": Files,
}
test_options = {
    "WgetCommands": WGET_OPTIONS,
    "Urls": WGET_URLS,
}
post_test = {
    "ExpectedFiles": ExpectedDownloadedFiles,
    "ExpectedRetcode": ExpectedReturnCode,
}
|
||
|
|
||
|
# Run the test with the hooks defined above and use the value returned by
# begin() as the process exit status.
err = HTTPTest(
    pre_hook=pre_test,
    test_params=test_options,
    post_hook=post_test,
).begin()

exit(err)
|