爬取网页
<!DOCTYPE html>
<html lang="en-GB">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
<title>Login :: Damn Vulnerable Web Application (DVWA) v1.10 *Development*</title>
<link rel="stylesheet" type="text/css" href="dvwa/css/login.css" />
</head>
<body>
<div id="wrapper">
<div id="header">
<br />
<p><img src="dvwa/images/login_logo.png" /></p>
<br />
</div> <!--<div id="header">-->
<div id="content">
<form action="login.php" method="post">
<fieldset>
<label for="user">Username</label> <input type="text" class="loginInput" size="20" name="username"><br />
<label for="pass">Password</label> <input type="password" class="loginInput" AUTOCOMPLETE="off" size="20" name="password"><br />
<br />
<p class="submit"><input type="submit" value="Login" name="Login"></p>
</fieldset>
<input type='hidden' name='user_token' value='f5c8380cb1ef21c13dd3ca00c8378bb7' />
</form>
<br />
<br />
<br />
<br />
<br />
<br />
<br />
<br />
<br />
<!-- <img src="dvwa/images/RandomStorm.png" /> -->
</div > <!--<div id="content">-->
<div id="footer">
<p><a href="https://github.com/digininja/DVWA/" target="_blank">Damn Vulnerable Web Application (DVWA)</a></p>
</div> <!--<div id="footer"> -->
</div> <!--<div id="wrapper"> -->
</body>
</html>
爬取
from lxml import etree
import requests
from requests.auth import HTTPBasicAuth
from requests.exceptions import RequestException
def get_url(url, payload=None, timeout=10):
    """Fetch *url* with an HTTP GET request.

    Args:
        url: Address to request.
        payload: Optional data handed to ``requests.get`` through its
            ``data`` argument. Defaults to ``None`` (nothing sent) rather
            than a shared mutable ``{}``.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests`` may block indefinitely.

    Returns:
        The response body as text when the status code is 200, the integer
        status code for any other status, or ``None`` when ``requests``
        raises a ``RequestException`` (which includes timeouts).
    """
    # NOTE(review): the payload is sent via ``data=`` (request body). If
    # query-string parameters were intended, ``params=`` is the usual
    # choice for GET -- confirm against the target endpoint.
    try:
        response = requests.get(url, data=payload, timeout=timeout)
    except RequestException:
        return None
    if response.status_code == 200:
        return response.text
    return response.status_code
def get_text(html):
    """Extract the ``user_token`` hidden-field value from a login page.

    Args:
        html: Page markup. Values that are empty or are not ``str``/``bytes``
            (for example ``None`` or an integer status code) are treated as
            "no page available".

    Returns:
        The first ``user_token`` value found by the XPath query, which is
        also printed, or ``None`` when there is no usable markup or the
        query matches nothing.
    """
    # Guard first: parsing None, an int, or an empty string would raise.
    if not html or not isinstance(html, (str, bytes)):
        return None
    tree = etree.HTML(html)
    if tree is None:
        # Defensive: nothing to query if the parser produced no root.
        return None
    xp = '/html/body/div/div[2]/form//input[@name="user_token"]/@value'
    matches = tree.xpath(xp)
    if not matches:
        # Page layout differs or the token field is absent; avoid IndexError.
        return None
    token = matches[0]
    print(token)
    return token
def main():
    """Repeatedly fetch the DVWA login page and print its ``user_token``.

    Loops until a fetch fails: ``get_url`` returns ``None`` on a request
    error and an integer status code on a non-200 reply, neither of which
    can be parsed, so the loop reports the failure and stops instead of
    passing that value on to the parser.
    """
    url = "http://127.0.0.1/DVWA/login.php"
    while True:
        html = get_url(url)
        if not isinstance(html, str):
            print("Failed to fetch", url, "- result:", html)
            break
        # get_text prints the token itself; the return value is not needed.
        get_text(html)
# Run the scraper only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()