admin 管理员组

文章数量: 1086019

HtmlUnit的简单例子

1. 首先下载 HtmlUnit 的 jar 包,并将其导入到项目的类路径(classpath)中。

package com.shu.htmlUnit;


import com.gargoylesoftware.htmlunit.BrowserVersion;
import com.gargoylesoftware.htmlunit.WebClient;
import com.gargoylesoftware.htmlunit.html.HtmlPage;


/**
 * Minimal HtmlUnit example: loads a single page and prints its title, its
 * XML serialization and its plain-text rendering to standard output.
 */
public class HtmlUnit01 {

    /**
     * Loads the page at the configured URL and prints three views of it.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if loading the page fails
     */
    public static void main(String[] args) throws Exception {
        // NOTE(review): the URL is left empty in this listing; set it to the
        // page you want to load before running.
        String url = "";

        // Create a WebClient that simulates Firefox 38. Alternatives are the
        // no-argument constructor or BrowserVersion.CHROME.
        // try-with-resources closes the client even when a call below throws.
        try (WebClient webClient = new WebClient(BrowserVersion.FIREFOX_38)) {
            // Turn off JavaScript and CSS handling; pass true to enable them.
            webClient.getOptions().setJavaScriptEnabled(false);
            webClient.getOptions().setCssEnabled(false);

            // Fetch the page.
            HtmlPage page = webClient.getPage(url);

            // Page title.
            System.out.println("Title:------" + page.getTitleText());

            // Page as XML.
            System.out.println("Xml:------" + page.asXml());

            // Page as plain text.
            System.out.println("Text:------" + page.asText());
        }
    }
}



package com.shu.htmlUnit;


import java.io.IOException;
import java.net.MalformedURLException;


import com.gargoylesoftware.htmlunit.WebClient;
import com.gargoylesoftware.htmlunit.html.HtmlInput;
import com.gargoylesoftware.htmlunit.html.HtmlPage;


/**
 * Finds a specific element in a page by its id and prints its default value.
 *
 * @author xiangni123
 */
public class HtmlUnit02 {

    /**
     * Loads the page at the configured URL, looks up the input element with
     * id {@code "su"} and prints its default value.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if loading the page or locating the element fails
     */
    public static void main(String[] args) throws Exception {
        // NOTE(review): the URL is left empty in this listing; the comment
        // below refers to the Baidu search button, so presumably Baidu's
        // home page is intended — confirm and fill in before running.
        String url = "";

        // Create a WebClient that simulates a browser.
        // try-with-resources closes the client even when a call below throws.
        try (WebClient webClient = new WebClient()) {
            // Turn off JavaScript and CSS handling; pass true to enable them.
            webClient.getOptions().setJavaScriptEnabled(false);
            webClient.getOptions().setCssEnabled(false);

            // Fetch the page.
            HtmlPage page = webClient.getPage(url);

            // Get the "百度一下" (Baidu search) button by id; on that page the
            // button's id is "su".
            HtmlInput btn = (HtmlInput) page.getHtmlElementById("su");
            System.out.println(btn.getDefaultValue());
        }
    }
}



package com.shu.htmlUnit;


import java.util.List;


import com.gargoylesoftware.htmlunit.BrowserVersion;
import com.gargoylesoftware.htmlunit.WebClient;
import com.gargoylesoftware.htmlunit.html.HtmlDivision;
import com.gargoylesoftware.htmlunit.html.HtmlInput;
import com.gargoylesoftware.htmlunit.html.HtmlPage;


/**
 * Queries a page with XPath: prints every {@code div}, then the first
 * {@code div}, then the {@code input} whose id is {@code "su"}.
 */
public class HtmlUnit03 {

    /**
     * Loads the page at the configured URL and prints the XPath query results.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if loading the page fails
     */
    public static void main(String[] args) throws Exception {
        // NOTE(review): the URL is left empty in this listing; set it to the
        // page you want to query before running.
        String url = "";

        // Create a WebClient that simulates a browser.
        // try-with-resources closes the client even when a call below throws.
        try (WebClient webClient = new WebClient()) {
            // Turn off JavaScript and CSS handling; pass true to enable them.
            webClient.getOptions().setJavaScriptEnabled(false);
            webClient.getOptions().setCssEnabled(false);

            // Fetch the page.
            HtmlPage page = webClient.getPage(url);

            // Find all divs. The result is kept as List<?> and each element
            // is cast individually, which avoids an unchecked list cast.
            System.out.println("查找所有div---------------");
            List<?> divs = page.getByXPath("//div");
            for (Object node : divs) {
                HtmlDivision div = (HtmlDivision) node;
                System.out.println(div.toString());
            }

            // Print the first div, if the page has one.
            System.out.println("查找第一个div--------------");
            if (divs.isEmpty()) {
                System.out.println("未找到div");
            } else {
                HtmlDivision firstDiv = (HtmlDivision) divs.get(0);
                System.out.println(firstDiv.toString());
            }

            // Find the input whose id is "su", if present.
            System.out.println("查找并获取特定的input");
            List<?> inputs = page.getByXPath("//input[@id='su']");
            if (inputs.isEmpty()) {
                System.out.println("未找到id为su的input");
            } else {
                HtmlInput input = (HtmlInput) inputs.get(0);
                System.out.println(input.toString());
            }
        }
    }
}




package com.shu.htmlUnit;


import java.util.List;


import com.gargoylesoftware.htmlunit.BrowserVersion;
import com.gargoylesoftware.htmlunit.WebClient;
import com.gargoylesoftware.htmlunit.html.HtmlDivision;
import com.gargoylesoftware.htmlunit.html.HtmlInput;
import com.gargoylesoftware.htmlunit.html.HtmlPage;


/**
 * Fills in a search box, clicks the search button and prints the text of the
 * resulting page.
 */
public class HtmlUnit04 {

    /**
     * Loads the page at the configured URL, sets the value of the input with
     * id {@code "kw"}, clicks the input with id {@code "su"} and prints the
     * text of the page returned by the click.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if loading the page, locating an element or the click fails
     */
    public static void main(String[] args) throws Exception {
        // NOTE(review): the URL is left empty in this listing; set it to the
        // search page you want to drive before running.
        String url = "";

        // Create a WebClient that simulates a browser.
        // try-with-resources closes the client even when a call below throws.
        try (WebClient webClient = new WebClient()) {
            // Turn off JavaScript and CSS handling; pass true to enable them.
            webClient.getOptions().setJavaScriptEnabled(false);
            webClient.getOptions().setCssEnabled(false);

            // Fetch the page.
            HtmlPage page = webClient.getPage(url);

            // Get the search input box and enter the search text; the input
            // is printed before and after to show the change.
            System.out.println("获取搜索输入框并提交搜索内容------------");
            HtmlInput input = (HtmlInput) page.getHtmlElementById("kw");
            System.out.println(input.toString());
            input.setValueAttribute("传世");
            System.out.println(input.toString());

            // Get the search button and click it.
            System.out.println("获取搜索按钮并点击-------------------");
            HtmlInput btn = (HtmlInput) page.getHtmlElementById("su");
            HtmlPage resultPage = btn.click();

            // Print the text of the new page.
            System.out.println("输出新页面文本----------------------");
            System.out.println(resultPage.asText());
        }
    }
}

本文标签: HtmlUnit的简单例子