admin 管理员组

文章数量: 1086864

Java爬虫

Java爬虫 — 爬取王者荣耀英雄图片

import org.jsoup.Connection;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;/*** @author 郭珂*/
public class TextMain {public static void main(String[] args) throws IOException {//导包//和王者服务器创建连接Connection connection = Jsoup.connect(".shtml");//通过连接 获取那个Document对象 来间接操作(HTML)Document document = connection.get();//找到那些存储图片头像的位置Element elementUL = document.selectFirst("[class=herolist clearfix]");//通过UL找寻其中包含的LiElements elementLis = elementUL.select("li");//将Lis遍历for(Element elementLi : elementLis){//每一次在li中获取一个aElement elementA = elementLi.selectFirst("a");//获取A标签中的那个href属性String hrefURL = elementA.attr("href");//获取A标签中夹着的那个文字String InnerText = elementA.text();//地址拼接成完整的全路径String path = "/" + hrefURL;//通过拼接好的path创建一个新的连接Connection newConnection = Jsoup.connect(path);//通过新连接获取一个新的Document对象Document newDocument = newConnection.get();//通过document找寻那个存有大图的divElement div = newDocument.selectFirst("[class=zk-con1 zk-con]");//找到div标签中的那个style属性,要里面的地址String backgroundURL = div.attr("style");int left = backgroundURL.indexOf("'");int right = backgroundURL.lastIndexOf("'");String newBG = backgroundURL.substring(left+1,right);URL url = new URL("https:" + newBG);//==============================================================================//通过url获取一个用来读取图片的输入流InputStream inputStream = url.openStream();//写在本地的硬盘上FileOutputStream fileOutputStream = new FileOutputStream("D:\\King\\"+ InnerText +".jpg\\");//需要一个临时小数组byte[] b = new byte[1024];//读取图片信息,存入小数组int count = inputStream.read(b);while(count != -1){fileOutputStream.write(b,0,count);//清空流管道fileOutputStream.flush();//再读取下一次count = inputStream.read(b);}fileOutputStream.close();inputStream.close();}}}

本文标签: Java爬虫