2025年java多线程的爬虫撸177图片~~

本文用 Java 实现一个多线程图片爬虫:主程序 `CrawImage` 使用 Jsoup 解析页面并提取图片地址,下载工具 `DownloadImage` 在线程池中并发下载图片。

大家好,我是讯享网,很高兴认识大家。

# 来个多线程的


讯享网

主程序 package cn.fu.threadimage; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; import java.io.File; import java.io.IOException; import java.net.URL; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; public class CrawImage { 
    static String url = "http://www.177picaa.pw/html/2017/12/1610354.html" +"/"; static String file;//下载存放路径 static Integer num;//分页 static Integer subcut;//图片命名截取imgurl,仅仅针对当前网站适用; static { 
    try { 
    Document document = Jsoup.parse(new URL(url), 5000); //获取标题 Element element = document.getElementsByClass("entry-title").first(); file = "F://paqu/" + element.text(); //判断目标文件夹是否存在 File files = new File(file); if (!files.exists()) { 
    files.mkdirs(); } Elements select = document.select(".page-links>a"); //获取分页 num = select.size(); //177pic vpn访问网:www.177pic.pw 内网:www.177picaa.pw if (url.contains("aa")) { 
    subcut = 40; } else { 
    subcut = 38; } } catch (IOException e) { 
    e.printStackTrace(); } } public static void main(String[] args) throws Exception { 
    try { 
    //创建一个缓冲池 ExecutorService pool = Executors.newCachedThreadPool(); //设置其容量为9 pool = Executors.newFixedThreadPool(9); for (int i = 1; i < num; i++) { 
    //获取指定网页源码 Document document = Jsoup.connect(url + i) .userAgent("Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.31 (KHTML, like Gecko) Chrome/26.0.1410.64 Safari/537.31").get(); Elements pages = document.select(".page-links>a"); getUrl(document, pool); } pool.shutdown(); } catch (Exception e) { 
    System.out.print(e); } } public static void getUrl(Document document, ExecutorService pool) { 
    Elements elements = document.getElementsByClass("alignnone"); for (Element el : elements) { 
    String imageUrl = el.attr("data-lazy-src"); if (imageUrl != "") { 
    //下载图片 pool.execute(new DownloadImage(imageUrl, file, subcut)); System.out.println(imageUrl); } } } } 

讯享网
// Download utility
package cn.fu.threadimage;

import java.io.*;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;

/**
 * Runnable task that downloads one image over HTTP into a target directory.
 *
 * <p>The local file name is the part of the image URL that starts at the
 * {@code subcut} character offset.
 */
public class DownloadImage implements Runnable {

    /** Target directory of the download. */
    String file;

    /** URL of the image to download. */
    String downUrl;

    /** Offset into {@link #downUrl} at which the local file name starts. */
    int subcut;

    /**
     * @param downUrl URL of the image to download
     * @param file    directory the image is written to
     * @param subcut  offset into {@code downUrl} at which the file name starts
     */
    public DownloadImage(String downUrl, String file, int subcut) {
        this.downUrl = downUrl;
        this.file = file;
        this.subcut = subcut;
    }

    /**
     * Downloads the image and writes it to {@code file/<name>}. I/O failures
     * are printed and the task ends without rethrowing.
     */
    @Override
    public void run() {
        // Guard the substring below: an out-of-range offset would otherwise
        // throw an unchecked exception inside the worker thread.
        if (downUrl == null || subcut < 0 || subcut >= downUrl.length()) {
            System.err.println("Skipping " + downUrl + ": file-name offset " + subcut + " is outside the URL");
            return;
        }

        // Destination file.
        File fileofImg = new File(file + "/" + downUrl.substring(subcut));
        try {
            URL url = new URL(downUrl);
            HttpURLConnection connection = (HttpURLConnection) url.openConnection();
            connection.setRequestProperty("User-Agent", "Mozilla/4.0 (compatible; MSIE 5.0; Windows NT; DigExt)");

            // try-with-resources closes both streams even when the copy fails.
            try (InputStream is = connection.getInputStream();
                 FileOutputStream out = new FileOutputStream(fileofImg)) {
                // Copy in chunks instead of one byte per read/write call.
                byte[] buffer = new byte[8192];
                int read;
                while ((read = is.read(buffer)) != -1) {
                    out.write(buffer, 0, read);
                }
            }
        } catch (IOException e) {
            // Also covers MalformedURLException and FileNotFoundException,
            // which are IOException subclasses and were handled identically.
            e.printStackTrace();
        }
    }
}

在这里插入图片描述

小讯
上一篇 2025-01-07 07:04
下一篇 2025-02-09 11:56

相关推荐

版权声明:本文内容由互联网用户自发贡献,该文观点仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容,请联系我们,一经查实,本站将立刻删除。
如需转载请保留出处:https://51itzy.com/kjqy/59382.html