{"id":81,"date":"2022-08-30T16:07:22","date_gmt":"2022-08-30T08:07:22","guid":{"rendered":"https:\/\/xinchewhd.com.cn\/?p=81"},"modified":"2022-08-30T16:29:40","modified_gmt":"2022-08-30T08:29:40","slug":"java%e7%88%ac%e5%8f%96%e5%b9%b6%e8%87%aa%e5%ae%9a%e4%b9%89%e8%a7%a3%e6%9e%90%e6%95%b0%e6%8d%ae","status":"publish","type":"post","link":"https:\/\/xinchewhd.com.cn\/index.php\/java\/java%e7%88%ac%e5%8f%96%e5%b9%b6%e8%87%aa%e5%ae%9a%e4%b9%89%e8%a7%a3%e6%9e%90%e6%95%b0%e6%8d%ae\/","title":{"rendered":"Java\u722c\u53d6\u5e76\u81ea\u5b9a\u4e49\u89e3\u6790\u6570\u636e"},"content":{"rendered":"\n<p class=\"has-medium-font-size\">\u4e00\u3001\u5f15\u5165jar\u5305<\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>&lt;dependency>\n    &lt;groupId>us.codecraft&lt;\/groupId>\n    &lt;artifactId>webmagic-core&lt;\/artifactId>\n    &lt;version>0.7.5&lt;\/version>\n&lt;\/dependency>\n&lt;!--webmagic-extension  \u62d3\u5c55-->\n&lt;dependency>\n    &lt;groupId>us.codecraft&lt;\/groupId>\n    &lt;artifactId>webmagic-extension&lt;\/artifactId>\n    &lt;version>0.7.5&lt;\/version>\n&lt;\/dependency>\n\n&lt;!--selenium-java -->\n&lt;dependency>\n    &lt;groupId>org.seleniumhq.selenium&lt;\/groupId>\n    &lt;artifactId>selenium-java&lt;\/artifactId>\n    &lt;version>3.141.59&lt;\/version>\n&lt;\/dependency><\/code><\/pre>\n\n\n\n<p class=\"has-medium-font-size\">\u4e8c\u3001\u4e0b\u8f7d chromedriver.exe<\/p>\n\n\n\n<p><a href=\"https:\/\/xinche-shanghai-bucketent.oss-cn-hangzhou.aliyuncs.com\/chromedriver_win32%20%281%29.zip\" target=\"_blank\" rel=\"noreferrer noopener nofollow\" aria-label=\"\uff08\u5728\u65b0\u7a97\u53e3\u6253\u5f00\uff09\" >windows10\u7248\u672c<\/a><\/p>\n\n\n\n<p><a href=\"https:\/\/xinche-shanghai-bucketent.oss-cn-hangzhou.aliyuncs.com\/chromedriver_linux64_%281%29.zip\" target=\"_blank\"  rel=\"nofollow\" >linux\u7248\u672c\u4e0b\u8f7d<\/a><\/p>\n\n\n\n<p 
class=\"has-medium-font-size\">\u4e09\u3001\u914d\u7f6e\u6a21\u62df\u8bbe\u5b9a\u4eba\u5458\u64cd\u4f5c\u7f51\u9875\u4ee3\u7801<\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>\/\/ path\uff1achromeDriver\u8def\u5f84\nSystem.setProperty(\"webdriver.chrome.driver\", path);\nChromeOptions chromeOptions = new ChromeOptions();\nchromeOptions.addArguments(\"--headless\");\/\/\u65e0\u5934\u6d4f\u89c8\u5668\uff08\u65e0\u754c\u9762\uff09\nchromeOptions.addArguments(\"--no-sandbox\");\/\/\u7981\u7528\u6c99\u7bb1\nchromeOptions.addArguments(\"--disable-gpu\");\/\/\u7981\u7528GPU\u52a0\u901f\nchromeOptions.addArguments(\"lang=zh_CN.UTF-8\");\nchromeOptions.setCapability(\"acceptSslCerts\", true);\n\/\/\u622a\u5c4f\u652f\u6301\nchromeOptions.setCapability(\"takesScreenshot\", true);\n\/\/css\u641c\u7d22\u652f\u6301\nchromeOptions.setCapability(\"cssSelectorsEnabled\", true);\n\/\/\u521b\u5efa\u4e00\u4e2aWebDriver \/\/(3)\u5efa\u7acbselenium \u9a71\u52a8\nWebDriver driver = new ChromeDriver(chromeOptions);\ndriver.manage().timeouts().implicitlyWait(10, TimeUnit.SECONDS);\n\/\/\u6253\u5f00\u7f51\u7ad9\ndriver.get(searchUrlPath);\n\/\/\u6240\u6709\u5728js\u4e2d\u7684\u65b9\u6cd5\u5728\u8fd9\u91cc\u90fd\u53ef\u4ee5\u4f7f\u7528\nThread.sleep(1000);\nDocument document = Jsoup.parse(driver.getPageSource());\n\/\/ \u722c\u53d6\u5b8c\u5c31\u5173\u95ed\ndriver.quit();<\/code><\/pre>\n\n\n\n<p class=\"has-medium-font-size\">\u56db\u3001\u6309\u7167\u7f51\u9875\u6807\u7b7e\u89e3\u6790\u5230\u5bf9\u5e94\u6570\u636e<\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>Element elementById = document.getElementById(\"J_main\");\nElement goodsList = elementById.getElementById(\"J_goodsList\");\nElements elements = goodsList.getElementsByClass(\"gl-item\");\n        for (Element el:elements){\n            String price = el.getElementsByClass(\"p-price\").eq(0).text();\n            String title = el.getElementsByClass(\"p-name\").eq(0).text();\n            String shop = el.getElementsByClass(\"p-shop\").eq(0).text();\n            Elements a = el.getElementsByTag(\"a\");\n  
          String href = a.eq(0).attr(\"href\");\n            String imageStr = a.eq(0).select(\"img\").eq(0).toString();\n            String imageUrl = ClimbHtmlMethodUtils.extractImageUrlTwo(imageStr);\n            String imageReplaceUrl = ClimbHtmlMethodUtils.replaceImageSize(imageUrl);\n            System.out.println(\"=========================\");\n            System.out.println(\"\u6807\u9898:\" + title);\n            System.out.println(\"\u56fe\u7247:\" + imageReplaceUrl);\n            System.out.println(\"\u5e97\u94fa:\" + shop);\n            System.out.println(\"\u4ef7\u683c:\" + price);\n            System.out.println(\"\u8be6\u60c5\u5730\u5740:\" + href);\n        }<\/code><\/pre>\n\n\n\n<p class=\"has-medium-font-size\">\u793a\u4f8b\u4ee3\u7801<\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>public static void main(String[] args) throws Exception {\n        String url = \"https:\/\/search.jd.com\/Search?keyword=apple&amp;enc=utf-8\";\n        parseJDDetail(url);\n    }\n\n    public static void  parseJDDetail(String url) throws Exception {\n        System.setProperty(\"webdriver.chrome.driver\", \"D:\\\\file\\\\new\\\\chromedriver.exe\");\n        ChromeOptions chromeOptions = new ChromeOptions();\n        chromeOptions.addArguments(\"--headless\");\/\/\u65e0\u5934\u6d4f\u89c8\u5668\uff08\u65e0\u754c\u9762\uff09\n        chromeOptions.addArguments(\"--no-sandbox\");\/\/\u7981\u7528\u6c99\u7bb1\n        chromeOptions.addArguments(\"--disable-gpu\");\/\/\u7981\u7528GPU\u52a0\u901f\n        chromeOptions.addArguments(\"lang=zh_CN.UTF-8\");\n        chromeOptions.setCapability(\"acceptSslCerts\", true);\n        \/\/\u622a\u5c4f\u652f\u6301\n        chromeOptions.setCapability(\"takesScreenshot\", true);\n        \/\/css\u641c\u7d22\u652f\u6301\n        chromeOptions.setCapability(\"cssSelectorsEnabled\", true);\n        \/\/\u521b\u5efa\u4e00\u4e2aWebDriver \/\/(3)\u5efa\u7acbselenium \u9a71\u52a8\n        WebDriver driver = new ChromeDriver(chromeOptions);\n        driver.manage().timeouts().implicitlyWait(10, 
TimeUnit.SECONDS);\n        \/\/\u6253\u5f00\u7f51\u7ad9\n        driver.get(url);\n        \/\/\u6240\u6709\u5728js\u4e2d\u7684\u65b9\u6cd5\u5728\u8fd9\u91cc\u90fd\u53ef\u4ee5\u4f7f\u7528\n        Thread.sleep(1000);\n        Document document = Jsoup.parse(driver.getPageSource());\n        Element elementById = document.getElementById(\"J_main\");\n        Element goodsList = elementById.getElementById(\"J_goodsList\");\n        Elements tableTypeElement = goodsList.getElementsByClass(\"ps-wrap\");\n        for (Element el:tableTypeElement){\n            Elements elements = el.getElementsByTag(\"img\");\n            String attr = elements.eq(0).attr(\"data-url\");\n            System.out.println(\"\u6807\u989811:\" + attr);\n            String src = elements.eq(0).attr(\"data-lazy-img\");\n            String imageReplaceUrl = ClimbHtmlMethodUtils.replaceImageSize(src);\n            src = StringUtils.checkNull(imageReplaceUrl) ? src : imageReplaceUrl;\n            System.out.println(\"\u6807\u9898222:\" + src);\n        }\n        Elements elements = goodsList.getElementsByClass(\"gl-item\");\n        for (Element el:elements){\n            String price = el.getElementsByClass(\"p-price\").eq(0).text();\n            String title = el.getElementsByClass(\"p-name\").eq(0).text();\n            String shop = el.getElementsByClass(\"p-shop\").eq(0).text();\n            Elements a = el.getElementsByTag(\"a\");\n            String href = a.eq(0).attr(\"href\");\n            String imageStr = a.eq(0).select(\"img\").eq(0).toString();\n            String imageUrl = ClimbHtmlMethodUtils.extractImageUrlTwo(imageStr);\n            String imageReplaceUrl = ClimbHtmlMethodUtils.replaceImageSize(imageUrl);\n            System.out.println(\"=========================\");\n            System.out.println(\"\u6807\u9898:\" + title);\n            System.out.println(\"\u56fe\u7247:\" + imageReplaceUrl);\n            System.out.println(\"\u5e97\u94fa:\" + shop);\n            
System.out.println(\"\u4ef7\u683c:\" + price);\n            System.out.println(\"\u8be6\u60c5\u5730\u5740:\" + href);\n        }\n        \/\/\u5173\u95ed\u670d\u52a1\n        driver.quit();\n\n    }<\/code><\/pre>\n","raw":"","protected":false},"excerpt":{"rendered":"<p>\u4e00\u3001\u5f15\u5165jar\u5305 \u4e8c\u3001\u4e0b\u8f7d chromedriver.exe windows10\u7248\u672c linux\u7248\u672c\u4e0b\u8f7d \u4e09\u3001\u914d\u7f6e\u6a21\u62df\u8bbe\u5b9a\u4eba\u5458\u64cd\u4f5c &#8230;<\/p>\n","protected":false},"author":2,"featured_media":0,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"emotion":"","emotion_color":"","title_style":"","license":"","footnotes":""},"categories":[2],"tags":[],"class_list":["post-81","post","type-post","status-publish","format-standard","hentry","category-java"],"post_thumbnail_image":"","content_first_image":null,"post_medium_image_300":"","post_thumbnail_image_624":"","post_frist_image":null,"post_medium_image":"","post_large_image":"","post_full_image":"","post_all_images":[],"videoAdId":"","listAd":"0","listAdId":"","listAdEvery":6,"total_comments":0,"category_name":"Java","post_date":"2022-08-30","like_count":"0","praiseWord":"\u9f13\u52b1","copyright_state":"","excitationAd":"0","rewardedVideoAdId":"","detailAdId":"","detailAd":"0","enterpriseMinapp":"0","audios":[],"postImageUrl":"https:\/\/wp-moto-1258805347.cos.ap-shanghai.myqcloud.com\/2023\/05\/20230519082947553.jpg","avatarurls":[],"related_posts":null,"pageviews":312,"next_post_id":389,"next_post_title":"\u7ebf\u7a0b\u7684\u521b\u5efa\u53ca\u7ebf\u7a0b\u7684\u57fa\u672c\u4f7f\u7528","previous_post_id":1395,"previous_post_title":"\u7406\u89e3Java\u6ce8\u89e3\u4e0e\u53cd\u5c04\u8fd9\u4e00\u7bc7\u5c31\u591f\u4e86(\u8fd1\u4e07\u5b57,\u5efa\u8bae\u6536\u85cf\uff09","_links":{"self":[{"href":"https:\/\/xinchewhd.com.cn\/index.php\/wp-json\/wp\/v2\/posts\/81","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/xinchewhd.
com.cn\/index.php\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/xinchewhd.com.cn\/index.php\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/xinchewhd.com.cn\/index.php\/wp-json\/wp\/v2\/users\/2"}],"replies":[{"embeddable":true,"href":"https:\/\/xinchewhd.com.cn\/index.php\/wp-json\/wp\/v2\/comments?post=81"}],"version-history":[{"count":0,"href":"https:\/\/xinchewhd.com.cn\/index.php\/wp-json\/wp\/v2\/posts\/81\/revisions"}],"wp:attachment":[{"href":"https:\/\/xinchewhd.com.cn\/index.php\/wp-json\/wp\/v2\/media?parent=81"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/xinchewhd.com.cn\/index.php\/wp-json\/wp\/v2\/categories?post=81"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/xinchewhd.com.cn\/index.php\/wp-json\/wp\/v2\/tags?post=81"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}