博客
关于我
强烈建议你试试无所不能的chatGPT,快点击我
使用jsoup简单地爬取网页数据
阅读量:5126 次
发布时间:2019-06-13

本文共 9746 字,大约阅读时间需要 32 分钟。

/** * Project Name:JavaTest * File Name:BankOfChinaExchangeRate.java * Package Name:com.lee.javatest * Date:2016年7月22日下午1:34:09 * Copyright (c) 2016年7月22日, Pwenlee All Rights Reserved. **/package com.lee.javatest;import java.io.Serializable;import java.math.BigDecimal;import java.text.SimpleDateFormat;import java.util.ArrayList;import java.util.Arrays;import java.util.Date;import java.util.List;import org.apache.http.HttpEntity;import org.apache.http.HttpResponse;import org.apache.http.HttpStatus;import org.apache.http.client.HttpClient;import org.apache.http.client.methods.HttpGet;import org.apache.http.impl.client.DefaultHttpClient;import org.apache.http.params.CoreConnectionPNames;import org.apache.http.util.EntityUtils;import org.jsoup.Jsoup;import org.jsoup.nodes.Document;import org.jsoup.nodes.Element;import org.jsoup.select.Elements;/** * ClassName:BankOfChinaExchangeRate 
* Function: 中行外汇牌价.
* Date: 2016年7月22日 下午1:34:09
* @author PwenLee * @version * @see */public class BankOfChinaExchangeRate implements Serializable{ private static final Integer DEAFULT_PAGESIZE = 20; private static final long serialVersionUID = -913877619191789389L; /** * 货币名称 中文简体 */ private String currency; /** * 现汇买入价 */ private BigDecimal buyingRate; /** * 现钞买入价 */ private BigDecimal cashBuyingRate; /** * 现汇卖出价 */ private BigDecimal sellingRate; /** * 现钞卖出价 */ private BigDecimal cashSellingRate; /** * 外管局中间价 */ private BigDecimal SAFEMiddleRate; /** * 中行折算价 */ private BigDecimal bankConvertRate; /** * 发布时间 */ private String dateTime; public String getCurrency() { return currency; } public void setCurrency(String currency) { this.currency = currency; } public BigDecimal getBuyingRate() { return buyingRate; } public void setBuyingRate(BigDecimal buyingRate) { this.buyingRate = buyingRate; } public BigDecimal getCashBuyingRate() { return cashBuyingRate; } public void setCashBuyingRate(BigDecimal cashBuyingRate) { this.cashBuyingRate = cashBuyingRate; } public BigDecimal getSellingRate() { return sellingRate; } public void setSellingRate(BigDecimal sellingRate) { this.sellingRate = sellingRate; } public BigDecimal getCashSellingRate() { return cashSellingRate; } public void setCashSellingRate(BigDecimal cashSellingRate) { this.cashSellingRate = cashSellingRate; } public BigDecimal getSAFEMiddleRate() { return SAFEMiddleRate; } public void setSAFEMiddleRate(BigDecimal sAFEMiddleRate) { SAFEMiddleRate = sAFEMiddleRate; } public BigDecimal getBankConvertRate() { return bankConvertRate; } public void setBankConvertRate(BigDecimal bankConvertRate) { this.bankConvertRate = bankConvertRate; } public String getDateTime() { return dateTime; } public void setDateTime(String dateTime) { this.dateTime = dateTime; } /** * * BankOfChinaExchangeRate: * date:日期 例入“2016-07-22” * time:时间 例如“05:30:00” * BankOfChinaCurrencyCode 枚举类 * @author PwenLee * @param startDate * @param endDate * @param currencyCode * @return 
BankOfChinaExchangeRate */ public BankOfChinaExchangeRate (String date, String time, BankOfChinaCurrencyCode currencyCode){ List
context = getExchangeRate(date, time, currencyCode); this.currency = context.get(0); this.buyingRate = new BigDecimal(context.get(1)); this.cashBuyingRate = new BigDecimal(context.get(2)); this.sellingRate = new BigDecimal(context.get(3)); this.cashSellingRate = new BigDecimal(context.get(4)); this.SAFEMiddleRate = new BigDecimal(context.get(5)); this.bankConvertRate = new BigDecimal(context.get(6)); this.dateTime = context.get(7) + " " + context.get(8); } /** * 取当天凌晨05:30:00的数据 */ public BankOfChinaExchangeRate(){ SimpleDateFormat sdf=new SimpleDateFormat("yyyy-MM-dd"); Date date=new Date(); String nowDate=sdf.format(date); List
context = getExchangeRate(nowDate, "05:30:00", BankOfChinaCurrencyCode.USD); this.currency = context.get(0); this.buyingRate = new BigDecimal(context.get(1)); this.cashBuyingRate = new BigDecimal(context.get(2)); this.sellingRate = new BigDecimal(context.get(3)); this.cashSellingRate = new BigDecimal(context.get(4)); this.SAFEMiddleRate = new BigDecimal(context.get(5)); this.bankConvertRate = new BigDecimal(context.get(6)); this.dateTime = context.get(7) + " " + context.get(8); } /** * 模拟请求url,返回html源码 * @author PwenLee * @param url * @return */ private static String GetHtml(String url) { String html = null; HttpClient httpClient = new DefaultHttpClient(); httpClient.getParams().setParameter(CoreConnectionPNames.CONNECTION_TIMEOUT, 20000); HttpGet httpGet = new HttpGet(url); try { HttpResponse httpResponse = httpClient.execute(httpGet); int resStatu = httpResponse.getStatusLine().getStatusCode(); if (resStatu == HttpStatus.SC_OK) { HttpEntity entity = httpResponse.getEntity(); if (entity != null) { html = EntityUtils.toString(entity, "utf-8"); } } } catch (Exception e) { //TODO 打成logger System.out.println("Connect " + url + " error"); e.printStackTrace(); } finally { httpClient.getConnectionManager().shutdown(); } return html; } private List
getExchangeRate(String date, String time, BankOfChinaCurrencyCode currencyCode){ Integer totalPage = totalPage(date, time, currencyCode); List
contextList = new ArrayList
(); if(totalPage <= 0){ //TODO logger return contextList; } String context = ""; for(int i=totalPage;i>=0;i--){ String url = "http://srh.bankofchina.com/search/whpj/search.jsp?erectDate="+date+"&nothing="+date+"&pjname="+currencyCode.getCode()+"&page="+i; String html = GetHtml(url); Document doc = Jsoup.parse(html); Elements linkElements = doc.getElementsByClass("BOC_main"); Elements datas = linkElements.get(0).getElementsByTag("tr"); for (Element ele : datas) { if(ele.text().indexOf(time) != -1){ context = ele.text(); break; } } if(context != ""){ //TODO 换成StringUtils.isNotBlank break; } } if(context == "") {
//TODO 换成StringUtils.isBlank //TODO logger return contextList; }else{ contextList = Arrays.asList(context.split(" ")); } return contextList; } public static Integer totalPage(){ Integer totalPage = 0; try{ SimpleDateFormat sdf=new SimpleDateFormat("yyyy-MM-dd"); Date date=new Date(); String nowDate=sdf.format(date); String url = "http://srh.bankofchina.com/search/whpj/search.jsp?erectDate="+nowDate+"&nothing="+nowDate+"&pjname="+BankOfChinaCurrencyCode.USD.getCode(); String html = GetHtml(url); //截取网页总条数变量 String stringTemp = html.substring(html.indexOf("m_nRecordCount = ")); //获取变量的值 String totalcount = stringTemp.substring(stringTemp.indexOf("m_nRecordCount = ")+"m_nRecordCount = ".length(),stringTemp.indexOf(";")); Integer totalnum = Integer.valueOf(totalcount); if(totalnum % DEAFULT_PAGESIZE == 0){ totalPage = totalnum/DEAFULT_PAGESIZE; }else{ totalPage = totalnum/DEAFULT_PAGESIZE+1; } }catch(Exception e){ //TODO 打成logger } return totalPage; } public static Integer totalPage(String date, String time, BankOfChinaCurrencyCode currencyCode){ Integer totalPage = 0; try{ String url = "http://srh.bankofchina.com/search/whpj/search.jsp?erectDate="+date+"&nothing="+date+"&pjname="+currencyCode.getCode(); String html = GetHtml(url); //截取网页总条数变量 String stringTemp = html.substring(html.indexOf("m_nRecordCount = ")); //获取变量的值 String totalcount = stringTemp.substring(stringTemp.indexOf("m_nRecordCount = ")+"m_nRecordCount = ".length(),stringTemp.indexOf(";")); Integer totalnum = Integer.valueOf(totalcount); if(totalnum % DEAFULT_PAGESIZE == 0){ totalPage = totalnum/DEAFULT_PAGESIZE; }else{ totalPage = totalnum/DEAFULT_PAGESIZE+1; } }catch(Exception e){ //TODO 打成logger } return totalPage; } @Override public String toString() { return "BankOfChinaExchangeRate [currency=" + currency + ", buyingRate=" + buyingRate + ", cashBuyingRate=" + cashBuyingRate + ", sellingRate=" + sellingRate + ", cashSellingRate=" + cashSellingRate + ", SAFEMiddleRate=" + SAFEMiddleRate + ", 
bankConvertRate=" + bankConvertRate + ", dateTime=" + dateTime + "]"; } }
View Code

 

转载于:https://www.cnblogs.com/pwenlee/p/5704010.html

你可能感兴趣的文章
sql server必知多种日期函数时间格式转换
查看>>
jQuery EasyUI 的下拉选择combobox后台动态赋值
查看>>
timeline时间轴进度“群英荟萃”
查看>>
python if else elif statement
查看>>
网络编程
查看>>
文本隐藏(图片代替文字)
查看>>
java面试题
查看>>
提高码力专题(未完待续)
查看>>
pair的例子
查看>>
前端框架性能对比
查看>>
uva 387 A Puzzling Problem (回溯)
查看>>
12.2日常
查看>>
同步代码时忽略maven项目 target目录
查看>>
Oracle中包的创建
查看>>
团队开发之个人博客八(4月27)
查看>>
发布功能完成
查看>>
【原】小程序常见问题整理
查看>>
C# ITextSharp pdf 自动打印
查看>>
【Java】synchronized与lock的区别
查看>>
django高级应用(分页功能)
查看>>