Java自动登录并抓取网页相关内容

canca canca
2012-01-12 09:36
1
0

package com.taobao.test;

import java.io.IOException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;

import org.jsoup.Connection;
import org.jsoup.Jsoup;
import org.jsoup.Connection.Method;
import org.jsoup.Connection.Response;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;

/**
 * Logs in to the CNZZ web-statistics site with jsoup, then fetches the
 * statistics pages of individual sites and shows a summary in a Swing dialog.
 *
 * <p>Network failures are handled best-effort: the helpers print the stack
 * trace and return {@code null}, and the callers in this class check for that
 * instead of dereferencing the missing result.
 */
public class Test {
    /** Timeout, in milliseconds, applied to every request made by this class. */
    private static final int TIMEOUT_MILLIS = 10000;

    /** Text shown for a table cell that is absent from the fetched page. */
    private static final String MISSING_CELL = "-";

    /**
     * Logs in once and shows the statistics dialog for two hard-coded site ids.
     *
     * @param args unused
     * @throws Exception declared for backward compatibility with existing callers
     */
    public static void main(String[] args) throws Exception {
        Map<String,String> loginParams = new HashMap<String,String>();
        loginParams.put("username", "xxxx@123.com");
        loginParams.put("password", "xxxxx");
        loginParams.put("list", "1");
        loginParams.put("remuser", "on");

        Object[] result = loginWebSize("http://new.cnzz.com/user/login.php", loginParams, null);
        if (result == null) {
            // loginWebSize already printed the IOException; nothing to show without a session.
            System.err.println("Login request failed; no statistics can be fetched.");
            return;
        }

        // loginWebSize always stores the cookie map at index 1 of a non-null result.
        @SuppressWarnings("unchecked")
        Map<String,String> cookies = (Map<String,String>) result[1];
        showWebInfo("1421407", cookies, "xxxx");
        showWebInfo("2842384", cookies, "xxxx");
    }

    /**
     * Fetches the summary page and the online-list page of one site and shows
     * the extracted figures in a message dialog.
     *
     * <p>The first two rows matched by {@code .gktable tr:gt(1)} are labelled
     * "today" and "yesterday"; cells 1-5 of each row are printed as PV, unique
     * visitors, IP, new unique visitors and views per visitor. A cell that is
     * not present is rendered as {@code "-"}. If the summary page cannot be
     * loaded, a message is written to {@code System.err} and no dialog is shown.
     * If only the online-list page cannot be loaded, the dialog is shown
     * without the two online lines.
     *
     * @param webId   site id substituted for {@code {WEBID}} in the request URLs
     * @param cookies session cookies sent with both requests; may be {@code null}
     * @param webName heading printed on the first line of the dialog text
     */
    public static void showWebInfo(String webId,Map<String,String>cookies,String webName){
        String mainUrl = "http://new.cnzz.com/v1/go_site.php?siteid={WEBID}&s_id=402";
        String onlineUrl = "http://new.cnzz.com/v1/main.php?siteid={WEBID}&s=online_list";
        String siteMainUrl = mainUrl.replace("{WEBID}", webId);
        String siteOnlineUrl = onlineUrl.replace("{WEBID}", webId);

        Document doc = getUrlDocument(siteMainUrl, cookies, "http://new.cnzz.com/v1/main.php?s=site_list", false);
        if (doc == null) {
            System.err.println("Could not load the statistics page for site " + webId);
            return;
        }

        Elements rows = doc.select(".gktable tr:gt(1)");
        StringBuilder msg = new StringBuilder(webName).append("\n");
        for (int i = 0; i < 2; i++) {
            // eq(i) yields an empty selection when the row is missing, so cells may be empty.
            Elements cells = rows.eq(i).select("td");
            msg.append(i == 0 ? "今日统计\n" : "昨日统计\n");
            msg.append("PV:").append(cellText(cells, 1)).append("\n");
            msg.append("独立访客:").append(cellText(cells, 2)).append("\n");
            msg.append("IP:").append(cellText(cells, 3)).append("\n");
            msg.append("新独立访客:").append(cellText(cells, 4)).append("\n");
            msg.append("人均浏览次数:").append(cellText(cells, 5)).append("\n\n");
        }

        // Send the resolved main URL as referrer, not the template containing "{WEBID}".
        Document onlineDoc = getUrlDocument(siteOnlineUrl, cookies, siteMainUrl, false);
        if (onlineDoc != null) {
            Elements curOnlineElems = onlineDoc.select(".online_d1");
            Elements pageViewElems = onlineDoc.select(".online_d2");
            msg.append(curOnlineElems.select(".num3").html()).append(":")
               .append(curOnlineElems.select(".num4").html()).append("\n");
            msg.append(pageViewElems.select(".num3").html()).append(":")
               .append(pageViewElems.select(".num4").html()).append("\n");
        }

        // The last argument is a message type; YES_OPTION (0) equals ERROR_MESSAGE
        // and rendered this report with an error icon.
        javax.swing.JOptionPane.showMessageDialog(null, msg.toString(), "CNZZ统计",
                javax.swing.JOptionPane.INFORMATION_MESSAGE);
    }

    /**
     * Returns the text of the cell at {@code index}, or {@code "-"} when the
     * selection has no such cell.
     */
    private static String cellText(Elements cells, int index) {
        return index < cells.size() ? cells.get(index).text() : MISSING_CELL;
    }

    /**
     * Sends a POST request with the given form parameters and returns the
     * response together with its cookies.
     *
     * @param loginUrl    URL the form is posted to
     * @param loginParams form fields to send; may be {@code null} for none
     * @param referrer    referrer header value; skipped when {@code null} or empty
     * @return a two-element array {@code {Response, Map<String,String> cookies}},
     *         or {@code null} if the request failed with an {@link IOException}
     */
    public static Object[] loginWebSize(String loginUrl,Map<String,String> loginParams,String referrer){
        Connection conn = Jsoup.connect(loginUrl);
        conn.method(Method.POST).timeout(TIMEOUT_MILLIS).followRedirects(true);
        if (referrer != null && !referrer.equals("")) {
            conn.referrer(referrer);
        }
        if (loginParams != null) {
            for (Map.Entry<String,String> param : loginParams.entrySet()) {
                conn.data(param.getKey(), param.getValue());
            }
        }
        try {
            Response response = conn.execute();
            return new Object[]{response, response.cookies()};
        } catch (IOException ex) {
            ex.printStackTrace();
        }
        return null;
    }

    /**
     * Requests a URL with the given cookies and parses the response.
     *
     * @param url          URL to fetch
     * @param cookies      cookies to send; may be {@code null} for none
     * @param referrer     referrer header value; skipped when {@code null} or empty
     * @param isPostMethod {@code true} to use POST, {@code false} to use GET
     * @return the parsed document, or {@code null} if the request failed with an
     *         {@link IOException}
     */
    public static Document getUrlDocument(String url,Map<String,String> cookies,String referrer,boolean isPostMethod){
        Connection conn = Jsoup.connect(url).timeout(TIMEOUT_MILLIS).followRedirects(true);
        if (referrer != null && !referrer.equals("")) {
            conn.referrer(referrer);
        }
        setCookies(conn, cookies);
        try {
            // get()/post() choose the HTTP method themselves, so no separate method(...) call.
            return isPostMethod ? conn.post() : conn.get();
        } catch (IOException ex) {
            ex.printStackTrace();
        }
        return null;
    }

    /**
     * Copies every entry of {@code cookies} onto the connection.
     *
     * @param conn    connection to add the cookies to
     * @param cookies cookie names and values; nothing is done when {@code null}
     */
    public static void setCookies(Connection conn,Map<String,String>cookies){
        if (cookies == null) {
            return;
        }
        for (Map.Entry<String,String> cookie : cookies.entrySet()) {
            conn.cookie(cookie.getKey(), cookie.getValue());
        }
    }
}

发表评论