java爬虫

标签:#java##爬虫# 时间:2020/06/02 14:35:27 作者:shy温温温

package com.lgw.test;

import org.apache.commons.httpclient.Cookie;
import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.NameValuePair;
import org.apache.commons.httpclient.cookie.CookiePolicy;
import org.apache.commons.httpclient.methods.GetMethod;
import org.apache.commons.httpclient.methods.PostMethod;

public class Test5 {

public static void main(String[] args) {
    // 登陆 Url
    String loginUrl = "https://www.datalearner.com/login";
    // 需登陆后访问的 Url
    String dataUrl = "https://www.datalearner.com/account/";
    HttpClient httpClient = new HttpClient();

    // 模拟登陆,按实际服务器端要求选用 Post请求方式
    PostMethod postMethod = new PostMethod(loginUrl);

    // 设置登陆时要求的信息(表单),用户名和密码
    NameValuePair[] data = { new NameValuePair("username", "1733184622@qq.com"), 
                             new NameValuePair("password", "lgw2921352") };
    postMethod.setRequestBody(data);
    try {
        // 设置 HttpClient 接收 Cookie,用与浏览器一样的策略
        httpClient.getParams().setCookiePolicy(CookiePolicy.BROWSER_COMPATIBILITY);
        int statusCode=httpClient.executeMethod(postMethod);

        // 获得登陆后的 Cookie
        Cookie[] cookies = httpClient.getState().getCookies();
        StringBuffer tmpcookies = new StringBuffer();
        for (Cookie c : cookies) {
            tmpcookies.append(c.toString() + ";");
            System.out.println("cookies = "+c.toString());
        }
        if(statusCode==302){//重定向到新的URL
            System.out.println("模拟登录成功");
            // 进行登陆后的操作
            GetMethod getMethod = new GetMethod(dataUrl);
            // 每次访问需授权的网址时需带上前面的 cookie 作为通行证
            getMethod.setRequestHeader("cookie", tmpcookies.toString());
            // 你还可以通过 PostMethod/GetMethod 设置更多的请求后数据
            // 例如,referer 从哪里来的,UA 像搜索引擎都会表名自己是谁,无良搜索引擎除外
            postMethod.setRequestHeader("Referer", "https://www.datalearner.com/signin/");
            postMethod.setRequestHeader("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.63 Safari/537.36");
            httpClient.executeMethod(getMethod);
            // 打印出返回数据,检验一下是否成功
            String text = getMethod.getResponseBodyAsString();
            System.out.println(text);
        }
        else {
            System.out.println("登录失败");
        }
    }
    catch (Exception e) {
        e.printStackTrace();
    }
}

}

欢迎大家关注DataLearner官方微信,接受最新的AI技术推送