Java 爬虫

标签:#java##爬虫# 时间:2020/06/02 14:35:27 作者:shy温温温

package com.lgw.test;

import org.apache.commons.httpclient.Cookie;
import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.NameValuePair;
import org.apache.commons.httpclient.cookie.CookiePolicy;
import org.apache.commons.httpclient.methods.GetMethod;
import org.apache.commons.httpclient.methods.PostMethod;

public class Test5 {

  1. public static void main(String[] args) {
  2. // 登陆 Url
  3. String loginUrl = "https://www.datalearner.com/login";
  4. // 需登陆后访问的 Url
  5. String dataUrl = "https://www.datalearner.com/account/";
  6. HttpClient httpClient = new HttpClient();
  7. // 模拟登陆,按实际服务器端要求选用 Post请求方式
  8. PostMethod postMethod = new PostMethod(loginUrl);
  9. // 设置登陆时要求的信息(表单),用户名和密码
  10. NameValuePair[] data = { new NameValuePair("username", "1733184622@qq.com"),
  11. new NameValuePair("password", "lgw2921352") };
  12. postMethod.setRequestBody(data);
  13. try {
  14. // 设置 HttpClient 接收 Cookie,用与浏览器一样的策略
  15. httpClient.getParams().setCookiePolicy(CookiePolicy.BROWSER_COMPATIBILITY);
  16. int statusCode=httpClient.executeMethod(postMethod);
  17. // 获得登陆后的 Cookie
  18. Cookie[] cookies = httpClient.getState().getCookies();
  19. StringBuffer tmpcookies = new StringBuffer();
  20. for (Cookie c : cookies) {
  21. tmpcookies.append(c.toString() + ";");
  22. System.out.println("cookies = "+c.toString());
  23. }
  24. if(statusCode==302){//重定向到新的URL
  25. System.out.println("模拟登录成功");
  26. // 进行登陆后的操作
  27. GetMethod getMethod = new GetMethod(dataUrl);
  28. // 每次访问需授权的网址时需带上前面的 cookie 作为通行证
  29. getMethod.setRequestHeader("cookie", tmpcookies.toString());
  30. // 你还可以通过 PostMethod/GetMethod 设置更多的请求后数据
  31. // 例如,referer 从哪里来的,UA 像搜索引擎都会表名自己是谁,无良搜索引擎除外
  32. postMethod.setRequestHeader("Referer", "https://www.datalearner.com/signin/");
  33. postMethod.setRequestHeader("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.63 Safari/537.36");
  34. httpClient.executeMethod(getMethod);
  35. // 打印出返回数据,检验一下是否成功
  36. String text = getMethod.getResponseBodyAsString();
  37. System.out.println(text);
  38. }
  39. else {
  40. System.out.println("登录失败");
  41. }
  42. }
  43. catch (Exception e) {
  44. e.printStackTrace();
  45. }
  46. }

}

欢迎大家关注DataLearner官方微信,接收最新的AI技术推送
Back to Top