需求是这样的,需要登录带验证码的网站,获取数据,但是不可能人为一直去记录数据,想通过自动采集的方式进行,如下是试验出来的结果代码!有需要的可以参考下!
1
|
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
|
<?php

namespace Home\Controller;

use Think\Controller;

/**
 * Logs in to a remote, captcha-protected site through cURL and keeps the
 * remote session in a cookie-jar file so that later requests can reuse it.
 *
 * Flow: the browser first loads getVerifyCode() (which creates the jar and
 * shows the captcha image), then submits username/password/captcha to
 * xxxLogin(), which replays them against the remote login form.
 */
class LoginController extends Controller
{
    /**
     * Browser cookie names, indexed by the $cookieNum argument of
     * _cookieRequest(). Both entries are the same name, so both indexes
     * resolve to the same jar file.
     */
    protected $cookieName = array('cookie_verify', 'cookie_verify');

    /** Directory (relative to ROOT_PATH) that holds the jar files. */
    protected $cookiePath = '/cookie/';

    /** Jar file paths, filled by _initialize(), parallel to $cookieName. */
    protected $cookiePathFile = array();

    /**
     * Renders the login page template.
     */
    public function index()
    {
        $this->display();
    }

    /**
     * Builds one jar file path per configured cookie name.
     */
    public function _initialize()
    {
        foreach ($this->cookieName as $name) {
            $this->cookiePathFile[] = ROOT_PATH . $this->cookiePath . $name . '_xxx.txt';
        }
    }

    /**
     * Logs in to xxx: posts the submitted credentials and captcha to the
     * remote login form and reports the outcome to the user.
     */
    public function xxxLogin()
    {
        $username = I('username');
        $password = I('password');
        $verifyCode = I('verify');

        // The remote form may not respond unless every field is supplied,
        // including the hidden ASP.NET ones.
        $loginData = array(
            '__VIEWSTATE' => '/wEPDwUKMTU0MzAzOTU4NmQYAQUeX19Db250cm9sc1JlcXVpcmVQb3N0QmFja0tleV9fFgEFDExvZ2luX1N1Ym1pdL/yae69NsY163G3yuP0lxjz8oXu',
            '__VIEWSTATEGENERATOR' => 'DC42DE27',
            'txt_UserName' => $username,
            'txt_PWD' => $password,
            'txt_VerifyCode' => $verifyCode,
            'SMONEY' => 'ABC',
            'Login_Submit.x' => '52',
            'Login_Submit.y' => '19',
        );

        $getBack = $this->_cookieRequest('http://xxx.com/noLogin.aspx', $loginData);

        // A failed transfer must not be mistaken for a successful login:
        // without this guard an empty result has no "Warn" parameter and
        // would fall through to the success branch below.
        if (!is_string($getBack) || $getBack === '') {
            $this->error('登录请求失败,请稍后重试', '', 5);
            return;
        }

        if (preg_match('/<div[^\<div]*?id\s*=\s*[\'\"]{1}div_msg[\'\"]{1}.*?>(.*?)<\/div>/s', $getBack, $match)) {
            // Not redirected: the body contains the message <div>.
            echo "matched\r\n";
            print_r($match);
            return;
        }

        // Otherwise $getBack is expected to be the redirect target URL,
        // whose "Warn" query parameter carries the failure reason.
        echo $getBack, '<br />';

        $paramsFull = parse_url($getBack);
        $parsedQuery = array();
        if (is_array($paramsFull) && isset($paramsFull['query'])) {
            parse_str($paramsFull['query'], $parsedQuery);
        }

        if (!empty($parsedQuery['Warn'])) {
            $msg = "您好,欢迎来P,请先登录。";
            switch ($parsedQuery['Warn']) {
                case '2':
                    $msg = '您输入的验证码错误,请重试';
                    break;
                case '3':
                    $msg = '该帐号不存在,还没帐号?';
                    break;
                case '5':
                    $msg = '账户已注销';
                    break;
                case '6':
                    $msg = '密码错误,如果连续错误3次半小时内不能登录!';
                    break;
                case '20':
                    $msg = '今日密码错误3次及以上,请于半小时后再来登录!';
                    break;
                case '21':
                    $msg = '今日您所在IP的所有帐号密码错误9次以上,请于半小时后再来登录!';
                    break;
                case '22':
                    $msg = '登录失败,您所在IP今日登录的帐号过多!';
                    break;
                case '23':
                    $msg = '登录失败,验证码失效!';
                    break;
                case '32':
                    $msg = '该帐号已经绑定其他xx帐号!';
                    break;
                case '33':
                    $msg = '一台电脑一天只能注册一个帐号!';
                    break;
            }
            $this->error($msg, '', 5);
        } else {
            // Mark the local session as logged in.
            $_SESSION['user_id'] = '123456';
            $this->success('登录P网站成功', U('Index/index'), 5);
        }
    }

    /**
     * Fetches the remote captcha image and echoes its raw bytes.
     *
     * Uses cookie index 1 and asks for the body ($redirect = true) so the
     * image data is returned even if the request was redirected.
     */
    public function getVerifyCode()
    {
        $img = $this->_cookieRequest(
            'http://xxx.com/VerifyCode_Login.aspx?id=' . rand(10000, 999999),
            null,
            true,
            1
        );
        echo $img;
    }

    /**
     * Expires the browser cookies that point at the jar files.
     *
     * The jar files themselves are left on disk; once the browser cookie is
     * gone, _cookieRequest() starts a fresh jar at the same path.
     */
    public function clearCookie()
    {
        foreach ($this->cookieName as $name) {
            setcookie($name, '', time() - 3600);
        }
        $this->success('清除cookie成功!');
    }

    /**
     * Performs a cURL request that shares cookies through a jar file.
     *
     * @param string     $url       Address to request; surrounding whitespace is ignored.
     * @param array|null $data      Form fields; when non-empty the request is a POST.
     * @param bool       $redirect  When false (the default) and the final URL differs
     *                              from $url, the final URL is returned instead of the
     *                              body. Pass true to always receive the body.
     * @param int        $cookieNum Index into $cookieName / $cookiePathFile.
     * @return string|false Final URL or response body; false if the transfer failed.
     */
    public function _cookieRequest($url, $data = null, $redirect = false, $cookieNum = 0)
    {
        // Stray whitespace around the URL would break the request and make
        // the redirect comparison below always report a difference.
        $url = trim($url);
        $cookieKey = $this->cookieName[$cookieNum];
        $jarFile = $this->cookiePathFile[$cookieNum];

        $params = array(
            CURLOPT_URL => $url,
            CURLOPT_HEADER => false,           // do not include response headers in the output
            CURLOPT_RETURNTRANSFER => true,    // return the result instead of printing it
            CURLOPT_FOLLOWLOCATION => true,    // follow redirects
            CURLOPT_USERAGENT => 'Mozilla/5.0 (Windows NT 5.1; rv:9.0.1) Gecko/20100101 Firefox/9.0.1',
        );

        if ($data) {
            $params[CURLOPT_POST] = true;
            $params[CURLOPT_POSTFIELDS] = http_build_query($data);
        }

        if (!empty($_COOKIE[$cookieKey]) && is_file($jarFile)) {
            // Reuse the existing jar, and write it back as well so cookies
            // issued by this response (e.g. by the login itself) are kept
            // for subsequent requests.
            $params[CURLOPT_COOKIEFILE] = $jarFile;
            $params[CURLOPT_COOKIEJAR] = $jarFile;
        } else {
            // No usable jar yet: start a new one and remember it in a
            // short-lived (120 s) browser cookie.
            $params[CURLOPT_COOKIEJAR] = $jarFile;
            setcookie($cookieKey, $jarFile, time() + 120);
        }

        $ch = curl_init();
        curl_setopt_array($ch, $params);
        $content = curl_exec($ch);
        $headers = curl_getinfo($ch);
        curl_close($ch);

        if ($content === false) {
            return false;
        }

        if (!$redirect && $url != $headers['url']) {
            return $headers['url'];
        }

        return $content;
    }
}
登录以后,就可以使用带cookie的访问其他页面了!
ps:php curl 登录淘宝
提交后页面提示需要填写验证码,无法登录。
填写验证码提交:
1
|
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
|
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=gb2312" />
<title></title>
</head>
<body>
<!-- b.php is loaded in the frame; the form below posts the raw login fields to tb.php as "vv". -->
<iframe id='img' src="b.php" width="950" height="300" scrolling="No" frameborder="0"></iframe>
<form action="tb.php" method="POST">
<!--
  Ampersands are written as &amp; so the browser does not decode sequences
  such as "&not" into a character reference; the submitted value still
  contains plain "&" separators.
  NOTE(review): the spaces around "default" and inside the tid token are
  reproduced as published and look like copy/paste artifacts; confirm
  against the string printed by b.php. Append the captcha text after
  "TPL_checkcode=" before submitting.
-->
<textarea name="vv" cols="50" rows="10">umto=&amp;action=Authenticator&amp;event_submit_do_login=anything&amp;from=tb&amp;fc= default &amp;style= default &amp;css_style=&amp;tid=XOR_1_000000000000000000000000000000_635045544 70A7C717F750278&amp;support=000001&amp;CtrlVersion=1,0,0,7&amp;loginType=3&amp;minititle=&amp;minipara=&amp;pstrong=&amp;llnick=&amp;sign=&amp;need_sign=&amp;isIgnore=&amp;full_redirect=&amp;popid=&amp;callback=&amp;guf=&amp;not_duplite_str=&amp;need_user_id=&amp;poy=XOR_1_000000000000000000000000000000_625A424A45137C6F7A7F0B786D08&amp;gvfdcname=&amp;gvfdcre=&amp;from_encoding=&amp;TPL_redirect_url=http:www.taobao.com&amp;TPL_username=xxx&amp;TPL_password=xxxx&amp;need_check_code=&amp;&amp;TPL_checkcode=</textarea>
<input type="submit" />
</form>
</body>
</html>
1
|
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
|
<?php
// tb.php: submits the login fields posted by the form (field "vv") to the
// Taobao login endpoint, reusing the cookie jar that the captcha page stored
// in the session, and prints the response.

session_start();

// Without a jar from the captcha step there is no remote session to log in with.
if (empty($_SESSION['cookie_jar'])) {
    exit();
}

$cookie_jar = $_SESSION['cookie_jar'];
$post_fields = isset($_POST['vv']) ? $_POST['vv'] : '';

$user_agent = "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; Trident/4.0; QQWubi 133; Embedded Web Browser from: http://bsalsa.com/; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Tablet PC 2.0; .NET4.0C; .NET4.0E; InfoPath.3; Media Center PC 6.0)";

$ch = curl_init('https://login.taobao.com/member/login.jhtml');
curl_setopt($ch, CURLOPT_HEADER, 0);
curl_setopt($ch, CURLOPT_USERAGENT, $user_agent);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_POST, 1);
curl_setopt($ch, CURLOPT_POSTFIELDS, $post_fields);
// NOTE(review): peer verification is disabled, so the credentials are sent
// without authenticating the server certificate chain; enable it once a CA
// bundle is configured.
curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
// 2 is the only supported "check the host name" value (1 is no longer
// accepted by libcurl) and matches what the captcha script uses.
curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, 2);
// Send the cookies saved during the captcha step, and keep whatever the
// login response sets. Without COOKIEFILE the captcha belongs to a session
// the server never sees on this request.
curl_setopt($ch, CURLOPT_COOKIEFILE, $cookie_jar);
curl_setopt($ch, CURLOPT_COOKIEJAR, $cookie_jar);
$data = curl_exec($ch);
curl_close($ch);

echo $data;
exit;

// Unreachable because of the exit above: follow-up request that would print
// the home page using the cookies stored by the login.
$ch = curl_init('http://www.taobao.com');
curl_setopt($ch, CURLOPT_USERAGENT, $user_agent);
curl_setopt($ch, CURLOPT_HEADER, 0);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 0);
curl_setopt($ch, CURLOPT_COOKIEFILE, $cookie_jar);
curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, TRUE);
curl_exec($ch);
curl_close($ch);
?>
提取验证码
1
|
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
|
<?php
// b.php: prepares a Taobao login attempt. It creates a cookie jar, reads the
// "umto" token from the login page, posts the login fields once without a
// captcha, then prints the full field string (for copying into the form)
// together with the captcha image found in the response.

session_start();

// A new jar file is created on every load; its path is kept in the session
// so that the submitting script can reuse the same remote session.
$cookie_jar = tempnam('./temp/', 'cookie');
$_SESSION['cookie_jar'] = $cookie_jar;

$login_url = 'https://login.taobao.com/member/login.jhtml';
$user_agent = "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; Trident/4.0; QQWubi 133; Embedded Web Browser from: http://bsalsa.com/; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Tablet PC 2.0; .NET4.0C; .NET4.0E; InfoPath.3; Media Center PC 6.0)";

// NOTE(review): the spaces around "default" and inside the tid token are
// reproduced as published and look like copy/paste artifacts; confirm
// against the live login form.
$post_fields = 'action=Authenticator&event_submit_do_login=anything&from=tb'
    . '&fc= default &style= default &css_style='
    . '&tid=XOR_1_000000000000000000000000000000_635045544 70A7C717F750278'
    . '&support=000001&CtrlVersion=1,0,0,7&loginType=3&minititle=&minipara='
    . '&pstrong=&llnick=&sign=&need_sign=&isIgnore=&full_redirect=&popid='
    . '&callback=&guf=&not_duplite_str=&need_user_id='
    . '&poy=XOR_1_000000000000000000000000000000_625A424A45137C6F7A7F0B786D08'
    . '&gvfdcname=&gvfdcre=&from_encoding='
    . '&TPL_redirect_url=http:www.taobao.com&TPL_username=xxx&TPL_password=xxx';

// Step 1: load the login page to read the hidden "umto" value.
$ch = curl_init($login_url);
curl_setopt($ch, CURLOPT_USERAGENT, $user_agent);
curl_setopt($ch, CURLOPT_HEADER, 0);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, 2);
$data = curl_exec($ch);
curl_close($ch);

// Fall back to an empty token when the request failed or the field is absent.
$umto = '';
if (preg_match("/id=\"um_to\" name=\"umto\" value=\"(.*?)\"\/>/", (string) $data, $arr)) {
    $umto = $arr[1];
}
$post_fields = "umto=" . $umto . "&" . $post_fields . "&TPL_checkcode=";

// Escape for display: unescaped, a browser would decode sequences such as
// "&not" into a character reference and the copied string would be corrupted.
// ISO-8859-1 is used so that escaping never fails on non-UTF-8 bytes.
echo "<textarea cols=50 rows=10>" . htmlspecialchars($post_fields, ENT_QUOTES, 'ISO-8859-1') . "</textarea><br/>";

// Step 2: post the fields without a captcha so the response contains the
// captcha image; the cookies are stored in the jar for the real submit.
$ch = curl_init($login_url);
curl_setopt($ch, CURLOPT_HEADER, 0);
curl_setopt($ch, CURLOPT_USERAGENT, $user_agent);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_POST, 1);
curl_setopt($ch, CURLOPT_POSTFIELDS, $post_fields);
curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, 2);
curl_setopt($ch, CURLOPT_COOKIEJAR, $cookie_jar);
curl_setopt($ch, CURLOPT_COOKIEFILE, $cookie_jar);
$data = curl_exec($ch);
curl_close($ch);

if (preg_match("/img id=\"J_StandardCode_m\" src=\"(.*?)\" data-src=/", (string) $data, $arr1)) {
    // The value comes from an HTML attribute and may already contain
    // entities, so existing entities are not encoded a second time.
    echo '<img src="' . htmlspecialchars($arr1[1], ENT_QUOTES, 'ISO-8859-1', false) . '" />';
} else {
    echo '<p>Captcha image not found in the login response.</p>';
}
exit;
?>