本文提供一款免费的 PHP 问问采集代码。如果你正想采集问问,但是又不知道怎么写采集程序,这里为你提供一款经典的问问采集程序,代码如下:
<?php
/**
 * Soso Wenwen (wenwen.soso.com) question scraper page.
 *
 * Presumably saved as wenwen.php: the search form posts to that name and the
 * script redirects to it. Each request handles ONE search result and then
 * redirects to itself through a JavaScript location change, carrying its
 * state in the query string:
 *
 *   sp - URL-encoded search phrase (a posted "ask" value starts a new crawl)
 *   pg - search result page index; the crawl stops once it exceeds 51
 *   rs - index of the result inside the current page (0-9)
 *   jl - running record counter shown in the progress output
 *
 * The variables $table_prefix, $min_t1, $min_t2, $cj_word, $same_title,
 * $art_author, $t_table, $t_art_title, $t_art_content, $t_art_time,
 * $t_artx_author, $t_catx_id, $cat_id and $keyword are not defined here;
 * presumably they come from the three required files - TODO confirm.
 *
 * NOTE(review): this script relies on the legacy mysql_* extension, which was
 * removed in PHP 7. The connection is presumably opened in conn.php, so the
 * extension cannot be swapped from this file alone.
 */
session_start();
header("content-type:text/html;charset=utf-8");
require("stole_config.php");
require("conn.php");
require("keyword.php");

// A posted phrase starts a new crawl; otherwise continue the one described by
// the query string. Superglobal names are case-sensitive ($_POST / $_GET).
if (!empty($_POST['ask']) && is_string($_POST['ask'])) {
    $ask = urlencode(trim($_POST['ask']));
    $sp = "s" . $ask;
} else {
    $sp = (isset($_GET['sp']) && is_string($_GET['sp'])) ? urlencode($_GET['sp']) : '';
}

// All counters are forced to integers because they are echoed back into a
// <script> block further down.
// NOTE(review): jl defaults to 1 and is incremented per stored record, so the
// reported total looks one higher than the number of inserts - confirm intent.
$jl = empty($_GET['jl']) ? 1 : intval($_GET['jl']);
$pg = isset($_GET['pg']) ? intval($_GET['pg']) : 0;
$rs = isset($_GET['rs']) ? intval($_GET['rs']) : 0;

// Ten results per page: roll over to the next page after index 9.
if ($rs > 9) {
    $rs = 0;
    $pg++;
}

if ($pg > 51) {
    // The phrase is user input, so escape it before printing it as HTML.
    echo "采集完毕! 总共采集 " . htmlspecialchars(urldecode($sp), ENT_QUOTES, 'UTF-8') . " " . $jl . "条记录";
    exit();
}

if ($sp) {
    // do { ... } while (false) is used as a breakable block so every
    // "skip this result" case can bail out and still reach the redirect below.
    do {
        // --- 1. Locate the rs-th question link on the current result page ---
        // Patterns use '#' as delimiter (the markup contains '/') and lazy
        // quantifiers: with the "s" flag a greedy ".*" would swallow the whole
        // list and yield a single bogus match instead of one per link.
        $str = @file_get_contents("http://wenwen.soso.com/z/search.e?sp={$sp}&pg={$pg}");
        $t = '';
        if ($str !== false && preg_match('#<ol class="result_list">(.*?)</ol>#ius', $str, $asklist)) {
            preg_match_all('#<a target="_blank" href="/z/(q.*?\.htm)#ius', $asklist[1], $urllist);
            if (isset($urllist[1][$rs])) {
                $t = $urllist[1][$rs];
            }
        }
        if ($t === '') {
            // Download failed or this page has no result at index rs.
            echo "没有可采集的链接";
            break;
        }

        // --- 2. Skip questions that were already stored ---
        $suid = "ww{$t}";
        $q_suid = mysql_real_escape_string($suid);
        $res = mysql_query("select count(*) from {$table_prefix}c_article where suid='$q_suid' ");
        $row = $res ? mysql_fetch_array($res) : false;
        $sct = $row ? intval($row[0]) : 0;
        if ($sct != 0) {
            echo "已经存在";
            break;
        }

        // --- 3. Download the question page and extract title and answer ---
        $html = @file_get_contents("http://wenwen.soso.com/z/{$t}");
        if ($html === false) {
            // An empty page yields an empty title, which the keyword filter rejects.
            $html = '';
        }
        $html = str_replace(array("</pre>", "<pre>"), "", $html);
        $html = str_replace("<br/><br/><br/>", "<br/><br/>", $html);

        $art_title = '';
        if (preg_match('#<div class="question_main">.*?<h3>(.*?)</h3>#ius', $html, $ask_title)) {
            $art_title = $ask_title[1];
        }

        // preg_match yields a single capture group, so at most one answer
        // block is kept, and only when it is longer than $min_t1 bytes.
        $art_content = '';
        if (preg_match('#<div class="answer_con">(.*?)</div>#ius', $html, $answer)
            && strlen($answer[1]) > $min_t1) {
            $art_content = trim($answer[1]);
        }

        // Unwrap hyperlinks but keep their text ('$2' is the link body).
        $unlinked = preg_replace('#(<a .*?>)(.*?)</a>#ius', '$2', $art_content);
        if ($unlinked !== null) {
            $art_content = trim($unlinked);
        }

        // --- 4. Keep only questions whose title contains a configured keyword ---
        // $cj_word is converted from GBK to UTF-8 before comparing with the page text.
        $word_allow = false;
        foreach (explode(",", (string) iconv("gbk", "utf-8", $cj_word)) as $word) {
            // An empty needle makes substr_count() raise an error; skip it.
            if ($word !== '' && substr_count($art_title, $word) > 0) {
                $word_allow = true;
                break;
            }
        }
        if (!$word_allow) {
            echo "主题不符合要求";
            break;
        }
        if (strlen($art_content) <= $min_t2) {
            echo "长度不够";
            break;
        }

        // --- 5. Store the article ---
        echo "<font color=red>添加中............................</font><br>";
        echo $art_title . "<br>";

        // Text is converted from UTF-8 to GBK before it is written to the database.
        $art_title = iconv('utf-8', 'gbk', $art_title);
        $q_title = mysql_real_escape_string($art_title);
        $res = mysql_query("select count(*) from {$table_prefix}c_article where art_title ='$q_title' ");
        $row = $res ? mysql_fetch_array($res) : false;
        $title_ct = $row ? intval($row[0]) : 0;
        if ($title_ct > 0) {
            // Make duplicate titles distinguishable by appending suffix + count.
            $art_title .= "{$same_title}{$title_ct}";
        }

        // NOTE(review): replacing every plain space with <br> is what the
        // published listing shows; the search string may have been mangled
        // (e.g. originally a newline) - confirm against the original script.
        $art_content = iconv('utf-8', 'gbk', str_replace(" ", "<br>", $art_content));
        // $keyword is presumably a search => replace map from keyword.php.
        $art_content = strtr($art_content, $keyword);
        // NOTE(review): "y-m-d" gives a two-digit year; the original may have
        // been "Y-m-d" before the listing was lowercased - confirm.
        $art_time = date("y-m-d");

        // Scraped text is untrusted: escape every value placed in SQL.
        // NOTE(review): with a GBK connection, escaping is only reliable when
        // the charset was set via mysql_set_charset() - verify in conn.php.
        $q_title = mysql_real_escape_string($art_title);
        $q_content = mysql_real_escape_string($art_content);
        $q_author = mysql_real_escape_string($art_author);

        $sql = "insert into {$table_prefix}c_article(art_title,art_content,art_time,art_author,suid) values('$q_title','$q_content','$art_time','$q_author','$q_suid')";
        mysql_query($sql);

        // Second copy goes into the configured target table, with a category
        // column only when one is configured.
        if (empty($t_catx_id)) {
            $sql2 = "insert into {$t_table}({$t_art_title},{$t_art_content},{$t_art_time},{$t_artx_author}) values('$q_title','$q_content','$art_time','$q_author')";
        } else {
            $q_cat = mysql_real_escape_string($cat_id);
            $sql2 = "insert into {$t_table}({$t_art_title},{$t_art_content},{$t_art_time},{$t_artx_author},{$t_catx_id}) values('$q_title','$q_content','$art_time','$q_author','$q_cat')";
        }
        mysql_query($sql2);
        $jl++;
    } while (false);

    // --- 6. Record progress and hop to the next result ---
    $rs++;

    $f_tt = urldecode($sp) . "--页数" . $pg . " 记录数 " . $jl;
    file_put_contents("ss.txt", $f_tt);
    // $sp is URL-encoded and the counters are integers, so the values are
    // safe inside the quoted JavaScript string.
    echo "<script>location.href='wenwen.php?jl=" . $jl . "&sp=" . $sp . "&pg=" . $pg . "&rs=" . $rs . "';</script>";
    exit();
}
?>
<link href="style.css" rel="stylesheet" type="text/css" />
<table width="700" border="0" align="center" cellspacing="1" bgcolor="#cccccc">
<tr>
<td height="50" align="center" bgcolor="#00cc00"><h1><a href="http://www.111cn.net">荐礼啦</a>知道问问采集插件</h1></td>
</tr>
</table>
<table width="700" border="0" align="center" cellspacing="1" bgcolor="#cccccc" style="margin-top:6px; margin-bottom:6px;">
<tr>
<td height="30" align="center" bgcolor="#ffffff"><a href="cj_config.php">采集设置</a> <a href="uninstall.php" onclick="return confirm('您确定要卸载采集插件吗');">卸载采集</a> <a href="cj_view.php">查看采集记录</a> <a href="cj_help.php">采集帮助</a> <a href="baidu.php" target="_blank">知道采集</a> <a href="wenwen.php" target="_blank">问问采集</a></td>
</tr>
</table>
<form action="wenwen.php" method="post">
<table width="628" height="49" border="0" align="center">
<tr>
<td width="413" align="right"><input name="ask" type="text" id="ask" size="50"></td>
<td width="205"><input type="submit" name="button" id="button" value="问问采集" style=" padding-left:15px; padding-right:15px; height:25px; line-height:25px;"></td>
</tr>
</table>
</form>
|