php自动获取关键字代码 |
时间:2015-01-23 来源:西部数据 作者:西部数据 |
|
// Example invocation of tiqukeyword(): a candidate keyword must be at least
// $minlen bytes long and must repeat at least $mincipin times to be reported.
// NOTE(review): $tiqustr (the text to scan) is not assigned anywhere in the
// visible code - it has to be defined before this call; confirm with the caller.
$minlen = 4;
$mincipin = 5;
tiqukeyword($tiqustr, $minlen, $mincipin);
/**
 * Extracts repeated substrings ("keywords") from a text.
 *
 * Phase 1 walks every start offset and, for each one, grows a candidate
 * substring from $minlen upwards for as long as its number of non-overlapping
 * occurrences (counted from the start offset onwards) does not drop. The
 * longest such candidate per start offset is recorded.
 *
 * Phase 2 flags candidates that are redundant: a candidate with the same text
 * but a lower count than an earlier one, and candidates whose byte interval
 * lies completely inside another candidate's interval.
 *
 * All offsets and lengths are in BYTES (strlen/substr/strpos are used), so
 * multibyte characters may be split in the middle.
 *
 * The intermediate and final arrays are printed with echo/print_r exactly as
 * before; in addition the final array is now returned.
 *
 * @param string $tiqustr  Text to scan.
 * @param int    $minlen   Minimum keyword length in bytes (>= 1).
 * @param int    $mincipin Minimum number of occurrences (>= 1).
 *
 * @return array List of entries array(offset, length, keyword, count, 0).
 *
 * @throws InvalidArgumentException When $minlen or $mincipin is below 1.
 */
function tiqukeyword($tiqustr, $minlen, $mincipin)
{
    $minlen = (int) $minlen;
    $mincipin = (int) $mincipin;

    // A zero length would make strpos() match forever without advancing and a
    // zero frequency would divide by zero below, so reject both up front.
    if ($minlen < 1 || $mincipin < 1) {
        throw new InvalidArgumentException('$minlen and $mincipin must both be >= 1');
    }

    $strlong = strlen($tiqustr);
    $arr = array();
    $k = -1;

    // ---- Phase 1: collect the longest frequent substring per start offset ----

    // Last offset at which $mincipin copies of a $minlen-byte string still fit.
    // The bound is inclusive so that a repeat filling the text exactly is found.
    $lastStart = $strlong - $mincipin * $minlen;

    for ($i = 0; $i <= $lastStart; $i++) {
        // Longest length for which $mincipin non-overlapping copies fit into
        // the text remaining from offset $i (inclusive bound).
        $maxlen = (int) floor(($strlong - $i) / $mincipin);
        $maxnum = 0;

        for ($j = $minlen; $j <= $maxlen; $j++) {
            $guanjianzi = substr($tiqustr, $i, $j);

            // The occurrence at $i itself counts as the first hit; further
            // hits are searched after it, never overlapping a previous hit.
            $num = 1;
            $wz = $i + $j;
            while ($wz < $strlong) {
                $found = strpos($tiqustr, $guanjianzi, $wz);
                if ($found === false) {
                    break;
                }
                $num++;
                $wz = $found + $j;
            }

            if ($j === $minlen) {
                // The shortest candidate must reach the frequency threshold,
                // otherwise nothing starting at $i can qualify.
                if ($num < $mincipin) {
                    break;
                }
                $k++;
            } elseif ($num < $maxnum) {
                // Growing the candidate lost occurrences: keep the previous one.
                break;
            }

            $maxnum = $num;
            // Entry layout: offset, length, keyword, count, removed-flag.
            $arr[$k] = array($i, $j, $guanjianzi, $num, 0);
        }
    }

    echo '初步得到的数组:';
    print_r($arr);

    // ---- Phase 2: flag duplicate and nested candidates ----

    $arrlong = count($arr);
    for ($i = 0; $i < $arrlong; $i++) {
        if ($arr[$i][4] == 1) {
            continue;
        }
        // $bjarr is the candidate currently being compared; $nowid is its index.
        $bjarr = $arr[$i];
        $nowid = $i;

        for ($j = $i + 1; $j < $arrlong; $j++) {
            if ($arr[$j][4] == 1) {
                continue;
            }

            // Inclusive byte intervals of the current candidate and of entry $j.
            $qujianks = $bjarr[0];
            $qujianjs = $bjarr[0] + $bjarr[1] - 1;
            $a = $arr[$j][0];
            $b = $arr[$j][0] + $arr[$j][1] - 1;

            // Same keyword text seen again with a lower count: drop the later
            // one. Strict comparison keeps numeric-looking strings such as
            // "1e3" and "1000" from being treated as equal.
            if ($bjarr[2] === $arr[$j][2] && $bjarr[3] > $arr[$j][3]) {
                $arr[$j][4] = 1;
            }

            if ($a <= $qujianks && $qujianjs <= $b) {
                // Current candidate lies inside entry $j: if $j is at least as
                // frequent, drop the current one and continue comparing with $j.
                if ($bjarr[3] <= $arr[$j][3]) {
                    $arr[$nowid][4] = 1;
                    $nowid = $j;
                    $bjarr = $arr[$j];
                }
            } elseif ($qujianks <= $a && $b <= $qujianjs) {
                // Entry $j lies inside the current candidate: drop $j.
                $arr[$j][4] = 1;
            }
        }
    }

    echo '<br/><br/><br/><br/>重叠加标记后的数组:';
    print_r($arr);

    // ---- Collect the entries that were not flagged ----

    $jieguoarr = array();
    for ($i = 0; $i < $arrlong; $i++) {
        if ($arr[$i][4] == 0) {
            $jieguoarr[] = $arr[$i];
        }
    }

    echo '<br/><br/><br/><br/>';
    echo '最后得到的数组:';
    print_r($jieguoarr);

    return $jieguoarr;
}
|
|
|
|