php 采集书并合成txt格式的实现代码

前端之家收集整理的这篇文章主要介绍了 PHP 采集书并合成 txt 格式的实现代码。前端之家小编觉得挺不错的，现在分享给大家，也给大家做个参考。

<?php
/**
 * Book scraper: downloads a book's index page, fetches every chapter page
 * linked from it, and appends the cleaned chapter text to one TXT file.
 *
 * @name      采集书.php
 * @date      Sun Mar 01 22:48:02 CST 2009
 * @copyright Ma Yongzhan (MyZ)
 * @author    Ma Yongzhan (MyZ)
 * @link      http://blog.csdn.net/mayongzhan/
 */

// Use charset=utf-8 here instead if the scraped pages are UTF-8.
header('Content-Type:text/html;charset=gb2312');
error_reporting(E_ALL);
date_default_timezone_set('Asia/Shanghai');
set_time_limit(0); // scraping many chapters can exceed the default time limit

/**
 * Appends content to a file, creating the file if it does not exist.
 *
 * @param string $content Data to append.
 * @param string $url     Path of the file to append to.
 *
 * @return void
 */
function writer($content, $url)
{
    $fp = fopen($url, 'ab');
    if ($fp === false) {
        // fopen() has already raised a warning; there is no handle to write to.
        return;
    }
    fwrite($fp, $content);
    fclose($fp);
}

$folder = '2'; // output folder, relative to the working directory
$book_base_url = 'xxxxxxxxxxxxxxxxxxxxx'; // placeholder: base URL of the book
$book_url = 'yyyyyyyyyyyyy.html';         // placeholder: index page of the book

// The chapter files and new.txt are written inside this folder.
if (!is_dir('./' . $folder) && !mkdir('./' . $folder, 0777, true)) {
    exit('Unable to create output folder: ' . $folder);
}

$main = file_get_contents($book_base_url . $book_url);
if ($main === false) {
    exit('Unable to download the book index page.');
}

// Collect the distinct chapter page names referenced by the index page.
// NOTE(review): the published listing lost its "*" quantifiers; ".*?" is the
// assumed original form - confirm against the target site's link format.
preg_match_all('/chapter.*?\.html/', $main, $pages);
$pages = array_unique($pages[0]);

// Visual divider written before and after each chapter title in new.txt.
$divider = "------------------------------------------------\r\n------------------------------------------------\r\n------------------------------------------------\r\n";

foreach ($pages as $value) {
    $html = file_get_contents($book_base_url . $value);
    if ($html === false) {
        continue; // skip chapters that fail to download
    }

    // Keep a raw copy of the chapter page next to the merged output.
    writer($html, './' . $folder . '/' . $value . '.txt');

    // Group 2 is the chapter title, group 6 the chapter body.
    // NOTE(review): the opening <h1> tag was missing from the published
    // listing and is inferred from the closing </h1> - confirm.
    $matched = preg_match(
        "/(<h1>)(.*?)(<\/h1>)(.*?)(<div id=\"contTxt\" class=\"contTxt1\">)(.*?)(<\/div>)/s",
        $html,
        $arr
    );
    if (!$matched) {
        continue; // page layout not recognised; nothing to append
    }

    // Turn paragraph tags into line breaks, then unwrap <span><a>..</a></span>
    // so that only the link text remains.
    // NOTE(review): the "<p>" alternative was missing from the published
    // listing and is inferred from the surviving "|<\/p>" - confirm.
    $body = preg_replace("/<p>|<\/p>/", "\r\n", $arr[6]);
    $body = preg_replace("/(<span[^>]+>.*?<a[^>]+>)(.*?)(<\/a><\/span>)/s", "$2", $body);

    $result = "\r\n" . $divider . "----------------" . $arr[2] . "\r\n" . $divider . $body;
    writer($result, './' . $folder . '/new.txt');
}
?>

猜你在找的PHP相关文章