<?php
/**
 * Book scraper: reads a book's index page, collects the chapter page names
 * found in it, downloads each chapter page and extracts the chapter text.
 *
 * @name      采集书.PHP (original file name)
 * @date      Sun Mar 01 22:48:02 CST 2009
 * @copyright Ma Yongzhan (MyZ)
 * @author    Ma Yongzhan (MyZ)
 * @link      http://blog.csdn.net/mayongzhan/
 */

//header('Content-Type:text/html;charset=utf8');
header('Content-Type:text/html;charset=gb2312');
error_reporting(E_ALL);
date_default_timezone_set('Asia/Shanghai');
set_time_limit(0); // no execution time limit: the loop below fetches many pages

/**
 * Append $content to the file at $url (binary append mode, file is created
 * when it does not exist yet).
 *
 * @param string $content Data to append.
 * @param string $url     Path of the file to append to.
 *
 * @return bool true when the data was written, false when the file could not
 *              be opened or the write failed.
 */
function writer($content, $url)
{
    $fp = fopen($url, 'ab');
    if ($fp === false) {
        // Passing false on to fwrite()/fclose() raises a TypeError on PHP 8.
        return false;
    }

    $written = fwrite($fp, $content);
    fclose($fp);

    return $written !== false;
}

$folder = '2'; // output folder
$book_base_url = 'xxxxxxxxxxxxxxxxxxxxx';
$book_url = 'yyyyyyyyyyyyy.html';

$main = file_get_contents($book_base_url . $book_url);
if ($main === false) {
    exit('Unable to read the book index page.');
}

// NOTE(review): the quantifier was damaged in the recovered source (".?");
// a lazy ".*?" is assumed here - confirm against real chapter file names.
preg_match_all('/chapter.*?\.html/', $main, $pages);
$pages = array_unique($pages[0]);

foreach ($pages as $value) {
    $page_file = './' . $folder . '/' . $value . '.txt';

    $page = file_get_contents($book_base_url . $value);
    if ($page === false || !writer($page, $page_file)) {
        continue; // skip chapters that cannot be downloaded or stored
    }

    // Read the stored copy back; writer() appends, so this is everything
    // accumulated in the file so far.
    $str = file_get_contents($page_file);
    if ($str === false) {
        continue;
    }
    //print_r($str);

    // Groups: 2 = text between <h1> and </h1>, 6 = content of the contTxt div.
    // NOTE(review): the first group was lost in the recovered source; "<h1>"
    // is assumed from the closing "</h1>" - confirm.
    $matched = preg_match(
        '/(<h1>)(.*?)(<\/h1>)(.*?)(<div id="contTxt" class="contTxt1">)(.*?)(<\/div>)/s',
        $str,
        $arr
    );
    if ($matched !== 1) {
        continue; // page does not have the expected layout
    }
    //print_r($arr);die();

    // Turn paragraph tags into line breaks, then unwrap <span><a>text</a></span>
    // down to its text.
    // NOTE(review): the first alternative of the paragraph pattern was lost in
    // the recovered source; "<p>" is assumed - confirm.
    $arr[6] = preg_replace(
        '/(<span[^>]+>.*?<a[^>]+>)(.*?)(<\/a><\/span>)/s',
        '$2',
        preg_replace('/(<p>|<\/p>)/', "\r\n", $arr[6])
    );

    // NOTE(review): the recovered source is cut off at this point; whatever the
    // original did next with $arr is not visible and has not been reconstructed.
}