<?php
// Migrate data into the news DB by parsing the HTML pages of the old site.
// (This note used to sit as raw text above the opening tag, which sent output
// before header() below and triggered "headers already sent"; it is now a comment.)
header("Content-Type: text/html; charset=UTF-8");
require_once("./ins_lib/ins_string_class.php");
require_once("./ins_lib/ins_db_lib_class.php");

// Shared DB handle used by the migration loop and closed at the end of the script.
$Ins_News_db_lib = new Ins_News_db_lib();
$Ins_News_db_lib->DB_Conn();
$Ins_News_db_lib->DB_Select();

// Base URL of the site being migrated; the numeric post id is appended to it.
$old_url = 'http://aaa.co.kr/';
// Image URL prefix found in the scraped article bodies ...
$img_url_old = "https://t1.daumcdn.net/cfile/tistory";
// ... which is replaced by this prefix before the article is stored.
$img_url = 'http://www.aaaa.co.kr/data/klpnnews_co_kr/tistory/';
// Local directory handed to the image download command.
$img_url_dir = '/home/ins_news3/ins_news-UTF8/data/aaaa/tistory/';
// Value stored in the "name" column of every migrated article.
// NOTE(review): the original line read `aaa""`, which is a parse error; "aaa" is
// assumed to be the intended placeholder - confirm the real reporter name.
$report_name = "aaa";
// Value stored in the "id" column of every migrated article.
$id = "admin";
/*
Reference snippet kept from the original file (appears to be a Python/BeautifulSoup
selector for the title, plus a sample article URL):
title = b.find('div', {'class': 'tit'}).find('a', {'class': 'link'}).contents[0].encode('utf-8')
http://lpnnews.co.kr/483
*/
?>
<?php
// Walk every post id of the old site, scrape the article fields out of the page
// HTML and insert one row per article into the ins_news_user table.
for ($i = 1; $i < 504; $i++) {
    $ct = file_get_contents($old_url . $i);
    if (!$ct) {
        // Page missing or empty: nothing to migrate for this id.
        continue;
    }

    // Publication date, expected as "YYYY.MM.DD HH:MM" inside <span class="date">.
    // Splitting on whitespace, dots and colons tolerates padding around the value
    // and a missing time part; absent fields fall back to 0.
    $s_data = ins_tmp_work('<span class="date">', "<\/span>", $ct);
    $date_fields = preg_split('/[\s.:]+/', trim((string) $s_data), -1, PREG_SPLIT_NO_EMPTY);
    $date_fields = array_pad(array_map('intval', $date_fields), 5, 0);
    list($year, $month, $day, $hour, $minute) = $date_fields;
    $wdate = mktime($hour, $minute, 0, $month, $day, $year);

    // Title and category both live inside the "titleWrap" header block.
    $s_data2 = (string) ins_tmp_work('<div class="titleWrap">', "<\/h2>", $ct);
    $title = strip_tags((string) ins_tmp_work('<h2>', "<\/a>", $s_data2));
    $section_k2 = strip_tags((string) ins_tmp_work('<span class="category">', "<\/a>", $s_data2));
    // NOTE(review): $title and $section_k2 are stored without addslashes() while
    // $body/$sbody are escaped below - confirm what db_data_insert() expects.

    // Article HTML: everything between the article container and the post buttons.
    $body = (string) ins_tmp_work('<div class="tt_article_useless_p_margin">', '<div class="container_postbtn">', $ct);

    // Plain-text summary, rebuilt for EVERY article. The original guarded this with
    // if(!$sbody), but $sbody was never reset, so every article after the first
    // reused (and re-escaped) the first article's summary.
    $body_tmp = preg_replace("/<table(.*?)>(.*?)<\/table>/si", "", $body);
    $body_tmp = strip_tags($body_tmp);
    $body_tmp = stripslashes($body_tmp);
    $body_tmp = trim($body_tmp);
    // chr(194).chr(160) is the UTF-8 byte pair of a non-breaking space.
    $body_tmp = trim($body_tmp, ' ' . chr(194) . chr(160));
    $body_tmp = str_replace(array("\r\n", "\r", "\n"), "", $body_tmp);
    // NOTE(review): this removes every space from the summary - confirm it was not
    // meant to strip "&nbsp;" only.
    $body_tmp = str_replace(" ", "", $body_tmp);
    $sbody = $Ins_News_db_lib->text_cut($body_tmp, "300");

    // Download every image referenced by the article into the local image directory.
    // preg_match_all() alone drives the loop; the former eregi() pre-check was
    // redundant and eregi() no longer exists as of PHP 7.
    if (preg_match_all("/<img[^>]*src=[\"']?([^>\"']+)[\"']?[^>]*>/i", $body, $matches)) {
        foreach ($matches[1] as $v) {
            // The URL comes from scraped HTML, so both arguments are shell-escaped.
            // NOTE(review): e_xec() and the w_get command are not defined in this
            // file - presumably exec()/wget under other names; confirm before running.
            e_xec(" w_get -N -P " . escapeshellarg($img_url_dir) . " " . escapeshellarg($v) . " ");
        }
    }

    // Escape for storage, then point image URLs at the new host.
    $sbody = addslashes($sbody);
    $body = addslashes($body);
    $body = str_replace("$img_url_old", "$img_url", $body);

    // NOTE(review): several variables below ($mail, $stitle, $f, $imgposition,
    // $section, $section_k22, $keyword, $viewonoff, $ip, $local, $news_level, $movie,
    // $r_news, $read_r_news) are not assigned in this file; unless the included
    // libraries define them they are stored as empty strings.
    $insert_data_tmp = array(
        "uid" => "$i",
        "name" => "$report_name",
        "email" => "$mail",
        "title" => "$title",
        "stitle" => "$stitle",
        "sbody" => "$sbody",
        "body" => "$body",
        "userfile" => "$f",
        "imgposition" => "$imgposition",
        "section" => "$section[0]",
        "section_k" => "$section_k2",
        "section2" => "$section_k2",
        "section_k2" => "$section_k22",
        "keyword" => "$keyword",
        "wdate" => "$wdate",
        "onoff" => "1",
        "viewonoff" => "$viewonoff",
        "count" => "1",
        "ip" => "$ip",
        "id" => "$id",
        "local" => "$local",
        "level" => "$news_level",
        "movie" => "$movie",
        "r_news" => "$r_news",
        "read_r_news" => "$read_r_news"
    );
    $Ins_News_db_lib->db_data_insert($insert_data_tmp, "ins_news_user");
}

// All articles processed: release the DB connection.
$Ins_News_db_lib->db_close();
/**
 * Return the text that follows the LAST occurrence of an opening marker.
 *
 * $data is split on both markers; the result is the segment that starts right
 * after the last match of $key1 and runs up to the next match of either marker
 * (or to the end of $data when no marker follows).
 *
 * Both markers are inserted verbatim into a "/.../" regular expression, so the
 * caller must pre-escape "/" and any other metacharacters (e.g. "<\/span>").
 * $key1 is additionally compared literally with the matched text, so it only
 * works as an opener when it matches exactly itself.
 *
 * @param string $key1 Opening marker (regex fragment that matches itself literally).
 * @param string $key2 Closing marker (regex fragment).
 * @param string $data Text to search.
 * @return string|null The extracted segment, or null when $key1 never occurs.
 */
function ins_tmp_work($key1,$key2,$data) {
    $pieces = preg_split("/($key1|$key2)/", (string) $data, -1, PREG_SPLIT_DELIM_CAPTURE);
    if ($pieces === false) {
        // Invalid pattern or PCRE failure: treat it like "marker not found".
        return null;
    }

    // Only the leading segment is trimmed (kept from the original implementation).
    $pieces[0] = trim($pieces[0]);

    // Initialised up front so a missing opener yields null without an
    // "undefined variable" warning.
    $found = null;
    foreach ($pieces as $idx => $piece) {
        // "첨부파일시작" (Korean for "attachment start") is a legacy marker that is
        // treated like an opener when a piece is exactly that string.
        if ($piece === (string) $key1 || $piece === "첨부파일시작") {
            // Later occurrences overwrite earlier ones, so the last opener wins.
            $found = isset($pieces[$idx + 1]) ? $pieces[$idx + 1] : null;
        }
    }
    return $found;
}
?>