1.抓取页面
地址:http://www.meipai.com/medias/hot
/**
 * Download the raw contents of a page.
 *
 * @param string $url Address of the page to fetch.
 *
 * @return string The body exactly as returned by file_get_contents().
 *
 * @throws \RuntimeException When file_get_contents() reports failure.
 */
public function getContentByFilegetcontents($url) {
    $content = file_get_contents($url);

    // file_get_contents() signals failure by returning false. Passing that
    // on would make the later regex extraction quietly produce an empty
    // result, so fail loudly here instead.
    if ($content === false) {
        throw new \RuntimeException('Failed to fetch content from ' . $url);
    }

    return $content;
}
然后我们会获取到整个页面的代码,接下来就是从代码中提取出视频的地址 标题 图片等关键信息
2.提取
分析页面源码可以发现,每个视频的关键信息(视频地址、标题、图片)都集中在一个 <li>…</li> 列表项中,
因此可以通过正则表达式逐项匹配并提取:
/**
 * Extract per-video information from the HTML of a listing page.
 *
 * Visible behaviour: preg_match_all() is run with $pat over $page to collect
 * the matching fragments (each ending in a closing li tag). For a fragment,
 * $pat3 captures the text inside a strong element and its first capture is
 * used as the title. One entry with the keys 'voide', 'title' and 'img' is
 * appended to $list, which is returned.
 *
 * NOTE(review): this listing is damaged. The $pat literal is cut short, the
 * for-loop header is incomplete, and the code that assigns $myvoide and
 * $myimg is not present, so the snippet does not parse as shown. Restore the
 * missing patterns from the original source before using it.
 *
 * The statements after the method (on the same line) close the class and
 * then run the crawler: they instantiate Cutecrawler, fetch $url, pass the
 * content to extracturl() and var_dump() the result.
 *
 * @param string $page HTML source to search.
 *
 * @return array List of arrays with the keys 'voide', 'title' and 'img'.
 */
public function extracturl($page) {
$matches = array();
$voide=array();
$mainurl="";
$list=array();
$j=0;
$pat = "/全部代码
.*?<\/li>/ism"; preg_match_all($pat, $page, $matches, PREG_PATTERN_ORDER); for ($i=0; $i(.*?)<\/strong>/ism"; preg_match_all($pat3, $matches[0][$i], $title, PREG_PATTERN_ORDER); $mytitle= $title[1][0]; $list[$j++]=array( 'voide'=>$myvoide, 'title'=>$mytitle, 'img'=>$myimg); } return $list; } } $url = "http://www.meipai.com/medias/hot"; $crawler = new Cutecrawler(); $content = $crawler->getContentByFilegetcontents($url); $c=$crawler->extracturl($content); var_dump($c); ?>
最后结果:
array(24) {
[0]=>
array(3) {
["voide"]=>
string(51) "http://mvvideo2.meitudata.com/5737fd5caeb838981.mp4"
["title"]=>
string(27) "老师那些年常说的话"
["img"]=>
string(58) "https://cache.yisu.com/upload/information/20200310/52/108720.jpg!thumb320"
}
[1]=>
array(3) {
["voide"]=>
string(50) "http://mvvideo2.meitudata.com/5737fceabf873602.mp4"
["title"]=>
string(21) "女友突然冷落你"
["img"]=>
string(58) "http://mvimg2.meitudata.com/5736d25d0aa5d8991.jpg!thumb320"
}
[2]=>
array(3) {
["voide"]=>
string(51) "http://mvvideo2.meitudata.com/5737f300131e18596.mp4"
["title"]=>
string(27) "女明星之间的内心戏"
["img"]=>
string(58) "https://cache.yisu.com/upload/information/20200310/52/108722.jpg!thumb320"
}
[3]=>
array(3) {
["voide"]=>
string(51) "http://mvvideo2.meitudata.com/5737eb9d0bfc92046.mp4"
["title"]=>
string(24) "真替老师感到悲剧"
["img"]=>
string(57) "https://cache.yisu.com/upload/information/20200310/52/108723.jpg!thumb320"
}
……(其余结果省略)
接下来,你可以把这些数据存入数据库。