php查词采集器
admin
2023-06-29 15:23:52
0

/**
 * dict.class.php - scrapes dictionary/translation content from Baidu Dict.
 *
 * @copyright      (C) 2014 widuu
 * @license       http://www.widuu.com
 * @lastmodify     2014-2-15
 */

header("content-type:text/html;charset=utf8");

/**
 * Looks a word up on dict.baidu.com and extracts several sections of the
 * returned HTML page with regular expressions.
 *
 * The only public entry point is content(); every other method is a private
 * extractor that works on the page fetched for the current word.
 */
class Dict
{
    /** @var string|null Word currently being looked up (set by content()). */
    private $word;

    /**
     * @var string|null HTML of the result page for $word, or null when it has
     *                  not been fetched yet. Lets all extractors share one
     *                  HTTP request instead of issuing one each.
     */
    private $page = null;

    /** @var int Maximum number of phrase entries returned by getPhrase(). */
    private static $num = 10;

    public function __construct()
    {
    }

    /**
     * Fetches the Baidu Dict page for a word and returns everything scraped from it.
     *
     * @param string $word English word to look up
     * @return array array(
     *                  "symbol"  => phonetic symbols,
     *                  "pro"     => pronunciation URLs,
     *                  "example" => example sentences,
     *                  "explain" => brief definitions,
     *                  "synonym" => synonyms / antonyms,
     *                  "phrase"  => phrases
     *               )
     */
    public function content($word)
    {
        $this->word = $word;
        // A new lookup must not reuse the page cached for a previous word.
        $this->page = null;

        return array(
            "symbol"  => $this->Pronounced(),
            "pro"     => $this->getSay(),
            "example" => $this->getExample(),
            "explain" => $this->getExplain(),
            "synonym" => $this->getSynonym(),
            "phrase"  => $this->getPhrase(),
        );
    }

    /**
     * Downloads the Baidu Dict result page for the current word via cURL.
     *
     * The page is fetched at most once per content() call and cached, so the
     * six extractors do not each trigger their own request (and their own
     * 30-second timeout on failure).
     *
     * @return string page HTML, or an empty string when the request failed
     */
    private function getContent()
    {
        if ($this->page !== null) {
            return $this->page;
        }

        $useragent = "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:23.0) Gecko/20100101 Firefox/23.0";
        // urlencode() keeps words containing spaces, '&', '#', etc. from breaking the query string.
        $url = "http://dict.baidu.com/s?wd=" . urlencode((string) $this->word);

        $ch = curl_init();
        curl_setopt($ch, CURLOPT_URL, $url);
        curl_setopt($ch, CURLOPT_USERAGENT, $useragent);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, TRUE);
        curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
        curl_setopt($ch, CURLOPT_HTTPGET, 1);
        curl_setopt($ch, CURLOPT_AUTOREFERER, 1);
        curl_setopt($ch, CURLOPT_HEADER, 0);
        curl_setopt($ch, CURLOPT_TIMEOUT, 30);

        $result = curl_exec($ch);
        // The error check must use the same handle the request ran on ($ch).
        if (curl_errno($ch)) {
            echo 'Errno' . curl_error($ch);
        }
        curl_close($ch);

        // curl_exec() returns false on failure; normalise to a string so the
        // preg_* calls in the extractors always receive valid input.
        $this->page = is_string($result) ? $result : '';

        return $this->page;
    }

    /**
     * Runs a pattern over the page and returns the first two captures of
     * group 1 as an en/us pair. A missing capture yields null.
     *
     * @param string $pattern PCRE pattern with one capturing group
     * @return array array('en' => string|null, 'us' => string|null)
     */
    private function matchEnUsPair($pattern)
    {
        preg_match_all($pattern, $this->getContent(), $found);

        return array(
            'en' => isset($found[1][0]) ? $found[1][0] : null,
            'us' => isset($found[1][1]) ? $found[1][1] : null,
        );
    }

    /**
     * Extracts the text between `"EN-US">` and `</b>` on the page; content()
     * exposes it under the "symbol" key (phonetic symbols).
     *
     * @return array array('en' => first match, 'us' => second match)
     */
    private function Pronounced()
    {
        return $this->matchEnUsPair("/\"EN\-US\"\>(.*)\<\/b\>/Ui");
    }

    /**
     * Extracts the values of `url="..."` attributes on the page; content()
     * exposes them under the "pro" key (pronunciation).
     *
     * @return array array('en' => first match, 'us' => second match)
     */
    private function getSay()
    {
        return $this->matchEnUsPair("/url=\"(.*)\"/Ui");
    }

    /**
     * Extracts example sentences from the page's `var example_data = ...;`
     * JavaScript literal.
     *
     * The first string of every innermost `["...", ...]` entry is treated as
     * one token; the tokens of each token list are joined with spaces into a
     * sentence, and consecutive sentences are grouped in pairs.
     *
     * @return array list of array(sentence, sentence) pairs; empty when the
     *               page carries no example data
     */
    private function getExample()
    {
        preg_match_all("/var example_data = (.*)\]\;/Us", $this->getContent(), $example);
        if (!isset($example[1][0])) {
            return array();
        }

        // Normalise the nesting depth at the start so the "[[[" split below is uniform.
        $normalized = "[[[" . ltrim($example[1][0], "[");

        $sentences = array();
        foreach (explode("[[[", $normalized) as $group) {
            foreach (explode("[[", "[[" . $group) as $tokenList) {
                preg_match_all("/\[\"(.*)\",/Us", "[" . $tokenList, $match);
                if (!empty($match[1])) {
                    // implode(separator, array): the reversed legacy order was removed in PHP 8.
                    $sentences[] = implode(" ", $match[1]);
                }
            }
        }

        return array_chunk($sentences, 2);
    }

    /**
     * Extracts the brief definitions section (element id "en-simple-means").
     *
     * NOTE(review): the three patterns below look damaged by HTML-tag
     * stripping: the first has no closing delimiter, and `(?P.*)` is not a
     * valid named group although the code reads the groups "adj", "name",
     * "tag" and "word". They are kept as found because the original markup
     * cannot be recovered from this file; restore them from the upstream
     * source. Until then this method returns empty arrays.
     *
     * @return array array("x" => array(part of speech => meaning),
     *                     "b" => array(tag => word))
     */
    private function getExplain()
    {
        preg_match_all("/id\=\"en\-simple\-means\"\>(.*)\/Us", $this->getContent(), $explain);
        $r_data = isset($explain[1][0]) ? $explain[1][0] : '';

        preg_match_all("/\\(?P.*)\<\/strong\>\(?P.*)\<\/span\>\<\/p\>/Us", $r_data, $a_data);
        preg_match_all("/\(?P[^\>]+)\:\(?P.*)\<\/a\>\<\/span\>/Us", $r_data, $b_data);

        $result = array();
        if (isset($a_data["adj"], $a_data["name"])) {
            foreach ($a_data["adj"] as $i => $adj) {
                $result[$adj] = $a_data["name"][$i];
            }
        }

        $word_b = array();
        if (isset($b_data["tag"], $b_data["word"])) {
            foreach ($b_data["tag"] as $i => $tag) {
                $word_b[$tag] = strip_tags($b_data["word"][$i]);
            }
        }

        return array("x" => $result, "b" => $word_b);
    }

    /**
     * Extracts the synonym / antonym section (element id "en-syn-ant").
     *
     * NOTE(review): several literals below look damaged by HTML-tag
     * stripping: the section pattern has lost its terminator, the explode()
     * delimiter is empty (explode() rejects an empty separator, with a
     * ValueError on PHP 8), and the inner patterns have lost their opening
     * tags and the names of the groups "adj", "content" and "title". They are
     * kept as found; restore them from the upstream source. Until then this
     * method returns an empty array.
     *
     * @return array nested array: section index => heading => title => words
     */
    private function getSynonym()
    {
        preg_match_all("/id=\"en\-syn\-ant\"\>(.*)/Us", $this->getContent(), $synonym);
        $content = isset($synonym[1][0]) ? $synonym[1][0] : '';
        if ($content === '') {
            return array();
        }

        $sections = explode("", $content);
        if (!is_array($sections)) {
            return array();
        }

        $result = array();
        foreach ($sections as $key => $section) {
            preg_match_all("/\(?P.*)\ \;\<\/strong\>\<\/div\>\\(?.*)\<\/ul\>/Us", $section, $r_data);
            if (!isset($r_data["adj"], $r_data["content"])) {
                continue;
            }

            foreach ($r_data["content"] as $k => $list) {
                if (empty($list)) {
                    continue;
                }

                preg_match_all("/\\(?P.*)\<\/p\>(?P<value>.*)\<\/li>/Us", $list, $v_data);
                if (!isset($v_data['title'], $v_data['value'])) {
                    continue;
                }

                foreach ($v_data['title'] as $m => $title) {
                    // Turn each closing </a> into a space so adjacent words stay separated.
                    $words = strip_tags(preg_replace("<</a>>", " ", $v_data["value"][$m]));
                    $result[$key][$r_data["adj"][$k]][$title] = $words;
                }
            }
        }

        return $result;
    }

    /**
     * Extracts phrases from the section between the element with id
     * "en-phrase" and the following `<div class="source">`.
     *
     * The section is split on `</dd>`; at most self::$num entries are read.
     * Within an entry the first `</p>`-terminated fragment is the phrase
     * (spaces removed, used as array key) and the second is its meaning.
     * Entries with additional fragments get those appended in pairs.
     *
     * @return array array(phrase => meaning), where meaning is a string or,
     *               for entries with extra fragments, a nested array
     */
    private function getPhrase()
    {
        preg_match_all("/id=\"en\-phrase\"\>(.*)\<div class\=\"source\"\>/Us", $this->getContent(), $phrase);
        if (!isset($phrase[1][0])) {
            return array();
        }

        $entries = array_slice(explode("</dd>", $phrase[1][0]), 0, self::$num);

        $result = array();
        foreach ($entries as $entry) {
            $parts = explode("</p>", $entry);
            $n = count($parts);

            // The text after the last </dd> has no </p> at all; it is not a phrase entry.
            if ($n < 2) {
                continue;
            }

            if ($n <= 3) {
                $result[str_replace(" ", "", strip_tags($parts[0]))] = strip_tags($parts[1]);
                continue;
            }

            $head = array_slice($parts, 0, 2);
            // Everything between the phrase/meaning pair and the trailing fragment, in pairs.
            $extras = array_chunk(array_diff(array_slice($parts, 0, $n - 1), $head), 2);

            $key_value = trim(str_replace(" ", "", strip_tags($head[0])));
            $result[$key_value] = strip_tags($head[1]);

            foreach ($extras as $chunk) {
                foreach ($chunk as $k => $v) {
                    $chunk[$k] = strip_tags($v);
                }
                $result[$key_value] = array($result[$key_value], $chunk);
            }
        }

        return $result;
    }

    /**
     * Converts an array to its var_export() string form with slashes added.
     *
     * Not called from within this class at present.
     *
     * @param  array $data       array to convert
     * @param  bool  $isformdata when 0, new_stripslashes() is not applied first; defaults to 1
     * @return string exported string, or '' when $data compares equal to ''
     */
    private function array2string($data, $isformdata = 1)
    {
        if ($data == '') {
            return '';
        }
        if ($isformdata) {
            $data = $this->new_stripslashes($data);
        }

        return addslashes(var_export($data, TRUE));
    }

    /**
     * Applies stripslashes() to a string, or recursively to every element of an array.
     *
     * @param string|array $string value to process
     * @return mixed the processed string or array
     */
    private function new_stripslashes($string)
    {
        if (!is_array($string)) {
            return stripslashes($string);
        }
        foreach ($string as $key => $val) {
            $string[$key] = $this->new_stripslashes($val);
        }

        return $string;
    }
}

// Example usage:
// $dict = new Dict();
// $result = $dict->content("express");
style="background-image: url('/uploadfile/data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABAQMAAAAl21bKAAAAA1BMVEXy8vJkA4prAAAACklEQVQI12NgAAAAAgAB4iG8MwAAAABJRU5ErkJggg==')"></div> </div> <!--end::Symbol--> <!--begin::Title--> <div class="m-0"> <a href="/news/show-14525096.html" class="text-dark fw-bold text-hover-primary fs-6">演唱会大量邀请票被当众焚毁,警...</a> <span class="text-gray-600 fw-semibold d-block pt-1 fs-7">极目新闻记者 杜光然5月29日,网友发帖称,有人当众焚烧大量演唱会邀请票,视频定位于温岭市体育中心。...</span> </div> <!--end::Title--> </div> <!--begin::Item--> <div class="d-flex flex-stack mb-7"> <!--begin::Symbol--> <div class="symbol symbol-60px symbol-2by3 me-4"> <div class="symbol-label" style="background-image: url('/static/assets/images/nopic.gif')"></div> </div> <!--end::Symbol--> <!--begin::Title--> <div class="m-0"> <a href="/news/show-14525095.html" class="text-dark fw-bold text-hover-primary fs-6">多所高校撤销外语学院</a> <span class="text-gray-600 fw-semibold d-block pt-1 fs-7">人工智能的发展及其他因素的影响下,外语类专业面临挑战,近年来一些高校对外语学院进行调整、重组。近日,...</span> </div> <!--end::Title--> </div> <!--begin::Item--> <div class="d-flex flex-stack mb-7"> <!--begin::Symbol--> <div class="symbol symbol-60px symbol-2by3 me-4"> <div class="symbol-label" style="background-image: url('/static/assets/images/nopic.gif')"></div> </div> <!--end::Symbol--> <!--begin::Title--> <div class="m-0"> <a href="/news/show-14525094.html" class="text-dark fw-bold text-hover-primary fs-6">解放军少将质问日方:有没有资格...</a> <span class="text-gray-600 fw-semibold d-block pt-1 fs-7">【环球时报-环球网报道 记者 郭媛丹 苏雅瑄】 “殷鉴不远,今天的世界又处在新的十字路口,我们必须警...</span> </div> <!--end::Title--> </div> <!--begin::Item--> <div class="d-flex flex-stack mb-7"> <!--begin::Symbol--> <div class="symbol symbol-60px symbol-2by3 me-4"> <div class="symbol-label" style="background-image: url('/static/assets/images/nopic.gif')"></div> </div> <!--end::Symbol--> <!--begin::Title--> <div class="m-0"> <a href="/news/show-14525093.html" class="text-dark fw-bold text-hover-primary fs-6">《给阿嬷的情书》导演被网友二创...</a> 
<span class="text-gray-600 fw-semibold d-block pt-1 fs-7">《给阿嬷的情书》这段解读直接破防!网友二创还原木生守桥的画面,把邮差落水,弄丢写有真相的讣告信件,解...</span> </div> <!--end::Title--> </div> <!--begin::Item--> <div class="d-flex flex-stack mb-7"> <!--begin::Symbol--> <div class="symbol symbol-60px symbol-2by3 me-4"> <div class="symbol-label" style="background-image: url('/static/assets/images/nopic.gif')"></div> </div> <!--end::Symbol--> <!--begin::Title--> <div class="m-0"> <a href="/news/show-14525092.html" class="text-dark fw-bold text-hover-primary fs-6">伊朗最高领袖顾问批特朗普背弃外...</a> <span class="text-gray-600 fw-semibold d-block pt-1 fs-7">据凤凰卫视报道,美伊谈判前景不明,伊朗最高领袖顾问雷扎伊5月30日指责美国总统特朗普第三次背弃外交原...</span> </div> <!--end::Title--> </div> <!--begin::Item--> <div class="d-flex flex-stack mb-7"> <!--begin::Symbol--> <div class="symbol symbol-60px symbol-2by3 me-4"> <div class="symbol-label" style="background-image: url('/uploadfile/202605/2eb813ac32fcf3b.png')"></div> </div> <!--end::Symbol--> <!--begin::Title--> <div class="m-0"> <a href="/news/show-14525091.html" class="text-dark fw-bold text-hover-primary fs-6">免去南开大学陈某院长、中山大学...</a> <span class="text-gray-600 fw-semibold d-block pt-1 fs-7">刚刚,南开大学和中山大学发布情况通报。南开大学:免去陈某院长职务南开大学通报中指出,论文第一作者郑某...</span> </div> <!--end::Title--> </div> <!--begin::Item--> <div class="d-flex flex-stack mb-7"> <!--begin::Symbol--> <div class="symbol symbol-60px symbol-2by3 me-4"> <div class="symbol-label" style="background-image: url('/static/assets/images/nopic.gif')"></div> </div> <!--end::Symbol--> <!--begin::Title--> <div class="m-0"> <a href="/news/show-14525090.html" class="text-dark fw-bold text-hover-primary fs-6">绿色算力全栈AI平台在呼和浩特...</a> <span class="text-gray-600 fw-semibold d-block pt-1 fs-7">  新华社呼和浩特5月30日电(记者侯维轶)30日,绿色算力全栈AI平台——内蒙古词元交易平台在内蒙...</span> </div> <!--end::Title--> </div> <!--begin::Item--> <div class="d-flex flex-stack mb-7"> <!--begin::Symbol--> <div class="symbol symbol-60px symbol-2by3 me-4"> <div class="symbol-label" style="background-image: 
url('/static/assets/images/nopic.gif')"></div> </div> <!--end::Symbol--> <!--begin::Title--> <div class="m-0"> <a href="/news/show-14525089.html" class="text-dark fw-bold text-hover-primary fs-6">北京太空智算研究院在北京亦庄成...</a> <span class="text-gray-600 fw-semibold d-block pt-1 fs-7">红星资本局5月30日消息,近日,北京太空智算研究院在北京经济技术开发区(简称“北京经开区”,又称“北...</span> </div> <!--end::Title--> </div> <!--begin::Item--> <div class="d-flex flex-stack mb-7"> <!--begin::Symbol--> <div class="symbol symbol-60px symbol-2by3 me-4"> <div class="symbol-label" style="background-image: url('/static/assets/images/nopic.gif')"></div> </div> <!--end::Symbol--> <!--begin::Title--> <div class="m-0"> <a href="/news/show-14525088.html" class="text-dark fw-bold text-hover-primary fs-6">第二届西部医学科技创新学术大会...</a> <span class="text-gray-600 fw-semibold d-block pt-1 fs-7">封面新闻记者 邱添 2026年5月30日,在第十个全国科技工作者日来临之际,一场汇聚医学前沿智慧与创...</span> </div> <!--end::Title--> </div> </div> <!--end::Body--> </div> <!--end::Chart Widget 35--> </div> <!--end::Col--> </div> </div> <!--end::Content container--> </div> <!--end::Content--> </div> <!--end::Content wrapper--> <!--begin::Footer--> <div id="kt_app_footer" class="app-footer"> <!--begin::Footer container--> <div class="app-container container-xxl d-flex flex-column flex-md-row flex-center flex-md-stack py-3"> <!--begin::Copyright--> <div class="text-dark order-2 order-md-1"> <span class="text-muted fw-semibold me-1">2026 ©</span> 太卓网<a href="http://www.yexian114.com/">叶县生活网</a> <a href="https://www.wxx86.cn/">编程知识网</a> <a href="http://www.kcwzh.com/">开创游戏网</a> <a href="https://www.caiding5.net">菜丁网</a><a href="http://cn.office369.com">奥飞商务网</a> <a href="http://www.zlnznjj.com/">众乐知识网</a><a href="http://www.lhjia.com/">乐活家</a><a href="http://www.nengyuan100.com/">能源100网</a> <a href="http://it.tdroid.net/">泰达科技</a><a href="http://cn.tdroid.net/">太卓开发网</a><a href="http://cn.yuansudz.com/">元素科技网</a><a href="http://www.80hlw.com/">八零商务网</a><a href="http://game.tdroid.net/">土豆游戏网</a> </div> 
<!--end::Copyright--> <!--begin::Menu--> <ul class="menu menu-gray-600 menu-hover-primary fw-semibold order-1"> <li class="menu-item"> <a href="/news" target="_blank" class="menu-link px-2">科技资讯</a> </li> <li class="menu-item"> <a href="/jishu" target="_blank" class="menu-link px-2">技术分享</a> </li> <li class="menu-item"> <a href="/kaifa" target="_blank" class="menu-link px-2">程序开发</a> </li> <li class="menu-item"> <a href="/weixiu" target="_blank" class="menu-link px-2">设备维修</a> </li> </ul> <!--end::Menu--> </div> <!--end::Footer container--> </div> <!--end::Footer--> </div> <!--end:::Main--> </div> <!--end::Wrapper--> </div> <!--end::Page--> </div> <!--end::App--> <div id="kt_scrolltop" class="scrolltop" data-kt-scrolltop="true"> <!--begin::Svg Icon | path: icons/duotune/arrows/arr066.svg--> <span class="svg-icon"> <svg width="24" height="24" viewBox="0 0 24 24" fill="none" xmlns="http://www.w3.org/2000/svg"> <rect opacity="0.5" x="13" y="6" width="13" height="2" rx="1" transform="rotate(90 13 6)" fill="currentColor"></rect> <path d="M12.5657 8.56569L16.75 12.75C17.1642 13.1642 17.8358 13.1642 18.25 12.75C18.6642 12.3358 18.6642 11.6642 18.25 11.25L12.7071 5.70711C12.3166 5.31658 11.6834 5.31658 11.2929 5.70711L5.75 11.25C5.33579 11.6642 5.33579 12.3358 5.75 12.75C6.16421 13.1642 6.83579 13.1642 7.25 12.75L11.4343 8.56569C11.7467 8.25327 12.2533 8.25327 12.5657 8.56569Z" fill="currentColor"></path> </svg> </span> <!--end::Svg Icon--> </div> <!--begin::Javascript--> <script>var hostUrl = "/static/default/pc/";</script> <!--begin::Global Javascript Bundle(mandatory for all pages)--> <script src="/static/default/pc/plugins/global/plugins.bundle.js"></script> <script src="/static/default/pc/js/scripts.bundle.js"></script> <!--end::Global Javascript Bundle--> <!--end::Javascript--> </body> <!--end::Body--> </html>