PHP的cURL模块批量下载脚本

<?php
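/*
    Title:      download resource with regular URLs
    Author:     tunpishuang(http://tunps.com)
    Desc:       Downloads resources whose URLs follow a regular pattern, such as
                increasing numbers or increasing letters.
                The number of loops can be chosen freely.
    Usage:      php.exe this.php url fromValue1 toValue1 fromValue2 toValue2 fromValue3 toValue3 ...
                this.php    is the name of this script
                url         is the address to download, containing "*" wildcards
                fromValue1  is the start value of the first loop
                toValue1    is the end value of the first loop
                fromValue2  and toValue2 work the same way for the second loop,
                and so on up to fromValueN -> toValueN
    Example:    php this.php http://tunps.com/uploads/*//*.jpg 001 206 001 500
                (Note: a "*" directly followed by "/" would end this comment,
                which is presumably why the slash after the first wildcard is
                doubled in the example above.)
    Thx:        jondro (http://stackoverflow.com/questions/2627000/how-to-write-a-function-to-output-unconstant-loop) for writing the key function loop().
*/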
    /**
     * Return the value that follows $num, keeping its zero-padded width.
     *
     * The value is advanced with PHP's ++ operator, so numeric strings count up
     * ("009" -> "010") and alphabetic strings use string increment
     * ("a" -> "b", "z" -> "aa").
     *
     * @param string|int $num Current counter value.
     * @return string The next value, left-padded with "0" to at least the
     *                length of $num.
     */
    function plus($num){
        $len = strlen((string)$num);
        $num++;
        // str_pad() never truncates, so a value that outgrew the original
        // width ("99" -> "100") is returned as is.
        return str_pad((string)$num, $len, "0", STR_PAD_LEFT);
    }
    /**
     * Generate every combination of counter values for a list of ranges.
     *
     * $a is a flat list of bounds: from1, to1, from2, to2, ... The first pair
     * is walked with plus() and each of its values is combined with every
     * combination of the remaining pairs. The values of one combination are
     * joined with "|", first range first.
     *
     * @param array $a Flat list of from/to bounds. It is received by value, so
     *                 the caller's array is left untouched.
     * @return array One entry per combination, e.g. "001|005"; empty when the
     *               first range starts above its end.
     */
    function loop($a) {
        $from1 = array_shift($a);
        $to1 = array_shift($a);
        // The combinations of the remaining ranges are the same for every
        // value of the current range, so build them once up front.
        $rest = sizeof($a) > 0 ? loop($a) : null;
        $result = array();
        while ($from1 <= $to1) {
            if ($rest !== null) {
                foreach ($rest as $b) {
                    $result[] = $from1.'|'.$b;
                }
            } else {
                $result[] = $from1;
            }
            // Stop on the upper bound itself. Relying on "<=" alone never ends
            // for alphabetic bounds: "z" is followed by "aa", and "aa" <= "z"
            // holds when the two are compared as strings.
            if ($from1 == $to1) {
                break;
            }
            $next = plus($from1);
            // A non-numeric value that just grew longer than the upper bound
            // has wrapped around (e.g. "Z" -> "AA" with bound "b") and can
            // never become equal to the bound any more.
            if (!is_numeric($next)
                && strlen((string)$next) > strlen((string)$from1)
                && strlen((string)$next) > strlen((string)$to1)) {
                break;
            }
            $from1 = $next;
        }
        return $result;
    }
    /**
     * Fetch one URL with cURL and save the response body to a local file.
     *
     * The file is written only when the request succeeds with HTTP status 200.
     * Transfer errors and write errors are reported with echo; responses with
     * any other status are skipped without a message.
     *
     * @param string $u URL to fetch.
     * @param mixed  $f Base name of the target file.
     * @param string $e Suffix appended to $f to form the file name.
     * @return bool True when the body was saved, false otherwise.
     */
    function download($u,$f,$e){
        $target = $f.$e;
        $ch = curl_init();
        $options=array(
            CURLOPT_URL=>$u,
            // Return the body from curl_exec() instead of printing it.
            CURLOPT_RETURNTRANSFER=>true
        );
        curl_setopt_array($ch,$options);
        $data = curl_exec($ch);
        // Collect everything needed from the handle before releasing it.
        $error = ($data === false) ? curl_error($ch) : '';
        $status = curl_getinfo($ch,CURLINFO_HTTP_CODE);
        curl_close($ch);

        if($data === false){
            // curl_exec() returns false on transfer errors (DNS, timeout, ...).
            echo "failed:".$u." (".$error.")\n";
            return false;
        }
        if($status != 200){
            return false;
        }
        echo "downloading:".$u."->".$target."\n";
        // file_put_contents() opens, truncates, writes and closes in one call
        // and returns false when the file cannot be written.
        if(file_put_contents($target,$data) === false){
            echo "failed: cannot write ".$target."\n";
            return false;
        }
        return true;
    }
    //main
    // Expected command line: php <script> url from1 to1 [from2 to2 ...]
    if($argc==1){
        print_r("用法:php ".basename(__FILE__)." url from to  from to ...\n");
        print_r("\turl      \t包含通配符*的网址\n");
        print_r("\tfrom \t\turl中通配符的起始数字or字母,按顺序匹配*\n");
        print_r("\tto       \turl中通配符的终止数字or字母,按顺序匹配*\n");
        print_r("\n");
        print_r("例子:\n");
        print_r("\tphp http://tunps.com/*/*.jpg 0 100  5 200 \n");
        print_r("\t\t\t\t         -----  -----\n");
        print_r("\t\t\t\t           ↓     ↓   \n");
        print_r("\t\t\t\t          *    *   \n");
    }else{
        // Besides the script name and the URL at least one complete from/to
        // pair is required; without a range there is nothing to iterate over.
        if($argc < 4 || ($argc-2)%2 != 0){
            echo "argument error";
            exit(1);
        }
        // Every range must start strictly below its end.
        for($i=2; $i+1<$argc; $i+=2){
            if($argv[$i] >= $argv[$i+1]){
                echo "from cannot larger than to";
                exit(1);
            }
        }
        $url=$argv[1];
        // Same pattern as before, except that https:// is accepted as well.
        if(!preg_match("/^https?:\/\/[A-Za-z0-9]+\.[A-Za-z0-9]+[\/=\?%\-&_~`@[\]\':+!]*([^\"\"])*$/",$url) || strpos($url,"*") === false){
            echo "url malformed or lack of char of \"*\" ";
            exit(1);
        }
        // Take the extension from the path part of the URL only. Looking for
        // the last "." in the whole URL picks up the host name (".com/...")
        // when the final path segment has no extension.
        $ext='';
        $path=parse_url($url,PHP_URL_PATH);
        if(is_string($path)){
            $suffix=pathinfo($path,PATHINFO_EXTENSION);
            if($suffix !== ''){
                $ext='.'.$suffix;
            }
        }
        // Downloaded files are named 1, 2, 3, ... followed by the extension.
        $fileName=1;
        // Skip the script name and the URL; the rest are the from/to bounds.
        $rangeArr=array_slice($argv,2);
        $numArr=loop($rangeArr);
        foreach($numArr as $n){
            // The values are reversed and the wildcards are filled from the
            // last "*" backwards, so each replacement leaves the positions of
            // the wildcards before it untouched.
            $num=array_reverse(explode('|',(string)$n));
            $target=$url;
            $i=0;
            while($i<count($num) && ($lastOccur=strrpos($target,"*")) !== false){
                $target=substr_replace($target,$num[$i],$lastOccur,1);
                $i++;
            }
            download($target,$fileName,$ext);
            $fileName++;
        }
    }
    exit();
?>

发表评论

电子邮件地址不会被公开。 必填项已用*标注