[ PHPXref.com ] [ Generated: Sun Jul 20 21:11:51 2008 ] [ XOOPS 2.2.3a ]
[ Index ]     [ Variables ]     [ Functions ]     [ Classes ]     [ Constants ]     [ Statistics ]

title

Body

[close]

/html/class/snoopy/ -> Snoopy.class.php (source)

   1  <?php
   2  
   3  /*************************************************

   4  

   5  Snoopy - the PHP net client

   6  Author: Monte Ohrt <monte@ispi.net>

   7  Copyright (c): 1999-2000 ispi, all rights reserved

   8  Version: 1.2

   9  

  10   * This library is free software; you can redistribute it and/or

  11   * modify it under the terms of the GNU Lesser General Public

  12   * License as published by the Free Software Foundation; either

  13   * version 2.1 of the License, or (at your option) any later version.

  14   *

  15   * This library is distributed in the hope that it will be useful,

  16   * but WITHOUT ANY WARRANTY; without even the implied warranty of

  17   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU

  18   * Lesser General Public License for more details.

  19   *

  20   * You should have received a copy of the GNU Lesser General Public

  21   * License along with this library; if not, write to the Free Software

  22   * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

  23  

  24  You may contact the author of Snoopy by e-mail at:

  25  monte@ispi.net

  26  

  27  Or, write to:

  28  Monte Ohrt

  29  CTO, ispi

  30  237 S. 70th suite 220

  31  Lincoln, NE 68510

  32  

  33  The latest version of Snoopy can be obtained from:

  34  http://snoopy.sourceforge.net/

  35  

  36  *************************************************/
  37  
  38  class Snoopy
  39  {
  40      /**** Public variables ****/

  41      
  42      /* user definable vars */

  43  
  44      var $host            =    "www.php.net";        // host name we are connecting to

  45      var $port            =    80;                    // port we are connecting to

  46      var $proxy_host        =    "";                    // proxy host to use

  47      var $proxy_port        =    "";                    // proxy port to use

  48      var $proxy_user        =    "";                    // proxy user to use

  49      var $proxy_pass        =    "";                    // proxy password to use

  50      
  51      var $agent            =    "Snoopy v1.2";        // agent we masquerade as

  52      var    $referer        =    "";                    // referer info to pass

  53      var $cookies        =    array();            // array of cookies to pass

  54                                                  // $cookies["username"]="joe";

  55      var    $rawheaders        =    array();            // array of raw headers to send

  56                                                  // $rawheaders["Content-type"]="text/html";

  57  
  58      var $maxredirs        =    5;                    // http redirection depth maximum. 0 = disallow

  59      var $lastredirectaddr    =    "";                // contains address of last redirected address

  60      var    $offsiteok        =    true;                // allows redirection off-site

  61      var $maxframes        =    0;                    // frame content depth maximum. 0 = disallow

  62      var $expandlinks    =    true;                // expand links to fully qualified URLs.

  63                                                  // this only applies to fetchlinks()

  64                                                  // or submitlinks()

  65      var $passcookies    =    true;                // pass set cookies back through redirects

  66                                                  // NOTE: this currently does not respect

  67                                                  // dates, domains or paths.

  68      
  69      var    $user            =    "";                    // user for http authentication

  70      var    $pass            =    "";                    // password for http authentication

  71      
  72      // http accept types

  73      var $accept            =    "image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, */*";

  74      

  75      var $results        =    "";                    // where the content is put

  76          

  77      var $error            =    "";                    // error messages sent here

  78      var    $response_code    =    "";                    // response code returned from server

  79      var    $headers        =    array();            // headers returned from server sent here

  80      var    $maxlength        =    500000;                // max return data length (body)

  81      var $read_timeout    =    0;                    // timeout on read operations, in seconds

  82                                                  // supported only since PHP 4 Beta 4

  83                                                  // set to 0 to disallow timeouts

  84      var $timed_out        =    false;                // if a read operation timed out

  85      var    $status            =    0;                    // http request status

  86  

  87      var $temp_dir        =    "/tmp";                // temporary directory that the webserver

  88                                                  // has permission to write to.

  89                                                  // under Windows, this should be C:\temp

  90  

  91      var    $curl_path        =    "/usr/local/bin/curl";

  92                                                  // Snoopy will use cURL for fetching

  93                                                  // SSL content if a full system path to

  94                                                  // the cURL binary is supplied here.

  95                                                  // set to false if you do not have

  96                                                  // cURL installed. See http://curl.haxx.se

  97                                                  // for details on installing cURL.

  98                                                  // Snoopy does *not* use the cURL

  99                                                  // library functions built into php,

 100                                                  // as these functions are not stable

 101                                                  // as of this Snoopy release.

 102      

 103      /**** Private variables ****/    

 104      

 105      var    $_maxlinelen    =    4096;                // max line length (headers)

 106      

 107      var $_httpmethod    =    "GET";                // default http request method

 108      var $_httpversion    =    "HTTP/1.0";            // default http request version

 109      var $_submit_method    =    "POST";                // default submit method

 110      var $_submit_type    =    "application/x-www-form-urlencoded";    // default submit type

 111      var $_mime_boundary    =   "";                    // MIME boundary for multipart/form-data submit type

 112      var $_redirectaddr    =    false;                // will be set if page fetched is a redirect

 113      var $_redirectdepth    =    0;                    // increments on an http redirect

 114      var $_frameurls        =     array();            // frame src urls

 115      var $_framedepth    =    0;                    // increments on frame depth

 116      

 117      var $_isproxy        =    false;                // set if using a proxy server

 118      var $_fp_timeout    =    30;                    // timeout for socket connection

 119  

 120  /*======================================================================*\

 121      Function:    fetch

 122      Purpose:    fetch the contents of a web page

 123                  (and possibly other protocols in the

 124                  future like ftp, nntp, gopher, etc.)

 125      Input:        $URI    the location of the page to fetch

 126      Output:        $this->results    the output text from the fetch

 127  \*======================================================================*/
 128  
 129  	function fetch($URI)
 130      {
 131      
 132          //preg_match("|^([^:]+)://([^:/]+)(:[\d]+)*(.*)|",$URI,$URI_PARTS);

 133          $URI_PARTS = parse_url($URI);
 134          if (!empty($URI_PARTS["user"]))
 135              $this->user = $URI_PARTS["user"];
 136          if (!empty($URI_PARTS["pass"]))
 137              $this->pass = $URI_PARTS["pass"];
 138          if (empty($URI_PARTS["query"]))
 139              $URI_PARTS["query"] = '';
 140                  
 141          switch($URI_PARTS["scheme"])
 142          {
 143              case "http":
 144                  $this->host = $URI_PARTS["host"];
 145                  if(!empty($URI_PARTS["port"]))
 146                      $this->port = $URI_PARTS["port"];
 147                  if($this->_connect($fp))
 148                  {
 149                      if($this->_isproxy)
 150                      {
 151                          // using proxy, send entire URI

 152                          $this->_httprequest($URI,$fp,$URI,$this->_httpmethod);
 153                      }
 154                      else
 155                      {
 156                          $path = $URI_PARTS["path"].($URI_PARTS["query"] ? "?".$URI_PARTS["query"] : "");
 157                          // no proxy, send only the path

 158                          $this->_httprequest($path, $fp, $URI, $this->_httpmethod);
 159                      }
 160                      
 161                      $this->_disconnect($fp);
 162  
 163                      if($this->_redirectaddr)
 164                      {
 165                          /* url was redirected, check if we've hit the max depth */

 166                          if($this->maxredirs > $this->_redirectdepth)
 167                          {
 168                              // only follow redirect if it's on this site, or offsiteok is true

 169                              if(preg_match("|^http://".preg_quote($this->host)."|i",$this->_redirectaddr) || $this->offsiteok)
 170                              {
 171                                  /* follow the redirect */

 172                                  $this->_redirectdepth++;
 173                                  $this->lastredirectaddr=$this->_redirectaddr;
 174                                  $this->fetch($this->_redirectaddr);
 175                              }
 176                          }
 177                      }
 178  
 179                      if($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0)
 180                      {
 181                          $frameurls = $this->_frameurls;
 182                          $this->_frameurls = array();
 183                          
 184                          while(list(,$frameurl) = each($frameurls))
 185                          {
 186                              if($this->_framedepth < $this->maxframes)
 187                              {
 188                                  $this->fetch($frameurl);
 189                                  $this->_framedepth++;
 190                              }
 191                              else
 192                                  break;
 193                          }
 194                      }                    
 195                  }
 196                  else
 197                  {
 198                      return false;
 199                  }
 200                  return true;                    
 201                  break;
 202              case "https":
 203                  if(!$this->curl_path)
 204                      return false;
 205                  if(function_exists("is_executable"))
 206                      if (!is_executable($this->curl_path))
 207                          return false;
 208                  $this->host = $URI_PARTS["host"];
 209                  if(!empty($URI_PARTS["port"]))
 210                      $this->port = $URI_PARTS["port"];
 211                  if($this->_isproxy)
 212                  {
 213                      // using proxy, send entire URI

 214                      $this->_httpsrequest($URI,$URI,$this->_httpmethod);
 215                  }
 216                  else
 217                  {
 218                      $path = $URI_PARTS["path"].($URI_PARTS["query"] ? "?".$URI_PARTS["query"] : "");
 219                      // no proxy, send only the path

 220                      $this->_httpsrequest($path, $URI, $this->_httpmethod);
 221                  }
 222  
 223                  if($this->_redirectaddr)
 224                  {
 225                      /* url was redirected, check if we've hit the max depth */

 226                      if($this->maxredirs > $this->_redirectdepth)
 227                      {
 228                          // only follow redirect if it's on this site, or offsiteok is true

 229                          if(preg_match("|^http://".preg_quote($this->host)."|i",$this->_redirectaddr) || $this->offsiteok)
 230                          {
 231                              /* follow the redirect */

 232                              $this->_redirectdepth++;
 233                              $this->lastredirectaddr=$this->_redirectaddr;
 234                              $this->fetch($this->_redirectaddr);
 235                          }
 236                      }
 237                  }
 238  
 239                  if($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0)
 240                  {
 241                      $frameurls = $this->_frameurls;
 242                      $this->_frameurls = array();
 243  
 244                      while(list(,$frameurl) = each($frameurls))
 245                      {
 246                          if($this->_framedepth < $this->maxframes)
 247                          {
 248                              $this->fetch($frameurl);
 249                              $this->_framedepth++;
 250                          }
 251                          else
 252                              break;
 253                      }
 254                  }                    
 255                  return true;                    
 256                  break;
 257              default:
 258                  // not a valid protocol

 259                  $this->error    =    'Invalid protocol "'.$URI_PARTS["scheme"].'"\n';
 260                  return false;
 261                  break;
 262          }        
 263          return true;
 264      }
 265  
 266  /*======================================================================*\

 267      Function:    submit

 268      Purpose:    submit an http form

 269      Input:        $URI    the location to post the data

 270                  $formvars    the formvars to use.

 271                      format: $formvars["var"] = "val";

 272                  $formfiles  an array of files to submit

 273                      format: $formfiles["var"] = "/dir/filename.ext";

 274      Output:        $this->results    the text output from the post

 275  \*======================================================================*/
 276  
 277  	function submit($URI, $formvars="", $formfiles="")
 278      {
 279          unset($postdata);
 280          
 281          $postdata = $this->_prepare_post_body($formvars, $formfiles);
 282              
 283          $URI_PARTS = parse_url($URI);
 284          if (!empty($URI_PARTS["user"]))
 285              $this->user = $URI_PARTS["user"];
 286          if (!empty($URI_PARTS["pass"]))
 287              $this->pass = $URI_PARTS["pass"];
 288          if (empty($URI_PARTS["query"]))
 289              $URI_PARTS["query"] = '';
 290  
 291          switch($URI_PARTS["scheme"])
 292          {
 293              case "http":
 294                  $this->host = $URI_PARTS["host"];
 295                  if(!empty($URI_PARTS["port"]))
 296                      $this->port = $URI_PARTS["port"];
 297                  if($this->_connect($fp))
 298                  {
 299                      if($this->_isproxy)
 300                      {
 301                          // using proxy, send entire URI

 302                          $this->_httprequest($URI,$fp,$URI,$this->_submit_method,$this->_submit_type,$postdata);
 303                      }
 304                      else
 305                      {
 306                          $path = $URI_PARTS["path"].($URI_PARTS["query"] ? "?".$URI_PARTS["query"] : "");
 307                          // no proxy, send only the path

 308                          $this->_httprequest($path, $fp, $URI, $this->_submit_method, $this->_submit_type, $postdata);
 309                      }
 310                      
 311                      $this->_disconnect($fp);
 312  
 313                      if($this->_redirectaddr)
 314                      {
 315                          /* url was redirected, check if we've hit the max depth */

 316                          if($this->maxredirs > $this->_redirectdepth)
 317                          {                        
 318                              if(!preg_match("|^".$URI_PARTS["scheme"]."://|", $this->_redirectaddr))
 319                                  $this->_redirectaddr = $this->_expandlinks($this->_redirectaddr,$URI_PARTS["scheme"]."://".$URI_PARTS["host"]);                        
 320                              
 321                              // only follow redirect if it's on this site, or offsiteok is true

 322                              if(preg_match("|^http://".preg_quote($this->host)."|i",$this->_redirectaddr) || $this->offsiteok)
 323                              {
 324                                  /* follow the redirect */

 325                                  $this->_redirectdepth++;
 326                                  $this->lastredirectaddr=$this->_redirectaddr;
 327                                  if( strpos( $this->_redirectaddr, "?" ) > 0 )
 328                                      $this->fetch($this->_redirectaddr); // the redirect has changed the request method from post to get

 329                                  else
 330                                      $this->submit($this->_redirectaddr,$formvars, $formfiles);
 331                              }
 332                          }
 333                      }
 334  
 335                      if($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0)
 336                      {
 337                          $frameurls = $this->_frameurls;
 338                          $this->_frameurls = array();
 339                          
 340                          while(list(,$frameurl) = each($frameurls))
 341                          {                                                        
 342                              if($this->_framedepth < $this->maxframes)
 343                              {
 344                                  $this->fetch($frameurl);
 345                                  $this->_framedepth++;
 346                              }
 347                              else
 348                                  break;
 349                          }
 350                      }                    
 351                      
 352                  }
 353                  else
 354                  {
 355                      return false;
 356                  }
 357                  return true;                    
 358                  break;
 359              case "https":
 360                  if(!$this->curl_path)
 361                      return false;
 362                  if(function_exists("is_executable"))
 363                      if (!is_executable($this->curl_path))
 364                          return false;
 365                  $this->host = $URI_PARTS["host"];
 366                  if(!empty($URI_PARTS["port"]))
 367                      $this->port = $URI_PARTS["port"];
 368                  if($this->_isproxy)
 369                  {
 370                      // using proxy, send entire URI

 371                      $this->_httpsrequest($URI, $URI, $this->_submit_method, $this->_submit_type, $postdata);
 372                  }
 373                  else
 374                  {
 375                      $path = $URI_PARTS["path"].($URI_PARTS["query"] ? "?".$URI_PARTS["query"] : "");
 376                      // no proxy, send only the path

 377                      $this->_httpsrequest($path, $URI, $this->_submit_method, $this->_submit_type, $postdata);
 378                  }
 379  
 380                  if($this->_redirectaddr)
 381                  {
 382                      /* url was redirected, check if we've hit the max depth */

 383                      if($this->maxredirs > $this->_redirectdepth)
 384                      {                        
 385                          if(!preg_match("|^".$URI_PARTS["scheme"]."://|", $this->_redirectaddr))
 386                              $this->_redirectaddr = $this->_expandlinks($this->_redirectaddr,$URI_PARTS["scheme"]."://".$URI_PARTS["host"]);                        
 387  
 388                          // only follow redirect if it's on this site, or offsiteok is true

 389                          if(preg_match("|^http://".preg_quote($this->host)."|i",$this->_redirectaddr) || $this->offsiteok)
 390                          {
 391                              /* follow the redirect */

 392                              $this->_redirectdepth++;
 393                              $this->lastredirectaddr=$this->_redirectaddr;
 394                              if( strpos( $this->_redirectaddr, "?" ) > 0 )
 395                                  $this->fetch($this->_redirectaddr); // the redirect has changed the request method from post to get

 396                              else
 397                                  $this->submit($this->_redirectaddr,$formvars, $formfiles);
 398                          }
 399                      }
 400                  }
 401  
 402                  if($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0)
 403                  {
 404                      $frameurls = $this->_frameurls;
 405                      $this->_frameurls = array();
 406  
 407                      while(list(,$frameurl) = each($frameurls))
 408                      {                                                        
 409                          if($this->_framedepth < $this->maxframes)
 410                          {
 411                              $this->fetch($frameurl);
 412                              $this->_framedepth++;
 413                          }
 414                          else
 415                              break;
 416                      }
 417                  }                    
 418                  return true;                    
 419                  break;
 420                  
 421              default:
 422                  // not a valid protocol

 423                  $this->error    =    'Invalid protocol "'.$URI_PARTS["scheme"].'"\n';
 424                  return false;
 425                  break;
 426          }        
 427          return true;
 428      }
 429  
 430  /*======================================================================*\

 431      Function:    fetchlinks

 432      Purpose:    fetch the links from a web page

 433      Input:        $URI    where you are fetching from

 434      Output:        $this->results    an array of the URLs

 435  \*======================================================================*/
 436  
 437  	function fetchlinks($URI)
 438      {
 439          if ($this->fetch($URI))
 440          {            
 441  
 442              if(is_array($this->results))
 443              {
 444                  for($x=0;$x<count($this->results);$x++)
 445                      $this->results[$x] = $this->_striplinks($this->results[$x]);
 446              }
 447              else
 448                  $this->results = $this->_striplinks($this->results);
 449  
 450              if($this->expandlinks)
 451                  $this->results = $this->_expandlinks($this->results, $URI);
 452              return true;
 453          }
 454          else
 455              return false;
 456      }
 457  
 458  /*======================================================================*\

 459      Function:    fetchform

 460      Purpose:    fetch the form elements from a web page

 461      Input:        $URI    where you are fetching from

 462      Output:        $this->results    the resulting html form

 463  \*======================================================================*/
 464  
 465  	function fetchform($URI)
 466      {
 467          
 468          if ($this->fetch($URI))
 469          {            
 470  
 471              if(is_array($this->results))
 472              {
 473                  for($x=0;$x<count($this->results);$x++)
 474                      $this->results[$x] = $this->_stripform($this->results[$x]);
 475              }
 476              else
 477                  $this->results = $this->_stripform($this->results);
 478              
 479              return true;
 480          }
 481          else
 482              return false;
 483      }
 484      
 485      
 486  /*======================================================================*\

 487      Function:    fetchtext

 488      Purpose:    fetch the text from a web page, stripping the links

 489      Input:        $URI    where you are fetching from

 490      Output:        $this->results    the text from the web page

 491  \*======================================================================*/
 492  
 493  	function fetchtext($URI)
 494      {
 495          if($this->fetch($URI))
 496          {            
 497              if(is_array($this->results))
 498              {
 499                  for($x=0;$x<count($this->results);$x++)
 500                      $this->results[$x] = $this->_striptext($this->results[$x]);
 501              }
 502              else
 503                  $this->results = $this->_striptext($this->results);
 504              return true;
 505          }
 506          else
 507              return false;
 508      }
 509  
 510  /*======================================================================*\

 511      Function:    submitlinks

 512      Purpose:    grab links from a form submission

 513      Input:        $URI    where you are submitting from

 514      Output:        $this->results    an array of the links from the post

 515  \*======================================================================*/
 516  
 517  	function submitlinks($URI, $formvars="", $formfiles="")
 518      {
 519          if($this->submit($URI,$formvars, $formfiles))
 520          {            
 521              if(is_array($this->results))
 522              {
 523                  for($x=0;$x<count($this->results);$x++)
 524                  {
 525                      $this->results[$x] = $this->_striplinks($this->results[$x]);
 526                      if($this->expandlinks)
 527                          $this->results[$x] = $this->_expandlinks($this->results[$x],$URI);
 528                  }
 529              }
 530              else
 531              {
 532                  $this->results = $this->_striplinks($this->results);
 533                  if($this->expandlinks)
 534                      $this->results = $this->_expandlinks($this->results,$URI);
 535              }
 536              return true;
 537          }
 538          else
 539              return false;
 540      }
 541  
 542  /*======================================================================*\

 543      Function:    submittext

 544      Purpose:    grab text from a form submission

 545      Input:        $URI    where you are submitting from

 546      Output:        $this->results    the text from the web page

 547  \*======================================================================*/
 548  
 549  	function submittext($URI, $formvars = "", $formfiles = "")
 550      {
 551          if($this->submit($URI,$formvars, $formfiles))
 552          {            
 553              if(is_array($this->results))
 554              {
 555                  for($x=0;$x<count($this->results);$x++)
 556                  {
 557                      $this->results[$x] = $this->_striptext($this->results[$x]);
 558                      if($this->expandlinks)
 559                          $this->results[$x] = $this->_expandlinks($this->results[$x],$URI);
 560                  }
 561              }
 562              else
 563              {
 564                  $this->results = $this->_striptext($this->results);
 565                  if($this->expandlinks)
 566                      $this->results = $this->_expandlinks($this->results,$URI);
 567              }
 568              return true;
 569          }
 570          else
 571              return false;
 572      }
 573  
 574      
 575  
 576  /*======================================================================*\

 577      Function:    set_submit_multipart

 578      Purpose:    Set the form submission content type to

 579                  multipart/form-data

 580  \*======================================================================*/
 581  	function set_submit_multipart()
 582      {
 583          $this->_submit_type = "multipart/form-data";
 584      }
 585  
 586      
 587  /*======================================================================*\

 588      Function:    set_submit_normal

 589      Purpose:    Set the form submission content type to

 590                  application/x-www-form-urlencoded

 591  \*======================================================================*/
 592  	function set_submit_normal()
 593      {
 594          $this->_submit_type = "application/x-www-form-urlencoded";
 595      }
 596  
 597      
 598      
 599  
 600  /*======================================================================*\

 601      Private functions

 602  \*======================================================================*/
 603      
 604      
 605  /*======================================================================*\

 606      Function:    _striplinks

 607      Purpose:    strip the hyperlinks from an html document

 608      Input:        $document    document to strip.

 609      Output:        $match        an array of the links

 610  \*======================================================================*/
 611  
 612  	function _striplinks($document)
 613      {    
 614          preg_match_all("'<\s*a\s.*?href\s*=\s*            # find <a href=
 615                          ([\"\'])?                    # find single or double quote
 616                          (?(1) (.*?)\\1 | ([^\s\>]+))        # if quote found, match up to next matching
 617                                                      # quote, otherwise match up to next space

 618                          'isx",$document,$links);
 619                          
 620  
 621          // catenate the non-empty matches from the conditional subpattern

 622  
 623          while(list($key,$val) = each($links[2]))
 624          {
 625              if(!empty($val))
 626                  $match[] = $val;
 627          }                
 628          
 629          while(list($key,$val) = each($links[3]))
 630          {
 631              if(!empty($val))
 632                  $match[] = $val;
 633          }        
 634          
 635          // return the links

 636          return $match;
 637      }
 638  
 639  /*======================================================================*\

 640      Function:    _stripform

 641      Purpose:    strip the form elements from an html document

 642      Input:        $document    document to strip.

 643      Output:        $match        a string of the matched form tags, joined by CRLF

 644  \*======================================================================*/
 645  
 646  	function _stripform($document)
 647      {    
 648          preg_match_all("'<\/?(FORM|INPUT|SELECT|TEXTAREA|(OPTION))[^<>]*>(?(2)(.*(?=<\/?(option|select)[^<>]*>[\r\n]*)|(?=[\r\n]*))|(?=[\r\n]*))'Usi",$document,$elements);
 649          
 650          // catenate the matches

 651          $match = implode("\r\n",$elements[0]);
 652                  
 653          // return the links

 654          return $match;
 655      }
 656  
 657      
 658      
 659  /*======================================================================*\

 660      Function:    _striptext

 661      Purpose:    strip the text from an html document

 662      Input:        $document    document to strip.

 663      Output:        $text        the resulting text

 664  \*======================================================================*/
 665  
 666  	function _striptext($document)
 667      {
 668          
 669          // I didn't use preg eval (//e) since that is only available in PHP 4.0.

 670          // so, list your entities one by one here. I included some of the

 671          // more common ones.

 672                                  
 673          $search = array("'<script[^>]*?>.*?</script>'si",    // strip out javascript
 674                          "'<[\/\!]*?[^<>]*?>'si",            // strip out html tags
 675                          "'([\r\n])[\s]+'",                    // strip out white space
 676                          "'&(quot|#34|#034|#x22);'i",        // replace html entities
 677                          "'&(amp|#38|#038|#x26);'i",            // added hexadecimal values
 678                          "'&(lt|#60|#060|#x3c);'i",
 679                          "'&(gt|#62|#062|#x3e);'i",
 680                          "'&(nbsp|#160|#xa0);'i",
 681                          "'&(iexcl|#161);'i",
 682                          "'&(cent|#162);'i",
 683                          "'&(pound|#163);'i",
 684                          "'&(copy|#169);'i",
 685                          "'&(reg|#174);'i",
 686                          "'&(deg|#176);'i",
 687                          "'&(#39|#039|#x27);'",
 688                          "'&(euro|#8364);'i",                // europe
 689                          "'&a(uml|UML);'",                    // german
 690                          "'&o(uml|UML);'",
 691                          "'&u(uml|UML);'",
 692                          "'&A(uml|UML);'",
 693                          "'&O(uml|UML);'",
 694                          "'&U(uml|UML);'",
 695                          "'&szlig;'i",
 696                          );
 697          $replace = array(    "",
 698                              "",
 699                              "\\1",
 700                              "\"",
 701                              "&",
 702                              "<",
 703                              ">",
 704                              " ",
 705                              chr(161),
 706                              chr(162),
 707                              chr(163),
 708                              chr(169),
 709                              chr(174),
 710                              chr(176),
 711                              chr(39),
 712                              chr(128),
 713                              "",
 714                              "",
 715                              "",
 716                              "",
 717                              "",
 718                              "",
 719                              "",
 720                          );
 721                      
 722          $text = preg_replace($search,$replace,$document);
 723                                  
 724          return $text;
 725      }
 726  
 727  /*======================================================================*\

 728      Function:    _expandlinks

 729      Purpose:    expand each link into a fully qualified URL

 730      Input:        $links            the links to qualify

 731                  $URI            the full URI to get the base from

 732      Output:        $expandedLinks    the expanded links

 733  \*======================================================================*/
 734  
 735  	function _expandlinks($links,$URI)
 736      {
 737          
 738          preg_match("/^[^\?]+/",$URI,$match);
 739  
 740          $match = preg_replace("|/[^\/\.]+\.[^\/\.]+$|","",$match[0]);
 741          $match = preg_replace("|/$|","",$match);
 742                  
 743          $search = array(     "|^http://".preg_quote($this->host)."|i",
 744                              "|^(?!http://)(\/)?(?!mailto:)|i",
 745                              "|/\./|",
 746                              "|/[^\/]+/\.\./|"
 747                          );
 748                          
 749          $replace = array(    "",
 750                              $match."/",
 751                              "/",
 752                              "/"
 753                          );            
 754                  
 755          $expandedLinks = preg_replace($search,$replace,$links);
 756  
 757          return $expandedLinks;
 758      }
 759  
 760  /*======================================================================*\

 761      Function:    _httprequest

 762      Purpose:    go get the http data from the server

 763      Input:        $url        the url to fetch

 764                  $fp            the current open file pointer

 765                  $URI        the full URI

 766                  $body        body contents to send if any (POST)

 767      Output:        

 768  \*======================================================================*/
 769      
 770  	function _httprequest($url,$fp,$URI,$http_method,$content_type="",$body="")
 771      {
 772          $cookie_headers = '';
 773          if($this->passcookies && $this->_redirectaddr)
 774              $this->setcookies();
 775              
 776          $URI_PARTS = parse_url($URI);
 777          if(empty($url))
 778              $url = "/";
 779          $headers = $http_method." ".$url." ".$this->_httpversion."\r\n";        
 780          if(!empty($this->agent))
 781              $headers .= "User-Agent: ".$this->agent."\r\n";
 782          if(!empty($this->host) && !isset($this->rawheaders['Host']))
 783              $headers .= "Host: ".$this->host."\r\n";
 784          if(!empty($this->accept))
 785              $headers .= "Accept: ".$this->accept."\r\n";
 786          if(!empty($this->referer))
 787              $headers .= "Referer: ".$this->referer."\r\n";
 788          if(!empty($this->cookies))
 789          {            
 790              if(!is_array($this->cookies))
 791                  $this->cookies = (array)$this->cookies;
 792      
 793              reset($this->cookies);
 794              if ( count($this->cookies) > 0 ) {
 795                  $cookie_headers .= 'Cookie: ';
 796                  foreach ( $this->cookies as $cookieKey => $cookieVal ) {
 797                  $cookie_headers .= $cookieKey."=".urlencode($cookieVal)."; ";
 798                  }
 799                  $headers .= substr($cookie_headers,0,-2) . "\r\n";
 800              } 
 801          }
 802          if(!empty($this->rawheaders))
 803          {
 804              if(!is_array($this->rawheaders))
 805                  $this->rawheaders = (array)$this->rawheaders;
 806              while(list($headerKey,$headerVal) = each($this->rawheaders))
 807                  $headers .= $headerKey.": ".$headerVal."\r\n";
 808          }
 809          if(!empty($content_type)) {
 810              $headers .= "Content-type: $content_type";
 811              if ($content_type == "multipart/form-data")
 812                  $headers .= "; boundary=".$this->_mime_boundary;
 813              $headers .= "\r\n";
 814          }
 815          if(!empty($body))    
 816              $headers .= "Content-length: ".strlen($body)."\r\n";
 817          if(!empty($this->user) || !empty($this->pass))    
 818              $headers .= "Authorization: Basic ".base64_encode($this->user.":".$this->pass)."\r\n";
 819          
 820          //add proxy auth headers

 821          if(!empty($this->proxy_user))    
 822              $headers .= 'Proxy-Authorization: ' . 'Basic ' . base64_encode($this->proxy_user . ':' . $this->proxy_pass)."\r\n";
 823  
 824  
 825          $headers .= "\r\n";
 826          
 827          // set the read timeout if needed

 828          if ($this->read_timeout > 0)
 829              socket_set_timeout($fp, $this->read_timeout);
 830          $this->timed_out = false;
 831          
 832          fwrite($fp,$headers.$body,strlen($headers.$body));
 833          
 834          $this->_redirectaddr = false;
 835          unset($this->headers);
 836                          
 837          while($currentHeader = fgets($fp,$this->_maxlinelen))
 838          {
 839              if ($this->read_timeout > 0 && $this->_check_timeout($fp))
 840              {
 841                  $this->status=-100;
 842                  return false;
 843              }
 844                  
 845              if($currentHeader == "\r\n")
 846                  break;
 847                          
 848              // if a header begins with Location: or URI:, set the redirect

 849              if(preg_match("/^(Location:|URI:)/i",$currentHeader))
 850              {
 851                  // get URL portion of the redirect

 852                  preg_match("/^(Location:|URI:)[ ]+(.*)/",chop($currentHeader),$matches);
 853                  // look for :// in the Location header to see if hostname is included

 854                  if(!preg_match("|\:\/\/|",$matches[2]))
 855                  {
 856                      // no host in the path, so prepend

 857                      $this->_redirectaddr = $URI_PARTS["scheme"]."://".$this->host.":".$this->port;
 858                      // eliminate double slash

 859                      if(!preg_match("|^/|",$matches[2]))
 860                              $this->_redirectaddr .= "/".$matches[2];
 861                      else
 862                              $this->_redirectaddr .= $matches[2];
 863                  }
 864                  else
 865                      $this->_redirectaddr = $matches[2];
 866              }
 867          
 868              if(preg_match("|^HTTP/|",$currentHeader))
 869              {
 870                  if(preg_match("|^HTTP/[^\s]*\s(.*?)\s|",$currentHeader, $status))
 871                  {
 872                      $this->status= $status[1];
 873                  }                
 874                  $this->response_code = $currentHeader;
 875              }
 876                  
 877              $this->headers[] = $currentHeader;
 878          }
 879  
 880          $results = '';
 881          do {
 882              $_data = fread($fp, $this->maxlength);
 883              if (strlen($_data) == 0) {
 884                  break;
 885              }
 886              $results .= $_data;
 887          } while(true);
 888  
 889          if ($this->read_timeout > 0 && $this->_check_timeout($fp))
 890          {
 891              $this->status=-100;
 892              return false;
 893          }
 894          
 895          // check if there is a a redirect meta tag

 896          
 897          if(preg_match("'<meta[\s]*http-equiv[^>]*?content[\s]*=[\s]*[\"\']?\d+;[\s]*URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i",$results,$match))
 898  
 899          {
 900              $this->_redirectaddr = $this->_expandlinks($match[1],$URI);    
 901          }
 902  
 903          // have we hit our frame depth and is there frame src to fetch?

 904          if(($this->_framedepth < $this->maxframes) && preg_match_all("'<frame\s+.*src[\s]*=[\'\"]?([^\'\"\>]+)'i",$results,$match))
 905          {
 906              $this->results[] = $results;
 907              for($x=0; $x<count($match[1]); $x++)
 908                  $this->_frameurls[] = $this->_expandlinks($match[1][$x],$URI_PARTS["scheme"]."://".$this->host);
 909          }
 910          // have we already fetched framed content?

 911          elseif(is_array($this->results))
 912              $this->results[] = $results;
 913          // no framed content

 914          else
 915              $this->results = $results;
 916          
 917          return true;
 918      }
 919  
 920  /*======================================================================*\

 921      Function:    _httpsrequest

 922      Purpose:    go get the https data from the server using curl

 923      Input:        $url        the url to fetch

 924                  $URI        the full URI

 925                  $body        body contents to send if any (POST)

 926      Output:        

 927  \*======================================================================*/
 928      
 929  	function _httpsrequest($url,$URI,$http_method,$content_type="",$body="")
 930      {
 931          if($this->passcookies && $this->_redirectaddr)
 932              $this->setcookies();
 933  
 934          $headers = array();        
 935                      
 936          $URI_PARTS = parse_url($URI);
 937          if(empty($url))
 938              $url = "/";
 939          // GET ... header not needed for curl

 940          //$headers[] = $http_method." ".$url." ".$this->_httpversion;        

 941          if(!empty($this->agent))
 942              $headers[] = "User-Agent: ".$this->agent;
 943          if(!empty($this->host))
 944              $headers[] = "Host: ".$this->host;
 945          if(!empty($this->accept))
 946              $headers[] = "Accept: ".$this->accept;
 947          if(!empty($this->referer))
 948              $headers[] = "Referer: ".$this->referer;
 949          if(!empty($this->cookies))
 950          {            
 951              if(!is_array($this->cookies))
 952                  $this->cookies = (array)$this->cookies;
 953      
 954              reset($this->cookies);
 955              if ( count($this->cookies) > 0 ) {
 956                  $cookie_str = 'Cookie: ';
 957                  foreach ( $this->cookies as $cookieKey => $cookieVal ) {
 958                  $cookie_str .= $cookieKey."=".urlencode($cookieVal)."; ";
 959                  }
 960                  $headers[] = substr($cookie_str,0,-2);
 961              }
 962          }
 963          if(!empty($this->rawheaders))
 964          {
 965              if(!is_array($this->rawheaders))
 966                  $this->rawheaders = (array)$this->rawheaders;
 967              while(list($headerKey,$headerVal) = each($this->rawheaders))
 968                  $headers[] = $headerKey.": ".$headerVal;
 969          }
 970          if(!empty($content_type)) {
 971              if ($content_type == "multipart/form-data")
 972                  $headers[] = "Content-type: $content_type; boundary=".$this->_mime_boundary;
 973              else
 974                  $headers[] = "Content-type: $content_type";
 975          }
 976          if(!empty($body))    
 977              $headers[] = "Content-length: ".strlen($body);
 978          if(!empty($this->user) || !empty($this->pass))    
 979              $headers[] = "Authorization: BASIC ".base64_encode($this->user.":".$this->pass);
 980              
 981          for($curr_header = 0; $curr_header < count($headers); $curr_header++)
 982              $cmdline_params .= " -H \"".$headers[$curr_header]."\"";
 983          
 984          if(!empty($body))
 985              $cmdline_params .= " -d \"$body\"";
 986          
 987          if($this->read_timeout > 0)
 988              $cmdline_params .= " -m ".$this->read_timeout;
 989          
 990          $headerfile = tempnam($temp_dir, "sno");
 991  
 992          $safer_URI = strtr( $URI, "\"", " " ); // strip quotes from the URI to avoid shell access

 993          exec($this->curl_path." -D \"$headerfile\"".$cmdline_params." \"".$safer_URI."\"",$results,$return);
 994          
 995          if($return)
 996          {
 997              $this->error = "Error: cURL could not retrieve the document, error $return.";
 998              return false;
 999          }
1000              
1001              
1002          $results = implode("\r\n",$results);
1003          
1004          $result_headers = file("$headerfile");
1005                          
1006          $this->_redirectaddr = false;
1007          unset($this->headers);
1008                          
1009          for($currentHeader = 0; $currentHeader < count($result_headers); $currentHeader++)
1010          {
1011              
1012              // if a header begins with Location: or URI:, set the redirect

1013              if(preg_match("/^(Location: |URI: )/i",$result_headers[$currentHeader]))
1014              {
1015                  // get URL portion of the redirect

1016                  preg_match("/^(Location: |URI:)\s+(.*)/",chop($result_headers[$currentHeader]),$matches);
1017                  // look for :// in the Location header to see if hostname is included

1018                  if(!preg_match("|\:\/\/|",$matches[2]))
1019                  {
1020                      // no host in the path, so prepend

1021                      $this->_redirectaddr = $URI_PARTS["scheme"]."://".$this->host.":".$this->port;
1022                      // eliminate double slash

1023                      if(!preg_match("|^/|",$matches[2]))
1024                              $this->_redirectaddr .= "/".$matches[2];
1025                      else
1026                              $this->_redirectaddr .= $matches[2];
1027                  }
1028                  else
1029                      $this->_redirectaddr = $matches[2];
1030              }
1031          
1032              if(preg_match("|^HTTP/|",$result_headers[$currentHeader]))
1033                  $this->response_code = $result_headers[$currentHeader];
1034  
1035              $this->headers[] = $result_headers[$currentHeader];
1036          }
1037  
1038          // check if there is a a redirect meta tag

1039          
1040          if(preg_match("'<meta[\s]*http-equiv[^>]*?content[\s]*=[\s]*[\"\']?\d+;[\s]+URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i",$results,$match))
1041          {
1042              $this->_redirectaddr = $this->_expandlinks($match[1],$URI);    
1043          }
1044  
1045          // have we hit our frame depth and is there frame src to fetch?

1046          if(($this->_framedepth < $this->maxframes) && preg_match_all("'<frame\s+.*src[\s]*=[\'\"]?([^\'\"\>]+)'i",$results,$match))
1047          {
1048              $this->results[] = $results;
1049              for($x=0; $x<count($match[1]); $x++)
1050                  $this->_frameurls[] = $this->_expandlinks($match[1][$x],$URI_PARTS["scheme"]."://".$this->host);
1051          }
1052          // have we already fetched framed content?

1053          elseif(is_array($this->results))
1054              $this->results[] = $results;
1055          // no framed content

1056          else
1057              $this->results = $results;
1058  
1059          unlink("$headerfile");
1060          
1061          return true;
1062      }
1063  
1064  /*======================================================================*\

1065      Function:    setcookies()

1066      Purpose:    set cookies for a redirection

1067  \*======================================================================*/
1068      
1069  	function setcookies()
1070      {
1071          for($x=0; $x<count($this->headers); $x++)
1072          {
1073          if(preg_match('/^set-cookie:[\s]+([^=]+)=([^;]+)/i', $this->headers[$x],$match))
1074              $this->cookies[$match[1]] = urldecode($match[2]);
1075          }
1076      }
1077  
1078      
1079  /*======================================================================*\

1080      Function:    _check_timeout

1081      Purpose:    checks whether timeout has occurred

1082      Input:        $fp    file pointer

1083  \*======================================================================*/
1084  
1085  	function _check_timeout($fp)
1086      {
1087          if ($this->read_timeout > 0) {
1088              $fp_status = socket_get_status($fp);
1089              if ($fp_status["timed_out"]) {
1090                  $this->timed_out = true;
1091                  return true;
1092              }
1093          }
1094          return false;
1095      }
1096  
1097  /*======================================================================*\

1098      Function:    _connect

1099      Purpose:    make a socket connection

1100      Input:        $fp    file pointer

1101  \*======================================================================*/
1102      
1103  	function _connect(&$fp)
1104      {
1105          if(!empty($this->proxy_host) && !empty($this->proxy_port))
1106              {
1107                  $this->_isproxy = true;
1108                  
1109                  $host = $this->proxy_host;
1110                  $port = $this->proxy_port;
1111              }
1112          else
1113          {
1114              $host = $this->host;
1115              $port = $this->port;
1116          }
1117      
1118          $this->status = 0;
1119          
1120          if($fp = fsockopen(
1121                      $host,
1122                      $port,
1123                      $errno,
1124                      $errstr,
1125                      $this->_fp_timeout
1126                      ))
1127          {
1128              // socket connection succeeded

1129  
1130              return true;
1131          }
1132          else
1133          {
1134              // socket connection failed

1135              $this->status = $errno;
1136              switch($errno)
1137              {
1138                  case -3:
1139                      $this->error="socket creation failed (-3)";
1140                  case -4:
1141                      $this->error="dns lookup failure (-4)";
1142                  case -5:
1143                      $this->error="connection refused or timed out (-5)";
1144                  default:
1145                      $this->error="connection failed (".$errno.")";
1146              }
1147              return false;
1148          }
1149      }
1150  /*======================================================================*\

1151      Function:    _disconnect

1152      Purpose:    disconnect a socket connection

1153      Input:        $fp    file pointer

1154  \*======================================================================*/
1155      
1156  	function _disconnect($fp)
1157      {
1158          return(fclose($fp));
1159      }
1160  
1161      
1162  /*======================================================================*\

1163      Function:    _prepare_post_body

1164      Purpose:    Prepare post body according to encoding type

1165      Input:        $formvars  - form variables

1166                  $formfiles - form upload files

1167      Output:        post body

1168  \*======================================================================*/
1169      
1170  	function _prepare_post_body($formvars, $formfiles)
1171      {
1172          settype($formvars, "array");
1173          settype($formfiles, "array");
1174          $postdata = '';
1175  
1176          if (count($formvars) == 0 && count($formfiles) == 0)
1177              return;
1178          
1179          switch ($this->_submit_type) {
1180              case "application/x-www-form-urlencoded":
1181                  reset($formvars);
1182                  while(list($key,$val) = each($formvars)) {
1183                      if (is_array($val) || is_object($val)) {
1184                          while (list($cur_key, $cur_val) = each($val)) {
1185                              $postdata .= urlencode($key)."[]=".urlencode($cur_val)."&";
1186                          }
1187                      } else
1188                          $postdata .= urlencode($key)."=".urlencode($val)."&";
1189                  }
1190                  break;
1191  
1192              case "multipart/form-data":
1193                  $this->_mime_boundary = "Snoopy".md5(uniqid(microtime()));
1194                  
1195                  reset($formvars);
1196                  while(list($key,$val) = each($formvars)) {
1197                      if (is_array($val) || is_object($val)) {
1198                          while (list($cur_key, $cur_val) = each($val)) {
1199                              $postdata .= "--".$this->_mime_boundary."\r\n";
1200                              $postdata .= "Content-Disposition: form-data; name=\"$key\[\]\"\r\n\r\n";
1201                              $postdata .= "$cur_val\r\n";
1202                          }
1203                      } else {
1204                          $postdata .= "--".$this->_mime_boundary."\r\n";
1205                          $postdata .= "Content-Disposition: form-data; name=\"$key\"\r\n\r\n";
1206                          $postdata .= "$val\r\n";
1207                      }
1208                  }
1209                  
1210                  reset($formfiles);
1211                  while (list($field_name, $file_names) = each($formfiles)) {
1212                      settype($file_names, "array");
1213                      while (list(, $file_name) = each($file_names)) {
1214                          if (!is_readable($file_name)) continue;
1215  
1216                          $fp = fopen($file_name, "r");
1217                          $file_content = fread($fp, filesize($file_name));
1218                          fclose($fp);
1219                          $base_name = basename($file_name);
1220  
1221                          $postdata .= "--".$this->_mime_boundary."\r\n";
1222                          $postdata .= "Content-Disposition: form-data; name=\"$field_name\"; filename=\"$base_name\"\r\n\r\n";
1223                          $postdata .= "$file_content\r\n";
1224                      }
1225                  }
1226                  $postdata .= "--".$this->_mime_boundary."--\r\n";
1227                  break;
1228          }
1229  
1230          return $postdata;
1231      }
1232  }
1233  
1234  ?>


[ Powered by PHPXref - Served by Debian GNU/Linux ]