[ PHPXref.com ] [ Generated: Sun Jul 20 21:12:30 2008 ] [ YACS 6.3.1 ]
[ Index ]     [ Variables ]     [ Functions ]     [ Classes ]     [ Constants ]     [ Statistics ]

title

Body

[close]

/feeds/ -> feeds.php (source)

   1  <?
   2  /**
   3   * get news
   4   *
   5   * This data abstraction for feeds provides two main functions, plus several utility functions:
   6   * - [code]get_local_news()[/code] - retrieve local news
   7   * - [code]get_remote_news()[/code] - retrieve news collected from remote sites
   8   * - [code]get_remote_news_from()[/code] - actual news fetching from one feeding site
   9   * - [code]tick_hook()[/code] - trigger feeding in the background
  10   *
  11   * @author Bernard Paques [email]bernard.paques@bigfoot.com[/email]
  12   * @reference
  13   * @license http://www.gnu.org/copyleft/lesser.txt GNU Lesser General Public License
  14   */
  class Feeds {

      /**
       * decode a date
       *
       * Dates that follow the W3C date and time profile of ISO 8601
       * (e.g., [code]2008-07-20T21:12:30+02:00[/code]) are decoded here;
       * a missing time zone designator is handled as GMT.
       * Every other string is handed over to [code]strtotime()[/code].
       *
       * @link http://www.w3.org/TR/NOTE-datetime Date and Time Formats, a profile of ISO 8601
       *
       * @param string some date
       * @return int a valid time stamp, or the failure value of strtotime() (FALSE, or -1 on old PHP versions)
       */
      function decode_date($date) {

          $date = (string)$date;

          // capture groups: 1=year 2=month 3=day 4=hours 5=minutes 6=seconds 7=sign 8=tz hours 9=tz minutes 10=Z
          $pattern = '/(\d{4})-(\d{2})-(\d{2})T(\d{2}):(\d{2})(?::(\d{2}))?(?:([-+])(\d{2}):?(\d{2})|(Z))?/';

          // everything else than w3cdtf
          if(!preg_match($pattern, $date, $matches))
              return strtotime($date);

          // seconds are optional -- trailing groups that did not match may be absent from $matches
          $seconds = 0;
          if(isset($matches[6]) && ($matches[6] !== ''))
              $seconds = (int)$matches[6];

          // calc epoch for the date, assuming GMT
          $stamp = gmmktime((int)$matches[4], (int)$matches[5], $seconds, (int)$matches[2], (int)$matches[3], (int)$matches[1]);

          // zulu time, aka GMT, or no time zone at all
          $offset = 0;

          // an explicit shift from GMT has been provided
          if(isset($matches[7]) && ($matches[7] !== '')) {

              $offset = (((int)$matches[8] * 60) + (int)$matches[9]) * 60;

              // is timezone ahead of GMT?  then subtract offset
              if($matches[7] == '+')
                  $offset = -$offset;
          }

          return ($stamp + $offset);
      }

      /**
       * get current news from this server
       *
       * Actually, this function lists most recent published articles,
       * as provided by [code]Articles::list_by_date()[/code].
       *
       * @param int the number of items to list
       * @param 'feeds' to get a regular feed, or 'contents' to get everything
       * @return an array of array($time, $title, $author, $section, $image, $description)
       */
      function get_local_news($count=20, $variant='feeds') {
          global $context;

          // list the newest published articles
          include_once $context['path_to_root'].'articles/articles.php';
          return Articles::list_by_date(0, $count, $variant);
      }

      /**
       * get news from remote servers
       *
       * This function extracts from the database most recent links fetched from feeders.
       *
       * By default, up to 20 items are displayed. A count below 3 is replaced
       * by 10, and a count above 50 is capped to 50.
       *
       * @param the maximum number of news to fetch
       * @param the expected variant to use
       * @return an array to use with [code]Skin::build_list()[/code], or NULL
       *
       * @see feeds/index.php
       */
      function get_remote_news($count=20, $variant='compact') {
          global $context;

          // number of items to display
          if($count < 3)
              $count = 10;
          if($count > 50)
              $count = 50;

          // get them from the database
          include_once $context['path_to_root'].'links/links.php';
          return Links::list_news(0, $count, $variant);
      }

      /**
       * get news from a remote server
       *
       * This function is aiming to run silently, therefore errors are logged in a file.
       * To troubleshoot feeders you can configure the debugging facility in the
       * configuration panel for feeds (parameter [code]debug_feeds[/code], at [script]feeds/configure.php[/script]).
       *
       * Each returned item has attributes 'title', 'description', 'link',
       * 'category' and 'author' (NULL when the feed does not provide them),
       * plus 'pubDate' formatted as 'YYYY-MM-DD HH:MM:SS'.
       * The date is taken from pubDate, else from issued (Atom), else from
       * dc:date, else it is the current time.
       *
       * @see links/link.php
       *
       * @param string the URL to use to fetch news
       * @return either an array of items, or NULL on error
       *
       * @see feeds/feeds.php
       * @see servers/test.php
       */
      function get_remote_news_from($feed_url) {
          global $context;

          // ensure we are using adequate feeding parameters
          @include_once $context['path_to_root'].'feeds/parameters.include.php';

          // parse the target URL -- this may fail, or provide no host at all
          $host = '';
          $items = @parse_url($feed_url);
          if(is_array($items) && isset($items['host'])) {

              // sometime parse_url() adds a '_'
              $host = rtrim($items['host'], '_');
          }

          // stop here if no host
          if(!$host) {
              Logger::remember('feeds/feeds.php', 'No valid host at '.$feed_url);
              return NULL;
          }

          // avoid local loops
          if(($host == $context['host_name']) || ($host == 'localhost') || ($host == '127.0.0.1')) {
              Logger::remember('feeds/feeds.php', 'Local feed is skipped at '.$feed_url);
              return NULL;
          }

          // we only want XML
          $headers = "Accept: text/xml\015\012";

          // are we in debug mode
          $debug = '';
          if(isset($context['debug_feeds']) && ($context['debug_feeds'] == 'Y'))
              $debug = 'feeds/feeds.php';

          // actual fetch
          include_once $context['path_to_root'].'links/link.php';
          if(($content = Link::fetch($feed_url, $headers, '', $debug)) == FALSE)
              return NULL;

          // select a codec
          @include_once $context['path_to_root'].'services/codec.php';

          // decode slashdot
          if(preg_match('/<backslash/i', $content)) {
              @include_once $context['path_to_root'].'services/slashdot_codec.php';
              $codec = new slashdot_Codec();

          // the default is to decode as RSS
          } else {
              @include_once $context['path_to_root'].'services/rss_codec.php';
              $codec = new RSS_Codec();
          }

          // decode the result
          $result = $codec->import_response($content, $headers, NULL);
          if(!$result[0]) {
              Logger::remember('feeds/feeds.php', 'Unable to decode XML response from '.$feed_url);
              return NULL;
          }

          // streamline date processing
          $items = array();

          // the codec reported a success, but without any list of items
          if(!isset($result[1]) || !is_array($result[1]))
              return $items;

          $preserved = array('title', 'description', 'link', 'category', 'author');
          foreach($result[1] as $item) {

              // preserved attributes -- NULL flags attributes that have not been provided
              $transcoded = array();
              foreach($preserved as $name)
                  $transcoded[$name] = isset($item[$name]) ? $item[$name] : NULL;

              // transcode pubDate
              $stamp = FALSE;
              if(!empty($item['pubDate']))
                  $stamp = Feeds::decode_date($item['pubDate']);

              // use Atom
              elseif(!empty($item['issued']))
                  $stamp = Feeds::decode_date($item['issued']);

              // use DC
              elseif(!empty($item['dc']['date']))
                  $stamp = Feeds::decode_date($item['dc']['date']);

              // default stamp -- also used when the provided date cannot be decoded
              if(($stamp === FALSE) || ($stamp == -1))
                  $stamp = time();

              $transcoded['pubDate'] = date('Y-m-d H:i:s', $stamp);

              $items[] = $transcoded;
          }

          // and returns it
          return $items;
      }

      /**
       * get news from remote servers
       *
       * This function queries remote sources and populate the table of links based on fetched news.
       *
       * On tick, the including hook calls [code]Feeds::tick_hook()[/code].
       * See [script]control/scan.php[/script] for a more complete description of hooks.
       *
       * The function browses the database to locate servers acting as feeders, and read the URLs to use.
       * Up to 7 feeders are queried on each call.
       *
       * Because of the "poor-man" cron mechanism you should limit the list of servers queried to about 7 servers.
       * Of course this limit does not apply for servers acting as news aggregators, with an actual cron facility, and the
       * "poor-man" feature disabled (from the main configuration panel [script]control/configure.php[/script]).
       *
       * XML feeds are fetched and parsed according to their type.
       * At the moment YACS is able to process RSS and slashdot feeds.
       * Link records are created in the database for links that are not already attached to the anchor.
       * Item data is reflected in Link, Title, and Description fields.
       * Stamping information is based on feeding date, and channel title.
       * Also, the edit action 'link:feed' marks links that are collected from feeders.
       * The anchor field is set to the category assigned in the server profile, or to
       * the section nicknamed 'external_news', which is created if necessary.
       *
       * The modification date of feeds/parameters.include.php is used to remember
       * the date of the last feeding.
       *
       * At the end of the feeding process, the database is purged from oldest links according to the limit
       * defined in feeds/parameters.include.php, set through feeds/configure.php.
       * See Links::purge_old_news().
       *
       * @param boolean if set to true, fetch news on each call; else use normal period of time
       * @return a string to be displayed in resulting page, if any
       *
       * @see control/scan.php
       * @see feeds/configure.php
       */
      function tick_hook($forced=FALSE) {
          global $context;

          $parameters_file = $context['path_to_root'].'feeds/parameters.include.php';

          // stop here if feeds have not been configured properly
          if(($stat = @stat($parameters_file)) === FALSE)
              return;

          // get feeding parameters
          @include_once $parameters_file;

          // control feeding interval on regular tick
          if(!$forced) {

              // delay between feeds - minimum is 5 minutes
              $minutes = isset($context['minutes_between_feeds']) ? (int)$context['minutes_between_feeds'] : 0;
              $interval = max($minutes, 5)*60;

              // wait at least for the end of the delay -- $stat[9] is the last modification time
              $target = $stat[9] + $interval;

              // request to be delayed
              if($target > time())
                  return 'feeds/feeds.php: wait until '.gmdate('r', $target).' GMT'.BR;

              // remember feeding date
              touch($parameters_file);

          }

          // get the list of feeders
          include_once $context['path_to_root'].'servers/servers.php';
          if(!$feeders = Servers::list_for_feed(0, 7, 'feed'))
              return 'feeds/feeds.php: no feed has been defined'.BR;

          // remember start time
          $stamp = get_micro_time();

          // list banned tokens
          $banned_pattern = Servers::get_banned_pattern();

          // debugging messages are logged only on demand
          $debugging = (isset($context['debug_feeds']) && ($context['debug_feeds'] == 'Y'));

          // browse each feed
          $count = 0;
          foreach($feeders as $browse_url => $attributes) {

              // get specific feed parameters
              list($feed_url, $feed_title, $anchor) = $attributes;

              // fetch news from the provided link
              if((!$news = Feeds::get_remote_news_from($feed_url)) || !is_array($news))
                  continue;

              // because of the time required to process the request, it is likely we have to reopen a connection
              if(!@mysql_ping($context['connection'])) {
                  $context['connection'] = @mysql_connect($context['database_server'], $context['database_user'], $context['database_password']);
                  if(!@mysql_select_db($context['database'], $context['connection'])) {
                      Logger::remember('feeds/feeds.php', 'Unable to connect to the database');
                      return;
                  }
              }

              // no anchor has been defined for this feed
              if(!$anchor) {

                  // create a default section if necessary
                  @include_once $context['path_to_root'].'sections/sections.php';
                  if(!($anchor = Sections::lookup('external_news'))) {
                      $now = date('Y-m-d H:i:s', time());

                      $fields = array();
                      $fields['nick_name'] = 'external_news';
                      $fields['create_date'] = $now;
                      $fields['edit_date'] = $now;
                      $fields['active'] = 'Y'; // public material
                      $fields['locked'] = 'Y'; // no direct contributions
                      $fields['home_panel'] = 'extra'; // in a side box at the front page
                      $fields['index_map'] = 'Y'; // this is a regular section
                      $fields['rank'] = 40000; // at the end of the list
                      if($context['preferred_language'] == 'fr') {
                          $fields['title'] = 'Nouvelles du monde';
                          $fields['description'] = 'Les informations transmises par les autres serveurs';
                      } else {
                          $fields['title'] = 'External News';
                          $fields['description'] = 'Received from feeding servers';
                      }
                      if(!$new_id = Sections::post($fields)) {
                          Logger::remember('feeds/feeds.php', 'Unable to create a section for news');
                          return;
                      }
                      $anchor = 'section:'.$new_id;
                  }
              }

              // process retrieved links
              @include_once $context['path_to_root'].'links/links.php';
              foreach($news as $item) {

                  // link has to be valid
                  if(!isset($item['link']) || !($item['title'].$item['description'])) {
                      if($debugging)
                          Logger::debug($item, 'feed item is invalid');
                      continue;
                  }

                  // skip banned servers
                  if($banned_pattern && preg_match($banned_pattern, $item['link'])) {
                      if($debugging)
                          Logger::debug($item['link'], 'feed host has been banned');
                      continue;
                  }

                  // skip links that already exist in the database
                  if(Links::have($item['link'], $anchor)) {
                      if($debugging)
                          Logger::debug($item['link'], 'feed link already exists');
                      continue;
                  }

                  // this link does not exist yet
                  $fields = array();
                  $fields['anchor'] = $anchor;
                  $fields['link_url'] = $item['link'];
                  $fields['title'] = $item['title'];
                  $fields['description'] = $item['description'];
                  if($item['category'])
                      $fields['description'] .= ' ('.$item['category'].')';
                  $fields['edit_name'] = $feed_title;
                  $fields['edit_id'] = $browse_url;
                  $fields['edit_address'] = $context['url_to_home'].$context['url_to_root'].$browse_url;
                  $fields['edit_action'] = 'link:feed';
                  if($item['pubDate'])
                      $fields['edit_date'] = date('Y-m-d H:i:s', strtotime($item['pubDate']));

                  // save link in the database
                  if($error = Links::post($fields))
                      Logger::remember('feeds/feeds.php', 'unable to save feed link: '.$error);
              }

              // one feed has been processed
              $count += 1;

          }

          // cap the number of links used for news
          if(isset($context['maximum_news']) && ($context['maximum_news'] > 10)) {
              @include_once $context['path_to_root'].'links/links.php';
              Links::purge_old_news($context['maximum_news']);
          }

          // compute execution time
          $time = round(get_micro_time() - $stamp, 2);

          // report on work achieved
          if($count > 1)
              return 'feeds/feeds.php: '.$count.' feeds have been processed ('.$time.' seconds)'.BR;
          elseif($count == 1)
              return 'feeds/feeds.php: 1 feed has been processed ('.$time.' seconds)'.BR;
          else
              return 'feeds/feeds.php: nothing to do ('.$time.' seconds)'.BR;
      }

  }
 411  ?>


[ Powered by PHPXref - Served by Debian GNU/Linux ]