| [ PHPXref.com ] | [ Generated: Sun Jul 20 21:12:30 2008 ] | [ YACS 6.3.1 ] |
| [ Index ] [ Variables ] [ Functions ] [ Classes ] [ Constants ] [ Statistics ] | ||
[Summary view] [Print] [Text view]
<?php
/**
 * get news
 *
 * This data abstraction for feeds provides two main functions, plus several utility functions:
 * - [code]get_local_news()[/code] - retrieve local news
 * - [code]get_remote_news()[/code] - retrieve news collected from remote sites
 * - [code]get_remote_news_from()[/code] - actual news fetching from one feeding site
 * - [code]tick_hook()[/code] - trigger feeding in the background
 * - [code]decode_date()[/code] - turn a feed date into a time stamp
 *
 * The documentation of [code]tick_hook()[/code] shows it being invoked statically
 * ([code]Feeds::tick_hook()[/code]), and methods of this class call each other the same way.
 * Methods are nevertheless declared without the [code]static[/code] keyword, as in the original
 * code, so that the declaration style of the file is not changed.
 * NOTE(review): recent PHP versions reject static calls to non-static methods - confirm the
 * PHP versions to be supported before changing the declarations.
 *
 * @author Bernard Paques [email]bernard.paques@bigfoot.com[/email]
 * @reference
 * @license http://www.gnu.org/copyleft/lesser.txt GNU Lesser General Public License
 */
class Feeds {

	/**
	 * decode a date
	 *
	 * Dates formatted as W3CDTF (e.g., [code]2008-07-20T21:12:30+02:00[/code]) are decoded here.
	 * Seconds, fractions of seconds and the time zone designator are all optional.
	 * Fractions of seconds are accepted but ignored. When no time zone is given, GMT is assumed.
	 *
	 * Any other string is passed to [code]strtotime()[/code].
	 *
	 * @link http://www.w3.org/TR/NOTE-datetime Date and Time Formats, a profile of ISO 8601
	 *
	 * @param string some date
	 * @return int a valid time stamp, or whatever strtotime() reports on failure (FALSE, or -1 on old PHP versions)
	 */
	function decode_date($date) {

		// match W3CDTF
		// captured groups: 1=year 2=month 3=day 4=hours 5=minutes 6=':ss[.fraction]' 7=seconds 8=sign 9=tz hours 10=tz minutes 11='Z'
		if(preg_match("/(\d{4})-(\d{2})-(\d{2})T(\d{2}):(\d{2})(:(\d{2})(?:\.\d+)?)?(?:([-+])(\d{2}):?(\d{2})|(Z))?/", $date, $matches)) {

			// seconds are optional; group 7 holds the digits, while group 6 also has the leading ':'
			$seconds = 0;
			if(isset($matches[7]) && ($matches[7] !== ''))
				$seconds = (int)$matches[7];

			// calc epoch for current date assuming GMT
			$stamp = gmmktime((int)$matches[4], (int)$matches[5], $seconds, (int)$matches[2], (int)$matches[3], (int)$matches[1]);

			// zulu time (group 11), or no time zone at all, means no offset
			$offset = 0;

			// trailing groups are absent from $matches when they have not participated in the match
			if(isset($matches[8]) && ($matches[8] !== '')) {

				$tz_hour = isset($matches[9]) ? (int)$matches[9] : 0;
				$tz_min = isset($matches[10]) ? (int)$matches[10] : 0;

				$offset = (($tz_hour*60)+$tz_min)*60;

				// is timezone ahead of GMT? then subtract offset
				if($matches[8] == '+')
					$offset = $offset * -1;

			}
			return ($stamp + $offset);

		}

		// everything else
		return strtotime($date);

	}

	/**
	 * get current news from this server
	 *
	 * Actually, this function lists most recent published articles.
	 *
	 * @param int the number of items to list
	 * @param 'feeds' to get a regular feed, or 'contents' to get everything
	 * @return an array of array($time, $title, $author, $section, $image, $description)
	 */
	function get_local_news($count=20, $variant='feeds') {
		global $context;

		// list the newest published articles
		include_once $context['path_to_root'].'articles/articles.php';
		return Articles::list_by_date(0, $count, $variant);

	}

	/**
	 * get news from remote servers
	 *
	 * This function extracts from the database most recent links fetched from feeders.
	 *
	 * By default, up to 20 items are displayed. A count below 3 is replaced by 10,
	 * and a count above 50 is capped to 50.
	 *
	 * @param the maximum number of news to fetch
	 * @param the expected variant to use
	 * @return an array to use with [code]Skin::build_list()[/code], or NULL
	 *
	 * @see feeds/index.php
	 */
	function get_remote_news($count=20, $variant='compact') {
		global $context;

		// number of items to display
		if($count < 3)
			$count = 10;
		if($count > 50)
			$count = 50;

		// get them from the database
		include_once $context['path_to_root'].'links/links.php';
		return Links::list_news(0, $count, $variant);
	}

	/**
	 * get news from a remote server
	 *
	 * This function is aiming to run silently, therefore errors are logged in a file.
	 * To troubleshoot feeders you can configure the debugging facility in the
	 * configuration panel for feeds (parameter [code]debug_feeds[/code], at [script]feeds/configure.php[/script]).
	 *
	 * Each returned item is an array with attributes 'title', 'description', 'link', 'category',
	 * 'author' and 'pubDate'. The latter is always set, and formatted as 'YYYY-MM-DD HH:MM:SS'.
	 * It is derived from the first non-empty date of the item, in this order: RSS pubDate,
	 * Atom issued, Dublin Core date. The current time is used if there is no date, or if the
	 * date cannot be decoded.
	 *
	 * @see links/link.php
	 *
	 * @param string the URL to use to fetch news
	 * @return either an array of items, or NULL on error
	 *
	 * @see feeds/feeds.php
	 * @see servers/test.php
	 */
	function get_remote_news_from($feed_url) {
		global $context;

		// ensure we are using adequate feeding parameters
		@include_once $context['path_to_root'].'feeds/parameters.include.php';

		// parse the target URL -- parse_url() may fail, or report no host at all
		$items = @parse_url($feed_url);

		// sometime parse_url() adds a '_'
		$host = '';
		if(is_array($items) && isset($items['host']))
			$host = rtrim($items['host'], '_');

		// stop here if no host
		if(!$host) {
			Logger::remember('feeds/feeds.php', 'No valid host at '.$feed_url);
			return NULL;
		}

		// avoid local loops -- host names are not case-sensitive
		$lowered = strtolower($host);
		$is_local = ($lowered == 'localhost') || ($lowered == '127.0.0.1');
		if(!$is_local && isset($context['host_name']) && ($lowered == strtolower($context['host_name'])))
			$is_local = TRUE;
		if($is_local) {
			Logger::remember('feeds/feeds.php', 'Local feed is skipped at '.$feed_url);
			return NULL;
		}

		// we only want XML
		$headers = "Accept: text/xml\015\012";

		// are we in debug mode
		$debug = '';
		if(isset($context['debug_feeds']) && ($context['debug_feeds'] == 'Y'))
			$debug = 'feeds/feeds.php';

		// actual fetch
		include_once $context['path_to_root'].'links/link.php';
		if(!($content = Link::fetch($feed_url, $headers, '', $debug)))
			return NULL;

		// select a codec
		@include_once $context['path_to_root'].'services/codec.php';

		// decode slashdot
		if(preg_match('/<backslash/i', $content)) {
			@include_once $context['path_to_root'].'services/slashdot_codec.php';
			$codec = new slashdot_Codec();

		// the default is to decode as RSS
		} else {
			@include_once $context['path_to_root'].'services/rss_codec.php';
			$codec = new RSS_Codec();
		}

		// decode the result -- first element is the status, second element is the list of items
		$result = $codec->import_response($content, $headers, NULL);
		if(!is_array($result) || empty($result[0])) {
			Logger::remember('feeds/feeds.php', 'Unable to decode XML response from '.$feed_url);
			return NULL;
		}
		$entries = array();
		if(isset($result[1]) && is_array($result[1]))
			$entries = $result[1];

		// streamline date processing
		$items = array();
		foreach($entries as $item) {

			// preserved attributes -- missing ones are set to NULL
			$transcoded = array();
			foreach(array('title', 'description', 'link', 'category', 'author') as $name)
				$transcoded[$name] = isset($item[$name]) ? $item[$name] : NULL;

			// use RSS pubDate
			$raw_date = '';
			if(!empty($item['pubDate']))
				$raw_date = $item['pubDate'];

			// use Atom
			elseif(!empty($item['issued']))
				$raw_date = $item['issued'];

			// use DC
			elseif(isset($item['dc']) && is_array($item['dc']) && !empty($item['dc']['date']))
				$raw_date = $item['dc']['date'];

			// default stamp -- also used when the provided date cannot be decoded
			$stamp = time();
			if($raw_date) {
				$decoded = Feeds::decode_date($raw_date);
				if(($decoded !== FALSE) && ($decoded != -1))
					$stamp = $decoded;
			}

			// same layout as the former strftime('%Y-%m-%d %H:%M:%S') call
			$transcoded['pubDate'] = date('Y-m-d H:i:s', $stamp);

			$items[] = $transcoded;
		}

		// and returns it
		return $items;
	}

	/**
	 * get news from remote servers
	 *
	 * This function queries remote sources and populate the table of links based on fetched news.
	 *
	 * On tick, the including hook calls [code]Feeds::tick_hook()[/code].
	 * See [script]control/scan.php[/script] for a more complete description of hooks.
	 *
	 * The function browses the database to locate servers acting as feeders, and read the URLs to use.
	 *
	 * Because of the "poor-man" cron mechanism you should limit the list of servers queried to about 7 servers.
	 * Of course this limit does not apply for servers acting as news aggregators, with an actual cron facility, and the
	 * "poor-man" feature disabled (from the main configuration panel [script]control/configure.php[/script]).
	 *
	 * XML feeds are fetched and parsed according to their type.
	 * At the moment YACS is able to process RSS and slashdot feeds.
	 * Link records are created or updated in the database saving as much of possible of provided data.
	 * Item data is reflected in Link, Title, and Description fields.
	 * Channel data is used to populate the Source field.
	 * Stamping information is based on feeding date, and channel title.
	 * Also, the edit action 'link:feed' marks links that are collected from feeders.
	 * The anchor field is set to the category assigned in the server profile.
	 *
	 * At the end of the feeding process, the database is purged from oldest links according to the limit
	 * defined in feeds/parameters.include.php, set through feeds/configure.php.
	 * See Links::purge_old_news().
	 *
	 * The modification time of feeds/parameters.include.php is used to remember the date of last feeding.
	 *
	 * @param boolean if set to true, fetch news on each call; else use normal period of time
	 * @return a string to be displayed in resulting page, if any
	 *
	 * @see control/scan.php
	 * @see feeds/configure.php
	 */
	function tick_hook($forced=FALSE) {
		global $context;

		// stop here if feeds have not been configured properly
		$parameters_file = $context['path_to_root'].'feeds/parameters.include.php';
		if(($stat = @stat($parameters_file)) === FALSE)
			return;

		// get feeding parameters
		@include_once $parameters_file;

		// control feeding interval on regular tick
		if(!$forced) {

			// delay between feeds - minimum is 5 minutes
			$minutes = isset($context['minutes_between_feeds']) ? (int)$context['minutes_between_feeds'] : 0;
			$interval = max($minutes, 5)*60;

			// wait at least for the end of the delay -- $stat[9] is the time of last modification
			$target = $stat[9] + $interval;

			// request to be delayed
			if($target > time())
				return 'feeds/feeds.php: wait until '.gmdate('r', $target).' GMT'.BR;

			// remember feeding date
			touch($parameters_file);

		}

		// get the list of feeders
		include_once $context['path_to_root'].'servers/servers.php';
		if(!$feeders = Servers::list_for_feed(0, 7, 'feed'))
			return 'feeds/feeds.php: no feed has been defined'.BR;

		// remember start time
		$stamp = get_micro_time();

		// list banned tokens
		$banned_pattern = Servers::get_banned_pattern();

		// are we in debug mode
		$debugging = isset($context['debug_feeds']) && ($context['debug_feeds'] == 'Y');

		// browse each feed
		$count = 0;
		foreach($feeders as $browse_url => $attributes) {

			// get specific feed parameters
			list($feed_url, $feed_title, $anchor) = $attributes;

			// fetch news from the provided link
			if((!$news = Feeds::get_remote_news_from($feed_url)) || !is_array($news))
				continue;

			// because of the time required to process the request, it is likely we have to reopen a connection
			// NOTE(review): the mysql_* functions are not available anymore in recent PHP versions;
			// this is left unchanged because the database layer of the project is not visible from here
			if(!@mysql_ping($context['connection'])) {
				$context['connection'] = @mysql_connect($context['database_server'], $context['database_user'], $context['database_password']);
				if(!@mysql_select_db($context['database'], $context['connection'])) {
					Logger::remember('feeds/feeds.php', 'Unable to connect to the database');
					return;
				}
			}

			// no anchor has been defined for this feed
			if(!$anchor) {

				// create a default section if necessary
				@include_once $context['path_to_root'].'sections/sections.php';
				if(!($anchor = Sections::lookup('external_news'))) {
					$now = date('Y-m-d H:i:s', time());
					$fields = array();
					$fields['nick_name'] = 'external_news';
					$fields['create_date'] = $now;
					$fields['edit_date'] = $now;
					$fields['active'] = 'Y'; // public material
					$fields['locked'] = 'Y'; // no direct contributions
					$fields['home_panel'] = 'extra'; // in a side box at the front page
					$fields['index_map'] = 'Y'; // this is a regular section
					$fields['rank'] = 40000; // at the end of the list
					if(isset($context['preferred_language']) && ($context['preferred_language'] == 'fr')) {
						$fields['title'] = 'Nouvelles du monde';
						$fields['description'] = 'Les informations transmises par les autres serveurs';
					} else {
						$fields['title'] = 'External News';
						$fields['description'] = 'Received from feeding servers';
					}
					if(!$new_id = Sections::post($fields)) {
						Logger::remember('feeds/feeds.php', 'Unable to create a section for news');
						return;
					}
					$anchor = 'section:'.$new_id;
				}
			}

			// process retrieved links
			@include_once $context['path_to_root'].'links/links.php';
			foreach($news as $item) {

				// link has to be valid
				if(!isset($item['link']) || !($item['title'].$item['description'])) {
					if($debugging)
						Logger::debug($item, 'feed item is invalid');
					continue;
				}

				// skip banned servers
				if($banned_pattern && preg_match($banned_pattern, $item['link'])) {
					if($debugging)
						Logger::debug($item['link'], 'feed host has been banned');
					continue;
				}

				// skip links that already exist in the database
				if(Links::have($item['link'], $anchor)) {
					if($debugging)
						Logger::debug($item['link'], 'feed link already exists');
					continue;
				}

				// this link does not exist yet
				$fields = array();
				$fields['anchor'] = $anchor;
				$fields['link_url'] = $item['link'];
				$fields['title'] = $item['title'];
				$fields['description'] = $item['description'];
				if($item['category'])
					$fields['description'] .= ' ('.$item['category'].')';
				$fields['edit_name'] = $feed_title;
				$fields['edit_id'] = $browse_url;
				$fields['edit_address'] = $context['url_to_home'].$context['url_to_root'].$browse_url;
				$fields['edit_action'] = 'link:feed';
				if($item['pubDate'])
					$fields['edit_date'] = date('Y-m-d H:i:s', strtotime($item['pubDate']));

				// save link in the database -- Links::post() reports an error message on failure
				if($error = Links::post($fields))
					Logger::remember('feeds/feeds.php', 'unable to save feed link: '.$error);
			}

			// one feed has been processed
			$count += 1;

		}

		// cap the number of links used for news
		if(isset($context['maximum_news']) && ($context['maximum_news'] > 10)) {
			@include_once $context['path_to_root'].'links/links.php';
			Links::purge_old_news($context['maximum_news']);
		}

		// compute execution time
		$time = round(get_micro_time() - $stamp, 2);

		// report on work achieved
		if($count > 1)
			return 'feeds/feeds.php: '.$count.' feeds have been processed ('.$time.' seconds)'.BR;
		elseif($count == 1)
			return 'feeds/feeds.php: 1 feed has been processed ('.$time.' seconds)'.BR;
		else
			return 'feeds/feeds.php: nothing to do ('.$time.' seconds)'.BR;
	}

}
?>
title
Description
Body
title
Description
Body
title
Description
Body
title
Body
| [ Powered by PHPXref - Served by Debian GNU/Linux ] |