36 |
kaklik |
1 |
<?php
|
|
|
2 |
/*
|
|
|
3 |
======================================================================
|
|
|
4 |
lastRSS 0.6
|
|
|
5 |
|
|
|
6 |
Simple yet powerful PHP class to parse RSS files.
|
|
|
7 |
|
|
|
8 |
by Vojtech Semecky, webmaster@webdot.cz
|
|
|
9 |
|
|
|
10 |
Latest version, features, manual and examples:
|
|
|
11 |
http://lastrss.webdot.cz/
|
|
|
12 |
|
|
|
13 |
----------------------------------------------------------------------
|
|
|
14 |
TODO
|
|
|
15 |
- Do not apply iconv to the whole document, only to TITLE and DESCRIPTION (both per item and for the channel as a whole)
|
|
|
16 |
----------------------------------------------------------------------
|
|
|
17 |
LICENSE
|
|
|
18 |
|
|
|
19 |
This program is free software; you can redistribute it and/or
|
|
|
20 |
modify it under the terms of the GNU General Public License (GPL)
|
|
|
21 |
as published by the Free Software Foundation; either version 2
|
|
|
22 |
of the License, or (at your option) any later version.
|
|
|
23 |
|
|
|
24 |
This program is distributed in the hope that it will be useful,
|
|
|
25 |
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
26 |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
27 |
GNU General Public License for more details.
|
|
|
28 |
|
|
|
29 |
To read the license please visit http://www.gnu.org/copyleft/gpl.html
|
|
|
30 |
======================================================================
|
|
|
31 |
*/
|
|
|
32 |
|
|
|
33 |
class lastRSS {
    // -------------------------------------------------------------------
    // Settings
    // -------------------------------------------------------------------
    // Tag names extracted from <channel>, from every <item>, from <image>
    // and from <textinput> respectively.
    var $channeltags = array ('title', 'link', 'description', 'language', 'copyright', 'managingEditor', 'webMaster', 'pubDate', 'lastBuildDate', 'rating', 'docs');
    var $itemtags = array('title', 'link', 'description', 'author', 'category', 'comments', 'enclosure', 'guid', 'pubDate', 'source');
    var $imagetags = array('title', 'url', 'link', 'width', 'height');
    var $textinputtags = array('title', 'description', 'name', 'link');

    // When true, markup is stripped from every item description.
    var $strip_html = true;

    // Public settings that are not referenced by any method of this class.
    var $time_out = 5;
    var $user_agent = "Mozilla/5.0 (Windows; U; Windows NT 5.1; rv:1.7.3) Gecko/20040913 Firefox/0.10";

    // Directory holding the cache files; an empty string disables caching.
    var $cache_dir = '';
    // Maximum age of a cache file, in seconds, before the feed is parsed again.
    var $cache_time = 0;
    // Target character set for the parsed feed; an empty string disables conversion.
    var $cp = '';
    // Value handed to mb_substitute_character(); a falsy value selects the
    // plain iconv() conversion path in MyConvertEncoding().
    var $subs_char = '';

    // -------------------------------------------------------------------
    // Parse RSS file and returns associative array.
    //
    // $rss_url  URL of the feed, passed unchanged to Parse().
    // Returns the array built by Parse() with an extra 'cached' key
    // (1 = served from the cache file, 0 = freshly parsed), or false
    // when the feed could not be loaded.
    // -------------------------------------------------------------------
    function Get ($rss_url) {
        // CACHE DISABLED >> load and parse the file directly
        if ($this->cache_dir == '') {
            $result = $this->Parse($rss_url);
            if ($result) $result['cached'] = 0;
            return $result;
        }

        // CACHE ENABLED
        $cache_file = $this->cache_dir . '/rsscache_' . md5($rss_url);

        // A cache file that does not exist is never fresh.
        $is_fresh = is_file($cache_file)
            && (time() - filemtime($cache_file)) < $this->cache_time;

        if ($is_fresh) {
            // Cached file is fresh enough, return the cached array.
            // NOTE(review): unserialize() is only safe while cache_dir is
            // writable by trusted code alone.
            $raw = file_get_contents($cache_file);
            $result = ($raw === false) ? false : unserialize($raw);
            // Set 'cached' to 1 only if the cached file is correct.
            if (is_array($result)) $result['cached'] = 1;
            return $result;
        }

        // Cached file is too old (or missing), create a new one.
        // A failed parse (false) is written as well, exactly as before.
        $result = $this->Parse($rss_url);
        $serialized = serialize($result);
        // Best effort: an unwritable cache must not break feed delivery.
        if ($f = @fopen($cache_file, 'w')) {
            fwrite($f, $serialized, strlen($serialized));
            fclose($f);
        }
        if ($result) $result['cached'] = 0;

        return $result;
    }

    // -------------------------------------------------------------------
    // Modification of preg_match(); return trimmed field with index 1
    // from 'classic' preg_match() array output.
    // Returns an empty string when the pattern does not match or has no
    // first capture group.
    // -------------------------------------------------------------------
    function my_preg_match ($pattern, $subject) {
        if (preg_match($pattern, (string) $subject, $out) && isset($out[1])) {
            return trim($out[1]);
        }
        return '';
    }

    // -------------------------------------------------------------------
    // Replace HTML entities &something; by real characters.
    // Only the entities listed in get_html_translation_table(HTML_ENTITIES)
    // are replaced.
    // -------------------------------------------------------------------
    function unhtmlentities ($string) {
        $trans_tbl = get_html_translation_table (HTML_ENTITIES);
        $trans_tbl = array_flip ($trans_tbl);
        return strtr ($string, $trans_tbl);
    }

    // -------------------------------------------------------------------
    // Encoding conversion function.
    // Converts $string from $in_charset to $out_charset and returns the
    // result of the underlying conversion call.
    // -------------------------------------------------------------------
    function MyConvertEncoding($in_charset, $out_charset, $string) {
        if ($this->subs_char) {
            // A substitute character is configured:
            // iconv() to UTF-8 first, then mb_convert_encoding() to
            // $out_charset with that substitute character active.
            $utf = iconv($in_charset, 'UTF-8', $string);
            mb_substitute_character($this->subs_char);
            return mb_convert_encoding ($utf, $out_charset, 'UTF-8');
        }

        // No substitute character: iconv() straight to $out_charset.
        return iconv($in_charset, $out_charset, $string);
    }

    // -------------------------------------------------------------------
    // Parse() is private method used by Get() to load and parse RSS file.
    // Don't use Parse() in your scripts - use Get($rss_file) instead.
    //
    // Returns false when fetchHTML() delivers nothing, otherwise an array
    // with 'encoding', the non-empty channel fields, 'textinput_*' and
    // 'image_*' fields, 'items_count' and the 'items' list.
    // -------------------------------------------------------------------
    function Parse ($rss_url) {
        // fetchHTML() is not defined in this class; these includes are
        // expected to provide it.
        include_once( "db.php" );
        include_once( "functions.php" );

        // Open and load RSS file
        $rss_content = fetchHTML( $rss_url );

        if( empty( $rss_content ) )
        {
            return false;
        }

        $result = array();

        // Parse document encoding
        $result['encoding'] = $this->my_preg_match("'encoding=[\'\"](.*?)[\'\"]'si", $rss_content);

        // If code page is set convert character encoding to required
        if ($this->cp != '') {
            // XML 1.0 treats a document without an encoding declaration
            // (and without a UTF-16 byte order mark) as UTF-8, so use that
            // instead of handing iconv() an empty source charset.
            $source_charset = ($result['encoding'] != '') ? $result['encoding'] : 'UTF-8';
            $rss_content = $this->MyConvertEncoding($source_charset, $this->cp, $rss_content);
        }

        // Parse CHANNEL info
        $channel_body = '';
        if (preg_match("'<channel.*?>(.*?)</channel>'si", $rss_content, $out_channel)) {
            $channel_body = $out_channel[1];
        }
        foreach($this->channeltags as $channeltag)
        {
            $temp = $this->my_preg_match("'<$channeltag.*?>(.*?)</$channeltag>'si", $channel_body);
            if ($temp != '') $result[$channeltag] = $temp; // Set only if not empty
        }

        // Parse TEXTINPUT info
        // This a little strange regexp means:
        // Look for tag <textinput> with or without any attributes, but skip
        // truncated version <textinput /> (it's not beginning tag)
        preg_match("'<textinput(|[^>]*[^/])>(.*?)</textinput>'si", $rss_content, $out_textinfo);
        if (!empty($out_textinfo[2])) {
            foreach($this->textinputtags as $textinputtag) {
                $temp = $this->my_preg_match("'<$textinputtag.*?>(.*?)</$textinputtag>'si", $out_textinfo[2]);
                if ($temp != '') $result['textinput_'.$textinputtag] = $temp; // Set only if not empty
            }
        }

        // Parse IMAGE info
        preg_match("'<image.*?>(.*?)</image>'si", $rss_content, $out_imageinfo);
        if (!empty($out_imageinfo[1])) {
            foreach($this->imagetags as $imagetag) {
                $temp = $this->my_preg_match("'<$imagetag.*?>(.*?)</$imagetag>'si", $out_imageinfo[1]);
                if ($temp != '') $result['image_'.$imagetag] = $temp; // Set only if not empty
            }
        }

        // Parse ITEMS
        preg_match_all("'<item(| .*?)>(.*?)</item>'si", $rss_content, $items);
        $rss_items = isset($items[2]) ? $items[2] : array();
        $result['items_count'] = count($rss_items);
        $result['items'] = array(); // create array even if there are no items

        $i = 0;
        foreach($rss_items as $rss_item) {
            // Parse one item. An item without any non-empty known tag gets
            // no entry, so indexes in 'items' can have gaps.
            foreach($this->itemtags as $itemtag)
            {
                $temp = $this->my_preg_match("'<$itemtag.*?>(.*?)</$itemtag>'si", $rss_item);
                if ($temp != '') $result['items'][$i][$itemtag] = $temp; // Set only if not empty
            }

            // Strip HTML tags from DESCRIPTION (if a description is present):
            // once on the raw text, then again after entities such as
            // &lt;b&gt; have been turned back into real tags.
            if (!empty($result['items'][$i]['description']) && $this->strip_html == true)
            {
                $result['items'][$i]['description'] = strip_tags($this->unhtmlentities(strip_tags($result['items'][$i]['description'])));
            }

            // Item counter
            $i++;
        }

        return $result;
    }
}
|
|
|
188 |
|
|
|
189 |
?>
|