Révision | 7a110e39bec67ad24434c31e88c3f48844b02f9a (tree) |
---|---|
l'heure | 2017-10-21 05:42:22 |
Auteur | umorigu <umorigu@gmai...> |
Committer | umorigu |
BugTrack/2176 showrss: Fix character encoding issues etc.
@@ -58,17 +58,24 @@ function plugin_showrss_convert() | ||
58 | 58 | if (! is_url($uri)) |
59 | 59 | return '#showrss: Seems not URI: ' . htmlsc($uri) . '<br />' . "\n"; |
60 | 60 | |
61 | + // Remove old caches at a 5% rate (1 request in 20) | |
62 | + if (mt_rand(1, 20) === 1) { | |
63 | + plugin_showrss_cache_expire(24); | |
64 | + } | |
61 | 65 | list($rss, $time) = plugin_showrss_get_rss($uri, $cachehour); |
62 | 66 | if ($rss === FALSE) return '#showrss: Failed fetching RSS from the server<br />' . "\n"; |
63 | - | |
64 | - $time = ''; | |
67 | + if (! is_array($rss)) { | |
68 | + // Show XML error message | |
69 | + return '#showrss: Error - ' . htmlsc($rss) . '<br />' . "\n"; | |
70 | + } | |
71 | + $time_display = ''; | |
65 | 72 | if ($timestamp > 0) { |
66 | - $time = '<p style="font-size:10px; font-weight:bold">Last-Modified:' . | |
73 | + $time_display = '<p style="font-size:10px; font-weight:bold">Last-Modified:' . | |
67 | 74 | get_date('Y/m/d H:i:s', $time) . '</p>'; |
68 | 75 | } |
69 | 76 | |
70 | 77 | $obj = new $class($rss); |
71 | - return $obj->toString($time); | |
78 | + return $obj->toString($time_display); | |
72 | 79 | } |
73 | 80 | |
74 | 81 | // Create HTML from RSS array() |
@@ -157,23 +164,22 @@ function plugin_showrss_get_rss($target, $cachehour) | ||
157 | 164 | $buf = ''; |
158 | 165 | $time = NULL; |
159 | 166 | if ($cachehour) { |
167 | + $filename = CACHE_DIR . encode($target) . '.tmp'; | |
160 | 168 | // Remove expired cache |
161 | - plugin_showrss_cache_expire($cachehour); | |
162 | - | |
169 | + plugin_showrss_cache_expire_file($filename, $cachehour); | |
163 | 170 | // Get the cache not expired |
164 | - $filename = CACHE_DIR . encode($target) . '.tmp'; | |
165 | 171 | if (is_readable($filename)) { |
166 | 172 | $buf = join('', file($filename)); |
167 | 173 | $time = filemtime($filename) - LOCALZONE; |
168 | 174 | } |
169 | 175 | } |
170 | 176 | |
171 | - if ($time === NULL) { | |
177 | + if (is_null($time)) { | |
172 | 178 | // Newly get RSS |
173 | 179 | $data = pkwk_http_request($target); |
174 | - if ($data['rc'] !== 200) | |
180 | + if ($data['rc'] !== 200) { | |
175 | 181 | return array(FALSE, 0); |
176 | - | |
182 | + } | |
177 | 183 | $buf = $data['data']; |
178 | 184 | $time = UTIME; |
179 | 185 |
@@ -184,9 +190,9 @@ function plugin_showrss_get_rss($target, $cachehour) | ||
184 | 190 | fclose($fp); |
185 | 191 | } |
186 | 192 | } |
187 | - | |
188 | 193 | // Parse |
189 | 194 | $obj = new ShowRSS_XML(); |
195 | + $obj->modified_date = (is_null($time) ? UTIME : $time); | |
190 | 196 | return array($obj->parse($buf),$time); |
191 | 197 | } |
192 | 198 |
@@ -204,6 +210,20 @@ function plugin_showrss_cache_expire($cachehour) | ||
204 | 210 | $dh->close(); |
205 | 211 | } |
206 | 212 | |
213 | +/** | |
214 | + * Remove a single cache file if its expiry limit has been exceeded | |
215 | + * @param $filename | |
216 | + * @param $cachehour | |
217 | + */ | |
218 | +function plugin_showrss_cache_expire_file($filename, $cachehour) | |
219 | +{ | |
220 | + $expire = $cachehour * 60 * 60; // Hour | |
221 | + $last = time() - filemtime($filename); | |
222 | + if ($last > $expire) { | |
223 | + unlink($filename); | |
224 | + } | |
225 | +} | |
226 | + | |
207 | 227 | // Get RSS and array() them |
208 | 228 | class ShowRSS_XML |
209 | 229 | { |
@@ -212,6 +232,7 @@ class ShowRSS_XML | ||
212 | 232 | var $is_item; |
213 | 233 | var $tag; |
214 | 234 | var $encoding; |
235 | + var $modified_date; | |
215 | 236 | |
216 | 237 | function parse($buf) |
217 | 238 | { |
@@ -219,32 +240,30 @@ class ShowRSS_XML | ||
219 | 240 | $this->item = array(); |
220 | 241 | $this->is_item = FALSE; |
221 | 242 | $this->tag = ''; |
222 | - | |
223 | 243 | // Detect encoding |
224 | 244 | $matches = array(); |
225 | 245 | if(preg_match('/<\?xml [^>]*\bencoding="([a-z0-9-_]+)"/i', $buf, $matches)) { |
226 | 246 | $this->encoding = $matches[1]; |
247 | + $buf = preg_replace('/<\?xml ([^>]*)\bencoding="[a-z0-9-_]+"/i', '<?xml $1', $buf); | |
227 | 248 | } else { |
228 | - $this->encoding = mb_detect_encoding($buf); | |
249 | + $this->encoding = 'UTF-8'; | |
229 | 250 | } |
230 | - | |
231 | 251 | // Normalize to UTF-8 / ASCII |
232 | 252 | if (! in_array(strtolower($this->encoding), array('us-ascii', 'iso-8859-1', 'utf-8'))) { |
233 | 253 | $buf = mb_convert_encoding($buf, 'utf-8', $this->encoding); |
234 | 254 | $this->encoding = 'utf-8'; |
235 | 255 | } |
236 | - | |
237 | 256 | // Parsing |
238 | 257 | $xml_parser = xml_parser_create($this->encoding); |
239 | 258 | xml_set_element_handler($xml_parser, array(& $this, 'start_element'), array(& $this, 'end_element')); |
240 | 259 | xml_set_character_data_handler($xml_parser, array(& $this, 'character_data')); |
241 | 260 | if (! xml_parse($xml_parser, $buf, 1)) { |
242 | - return(sprintf('XML error: %s at line %d in %s', | |
261 | + return sprintf('XML error: %s at line %d in %s', | |
243 | 262 | xml_error_string(xml_get_error_code($xml_parser)), |
244 | - xml_get_current_line_number($xml_parser), $buf)); | |
263 | + xml_get_current_line_number($xml_parser), | |
264 | + (strlen($buf) < 500 ? $buf : substr($buf, 0, 500) . '...')); | |
245 | 265 | } |
246 | 266 | xml_parser_free($xml_parser); |
247 | - | |
248 | 267 | return $this->items; |
249 | 268 | } |
250 | 269 |
@@ -278,10 +297,10 @@ class ShowRSS_XML | ||
278 | 297 | $this->item = array(); |
279 | 298 | |
280 | 299 | if (isset($item['DC:DATE'])) { |
281 | - $time = plugin_showrss_get_timestamp($item['DC:DATE']); | |
300 | + $time = plugin_showrss_get_timestamp($item['DC:DATE'], $this->modified_date); | |
282 | 301 | |
283 | 302 | } else if (isset($item['PUBDATE'])) { |
284 | - $time = plugin_showrss_get_timestamp($item['PUBDATE']); | |
303 | + $time = plugin_showrss_get_timestamp($item['PUBDATE'], $this->modified_date); | |
285 | 304 | } else { |
286 | 305 | $time_from_desc = FALSE; |
287 | 306 | if (isset($item['DESCRIPTION']) && |
@@ -309,7 +328,7 @@ class ShowRSS_XML | ||
309 | 328 | } |
310 | 329 | } |
311 | 330 | |
312 | -function plugin_showrss_get_timestamp($str) | |
331 | +function plugin_showrss_get_timestamp($str, $default_date) | |
313 | 332 | { |
314 | 333 | $str = trim($str); |
315 | 334 | if ($str == '') return UTIME; |
@@ -318,14 +337,14 @@ function plugin_showrss_get_timestamp($str) | ||
318 | 337 | if (preg_match('/(\d{4}-\d{2}-\d{2})T(\d{2}:\d{2}:\d{2})(([+-])(\d{2}):(\d{2}))?/', $str, $matches)) { |
319 | 338 | $time = strtotime($matches[1] . ' ' . $matches[2]); |
320 | 339 | if ($time === FALSE || $time === -1) { |
321 | - $time = UTIME; | |
322 | - } else if ($matches[3]) { | |
340 | + $time = $default_date; | |
341 | + } else if (isset($matches[3])) { | |
323 | 342 | $diff = ($matches[5] * 60 + $matches[6]) * 60; |
324 | 343 | $time += ($matches[4] == '-' ? $diff : -$diff); |
325 | 344 | } |
326 | 345 | return $time; |
327 | 346 | } else { |
328 | 347 | $time = strtotime($str); |
329 | - return ($time === FALSE || $time === -1) ? UTIME : $time - LOCALZONE; | |
348 | + return ($time === FALSE || $time === -1) ? $default_date : $time - LOCALZONE; | |
330 | 349 | } |
331 | 350 | } |