news4 - RSS aggregation system
Révision | ed712d441a3abeb61f516c2d82775e85a118b734 (tree) |
---|---|
l'heure | 2012-10-04 04:21:36 |
Auteur | hylom <hylom@hylo...> |
Committer | hylom |
Add automatic paragraph breaking with <p> tags in the slashdotjp filter
@@ -7,6 +7,8 @@ re_read_all = re.compile(ur'''<p>\s*<a href=['"][^'"]+['"]>\s*すべて読む\s* | ||
7 | 7 | re_related = re.compile(ur'''<p>\s*関連ストーリー:.*?</p>''') |
8 | 8 | re_topics = re.compile(ur'''<a href="http://slashdot.jp/stories/\w+">(.*?)</a>''') |
9 | 9 | |
10 | +re_break = re.compile(r'''\n\n(.*?)\n''') | |
11 | + | |
10 | 12 | def entry_filter(entry): |
11 | 13 | # すべて読む、関連ストーリーを削除 |
12 | 14 | body = entry['body'] |
@@ -17,6 +19,9 @@ def entry_filter(entry): | ||
17 | 19 | itr = re_topics.findall(s) |
18 | 20 | for items in itr: |
19 | 21 | topics.append(items) |
22 | + | |
23 | + while re_break.search(body): | |
24 | + body = re_break.sub(r'</p><p>\1</p><p>', body) | |
20 | 25 | |
21 | 26 | body = re_read_all.sub('', body) |
22 | 27 | body = re_related.sub('', body) |