Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# coding:utf-8
2"""
3@file
4@brief About RSS
5"""
6import datetime
7import os
8import re
9from pyquickhelper.loghelper import fLOG
10from .filefunction import find_all_blogs_function
# Header of the RSS stream: opens <rss> and <channel> and describes the blog.
# The literal is written flush-left instead of stripping characters afterwards:
# removing every space would also delete the meaningful ones
# (``<rss version=...>``, ``XD blog``) and produce malformed XML.
modelForARSSFeed = """<rss version="2.0">
<channel>
<title>XD blog</title>
<link>http://www.xavierdupre.fr/blog/xd_blog_nojs.html</link>
<description>new posts from XD blog</description>
"""

# One <item> per blog post; the five %s placeholders are, in order:
# title, page name (link), page name (guid), description, publication date.
modelForARSSRow = """
<item>
<title>%s</title>
<link>http://www.xavierdupre.fr/blog/%s_nojs.html</link>
<guid isPermaLink="true">http://www.xavierdupre.fr/blog/%s_nojs.html</guid>
<description>%s</description>
<pubDate>%s</pubDate>
</item>"""

# Footer of the RSS stream: closes <channel> and <rss>.
modelForARSSChannel = """\n</channel>\n</rss>\n"""
def _extract_summary(text, meta_summary):
    """
    Choose the summary of a blog post.

    @param      text            content of the HTML page (on a single line)
    @param      meta_summary    content of the ``description`` meta tag, None if absent
    @return                     2-uple: the summary (possibly empty) and a boolean
                                telling whether the SUMMARY markers were used
    """
    begin_tag = "<!-- SUMMARY BEGINS -->"
    end_tag = "<!-- SUMMARY ENDS -->"

    # the meta description wins and is used as is
    if meta_summary:
        return meta_summary, False

    summary = None
    from_markers = False
    if begin_tag in text and end_tag in text:
        p0 = text.find(begin_tag)
        p1 = text.find(end_tag)
        summary = text[p0 + len(begin_tag):p1].strip(" \n\r\t")
        # the summary is HTML embedded in XML: the tags must be escaped
        summary = summary.replace("<", "&lt;").replace(">", "&gt;")
        from_markers = True

    if not summary:
        # last resort: the whole body of the page
        p0 = text.find("<body>")
        p1 = text.find("</body>")
        summary = text[p0 + len("<body>"):p1].strip(" \n\r\t")
        summary = summary.replace("<", "&lt;").replace(">", "&gt;")

    return summary, from_markers


def file_build_rss(folder=".", outfile="blog/xdbrss.xml", now=None,
                   model_feed=modelForARSSFeed, model_row=modelForARSSRow,
                   model_channel=modelForARSSChannel, months_delay=6):
    """
    Build a RSS file, the function keeps the blog posts (HTML format)
    written during the last *months_delay* months.
    The summary of a post is, by order of preference, the content of its
    ``description`` meta tag, the text between the two following comments:

    ::

        <!-- SUMMARY BEGINS -->
        <!-- SUMMARY ENDS -->

    or the whole body of the page.

    @param      folder          folder where the blog posts can be found
    @param      outfile         final file to produce
    @param      now             date to use as a final date, only blog posts dated after
                                *now* minus *months_delay* months are kept,
                                None means the current time (evaluated at call time)
    @param      model_feed      header of the stream, see model_channel
    @param      model_row       template for one post (five ``%s``: title, link, guid,
                                description, date), see model_channel
    @param      model_channel   footer of the stream, the rss stream is
                                the concatenation of three parts:

                                ::

                                    model_feed
                                    model_row (one per post)
                                    model_channel

                                You should see the default values to see how you can replace them.
    @param      months_delay    keep posts written a couple of months ago: *months_delay* months
                                (a month is counted as 30 days)
    @return                     2-uple: outfile and the list of kept blog posts

    The function raises ValueError if a kept post has no title or no summary
    and Exception if no post was kept.
    """
    # the default date must be computed here: a default value
    # in the signature would be frozen at import time
    if now is None:
        now = datetime.datetime.now()
    limit = now - datetime.timedelta(days=months_delay * 30)
    fLOG("now - month ", limit)

    files = find_all_blogs_function(folder)
    nbfile = len(files)
    exp = re.compile('<meta +name=\\"description\\" +content=\\"(.*?)\\" */>')
    expt = re.compile('<title>(.*?)</title>')

    keepfiles = []
    rss = []
    for f in files:
        # the file name starts with the date of the post: YYYY?MM?DD
        temp = os.path.split(f)[-1].lower().replace(".html", "")
        day = datetime.datetime(int(temp[:4]), int(temp[5:7]), int(temp[8:10]))
        if day <= limit:
            continue
        keepfiles.append(f)

        with open(f, "r", encoding="utf8") as ff:
            t = ff.read().replace("\n", " ").replace("\r", " ")
        check_encoding(f)

        meta = exp.search(t)
        title = expt.search(t)
        if not title:
            raise ValueError("unable to find title in " + f)
        fLOG("getting summary for ", f)

        title = title.groups()[0]
        summary, adddots = _extract_summary(
            t, None if meta is None else meta.groups()[0])
        if not summary:
            raise ValueError("summary is empty for blog " + f)

        summary = re.sub(r"\s+", " ", summary)
        # adddots is stored with the post: it is specific to each of them
        rss.append((day, f, summary, temp, title, adddots))

    if not rss:
        raise Exception(
            "No found file in '{0}' (raw count {1}).".format(folder, nbfile))

    rows = ["<?xml version=\"1.0\" encoding=\"utf-8\"?>", model_feed]
    # most recent posts first
    for day, _, summary, short, title, adddots in sorted(rss, reverse=True):
        if adddots and not summary.endswith("..."):
            summary += " suite..."
        rows.append(model_row % (title, short, short, summary, str(day)))
    rows.append(model_channel)

    with open(outfile, "w", encoding='utf-8') as rssf:
        rssf.write("\n".join(rows))

    return outfile, keepfiles
def check_encoding(file):
    """
    Check that a file can be decoded as text: the file is opened without
    any explicit encoding (so the platform default one is used) and read
    entirely, the function does not return anything.

    @param      file        file to check

    The function raises Exception (chained to the original error)
    if the content cannot be read, the message includes the size of the file.
    """
    # the context manager closes the file even when the reading fails
    with open(file, "r") as f:
        try:
            f.read()
        except Exception as e:
            size = os.stat(file).st_size
            raise Exception(
                "issue with file (size {1})\n File \"{0}\", line 1".format(file, size)) from e