Coverage for src/ensae_teaching_cs/homeblog/buildrss.py: 92%

Hot-keys on this page

r m x p toggle line displays

j k next/prev highlighted chunk

0 (zero) top of page

1 (one) first highlighted chunk

1# coding:utf-8

2"""

3@file

4@brief About RSS

5"""

6import datetime

7import os

8import re

9from pyquickhelper.loghelper import fLOG

10from .filefunction import find_all_blogs_function

# Templates used to assemble the RSS 2.0 stream. The final document is the
# concatenation of: an XML declaration, ``modelForARSSFeed`` (channel header),
# one ``modelForARSSRow`` per post and ``modelForARSSChannel`` (closing tags).

# Channel header. It is written without indentation instead of stripping
# characters afterwards: removing every space from the template would also
# remove the spaces inside ``<rss version="2.0">`` and inside the title and
# description texts, which produces invalid XML.
modelForARSSFeed = """<rss version="2.0">
<channel>
<title>XD blog</title>
<link>http://www.xavierdupre.fr/blog/xd_blog_nojs.html</link>
<description>new posts from XD blog</description>
"""

# One item of the feed. The five ``%s`` placeholders are, in order:
# title, short file name (link), short file name (guid), summary, date.
modelForARSSRow = """
 <item>
 <title>%s</title>
 <link>http://www.xavierdupre.fr/blog/%s_nojs.html</link>
 <guid isPermaLink="true">http://www.xavierdupre.fr/blog/%s_nojs.html</guid>
 <description>%s</description>
 <pubDate>%s</pubDate>
 </item>"""

# Closing tags of the channel and of the document.
modelForARSSChannel = """\n</channel>\n</rss>\n"""

def file_build_rss(folder=".", outfile="blog/xdbrss.xml", now=None,
                   model_feed=modelForARSSFeed, model_row=modelForARSSRow,
                   model_channel=modelForARSSChannel, months_delay=6):
    """
    Build a RSS file from the recent blog posts (HTML files) found in *folder*.

    The summary of a post is taken, in this order, from:

    * the ``<meta name="description" content="..."/>`` tag,
    * the text located between two HTML comments used as begin and end
      markers (see the note on the markers in the code),
    * the whole content of ``<body>...</body>``.

    In the last two cases ``<`` and ``>`` are escaped before the text is
    inserted in the RSS stream.

    @param      folder          folder where the blog posts can be found
    @param      outfile         final file to produce
    @param      now             date to use as a final date, only the posts dated
                                after *now* minus *months_delay* months are kept,
                                None means the current date at the time of the call
    @param      model_feed      see *model_channel*
    @param      model_row       see *model_channel*
    @param      model_channel   the RSS stream is the concatenation of three templates:

                                ::

                                    model_feed
                                    model_row (once per post)
                                    model_channel

                                You should see the default values to see how you can
                                replace them.
    @param      months_delay    keep the posts written at most *months_delay* months ago
                                (a month is counted as 30 days)
    @return                     2-uple: *outfile* and the list of kept blog posts
    @raises     ValueError      if a post has no ``<title>`` or no usable summary
    @raises     Exception       if no post is recent enough to be kept
    """
    # NOTE(review): in the reviewed copy of this file the two marker strings
    # were empty (HTML comments stripped by the page extraction). The values
    # below are a reconstruction -- confirm them against the blog posts.
    begin_marker = "<!-- SUMMARY BEGIN -->"
    end_marker = "<!-- SUMMARY END -->"

    # The default date is computed at call time: a default expression in the
    # signature would be evaluated only once, when the module is imported.
    if now is None:
        now = datetime.datetime.now()
    cutoff = now - datetime.timedelta(days=months_delay * 30)
    fLOG("now - month ", cutoff)

    files = find_all_blogs_function(folder)
    nbfile = len(files)
    exp = re.compile('<meta +name=\\"description\\" +content=\\"(.*?)\\" */>')
    expt = re.compile('<title>(.*?)</title>')

    keepfiles = []
    rss = []
    for f in files:
        # File names are expected to start with the date: YYYY?MM?DD...
        temp = os.path.split(f)[-1].lower().replace(".html", "")
        day = datetime.datetime(int(temp[:4]), int(temp[5:7]), int(temp[8:10]))
        # NOTE(review): the original indentation was lost; everything below is
        # assumed to apply only to the posts recent enough to be kept.
        if day <= cutoff:
            continue
        keepfiles.append(f)

        with open(f, "r", encoding="utf8") as ff:
            t = ff.read().replace("\n", " ").replace("\r", " ")
        check_encoding(f)

        summary = exp.search(t)
        title = expt.search(t)

        if not title:
            raise ValueError("unable to find title in " + f)
        fLOG("getting summary for ", f)

        title = title.groups()[0]
        summary = None if summary is None else summary.groups()[0]
        adddots = False

        if not summary and begin_marker in t and end_marker in t:
            p0 = t.find(begin_marker)
            p1 = t.find(end_marker)
            summary = t[p0 + len(begin_marker):p1].strip(" \n\r\t")
            # NOTE(review): the replacement strings were garbled in the
            # reviewed copy ("<" replaced by "<"); escaping the markup is
            # the only reading that makes these calls meaningful.
            summary = summary.replace("<", "&lt;")
            summary = summary.replace(">", "&gt;")
            # only a part of the post is shown: a continuation mark is added
            adddots = True

        if not summary:
            p0 = t.find("<body>")
            p1 = t.find("</body>")
            summary = t[p0 + len("<body>"):p1].strip(" \n\r\t")
            summary = summary.replace("<", "&lt;")
            summary = summary.replace(">", "&gt;")

        if not summary:
            raise ValueError("summary is empty for blog " + f)

        summary = re.sub(r"\s+", " ", summary)
        # adddots is stored with the post it belongs to; being the last item
        # of the tuple, it does not change the order produced by the sort.
        rss.append((day, f, summary, temp, title, adddots))

    if not rss:
        raise Exception(
            "No found file in '{0}' (raw count {1}).".format(folder, nbfile))

    rows = ["<?xml version=\"1.0\" encoding=\"utf-8\"?>"]
    rows.append(model_feed)

    # most recent posts first
    rss.sort(reverse=True)
    for day, f, summary, short, title, adddots in rss:
        if adddots and not summary.endswith("..."):
            summary += " suite..."
        rows.append(model_row % (title, short, short, summary, str(day)))

    rows.append(model_channel)
    content = "\n".join(rows)
    with open(outfile, "w", encoding='utf-8') as rssf:
        rssf.write(content)

    return outfile, keepfiles

142

143

def check_encoding(file):
    """
    Check that a file can be read as text: the file is opened in text mode
    with the default encoding of the platform (no explicit encoding is given)
    and read entirely. The content is discarded, the function does not
    return anything.

    @param      file        file to check
    @raises     Exception   if reading the file fails, the message contains
                            the file name and its size, the original error
                            is chained to it
    """
    # The with statement closes the file even when reading fails.
    with open(file, "r") as f:
        try:
            f.read()
        except Exception as e:
            size = os.stat(file).st_size
            raise Exception(
                "issue with file (size {1})\n File \"{0}\", line 1".format(file, size)) from e

Coverage for src/ensae_teaching_cs/homeblog/buildrss.py : 92%

77 statements

Coverage for src/ensae_teaching_cs/homeblog/buildrss.py : 92%

77 statements 71 run 6 missing 0 excluded

77 statements