# Source code of ensae_teaching_cs.homeblog.buildrss
# coding:utf-8
"""
Functions to build the RSS feed of the blog from its HTML posts.
:githublink:`%|py|6`
"""
import datetime
import os
import re
from pyquickhelper.loghelper import fLOG
from .filefunction import find_all_blogs_function
# Opening part of the RSS document: the <rss> and <channel> tags followed by
# the channel metadata. The literal is written without indentation, so no
# whitespace has to be stripped from it; removing spaces blindly would glue
# "rss" and "version" together and produce XML that is not well-formed.
modelForARSSFeed = """<rss version="2.0">
<channel>
<title>XD blog</title>
<link>http://www.xavierdupre.fr/blog/xd_blog_nojs.html</link>
<description>new posts from XD blog</description>
"""
# Template of one <item> entry of the feed. The five %s placeholders are
# filled, in this order, with: title, page name (link), page name (guid),
# description and publication date.
modelForARSSRow = "\n".join((
    "",
    "<item>",
    "<title>%s</title>",
    "<link>http://www.xavierdupre.fr/blog/%s_nojs.html</link>",
    '<guid isPermaLink="true">http://www.xavierdupre.fr/blog/%s_nojs.html</guid>',
    "<description>%s</description>",
    "<pubDate>%s</pubDate>",
    "</item>",
))
# Closing tags written after the last item.
modelForARSSChannel = "\n".join(("", "</channel>", "</rss>", ""))
def _escape_angle_brackets(text):
    """
    Replace ``<`` and ``>`` by XML entities so that HTML markup can be stored
    as plain text inside an XML element.
    """
    return text.replace("<", "&lt;").replace(">", "&gt;")


def _extract_summary(text, meta_regex):
    """
    Choose the summary of a blog post.

    The candidates are tried in this order: the ``description`` meta tag,
    the text between the two summary comments, the whole ``<body>``.

    :param text: HTML content of the post, flattened on a single line
    :param meta_regex: compiled expression capturing the content of the
        ``description`` meta tag
    :return: 2-uple: the summary (possibly empty) and a boolean which is True
        when the two summary comments were looked into
    """
    begin, end = "<!-- SUMMARY BEGINS -->", "<!-- SUMMARY ENDS -->"
    found = meta_regex.search(text)
    summary = found.groups()[0] if found else ""
    excerpt = False
    if not summary and begin in text and end in text:
        start = text.find(begin) + len(begin)
        summary = _escape_angle_brackets(
            text[start:text.find(end)].strip(" \n\r\t"))
        excerpt = True
    if not summary:
        # last resort: everything between <body> and </body>
        start = text.find("<body>") + len("<body>")
        summary = _escape_angle_brackets(
            text[start:text.find("</body>")].strip(" \n\r\t"))
    return summary, excerpt


def file_build_rss(folder=".", outfile="blog/xdbrss.xml", now=None,
                   model_feed=modelForARSSFeed, model_row=modelForARSSRow,
                   model_channel=modelForARSSChannel, months_delay=6):
    """
    Build a RSS file, the function keeps the blog posts (HTML format)
    written during the last *months_delay* months.
    If a post contains the two following strings:

    ::

        <!-- SUMMARY BEGINS -->
        <!-- SUMMARY ENDS -->

    and has no ``description`` meta tag, the summary only contains the part
    included between those two comments.

    :param folder: folder where the blog posts can be found
    :param outfile: final file to produce
    :param now: date to use as a final date, None means the current time,
        taken when the function is called; only the posts dated after
        *now* minus *months_delay* months are kept
    :param model_feed: beginning of the RSS stream, see *model_channel*
    :param model_row: template of one post, see *model_channel*, it must
        contain five ``%s``: title, page name (twice), summary, date
    :param model_channel: end of the RSS stream, the stream is composed
        by the concatenation of:

        ::

            model_feed
            model_row (once per post)
            model_channel

        You should see the default values to see how you can replace them.
    :param months_delay: number of months to go back, a month is counted
        as 30 days
    :return: 2-uple: outfile and the list of kept blog posts
    :raises ValueError: if a kept post has no title or an empty summary
    :raises Exception: if no post was kept
    """
    # The default date must be computed at call time: a default value written
    # in the signature is evaluated only once, when the module is imported.
    if now is None:
        now = datetime.datetime.now()
    oldest = now - datetime.timedelta(days=months_delay * 30)
    fLOG("now - month ", oldest)

    files = find_all_blogs_function(folder)
    nbfile = len(files)
    exp = re.compile('<meta +name=\\"description\\" +content=\\"(.*?)\\" */>')
    expt = re.compile('<title>(.*?)</title>')
    keepfiles = []
    rss = []
    for f in files:
        # the date is read from the first characters of the file name:
        # positions 0-3 (year), 5-6 (month), 8-9 (day)
        short = os.path.split(f)[-1].lower().replace(".html", "")
        day = datetime.datetime(
            int(short[:4]), int(short[5:7]), int(short[8:10]))
        if day <= oldest:
            continue

        keepfiles.append(f)
        with open(f, "r", encoding="utf8") as ff:
            t = ff.read().replace("\n", " ").replace("\r", " ")
        check_encoding(f)

        title = expt.search(t)
        if not title:
            raise ValueError("unable to find title in " + f)
        fLOG("getting summary for ", f)

        summary, adddots = _extract_summary(t, exp)
        if not summary:
            raise ValueError("summary is empty for blog " + f)
        summary = re.sub(r"\s+", " ", summary)
        # the suffix is decided per post, right here, so that the flag of one
        # post never applies to another one
        if adddots and not summary.endswith("..."):
            summary += " suite..."
        rss.append((day, f, summary, short, title.groups()[0]))

    if len(rss) == 0:
        raise Exception(
            "No found file in '{0}' (raw count {1}).".format(folder, nbfile))

    # most recent posts first
    rss.sort(reverse=True)
    rows = ["<?xml version=\"1.0\" encoding=\"utf-8\"?>", model_feed]
    rows.extend(model_row % (title, short, short, summary, str(day))
                for day, _, summary, short, title in rss)
    rows.append(model_channel)

    with open(outfile, "w", encoding='utf-8') as rssf:
        rssf.write("\n".join(rows))
    return outfile, keepfiles
def check_encoding(file):
    """
    Check that a file can be decoded: the file is opened in text mode
    without an explicit encoding (so the platform default applies) and
    read entirely. The function does not return anything.

    :param file: file to check
    :raises Exception: if reading the file fails, the message gives the
        file name and its size, the original error is chained as the cause
    """
    # the context manager closes the file even when the reading fails
    with open(file, "r") as f:
        try:
            f.read()
        except Exception as e:
            size = os.stat(file).st_size
            raise Exception(
                "issue with file (size {1})\n File \"{0}\", line 1".format(file, size)) from e