Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1"""
2@file
3@brief Various functions to automate the collection of blog posts.
4"""
5import os
6import webbrowser
7import sys
8import threading
9import datetime
10from textwrap import dedent
11from jinja2 import Template
12from pyquickhelper.filehelper import read_content_ufs
13from pyensae.sql.database_main import Database
14from .rss_stream import StreamRSS
15from .rss_blogpost import BlogPost
16from .rss_simple_server import RSSServer
def rss_from_xml_to_database(file, database="database_rss.db3",
                             table="blogs", fLOG=None):
    """
    Parses a list of blogs stored in a :epkg:`XML`
    file using Google Reader format,
    stores the results in a :epkg:`SQLite` database.

    @param      file        (str) xml file containing the list of blogs
                            (see the example below)
    @param      database    database file (sqlite)
    @param      table       table name
    @param      fLOG        logging function
    @return                 number of stored blogs

    The XML file should contain the following:

    ::

        <outline text="XD blog"
                 title="XD blog" type="rss"
                 xmlUrl="http://www.xavierdupre.fr/blog/xdbrss.xml"
                 htmlUrl="http://www.xavierdupre.fr/blog/xd_blog.html" />

    The function does not check whether or not the blogs were
    already added to the database,
    they will be added a second time. If the table
    does not exist, it will be created.
    """
    # The blog list is fully materialised before the database is opened so
    # that a parsing failure never leaves a connection behind.
    res = list(StreamRSS.enumerate_stream_from_google_list(file))
    db = Database(database, LOG=fLOG)
    db.connect()
    try:
        StreamRSS.fill_table(db, table, res)
    finally:
        # Always release the connection, even if the insertion fails.
        db.close()
    return len(res)
def rss_download_post_to_database(database="database_rss.db3",
                                  table_blog="blogs",
                                  table_post="posts",
                                  fLOG=None):
    """
    Downloads all posts from a list of blogs stored
    in a database by function @see fn rss_from_xml_to_database.

    @param      database    database file name (SQLite format)
    @param      table_blog  table name of the blogs
    @param      table_post  table name of the post
    @param      fLOG        logging function
    @return                 number of posts downloaded
    """
    db = Database(database, LOG=fLOG)
    db.connect()
    try:
        rss_list = list(db.enumerate_objects(table_blog, StreamRSS))
        list_post = list(
            StreamRSS.enumerate_post_from_rsslist(rss_list, fLOG=fLOG))
        # NOTE(review): skip_exception=True presumably lets fill_table ignore
        # posts which cannot be inserted -- confirm against BlogPost.fill_table.
        BlogPost.fill_table(db, table_post, list_post, skip_exception=True)
    finally:
        # Always release the connection, even if the download or the
        # insertion fails.
        db.close()

    return len(list_post)
def rss_update_run_server(dbfile, xml_blogs, port=8093, browser=None, period="today",
                          server=None, thread=False, fLOG=None):
    """
    Registers the blogs described by *xml_blogs* into *dbfile*,
    downloads their posts into the same database, then
    starts a server and opens a browser
    (the database is created if it does not exist).

    @param      dbfile      (str) sqlite database to create
    @param      xml_blogs   (str) xml description of blogs (google format) (file or string)
    @param      port        the main page will be ``http://localhost:port/``
    @param      browser     (str) to choose a different browser than the default one
    @param      period      (str) when opening the browser, it can show the results for last day or last week
    @param      server      to set up your own server
    @param      thread      to start the server in a separate thread
    @param      fLOG        logging function
    @return                 see @see fn rss_run_server

    You can read the blog post `pyhome3 RSS Reader
    <http://www.xavierdupre.fr/blog/2013-07-28_nojs.html>`_.
    """
    # Both update steps rely on the default table names of the helpers.
    rss_from_xml_to_database(xml_blogs, database=dbfile, fLOG=fLOG)
    rss_download_post_to_database(database=dbfile, fLOG=fLOG)

    server_options = dict(browser=browser, period=period,
                          server=server, thread=thread, fLOG=fLOG)
    return rss_run_server(dbfile, port, **server_options)
def rss_run_server(dbfile, port=8093, browser=None, period="today",
                   server=None, thread=False, fLOG=None):
    """
    Starts a server and opens a browser on a page reading blog posts.

    @param      dbfile      (str) existing sqlite database to read
    @param      port        the main page will be ``http://localhost:port/``
    @param      browser     (str) to choose a different browser than the default one
    @param      period      (str) when opening the browser, it can show the results for last day or last week
    @param      server      to set up your own server
    @param      thread      to start the server in a separate thread
    @param      fLOG        logging function
    @return                 the value returned by ``RSSServer.run_server``
    @raise      FileNotFoundError if *dbfile* does not exist

    You can read the blog post `RSS Reader
    <http://www.xavierdupre.fr/blog/2013-07-28_nojs.html>`_.

    If *browser* is "none", the browser is not started.
    """
    if not os.path.exists(dbfile):
        raise FileNotFoundError(dbfile)

    def open_browser():
        url = "http://localhost:%d/rss_reader.html?search=%s" % (port, period)
        if fLOG:
            fLOG("opening ", url)
        if browser is None:
            # No preference: let the system pick the default browser.
            webbrowser.open(url)
            return
        try:
            b = webbrowser.get(browser)
        except webbrowser.Error:
            if browser != "firefox" or not sys.platform.startswith("win"):
                raise
            # Firefox is not always registered on Windows: register its
            # usual installation path and try again.
            webbrowser.register(
                'firefox',
                None,
                webbrowser.GenericBrowser(r"C:\Program Files (x86)\Mozilla Firefox\firefox.exe"))
            b = webbrowser.get(browser)
        b.open(url)

    # The browser is opened from a helper thread started before the server.
    # Original author's note: webbrowser.open may not return until the
    # browser is closed if the browser was launched with this only tab;
    # if a new tab was created, it returns quickly.
    # Nothing is logged or started when the browser is disabled.
    if browser not in ("none", "None"):
        th = threading.Thread(target=open_browser)
        th.start()
    ret = RSSServer.run_server(
        server, dbfile, port=port, thread=thread, fLOG=fLOG)
    # we should close the thread here if it is still alive
    return ret
def enumerate_post_from_rss(content, rss_stream=None):
    """
    Parses a :epkg:`RSS` stream.

    @param      content     :epkg:`RSS` content
    @param      rss_stream  object given as first argument to every
                            @see cl BlogPost created by this function
    @return                 iterator on @see cl BlogPost

    The publication date falls back on the update date, then on the
    current time if neither is available. A date in the future
    is replaced by the current time.
    """
    import feedparser  # pylint: disable=C0415
    d = feedparser.parse(content)
    if d is None:
        return

    for post in d["entries"]:
        titleb = post.get("title", "-")
        url = post.get("link", "")

        # The link is used as guid unless both 'id' and 'guidislink' are
        # available and 'guidislink' is false.
        try:
            guid = url if post["guidislink"] else post["id"]
        except KeyError:
            guid = url

        try:
            desc = post["summary_detail"]["value"]
        except KeyError:
            desc = post.get("summary", "")

        isPermaLink = True

        # A date field may be missing or None: both cases fall back on
        # the next candidate instead of raising TypeError.
        now = datetime.datetime.now()
        structTime = post.get("published_parsed")
        if structTime is None:
            structTime = post.get("updated_parsed")
        if structTime is None:
            date = now
        else:
            date = min(datetime.datetime(*structTime[:6]), now)

        bl = BlogPost(rss_stream, titleb, guid,
                      isPermaLink, url, desc, date)
        yield bl
def enumerate_rss_merge(rss_urls, title="compilation", min_size=None):
    """
    Merges many :epkg:`rss` files or urls into a single sequence of posts.

    @param      rss_urls    :epkg:`rss` files or urls
    @param      title       title of the stream every post is attached to
    @param      min_size    forwarded to ``read_content_ufs``, fails if the
                            downloaded file is below this size
    @return                 iterator on the posts of all sources, in the order
                            of *rss_urls*
    """
    # One stream object is shared by the posts of every source.
    merged_stream = StreamRSS(title, None, None, None, None, id=0)
    for location in rss_urls:
        raw = read_content_ufs(location, min_size=min_size)
        yield from enumerate_post_from_rss(raw, rss_stream=merged_stream)
def to_rss(obj, link, description):
    """
    Converts something into :epkg:`RSS`.

    @param      obj             non-empty list of objects exposing ``to_rss_item()``
    @param      link            link
    @param      description     description
    @return                     content
    @raise      TypeError       if *obj* is not a list
    @raise      ValueError      if *obj* is empty

    The channel title is the title of the first element if it is a
    ``StreamRSS``, otherwise it is empty.
    """
    if not isinstance(obj, list):
        raise TypeError("Unexpected type {}.".format(type(obj)))
    if len(obj) == 0:
        raise ValueError("obj cannot be empty.")

    title = obj[0].title if isinstance(obj[0], StreamRSS) else ""
    items = [blog.to_rss_item() for blog in obj]

    # The XML declaration must be the very first characters of the document,
    # so the newline following the opening quotes is removed.
    # NOTE(review): title, link and description are inserted as is, callers
    # presumably pass XML-safe text -- confirm.
    template = dedent("""
        <?xml version="1.0" encoding="utf-8"?>
        <rss version="2.0">
        <channel>
            <title>{{title}}</title>
            <link>{{link}}</link>
            <description>{{description}}</description>
            {{items}}
        </channel>
        </rss>
        """).lstrip("\n")
    tpl = Template(template)
    return tpl.render(link=link, description=description,
                      items='\n'.join(items),
                      title=title)
# Default jinja2 template used by to_html. Placeholders: title, author,
# keywords, rssfile (link to the RSS file), header and items.
template_html = """
<?xml version="1.0" encoding="utf-8"?>
<html>
<head>
<link href="http://www.xavierdupre.fr/pyhome3.ico" rel="shortcut icon"/>
<link href="http://www.xavierdupre.fr/blog/pMenu.css" rel="stylesheet" type="text/css"/>
<link REL="stylesheet" TYPE="text/css" href="http://www.xavierdupre.fr/blog/javascript/run_prettify.css"/>
<title>{{title}}</title>
<meta content="{{author}}" name="author"/>
<meta content="{{keywords}}" name="keywords"/>
<meta content="text/html; charset=utf-8" http-equiv="Content-Type"/>
<script src="http://www.xavierdupre.fr/blog/javascript/pMenu.js" type="text/javascript"></script>
<script src="http://www.xavierdupre.fr/blog/javascript/latexit.js" type="text/javascript"></script>
<script src="http://www.xavierdupre.fr/blog/javascript/run_prettify.js" type="text/javascript"></script>
<link href="http://www.xavierdupre.fr/blog/javascript/shCore.css" rel="stylesheet" type="text/css"/>
<link href="http://www.xavierdupre.fr/blog/javascript/shThemeDefault.css" rel="stylesheet" type="text/css"/>
<script src="http://www.xavierdupre.fr/blog/javascript/shCore.js" type="text/javascript"></script>
<script src="http://www.xavierdupre.fr/blog/javascript/shAutoloader.js" type="text/javascript"></script>
</head>

<body>

<div class="otherlayer">
<!-- other layer -->
</div>

<div class="sidebar">
</div>

<div class="maintitle">
<h1>{{title}}</h1>
<p><a href="{{rssfile}}"><img src="http://www.xavierdupre.fr/blog/documents/feed-icon-16x16.png"/></a>
<i>{{header}}</i></p>

</div>

<div class="mainbody">

<hr />

{{items}}

<hr />

</div>
<script type="text/javascript">
SyntaxHighlighter.autoloader(
    'js jscript javascript http://www.xavierdupre.fr/blog/javascript/shBrushJScript.js',
    'py python http://www.xavierdupre.fr/blog/javascript/shBrushPython.js',
    'cpp http://www.xavierdupre.fr/blog/javascript/shBrushCpp.js',
    'sql http://www.xavierdupre.fr/blog/javascript/shBrushSql.js',
    'flat plain http://www.xavierdupre.fr/blog/javascript/shBrushPlain.js',
    'vba vb http://www.xavierdupre.fr/blog/javascript/shBrushVb.js',
    'bash http://www.xavierdupre.fr/blog/javascript/shBrushBash.js',
    'cs http://www.xavierdupre.fr/blog/javascript/shBrushCSharp.js',
    'php http://www.xavierdupre.fr/blog/javascript/shBrushPhp.js',
    'css http://www.xavierdupre.fr/blog/javascript/shBrushCss.js',
    'xml html http://www.xavierdupre.fr/blog/javascript/shBrushXml.js'
);
SyntaxHighlighter.all();
</script>
<div id="playscript"/>

</body>
</html>
"""
def to_html(items, template=None, title="BLOG",
            author="AUTHOR", keywords="blog,python",
            header="", rssfile="rssfile.xml",
            **context):
    """
    Produces a :epkg:`HTML`.

    @param      items       list of blog post
    @param      template    template as a string, an already compiled
                            template (any object with a ``render`` method)
                            or None to get the default one
    @param      title       blog title
    @param      author      author
    @param      keywords    keywords
    @param      header      blog description
    @param      rssfile     file RSS
    @param      context     other information
    @return                 pages

    Posts are rendered from the most recent to the oldest
    (sorted by ``pubDate``).
    """
    if template is None:
        template = template_html
    # A string is compiled, anything else is used as a compiled template.
    # A custom template used to fail because it was never compiled.
    template_ = Template(template) if isinstance(template, str) else template

    ordered = sorted(items, reverse=True, key=lambda i: i.pubDate)
    hitems = "\n".join(post.to_html_item() for post in ordered)
    return template_.render(title=title, author=author, keywords=keywords,
                            items=hitems, header=header, rssfile=rssfile,
                            **context)