"""
Mark-up Python code file using HTML for syntax highlighting.
Syntax highlighting rules are in the spirit of IDLE.
Unless the -r 0 option is used it will also format the code by
applying some of the PEP8 spacing guidelines to expressions and
assignments.
For those that want a GUI you can try py2htmTk.pyw -
(it's minimal but functional).
::
USAGE in command line mode:
py2html [options] [-i filename]|-I]
OPTIONS:
-h Print this command line summary
--help Print more detailed help on styles and
revision info.
-o filename Output file (default is "py2html.html")
-i filename Source file. See -I.
-p filename HTML page template (must include a %s for inserting
the code). If not specified then a default is used.
-s filename Use a style-file otherwise use built in styles
(see --help for details)
-r 0|1|2 Reformat expressions and definitions.
-r 0 No formatting
-r 1 Format as a = 3+4; b = [1, 2, 3] (default)
-r 2 Format as a = 3 + 4; b = [1 , 2 , 3]
-R Replace newlines with <br>, tabs and multi-spaces
with
-B Just make a block (ignores -p)
-O Print to sys.stdout (ignores -o, no file created)
-I Use stdin as source file (ignore -i option)
-E 0|1|2|3|4 0 - Don't do entity substitution.
1 - Substitute < > and & (default)
2 - Substitute < > & and "
3 - Substitute <> & " and '
4 - Substitute all non-ASCIIalphanumeric
:githublink:`%|py|42`
"""
import tokenize
import os
appliedstyle = None
help_styles = """
The default styles applied are as follows:
<style type="text/css">
h1 { color: green;
position: center;
}
.python_code { font-family: monospace;
font-size: 10pt;
}
.py_key {}
.py_num {}
.py_str { color: #00AA00;}
.py_op {}
.py_com { color: red;}
.py_res { color: #FF7700;}
.py_def { color: blue;}
</style>
Where:
.python_code is the style applied to the whole block
.py_key is used for words that are not reserved words
.py_num for numeric values
.py_str for strings
.py_op for operators
.py_res for reserved words
.py_com for comments
.py_def used for words that are names in function and class definitions.
Optionally you can define a style-file, a text file with the following format:
#User editable style substitutions
# This style-file assumes a css is used.
# Format:
# styleName | start | end
# examples:
# block | <pre> | </pre>
# key | <span class="key">'| </span>
# str | <span style = "color: #00AA00"> | </span>
block | <pre class="python_code" id="pycode"> | </pre>
key | <span class="py_key"> | </span>
num | <span class="py_num"> | </span>
str | <span class="py_str"> | </span>
op | <span class="py_op"> | </span>
com | <span class="py_com"> | </span>
res | <span class="py_res"> | </span>
def | <span class="py_def"> | </span>
brk | <span class="py_brk"> | </span>
#End
This example does not use css:
# User editable HTML style substitutions
# This file uses font tags
# Undefined tags will use the default character colour
block | <pre> | </pre>
key | |
num | |
str | <font color="green"> | </font>
op | |
com | <font color="red"> | </font>
res | <font color="orange"> | </font>
def | <font color="blue"> | </font>
brk | |
Revisions:
0.51 First release.
0.6 7 Mar '04
Fixed - now supports \\ character properly
Changes - The styles now use a py_ prefix
Added - Extended and added to formatting options
- External style-file support
0.61 Added - support for disabling entity translator
0.62 Added - additional entity replacement options
"""
__author__ = "Paul Hardwick <paul@peck.org.uk>"
__date__ = "07 March 2004"
__version__ = "0.62"
entities = { # 34: '"', 38: '&', 60: '<', 62: '>',
# 160: ' ,
161: '¡', 162: '¢', 163: '£',
164: '¤', 165: '¥', 166: '¦',
167: '§', 168: '¨', 169: '©',
170: 'ª', 171: '«', 172: '¬',
173: '­', 174: '®', 175: '¯',
176: '°', 177: '±', 178: '²',
179: '³', 180: '´', 181: 'µ',
182: '¶', 183: '·', 184: '¸',
185: '¹', 186: 'º', 187: '»',
188: '¼', 189: '½', 190: '¾',
191: '¿', 192: 'À', 193: 'Á',
194: 'Â', 195: 'Ã', 196: 'Ä',
197: 'Å', 198: 'Æ', 199: 'Ç',
200: 'È', 201: 'É', 202: 'Ê',
203: 'Ë', 204: 'Ì', 205: 'Í',
206: 'Î', 207: 'Ï', 208: 'Ð',
209: 'Ñ', 210: 'Ò', 211: 'Ó',
212: 'Ô', 213: 'Õ', 214: 'Ö',
215: '×', 216: 'Ø', 217: 'Ù',
218: 'Ú', 219: 'Û', 220: 'Ü',
221: 'Ý', 222: 'Þ', 223: 'ß',
224: 'à', 225: 'á', 226: 'â',
227: 'ã', 228: 'ä', 229: 'å',
230: 'æ', 231: 'ç', 232: 'è',
233: 'é', 234: 'ê', 235: 'ë',
236: 'ì', 237: 'í', 238: 'î',
239: 'ï', 240: 'ð', 241: 'ñ',
242: 'ò', 243: 'ó', 244: 'ô',
245: 'õ', 246: 'ö', 247: '÷',
248: 'ø', 249: 'ù', 250: 'ú',
251: 'û', 252: 'ü', 253: 'ý',
254: 'þ', 255: 'ÿ'}
BSLASH = chr(92)
WORD = 1
NUMBER = 2
STRING = 3
OPERATOR = 50
COMMENT = 52
RESERVED = "reserved"
DEFINING = "defining"
BRACKETS = "brackets"
BLOCK = "block"
skeys = {WORD: "key", NUMBER: "num", STRING: "str", COMMENT: "com",
OPERATOR: "op", RESERVED: "res", DEFINING: "def", BRACKETS: "brk", BLOCK: "block"}
#######################################
# The styles wrapping each token using css
#######################################
py_style = {skeys[WORD]: ['<span class="py_key">', "</span>"], # name
skeys[NUMBER]: ['<span class="py_num">', "</span>"], # number
skeys[STRING]: ['<span class="py_str">', "</span>"], # string
skeys[OPERATOR]: ['<span class="py_op">', "</span>"], # operator
skeys[COMMENT]: ['<span class="py_com">', "</span>"], # comment
# reserved word
skeys[RESERVED]: ['<span class="py_res">', "</span>"],
# class and def names
skeys[DEFINING]: ['<span class="py_def">', "</span>"],
# all enclosing ops
skeys[BRACKETS]: ['<span class="py_brk">', "</span>"],
# wraps the block
skeys[BLOCK]: ['<pre class="python_code" id="pycode">', "</pre>"]
}
######################################
# The default web page
######################################
py_page = """
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" >
<title>%s</title>
<style type="text/css">
h1 { color: green;
position: center;
}
.python_code { font-family: monospace;
font-size: 10pt;
}
.py_key {color: black;}
.py_num color: black;{}
.py_str { color: #00AA00;}
.py_op {color: black; }
.py_com { color: red;}
.py_res { color: #FF7700;}
.py_def { color: blue;}
.py_brk { color: black;}
</style>
</head>
<body>
<h1>#### Source code for %s ####</h1>
<hr>
%s
<hr>
[Created with py2html Ver:%s]
<p>
<a href="http://validator.w3.org/check/referer"><img border="0"
src="http://www.w3.org/Icons/valid-html401"
alt="Valid HTML 4.01!" height="31" width="88"></a>
</p></body>
</html>"""
######################################
# Some variables used for tracking states
command = False
prev = 0
prevres = False
prevtok = "#" # previous token
defining = False # check if previous toke was def or class
# Are we somewhere between def/class and : (for default assignment formating)
definingmode = False
# Operator formating
spaced_tokens = ["=", "==", ">", "<", ">=",
"<=", "+=", "-=", "<>", "!=", "&&", "||"]
monospaced_tokens = [":", ";", ","]
binops = ["+", "-", "*", "/", "//", "**", "%", "<<", ">>", "&", "^", "|"]
brackets = ["[", "]", "{", "}", "(", ")"]
spaced_fmt2 = spaced_tokens + binops
# reserved words
reserved = kwlist = [
# --start keywords--
'and', 'assert', 'break',
'class', 'continue', 'def',
'del', 'elif', 'else',
'except', 'exec', 'finally',
'for', 'from', 'global',
'if', 'import', 'in',
'is', 'lambda', 'not',
'or', 'pass', 'print',
'raise', 'return', 'try',
'while', 'yield',
# others languages
"void", "double", "int", "throw", "template", "catch",
"public", "protected", "float", "unsigned", "__int32", "short",
# --end keywords--
]
[docs]def substituteEntities(token, level="1", char_set=None):
"""
based on level setting do entity substitution
and return revised. iso8859-1 ??:
- "0" : Don't do entity substitution.
- "1" : Substitute ``< > and & (default)``
- "2" : Substitute ``< > & and "``
- "3" : Substitute ``<> & " and '``
- "4" : Substitute all non-ASCIIalphanumeric char_set not implemented yet
:githublink:`%|py|288`
"""
if level == "0":
return token
elif level in "1234":
token = token.replace('&', "&")
token = token.replace('<', "<")
token = token.replace('>', ">")
if level in "234":
token = token.replace('"', """)
if level in "34":
token = token.replace("'", "'")
if level == "4":
dl = list(token)
for index, char in enumerate(dl):
value = ord(char)
if value > 127:
dl[index] = entities.get(value, "&#%3d;" % (value,))
token = ''.join(dl)
return token
[docs]def apply_style(index, token, start, src, format, style, entity="1"):
"""
Supplied with an index this function applies
the style using the format rules and returns a formatted
verison of the token.
'start' and 'src' are (not used yet) for appliance of
intelligent line breaks for long lines
entity is the replace entities flag
:githublink:`%|py|316`
"""
global prev, prevres, defining, definingmode, prevtok
# keyword handling
isres = False
if index == WORD:
if token not in reserved:
if defining:
fmt = style[skeys[DEFINING]]
else:
fmt = style.get(skeys.get(index, 99), ['', ''])
defining = False
else:
fmt = style[skeys[RESERVED]]
isres = True
if token in ['def', 'class']:
defining = True
definingmode = True
elif index == OPERATOR and token in brackets:
fmt = style[skeys[BRACKETS]]
else:
fmt = style.get(skeys.get(index, 99), ['', ''])
if (token == ":") and definingmode:
definingmode = False
token = substituteEntities(token, entity, char_set=None)
if format != "0":
if format == "1":
if token in spaced_tokens and not definingmode:
token = " %s " % (token,)
elif token in monospaced_tokens:
token = "%s " % (token,)
elif (isres or index == COMMENT) and prev in [WORD, NUMBER, STRING, OPERATOR]:
token = " " + token
elif index in [WORD, NUMBER, STRING] and prev == WORD:
token = " " + token
elif token in ["[", "("] and prevres:
token = " " + token
elif format == "2":
if token in spaced_fmt2:
if (prev == OPERATOR and token in "+-~"):
token = " %s" % (token,)
else:
token = " %s " % (token,)
elif token in monospaced_tokens:
token = "%s " % (token,)
elif isres and prev in [WORD, NUMBER, STRING, OPERATOR]:
token = " " + token
elif index in [WORD, NUMBER, STRING] and prev == WORD:
token = " " + token
elif token in ["[", "("] and prevres:
token = " " + token
text = fmt[0] + token + fmt[1]
prev = index
prevres = isres
prevtok = token
return text
[docs]def readStyleFile(filename):
"""
Read a style file and return a style dictionary.
The file format is::
#User editable style substitutions
# This style-file assumes a css is used.
# Format:
# styleName | start | end
# examples:
# block | <pre> | </pre>
# key | <span class="key">'| </span>
# str | <span style = "color: #00AA00"> | </span>
block | <pre class="python_code" id="pycode"> | </pre>
key | <span class="py_key"> | </span>
num | <span class="py_num"> | </span>
str | <span class="py_str"> | </span>
op | <span class="py_op"> | </span>
com | <span class="py_com"> | </span>
res | <span class="py_res"> | </span>
def | <span class="py_def"> | </span>
brk | <span class="py_brk"> | </span>
:githublink:`%|py|397`
"""
global appliedstyle
if not filename:
appliedstyle = py_style
return py_style
else:
try:
with open(filename, 'r') as ff:
lines = ff.readlines()
appliedstyle = {}
for line in lines:
line = line.strip()
if line and line[0] != '#':
parts = line.split('|')
if len(parts) == 3 and len(parts[0].strip()):
appliedstyle[parts[0].strip()] = [
parts[1].strip(), parts[2].strip()]
else:
print("Error in style file:\n", line)
sys.exit(1)
return appliedstyle
except Exception:
appliedstyle = py_style
return py_style
[docs]def replaceCodes(text=""):
"""
Helper function that does the ``\\n`` and space substition
returning the changed text.
:githublink:`%|py|427`
"""
text = text.replace('\n', '<br>')
text = text.replace('\t', ' ' * 4)
text = text.replace(" " * 4, " ")
text = text.replace(" " * 3, " ")
text = text.replace(" " * 2, " ")
text = text.replace("<br><br>", "<br> <br>")
return text
[docs]def file2HTML(file_name, format, style, Replace, entity="1", encoding="utf-8"):
"""
Reads a file and returns the contents as a string,
highlighted with :epkg:`HTML` styles. This function uses the
output of the tokenize module to decide what to colour.
It calls :func:`apply_style <ensae_teaching_cs.homeblog.py2html.apply_style>` with the token index.
- If format == '0' then the code will display as the author expected it to.
- If format == '1' then spaces are added and removed around expressions to standardise the format.
- If format == '2' as '1' but different rules
- If style == style dictionary. Replace (boolean)
- If True then replace ``\\n`` with ``<br>``, multiple spaces with `` <space>combinations;``
:githublink:`%|py|449`
"""
removeFile = None
if file_name == "<stdin>":
file_name = "temp_stdin.py2html.tmp"
lines = sys.stdin.readlines()
with open(file_name, "w", encoding=encoding) as f:
f.writelines(lines)
removeFile = file_name
elif len(file_name) < 1000 and os.path.exists(file_name):
try:
# , encoding="utf8").readlines() #copy all lines into lines list
with open(file_name, 'r', encoding=encoding) as f:
lines = f.readlines()
except UnicodeDecodeError as e:
print("issue with file ", file_name)
raise e
else:
lines = file_name.split("\n")
file_name = "temp_py2html.tmp"
with open(file_name, "w", encoding=encoding) as f:
f.writelines("\n".join(lines))
removeFile = file_name
lines = ['', ] + lines
tempPointer = open(file_name, 'r', encoding=encoding)
read_line = tempPointer.readline # , encoding="utf8").readline
# use tokenize to interate through tokens
tok = tokenize.generate_tokens(read_line)
page = []
old_line = 1
old_column = 0
try:
for tupe in tok:
# first collect packing beween previous token and this one
if old_line == tupe[2][0]: # Another token on same line as last token
if tupe[2][1] and (old_column < tupe[2][1]):
# handle when no reformat and when it is the first line
# processed
if (format == "0") or (prev == 0):
txt = lines[old_line][old_column:tupe[2][1]]
txt = txt.replace(BSLASH, BSLASH + '\n')
page.append(txt)
else:
# collect remains of old line
subpage = lines[old_line][old_column:-1]
subpage = subpage.replace(BSLASH, BSLASH + '\n')
page.append(subpage)
old_line += 1
# now collect all the lines between last and current
while old_line != tupe[2][0]:
txt = lines[old_line].replace(BSLASH, BSLASH + '\n')
page.append(txt)
old_line += 1
# now get begining of line upto current column
txt = lines[old_line][0:tupe[2][1]].replace(
BSLASH, BSLASH + '\n')
page.append(txt)
# now add formatted token
page.append(apply_style(tupe[0], tupe[1], tupe[
2], tupe[4], format, style, entity))
# now update pointers
old_line = tupe[3][0]
old_column = tupe[3][1]
except tokenize.TokenError:
return "File cannot be tokenized by tokenize"
except IndexError:
pass
text = ''.join(page)
if Replace:
text = replaceCodes(text)
tempPointer.close()
if removeFile is not None and os.path.exists(removeFile):
os.remove(removeFile)
return text
[docs]def makeBlock(data):
"""
Applies the block tags to text
:githublink:`%|py|531`
"""
global appliedstyle
return "%s%s%s" % (appliedstyle['block'][0], data, appliedstyle['block'][1])
[docs]def cmdLine(): # pragma: no cover
"""
This is the function that handles
command line mode
:githublink:`%|py|538`
"""
global appliedstyle
help_ = ' PY2HTML - convert python code to HTML (version %s)\n\n%s' % (
__version__, __doc__)
try:
opts, _ = getopt.getopt(sys.argv[1:], "ho:i:p:s:r:RBOIE:",
["help", "output", "page", "input", "style", "reformat",
"Replace", "Block", "STDOUT", "STDIN", "entities"])
except getopt.GetoptError:
# print help information and exit:
print(help_)
sys.exit(2)
outfile = "py2html.html"
page = None
infile = ""
reformat = "1"
justdiv = False
stylefile = None
emit = False
Replace = False
stdin = False
entity = "1"
for o, a in opts:
if o == "-h":
print(help_)
sys.exit()
elif o == "--help":
print(help_styles)
sys.exit()
elif o in ("-o", "--output"):
outfile = a
elif o in ("-i", "--input"):
infile = a
elif o in ("-p", "--page"):
page = a
elif o in ("-s", "--style"):
stylefile = a
elif o in ("-r", "--reformat") and a in "012":
reformat = str(a)
elif o in ("-B", "--Block"):
justdiv = True
elif o in ("-O", "--STDOUT"):
emit = True
elif o in ("-R", "--Replace"):
Replace = True
elif o in ("-I", "--STDIN"):
stdin = True
elif o in ("-E", "--entities") and a in "01234":
entity = str(a)
else:
print(help_)
sys.exit(1)
if infile == "" and not stdin:
print(help_)
sys.exit()
elif stdin:
infile = "<stdin>"
appliedstyle = readStyleFile(stylefile)
data = file2HTML(infile, reformat, appliedstyle, Replace, entity)
block = makeBlock(data)
if infile == "<stdin>":
infile = "stdin"
if justdiv:
html = block
elif page:
html = open(page, "r").read() # , encoding="utf8").read()
html = html % (block,)
else:
html = py_page % (infile, infile, block, __version__)
if not emit:
f = open(outfile, "w") # , encoding="utf8")
f.write(html)
f.close()
else:
print(html)
if __name__ == "__main__": # pragma: no cover
import getopt
import sys
cmdLine()