import httpclient, nre, options, json,
strutils, strformat, strtabs,
xmlparser, xmltree, times, os
# Horizontal indent added per nesting level, in percent of the page width.
# It is multiplied by `(indent - 1)` and written into `margin-left: N%`.
# Declared `const` because nothing in this module ever reassigns it.
const indentWidth = 3
proc generateHtmlHeading(element: XmlNode, indent: int, htmlToc: var string, htmlBody: var string, faLinks: var StringTableRef) =
  ## Emits the heading for `element` (called for chapter, part and section
  ## nodes).
  ##
  ## * `indent` is the heading level: it selects the `<hN>` tag and, through
  ##   `(indent - 1) * indentWidth`, the left margin in percent.
  ## * A table-of-contents link pointing at `#_<number>` is appended to
  ##   `htmlToc`.
  ## * The heading itself (id `_<number>`) is appended to `htmlBody`, followed
  ##   by an "(Effective ...)" paragraph when an `effdate` child exists.
  ## * `faLinks[number]` is set to `number`, marking the number as a link
  ##   target that exists in the generated document.
  ##
  ## Text and attribute values taken from the XML are escaped before being
  ## embedded in the HTML. A missing `catchline` child produces an empty
  ## title instead of dereferencing nil.
  let number = element.attr("number")
  let safeNumber = xmltree.escape(number)

  # `child` returns nil when there is no such child element.
  let catchline = element.child("catchline")
  let title =
    if catchline.isNil: ""
    else: xmltree.escape(catchline.innerText)

  let level = intToStr(indent)
  let margin = intToStr((indent - 1) * indentWidth)

  htmlToc.add("<a style=\"margin-left: " & margin & "%;\" href=\"#_" &
    safeNumber & "\">" & safeNumber & " " & title & "</a><br/>\n")

  # The table key stays unescaped so lookups by raw statute number still work.
  faLinks[number] = number

  htmlBody.add("<h" & level & " id=\"_" & safeNumber & "\">" &
    safeNumber & " " & title & "</h" & level & ">\n")

  let effdate = element.child("effdate")
  if not effdate.isNil:
    htmlBody.add("<p style=\"display: block; margin-left: " & margin &
      "%;\">(Effective " & xmltree.escape(effdate.innerText) & ")</p>\n")
proc convertSubsectionToHtml(element: XmlNode, parentNumber: string, htmlToc: var string, htmlBody: var string, indent: int, faLinks: var StringTableRef) =
  ## Renders `element` as one `<p id="_<number>">` paragraph appended to
  ## `htmlBody`, recursing into nested `subsection` children with
  ## `indent + 1`.
  ##
  ## * `parentNumber` is the number of the enclosing element. The paragraph is
  ##   labelled with the part of `number` that follows it.
  ## * `xref` children become `<a href="#_<refnumber>">` links. A referenced
  ##   number not yet present in `faLinks` is recorded with an empty value.
  ## * `histories`, `catchline` and `tab` children are skipped with a log
  ##   line. Any other element tag is reported as unrecognized.
  ## * `faLinks[number]` is set to `number`.
  ##
  ## `htmlToc` is not modified here; it is only passed on to recursive calls.
  ##
  ## Text and attribute values taken from the XML are escaped before being
  ## embedded in the HTML.
  let number = element.attr("number")

  # Strip the parent's number only when it really is a prefix. Slicing by
  # length alone would raise when `number` is shorter than `parentNumber`,
  # and would cut off unrelated characters when the prefix does not match.
  let dispNum =
    if number.startsWith(parentNumber): number[parentNumber.len .. ^1]
    else: number

  let margin = intToStr((indent - 1) * indentWidth)
  htmlBody.add("<p id=\"_" & xmltree.escape(number) &
    "\" style=\"display: block; margin-left: " & margin & "%;\">" &
    xmltree.escape(dispNum) & " ")
  faLinks[number] = number

  for node in element:
    case node.kind
    of xnElement:
      case node.tag
      of "xref":
        let target = node.attr("refnumber")
        htmlBody.add("<a href=\"#_" & xmltree.escape(target) & "\">" &
          xmltree.escape(node.innerText) & "</a>")
        # Do not overwrite an entry a heading or subsection already set.
        if not faLinks.hasKey(target):
          faLinks[target] = ""
      of "subsection":
        # NOTE(review): the nested paragraph is written before this one is
        # closed, so the output nests <p> inside <p>.
        convertSubsectionToHtml(node, number, htmlToc, htmlBody, indent + 1, faLinks)
      of "histories":
        echo "skipping histories"
      of "catchline":
        echo "skipping catchline"
      of "tab":
        echo "skipping tab"
      else:
        echo "Unrecognized node type: " & node.tag
    of xnText:
      htmlBody.add(xmltree.escape(node.innerText) & " ")
    else:
      # Other node kinds are ignored.
      discard

  htmlBody.add("</p>\n")
proc convertSectionToHtml(element: XmlNode, htmlToc: var string, htmlBody: var string, faLinks: var StringTableRef) =
  ## Renders a `section` element: a level-3 heading followed by each direct
  ## `subsection` child, converted with the section's number as parent
  ## number.
  ##
  ## When the section has no `subsection` children, the section element itself
  ## is passed to `convertSubsectionToHtml` with an empty parent number.
  generateHtmlHeading(element, 3, htmlToc, htmlBody, faLinks)

  let sectionNumber = element.attr("number")
  var sawSubsection = false
  for node in element.items:
    # `tag` is only evaluated for element nodes thanks to short-circuiting.
    if node.kind != xnElement or node.tag != "subsection":
      continue
    convertSubsectionToHtml(node, sectionNumber, htmlToc, htmlBody, 3, faLinks)
    sawSubsection = true

  if not sawSubsection:
    # treat as a subsection if we didn't find any subsections
    convertSubsectionToHtml(element, "", htmlToc, htmlBody, 3, faLinks)
proc convertChapterToHtml(element: XmlNode, htmlToc: var string, htmlBody: var string, faLinks: var StringTableRef) =
  ## Renders a `chapter` element: a level-1 heading, then every `section`
  ## node returned by `findAll`, then a horizontal rule.
  element.generateHtmlHeading(1, htmlToc, htmlBody, faLinks)

  let sections = element.findAll("section")
  for sec in sections:
    sec.convertSectionToHtml(htmlToc, htmlBody, faLinks)

  # NOTE(review): indentation was missing in the reviewed source. The rule is
  # assumed to be written once after all sections, not once per section.
  # Confirm against the original file.
  htmlBody.add("<hr/>\n")
proc convertPartToHtml(element: XmlNode, htmlToc: var string, htmlBody: var string, faLinks: var StringTableRef) =
  ## Renders a `part` element: a level-2 heading, then every `section` node
  ## returned by `findAll`, then a line break.
  element.generateHtmlHeading(2, htmlToc, htmlBody, faLinks)

  let sections = element.findAll("section")
  for sec in sections:
    sec.convertSectionToHtml(htmlToc, htmlBody, faLinks)

  # NOTE(review): indentation was missing in the reviewed source. The break is
  # assumed to be written once after all sections, not once per section.
  # Confirm against the original file.
  htmlBody.add("<br/>\n")
proc convertXmlToHtml(statute: string, htmlToc: var string, htmlBody: var string, faLinks: var StringTableRef) =
  ## Parses `statute` as XML and dispatches on the root element's tag to the
  ## chapter, part or section converter.
  ##
  ## A root that is not an element, or whose tag is none of those three, is
  ## only reported on stdout. Nothing is appended in that case.
  let root = parseXml(statute)

  if root.kind != xnElement:
    echo "Unknown root element kind: "
    echo root.kind
    return

  let rootTag = root.tag
  if rootTag == "chapter":
    convertChapterToHtml(root, htmlToc, htmlBody, faLinks)
  elif rootTag == "part":
    convertPartToHtml(root, htmlToc, htmlBody, faLinks)
  elif rootTag == "section":
    convertSectionToHtml(root, htmlToc, htmlBody, faLinks)
  else:
    echo "Unknown root element tag: " & rootTag
proc writeHtml(fileName: string, htmlToc: string, htmlBody: string, page_title: string) =
  ## Writes the complete HTML page to `fileName`.
  ##
  ## The page consists of a fixed head (with `page_title` and today's date in
  ## `<title>`), an inline stylesheet, an `<h1>` with `page_title`, a
  ## "Generated <date>" line, a notice linking to the project pages, then
  ## `htmlToc`, `htmlBody` and the closing tags.
  const headOpen = """
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
"""
  const styleAndBodyOpen = """
<style>
html,body{font-size:100%}
h1{font-size:2.125em; margin-left: 0%}
h2{font-size:1.6875em; margin-left: 3%}
h3{font-size:1.375em; margin-left: 6%}
h4{font-size:1.125em; margin-left: 9%}
h5{font-size:1.125em; margin-left: 12%}
h6{font-size:1em; margin-left: 15%}
hr{border:solid #ddddd8;border-width:1px 0 0;clear:both;margin:1.25em 0 1.1875em;height:0}
</style>
</head>
<body>
"""
  const projectNotice = """
<p> See <a href="https://manithree.gitlab.io/utfastatutes/">https://manithree.gitlab.io/utfastatutes/</a> for the latest version, or
<a href="https://gitlab.com/manithree/utfastatutes">https://gitlab.com/manithree/utfastatutes</a> to report defects or make suggestions.</p>
<hr/>
"""
  const pageClose = """
</body>
</html>
"""
  # Assemble the page in document order. The fmt fragments carry no trailing
  # newline, so they run straight into the fragment that follows them.
  var page = headOpen
  page.add(fmt"<title>{page_title} {getDateStr()}</title>")
  page.add(styleAndBodyOpen)
  page.add(fmt"<h1>{page_title}</h1>")
  page.add(fmt"<p>Generated {getDateStr()}</p>")
  page.add(projectNotice)
  page.add(htmlToc)
  page.add(htmlBody)
  page.add(pageClose)

  writeFile(fileName, page)
proc fixLinks(htmlToc: var string, htmlBody: var string, faLinks: var StringTableRef) =
  ## Reports every in-page link in `htmlBody` whose target is not part of the
  ## generated document.
  ##
  ## A link is an `href="#_<number>"` attribute. Its target counts as present
  ## when `faLinks[number] == number`. Every other link is printed as
  ## "Needs fixing: <number>". Nothing is rewritten yet, and neither
  ## `htmlToc` nor `htmlBody` is modified.
  ##
  ## All hrefs on a line are examined, not just the first one. A target
  ## missing from `faLinks` is reported instead of raising `KeyError`.
  # this is where the external links are fixed up if I can figure out how
  # to reliably link to le.utah.gov
  echo "Fixing links"

  # Compiled once instead of once per line.
  let hrefPattern = re("href=\"([^\"]+)\"")

  for line in splitLines(htmlBody):
    for m in line.findIter(hrefPattern):
      let target = m.captures[0]
      # Only "#_<number>" anchors are generated by this module. Anything else
      # is skipped because stripping two characters from it is meaningless.
      if not target.startsWith("#_"):
        continue
      let statute = target[2 .. ^1]
      if faLinks.getOrDefault(statute) != statute:
        echo "Needs fixing: " & statute
when isMainModule:
  # Entry point. The first command-line argument is the path of a JSON config
  # file with the keys "code" (list of page URLs), "title", "filename",
  # "download_format" and "output_format".
  #
  # For every URL in "code" the page is fetched, the current version
  # identifier is read from its `var versionDefault="...";` line, and the
  # versioned file is downloaded and saved as "<version>.<download_format>".
  # For xml -> html it is also converted. Afterwards the links are checked
  # and the combined HTML page is written to "filename".
  if paramCount() < 1:
    quit("Usage: pass the path of the JSON config file as the first argument", 1)

  var htmlToc = ""
  var htmlBody = ""
  var faLinks = newStringTable()

  # Read the json config file
  let settings = parseJson(readFile(paramStr(1)))
  let codes = settings["code"]
  let title = settings["title"].getStr()
  let fileName = settings["filename"].getStr()
  let downloadFormat = settings["download_format"].getStr()
  # `and` short-circuits: "output_format" is only read for xml downloads.
  let xmlToHtml = downloadFormat == "xml" and
    settings["output_format"].getStr() == "html"

  # Compiled once instead of once per line of every page.
  let versionPattern = re"var versionDefault=""(.*)"";"

  var client = newHttpClient()
  try:
    for code in codes:
      let codeUrl = code.getStr()
      let pg = client.getContent(codeUrl)

      # couldn't get multi-line regex to work, but this is probably more
      # efficient, anyway:
      var version = ""
      for line in splitLines(pg):
        let m = line.match(versionPattern)
        if isSome(m):
          version = m.get.captures[0]
          break
      if version.len == 0:
        # Without a version the download URL below would be meaningless.
        raise newException(ValueError,
          "No versionDefault declaration found in " & codeUrl)

      # download the base versioned file
      # NOTE(review): the slice keeps the trailing '/', so the request URL
      # contains "//" before the file name. Left unchanged on purpose.
      let urlBase = codeUrl[0 .. rfind(codeUrl, '/')]
      let statute = client.getContent(
        urlBase & "/" & version & "." & downloadFormat)

      # save the file (as downloaded)
      writeFile(version & "." & downloadFormat, statute)

      if xmlToHtml:
        convertXmlToHtml(statute, htmlToc, htmlBody, faLinks)
  finally:
    # Release the connection even when a download or conversion fails.
    client.close()

  if xmlToHtml:
    fixLinks(htmlToc, htmlBody, faLinks)
    writeHtml(fileName, htmlToc, htmlBody, title)