## Downloads statute documents listed in a JSON config file and, when the
## download format is XML and the output format is HTML, converts them into a
## single HTML page with a table of contents.
##
## Usage: `<program> <config.json>`
##
## The config file is read for these keys: `code` (array of page URLs),
## `title`, `filename`, `download_format` and `output_format`.

import httpclient, nre, options, json, strutils, strformat, strtabs,
       xmlparser, xmltree, times, os

const indentWidth = 3
  ## Left margin, in percent, added per heading/subsection nesting level.

proc htmlEscape(s: string): string =
  ## Escapes `s` for inclusion in HTML text or a double-quoted attribute.
  # Module-qualified because `strutils` also exports an `escape` proc.
  xmltree.escape(s)

proc leftMargin(indent: int): string =
  ## Returns the CSS left margin (e.g. `"6%"`) for nesting level `indent`.
  $((indent - 1) * indentWidth) & "%"

proc catchlineText(element: XmlNode): string =
  ## Returns the escaped inner text of `element`'s `catchline` child, or an
  ## empty string when the element has no such child.
  let catchline = element.child("catchline")
  if catchline.isNil: "" else: htmlEscape(innerText(catchline))

proc generateHtmlHeading(element: XmlNode, indent: int, htmlToc: var string,
                         htmlBody: var string,
                         faLinks: var StringTableRef) =
  ## Appends a table-of-contents link to `htmlToc` and an `<h{indent}>`
  ## heading to `htmlBody` for `element`, using its `number` attribute and
  ## `catchline` child. If an `effdate` child exists, an "(Effective ...)"
  ## paragraph is appended as well. Records `number` in `faLinks` as a
  ## link target defined in this document.
  let number = element.attr("number")
  let safeNumber = htmlEscape(number)
  let title = catchlineText(element)
  let margin = leftMargin(indent)
  let level = $indent

  htmlToc.add "<a style=\"margin-left: " & margin & ";\" href=\"#_" &
    safeNumber & "\">" & safeNumber & " " & title & "</a><br/>\n"
  # A key mapped to itself marks an anchor that exists in the output.
  faLinks[number] = number
  htmlBody.add "<h" & level & " id=\"_" & safeNumber & "\">" & safeNumber &
    " " & title & "</h" & level & ">\n"

  let effdate = element.child("effdate")
  if not effdate.isNil:
    htmlBody.add "<p style=\"display: block; margin-left: " & margin &
      ";\">(Effective " & htmlEscape(innerText(effdate)) & ")</p>\n"

proc convertSubsectionToHtml(element: XmlNode, parentNumber: string,
                             htmlToc: var string, htmlBody: var string,
                             indent: int, faLinks: var StringTableRef) =
  ## Appends `element` to `htmlBody` as a `<p>` whose id is `_` plus the
  ## element's `number` attribute. The displayed number is `number` with the
  ## leading `parentNumber` removed. Text children are copied, `xref`
  ## children become links, nested `subsection` children recurse one level
  ## deeper, and other element children are only reported with `echo`.
  ## `htmlToc` is not modified here; it is passed through to recursive calls.
  let number = element.attr("number")
  # Only strip the parent's number when it really is a prefix; otherwise
  # show the full number instead of cutting arbitrary characters.
  let dispNum =
    if parentNumber.len > 0 and number.startsWith(parentNumber):
      number[parentNumber.len .. ^1]
    else:
      number

  htmlBody.add "<p id=\"_" & htmlEscape(number) &
    "\" style=\"display: block; margin-left: " & leftMargin(indent) &
    ";\">" & htmlEscape(dispNum) & " "
  faLinks[number] = number

  for node in element:
    case node.kind
    of xnElement:
      case node.tag
      of "xref":
        let target = node.attr("refnumber")
        htmlBody.add "<a href=\"#_" & htmlEscape(target) & "\">" &
          htmlEscape(innerText(node)) & "</a>"
        # An empty value marks a referenced number with no anchor (yet);
        # it is overwritten if that number is converted later.
        if not faLinks.hasKey(target):
          faLinks[target] = ""
      of "subsection":
        convertSubsectionToHtml(node, number, htmlToc, htmlBody, indent + 1,
                                faLinks)
      of "histories", "catchline", "tab":
        echo "skipping " & node.tag
      else:
        echo "Unrecognized node type: " & node.tag
    of xnText:
      htmlBody.add htmlEscape(innerText(node)) & " "
    else:
      discard
  htmlBody.add "</p>\n"

proc convertSectionToHtml(element: XmlNode, htmlToc: var string,
                          htmlBody: var string,
                          faLinks: var StringTableRef) =
  ## Emits a level-3 heading for `element` followed by each of its direct
  ## `subsection` children. When there are none, the section element itself
  ## is rendered as a subsection.
  generateHtmlHeading(element, 3, htmlToc, htmlBody, faLinks)
  let number = element.attr("number")
  var foundSubsection = false
  for node in element:
    if node.kind == xnElement and node.tag == "subsection":
      convertSubsectionToHtml(node, number, htmlToc, htmlBody, 3, faLinks)
      foundSubsection = true
  if not foundSubsection:
    # treat as a subsection if we didn't find any subsections
    convertSubsectionToHtml(element, "", htmlToc, htmlBody, 3, faLinks)

proc convertChapterToHtml(element: XmlNode, htmlToc: var string,
                          htmlBody: var string,
                          faLinks: var StringTableRef) =
  ## Emits a level-1 heading for `element`, converts every `section` found
  ## by `findAll`, then appends a horizontal rule.
  generateHtmlHeading(element, 1, htmlToc, htmlBody, faLinks)
  for section in findAll(element, "section"):
    convertSectionToHtml(section, htmlToc, htmlBody, faLinks)
  htmlBody.add "<hr/>\n"

proc convertPartToHtml(element: XmlNode, htmlToc: var string,
                       htmlBody: var string, faLinks: var StringTableRef) =
  ## Emits a level-2 heading for `element`, converts every `section` found
  ## by `findAll`, then appends a line break.
  generateHtmlHeading(element, 2, htmlToc, htmlBody, faLinks)
  for section in findAll(element, "section"):
    convertSectionToHtml(section, htmlToc, htmlBody, faLinks)
  htmlBody.add "<br/>\n"

proc convertXmlToHtml(statute: string, htmlToc: var string,
                      htmlBody: var string, faLinks: var StringTableRef) =
  ## Parses the XML text `statute` and dispatches on the root element's tag
  ## (`chapter`, `part` or `section`), appending the generated HTML to
  ## `htmlToc` and `htmlBody`. Unknown roots are reported with `echo`.
  let tree = parseXml(statute)
  if tree.kind == xnElement:
    case tree.tag
    of "chapter": convertChapterToHtml(tree, htmlToc, htmlBody, faLinks)
    of "part": convertPartToHtml(tree, htmlToc, htmlBody, faLinks)
    of "section": convertSectionToHtml(tree, htmlToc, htmlBody, faLinks)
    else: echo "Unknown root element tag: " & tree.tag
  else:
    echo "Unknown root element kind: "
    echo tree.kind

proc writeHtml(fileName: string, htmlToc: string, htmlBody: string,
               page_title: string) =
  ## Writes a complete HTML document to `fileName`: a fixed preamble with
  ## `page_title` and today's date, then `htmlToc`, `htmlBody` and the
  ## closing tags.
  let today = getDateStr() # read once so title and body show the same date
  let safeTitle = htmlEscape(page_title)

  var preamble = """
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
"""
  preamble.add "<title>" & safeTitle & " " & today & "</title>\n"
  preamble.add """
<style>
html,body{font-size:100%}
h1{font-size:2.125em; margin-left: 0%}
h2{font-size:1.6875em; margin-left: 3%}
h3{font-size:1.375em; margin-left: 6%}
h4{font-size:1.125em; margin-left: 9%}
h5{font-size:1.125em; margin-left: 12%}
h6{font-size:1em; margin-left: 15%}
hr{border:solid #ddddd8;border-width:1px 0 0;clear:both;margin:1.25em 0 1.1875em;height:0}
</style>
</head>
<body>
"""
  preamble.add "<h1>" & safeTitle & "</h1>\n"
  preamble.add "<p>Generated " & today & "</p>\n"
  preamble.add """
<p>
See <a href="https://manithree.gitlab.io/utfastatutes/">https://manithree.gitlab.io/utfastatutes/</a>
for the latest version, or
<a href="https://gitlab.com/manithree/utfastatutes">https://gitlab.com/manithree/utfastatutes</a>
to report defects or make suggestions.</p>
<hr/>
"""
  let post = """
</body>
</html>
"""
  writeFile(fileName, preamble & htmlToc & htmlBody & post)

proc fixLinks(htmlToc: var string, htmlBody: var string,
              faLinks: var StringTableRef) =
  ## Reports every internal link (`href="#_..."`) in `htmlBody` whose target
  ## is not recorded in `faLinks` as an anchor of this document. Nothing is
  ## rewritten yet; `htmlToc` and `htmlBody` are left unchanged.
  # this is where the external links are fixed up if I can figure out how
  # to reliably link to le.utah.gov
  echo "Fixing links"
  # Compiled once, outside the loop.
  let hrefPattern = re("href=\"([^\"]+)\"")
  for line in splitLines(htmlBody):
    # findIter visits every link on the line, not just the first one.
    for m in line.findIter(hrefPattern):
      let href = m.captures[0]
      if not href.startsWith("#_"):
        continue
      let statute = href[2 .. ^1]
      # getOrDefault yields "" for unknown keys instead of raising KeyError.
      if faLinks.getOrDefault(statute) != statute:
        echo "Needs fixing: " & statute

proc main(configPath: string) =
  ## Runs the whole download/convert/write pipeline described by the JSON
  ## config file at `configPath`. Raises `ValueError` when a code page does
  ## not contain a `versionDefault` line.
  var htmlToc = ""
  var htmlBody = ""
  var faLinks = newStringTable()

  # Read the json config file
  let settings = parseJson(readFile(configPath))
  let title = settings["title"].getStr()
  let fileName = settings["filename"].getStr()
  let downloadFormat = settings["download_format"].getStr()
  let xmlToHtml = downloadFormat == "xml" and
    settings["output_format"].getStr() == "html"

  let versionPattern = re"var versionDefault=""(.*)"";"
  let client = newHttpClient()
  defer: client.close()

  for code in settings["code"]:
    let codeUrl = code.getStr()
    let page = client.getContent(codeUrl)

    # couldn't get multi-line regex to work, but this is probably more
    # efficient, anyway:
    var version = ""
    for line in splitLines(page):
      let m = line.match(versionPattern)
      if m.isSome:
        version = m.get.captures[0]
        break
    if version.len == 0:
      raise newException(ValueError,
        "no versionDefault found in page: " & codeUrl)

    # download the base versioned file
    # urlBase keeps the trailing '/', so no extra separator is added.
    let urlBase = codeUrl[0 .. rfind(codeUrl, '/')]
    let versionedName = version & "." & downloadFormat
    let statute = client.getContent(urlBase & versionedName)

    # save the file (as downloaded)
    writeFile(versionedName, statute)

    if xmlToHtml:
      convertXmlToHtml(statute, htmlToc, htmlBody, faLinks)

  if xmlToHtml:
    fixLinks(htmlToc, htmlBody, faLinks)
    writeHtml(fileName, htmlToc, htmlBody, title)

when isMainModule:
  if paramCount() < 1:
    quit("Usage: " & getAppFilename().extractFilename & " <config.json>",
         QuitFailure)
  main(paramStr(1))