Commit 2ec36a36 authored by John Austin's avatar John Austin
Browse files

Merge branch 'ja_devel' containing u2o.py

parents 9e80c60b 8001f18b
Pipeline #10 canceled with stages
# Set newline behavior, overriding core.autocrlf if set.
# "auto" means Git decides if the file is text and normalizes text
# files' newlines to LF upon checkin.
* text=auto
# Declare files that will always have LF line endings on checkout.
* text eol=lf
# Denote all files that are truly binary and should not be modified.
*.idx binary
<?xml version="1.0" encoding="UTF-8" ?>
<xsl:stylesheet version="2.0"
xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
xmlns:osis="http://www.bibletechnologies.net/2003/OSIS/namespace"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<xsl:output standalone="yes" indent="yes"/>
<xsl:strip-space elements="*"/>
<!-- Transforms OSIS files created by usfm2osis.py and paratext2osis.pl for use with GoBible Creator -->
<!-- Entry point: process the whole document with the default-mode (pass 1) templates, then run that result through the pass2-mode templates -->
<xsl:template match="/">
  <!-- Temporary tree holding the output of pass 1 -->
  <xsl:variable name="filteredTree">
    <xsl:apply-templates select="node()"/>
  </xsl:variable>
  <xsl:apply-templates select="$filteredTree" mode="pass2"/>
</xsl:template>
<!-- PASS 1: FILTER AND SIMPLIFY ELEMENT HIERARCHY -->
<!-- Fallback rule, also callable by name: shallow-copy the current node or attribute and continue with its attributes and children -->
<xsl:template name="identity" match="@*|node()">
  <xsl:copy>
    <xsl:apply-templates select="@*|node()"/>
  </xsl:copy>
</xsl:template>
<!-- Remove comments: the empty template emits nothing, and priority 1 wins over the identity template, which declares no priority -->
<xsl:template match="comment()" priority="1"/>
<!-- Default for every element: drop the tag itself but keep processing whatever it contains -->
<xsl:template priority="1" match="*">
  <xsl:apply-templates select="node()"/>
</xsl:template>
<!-- Remove these elements entirely, together with all of their content (priority 2 overrides the priority 1 rule that only drops the tag) -->
<xsl:template match="osis:w|osis:note|osis:title|osis:header" priority="2"/>
<!-- These elements are the only ones kept as elements: each is shallow-copied and its attributes and children are processed in turn (priority 3 overrides both removal rules) -->
<xsl:template priority="3" match="osis:osis|osis:osisText|osis:div[@type='book']|*[@type='canonical']|osis:chapter|osis:verse">
  <xsl:copy>
    <xsl:apply-templates select="node()|@*"/>
  </xsl:copy>
</xsl:template>
<!-- PASS 2: MAKE CONTAINERS FROM MILESTONE CHAPTER (IF NEEDED) AND VERSE TAGS -->
<!-- Fallback rule for pass 2: shallow-copy the node or attribute and stay in pass2 mode for its attributes and children -->
<xsl:template mode="pass2" match="@*|node()">
  <xsl:copy>
    <xsl:apply-templates mode="pass2" select="@*|node()"/>
  </xsl:copy>
</xsl:template>
<!-- GoBible Creator requires this bookGroup div -->
<!-- The copy of osisText keeps its own attributes; only its child nodes are moved inside the new wrapper div, so the div carries nothing but type="bookGroup" -->
<xsl:template match="osis:osisText" mode="pass2">
  <xsl:copy>
    <!-- Attributes must be written before any child element is added to the copy -->
    <xsl:apply-templates select="@*" mode="pass2"/>
    <xsl:element name="div" xmlns="http://www.bibletechnologies.net/2003/OSIS/namespace">
      <xsl:attribute name="type">bookGroup</xsl:attribute>
      <xsl:apply-templates select="node()" mode="pass2"/>
    </xsl:element>
  </xsl:copy>
</xsl:template>
<!-- Ensure chapters are container elements -->
<!-- Copies the book div and its attributes, then rebuilds its content as chapter containers whose verses are produced by the "verses" template -->
<xsl:template match="osis:div[@type='book']" mode="pass2">
  <xsl:copy>
    <xsl:apply-templates select="@*" mode="pass2"/>
    <xsl:choose>
      <!-- this handles element chapters: the chapter already contains its verses, so copy it and only regroup its content by verse milestone -->
      <xsl:when test="./osis:chapter/osis:verse">
        <xsl:for-each select="./osis:chapter">
          <xsl:copy>
            <xsl:apply-templates select="@*" mode="pass2"/>
            <xsl:for-each-group select="node()" group-starting-with="osis:verse[@sID]">
              <xsl:call-template name="verses"/>
            </xsl:for-each-group>
          </xsl:copy>
        </xsl:for-each>
      </xsl:when>
      <!-- this handles milestone chapters: group the book's children by chapter start milestone and wrap each group in a new chapter element -->
      <xsl:otherwise>
        <xsl:for-each-group select="node()" group-starting-with="osis:chapter[@sID]">
          <xsl:choose>
            <!-- remove introductions: a first group that does not begin with a chapter milestone is content preceding chapter 1 -->
            <!-- The self:: test matches on namespace and local name, so it also works when the input uses a namespace prefix -->
            <xsl:when test="position()=1 and not(self::osis:chapter)"/>
            <xsl:otherwise>
              <xsl:element name="chapter" xmlns="http://www.bibletechnologies.net/2003/OSIS/namespace">
                <!-- The new container takes its osisID from the sID of the start milestone -->
                <xsl:attribute name="osisID" select="current()/@sID"/>
                <xsl:for-each-group select="current-group()[not(self::osis:chapter)]" group-starting-with="osis:verse[@sID]">
                  <xsl:call-template name="verses"/>
                </xsl:for-each-group>
              </xsl:element>
            </xsl:otherwise>
          </xsl:choose>
        </xsl:for-each-group>
      </xsl:otherwise>
    </xsl:choose>
  </xsl:copy>
</xsl:template>
<!-- Convert milestone verses into containers, and add dummy verses after multi-verse elements, as required by GoBible Creator -->
<!-- Must be called from inside an xsl:for-each-group: it reads position(), the context node and current-group() of the calling group -->
<!-- NOTE(review): this relies on current-group() staying visible inside a called template; confirm that the target XSLT processor behaves this way -->
<xsl:template name="verses">
  <xsl:choose>
    <!-- Drop a first group that does not begin with a verse element (content before the first verse) -->
    <!-- The self:: test matches on namespace and local name, so it also works when the input uses a namespace prefix -->
    <xsl:when test="position()=1 and not(self::osis:verse)"/>
    <xsl:otherwise>
      <!-- Container verse holding everything in the group except the verse milestones themselves -->
      <xsl:element name="verse" xmlns="http://www.bibletechnologies.net/2003/OSIS/namespace">
        <xsl:apply-templates select="current-group()[not(self::osis:verse)]" mode="pass2"/>
      </xsl:element>
      <!-- One dummy verse containing "." for every whitespace-separated osisID token after the first -->
      <xsl:for-each select="remove(tokenize(current()/@osisID,'\s+'),1)">
        <xsl:element name="verse" xmlns="http://www.bibletechnologies.net/2003/OSIS/namespace">.</xsl:element>
      </xsl:for-each>
    </xsl:otherwise>
  </xsl:choose>
</xsl:template>
</xsl:stylesheet>
......@@ -18,15 +18,17 @@
<xsl:template match="osis:title[@type='x-chapterLabel']"/>
<!-- remove annotateRefs, which are auto-generated by SWORD front-ends, to prevent duplication !-->
<xsl:template match="osis:reference[@type='annotateRef']"/>
<xsl:template match="osis:reference[@type='annotateRef']" priority="2"/>
<!-- remove introduction elements that shouldn't appear in SWORD introductions !-->
<xsl:template match="osis:milestone[@type='x-usfm-toc1']"/>
<xsl:template match="osis:milestone[@type='x-usfm-toc2']"/>
<xsl:template match="osis:milestone[@type='x-usfm-toc3']"/>
<xsl:template match="osis:lb[@type='x-optional']"/>
<!-- remove comments !-->
<xsl:template match="comment()"/>
<xsl:template match="comment()" priority="1"/>
<!-- show introduction <head> tags as secondary titles !-->
<xsl:template match="osis:head">
......@@ -44,7 +46,7 @@
</xsl:template>
<!-- remove <reference> tags that lack osisRef attributes !-->
<xsl:template match="osis:reference[not(@osisRef)]">
<xsl:template match="osis:reference[not(@osisRef)]" priority="1">
<xsl:apply-templates/>
</xsl:template>
......@@ -58,7 +60,7 @@
<xsl:apply-templates select="node()|@*"/>
<xsl:element name="hi" namespace="http://www.bibletechnologies.net/2003/OSIS/namespace">
<xsl:attribute name="type">italic</xsl:attribute>
<xsl:attribute name="subType">selah</xsl:attribute>
<xsl:attribute name="subType">x-selah</xsl:attribute>
<xsl:text> </xsl:text>
<xsl:value-of select="following-sibling::osis:l[1]"/>
<xsl:text> </xsl:text>
......
......@@ -14,14 +14,14 @@
<xsl:template match="/">
<xsl:element name="TEI" namespace="http://www.crosswire.org/2013/TEIOSIS/namespace">
<xsl:copy-of select="document('')/*/@xsi:schemaLocation"/>
<!-- The more complex grouping here allows keywords to be either siblings of one another, or else individually embedded in another element-->
<xsl:for-each-group select="//*" group-starting-with="*[count(descendant::osis:seg[@type='keyword'])=1]|osis:seg[@type='keyword'][count(../child::osis:seg[@type='keyword'])&gt;1]">
<xsl:for-each-group select="//node()" group-starting-with="osis:seg[@type='keyword']">
<xsl:choose>
<xsl:when test="position()=1"/><!-- drop first entry which is always junk -->
<xsl:when test="position()=1"/><!-- drop stuff before first keyword -->
<xsl:otherwise>
<xsl:element name="entryFree" namespace="http://www.crosswire.org/2013/TEIOSIS/namespace">
<xsl:attribute name="n"><xsl:value-of select="descendant-or-self::osis:seg[@type='keyword'][1]"/></xsl:attribute>
<xsl:for-each select="current-group()[count(index-of(current-group(),./..))=0]"><xsl:call-template name="teiosis"/></xsl:for-each>
<xsl:attribute name="n"><xsl:value-of select="."/></xsl:attribute>
<!-- Select only those nodes with no parent element in the current-group, because teiosis template does a recursive copy -->
<xsl:for-each select="current-group()[count(index-of(current-group(), ./..))=0]"><xsl:call-template name="teiosis"/></xsl:for-each>
</xsl:element>
</xsl:otherwise>
</xsl:choose>
......@@ -32,12 +32,15 @@
<!-- Filter and change all element namespaces to teiosis -->
<xsl:template name="teiosis">
<xsl:choose>
<xsl:when test="self::osis:chapter|self::osis:title[@type='x-chapterLabel']|self::osis:seg[@type='keyword']"/><!-- Filter any unwanted elements here -->
<xsl:when test="comment()|self::osis:chapter|self::osis:title[@type='x-chapterLabel']|self::osis:seg[@type='keyword']"/><!-- Filter any unwanted elements here -->
<xsl:when test="self::text()"><xsl:value-of select="."/></xsl:when>
<xsl:otherwise>
<xsl:element name="{local-name()}" namespace="http://www.crosswire.org/2013/TEIOSIS/namespace">
<xsl:copy-of select="@*"/>
<xsl:for-each select="*|text()"><xsl:call-template name="teiosis"/></xsl:for-each>
<!-- Do not recurse on child nodes following a keyword element, since these nodes are part of the next group -->
<xsl:for-each select="node()[count(preceding-sibling::*/descendant-or-self::osis:seg[@type='keyword']) = 0]">
<xsl:call-template name="teiosis"/>
</xsl:for-each>
</xsl:element>
</xsl:otherwise>
</xsl:choose>
......
This diff is collapsed.
u2o.py vs. usfm2osis.py notes:
\qa (acrostic) has a major bug which moves punctuation!
Mishandles \q1 (blank <line>) before new verse
Needs to move pre-verse titles, references, etc into verse
Canonical titles are not handled correctly (see above about pre-verse titles)
Some verses are not well formed in ISA, PRO, and PSA (found by osis2mod, but validates)
\cl doesn't work right (TKL)
Uses u"" strings which are not allowed in Python3
Bug on line 494: r"\ie\n"
\w glossary\w* handled as index (incorrect and does not validate either)
Attribute type="psalm" should be reserved only for Psalms
Speed:
TKL with usfm2osis.py: 83s
TKL with u2o.py: < 4s
TKL with u2o.py (orig): < 2s
......@@ -137,7 +137,7 @@ bookDict = {
# Peripheral books
'FRT':'FRONT', 'INT':'INTRODUCTION', 'BAK':'BACK', 'CNC':'CONCORDANCE', 'GLO':'GLOSSARY',
'TDX':'INDEX', 'NDX':'GAZETTEER', 'OTH':'X-OTHER'
'TDX':'INDEX', 'NDX':'GAZETTEER', 'OTH':'X-OTHER', 'DIC':'DICTIONARY'
}
addBookDict = {
......@@ -652,7 +652,7 @@ def convertToOsis(sFile):
osis = re.sub(r'\\cd\b\s+(.+)', '\uFDD4<title type="x-description">'+r'\1</title>', osis)
# \v_#
osis = re.sub(r'\\v\s+([^\s]+)\b\s*(.+?)(?=(\\v\s+|</div type="book"|<chapter eID))', lambda m: '\uFDD2<verse osisID="$BOOK$.$CHAP$.' + m.group(1) + '" sID="$BOOK$.$CHAP$.' + m.group(1) + '"/>' + m.group(2) + '<verse eID="$BOOK$.$CHAP$.' + m.group(1) + '"/>\uFDD2\n', osis, flags=re.DOTALL)
osis = re.sub(r'\\v\s+([\d\-]+)[\s\u00A0]*(.+?)(?=(\\v\s+|</div type="book"|<chapter eID))', lambda m: '\uFDD2<verse osisID="$BOOK$.$CHAP$.' + m.group(1) + '" sID="$BOOK$.$CHAP$.' + m.group(1) + '"/>' + m.group(2) + '<verse eID="$BOOK$.$CHAP$.' + m.group(1) + '"/>\uFDD2\n', osis, flags=re.DOTALL)
# \vp_#\vp*
# \va_#\va*
......@@ -1281,27 +1281,32 @@ def convertToOsis(sFile):
# assorted re-orderings
osis = re.sub('(\uFDD3<chapter eID=.+?\n)(<verse eID=.+?>\uFDD2)\n?', r'\2'+'\n'+r'\1', osis) # can this ever occur?
# </div-last>...</chapter> --> ...</chapter></div-last>
osis = re.sub('([\uFDD5\uFDD6\uFDD7\uFDD8\uFDD9]</div>)([^\uFDD5\uFDD6\uFDD7\uFDD8\uFDD9]*<chapter eID.+?>)', r'\2\1', osis)
# delete Unicode non-characters
for c in '\uFDD0\uFDD1\uFDD2\uFDD3\uFDD4\uFDD5\uFDD6\uFDD7\uFDD8\uFDD9\uFDDA\uFDDB\uFDDC\uFDDD\uFDDE\uFDDF\uFDE0\uFDE1\uFDE2\uFDE3\uFDE4\uFDE5\uFDE6\uFDE7\uFDE8\uFDE9\uFDEA\uFDEB\uFDEC\uFDED\uFDEE\uFDEF':
# delete Unicode non-characters (except section divs for now)
for c in '\uFDD1\uFDD2\uFDD3\uFDD4\uFDDF\uFDE0\uFDE1\uFDE2\uFDE3\uFDE4\uFDE5\uFDE6\uFDE7\uFDE8\uFDE9\uFDEA\uFDEB\uFDEC\uFDED\uFDEE\uFDEF':
osis = osis.replace(c, '')
# <start-tags-belonging-to-next-verse></verse> --> </verse><start-tags-belonging-to-next-verse>
osis = re.sub('(((<div type="[^"]*[Ss]ection">\s*)?<title(?!\scanonical="true")(\s[^>]*)?>.*?</title>|<([pl]|lg)(\s[^>]*)?>|\s)+)(<verse eID=[^>]*>)', r'\7\1', osis)
# <start-tags-belonging-to-next-verse><verse> --> <verse><start-tags-belonging-to-next-verse>
osis = re.sub('(((<div type="[^"]*[Ss]ection">\s*)?<title(\s[^>]*)?>.*?</title>|<([pl]|lg)(\s[^>]*)?>|\s)+)(<verse osisID=[^>]*>)', r'\7\1', osis)
# </div-book></div-section> --> </div-section></div-book>
sectionDivChar = '[\uFDD5\uFDD6\uFDD7\uFDD8\uFDD9\uFDDA\uFDDB\uFDDC\uFDDD\uFDDE]'
osis = re.sub('(</div type="book">)(</div>'+sectionDivChar+')', r'\2\1', osis)
# <verse></end-tags-belonging-to-previous-verse> --> </end-tags-belonging-to-previous-verse><verse>
osis = re.sub('(<verse osisID=[^>]*>)((</([pl]|lg)(\s[^>]*)?>|\s)+)', r'\2\1', osis)
# <start-tags-belonging-to-next-verse></verse><verse> --> </verse><verse><start-tags-belonging-to-next-verse>
startTagsVerse = '((('+sectionDivChar+'<div\s[^>]*>\s*)?<title(?!\scanonical="true")(\s[^>]*)?>.*?</title>|<([pl]|lg)(\s[^>]*)?>|\s)+)';
osis = re.sub(startTagsVerse+'(<verse eID=[^>]*>)', r'\7\1', osis)
osis = re.sub(startTagsVerse+'(<chapter eID=[^>]*/>)', r'\7\1', osis)
osis = re.sub(startTagsVerse+'(<chapter [^>]*sID=[^>]*/>)', r'\7\1', osis)
osis = re.sub(startTagsVerse+'(<verse osisID=[^>]*>)', r'\7\1', osis)
# </verse></end-tags-belonging-to-previous-verse> --> </end-tags-belonging-to-previous-verse></verse>
osis = re.sub('(<verse eID=[^>]*>)((</([pl]|lg)(\s[^>]*)?>|\s)+)', r'\2\1', osis)
# </verse><verse></end-tags-belonging-to-previous-verse> --> </end-tags-belonging-to-previous-verse></verse><verse>
endTagsVerse = '((</div>'+sectionDivChar+'|</([pl]|lg)(\s[^>]*)?>|\s)+)';
osis = re.sub('(<verse osisID=[^>]*>)'+endTagsVerse, r'\2\1', osis)
osis = re.sub('(<chapter [^>]*sID=[^>]*/>)'+endTagsVerse, r'\2\1', osis)
osis = re.sub('(<chapter eID=[^>]*/>)'+endTagsVerse, r'\2\1', osis)
osis = re.sub('(<verse eID=[^>]*>)'+endTagsVerse, r'\2\1', osis)
# delete rest of Unicode non-characters
for c in '\uFDD0\uFDD5\uFDD6\uFDD7\uFDD8\uFDD9\uFDDA\uFDDB\uFDDC\uFDDD\uFDDE':
osis = osis.replace(c, '')
# </l>NOTE --> NOTE</l>
osis = re.sub('(</l>)(<note .+?</note>)', r'\2\1', osis)
......
......@@ -5,20 +5,23 @@ DONE: Stop removing <lb/> between line groups.
DONE: Add subType="x-to-next-level" when the subsequent <l> will be indented 1 more than the current <l>
DONE: Fix markup of secondary sections
Fix conversion of all container tags to <index/> milestones which is lossy and renders these tags useless.
Correctly support other types of USFM documents, right now only: osisRefWork="Bible" docs are handled correctly.
TODO for osis2sword.xlt:
DONE: Remove certain elements (Intro title, chapter name, <p type="x-noindent">, footnote references)
DONE: Map certain elements to simplify display classes
Attribute values should be copied exactly (no entity escaping)
DONE: Attribute values should be copied exactly (no entity escaping)
TODO for xulsword:
DONE: <lg> doesn't correspond to USFM tags and should not appear as a blank line.
DONE: With <l level="x">, if x==1 this should indent 1 rather than 0.
DONE: <lb/> between line groups should be displayed.
Add "parallel" to style sheet in addition to x-parallel-passage
Fix note mouseover hover placement of popup with new HTML.
check support for osisRef="TVN:Gen.1.1"
Copy OSIS filters as TEI filters.
DONE: check support for osisRef="TVN:Gen.1.1"
WONT-DO: Add "parallel" to style sheet in addition to x-parallel-passage
WONT-DO: Fix note mouseover hover placement of popup with new HTML.
DONE: Implement Companion and deprecate DictionaryModule and ReferenceBible .conf entries
DONE: Popup passages containing verse spans repeat entire spans
DONE: Add OSIS filter tag handlers to TEI filter.
TODO for osis2mod.cpp:
Pass <p> type as HTML class
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment