RELAX NG Schema for OTMI
From OpenTextMining
Return to OTMI Specification
This annex defines a RELAX NG schema specification for OTMI.
<?xml version="1.0" encoding="UTF-8"?>
<grammar xmlns:a="http://relaxng.org/ns/compatibility/annotations/1.0" xmlns:otmi="http://www.nature.com/schema/2006/03/otmi" xmlns="http://relaxng.org/ns/structure/1.0" datatypeLibrary="http://www.w3.org/2001/XMLSchema-datatypes">
<!-- Entry point of the grammar: a valid document must match the "data" pattern. -->
<start>
  <ref name="data"/>
</start>
<!--
  Root pattern, referenced from <start>.
  otmi:data requires a "version" attribute (NMTOKEN). Its child content is an
  ordered sequence: optional stoplist, required sections, optional floats,
  optional references.
-->
<define name="data">
  <a:documentation>The "data" element added to an Atom Entry document</a:documentation>
  <element name="otmi:data">
    <attribute name="version">
      <data type="NMTOKEN"/>
    </attribute>
    <optional>
      <ref name="stoplist"/>
    </optional>
    <ref name="sections"/>
    <optional>
      <ref name="floats"/>
    </optional>
    <optional>
      <ref name="references"/>
    </optional>
  </element>
</define>
<!--
  otmi:stoplist is an empty element whose required "href" attribute (anyURI)
  points at the stoplist document.
-->
<define name="stoplist">
  <a:documentation>Stoplist is an optional element that references, by URI,
    the stoplist document used</a:documentation>
  <element name="otmi:stoplist">
    <attribute name="href">
      <data type="anyURI"/>
    </attribute>
    <empty/>
  </element>
</define>
<!--
  A run of one or more "section" patterns. This define introduces no wrapper
  element of its own; the otmi:section elements appear directly in the parent.
-->
<define name="sections">
  <a:documentation>Sections</a:documentation>
  <oneOrMore>
    <ref name="section"/>
  </oneOrMore>
</define>
<!--
  Recursive pattern. otmi:section requires a "name" attribute (NCName) and
  contains EITHER otmi-text content OR one or more nested otmi:section
  elements, never both.
  The section names listed in the documentation below are not enforced by this
  schema: any NCName is accepted for "name".
-->
<define name="section">
  <a:documentation>Section is either a front section ("abstract" | "standfirst")
    and otmi-text content, or else a body section ("body") and child
    sections ("firstpara" | "methods" | "conclusions" | "others")
    and otmi-text content</a:documentation>
  <element name="otmi:section">
    <attribute name="name">
      <data type="NCName"/>
    </attribute>
    <choice>
      <ref name="otmi-text"/>
      <oneOrMore>
        <ref name="section"/>
      </oneOrMore>
    </choice>
  </element>
</define>
<!--
  Floating objects. Accepts figures only, tables only, or figures followed by
  tables. A plain choice between the two patterns would reject any document
  that carries both figures and tables, so the figures branch allows an
  optional trailing run of tables. Every document matched by the plain choice
  is still matched here.
  The pattern never matches empty content; optionality is expressed by the
  referencing pattern.
  NOTE(review): figures-before-tables ordering is assumed; confirm against the
  OTMI specification whether arbitrary interleaving should be allowed instead.
-->
<define name="floats">
  <a:documentation>Floats - for now just figures and tables are included
    (other floating objects could be added)</a:documentation>
  <choice>
    <group>
      <ref name="figures"/>
      <optional>
        <ref name="tables"/>
      </optional>
    </group>
    <ref name="tables"/>
  </choice>
</define>
<!--
  One or more otmi:figure elements, each holding a title followed by a caption
  (both required, in that order). No enclosing wrapper element is defined.
-->
<define name="figures">
  <a:documentation>Figures element includes figure titles and captions</a:documentation>
  <oneOrMore>
    <element name="otmi:figure">
      <ref name="title"/>
      <ref name="caption"/>
    </element>
  </oneOrMore>
</define>
<!--
  One or more otmi:table elements, each holding exactly one title.
  No enclosing wrapper element is defined.
-->
<define name="tables">
  <a:documentation>Tables element includes table titles</a:documentation>
  <oneOrMore>
    <element name="otmi:table">
      <ref name="title"/>
    </element>
  </oneOrMore>
</define>
<!-- otmi:title wraps otmi-text content; referenced by the figure and table patterns. -->
<define name="title">
  <element name="otmi:title">
    <ref name="otmi-text"/>
  </element>
</define>
<!-- otmi:caption wraps otmi-text content; referenced by the figure pattern. -->
<define name="caption">
  <element name="otmi:caption">
    <ref name="otmi-text"/>
  </element>
</define>
<!--
  otmi:references contains one or more otmi:ref-id elements (anyURI) followed
  by exactly one otmi:refs-noid element (integer).
  NOTE(review): because at least one ref-id is required, a reference list in
  which no entry has a URI cannot be expressed; confirm this is intended.
-->
<define name="references">
  <a:documentation>References - no text is provided but URI references</a:documentation>
  <element name="otmi:references">
    <oneOrMore>
      <element name="otmi:ref-id">
        <a:documentation>The "ref-id" element references documents by URI</a:documentation>
        <data type="anyURI"/>
      </element>
    </oneOrMore>
    <element name="otmi:refs-noid">
      <a:documentation>The "refs-noid" element provides count of references
        with no URI</a:documentation>
      <data type="integer"/>
    </element>
  </element>
</define>
<!--
  Shared content model: optional vectors, optional snippets, optional
  full-text, in that fixed order. Since every part is optional, this pattern
  also matches empty content.
-->
<define name="otmi-text">
  <a:documentation>OTMI Text - This is the actual payload for an OTMI file</a:documentation>
  <optional>
    <ref name="vectors"/>
  </optional>
  <optional>
    <ref name="snippets"/>
  </optional>
  <optional>
    <ref name="full-text"/>
  </optional>
</define>
<!--
  otmi:vectors requires a "number" attribute (integer). Its content is one
  split-regex element followed by one or more otmi:vector elements; each
  otmi:vector requires a "count" attribute (integer) and holds text.
-->
<define name="vectors">
  <a:documentation>Vectors is a table listing word vectors</a:documentation>
  <element name="otmi:vectors">
    <attribute name="number">
      <data type="integer"/>
    </attribute>
    <ref name="split-regex"/>
    <oneOrMore>
      <element name="otmi:vector">
        <attribute name="count">
          <data type="integer"/>
        </attribute>
        <text/>
      </element>
    </oneOrMore>
  </element>
</define>
<!--
  otmi:snippets requires a "number" attribute (integer). Its content is one
  split-regex element followed by one or more otmi:snippet elements, each
  holding text.
-->
<define name="snippets">
  <a:documentation>Snippets is a table listing text snippets</a:documentation>
  <element name="otmi:snippets">
    <attribute name="number">
      <data type="integer"/>
    </attribute>
    <ref name="split-regex"/>
    <oneOrMore>
      <element name="otmi:snippet">
        <text/>
      </element>
    </oneOrMore>
  </element>
</define>
<!--
  otmi:split-regex holds the splitting expression as plain text; the schema
  does not validate it as a regular expression. Referenced by the vectors and
  snippets patterns.
-->
<define name="split-regex">
  <a:documentation>The "split-regex" expression used to split text</a:documentation>
  <element name="otmi:split-regex">
    <text/>
  </element>
</define>
<!-- Exactly one of reduced-text or raw-text; the two are mutually exclusive. -->
<define name="full-text">
  <a:documentation>Full text is either with stopwords removed or without</a:documentation>
  <choice>
    <ref name="reduced-text"/>
    <ref name="raw-text"/>
  </choice>
</define>
<!-- otmi:reduced-text holds plain text only (no child elements). -->
<define name="reduced-text">
  <a:documentation>The "reduced-text" element provides arbitrary text cleaned of markup
    and with stopwords removed</a:documentation>
  <element name="otmi:reduced-text">
    <text/>
  </element>
</define>
<!-- otmi:raw-text holds plain text only (no child elements). -->
<define name="raw-text">
  <a:documentation>The "raw-text" element provides arbitrary text cleaned of markup
    but without stopwords removed</a:documentation>
  <element name="otmi:raw-text">
    <text/>
  </element>
</define>
</grammar>
