W3C XML Schema for OTMI

From OpenTextMining

Jump to: navigation, search

Return to OTMI Specification

This annex gives the W3C XML Schema (XSD) definition of the OTMI data format.

<?xml version="1.0" encoding="UTF-8"?>
<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema" elementFormDefault="qualified" targetNamespace="http://www.nature.com/schema/2006/03/otmi" xmlns:otmi="http://www.nature.com/schema/2006/03/otmi">
  <!-- Top-level "data" element. Its children appear in a fixed order:
       an optional stoplist, the sections group, an optional floats group
       and finally an optional references element. The "version" attribute
       is mandatory and must be a single name token. -->
  <xs:element name="data">
    <xs:complexType>
      <xs:sequence>
        <xs:element ref="otmi:stoplist" minOccurs="0" maxOccurs="1"/>
        <xs:group ref="otmi:sections" minOccurs="1" maxOccurs="1"/>
        <xs:group ref="otmi:floats" minOccurs="0" maxOccurs="1"/>
        <xs:element ref="otmi:references" minOccurs="0" maxOccurs="1"/>
      </xs:sequence>
      <xs:attribute name="version" type="xs:NMTOKEN" use="required"/>
    </xs:complexType>
  </xs:element>
  <!-- Empty element carrying a single mandatory "href" attribute of type
       xs:anyURI.
       NOTE(review): presumably the URI identifies the stop-word list used
       when producing the text payload; confirm against the OTMI
       specification. -->
  <xs:element name="stoplist">
    <xs:complexType>
      <xs:attribute name="href" type="xs:anyURI" use="required"/>
    </xs:complexType>
  </xs:element>
  <!-- Model group: one or more "section" elements, with no wrapper element
       around them. -->
  <xs:group name="sections">
    <xs:annotation>
      <xs:documentation>Sections</xs:documentation>
    </xs:annotation>
    <xs:sequence>
      <xs:element ref="otmi:section" minOccurs="1" maxOccurs="unbounded"/>
    </xs:sequence>
  </xs:group>
  <!-- A section is recursive: it holds EITHER the otmi-text payload group
       OR one or more nested "section" elements, never a mixture of both.
       The "name" attribute is mandatory and must be an NCName (no colon,
       no spaces). -->
  <xs:element name="section">
    <xs:complexType>
      <xs:choice minOccurs="1" maxOccurs="1">
        <xs:group ref="otmi:otmi-text"/>
        <xs:element ref="otmi:section" minOccurs="1" maxOccurs="unbounded"/>
      </xs:choice>
      <xs:attribute name="name" type="xs:NCName" use="required"/>
    </xs:complexType>
  </xs:element>
  <!-- Floats: the figures group followed by the tables group, each of which
       may be absent.
       The earlier definition used xs:choice here, which let a document
       carry figures or tables but never both. A sequence of two optional
       groups accepts every document the choice accepted and additionally
       allows figures and tables together (figures first).
       NOTE(review): confirm against the OTMI specification that carrying
       both kinds of float is intended. -->
  <xs:group name="floats">
    <xs:annotation>
      <xs:documentation>Floats: figures followed by tables; either may be omitted</xs:documentation>
    </xs:annotation>
    <xs:sequence>
      <xs:group ref="otmi:figures" minOccurs="0"/>
      <xs:group ref="otmi:tables" minOccurs="0"/>
    </xs:sequence>
  </xs:group>
  <!-- Model group: one or more "figure" elements, with no wrapper element
       around them. -->
  <xs:group name="figures">
    <xs:annotation>
      <xs:documentation>Figures element includes figure titles and captions</xs:documentation>
    </xs:annotation>
    <xs:sequence>
      <xs:element ref="otmi:figure" minOccurs="1" maxOccurs="unbounded"/>
    </xs:sequence>
  </xs:group>
  <!-- A figure extends the named complex type "title": its content is
       whatever that type requires (one "title" element) followed by
       exactly one "caption" element. -->
  <xs:element name="figure">
    <xs:complexType>
      <xs:complexContent>
        <xs:extension base="otmi:title">
          <xs:sequence>
            <xs:element ref="otmi:caption" minOccurs="1" maxOccurs="1"/>
          </xs:sequence>
        </xs:extension>
      </xs:complexContent>
    </xs:complexType>
  </xs:element>
  <!-- Model group: one or more "table" elements, with no wrapper element
       around them. -->
  <xs:group name="tables">
    <xs:annotation>
      <xs:documentation>Tables element includes table titles</xs:documentation>
    </xs:annotation>
    <xs:sequence>
      <xs:element ref="otmi:table" minOccurs="1" maxOccurs="unbounded"/>
    </xs:sequence>
  </xs:group>
  <!-- A table is typed directly by the named complex type "title", so its
       only content is a single "title" element (no caption). -->
  <xs:element type="otmi:title" name="table"/>
  <!-- Named complex type "title": a container holding exactly one "title"
       element. It shares its name with the global element "title"; that is
       legal because XSD keeps type names and element names in separate
       symbol spaces. -->
  <xs:complexType name="title">
    <xs:sequence>
      <xs:element ref="otmi:title" minOccurs="1" maxOccurs="1"/>
    </xs:sequence>
  </xs:complexType>
  <!-- Global element "title": its content is the otmi-text payload group. -->
  <xs:element name="title">
    <xs:complexType>
      <xs:group ref="otmi:otmi-text" minOccurs="1" maxOccurs="1"/>
    </xs:complexType>
  </xs:element>
  <!-- Global element "caption": its content is the otmi-text payload
       group, the same content model as "title". -->
  <xs:element name="caption">
    <xs:complexType>
      <xs:group ref="otmi:otmi-text" minOccurs="1" maxOccurs="1"/>
    </xs:complexType>
  </xs:element>
  <!-- Reference list: one or more "ref-id" elements followed by exactly one
       "refs-noid" element.
       NOTE(review): because at least one "ref-id" is mandatory, a document
       whose references all lack a URI cannot be expressed; confirm that
       this is intended. -->
  <xs:element name="references">
    <xs:complexType>
      <xs:sequence>
        <xs:element ref="otmi:ref-id" minOccurs="1" maxOccurs="unbounded"/>
        <xs:element ref="otmi:refs-noid" minOccurs="1" maxOccurs="1"/>
      </xs:sequence>
    </xs:complexType>
  </xs:element>
  <!-- Simple element whose text content must be a valid xs:anyURI. -->
  <xs:element type="xs:anyURI" name="ref-id">
    <xs:annotation>
      <xs:documentation>The "ref-id" element references documents by URI</xs:documentation>
    </xs:annotation>
  </xs:element>
  <!-- Simple element whose text content is an xs:integer.
       NOTE(review): presumably the number of cited works that have no
       URI; confirm against the OTMI specification. -->
  <xs:element type="xs:integer" name="refs-noid"/>
  <!-- Payload model group shared by section, title and caption. Up to three
       representations may appear, always in this order: vectors, snippets,
       full-text. Every member is optional, so the group may also match
       nothing at all. -->
  <xs:group name="otmi-text">
    <xs:annotation>
      <xs:documentation>OTMI Text - This is the actual payload for an OTMI file</xs:documentation>
    </xs:annotation>
    <xs:sequence>
      <xs:element ref="otmi:vectors" minOccurs="0" maxOccurs="1"/>
      <xs:element ref="otmi:snippets" minOccurs="0" maxOccurs="1"/>
      <xs:element ref="otmi:full-text" minOccurs="0" maxOccurs="1"/>
    </xs:sequence>
  </xs:group>
  <!-- Container for "vector" elements: exactly one "split-regex" comes
       first, followed by one or more "vector" children. The integer
       "number" attribute is mandatory.
       NOTE(review): "number" presumably records how many vector children
       are present; the schema itself does not enforce any such relation. -->
  <xs:element name="vectors">
    <xs:complexType>
      <xs:sequence>
        <xs:element ref="otmi:split-regex" minOccurs="1" maxOccurs="1"/>
        <xs:element ref="otmi:vector" minOccurs="1" maxOccurs="unbounded"/>
      </xs:sequence>
      <xs:attribute name="number" type="xs:integer" use="required"/>
    </xs:complexType>
  </xs:element>
  <!-- A vector is character data plus a mandatory integer "count"
       attribute.
       The content is declared as simple content extending xs:string
       instead of a mixed complex type with an empty content model. Both
       forms accept the same instances (text only, no child elements), but
       simple content gives the text a real simple type, which validators
       and data-binding tools expose as a plain string.
       NOTE(review): the text is presumably a term and "count" its number
       of occurrences; confirm against the OTMI specification. -->
  <xs:element name="vector">
    <xs:complexType>
      <xs:simpleContent>
        <xs:extension base="xs:string">
          <xs:attribute name="count" type="xs:integer" use="required"/>
        </xs:extension>
      </xs:simpleContent>
    </xs:complexType>
  </xs:element>
  <!-- Container for "snippet" elements: exactly one "split-regex" comes
       first, followed by one or more "snippet" children. The integer
       "number" attribute is mandatory.
       NOTE(review): "number" presumably records how many snippet children
       are present; the schema itself does not enforce any such relation. -->
  <xs:element name="snippets">
    <xs:complexType>
      <xs:sequence>
        <xs:element ref="otmi:split-regex" minOccurs="1" maxOccurs="1"/>
        <xs:element ref="otmi:snippet" minOccurs="1" maxOccurs="unbounded"/>
      </xs:sequence>
      <xs:attribute name="number" type="xs:integer" use="required"/>
    </xs:complexType>
  </xs:element>
  <!-- Two plain-string leaf elements. "snippet" is the repeated child of
       "snippets"; "split-regex" is the leading child of both "vectors" and
       "snippets".
       NOTE(review): "split-regex" presumably holds the regular expression
       used to split the source text; its value is not validated as a
       regular expression by this schema. -->
  <xs:element type="xs:string" name="snippet"/>
  <xs:element type="xs:string" name="split-regex"/>
  <!-- Substitution group for the full-text payload. The head "full-text"
       is abstract, so it can never appear in an instance document; one of
       its members, "reduced-text" or "raw-text", must be used in its
       place. The members state xs:string explicitly; this is the same type
       they would otherwise inherit from the head.
       NOTE(review): "reduced-text" is presumably the variant with stop
       words removed and "raw-text" the unmodified one; confirm against the
       OTMI specification. -->
  <xs:element name="full-text" type="xs:string" abstract="true">
    <xs:annotation>
      <xs:documentation>Full text is either with stopwords removed or without</xs:documentation>
    </xs:annotation>
  </xs:element>
  <xs:element name="reduced-text" type="xs:string" substitutionGroup="otmi:full-text"/>
  <xs:element name="raw-text" type="xs:string" substitutionGroup="otmi:full-text"/>
</xs:schema>
Personal tools