RELAX NG (Compact) Schema for OTMI

From OpenTextMining

Jump to: navigation, search

Return to OTMI Specification

This annex defines a RELAX NG (Compact) schema specification for OTMI.

# Prefix bound to the OTMI vocabulary; the element names in this schema use it.
namespace otmi = "http://www.nature.com/schema/2006/03/otmi"
# The "xsd" prefix is predeclared by the RELAX NG compact syntax with exactly
# this URI; it is spelled out here only for the benefit of readers.
datatypes xsd = "http://www.w3.org/2001/XMLSchema-datatypes"

# Validation begins at the "data" pattern.
start = data

## The otmi:data element that is added to an Atom Entry document.
## It carries a version token followed by its child content in fixed order.
data = element otmi:data {
  attribute version { xsd:NMTOKEN },
  # optional pointer to a stoplist document
  stoplist?,
  # the sections (required)
  sections,
  # optional floating objects
  floats?,
  # optional reference list
  references?
}

## Stoplist is an empty element whose "href" attribute gives the URI
## of the stoplist document used
stoplist = element otmi:stoplist {
  attribute href { xsd:anyURI },
  empty
}
  
## Sections - one or more "section" elements
sections = section+

## A section has a "name" attribute and contains either otmi-text
## content or one or more nested sections.
## Intended usage: a front section ("abstract" | "standfirst")
## with otmi-text content, or else a body section ("body") with child
## sections ("firstpara" | "methods" | "conclusions" | "others")
## and otmi-text content
section = element otmi:section {
  # Only NCName validity is enforced here; the section names listed in the
  # documentation above are not checked by this schema.
  attribute name { xsd:NCName },
  (otmi-text | section+)
}

## Floats - for now just figures and tables are included
## (other floating objects could be added).
## A document may carry figures, tables, or both.
floats =
  # Repeating the choice lets a single document contain both kinds of
  # float; a bare "figures | tables" forces a document to pick one kind.
  (figures | tables)+

## Figures - one or more "figure" elements, each holding the figure's
## title followed by its caption
figures = element otmi:figure {
  title,
  caption
}+

## Tables - one or more "table" elements, each holding the table's title
tables = element otmi:table { title }+

## Title of a float, expressed as otmi-text content
title = element otmi:title {
  otmi-text
}
    
## Caption of a figure, expressed as otmi-text content
caption = element otmi:caption {
  otmi-text
}
 
## References - no reference text is provided, only URI references
## and a count of the references that have no URI
references =
  element otmi:references {
    ## The "ref-id" element references a document by URI; there may be
    ## none when no cited document has a URI
    element otmi:ref-id { xsd:anyURI }*,
    ## The "refs-noid" element provides the count of references
    ## with no URI
    element otmi:refs-noid { xsd:integer }
  }

## OTMI Text - the actual payload of an OTMI file.
## Each of the three parts is optional; when present they appear in this order.
otmi-text =
  vectors?,
  snippets?,
  full-text?
  
## Vectors is a table listing word vectors: the split expression
## followed by one or more "vector" entries
vectors = element otmi:vectors {
  # NOTE(review): "number" presumably records how many vector entries
  # follow; confirm against the OTMI specification.
  attribute number { xsd:integer },
  split-regex,
  element otmi:vector {
    attribute count { xsd:integer },
    text
  }+
}
     
## Snippets is a table listing text snippets: the split expression
## followed by one or more "snippet" entries
snippets = element otmi:snippets {
  # NOTE(review): "number" presumably records how many snippet entries
  # follow; confirm against the OTMI specification.
  attribute number { xsd:integer },
  split-regex,
  element otmi:snippet { text }+
}

## The "split-regex" element holds the expression used to split text
split-regex =
  element otmi:split-regex {
    text
  }

## Full text is given either with stopwords removed ("reduced-text")
## or without stopwords removed ("raw-text")
full-text = (reduced-text | raw-text)
  
## The "reduced-text" element provides arbitrary text that has been
## cleaned of markup and has had stopwords removed
reduced-text =
  element otmi:reduced-text {
    text
  }

## The "raw-text" element provides arbitrary text that has been
## cleaned of markup but still contains its stopwords
raw-text =
  element otmi:raw-text {
    text
  }
Personal tools