-- access.asn
--$Revision: 6.0 $
--*********************************************************************
--
--  access.asn
--
--     messages for data access
--
--*********************************************************************

NCBI-Access DEFINITIONS ::=
BEGIN

EXPORTS Link-set;

    -- links between same class = neighbors
    -- links between other classes = links

-- Links to documents of one type, with optional weights.
Link-set ::= SEQUENCE {
    -- number of links to this doc type
    num     INTEGER,
    -- the links
    uids    SEQUENCE OF INTEGER OPTIONAL,
    -- the weights (presumably one per entry in uids; confirm)
    weights SEQUENCE OF INTEGER OPTIONAL
}


END

-- biblio.asn
--$Revision: 217677 $
--****************************************************************
--
--  NCBI Bibliographic data elements
--  by James Ostell, 1990
--
--  Taken from the American National Standard for
--      Bibliographic References
--      ANSI Z39.29-1977
--  Version 3.0 - June 1994
--  PubMedId added in 1996
--  ArticleIds and eprint elements added in 1999
--
--****************************************************************

NCBI-Biblio DEFINITIONS ::=
BEGIN

EXPORTS Cit-art, Cit-jour, Cit-book, Cit-pat, Cit-let, Id-pat, Cit-gen,
        Cit-proc, Cit-sub, Title, Author, PubMedId, DOI;

IMPORTS Person-id, Date, Dbtag FROM NCBI-General;

    -- Article Ids

-- One identifier for an article; an article can have many ids.
-- The alternative types (except Dbtag, which is imported) are declared
-- in this module.
ArticleId ::= CHOICE {
    pubmed  PubMedId,
    medline MedlineUID,
    doi     DOI,
    pii     PII,
    pmcid   PmcID,
    pmcpid  PmcPid,
    pmpid   PmPid,
    -- generic catch all
    other   Dbtag
}
    
-- Id from the PubMed database at NCBI
PubMedId ::= INTEGER

-- Id from MEDLINE
MedlineUID ::= INTEGER

-- Document Object Identifier
DOI ::= VisibleString

-- Controlled Publisher Identifier
PII ::= VisibleString

-- PubMed Central Id
PmcID ::= INTEGER

-- Publisher Id supplied to PubMed Central
PmcPid ::= VisibleString

-- Publisher Id supplied to PubMed
PmPid ::= VisibleString

-- Unordered collection of article ids
ArticleIdSet ::= SET OF ArticleId

    -- Status Dates

-- Points of publication
PubStatus ::= INTEGER {
    -- date manuscript received for review
    received     (1),
    -- accepted for publication
    accepted     (2),
    -- published electronically by publisher
    epublish     (3),
    -- published in print by publisher
    ppublish     (4),
    -- article revised by publisher/author
    revised      (5),
    -- article first appeared in PubMed Central
    pmc          (6),
    -- article revision in PubMed Central
    pmcr         (7),
    -- article citation first appeared in PubMed
    pubmed       (8),
    -- article citation revision in PubMed
    pubmedr      (9),
    -- epublish, but will be followed by print
    aheadofprint (10),
    -- date into PreMedline status
    premedline   (11),
    -- date made a MEDLINE record
    medline      (12),
    other        (255)
}

-- A publication status paired with its date.
-- Done as a structure so fields can be added.
PubStatusDate ::= SEQUENCE {
    pubstatus PubStatus,
    -- time may be added later
    date      Date
}

-- Unordered collection of status dates
PubStatusDateSet ::= SET OF PubStatusDate
    
    -- Citation Types

-- Article in journal or book
Cit-art ::= SEQUENCE {
    -- title of paper (ANSI requires)
    title   Title OPTIONAL,
    -- authors (ANSI requires)
    authors Auth-list OPTIONAL,
    -- journal or book
    from    CHOICE {
        journal Cit-jour,
        book    Cit-book,
        proc    Cit-proc
    },
    -- lots of ids
    ids     ArticleIdSet OPTIONAL
}

-- Journal citation
Cit-jour ::= SEQUENCE {
    -- title of journal
    title Title,
    imp   Imprint
}

-- Book citation
Cit-book ::= SEQUENCE {
    -- Title of book
    title   Title,
    -- part of a collection
    coll    Title OPTIONAL,
    -- authors
    authors Auth-list,
    imp     Imprint
}

-- Meeting proceedings
Cit-proc ::= SEQUENCE {
    -- citation to meeting
    book Cit-book,
    -- time and location of meeting
    meet Meeting
}

    -- Patent number and date-issue were made optional in 1997 to
    --   support patent applications being issued from the USPTO
    --   Semantically a Cit-pat must have either a patent number or
    --   an application number (or both) to be valid

-- Patent citation
Cit-pat ::= SEQUENCE {
    title      VisibleString,
    -- author/inventor
    authors    Auth-list,
    -- Patent Document Country
    country    VisibleString,
    -- Patent Document Type
    doc-type   VisibleString,
    -- Patent Document Number
    number     VisibleString OPTIONAL,
    -- Patent Issue/Pub Date
    date-issue Date OPTIONAL,
    -- Patent Doc Class Code
    class      SEQUENCE OF VisibleString OPTIONAL,
    -- Patent Doc Appl Number
    app-number VisibleString OPTIONAL,
    -- Patent Appl File Date
    app-date   Date OPTIONAL,
    -- Applicants
    applicants Auth-list OPTIONAL,
    -- Assignees
    assignees  Auth-list OPTIONAL,
    -- Priorities
    priority   SEQUENCE OF Patent-priority OPTIONAL,
    -- abstract of patent
    abstract   VisibleString OPTIONAL
}

-- One priority entry of a patent (element type of Cit-pat.priority)
Patent-priority ::= SEQUENCE {
    -- Patent country code
    country VisibleString,
    -- number assigned in that country
    number  VisibleString,
    -- date of application
    date    Date
}

-- Just enough to identify a patent
Id-pat ::= SEQUENCE {
    -- Patent Document Country
    country  VisibleString,
    id       CHOICE {
        -- Patent Document Number
        number     VisibleString,
        -- Patent Doc Appl Number
        app-number VisibleString
    },
    -- Patent Doc Type
    doc-type VisibleString OPTIONAL
}

-- Letter, thesis, or manuscript
Cit-let ::= SEQUENCE {
    -- same fields as a book
    cit    Cit-book,
    -- Manuscript identifier
    man-id VisibleString OPTIONAL,
    type   ENUMERATED {
        manuscript (1),
        letter     (2),
        thesis     (3)
    } OPTIONAL
}
                                -- NOTE: this is just to cite a
                                -- direct data submission, see NCBI-Submit
                                -- for the form of a sequence submission
-- Citation for a direct submission
Cit-sub ::= SEQUENCE {
    -- not necessarily authors of the paper
    authors Auth-list,
    -- only used to get the date; slated to go away (see date below)
    imp     Imprint OPTIONAL,
    -- medium of submission
    medium  ENUMERATED {
        paper  (1),
        tape   (2),
        floppy (3),
        email  (4),
        other  (255)
    } OPTIONAL,
    -- replaces imp, will become required
    date    Date OPTIONAL,
    -- description of changes for public view
    descr   VisibleString OPTIONAL
}
    
-- General citation; NOT from ANSI, this is a catchall
Cit-gen ::= SEQUENCE {
    -- anything, not parsable
    cit           VisibleString OPTIONAL,
    authors       Auth-list OPTIONAL,
    -- medline uid
    muid          INTEGER OPTIONAL,
    journal       Title OPTIONAL,
    volume        VisibleString OPTIONAL,
    issue         VisibleString OPTIONAL,
    pages         VisibleString OPTIONAL,
    date          Date OPTIONAL,
    -- for GenBank style references
    serial-number INTEGER OPTIONAL,
    -- eg. cit="unpublished",title="title"
    title         VisibleString OPTIONAL,
    -- PubMed Id
    pmid          PubMedId OPTIONAL
}
    
    
    -- Authorship Group
-- List of authors in one of three representations, plus an optional
-- affiliation.
Auth-list ::= SEQUENCE {
    names CHOICE {
        -- full citations
        std SEQUENCE OF Author,
        -- MEDLINE, semi-structured
        ml  SEQUENCE OF VisibleString,
        -- free for all
        str SEQUENCE OF VisibleString
    },
    -- author affiliation
    affil Affil OPTIONAL
}

-- A single author entry
Author ::= SEQUENCE {
    -- Author, Primary or Secondary
    name    Person-id,
    level   ENUMERATED {
        primary   (1),
        secondary (2)
    } OPTIONAL,
    -- Author Role Indicator
    role    ENUMERATED {
        compiler        (1),
        editor          (2),
        patent-assignee (3),
        translator      (4)
    } OPTIONAL,
    affil   Affil OPTIONAL,
    -- TRUE if corresponding author
    is-corr BOOLEAN OPTIONAL
}

-- Affiliation, either as one unparsed string or as structured fields
Affil ::= CHOICE {
    -- unparsed string
    str VisibleString,
    -- std representation
    std SEQUENCE {
        -- Author Affiliation, Name
        affil       VisibleString OPTIONAL,
        -- Author Affiliation, Division
        div         VisibleString OPTIONAL,
        -- Author Affiliation, City
        city        VisibleString OPTIONAL,
        -- Author Affiliation, County Sub
        sub         VisibleString OPTIONAL,
        -- Author Affiliation, Country
        country     VisibleString OPTIONAL,
        -- street address, not ANSI
        street      VisibleString OPTIONAL,
        email       VisibleString OPTIONAL,
        fax         VisibleString OPTIONAL,
        phone       VisibleString OPTIONAL,
        postal-code VisibleString OPTIONAL
    }
}

    -- Title Group
    -- Valid for = A = Analytic (Cit-art)
    --             J = Journals (Cit-jour)
    --             B = Book (Cit-book)
                                                 -- Valid for:
-- Set of title elements. The letters after each description give the
-- citation kinds the element is valid for:
-- A = Analytic (Cit-art), J = Journals (Cit-jour), B = Book (Cit-book).
Title ::= SET OF CHOICE {
    -- Title, Anal,Coll,Mono (A, J, B)
    name    VisibleString,
    -- Title, Subordinate (A, B)
    tsub    VisibleString,
    -- Title, Translated (A, J, B)
    trans   VisibleString,
    -- Title, Abbreviated (J)
    jta     VisibleString,
    -- specifically ISO jta (J)
    iso-jta VisibleString,
    -- specifically MEDLINE jta (J)
    ml-jta  VisibleString,
    -- a coden (J)
    coden   VisibleString,
    -- ISSN (J)
    issn    VisibleString,
    -- Title, Abbreviated (B)
    abr     VisibleString,
    -- ISBN (B)
    isbn    VisibleString
}

-- Imprint group
Imprint ::= SEQUENCE {
    -- date of publication
    date      Date,
    volume    VisibleString OPTIONAL,
    issue     VisibleString OPTIONAL,
    pages     VisibleString OPTIONAL,
    section   VisibleString OPTIONAL,
    -- publisher, required for book
    pub       Affil OPTIONAL,
    -- copyright date, required for book
    cprt      Date OPTIONAL,
    -- part/sup of volume
    part-sup  VisibleString OPTIONAL,
    -- put here for simplicity
    language  VisibleString DEFAULT "ENG",
    -- for prepublication citations
    prepub    ENUMERATED {
        -- submitted, not accepted
        submitted (1),
        -- accepted, not published
        in-press  (2),
        other     (255)
    } OPTIONAL,
    -- part/sup on issue
    part-supi VisibleString OPTIONAL,
    -- retraction info
    retract   CitRetract OPTIONAL,
    -- current status of this publication
    pubstatus PubStatus OPTIONAL,
    -- dates for this record
    history   PubStatusDateSet OPTIONAL
}

-- Retraction or erratum information attached to a citation
CitRetract ::= SEQUENCE {
    -- retraction of an entry
    type ENUMERATED {
        -- this citation retracted
        retracted (1),
        -- this citation is a retraction notice
        notice    (2),
        -- an erratum was published about this
        in-error  (3),
        -- this is a published erratum
        erratum   (4)
    },
    -- citation and/or explanation
    exp  VisibleString OPTIONAL
}

-- Meeting description: number, date and optional place.
-- Used by Cit-proc for the time and location of a meeting.
Meeting ::= SEQUENCE {
    number VisibleString,
    date   Date,
    place  Affil OPTIONAL
}

            
END


-- biotree.asn
--$Revision: 525462 $
--*********************************************************************
--
--  biotree.asn
--
--     BioTree ASN
--     Anatoliy Kuznetsov
--
--*********************************************************************

NCBI-BioTree DEFINITIONS ::=
BEGIN

EXPORTS BioTreeContainer, FeatureDictSet, DistanceMatrix;

IMPORTS User-object, User-field FROM NCBI-General;

-- Container for one tree: feature dictionary plus the node set.
BioTreeContainer ::= SEQUENCE {
    -- hint on what kind of tree this is
    treetype VisibleString OPTIONAL,
    -- features dictionary
    fdict    FeatureDictSet,
    -- set of nodes with encoded topology
    nodes    NodeSet,
    -- bio-tree label (short name)
    label    VisibleString OPTIONAL,
    -- user defined object
    user     User-object OPTIONAL
}

-- Unordered collection of tree nodes
NodeSet ::= SET OF Node


-- One tree node; the optional parent id links it to another node.
Node ::= SEQUENCE {
    -- node uid
    id       INTEGER,
    -- parent node id
    parent   INTEGER OPTIONAL,
    features NodeFeatureSet OPTIONAL
}


-- Unordered collection of features attached to a node
NodeFeatureSet ::= SET OF NodeFeature


-- A feature value keyed by a feature id
-- (presumably an id from the FeatureDictSet dictionary; confirm)
NodeFeature ::= SEQUENCE {
    featureid INTEGER,
    value     UTF8String
}


-- Features dictionary: unordered collection of feature descriptions
FeatureDictSet ::= SET OF FeatureDescr


-- One dictionary entry
FeatureDescr ::= SEQUENCE {
    -- feature id
    id   INTEGER,
    -- feature name
    name VisibleString
}


-- Pairwise distances between n labelled items.
DistanceMatrix ::= SEQUENCE {
    -- n labels
    labels    SEQUENCE OF VisibleString,
    -- n(n-1)/2 pairwise distances
    -- (0, 1)...(0, n), (1, 2)...(1, n)...
    -- NOTE(review): with n labels indexed from 0 the last index would be
    -- n-1 rather than n; confirm which indexing is intended.
    distances SEQUENCE OF REAL
}


END

-- blast.asn
--  ----------------------------------------------------------------------------
--
--                            PUBLIC DOMAIN NOTICE
--                National Center for Biotechnology Information
--
--  This software/database is a "United States Government Work" under the terms
--  of the United States Copyright Act.  It was written as part of the author's
--  official duties as a United States Government employee and thus cannot be
--  copyrighted.  This software/database is freely available to the public for
--  use.  The National Library of Medicine and the U.S. Government have not
--  placed any restriction on its use or reproduction.
--
--  Although all reasonable efforts have been taken to ensure the accuracy and
--  reliability of the software and data, the NLM and the U.S. Government do not
--  and cannot warrant the performance or results that may be obtained by using
--  this software or data.  The NLM and the U.S. Government disclaim all
--  warranties, express or implied, including warranties of performance,
--  merchantability or fitness for any particular purpose.
--
--  Please cite the authors in any work or product based on this material.
--
--  ----------------------------------------------------------------------------
--
--  Authors: Tom Madden, Tim Boemker
--
--  ASN.1 interface to BLAST.
--
--  ----------------------------------------------------------------------------

NCBI-Blast4 DEFINITIONS ::=
BEGIN

EXPORTS
    Blast4-ka-block,
    Blast4-value,
    Blast4-parameter,
    Blast4-parameters;

-- Each source module is listed once: Bioseq and Seq-data both come from
-- NCBI-Sequence, so they share a single FROM clause.
IMPORTS
    Bioseq,
    Seq-data                FROM NCBI-Sequence
    Bioseq-set              FROM NCBI-Seqset
    PssmWithParameters      FROM NCBI-ScoreMat
    Seq-id,
    Seq-interval,
    Seq-loc                 FROM NCBI-Seqloc
    Seq-align,
    Seq-align-set           FROM NCBI-Seqalign;

--  --------------------------------------------------------------------
--
--  Requests
--
--  --------------------------------------------------------------------

-- Top-level request: optional client identification plus the payload.
Blast4-request ::= SEQUENCE {
    -- Client identifier (email address, organization name, program/script
    -- name, or any other form of contacting the owner of this request)
    ident VisibleString OPTIONAL,

    -- Payload of the request
    body  Blast4-request-body
}

-- An archive format for results. The results can also be reformatted
-- from this format.
Blast4-archive ::= SEQUENCE {
    -- Query and options
    request  Blast4-request,

    -- Results of search
    results  Blast4-get-search-results-reply,
    messages SEQUENCE OF Blast4-error OPTIONAL
}

-- Payload of a Blast4-request: exactly one of the request kinds below.
Blast4-request-body ::= CHOICE {
    finish-params           Blast4-finish-params-request,
    -- Get all available BLAST databases
    get-databases           NULL,
    -- Get supported scoring matrices
    get-matrices            NULL,
    get-parameters          NULL,
    get-paramsets           NULL,
    get-programs            NULL,
    get-search-results      Blast4-get-search-results-request,
    get-sequences           Blast4-get-sequences-request,
    queue-search            Blast4-queue-search-request,
    get-request-info        Blast4-get-request-info-request,
    get-sequence-parts      Blast4-get-seq-parts-request,
    get-windowmasked-taxids NULL,
    -- Get version and info about some extended methods
    get-protocol-info       Blast4-get-protocol-info-request,
    -- Get search info: status, title, etc.
    get-search-info         Blast4-get-search-info-request,
    -- An older form of this alternative was: get-databases-ex NULL
    get-databases-ex        Blast4-get-databases-ex-request
}
-- Request body for the get-databases-ex alternative; carries only an
-- optional parameter list.
Blast4-get-databases-ex-request ::= SEQUENCE {
    params Blast4-parameters OPTIONAL
}
-- END ADD EXT INFO
-- Request body for the finish-params alternative of Blast4-request-body.
Blast4-finish-params-request ::= SEQUENCE {
    program  VisibleString,
    service  VisibleString,
    paramset VisibleString OPTIONAL,
    params   Blast4-parameters OPTIONAL
}

-- This type allows the specification of what result types are desired.
-- The non-default values are distinct powers of two.
Blast4-result-types ::= ENUMERATED {
    -- Default retrieves the following result types (if available):
    -- alignments, phi-alignments, masks, ka-blocks, search-stats and pssm
    -- (63 = 1 + 2 + 4 + 8 + 16 + 32)
    default        (63),
    alignments     (1),
    phi-alignments (2),
    masks          (4),
    ka-blocks      (8),
    search-stats   (16),
    pssm           (32),
    simple-results (64)
}

-- Request for the results of a previously submitted search.
Blast4-get-search-results-request ::= SEQUENCE {
    -- The request ID of the BLAST search
    request-id   VisibleString,
    -- Logical OR of Blast4-result-types, assumes default if absent
    result-types INTEGER OPTIONAL
}

-- Query specification, in one of three forms:
--   pssm:         if a PSSM is used (ie. for PSI-Blast), it must contain
--                 a "query" for formatting purposes.
--   bioseq-set:   may contain any number of queries, specified as data.
--   seq-loc-list: may contain only the "whole" or "interval" types. With
--                 "whole", any number of queries may be used; with
--                 "interval", there should be exactly one query. (This is
--                 limited by the BlastObject.)

Blast4-queries ::= CHOICE {
    pssm         PssmWithParameters,
    seq-loc-list SEQUENCE OF Seq-loc,
    bioseq-set   Bioseq-set
}

-- Request to queue a search.
--
-- Options have been broken down into three groups as part of the BLAST
-- API work. The algorithm options essentially correspond to those
-- options available via the CBlastOptions class.
-- For definitions of the names used in the Blast4-parameter structures, see
-- c++/{include,src}/objects/blast/names.[hc]pp in the NCBI C++ toolkit.
--   algorithm-options: Options for BLAST (ie. seq comparison) algorithm.
--   program-options:   Options for controlling program execution and/or
--                      database filtering
--   format-options:    Options for formatting BLAST results, clients should
--                      use this only if applicable, otherwise they should be
--                      ignored

Blast4-queue-search-request ::= SEQUENCE {
    program           VisibleString,
    service           VisibleString,
    queries           Blast4-queries,
    subject           Blast4-subject,
    -- This field contains a task description
    paramset          VisibleString OPTIONAL,
    algorithm-options Blast4-parameters OPTIONAL,
    program-options   Blast4-parameters OPTIONAL,
    format-options    Blast4-parameters OPTIONAL
}

-- Request to retrieve the status of a given search
Blast4-get-search-status-request ::= SEQUENCE {
    request-id VisibleString
}

-- Reply carrying the status of a given search
Blast4-get-search-status-reply ::= SEQUENCE {
    status VisibleString
}

-- Fetch information about the search request.

Blast4-get-request-info-request ::= SEQUENCE {
    request-id VisibleString
}

-- Information returned about a search request.
Blast4-get-request-info-reply ::= SEQUENCE {
    database          Blast4-database,
    program           VisibleString,
    service           VisibleString,
    created-by        VisibleString,
    queries           Blast4-queries,
    algorithm-options Blast4-parameters,
    program-options   Blast4-parameters,
    format-options    Blast4-parameters OPTIONAL,
    subjects          Blast4-subject OPTIONAL
}

-- Fetch the search strategy (i.e.: the object used to submit the search)
Blast4-get-search-strategy-request ::= SEQUENCE {
    request-id VisibleString
}

-- Return the search strategy (i.e.: a Blast4-request containing a
-- Blast4-queue-search-request, an object used to submit the search)
Blast4-get-search-strategy-reply ::= Blast4-request

-- Fetch sequence data from a BLAST database
Blast4-get-sequences-request ::= SEQUENCE {
    -- Name of the BLAST database from which to retrieve the sequence data
    database      Blast4-database,
    -- Sequence identifiers for the sequence to get
    seq-ids       SEQUENCE OF Seq-id,
    -- Determines whether the returned Bioseqs should contain the sequence
    -- data or not
    skip-seq-data BOOLEAN DEFAULT FALSE,
    -- Determines whether or not the defline of the returned Bioseqs should
    -- contain only the requested seq id. This optional field only applies
    -- to non-redundant BLAST database
    target-only   BOOLEAN OPTIONAL
}

-- Fetch parts of sequences from a BLAST database
Blast4-get-seq-parts-request ::= SEQUENCE {
    -- Name of the BLAST database from which to retrieve the sequence data
    database      Blast4-database,
    -- Allows the specification of ranges of sequence data needed.
    -- If the sequence(s) interval's end is 0, no data will be fetched.
    -- If end is past the length of the sequence, it will be adjusted to the
    -- end of the sequence (this allows fetching of the first chunk in
    -- cases where the length is not yet known).
    seq-locations SEQUENCE OF Seq-interval
}
-- Support for version and checking availability of methods
Blast4-get-protocol-info-request ::= Blast4-parameters

-- Various search information
Blast4-get-search-info-request ::= SEQUENCE {
    request-id VisibleString,
    info       Blast4-parameters OPTIONAL
}
--  --------------------------------------------------------------------
--
--  Replies
--
--  --------------------------------------------------------------------

-- Top-level reply: an optional list of errors plus the reply payload.
Blast4-reply ::= SEQUENCE {
    errors SEQUENCE OF Blast4-error OPTIONAL,
    body   Blast4-reply-body
}

-- Payload of a Blast4-reply: exactly one of the reply kinds below.
Blast4-reply-body ::= CHOICE {
    finish-params           Blast4-finish-params-reply,
    get-databases           Blast4-get-databases-reply,
    get-matrices            Blast4-get-matrices-reply,
    get-parameters          Blast4-get-parameters-reply,
    get-paramsets           Blast4-get-paramsets-reply,
    get-programs            Blast4-get-programs-reply,
    get-search-results      Blast4-get-search-results-reply,
    get-sequences           Blast4-get-sequences-reply,
    queue-search            Blast4-queue-search-reply,
    get-queries             Blast4-get-queries-reply,
    get-request-info        Blast4-get-request-info-reply,
    get-sequence-parts      Blast4-get-seq-parts-reply,
    get-windowmasked-taxids Blast4-get-windowmasked-taxids-reply,
    get-protocol-info       Blast4-get-protocol-info-reply,
    get-search-info         Blast4-get-search-info-reply,
    get-databases-ex        Blast4-get-databases-ex-reply
}

-- Reply types that are plain aliases or homogeneous lists.

Blast4-finish-params-reply           ::= Blast4-parameters

Blast4-get-windowmasked-taxids-reply ::= SEQUENCE OF INTEGER

Blast4-get-databases-reply           ::= SEQUENCE OF Blast4-database-info
Blast4-get-databases-ex-reply        ::= SEQUENCE OF Blast4-database-info

Blast4-get-matrices-reply            ::= SEQUENCE OF Blast4-matrix-id

Blast4-get-parameters-reply          ::= SEQUENCE OF Blast4-parameter-info

-- Note: Paramsets and tasks represent the same concept: a human readable
-- description that represents a set of parameters with specific values
-- to accomplish a given task
Blast4-get-paramsets-reply           ::= SEQUENCE OF Blast4-task-info

Blast4-get-programs-reply            ::= SEQUENCE OF Blast4-program-info

-- Results of a search; every member is optional.
Blast4-get-search-results-reply ::= SEQUENCE {
    alignments     Seq-align-set OPTIONAL,
    phi-alignments Blast4-phi-alignments OPTIONAL,

    -- Masking locations for the query sequence(s). Each element of this set
    -- corresponds to a single query's translation frame as appropriate.
    masks          SEQUENCE OF Blast4-mask OPTIONAL,

    ka-blocks      SEQUENCE OF Blast4-ka-block OPTIONAL,
    search-stats   SEQUENCE OF VisibleString OPTIONAL,
    pssm           PssmWithParameters OPTIONAL,
    simple-results Blast4-simple-results OPTIONAL
}

-- Reply to a sequence fetch: a list of Bioseqs
Blast4-get-sequences-reply ::= SEQUENCE OF Bioseq

-- Bundles Seq-ids and sequence data to fulfill requests of type
-- Blast4-get-seq-parts-request
Blast4-seq-part-data ::= SEQUENCE {
    -- Sequence identifier
    id   Seq-id,
    -- Its sequence data (may be partial)
    data Seq-data
}

-- Reply to a partial sequence fetch
Blast4-get-seq-parts-reply ::= SEQUENCE OF Blast4-seq-part-data

-- Reply to queue-search; carries an optional request id.
Blast4-queue-search-reply ::= SEQUENCE {
    request-id VisibleString OPTIONAL
}

-- Reply body for the get-queries alternative of Blast4-reply-body.
Blast4-get-queries-reply ::= SEQUENCE {
    queries Blast4-queries
}

-- Reply body for get-protocol-info; a plain parameter list.
Blast4-get-protocol-info-reply ::= Blast4-parameters

-- Reply body for get-search-info; same shape as the request.
Blast4-get-search-info-reply ::= SEQUENCE {
    request-id VisibleString,
    info       Blast4-parameters OPTIONAL
}
--  --------------------------------------------------------------------
--
--  Errors
--
--  --------------------------------------------------------------------

-- A single error or warning: numeric code plus optional message text.
Blast4-error ::= SEQUENCE {
    -- This is an integer to allow for flexibility, but the values assigned
    -- should be of type Blast4-error-code
    code    INTEGER,
    message VisibleString OPTIONAL
}

-- This enumeration defines values that are intended to be used with the
-- Blast4-error::code in a logical AND operation to easily determine whether
-- a given Blast4-error object contains either a warning or an error
Blast4-error-flags ::= ENUMERATED {
    warning (1024),
    error   (2048)
}

-- Defines values for use in Blast4-error::code.
-- Note: warnings should have values from 1024 up to but not including 2048,
-- errors should have values of 2048 or greater. (The bounds are inclusive at
-- the low end: conversion-warning is 1024 and internal-error is 2048.)
Blast4-error-code ::= INTEGER {
    -- A conversion issue was found when converting to/from blast3 from/to
    -- blast4 protocol in the blast4 server
    conversion-warning (1024),

    -- Indicates internal errors in the blast4 server
    internal-error     (2048),
    -- Request type is not implemented in the blast4 server
    not-implemented    (2049),
    -- Request type is not allowed in the blast4 server
    not-allowed        (2050),
    -- Malformed/invalid requests (e.g.: parsing errors or invalid data in
    -- request)
    bad-request        (2051),
    -- The RID requested is unknown or it has expired
    bad-request-id     (2052),
    -- The search is pending
    search-pending     (2053)
}

--  --------------------------------------------------------------------
--
--  Other types in alphabetical order
--
--  --------------------------------------------------------------------

-- A cutoff given either as an e-value or as a raw score.
Blast4-cutoff ::= CHOICE {
    e-value   REAL,
    raw-score INTEGER
}

-- A BLAST database: its name and residue type.
Blast4-database ::= SEQUENCE {
    name VisibleString,
    type Blast4-residue-type
}

-- Sequencing technique. Borrowed from seq.asn

Blast4-seqtech ::= INTEGER {
    unknown          (0),
    -- standard sequencing
    standard         (1),
    -- Expressed Sequence Tag
    est              (2),
    -- Sequence Tagged Site
    sts              (3),
    -- one-pass genomic sequence
    survey           (4),
    -- from genetic mapping techniques
    genemap          (5),
    -- from physical mapping techniques
    physmap          (6),
    -- derived from other data, not a primary entity
    derived          (7),
    -- conceptual translation
    concept-trans    (8),
    -- peptide was sequenced
    seq-pept         (9),
    -- concept transl. w/ partial pept. seq.
    both             (10),
    -- sequenced peptide, ordered by overlap
    seq-pept-overlap (11),
    -- sequenced peptide, ordered by homology
    seq-pept-homol   (12),
    -- conceptual transl. supplied by author
    concept-trans-a  (13),
    -- unordered High Throughput sequence contig
    htgs-1           (14),
    -- ordered High Throughput sequence contig
    htgs-2           (15),
    -- finished High Throughput sequence
    htgs-3           (16),
    -- full length insert cDNA
    fli-cdna         (17),
    -- single genomic reads for coordination
    htgs-0           (18),
    -- high throughput cDNA
    htc              (19),
    -- whole genome shotgun sequencing
    wgs              (20),
    -- use Source.techexp
    other            (255)
}

-- Descriptive information about one BLAST database; element type of the
-- get-databases and get-databases-ex replies.
Blast4-database-info ::= SEQUENCE {
    database      Blast4-database,
    description   VisibleString,
    last-updated  VisibleString,
    total-length  BigInt,
    num-sequences BigInt,
    seqtech       Blast4-seqtech,
    taxid         INTEGER,
    extended      Blast4-parameters OPTIONAL
}

-- Translation frame of a query; used by Blast4-mask.
Blast4-frame-type ::= ENUMERATED {
    notset (0),
    plus1  (1),
    plus2  (2),
    plus3  (3),
    minus1 (4),
    minus2 (5),
    minus3 (6)
}

-- Three real-valued parameters plus a gapped flag.
-- NOTE(review): presumably Karlin-Altschul statistics, going by the
-- "ka" in the name; confirm.
Blast4-ka-block ::= SEQUENCE {
    lambda REAL,
    k      REAL,
    h      REAL,
    gapped BOOLEAN
}

-- Masking locations for a query's frame. The locations field is a single
-- Seq-loc of type Packed-int, which contains all the masking locations for
-- the translation frame specified by the frame field.
--
-- On input (i.e.: when the client specifies masking locations as a
-- Blast4-parameter):
--   * protein queries: the frame field must always be notset;
--   * nucleotide queries (regardless of whether the search will translate
--     these or not): the frame must be plus1.
--   Masking locations in the translated encoding are not permitted.
--
-- On output (i.e.: when blast 4 server encodes these as part of the
-- Blast4-get-search-results-reply), the same conventions as above apply for
-- non-translated protein and nucleotide queries, but in the case of
-- translated nucleotide queries, the frame field can be specified in any of
-- the translation frames as appropriate.
Blast4-mask ::= SEQUENCE {
    locations SEQUENCE OF Seq-loc,
    frame     Blast4-frame-type
}

-- Identifies a scoring matrix by residue type and name.
Blast4-matrix-id ::= SEQUENCE {
    residue-type Blast4-residue-type,
    name         VisibleString
}

-- A named parameter value.
Blast4-parameter ::= SEQUENCE {
    name  VisibleString,
    value Blast4-value
}

-- Name and type of a parameter, both as strings.
Blast4-parameter-info ::= SEQUENCE {
    name VisibleString,
    type VisibleString
}

-- Self documenting task structure
Blast4-task-info ::= SEQUENCE {
    -- Name of this task
    name          VisibleString,
    -- Description of the task
    documentation VisibleString
}

-- A program together with the list of its services.
Blast4-program-info ::= SEQUENCE {
    program  VisibleString,
    services SEQUENCE OF VisibleString
}

Blast4-residue-type ::= ENUMERATED {
    unknown                 (0),
    protein                 (1),
    nucleotide              (2)
}

-- Strand selector: forward, reverse, or both strands.
Blast4-strand-type ::= ENUMERATED {
    forward-strand (1),
    reverse-strand (2),
    both-strands (3)
}

-- The subject: exactly one of a database string, a list of Bioseqs, or a
-- list of Seq-locs.
-- NOTE(review): the database string is presumably a database name; confirm.
Blast4-subject ::= CHOICE {
    database VisibleString,
    sequences SEQUENCE OF Bioseq,
    seq-loc-list SEQUENCE OF Seq-loc
}

-- An ordered list of Blast4-parameter entries.
Blast4-parameters ::=
    SEQUENCE OF Blast4-parameter

-- An alignment count plus a list of Seq-locs.
-- NOTE(review): presumably describes PHI-BLAST pattern alignments; confirm.
Blast4-phi-alignments ::= SEQUENCE {
    num-alignments INTEGER,
    seq-locs SEQUENCE OF Seq-loc
}

-- The value carried by a Blast4-parameter: exactly one of the alternatives
-- listed below.
Blast4-value ::= CHOICE {

    -- Scalar types.
    big-integer BigInt,
    bioseq Bioseq,
    boolean BOOLEAN,
    cutoff Blast4-cutoff,
    integer INTEGER,
    matrix PssmWithParameters,
    real REAL,
    seq-align Seq-align,
    seq-id Seq-id,
    seq-loc Seq-loc,
    strand-type Blast4-strand-type,
    string VisibleString,

    -- Lists of the scalar types.
    big-integer-list SEQUENCE OF BigInt,
    bioseq-list SEQUENCE OF Bioseq,
    boolean-list SEQUENCE OF BOOLEAN,
    cutoff-list SEQUENCE OF Blast4-cutoff,
    integer-list SEQUENCE OF INTEGER,
    matrix-list SEQUENCE OF PssmWithParameters,
    real-list SEQUENCE OF REAL,
    seq-align-list SEQUENCE OF Seq-align,
    seq-id-list SEQUENCE OF Seq-id,
    seq-loc-list SEQUENCE OF Seq-loc,
    strand-type-list SEQUENCE OF Blast4-strand-type,
    string-list SEQUENCE OF VisibleString,

    -- Imported collection types.
    bioseq-set Bioseq-set,
    seq-align-set Seq-align-set,

    -- User-provided masking locations for a single query sequence; the
    -- name field of the enclosing Blast4-parameter should be "LCaseMask".
    -- Masking locations for multiple queries need one Blast4-parameter of
    -- this type per query.
    query-mask Blast4-mask
}

-- The complete set of simple Blast results.
Blast4-simple-results ::= SEQUENCE {
    all-alignments SEQUENCE OF Blast4-alignments-for-query
}

-- The alignments belonging to a single query, compiled from the raw
-- SeqAlign results.
Blast4-alignments-for-query ::= SEQUENCE {
    -- Identifier of the query sequence (present if the query is not a
    -- local id in the SeqAlign).
    query-id VisibleString,
    -- Every alignment for this query.
    alignments SEQUENCE OF Blast4-simple-alignment
}

-- One alignment.
Blast4-simple-alignment ::= SEQUENCE {
    -- Identifier of the subject sequence (normally a GI from the SeqAlign).
    subject-id VisibleString,
    -- E-value.
    e-value REAL,
    -- Bit score.
    bit-score REAL,
    -- Number of identities.
    num-identities INTEGER OPTIONAL,
    -- Number of insertions/deletions.
    num-indels INTEGER OPTIONAL,
    -- Full query range covered by this HSP.
    full-query-range Blast4-range,
    -- Full subject range covered by this HSP.
    full-subject-range Blast4-range
}

-- A zero-offset range on a sequence.
Blast4-range ::= SEQUENCE {
    start INTEGER OPTIONAL,
    end INTEGER OPTIONAL,
    -- Frame of the range: absent for proteins; -1 or 1 for nucleotides;
    -- one of -1, -2, -3, 1, 2, 3 for translated sequences.
    strand INTEGER OPTIONAL
}

END

-- blastdb.asn
--$Id: blastdb.asn 640754 2021-11-17 18:49:10Z camacho $
--
-- Notes:
--
-- taxonomy: an integer is proposed, which would require some sort of 
-- table (or network connection) to do the conversions from integer 
-- to various names.  This could save quite a bit of space for databases 
-- that are predominantly of one organism (e.g., human in htgs).
-- I've proposed here that table contain scientific-, common-, and 
-- blast-names at the advice of Scott Federhen.  Scott also was in 
-- favor of having the complete lineage in the file, but it seems like 
-- this would be seldom used and we could have a view with a link back 
-- to the taxonomy page for anyone needing it. Since one file would 
-- suffice for all blast databases, it seems like this should be a new file.
--
-- memberships: a sequence of integers is proposed.  Each bit of an integer 
-- would indicate membership in some (virtual) blast database (e.g., pdb, 
-- swissprot) or some classification (e.g., mRNA, genomic).
--
-- links: a sequence of integers is proposed.  Each bit of an integer would 
-- indicate a link that could be established based upon the gi of the 
-- database sequence.
--

NCBI-BlastDL DEFINITIONS ::=
BEGIN

EXPORTS Blast-def-line-set, Blast-def-line;
        
IMPORTS Seq-id, Seq-loc FROM NCBI-Seqloc;

-- All deflines for an entry.
Blast-def-line-set ::=
    SEQUENCE OF Blast-def-line

-- A single defline.
Blast-def-line ::= SEQUENCE {
    -- Simple title.
    title VisibleString OPTIONAL,
    -- Regular NCBI Seq-Id.
    seqid SEQUENCE OF Seq-id,
    -- Taxonomy id; should not be set if multiple taxids are available
    -- (see links below).
    taxid INTEGER OPTIONAL,
    -- Bit arrays.
    memberships SEQUENCE OF INTEGER OPTIONAL,
    -- Repurposed to store the (multiple) taxIDs associated with WP proteins.
    links SEQUENCE OF INTEGER OPTIONAL,
    -- In proteins this stores the PIG; in nucleotides it stores the
    -- "origin GIs" (if one sequence is described in the ASN.1 as an
    -- identical copy of another).
    other-info SEQUENCE OF INTEGER OPTIONAL
}

-- The sequence filtering algorithms that may be used in a BLAST database.
Blast-filter-program ::= INTEGER {
    not-set (0),
    dust (10),
    seg (20),
    windowmasker (30),
    repeat (40),
    other (100),
    max (255)
}

-- A list of masks plus a continuation flag.
Blast-mask-list ::= SEQUENCE {
    -- Masks for a single sequence should be grouped in a Packed-seqint.
    masks SEQUENCE OF Seq-loc,
    -- As of 01/21/2010 this is always false, indicating that the entire
    -- object (set of Seq-loc) is contained in this object.
    more BOOLEAN
}

-- Masking information for a set of sequences.
Blast-db-mask-info ::= SEQUENCE {
    algo-id INTEGER,
    algo-program Blast-filter-program,
    algo-options VisibleString,
    -- This object was originally created so the sequence masks could be
    -- paginated; that feature was discontinued in 01/21/2010.
    masks Blast-mask-list
}

-- Metadata record for a BLAST database.
Blast-db-metadata ::= SEQUENCE {
    -- The default indicates the starting version.
    version VisibleString DEFAULT "1.2",
    dbname VisibleString,
    dbtype VisibleString,
    -- The default indicates the starting version.
    db-version INTEGER DEFAULT 5,
    description VisibleString OPTIONAL,
    number-of-letters INTEGER,
    number-of-sequences INTEGER,
    last-updated VisibleString,
    number-of-volumes INTEGER,
    number-of-taxids INTEGER OPTIONAL,
    bytes-total INTEGER,
    bytes-to-cache INTEGER,
    bytes-total-compressed INTEGER OPTIONAL,
    files SEQUENCE OF VisibleString OPTIONAL,
    compressed-files SEQUENCE OF VisibleString OPTIONAL
}
END


-- blastxml.asn
--$Id: blastxml.asn 100080 2007-03-12 16:05:35Z kazimird $

NCBI-BlastOutput DEFINITIONS ::=
BEGIN

-- The BLAST output record: program details, query details, search
-- parameters and the iterations.
BlastOutput ::= SEQUENCE {
    -- BLAST program: blastp, tblastx etc.
    program VisibleString,
    -- Program version.
    version VisibleString,
    -- Steven, David, Tom and others.
    reference VisibleString,
    -- BLAST Database name.
    db VisibleString,
    -- SeqId of query.
    query-ID VisibleString,
    -- Definition line of query.
    query-def VisibleString,
    -- Length of query sequence.
    query-len INTEGER,
    -- Query sequence itself.
    query-seq VisibleString OPTIONAL,
    -- Search parameters.
    param Parameters,
    iterations SEQUENCE OF Iteration,
    -- Mega BLAST search statistics.
    mbstat Statistics OPTIONAL
}
-- One iteration of the search.
Iteration ::= SEQUENCE {
    -- Iteration number.
    iter-num INTEGER,
    -- SeqId of query.
    query-ID VisibleString OPTIONAL,
    -- Definition line of query.
    query-def VisibleString OPTIONAL,
    -- Length of query sequence.
    query-len INTEGER OPTIONAL,
    -- Hits, one for every db sequence.
    hits SEQUENCE OF Hit OPTIONAL,
    -- Search statistics.
    stat Statistics OPTIONAL,
    -- Some (error?) information.
    message VisibleString OPTIONAL
}
-- Search parameters; the option letter in parentheses is the one given in
-- the original field comments.
Parameters ::= SEQUENCE {
    -- Matrix used (-M).
    matrix VisibleString OPTIONAL,
    -- Expectation threshold (-e).
    expect REAL,
    -- Inclusion threshold (-h).
    include REAL OPTIONAL,
    -- Match score for NT (-r).
    sc-match INTEGER OPTIONAL,
    -- Mismatch score for NT (-q).
    sc-mismatch INTEGER OPTIONAL,
    -- Gap opening cost (-G).
    gap-open INTEGER,
    -- Gap extension cost (-E).
    gap-extend INTEGER,
    -- Filtering options (-F).
    filter VisibleString OPTIONAL,
    -- PHI-BLAST pattern.
    pattern VisibleString OPTIONAL,
    -- Limit of request to Entrez query.
    entrez-query VisibleString OPTIONAL
}

-- Search statistics.
Statistics ::= SEQUENCE {
    -- Number of sequences in BLAST db.
    db-num INTEGER,
    -- Length of BLAST db.
    db-len BigInt,
    -- Effective HSP length.
    hsp-len INTEGER,
    -- Effective search space.
    eff-space REAL,
    -- Karlin-Altschul parameter K.
    kappa REAL,
    -- Karlin-Altschul parameter Lambda.
    lambda REAL,
    -- Karlin-Altschul parameter H.
    entropy REAL
}

-- One hit: a subject sequence together with its HSPs.
Hit ::= SEQUENCE {
    -- Hit number.
    num INTEGER,
    -- SeqId of subject.
    id VisibleString,
    -- Definition line of subject.
    def VisibleString,
    -- Accession.
    accession VisibleString,
    -- Length of subject.
    len INTEGER,
    -- All HSP regions for the given subject.
    hsps SEQUENCE OF Hsp OPTIONAL
}

-- One HSP region for a subject.
Hsp ::= SEQUENCE {
    -- HSP number.
    num INTEGER,
    -- Score (in bits) of HSP.
    bit-score REAL,
    -- Score of HSP.
    score REAL,
    -- E-value of HSP.
    evalue REAL,
    -- Start of HSP in query.
    query-from INTEGER,
    -- End of HSP.
    query-to INTEGER,
    -- Start of HSP in subject.
    hit-from INTEGER,
    -- End of HSP in subject.
    hit-to INTEGER,
    -- Start of PHI-BLAST pattern.
    pattern-from INTEGER OPTIONAL,
    -- End of PHI-BLAST pattern.
    pattern-to INTEGER OPTIONAL,
    -- Translation frame of query.
    query-frame INTEGER OPTIONAL,
    -- Translation frame of subject.
    hit-frame INTEGER OPTIONAL,
    -- Number of identities in HSP.
    identity INTEGER OPTIONAL,
    -- Number of positives in HSP.
    positive INTEGER OPTIONAL,
    -- Number of gaps in HSP.
    gaps INTEGER OPTIONAL,
    -- Length of the alignment used.
    align-len INTEGER OPTIONAL,
    -- Score density.
    density INTEGER OPTIONAL,
    -- Alignment string for the query (with gaps).
    qseq VisibleString,
    -- Alignment string for subject (with gaps).
    hseq VisibleString,
    -- Formatting middle line.
    midline VisibleString OPTIONAL
}

END

-- blastxml2.asn
NCBI-BlastOutput2 DEFINITIONS ::=
BEGIN

-- One output record: an optional report and an optional error.
BlastOutput2 ::= SEQUENCE {
    report Report OPTIONAL,
    error Err OPTIONAL
}

-- An unordered collection of BlastOutput2 records.
BlastXML2 ::=
    SET OF BlastOutput2

-- A report: program details, search target, parameters and results.
Report ::= SEQUENCE {
    -- BLAST program: blastp, tblastx etc.
    program VisibleString,
    -- Program version.
    version VisibleString,
    -- Steven, David, Tom and others.
    reference VisibleString,
    search-target Target,
    -- Search parameters.
    params Parameters,
    results Results
}

-- An error: an integer code with an optional message string.
Err ::= SEQUENCE {
    code INTEGER,
    message VisibleString OPTIONAL
}

-- The search target: either a database or a list of subjects.
Target ::= CHOICE {
    -- BLAST Database name.
    db VisibleString,
    -- Subject IDs.
    subjects SEQUENCE OF VisibleString
}

-- The results, in one of three shapes.
Results ::= CHOICE {
    -- Iterative search (psi and delta blast).
    iterations SEQUENCE OF Iteration,
    -- Db search.
    search Search,
    -- Bl2seq.
    bl2seq SEQUENCE OF Search
}

-- One iteration: its number plus the search it produced.
Iteration ::= SEQUENCE {
    -- Iteration number (use with psiblast).
    iter-num INTEGER,
    search Search
}

-- One search: query details, hits, statistics and an optional message.
Search ::= SEQUENCE {
    -- SeqId of query.
    query-id VisibleString OPTIONAL,
    -- Definition line of query.
    query-title VisibleString OPTIONAL,
    -- Length of query sequence.
    query-len INTEGER OPTIONAL,
    -- Masked offsets.
    query-masking SEQUENCE OF Range OPTIONAL,
    -- Hits, one for every db sequence.
    hits SEQUENCE OF Hit OPTIONAL,
    -- Search statistics.
    stat Statistics OPTIONAL,
    -- Some (error?) information.
    message VisibleString OPTIONAL
}

-- Search parameters; the option letter in parentheses is the one given in
-- the original field comments.
Parameters ::= SEQUENCE {
    -- Matrix used (-M).
    matrix VisibleString OPTIONAL,
    -- Expectation threshold (-e).
    expect REAL,
    -- Inclusion threshold (-h).
    include REAL OPTIONAL,
    -- Match score for NT (-r).
    sc-match INTEGER OPTIONAL,
    -- Mismatch score for NT (-q).
    sc-mismatch INTEGER OPTIONAL,
    -- Gap opening cost (-G).
    gap-open INTEGER OPTIONAL,
    -- Gap extension cost (-E).
    gap-extend INTEGER OPTIONAL,
    -- Filtering options (-F).
    filter VisibleString OPTIONAL,
    -- PHI-BLAST pattern.
    pattern VisibleString OPTIONAL,
    -- Limit of request to Entrez query.
    entrez-query VisibleString OPTIONAL,
    -- Composition-based stats (numbers correspond to numbering in
    -- stand-alone application parameter -comp_based_stats).
    cbs INTEGER OPTIONAL,
    -- Genetic code for query (blastx or tblastx).
    query-gencode INTEGER OPTIONAL,
    -- Genetic code for db or subjects (tblastn or tblastx).
    db-gencode INTEGER OPTIONAL,
    -- Bl2seq mode.
    bl2seq-mode VisibleString OPTIONAL
}

-- Start and stop of one masked stretch on the query.
Range ::= SEQUENCE {
    -- Beginning of masked range (one-offset).
    from INTEGER,
    -- End of masked range (one-offset).
    to INTEGER
}

-- Search statistics.
Statistics ::= SEQUENCE {
    -- Number of sequences in BLAST db.
    db-num BigInt OPTIONAL,
    -- Length of BLAST db.
    db-len BigInt OPTIONAL,
    -- Effective HSP length.
    hsp-len INTEGER,
    -- Effective search space.
    eff-space BigInt,
    -- Karlin-Altschul parameter K.
    kappa REAL,
    -- Karlin-Altschul parameter Lambda.
    lambda REAL,
    -- Karlin-Altschul parameter H.
    entropy REAL
}

-- Describes the entries for this (possibly non-redundant) sequence.
HitDescr ::= SEQUENCE {
    -- SeqId of subject.
    id VisibleString,
    -- Accession.
    accession VisibleString OPTIONAL,
    -- Title (definition line) of subject.
    title VisibleString OPTIONAL,
    -- NCBI taxid (9606 for human).
    taxid INTEGER OPTIONAL,
    -- Binomial scientific name ("Homo sapiens" for human).
    sciname VisibleString OPTIONAL
}

-- One hit: a subject sequence, its descriptions and its HSPs.
Hit ::= SEQUENCE {
    -- Hit number.
    num INTEGER,
    -- ID, title, and taxonomy for each entry in the PIG.
    description SEQUENCE OF HitDescr,
    -- Length of subject.
    len INTEGER,
    -- All HSP regions for the given subject.
    hsps SEQUENCE OF Hsp OPTIONAL
}

-- One HSP region for a subject.
Hsp ::= SEQUENCE {
    -- HSP number.
    num INTEGER,
    -- Score (in bits) of HSP.
    bit-score REAL,
    -- Score of HSP.
    score REAL,
    -- E-value of HSP.
    evalue REAL,
    -- Number of identities in HSP.
    identity INTEGER OPTIONAL,
    -- Number of positives in HSP.
    positive INTEGER OPTIONAL,
    -- Score density.
    density INTEGER OPTIONAL,
    -- Start of PHI-BLAST pattern.
    pattern-from INTEGER OPTIONAL,
    -- End of PHI-BLAST pattern.
    pattern-to INTEGER OPTIONAL,
    -- Start of HSP in query.
    query-from INTEGER,
    -- End of HSP.
    query-to INTEGER,
    -- Strand of query (blastn).
    query-strand VisibleString OPTIONAL,
    -- Translation frame of query (blastx, tblastx).
    query-frame INTEGER OPTIONAL,
    -- Start of HSP in subject.
    hit-from INTEGER,
    -- End of HSP in subject.
    hit-to INTEGER,
    -- Strand of subject (blastn).
    hit-strand VisibleString OPTIONAL,
    -- Translation frame of subject (tblastn, tblastx).
    hit-frame INTEGER OPTIONAL,
    -- Length of the alignment used.
    align-len INTEGER OPTIONAL,
    -- Number of gaps in HSP.
    gaps INTEGER OPTIONAL,
    -- Alignment string for the query (with gaps).
    qseq VisibleString,
    -- Alignment string for subject (with gaps).
    hseq VisibleString,
    -- Formatting middle line.
    midline VisibleString OPTIONAL
}

END

-- cdd.asn
--$Revision: 430149 $
--**********************************************************************
--
--  Definitions for CDD's 
--
--  NCBI Structure Group
--
--  National Center for Biotechnology Information
--  National Institutes of Health
--  Bethesda, MD 20894 USA
--
--  October 1999
--
--  asntool -m cdd.asn -w 100 -o cdd.h
--  asntool -B objcdd -m cdd.asn -G -w 100 -I objseq.h objsset.h -K cdd.h \
--          -M asn.all
--**********************************************************************

NCBI-Cdd DEFINITIONS ::=
-- NCBI Conserved Domain Definition


BEGIN

EXPORTS  Cdd-id, Cdd-id-set, Cdd, Cdd-set, Cdd-tree, Cdd-tree-set, Cdd-pref-nodes, Cdd-Project;

IMPORTS  Date                    FROM NCBI-General
         Pub                     FROM NCBI-Pub
         Biostruc-annot-set      FROM MMDB
         Bioseq                  FROM NCBI-Sequence
         Seq-annot               FROM NCBI-Sequence
         Seq-entry               FROM NCBI-Seqset
         Org-ref                 FROM NCBI-Organism
         Seq-id                  FROM NCBI-Seqloc
         Seq-interval            FROM NCBI-Seqloc
         Seq-loc                 FROM NCBI-Seqloc
         Seq-feat                FROM NCBI-Seqfeat
         Score-set               FROM NCBI-Seqalign
         Cn3d-style-dictionary,
         Cn3d-user-annotations   FROM NCBI-Cn3d
         PssmWithParameters      FROM NCBI-ScoreMat;
         
-- dealing with lists of preferred tax-nodes 

-- An Org-ref with an active flag (TRUE unless stated otherwise), an
-- optional parent tax id and an optional rank string.
Cdd-org-ref ::= SEQUENCE {
    reference Org-ref,
    active BOOLEAN DEFAULT TRUE,
    parent-tax-id INTEGER OPTIONAL,
    rank VisibleString OPTIONAL
}
-- An unordered collection of Cdd-org-ref entries.
Cdd-org-ref-set ::=
    SET OF Cdd-org-ref

-- One descriptor: either a creation date or a description string.
Cdd-pref-node-descr ::= CHOICE {
    create-date Date,
    description VisibleString
}

-- An unordered collection of Cdd-pref-node-descr entries.
Cdd-pref-node-descr-set ::=
    SET OF Cdd-pref-node-descr

-- The preferred nodes, with optional model-organism and optional-node
-- sets and an optional set of descriptors.
Cdd-pref-nodes ::= SEQUENCE {
    preferred-nodes Cdd-org-ref-set,
    model-organisms Cdd-org-ref-set OPTIONAL,
    optional-nodes Cdd-org-ref-set OPTIONAL,
    description Cdd-pref-node-descr-set OPTIONAL
}

-- Cdd's should not exist without a unique accession, but alternative id's may
-- be present as well. It is conceivable that a CD which is created as a merged
-- product of two highly redundant CDs will retain the source ids in addition 
-- to its new unique id

-- An accession-based identifier.
Global-id ::= SEQUENCE {
    -- SMART, Pfam, LOAD or CD accession.
    accession VisibleString,
    -- To hold the CD-Database release number if desired; currently not used.
    release VisibleString OPTIONAL,
    -- Version 0 is the seed; version numbers increase with update/curate
    -- cycles.
    version INTEGER OPTIONAL,
    -- This is NOT the source! Rather, it is the database the object
    -- resides in; currently not in use.
    database VisibleString OPTIONAL
}

-- A CD identifier: either an integer uid or a Global-id.
Cdd-id ::= CHOICE {
    -- For synchronization with Entrez; holds PSSM-Ids.
    uid INTEGER,
    -- Holds accession/version pairs.
    gid Global-id
}

-- An ordered list of Cdd-id entries.
Cdd-id-set ::=
    SEQUENCE OF Cdd-id

-- Records whether the CD contains repeated sequence/structure motifs.
Cdd-repeat ::= SEQUENCE {
    -- Number of tandem repeats in the CD.
    count INTEGER,
    -- Location on the representative.
    location Seq-loc OPTIONAL,
    -- Average repeat length.
    avglen INTEGER OPTIONAL
}


-- Records a link to Entrez Books.
Cdd-book-ref ::= SEQUENCE {
    -- Abbreviated book title.
    bookname VisibleString,
    -- Type of element.
    textelement ENUMERATED {
        unassigned (0),
        -- A section or paragraph.
        section (1),
        -- A figure or set of figures.
        figgrp (2),
        -- A table.
        table (3),
        -- A whole chapter.
        chapter (4),
        -- A list of references.
        biblist (5),
        -- An inserted box.
        box (6),
        -- Glossary.
        glossary (7),
        -- Appendix.
        appendix (8),
        other (255)
    },
    -- Numerical address of the text-element.
    elementid INTEGER OPTIONAL,
    -- Exact address, used with section.
    subelementid INTEGER OPTIONAL,
    -- Address of the text element, if character string.
    celementid VisibleString OPTIONAL,
    -- Exact address, if character string.
    csubelementid VisibleString OPTIONAL
}

-- The description of CDD's refers to the specific set of aligned sequences,
-- the region that is being aligned and the information contained in the
-- alignment. It may contain a lengthy comment
-- describing the function of the domain as well as its origin and all
-- other anecdotal information that can't be pressed into a rigid scheme.
-- Crosslinks to reference papers available in PubMed are possible as well.
-- There can be as many of these as you want in the CDD.

-- One descriptor attached to a CDD; exactly one alternative is present.
Cdd-descr ::= CHOICE {
    -- Alternative names for the CDD, if the domain has several common names.
    othername VisibleString,
    -- Intracellular, extracellular, etc.; records spatial and/or temporal
    -- expression in free-text format.
    category VisibleString,
    -- This is where descriptions go.
    comment VisibleString,
    -- A citation describing the domain.
    reference Pub,
    -- Date of first creation/dump.
    create-date Date,
    -- Holds the highest common tax node.
    tax-source Org-ref,
    -- The database the seeds were created from, e.g. SMART, PFAM, etc.
    source VisibleString,
    status INTEGER {
        unassigned (0),
        -- A public curated CD.
        finished-ok (1),
        -- Needs work done, not yet released.
        pending-release (2),
        -- Imported as-is, immediate release.
        other-asis (3),
        -- CD holds a Psi-Blast PSSM only, does not contain alignment data.
        matrix-only (4),
        -- Has been flagged for update (in queue).
        update-running (5),
        -- Update finished, no work necessary.
        auto-updated (6),
        -- Is earmarked for curation.
        claimed (7),
        -- Public curated member of a completed family.
        curated-complete (8),
        -- For CD production?
        other (255)
    },
    -- Date of last version change.
    update-date Date,
    -- For storing curation notes; those won't make it into public
    -- distributions.
    scrapbook SEQUENCE OF VisibleString,
    -- For linking back to source db.
    source-id Cdd-id-set,
    -- To record repeat counts.
    repeats Cdd-repeat,
    -- To record short-term history.
    old-root Cdd-id-set,
    -- To record curation status when the CD is checked out from the
    -- tracking database, for use within curation software.
    curation-status INTEGER {
        unassigned (0),
        prein (1),
        ofc (2),
        iac (3),
        ofv1 (4),
        iav1 (5),
        ofv2 (6),
        iav2 (7),
        postin (8),
        other (255)
    },
    -- To record read-only status when the CD is checked out from the
    -- tracking database, for use within curation software.
    readonly-status INTEGER {
        unassigned (0),
        readonly (1),
        readwrite (2),
        other (255)
    },
    -- Links to Entrez/books.
    book-ref Cdd-book-ref,
    -- Add citations and/or author names.
    attribution Pub,
    -- Hold short descriptive text.
    title VisibleString
}

-- An unordered collection of Cdd-descr entries.
Cdd-descr-set ::=
    SET OF Cdd-descr

-- the Cdd-tree stores the hierarchy of CDDs. These objects are stored separate
-- from the CDs to allow for fast retrieval and use as an 'index' into CDs
-- all the components in a CD-tree match components in the full-sized CD
-- and should be synchronized

-- One node of the CD hierarchy, with its links to related CDs.
Cdd-tree ::= SEQUENCE {
    -- Short name copied from CD.
    name VisibleString,
    -- IDs copied from CD.
    id Cdd-id-set,
    -- Description copied from CD.
    description Cdd-descr-set OPTIONAL,
    -- CD is the result of a split/merge.
    parent Cdd-id OPTIONAL,
    -- This CD has been split.
    children Cdd-id-set OPTIONAL,
    -- Related CDs (have common hits).
    siblings Cdd-id-set OPTIONAL,
    -- Co-occurring CDs (non-overlapping hits to same sequences).
    neighbors Cdd-id-set OPTIONAL
}

-- An ordered list of Cdd-tree entries.
Cdd-tree-set ::=
    SEQUENCE OF Cdd-tree

-- Matrix definitions, these are supposed to store PSSMs and corresponding 
-- matrices of relative residue frequencies.
-- the number of columns and rows is listed explicitly, values in columns
-- are stored column by column, i.e. in groups of nrows values for each column

-- A matrix with explicit column and row counts, optional row labels, a
-- scale factor and the integer values.
Matrix ::= SEQUENCE {
    ncolumns INTEGER,
    nrows INTEGER,
    row-labels SEQUENCE OF VisibleString OPTIONAL,
    scale-factor INTEGER,
    columns SEQUENCE OF INTEGER
}

-- definition for matrix of pairwise "distances", stored as the upper 
-- triangle of a square n x n matrix (excluding the diagonal), this is
-- supposed to store pairwise percentages of identical residues, pairwise
-- alignment scores or E-values from pairwise BLAST sequence comparisons

-- An element count with an optional Score-set and an optional list of
-- integer div-ranks.
Triangle ::= SEQUENCE {
    nelements INTEGER,
    scores Score-set OPTIONAL,
    div-ranks SEQUENCE OF INTEGER OPTIONAL
}

-- Update-align is supposed to contain alignments that still need some work
-- done to fit into the CD-proper alignment. These originate from the
-- CD update process (generated by Blast, for example) or may be created in
-- an editing session to save its state

-- One annotation on an Update-align or Reject-id; exactly one alternative
-- is present.
Update-comment ::= CHOICE {
    -- Free text to describe the nature of the Update-align.
    comment VisibleString,
    -- Suggestion for inclusion in the CD without corresponding alignment.
    addthis Seq-loc,
    -- If one or several alignment rows are to be replaced by the
    -- Update-align.
    replaces Seq-loc,
    -- If used with Reject-id, specify a location on a sequence which
    -- should not be used.
    reject-loc Seq-loc,
    -- If the update alignment was imported from a citation, and for
    -- whenever it seems necessary to cite.
    reference Pub
}

-- The description and seqannot fields are both optional, as the Update-align
-- may be a Seq-annot without description, or a suggestion to add a sequence
-- without the corresponding alignment

-- An update alignment: optional comments, an optional Seq-annot and a
-- mandatory type code.
Update-align ::= SEQUENCE {
    description SEQUENCE OF Update-comment OPTIONAL,
    -- Contains the SeqAlign.
    seqannot Seq-annot OPTIONAL,
    type INTEGER {
        unassigned (0),
        update (1),
        update-3d (2),
        demoted (51),
        demoted-3d (52),
        other (255)
    }
}

-- A set of Seq-ids with optional comments.
-- NOTE(review): presumably the ids are sequences to be rejected; confirm.
Reject-id ::= SEQUENCE {
    description SEQUENCE OF Update-comment OPTIONAL,
    ids SET OF Seq-id
}

-- One piece of evidence for an annotated feature; exactly one alternative
-- is present.
Feature-evidence ::= CHOICE {
    -- So we can spell out what doesn't fit in any other category.
    comment VisibleString,
    -- Evidence via a literature reference.
    reference Pub,
    -- Evidence via Biostruc-features, such as structure superpositions.
    bsannot Biostruc-annot-set,
    -- Evidence is a Sequence feature found elsewhere.
    seqfeat Seq-feat,
    -- Evidence is a book chapter or figure.
    book-ref Cdd-book-ref
}

-- An annotation attached to a location in an aligned sequence.
Align-annot ::= SEQUENCE {
    -- Points to a location in one of the aligned sequences, usually the
    -- master/representative.
    location Seq-loc,
    -- To hold descriptions/names like "Heme binding site" or "catalytic
    -- triad" etc., something that should be used for labels in
    -- visualization.
    description VisibleString OPTIONAL,
    -- Evidence we can compute with.
    evidence SEQUENCE OF Feature-evidence OPTIONAL,
    -- For typing annotated features:
    --   0 .. no type assigned
    --   1 .. active site
    --   2 .. polypeptide binding site
    --   3 .. nucleic acid binding site
    --   4 .. ion binding site
    --   5 .. chemical binding site
    --   6 .. posttranslational modification site
    --   7 .. structural motif
    type INTEGER OPTIONAL,
    -- Adding more names for indexing.
    aliases SEQUENCE OF VisibleString OPTIONAL,
    -- To validate mapping of sites.
    motif VisibleString OPTIONAL,
    -- 0 for validation,
    -- 1 for motif somewhere in seqloc,
    -- 2 for multiple motifs in seqloc.
    motifuse INTEGER OPTIONAL
}

-- An ordered list of Align-annot entries.
Align-annot-set ::=
    SEQUENCE OF Align-annot

-- the Domain-parent records an evolutionary relationship which may not be
-- as simple as a classical parent-child relationship in a typical hierarchy,
-- i.e. where a CD is merely a specific subgroup ("child") of a more general
-- diverse alignment model ("parent"). A CD alignment model may be the result
-- of an ancient fusion event, combining two or more domains into a bigger unit
-- which has subsequently undergone a divergent evolutionary process similar to
-- what may have happened to a single "domain". A CD alignment model may 
-- also reflect the result of a deletion event, where a specific subgroup
-- lacks part of a (set of) domain(s), but where the part present is found to
-- be highly similar to a putative "parent", with some added evidence for
-- an actual deletion, like from the distribution of truncated copies in phylogenetic
-- lineages. Deletion events which affect different parts of a set of
-- duplicated domain architectures may be indistinguishable from actual
-- fission events, which means that we may want to represent the latter as
-- deletions after duplication and do not need a special case for fissions.

-- A link from a CD to a parent, with the kind of relationship.
Domain-parent ::= SEQUENCE {
    -- The classification of parent child relations.
    parent-type INTEGER {
        classical (0),
        fusion (1),
        deletion (2),
        permutation (3),
        other (255)
    },
    -- Identify the section parent by accession.
    parentid Cdd-id,
    -- Contains the sequence alignment linking CD alignment models; should
    -- align the masters/representatives of each CD.
    seqannot Seq-annot OPTIONAL
}


-- record sequence trees generated by a suitable algorithm.
-- A tree consists of the algorithm settings used to build it and a single
-- root node; all further nodes hang off that root (see SeqTree-node).

Sequence-tree ::= SEQUENCE {
  cdAccession    VisibleString OPTIONAL, -- presumably the accession of the CD
                                         -- the tree belongs to (TODO confirm)
  algorithm      Algorithm-type,         -- settings used to compute the tree
  isAnnotated    BOOLEAN DEFAULT FALSE,  -- FALSE when absent
  root           SeqTree-node            -- root of the recursive node structure
}

-- one node of a Sequence-tree. The type is recursive: the "children" CHOICE
-- holds either a list of child nodes or a footprint (a sequence interval
-- with an optional row id), so every node carries exactly one of the two.
SeqTree-node ::= SEQUENCE {
  isAnnotated    BOOLEAN DEFAULT FALSE,            -- FALSE when absent
  name           VisibleString           OPTIONAL,
  distance       REAL                    OPTIONAL, -- NOTE(review): presumably the
                                                   -- branch length to the parent
                                                   -- node; confirm with producer
  children       CHOICE {
    children SEQUENCE OF SeqTree-node,             -- nested child nodes
    footprint SEQUENCE {                           -- no child nodes in this case
      seqRange   Seq-interval,                     -- interval on a sequence
      rowId      INTEGER OPTIONAL                  -- presumably the alignment row
                                                   -- (TODO confirm)
    }
  },
  annotation     Node-annotation         OPTIONAL  -- see Node-annotation
}

-- settings of the algorithm used to compute a sequence tree
-- (referenced by Sequence-tree.algorithm).
-- scoring-Scheme and clustering-Method are mandatory; all other fields are
-- OPTIONAL. NOTE(review): units and defaults of the numeric parameters are
-- not stated in this specification.
Algorithm-type ::= SEQUENCE {
  scoring-Scheme    INTEGER { unassigned           (0),
                              percent-id           (1),
                              kimura-corrected     (2),
                              aligned-score        (3),
                              aligned-score-ext    (4),
                              aligned-score-filled (5),
                              blast-footprint      (6),
                              blast-full           (7),
                              hybrid-aligned-score (8),
                              other              (255) },
  clustering-Method INTEGER { unassigned             (0),
                              single-linkage         (1),
                              neighbor-joining       (2),
                              fast-minimum-evolution (3),
                              other                (255) },
  score-Matrix      INTEGER { unassigned (0),
                              blosum45   (1),
                              blosum62   (2),
                              blosum80   (3),
                              pam30      (4),
                              pam70      (5),
                              pam250     (6),
                              other    (255) } OPTIONAL,
  gapOpen           INTEGER OPTIONAL,
  gapExtend         INTEGER OPTIONAL,
  gapScaleFactor    INTEGER OPTIONAL,
  nTerminalExt      INTEGER OPTIONAL,
  cTerminalExt      INTEGER OPTIONAL,
  tree-scope        INTEGER { allDescendants        (0),
                              immediateChildrenOnly (1),
                              selfOnly              (2),
                              other               (255) } OPTIONAL,
  -- same named values as tree-scope, except that selfOnly is not defined here
  coloring-scope    INTEGER { allDescendants        (0),
                              immediateChildrenOnly (1),
                              other               (255) } OPTIONAL
}

-- free-text annotation attached to a SeqTree-node; both fields are optional
Node-annotation ::= SEQUENCE {
  presentInChildCD VisibleString OPTIONAL, -- NOTE(review): presumably names a
                                           -- child CD that contains this node;
                                           -- confirm against the producer
  note             VisibleString OPTIONAL  -- free-text note
}

-- the Cdd is the basic ASN.1 object storing an annotated and curated set of
-- alignments (formulated as a set of pairwise master-slave alignments).
-- The alignment data are contained in Seq-annots, and a special type of
-- object, the Update-align, contains additional alignment data from unfinished
-- editing sessions and update processes. The Biostruc-annot-set holds
-- structure superposition information for multiple structure-derived rows in
-- the alignment.
-- Version numbers in Global-ids are meant to be updated every time the Cdd is
-- changed in a way that does not require Global-ids to be changed (sequences
-- added in update cycle, annotation changed, alignment errors fixed).
-- Only name and id are mandatory; every other field is OPTIONAL.

Cdd ::= SEQUENCE {
  name          VisibleString,          -- a short name (can be the accession..)
  id            Cdd-id-set,             -- this CD's Ids
  description   Cdd-descr-set OPTIONAL, -- status, references, etc.
  seqannot      SEQUENCE OF Seq-annot    OPTIONAL,  -- contains the CD alignment
  features      Biostruc-annot-set       OPTIONAL,  -- contains structure
                                                    -- alignment data
                                                    -- or "core" definitions
  sequences     Seq-entry     OPTIONAL, -- store as bioseq-set inside seq-entry
  profile-range Seq-interval  OPTIONAL, -- profile for this region only
                                        -- also stores the Seq-id of the master
  trunc-master  Bioseq        OPTIONAL, -- holds the truncated master, which
                                        -- may be something like a consensus,
                                        -- uses the same sequence coordinate
                                        -- frame as the profile-range
  posfreq       Matrix        OPTIONAL, -- relative residue frequencies
  scoremat      Matrix        OPTIONAL, -- Position dependent score matrix
  distance      Triangle      OPTIONAL, -- pairwise distances for all seqs.
  parent        Cdd-id        OPTIONAL, -- this CD is the result of a split
  children      Cdd-id-set    OPTIONAL, -- this CD has been split, not used
  siblings      Cdd-id-set    OPTIONAL, -- related CDs (common hits), clusters
  neighbors     Cdd-id-set    OPTIONAL, -- co-occurring CDs, not used
  pending       SEQUENCE OF Update-align OPTIONAL,  -- contains alignments from
                                                    -- update or "lower panel"
  rejects       SEQUENCE OF Reject-id    OPTIONAL,  -- SeqIds of rejected CD-
                                                    -- members, ignore in update
  master3d      SET OF Seq-id OPTIONAL, -- record if CD has a 3D representative
  alignannot    Align-annot-set OPTIONAL,           -- alignment annotation
  style-dictionary Cn3d-style-dictionary OPTIONAL,  -- record rendering styles
  user-annotations Cn3d-user-annotations OPTIONAL,  -- user annotations in Cn3D
  ancestors     SEQUENCE OF Domain-parent OPTIONAL, -- list of parents
  scoreparams   PssmWithParameters       OPTIONAL,  -- NOTE(review): type is
                                                    -- defined outside this module
                                                    -- section; semantics not
                                                    -- documented here
  seqtree       Sequence-tree            OPTIONAL   -- sequence tree, see
                                                    -- Sequence-tree
}

-- an unordered collection of Cdd objects
Cdd-set ::= SET OF Cdd


-- Cdd projects store a set of CDs, typically related to each other;
-- relationships would be specified using the ancestors fields in the
-- individual CD objects. For use with CD-Tree, a program to visualize
-- curated CD hierarchies and evidence for hierarchical family structures.

-- rectangle of a viewer (used by Cdd-Viewer.rect), given as the top-left
-- corner plus width and height.
-- NOTE(review): units (presumably pixels) are not stated here.
Cdd-Viewer-Rect ::= SEQUENCE {
  top           INTEGER,           -- top coordinate
  left          INTEGER,           -- left coordinate
  width         INTEGER,           -- width
  height        INTEGER            -- height
}

-- one viewer of a Cdd-Project: the viewer type, an optional viewer
-- rectangle and the accessions associated with the viewer.
Cdd-Viewer ::= SEQUENCE {
  ctrl          INTEGER {                   -- viewer type
                  unassigned          (0),
                  cd-info             (1),
                  align-annot         (2),
                  seq-list            (3),
                  seq-tree            (4),
                  merge-preview       (5),
                  cross-hits          (6),
                  notes               (7),
                  tax-tree            (8),
                  dart                (9),
                  dart-selected-rows (10),
                  other             (255)
                },
  rect          Cdd-Viewer-Rect OPTIONAL,  -- viewer rectangle
  accessions    SEQUENCE OF VisibleString  -- list of accessions associated with a viewer
}

-- a command script stored with a Cdd-Project. Only the command text is
-- mandatory; the origin of the script and its name are optional.
Cdd-Script ::= SEQUENCE {
  type          INTEGER {                 -- origin of the script
                  unassigned       (0),
                  user-recorded    (1),
                  server-generated (2),
                  other          (255)
                } OPTIONAL,
  name          VisibleString OPTIONAL,   -- user assigned name/description
  commands      VisibleString             -- actual script commands
}


-- a Cdd-Project stores a list of CDs together with their colors, the
-- viewers, a log and optional bookkeeping data (scripts, ids, dates).
-- cd colors are encoded as:  0000FF for red, 00FF00 for green, FF0000 for blue

Cdd-Project ::= SEQUENCE {
  cds           SEQUENCE OF Cdd ,         -- cds
  cdcolor       SEQUENCE OF INTEGER,      -- colors, encoded as described above;
                                          -- presumably one entry per CD in
                                          -- "cds" (TODO confirm)
  viewers       SEQUENCE OF Cdd-Viewer,   -- Sequence viewers
  log           VisibleString,            -- log
  scripts       SEQUENCE OF Cdd-Script OPTIONAL,    -- command scripts
  id            Cdd-id-set  OPTIONAL,               -- to assign unique project id
  rids          SEQUENCE OF VisibleString OPTIONAL, -- to store request IDs for batch CD-Searches
  create-date   Date OPTIONAL,
  update-date   Date OPTIONAL,
  project-id    INTEGER OPTIONAL          -- for temporary tracking in the database
}

END

-- cn3d.asn
--$Revision: 1.15 $
--**********************************************************************
--
--  Definitions for Cn3D-specific data (rendering settings,
--    user annotations, etc.)
--
--  by Paul Thiessen
--
--  National Center for Biotechnology Information
--  National Institutes of Health
--  Bethesda, MD 20894 USA
--
-- asntool -m cn3d.asn -w 100 -o cn3d.h
-- asntool -B objcn3d -m cn3d.asn -G -w 100 -K cn3d.h -I mapcn3d.h \
--   -M ../mmdb1.asn,../mmdb2.asn,../mmdb3.asn
--**********************************************************************

NCBI-Cn3d DEFINITIONS ::=
-- Cn3D-specific information

BEGIN

EXPORTS  Cn3d-style-dictionary, Cn3d-user-annotations;

IMPORTS  Biostruc-id FROM MMDB
         Molecule-id, Residue-id FROM MMDB-Chemical-graph;


-- values of enumerations must match those in cn3d/style_manager.hpp!

-- backbone display type (used by Cn3d-backbone-style.type).
-- Numbering starts at 1; the values must match cn3d/style_manager.hpp.
Cn3d-backbone-type ::= ENUMERATED {     -- for different types of backbones
    off (1),
    trace (2),
    partial (3),
    complete (4)
}

-- rendering styles; values 1..6 apply to atoms and bonds, values 7..8 to
-- 3d-objects. The values must match cn3d/style_manager.hpp.
Cn3d-drawing-style ::= ENUMERATED {     -- atom/bond/object rendering styles
    -- for atoms and bonds
    wire (1),
    tubes (2),
    ball-and-stick (3),
    space-fill (4),
    wire-worm (5),
    tube-worm (6),
    -- for 3d-objects
    with-arrows (7),
    without-arrows (8)
}

-- available color schemes. The numeric values are NOT in listing order
-- (residue is 20, the block-*-fit schemes are 17..19); do not renumber them,
-- since the values must match cn3d/style_manager.hpp.
Cn3d-color-scheme ::= ENUMERATED {  -- available color schemes (not all
                                    -- necessarily applicable to all objects)
    element (1),
    object (2),
    molecule (3),
    domain (4),
    residue (20),
    secondary-structure (5),
    user-select (6),
    -- different alignment conservation coloring (currently only for proteins)
    aligned (7),
    identity (8),
    variety (9),
    weighted-variety (10),
    information-content (11),
    fit (12),
    block-fit (17),
    block-z-fit (18),
    block-row-fit (19),
    -- other schemes
    temperature (13),
    hydrophobicity (14),
    charge (15),
    rainbow (16)
}

-- RGB triplet, interpreted (after division by the scale-factor) as floating
-- point values which should range from [0..1]. The default scale-factor is
-- 255, so that one can conveniently set integer byte values [0..255] for
-- colors with the scale-factor already set appropriately to map to [0..1].
--    An alpha value is allowed, but is currently ignored by Cn3D.
Cn3d-color ::= SEQUENCE {
    scale-factor INTEGER DEFAULT 255,   -- divisor applied to the components
    red INTEGER,
    green INTEGER,
    blue INTEGER,
    alpha INTEGER DEFAULT 255           -- currently ignored by Cn3D
}

-- style settings for a backbone: backbone type, drawing style, color scheme
-- and a color. All four fields are mandatory.
Cn3d-backbone-style ::= SEQUENCE {  -- style blob for backbones only
    type Cn3d-backbone-type,
    style Cn3d-drawing-style,
    color-scheme Cn3d-color-scheme,
    user-color Cn3d-color           -- NOTE(review): presumably used with the
                                    -- user-select color scheme; confirm
}

-- style settings for objects other than backbones. Same layout as
-- Cn3d-backbone-style, with an on/off flag in place of the backbone type.
Cn3d-general-style ::= SEQUENCE {   -- style blob for other objects
    is-on BOOLEAN,
    style Cn3d-drawing-style,
    color-scheme Cn3d-color-scheme,
    user-color Cn3d-color           -- NOTE(review): presumably used with the
                                    -- user-select color scheme; confirm
}

-- label settings for a backbone: label spacing, residue name format,
-- numbering mode, a termini flag and the label color mode.
Cn3d-backbone-label-style ::= SEQUENCE { -- style blob for backbone labels
    spacing INTEGER,        -- zero means none
    type ENUMERATED {       -- residue name format
        one-letter (1),
        three-letter (2)
    },
    number ENUMERATED {     -- residue numbering shown in the label
        none (0),
        sequential (1),     -- from 1, by residues present, to match sequence
        pdb (2)             -- use number assigned by PDB
    },
    termini BOOLEAN,        -- NOTE(review): presumably whether chain termini
                            -- are labeled; confirm
    white BOOLEAN           -- all white, or (if false) color of alpha carbon
}

-- rendering settings for Cn3D (mirrors StyleSettings class).
-- The numeric rendering parameters are stored as INTEGERs that share the one
-- scale-factor field of this type. Only the name and the label settings are
-- OPTIONAL.
Cn3d-style-settings ::= SEQUENCE {
    name VisibleString OPTIONAL,                -- a name (for favorites)
    protein-backbone Cn3d-backbone-style,       -- backbone styles
    nucleotide-backbone Cn3d-backbone-style,
    protein-sidechains Cn3d-general-style,      -- styles for other stuff
    nucleotide-sidechains Cn3d-general-style,
    heterogens Cn3d-general-style,
    solvents Cn3d-general-style,
    connections Cn3d-general-style,
    helix-objects Cn3d-general-style,
    strand-objects Cn3d-general-style,
    virtual-disulfides-on BOOLEAN,              -- virtual disulfides
    virtual-disulfide-color Cn3d-color,
    hydrogens-on BOOLEAN,                       -- hydrogens
    background-color Cn3d-color,                -- background
    -- floating point parameters - scale-factor applies to all the following:
    scale-factor INTEGER,
    space-fill-proportion INTEGER,
    ball-radius INTEGER,
    stick-radius INTEGER,
    tube-radius INTEGER,
    tube-worm-radius INTEGER,
    helix-radius INTEGER,
    strand-width INTEGER,
    strand-thickness INTEGER,
    -- backbone labels (no labels if not present)
    protein-labels Cn3d-backbone-label-style OPTIONAL,
    nucleotide-labels Cn3d-backbone-label-style OPTIONAL,
    -- ion labels
    ion-labels BOOLEAN OPTIONAL
}

-- an unordered collection of style settings
Cn3d-style-settings-set ::= SET OF Cn3d-style-settings

-- identifier of an entry in a style table (see Cn3d-style-table-item);
-- referenced by Cn3d-user-annotation.style-id
Cn3d-style-table-id ::= INTEGER

-- one entry of a style table: an id paired with the style it stands for
Cn3d-style-table-item ::= SEQUENCE {
    id Cn3d-style-table-id,
    style Cn3d-style-settings
}

-- the global settings, and a lookup table of styles for user annotations.
-- The table is optional; its items are addressed by Cn3d-style-table-id.
Cn3d-style-dictionary ::= SEQUENCE {
    global-style Cn3d-style-settings,
    style-table SEQUENCE OF Cn3d-style-table-item OPTIONAL
}

-- a range of residues in a chain, identified by MMDB residue-id
-- (e.g., numbered from 1).
-- NOTE(review): whether "to" is inclusive is not stated here; confirm.
Cn3d-residue-range ::= SEQUENCE {
    from Residue-id,
    to Residue-id
}

-- set of locations on a particular chain: the molecule plus an optional
-- list of residue ranges on it
Cn3d-molecule-location ::= SEQUENCE {
    molecule-id Molecule-id,    -- MMDB molecule id
    -- which residues; whole molecule implied if absent
    residues SEQUENCE OF Cn3d-residue-range OPTIONAL
}

-- set of locations on a particular structure object (e.g., a PDB/MMDB
-- structure), which may include multiple ranges of residues each on
-- multiple chains.
Cn3d-object-location ::= SEQUENCE {
    structure-id Biostruc-id,                   -- the structure object
    residues SEQUENCE OF Cn3d-molecule-location -- per-molecule locations
}

-- information for an individual user annotation: what it is called, how it
-- is drawn, which residues it covers and whether it is switched on
Cn3d-user-annotation ::= SEQUENCE {
    name VisibleString,                 -- a (short) name for this annotation
    description VisibleString OPTIONAL, -- an optional longer description
    style-id Cn3d-style-table-id,       -- how to draw this annotation
    residues SEQUENCE OF Cn3d-object-location,  -- which residues to cover
    is-on BOOLEAN   -- whether this annotation is to be turned on in Cn3D
}

-- a GL-ordered transformation matrix, stored as 16 REAL elements m0..m15.
-- NOTE(review): "GL-ordered" presumably means OpenGL (column-major) element
-- order; confirm against the Cn3D code before relying on it.
Cn3d-GL-matrix ::= SEQUENCE {
    m0  REAL, m1  REAL, m2  REAL, m3  REAL,
    m4  REAL, m5  REAL, m6  REAL, m7  REAL,
    m8  REAL, m9  REAL, m10 REAL, m11 REAL,
    m12 REAL, m13 REAL, m14 REAL, m15 REAL
}

-- a floating point 3d vector (used by Cn3d-view-settings.rotation-center)
Cn3d-vector ::= SEQUENCE {
    x REAL,
    y REAL,
    z REAL
}

-- parameters used to set up the camera in Cn3D. All fields are mandatory.
Cn3d-view-settings ::= SEQUENCE {
    camera-distance REAL,       -- camera on +Z axis this distance from origin
    camera-angle-rad REAL,      -- camera angle (in radians, per the field name)
    camera-look-at-X REAL,      -- X,Y of point in Z=0 plane camera points at
    camera-look-at-Y REAL,
    camera-clip-near REAL,      -- distance of clipping planes from camera
    camera-clip-far REAL,
    matrix Cn3d-GL-matrix,      -- transformation of objects in the scene
    rotation-center Cn3d-vector -- center of rotation of whole scene
}

-- The list of annotations for a given CDD/mime. If residue regions overlap
-- between annotations that are turned on, the last annotation in this list
-- that contains these residues will be used as the display style for these
-- residues.
--   Also contains the current viewpoint, so that user's camera angle
-- can be stored and reproduced, for illustrations, on-line figures, etc.
-- Both fields are optional.
Cn3d-user-annotations ::= SEQUENCE {
    annotations SEQUENCE OF Cn3d-user-annotation OPTIONAL, -- order matters, see above
    view Cn3d-view-settings OPTIONAL                       -- current viewpoint
}

END


-- docsum.asn
-- ============================================
-- ::DATATOOL:: Generated from "docsum_3.4.xsd"
-- ::DATATOOL:: by application DATATOOL version 2.1.0
-- ::DATATOOL:: on 08/14/2012 12:01:24
-- ============================================

-- edited with XMLSPY v5 rel. 4 U (http://www.xmlspy.com) by Michael Kholodov (National Library of Medicine) 
-- edited with XMLSpy v2005 rel. 3 U (http://www.altova.com) by Michael Feolo (NCBI/NLM/NIH) 
Docsum-3-4 DEFINITIONS AUTOMATIC TAGS ::=
BEGIN

--Assay record: an attribute list, the method, the taxonomy and optional
--strains, comment and citations.
--The nested "taxonomy NULL" member carries no data of its own; all
--information of that element is in its attlist.
Assay ::= SEQUENCE {
  attlist SET {
    handle VisibleString OPTIONAL,
    batch VisibleString OPTIONAL,
    batchId INTEGER OPTIONAL,
    batchType ENUMERATED {
      snpassay (1),
      validation (2),
      doublehit (3)
    } OPTIONAL,
    molType ENUMERATED {
      genomic (1),
      cDNA (2),
      mito (3),
      chloro (4)
    } OPTIONAL,
    sampleSize INTEGER OPTIONAL,
    population VisibleString OPTIONAL,
    linkoutUrl VisibleString OPTIONAL
  },
  method SEQUENCE {
    eMethod SEQUENCE {
      attlist SET {
        name VisibleString OPTIONAL, --Submitters method identifier
        id VisibleString OPTIONAL --dbSNP method identifier
      },

      --description of deviation from/addition to given method
      exception VisibleString
    } OPTIONAL
  },
  taxonomy SEQUENCE {
    attlist SET {

      --NCBI taxonomy ID for variation
      id INTEGER,
      organism VisibleString OPTIONAL
    },
    taxonomy NULL
  },
  strains SEQUENCE OF VisibleString OPTIONAL,
  comment VisibleString OPTIONAL,
  citation SEQUENCE OF VisibleString OPTIONAL
}

--A collection of genome sequence records (curated gene regions (NG's),
--contigs (NWNT's) and chromosomes (NC/AC's)) produced by a genome sequence
--project. Structure is populated from ContigInfo tables.
Assembly ::= SEQUENCE {
  attlist SET {

    --dbSNP build number defining the rsid set aligned to this assembly
    dbSnpBuild INTEGER,

    --assembly build number with possible 'subbuild' version numbers to
    --reflect updates in gene annotation (human e.g. 34_3, 35_1, 36_1)
    genomeBuild VisibleString,

    --High-level classification of the assembly to distinguish reference
    --projects from alternate solutions. GroupLabel field from
    --organism/build-specific ContigInfo tables. "reference" is occasionally
    --used as the preferred assembly; standards will converge as additional
    --organism genome projects are finished. Note that some organism assembly
    --names include extended characters like '~' and '/' that may be
    --incompatible with OS filename conventions.
    groupLabel VisibleString OPTIONAL,

    --Name of the group(s) or organization(s) that generated the assembly
    assemblySource VisibleString OPTIONAL,
    current BOOLEAN OPTIONAL, --Marks the current genomic assembly
    reference BOOLEAN OPTIONAL
  },
  component SEQUENCE OF Component OPTIONAL,
  snpStat SEQUENCE {
    attlist SET {

      --summary measure of placement precision in the assembly
      mapWeight ENUMERATED {
        unmapped (1),
        unique-in-contig (2),
        two-hits-in-contig (3),
        less-10-hits (4),
        multiple-hits (5)
      },

      --number of distinct chromosomes in the mapset
      chromCount INTEGER OPTIONAL,

      --number of distinct contigs [ gi | accession[.version] ] in the mapset
      placedContigCount INTEGER OPTIONAL,

      --number of sequence positions to a contig with unknown chromosomal
      --assignment
      unplacedContigCount INTEGER OPTIONAL,

      --total number of sequence positions in the mapset
      seqlocCount INTEGER OPTIONAL,

      --Number of hits to alternative genomic haplotypes (e.g. HLA DR region,
      --KIR, or pseudo-autosomal regions like PAR) within the assembly mapset.
      --Note that positions on haplotypes defined in other assemblies (a
      --different assembly_group_label value) will not be counted in this
      --value.
      hapCount INTEGER OPTIONAL
    },
    snpStat NULL
  }
}

--URL value from dbSNP_main.BaseURL links table. attributes provide context
--information and URL id that is referenced within individual refSNP objects.
BaseURL ::= SEQUENCE {
  attlist SET {

    --Resource identifier from dbSNP_main.baseURL.
    urlId INTEGER OPTIONAL,
    resourceName VisibleString OPTIONAL, --Name of linked resource

    --identifier expected by resource for URL
    resourceId VisibleString OPTIONAL
  },

  --the URL value itself (the context attributes are in the attlist above)
  baseURL VisibleString
}

--A sequence component of an Assembly (see Assembly.component): descriptive
--attributes plus the list of MapLoc entries on this component.
Component ::= SEQUENCE {
  attlist SET {

    --type of component: chromosome, contig, gene_region, etc.
    --NOTE(review): only contig and mrna are defined as values below.
    componentType ENUMERATED {
      contig (1),
      mrna (2)
    } OPTIONAL,

    --dbSNP contig_id used to join on contig hit / mapset data to these
    --assembly properties
    ctgId INTEGER OPTIONAL,

    --Accession[.version] for the sequence component
    accession VisibleString OPTIONAL,

    --contig name defined as either a submitter local id, element of a whole
    --genome assembly set, or internal NCBI local id
    name VisibleString OPTIONAL,

    --Organism appropriate chromosome tag, 'Un' reserved for default case of
    --unplaced components
    chromosome VisibleString OPTIONAL,

    --component starting position on the chromosome (base 0 inclusive)
    start INTEGER OPTIONAL,

    --component ending position on the chromosome (base 0 inclusive)
    end INTEGER OPTIONAL,

    --orientation of this component to chromosome. In ContigInfo.orient the
    --coding is forward (fwd) = 0, reverse (rev) = 1, unknown = NULL.
    --NOTE(review): the ENUMERATED values below are numbered from 1 and so do
    --not equal the ContigInfo.orient codes; confirm the mapping with the
    --producer.
    orientation ENUMERATED {
      fwd (1),
      rev (2),
      unknown (3)
    } OPTIONAL,

    --NCBI gi for component sequence (equivalent to accession.version) for
    --nucleotide sequence.
    gi VisibleString OPTIONAL,

    --Identifier label for the genome assembly that defines the contigs in
    --this mapset and their placement within the organism genome.
    groupTerm VisibleString OPTIONAL,
    contigLabel VisibleString OPTIONAL --Display label for component
  },
  mapLoc SEQUENCE OF MapLoc
}

--Set of dbSNP refSNP docsums, version 3.4.
--Top-level container: export attributes, then optional source database,
--refSNP records (rs), assay, query, summary counts and base URLs.
--The nested NULL members (sourceDatabase, query, summary) carry no data of
--their own; all information of those elements is in their attlist.
ExchangeSet ::= SEQUENCE {
  attlist SET {

    --set-type: full dump; from query; single refSNP
    setType VisibleString OPTIONAL,

    --content depth: brief XML (only refSNP properties and summary subSNP
    --element content); full XML (full refSNP, full subSNP content; all
    --flanking sequences)
    setDepth VisibleString OPTIONAL,

    --version number of docsum.asn/docsum.dtd specification
    specVersion VisibleString OPTIONAL,
    dbSnpBuild INTEGER OPTIONAL, --build number of database for this export
    generated VisibleString OPTIONAL --Generated date
  },
  sourceDatabase SEQUENCE {
    attlist SET {

      --NCBI taxonomy ID for variation
      taxId INTEGER,

      --common name for species used as part of database name.
      organism VisibleString,
      dbSnpOrgAbbr VisibleString OPTIONAL, --organism abbreviation used in dbSNP.

      --organism abbreviation used within NCBI genome pipeline data dumps.
      gpipeOrgAbbr VisibleString OPTIONAL
    },
    sourceDatabase NULL
  } OPTIONAL,
  rs SEQUENCE OF Rs OPTIONAL,
  assay Assay OPTIONAL,
  query SEQUENCE {
    attlist SET {
      date VisibleString OPTIONAL, --yyyy-mm-dd

      --Query terms or search constraints
      string VisibleString OPTIONAL
    },
    query NULL
  } OPTIONAL,
  summary SEQUENCE {
    attlist SET {
      numRsIds INTEGER OPTIONAL, --Total number of refsnp-ids in this exchange set

      --Total length of exemplar flanking sequences
      totalSeqLength INTEGER OPTIONAL,

      --Total number of contig locations from SNPContigLoc
      numContigHits INTEGER OPTIONAL,

      --Total number of locus ids from SNPContigLocusId
      numGeneHits INTEGER OPTIONAL,

      --Total number of gi hits from MapLink
      numGiHits INTEGER OPTIONAL,

      --Total number of 3D structures from SNP3D
      num3dStructs INTEGER OPTIONAL,

      --Total number of allele frequencies from SubPopAllele
      numAlleleFreqs INTEGER OPTIONAL,

      --Total number of STS hits from SnpInSts
      numStsHits INTEGER OPTIONAL,

      --Total number of unigene cluster ids from UnigeneSnp
      numUnigeneCids INTEGER OPTIONAL
    },
    summary NULL
  } OPTIONAL,
  baseURL SEQUENCE OF BaseURL OPTIONAL
}

--functional relationship of SNP (and possibly alleles) to genes at contig
--location as defined in organism-specific bxxx_SNPContigLocusId_xxx tables.
--All information is carried in the attlist; the fxnSet member is NULL.
FxnSet ::= SEQUENCE {
  attlist SET {
    geneId INTEGER OPTIONAL, --gene-id of gene as aligned to contig

    --symbol (official if present in Entrez Gene) of gene
    symbol VisibleString OPTIONAL,
    mrnaAcc VisibleString OPTIONAL, --mRNA accession if variation in transcript

    --mRNA sequence version if variation is in transcript
    mrnaVer INTEGER OPTIONAL,
    protAcc VisibleString OPTIONAL, --protein accession if variation in protein

    --protein version if variation is in protein
    protVer INTEGER OPTIONAL,

    --functional class of the variation.
    --Original generated comment: "variation in region of gene, but not in
    --transcript - deprecated".
    --NOTE(review): that text appears to describe only the first value
    --(locus-region), not the whole field; confirm against docsum_3.4.xsd.
    fxnClass ENUMERATED {
      locus-region (1),
      coding-unknown (2),
      synonymous-codon (3),
      non-synonymous-codon (4),
      mrna-utr (5),
      intron-variant (6),
      splice-region-variant (7),
      reference (8),
      coding-exception (9),
      coding-sequence-variant (10),
      nc-transcript-variant (11),
      downstream-variant-500B (12),
      upstream-variant-2KB (13),
      nonsense (14),
      missense (15),
      frameshift-variant (16),
      utr-variant-3-prime (17),
      utr-variant-5-prime (18),
      splice-acceptor-variant (19),
      splice-donor-variant (20),
      cds-indel (21),
      stop-gained (22),
      stop-lost (23),
      complex-change-in-transcript (24),
      incomplete-terminal-codon-variant (25),
      nmd-transcript-variant (26),
      mature-miRNA-variant (27),
      upstream-variant-5KB (28),
      downstream-variant-5KB (29),
      intergenic (30)
    } OPTIONAL,
    readingFrame INTEGER OPTIONAL,

    --variation allele: * suffix indicates allele of contig at this location
    allele VisibleString OPTIONAL,
    residue VisibleString OPTIONAL, --translated amino acid residue for allele

    --position of the variant residue in peptide sequence
    aaPosition INTEGER OPTIONAL,
    mrnaPosition INTEGER OPTIONAL,
    soTerm VisibleString OPTIONAL
  },

  --functional relationship of SNP (and possibly alleles) to genes at contig
  --location as defined in organism-specific bxxx_SNPContigLocusId_xxx
  --tables.
  fxnSet NULL
}

--Position of a single hit of a variation on a contig, with alignment details
--and an optional list of functional annotations (FxnSet) at that location.
MapLoc ::= SEQUENCE {
  attlist SET {

    --beginning of variation as feature on contig
    asnFrom INTEGER,

    --end position of variation as feature on contig
    asnTo INTEGER,

    --defines the seq-loc symbol if asn_from != asn_to
    locType ENUMERATED {
      insertion (1),
      exact (2),
      deletion (3),
      range-ins (4),
      range-exact (5),
      range-del (6)
    },
    alnQuality REAL OPTIONAL, --alignment quality

    --orientation of refSNP sequence to contig sequence
    orient ENUMERATED {
      forward (1),
      reverse (2)
    } OPTIONAL,

    --chromosome position as integer for sorting
    physMapInt INTEGER OPTIONAL,

    --nearest aligned position in 5' flanking sequence of snp
    leftFlankNeighborPos INTEGER OPTIONAL,
    rightFlankNeighborPos INTEGER OPTIONAL, --nearest aligned position in 3' flanking sequence of snp

    --nearest aligned position in 5' contig alignment of snp
    leftContigNeighborPos INTEGER OPTIONAL,

    --nearest aligned position in 3' contig alignment of snp
    rightContigNeighborPos INTEGER OPTIONAL,

    --number of Mismatched positions in this alignment
    numberOfMismatches INTEGER OPTIONAL,
    numberOfDeletions INTEGER OPTIONAL, --number of deletions in this alignment
    numberOfInsertions INTEGER OPTIONAL, --number of insertions in this alignment
    refAllele VisibleString OPTIONAL
  },
  fxnSet SEQUENCE OF FxnSet OPTIONAL
}

--A primary sequence record: identifying attributes (dbSNP build, gi, source
--and accession) plus the list of MapLoc entries on that sequence.
--dbSnpBuild and gi are mandatory; source and accession are optional.
PrimarySequence ::= SEQUENCE {
  attlist SET {
    dbSnpBuild INTEGER,
    gi INTEGER,

    --NOTE(review): presumably how the placement was obtained; the meaning of
    --the individual values is not documented here
    source ENUMERATED {
      submitter (1),
      blastmb (2),
      xm (3),
      remap (4),
      hgvs (5)
    } OPTIONAL,
    accession VisibleString OPTIONAL
  },
  mapLoc SEQUENCE OF MapLoc
}

--defines the docsum structure for refSNP clusters, where a refSNP
--				cluster (rs) is a grouping of individual dbSNP submissions that all refer to the
--				same variation. The refsnp provides a single unified record for annotation of NCBI
--				resources such as reference genome sequence.
Rs ::= SEQUENCE {
  attlist SET {
    rsId INTEGER, --refSNP (rs) number
    snpClass ENUMERATED {
      snp (1),
      in-del (2),
      heterozygous (3),
      microsatellite (4),
      named-locus (5),
      no-variation (6),
      mixed (7),
      multinucleotide-polymorphism (8)
    },
    snpType ENUMERATED {
      notwithdrawn (1),
      artifact (2),
      gene-duplication (3),
      duplicate-submission (4),
      notspecified (5),
      ambiguous-location (6),
      low-map-quality (7)
    },
    molType ENUMERATED {
      genomic (1),
      cDNA (2),
      mito (3),
      chloro (4),
      unknown (5)
    },
    
    --minimum reported success rate of all submissions in
    --						cluster
    validProbMin INTEGER OPTIONAL,
    
    --maximum reported success rate of all submissions in
    --						cluster
    validProbMax INTEGER OPTIONAL,
    
    --at least one genotype reported for this
    --						refSNP
    genotype BOOLEAN OPTIONAL,
    bitField VisibleString OPTIONAL,
    taxId INTEGER OPTIONAL
  },
  het SEQUENCE {
    attlist SET {
      
      --Est=Estimated average het from allele
      --									frequencies, Obs=Observed from genotype data
      type ENUMERATED {
        est (1),
        obs (2)
      },
      value REAL, --Heterozygosity
      
      --Standard error of Het
      --									estimate
      stdError REAL OPTIONAL
    },
    het NULL
  } OPTIONAL,
  validation SEQUENCE {
    attlist SET {
      
      --at least one subsnp in cluster has frequency data
      --									submitted
      byCluster BOOLEAN OPTIONAL,
      byFrequency BOOLEAN OPTIONAL, --Validated by allele frequency
      byOtherPop BOOLEAN OPTIONAL,
      
      --cluster has 2+ submissions, with 1+ submissions
      --									assayed with a non-computational method
      by2Hit2Allele BOOLEAN OPTIONAL,
      byHapMap BOOLEAN OPTIONAL, --Validated by HapMap Project 
      by1000G BOOLEAN OPTIONAL, --Validated by 1000 Genomes Project
      suspect BOOLEAN OPTIONAL --Suspected to be false SNP
    },
    
    --dbSNP batch-id's for other pop snp validation
    --										data.
    otherPopBatchId SEQUENCE OF INTEGER OPTIONAL,
    
    --dbSNP batch-id's for double-hit snp
    --										validation data. Use batch-id to get methods, etc.
    twoHit2AlleleBatchId SEQUENCE OF INTEGER OPTIONAL,
    
    --Frequency validation class (1) low frequency
    --										variation that is cited in journal and other reputable
    --										sources (2) greater than 5 percent minor allele freq in each
    --										and all populations (4) greater than 5 percent minor allele
    --										freq in 1+ populations (8) if the variant has 2+ minor
    --										allele count based on freq or genotype data (16) less than 1
    --										percent minor allele freq in each and all populations (32)
    --										less than 1 percent minor freq in 1+ populations
    frequencyClass SEQUENCE OF INTEGER OPTIONAL,
    
    --Validated by HapMap Project:
    --  phase1-genotyped (1), Phase 1 genotyped; filtered, non-redundant
    --  phase2-genotyped (2), Phase 2 genotyped; filtered, non-redundant
    --  phase3-genotyped (4), Phase 3 genotyped; filtered, non-redundant
    hapMapPhase SEQUENCE OF INTEGER OPTIONAL,
    
    --Validated by 1000 Genomes Project (TGP) pilot
    --										1 (1), pilot 2 (2), pilot 3 (4) 
    tGPPhase SEQUENCE OF INTEGER OPTIONAL,
    
    --Suspected to be false SNP evidence Single
    --										Nucleotide Difference - paralogous genes (1), Genotype or
    --										base calling errors (2), Submission evidence or errors (4),
    --										Others (8) 
    suspectEvidence SEQUENCE OF VisibleString OPTIONAL
  },
  
  --date the refsnp cluster was instantiated
  create SEQUENCE {
    attlist SET {
      
      --build number when the cluster was
      --									created
      build INTEGER OPTIONAL,
      date VisibleString OPTIONAL --yyyy-mm-dd
    },
    
    --date the refsnp cluster was
    --							instantiated
    create NULL
  },
  
  --most recent date the cluster was updated (member added or deleted)
  update SEQUENCE {
    attlist SET {
      
      --build number when the cluster was
      --									updated
      build INTEGER OPTIONAL,
      date VisibleString OPTIONAL --yyyy-mm-dd
    },
    
    --most recent date the cluster was updated (member added or
    --							deleted)
    update NULL
  } OPTIONAL,
  sequence SEQUENCE {
    attlist SET {
      
      --dbSNP ss# selected as source of refSNP flanking
      --									sequence, ss# part of ss-list below 
      exemplarSs INTEGER,
      ancestralAllele VisibleString OPTIONAL
    },
    
    --5' sequence that flanks the
    --										variation
    seq5 VisibleString OPTIONAL,
    
    --list of all nucleotide alleles observed in
    --										ss-list members, correcting for reverse complementation of
    --										members reported in reverse orientation
    observed VisibleString,
    
    --3' sequence that flanks the
    --										variation
    seq3 VisibleString OPTIONAL
  },
  ss SEQUENCE OF Ss,
  assembly SEQUENCE OF Assembly OPTIONAL,
  primarySequence SEQUENCE OF PrimarySequence OPTIONAL,
  rsStruct SEQUENCE OF RsStruct OPTIONAL,
  rsLinkout SEQUENCE OF RsLinkout OPTIONAL,
  mergeHistory SEQUENCE OF SEQUENCE {
    attlist SET {
      
      --previously issued rs id whose member assays have
      --									now been merged
      rsId INTEGER,
      
      --build id when rs id was merged into parent
      --									rs
      buildId INTEGER OPTIONAL,
      
      --TRUE if strand of rs id is reverse to parent
      --									object's current strand
      orientFlip BOOLEAN OPTIONAL
    },
    mergeHistory NULL
  } OPTIONAL,
  hgvs SEQUENCE OF VisibleString OPTIONAL, -- HGVS name list 
  
  --							origin of this allele, if known
  --							note that these are powers-of-two, and represent bits; thus, we can
  --							represent more than one state simultaneously through a bitwise OR
  --							unknown         (0),
  --							germline        (1),
  --							somatic         (2),
  --							inherited       (4),
  --							paternal        (8),
  --							maternal        (16),
  --							de-novo         (32),
  --							biparental      (64),
  --							uniparental     (128),
  --							not-tested      (256),
  --							tested-inconclusive (512),
  alleleOrigin SEQUENCE OF 
    --							origin of this allele, if known
    --							note that these are powers-of-two, and represent bits; thus, we can
    --							represent more than one state simultaneously through a bitwise OR
    --							unknown         (0),
    --							germline        (1),
    --							somatic         (2),
    --							inherited       (4),
    --							paternal        (8),
    --							maternal        (16),
    --							de-novo         (32),
    --							biparental      (64),
    --							uniparental     (128),
    --							not-tested      (256),
    --							tested-inconclusive (512),
    SEQUENCE {
    attlist SET {
      allele VisibleString OPTIONAL
    },
    
    --							origin of this allele, if known
    --							note that these are powers-of-two, and represent bits; thus, we can
    --							represent more than one state simultaneously through a bitwise OR
    --							unknown         (0),
    --							germline        (1),
    --							somatic         (2),
    --							inherited       (4),
    --							paternal        (8),
    --							maternal        (16),
    --							de-novo         (32),
    --							biparental      (64),
    --							uniparental     (128),
    --							not-tested      (256),
    --							tested-inconclusive (512),
    alleleOrigin INTEGER
  } OPTIONAL,
  phenotype SEQUENCE OF SEQUENCE {
    
    --										unknown                 (0),
    --										untested                (1),
    --										non-pathogenic          (2),
    --										probable-non-pathogenic (3),
    --										probable-pathogenic     (4),
    --										pathogenic              (5),
    --										drug response           (6),
    --										other                   (255)
    clinicalSignificance SEQUENCE OF VisibleString OPTIONAL
  } OPTIONAL,
  bioSource SEQUENCE OF SEQUENCE {
    
    --										unknown (0) ,
    --										genomic (1) ,
    --										chloroplast (2) ,
    --										chromoplast (3) ,
    --										kinetoplast (4) ,
    --										mitochondrion (5) ,
    --										plastid (6) ,
    --										macronuclear (7) ,
    --										extrachrom (8) ,
    --										plasmid (9) ,
    --										transposon (10) ,
    --										insertion-seq (11) ,
    --										cyanelle (12) ,
    --										proviral (13) ,
    --										virion (14) ,
    --										nucleomorph (15) ,
    --										apicoplast (16) ,
    --										leucoplast (17) ,
    --										proplastid (18) ,
    --										endogenous-virus (19) ,
    --										hydrogenosome (20) ,
    --										chromosome (21) ,
    --										chromatophore (22)
    genome SEQUENCE OF VisibleString OPTIONAL,
    
    --										unknown (0) ,
    --										natural (1) ,                    normal biological entity
    --										natmut (2) ,                    naturally occurring mutant
    --										mut (3) ,                        artificially mutagenized
    --										artificial (4) ,                 artificially engineered
    --										synthetic (5) ,                 purely synthetic
    --										other (255)
    origin SEQUENCE OF VisibleString OPTIONAL
  } OPTIONAL,
  frequency SEQUENCE OF SEQUENCE {
    attlist SET {
      freq REAL OPTIONAL,
      allele VisibleString OPTIONAL,
      popId INTEGER OPTIONAL, --dbSNP Populaton ID
      sampleSize INTEGER OPTIONAL
    },
    frequency NULL
  } OPTIONAL
}

--link data for another resource
--All data is carried by the two required attlist members; the trailing
--rsLinkout member is typed NULL and holds no value of its own.
RsLinkout ::= SEQUENCE {
  attlist SET {
    resourceId VisibleString, --BaseURLList.url_id
    
    --value to append to ResourceURL.base-url for the complete link
    linkValue VisibleString
  },
  
  --NULL placeholder; the link data itself is in attlist above
  rsLinkout NULL
}

--structure information for SNP
--Every attlist member is OPTIONAL, so an RsStruct may legally carry no
--data at all. The trailing rsStruct member is typed NULL and holds no
--value of its own.
RsStruct ::= SEQUENCE {
  attlist SET {
    protAcc VisibleString OPTIONAL, --accession of the protein with variation
    protGi INTEGER OPTIONAL, --GI of the protein with variation
    
    --position of the residue for the protein GI
    protLoc INTEGER OPTIONAL,
    
    --residue specified for protein at prot-loc location
    protResidue VisibleString OPTIONAL,
    
    --alternative residue specified by variation sequence
    rsResidue VisibleString OPTIONAL,
    structGi INTEGER OPTIONAL, --GI of the structure neighbor
    
    --position of the residue for the structure GI
    structLoc INTEGER OPTIONAL,
    
    --residue specified for protein at struct-loc location
    structResidue VisibleString OPTIONAL
  },
  
  --NULL placeholder; the structure data itself is in attlist above
  rsStruct NULL
}

--data for an individual submission to dbSNP
--The attlist SET holds the per-submission attributes (ssId, handle and
--batchId are required, everything else is OPTIONAL); the sequence member
--holds the observed alleles with their optional flanking sequence.
Ss ::= SEQUENCE {
  attlist SET {
    ssId INTEGER, --dbSNP accession number for submission
    handle VisibleString, --Tag for the submitting laboratory
    batchId INTEGER, --dbSNP number for batch submission
    locSnpId VisibleString OPTIONAL, --submission (ss#) submitter ID
    
    --SubSNP classification by type of variation
    subSnpClass ENUMERATED {
      snp (1),
      in-del (2),
      heterozygous (3),
      microsatellite (4),
      named-locus (5),
      no-variation (6),
      mixed (7),
      multinucleotide-polymorphism (8)
    } OPTIONAL,
    
    --orientation of refsnp cluster members to refsnp cluster sequence
    orient ENUMERATED {
      forward (1),
      reverse (2)
    } OPTIONAL,
    
    --strand is defined as TOP/BOTTOM by nature of flanking
    --nucleotide sequence
    strand ENUMERATED {
      top (1),
      bottom (2)
    } OPTIONAL,
    molType ENUMERATED {
      genomic (1),
      cDNA (2),
      mito (3),
      chloro (4),
      unknown (5)
    } OPTIONAL, --moltype from Batch table
    
    --dbSNP build number when ss# was added to a refSNP (rs#) cluster
    buildId INTEGER OPTIONAL,
    
    --class of method used to assay for the variation
    methodClass ENUMERATED {
      dHPLC (1),
      hybridize (2),
      computed (3),
      sSCP (4),
      other (5),
      unknown (6),
      rFLP (7),
      sequence (8)
    } OPTIONAL,
    
    --subsnp has been experimentally validated by submitter
    --NOTE(review): the enumeration also lists by-frequency and
    --by-cluster, so the wording above covers only the first value.
    validated ENUMERATED {
      by-submitter (1),
      by-frequency (2),
      by-cluster (3)
    } OPTIONAL,
    
    --append loc-snp-id to this base URL to construct a pointer to
    --submitter data.
    linkoutUrl VisibleString OPTIONAL,
    ssAlias VisibleString OPTIONAL,
    
    --The original comment here was a residual XSD simpleType
    --(restriction base="xsd:string") enumerating these values:
    --  unknown, germline, somatic, inherited, paternal, maternal,
    --  de-novo, other
    --NOTE(review): the member is declared INTEGER, not a string, and no
    --numeric codes for these names are given here - confirm the
    --intended encoding.
    alleleOrigin INTEGER OPTIONAL,
    
    --The original comment here was a residual XSD simpleType
    --(restriction base="xsd:string") enumerating these values:
    --  unknown, untested, non-pathogenic, probable-non-pathogenic,
    --  probable-pathogenic, pathogenic, other
    clinicalSignificance VisibleString OPTIONAL
  },
  sequence SEQUENCE {
    
    --5' sequence that flanks the variation
    seq5 VisibleString OPTIONAL,
    
    --list of all nucleotide alleles observed in
    --ss-list members, correcting for reverse complementation of
    --members reported in reverse orientation
    observed VisibleString,
    
    --3' sequence that flanks the variation
    seq3 VisibleString OPTIONAL
  }
}

END


-- entrez2.asn
--$Revision: 1.12 $********************************************
--
--  entrez2.asn
--   Version 1
--
--   API to Entrez Engine (1999)
--   Retrieval of sequence done through ID1 module
--     Also, SeqId queries
--   Retrieval of PubMed records through PubMed module
--   Retrieval of Structures through PubStruct module
--   Retrieval of Genomes through Genomes module
--
--***************************************************************

NCBI-Entrez2 DEFINITIONS ::=
BEGIN

   --**************************************
   --  Entrez2 common elements
   --**************************************

-- Scalar aliases referenced by the Entrez2 request and reply types below.
-- The integer encoding of Entrez2-dt is not specified in this module.
Entrez2-dt ::= INTEGER                   -- a date/time stamp
Entrez2-db-id ::= VisibleString          -- database name
Entrez2-field-id ::= VisibleString       -- field name
Entrez2-link-id ::= VisibleString        -- link name

-- A list of UIDs that all belong to one database. The UIDs are packed
-- into an OCTET STRING; the packing format is not described in this module.
Entrez2-id-list ::= SEQUENCE  {          -- list of record UIDs
  db Entrez2-db-id ,                     -- the database
  num INTEGER ,                          -- number of uids
  uids OCTET STRING OPTIONAL  }          -- coded uids; may be absent

       --****************************************
       -- The structured form of the boolean is the same in a request or
       --    return so that it is easy to modify a query. This means some
       --    fields are only considered in a return value, like counts
       --    by term. They are ignored in a request.
       -- The structured boolean supports specific boolean components,
       --    an unparsed string in query syntax, and UID lists as
       --    elements of a boolean. This makes it possible to submit
       --    a single string, a fully structured query, or a mixture.
       --    The UID list feature means one can also perform refinements
       --    on UID lists from links, neighbors, or other operations.
       --    UID list query now returns a history key for subsequent use.
       --*****************************************

-- A complete Boolean query against a single database: an ordered list of
-- elements plus optional limits.
Entrez2-boolean-exp ::= SEQUENCE {
  db Entrez2-db-id ,                         -- database for this query
  exp SEQUENCE OF Entrez2-boolean-element ,  -- the Boolean
  limits Entrez2-limits OPTIONAL }           -- date filter and UID-list limits

-- One element of the ordered list in Entrez2-boolean-exp.exp. Exactly one
-- alternative is present per element.
Entrez2-boolean-element ::= CHOICE {
  str VisibleString ,                       -- unparsed query string
  op Entrez2-operator ,                     -- logical operator
  term Entrez2-boolean-term ,               -- fielded term
  ids Entrez2-id-list ,                     -- list of UIDs - returns history key in reply
  key VisibleString }                       -- history key for uploaded UID list or other query

       --*****************************************
       -- the term is both sent and received as parts of
       --   queries and replies. The attributes can be filled in
       --   by either, but may be ignored by one or the other. Flags are
       --   shown if a real value is only of use in the query (Q), only
       --   in the reply (R), or used in both (B)
       -- do-not-explode and do-not-translate are only active when set
       --   by the query. However, they retain those settings in the
       --   return value so they can be resent with a new query
       --******************************************

-- A fielded query term. The flag letter at the start of each comment
-- says where the member is meaningful: B = both query and reply,
-- R = reply only, Q = query only.
Entrez2-boolean-term ::= SEQUENCE {
  field Entrez2-field-id ,                  -- B
  term VisibleString ,                      -- B
  term-count INTEGER OPTIONAL,              -- R count of records with term
  do-not-explode BOOLEAN DEFAULT FALSE,     -- Q do not explode term
  do-not-translate BOOLEAN DEFAULT FALSE}   -- Q do not use synonyms
  
-- Operator and parenthesis tokens that can appear as the "op" alternative
-- of Entrez2-boolean-element. Declared as INTEGER with named values
-- (not ENUMERATED).
Entrez2-operator ::= INTEGER {
  and (1) ,
  or (2) ,
  butnot (3) ,
  range (4) ,
  left-paren (5) ,
  right-paren (6) }

  --***************************************
  --  Entrez2 Request types
  --***************************************
       --****************************************
       -- The basic request wrapper leaves space for a version which
       --   allows the server to support older clients
       -- The tool parameter allows us to log the client types for
       --   debugging and tuning
       -- The cookie is a session ID returned by the first Entrez2-reply
       --****************************************
       
-- Wrapper sent for every request: the request proper plus the version,
-- tool and history-session information.
Entrez2-request ::= SEQUENCE {           -- a standard request
  request E2Request ,                    -- the actual request
  version INTEGER ,                      -- ASN1 spec version
  tool VisibleString OPTIONAL ,          -- tool making request
  cookie VisibleString OPTIONAL ,        -- history session cookie
  use-history BOOLEAN DEFAULT FALSE }    -- request should use history

-- The request proper. Every alternative here has an alternative of the
-- same name in E2Reply. get-links and get-linked share the same
-- parameter type (Entrez2-get-links).
E2Request ::= CHOICE {                   -- request types
  get-info NULL ,                        -- ask for info block
  eval-boolean Entrez2-eval-boolean ,    -- Boolean lookup
  get-docsum Entrez2-id-list ,           -- get the DocSums
  get-term-pos Entrez2-term-query,       -- get position in term list
  get-term-list Entrez2-term-pos ,       -- get Term list by position
  get-term-hierarchy Entrez2-hier-query, -- get a hierarchy from a term
  get-links Entrez2-get-links ,          -- get specific links from a UID list
  get-linked Entrez2-get-links ,         -- get subset of UID list which has links
  get-link-counts Entrez2-id }           -- get all links from one UID

       --****************************************
       -- When evaluating a boolean query the count of hits is always
       --    returned.
       -- In addition, you can request the UIDs of the hits or
       --    the parsed query in structured form (with counts by term),
       --    or both.
       --****************************************
  
-- Parameters for the eval-boolean request. Both return flags default to
-- FALSE, so leaving them out asks for neither the UID list nor the parse.
Entrez2-eval-boolean ::= SEQUENCE {       -- evaluate Boolean query
  return-UIDs BOOLEAN DEFAULT FALSE,      -- return UID list?
  return-parse BOOLEAN DEFAULT FALSE,     -- return parsed query?
  query Entrez2-boolean-exp }             -- the actual query

-- Date filter used by Entrez2-limits.filter-date. All three members are
-- required.
-- NOTE(review): member semantics below are inferred from their names and
-- types; confirm against the server implementation.
Entrez2-dt-filter ::= SEQUENCE { 
  begin-date Entrez2-dt,                 -- presumably start of the date range
  end-date Entrez2-dt,                   -- presumably end of the date range
  type-date Entrez2-field-id }           -- presumably the date field to filter on

-- Optional limits attached to an Entrez2-boolean-exp. Every member is
-- OPTIONAL.
Entrez2-limits ::= SEQUENCE {            -- date and UID-list limits
  filter-date Entrez2-dt-filter OPTIONAL,
  max-UIDs INTEGER OPTIONAL,             -- max UIDs to return in list
  offset-UIDs INTEGER OPTIONAL}          -- start partway into UID list
 
  
-- One record identified by database name plus UID; used as the parameter
-- of the get-link-counts request.
Entrez2-id ::= SEQUENCE {                -- a single UID
  db Entrez2-db-id ,
  uid INTEGER }

-- Parameter of the get-term-pos request: names one term within one field
-- of one database. The matching reply alternative is a bare INTEGER
-- position.
Entrez2-term-query ::= SEQUENCE {
  db Entrez2-db-id ,
  field Entrez2-field-id ,
  term VisibleString }

-- Parameter of the get-term-hierarchy request. The comments describe
-- term and txid as alternatives, but both are simply OPTIONAL in the
-- type, so the either/or rule is not enforced by the schema.
Entrez2-hier-query ::= SEQUENCE {
  db Entrez2-db-id ,
  field Entrez2-field-id ,
  term VisibleString OPTIONAL ,          -- query with either term
  txid INTEGER OPTIONAL }                -- or Taxonomy ID

-- Parameter of the get-term-list request: a starting position within the
-- term list of one field, and how many terms to return.
Entrez2-term-pos ::= SEQUENCE {          -- request portions of term list
  db Entrez2-db-id ,
  field Entrez2-field-id ,
  first-term-pos INTEGER ,
  number-of-terms INTEGER OPTIONAL }     -- optional for hierarchy only

-- Parameter type shared by the get-links and get-linked requests.
-- Note: the member is spelled max-UIDS here but max-UIDs in
-- Entrez2-limits. Both identifiers are part of the spec as written.
Entrez2-get-links ::= SEQUENCE {         -- request links of one type
  uids Entrez2-id-list ,                 -- docs to link from
  linktype Entrez2-link-id ,             -- type of link
  max-UIDS INTEGER OPTIONAL ,            -- maximum number of links to return
  count-only BOOLEAN OPTIONAL ,          -- return only the counts
  parents-persist BOOLEAN OPTIONAL }     -- allow original uids in list

  --**********************************************************
  -- Replies from the Entrez server
  --  all replies contain the date/time stamp when they were executed
  --  to do regular date bounded searches use this value+1 to search
  --  again later instead of recording the date/time on the client machine
  --  the cookie allows a simple key string to represent UID lists in the history
  --**********************************************************
  
-- Wrapper returned for every request: the reply proper plus the server
-- timestamp, server version, and optional message and history
-- information.
Entrez2-reply ::= SEQUENCE {
  reply E2Reply ,                       -- the actual reply
  dt Entrez2-dt ,                       -- date/time stamp from server
  server VisibleString ,                -- server version info
  msg VisibleString OPTIONAL ,          -- possibly a message to the user
  key VisibleString OPTIONAL ,          -- history key for query
  cookie VisibleString OPTIONAL }       -- history session cookie

-- The reply proper. Apart from "error", each alternative has the same
-- name as the E2Request alternative it corresponds to.
E2Reply ::= CHOICE {
  error VisibleString ,                 -- if nothing can be returned
  get-info Entrez2-info ,               -- the database info
  eval-boolean Entrez2-boolean-reply,   -- result of boolean query
  get-docsum Entrez2-docsum-list,
  get-term-pos INTEGER,                 -- position of the term
  get-term-list Entrez2-term-list,
  get-term-hierarchy Entrez2-hier-node,
  get-links Entrez2-link-set,
  get-linked Entrez2-id-list,
  get-link-counts Entrez2-link-count-list }


-- Reply to get-info. db-count is carried separately from the db-info
-- list; the schema does not itself tie the two together.
Entrez2-info ::= SEQUENCE {             -- describes all the databases
  db-count INTEGER ,                    -- number of databases
  build-date Entrez2-dt ,               -- build date of databases
  db-info SEQUENCE OF Entrez2-db-info } -- info by database

-- Description of one database: names, record count, and three lists
-- (fields, links, docsum fields), each preceded by an explicit count
-- member.
Entrez2-db-info ::= SEQUENCE {          -- info for one database
  db-name Entrez2-db-id ,               -- internal name
  db-menu VisibleString ,               -- short name for menu
  db-descr VisibleString ,              -- longer explanatory name
  doc-count INTEGER ,                   -- total number of records
  field-count INTEGER ,                 -- number of field types
  fields SEQUENCE OF Entrez2-field-info,
  link-count INTEGER ,                  -- number of link types
  links SEQUENCE OF Entrez2-link-info,
  docsum-field-count INTEGER,
  docsum-fields SEQUENCE OF Entrez2-docsum-field-info }

-- Description of one searchable field. The six BOOLEAN members are all
-- OPTIONAL with no DEFAULT, so an absent flag is distinct from an
-- explicit FALSE on the wire; how absence is interpreted is not stated
-- here.
Entrez2-field-info ::= SEQUENCE {       -- info about one field
  field-name Entrez2-field-id ,         -- the internal name
  field-menu VisibleString ,            -- short string suitable for menu
  field-descr VisibleString ,           -- longer, explanatory name
  term-count INTEGER ,                  -- number of terms in field
  is-date BOOLEAN OPTIONAL ,
  is-numerical BOOLEAN OPTIONAL ,
  single-token BOOLEAN OPTIONAL ,
  hierarchy-avail BOOLEAN OPTIONAL ,
  is-rangable BOOLEAN OPTIONAL ,
  is-truncatable BOOLEAN OPTIONAL }

-- Description of one link type available from a database. The name,
-- menu and descr members parallel those of Entrez2-field-info.
Entrez2-link-info ::= SEQUENCE {        -- info about one link
  link-name Entrez2-link-id ,
  link-menu VisibleString ,
  link-descr VisibleString ,
  db-to Entrez2-db-id ,                 -- database it links to
  data-size INTEGER OPTIONAL }          -- size of link data element

-- Value type of a docsum field, as reported in
-- Entrez2-docsum-field-info.field-type. Declared as INTEGER with named
-- values (not ENUMERATED).
Entrez2-docsum-field-type ::= INTEGER {
  string (1) ,
  int    (2) ,
  float  (3) ,
  date-pubmed (4) }

-- Description of one docsum field: its name, a description, and its
-- value type. Listed per database in Entrez2-db-info.docsum-fields.
Entrez2-docsum-field-info ::= SEQUENCE {
  field-name VisibleString,
  field-description VisibleString,
  field-type Entrez2-docsum-field-type }

-- Reply to eval-boolean. count is always present; uids and query
-- correspond to the return-UIDs and return-parse flags of
-- Entrez2-eval-boolean.
Entrez2-boolean-reply ::= SEQUENCE {
  count INTEGER ,                       -- records hit
  uids Entrez2-id-list OPTIONAL,        -- if uids requested
  query Entrez2-boolean-exp OPTIONAL }  -- if parsed query requested

-- Reply to get-docsum: an explicit count followed by the docsums.
Entrez2-docsum-list ::= SEQUENCE {
  count INTEGER ,                       -- number of docsums
  list SEQUENCE OF Entrez2-docsum }

-- Document summary for one record: its UID plus a list of name/value
-- pairs (see Entrez2-docsum-data).
Entrez2-docsum ::= SEQUENCE {
  uid INTEGER ,                         -- primary uid (gi, pubmedid)
  docsum-data SEQUENCE OF Entrez2-docsum-data }

-- One name/value pair of a docsum. The value is always carried as a
-- string. NOTE(review): presumably field-name matches a field-name in
-- Entrez2-docsum-field-info, whose field-type gives the intended
-- interpretation - confirm.
Entrez2-docsum-data::= SEQUENCE {
  field-name VisibleString,
  field-value VisibleString }

-- Reply to get-term-list: a run of terms taken from a field's term list.
Entrez2-term-list ::= SEQUENCE {
  pos INTEGER,                          -- position of first term in list
  num INTEGER,                          -- number of terms in list
  list SEQUENCE OF Entrez2-term }

-- One term with its record count. Used in Entrez2-term-list and for the
-- lineage and children lists of Entrez2-hier-node.
Entrez2-term ::= SEQUENCE {
  term VisibleString ,
  txid  INTEGER OPTIONAL,               -- NOTE(review): presumably a Taxonomy ID, as in Entrez2-hier-query - confirm
  count INTEGER ,                       -- count of records with this term
  is-leaf-node BOOLEAN OPTIONAL }       -- used for hierarchy only

-- Reply to get-term-hierarchy: one node with its lineage and children.
-- The identifier "cannonical-form" (sic) is part of the spec as written
-- and must not be respelled.
Entrez2-hier-node ::= SEQUENCE {        -- for hierarchical index
  cannonical-form VisibleString ,       -- the official name
  lineage-count INTEGER ,               -- number of strings in lineage
  lineage SEQUENCE OF Entrez2-term OPTIONAL , -- strings up the lineage
  child-count INTEGER ,                 -- number of children of this node
  children SEQUENCE OF Entrez2-term ,   -- the children
  is-ambiguous BOOLEAN OPTIONAL }       -- NOTE(review): original comment read "used for hierarchy only" (same as Entrez2-term.is-leaf-node); actual meaning not stated here


      --*******************************************
      -- Links are returned in sets also using OCTET STRINGS
      --*******************************************

-- Reply to get-links: the linked UIDs plus optional packed score data.
-- The layout of the OCTET STRING is not described in this module.
Entrez2-link-set ::= SEQUENCE {          -- set of links
  ids Entrez2-id-list ,
  data-size INTEGER OPTIONAL ,           -- size of data elements
  data OCTET STRING OPTIONAL }           -- coded scores

-- Reply to get-link-counts: an explicit count of link types followed by
-- one Entrez2-link-count entry per listed type.
Entrez2-link-count-list ::= SEQUENCE {   -- all links from 1 uid
  link-type-count INTEGER ,              -- number of types of links
  links SEQUENCE OF Entrez2-link-count }

-- A link type paired with its link count; element of
-- Entrez2-link-count-list.links.
Entrez2-link-count ::= SEQUENCE {        -- link count of one type
  link-type Entrez2-link-id ,
  link-count INTEGER }

END


-- entrezgene.asn
--$Revision: 529103 $ 
--********************************************************************** 
-- 
--  NCBI Entrezgene 
--  by James Ostell, 2001 
--   
--  Generic "Gene" object for Entrez Genes 
--    This object is designed to incorporate a subset of information from 
--    LocusLink and from records in Entrez Genomes to provide indexing, 
--    linkage, and a useful summary report in Entrez for "Genes" 
-- 
--********************************************************************** 
 
NCBI-Entrezgene DEFINITIONS ::= 
BEGIN 
 
EXPORTS Entrezgene, Entrezgene-Set, Gene-track, Gene-commentary;
 
IMPORTS Gene-ref FROM NCBI-Gene 
    Prot-ref FROM NCBI-Protein 
    BioSource FROM NCBI-BioSource 
    RNA-ref FROM NCBI-RNA 
    Dbtag, Date FROM NCBI-General 
    Seq-loc FROM NCBI-Seqloc 
    Pub FROM NCBI-Pub; 
 
--******************************************** 
-- Entrezgene is the "document" indexed in Entrez 
--  and presented in the full display 
-- It also contains the Entrez ID and date information 
-- (both carried in track-info, see Gene-track).
-- Required members are type, source and gene; everything else is
-- OPTIONAL. "see note N" refers to the numbered notes in the comment
-- block that follows this module's END.
--******************************************* 
Entrezgene ::= SEQUENCE { 
    track-info Gene-track OPTIONAL , -- not in submission, but in retrieval 
    type INTEGER {                   -- type of Gene
        unknown (0) ,
        tRNA    (1) ,
        rRNA    (2) ,
        snRNA   (3) ,
        scRNA   (4) ,
        snoRNA  (5) ,
        protein-coding (6) ,
        pseudo  (7) ,
        transposon  (8) ,
        miscRNA  (9) ,
        ncRNA (10) ,
        biological-region (11),
        other (255) } ,
    source BioSource , 
    gene Gene-ref ,                     -- for locus-tag see note 3
    prot Prot-ref OPTIONAL , 
    rna RNA-ref OPTIONAL , 
    summary VisibleString OPTIONAL ,    -- short summary 
    location SEQUENCE OF Maps OPTIONAL,
    gene-source Gene-source OPTIONAL ,             -- NCBI source to Entrez 
    locus SEQUENCE OF Gene-commentary OPTIONAL ,   -- location of gene on chromosome (if known)
                                                   -- and all information about products
                                                   -- (mRNA, proteins and so on)
    properties SEQUENCE OF Gene-commentary OPTIONAL , 
    refgene SEQUENCE OF Gene-commentary OPTIONAL , -- NG for this? 
    homology SEQUENCE OF Gene-commentary OPTIONAL , 
    comments SEQUENCE OF Gene-commentary OPTIONAL ,
    unique-keys SEQUENCE OF Dbtag OPTIONAL ,              -- see note 3
    xtra-index-terms SEQUENCE OF VisibleString OPTIONAL , -- see note 2
    xtra-properties SEQUENCE OF Xtra-Terms OPTIONAL ,     -- see note 2
    xtra-iq SEQUENCE OF Xtra-Terms OPTIONAL,              -- see note 2
    non-unique-keys SEQUENCE OF Dbtag OPTIONAL }

-- An unordered collection (SET OF) of Entrezgene records.
Entrezgene-Set ::= SET OF Entrezgene

-- Tracking information for an Entrezgene record: its id, status and
-- dates. status defaults to live when absent.
Gene-track ::= SEQUENCE { 
    geneid INTEGER ,     -- required unique document id 
    status INTEGER {
        live (0) ,
        secondary (1) ,   -- synonym with merged
        discontinued (2)  -- 'deleted', still index and display to public
    } DEFAULT live ,
    current-id SEQUENCE OF Dbtag OPTIONAL , -- see note 1 below
    create-date Date ,   -- date created in Entrez 
    update-date Date ,   -- last date updated in Entrez 
    discontinue-date Date OPTIONAL } -- NOTE(review): undocumented; presumably set when status = discontinued - confirm
 
-- Identifies the NCBI source that supplied this gene record to Entrez
-- (see Entrezgene.gene-source). Only src is required; the three BOOLEAN
-- flags default to FALSE.
Gene-source ::= SEQUENCE { 
    src VisibleString ,                -- key to the source within NCBI locuslink, Ecoli, etc 
    src-int INTEGER OPTIONAL ,         -- eg. locuslink id 
    src-str1 VisibleString OPTIONAL ,  -- eg. chromosome1 
    src-str2 VisibleString OPTIONAL ,  -- see note 3
    gene-display BOOLEAN DEFAULT FALSE ,  -- do we have a URL for gene display? 
    locus-display BOOLEAN DEFAULT FALSE , -- do we have a URL for map/locus display? 
    extra-terms BOOLEAN DEFAULT FALSE }   -- do we have a URL for extra indexing terms? 
 
-- General-purpose annotation node used for most Entrezgene content.
-- Only type is required. The type is recursive: products, properties
-- and comment are themselves lists of Gene-commentary.
-- "see note N" refers to the numbered notes after this module's END.
Gene-commentary ::= SEQUENCE { 
    type INTEGER {            -- type of Gene Commentary
        genomic (1) ,
        pre-RNA (2) ,
        mRNA (3) ,
        rRNA (4) ,
        tRNA (5) ,
        snRNA (6) ,
        scRNA (7) ,
        peptide (8) ,
        other-genetic (9) ,
        genomic-mRNA (10) ,
        cRNA (11) ,
        mature-peptide (12) ,
        pre-protein (13) ,
        miscRNA  (14) ,
        snoRNA  (15) ,
        property  (16) , -- used to display tag/value pair
                         -- for this type label is used as property tag, text is used as property value,
                         -- other fields are not used.
        reference (17), -- currently not used             
        generif (18), -- to include generif in the main blob             
        phenotype(19), -- to display phenotype information
        complex (20), -- used (but not limited) to identify resulting 
                      -- interaction complexes
        compound (21), -- pubchem entities

        ncRNA (22), 
        gene-group (23), -- for relationship sets (such as pseudogene / parent gene)
        assembly (24),  -- for full assembly accession
        assembly-unit (25), -- for the assembly unit corresponding to the refseq
        c-region (26),
        d-segment (27),
        j-segment (28),
        v-segment (29),

        comment (254) ,
        other (255) } ,
    heading VisibleString OPTIONAL ,      -- appears above text 
    label VisibleString OPTIONAL ,        -- occurs to left of text
                                          -- for protein and RNA types it is a name
                                          -- for property type it is a property tag
    text VisibleString OPTIONAL ,         -- block of text 
                                          -- for property type it is a property value
    accession VisibleString OPTIONAL ,    -- accession for the gi in the seqloc, see note 3
    version INTEGER OPTIONAL ,    -- version for the accession above
    xtra-properties SEQUENCE OF Xtra-Terms OPTIONAL , -- see note 2
    refs SEQUENCE OF Pub OPTIONAL ,       -- refs for this 
    source SEQUENCE OF Other-source OPTIONAL ,    -- links and refs 
    genomic-coords SEQUENCE OF Seq-loc OPTIONAL , -- referenced sequences in genomic coords
    seqs SEQUENCE OF Seq-loc OPTIONAL ,           -- referenced sequences in non-genomic coords
    products SEQUENCE OF Gene-commentary OPTIONAL ,
    properties SEQUENCE OF Gene-commentary OPTIONAL ,
    comment SEQUENCE OF Gene-commentary OPTIONAL ,
    create-date Date OPTIONAL ,   
    update-date Date OPTIONAL ,   
    rna RNA-ref OPTIONAL } 
 
-- A link or reference to a source, used in Gene-commentary.source.
-- Every member is OPTIONAL. pre-text, anchor and post-text give the
-- text before, in, and after the link.
Other-source ::= SEQUENCE { 
    src Dbtag OPTIONAL ,                -- key to non-ncbi source 
    pre-text VisibleString OPTIONAL ,   -- text before anchor 
    anchor VisibleString OPTIONAL ,     -- text to show as highlight 
    url VisibleString OPTIONAL ,        -- if present, use this URL not Dbtag and database 
    post-text VisibleString OPTIONAL }  -- text after anchor 


-- A map location for display (see Entrezgene.location). method is a
-- CHOICE: either a proxy URL or the unit type in which display-str is
-- expressed.
Maps::= SEQUENCE {
        display-str VisibleString ,
        method CHOICE {
            proxy VisibleString ,  --url to non mapviewer mapviewing resource
            map-type ENUMERATED {  -- units used in display-str to query mapviewer 
                    cyto (0) ,
                    bp (1) ,
                    cM (2) ,
                    cR (3) ,
                    min (4)}}}
                        
-- Xtra-Terms: one tag/value pair; see note 2 in the comments that
-- follow the END of this module.
Xtra-Terms ::= SEQUENCE {
    tag   VisibleString,
    value VisibleString
}

END 

--********************************************************************** 
-- 
--  Comments, notes, etc.
--   
--  1)  Ignored unless status = secondary.  This is where gene_ids (db = "GeneID")
--      are placed toward which the interface will direct users.  It is also
--      available for placing other source-db specific tags (i.e., db = "LocusID").
--
--  2)  These 'xtra' objects are for submitting data for Entrez indexing
--      that might not fit anywhere in the Entrezgene specification but
--      are considered by the data source submitter to be important.
--          xtra-index-terms is any string.
--          xtra-properties are tag/value pairs of properties/fields as
--              defined in the Entrez database (i.e.: UNIGENE/Hs.74561)
--          xtra-iq are tag/value pairs of Entrez database/UID as defined
--              in the Entrezgene indexing code (i.e.: NUCLEOTIDE/20270626)
--
--  3)  Locus-tag and src-str2 are expected to be unique per organism (tax_id).
--      Protein accessions and the tag-value pairs in unique-keys
--      are expected to be unique over all organisms.
--********************************************************************** 


-- featdef.asn
--$Revision: 6.0 $
--**********************************************************************
--
--  NCBI Sequence Feature Definition Module
--  by James Ostell, 1994
--
--**********************************************************************

NCBI-FeatDef DEFINITIONS ::=
BEGIN

EXPORTS FeatDef, FeatDefSet, FeatDispGroup, FeatDispGroupSet;


-- FeatDef: one feature definition: its labels, its lookup keys and the
-- groups it belongs to.
FeatDef ::= SEQUENCE {
    typelabel    VisibleString,   -- short label for type, e.g. "CDS"
    menulabel    VisibleString,   -- label for a menu, e.g. "Coding Region"
    featdef-key  INTEGER,         -- unique for this feature definition
    seqfeat-key  INTEGER,         -- SeqFeat.data.choice from objfeat.h
    entrygroup   INTEGER,         -- group for data entry
    displaygroup INTEGER,         -- group for data display
    molgroup     FeatMolType      -- type of molecule this definition is used for
}

-- FeatMolType: molecule class named by FeatDef.molgroup.
FeatMolType ::= ENUMERATED {
    aa   (1),   -- proteins
    na   (2),   -- nucleic acids
    both (3)    -- both
}

FeatDefSet ::= SEQUENCE OF FeatDef   -- collection of definitions

-- FeatDispGroup: a display group, pairing an integer key with a name.
FeatDispGroup ::= SEQUENCE {
    groupkey  INTEGER,
    groupname VisibleString
}

-- FeatDispGroupSet: an ordered list of display groups.
FeatDispGroupSet ::= SEQUENCE OF FeatDispGroup

-- FeatDefGroupSet: the display groups together with the feature definitions.
-- NOTE(review): this type is not listed in the module EXPORTS; confirm
-- that this is intended.
FeatDefGroupSet ::= SEQUENCE {
    groups FeatDispGroupSet,
    defs   FeatDefSet
}

END

    
-- gbseq.asn
--$Revision: 413850 $
--*********************************************************
--
-- ASN.1 and XML for the components of a GenBank format sequence
-- J.Ostell 2002
-- Updated 25 May 2010
--
--*********************************************************

NCBI-GBSeq DEFINITIONS ::=
BEGIN

--********
--  GBSeq represents the elements in a GenBank style report
--    of a sequence with some small additions to structure and support
--    for protein (GenPept) versions of GenBank format as seen in
--    Entrez. While this represents the simplification, reduction of
--    detail, and flattening to a single sequence perspective of GenBank
--    format (compared with the full ASN.1 or XML from which GenBank and
--    this format is derived at NCBI), it is presented in ASN.1 or XML for
--    automated parsing and processing. It is hoped that this compromise
--    will be useful for those bulk processing at the GenBank format level
--    of detail today. Since it is a compromise, a number of pragmatic
--    decisions have been made.
--
--  In pursuit of simplicity and familiarity a number of
--    fields do not have full substructure defined here where there is
--    already a standard GenBank format string. For example:
--
--   Date  DD-Mon-YYYY
--   Authors   LastName, Initials (with periods)
--   Journal   JournalName Volume (issue), page-range (year)
--   FeatureLocations as per GenBank feature table, but FeatureIntervals
--    may also be provided as a convenience
--   FeatureQualifiers  as per GenBank feature table
--   Primary has a string that represents a table to construct
--    a third party (TPA) sequence.
--   other-seqids can have strings with the "vertical bar format" sequence
--    identifiers used in BLAST for example, when they are non-genbank types.
--    Currently in GenBank format you only see GI, but there are others, like
--    patents, submitter clone names, etc which will appear here, as they
--    always have in the ASN.1 format, and full XML format.
--   source-db is a formatted text block for peptides in GenPept format that
--    carries information from the source protein database.
--
--  There are also a number of elements that could have been
--   more exactly specified, but in the interest of simplicity
--   have been simply left as options. For example..
--
--  accession and accession.version will always appear in a GenBank record
--   they are optional because this format can also be used for non-GenBank
--   sequences, and in that case will have only "other-seqids".
--
--  sequences will normally all have "sequence" filled in. But contig records
--    will have a "join" statement in the "contig" slot, and no "sequence".
--    We also may consider a retrieval option with no sequence of any kind
--     and no feature table to quickly check minimal values.
--
--  a reference may have an author list, or be from a consortium, or both.
--
--  some fields, such as taxonomy, do appear as separate elements in GenBank
--    format but without a specific linetype (in GenBank format this comes
--    under ORGANISM). Another example is the separation of primary accession
--    from the list of secondary accessions. In GenBank format primary
--    accession is just the first one on the list that includes all secondaries
--    after it.
--
--  create-date deserves special comment. The date you see on the right hand
--    side of the LOCUS line in GenBank format is actually the last date
--    the record was modified (or the update-date). The date the record was
--    first submitted to GenBank appears in the first submission citation in
--    the reference section. Internally in the databases and ASN.1 NCBI keeps
--    the first date the record was released into the sequence database at
--    NCBI as create-date. For records from EMBL, which supports create-date,
--    it is the date provided by EMBL. For DDBJ records, which do not supply
--    a create-date (same as GenBank format) the create-date is the first date
--    NCBI saw the record from DDBJ. For older GenBank records, before NCBI
--    took responsibility for GenBank, it is just the first date NCBI saw the
--    record. Create-date can be very useful, so we expose it here, but users
--    must understand it is only an approximation and comes from many sources,
--    and with many exceptions and caveats. It does NOT tell you the first
--    date the public might have seen this record and thus is NOT an accurate
--    measure for legal issues of precedence.
--
--********

-- GBSet: an ordered list of GBSeq records.
GBSet ::= SEQUENCE OF GBSeq
        
-- GBSeq: one sequence record at the GenBank flat-file level of detail.
-- Only length and moltype are mandatory. The module header comment
-- describes the string formats used for dates, locations and identifiers.
GBSeq ::= SEQUENCE {
    locus                VisibleString OPTIONAL,
    length               INTEGER,
    strandedness         VisibleString OPTIONAL,
    moltype              VisibleString,
    topology             VisibleString OPTIONAL,
    division             VisibleString OPTIONAL,
    update-date          VisibleString OPTIONAL,   -- DD-Mon-YYYY
    create-date          VisibleString OPTIONAL,   -- DD-Mon-YYYY; approximate, see module header
    update-release       VisibleString OPTIONAL,
    create-release       VisibleString OPTIONAL,
    definition           VisibleString OPTIONAL,
    primary-accession    VisibleString OPTIONAL,   -- optional: absent for non-GenBank sequences
    entry-version        VisibleString OPTIONAL,
    accession-version    VisibleString OPTIONAL,   -- optional: absent for non-GenBank sequences
    other-seqids         SEQUENCE OF GBSeqid OPTIONAL,
    secondary-accessions SEQUENCE OF GBSecondary-accn OPTIONAL,
    project              VisibleString OPTIONAL,
    keywords             SEQUENCE OF GBKeyword OPTIONAL,
    segment              VisibleString OPTIONAL,
    source               VisibleString OPTIONAL,
    organism             VisibleString OPTIONAL,
    taxonomy             VisibleString OPTIONAL,
    references           SEQUENCE OF GBReference OPTIONAL,
    comment              VisibleString OPTIONAL,
    comment-set          SEQUENCE OF GBComment OPTIONAL,
    struc-comments       SEQUENCE OF GBStrucComment OPTIONAL,
    primary              VisibleString OPTIONAL,   -- table used to construct a third party (TPA) sequence
    source-db            VisibleString OPTIONAL,   -- GenPept: text block from the source protein database
    database-reference   VisibleString OPTIONAL,
    feature-table        SEQUENCE OF GBFeature OPTIONAL,
    feature-set          SEQUENCE OF GBFeatureSet OPTIONAL,
    sequence             VisibleString OPTIONAL,   -- optional for contig, wgs, etc.
    contig               VisibleString OPTIONAL,   -- "join" statement of a contig record
    alt-seq              SEQUENCE OF GBAltSeqData OPTIONAL,
    xrefs                SEQUENCE OF GBXref OPTIONAL
}

-- GBSeqid: one entry of GBSeq.other-seqids; the module header describes
-- these as "vertical bar format" sequence identifier strings.
GBSeqid ::= VisibleString

-- GBSecondary-accn: one entry of GBSeq.secondary-accessions.
GBSecondary-accn ::= VisibleString

-- GBKeyword: one entry of GBSeq.keywords.
GBKeyword ::= VisibleString

-- GBReference: one citation of a GBSeq record.
-- A reference may have an author list, or be from a consortium, or both.
GBReference ::= SEQUENCE {
    reference  VisibleString,
    position   VisibleString OPTIONAL,
    authors    SEQUENCE OF GBAuthor OPTIONAL,
    consortium VisibleString OPTIONAL,
    title      VisibleString OPTIONAL,
    journal    VisibleString,             -- JournalName Volume (issue), page-range (year)
    xref       SEQUENCE OF GBXref OPTIONAL,
    pubmed     INTEGER OPTIONAL,
    remark     VisibleString OPTIONAL
}

-- GBAuthor: one entry of GBReference.authors; per the module header the
-- format is LastName, Initials (with periods).
GBAuthor ::= VisibleString

-- GBXref: a cross-reference, given as a database name and an id in it.
GBXref ::= SEQUENCE {
    dbname VisibleString,
    id     VisibleString
}

-- GBComment: one entry of GBSeq.comment-set; an optionally typed list
-- of paragraphs.
GBComment ::= SEQUENCE {
    type       VisibleString OPTIONAL,
    paragraphs SEQUENCE OF GBCommentParagraph
}

-- GBCommentParagraph: one entry of GBComment.paragraphs.
GBCommentParagraph ::= VisibleString

-- GBStrucComment: one entry of GBSeq.struc-comments; an optionally
-- named list of items.
GBStrucComment ::= SEQUENCE {
    name  VisibleString OPTIONAL,
    items SEQUENCE OF GBStrucCommentItem
}

-- GBStrucCommentItem: one entry of GBStrucComment.items.
-- Every element is optional.
GBStrucCommentItem ::= SEQUENCE {
    tag   VisibleString OPTIONAL,
    value VisibleString OPTIONAL,
    url   VisibleString OPTIONAL
}

-- GBFeatureSet: one entry of GBSeq.feature-set; a list of features with
-- an optional annotation source string.
GBFeatureSet ::= SEQUENCE {
    annot-source VisibleString OPTIONAL,
    features     SEQUENCE OF GBFeature
}

-- GBFeature: one feature. Per the module header, location and quals
-- follow the GenBank feature table, and intervals may also be provided
-- as a convenience.
GBFeature ::= SEQUENCE {
    key       VisibleString,
    location  VisibleString,
    intervals SEQUENCE OF GBInterval OPTIONAL,
    operator  VisibleString OPTIONAL,
    partial5  BOOLEAN OPTIONAL,
    partial3  BOOLEAN OPTIONAL,
    quals     SEQUENCE OF GBQualifier OPTIONAL,
    xrefs     SEQUENCE OF GBXref OPTIONAL
}

-- GBInterval: one interval, used by GBFeature.intervals and
-- GBAltSeqItem.interval. Only accession is mandatory.
GBInterval ::= SEQUENCE {
    from      INTEGER OPTIONAL,
    to        INTEGER OPTIONAL,
    point     INTEGER OPTIONAL,
    iscomp    BOOLEAN OPTIONAL,
    interbp   BOOLEAN OPTIONAL,
    accession VisibleString
}

-- GBQualifier: one entry of GBFeature.quals; a name with an optional value.
GBQualifier ::= SEQUENCE {
    name  VisibleString,
    value VisibleString OPTIONAL
}

-- GBAltSeqData: one entry of GBSeq.alt-seq; a named list of items.
GBAltSeqData ::= SEQUENCE {
    name  VisibleString,                      -- e.g., contig, wgs, scaffold, cage, genome
    items SEQUENCE OF GBAltSeqItem OPTIONAL
}

-- GBAltSeqItem: one entry of GBAltSeqData.items.
-- Every element is optional.
GBAltSeqItem ::= SEQUENCE {
    interval    GBInterval OPTIONAL,
    isgap       BOOLEAN OPTIONAL,
    gap-length  INTEGER OPTIONAL,
    gap-type    VisibleString OPTIONAL,
    gap-linkage VisibleString OPTIONAL,
    gap-comment VisibleString OPTIONAL,
    first-accn  VisibleString OPTIONAL,
    last-accn   VisibleString OPTIONAL,
    value       VisibleString OPTIONAL
}

END


-- general.asn
--$Revision: 98896 $
--**********************************************************************
--
--  NCBI General Data elements
--  by James Ostell, 1990
--  Version 3.0 - June 1994
--
--**********************************************************************

NCBI-General DEFINITIONS ::=
BEGIN

EXPORTS Date, Person-id, Object-id, Dbtag, Int-fuzz, User-object, User-field;

-- StringStore is really a VisibleString.  It is used to define very
--   long strings which may need to be stored by the receiving program
--   in special structures, such as a ByteStore, but it's just a hint.
--   AsnTool stores StringStores in ByteStore structures.
-- OCTET STRINGs are also stored in ByteStores by AsnTool
-- 
-- typedef struct bsunit {             /* for building multiline strings */
   -- Nlm_Handle str;            /* the string piece */
   -- Nlm_Int2 len_avail,
       -- len;
   -- struct bsunit PNTR next; }       /* the next one */
-- Nlm_BSUnit, PNTR Nlm_BSUnitPtr;
-- 
-- typedef struct bytestore {
   -- Nlm_Int4 seekptr,       /* current position */
      -- totlen,             /* total stored data length in bytes */
      -- chain_offset;       /* offset in ByteStore of first byte in curchain */
   -- Nlm_BSUnitPtr chain,       /* chain of elements */
      -- curchain;           /* the BSUnit containing seekptr */
-- } Nlm_ByteStore, PNTR Nlm_ByteStorePtr;
--
-- AsnTool incorporates this as a primitive type, so the definition
--   is here just for completeness
-- 
--  StringStore ::= [APPLICATION 1] IMPLICIT OCTET STRING
--

-- BigInt is really an INTEGER. It is used to warn the receiving code to expect
--   a value bigger than Int4 (actually Int8). It will be stored in DataVal.bigintvalue
--
--   Like StringStore, AsnTool incorporates it as a primitive. The definition would be:
--   BigInt ::= [APPLICATION 2] IMPLICIT INTEGER
--

-- Date replaces the (overly complex) UTCTime and GeneralizedTime
--  of ASN.1.
--  It is either an unparsed string or a structured Date-std.
--

Date ::= CHOICE {
    str VisibleString,   -- for those unparsed dates
    std Date-std         -- use this if you can
}

-- Date-std: a structured date. Only year is mandatory.
-- NOTE: this is NOT a unix tm struct.
Date-std ::= SEQUENCE {
    year   INTEGER,                  -- full year (including 1900)
    month  INTEGER OPTIONAL,         -- month (1-12)
    day    INTEGER OPTIONAL,         -- day of month (1-31)
    season VisibleString OPTIONAL,   -- for "spring", "may-june", etc
    hour   INTEGER OPTIONAL,         -- hour of day (0-23)
    minute INTEGER OPTIONAL,         -- minute of hour (0-59)
    second INTEGER OPTIONAL          -- second of minute (0-59)
}

-- Dbtag is a generalized tag: a database (or system) name plus an id in it
-- eg. { "Social Security", str "023-79-8841" }
-- or  { "member", id 8882224 }

Dbtag ::= SEQUENCE {
    db  VisibleString,   -- name of database or system
    tag Object-id        -- appropriate tag
}

-- Object-id can tag or name anything, either by integer or by string
--

Object-id ::= CHOICE {
    id  INTEGER,
    str VisibleString
}

-- Person-id defines a standard element for identifying people
--

Person-id ::= CHOICE {
    dbtag      Dbtag,           -- any defined database tag
    name       Name-std,        -- structured name
    ml         VisibleString,   -- MEDLINE name (semi-structured)
                                --    eg. "Jones RM"
    str        VisibleString,   -- unstructured name
    consortium VisibleString    -- consortium name
}

-- Name-std: a structured name. Only last is mandatory.
Name-std ::= SEQUENCE {
    last     VisibleString,
    first    VisibleString OPTIONAL,
    middle   VisibleString OPTIONAL,
    full     VisibleString OPTIONAL,   -- full name eg. "J. John Smith, Esq"
    initials VisibleString OPTIONAL,   -- first + middle initials
    suffix   VisibleString OPTIONAL,   -- Jr, Sr, III
    title    VisibleString OPTIONAL    -- Dr., Sister, etc
}

--**** Int-fuzz **********************************************
--*
--*   uncertainties in integer values; exactly one form is chosen

Int-fuzz ::= CHOICE {
    p-m   INTEGER,            -- plus or minus fixed amount
    range SEQUENCE {          -- max to min
        max INTEGER,
        min INTEGER
    },
    pct   INTEGER,            -- % plus or minus (x10) 0-1000
    lim   ENUMERATED {        -- some limit value
        unk    (0),           -- unknown
        gt     (1),           -- greater than
        lt     (2),           -- less than
        tr     (3),           -- space to right of position
        tl     (4),           -- space to left of position
        circle (5),           -- artificial break at origin of circle
        other  (255)          -- something else
    },
    alt   SET OF INTEGER      -- set of alternatives for the integer
}


--**** User-object **********************************************
--*
--*   a general object for a user defined structured data item
--*    used by Seq-feat and Seq-descr

User-object ::= SEQUENCE {
    class VisibleString OPTIONAL,   -- endeavor which designed this object
    type  Object-id,                -- type of object within class
    data  SEQUENCE OF User-field    -- the object itself
}

-- User-field: one labelled field of a User-object. The data choice can
-- nest further User-field and User-object values.
User-field ::= SEQUENCE {
    label Object-id,          -- field label
    num   INTEGER OPTIONAL,   -- required for strs, ints, reals, oss
    data  CHOICE {            -- field contents
        str     VisibleString,
        int     INTEGER,
        real    REAL,
        bool    BOOLEAN,
        os      OCTET STRING,
        object  User-object,  -- for using other definitions
        strs    SEQUENCE OF VisibleString,
        ints    SEQUENCE OF INTEGER,
        reals   SEQUENCE OF REAL,
        oss     SEQUENCE OF OCTET STRING,
        fields  SEQUENCE OF User-field,
        objects SEQUENCE OF User-object
    }
}


END


-- homologene.asn
HomoloGene DEFINITIONS ::=
BEGIN

IMPORTS Date FROM NCBI-General
        Seq-loc FROM NCBI-Seqloc
	Seq-align FROM NCBI-Seqalign;


-- HomoloGeneEntry taxid is the tax id of the group node, which can
-- be the same as the Gene tax id in case of singletons

-- HG-EntrySet: wrapper around an unordered set of HomoloGene entries.
HG-EntrySet ::= SEQUENCE {
    entries SET OF HG-Entry   -- homologene entry
}


-- HG-Entry: one HomoloGene entry. Only hg-id is mandatory.
HG-Entry ::= SEQUENCE {
    hg-id        INTEGER,
    version      INTEGER OPTIONAL,
    title        VisibleString OPTIONAL,
    caption      VisibleString OPTIONAL,
    taxid        INTEGER OPTIONAL,          -- tax id of the group node
    genes        SET OF HG-Gene OPTIONAL,
    cr-date      Date OPTIONAL,
    up-date      Date OPTIONAL,
    distances    SET OF HG-Stats OPTIONAL,
    commentaries SET OF HG-CommentarySet OPTIONAL,
    warnings     SET OF VisibleString OPTIONAL,
    node         HG-Node OPTIONAL
}


-- HG-Gene: one gene of a HomoloGene entry. geneid, title and taxid are
-- mandatory; everything else is optional.
HG-Gene ::= SEQUENCE {
    geneid     INTEGER,
    otherid    INTEGER OPTIONAL,              -- internal use only!!!!!
    symbol     VisibleString OPTIONAL,
    aliases    SET OF VisibleString OPTIONAL,
    title      VisibleString,
    taxid      INTEGER,                       -- taxid of gene node
    prot-gi    INTEGER OPTIONAL,
    prot-acc   VisibleString OPTIONAL,
    prot-len   INTEGER OPTIONAL,
    nuc-gi     INTEGER OPTIONAL,
    nuc-acc    VisibleString OPTIONAL,
    gene-links SET OF HG-Link OPTIONAL,
    prot-links SET OF HG-Link OPTIONAL,
    domains    SET OF HG-Domain OPTIONAL,
    chr        VisibleString OPTIONAL,
    location   Seq-loc OPTIONAL,              -- location on the genome
    locus-tag  VisibleString OPTIONAL
}


-- HG-Stats: one entry of HG-Entry.distances; a set of REAL statistics
-- for the pair gi1 / gi2.
HG-Stats ::= SEQUENCE {
    gi1           INTEGER,
    gi2           INTEGER,
    nuc-change    REAL,
    nuc-change-jc REAL,
    prot-change   REAL,
    ka            REAL,
    ks            REAL,
    knr           REAL,
    knc           REAL,
    recip-best    BOOLEAN OPTIONAL
}


-- HG-Commentary: one commentary. Only link is mandatory. The type is
-- recursive through other-commentaries.
HG-Commentary ::= SEQUENCE {
    link               HG-Link,
    description        VisibleString OPTIONAL,   -- main description
    caption            VisibleString OPTIONAL,   -- extra text
    provider           VisibleString OPTIONAL,
    other-links        SET OF HG-Link OPTIONAL,
    other-commentaries SET OF HG-Commentary OPTIONAL,
    taxid              INTEGER OPTIONAL,
    geneid             INTEGER OPTIONAL
}

-- HG-CommentarySet: a titled set of commentaries, optionally tied to
-- a HomoloGene id.
HG-CommentarySet ::= SEQUENCE {
    hg-id        INTEGER OPTIONAL,
    title        VisibleString,
    commentaries SET OF HG-Commentary
}

-- HG-CommentaryContainer: an unordered collection of commentary sets.
HG-CommentaryContainer ::= SET OF HG-CommentarySet

-- HG-Link: link text with an optional URL.
HG-Link ::= SEQUENCE {
    hypertext VisibleString,
    url       VisibleString OPTIONAL
}

-- HG-Domain: one entry of HG-Gene.domains; a begin/end range with
-- optional identifiers.
HG-Domain ::= SEQUENCE {
    begin    INTEGER,
    end      INTEGER,
    pssm-id  INTEGER OPTIONAL,         -- entrez uid
    cdd-id   VisibleString OPTIONAL,
    cdd-name VisibleString OPTIONAL
}

-- HG-Node: one node of a tree; the type is recursive through children.
HG-Node ::= SEQUENCE {
    type ENUMERATED {
        family   (0),
        ortholog (1),
        paralog  (2),
        leaf     (3)
    },
    id           HG-Node-id,
    caption      VisibleString OPTIONAL,
    current-node BOOLEAN DEFAULT FALSE,
    children     SET OF HG-Node OPTIONAL,
    branch-len   INTEGER OPTIONAL
}

-- HG-Node-id: an optional integer id together with the kind of id it is.
HG-Node-id ::= SEQUENCE {
    id      INTEGER OPTIONAL,
    id-type ENUMERATED {
        none   (0),
        geneid (1),
        hid    (2)
    }
}

-- HG-Alignment: a Seq-align paired with a HomoloGene id.
HG-Alignment ::= SEQUENCE {
    hg-id     INTEGER,
    alignment Seq-align
}

-- HG-AlignmentSet: an unordered collection of HG-Alignment values.
HG-AlignmentSet ::= SET OF HG-Alignment

END

-- id1.asn
--$Revision: 1.12 $
--********************************************************************
--
--  Network Id server network access
--  Yaschenko 1996
--
--
--*********************************************************************
--
--  ID1.asn
--
--     messages for id server network access
--
--*********************************************************************

NCBI-ID1Access DEFINITIONS ::=
BEGIN

IMPORTS Seq-id FROM NCBI-Seqloc
		Seq-entry FROM NCBI-Seqset
		Seq-hist  FROM NCBI-Sequence;

        --**********************************
        -- requests
        --

-- ID1server-request: one request to the ID1 server; exactly one
-- alternative is chosen.
ID1server-request ::= CHOICE {
    init            NULL,                   -- DlInit
    getgi           Seq-id,                 -- get a gi given a Seq-id
    getsefromgi     ID1server-maxcomplex,   -- given a gi, get the Seq-entry
    fini            NULL,                   -- DlFini
    getseqidsfromgi INTEGER,                -- get all Seq-ids of given gi
    getgihist       INTEGER,                -- get an historical list of gis
    getgirev        INTEGER,                -- get a revision history of gi
    getgistate      INTEGER,                -- get a state of gi
    getsewithinfo   ID1server-maxcomplex,
    getblobinfo     ID1server-maxcomplex
}

--  Complexity stuff will be for ID1

-- ID1server-maxcomplex: argument of the gi based retrieval requests;
-- a gi plus the requested complexity.
ID1server-maxcomplex ::= SEQUENCE {
    maxplex Entry-complexities,
    gi      INTEGER,
    ent     INTEGER OPTIONAL,        -- needed when you want to retrieve a given ent
    sat     VisibleString OPTIONAL   -- satellite 0-id,1-dbEST
}

-- Entry-complexities: named values for ID1server-maxcomplex.maxplex.
Entry-complexities ::= INTEGER {
    entry      (0),   -- the "natural" entry for this (nuc-prot)
    bioseq     (1),   -- only the bioseq identified
    bioseq-set (2),   -- any seg-set it may be part of
    nuc-prot   (3),   -- any nuc-prot it may be part of
    pub-set    (4)
}

-- ID1Seq-hist: wrapper holding a single Seq-hist; element type of
-- ID1server-back.gihist and ID1server-back.girevhist.
ID1Seq-hist ::= SEQUENCE {
    hist Seq-hist
}


-- ID1server-back: one reply from the ID1 server; exactly one
-- alternative is chosen.
ID1server-back ::= CHOICE {
    init            NULL,                 -- DlInit
    error           INTEGER,
    gotgi           INTEGER,
    gotseqentry     Seq-entry,            -- live
    gotdeadseqentry Seq-entry,            -- dead
    fini            NULL,                 -- DlFini
    gistate         INTEGER,
    ids             SET OF Seq-id,
    gihist          SET OF ID1Seq-hist,   -- because hand crafted Seq-hist does not follow
                                          -- same conventions
    girevhist       SET OF ID1Seq-hist,
    gotsewithinfo   ID1SeqEntry-info,
    gotblobinfo     ID1blob-info
}

-- ID1server-debug: an unordered collection of ID1server-back replies.
ID1server-debug ::= SET OF ID1server-back


-- ID1blob-info: descriptive information about one blob.
ID1blob-info ::= SEQUENCE {
    gi           INTEGER,
    sat          INTEGER,
    sat-key      INTEGER,
    satname      VisibleString,
    suppress     INTEGER,
    withdrawn    INTEGER,
    confidential INTEGER,
    -- blob-state now contains blob version info.
    -- it's actually minutes from 01/01/1970
    -- and it's negative if blob is dead.
    blob-state   INTEGER,
    comment      VisibleString OPTIONAL,   -- public comment for withdrawn record
    extfeatmask  INTEGER OPTIONAL          -- mask for external features (SNP,...)
}

-- ID1SeqEntry-info: blob information, optionally with the Seq-entry itself.
ID1SeqEntry-info ::= SEQUENCE {
    blob-info ID1blob-info,
    blob      Seq-entry OPTIONAL
}
END

-- id2.asn
--$Revision: 534721 $
--********************************************************************
--
--  Network Id server network access
--  Vasilchenko 2003
--
--
--*********************************************************************
--
--  ID2.asn
--
--     messages for id server network access
--
--*********************************************************************

NCBI-ID2Access DEFINITIONS ::=
BEGIN

EXPORTS ID2-Blob-State, ID2-Blob-Id;

IMPORTS Seq-id, Seq-loc                                 FROM NCBI-Seqloc
        ID2S-Chunk-Id, ID2S-Seq-annot-Info              FROM NCBI-Seq-split;


--*********************************************************************
-- request types
--*********************************************************************

-- Requests are sent in packets to allow sending several requests at once
-- to avoid network latency, without possibility of deadlock with server.
-- Server will not start sending replies until it has read the whole packet.
ID2-Request-Packet ::= SEQUENCE OF ID2-Request


-- ID2-Request: one request of a packet; exactly one request alternative
-- is chosen.
ID2-Request ::= SEQUENCE {
    -- request's serial number, can be used in asynchronous clients
    -- server should copy it to corresponding field in reply
    serial-number INTEGER OPTIONAL,

    params        ID2-Params OPTIONAL,

    request CHOICE {
        init          NULL,
        get-packages  ID2-Request-Get-Packages,
        get-seq-id    ID2-Request-Get-Seq-id,
        get-blob-id   ID2-Request-Get-Blob-Id,
        get-blob-info ID2-Request-Get-Blob-Info,
        reget-blob    ID2-Request-ReGet-Blob,
        get-chunks    ID2S-Request-Get-Chunks
    }
}


-- Request for the set of params packages known by the server.
-- Packages can be used to abbreviate parameters of request.
ID2-Request-Get-Packages ::= SEQUENCE {
    -- return known packages from this list
    -- if unset - return all known packages
    names       SEQUENCE OF VisibleString OPTIONAL,

    -- return packages' names only
    no-contents NULL OPTIONAL
}

-- Requested sequence ID, can be any string or Seq-id.
-- This request will be replied with one or more ID2-Reply-Get-Seq-id.
ID2-Request-Get-Seq-id ::= SEQUENCE {
    seq-id      ID2-Seq-id,
    seq-id-type INTEGER {
        any        (0),      -- return any qualified Seq-id
        gi         (1),      -- gi is preferred
        text       (2),      -- text Seq-id (accession etc) is preferred
        general    (4),      -- general Seq-id is preferred
        all        (127),    -- return all qualified Seq-ids of the sequence
        label      (128),    -- return a sequence string label as string
                             -- in general id with db "LABEL"
        taxid      (256),    -- return a sequence taxonomy ID as integer
                             -- in general id with db "TAXID"
        hash       (512),    -- return a sequence hash as integer
                             -- in general id with db "HASH"
        seq-length (1024),   -- return a sequence Seq-inst.length as int
                             -- in general id with db "Seq-inst.length"
        seq-mol    (2048)    -- return a sequence Seq-inst.mol as int
                             -- in general id with db "Seq-inst.mol"
    } DEFAULT any
}


-- ID2-Seq-id: a sequence id given either as a plain string or as a Seq-id.
ID2-Seq-id ::= CHOICE {
    string VisibleString,
    seq-id Seq-id
}


-- Return blob-id with specified seq-id.
-- This request will be replied with one or more ID2-Reply-Get-Blob-Id.
ID2-Request-Get-Blob-Id ::= SEQUENCE {
    -- id can be supplied by inner request
    seq-id   ID2-Request-Get-Seq-id,

    -- return id of blob with sequence
    sources  SEQUENCE OF VisibleString OPTIONAL,

    -- return Blob-Ids with external features on this Seq-id
    external NULL OPTIONAL
}


-- Return some information related to the blob.
-- This request will be replied with one or more of:
--   ID2-Reply-Get-Blob-Seq-ids - if requested by get-seq-ids field
--   ID2-Reply-Get-Blob         - if requested by get-data field
--   ID2S-Reply-Get-Split-Info
--   ID2S-Reply-Get-Chunk
-- The last two can be sent in addition to ID2-Reply-Get-Blob
-- if the blob is split on the server.
-- The replies are made separate to allow the server to create replies more
-- easily from precalculated data. Each of these replies has an
-- ID2-Reply-Data field.
ID2-Request-Get-Blob-Info ::= SEQUENCE {
    -- id can be supplied by inner request
    blob-id CHOICE {
        -- blob id given explicitly
        blob-id ID2-Blob-Id,

        -- generate blob-ids from request
        resolve SEQUENCE {
            request       ID2-Request-Get-Blob-Id,

            -- server will not send blobs listed here
            exclude-blobs SEQUENCE OF ID2-Blob-Id OPTIONAL
        }
    },

    -- return in addition list of Seq-ids also resolving to this blob
    get-seq-ids NULL OPTIONAL,

    -- level of details requested immediately
    -- server will send relevant chunks if blob is split
    get-data    ID2-Get-Blob-Details OPTIONAL
}


-- This is similar to the FTP reget command.
-- It may be unsupported by the server.
-- It is defined only for plain blobs (returned in ID2-Reply-Get-Blob),
-- as all split data comes in small chunks, so reget doesn't make sense there.
ID2-Request-ReGet-Blob ::= SEQUENCE {
    blob-id       ID2-Blob-Id,

    -- blob split version to resend
    split-version INTEGER,

    -- start offset of data to get
    offset        INTEGER
}

-- Request for specific chunks.
-- Server will reply with one or more ID2S-Reply-Get-Chunk.
ID2S-Request-Get-Chunks ::= SEQUENCE {
    blob-id       ID2-Blob-Id,

    -- requests for specific chunks of a split blob
    chunks        SEQUENCE OF ID2S-Chunk-Id,

    -- blob split version
    split-version INTEGER OPTIONAL
}


-- The following structure describes what parts of blob are required
-- immediately after ID2-Request-Get-Blob-Info in case blob is split.
-- Seq-entry level will have probably the same values as Entry-complexities.
ID2-Get-Blob-Details ::= SEQUENCE {
    -- reference location for details - can be only part of sequence
    location        Seq-loc OPTIONAL,

    -- Seq-entry level for all data except descriptors (sequence, annots)
    seq-class-level INTEGER DEFAULT 1,

    -- Seq-entry level for descriptors
    descr-level     INTEGER DEFAULT 1,

    -- mask of descriptor types - see Seqdesc for variants' values
    descr-type-mask INTEGER DEFAULT 0,

    -- mask of annotation types - see Seq-annot.data for values
    annot-type-mask INTEGER DEFAULT 0,

    -- mask of feature types - see SeqFeatData for values
    feat-type-mask  INTEGER DEFAULT 0,

    -- level of sequence data to load
    sequence-level  ENUMERATED {
        none     (0),   -- not required
        seq-map  (1),   -- at least seq-map
        seq-data (2)    -- include seq-data
    } DEFAULT none
}


--*********************************************************************
-- reply types
--*********************************************************************


-- ID2-Reply: one reply from the server; exactly one reply alternative
-- is chosen.
ID2-Reply ::= SEQUENCE {
    -- request's serial number, copy from request
    serial-number INTEGER OPTIONAL,

    params        ID2-Params OPTIONAL,

    error         SEQUENCE OF ID2-Error OPTIONAL,

    -- present if this reply is the last one for the request
    -- omitted if more replies will follow
    end-of-reply  NULL OPTIONAL,

    -- reply data moved at the end to make it easier to construct
    -- the reply data manually from precalculated data
    reply CHOICE {
        init             NULL,
        empty            NULL,
        get-package      ID2-Reply-Get-Package,
        get-seq-id       ID2-Reply-Get-Seq-id,
        get-blob-id      ID2-Reply-Get-Blob-Id,
        get-blob-seq-ids ID2-Reply-Get-Blob-Seq-ids,
        get-blob         ID2-Reply-Get-Blob,
        reget-blob       ID2-Reply-ReGet-Blob,
        get-split-info   ID2S-Reply-Get-Split-Info,
        get-chunk        ID2S-Reply-Get-Chunk
    },

    -- additional error flag if the reply is broken in the middle
    -- of transfer.
    -- 'last-octet-string', and 'nothing' mean that
    -- client may use ReGet request to get the remaining data.
    discard ENUMERATED {
        reply             (0),   -- whole reply should be discarded
        last-octet-string (1),   -- all data in embedded ID2-Reply-Data
                                 -- except last OCTET STRING is correct
        nothing           (2)    -- all data in embedded ID2-Reply-Data
                                 -- is correct, but is incomplete
    } OPTIONAL
}


-- ID2-Error: one entry of ID2-Reply.error; a severity with an optional
-- retry delay and message.
ID2-Error ::= SEQUENCE {
    severity ENUMERATED {
        -- nothing harmful happened
        warning             (1),

        -- command cannot be completed this time
        failed-command      (2),

        -- connection cannot be reused, reconnect is required
        failed-connection   (3),

        -- server cannot be used for a while
        failed-server       (4),

        -- resolve request gives no data
        -- probably temporarily (see retry-delay field)
        no-data             (5),

        -- data exists but client doesn't have permission to get it
        restricted-data     (6),

        -- this request type is not supported by server
        unsupported-command (7),

        -- error in request packet, cannot retry
        invalid-arguments   (8)
    },

    -- client may retry the request after specified time in seconds
    retry-delay INTEGER OPTIONAL,

    message     VisibleString OPTIONAL
}


-- Reply to ID2-Request-Get-Packages.
-- Carries a mandatory package name and an optional parameter list.
ID2-Reply-Get-Package ::= SEQUENCE {
    name   VisibleString,         -- package name
    params ID2-Params OPTIONAL    -- parameter list; may be omitted
}


-- Reply to ID2-Request-Get-Seq-id.
-- Only the echoed request is mandatory.
ID2-Reply-Get-Seq-id ::= SEQUENCE {
    -- copy of request
    request      ID2-Request-Get-Seq-id,

    -- resolved Seq-id list; not set if error occurred
    seq-id       SEQUENCE OF Seq-id OPTIONAL,

    -- present when this Seq-id is the last one in the request
    end-of-reply NULL OPTIONAL
}

-- Bit numbers for different blob states,
-- used in blob-state fields of several replies.
-- Each value below is a bit position, not a mask.
-- For example, dead & protected blob has blob-state 24 - 3rd and 4th bits set
-- (2^3 + 2^4 = 8 + 16).
-- NOTE(review): the reply types describe blob-state 0 as regular live data,
-- while live is listed here as bit number 0; confirm how the live bit is used.
ID2-Blob-State ::= ENUMERATED {
        live            (0),
        suppressed-temp (1),
        suppressed      (2),
        dead            (3),
        protected       (4),
        withdrawn       (5)
}


-- Reply to ID2-Request-Get-Blob-Id.
-- Only seq-id is mandatory; split-version falls back to 0 when absent.
ID2-Reply-Get-Blob-Id ::= SEQUENCE {
    -- requested Seq-id
    seq-id        Seq-id,

    -- result
    blob-id       ID2-Blob-Id OPTIONAL,

    -- version of split data (0 for non split)
    split-version INTEGER DEFAULT 0,

    -- annotation types in this blob;
    -- annotations are unknown if this field is omitted
    annot-info    SEQUENCE OF ID2S-Seq-annot-Info OPTIONAL,

    -- present when this Blob-id is the last one in the request
    end-of-reply  NULL OPTIONAL,

    -- state bits of the blob, 0 or missing means regular live data
    blob-state    INTEGER OPTIONAL
}


-- Reply to ID2-Request-Get-Blob-Info.
-- Pairs a blob id with an optional packed list of Seq-ids.
ID2-Reply-Get-Blob-Seq-ids ::= SEQUENCE {
    blob-id ID2-Blob-Id,

    -- list of Seq-id resolving to this Blob-Id, in compressed format
    ids     ID2-Reply-Data OPTIONAL
}


-- Reply to ID2-Request-Get-Blob-Info.
-- Only blob-id is mandatory; split-version falls back to 0 when absent.
ID2-Reply-Get-Blob ::= SEQUENCE {
    blob-id       ID2-Blob-Id,

    -- version of split data (0 for non split)
    split-version INTEGER DEFAULT 0,

    -- whole blob or blob skeleton; not set if error occurred
    data          ID2-Reply-Data OPTIONAL,

    -- state bits of the blob, 0 or missing means regular live data
    blob-state    INTEGER OPTIONAL
}


-- Reply to ID2-Request-Get-Blob-Info.
-- Unlike ID2-Reply-Get-Blob, split-version is mandatory here (no DEFAULT).
ID2S-Reply-Get-Split-Info ::= SEQUENCE {
    blob-id       ID2-Blob-Id,

    -- version of split data
    split-version INTEGER,

    -- blob split info; not set if error occurred
    data          ID2-Reply-Data OPTIONAL,

    -- state bits of the blob, 0 or missing means regular live data
    blob-state    INTEGER OPTIONAL
}


-- Reply to ID2-Request-ReGet-Blob.
-- blob-id, split-version and offset are mandatory; data is optional.
ID2-Reply-ReGet-Blob ::= SEQUENCE {
    blob-id       ID2-Blob-Id,

    -- version of data split
    split-version INTEGER,

    -- offset of data
    offset        INTEGER,

    -- blob split info; not set if error occurred
    -- NOTE(review): same wording as in ID2S-Reply-Get-Split-Info; confirm
    -- that "split info" is the intended description for a re-get payload.
    data          ID2-Reply-Data OPTIONAL
}


-- Reply to ID2S-Request-Get-Chunks.
-- Identifies one chunk of one blob and optionally carries its data.
ID2S-Reply-Get-Chunk ::= SEQUENCE {
    blob-id  ID2-Blob-Id,

    -- id of chunk to send
    chunk-id ID2S-Chunk-Id,

    -- chunk data; not set if error occurred
    data     ID2-Reply-Data OPTIONAL
}


-- Data packing.
-- Describes a payload by three independent codes (type, serialization format,
-- compression), each with a default of its value 0, followed by the payload
-- itself as a list of OCTET STRINGs.
ID2-Reply-Data ::= SEQUENCE {
        -- index of negotiated types
        -- recommended types
        --   Seq-entry,
        --   ID2S-Split-Info,
        --   ID2S-Chunk
        data-type       INTEGER {
                seq-entry       (0),
                seq-annot       (1),
                id2s-split-info (2),
                id2s-chunk      (3)
        } DEFAULT seq-entry,

        -- serialization format (ASN.1 binary, ASN.1 text, XML)
        -- index of negotiated formats
        data-format     INTEGER {
                asn-binary      (0),
                asn-text        (1),
                xml             (2)
        } DEFAULT asn-binary,

        -- post serialization compression (plain, gzip, etc.)
        -- index of negotiated compressions
        data-compression INTEGER {
                none            (0),
                gzip            (1),
                nlmzip          (2),
                bzip2           (3)
        } DEFAULT none,

        -- data blob, as a sequence of octet strings
        data            SEQUENCE OF OCTET STRING
}


-- Data packed within ID2-Reply-Get-Blob-Seq-ids reply:
-- a list of ID2-Blob-Seq-id entries.
ID2-Blob-Seq-ids ::= SEQUENCE OF ID2-Blob-Seq-id


-- One element of ID2-Blob-Seq-ids: a Seq-id plus an optional marker.
ID2-Blob-Seq-id ::= SEQUENCE {
    seq-id   Seq-id,

    -- present when this Seq-id is replaced by sequence in another blob
    replaced NULL OPTIONAL
}


--*********************************************************************
-- utility types
--*********************************************************************


-- Blob identifier: sat, sub-sat and sat-key, plus an optional version.
-- sub-sat falls back to main (0) when absent.
-- NOTE(review): the named sub-sat values other than main are all powers of
-- two; whether they are meant to be combined as bit flags is not stated here.
ID2-Blob-Id ::= SEQUENCE {
        sat             INTEGER,
        sub-sat         INTEGER {
                main        (0),
                snp         (1),
                snp-graph   (4),
                cdd         (8),
                mgc         (16),
                hprd        (32),
                sts         (64),
                trna        (128),
                exon        (512)
        } DEFAULT main,
        sat-key         INTEGER,
        -- version of blob, optional in some requests
        version         INTEGER OPTIONAL
}


-- Ordered list of ID2-Param entries.
ID2-Params ::= SEQUENCE OF ID2-Param


-- A single named parameter: a mandatory name, an optional list of string
-- values, and a type code that falls back to set-value when absent.
ID2-Param ::= SEQUENCE {
        name    VisibleString,
        value   SEQUENCE OF VisibleString OPTIONAL,
        type    ENUMERATED {
                -- no response expected
                set-value   (1),

                -- this option is for client only
                -- server replies with its value of param if known
                -- server omits this param in reply if unknown to server
                get-value   (2),

                -- no direct response expected,
                -- but if the param or its value is not supported
                -- an error is reported and the request is not completed
                force-value (3),

                -- use named package
                -- value should be unset
                use-package (4)
        } DEFAULT set-value
}

END

-- insdseq.asn
--$Revision: 413850 $
--************************************************************************
--
-- ASN.1 and XML for the components of a GenBank/EMBL/DDBJ sequence record
-- The International Nucleotide Sequence Database (INSD) collaboration
-- Version 1.6, 25 May 2010
--
--************************************************************************

INSD-INSDSeq DEFINITIONS ::=
BEGIN

--  INSDSeq provides the elements of a sequence as presented in the
--    GenBank/EMBL/DDBJ-style flatfile formats, with a small amount of
--    additional structure.
--    Although this single perspective of the three flatfile formats
--    provides a useful simplification, it hides to some extent the
--    details of the actual data underlying those formats. Nevertheless,
--    the XML version of INSD-Seq is being provided with
--    the hopes that it will prove useful to those who bulk-process
--    sequence data at the flatfile-format level of detail. Further 
--    documentation regarding the content and conventions of those formats 
--    can be found at:
--
--    URLs for the DDBJ, EMBL, and GenBank Feature Table Document:
--    http://www.ddbj.nig.ac.jp/FT/full_index.html
--    http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html
--    http://www.ncbi.nlm.nih.gov/projects/collab/FT/index.html
--
--    URLs for DDBJ, EMBL, and GenBank Release Notes :
--    ftp://ftp.ddbj.nig.ac.jp/database/ddbj/ddbjrel.txt
--    http://www.ebi.ac.uk/embl/Documentation/Release_notes/current/relnotes.html
--    ftp://ftp.ncbi.nih.gov/genbank/gbrel.txt
--
--    Because INSDSeq is a compromise, a number of pragmatic decisions have
--    been made:
--
--  In pursuit of simplicity and familiarity a number of fields do not
--    have full substructure defined here where there is already a
--    standard flatfile format string. For example:
--
--   Dates:      DD-MON-YYYY (eg 10-JUN-2003)
--
--   Author:     LastName, Initials  (eg Smith, J.N.)
--            or Lastname Initials   (eg Smith J.N.)
--
--   Journal:    JournalName Volume (issue), page-range (year)
--            or JournalName Volume(issue):page-range(year)
--            eg Appl. Environ. Microbiol. 61 (4), 1646-1648 (1995)
--               Appl. Environ. Microbiol. 61(4):1646-1648(1995).
--
--  FeatureLocations are represented as in the flatfile feature table,
--    but FeatureIntervals may also be provided as a convenience
--
--  FeatureQualifiers are represented as in the flatfile feature table.
--
--  Primary has a string that represents a table to construct
--    a third party (TPA) sequence.
--
--  other-seqids can have strings with the "vertical bar format" sequence
--    identifiers used in BLAST for example, when they are non-INSD types.
--
--  Currently in flatfile format you only see Accession numbers, but there 
--    are others, like patents, submitter clone names, etc which will 
--    appear here
--
--  There are also a number of elements that could have been more exactly
--    specified, but in the interest of simplicity have been simply left as
--    optional. For example:
--
--  All publicly accessible sequence records in INSDSeq format will
--    include accession and accession.version. However, these elements are
--    optional in INSDSeq so that this format can also be used
--    for non-public sequence data, prior to the assignment of accessions and 
--    version numbers. In such cases, records will have only "other-seqids".
--
--  sequences will normally all have "sequence" filled in. But contig records
--    will have a "join" statement in the "contig" slot, and no "sequence".
--    We also may consider a retrieval option with no sequence of any kind
--    and no feature table to quickly check minimal values.
--
--  Four (optional) elements are specific to records represented via the EMBL
--    sequence database: INSDSeq_update-release, INSDSeq_create-release,
--    INSDSeq_entry-version, and INSDSeq_database-reference.
--
--  One (optional) element is specific to records originating at the GenBank
--    and DDBJ sequence databases: INSDSeq_segment.
--
--********

-- Top-level container: an ordered list of INSDSeq records.
INSDSet ::= SEQUENCE OF INSDSeq

-- One sequence record.
-- Only length and moltype are mandatory; every other member is OPTIONAL.
INSDSeq ::= SEQUENCE {
    locus VisibleString OPTIONAL ,
    length INTEGER ,
    strandedness VisibleString OPTIONAL ,
    moltype VisibleString ,
    topology VisibleString OPTIONAL ,
    division VisibleString OPTIONAL ,
    update-date VisibleString OPTIONAL ,
    create-date VisibleString OPTIONAL ,
    update-release VisibleString OPTIONAL ,
    create-release VisibleString OPTIONAL ,
    definition VisibleString OPTIONAL ,
    primary-accession VisibleString OPTIONAL ,
    entry-version VisibleString OPTIONAL ,
    accession-version VisibleString OPTIONAL ,
    other-seqids SEQUENCE OF INSDSeqid OPTIONAL ,
    secondary-accessions SEQUENCE OF INSDSecondary-accn OPTIONAL,

--  INSDSeq_project has been deprecated in favor of INSDSeq_xrefs .
--  This element may be removed from a future version of this DTD.

    project VisibleString OPTIONAL ,

    keywords SEQUENCE OF INSDKeyword OPTIONAL ,
    segment VisibleString OPTIONAL ,
    source VisibleString OPTIONAL ,
    organism VisibleString OPTIONAL ,
    taxonomy VisibleString OPTIONAL ,
    references SEQUENCE OF INSDReference OPTIONAL ,
    comment VisibleString OPTIONAL ,
    comment-set SEQUENCE OF INSDComment OPTIONAL ,
    struc-comments SEQUENCE OF INSDStrucComment OPTIONAL ,
    primary VisibleString OPTIONAL ,
    source-db VisibleString OPTIONAL ,
    database-reference VisibleString OPTIONAL ,
    feature-table SEQUENCE OF INSDFeature OPTIONAL ,
    feature-set SEQUENCE OF INSDFeatureSet OPTIONAL ,
    sequence VisibleString OPTIONAL ,  -- Optional for contig, wgs, etc.
    contig VisibleString OPTIONAL ,
    alt-seq SEQUENCE OF INSDAltSeqData OPTIONAL ,

--  INSDSeq_xrefs provides cross-references from a sequence record
--  to other database resources. These cross-references are at the
--  level of the entire record, rather than at the level of a specific
--  feature. These cross-references can include: BioProject, BioSample,
--  Sequence Read Archive, etc.

    xrefs SEQUENCE OF INSDXref OPTIONAL
}

-- Element type of INSDSeq.other-seqids; a plain string.
INSDSeqid ::= VisibleString

-- Element type of INSDSeq.secondary-accessions; a plain string.
INSDSecondary-accn ::= VisibleString

-- Element type of INSDSeq.keywords; a plain string.
INSDKeyword ::= VisibleString

-- INSDReference_position contains a string value indicating the
-- basepair span(s) to which a reference applies. The allowable
-- formats are:
--
--   X..Y  : Where X and Y are integers separated by two periods,
--           X >= 1 , Y <= sequence length, and X <= Y 
--
--           Multiple basepair spans can exist, separated by a
--           semi-colon and a space. For example : 10..20; 100..500
--             
--   sites : The string literal 'sites', indicating that a reference
--           provides sequence annotation information, but the specific
--           basepair spans are either not captured, or were too numerous
--           to record.
--
--           The 'sites' literal string is singly occurring, and
--            cannot be used in conjunction with any X..Y basepair spans.
--
--           'sites' is a convention utilized by GenBank, and might
--           not be presented in XML provided by EMBL and DDBJ.
--
--   References that lack an INSDReference_position element are not
--   attributed to any particular region of the sequence.

-- A literature reference attached to a record.
-- Only reference and journal are mandatory.
INSDReference ::= SEQUENCE {
    reference  VisibleString,
    position   VisibleString OPTIONAL,          -- basepair span(s) or 'sites'
    authors    SEQUENCE OF INSDAuthor OPTIONAL,
    consortium VisibleString OPTIONAL,
    title      VisibleString OPTIONAL,
    journal    VisibleString,
    xref       SEQUENCE OF INSDXref OPTIONAL,
    pubmed     INTEGER OPTIONAL,
    remark     VisibleString OPTIONAL
}

-- Element type of INSDReference.authors; a plain string.
INSDAuthor ::= VisibleString

-- INSDXref provides a method for referring to records in
-- other databases. INSDXref_dbname is a string value that
-- provides the name of the database, and INSDXref_id
-- is a string value that provides the record's identifier
-- in that database.

INSDXref ::= SEQUENCE {
    dbname VisibleString,    -- name of the database
    id     VisibleString     -- identifier of the record in that database
}

-- Element type of INSDSeq.comment-set: an optional type string followed by
-- a mandatory list of paragraphs.
INSDComment ::= SEQUENCE {
    type       VisibleString OPTIONAL,
    paragraphs SEQUENCE OF INSDCommentParagraph
}

-- Element type of INSDComment.paragraphs; a plain string.
INSDCommentParagraph ::= VisibleString

-- Element type of INSDSeq.struc-comments: an optional name followed by a
-- mandatory list of items.
INSDStrucComment ::= SEQUENCE {
    name  VisibleString OPTIONAL,
    items SEQUENCE OF INSDStrucCommentItem
}

-- Element type of INSDStrucComment.items; every member is optional.
INSDStrucCommentItem ::= SEQUENCE {
    tag   VisibleString OPTIONAL,
    value VisibleString OPTIONAL,
    url   VisibleString OPTIONAL
}

-- INSDFeature_operator contains a string value describing
-- the relationship among a set of INSDInterval within
-- INSDFeature_intervals. The allowable formats are:
--
--   join :  The string literal 'join' indicates that the
--           INSDInterval intervals are biologically joined
--           together into a contiguous molecule.
--
--   order : The string literal 'order' indicates that the
--           INSDInterval intervals are in the presented
--           order, but they are not necessarily contiguous.
--
--   Either 'join' or 'order' is required if INSDFeature_intervals
--   is comprised of more than one INSDInterval .

-- Element type of INSDSeq.feature-set: a list of features with an optional
-- annot-source string.
INSDFeatureSet ::= SEQUENCE {
    annot-source VisibleString OPTIONAL,
    features     SEQUENCE OF INSDFeature
}

-- A single feature. Only key and location are mandatory.
INSDFeature ::= SEQUENCE {
    key       VisibleString,
    location  VisibleString,
    intervals SEQUENCE OF INSDInterval OPTIONAL,
    operator  VisibleString OPTIONAL,              -- 'join' or 'order'
    partial5  BOOLEAN OPTIONAL,
    partial3  BOOLEAN OPTIONAL,
    quals     SEQUENCE OF INSDQualifier OPTIONAL,
    xrefs     SEQUENCE OF INSDXref OPTIONAL
}

-- INSDInterval_iscomp is a boolean indicating whether
-- an INSDInterval_from / INSDInterval_to location
-- represents a location on the complement strand.
-- When INSDInterval_iscomp is TRUE, it essentially
-- confirms that a 'from' value which is greater than
-- a 'to' value is intentional, because the location
-- is on the opposite strand of the presented sequence.

-- INSDInterval_interbp is a boolean indicating whether
-- a feature (such as a restriction site) is located
-- between two adjacent basepairs. When INSDInterval_interbp
-- is TRUE, the 'from' and 'to' values will differ by
-- exactly one base for linear molecules. For circular 
-- molecules, if the inter-basepair position falls between
-- the last and the first base, then 'from' will be the
-- final base (equal to the length of the sequence), and
-- 'to' will have a value of 1.

-- A location on a sequence. Only accession is mandatory; the coordinate
-- members and both flags are optional.
INSDInterval ::= SEQUENCE {
    from      INTEGER OPTIONAL,
    to        INTEGER OPTIONAL,
    point     INTEGER OPTIONAL,
    iscomp    BOOLEAN OPTIONAL,    -- location is on the complement strand
    interbp   BOOLEAN OPTIONAL,    -- located between two adjacent basepairs
    accession VisibleString
}

-- Element type of INSDFeature.quals: a mandatory name and an optional value.
INSDQualifier ::= SEQUENCE {
    name  VisibleString,
    value VisibleString OPTIONAL
}

-- INSDAltSeqData provides for sequence representations other than
-- literal basepair abbreviations (INSDSeq_sequence), such as the
-- CONTIG/CO linetype of the GenBank and EMBL flatfile formats.
-- It also accommodates the specification of accession-number ranges,
-- which are presented on a WGS master record (for the contigs
-- and scaffolds of a WGS project).

-- Element type of INSDSeq.alt-seq: a mandatory name and an optional item list.
INSDAltSeqData ::= SEQUENCE {
    -- e.g., contig, wgs, scaffold, cage, genome
    name  VisibleString,
    items SEQUENCE OF INSDAltSeqItem OPTIONAL
}

-- Element type of INSDAltSeqData.items; every member is optional.
INSDAltSeqItem ::= SEQUENCE {
    interval    INSDInterval OPTIONAL,
    isgap       BOOLEAN OPTIONAL,
    gap-length  INTEGER OPTIONAL,
    gap-type    VisibleString OPTIONAL,
    gap-linkage VisibleString OPTIONAL,
    gap-comment VisibleString OPTIONAL,
    first-accn  VisibleString OPTIONAL,
    last-accn   VisibleString OPTIONAL,
    value       VisibleString OPTIONAL
}

END


-- medlars.asn
--$Revision: 6.0 $
--**********************************************************************
--
--  MEDLARS data definitions
--  Grigoriy Starchenko, 1997
--
--**********************************************************************

NCBI-Medlars DEFINITIONS ::=
BEGIN

EXPORTS Medlars-entry, Medlars-record;

IMPORTS PubMedId FROM NCBI-Biblio;

-- A MEDLARS entry.
Medlars-entry ::= SEQUENCE {
    -- All entries in PubMed must have it
    pmid PubMedId,
    -- Medline(OCCS) id
    muid INTEGER OPTIONAL,
    -- List of Medlars records
    recs SET OF Medlars-record
}

-- One unit record of a Medlars-entry.
Medlars-record ::= SEQUENCE {
    -- Unit record field type integer form
    code INTEGER,
    -- Unit record field type abbreviation form
    abbr VisibleString OPTIONAL,
    -- Unit record data
    data VisibleString
}

END

-- medline.asn
--$Revision: 6.0 $
--**********************************************************************
--
--  MEDLINE data definitions
--  James Ostell, 1990
--
--  enhanced in 1996 to support PubMed records as well by simply adding
--    the PubMedId and making MedlineId optional
--
--**********************************************************************

NCBI-Medline DEFINITIONS ::=
BEGIN

EXPORTS Medline-entry, Medline-si;

IMPORTS Cit-art, PubMedId FROM NCBI-Biblio
        Date FROM NCBI-General;

                                -- a MEDLINE or PubMed entry
-- Only em and cit are mandatory; status falls back to medline when absent.
Medline-entry ::= SEQUENCE {
    -- MEDLINE UID, sometimes not yet available if from PubMed
    uid       INTEGER OPTIONAL,
    -- Entry Month
    em        Date,
    -- article citation
    cit       Cit-art,
    abstract  VisibleString OPTIONAL,
    mesh      SET OF Medline-mesh OPTIONAL,
    substance SET OF Medline-rn OPTIONAL,
    xref      SET OF Medline-si OPTIONAL,
    -- ID Number (grants, contracts)
    idnum     SET OF VisibleString OPTIONAL,
    gene      SET OF VisibleString OPTIONAL,
    -- MEDLINE records may include the PubMedId
    pmid      PubMedId OPTIONAL,
    -- may show publication types (review, etc)
    pub-type  SET OF VisibleString OPTIONAL,
    -- additional Medline field types
    mlfield   SET OF Medline-field OPTIONAL,
    status    INTEGER {
        publisher  (1),    -- record as supplied by publisher
        premedline (2),    -- premedline record
        medline    (3)     -- regular medline record
    } DEFAULT medline
}

-- Element type of Medline-entry.mesh.
Medline-mesh ::= SEQUENCE {
    -- TRUE if main point (*)
    mp   BOOLEAN DEFAULT FALSE,
    -- the MeSH term
    term VisibleString,
    -- qualifiers
    qual SET OF Medline-qual OPTIONAL
}

-- Element type of Medline-mesh.qual.
Medline-qual ::= SEQUENCE {
    -- TRUE if main point
    mp   BOOLEAN DEFAULT FALSE,
    -- the subheading
    subh VisibleString
}

-- Medline substance record.
Medline-rn ::= SEQUENCE {
    -- type of record
    type ENUMERATED {
        nameonly (0),
        cas      (1),    -- CAS number
        ec       (2)     -- EC number
    },
    -- CAS or EC number if present
    cit  VisibleString OPTIONAL,
    -- name (always present)
    name VisibleString
}

-- Medline cross reference record: a source database code plus an optional
-- citation string. The codes start at 1; no value 0 is defined.
Medline-si ::= SEQUENCE {
    -- type of xref
    type ENUMERATED {
        ddbj      (1),     -- DNA Data Bank of Japan
        carbbank  (2),     -- Carbohydrate Structure Database
        embl      (3),     -- EMBL Data Library
        hdb       (4),     -- Hybridoma Data Bank
        genbank   (5),     -- GenBank
        hgml      (6),     -- Human Gene Map Library
        mim       (7),     -- Mendelian Inheritance in Man
        msd       (8),     -- Microbial Strains Database
        pdb       (9),     -- Protein Data Bank (Brookhaven)
        pir       (10),    -- Protein Identification Resource
        prfseqdb  (11),    -- Protein Research Foundation (Japan)
        psd       (12),    -- Protein Sequence Database (Japan)
        swissprot (13),    -- SwissProt
        gdb       (14)     -- Genome Data Base
    },
    -- the citation/accession number
    cit  VisibleString OPTIONAL
}

-- Element type of Medline-entry.mlfield.
Medline-field ::= SEQUENCE {
    -- Keyed type
    type INTEGER {
        other   (0),    -- look in line code
        comment (1),    -- comment line
        erratum (2)     -- retracted, corrected, etc
    },
    -- the text
    str  VisibleString,
    -- pointers relevant to this text
    ids  SEQUENCE OF DocRef OPTIONAL
}

-- Reference to a document: a type code and an integer uid.
DocRef ::= SEQUENCE {
    type INTEGER {
        medline (1),
        pubmed  (2),
        ncbigi  (3)
    },
    uid  INTEGER
}

END


-- mim.asn
--********************************************************************
--
--  MIM data definitions
--  Brandon Brylawski, 1996.
--  version 2.1
--
--********************************************************************

NCBI-Mim DEFINITIONS ::=
BEGIN

-- Ordered list of Mim-entry records.
Mim-entries ::= SEQUENCE OF Mim-entry

-- A list of Mim-entry records together with a release date.
Mim-set ::= SEQUENCE {
    releaseDate Mim-date,
    mimEntries  SEQUENCE OF Mim-entry
}

-- One MIM record.
-- Mandatory members: mimNumber, mimType, title and numGeneMaps.
-- Every other member is OPTIONAL.
Mim-entry ::= SEQUENCE {
	mimNumber VisibleString ,
	mimType INTEGER {
		none (0) ,
		star (1) ,
		caret (2) ,
		pound (3) ,
		plus (4) ,
		perc (5) } ,
	title VisibleString ,
	copyright VisibleString OPTIONAL ,
	symbol VisibleString OPTIONAL ,
	locus VisibleString OPTIONAL ,
	synonyms SEQUENCE OF VisibleString OPTIONAL ,
	aliases SEQUENCE OF VisibleString OPTIONAL ,
	included SEQUENCE OF VisibleString OPTIONAL ,
	seeAlso SEQUENCE OF Mim-cit OPTIONAL ,
	text SEQUENCE OF Mim-text OPTIONAL ,
	textfields SEQUENCE OF Mim-text OPTIONAL ,
	hasSummary BOOLEAN OPTIONAL ,
	summary SEQUENCE OF Mim-text OPTIONAL ,
	summaryAttribution SEQUENCE OF Mim-edit-item OPTIONAL ,
	summaryEditHistory SEQUENCE OF Mim-edit-item OPTIONAL ,
	summaryCreationDate Mim-edit-item OPTIONAL ,
	allelicVariants SEQUENCE OF Mim-allelic-variant OPTIONAL ,
	hasSynopsis BOOLEAN OPTIONAL ,
	clinicalSynopsis SEQUENCE OF Mim-index-term OPTIONAL ,
	synopsisAttribution SEQUENCE OF Mim-edit-item OPTIONAL ,
	synopsisEditHistory SEQUENCE OF Mim-edit-item OPTIONAL ,
	synopsisCreationDate Mim-edit-item OPTIONAL ,
	editHistory SEQUENCE OF Mim-edit-item OPTIONAL ,
	creationDate Mim-edit-item OPTIONAL ,
	references SEQUENCE OF Mim-reference OPTIONAL ,
	attribution SEQUENCE OF Mim-edit-item OPTIONAL ,
	numGeneMaps INTEGER ,
	medlineLinks Mim-link OPTIONAL ,
	proteinLinks Mim-link OPTIONAL ,
	nucleotideLinks Mim-link OPTIONAL ,
	structureLinks Mim-link OPTIONAL ,
	genomeLinks Mim-link OPTIONAL }

-- A labelled piece of text with an optional set of neighbor links.
Mim-text ::= SEQUENCE {
    label     VisibleString,
    text      VisibleString,
    neighbors Mim-link OPTIONAL
}

-- Element type of Mim-entry.allelicVariants.
-- Only number and name are mandatory.
Mim-allelic-variant ::= SEQUENCE {
    number      VisibleString,
    name        VisibleString,
    aliases     SEQUENCE OF VisibleString OPTIONAL,
    mutation    SEQUENCE OF Mim-text OPTIONAL,
    description SEQUENCE OF Mim-text OPTIONAL,
    snpLinks    Mim-link OPTIONAL
}

-- Link record: a count, a uids string and an optional numRelevant count.
-- NOTE(review): uids is a single VisibleString, not a list; the encoding of
-- multiple uids inside it is not defined here.
Mim-link ::= SEQUENCE {
    num         INTEGER,
    uids        VisibleString,
    numRelevant INTEGER OPTIONAL
}

-- An author name paired with an integer index.
Mim-author ::= SEQUENCE {
    name  VisibleString,
    index INTEGER
}

-- Element type of Mim-entry.seeAlso; all four members are mandatory.
Mim-cit ::= SEQUENCE {
    number INTEGER,
    author VisibleString,
    others VisibleString,
    year   INTEGER
}

-- Element type of Mim-entry.references.
-- Mandatory members: number, authors, primaryAuthor, otherAuthors,
-- citationTitle, pubDate and ambiguous. Every other member is OPTIONAL,
-- including the type code.
Mim-reference ::= SEQUENCE {
	number INTEGER ,
	origNumber INTEGER OPTIONAL ,
	type ENUMERATED {
		not-set (0) ,
		citation (1) ,
		book (2) ,
		personal-communication (3) ,
		book-citation (4) } OPTIONAL ,
	authors SEQUENCE OF Mim-author ,
	primaryAuthor VisibleString ,
	otherAuthors VisibleString ,
	citationTitle VisibleString ,
	citationType INTEGER OPTIONAL ,
	bookTitle VisibleString OPTIONAL ,
	editors SEQUENCE OF Mim-author OPTIONAL ,
	volume VisibleString OPTIONAL ,
	edition VisibleString OPTIONAL ,
	journal VisibleString OPTIONAL ,
	series VisibleString OPTIONAL ,
	publisher VisibleString OPTIONAL ,
	place VisibleString OPTIONAL ,
	commNote VisibleString OPTIONAL ,
	pubDate Mim-date ,
	pages SEQUENCE OF Mim-page OPTIONAL ,
	miscInfo VisibleString OPTIONAL ,
	pubmedUID INTEGER OPTIONAL ,
	ambiguous BOOLEAN ,
	noLink BOOLEAN OPTIONAL }

-- Element type of Mim-entry.clinicalSynopsis: a key with its list of terms.
Mim-index-term ::= SEQUENCE {
    key   VisibleString,
    terms SEQUENCE OF VisibleString
}

-- An author string paired with a modification date.
Mim-edit-item ::= SEQUENCE {
    author  VisibleString,
    modDate Mim-date
}

-- A date: year is mandatory, month and day may be omitted.
Mim-date ::= SEQUENCE {
    year  INTEGER,
    month INTEGER OPTIONAL,
    day   INTEGER OPTIONAL
}

-- Element type of Mim-reference.pages: a mandatory from string and an
-- optional to string.
Mim-page ::= SEQUENCE {
    from VisibleString,
    to   VisibleString OPTIONAL
}

END


-- mla.asn
--$Revision: 209893 $
--********************************************************************
--
--  Network MEDLINE Archive message formats
--  Ostell 1993
--
--
--*********************************************************************
--
--  mla.asn
--
--     messages for medline archive data access
--
--*********************************************************************

NCBI-MedArchive DEFINITIONS ::=
BEGIN

IMPORTS Medline-entry FROM NCBI-Medline
        Medlars-entry FROM NCBI-Medlars
        Pubmed-entry FROM NCBI-PubMed
        Medline-si FROM NCBI-Medline
        Pub FROM NCBI-Pub
        Title, PubMedId FROM NCBI-Biblio;

        --**********************************
        -- requests
        --

-- Request message: exactly one of the alternatives below.
-- Each alternative carries an explicit context tag [0] to [18].
Mla-request ::= CHOICE {
    init [0] NULL,              -- DlInit
    getmle [1] INTEGER,         -- get MedlineEntry
    getpub [2] INTEGER,         -- get citation by muid
    gettitle [3] Title-msg,     -- match titles
    citmatch [4] Pub,           -- citation match
    fini [5] NULL,              -- DlFini
    getmriuids [6] INTEGER,     -- get MUIDs for an MRI
    getaccuids [7] Medline-si,  -- get MUIDs for an accession
    uidtopmid [8] INTEGER,      -- get PMID for MUID
    pmidtouid [9] PubMedId,     -- get MUID for PMID
    getmlepmid [10] PubMedId,   -- get MedlineEntry by PubMed id
    getpubpmid [11] PubMedId,   -- get citation by PubMed id
    citmatchpmid [12] Pub,      -- citation match, PMID on out
    getmripmids [13] INTEGER,   -- get PMIDs for an MRI
    getaccpmids [14] Medline-si,-- get PMIDs for an accession
    citlstpmids [15] Pub,       -- generate list of PMID for Pub
    getmleuid [16] INTEGER,     -- get MedlineEntry by Medline id
    getmlrpmid [17] PubMedId,   -- get MedlarsEntry by PubMed id
    getmlruid [18] INTEGER      -- get MedlarsEntry by Medline id
    }

--**********************************************************************
--
--  if request = all
--	if one row returned
--	   reply=all, return every column
--	else 
--	   reply=ml-jta for each row
--
--  if request = not-set, reply=ml-jta
--
--  otherwise,
--	if request != ml-jta
--	   if column exist, reply=column, else reply=ml-jta
--
--**********************************************************************

-- Kind of title requested or returned in a Title-msg.
-- Values 1 to 10 name individual title kinds; all (255) and not-set (0) are
-- the two special values.
Title-type ::= ENUMERATED {
    not-set (0),                -- request=ml-jta (default), reply=not-found
    name (1),
    tsub (2),
    trans (3),
    jta (4),
    iso-jta (5),
    ml-jta (6),
    coden (7),
    issn (8),
    abr (9),
    isbn (10),
    all (255)
    }

-- Title match request/response.
Title-msg ::= SEQUENCE {
    -- type to get, or type returned
    type  Title-type,
    -- title(s) to look up, or title(s) found
    title Title
}

-- A list of Title-msg values together with a count.
Title-msg-list ::= SEQUENCE {
    -- number of titles
    num    INTEGER,
    titles SEQUENCE OF Title-msg
}

-- Error codes returned in the error alternative of Mla-back.
Error-val ::= ENUMERATED {
    not-found (0),               -- Entry was not found
    operational-error (1),       -- A run-time operation error occurred
    cannot-connect-jrsrv (2),    -- Cannot connect to Journal server
    cannot-connect-pmdb (3),     -- Cannot connect to PubMed
    journal-not-found (4),       -- Journal title not found
    citation-not-found (5),      -- Volume, Page and Author do not match any
                                 -- article
    citation-ambiguous (6),      -- More than one article found
    citation-too-many (7),       -- Too many articles were found

    cannot-connect-searchbackend-jrsrv(8),	-- Cannot connect to searchbackend Journals db
    cannot-connect-searchbackend-pmdb(9),	-- Cannot connect to searchbackend PubMed db
    cannot-connect-docsumbackend(10)		-- Cannot connect to docsumbackend
    }

-- Reply message: exactly one of the alternatives below.
-- Each alternative carries an explicit context tag [0] to [12].
Mla-back ::= CHOICE {
    init [0] NULL,                   -- DlInit
    error [1] Error-val,             -- not found for getmle/getpub/citmatch
    getmle [2] Medline-entry,        -- got Medline Entry
    getpub [3] Pub,                  -- got a Pub
    gettitle [4] Title-msg-list,     -- match titles
    citmatch [5] INTEGER,            -- citation lookup muid or 0
    fini [6] NULL,                   -- DlFini
    getuids [7] SEQUENCE OF INTEGER, -- got a set of MUIDs
    getpmids [8] SEQUENCE OF INTEGER,-- got a set of PMIDs
    outuid [9] INTEGER,              -- result muid or 0 if not found
    outpmid [10] PubMedId,           -- result pmid or 0 if not found
    getpme [11] Pubmed-entry,        -- got Pubmed Entry
    getmlr [12] Medlars-entry        -- got Medlars Entry
    }
END

-- mmdb1.asn
--$Revision: 6.1 $
--**********************************************************************
--
--  Biological Macromolecule 3-D Structure Data Types for MMDB,
--                A Molecular Modeling Database
--
--  Definitions for a biomolecular assembly and the MMDB database
--
--  By Hitomi Ohkawa, Jim Ostell, Chris Hogue, and Steve Bryant 
--
--  National Center for Biotechnology Information
--  National Institutes of Health
--  Bethesda, MD 20894 USA
--
--  July 1995
--
--**********************************************************************

-- Contents of the MMDB database are currently based on files distributed by
-- the Protein Data Bank, PDB.  These data are changed in form, as described
-- in this specification. To some extent they are also changed in content, in 
-- that many data items implicit in PDB are made explicit, and others are
-- corrected or omitted as a consequence of validation checks.  The semantics
-- of MMDB data items are indicated by comments within the specification below.
-- These comments explain in detail the manner in which data items from  PDB 
-- have been mapped into MMDB. 

MMDB DEFINITIONS ::=

BEGIN

EXPORTS Biostruc, Biostruc-id, Biostruc-set, Biostruc-annot-set,
	Biostruc-residue-graph-set;

IMPORTS Biostruc-graph, Biomol-descr, Residue-graph FROM MMDB-Chemical-graph 
	Biostruc-model FROM MMDB-Structural-model
	Biostruc-feature-set FROM MMDB-Features
	Pub FROM NCBI-Pub
	Date, Object-id, Dbtag FROM NCBI-General;

-- A structure report or "biostruc" describes the components of a biomolecular 
-- assembly in terms of their names and descriptions, and a chemical graph 
-- giving atomic formula, connectivity and chirality. It also gives one or more
-- three-dimensional model structures, literally a mapping of the atoms, 
-- residues and/or molecules of each component into a measured three-
-- dimensional space. Structure may also be described by named features, which 
-- associate nodes in the chemical graph, or regions in space, with text or 
-- numeric descriptors.

-- Note that a biostruc may also contain cross references to other databases,
-- including citations to relevant scientific literature. These cross 
-- references use object types from other NCBI data specifications, which are 
-- "imported" into MMDB, and not repeated in this specification. 

-- Biostruc: one structure report.
--   id              mandatory list of identifiers (see Biostruc-id)
--   descr           optional list of descriptors (see Biostruc-descr)
--   chemical-graph  mandatory chemical graph of the assembly
--   features        optional list of feature sets
--   model           optional list of models.  It is the last member; the
--                   notes in MMDB-Structural-model point out that parsers
--                   can use this to stop reading the stream early.
Biostruc ::= SEQUENCE {
	id			SEQUENCE OF Biostruc-id,
	descr			SEQUENCE OF Biostruc-descr OPTIONAL,
	chemical-graph		Biostruc-graph,
	features		SEQUENCE OF Biostruc-feature-set OPTIONAL,
	model			SEQUENCE OF Biostruc-model OPTIONAL }

-- A Biostruc-id is a collection of identifiers for the molecular assembly.
-- Mmdb-id's are NCBI-assigned, and are intended to be unique and stable 
-- identifiers.  Other-id's are synonyms.

-- Biostruc-id: exactly one of the following identifier forms.
--   mmdb-id         NCBI-assigned Mmdb-id
--   other-database  Dbtag, imported from NCBI-General
--   local-id        Object-id, imported from NCBI-General
Biostruc-id ::= CHOICE {
	mmdb-id			Mmdb-id,
	other-database		Dbtag,
	local-id		Object-id }

-- Mmdb-id: plain INTEGER.  NCBI-assigned; intended to be unique and stable.
Mmdb-id ::= INTEGER


-- The description of a biostruc refers to both the reported chemical and 
-- spatial structure of a biomolecular assembly.  PDB-derived descriptors
-- which refer specifically to the chemical components or spatial structure
-- are not provided here, but instead as descriptors of the biostruc-graph or 
-- biostruc-model. For PDB-derived structures the biostruc name is the PDB 
-- id-code.  PDB-derived citations appear as publications within the biostruc 
-- description, and include a data-submission citation derived from PDB AUTHOR 
-- records.  Citations are described using the NCBI Pub specification.

-- Biostruc-descr: a single descriptor item; containers hold a list of these.
--   name           text; for PDB-derived structures this is the PDB id-code
--   pdb-comment    text
--   other-comment  text
--   history        replacement and data-source record (Biostruc-history)
--   attribution    citation expressed as an NCBI Pub
Biostruc-descr ::= CHOICE {
	name			VisibleString,
	pdb-comment		VisibleString,
	other-comment		VisibleString,
	history			Biostruc-history, 
	attribution		Pub }


-- The history of a biostruc indicates its origin and its update history
-- within MMDB, the NCBI-maintained molecular structure database.  

-- Biostruc-history: update history record.  All three members are optional,
-- so an empty history is a valid value.
--   replaces     the entry this biostruc replaces
--   replaced-by  the entry that replaces this biostruc
--   data-source  the originating database entry (Biostruc-source)
Biostruc-history ::= SEQUENCE {
	replaces		Biostruc-replace OPTIONAL,
	replaced-by		Biostruc-replace OPTIONAL,
	data-source		Biostruc-source OPTIONAL }

-- Biostruc-replace: one replacement event; both members are mandatory.
--   id    identifier of the other biostruc involved in the replacement
--   date  Date imported from NCBI-General
Biostruc-replace ::= SEQUENCE {
	id			Biostruc-id,
	date			Date }

-- The origin of a biostruc is a reference to another database.  PDB release 
-- date and PDB-assigned id codes are recorded here, as are the PDB-assigned 
-- entry date and replacement history.

-- Biostruc-source: reference to the database entry a biostruc came from.
--   name-of-database        mandatory database name
--   version-of-database     optional; either a release date or a release
--                           code, never both (inline CHOICE)
--   database-entry-id       mandatory identifier of the entry
--   database-entry-date     mandatory entry date
--   database-entry-history  optional list of text records
Biostruc-source ::= SEQUENCE {
	name-of-database	VisibleString,
	version-of-database	CHOICE {
		release-date		Date,
		release-code		VisibleString } OPTIONAL,
	database-entry-id	Biostruc-id,
	database-entry-date	Date,
	database-entry-history	SEQUENCE OF VisibleString OPTIONAL}


-- A biostruc set is a means to collect ASN.1 data for many biostrucs in 
-- one file, as convenient for application programs.  The object type is not
-- intended to imply similarity of the biostrucs grouped together.

-- Biostruc-set: container for many biostrucs.
--   id         optional list of identifiers for the set
--   descr      optional list of descriptors for the set
--   biostrucs  mandatory list of Biostruc values
Biostruc-set ::= SEQUENCE {
	id		SEQUENCE OF Biostruc-id OPTIONAL,
	descr		SEQUENCE OF Biostruc-descr OPTIONAL,
	biostrucs	SEQUENCE OF Biostruc }


-- A biostruc annotation set is a means to collect ASN.1 data for biostruc
-- features into one file. The object type is intended as a means to store 
-- feature annotation of similar type, such as "core" definitions for a 
-- threading program, or structure-structure alignments for a structure-
-- similarity browser.

-- Biostruc-annot-set: container for feature annotation.
--   id        optional list of identifiers
--   descr     optional list of descriptors
--   features  mandatory list of Biostruc-feature-set, imported from
--             MMDB-Features
Biostruc-annot-set ::= SEQUENCE {
	id		SEQUENCE OF Biostruc-id OPTIONAL,
	descr		SEQUENCE OF Biostruc-descr OPTIONAL,
	features	SEQUENCE OF Biostruc-feature-set }


-- A biostruc residue graph set is a collection of residue graphs.  The object
-- type is intended as a means to record dictionaries containing the chemical
-- subgraphs of "standard" residue types, which are used as a means to 
-- simplify description of the covalent structure of a biomolecular assembly.
-- The standard residue graph dictionary supplied with the MMDB database 
-- contains 20 standard L amino acids and 8 standard ribonucleotide groups. 
-- These graphs are complete, including explicit hydrogen atoms and separate 
-- instances for the terminal polypeptide and polynucleotide residues. 

-- Biostruc-residue-graph-set: dictionary of residue graphs.
--   id              optional list of identifiers
--   descr           optional list of Biomol-descr; note this is the
--                   chemical-graph descriptor type, not Biostruc-descr as
--                   used by the other set types in this module
--   residue-graphs  mandatory list of Residue-graph
Biostruc-residue-graph-set ::= SEQUENCE {
	id			SEQUENCE OF Biostruc-id OPTIONAL,
	descr			SEQUENCE OF Biomol-descr OPTIONAL,
	residue-graphs		SEQUENCE OF Residue-graph }

END


--**********************************************************************
--
--  Biological Macromolecule 3-D Structure Data Types for MMDB,
--                A Molecular Modeling Database
--
--  Definitions for a chemical graph
--
--  By Hitomi Ohkawa, Jim Ostell, Chris Hogue and Steve Bryant 
--
--  National Center for Biotechnology Information
--  National Institutes of Health
--  Bethesda, MD 20894 USA
--
--  July, 1995
--
--**********************************************************************

MMDB-Chemical-graph DEFINITIONS ::=

BEGIN

EXPORTS Biostruc-graph, Biomol-descr, Residue-graph,
	Molecule-id, PCSubstance-id, Residue-id, Atom-id;

IMPORTS Pub FROM NCBI-Pub
	BioSource FROM NCBI-BioSource
	Seq-id FROM NCBI-Seqloc
	Biostruc-id FROM MMDB;

-- A biostruc graph contains the complete chemical graph of the biomolecular 
-- assembly.  The assembly graph is defined hierarchically, in terms of 
-- subgraphs of component molecules.  For PDB-derived biostrucs,
-- the molecules forming the assembly are the individual biopolymer chains and 
-- any non-polymer or "heterogen" groups which are present. 

-- The PDB-derived  "compound name" field appears as the name within the
-- biostruc-graph description.  PDB "class" and "source" fields appear as 
-- explicit attributes.  PDB-derived structures are assigned an assembly type 
-- of "other" unless they have been further classified as the "physiological
-- form" or "crystallographic cell" contents.  If they have, the source of the 
-- type classification appears as a citation within the  assembly description. 

-- Note that the biostruc-graph also includes as literals the subgraphs of 
-- any nonstandard residues present within it. For PDB-derived biostrucs these 
-- subgraphs are constructed automatically, with validation as described below.

-- Biostruc-graph: complete chemical graph of an assembly.
--   descr                 optional list of descriptors
--   molecule-graphs       mandatory list, one graph per component molecule
--   inter-molecule-bonds  optional list of bonds between molecules; uses the
--                         same Inter-residue-bond type as bonds inside a
--                         molecule
--   residue-graphs        optional list of residue graphs carried with this
--                         biostruc (the nonstandard residues)
Biostruc-graph ::= SEQUENCE {
	descr			SEQUENCE OF Biomol-descr OPTIONAL,
	molecule-graphs		SEQUENCE OF Molecule-graph,
	inter-molecule-bonds	SEQUENCE OF Inter-residue-bond OPTIONAL,
	residue-graphs		SEQUENCE OF Residue-graph OPTIONAL }

-- A biomolecule description refers to the chemical structure of a molecule or 
-- component substructures.  This descriptor type is used at the level of
-- assemblies, molecules and residues, and also for residue-graph dictionaries.
-- The BioSource object type is drawn from NCBI taxonomy data specifications,
-- and is not repeated here.

-- Biomol-descr: a single descriptor item, used for assemblies, molecules,
-- residues and residue-graph dictionaries.
--   name, pdb-class, pdb-source, pdb-comment, other-comment   text items
--   organism       BioSource, imported from NCBI-BioSource
--   attribution    citation expressed as an NCBI Pub
--   assembly-type  INTEGER with named values 1, 2 and 255
--   molecule-type  INTEGER with named values 1 through 6 and 255
-- Both typed members are INTEGER with named numbers, not ENUMERATED, so the
-- notation itself does not restrict values to the listed names.
Biomol-descr ::= CHOICE {
	name			VisibleString,
	pdb-class		VisibleString,
	pdb-source		VisibleString,
	pdb-comment		VisibleString,
	other-comment		VisibleString,
	organism		BioSource,
	attribution		Pub,
	assembly-type		INTEGER {	physiological-form(1),
						crystallographic-cell(2),
						other(255) },
	molecule-type		INTEGER {	dna(1),
						rna(2),
						protein(3),
						other-biopolymer(4),
						solvent(5),
						other-nonpolymer(6),
						other(255) } }

-- A molecule chemical graph is defined by a sequence of residues.  Nonpolymers
-- are described in the same way, but may contain only a single residue.  

-- Biopolymer molecules are identified within PDB entries according to their
-- appearance on SEQRES records, which formally define a biopolymer as such. 
-- Biopolymers are defined by the distinction between ATOM and HETATM 
-- coordinate records only in cases where the chemical sequence from SEQRES
-- is in conflict with coordinate data. The PDB-assigned chain code appears as 
-- the name within the molecule descriptions of the biopolymers.

-- Nonpolymer molecules from PDB correspond to individual HETEROGEN groups, 
-- excluding any HETEROGEN groups which represent modified biopolymer residues.
-- These molecules are named according to the chain, residue type and residue 
-- number fields as assigned by PDB. Any description appearing in the PDB HET 
-- record appears as a pdb-comment within the molecule description. 

-- Molecule types for PDB-derived molecule graphs are assigned by matching 
-- residue and atom names against the PDB-documented standard types for protein,
-- DNA and RNA, and against residue codes commonly used to indicate solvent.
-- Classification is by "majority rule". If more than half of the residues in
-- a biopolymer are standard groups of one type, then the molecule is of that 
-- type, and otherwise classified as "other". Note that this classification does
-- not preclude the presence of modified residues, but insists they constitute 
-- less than half the biopolymer. Non-polymers are classified only as "solvent"
-- or "other".  

-- Note that a molecule graph may also contain a set of cross references 
-- to biopolymer sequence databases.  All biopolymer molecules in MMDB contain 
-- appropriate identifiers for the corresponding entry in the NCBI-Sequences 
-- database, in particular the NCBI "gi" number, which may be used for sequence
-- retrieval. The Seq-id object type is defined in the NCBI molecular sequence 
-- specification, and not repeated here.

-- Molecule-graph: chemical graph of one molecule.
--   id                   mandatory Molecule-id
--   descr                optional list of descriptors
--   seq-id               optional single Seq-id cross reference, imported
--                        from NCBI-Seqloc
--   residue-sequence     mandatory list of Residue
--   inter-residue-bonds  optional list of bonds between residues
--   sid                  optional PCSubstance-id
Molecule-graph ::= SEQUENCE {
	id			Molecule-id,
	descr			SEQUENCE OF Biomol-descr OPTIONAL,
	seq-id			Seq-id OPTIONAL,
	residue-sequence	SEQUENCE OF Residue,
	inter-residue-bonds	SEQUENCE OF Inter-residue-bond OPTIONAL, 
	sid                     PCSubstance-id OPTIONAL }
   
-- Molecule-id: plain INTEGER identifying a molecule; exported, and used as
-- the first level of an Atom-pntr.
Molecule-id ::= INTEGER

-- Pubchem substance id

-- PCSubstance-id: plain INTEGER; exported from this module and used by the
-- sid member of Molecule-graph.
PCSubstance-id ::= INTEGER

-- Residues may be assigned a text-string name as well as an id number. PDB 
-- assigned residue numbers appear as the residue name.

-- Residue: one residue of a molecule.
--   id             mandatory Residue-id
--   name           optional text; holds the PDB-assigned residue number
--   residue-graph  mandatory pointer to the graph that defines this residue
Residue ::= SEQUENCE {
	id			Residue-id,
	name			VisibleString OPTIONAL,
	residue-graph		Residue-graph-pntr }

-- Residue-id: plain INTEGER identifying a residue; exported, and used as the
-- second level of an Atom-pntr.
Residue-id ::= INTEGER


-- Residue graphs from different sources may be referenced within a molecule
-- graph.  The allowed choices are the nonstandard residue graphs included in 
-- the present biostruc, residue graphs within other biostrucs, or residue 
-- graphs within tables of standard residue definitions.

-- Residue-graph-pntr: exactly one of three ways to locate a residue graph.
--   local     id of a residue graph included in the present biostruc
--   biostruc  residue graph inside another biostruc
--   standard  residue graph inside a table of standard residue definitions
Residue-graph-pntr ::= CHOICE {
	local			Residue-graph-id,
	biostruc		Biostruc-graph-pntr,
	standard		Biostruc-residue-graph-set-pntr }
	
-- Biostruc-graph-pntr: names a residue graph held by another biostruc.
--   biostruc-id       which biostruc
--   residue-graph-id  which residue graph within it
Biostruc-graph-pntr ::= SEQUENCE {
	biostruc-id		Biostruc-id,
	residue-graph-id	Residue-graph-id }

-- Biostruc-residue-graph-set-pntr: names a residue graph held by a residue
-- graph set.  The set is identified with an ordinary Biostruc-id.
--   biostruc-residue-graph-set-id  which set
--   residue-graph-id               which residue graph within it
Biostruc-residue-graph-set-pntr ::= SEQUENCE {
	biostruc-residue-graph-set-id	Biostruc-id,
	residue-graph-id		Residue-graph-id } 


-- Residue graphs define atomic formulae, connectivity, chirality, and names.
-- For standard residue graphs from the MMDB dictionary the PDB-assigned 
-- residue-type code appears as the name within the residue graph description,
-- and the full trivial name of the residue as a comment within that 
-- description.  For any nonstandard residue graphs provided with an MMDB 
-- biostruc the PDB-assigned residue-type code similarly appears as the name 
-- within the description, and any information provided on PDB HET records as 
-- a pdb-comment within that description.  

-- Note that nonstandard residue graphs for a PDB-derived biostruc may be 
-- incomplete. Current PDB format cannot represent connectivity for groups 
-- which are disordered, and for which no coordinates are given.  In these 
-- cases the residue graph defined in MMDB represents only the subgraph that 
-- could be identified from available ATOM, HETATM and CONECT records.

-- Residue-graph: atoms, bonds and chirality of one residue type.
--   id              mandatory Residue-graph-id
--   descr           optional list of descriptors
--   residue-type    optional INTEGER with named values 1, 2, 3 and 255
--   iupac-code      optional list of text codes
--   atoms           mandatory list of Atom
--   bonds           mandatory list of Intra-residue-bond
--   chiral-centers  optional list of Chiral-center
Residue-graph ::= SEQUENCE {
	id			Residue-graph-id,
	descr			SEQUENCE OF Biomol-descr OPTIONAL,
	residue-type		INTEGER {	deoxyribonucleotide(1),
						ribonucleotide(2),
						amino-acid(3),
						other(255) } OPTIONAL,
	iupac-code		SEQUENCE OF VisibleString OPTIONAL,
	atoms			SEQUENCE OF Atom,
	bonds			SEQUENCE OF Intra-residue-bond,
	chiral-centers		SEQUENCE OF Chiral-center OPTIONAL }
	
-- Residue-graph-id: plain INTEGER identifying a residue graph; used by the
-- three pointer types above and by Residue-graph itself.
Residue-graph-id ::= INTEGER

-- Atoms in residue graphs are defined by elemental symbols and names.  PDB-
-- assigned atom names appear here in the name field, except in cases of known 
-- PDB synonyms.  In these cases atom names are mapped to the names used in the
-- MMDB standard dictionary. This occurs primarily for hydrogen atoms, where 
-- PDB practice allows synonyms for several atom types.  For PDB atoms the 
-- elemental symbol is obtained by parsing the PDB atom name field, allowing 
-- for known special-semantics cases where the atom name does not follow the
-- documented encoding rule.  Ionizable protons are identified within standard 
-- residue graphs in the MMDB dictionary, but not within automatically-defined
-- nonstandard graphs.

-- Atom: one atom of a residue graph.
--   id                mandatory Atom-id
--   name              optional text atom name
--   iupac-code        optional list of text codes
--   element           mandatory.  Values 1 through 103 are the atomic
--                     numbers of the listed element symbols (h to lr);
--                     254 is other and 255 is unknown.
--   ionizable-proton  optional three-state flag.  It is an ENUMERATED, not
--                     a BOOLEAN: true is 1, false is 2, unknown is 255.
Atom ::= SEQUENCE {
	id			Atom-id,
	name			VisibleString OPTIONAL,
	iupac-code		SEQUENCE OF VisibleString OPTIONAL,
	element			ENUMERATED {
				h(1),   he(2),  li(3),  be(4),  b(5), 
				c(6),   n(7),   o(8),   f(9),   ne(10), 
				na(11), mg(12), al(13), si(14), p(15), 
				s(16),  cl(17), ar(18), k(19),  ca(20), 
				sc(21), ti(22), v(23),  cr(24), mn(25), 
				fe(26), co(27), ni(28), cu(29), zn(30), 
				ga(31), ge(32), as(33), se(34), br(35), 
				kr(36), rb(37), sr(38), y(39),  zr(40),
				nb(41), mo(42), tc(43), ru(44), rh(45),
				pd(46), ag(47), cd(48), in(49), sn(50),
				sb(51), te(52), i(53),  xe(54), cs(55),
				ba(56), la(57), ce(58), pr(59), nd(60),
				pm(61), sm(62), eu(63), gd(64), tb(65),
				dy(66), ho(67), er(68), tm(69), yb(70),
				lu(71), hf(72), ta(73), w(74),  re(75),
				os(76), ir(77), pt(78), au(79), hg(80),
				tl(81), pb(82), bi(83), po(84), at(85),
				rn(86), fr(87), ra(88), ac(89), th(90),
				pa(91), u(92),  np(93), pu(94), am(95),
				cm(96), bk(97), cf(98), es(99), 
				fm(100), md(101), no(102), lr(103),
				other(254), unknown(255) },
	ionizable-proton	ENUMERATED {
					true(1),
					false(2),
					unknown(255) } OPTIONAL }
	
-- Atom-id: plain INTEGER identifying an atom; exported, and used as the
-- third level of an Atom-pntr.
Atom-id ::= INTEGER

-- Intra-residue-bond specifies connectivity between atoms in Residue-graph.
-- Unlike Inter-residue-bond defined later, its participating atoms are part of
-- a residue subgraph dictionary, not part of a specific biostruc-graph.

-- For residue graphs in the standard MMDB dictionary bonds are defined from
-- the known chemical structures of amino acids and nucleotides.  For 
-- nonstandard residue graphs bonds are defined from PDB CONECT records, with
-- validation for consistency with coordinate data, and from stereochemical
-- calculation to identify unreported bonds.  Validation and bond identification
-- are based on comparison of inter-atomic distances to the sum of covalent
-- radii for the corresponding elements. 

-- Intra-residue-bond: bond between two atoms of the same residue graph.
--   atom-id-1, atom-id-2  bare Atom-id values, with no molecule or residue
--                         part
--   bond-order            optional INTEGER with named values 1 through 6
--                         and 255; the same list as in Inter-residue-bond
Intra-residue-bond ::= SEQUENCE {
	atom-id-1		Atom-id,
	atom-id-2		Atom-id,
	bond-order		INTEGER {
					single(1), 
					partial-double(2),
					aromatic(3), 
					double(4),
					triple(5),
					other(6),
					unknown(255)} OPTIONAL }

-- Chiral centers are atoms with tetrahedral geometry.  Chirality is defined
-- by a chiral volume involving the chiral center and 3 other atoms bonded to 
-- it.  For any coordinates assigned to atoms c, n1, n2, and n3, the vector 
-- triple product (n1-c) dot ( (n2-c) cross (n3-c) ) must have the indicated
-- sign.  The calculation assumes an orthogonal right-handed coordinate system
-- as is used for MMDB model structures.  

-- Chirality is defined for standard residues in the MMDB dictionary, but is 
-- not assigned automatically for PDB-derived nonstandard residues. If assigned
-- for nonstandard residues, the source of chirality information is described 
-- by a citation within the residue description.

-- Chiral-center: chirality constraint on one atom; all members mandatory.
--   c           the chiral center atom
--   n1, n2, n3  three other atoms bonded to c
--   sign        required sign of (n1-c) dot ( (n2-c) cross (n3-c) );
--               positive is 1, negative is 2
Chiral-center ::= SEQUENCE {
	c			Atom-id,
	n1			Atom-id,
	n2			Atom-id,
	n3			Atom-id,
	sign			ENUMERATED { positive(1),
					     negative(2) } }

-- Inter-residue bonds are defined by a reference to two atoms. For PDB-derived 
-- structures bonds are identified from biopolymer connectivity according to
-- SEQRES and from other connectivity information on SSBOND and CONECT 
-- records. These data are validated and unreported bonds identified by
-- stereochemical calculation, using the same criteria as for intra-residue 
-- bonds.

-- Inter-residue-bond: bond between two atoms located by full pointers.
--   atom-id-1, atom-id-2  Atom-pntr values (molecule, residue, atom), unlike
--                         the bare Atom-id used by Intra-residue-bond
--   bond-order            optional INTEGER with named values 1 through 6
--                         and 255; the same list as in Intra-residue-bond
Inter-residue-bond ::= SEQUENCE {
	atom-id-1		Atom-pntr,
	atom-id-2		Atom-pntr,
	bond-order		INTEGER {
					single(1), 
					partial-double(2),
					aromatic(3), 
					double(4),
					triple(5),
					other(6),
					unknown(255)} OPTIONAL }

-- Atoms, residues and molecules within the current biostruc are referenced 
-- by hierarchical pointers.

-- Atom-pntr: hierarchical address of one atom within the current biostruc;
-- all three levels are mandatory.
Atom-pntr ::= SEQUENCE {
	molecule-id		Molecule-id,
	residue-id		Residue-id,
	atom-id			Atom-id }

-- Atom-pntr-set: list of Atom-pntr.
-- NOTE(review): this type is not listed in the EXPORTS of
-- MMDB-Chemical-graph and no other type in this module refers to it;
-- confirm whether anything still uses it.
Atom-pntr-set ::= SEQUENCE OF Atom-pntr

END

-- mmdb2.asn
--$Revision: 6.0 $
--**********************************************************************
--
--  Biological Macromolecule 3-D Structure Data Types for MMDB,
--                A Molecular Modeling Database
--
--  Definitions for structural models
--
--  By Hitomi Ohkawa, Jim Ostell, Chris Hogue and Steve Bryant 
--
--  National Center for Biotechnology Information
--  National Institutes of Health
--  Bethesda, MD 20894 USA
--
--  July, 1996
--
--**********************************************************************

MMDB-Structural-model DEFINITIONS ::=

BEGIN

EXPORTS Biostruc-model, Model-id, Model-coordinate-set-id;

IMPORTS Chem-graph-pntrs, Atom-pntrs, Chem-graph-alignment,
	Sphere, Cone, Cylinder, Brick, Transform FROM MMDB-Features
	Biostruc-id FROM MMDB
	Pub FROM NCBI-Pub;

-- A structural model maps chemical components into a measured three-
-- dimensional space. PDB-derived biostrucs generally contain 4 models, 
-- corresponding to "views" of the structure of a biomolecular assembly with
-- increasing levels of complexity.  Model types indicate the complexity of the
-- view.  

-- The model named "NCBI all atom" represents a view suitable for most 
-- computational biology applications.  It provides complete atomic coordinate 
-- data for a "single best" model, omitting statistical disorder information 
-- and/or ensemble structure descriptions provided in the source PDB file.  
-- Construction of the single best model is based on the assumption that the 
-- contents of the "alternate conformation" field from pdb imply no correlation
-- among the occupancies of multiple sites assigned to sets of atoms: the best 
-- site is chosen only on the basis of highest occupancy. Note, however, that 
-- alternate conformation sets where correlation is implied are generally 
-- constrained in crystallographic refinement to have uniform occupancy, and 
-- will thus be selected as a set. For ensemble models the model which assigns 
-- coordinates to the most atoms is chosen.  If numbers of coordinates are the 
-- same, the model occurring first in the PDB file is selected.  The single 
-- best model includes complete coordinates for all nonpolymer components, but 
-- omits those classified as "solvent".  Model type is 3 for this model. 

-- The model named "NCBI backbone" represents a simple view intended for 
-- graphic displays and rapid transmission over a network.  It includes only 
-- alpha carbon or backbone phosphate coordinates for biopolymers. It is based 
-- on selection of alpha-carbon and backbone phosphate atoms from the "NCBI
-- all atom" model. The model type is set to 2.  An even simpler model gives 
-- only a cartoon representation, using cylinders corresponding to secondary 
-- structure elements.  This is named "NCBI vector", and has model type 1.

-- The models named "PDB Model 1", "PDB Model 2", etc. represent the complete
-- information provided by PDB, including full descriptions of statistical
-- disorder.  The name of the model is based on the contents of the PDB MODEL
-- record, with a default name of "PDB Model 1" for PDB files which contain 
-- only a single model.  Construction of these models is based on the 
-- assumption that contents of the PDB "alternate conformation" field are 
-- intended to imply correlation among the occupancies of atom sets flagged by
-- the same identifier.  The special flag " " (blank) is assumed to indicate 
-- sites occupied in all alternate conformations, and sites flagged otherwise,
-- together with " ", to indicate a distinct member of an ensemble of 
-- alternate conformations.  Note that construction of ensemble members 
-- according to these assumptions requires two validation checks on PDB
-- "alternate conformation" flags: they must be unique among sites assigned to 
-- the same atom, and the special " " flag must occur only for unique
-- sites.  Sites which violate the first check are flagged as "u", for 
-- "unknown"; they are omitted from all ensemble definitions but are 
-- nonetheless retained in the coordinate list.  Sites which violate the second
-- check are flagged "b" for "blank", and are included in an appropriately
-- named ensemble.  The model type for pdb all models is 4.

-- Note that in the MMDB database models are stored in the ASN.1 stream in
-- order of increasing model type value.  Since models occur as the last item
-- in a biostruc, parsers may avoid reading the entire stream if the desired
-- model is one of the simplified types, which occur first in the stream. This
-- can save considerable I/O time, particularly for large ensemble models from 
-- NMR determinations.

-- Biostruc-model: one structural model of a biostruc.
--   id                 mandatory Model-id
--   type               mandatory Model-type
--   descr              optional list of descriptors; the model name
--                      (for example "NCBI all atom") is one of these
--   model-space        optional units and reference frame
--   model-coordinates  optional list of coordinate sets
Biostruc-model ::= SEQUENCE {
	id			Model-id,
	type			Model-type,
	descr			SEQUENCE OF Model-descr OPTIONAL,
	model-space		Model-space OPTIONAL,
	model-coordinates	SEQUENCE OF Model-coordinate-set OPTIONAL }

-- Model-id: plain INTEGER identifying a model; exported from this module.
Model-id ::= INTEGER

-- Model-type: complexity level of a model view, as an INTEGER with named
-- values.  1 is the cylinder cartoon, 2 the backbone-only view, 3 the single
-- best all-atom view, 4 the complete PDB model; 255 is other.  Models are
-- stored in the stream in order of increasing value.
Model-type ::= INTEGER {
	ncbi-vector(1),
	ncbi-backbone(2),
	ncbi-all-atom(3),
	pdb-model(4),
	other(255)}

-- Model-descr: a single descriptor item for a model or coordinate set.
-- All alternatives except attribution are text; attribution is an NCBI Pub.
-- NOTE(review): pdb-reso and pdb-method are not explained in this
-- specification; presumably the resolution and experimental method text
-- taken from PDB.  Confirm against the producer.
Model-descr ::= CHOICE {
	name			VisibleString,
	pdb-reso                VisibleString,
	pdb-method              VisibleString,
	pdb-comment		VisibleString,
	other-comment		VisibleString,
	attribution		Pub }

-- The model space defines measurement units and any external reference frame.
-- Coordinates refer to a right-handed orthogonal system defined on axes 
-- tagged x, y and z in the coordinate and feature definitions of a biostruc.
-- Coordinates from PDB-derived structures are reported without change, in
-- angstrom units.  The units of temperature and occupancy factors are not
-- defined explicitly in PDB, but are inferred from their value range.

-- Model-space: measurement units and optional external reference frame.
--   coordinate-units        mandatory; PDB-derived coordinates are in
--                           angstroms
--   thermal-factor-units    optional
--   occupancy-factor-units  optional
--   density-units           optional
--   reference-frame         optional pointer to another biostruc
-- Every unit member is an ENUMERATED whose last two values are other(3) and
-- unknown(255).
Model-space ::= SEQUENCE {
	coordinate-units	ENUMERATED {
					angstroms(1),
					nanometers(2),
					other(3),
					unknown(255)},
	thermal-factor-units	ENUMERATED {
					b(1),
					u(2),
					other(3),
					unknown(255)} OPTIONAL,
	occupancy-factor-units	ENUMERATED {
					fractional(1),
					electrons(2),
					other(3),
					unknown(255)} OPTIONAL,
	density-units		ENUMERATED {
					electrons-per-unit-volume(1),
					arbitrary-scale(2),
					other(3),
					unknown(255)} OPTIONAL,
	reference-frame		Reference-frame OPTIONAL }

-- An external reference frame is a pointer to another biostruc, with an 
-- optional operator to rotate and translate coordinates into its model space.
-- This item is intended for representation of homology-derived model 
-- structures, and is not present for structures from PDB.

-- Reference-frame: external frame for a model space.
--   biostruc-id           mandatory; the biostruc that defines the frame
--   rotation-translation  optional Transform (imported from MMDB-Features)
--                         that rotates and translates coordinates into that
--                         biostruc's model space
Reference-frame ::= SEQUENCE {
	biostruc-id		Biostruc-id,
	rotation-translation	Transform OPTIONAL }

-- Atomic coordinates may be assigned literally or by reference to another
-- biostruc.  The reference coordinate type is used to represent homology-
-- derived model structures.  PDB-derived structures have literal coordinates.

-- Referenced coordinates identify another biostruc, any transformation to be 
-- applied to coordinates from that biostruc, and a mapping of the chemical
-- graph of the present biostruc onto that of the referenced biostruc.  They
-- give an "alignment" of atoms in the current biostruc with those in another,
-- from which the coordinates of matched atoms may be retrieved.  For non-
-- atomic models "alignment" may also be represented by molecule and residue
-- equivalence lists.  Referenced coordinates are a data item intended for
-- representation of homology models, with an explicit pointer to their source
-- information. They do not occur in PDB-derived models.

-- Model-coordinate-set: one coordinate subset of a model.
--   id           optional Model-coordinate-set-id
--   descr        optional list of descriptors
--   coordinates  mandatory; either literal values or a reference expressed
--                as a Chem-graph-alignment (imported from MMDB-Features)
Model-coordinate-set ::= SEQUENCE {
	id			Model-coordinate-set-id OPTIONAL,
	descr			SEQUENCE OF Model-descr OPTIONAL,
	coordinates		CHOICE {
		literal			Coordinates,
		reference		Chem-graph-alignment } }
	
-- Model-coordinate-set-id: plain INTEGER identifying a coordinate set;
-- exported from this module.
Model-coordinate-set-id ::= INTEGER


-- Literal coordinates map chemical components into the model space.  Three 
-- mapping types are allowed, atomic coordinate models, density-grid models,
-- and surface models. A model consists of a sequence of such coordinate sets, 
-- and may thus combine coordinate subsets which have a different source.  
-- PDB-derived models contain a single atomic coordinate set, as they by
-- definition represent information from a single source.

-- Coordinates: exactly one of the three literal mapping types.
--   atomic   per-atom coordinate arrays
--   surface  geometric solid or triangle mesh
--   density  density grid
Coordinates ::= CHOICE {		
	atomic			Atomic-coordinates,
	surface			Surface-coordinates,
	density			Density-coordinates }

-- Literal atomic coordinate values give location, occupancy and order
-- parameters, and a pointer to a specific atom defined in the biostruc graph.
-- Temperature and occupancy factors have their conventional crystallographic
-- definitions, with units defined in the model space declaration.  Atoms,
-- sites, temperature-factors, occupancies and alternate-conformation-ids
-- are parallel arrays, i.e. they have the same number of values as given by
-- number-of-points. Conformation ensembles represent distinct correlated-
-- disorder subsets of the coordinates.  They will be present only for certain 
-- "views" of PDB structures, as described above. Their derivation from PDB-
-- supplied "alternate-conformation" ids is described below. 

-- Atomic-coordinates: literal per-atom data held as parallel arrays.
--   number-of-points     length shared by the parallel arrays below
--   atoms                which atom each point belongs to (Atom-pntrs,
--                        imported from MMDB-Features)
--   sites                scaled integer x, y, z positions
--   temperature-factors  optional
--   occupancies          optional
--   alternate-conf-ids   optional, one id per point
--   conf-ensembles       optional list of ensembles; this one is not a
--                        per-point array
Atomic-coordinates ::= SEQUENCE {
	number-of-points	INTEGER,
	atoms			Atom-pntrs,
	sites			Model-space-points,
	temperature-factors	Atomic-temperature-factors OPTIONAL,
	occupancies		Atomic-occupancies OPTIONAL, 
	alternate-conf-ids	Alternate-conformation-ids OPTIONAL,
	conf-ensembles		SEQUENCE OF Conformation-ensemble OPTIONAL }

-- The atoms whose location is described by each coordinate are identified
-- via a hierarchical pointer to the chemical graph of the biomolecular
-- assembly.  Coordinates may be matched with atoms in the chemical structure
-- by the values of the molecule, residue and atom id's given here,  which 
-- match exactly the items of the same type defined in Biostruc-graph.

-- Coordinates are given as integer values, with a scale factor to convert 
-- to real values for each x, y or z, in the units indicated in model-space.
-- Integer values must be divided by the scale factor.  This use of integer
-- values reduces the ASN.1 stream size. The scale factors for temperature 
-- factors and occupancies are given separately, but must be used in the same 
-- fashion to produce properly scaled real values.

-- Model-space-points: point positions as scaled integers.
--   scale-factor  divide each stored integer by this to get the real value,
--                 in the units given by the model space
--   x, y, z       one list per axis
Model-space-points ::= SEQUENCE {
	scale-factor		INTEGER,
	x			SEQUENCE OF INTEGER,	
	y			SEQUENCE OF INTEGER,
	z			SEQUENCE OF INTEGER } 

-- Atomic-temperature-factors: either the isotropic form (one value list) or
-- the anisotropic form (six value lists), never both.
Atomic-temperature-factors ::= CHOICE {
	isotropic		Isotropic-temperature-factors,
	anisotropic		Anisotropic-temperature-factors }

-- Isotropic-temperature-factors: scaled integer values.
--   scale-factor  divide each stored integer by this to get the real value
--   b             list of stored values
Isotropic-temperature-factors ::= SEQUENCE {
	scale-factor		INTEGER,
	b			SEQUENCE OF INTEGER }

-- Anisotropic-temperature-factors: scaled integer values in six lists.
--   scale-factor  divide each stored integer by this to get the real value
--   b-11 .. b-33  six lists, one per listed index pair
-- NOTE(review): presumably the six independent components of a symmetric
-- 3x3 tensor per atom; the specification does not say so.  Confirm.
Anisotropic-temperature-factors ::= SEQUENCE {
	scale-factor		INTEGER,
	b-11			SEQUENCE OF INTEGER,
	b-12			SEQUENCE OF INTEGER,
	b-13			SEQUENCE OF INTEGER,
	b-22			SEQUENCE OF INTEGER,
	b-23			SEQUENCE OF INTEGER,
	b-33			SEQUENCE OF INTEGER }

-- Atomic-occupancies: scaled integer occupancy values.
--   scale-factor  divide each stored integer by this to get the real value
--   o             list of stored values
Atomic-occupancies ::= SEQUENCE {
	scale-factor		INTEGER,
	o			SEQUENCE OF INTEGER }

-- An alternate conformation id is optionally associated with each coordinate. 
-- Aside from corrections due to the validation checks described above, the 
-- contents of MMDB Alternate-conformation-ids are identical to the PDB 
-- "alternate conformation" field.

-- Alternate-conformation-ids: list of ids; used by Atomic-coordinates as one
-- of its parallel arrays.
Alternate-conformation-ids ::= SEQUENCE OF Alternate-conformation-id 

-- Alternate-conformation-id: text flag.  Flags named in this specification
-- are " " (blank), letters such as "A", and the validation flags "u" and "b".
Alternate-conformation-id ::= VisibleString 

-- Correlated disorder ensemble is defined by a set of alternate conformation 
-- id's which identify coordinates relevant to that ensemble. These are 
-- defined from the validated and corrected contents of the PDB "alternate
-- conformation" field as described above.  A given ensemble, for example, may
-- consist of atom sites flagged by " " and "A" Alternate-conformation-ids. 
-- Names for ensembles are constructed from these flags. This example would be
-- named, in its description, "PDB Ensemble blank plus A".

-- Note that this interpretation is consistent with common PDB usage of the 
-- "alternate conformation" field, but that PDB specifications do not formally
-- distinguish between correlated and uncorrelated disorder in crystallographic
-- models. Ensembles identified in MMDB thus may not correspond to the meaning
-- intended by PDB or the depositor.  No information is lost, however, and
-- if the intended meaning is known alternative ensemble descriptions may be
-- reconstructed directly from the Alternate-conformation-ids.

-- Note that correlated disorder as defined here is allowed within an atomic 
-- coordinate set but not between the multiple sets which may define a model. 
-- Multiple sets within the same model are intended as a means to represent 
-- assemblies modeled from different experimentally determined structures,
-- where correlated disorder between coordinate sets is not relevant.

-- Conformation-ensemble: one correlated-disorder ensemble.
--   name          text name, for example "PDB Ensemble blank plus A"
--   alt-conf-ids  the alternate conformation ids whose coordinates belong
--                 to this ensemble
Conformation-ensemble ::= SEQUENCE {
	name		VisibleString,
	alt-conf-ids	SEQUENCE OF Alternate-conformation-id }


-- Literal surface coordinates define the chemical components whose structure
-- is described by a surface, and the surface itself.  The surface may be
-- either a regular geometric solid or a triangle-mesh of arbitrary shape.

-- Surface-coordinates: a surface plus the chemical components it describes.
--   contents  the components, as Chem-graph-pntrs
--   surface   exactly one shape: sphere, cone, cylinder or brick (all
--             imported from MMDB-Features), or one of the two mesh types
--             defined below in this module (T-mesh, Triangles)
Surface-coordinates ::= SEQUENCE {
	contents		Chem-graph-pntrs,
	surface			CHOICE {	sphere		Sphere,
						cone		Cone,
						cylinder	Cylinder,
						brick		Brick,
						tmesh		T-mesh,
						triangles	Triangles } }
-- T-mesh: mesh surface stored as scaled integer points plus one BOOLEAN list.
--   number-of-points  point count
--   scale-factor      scale for the stored integers
--   swap              list of BOOLEAN
--   x, y, z           one list per axis
-- NOTE(review): the meaning of swap and the way points form triangles are
-- not described in this specification; confirm against the consuming code.
T-mesh ::= SEQUENCE {
	number-of-points	INTEGER,
	scale-factor		INTEGER,
	swap			SEQUENCE OF BOOLEAN,
	x			SEQUENCE OF INTEGER,
	y			SEQUENCE OF INTEGER,
	z		        SEQUENCE OF INTEGER }

-- Triangles: mesh surface stored as a point table and a triangle table.
--   number-of-points     point count
--   scale-factor         scale for the stored point integers
--   x, y, z              one list per axis
--   number-of-triangles  triangle count
--   v1, v2, v3           three integer lists
-- NOTE(review): presumably v1, v2 and v3 hold, per triangle, the indices of
-- its three corner points in x, y, z; the specification does not state this
-- or the index base.  Confirm.
Triangles ::= SEQUENCE {
	number-of-points	INTEGER,
	scale-factor		INTEGER,
	x			SEQUENCE OF INTEGER,
	y			SEQUENCE OF INTEGER,
	z			SEQUENCE OF INTEGER,
	number-of-triangles     INTEGER,
	v1			SEQUENCE OF INTEGER, 
	v2			SEQUENCE OF INTEGER,
	v3			SEQUENCE OF INTEGER }


-- Literal density coordinates define the chemical components whose structure
-- is described by a density grid, parameters of this grid, and density values.

-- Density-coordinates: a density grid plus the components it describes.
-- All members are mandatory.
--   contents         the components, as Chem-graph-pntrs
--   grid-corners     Brick (imported from MMDB-Features) bounding the grid
--   grid-steps-x/y/z step count along each axis
--   fastest-varying  axis selector: x is 1, y is 2, z is 3
--   slowest-varying  axis selector with the same values
--   scale-factor     scale for the stored density integers
--   density          list of stored values
-- NOTE(review): presumably the two axis selectors give the storage order of
-- the density list; the specification does not spell this out.  Confirm.
Density-coordinates ::= SEQUENCE {
	contents		Chem-graph-pntrs,
	grid-corners		Brick,
	grid-steps-x		INTEGER,
	grid-steps-y		INTEGER,
	grid-steps-z		INTEGER,
	fastest-varying		ENUMERATED {
					x(1),
					y(2),
					z(3)},
	slowest-varying		ENUMERATED {
					x(1),
					y(2),
					z(3)},
	scale-factor		INTEGER,
	density			SEQUENCE OF INTEGER }


END

-- mmdb3.asn
--$Revision: 240129 $
--**********************************************************************
--
--  Biological Macromolecule 3-D Structure Data Types for MMDB,
--                A Molecular Modeling Database
--
--  Definitions for structural features and biostruc addressing
--
--  By Hitomi Ohkawa, Jim Ostell, Chris Hogue and Steve Bryant 
--
--  National Center for Biotechnology Information
--  National Institutes of Health
--  Bethesda, MD 20894 USA
--
--  July, 1996
--
--**********************************************************************

MMDB-Features DEFINITIONS ::=

BEGIN

EXPORTS Biostruc-feature-set, Chem-graph-pntrs, Atom-pntrs,
	Chem-graph-alignment, Chem-graph-interaction, Sphere, 
	Cone, Cylinder, Brick, Transform, Biostruc-feature-set-id, 
	Biostruc-feature-id;

IMPORTS Biostruc-id FROM MMDB
	Molecule-id, Residue-id, Atom-id FROM MMDB-Chemical-graph
	Model-id, Model-coordinate-set-id FROM MMDB-Structural-model
	User-object FROM NCBI-General
	Pub FROM NCBI-Pub;

-- Named model features refer to sets of residues or atoms, or a region in 
-- the model space.  A few specific feature types are allowed for compatibility
-- with PDB usage, but the purpose of a named model feature is simply to
-- associate various types of information with a set of atoms or 
-- residues, or a spatially-defined region of the model structure.  They also
-- support association of various properties with each residue or atom of a
-- set.

-- PDB-derived secondary structure defines a single feature, represented as a
-- sequence of residue motifs, as are the contents of PDB SITE and
-- FTNOTE records.  NCBI-assigned core and secondary structure descriptions
-- are also represented as a sequence of residue motifs.

-- A feature set: an id, optional descriptive entries, and the features.
Biostruc-feature-set ::= SEQUENCE {
    id          Biostruc-feature-set-id,
    descr       SEQUENCE OF Biostruc-feature-set-descr OPTIONAL,
    features    SEQUENCE OF Biostruc-feature
}

-- Integer identifier of a Biostruc-feature-set; also used by Other-feature
-- (component 'set') to point at a feature set.
Biostruc-feature-set-id ::= INTEGER

-- One descriptive entry of a feature set: a name, a comment, or a citation.
Biostruc-feature-set-descr ::= CHOICE {
    name            VisibleString,
    pdb-comment     VisibleString,
    other-comment   VisibleString,
    attribution     Pub
}

-- An explicitly specified type in Biostruc-feature allows for
-- efficient extraction and indexing of feature sets of a specific type. 
-- Special types are provided for coloring and rendering, as
-- needed by molecular graphics programs.
 
-- A single feature.  Every component is optional: an id, a name, an explicit
-- type code, a property to attach, and the location the feature applies to.
Biostruc-feature ::= SEQUENCE {
    id          Biostruc-feature-id OPTIONAL,
    name        VisibleString OPTIONAL,
    type        INTEGER {
        helix(1),
        strand(2),
        sheet(3),
        turn(4),
        site(5),
        footnote(6),
        comment(7),         -- new
        interaction(8),     -- interaction Data
        subgraph(100),      -- NCBI domain reserved
        region(101),
        core(102),          -- user core definition
        supercore(103),     -- NCBI reserved
        color(150),         -- new
        render(151),        -- new
        label(152),         -- new
        transform(153),     -- new
        camera(154),        -- new
        script(155),        -- for scripts
        alignment(200),     -- VAST reserved
        similarity(201),
        multalign(202),     -- multiple alignment
        indirect(203),      -- new
        cn3dstate(254),     -- Cn3D reserved
        other(255)
    } OPTIONAL,
    property    CHOICE {
        color       Color-prop,
        render      Render-prop,
        transform   Transform,
        camera      Camera,
        script      Biostruc-script,
        user        User-object
    } OPTIONAL,
    location    CHOICE {
        subgraph    Chem-graph-pntrs,
        region      Region-pntrs,
        alignment   Chem-graph-alignment,
        interaction Chem-graph-interaction,
        similarity  Region-similarity,
        indirect    Other-feature       -- new
    } OPTIONAL
}

-- Other-feature allows for specifying location via reference to another
-- Biostruc-feature and its location.

Other-feature ::= SEQUENCE {
    biostruc-id     Biostruc-id,                -- biostruc holding the referenced feature
    set             Biostruc-feature-set-id,    -- feature set within that biostruc
    feature         Biostruc-feature-id         -- feature within that set
}
                        
-- Integer identifier of a Biostruc-feature; also used by Other-feature
-- (component 'feature') to point at a feature.
Biostruc-feature-id ::= INTEGER

-- Atom, residue or molecule motifs describe a substructure defined by a set
-- of nodes from the chemical graph. PDB secondary structure features are
-- described as a residue motif, since they are not associated with any one of
-- the multiple models that may be provided in a PDB file.  NCBI-assigned
-- secondary structure is represented in the same way, even though it is
-- model specific, since this allows for simple mapping of the structural 
-- feature onto a sequence-only representation. This addressing mode may also 
-- be used to describe features to be associated with particular atoms, 
-- as, for example, the chemical shift observed in an NMR experiment.

-- Pointer to one molecule, qualified by the biostruc it belongs to.
Biostruc-molecule-pntr ::= SEQUENCE {
    biostruc-id     Biostruc-id,
    molecule-id     Molecule-id
}

-- A set of chemical graph nodes, addressed at atom, residue or molecule level.
Chem-graph-pntrs ::= CHOICE {
    atoms       Atom-pntrs,
    residues    Residue-pntrs,
    molecules   Molecule-pntrs
}

-- Atom pointers held as three id lists.
-- NOTE(review): the lists are presumably parallel, with number-of-ptrs
-- entries each; confirm against the MMDB documentation.
Atom-pntrs ::= SEQUENCE {
    number-of-ptrs  INTEGER,
    molecule-ids    SEQUENCE OF Molecule-id,
    residue-ids     SEQUENCE OF Residue-id,
    atom-ids        SEQUENCE OF Atom-id
}

-- Residue pointers, either listed one by one or given as intervals.
Residue-pntrs ::= CHOICE {
    explicit    Residue-explicit-pntrs,
    interval    SEQUENCE OF Residue-interval-pntr
}

-- Explicitly listed residues.
-- NOTE(review): molecule-ids and residue-ids are presumably parallel lists
-- of number-of-ptrs entries; confirm against the MMDB documentation.
Residue-explicit-pntrs ::= SEQUENCE {
    number-of-ptrs  INTEGER,
    molecule-ids    SEQUENCE OF Molecule-id,
    residue-ids     SEQUENCE OF Residue-id
}

-- A run of residues on one molecule, given by its two end residue ids.
Residue-interval-pntr ::= SEQUENCE {
    molecule-id     Molecule-id,
    from            Residue-id,
    to              Residue-id
}

-- A list of molecule ids together with a pointer count.
Molecule-pntrs ::= SEQUENCE {
    number-of-ptrs  INTEGER,
    molecule-ids    SEQUENCE OF Molecule-id
}

-- Region motifs describe features defined by spatial location, such as the
-- site specified by a coordinate value, or a region within a bounding volume.

Region-pntrs ::= SEQUENCE {
    model-id    Model-id,       -- model the region is defined in
    region      CHOICE {
        site        SEQUENCE OF Region-coordinates,     -- by coordinate reference
        boundary    SEQUENCE OF Region-boundary         -- by bounding solids
    }
}

-- Coordinate sites describe a region in space by reference to individual 
-- coordinates, in a particular model.  These coordinates may be either the
-- x, y and z values of atomic coordinates, the triangles of a surface mesh, 
-- or the grid points of a density model. All are addressed in the same manner,
-- as coordinate indices which give offsets from the beginning of the 
-- coordinate data arrays.  A coordinate-index of 5, for example, refers to 
-- the 5th x, y and z values of an atomic coordinate set, the 5th v1, v2, and v3
-- values of a triangle mesh, or the 5th value in a density grid.

-- PDB SITE and FTNOTE records refer to particular atomic coordinates, and they
-- are represented as a region motif with addresses of type Region-coordinates.
-- Any names or descriptions provided by PDB are thus associated with the
-- indicated sites, in the indicated model. 

Region-coordinates ::= SEQUENCE {
    model-coord-set-id      Model-coordinate-set-id,        -- coordinate set addressed
    number-of-coords        INTEGER OPTIONAL,
    coordinate-indices      SEQUENCE OF INTEGER OPTIONAL    -- offsets into the coordinate arrays
}

-- Region boundaries are defined by regular solids located in the model space.  

Region-boundary ::= CHOICE {
    sphere      Sphere,
    cone        Cone,
    cylinder    Cylinder,
    brick       Brick
}

-- A biostruc alignment establishes an equivalence of nodes in the chemical
-- graphs of two or more biostrucs. This may be mapped to a sequence
-- alignment in the case of biopolymers.
-- The 'dimension' component indicates the number of participants
-- in the alignment.  For pairwise alignments, such as VAST 
-- structure-structure alignments, the dimension will be always 2, with
-- biostruc-ids, alignment, and domain each containing two entries for an  
-- aligned pair.  The 'alignment' component contains a pair of Chem-graph-pntrs
-- specifying a like number of corresponding residues in each structure.
-- The 'domain' component specifies a region of each structure considered 
-- in the alignment.  Only one transform (for the second structure) and
-- one aligndata (for the pair) are provided for each VAST alignment.
--
-- For multiple alignments, a set of components are treated as
-- parallel arrays of length 'dimension'.
-- The 'transform' component moves each structure to align it with
-- the structure specified as the first element in the "parallel" array,
-- so necessarily the first transform is a NULL transform.
-- Align-stats are placeholders for scores.

Chem-graph-alignment ::= SEQUENCE {
    dimension       INTEGER DEFAULT 2,                      -- number of participants
    biostruc-ids    SEQUENCE OF Biostruc-id,                -- the aligned biostrucs
    alignment       SEQUENCE OF Chem-graph-pntrs,           -- corresponding residues per structure
    domain          SEQUENCE OF Chem-graph-pntrs OPTIONAL,  -- region of each structure considered
    transform       SEQUENCE OF Transform OPTIONAL,         -- moves a structure onto the first one
    aligndata       SEQUENCE OF Align-stats OPTIONAL        -- placeholders for scores
}

-- An interaction between molecules: an optional type code and distance
-- threshold, the interacting molecules, and optional contact lists.
-- NOTE(review): atom-distance presumably parallels atom-contacts; confirm.
Chem-graph-interaction ::= SEQUENCE {
    type                INTEGER {
        protein-protein(1),
        protein-dna(2),
        protein-rna(3),
        protein-chemical(4),
        dna-dna(5),
        dna-rna(6),
        dna-chemical(7),
        rna-rna(8),
        rna-chemical(9),
        other(255)
    } OPTIONAL,
    distance-threshold  RealValue OPTIONAL,
    interactors         SEQUENCE OF Biostruc-molecule-pntr,
    residue-contacts    SEQUENCE OF Chem-graph-pntrs OPTIONAL,
    atom-contacts       SEQUENCE OF Chem-graph-pntrs OPTIONAL,
    atom-distance       SEQUENCE OF RealValue OPTIONAL
}

-- Alignment scores; every component is optional and all scores are integers.
-- NOTE(review): scale-factor presumably applies to the integer scores in
-- this record; confirm against the MMDB documentation.
Align-stats ::= SEQUENCE {
    descr           VisibleString OPTIONAL,
    scale-factor    INTEGER OPTIONAL,
    vast-score      INTEGER OPTIONAL,
    vast-mlogp      INTEGER OPTIONAL,
    align-res       INTEGER OPTIONAL,
    rmsd            INTEGER OPTIONAL,
    blast-score     INTEGER OPTIONAL,
    blast-mlogp     INTEGER OPTIONAL,
    other-score     INTEGER OPTIONAL
}

-- A biostruc similarity describes spatial features which are similar between
-- two or more biostrucs.  Similarities are model dependent, and the model and
-- coordinate set ids of the biostrucs must be specified.  They do not 
-- necessarily map to a sequence alignment, as the regions referenced may
-- be pieces of a surface or grid, and thus not uniquely mappable to particular
-- chemical components.

Region-similarity ::= SEQUENCE {
    dimension       INTEGER DEFAULT 2,
    biostruc-ids    SEQUENCE OF Biostruc-id,    -- the biostrucs compared
    similarity      SEQUENCE OF Region-pntrs,   -- the similar regions
    transform       SEQUENCE OF Transform
}

-- Geometrical primitives are used in the definition of region motifs, and 
-- also non-atomic coordinates.  Spheres, cones, cylinders and bricks are 
-- defined by a few points in the model space.

-- Sphere given by its center point and radius.
Sphere ::= SEQUENCE {
    center  Model-space-point,
    radius  RealValue
}

-- Cone given by the two end points of its axis and the radius at the bottom.
Cone ::= SEQUENCE {
    axis-top        Model-space-point,
    axis-bottom     Model-space-point,
    radius-bottom   RealValue
}

-- Cylinder given by the two end points of its axis and its radius.
Cylinder ::= SEQUENCE {
    axis-top        Model-space-point,
    axis-bottom     Model-space-point,
    radius          RealValue
}

-- A brick is defined by the coordinates of eight corners.  These are assumed
-- to appear in the order 000, 001, 010, 011, 100, 101, 110, 111, where the
-- three digits (each 0 or 1) refer respectively to the x, y and z axes of a unit cube.
-- Opposite edges are assumed to be parallel. 

-- Eight corner points; the suffix of each component name is the corner label.
Brick ::= SEQUENCE {
    corner-000  Model-space-point,
    corner-001  Model-space-point,
    corner-010  Model-space-point,
    corner-011  Model-space-point,
    corner-100  Model-space-point,
    corner-101  Model-space-point,
    corner-110  Model-space-point,
    corner-111  Model-space-point
}

-- A point in model space stored as integers.
-- NOTE(review): x, y and z are presumably real coordinates multiplied by
-- scale-factor; confirm against the MMDB documentation.
Model-space-point ::= SEQUENCE {
    scale-factor    INTEGER,
    x               INTEGER,
    y               INTEGER,
    z               INTEGER
}

-- A real number carried as an integer plus a scale factor.
-- NOTE(review): presumably value = scaled-integer-value / scale-factor; confirm.
RealValue ::= SEQUENCE {
    scale-factor            INTEGER,
    scaled-integer-value    INTEGER
}


-- An identified list of moves (rotations and translations).
Transform ::= SEQUENCE {
    id      INTEGER,
    moves   SEQUENCE OF Move
}
            
-- One step of a Transform: a rotation or a translation.
Move ::= CHOICE {
    rotate      Rot-matrix,
    translate   Trans-matrix
}
          
-- A rotation matrix is defined by 9 numbers, given by row, i.e.,
-- with column indices varying fastest.
-- Coordinates, as a matrix with columns x, y, and z, are rotated
-- via multiplication with the rotation matrix.
-- A translation matrix is defined by 3 numbers, which are added to
-- the rotated coordinates to apply the specified amount of translation.

-- Components are laid out one matrix row per line: rot-RC is row R, column C.
Rot-matrix ::= SEQUENCE {
    scale-factor    INTEGER,
    rot-11 INTEGER, rot-12 INTEGER, rot-13 INTEGER,
    rot-21 INTEGER, rot-22 INTEGER, rot-23 INTEGER,
    rot-31 INTEGER, rot-32 INTEGER, rot-33 INTEGER
}

-- The three translation numbers, stored as integers with a scale factor.
Trans-matrix ::= SEQUENCE {
    scale-factor    INTEGER,
    tran-1          INTEGER,
    tran-2          INTEGER,
    tran-3          INTEGER
}

-- The camera is a position relative to the world coordinates
-- of the structure referred to by a location.  
-- this is used to set the initial position of the
-- camera using OpenGL.  scale is the value used to scale the
-- other values from floating point to integer

Camera ::= SEQUENCE {
    x           INTEGER,
    y           INTEGER,
    distance    INTEGER,
    angle       INTEGER,
    scale       INTEGER,    -- factor used to turn floating point values into integers
    modelview   GL-matrix
}
    
    
-- A 4 by 4 integer matrix plus a scale; components are laid out one row per
-- line, assuming mRC names row R, column C.
GL-matrix ::= SEQUENCE {
    scale   INTEGER,
    m11 INTEGER, m12 INTEGER, m13 INTEGER, m14 INTEGER,
    m21 INTEGER, m22 INTEGER, m23 INTEGER, m24 INTEGER,
    m31 INTEGER, m32 INTEGER, m33 INTEGER, m34 INTEGER,
    m41 INTEGER, m42 INTEGER, m43 INTEGER, m44 INTEGER
}


-- A color property: optional integer r, g, b components and an optional name.
Color-prop ::= SEQUENCE {
    r       INTEGER OPTIONAL,
    g       INTEGER OPTIONAL,
    b       INTEGER OPTIONAL,
    name    VisibleString OPTIONAL
}

-- Note that Render-prop is compatible with the Annmm specification,
-- i.e., its numbering schemes do not clash with those in Annmm.

Render-prop ::= INTEGER {
    -- drawing styles
    default(0),             -- Default view
    wire(1),                -- use wireframe
    space(2),               -- use spacefill
    stick(3),               -- use stick model (thin cylinders)
    ballNStick(4),          -- use ball & stick model
    thickWire(5),           -- thicker wireframe
    hide(9),                -- don't show this
    -- labels
    name(10),               -- display its name next to it
    number(11),             -- display its number next to it
    pdbNumber(12),          -- display its PDB number next to it
    -- surface objects
    objWireFrame(150),      -- display MMDB surface object as wireframe
    objPolygons(151),       -- display MMDB surface object as polygons
    -- color sets
    colorsetCPK(225),       -- color atoms like CPK models
    colorsetbyChain(226),   -- color each chain different
    colorsetbyTemp(227),    -- color using isotropic Temp factors
    colorsetbyRes(228),     -- color using residue properties
    colorsetbyLen(229),     -- color changes along chain length
    colorsetbySStru(230),   -- color by secondary structure
    colorsetbyHydro(231),   -- color by hydrophobicity
    colorsetbyObject(246),  -- color each object differently
    colorsetbyDomain(247),  -- color each domain differently
    other(255)
}

--  When a Biostruc-feature with a Biostruc-script is initiated,
--  it should play the specified steps one at a time, setting the feature-do
--  list as the active display.
--  The camera can be set using a feature-do, 
--  but it may be moved independently with
--  camera-move, which specifies how to move
--  the camera dynamically during the step along the path defined (e.g.,
--  a zoom, a rotate).
--  Any value of pause (in tenths of a second) will force a pause
--  after an image is shown.
--  If waitevent is TRUE, it will await a mouse or keypress and ignore 
--  the pause value.

-- A script is an ordered list of steps.
Biostruc-script ::= SEQUENCE OF Biostruc-script-step

Biostruc-script-step ::= SEQUENCE {
    step-id         Step-id,
    step-name       VisibleString OPTIONAL,
    feature-do      SEQUENCE OF Other-feature OPTIONAL,     -- features set as the active display
    camera-move     Transform OPTIONAL,                     -- camera motion during the step
    pause           INTEGER DEFAULT 10,                     -- pause after the image, in tenths of a second
    waitevent       BOOLEAN,                                -- TRUE: wait for mouse or keypress, ignore pause
    extra           INTEGER,
    jump            Step-id OPTIONAL                        -- NOTE(review): presumably the step to go to next; confirm
}

-- Integer identifier of a script step; used by the step-id and jump
-- components of Biostruc-script-step.
Step-id ::= INTEGER

END

-- ncbimime.asn
--$Revision: 6.12 $
--****************************************************************
--
--  NCBI MIME type (chemical/ncbi-asn1-ascii and chemical/ncbi-asn1-binary)
--  by Jonathan Epstein, February 1996
--
--****************************************************************

NCBI-Mime DEFINITIONS ::=
BEGIN

EXPORTS Ncbi-mime-asn1;
IMPORTS Biostruc, Biostruc-annot-set FROM MMDB
    Cdd FROM NCBI-Cdd
	Seq-entry FROM NCBI-Seqset
	Seq-annot FROM NCBI-Sequence
    Medline-entry FROM NCBI-Medline
    Cn3d-style-dictionary, Cn3d-user-annotations FROM NCBI-Cn3d;

-- Top-level payload of the NCBI MIME type: exactly one of the bundles below.
Ncbi-mime-asn1 ::= CHOICE {
    entrez      Entrez-general,             -- a single Entrez record (see Entrez-general)
    alignstruc  Biostruc-align,             -- structures & sequences & alignments
    alignseq    Biostruc-align-seq,         -- sequence alignment
    strucseq    Biostruc-seq,               -- structure & sequences
    strucseqs   Biostruc-seqs,              -- structure & sequences & alignments
    general     Biostruc-seqs-aligns-cdd    -- all-purpose "grab bag"
    -- others may be added here in the future
}

-- generic bundle of sequence and alignment info
Bundle-seqs-aligns ::= SEQUENCE {
    sequences           SET OF Seq-entry OPTIONAL,          -- sequences
    seqaligns           SET OF Seq-annot OPTIONAL,          -- sequence alignments
    strucaligns         Biostruc-annot-set OPTIONAL,        -- structure alignments
    imports             SET OF Seq-annot OPTIONAL,          -- imports (updates in Cn3D)
    style-dictionary    Cn3d-style-dictionary OPTIONAL,     -- Cn3D stuff
    user-annotations    Cn3d-user-annotations OPTIONAL
}

-- The general "grab bag": sequence and alignment data, plus optional
-- structures and an optional hint about which structure type to load.
Biostruc-seqs-aligns-cdd ::= SEQUENCE {
    seq-align-data  CHOICE {
        bundle  Bundle-seqs-aligns,     -- either seqs + alignments
        cdd     Cdd                     -- or CDD (which contains these)
    },
    structures      SET OF Biostruc OPTIONAL,   -- structures
    -- type of structures to load if not present; meanings and values
    -- are the same as MMDB's Model-type
    structure-type  ENUMERATED {
        ncbi-backbone(2),
        ncbi-all-atom(3),
        pdb-model(4)
    } OPTIONAL
}

-- A master structure and the structures aligned to it, with the structure
-- alignments, the sequences and the sequence alignments.
Biostruc-align ::= SEQUENCE {
    master              Biostruc,
    slaves              SET OF Biostruc,
    alignments          Biostruc-annot-set,     -- structure alignments
    sequences           SET OF Seq-entry,       -- sequences
    seqalign            SET OF Seq-annot,
    style-dictionary    Cn3d-style-dictionary OPTIONAL,
    user-annotations    Cn3d-user-annotations OPTIONAL
}

-- display seq structure align only
Biostruc-align-seq ::= SEQUENCE {
    sequences           SET OF Seq-entry,       -- sequences
    seqalign            SET OF Seq-annot,
    style-dictionary    Cn3d-style-dictionary OPTIONAL,
    user-annotations    Cn3d-user-annotations OPTIONAL
}

-- display structure and sequences (added by yanli)
Biostruc-seq ::= SEQUENCE {
    structure           Biostruc,
    sequences           SET OF Seq-entry,
    style-dictionary    Cn3d-style-dictionary OPTIONAL,
    user-annotations    Cn3d-user-annotations OPTIONAL
}

-- display blast alignment along with neighbor's structure (added by yanli)
Biostruc-seqs ::= SEQUENCE {
    structure           Biostruc,
    sequences           SET OF Seq-entry,       -- sequences
    seqalign            SET OF Seq-annot,
    style-dictionary    Cn3d-style-dictionary OPTIONAL,
    user-annotations    Cn3d-user-annotations OPTIONAL
}

-- Style codes for an Entrez-general record (its 'style' component).
Entrez-style ::= ENUMERATED {
    docsum(1),
    genbank(2),
    genpept(3),
    fasta(4),
    asn1(5),
    graphic(6),
    alignment(7),
    globalview(8),
    report(9),
    medlars(10),
    embl(11),
    pdb(12),
    kinemage(13)
}

-- One Entrez record: an optional title, the data (a Medline entry, a
-- sequence entry, a structure or a structure annotation set), its style,
-- and an optional location string.
Entrez-general ::= SEQUENCE {
    title       VisibleString OPTIONAL,
    data        CHOICE {
        ml          Medline-entry,
        prot        Seq-entry,
        nuc         Seq-entry,
        genome      Seq-entry,
        structure   Biostruc,
        strucAnnot  Biostruc-annot-set
    },
    style       Entrez-style,
    location    VisibleString OPTIONAL
}
END

-- objprt.asn
--$Revision: 6.0 $
--********************************************************************
--
--  Print Templates
--  James Ostell, 1993
--
--
--********************************************************************

NCBI-ObjPrt DEFINITIONS ::=
BEGIN

EXPORTS PrintTemplate, PrintTemplateSet;

PrintTemplate ::= SEQUENCE {
    name        TemplateName,               -- name for this template
    labelfrom   VisibleString OPTIONAL,     -- ASN.1 path to get label from
    format      PrintFormat
}

-- Name of a print template; used by PrintTemplate (name) and by
-- PrintForm (use-template).
TemplateName ::= VisibleString

-- A list of print templates.
PrintTemplateSet ::= SEQUENCE OF PrintTemplate

PrintFormat ::= SEQUENCE {
    asn1    VisibleString,              -- ASN.1 partial path for this
    label   VisibleString OPTIONAL,     -- printable label
    prefix  VisibleString OPTIONAL,
    suffix  VisibleString OPTIONAL,
    form    PrintForm
}

-- Forms for various ASN.1 components
PrintForm ::= CHOICE {
    block           PrintFormBlock,
    boolean         PrintFormBoolean,
    enum            PrintFormEnum,
    text            PrintFormText,
    use-template    TemplateName,
    user            UserFormat,
    null            NULL                -- rarely used
}

-- User-supplied formatting: two function names, the second optional.
-- NOTE(review): presumably names of functions registered with the printing
-- code; confirm against the ObjPrt implementation.
UserFormat ::= SEQUENCE {
    printfunc       VisibleString,
    defaultfunc     VisibleString OPTIONAL
}

-- for SEQUENCE, SET
PrintFormBlock ::= SEQUENCE {
    separator   VisibleString OPTIONAL,
    components  SEQUENCE OF PrintFormat
}

-- Optional strings for the two boolean values.
PrintFormBoolean ::= SEQUENCE {
    true    VisibleString OPTIONAL,
    false   VisibleString OPTIONAL
}

-- Optional list of strings for an enumerated component.
PrintFormEnum ::= SEQUENCE {
    values  SEQUENCE OF VisibleString OPTIONAL
}

-- Text form: a single optional function name.
PrintFormText ::= SEQUENCE {
    textfunc    VisibleString OPTIONAL
}
    
END


-- omssa.asn
-- $Id: omssa.asn 192083 2010-05-19 22:28:08Z lewisg $
--**********************************************************************
--
--  OMSSA (Open Mass Spectrometry Search Algorithm) data definitions
--  Lewis Geer, 2003
--
--  make using something like
--  "datatool -m omssa.asn -oc ObjOmssa -oA -od omssa.def"
--
--  note that this file requires omssa.def
--
--**********************************************************************

OMSSA DEFINITIONS ::=
BEGIN

IMPORTS Bioseq FROM NCBI-Sequence;

-- Generic holder for experimental info

-- A name/value pair of strings.
NameValue ::= SEQUENCE {
    name    VisibleString,
    value   VisibleString
}

-- Holds a single spectrum

MSSpectrum ::= SEQUENCE {
    number          INTEGER,                            -- unique number of spectrum
    charge          SEQUENCE OF INTEGER,                -- may be more than one if unknown
    precursormz     INTEGER,                            -- scaled precursor m/z, scale is in MSSearchSettings
    mz              SEQUENCE OF INTEGER,                -- scaled product m/z
    abundance       SEQUENCE OF INTEGER,                -- scaled product abundance
    iscale          REAL,                               -- abundance scale, float to integer
    ids             SEQUENCE OF VisibleString OPTIONAL, -- ids/filenames
    namevalue       SEQUENCE OF NameValue OPTIONAL      -- extra info: retention times, etc.
}


-- Holds a set of spectra

-- An ordered list of MSSpectrum records.
MSSpectrumset ::= SEQUENCE OF MSSpectrum


-- enumerate enzymes

-- Enzyme codes; used by the 'enzyme' component of MSSearchSettings.
MSEnzymes ::= INTEGER {
    trypsin(0),
    argc(1),
    cnbr(2),
    chymotrypsin(3),
    formicacid(4),
    lysc(5),
    lysc-p(6),
    pepsin-a(7),
    tryp-cnbr(8),
    tryp-chymo(9),
    trypsin-p(10),
    whole-protein(11),
    aspn(12),
    gluc(13),
    aspngluc(14),
    top-down(15),
    semi-tryptic(16),
    no-enzyme(17),
    chymotrypsin-p(18),
    aspn-de(19),
    gluc-de(20),
    lysn(21),
    thermolysin-p(22),
    semi-chymotrypsin(23),
    semi-gluc(24),
    max(25),
    none(255)
}


-- enumerate modifications

-- Modification codes.
-- NOTE(review): a few identifiers look misspelled (for example
-- carbamidometylh, methythiold, suphonem).  They are kept exactly as they
-- are because named numbers appear in the ASN.1 text encoding, so renaming
-- them would presumably break existing data; confirm before changing any.
MSMod ::= INTEGER {
    methylk(0),                     -- methylation of K
    oxym(1),                        -- oxidation of methionine
    carboxymethylc(2),              -- carboxymethyl cysteine
    carbamidomethylc(3),            -- carbamidomethyl cysteine
    deamidationkq(4),               -- deamidation of K and Q
    propionamidec(5),               -- propionamide cysteine
    phosphorylations(6),            -- phosphorylation of S
    phosphorylationt(7),            -- phosphorylation of T
    phosphorylationy(8),            -- phosphorylation of Y
    ntermmcleave(9),                -- N terminal methionine cleavage
    ntermacetyl(10),                -- N terminal protein acetyl
    ntermmethyl(11),                -- N terminal protein methyl
    ntermtrimethyl(12),             -- N terminal protein trimethyl
    methythiold(13),                -- beta methythiolation of D
    methylq(14),                    -- methylation of Q
    trimethylk(15),                 -- trimethylation of K
    methyld(16),                    -- methylation of D
    methyle(17),                    -- methylation of E
    ctermpepmethyl(18),             -- C terminal methylation
    trideuteromethyld(19),          -- trideuteromethylation of D
    trideuteromethyle(20),          -- trideuteromethylation of E
    ctermpeptrideuteromethyl(21),   -- C terminal trideuteromethylation
    nformylmet(22),
    twoamino3oxobutanoicacid(23),
    acetylk(24),
    ctermamide(25),
    bmethylthiold(26),
    carbamidomethylk(27),
    carbamidometylh(28),
    carbamidomethyld(29),
    carbamidomethyle(30),
    carbamylk(31),
    ntermcarbamyl(32),
    citrullinationr(33),
    cysteicacidc(34),
    diiodinationy(35),
    dimethylk(36),
    dimethylr(37),
    ntermpepdimethyl(38),
    dihydroxyf(39),
    thioacetylk(40),
    ntermpeptioacetyl(41),
    farnesylationc(42),
    formylk(43),
    ntermpepformyl(44),
    formylkynureninw(45),
    phef(46),
    gammacarboxyld(47),
    gammacarboxyle(48),
    geranylgeranylc(49),
    ntermpepglucuronylg(50),
    glutathionec(51),
    glyglyk(52),
    guanidinationk(53),
    his2asnh(54),
    his2asph(55),
    ctermpephsem(56),
    ctermpephselactm(57),
    hydroxykynureninw(58),
    hydroxylationd(59),
    hydroxylationk(60),
    hydroxylationn(61),
    hydroxylationp(62),
    hydroxylationf(63),
    hydroxylationy(64),
    iodinationy(65),
    kynureninw(66),
    lipoylk(67),
    ctermpepmeester(68),
    meesterd(69),
    meestere(70),
    meesters(71),
    meestery(72),
    methylc(73),
    methylh(74),
    methyln(75),
    ntermpepmethyl(76),
    methylr(77),
    ntermpepmyristoyeylationg(78),
    ntermpepmyristoyl4hg(79),
    ntermpepmyristoylationg(80),
    myristoylationk(81),
    ntermformyl(82),
    nemc(83),
    nipcam(84),
    nitrow(85),
    nitroy(86),
    ctermpepo18(87),
    ctermpepdio18(88),
    oxyh(89),
    oxyw(90),
    ppantetheines(91),
    palmitoylationc(92),
    palmitoylationk(93),
    palmitoylations(94),
    palmitoylationt(95),
    phospholosss(96),
    phospholosst(97),
    phospholossy(98),
    phosphoneutrallossc(99),
    phosphoneutrallossd(100),
    phosphoneutrallossh(101),
    propionylk(102),
    ntermpeppropionyl(103),
    propionylheavyk(104),
    ntermpeppropionylheavy(105),
    pyridylk(106),
    ntermpeppyridyl(107),
    ntermpeppyrocmc(108),
    ntermpeppyroe(109),
    ntermpeppyroq(110),
    pyroglutamicp(111),
    spyridylethylc(112),
    semetm(113),
    sulfationy(114),
    suphonem(115),
    triiodinationy(116),
    trimethylationr(117),
    ntermpeptripalmitatec(118),
    usermod1(119),                  -- start of user defined mods
    usermod2(120),
    usermod3(121),
    usermod4(122),
    usermod5(123),
    usermod6(124),
    usermod7(125),
    usermod8(126),
    usermod9(127),
    usermod10(128),                 -- end of user defined mods
    icatlight(129),
    icatheavy(130),
    camthiopropanoylk(131),
    phosphoneutrallosss(132),
    phosphoneutrallosst(133),
    phosphoetdlosss(134),
    phosphoetdlosst(135),
    arg-13c6(136),
    arg-13c6-15n4(137),
    lys-13c6(138),
    oxy18(139),
    beta-elim-s(140),
    beta-elim-t(141),
    usermod11(142), usermod12(143), usermod13(144), usermod14(145),
    usermod15(146), usermod16(147), usermod17(148), usermod18(149),
    usermod19(150), usermod20(151), usermod21(152), usermod22(153),
    usermod23(154), usermod24(155), usermod25(156), usermod26(157),
    usermod27(158), usermod28(159), usermod29(160), usermod30(161),
    sulfinicacid(162),
    arg2orn(163),
    dehydro(164),
    carboxykynurenin(165),
    sumoylation(166),
    iTRAQ114nterm(167),
    iTRAQ114K(168),
    iTRAQ114Y(169),
    iTRAQ115nterm(170),
    iTRAQ115K(171),
    iTRAQ115Y(172),
    iTRAQ116nterm(173),
    iTRAQ116K(174),
    iTRAQ116Y(175),
    iTRAQ117nterm(176),
    iTRAQ117K(177),
    iTRAQ117Y(178),
    mmts(179),
    lys-2H4(180),
    lys-13C615N2(181),
    hexNAcN(182),
    dHexHexNAcN(183),
    hexNAcS(184),
    hexNAcT(185),
    mod186(186), mod187(187), mod188(188), mod189(189), mod190(190),
    mod191(191), mod192(192), mod193(193), mod194(194), mod195(195),
    mod196(196), mod197(197), mod198(198), mod199(199), mod200(200),
    mod201(201), mod202(202), mod203(203), mod204(204), mod205(205),
    mod206(206), mod207(207), mod208(208), mod209(209), mod210(210),
    mod211(211), mod212(212), mod213(213), mod214(214), mod215(215),
    mod216(216), mod217(217), mod218(218), mod219(219), mod220(220),
    mod221(221), mod222(222), mod223(223), mod224(224), mod225(225),
    mod226(226), mod227(227), mod228(228), mod229(229), mod230(230),
    max(231),                       -- maximum number of mods
    unknown(9999),                  -- modification of unknown type
    none(10000)
}

-- enumerate modification types

MSModType ::= INTEGER {
    -- anywhere in the sequence
    modaa(0),       -- at particular amino acids
    -- protein termini
    modn(1),        -- at the N terminus of a protein
    modnaa(2),      -- at the N terminus of a protein at particular amino acids
    modc(3),        -- at the C terminus of a protein
    modcaa(4),      -- at the C terminus of a protein at particular amino acids
    -- peptide termini
    modnp(5),       -- at the N terminus of a peptide
    modnpaa(6),     -- at the N terminus of a peptide at particular amino acids
    modcp(7),       -- at the C terminus of a peptide
    modcpaa(8),     -- at the C terminus of a peptide at particular amino acids
    modmax(9)       -- the max number of modification types
}


-- mass container

-- Three masses; the same three component names appear in MSModSpec, where
-- they are described as monoisotopic, average and monoisotopic n15 mass.
MSMassSet ::= SEQUENCE {
    monomass        REAL,
    averagemass     REAL,
    n15mass         REAL
}

-- Modification Definition

MSModSpec ::= SEQUENCE {
    mod             MSMod,                              -- what is the mod
    type            MSModType,                          -- modification type
    name            VisibleString,                      -- friendly name of mod
    monomass        REAL,                               -- monoisotopic mass
    averagemass     REAL,                               -- average mass
    n15mass         REAL,                               -- monoisotopic n15 mass
    residues        SEQUENCE OF VisibleString OPTIONAL, -- residues to apply mod to
    neutralloss     MSMassSet OPTIONAL,                 -- loss after precursor mass determination
    unimod          INTEGER OPTIONAL,                   -- the equivalent Unimod Accession number
    psi-ms          VisibleString OPTIONAL              -- the PSI-MS equivalent name
}
    
-- Holds a set of modifications

-- An ordered list of MSModSpec records.
MSModSpecSet ::= SEQUENCE OF MSModSpec

-- How is charge to be handled?  Some input files are not clear
-- on this.  For example, a dta file only specifies one charge, 
-- even though the charge is not really known.

-- Whether charge one should be guessed; used by MSChargeHandle (calcplusone).
MSCalcPlusOne ::= INTEGER {
    dontcalc(0),    -- don't guess charge one
    calc(1)         -- guess charge one
}

-- user instructions on whether to believe charges in input file
        
-- Charge handling modes; used by MSChargeHandle (calccharge).
MSCalcCharge ::= INTEGER {
    calculate(0),   -- guess the charge(s) from the data
    usefile(1),     -- use what the input file says
    userange(2)     -- use the charge range specified
}

-- How to handle precursor charge

MSChargeHandle ::= SEQUENCE {
    calcplusone         MSCalcPlusOne DEFAULT 1,    -- do we guess charge one?
    calccharge          MSCalcCharge DEFAULT 2,     -- how do we handle charges?
    mincharge           INTEGER DEFAULT 2,          -- if userange, what is the min?
    maxcharge           INTEGER DEFAULT 3,          -- if userange, what is the max?
    considermult        INTEGER DEFAULT 3,          -- at which precursor charge to consider +2 ions?
    plusone             REAL,                       -- what % of peaks below precursor needed to call as +1
    maxproductcharge    INTEGER OPTIONAL,           -- maximum product ion charge
    prodlesspre         BOOLEAN OPTIONAL,           -- product charge always less than or equal to precursor?
    negative            INTEGER DEFAULT 1           -- negative ion search if -1, positive ion if 1
}
        

-- what type of atomic mass to use

-- Mass type codes; used by the precursorsearchtype and productsearchtype
-- components of MSSearchSettings.
MSSearchType ::= INTEGER {
    monoisotopic (0),
    average (1),
    monon15 (2),
    exact (3),
    multiisotope (4),
    max (5)
}
        
-- what is the charge dependence of the mass tolerance?

MSZdependence ::= INTEGER {
    independent (0),    -- mass tol. invariant with charge
    linearwithz (1),    -- mass tol. scales with charge
    max (2)
}
        
-- Iterative search settings

-- The three e-value thresholds that control an iterative search.
MSIterativeSettings ::= SEQUENCE {
    researchthresh  REAL,   -- e-val threshold for re-searching spectra, 0 = always re-search
    subsetthresh    REAL,   -- e-val threshold for picking sequence subset, 0 = all sequences
    replacethresh   REAL    -- e-val threshold for replacing hitset, 0 = only if better
}
        
-- Library search settings

MSLibrarySettings ::= SEQUENCE {
    libnames            SEQUENCE OF VisibleString,  -- names of search libraries
    presearch           BOOLEAN,                    -- should there be a restriction on precursor mass?
    useomssascore       BOOLEAN,                    -- use the omssa score?
    usereplicatescore   BOOLEAN,                    -- use the number of replicates score?
    qtofscore           BOOLEAN                     -- use the qtof score?
}

-- Generic search settings: the full parameter set for one search run.
-- Mandatory elements come first; later elements are OPTIONAL or carry
-- a DEFAULT.

MSSearchSettings ::= SEQUENCE {
        precursorsearchtype MSSearchType,  -- mass type (e.g. average or monoisotopic) for precursor ions
        productsearchtype MSSearchType,  -- mass type (e.g. average or monoisotopic) for product ions
        ionstosearch SEQUENCE OF MSIonType,  -- which ions to search?
        peptol REAL,  -- peptide mass tolerance
        msmstol REAL, -- msms mass tolerance
        zdep MSZdependence,  -- what is the charge dependence of the mass tolerance?
        cutoff REAL,  -- e-value cutoff
          -- the next 3 fields define the intensity fraction below
          -- which peaks will be discarded
        cutlo REAL, -- the start of the cutoff, as a fraction of the most intense peak
        cuthi REAL, -- the end of the cutoff
        cutinc REAL, -- the increment of the cutoff
        singlewin INTEGER,  -- the size of the single charge filtering window
        doublewin INTEGER,  -- the size of the double charge filtering window
        singlenum INTEGER,  -- the number of peaks allowed in the single window
        doublenum INTEGER,  -- the number of peaks allowed in the double window
        fixed SEQUENCE OF MSMod,     -- fixed PTMs
        variable SEQUENCE OF MSMod,  -- variable PTMs
        enzyme MSEnzymes,      -- digestion enzyme
        missedcleave INTEGER,  -- number of missed cleavages allowed
        hitlistlen INTEGER DEFAULT 25,  -- the number of hits kept in memory
                                        -- for a spectrum
        db VisibleString,  -- sequence set to search, e.g. "nr"
        tophitnum INTEGER, -- number of m/z values to consider in the first pass
        minhit INTEGER DEFAULT 2, -- minimum number of m/z values for a valid hit
        minspectra INTEGER DEFAULT 4, -- minimum number of m/z values for a valid spectrum
        scale INTEGER DEFAULT 100,  -- scale for m/z float to integer conversion
        maxmods INTEGER DEFAULT 64,  -- maximum number of mass ladders per
                                     -- database peptide
        taxids SEQUENCE OF INTEGER OPTIONAL,  -- taxa to limit the search to
        chargehandling MSChargeHandle OPTIONAL,  -- how to deal with charges
        usermods MSModSpecSet OPTIONAL,  -- user defined modifications
        pseudocount INTEGER DEFAULT 1, -- min number of counts per precursor bin
        searchb1 INTEGER DEFAULT 0,    -- should b1 product be in search (1=no, 0=yes)
        searchctermproduct INTEGER DEFAULT 0, -- should c terminus ion be searched (1=no, 0=yes)
        maxproductions INTEGER DEFAULT 0,     -- max number of ions in each series (0=all)
        minnoenzyme INTEGER DEFAULT 4,        -- min number of AA in peptide for noenzyme search
        maxnoenzyme INTEGER DEFAULT 0,        -- max number of AA in peptide for noenzyme search (0=none)
        exactmass REAL OPTIONAL,              -- the threshold in Da for adding neutron
        settingid INTEGER OPTIONAL,           -- id of the search settings
        iterativesettings MSIterativeSettings OPTIONAL,   -- iterative search settings
        precursorcull INTEGER OPTIONAL,       -- turn on aggressive precursor culling for ETD (0=none)
        infiles SEQUENCE OF MSInFile OPTIONAL,         -- input files
        outfiles SEQUENCE OF MSOutFile OPTIONAL,       -- output files
        nocorrelationscore INTEGER OPTIONAL,           -- correlation score switch (1=nocorr, i.e. no correlation score)
        probfollowingion REAL OPTIONAL,                -- probability of a consecutive ion (used in correlation)
        nmethionine BOOLEAN OPTIONAL,                  -- should nmethionine be cleaved?
        automassadjust REAL OPTIONAL,                  -- fraction allowable adjustment of product mass tolerance
        lomasscutoff REAL OPTIONAL,                    -- low mass filter in Daltons, unscaled
        libsearchsettings MSLibrarySettings OPTIONAL,  -- library search settings
        noprolineions SEQUENCE OF MSIonType OPTIONAL,  -- which ions to use no proline rule
        reversesearch BOOLEAN OPTIONAL,                -- do reverse search
        othersettings SEQUENCE OF NameValue OPTIONAL,  -- extra search settings
        numisotopes INTEGER OPTIONAL,                  -- number of isotopic peaks to search when using MSSearchType multiisotope
        pepppm BOOLEAN OPTIONAL,                       -- search precursor as ppm
        msmsppm BOOLEAN OPTIONAL,                      -- search product as ppm
        reportedhitcount INTEGER OPTIONAL              -- the maximum number of hits to report per spectrum, 0=all
        }

-- Serialized data formats; used as the output file type in MSOutFile

MSSerialDataFormat ::= INTEGER {
        none (0) ,
        asntext (1),      -- open ASN.1 text format
        asnbinary (2),    -- open ASN.1 binary format
        xml (3),          -- open XML format
        csv (4),          -- csv (excel)
        pepxml (5),       -- pepXML format
	xmlbz2 (6)        -- bzip2 compressed XML format
        }

-- Description of one output file of a search

MSOutFile ::= SEQUENCE {
        outfile VisibleString,                -- output file name
        outfiletype MSSerialDataFormat,       -- output file type
        includerequest BOOLEAN                -- should the output include the request?
        }

-- Spectrum file types; used as the input file type in MSInFile

MSSpectrumFileType ::= INTEGER { 
        dta(0), 
        dtablank(1), 
        dtaxml(2), 
        asc(3), 
        pkl(4), 
        pks(5), 
        sciex(6), 
        mgf(7), 
        unknown(8),
        oms(9),    -- ASN.1 binary for iterative search
        omx(10),   -- XML for iterative search
        xml(11),   -- XML MSRequest
	omxbz2 (12) -- bzip2 compressed omx file
        }

-- Description of one input file of a search

MSInFile ::= SEQUENCE {
        infile VisibleString,                     -- input file name
        infiletype MSSpectrumFileType        -- input file type
        }

-- An ordered list of search settings (used by MSRequest.moresettings)
MSSearchSettingsSet ::= SEQUENCE OF MSSearchSettings

-- The search request that is given to the OMSSA algorithm:
-- the spectra plus the settings to search them with.

MSRequest ::= SEQUENCE {
        spectra MSSpectrumset,      -- the set of spectra
        settings MSSearchSettings,  -- the search settings
        rid VisibleString OPTIONAL,  -- request id
        moresettings MSSearchSettingsSet OPTIONAL, -- settings for additional search runs
        modset MSModSpecSet OPTIONAL  -- list of mods that can be used in the search
        }


-- Enumeration of ion types (ion series)

MSIonType ::= INTEGER {
        a (0),
        b (1),
        c (2),
        x (3),
        y (4),
        z (5),       -- actually zdot
        parent(6),
        internal(7),
        immonium(8),
        unknown(9),
        adot (10),
        x-CO2 (11),
        adot-CO2 (12),
        max (13)     -- NOTE(review): looks like an upper-bound sentinel rather than an ion type; confirm
        }
        
-- Types of neutral loss

MSIonNeutralLoss ::= INTEGER {
        water (0),    -- loss of water, minus 18 Da
        ammonia (1)   -- loss of ammonia, minus 17 Da
        }

-- Isotopic type of ion: how many c13 atoms the molecule contains

MSIonIsotopicType ::= INTEGER {
        monoisotopic (0), -- no c13s in molecule
        c13 (1),          -- one c13 in molecule
        c13two (2),       -- two c13s in molecule, and so on...
        c13three (3),     -- three c13s in molecule
        c13four (4)       -- four c13s in molecule
        }

-- Type of immonium ion

MSImmonium ::= SEQUENCE {
        parent VisibleString,           -- parent amino acid
        product VisibleString OPTIONAL  -- product ion code
        }

-- Ion type at a finer level than ion series; every element is optional

MSIon ::= SEQUENCE {
        neutralloss MSIonNeutralLoss OPTIONAL,  -- is this peak a neutral loss?
        isotope MSIonIsotopicType OPTIONAL,         -- isotopic composition of the peak
        internal VisibleString OPTIONAL,  -- if iontype is internal, this is the internal sequence
        immonium MSImmonium OPTIONAL      -- if iontype is immonium, its characteristics
        }

-- Annotations (comments) about the ion

MSIonAnnot ::= SEQUENCE {
        suspect BOOLEAN OPTIONAL,         -- is this peak suspect?
        massdiff REAL OPTIONAL,           -- difference in mass from the library spectrum
        missingisotope BOOLEAN OPTIONAL   -- are the lower mass peaks missing?
        }

-- Defines a particular ion that was matched

MSMZHit ::= SEQUENCE {
        ion MSIonType,  -- ion type, e.g. b
        charge INTEGER, -- ion charge
        number INTEGER, -- the sequential number of the ion
        mz INTEGER,              -- scaled m/z value in Da
        index INTEGER OPTIONAL,  -- the index of the peak in the original spectrum
        moreion MSIon OPTIONAL,  -- more information about the ion type
        annotation MSIonAnnot OPTIONAL   -- annotations on the ion
}


-- Contains information about one sequence that contains the hit
-- peptide; several sequences may share an identical peptide sequence.

MSPepHit ::= SEQUENCE {
        start INTEGER,       -- start position (inclusive) in sequence
        stop INTEGER,        -- stop position (inclusive) in sequence
        gi INTEGER OPTIONAL, -- genbank identifier
        accession VisibleString OPTIONAL,  -- sequence accession
        defline VisibleString OPTIONAL,    -- sequence description
        protlength INTEGER OPTIONAL,       -- length of protein
        oid INTEGER OPTIONAL,              -- blast library oid
        reversed BOOLEAN OPTIONAL,         -- reversed sequence
        pepstart VisibleString OPTIONAL,   -- AA before the peptide
        pepstop VisibleString OPTIONAL     -- AA after the peptide
}        

-- A modification to a hit peptide

MSModHit ::= SEQUENCE {
        site INTEGER,  -- the position in the peptide
        modtype MSMod  -- the type of modification
        }


-- A single named score; MSHits carries a sequence of these

MSScoreSet ::= SEQUENCE {
       name VisibleString,  -- name of the score
       value REAL           -- value of the score
       }

-- A hit to a given spectrum. Several elements are deprecated here and
-- have counterparts of the same name in MSPepHit.

MSHits ::= SEQUENCE {
        evalue REAL,     -- E-value (expect value)
        pvalue REAL,     -- P-value (probability value)
        charge INTEGER,  -- the charge state used in search.  -1 == not +1
        pephits SEQUENCE OF MSPepHit, -- peptides that match this hit
        mzhits SEQUENCE OF MSMZHit OPTIONAL,  -- ions hit
        pepstring VisibleString OPTIONAL,  -- the peptide sequence
        mass INTEGER OPTIONAL,  -- scaled experimental mass of peptide in Da
        mods SEQUENCE OF MSModHit OPTIONAL,  -- modifications to sequence
        pepstart VisibleString OPTIONAL,  -- AA before the peptide (deprecated)
        pepstop VisibleString OPTIONAL,   -- AA after the peptide (deprecated)
        protlength INTEGER OPTIONAL,      -- length of protein hit (deprecated)
        theomass INTEGER OPTIONAL,        -- scaled theoretical mass of peptide hit
        oid INTEGER OPTIONAL,              -- blast library oid (deprecated)
	scores SEQUENCE OF MSScoreSet OPTIONAL, -- optional scores (for library search)
	libaccession VisibleString OPTIONAL     -- library search accession
        }


-- Error return for a particular spectrum's hitset

MSHitError ::= INTEGER {
        none (0),
        generalerr (1),
        unable2read (2),  -- can't read the spectrum
        notenuffpeaks (3) -- not enough peaks to search
        }

-- MSHitSet annotation by the end user

MSUserAnnot ::= INTEGER {
        none (0),
        delete (1),
        flag (2)
        }

-- Contains a set of hits to a single spectrum

MSHitSet ::= SEQUENCE {
        number INTEGER, -- unique number of spectrum
        error MSHitError OPTIONAL,               -- error, if any
        hits SEQUENCE OF MSHits OPTIONAL,        -- set of hits to the spectrum
        ids SEQUENCE OF VisibleString OPTIONAL,  -- filenames or other ids of spectra searched
        namevalue SEQUENCE OF NameValue OPTIONAL,-- extra info: retention times, etc.
        settingid INTEGER OPTIONAL,              -- id of the search setting used
        userannotation MSUserAnnot OPTIONAL      -- allows users to annotate this hit set (see MSUserAnnot: delete, flag)
        }


-- Error return for the entire response

MSResponseError ::= INTEGER {
        none (0),
        generalerr (1),
        noblastdb (2),   -- unable to open blast library
        noinput (3)      -- input missing
        }


-- Bioseq container: pairs a sequence with its blast library oid

MSBioseq ::= SEQUENCE {
        oid INTEGER, -- blast library oid
        seq Bioseq   -- the sequence itself
        }

-- A list of bioseq containers (used by MSResponse.bioseqs)
MSBioseqSet ::= SEQUENCE OF MSBioseq

-- Search results

MSResponse ::= SEQUENCE {
        hitsets SEQUENCE OF MSHitSet,  -- hits grouped by spectrum
        scale INTEGER DEFAULT 100,  -- scale to change m/z float to integer
        rid VisibleString OPTIONAL,  -- request id
        error MSResponseError OPTIONAL,  -- error response
        version VisibleString OPTIONAL,  -- version of OMSSA
        email VisibleString OPTIONAL,  -- email address for notification
        dbversion INTEGER OPTIONAL,    -- version of db searched (usually size)
        bioseqs MSBioseqSet OPTIONAL  -- sequences found in search
        }        
        
-- Holds both search requests and responses; each list is optional

MSSearch ::= SEQUENCE {
        request SEQUENCE OF MSRequest OPTIONAL,    -- the search requests
        response SEQUENCE OF MSResponse OPTIONAL   -- the search responses
        }

END

-- pcassay.asn
-- $Id: pcassay.asn 282508 2011-05-11 22:47:31Z ywang $
-- ===========================================================================
--
--                            PUBLIC DOMAIN NOTICE
--               National Center for Biotechnology Information
--
--  This software/database is a "United States Government Work" under the
--  terms of the United States Copyright Act.  It was written as part of
--  the author's official duties as a United States Government employee and
--  thus cannot be copyrighted.  This software/database is freely available
--  to the public for use. The National Library of Medicine and the U.S.
--  Government have not placed any restriction on its use or reproduction.
--
--  Although all reasonable efforts have been taken to ensure the accuracy
--  and reliability of the software and data, the NLM and the U.S.
--  Government do not and cannot warrant the performance or results that
--  may be obtained by using this software or data. The NLM and the U.S.
--  Government disclaim all warranties, express or implied, including
--  warranties of performance, merchantability or fitness for any particular
--  purpose.
--
--  Please cite the author in any work or product based on this material.
--
-- ===========================================================================
--
-- Authors:  NCBI Structure Group
--
-- File Description:
--      ASN.1 definitions for PubChem biological assay data database
--
-- ===========================================================================

NCBI-PCAssay DEFINITIONS ::= BEGIN

IMPORTS Pub                              FROM NCBI-Pub
        BioSource                        FROM NCBI-BioSource 
        Date, Object-id                  FROM NCBI-General
        PC-ID, PC-Source, PC-XRefData    FROM NCBI-PCSubstance;

-- EXPORTS ;


-- Container for multiple Assay Data Submissions (a list of PC-AssaySubmit)
PC-AssayContainer ::= SEQUENCE OF PC-AssaySubmit


-- Container for Data Depositions and Assay Definitions.
-- The assay is identified by exactly one of the "assay" alternatives.
PC-AssaySubmit ::= SEQUENCE {
    assay             CHOICE {                               -- Assay Description or pre-existing Identifier
                          aid           INTEGER,             --   Assay Identifier
                          aid-source    PC-Source,           --   External Assay Identifier
                          descr         PC-AssayDescription, --   Assay Description (new or updated)
                          aidver        PC-ID                --   Assay Identifier/Version (for internal use)
                      },
    data              SEQUENCE OF PC-AssayResults  OPTIONAL, -- Assay Data Deposition (vector)
    revoke            SEQUENCE OF INTEGER          OPTIONAL  -- List of SIDs whose data is to be suppressed
}


-- Container for multiple Assay Result Sets
-- PC-AssayResultsSet ::= SEQUENCE OF PC-AssayResults


-- Assay Results provided for a given Substance tested, with respect to the result types defined in the
--   referenced Assay Description
PC-AssayResults ::= SEQUENCE {
    -- Internal/External Tracking Information
    sid               INTEGER,                               -- Tested Substance ID/Version  [Either valid ID or,
                                                             --   if "sid-source" is used, this is a "0" value]
                                                             --   Note: A valid ID is greater than "0"
    sid-source        PC-Source                    OPTIONAL, -- External Identifier for this Substance
                                                             --   Note: May be used in lieu of "sid"
                                                             --   Note: This is non-optional if "sid" is "0"
    version           INTEGER                      OPTIONAL, -- Version identifier for this AID-SID Result
                                                             --   Note: Incoming data should set this to be "0"

    -- Data Annotation/Qualifier and URL to further Depositor Information
    comment           VisibleString                OPTIONAL, -- Annotation or qualifier for this Result

    -- Assay Result Data for this Sample
    --   Note: Users need populate only those "tid"s for which there is data, in any order.
    outcome           INTEGER {                              -- Assay Outcome
                          inactive        (1),               --   Substance is considered Inactive
                          active          (2),               --   Substance is considered Active
                          inconclusive    (3),               --   Substance is Inconclusive
                          unspecified     (4),               --   Substance Outcome is Unspecified
                          probe           (5)                --   Substance Outcome is Probe
                                                             --   NOTE(review): the original comment repeated
                                                             --   "Unspecified" here; confirm the intended meaning
                      }               DEFAULT unspecified,
    rank              INTEGER                      OPTIONAL, -- Rank of Assay Outcome (for result ordering)
                                                             --   Note: Larger numbers are more active
    data              SEQUENCE OF PC-AssayData     OPTIONAL, -- Assay Data Reported for this SID (vector)
    url               VisibleString                OPTIONAL, -- Depositor provided URL for this Result
    xref              SEQUENCE OF PC-AnnotatedXRef OPTIONAL, -- Annotated Cross-Reference Information
                                                             -- to be removed;
                                                             -- instead regular TIDs will be generated,
                                                             -- which are then annotated by xref type
    date              Date                         OPTIONAL  -- PubChem Release Date

}


-- Assay Readouts/Results for a Tested Substance: one value for one Result Field (TID)
PC-AssayData ::= SEQUENCE {
    tid               INTEGER,                            -- Assay Result Field Type ID (TID)
                                                          --   Note: Result Field IDs must be greater than "0"
    value             CHOICE {                            -- Assay Result, must be the same type as defined for TID
                          ival    INTEGER,                --   integer value
                          fval    REAL,                   --   real value
                          bval    BOOLEAN,                --   boolean value
                          sval    VisibleString           --   string value
                      }
}


-- Assay Description provided by an Organization that describes the assay/protocol performed and defines the
--   measured end-points and parameters to be stored.  An Assay Description is not a database table.  You can
--   define as many Result Definitions as needed and they need not be used by all Substances tested.
-- Both the description text and the Result Definitions of an Assay Description can be modified after the
-- initial submission as desired, and such updates will be tracked in PubChem

PC-AssayDescription ::= SEQUENCE {
    -- Internal/External Tracking Information
    aid               PC-ID,                                 -- Assay Description ID/Version  [Either valid ID
                                                             --   or, if "aid-source" is used, a "0" dummy value]
                                                             --   Note: Version is for internal use (only?)
                                                             --   Note: A valid ID is greater than "0"
    aid-source        PC-Source                    OPTIONAL, -- External Identifier for this Assay Description
                                                             --   Note: May be used in lieu of "aid"
                                                             --   Note: This is non-optional if "aid" ID is "0"

    -- Assay Description Information
    name              VisibleString,                         -- Short Assay Name (for display purposes)
    description       SEQUENCE OF VisibleString    OPTIONAL, -- Description of Assay
    protocol          SEQUENCE OF VisibleString    OPTIONAL, -- Procedure used to generate results
    comment           SEQUENCE OF VisibleString    OPTIONAL, -- Comments or additional information
    xref              SEQUENCE OF PC-AnnotatedXRef OPTIONAL, -- Annotated Cross-Reference Information

    -- Allowed Assay Result Types
    results           SEQUENCE OF PC-ResultType    OPTIONAL,             -- Result Definitions (vector)

    -- Additional Information
    pub               SEQUENCE OF Pub                 OPTIONAL, -- Depositor provided publications for this assay
    revision          INTEGER                         OPTIONAL, -- Revision identifier for textual description
    target            SEQUENCE OF PC-AssayTargetInfo  OPTIONAL, -- Target information
    activity-outcome-method  INTEGER  {                         -- Assay Outcome Qualifier
                               other              (0),          --   All Other Type
                               screening          (1),          --   Primary Screen Assay
                               confirmatory       (2),          --   Confirmatory Assay
                               summary            (3)          --   Probe Summary Assay
    }                                                 OPTIONAL,

    dr                SEQUENCE OF PC-AssayDRAttr      OPTIONAL, -- Dose-Response Attribution
    substance-type    INTEGER{
                         small-molecule                (1),
                         nucleotide                    (2),
                         other                         (255)
                      }   OPTIONAL,                             -- distinguishes the type of substance used in the screening

    -- Grant and project category information
    grant-number      SEQUENCE OF VisibleString     OPTIONAL,               -- grant proposal number
                                                                -- required for 'MLSCN' & 'MLPCN' projects
    project-category  INTEGER{
                        mlscn    (1),
                        mlpcn    (2),
                        mlscn-ap (3),
                        mlpcn-ap (4),
                        journal-article (5), -- to be deprecated
                        assay-vendor (6),
                        literature-extracted (7), 
                        literature-author (8), 
                        literature-publisher (9), 
                        rnaigi (10),
                        other    (255)
                      }                 OPTIONAL,               -- distinguishes projects funded through MLSCN, MLPCN or other
                                                                -- mlscn: assay depositions from MLSCN screen center
                                                                -- mlpcn: assay depositions from MLPCN screen center
                                                                -- mlscn-ap: assay depositions from MLSCN assay provider
                                                                -- mlpcn-ap: assay depositions from MLPCN assay provider
                                                                -- required for 'MLSCN' & 'MLPCN' projects
                                                                -- journal-article: to be deprecated; replaced by options 7, 8 & 9 to better characterize data from literature
                                                                -- literature-extracted: data from literature, extracted by curators
                                                                -- literature-author: data from literature, submitted by author of articles
                                                                -- literature-publisher: data from literature, submitted by journals/publishers
                                                                -- rnaigi: RNAi screenings from RNAi Global Initiative

    -- annotation to indicate whether an assay is a panel, e.g. containing multiple components, or belongs to a group
    is-panel          BOOLEAN    OPTIONAL,                      -- annotation for panel assay, e.g. to indicate that this assay
                                                                -- contains multiple members/components.
                                                                -- A panel assay can be one assay reporting readouts for
                                                                -- many targets, or reporting readouts for different
                                                                -- cell lines, or different organisms.
                                                                -- Examples: a bioassay containing kinase profiling data,
                                                                -- a bioassay containing cytotoxicity data for multiple cell lines,
                                                                -- or a bioassay containing screening data from different stages,
                                                                -- such as primary screening and follow-ups
    assay-group       SEQUENCE OF VisibleString   OPTIONAL,     -- annotation for assay group information, e.g. this
                                                                -- assay belongs to a group of assays associated by
                                                                -- a unique name, e.g. the value assigned to 'assay-group';
                                                                -- assays belonging to this group can be
                                                                -- retrieved using this unique name.
                                                                -- It is the depositor's responsibility to make the name
                                                                -- distinct if necessary.
                                                                -- This can be used as the mechanism to specify 'related bioassays'
                                                                -- beforehand, so it is not necessary to update the descriptions of
                                                                -- related assays when a new group member is submitted to PubChem

    panel-info        PC-AssayPanel     OPTIONAL,               -- for 'panel' type of bioassay only
                                                                -- stores assay panel member information
    is-mlp-late-stage       BOOLEAN OPTIONAL,                   -- verification for 'late-stage-data' deposition
    categorized-comment SEQUENCE OF PC-CategorizedComment OPTIONAL
                                                                -- to report categorized description/comment by associating with a category title
}

-- Description for Panel Assay
-- Describes general information about the panel, and links to information for each panel member
PC-AssayPanel ::= SEQUENCE {
    name      VisibleString,                                    -- short name for the panel, such as 'Kinase Profiling'
    descr     VisibleString                        OPTIONAL,    -- short description of this panel
    member    SEQUENCE OF PC-AssayPanelMember      OPTIONAL     -- panel member information
}

-- Specific information about each panel member (or component), such as target, cell line name, cross-reference ...
PC-AssayPanelMember ::= SEQUENCE {   
    mid        INTEGER,                                         -- ID for panel member
                                                                -- e.g. for a kinase panel with 300 kinases, IDs will range from 1 to 300
                                                                -- TIDs of the same panel member are grouped based on panel member ID
    name           VisibleString  OPTIONAL,                          -- short name for this panel member
    description     VisibleString  OPTIONAL,                     -- description of the specifics of this panel member,
                                                                -- such as cell line or target information
    protocol       SEQUENCE OF VisibleString    OPTIONAL,       -- Specific procedure used to generate results for the panel member
    comment        SEQUENCE OF VisibleString    OPTIONAL,       -- Comments or additional information
    target         SEQUENCE OF PC-AssayTargetInfo  OPTIONAL,    -- often provided for profiling assays across protein families
    xref           SEQUENCE OF PC-AnnotatedXRef    OPTIONAL,    -- Annotated Cross-Reference Information
    activity-outcome-method  INTEGER  {                         -- Assay Outcome Qualifier
                               other              (0),          --   All Other Type
                               screening          (1),          --   Primary Screen Assay
                               confirmatory       (2),          --   Confirmatory Assay
                               summary            (3)           --   Probe Summary Assay
                             }                        OPTIONAL,
    dr                SEQUENCE OF PC-AssayDRAttr      OPTIONAL,  -- Dose-Response Attribution within the panel member
    categorized-comment SEQUENCE OF PC-CategorizedComment OPTIONAL
                                                                -- to report categorized description/comment by associating with a category title
} 

-- Definition for Categorized description/comment
-- This type gives depositors flexibility to present textual descriptions/comments in the way they prefer,
-- and facilitates information validation by the depositor and data exchange with PubChem
PC-CategorizedComment ::= SEQUENCE {
    title     VisibleString,                                    -- title for the description/comment
    comment   SEQUENCE OF VisibleString                                     -- description/comment content
}

--  Assay Dose-Response attribute information used to define a set of readouts
--    as being part of a dose-response curve (for curve plotting/analysis)
PC-AssayDRAttr ::=SEQUENCE {
    id                INTEGER,                               -- Unique dose-response test set identifier
                                                             --   Note: A valid ID is greater than "0"
    descr             VisibleString               OPTIONAL,  -- Dose-Response Curve Description (used as curve title)
    dn                VisibleString               OPTIONAL,  -- Dose Axis Description (used as axis name)
    rn                VisibleString               OPTIONAL,  -- Response Axis Description (used as axis name)
    type              INTEGER {                              -- origin of the dose-response data points
                        experimental (0),                    -- dose-response data points measured directly by experiment
                        calculated   (1)                     -- dose-response data points derived from fitted curve
                      }          OPTIONAL
}


-- Molecular target information provided by the organization. It describes the functionality of the target,
-- facilitates linking between PubChem bioassays, and links the target molecule to other NCBI resources
PC-AssayTargetInfo ::= SEQUENCE {
   name                    VisibleString,                     -- Molecular name of target
   mol-id                  INTEGER,                           -- NCBI database identifier of the target molecule;
                                                              -- which database depends on molecule-type
   molecule-type           INTEGER {                          -- Assay Target Type
                               protein            (1),        -- mol-id: NCBI Protein GI
                               dna                (2),        -- mol-id: NCBI Nucleotide GI
                               rna                (3),        -- mol-id: NCBI Nucleotide GI
                               gene               (4),        -- mol-id: NCBI Gene ID
                               biosystem          (5),        -- mol-id: NCBI BioSystems ID
                               other              (255)
                           }                DEFAULT protein,
   organism                BioSource               OPTIONAL,  -- Target Organism
   descr                   VisibleString           OPTIONAL,  -- Target Description  (e.g., cellular functionality and location)
   comment                 SEQUENCE OF VisibleString           OPTIONAL   -- Comments or Additional Information
}

-- Annotated Cross-Reference (XRef): a cross-reference together with an optional
-- qualifier, so that the meaning or context of the XRef can be stated
PC-AnnotatedXRef ::= SEQUENCE {
    xref     PC-XRefData,              -- Cross-Reference Information
    comment  VisibleString  OPTIONAL,  -- Annotation qualifier describing Cross-Reference meaning
    type     INTEGER {
                 pcit   (1),           -- primary PMID/citation directly associated with the current assay data
                 pgene  (2)            -- gene encoding the protein assay target
             }              OPTIONAL
}

-- Definition of an Allowed Result Type for a given Assay: one result field, identified by
-- its TID, together with its data type, optional validation constraints, units and annotations
PC-ResultType ::= SEQUENCE {
    -- Tracking or Description Information
    tid               INTEGER,                             -- Assay Result Field Type ID (TID)
    name              VisibleString,                       -- Result Field Name (short name for display)
    description       SEQUENCE OF VisibleString  OPTIONAL, -- Result Field Description

    -- Result Data Type and Validation Information
    type             INTEGER {                             -- Result Data Type
                         float            (1),
                         int              (2),
                         bool             (3),
                         string           (4)
                     },
    constraints      CHOICE {                              -- Allowed Values, used for validating incoming data
                         -- If type is "float"
                         fset      SEQUENCE OF REAL,       --   Allowed values must be equal to one of these
                         fmin      REAL,                   --   Allowed values (x) must be [ fmin <= x ]
                         fmax      REAL,                   --   Allowed values (x) must be [ x <= fmax ]
                         frange    PC-RealMinMax,          --   Minimum/Maximum Range [ min <= x <= max ]

                         -- If type is "int"
                         iset      SEQUENCE OF INTEGER,    --   Allowed values must be equal to one of these
                         imin      INTEGER,                --   Allowed values (x) must be [ imin <= x ]
                         imax      INTEGER,                --   Allowed values (x) must be [ x <= imax ]
                         irange    PC-IntegerMinMax,       --   Minimum/Maximum Range [ min <= x <= max ]

                         -- If type is "string"
                         sset   SEQUENCE OF VisibleString  --   Allowed values must be equal to one of these
                     }                           OPTIONAL,

    -- Unit information provides the units for the values reported for this TID.  For example, if the values
    --   reported for this TID are a concentration, e.g., micro-molar, setting the unit "um" allows PubChem to
    --   know that the value, e.g., "1.3", is actually "1.3 uM".  This also allows PubChem to properly report the
    --   units when displaying the reported values for this TID.  If the enumerated units provided below are
    --   insufficient, the units may be represented as a string in the optional "sunit" field (see below;
    --   note that "sunit" is marked DEPRECATED there and remains for legacy data only).
    unit             INTEGER {                             -- Units for Value
                         ppt              (1),             -- Parts per Thousand
                         ppm              (2),             -- Parts per Million
                         ppb              (3),             -- Parts per Billion
                         mm               (4),             -- milliM
                         um               (5),             -- microM
                         nm               (6),             -- nanoM
                         pm               (7),             -- picoM
                         fm               (8),             -- femtoM
                         mgml             (9),             -- milligrams per mL
                         ugml            (10),             -- micrograms per mL
                         ngml            (11),             -- nanograms per mL
                         pgml            (12),             -- picograms per mL
                         fgml            (13),             -- femtograms per mL
                         m               (14),             -- Molar
                         percent         (15),             -- Percent
                         ratio           (16),             -- Ratio
                         sec             (17),             -- Seconds
                         rsec            (18),             -- Reciprocal Seconds
                         min             (19),             -- Minutes
                         rmin            (20),             -- Reciprocal Minutes
                         day             (21),             -- Days
                         rday            (22),             -- Reciprocal Days
                         ml-min-kg       (23),             -- milliliter / minute / kilogram
                         l-kg            (24),             -- liter / kilogram
                         hr-ng-ml        (25),             -- hour * nanogram / milliliter
                         cm-sec          (26),             -- centimeter / second
                         mg-kg           (27),             -- milligram / kilogram
                         none           (254),
                         unspecified    (255)
                     }                           OPTIONAL,
  
    -- ATTENTION: sunit field is DEPRECATED. It is no longer
    --            supported and remains for legacy data only.
    sunit            VisibleString               OPTIONAL, -- Unit Type (as a String)
  
  
    -- Value Transform information qualifies the values reported for this TID.  For example, if the values
    --   reported for this TID are "-Log10 GI50", you may want to consider setting
    --   the "nlog" value below.  In doing so, PubChem would know that the value, e.g., "5.0"
    --   is actually "1.0e-5".  If the transformation applied is not listed, you may represent
    --   this transformation as a string in the "stransform" field (see below) for eventual inclusion
    --   in the enumerated transform list below.
    --   Note that both "transform" and "stransform" are marked DEPRECATED below.
    --
    -- ATTENTION: transform field is DEPRECATED. It is no longer
    --            supported and remains for legacy data only.
    transform        INTEGER {                              -- Value Type Details
                         linear           (1),              -- Linear Scale (x)
                         ln               (2),              -- Natural Log Scale (ln x)
                         log              (3),              -- Log Base 10 Scale (log10 x)
                         reciprocal       (4),              -- Reciprocal Scale (1/x)
                         negative         (5),              -- Negative Linear Scale (-x)
                         nlog             (6),              -- Negative Log Base 10 Scale (-log10 x)
                         nln              (7)               -- Negative Natural Log Scale (-ln x)
			     }                           OPTIONAL,
  
    -- ATTENTION: stransform field is DEPRECATED. It is no longer
    --            supported and remains for legacy data only.
    stransform       VisibleString               OPTIONAL,  -- Value Transform Type (as a String)
  
  
    tc               PC-ConcentrationAttr        OPTIONAL,  -- Tested concentration attribute
    ac               BOOLEAN                     OPTIONAL,   -- if true, indicates that this TID field
                                                            -- provides the active concentration summary, either by
                                                            -- reporting the concentration which produces
                                                            -- 50% of the maximum possible biological response
                                                            -- (such as IC50, EC50, AC50, GI50 etc.)
                                                            -- or by reporting constant parameters such as Ki,
                                                            -- on which the activity outcome in this assay is based
    panel-info       PC-AssayPanelTestResult     OPTIONAL,  -- needed for panel assay only
                                                            -- each panel member may have a number of TID columns reported
                                                            -- such TIDs are grouped by panel member ID, see PC-AssayPanelMemberInfo
    annot            INTEGER {
                           pmid  (1),                       -- PubMed ID
                           mmdb  (2),                       -- MMDB ID
                           url   (3),                       -- indicates TID data is a url that provides supplementary information
                           protein-gi (4),                  -- GenBank General ID (GI) for a Protein
                           nucleotide-gi (5),               -- GenBank General ID (GI) for a Nucleotide
                           taxonomy (6),                    -- Taxonomy ID for an Organism
                           mim(7),                          -- MIM, Mendelian Inheritance in Man, ID
                           gene(8),                         -- Entrez Gene ID
                           probe(9),                        -- Entrez Probe ID
                           aid (10),                        -- PubChem BioAssay ID, may be used in 'Summary' assay
                           sid (11),                        -- PubChem Substance ID, may be used in 'Summary' assay
                           cid (12),                        -- PubChem Compound ID
                           protein-target-gi (13),          -- GenBank General ID (GI) for a Protein target
                           biosystems-target-id (14),       -- NCBI BioSystems ID
                           target-name (15),                -- target name
                           target-descr (16),               -- brief target description
                           target-tax-id (17),              -- NCBI Taxonomy ID for target molecule
                           gene-target-id (18),             -- NCBI Gene ID for a gene target
                           dna-nucleotide-target-gi (19),   -- GenBank General ID (GI) for a DNA Nucleotide target
                           rna-nucleotide-target-gi (20)    -- GenBank General ID (GI) for a RNA Nucleotide target
                     }   OPTIONAL                           -- Annotation of what the TID data refers to:
                                                            -- a substance-associated cross-reference is treated as a regular TID,
                                                            -- and web servers would make a link
                                                            -- to the corresponding record in Entrez databases;
                                                            -- substance-associated target information is likewise treated
                                                            -- as a regular TID (for example, for RNAi screening data, each
                                                            -- 'substance' may correspond to a specific gene target)
}

-- Panel test result annotation: ties a TID column to a panel member and
-- states the role that column plays for that member
PC-AssayPanelTestResult ::= SEQUENCE {
    mid            INTEGER,           -- panel member ID, see PC-AssayPanelMemberInfo
                                      -- tracks the association between a group of TIDs and a panel member
    readout-annot  INTEGER {
                       regular  (1),  -- this TID column is a regular readout
                       outcome  (2),  -- this TID column is the "outcome" of the particular panel member
                       score    (3),  -- this TID column reports the activity score of the particular panel member
                                      -- to be used for neighboring assays
                       ac       (4)   -- this TID column is the "active concentration"
                                      -- 'outcome' and 'ac' type of TID are to be used for data analysis
                                      -- across members of panel
                                      -- or across multiple assays including other panel assays
                   }  DEFAULT regular
}

-- The concentration attribute indicates that the readout under this test result field
-- is biological concentration-response data; the attribute provides the value and unit
-- of the tested concentration
PC-ConcentrationAttr ::= SEQUENCE {
    concentration  REAL,
    unit           INTEGER {           -- Units for Concentration
                       um  (5)         -- microM
                   },
    dr-id          INTEGER  OPTIONAL   -- Dose-Response Attribution ID (if applicable)
}


-- Minimum and Maximum Constraints on an Integer Value (used for validating incoming data)
PC-IntegerMinMax ::= SEQUENCE {
    min  INTEGER,  -- Minimum Value Allowed
    max  INTEGER   -- Maximum Value Allowed
}


-- Minimum and Maximum Constraints on a Real Value (used for validating incoming data)
PC-RealMinMax ::= SEQUENCE {
    min  REAL,  -- Minimum Value Allowed
    max  REAL   -- Maximum Value Allowed
}


END

-- pcassay2.asn
-- $Id: pcassay2.asn 626145 2021-02-24 09:40:04Z chengt2 $
-- ===========================================================================
--
--                            PUBLIC DOMAIN NOTICE
--               National Center for Biotechnology Information
--
--  This software/database is a "United States Government Work" under the
--  terms of the United States Copyright Act.  It was written as part of
--  the author's official duties as a United States Government employee and
--  thus cannot be copyrighted.  This software/database is freely available
--  to the public for use. The National Library of Medicine and the U.S.
--  Government have not placed any restriction on its use or reproduction.
--
--  Although all reasonable efforts have been taken to ensure the accuracy
--  and reliability of the software and data, the NLM and the U.S.
--  Government do not and cannot warrant the performance or results that
--  may be obtained by using this software or data. The NLM and the U.S.
--  Government disclaim all warranties, express or implied, including
--  warranties of performance, merchantability or fitness for any particular
--  purpose.
--
--  Please cite the author in any work or product based on this material.
--
-- ===========================================================================
--
-- Authors:  NCBI Structure Group
--
-- File Description:
--      ASN.1 definitions for PubChem biological assay data database (PCAssay2)
--
-- ===========================================================================

NCBI-PCAssay2 DEFINITIONS ::= BEGIN

IMPORTS BioSource                        FROM NCBI-BioSource
        Date                             FROM NCBI-General
        PC-ID, PC-Source, PC-XRefData    FROM NCBI-PCSubstance;

-- EXPORTS ;


-- Container for multiple Assay Data Submissions (an ordered list of PC-AssaySubmit entries)
PC-AssayContainer ::= SEQUENCE OF PC-AssaySubmit


-- Container for Data Depositions and Assay Definitions: names or describes one assay,
-- and optionally carries result data to deposit and SIDs whose data is to be suppressed
PC-AssaySubmit ::= SEQUENCE {
    assay   CHOICE {                                -- Assay Description or pre-existing Identifier
                aid         INTEGER,                --   Assay Identifier
                aid-source  PC-Source,              --   External Assay Identifier
                descr       PC-AssayDescription,    --   Assay Description (new or updated)
                aidver      PC-ID                   --   Assay Identifier/Version (for internal use)
            },
    data    SEQUENCE OF PC-AssayResults  OPTIONAL,  -- Assay Data Deposition (vector)
    revoke  SEQUENCE OF INTEGER          OPTIONAL   -- List of SIDs whose data is to be suppressed (vector)
}


-- Container for multiple Assay Result Sets
-- PC-AssayResultsSet ::= SEQUENCE OF PC-AssayResults


-- Assay Results provided for a given Substance tested, with respect to the result types defined in the
--   referenced Assay Description
PC-AssayResults ::= SEQUENCE {
    -- Internal/External Tracking Information
    sid               INTEGER,                               -- Tested Substance ID/Version  [Either valid ID or,
                                                             --   if "sid-source" is used, this is a "0" value]
                                                             --   Note: A valid ID is greater than "0"
    sid-source        PC-Source                    OPTIONAL, -- External Identifier for this Substance
                                                             --   Note: May be used in-lieu of "sid"
                                                             --   Note: This is non-optional if "sid" is "0"
    version           INTEGER                      OPTIONAL, -- Version identifier for this AID-SID Result
                                                             --   Note: Incoming data should set this to be "0"

    -- Data Annotation/Qualifier and URL to further Depositor Information
    comment           UTF8String                   OPTIONAL, -- Annotation or qualifier for this Result

    -- Assay Result Data for this Sample
    --   Note: Users need populate only those "tid"s, for which there is data, in any order.
    outcome           INTEGER {                              -- Assay Outcome
                          inactive        (1),               --   Substance is considered Inactive
                          active          (2),               --   Substance is considered Active
                          inconclusive    (3),               --   Substance is Inconclusive
                          unspecified     (4),               --   Substance Outcome is Unspecified
                          probe           (5)                --   Substance is a Probe (NOTE(review): the previous comment
                                                             --   repeated the "Unspecified" text of value 4; confirm meaning)
                      }               DEFAULT unspecified,
    rank              INTEGER                      OPTIONAL, -- Rank of Assay Outcome (for result ordering)
                                                             --   Note: Larger numbers are more active
    data              SEQUENCE OF PC-AssayData     OPTIONAL, -- Assay Data Reported for this SID (vector)
    url               UTF8String                   OPTIONAL, -- Depositor provided URL for this Result
    xref              SEQUENCE OF PC-AnnotatedXRef OPTIONAL, -- annotated Cross-Reference Information
                                                             -- to be removed;
                                                             -- instead regular TIDs will be generated
                                                             -- which are then annotated by xref type
    date              Date                         OPTIONAL  -- Pubchem Release Date
}


-- Assay Readout/Result for a Tested Substance: one value reported for one result field (TID)
PC-AssayData ::= SEQUENCE {
    tid    INTEGER,             -- Assay Result Field Type ID (TID)
                                --   Note: Result Field IDs must be greater than "0"
    value  CHOICE {             -- Assay Result, must be the same type as defined for TID
               ival  INTEGER,
               fval  REAL,
               bval  BOOLEAN,
               sval  UTF8String
           }
}


-- Assay Description provided by an Organization that describes the assay/protocol performed and defines the
--   measured end-points and parameters to be stored.  An Assay Description is not a database table.  You can
--   define as many Result Definitions as needed and they need not be used by all Substances tested.
-- Both the description text and the Result Definitions of an Assay Description can be modified after initial
--   submission as desired, and such updates will be tracked in PubChem

PC-AssayDescription ::= SEQUENCE {
    -- Internal/External Tracking Information
    aid               PC-ID,                                 -- Assay Description ID/Version  [Either valid ID
                                                             --   or, if "aid-source" is used, a "0" dummy value]
                                                             --   Note: Version is for internal use (only?)
                                                             --   Note: A valid ID is greater than "0"
    aid-source        PC-Source                    OPTIONAL, -- External Identifier for this Assay Description
                                                             --   Note: May be used in-lieu of "aid"
                                                             --   Note: This is non-optional if "aid" ID is "0"

    -- Assay Description Information
    name              UTF8String,                            -- Short Assay Name (for display purposes)
    description       SEQUENCE OF UTF8String       OPTIONAL, -- Description of Assay
    protocol          SEQUENCE OF UTF8String       OPTIONAL, -- Procedure used to generate results
    comment           SEQUENCE OF UTF8String       OPTIONAL, -- Comments or additional information
    xref              SEQUENCE OF PC-AnnotatedXRef OPTIONAL, -- Annotated Cross-Reference Information

    -- Allowed Assay Result Types
    results           SEQUENCE OF PC-ResultType    OPTIONAL, -- Result Definitions (vector)

    -- Additional Information
    -- pub               SEQUENCE OF Pub                 OPTIONAL, Depositor provided publications for this assay (never used)
    revision          INTEGER                         OPTIONAL, -- Revision identifier for textual description
    target            SEQUENCE OF PC-AssayTargetInfo  OPTIONAL, -- Target information
    activity-outcome-method  INTEGER {                         -- Assay Outcome Qualifier
                               other              (0),          --   All Other Type
                               screening          (1),          --   Primary Screen Assay
                               confirmatory       (2),          --   Confirmatory Assay
                               summary            (3)           --   Probe Summary Assay
    }                                                 OPTIONAL,

    dr                SEQUENCE OF PC-AssayDRAttr      OPTIONAL, -- Dose-Response Attribution
    substance-type    INTEGER {
                         small-molecule                (1),
                         nucleotide                    (2),
                         other                       (255)
                      }   OPTIONAL,                             -- to distinguish the type of substance used in the screening

    -- Grant and project category information
    grant-number      SEQUENCE OF VisibleString     OPTIONAL,   -- grant proposal number
                                                                -- required for 'MLSCN' & 'MLPCN' projects
    project-category  INTEGER {                   -- to distinguish projects funded through MLSCN, MLPCN or other
                        mlscn                  (1), -- assay depositions from MLSCN screen center
                        mlpcn                  (2), -- assay depositions from MLPCN screen center
                        mlscn-ap               (3), -- assay depositions from MLSCN assay provider
                        mlpcn-ap               (4), -- assay depositions from MLPCN assay provider
                        journal-article        (5), -- to be deprecated and replaced by option 7, 8 & 9
                        assay-vendor           (6), -- assay depositions from assay vendors
                        literature-extracted   (7), -- data from literature, extracted by curators
                        literature-author      (8), -- data from literature, submitted by author of articles
                        literature-publisher   (9), -- data from literature, submitted by journals/publishers
                        rnaigi                (10), -- RNAi screenings from RNAi Global Initiative
                        other                (255)
                      }                 OPTIONAL,

    assay-group       SEQUENCE OF VisibleString   OPTIONAL,     -- annotation for assay group information, e.g. this
                                                                -- assay belongs to a group of assays associated by
                                                                -- a unique name, i.e. the value assigned to 'assay-group';
                                                                -- assays belonging to this group can be
                                                                -- retrieved using this unique name.
                                                                -- It is the depositor's responsibility to make the name
                                                                -- distinct if necessary.
                                                                -- This can be used as the mechanism to specify 'related bioassays'
                                                                -- beforehand, so it is not necessary to update the descriptions of
                                                                -- related assays when a new group member is submitted to PubChem

    -- is-mlp-late-stage       BOOLEAN OPTIONAL,                   verification for 'late-stage-data' deposition (never used)
    categorized-comment SEQUENCE OF PC-CategorizedComment OPTIONAL
                                                                -- to report categorized description/comment by associating with a category title
}


-- Categorized description/comment: a category title plus its text entries.
-- This type gives depositors flexibility to present textual descriptions/comments
-- in a desirable way, and facilitates information validation by the depositor
-- and data exchange with PubChem
PC-CategorizedComment ::= SEQUENCE {
    title    UTF8String,              -- title for the description/comment
    comment  SEQUENCE OF UTF8String   -- description/comment content
}


-- Assay dose-response attribute: defines a set of readouts as being part of
--   one dose-response curve (for curve plotting/analysis)
PC-AssayDRAttr ::= SEQUENCE {
    id     INTEGER,               -- Unique dose-response test set identifier
                                  --   Note: A valid ID is greater than "0"
    descr  UTF8String  OPTIONAL,  -- Dose-Response Curve Description (used as curve title)
    dn     UTF8String  OPTIONAL,  -- Dose Axis Description (used as axis name)
    rn     UTF8String  OPTIONAL,  -- Response Axis Description (used as axis name)
    type   INTEGER {
               experimental  (0), -- data points measured directly by experiment
               calculated    (1)  -- data points derived from fitted curve
           }           OPTIONAL
}


-- Molecular target information provided by the depositing organization. It describes the
-- functionality of the target, and facilitates linking between PubChem bioassays as well as
-- linking from the target molecule to other NCBI resources
PC-AssayTargetInfo ::= SEQUENCE {
   name      UTF8String,                               -- Molecular name of target
   mol-id    CHOICE {                                  -- database and identifier of the target molecule
                 gene-id               INTEGER,        --   target is an NCBI Gene ID
                 protein-accession     VisibleString,  --   target is an NCBI Protein Accession
                 nucleotide-accession  VisibleString,  --   target is an NCBI Nucleotide Accession
                 other                 VisibleString,  --   target is beyond supported types (format = TYPE::RESOURCE::IDENTIFIER)
                 tax-id                INTEGER         --   target is an NCBI Taxonomy ID
             },
   organism  BioSource               OPTIONAL,         -- Target Organism
   descr     UTF8String              OPTIONAL,         -- Target Description (e.g., cellular functionality and location)
   comment   SEQUENCE OF UTF8String  OPTIONAL          -- Comments or Additional Information
}


-- Annotated Cross-Reference (XRef): a cross-reference together with an optional
-- qualifier, so that the meaning or context of the XRef can be stated
PC-AnnotatedXRef ::= SEQUENCE {
    xref     PC-XRefData,           -- Cross-Reference Information
    comment  UTF8String  OPTIONAL,  -- Annotation qualifier describing Cross-Reference meaning
    type     INTEGER {
                 pcit   (1),        -- primary PMID/citation directly associated with the current assay data
                 pgene  (2)         -- gene encoding the protein assay target
             }           OPTIONAL
}


-- Definition of Allowed Result Types for a given Assay
--   One PC-ResultType describes a single result field, identified by its TID: its name, data type,
--   optional validation constraints, optional units, and optional annotation information.
PC-ResultType ::= SEQUENCE {
    -- Tracking or Description Information
    tid               INTEGER,                             -- Assay Result Field Type ID (TID)
    name              UTF8String,                          -- Result Field Name (short name for display)
    description       SEQUENCE OF UTF8String  OPTIONAL,    -- Result Field Description

    -- Result Data Type and Validation Information
    type             INTEGER {                             -- Result Data Type
                         float            (1),
                         int              (2),
                         bool             (3),
                         string           (4)
                     },
    -- Exactly one alternative may be chosen.  Alternatives exist for the "float", "int" and "string"
    --   data types only; none is defined for "bool".
    constraints      CHOICE {                              -- Allowed Values, used for validating incoming data
                         -- If type is "float"
                         fset      SEQUENCE OF REAL,       --   Allowed values must be equal to one of these
                         fmin      REAL,                   --   Allowed values (x) must be [ fmin <= x ]
                         fmax      REAL,                   --   Allowed values (x) must be [ x <= fmax ]
                         frange    PC-RealMinMax,          --   Minimum/Maximum Range [ min <= x <= max ]

                         -- If type is "int"
                         iset      SEQUENCE OF INTEGER,    --   Allowed values must be equal to one of these
                         imin      INTEGER,                --   Allowed values (x) must be [ imin <= x ]
                         imax      INTEGER,                --   Allowed values (x) must be [ x <= imax ]
                         irange    PC-IntegerMinMax,       --   Minimum/Maximum Range [ min <= x <= max ]

                         -- If type is "string"
                         sset   SEQUENCE OF VisibleString  --   Allowed values must be equal to one of these
                     }                           OPTIONAL,

    -- Unit information provides the units for the values reported for this TID.  For example, if the values
    --   reported for this TID are a concentration, e.g., micro-molar, setting the unit "um" allows PubChem to
    --   know that the value, e.g., "1.3", is actually "1.3 uM".  This also allows PubChem to properly report the
    --   units when displaying the reported values for this TID.  The enumerated units provided below are the
    --   supported way to give units; the string-valued "sunit" field is DEPRECATED (see below).
    unit             INTEGER {                             -- Units for Value
                         ppt              (1),             -- Parts per Thousand
                         ppm              (2),             -- Parts per Million
                         ppb              (3),             -- Parts per Billion
                         mm               (4),             -- milliM
                         um               (5),             -- microM
                         nm               (6),             -- nanoM
                         pm               (7),             -- picoM
                         fm               (8),             -- femtoM
                         mgml             (9),             -- milligrams per mL
                         ugml            (10),             -- micrograms per mL
                         ngml            (11),             -- nanograms per mL
                         pgml            (12),             -- picograms per mL
                         fgml            (13),             -- femtograms per mL
                         m               (14),             -- Molar
                         percent         (15),             -- Percent
                         ratio           (16),             -- Ratio
                         sec             (17),             -- Seconds
                         rsec            (18),             -- Reciprocal Seconds
                         min             (19),             -- Minutes
                         rmin            (20),             -- Reciprocal Minutes
                         day             (21),             -- Days
                         rday            (22),             -- Reciprocal Days
                         ml-min-kg       (23),             -- milliliter / minute / kilogram
                         l-kg            (24),             -- liter / kilogram
                         hr-ng-ml        (25),             -- hour * nanogram / milliliter
                         cm-sec          (26),             -- centimeter / second
                         mg-kg           (27),             -- milligram / kilogram
                         none           (254),
                         unspecified    (255)
                     }                           OPTIONAL,

    -- ATTENTION: sunit field is DEPRECATED. It is no longer
    --            supported and remains for legacy data only.
    sunit            VisibleString               OPTIONAL, -- Unit Type (as a String)


    -- Value Transform information qualifies the values reported for this TID.  For example, if the values
    --   reported for this TID are "-Log10 GI50", you may want to consider setting
    --   the "nlog" value below.  In doing so, PubChem would know that the value, e.g., "5.0"
    --   is actually "1.0e-5".  If the transformation applied is not listed, you may represent
    --   this transformation as a string in the "stransform" (see below) for eventual inclusion
    --   in the enumerated transform list below.
    --   NOTE: "stransform" is commented out of this definition (see below), so the string form
    --   described above is not available in this version of the specification.
    --
    --
    -- ATTENTION: transform field is DEPRECATED. It is no longer
    --            supported and remains for legacy data only.
    transform        INTEGER {                              -- Value Type Details
                         linear           (1),              -- Linear Scale (x)
                         ln               (2),              -- Natural Log Scale (ln x)
                         log              (3),              -- Log Base 10 Scale (log10 x)
                         reciprocal       (4),              -- Reciprocal Scale (1/x)
                         negative         (5),              -- Negative Linear Scale (-x)
                         nlog             (6),              -- Negative Log Base 10 Scale (-log10 x)
                         nln              (7)               -- Negative Natural Log Scale (-ln x)
                 }                           OPTIONAL,

    -- ATTENTION: stransform field is DEPRECATED. It is no longer
    --            supported and remains for legacy data only.
    -- stransform       VisibleString               OPTIONAL,  Value Transform Type as a string (never used)


    tc               PC-ConcentrationAttr        OPTIONAL,  -- Tested concentration attribute
    ac               BOOLEAN                     OPTIONAL,  -- if true, indicates that this TID field
                                                            -- provides the active concentration summary, either by
                                                            -- reporting the concentration which produces
                                                            -- 50% of the maximum possible biological response
                                                            -- (such as IC50, EC50, AC50, GI50 etc.),
                                                            -- or by reporting a constant parameter (such as Ki)
                                                            -- on which the activity outcome call in this assay is based
    ac-qualifier     BOOLEAN                     OPTIONAL, -- endpoint qualifier (e.g. <, <=, =, >, >=) associated with the ac field above
                                                            -- NOTE(review): declared BOOLEAN although the comment lists symbolic
                                                            -- qualifiers; presumably a flag saying this TID carries the qualifier. Confirm.

    -- Numbers 4, 5, 13, 14, 19 and 20 belong to commented-out entries below and are not reassigned.
    annot            INTEGER {
                           pmid                          (1), -- PubMed ID
                           mmdb                          (2), -- MMDB ID
                           url                           (3), -- indicate TID data is a url that provides supplementary information
                        -- protein-gi                    (4), GenBank General ID (GI) for a Protein
                        -- nucleotide-gi                 (5), GenBank General ID (GI) for a Nucleotide
                           taxonomy                      (6), -- Taxonomy ID for an Organism
                           mim                           (7), -- MIM, Mendelian Inheritance in Man, ID
                           gene                          (8), -- Entrez Gene ID
                           probe                         (9), -- Entrez Probe ID
                           aid                          (10), -- PubChem BioAssay ID, may be used in 'Summary' assay
                           sid                          (11), -- PubChem Substance ID, may be used in 'Summary' assay
                           cid                          (12), -- PubChem Compound ID
                        -- protein-target-gi            (13), GenBank General ID (GI) for a Protein target
                        -- biosystems-target-id         (14), NCBI BioSystems ID
                           target-name                  (15), -- target name
                           target-descr                 (16), -- brief target description
                           target-tax-id                (17), -- NCBI Taxonomy ID for target molecule
                           gene-target-id               (18), -- NCBI Gene ID for a gene target
                        -- dna-nucleotide-target-gi     (19), GenBank General ID (GI) for a DNA Nucleotide target
                        -- rna-nucleotide-target-gi     (20), GenBank General ID (GI) for a RNA Nucleotide target
                           protein-target-accession     (21), -- GenBank Accession for a Protein target
                           nucleotide-target-accession  (22), -- GenBank Accession for a DNA/RNA Nucleotide target
                           other                       (255)  -- for identifier types not currently supported
                     }   OPTIONAL                           -- treat substance associated cross-reference as regular TID
                                                            -- web servers would make a link
                                                            -- to the corresponding record in Entrez databases
                                                            -- treat substance associated target information as regular TID
                                                            -- for example, for RNAi screening data, each 'substance' may
                                                            -- correspond to a specific gene target
}

-- Concentration attribute: indicates that the readout under this test result
--   field is biological concentration-response data, and provides the value
--   and unit of the tested concentration
PC-ConcentrationAttr ::= SEQUENCE {

    -- Value of the tested concentration
    concentration REAL,

    -- Units for Concentration
    unit INTEGER {
        um (5)    -- microM
    },

    -- Dose-Response Attribution ID (if applicable)
    dr-id INTEGER OPTIONAL
}


-- Minimum and Maximum Constraints on an Integer Value
--   (used for validating incoming data)
PC-IntegerMinMax ::= SEQUENCE {
    min INTEGER,    -- Minimum Value Allowed
    max INTEGER     -- Maximum Value Allowed
}


-- Minimum and Maximum Constraints on a Real Value
--   (used for validating incoming data)
PC-RealMinMax ::= SEQUENCE {
    min REAL,    -- Minimum Value Allowed
    max REAL     -- Maximum Value Allowed
}


END

-- pcsubstance.asn
-- $Id: pcsubstance.asn 639091 2021-10-13 12:03:12Z thiessen $
-- ===========================================================================
--
--                            PUBLIC DOMAIN NOTICE
--               National Center for Biotechnology Information
--
--  This software/database is a "United States Government Work" under the
--  terms of the United States Copyright Act.  It was written as part of
--  the author's official duties as a United States Government employee and
--  thus cannot be copyrighted.  This software/database is freely available
--  to the public for use. The National Library of Medicine and the U.S.
--  Government have not placed any restriction on its use or reproduction.
--
--  Although all reasonable efforts have been taken to ensure the accuracy
--  and reliability of the software and data, the NLM and the U.S.
--  Government do not and cannot warrant the performance or results that
--  may be obtained by using this software or data. The NLM and the U.S.
--  Government disclaim all warranties, express or implied, including
--  warranties of performance, merchantability or fitness for any particular
--  purpose.
--
--  Please cite the author in any work or product based on this material.
--
-- ===========================================================================
--
-- Authors:  NCBI Structure Group
--
-- File Description:
--      ASN.1 definitions for PubChem small molecule database
--
-- ===========================================================================

NCBI-PCSubstance DEFINITIONS ::= BEGIN

EXPORTS PC-Substance, PC-Compound, PC-Substances, PC-Compounds,
        PC-Source, PC-ID, PC-InfoData, PC-XRefData;

IMPORTS Pub              FROM NCBI-Pub
        Date, Object-id  FROM NCBI-General;


-- Root Record for a Chemical Substance Definition
PC-Substance ::= SEQUENCE {

    -- Internal Tracking Information

    -- Substance ID/Version: either a valid ID or a "0" dummy value,
    --   if "source" is to be used
    --   Note: Version is for internal use (only?)
    --   Note: A valid ID is greater than "0"
    sid PC-ID,

    -- Data Source for this Submission
    source PC-Source,

    -- Substance Description Information

    -- Articles Describing this Substance
    pub SEQUENCE OF Pub OPTIONAL,

    -- Substance Names provided by Depositor
    synonyms SEQUENCE OF VisibleString OPTIONAL,

    -- Comments and Description provided by Depositor
    comment SEQUENCE OF VisibleString OPTIONAL,

    -- X-Ref/LinkOut Data provided by Depositor
    xref SEQUENCE OF PC-XRefData OPTIONAL,

    -- Structure Description

    -- Original Deposited Structure Information
    compound PC-Compounds OPTIONAL
}


-- Container type holding a group of Substances
PC-Substances ::=
    SEQUENCE OF PC-Substance


-- Identifier plus version information for a record
PC-ID ::= SEQUENCE {

    -- Unique "Global" ID
    --   Note: Must be greater than "0" or, if invalid, "0"
    id INTEGER,

    -- Incremented when Depositor updates record
    --   Note: For Internal Use (only?)
    version INTEGER
}


-- Source of a Substance, if it comes from another database.
--   Exactly one of the alternatives below is present.
PC-Source ::= CHOICE {

    -- Individual Submission
    individual Pub,

    -- External DB Submission
    db PC-DBTracking,

    -- MMDB Submission (deprecated)
    mmdb PC-MMDBSource
}


-- Tracking Information for an External Database
PC-DBTracking ::= SEQUENCE {

    -- Unique Name of External Database
    name VisibleString,

    -- Primary Unique ID used by External DB
    source-id Object-id,

    -- External Database Release Date
    date Date OPTIONAL,

    -- External Database Release Code/Description
    description VisibleString OPTIONAL,

    -- Data Submission to same DB by original Author
    pub Pub OPTIONAL
}


-- MMDB Source Record detailing a specific location or part of an MMDB Record.
--   Note for every INTEGER ID field below: the value must be greater than "0"
--   or, if invalid, "0".
PC-MMDBSource ::= SEQUENCE {

    -- MMDB Record ID
    mmdb-id INTEGER,

    -- MMDB Molecule ID
    molecule-id INTEGER,

    -- MMDB Molecule Name
    molecule-name SEQUENCE OF VisibleString,

    -- Residue ID
    residue-id INTEGER OPTIONAL,

    -- Residue Name
    residue-name VisibleString OPTIONAL,

    -- Atom ID
    atom-id INTEGER OPTIONAL,

    -- Atom Name
    atom-name VisibleString OPTIONAL
}


-- Depositor Provided X-Ref and LinkOut data for Entrez.
--   Exactly one of the alternatives below is present.
--   Note for every INTEGER alternative below: the value must be greater than "0"
--   or, if invalid, "0".
PC-XRefData ::= CHOICE {
    regid VisibleString,                 -- External Database Registry ID
    rn VisibleString,                    -- Registry Number (e.g., EC Number, CAS Number)
    mesh VisibleString,                  -- MESH Index Term
    pmid INTEGER,                        -- PubMed ID

    -- GenBank General ID
    --   Note: Please use protein-gi or nucleotide-gi, if possible
    gi INTEGER,

    mmdb INTEGER,                        -- MMDB ID
    sid INTEGER,                         -- PubChem Substance ID
    cid INTEGER,                         -- PubChem Compound ID
    dburl VisibleString,                 -- Depositor Source Database Homepage
    sburl VisibleString,                 -- Depositor Homepage for a Substance
    asurl VisibleString,                 -- Depositor Homepage for an Assay
    protein-gi INTEGER,                  -- GenBank General ID for a Protein
    nucleotide-gi INTEGER,               -- GenBank General ID for a Nucleotide
    taxonomy INTEGER,                    -- Taxonomy ID for an Organism
    aid INTEGER,                         -- PubChem BioAssay ID
    mim INTEGER,                         -- MIM, Mendelian Inheritance in Man, Number
    gene INTEGER,                        -- Entrez Gene ID
    probe INTEGER,                       -- Probe ID
    biosystem INTEGER,                   -- BioSystem ID
    geogse INTEGER,                      -- Gene Expression Omnibus Series Accession (GEO GSE) ID
    geogsm INTEGER,                      -- Gene Expression Omnibus Sample Accession (GEO GSM) ID
    patent VisibleString,                -- Patent Identifier (e.g., USPTO, EPO, WPO, JPO, CPO)
    protein-accession VisibleString,     -- GenBank Accession for a Protein
    nucleotide-accession VisibleString,  -- GenBank Accession for a Nucleotide
    doi VisibleString,                   -- digital object identifier (DOI)
    citation VisibleString               -- citation when PMID or DOI are not available
}


-- Record describing a single Compound
PC-Compound ::= SEQUENCE {

    -- Tracking Information

    -- Compound Qualifier (Type/ID)
    id PC-CompoundType,

    -- AtomID/Type Information
    atoms PC-Atoms OPTIONAL,

    -- BondID/Type/Atom Information
    bonds PC-Bonds OPTIONAL,

    -- StereoCenter Descriptions
    stereo SEQUENCE OF PC-StereoCenter OPTIONAL,

    -- 2D/3D Coordinate Sets of Compound
    coords SEQUENCE OF PC-Coordinates OPTIONAL,

    -- Provided Total Formal Charge  (Signed Integer)
    charge INTEGER OPTIONAL,

    -- Derived (computed) Properties
    props SEQUENCE OF PC-InfoData OPTIONAL,

    -- Relative stereochemistry groups
    stereogroups SEQUENCE OF PC-StereoGroup OPTIONAL,

    -- Counts of various properties
    count PC-Count OPTIONAL,

    -- Alternate Valence-Bond Forms
    vbalt PC-Compounds OPTIONAL,

    -- Superatom groups
    groups SEQUENCE OF PC-Group OPTIONAL
}


-- Container type holding a group of Compounds
PC-Compounds ::=
    SEQUENCE OF PC-Compound


-- Qualifier describing the type of Compound: deposited, standardized, or derived.
--    Please note that mixtures/cocktails may be specified using previously
--    deposited substances.
PC-CompoundType ::= SEQUENCE {

    -- Compound Qualifier or Type
    type INTEGER {
        -- For Compound Depositions
        deposited (0),       -- Original Deposited Compound

        -- For Standardized Compounds
        standardized (1),    -- Standardized Form of a Deposited Compound
        component (2),       -- Component of a Standardized Compound
        neutralized (3),     -- Neutralized Form of a Standardized Compound

        -- For Mixture/Cocktail Depositions
        mixture (4),         -- Substance that is a component of a mixture

        -- For Theoretical Compounds
        tautomer (5),        -- Predicted Tautomer Form
        pka-state (6),       -- Predicted Ionized pKa Form

        unknown (255)        -- Unknown Compound Type
    } OPTIONAL,

    -- Compound Namespace and ID  (absent for "deposited" type compounds)
    id CHOICE {
        cid INTEGER,         -- Standardized Compound
        sid INTEGER,         -- PubChem Substance (for "mixture" type compounds)
        xid INTEGER          -- PubChem Theoretical Compound
    } OPTIONAL
}


-- Superatom group (e.g. from MOL Sgroup)
PC-Group ::= SEQUENCE {

    -- Atoms in this group (list of aid from PC-Atoms, e.g. from MOL FIELD SAL)
    atoms SEQUENCE OF INTEGER,

    -- Type of group (e.g. from MOL field STY)
    --   These enumerated values are adapted from the ctfile format specification
    type INTEGER {
        sup (1),          -- Superatom
        mul (2),          -- Multiple group
        sru (3),          -- Structure repeat unit (polymer)
        mon (4),          -- Monomer
        mer (5),          -- Mer type
        cop (6),          -- Copolymer
        cro (7),          -- Crosslink
        mod (8),          -- Modification
        gra (9),          -- Graft
        com (10),         -- Component
        mix (11),         -- Mixture
        for (12),         -- Formulation
        dat (13),         -- Data Sgroup
        any (14),         -- Any polymer
        gen (15),         -- Generic
        unknown (255)
    },

    -- Subtype (e.g. from MOL field SST)
    subtype INTEGER {
        alt (1),          -- Alternating
        ran (2),          -- Random
        blo (3),          -- Block
        unknown (255)
    } OPTIONAL,

    -- Connectivity (e.g. from MOL field SCN)
    connectivity INTEGER {
        hh (1),           -- Head-to-head
        ht (2),           -- Head-to-tail
        eu (3),           -- Either unknown
        unknown (255)
    } OPTIONAL,

    -- Label (e.g. from MOL field SLB)
    label INTEGER OPTIONAL,

    -- Subscript (e.g. from MOL field SMT)
    subscript VisibleString OPTIONAL,

    -- Repeat count (e.g. for polymers): either an exact number or a range
    repeat-count CHOICE {
        exact INTEGER,
        range SEQUENCE {
            lower INTEGER,
            upper INTEGER
        }
    } OPTIONAL,

    -- Special bonds in this group (typically capping/crossing bonds, e.g. from MOL field SBL)
    --   If present, from and to must be parallel lists of aid from PC-Bonds
    bonds SEQUENCE {
        from SEQUENCE OF INTEGER,
        to SEQUENCE OF INTEGER
    } OPTIONAL,

    -- Bracket display (e.g. from MOL field SDI)
    brackets SEQUENCE {
        left PC-Bracket,
        right PC-Bracket
    } OPTIONAL
}


-- Display coordinates for one bracket (e.g. from MOL field SDI),
--   given as four REAL values
PC-Bracket ::= SEQUENCE {
    x1 REAL,
    y1 REAL,
    x2 REAL,
    y2 REAL
}


-- Counts of various properties of a Compound (all fields are mandatory)
PC-Count ::= SEQUENCE {

    -- Total count of non-Hydrogen (Heavy) Atoms
    heavy-atom INTEGER,

    -- StereoChemistry Counts

    -- Total count of (SP3) Chiral Atoms
    atom-chiral INTEGER,

    -- Total count of Defined (SP3) Chiral Atoms
    atom-chiral-def INTEGER,

    -- Total count of Undefined (SP3) Chiral Atoms
    atom-chiral-undef INTEGER,

    -- Total count of (SP2) Chiral Bonds
    bond-chiral INTEGER,

    -- Total count of (SP2) Defined Chiral Bonds
    bond-chiral-def INTEGER,

    -- Total count of (SP2) Undefined Chiral Bonds
    bond-chiral-undef INTEGER,

    -- Isotopic Counts

    -- Total count of Atoms with Isotopic Information
    isotope-atom INTEGER,

    -- Discrete Structure Counts

    -- Total count of covalently-bonded units in the record
    covalent-unit INTEGER,

    -- Number of possible tautomers (Max. 999)
    tautomers INTEGER
}


-- List of atom identifiers which are in a common stereochemistry group.
--   * All atoms in this group possess the characteristic of the type specified.
--   * The convention adopted is intended to be compatible with MDL's Enhanced
--     Stereochemical Representation white paper.
--   * An atom can only be member of a single stereo group, and all atoms
--     in a stereo group must have a stereo descriptor.
--   * Stereogroups only apply to stereocenters that can have parity.
PC-StereoGroup ::= SEQUENCE {

    -- Characteristic shared by all atoms of the group
    type INTEGER {
        absolute (1),     -- Absolute configuration is known
        or (2),           -- Relative configuration is known (absolute configuration is unknown)
        and (3),          -- Mixture of stereoisomers
        unknown (255)     -- Unknown configuration type
    },

    -- Atom Identifiers of atoms in this group
    --   Note: Atom ID's must be greater than "0"
    aid SEQUENCE OF INTEGER
}


-- Compound Description/Descriptor Data: a value qualified by a URN
PC-InfoData ::= SEQUENCE {

    -- Universal Resource Name  [for Value Qualification]
    urn PC-Urn,

    -- Data Value (exactly one of the alternatives below)
    value CHOICE {
        bval BOOLEAN,                      -- Boolean or Binary
        bvec SEQUENCE OF BOOLEAN,          -- Boolean Vector
        ival INTEGER,                      -- Integer (signed or unsigned)
        ivec SEQUENCE OF INTEGER,          -- Integer Vector
        fval REAL,                         -- Float or Double
        fvec SEQUENCE OF REAL,             -- Double Vector
        sval VisibleString,                -- String
        slist SEQUENCE OF VisibleString,   -- List of Strings
        date Date,                         -- Date
        binary OCTET STRING,               -- Binary Data
        bitlist BIT STRING                 -- Bit List (specialized version of Boolean vector)
    }
}


-- Universal Resource Name (URN)
--    Provides explicit source information on derived or calculated data.
--    Only "label" is mandatory; every other field is OPTIONAL.
PC-Urn ::= SEQUENCE {

    -- Generic Name or Label for Display  [e.g., "Log P"]
    label VisibleString,

    -- Qualified Name  [e.g., "XlogP"]
    name VisibleString OPTIONAL,

    -- Specific Data Type of Value  [e.g., binary]
    datatype PC-UrnDataType OPTIONAL,

    -- Implementation Parameter  [e.g., "metal=0"]
    parameters VisibleString OPTIONAL,

    -- Implementation Name  [e.g., "E_XlogP"]
    implementation VisibleString OPTIONAL,

    -- Implementation Version  [e.g., "3.317"]
    version VisibleString OPTIONAL,

    -- Implementation Software  [e.g., "Cactvs"]
    software VisibleString OPTIONAL,

    -- Implementation Organization  [e.g., "xemistry.com"]
    source VisibleString OPTIONAL,

    -- NCBI Implementation Release  [e.g., "10.25.2005"]
    release VisibleString OPTIONAL
}


-- URN Data Type
--   Provides the ability to use more specific data types than that directly provided by ASN.1.
--   Provides for more specific validation of specified data.
--   The bracketed text on each value names the ASN.1 type it maps to.
PC-UrnDataType ::= INTEGER {

    -- Basic Data Types
    string (1),         -- String  [maps to a VisibleString]
    stringlist (2),     -- List of Strings  [maps to VisibleString list]
    int (3),            -- 32-Bit Signed Integer  [maps to an INTEGER]
    intvec (4),         -- Vector of 32-Bit Signed Integer  [maps to INTEGER vector]
    uint (5),           -- 32-Bit Unsigned Integer  [maps to an INTEGER]
    uintvec (6),        -- Vector of 32-Bit Unsigned Integer  [maps to INTEGER vector]
    double (7),         -- 64-Bit Float  [maps to a REAL]
    doublevec (8),      -- Vector of Double  [maps to REAL vector]
    bool (9),           -- Boolean or Binary value  [maps to a BOOLEAN]
    boolvec (10),       -- Boolean Vector  [maps to BOOLEAN vector]

    -- Specialized Data Types
    uint64 (11),        -- 64-Bit Unsigned Integer (Hex form)  [maps to a VisibleString]
    binary (12),        -- Binary Data Blob  [maps to an OCTET STRING]
    url (13),           -- URL  [maps to a VisibleString]
    unicode (14),       -- UniCode String  [maps to a VisibleString]
    date (15),          -- ISO8601 Date  [maps to a Date]

    -- Binary Fingerprint (Gzip'ped bit list w/ 4-Byte prefix denoting
    --   bit list length)  [maps to an OCTET STRING]
    fingerprint (16),

    unknown (255)       -- Unknown Data Type  [maps to a set of VisibleString]
}


-- Coordinates for the Compound of a given type
--   "type" and "aid" are mandatory; conformers, atom labels, and data are OPTIONAL.
PC-Coordinates ::= SEQUENCE {
    type           SEQUENCE OF PC-CoordinateType,          -- Coordinate Type Information (vector)
    aid            SEQUENCE OF INTEGER,                    -- Conformer Atom IDs (vector)
                                                           --   (to be kept synchronized with Conformers)
                                                           --   Note: Atom ID's must be greater than "0"
    conformers     SEQUENCE OF PC-Conformer      OPTIONAL, -- Conformers for this Coordinate Set

    atomlabels     SEQUENCE OF PC-AtomString     OPTIONAL, -- Atom labels for Conformer Set

    data           SEQUENCE OF PC-InfoData       OPTIONAL  -- Data Associated with these Coordinates
}


-- Drawing/Conformer Definition (in Parallel Arrays, synchronized to aid integer list)
--   3D coordinates are specified in a right-handed coordinate system. For 2D plots, Y axis leads upwards.
--   "x" and "y" are mandatory; "z", "style", and "data" are OPTIONAL.
PC-Conformer ::= SEQUENCE {
    --  [Note: Parallel Arrays must be kept Synchronized]
    x              SEQUENCE OF REAL,                       -- X Coordinates (vector)
    y              SEQUENCE OF REAL,                       -- Y Coordinates (vector)
    z              SEQUENCE OF REAL              OPTIONAL, -- Z Coordinates (vector)

    style          PC-DrawAnnotations            OPTIONAL, -- Structure Annotations

    data           SEQUENCE OF PC-InfoData       OPTIONAL  -- Data Associated with this Conformer
}


-- Holder for groups of Conformers (an ordered list of PC-Conformer records)
PC-Conformers ::= SEQUENCE OF PC-Conformer


-- Coordinate Set Type Distinctions
--   Values 1-2 give the dimensionality, 3-9 describe how the coordinates were obtained
--   or drawn, and 10-14 (plus 255 for unknown) give the coordinate units.
PC-CoordinateType ::= INTEGER {
        twod                (1),  -- 2D Coordinates
        threed              (2),  -- 3D Coordinates (should also indicate units, below)
        submitted           (3),  -- Depositor Provided Coordinates
        experimental        (4),  -- Experimentally Determined Coordinates
        computed            (5),  -- Computed Coordinates
        standardized        (6),  -- Standardized Coordinates
        augmented           (7),  -- Hybrid Original with Computed Coordinates (e.g., explicit H)
        aligned             (8),  -- Template used to align drawing
        compact             (9),  -- Drawing uses shorthand forms (e.g., COOH, OCH3, Et, etc.)
        units-angstroms    (10),  -- (3D) Coordinate units are Angstroms
        units-nanometers   (11),  -- (3D) Coordinate units are nanometers
        units-pixel        (12),  -- (2D) Coordinate units are pixels
        units-points       (13),  -- (2D) Coordinate units are points
        units-stdbonds     (14),  -- (2D) Coordinate units are standard bond lengths (1.0)
        units-unknown     (255)   -- Coordinate units are unknown or unspecified
}


-- Drawing Annotations (in Parallel Arrays)
--    [Note: A pair of atoms can have multiple annotations]
--    All three vectors are mandatory; being parallel arrays, entry i of each
--    vector belongs to the same annotation record.
PC-DrawAnnotations ::= SEQUENCE {
    --  [Note: Parallel Arrays must be kept Synchronized]
    annotation     SEQUENCE OF PC-BondAnnotation, -- Bond Annotations (vector)
    aid1           SEQUENCE OF INTEGER,           -- Atom1 Identifier (vector)
                                                  --   Note: Atom ID's must be greater than "0"
    aid2           SEQUENCE OF INTEGER            -- Atom2 Identifier (vector)
                                                  --   Note: Atom ID's must be greater than "0"
}


-- Atom-Atom Annotation Information
--   Values 1-12 are assigned; 255 marks an unspecified or unknown annotation.
PC-BondAnnotation ::= INTEGER {
    crossed        (1),                          -- Double Bond that can be both Cis/Trans
    dashed         (2),                          -- Hydrogen-Bond (3D Only?)
    wavy           (3),                          -- Unknown Stereochemistry
    dotted         (4),                          -- Complex/Fractional
    wedge-up       (5),                          -- Above-Plane
    wedge-down     (6),                          -- Below-Plane
    arrow          (7),                          -- Dative
    aromatic       (8),                          -- Aromatic
    resonance      (9),                          -- Resonance
    bold          (10),                          -- Fat Bond (Non-Specific User Interpreted Information)
    fischer       (11),                          -- Interpret Bond Stereo using Fischer Conventions
    closeContact  (12),                          -- Identification of Atom-Atom Close Contacts (3D Only)
    unknown      (255)                           -- Unspecified or Unknown Atom-Atom Annotation
}


-- Atom Information  (in Parallel Arrays)
--   "aid" and "element" are the mandatory parallel arrays; the remaining fields are
--   OPTIONAL lists of per-atom records, each of which carries its own atom identifier.
PC-Atoms ::= SEQUENCE {
    --  [Note: Parallel Arrays must be kept Synchronized]
    aid            SEQUENCE OF INTEGER,                    -- Atom Identifiers (vector)
                                                           --   Note: Atom ID's must be greater than "0"
    element        SEQUENCE OF PC-Element,                 -- Atomic Numbers (vector)

    -- Independent Arrays of ID-Value Pairs  (Technically allows multiple values per Atom)
    label          SEQUENCE OF PC-AtomString     OPTIONAL, -- Atom labels
    isotope        SEQUENCE OF PC-AtomInt        OPTIONAL, -- Isotopic Information
    charge         SEQUENCE OF PC-AtomInt        OPTIONAL, -- Formal Charges
    radical        SEQUENCE OF PC-AtomRadical    OPTIONAL, -- Radical Information
    source         SEQUENCE OF PC-AtomSource     OPTIONAL, -- E.g. identity of MMDB "R" groups
    comment        SEQUENCE OF PC-AtomString     OPTIONAL  -- Atom Comments
}


-- Specification of an Association between an Atom Identifier and Source
--   Both fields are mandatory.
PC-AtomSource ::= SEQUENCE {
    aid            INTEGER,                      -- Atom Identifier for the R-Group Source
                                                 --   Note: Atom ID's must be greater than "0"
    source         PC-MMDBSource                 -- Atom Specific MMDB Record
}


-- Specification of an Association between an Atom Identifier and an Integer Value
--   Both fields are mandatory.
PC-AtomInt ::= SEQUENCE {
    aid            INTEGER,                      -- Atom Identifier for the Value
                                                 --   Note: Atom ID's must be greater than "0"
    value          INTEGER                       -- Value Associated to the ID
}


-- Specification of an Association between an Atom Identifier and a String Value
--   Both fields are mandatory.
PC-AtomString ::= SEQUENCE {
    aid            INTEGER,                      -- Atom Identifier for the Value
                                                 --   Note: Atom ID's must be greater than "0"
    value          VisibleString                 -- Value Associated to the ID
}


-- Rudimentary Atom Electronic Configuration Designation
--   Pairs an atom identifier with a radical type; both fields are mandatory.
PC-AtomRadical ::= SEQUENCE {
    aid            INTEGER,                      -- Atom Identifier for the Value
                                                 --   Note: Atom ID's must be greater than "0"
    type           INTEGER {                     -- Type of Atom Radical
                       singlet    (1),           -- Open-Shell Singlet
                       doublet    (2),           -- Open-Shell Doublet
                       triplet    (3),           -- Open-Shell Triplet
                       quartet    (4),           -- Open-Shell Quartet
                       quintet    (5),           -- Open-Shell Quintet
                       hextet     (6),           -- Open-Shell Hextet
                       heptet     (7),           -- Open-Shell Heptet
                       octet      (8),           -- Open-Shell Octet
                       none     (255)            -- Closed-Shell Singlet
                   }
}


-- Element Information [which may contain "illegal" element values]
--   Named numbers 1-118 are atomic numbers keyed by lower-case element symbol;
--   252-255 are placeholder values that do not correspond to a real element.
PC-Element ::= INTEGER {
    -- Illegal Atom Numbers that may be Interpreted to be something else
    a  (255),   -- Unspecified Atom (Asterisk)
    d  (254),   -- Dummy Atom
    r  (253),   -- Rgroup Label
    lp (252),   -- Lone Pair

    -- Elements (five per row, in order of atomic number)
    h  (1),   he (2),   li (3),   be (4),   b  (5),
    c  (6),   n  (7),   o  (8),   f  (9),   ne (10),
    na (11),  mg (12),  al (13),  si (14),  p  (15),
    s  (16),  cl (17),  ar (18),  k  (19),  ca (20),
    sc (21),  ti (22),  v  (23),  cr (24),  mn (25),
    fe (26),  co (27),  ni (28),  cu (29),  zn (30),
    ga (31),  ge (32),  as (33),  se (34),  br (35),
    kr (36),  rb (37),  sr (38),  y  (39),  zr (40),
    nb (41),  mo (42),  tc (43),  ru (44),  rh (45),
    pd (46),  ag (47),  cd (48),  in (49),  sn (50),
    sb (51),  te (52),  i  (53),  xe (54),  cs (55),
    ba (56),  la (57),  ce (58),  pr (59),  nd (60),
    pm (61),  sm (62),  eu (63),  gd (64),  tb (65),
    dy (66),  ho (67),  er (68),  tm (69),  yb (70),
    lu (71),  hf (72),  ta (73),  w  (74),  re (75),
    os (76),  ir (77),  pt (78),  au (79),  hg (80),
    tl (81),  pb (82),  bi (83),  po (84),  at (85),
    rn (86),  fr (87),  ra (88),  ac (89),  th (90),
    pa (91),  u  (92),  np (93),  pu (94),  am (95),
    cm (96),  bk (97),  cf (98),  es (99),  fm (100),
    md (101), no (102), lr (103), rf (104), db (105),
    sg (106), bh (107), hs (108), mt (109), ds (110),
    rg (111), cn (112), nh (113), fl (114), mc (115),
    lv (116), ts (117), og (118)
}


-- Bond Description Information  (in Parallel Arrays)
--   All three vectors are mandatory; being parallel arrays, entry i of each
--   vector belongs to the same bond record.
PC-Bonds ::= SEQUENCE {
    --  [Note: Parallel Arrays must be kept Synchronized]
    aid1           SEQUENCE OF INTEGER,          -- Atom1 Identifier (vector)
                                                 --   Note: Atom ID's must be greater than "0"
    aid2           SEQUENCE OF INTEGER,          -- Atom2 Identifier (vector)
                                                 --   Note: Atom ID's must be greater than "0"
    order          SEQUENCE OF PC-BondType       -- Bond Type Information (vector)
}


-- Bond Type Information
--   Values 1-7 are assigned; 255 marks unknown or unspecified connectivity.
PC-BondType ::= INTEGER {
    single         (1),                          -- Single Bond
    double         (2),                          -- Double Bond
    triple         (3),                          -- Triple Bond
    quadruple      (4),                          -- Quadruple Bond
    dative         (5),                          -- Dative Bond
    complex        (6),                          -- Complex Bond
    ionic          (7),                          -- Ionic Bond
    unknown      (255)                           -- Unknown/Unspecified Connectivity
}


-- Allowed Stereogenic Center Types
--   [Using IUPAC Stereogenic Center recommendations and terminology]
--   A stereocenter is exactly one of the geometry-specific records below.
PC-StereoCenter ::= CHOICE {
    tetrahedral    PC-StereoTetrahedral,         -- Tetrahedral (SP3) StereoCenter
    planar         PC-StereoPlanar,              -- Planar (SP2) StereoCenter
    squareplanar   PC-StereoSquarePlanar,        -- Square Planar (SP4) StereoCenter
    octahedral     PC-StereoOctahedral,          -- Octahedral (OC-6) / Square Pyramid (SPY-5) StereoCenters
    bipyramid      PC-StereoTrigonalBiPyramid,   -- Trigonal BiPyramid (TBPY-4 and TBPY-5) StereoCenters
    tshape         PC-StereoTShape,              -- T-Shaped (TS-3) StereoCenters
    pentagonal     PC-StereoPentagonalBiPyramid  -- Pentagonal BiPyramid (PBPY-7) StereoCenters
}


-- SP3 Tetrahedral StereoCenter, Trigonal Pyramid Stereogenic Center,
--   Cumulenic StereoCenter (Linear systems of an even number of double bonds),
--   or Hindered biaryl stereocenter (all biaryls have hindered rotation, in that
--   the ortho-hydrogens prevent coplanarity to some extent)
--   [Using IUPAC Stereogenic Center recommendations and terminology]
--   [Note: "-1" can be used for the Atom Identifier to represent a lone-pair or implicit hydrogen]
--   NOTE(review): the per-field notes below say Atom IDs must be greater than "0";
--   presumably the "-1" placeholder above is the one permitted exception -- confirm.
--   The five atom fields are mandatory; "parity" and "type" are OPTIONAL.
PC-StereoTetrahedral ::= SEQUENCE {
    center         INTEGER,                      -- Atom Identifier of Atom Center
                                                 --   Note: Atom ID's must be greater than "0"
    above          INTEGER,                      -- Atom Identifier of Atom Above the Plane
                                                 --   Note: Atom ID's must be greater than "0"
    top            INTEGER,                      -- Atom Identifier of Atom In-Plane and at the Top
                                                 --   Note: Atom ID's must be greater than "0"
    bottom         INTEGER,                      -- Atom Identifier of Atom In-Plane and at the Bottom
                                                 --   Note: Atom ID's must be greater than "0"
    below          INTEGER,                      -- Atom Identifier of Atom Below the Plane
                                                 --   Note: Atom ID's must be greater than "0"
    parity         INTEGER {                     -- StereoCenter Designation
                       clockwise          (1),
                       counterclockwise   (2),
                       any                (3),
                       unknown          (255)
                   }                  OPTIONAL,
    type           INTEGER {                     -- Type of StereoCenter (Tetrahedral, if not specified)
                       tetrahedral        (1),   -- Tetrahedral StereoCenter
                       cumulenic          (2),   -- Cumulenic StereoCenter
                       biaryl             (3)    -- Biaryl StereoCenter
                   }                  OPTIONAL
}


-- SP2 Planar Stereogenic Center, Cumulenic StereoCenter (Linear systems of an odd
--   number of double bonds present planar stereochemistry)
--   [Using IUPAC Stereogenic Center recommendations and terminology]
--   [Note: "-1" can be used for the Atom Identifier to represent a lone-pair or implicit hydrogen]
--   NOTE(review): the per-field notes below say Atom IDs must be greater than "0";
--   presumably the "-1" placeholder above is the one permitted exception -- confirm.
--   The six atom fields are mandatory; "parity" and "type" are OPTIONAL.
PC-StereoPlanar ::= SEQUENCE {
    left           INTEGER,                      -- Atom ID of Left Double Bond Atom
                                                 --   Note: Atom ID's must be greater than "0"
    ltop           INTEGER,                      -- Atom ID of Top Atom attached to the Left Double Bond Atom
                                                 --   Note: Atom ID's must be greater than "0"
    lbottom        INTEGER,                      -- Atom ID of Bottom Atom attached to the Left Double Bond Atom
                                                 --   Note: Atom ID's must be greater than "0"
    right          INTEGER,                      -- Atom ID of Right Double Bond Atom
                                                 --   Note: Atom ID's must be greater than "0"
    rtop           INTEGER,                      -- Atom ID of Top Atom attached to the Right Double Bond Atom
                                                 --   Note: Atom ID's must be greater than "0"
    rbottom        INTEGER,                      -- Atom ID of Bottom Atom attached to the Right Double Bond Atom
                                                 --   Note: Atom ID's must be greater than "0"
    parity         INTEGER {                     -- StereoCenter Designation
                       same             (1),
                       opposite         (2),
                       any              (3),
                       unknown        (255)
                   }                  OPTIONAL,
    type           INTEGER {                     -- Type of StereoCenter (SP2 Planar, if not specified)
                       planar           (1),     -- SP2 Planar StereoCenter
                       cumulenic        (2)      -- Cumulenic StereoCenter
                   }                  OPTIONAL
}


-- Square Planar (SP4) StereoCenters
--   [Using IUPAC Stereogenic Center recommendations and terminology]
--   [Note: "-1" can be used for the Atom Identifier to represent a lone-pair or implicit hydrogen]
--   NOTE(review): the per-field notes below say Atom IDs must be greater than "0";
--   presumably the "-1" placeholder above is the one permitted exception -- confirm.
--   The five atom fields are mandatory; "parity" is OPTIONAL.
PC-StereoSquarePlanar ::= SEQUENCE {
    center         INTEGER,                      -- Atom ID of Atom Center
                                                 --   Note: Atom ID's must be greater than "0"
    lbelow         INTEGER,                      -- Atom ID of Left Below Plane Atom
                                                 --   Note: Atom ID's must be greater than "0"
    rbelow         INTEGER,                      -- Atom ID of Right Below Plane Atom
                                                 --   Note: Atom ID's must be greater than "0"
    labove         INTEGER,                      -- Atom ID of Left Above Plane Atom
                                                 --   Note: Atom ID's must be greater than "0"
    rabove         INTEGER,                      -- Atom ID of Right Above Plane Atom
                                                 --   Note: Atom ID's must be greater than "0"
    parity         INTEGER {                     -- StereoCenter Type
                       u-shape          (1),     --   U shaped isomer (labove-lbelow-rbelow-rabove)
                       z-shape          (2),     --   Z shaped isomer (labove-rabove-lbelow-rbelow)
                       x-shape          (3),     --   X shaped isomer (labove-rbelow-rabove-lbelow)
                       any              (4),     --   Nonspecific mixture of isomers
                       unknown        (255)
                   }                  OPTIONAL
}


-- Octahedral (OC-6) and Square Pyramid (SPY-5) StereoCenters
--   [Using IUPAC Stereogenic Center recommendations and terminology]
--   [Note: "-1" can be used for the Atom Identifier to represent a lone-pair or implicit hydrogen]
--   NOTE(review): the per-field notes below say Atom IDs must be greater than "0";
--   presumably the "-1" placeholder above is the one permitted exception -- confirm.
--   All seven fields (the center plus six surrounding positions) are mandatory.
PC-StereoOctahedral ::= SEQUENCE {
    center         INTEGER,                      -- Atom ID of Atom Center
                                                 --   Note: Atom ID's must be greater than "0"
    top            INTEGER,                      -- Atom ID of Atom In-Plane and at the Top
                                                 --   Note: Atom ID's must be greater than "0"
    bottom         INTEGER,                      -- Atom ID of Atom In-Plane and at the Bottom
                                                 --   Note: Atom ID's must be greater than "0"
    labove         INTEGER,                      -- Atom ID of Atom Above the Plane on the Left
                                                 --   Note: Atom ID's must be greater than "0"
    lbelow         INTEGER,                      -- Atom ID of Atom Below the Plane on the Left
                                                 --   Note: Atom ID's must be greater than "0"
    rabove         INTEGER,                      -- Atom ID of Atom Above the Plane on the Right
                                                 --   Note: Atom ID's must be greater than "0"
    rbelow         INTEGER                       -- Atom ID of Atom Below the Plane on the Right
                                                 --   Note: Atom ID's must be greater than "0"
}


-- Trigonal BiPyramid (TBPY-4 and TBPY-5) StereoCenters
--   [Using IUPAC Stereogenic Center recommendations and terminology]
--   [Note: "-1" can be used for the Atom Identifier to represent a lone-pair or implicit hydrogen]
--   NOTE(review): the per-field notes below say Atom IDs must be greater than "0";
--   presumably the "-1" placeholder above is the one permitted exception -- confirm.
--   All six fields (the center plus five surrounding positions) are mandatory.
PC-StereoTrigonalBiPyramid ::= SEQUENCE {
    center         INTEGER,                      -- Atom ID of Atom Center
                                                 --   Note: Atom ID's must be greater than "0"
    above          INTEGER,                      -- Atom ID of Atom Above the Plane
                                                 --   Note: Atom ID's must be greater than "0"
    below          INTEGER,                      -- Atom ID of Atom Below the Plane
                                                 --   Note: Atom ID's must be greater than "0"
    top            INTEGER,                      -- Atom ID of Atom In-Plane and at the Top
                                                 --   Note: Atom ID's must be greater than "0"
    bottom         INTEGER,                      -- Atom ID of Atom In-Plane and at the Bottom
                                                 --   Note: Atom ID's must be greater than "0"
    right          INTEGER                       -- Atom ID of Atom In-Plane and to the Right
                                                 --   Note: Atom ID's must be greater than "0"
}


-- T-Shaped (TS-3) StereoCenters
--   [Using IUPAC Stereogenic Center recommendations and terminology]
--   [Note: "-1" can be used for the Atom Identifier to represent a lone-pair or implicit hydrogen]
--   NOTE(review): the per-field notes below say Atom IDs must be greater than "0";
--   presumably the "-1" placeholder above is the one permitted exception -- confirm.
--   All four fields (the center plus three surrounding positions) are mandatory.
PC-StereoTShape ::= SEQUENCE {
    center         INTEGER,                      -- Atom ID of Atom Center
                                                 --   Note: Atom ID's must be greater than "0"
    top            INTEGER,                      -- Atom ID of Atom In-Plane and at the Top
                                                 --   Note: Atom ID's must be greater than "0"
    bottom         INTEGER,                      -- Atom ID of Atom In-Plane and at the Bottom
                                                 --   Note: Atom ID's must be greater than "0"
    above          INTEGER                       -- Atom ID of Atom Above the Plane
                                                 --   Note: Atom ID's must be greater than "0"
}


-- Pentagonal BiPyramid (PBPY-7) StereoCenters
--   [Using IUPAC Stereogenic Center recommendations and terminology]
--   [Note: "-1" can be used for the Atom Identifier to represent a lone-pair or implicit hydrogen]
--   NOTE(review): the per-field notes below say Atom IDs must be greater than "0";
--   presumably the "-1" placeholder above is the one permitted exception -- confirm.
--   All eight fields (the center plus seven surrounding positions) are mandatory.
PC-StereoPentagonalBiPyramid ::= SEQUENCE {
    center         INTEGER,                      -- Atom ID of Atom Center
                                                 --   Note: Atom ID's must be greater than "0"
    top            INTEGER,                      -- Atom ID of Atom In-Plane and at the Top
                                                 --   Note: Atom ID's must be greater than "0"
    bottom         INTEGER,                      -- Atom ID of Atom In-Plane and at the Bottom
                                                 --   Note: Atom ID's must be greater than "0"
    left           INTEGER,                      -- Atom ID of Atom In-Plane and at the Left
                                                 --   Note: Atom ID's must be greater than "0"
    labove         INTEGER,                      -- Atom ID of Atom Above the Plane on the Left
                                                 --   Note: Atom ID's must be greater than "0"
    lbelow         INTEGER,                      -- Atom ID of Atom Below the Plane on the Left
                                                 --   Note: Atom ID's must be greater than "0"
    rabove         INTEGER,                      -- Atom ID of Atom Above the Plane on the Right
                                                 --   Note: Atom ID's must be greater than "0"
    rbelow         INTEGER                       -- Atom ID of Atom Below the Plane on the Right
                                                 --   Note: Atom ID's must be greater than "0"
}

END

-- proj.asn
--$Revision: 6.3 $
--****************************************************************
--
--  NCBI Project Definition Module
--  by Jim Ostell and Jonathan Kans, 1998
--
--****************************************************************

NCBI-Project DEFINITIONS ::=
BEGIN

EXPORTS Project, Project-item;

IMPORTS Date FROM NCBI-General
        PubMedId FROM NCBI-Biblio
        Seq-id, Seq-loc FROM NCBI-Seqloc
        Seq-annot, Pubdesc FROM NCBI-Sequence
        Seq-entry FROM NCBI-Seqset
        Pubmed-entry FROM NCBI-PubMed;

-- A project: an optional description plus exactly one Project-item payload
Project ::= SEQUENCE {
    descr Project-descr OPTIONAL ,
    data Project-item }

-- The payload of a project.  Exactly one alternative is present; they group by
-- the kind of data carried:
--   *uid                : sets of INTEGER identifiers
--   pmid                : set of PubMedId
--   protid .. genomeid  : sets of Seq-id
--   pment               : set of Pubmed-entry
--   protent .. genomeent: sets of Seq-entry
--   seqannot, loc, proj : sets of Seq-annot, Seq-loc, or nested Project
--   structid, structent : NULL, i.e. these alternatives carry no data
-- NOTE(review): the pm/prot/nuc/seq/genome/struct prefixes presumably stand for
-- PubMed, protein, nucleotide, sequence, genome, and structure -- confirm.
Project-item ::= CHOICE {
    pmuid SET OF INTEGER ,
    protuid SET OF INTEGER ,
    nucuid SET OF INTEGER ,
    sequid SET OF INTEGER ,
    genomeuid SET OF INTEGER ,
    structuid SET OF INTEGER ,
    pmid SET OF PubMedId ,
    protid SET OF Seq-id ,
    nucid SET OF Seq-id ,
    seqid SET OF Seq-id ,
    genomeid SET OF Seq-id ,
    structid NULL ,
    pment SET OF Pubmed-entry ,
    protent SET OF Seq-entry ,
    nucent SET OF Seq-entry ,
    seqent SET OF Seq-entry ,
    genomeent SET OF Seq-entry ,
    structent NULL ,
    seqannot SET OF Seq-annot ,
    loc SET OF Seq-loc ,
    proj SET OF Project
}

-- Description of a project: a mandatory set of ids, plus an optional name and
-- an optional set of descriptors
Project-descr ::= SEQUENCE {
    id SET OF Project-id ,
    name VisibleString OPTIONAL ,
    descr SET OF Projdesc OPTIONAL }

-- A single project descriptor: a publication, a date, a comment, or a title
Projdesc ::= CHOICE {
    pub Pubdesc ,
    date Date ,
    comment VisibleString ,
    title VisibleString
}

-- A project identifier is a plain string
Project-id ::= VisibleString

END


-- pub.asn
--$Revision: 6.0 $
--********************************************************************
--
--  Publication common set
--  James Ostell, 1990
--
--  This is the base class definitions for Publications of all sorts
--
--  support for PubMedId added in 1996
--********************************************************************

NCBI-Pub DEFINITIONS ::=
BEGIN

EXPORTS Pub, Pub-set, Pub-equiv;

IMPORTS Medline-entry FROM NCBI-Medline
        Cit-art, Cit-jour, Cit-book, Cit-proc, Cit-pat, Id-pat, Cit-gen,
        Cit-let, Cit-sub, PubMedId FROM NCBI-Biblio;

-- One publication reference, expressed in exactly one of the forms below
Pub ::= CHOICE {
    gen      Cit-gen,          -- general or generic unparsed citation
    sub      Cit-sub,          -- submission
    medline  Medline-entry,
    muid     INTEGER,          -- medline uid
    article  Cit-art,
    journal  Cit-jour,
    book     Cit-book,
    proc     Cit-proc,         -- proceedings of a meeting
    patent   Cit-pat,
    pat-id   Id-pat,           -- identifies a patent
    man      Cit-let,          -- manuscript, thesis, or letter
    equiv    Pub-equiv,        -- the same citation given in a variety of ways
    pmid     PubMedId          -- PubMedId
}

-- Equivalent identifiers for the same citation
Pub-equiv ::= SET OF Pub

-- A homogeneous collection of publications: either generic Pubs or a set of
-- one specific citation type
Pub-set ::= CHOICE {
    pub      SET OF Pub,
    medline  SET OF Medline-entry,
    article  SET OF Cit-art,
    journal  SET OF Cit-jour,
    book     SET OF Cit-book,
    proc     SET OF Cit-proc,  -- proceedings of a meeting
    patent   SET OF Cit-pat
}

END


-- pubmed.asn
--$Revision: 6.0 $
--**********************************************************************
--
--  PUBMED data definitions
--
--**********************************************************************

NCBI-PubMed DEFINITIONS ::=
BEGIN

EXPORTS Pubmed-entry, Pubmed-url;

IMPORTS PubMedId FROM NCBI-Biblio
        Medline-entry FROM NCBI-Medline;

-- A PubMed record.  Only "pmid" is mandatory; every other field is OPTIONAL.
Pubmed-entry ::= SEQUENCE {        -- a PubMed entry
    -- PUBMED records must include the PubMedId
    pmid PubMedId,

    -- Medline entry information
    medent Medline-entry OPTIONAL,

    -- Publisher name
    publisher VisibleString OPTIONAL,

    -- List of URLs to the publisher site
    urls SET OF Pubmed-url OPTIONAL,

    -- Publisher's article identifier
    pubid VisibleString OPTIONAL
}

-- A URL, optionally tagged with a location code; "url" itself is mandatory
Pubmed-url ::= SEQUENCE {
    location VisibleString OPTIONAL, -- Location code
    url VisibleString                -- Selected URL for location
}

END

-- remap.asn
--$Id: remap.asn,v 1.2 2004/07/28 13:43:33 jcherry Exp $********************************************
--
--  remap.asn
--   Version 1
--
--   API for remapping locations on sequences
--
--   Author: Josh Cherry
--
--***************************************************************

NCBI-Remap DEFINITIONS ::=
BEGIN

IMPORTS Seq-loc FROM NCBI-Seqloc;


Remap-dt ::= INTEGER                   -- a date/time stamp
Remap-db-id ::= VisibleString          -- database name


  --***************************************
  --  Remap Request types
  --***************************************
       --****************************************
       -- The basic request wrapper leaves space for a version, which
       --   allows the server to support older clients
       -- The tool parameter allows us to log the client types for
       --   debugging and tuning
       --****************************************
       
-- Request envelope: the request proper and a spec version are mandatory;
-- the tool name is OPTIONAL
Remap-request ::= SEQUENCE {           -- a standard request
  request RMRequest ,                    -- the actual request
  version INTEGER ,                      -- ASN1 spec version
  tool VisibleString OPTIONAL }          -- tool making request

-- The request proper: exactly one of the alternatives below
RMRequest ::= CHOICE {                   -- request types
  remap Remap-query ,                    -- do the actual remapping
  maps-to-builds VisibleString ,         -- what builds can this be mapped to?
  maps-from-builds VisibleString ,       -- what builds can be mapped to this?
  all-builds NULL }                      -- all the builds the server knows of

-- A remapping job: source build, target build, and the locations to convert
Remap-query ::= SEQUENCE {
  from-build VisibleString ,             -- build to map from
  to-build VisibleString ,               -- build to map to
  locs SEQUENCE OF Seq-loc }             -- the locations to remap

  --**********************************************************
  -- Replies from the server
  --  all replies contain the date/time stamp when they were executed
  --**********************************************************

-- Reply envelope: reply, time stamp, and server info are mandatory;
-- the user message is OPTIONAL
Remap-reply ::= SEQUENCE {
  reply RMReply ,                       -- the actual reply
  dt Remap-dt ,                         -- date/time stamp from server
  server VisibleString ,                -- server version info
  msg VisibleString OPTIONAL }          -- possibly a message to the user

-- The reply proper: an error string, or the result matching the request type
RMReply ::= CHOICE {
  error VisibleString ,                 -- if nothing can be returned
  remap Remap-result ,                  -- result of actual remapping
  maps-to-builds SEQUENCE OF VisibleString ,  -- all the builds that the server
                                              -- knows how to map this build to
  maps-from-builds SEQUENCE OF VisibleString ,-- all the builds that the server
                                              -- knows how to map to this build
  all-builds SEQUENCE OF VisibleString } -- all builds that the server knows of

Remap-result ::= SEQUENCE OF Seq-loc  -- remapped locations

END


-- scoremat.asn
--$Id: scoremat.asn 655681 2022-09-07 14:57:35Z lanczyck $
-- ===========================================================================
--
--                            PUBLIC DOMAIN NOTICE
--               National Center for Biotechnology Information
--
--  This software/database is a "United States Government Work" under the
--  terms of the United States Copyright Act.  It was written as part of
--  the author's official duties as a United States Government employee and
--  thus cannot be copyrighted.  This software/database is freely available
--  to the public for use. The National Library of Medicine and the U.S.
--  Government have not placed any restriction on its use or reproduction.
--
--  Although all reasonable efforts have been taken to ensure the accuracy
--  and reliability of the software and data, the NLM and the U.S.
--  Government do not and cannot warrant the performance or results that
--  may be obtained by using this software or data. The NLM and the U.S.
--  Government disclaim all warranties, express or implied, including
--  warranties of performance, merchantability or fitness for any particular
--  purpose.
--
--  Please cite the author in any work or product based on this material.
--
-- ===========================================================================
--
-- Author:  Christiam Camacho
--
-- File Description:
--      ASN.1 definitions for scoring matrix
--
-- ===========================================================================

NCBI-ScoreMat DEFINITIONS ::= BEGIN

EXPORTS    Pssm, PssmIntermediateData, PssmFinalData, 
           PssmParameters, PssmWithParameters;
    
IMPORTS    Object-id   FROM NCBI-General
           Seq-entry   FROM NCBI-Seqset;

-- a rudimentary block/core-model, to be used with block-based alignment 
-- routines and threading

-- A typed property that can be attached to a CoreBlock (see
-- CoreBlock.property). "type" selects the kind of property; intvalue and
-- textvalue are optional value slots.
-- NOTE(review): which slot is expected for which type is not stated here.
BlockProperty ::= SEQUENCE {
  type     INTEGER { unassigned  (0),
                     threshold   (1),       -- score threshold for heuristics
		     minscore    (2),       -- observed minimum score in CD
		     maxscore    (3),       -- observed maximum score in CD
		     meanscore   (4),       -- observed mean score in CD
		     variance    (5),       -- observed score variance
		     name       (10),       -- just name the block
		     is-optional(20),       -- block may not have to be used
                     other     (255) },
  intvalue  INTEGER OPTIONAL,               -- integer value slot
  textvalue VisibleString OPTIONAL          -- text value slot
}

-- One block of a CoreDef: a start/stop range on the query, optional
-- extension limits at either end, and optional properties.
CoreBlock ::= SEQUENCE {
  start          INTEGER,                   -- begin of block on query
  stop           INTEGER,                   -- end of block on query
  minstart       INTEGER OPTIONAL,          -- optional N-terminal extension
  maxstop        INTEGER OPTIONAL,          -- optional C-terminal extension
  property       SEQUENCE OF BlockProperty OPTIONAL  -- properties of this block
}

-- Length bounds for one unaligned region. Both bounds have defaults, so a
-- LoopConstraint may be given with no fields set. Used in CoreDef.loops.
LoopConstraint ::= SEQUENCE {
  minlength      INTEGER DEFAULT 0,         -- minimum length of unaligned region
  maxlength      INTEGER DEFAULT 100000     -- maximum length of unaligned region
}

-- Block/core model: the blocks themselves plus the loop constraints that
-- go with them (one more constraint than there are blocks).
CoreDef ::= SEQUENCE {
  nblocks        INTEGER,                   -- number of core elements/blocks
  blocks         SEQUENCE OF CoreBlock,     -- nblocks locations
  loops          SEQUENCE OF LoopConstraint, -- (nblocks+1) constraints

  isDiscontinuous BOOLEAN OPTIONAL,         -- is it a discontinuous domain

  insertions SEQUENCE OF INTEGER OPTIONAL   -- positions of long insertions
}

-- One annotated site: a start/stop range on the PSSM plus optional
-- descriptive data (description, type, aliases, motif).
Site-annot ::= SEQUENCE {
  startPosition  INTEGER,                -- location of the annotation,
  stopPosition   INTEGER,                -- start and stop position in the
                                         -- PSSM

  description    VisibleString OPTIONAL, -- holds description or names, that
                                         -- can be used for labels in
                                         -- visualization

  type           INTEGER OPTIONAL,       -- type of the annotated feature,
                                         -- similarly to Align-annot in
                                         -- NCBI-Cdd

  aliases        SEQUENCE OF VisibleString OPTIONAL, -- additional names for
                                                     -- the annotation

  motif          VisibleString OPTIONAL, -- motif to validate mapping of sites

  motifuse       INTEGER OPTIONAL        -- how the motif is used:
                                         -- 0 for validation
                                         -- 1 for motif in seqloc
                                         -- 2 for multiple motifs in seqloc
}

-- List of site annotations; this is the type of PssmParameters.annotatedSites.
Site-annot-set ::= SEQUENCE OF Site-annot

-- ===========================================================================
-- PSI-BLAST, formatrpsdb, RPS-BLAST workflow:
-- ===========================================
--
-- Two possible inputs to PSI-BLAST and formatrpsdb (PssmWithParams is
-- shorthand for the PssmWithParameters type defined below):
-- 1) PssmWithParams where pssm field contains intermediate PSSM data (matrix 
--    of frequency ratios)
-- 2) PssmWithParams where pssm field contains final PSSM data (matrix of 
--    scores and statistical parameters) - such as written by cddumper
--
-- In case 1, PSI-BLAST's PSSM engine is invoked to create the PSSM and perform
-- the PSI-BLAST search or build the PSSM to then build the RPS-BLAST database.
-- In case 2, PSI-BLAST's PSSM engine is not invoked; the matrix of scores and
-- the statistical parameters are used to perform the search in PSI-BLAST, and
-- the same data together with the data in
-- PssmWithParameters::params::rpsdbparams are used to build the PSSM and
-- ultimately the RPS-BLAST database.
-- 
-- 
--                 reads    ++++++++++++++ writes
-- PssmWithParams  ====>    + PSI-BLAST  + =====> PssmWithParams
--                          ++++++++++++++             |  ^
--         ^                                           |  |
--         |                                           |  |
--         +===========================================+  |
--                                                     |  |
--         +===========================================+  |
--         |                                              |
-- reads   |                                              | 
--         v                                              |
--  +++++++++++++++ writes +++++++++++++++++++++++        |
--  | formatrpsdb | =====> | RPS-BLAST databases |        |
--  +++++++++++++++        +++++++++++++++++++++++        |
--                                   ^                    |
--                                   |                    |
--                                   | reads              |
--                             +++++++++++++              |
--                             | RPS-BLAST |              |
--                             +++++++++++++              |
--                                                        |
--       reads  ++++++++++++               writes         |
--  Cdd ======> | cddumper | =============================+
--              ++++++++++++
--
-- ===========================================================================

-- Contains the PSSM's scores and its associated statistical parameters. 
-- Dimensions and order in which scores are stored must be the same as that 
-- specified in Pssm::numRows, Pssm::numColumns, and Pssm::byRow
PssmFinalData ::= SEQUENCE {

    -- PSSM's scores
    scores              SEQUENCE OF INTEGER, 

    -- Karlin & Altschul parameter produced during the PSSM's calculation
    lambda              REAL,

    -- Karlin & Altschul parameter produced during the PSSM's calculation
	kappa               REAL,

    -- Karlin & Altschul parameter produced during the PSSM's calculation
    h                   REAL,

    -- scaling factor used to obtain more precision when building the PSSM.
    -- (i.e.: scores are scaled by this value). By default, PSI-BLAST's PSSM
    -- engine generates PSSMs which are not scaled-up, however, if PSI-BLAST is
    -- given a PSSM which contains a scaled-up PSSM (indicated by having a
    -- scalingFactor greater than 1), then it will scale down the PSSM to
    -- perform the initial stages of the search with it.
    -- N.B.: When building RPS-BLAST databases, if formatrpsdb is provided 
    -- scaled-up PSSMs, it will ensure that all PSSMs used to build the 
    -- RPS-BLAST database are scaled by the same factor (otherwise, RPS-BLAST 
    -- will silently produce incorrect results).
    scalingFactor       INTEGER DEFAULT 1,

    -- Karlin & Altschul parameter produced during the PSSM's calculation
    -- (optional; see field name for the ungapped variant)
    lambdaUngapped      REAL OPTIONAL,

    -- Karlin & Altschul parameter produced during the PSSM's calculation
    -- (optional; see field name for the ungapped variant)
	kappaUngapped       REAL OPTIONAL,

    -- Karlin & Altschul parameter produced during the PSSM's calculation
    -- (optional; see field name for the ungapped variant)
    hUngapped           REAL OPTIONAL,

    -- Word score threshold
    wordScoreThreshold    REAL OPTIONAL
}

-- Contains the PSSM's intermediate data used to create the PSSM's scores 
-- and statistical parameters. Dimensions and order in which scores are 
-- stored must be the same as that specified in Pssm::numRows, 
-- Pssm::numColumns, and Pssm::byRow.
-- freqRatios is the only mandatory field; every other field is OPTIONAL.
PssmIntermediateData ::= SEQUENCE {

    -- observed residue frequencies (or counts) per position of the PSSM 
    -- (prior to application of pseudocounts)
    resFreqsPerPos              SEQUENCE OF INTEGER OPTIONAL, 

    -- Weighted observed residue frequencies per position of the PSSM.
    -- (N.B.: each position's weights should add up to 1.0).
    -- This field corresponds to f_i (f sub i) in equation 2 of 
    -- Nucleic Acids Res. 2001 Jul 15;29(14):2994-3005.
    -- NOTE: this is needed for diagnostics information only (i.e.:
    -- -out_ascii_pssm option in psiblast)
    weightedResFreqsPerPos      SEQUENCE OF REAL OPTIONAL,

    -- PSSM's frequency ratios
    freqRatios                  SEQUENCE OF REAL,

    -- Information content per position of the PSSM
    -- NOTE: this is needed for diagnostics information only (i.e.:
    -- -out_ascii_pssm option in psiblast)
    informationContent          SEQUENCE OF REAL OPTIONAL,

    -- Relative weight for columns of the PSSM without gaps to pseudocounts
    -- NOTE: this is needed for diagnostics information only (i.e.:
    -- -out_ascii_pssm option in psiblast)
    gaplessColumnWeights        SEQUENCE OF REAL OPTIONAL,

    -- Used in sequence weights computation
    -- NOTE: this is needed for diagnostics information only (i.e.:
    -- -out_ascii_pssm option in psiblast)
    sigma                       SEQUENCE OF REAL OPTIONAL,

    -- Length of the aligned regions per position of the query sequence
    -- NOTE: this is needed for diagnostics information only (i.e.:
    -- -out_ascii_pssm option in psiblast)
    intervalSizes               SEQUENCE OF INTEGER OPTIONAL,

    -- Number of matching sequences per position of the PSSM (including the
    -- query)
    -- NOTE: this is needed for diagnostics information only (i.e.:
    -- -out_ascii_pssm option in psiblast)
    numMatchingSeqs             SEQUENCE OF INTEGER OPTIONAL,

    -- Number of independent observations per position of the PSSM
    -- NOTE: this is needed for building CDD database for DELTA-BLAST
    numIndeptObsr               SEQUENCE OF REAL OPTIONAL
}

-- Position-specific scoring matrix
--
-- Column indices on the PSSM refer to the positions corresponding to the
-- query/master sequence, i.e. the number of columns (N) is the same
-- as the length of the query/master sequence. 
-- Row indices refer to individual amino acid types, i.e. the number of 
-- rows (M) is the same as the number of different residues in the 
-- alphabet we use. Consequently, row labels are amino acid identifiers.
--
-- PSSMs are stored as linear arrays of integers. By default, we store
-- them column-by-column, M values for the first column followed by M
-- values for the second column, and so on. In order to provide
-- flexibility for external applications, the boolean field "byRow" is 
-- provided to specify the storage order.
Pssm ::= SEQUENCE {

    -- Is this a protein or nucleotide scoring matrix?
    isProtein       BOOLEAN DEFAULT TRUE,	

    -- PSSM identifier
    identifier      Object-id OPTIONAL,	

    -- The dimensions of the matrix are returned so the client can
    -- verify that all data was received.

    numRows         INTEGER,	-- number of rows
    numColumns      INTEGER,	-- number of columns

    -- rowLabels is given to note the order of residue types so that it can
    -- be cross-checked between applications.
    -- If this field is not given, the matrix values are presented in 
    -- order of the alphabet ncbistdaa is used for protein, ncbi4na for nucl.
    -- for proteins the values returned correspond to 
    -- (-,-), (-,A), (-,B), (-,C) ... (A,-), (A,A), (A,B), (A,C) ...
    rowLabels       SEQUENCE OF VisibleString OPTIONAL,

    -- are matrices stored row by row? (default: column by column)
    byRow           BOOLEAN DEFAULT FALSE, 

    -- PSSM representative sequence (master) 
    query           Seq-entry OPTIONAL,           

    -- both intermediateData and finalData can be provided, but at least one of
    -- them must be provided.
    -- N.B.: by default PSI-BLAST will return the PSSM in its PssmIntermediateData 
    -- representation. 

    -- Intermediate data for the PSSM
    intermediateData    PssmIntermediateData OPTIONAL,

    -- Final representation for the PSSM
    finalData           PssmFinalData OPTIONAL
}

-- This structure is used to create the RPS-BLAST database auxiliary file 
-- (*.aux) and it contains parameters set at creation time of the PSSM.
-- Also, the matrixName field is used by formatrpsdb to build a PSSM from 
-- a Pssm structure which only contains PssmIntermediateData.
-- Only matrixName is mandatory; both gap penalties are OPTIONAL.
FormatRpsDbParameters ::= SEQUENCE {

    -- name of the underlying score matrix whose frequency ratios were
    -- used in PSSM construction (e.g.: BLOSUM62)
    matrixName   VisibleString,

    -- gap opening penalty corresponding to the matrix above
    gapOpen      INTEGER OPTIONAL,             

    -- gap extension penalty corresponding to the matrix above
    gapExtend    INTEGER OPTIONAL

}

-- Populated by PSSM engine of PSI-BLAST, original source for these values 
-- are the PSI-BLAST options specified using the BLAST options API.
-- Every field is OPTIONAL.
PssmParameters ::= SEQUENCE {

    -- pseudocount constant used for PSSM. This field corresponds to beta in 
    -- equation 2 of Nucleic Acids Res. 2001 Jul 15;29(14):2994-3005.
    pseudocount INTEGER OPTIONAL,             

    -- data needed by formatrpsdb to create RPS-BLAST databases. matrixName is
    -- populated by PSI-BLAST
    rpsdbparams     FormatRpsDbParameters OPTIONAL,

    -- alignment constraints needed by sequence-structure threader
    -- and other global or local block-alignment algorithms
    constraints     CoreDef OPTIONAL,

    -- bit score threshold for specific conserved domain hits
    bitScoreThresh  REAL OPTIONAL,

    -- bit score threshold for reporting any conserved domain hits
    bitScoreReportingThresh  REAL OPTIONAL,

    -- conserved functional sites with annotations
    annotatedSites  Site-annot-set OPTIONAL
}

-- Envelope containing PSSM and the parameters used to create it. 
-- Provided for use in PSI-BLAST, formatrpsdb, and for the structure group.
PssmWithParameters ::= SEQUENCE {

    -- This field is applicable to PSI-BLAST and formatrpsdb.
    -- When both the intermediate and final PSSM data are provided in this
    -- field, the final data (matrix of scores and associated statistical
    -- parameters) takes precedence and that data is used for further
    -- processing. The rationale for this is that the PSSM's scores and
    -- statistical parameters might have been calculated by other applications
    -- and it might not be possible to recreate it by using PSI-BLAST's PSSM 
    -- engine.
	pssm        Pssm,

    -- This field's rpsdbparams is used to specify the values of options 
    -- for processing by formatrpsdb. If these are not set, the command 
    -- line defaults of formatrpsdb are applied. This field is used
    -- by PSI-BLAST to verify that the underlying score matrix used to BUILD
    -- the PSSM is the same as the one being specified through the BLAST
    -- Options API. If this field is omitted, no verification will be
    -- performed, so be careful to keep track of what matrix was used to build
    -- the PSSM or else the results produced by PSI-BLAST will be unreliable.
    params      PssmParameters OPTIONAL
}

END

-- seq.asn
--$Revision: 587100 $
--**********************************************************************
--
--  NCBI Sequence elements
--  by James Ostell, 1990
--  Version 3.0 - June 1994
--
--**********************************************************************

NCBI-Sequence DEFINITIONS ::=
BEGIN

EXPORTS Annotdesc, Annot-descr, Bioseq, GIBB-mol, Heterogen, MolInfo,
        Numbering, Pubdesc, Seq-annot, Seq-data, Seqdesc, Seq-descr, Seq-ext,
        Seq-hist, Seq-inst, Seq-literal, Seqdesc, Delta-ext, Seq-gap;

IMPORTS Date, Int-fuzz, Dbtag, Object-id, User-object FROM NCBI-General
        Seq-align FROM NCBI-Seqalign
        Seq-feat, ModelEvidenceSupport FROM NCBI-Seqfeat
        Seq-graph FROM NCBI-Seqres
        Pub-equiv FROM NCBI-Pub
        Org-ref FROM NCBI-Organism
        BioSource FROM NCBI-BioSource
        Seq-id, Seq-loc FROM NCBI-Seqloc
        GB-block FROM GenBank-General
        PIR-block FROM PIR-General
        EMBL-block FROM EMBL-General
        SP-block FROM SP-General
        PRF-block FROM PRF-General
        PDB-block FROM PDB-General
        Seq-table FROM NCBI-SeqTable;

--*** Sequence ********************************
--*

-- A sequence: its identifiers, optional descriptors, the sequence instance
-- and optional annotations.
Bioseq ::= SEQUENCE {
    id SET OF Seq-id ,            -- equivalent identifiers
    descr Seq-descr OPTIONAL , -- descriptors
    inst Seq-inst ,            -- the sequence data
    annot SET OF Seq-annot OPTIONAL }  -- annotations attached to the sequence

--*** Descriptors *****************************
--*

-- Unordered collection of descriptors; this is the type of Bioseq.descr.
Seq-descr ::= SET OF Seqdesc

-- A single descriptor; each value carries exactly one of the alternatives.
-- mol-type, modif, method and org are slated for removal (see the NOTE
-- that follows this definition); org-ref, BioSource and MolInfo are the
-- replacements named there.
Seqdesc ::= CHOICE {
    mol-type GIBB-mol ,          -- type of molecule
    modif SET OF GIBB-mod ,             -- modifiers
    method GIBB-method ,         -- sequencing method
    name VisibleString ,         -- a name for this sequence
    title VisibleString ,        -- a title for this sequence
    org Org-ref ,                -- if all from one organism
    comment VisibleString ,      -- a more extensive comment
    num Numbering ,              -- a numbering system
    maploc Dbtag ,               -- map location of this sequence
    pir PIR-block ,              -- PIR specific info
    genbank GB-block ,           -- GenBank specific info
    pub Pubdesc ,                -- a reference to the publication
    region VisibleString ,       -- overall region (globin locus)
    user User-object ,           -- user defined object
    sp SP-block ,                -- SWISSPROT specific info
    dbxref Dbtag ,               -- xref to other databases
    embl EMBL-block ,            -- EMBL specific information
    create-date Date ,           -- date entry first created/released
    update-date Date ,           -- date of last update
    prf PRF-block ,              -- PRF specific information
    pdb PDB-block ,              -- PDB specific information
    het Heterogen ,              -- cofactor, etc associated but not bound
    source BioSource ,           -- source of materials, includes Org-ref
    molinfo MolInfo ,            -- info on the molecule and techniques
    modelev ModelEvidenceSupport -- model evidence for XM records
}

--******* NOTE:
--*       mol-type, modif, method, and org are consolidated and expanded
--*       in Org-ref, BioSource, and MolInfo in this specification. They
--*       will be removed in later specifications. Do not use them in
--*       the future. Instead expect the new structures.
--*
--***************************

--********************************************************************
--
-- MolInfo gives information on the
-- classification of the type and quality of the sequence
--
-- WARNING: this will replace GIBB-mol, GIBB-mod, GIBB-method
--
--********************************************************************

-- Classification of a sequence by molecule type (biomol), technique (tech)
-- and completeness. All three enumerated fields default to "unknown".
MolInfo ::= SEQUENCE {
    biomol INTEGER {
        unknown (0) ,
        genomic (1) ,
        pre-RNA (2) ,              -- precursor RNA of any sort really
        mRNA (3) ,
        rRNA (4) ,
        tRNA (5) ,
        snRNA (6) ,
        scRNA (7) ,
        peptide (8) ,
        other-genetic (9) ,      -- other genetic material
        genomic-mRNA (10) ,      -- reported a mix of genomic and cdna sequence
        cRNA (11) ,              -- viral RNA genome copy intermediate
        snoRNA (12) ,            -- small nucleolar RNA
        transcribed-RNA (13) ,   -- transcribed RNA other than existing classes
        ncRNA (14) ,
        tmRNA (15) ,
        other (255) } DEFAULT unknown ,
    tech INTEGER {
        unknown (0) ,
        standard (1) ,          -- standard sequencing
        est (2) ,               -- Expressed Sequence Tag
        sts (3) ,               -- Sequence Tagged Site
        survey (4) ,            -- one-pass genomic sequence
        genemap (5) ,           -- from genetic mapping techniques
        physmap (6) ,           -- from physical mapping techniques
        derived (7) ,           -- derived from other data, not a primary entity
        concept-trans (8) ,     -- conceptual translation
        seq-pept (9) ,          -- peptide was sequenced
        both (10) ,             -- concept transl. w/ partial pept. seq.
        seq-pept-overlap (11) , -- sequenced peptide, ordered by overlap
        seq-pept-homol (12) ,   -- sequenced peptide, ordered by homology
        concept-trans-a (13) ,  -- conceptual transl. supplied by author
        htgs-1 (14) ,           -- unordered High Throughput sequence contig
        htgs-2 (15) ,           -- ordered High Throughput sequence contig
        htgs-3 (16) ,           -- finished High Throughput sequence
        fli-cdna (17) ,         -- full length insert cDNA
        htgs-0 (18) ,           -- single genomic reads for coordination
        htc (19) ,              -- high throughput cDNA
        wgs (20) ,              -- whole genome shotgun sequencing
        barcode (21) ,          -- barcode of life project
        composite-wgs-htgs (22) , -- composite of WGS and HTGS
        tsa (23) ,              -- transcriptome shotgun assembly
        targeted (24) ,         -- targeted locus sets/studies
        other (255) }           -- explain in the techexp field below
               DEFAULT unknown ,
    techexp VisibleString OPTIONAL ,   -- explanation if tech not enough
    --
    -- Completeness is not indicated in most records.  For genomes, assume
    -- the sequences are incomplete unless specifically marked as complete.
    -- For mRNAs, assume the ends are not known exactly unless marked as
    -- having the left or right end.
    --
    completeness INTEGER {
      unknown (0) ,
      complete (1) ,                   -- complete biological entity
      partial (2) ,                    -- partial but no details given
      no-left (3) ,                    -- missing 5' or NH3 end
      no-right (4) ,                   -- missing 3' or COOH end
      no-ends (5) ,                    -- missing both ends
      has-left (6) ,                   -- 5' or NH3 end present
      has-right (7) ,                  -- 3' or COOH end present
      other (255) } DEFAULT unknown ,
    gbmoltype VisibleString OPTIONAL } -- identifies particular ncRNA


-- Legacy molecule-type enumeration, used by Seqdesc.mol-type. The MolInfo
-- header comment in this module says MolInfo will replace it.
GIBB-mol ::= ENUMERATED {       -- type of molecule represented
    unknown (0) ,
    genomic (1) ,
    pre-mRNA (2) ,              -- precursor RNA of any sort really
    mRNA (3) ,
    rRNA (4) ,
    tRNA (5) ,
    snRNA (6) ,
    scRNA (7) ,
    peptide (8) ,
    other-genetic (9) ,      -- other genetic material
    genomic-mRNA (10) ,      -- reported a mix of genomic and cdna sequence
    other (255) }

-- Legacy modifier enumeration, used by Seqdesc.modif. The MolInfo header
-- comment in this module says MolInfo will replace it.
GIBB-mod ::= ENUMERATED {        -- GenInfo Backbone modifiers
    dna (0) ,
    rna (1) ,
    extrachrom (2) ,
    plasmid (3) ,
    mitochondrial (4) ,
    chloroplast (5) ,
    kinetoplast (6) ,
    cyanelle (7) ,
    synthetic (8) ,
    recombinant (9) ,
    partial (10) ,
    complete (11) ,
    mutagen (12) ,    -- subject of mutagenesis ?
    natmut (13) ,     -- natural mutant ?
    transposon (14) ,
    insertion-seq (15) ,
    no-left (16) ,    -- missing left end (5' for na, NH2 for aa)
    no-right (17) ,   -- missing right end (3' or COOH)
    macronuclear (18) ,
    proviral (19) ,
    est (20) ,        -- expressed sequence tag
    sts (21) ,        -- sequence tagged site
    survey (22) ,     -- one pass survey sequence
    chromoplast (23) ,
    genemap (24) ,    -- is a genetic map
    restmap (25) ,    -- is an ordered restriction map
    physmap (26) ,    -- is a physical map (not ordered restriction map)
    other (255) }

-- Legacy sequencing-method enumeration, used by Seqdesc.method. The MolInfo
-- header comment in this module says MolInfo will replace it.
-- Note that numbering starts at 1; there is no value 0.
GIBB-method ::= ENUMERATED {        -- sequencing methods
    concept-trans (1) ,    -- conceptual translation
    seq-pept (2) ,         -- peptide was sequenced
    both (3) ,             -- concept transl. w/ partial pept. seq.
    seq-pept-overlap (4) , -- sequenced peptide, ordered by overlap
    seq-pept-homol (5) ,   -- sequenced peptide, ordered by homology
    concept-trans-a (6) ,  -- conceptual transl. supplied by author
    other (255) }

-- Selects one of the four numbering schemes defined below.
Numbering ::= CHOICE {           -- any display numbering system
    cont Num-cont ,              -- continuous numbering
    enum Num-enum ,              -- enumerated names for residues
    ref Num-ref ,                -- by reference to another sequence
    real Num-real }              -- supports mapping to a float system

-- All three fields have defaults, so an empty Num-cont means: start at 1,
-- no zero, ascending.
Num-cont ::= SEQUENCE {          -- continuous display numbering system
    refnum INTEGER DEFAULT 1,         -- number assigned to first residue
    has-zero BOOLEAN DEFAULT FALSE ,  -- 0 used?
    ascending BOOLEAN DEFAULT TRUE }  -- ascending numbers?

-- Explicit list of residue tags, preceded by their count.
Num-enum ::= SEQUENCE {          -- any tags to residues
    num INTEGER ,                        -- number of tags to follow
    names SEQUENCE OF VisibleString }    -- the tags

-- Numbering taken from other sequences, either via their sources or via
-- an alignment.
Num-ref ::= SEQUENCE {           -- by reference to other sequences
    type ENUMERATED {            -- type of reference
        not-set (0) ,
        sources (1) ,            -- by segmented or const seq sources
        aligns (2) } ,           -- by alignments given below
    aligns Seq-align OPTIONAL }  -- the alignment referred to by type "aligns"

-- Linear map from integer positions to real values:
-- position = (a * int_position) + b.
Num-real ::= SEQUENCE {          -- mapping to floating point system
    a REAL ,                     -- from an integer system used by Bioseq
    b REAL ,                     -- position = (a * int_position) + b
    units VisibleString OPTIONAL }  -- optional units label

-- A publication plus optional details of how the sequence was presented in
-- it. Only "pub" is mandatory; reftype defaults to seq.
Pubdesc ::= SEQUENCE {              -- how sequence presented in pub
    pub Pub-equiv ,                 -- the citation(s)
    name VisibleString OPTIONAL ,   -- name used in paper
    fig VisibleString OPTIONAL ,    -- figure in paper
    num Numbering OPTIONAL ,        -- numbering from paper
    numexc BOOLEAN OPTIONAL ,       -- numbering problem with paper
    poly-a BOOLEAN OPTIONAL ,       -- poly A tail indicated in figure?
    maploc VisibleString OPTIONAL , -- map location reported in paper
    seq-raw StringStore OPTIONAL ,  -- original sequence from paper
    align-group INTEGER OPTIONAL ,  -- this seq aligned with others in paper
    comment VisibleString OPTIONAL, -- any comment on this pub in context
    reftype INTEGER {           -- type of reference in a GenBank record
        seq (0) ,               -- refers to sequence
        sites (1) ,             -- refers to unspecified features
        feats (2) ,             -- refers to specified features
        no-target (3) }         -- nothing specified (EMBL)
        DEFAULT seq }

-- Free-text name; used by Seqdesc.het.
Heterogen ::= VisibleString       -- cofactor, prosthetic group, inhibitor, etc

--*** Instances of sequences *******************************
--*

-- Instance of a sequence: how it is represented (repr), what kind of
-- molecule it is (mol), and optionally its length, topology, strandedness,
-- data, extension and history. Only repr and mol are mandatory.
Seq-inst ::= SEQUENCE {            -- the sequence data itself
    repr ENUMERATED {              -- representation class
        not-set (0) ,              -- empty
        virtual (1) ,              -- no seq data
        raw (2) ,                  -- continuous sequence
        seg (3) ,                  -- segmented sequence
        const (4) ,                -- constructed sequence
        ref (5) ,                  -- reference to another sequence
        consen (6) ,               -- consensus sequence or pattern
        map (7) ,                  -- ordered map of any kind
        delta (8) ,              -- sequence made by changes (delta) to others
        other (255) } ,
    mol ENUMERATED {               -- molecule class in living organism
        not-set (0) ,              --   > cdna = rna
        dna (1) ,
        rna (2) ,
        aa (3) ,
        na (4) ,                   -- just a nucleic acid
        other (255) } ,
    length INTEGER OPTIONAL ,      -- length of sequence in residues
    fuzz Int-fuzz OPTIONAL ,       -- length uncertainty
    topology ENUMERATED {          -- topology of molecule
        not-set (0) ,
        linear (1) ,
        circular (2) ,
        tandem (3) ,               -- some part of tandem repeat
        other (255) } DEFAULT linear ,
    strand ENUMERATED {            -- strandedness in living organism
        not-set (0) ,
        ss (1) ,                   -- single strand
        ds (2) ,                   -- double strand
        mixed (3) ,
        other (255) } OPTIONAL ,   -- default ds for DNA, ss for RNA, pept
    seq-data Seq-data OPTIONAL ,   -- the sequence
    ext Seq-ext OPTIONAL ,         -- extensions for special types
    hist Seq-hist OPTIONAL }       -- sequence history

--*** Sequence Extensions **********************************
--*  for representing more complex types
--*  const type uses Seq-hist.assembly

-- Extension data for Seq-inst.ext; one alternative per extended
-- representation.
Seq-ext ::= CHOICE {
    seg Seg-ext ,        -- segmented sequences
    ref Ref-ext ,        -- hot link to another sequence (a view)
    map Map-ext ,        -- ordered map of markers
    delta Delta-ext }    -- list of Delta-seq pieces

-- Payload types for the four Seq-ext alternatives.

Seg-ext ::= SEQUENCE OF Seq-loc      -- ordered list of locations

Ref-ext ::= Seq-loc                  -- a single location

Map-ext ::= SEQUENCE OF Seq-feat     -- ordered list of features

Delta-ext ::= SEQUENCE OF Delta-seq  -- ordered list of Delta-seq pieces

-- One element of a Delta-ext: either a pointer to existing sequence or an
-- inline literal.
Delta-seq ::= CHOICE {
    loc Seq-loc ,       -- point to a sequence
    literal Seq-literal }   -- a piece of sequence

-- An inline piece of sequence. The length is mandatory even when the data
-- itself is omitted.
Seq-literal ::= SEQUENCE {
    length INTEGER ,         -- must give a length in residues
    fuzz Int-fuzz OPTIONAL , -- could be unsure
    seq-data Seq-data OPTIONAL } -- may have the data

--*** Sequence History Record ***********************************
--** assembly = records how seq was assembled from others
--** replaces = records sequences made obsolete by this one
--** replaced-by = this seq is made obsolete by another(s)

-- History record for a sequence; every field is OPTIONAL.
Seq-hist ::= SEQUENCE {
    assembly SET OF Seq-align OPTIONAL ,-- how was this assembled?
    replaces Seq-hist-rec OPTIONAL ,    -- seq makes these seqs obsolete
    replaced-by Seq-hist-rec OPTIONAL , -- these seqs make this one obsolete
    deleted CHOICE {                    -- given either as a flag or as a date
        bool BOOLEAN ,
        date Date } OPTIONAL }

-- A set of sequence ids with an optional date; used by Seq-hist.replaces
-- and Seq-hist.replaced-by.
Seq-hist-rec ::= SEQUENCE {
    date Date OPTIONAL ,
    ids SET OF Seq-id }

--*** Various internal sequence representations ************
--*      all are controlled, fixed length forms

-- The sequence data in exactly one of the encodings defined below, or a
-- gap description in place of data.
Seq-data ::= CHOICE {              -- sequence representations
    iupacna IUPACna ,              -- IUPAC 1 letter nuc acid code
    iupacaa IUPACaa ,              -- IUPAC 1 letter amino acid code
    ncbi2na NCBI2na ,              -- 2 bit nucleic acid code
    ncbi4na NCBI4na ,              -- 4 bit nucleic acid code
    ncbi8na NCBI8na ,              -- 8 bit extended nucleic acid code
    ncbipna NCBIpna ,              -- nucleic acid probabilities
    ncbi8aa NCBI8aa ,              -- 8 bit extended amino acid codes
    ncbieaa NCBIeaa ,              -- extended ASCII 1 letter aa codes
    ncbipaa NCBIpaa ,              -- amino acid probabilities
    ncbistdaa NCBIstdaa,           -- consecutive codes for std aas
    gap Seq-gap                    -- gap types
}

-- Describes a gap: its type (mandatory), and optionally whether it is
-- linked and what evidence supports the linkage.
Seq-gap ::= SEQUENCE {
    type INTEGER {
        unknown(0),
        fragment(1),               -- Deprecated. Used only for AGP 1.1
        clone(2),                  -- Deprecated. Used only for AGP 1.1
        short-arm(3),
        heterochromatin(4),
        centromere(5),
        telomere(6),
        repeat(7),
        contig(8),
        scaffold(9),
        contamination(10),
        other(255)
    },
    linkage INTEGER {
        unlinked(0),
        linked(1),
        other(255)
    } OPTIONAL,
    linkage-evidence SET OF Linkage-evidence OPTIONAL
}

-- One item of evidence for Seq-gap.linkage-evidence; carries only the
-- kind of evidence.
Linkage-evidence ::= SEQUENCE {
    type INTEGER {
        paired-ends(0),
        align-genus(1),
        align-xgenus(2),
        align-trnscpt(3),
        within-clone(4),
        clone-contig(5),
        map(6),
        strobe(7),
        unspecified(8),
        pcr(9),
        proximity-ligation(10),
        other(255)
    }
}

-- Concrete encodings referenced by Seq-data. Text alphabets are StringStore;
-- packed or binary alphabets are OCTET STRING.
IUPACna ::= StringStore       -- IUPAC 1 letter codes, no spaces
IUPACaa ::= StringStore       -- IUPAC 1 letter codes, no spaces
NCBI2na ::= OCTET STRING      -- 00=A, 01=C, 10=G, 11=T
NCBI4na ::= OCTET STRING      -- 1 bit each for agct
                              -- 0001=A, 0010=C, 0100=G, 1000=T/U
                              -- 0101=Purine, 1010=Pyrimidine, etc
NCBI8na ::= OCTET STRING      -- for modified nucleic acids
NCBIpna ::= OCTET STRING      -- 5 octets/base, prob for a,c,g,t,n
                              -- probabilities are coded 0-255 = 0.0-1.0
NCBI8aa ::= OCTET STRING      -- for modified amino acids
NCBIeaa ::= StringStore       -- ASCII extended 1 letter aa codes
                              -- IUPAC codes + U=selenocysteine
NCBIpaa ::= OCTET STRING      -- 25 octets/aa, prob for IUPAC aas in order:
                              -- A-Y,B,Z,X,(ter),anything
                              -- probabilities are coded 0-255 = 0.0-1.0
NCBIstdaa ::= OCTET STRING    -- codes 0-25, 1 per byte

--*** Sequence Annotation *************************************
--*

-- Textannot-id: text identifier for an annotation.
-- This is a replica of Textseq-id.
-- It is specific to annotations and exists to maintain a semantic
-- difference between IDs assigned to annotations and IDs assigned to
-- sequences.
-- Every field is OPTIONAL, so the schema itself does not require any
-- particular combination to be present.
Textannot-id ::= SEQUENCE {
    name	  VisibleString OPTIONAL ,
    accession VisibleString OPTIONAL ,
    release   VisibleString OPTIONAL ,
    version   INTEGER       OPTIONAL
}

-- Annot-id: identifier of an annotation, in exactly one of four forms.
Annot-id ::= CHOICE {
    local Object-id ,              -- Object-id form
    ncbi INTEGER ,                 -- integer form
    general Dbtag,                 -- Dbtag form
    other Textannot-id             -- text form, see Textannot-id
}

-- Annot-descr: unordered collection of Annotdesc descriptors
-- (the type of Seq-annot.desc).
Annot-descr ::= SET OF Annotdesc

-- Annotdesc: one descriptor attached to a collection of annotations.
-- Exactly one alternative is present per descriptor; several descriptors
-- are combined through Annot-descr.
Annotdesc ::= CHOICE {
    name VisibleString ,         -- a short name for this collection
    title VisibleString ,        -- a title for this collection
    comment VisibleString ,      -- a more extensive comment
    pub Pubdesc ,                -- a reference to the publication
    user User-object ,           -- user defined object
    create-date Date ,           -- date entry first created/released
    update-date Date ,           -- date of last update
    src Seq-id ,                 -- source sequence from which annot came
    align Align-def,             -- definition of the SeqAligns
    region Seq-loc }             -- all contents cover this region

-- Align-def: classifies the alignments carried by a Seq-annot
-- (used by the Annotdesc.align alternative).
Align-def ::= SEQUENCE {
    align-type INTEGER {         -- class of align Seq-annot
      ref (1) ,                  -- set of alignments to the same sequence
      alt (2) ,                  -- set of alternate alignments of the same seqs
      blocks (3) ,               -- set of aligned blocks in the same seqs
      other (255) } ,
    ids SET OF Seq-id OPTIONAL } -- used for the one ref seqid for now

-- Seq-annot: a container of annotation data of a single kind.
-- Only the data field is mandatory; identifiers, source database, name
-- and descriptors are all OPTIONAL.
Seq-annot ::= SEQUENCE {
    id SET OF Annot-id OPTIONAL ,  -- unordered; an annotation may carry several ids
    db INTEGER {                 -- source of annotation
        genbank (1) ,
        embl (2) ,
        ddbj (3) ,
        pir  (4) ,
        sp   (5) ,
        bbone (6) ,
        pdb   (7) ,
        other (255) } OPTIONAL ,
    name VisibleString OPTIONAL ,-- source if "other" above
    desc Annot-descr OPTIONAL ,  -- used only for stand alone Seq-annots
    data CHOICE {                -- payload: exactly one homogeneous collection
        ftable SET OF Seq-feat ,
        align SET OF Seq-align ,
        graph SET OF Seq-graph ,
        ids SET OF Seq-id ,      -- used for communication between tools
        locs SET OF Seq-loc ,    -- used for communication between tools
        seq-table Seq-table } }  -- features in table form

END


-- seqalign.asn
--$Revision: 370567 $
--**********************************************************************
--
--  NCBI Sequence Alignment elements
--  by James Ostell, 1990
--
--**********************************************************************

NCBI-Seqalign DEFINITIONS ::=
BEGIN

EXPORTS Seq-align, Score, Score-set, Seq-align-set;

IMPORTS Seq-id, Seq-loc , Na-strand FROM NCBI-Seqloc
        User-object, Object-id FROM NCBI-General;

--*** Sequence Alignment ********************************
--*

-- Seq-align-set: unordered collection of alignments.
-- Also used inside Seq-align itself (segs.disc), which makes the
-- alignment structure recursive.
Seq-align-set ::= SET OF Seq-align

-- Seq-align: one alignment.
-- type and segs are mandatory; dim, score, bounds, id and ext are OPTIONAL.
-- The segs CHOICE selects which segment representation carries the data.
Seq-align ::= SEQUENCE {
    type ENUMERATED {
        not-set (0) ,
        global (1) ,
        diags (2) ,     -- unbroken, but not ordered, diagonals
        partial (3) ,   -- mapping pieces together
        disc (4) ,      -- discontinuous alignment
        other (255) } ,
    dim INTEGER OPTIONAL ,     -- dimensionality
    score SET OF Score OPTIONAL ,   -- for whole alignment
    segs CHOICE {                   -- alignment data
        dendiag SEQUENCE OF Dense-diag ,
        denseg              Dense-seg ,
        std     SEQUENCE OF Std-seg ,
        packed              Packed-seg ,
        disc                Seq-align-set,   -- nested alignments (recursive)
        spliced             Spliced-seg,
        sparse              Sparse-seg
    } ,
    
    -- regions of sequence over which align
    --  was computed
    bounds SET OF Seq-loc OPTIONAL,

    -- alignment id
    id SEQUENCE OF Object-id OPTIONAL,

    --extra info
    ext SEQUENCE OF User-object OPTIONAL
}

-- Dense-diag: one diagonal of a (possibly multiway) alignment.
-- starts is given in the same order as ids, and a single len applies
-- to the aligned segments. dim defaults to 2 when absent.
Dense-diag ::= SEQUENCE {         -- for (multiway) diagonals
    dim INTEGER DEFAULT 2 ,    -- dimensionality
    ids SEQUENCE OF Seq-id ,   -- sequences in order
    starts SEQUENCE OF INTEGER ,  -- start OFFSETS in ids order
    len INTEGER ,                 -- len of aligned segments
    strands SEQUENCE OF Na-strand OPTIONAL ,
    scores SET OF Score OPTIONAL }

    -- Dense-seg: the densest packing for sequence alignments only.
    --            a start of -1 indicates a gap for that sequence of
    --            length lens.
    --
    -- id=100  AAGGCCTTTTAGAGATGATGATGATGATGA
    -- id=200  AAGGCCTTTTAG.......GATGATGATGA
    -- id=300  ....CCTTTTAGAGATGATGAT....ATGA
    --
    -- dim = 3, numseg = 6, ids = { 100, 200, 300 }
    -- starts = { 0,0,-1, 4,4,0, 12,-1,8, 19,12,15, 22,15,-1, 26,19,18 }
    -- lens = { 4, 8, 7, 3, 4, 4 }
    --
    -- In the example, starts holds numseg * dim entries grouped by segment
    -- (one entry per id, in ids order), and lens holds numseg entries
    -- (one per segment).

Dense-seg ::= SEQUENCE {          -- for (multiway) global or partial alignments
    dim INTEGER DEFAULT 2 ,       -- dimensionality
    numseg INTEGER ,              -- number of segments here
    ids SEQUENCE OF Seq-id ,      -- sequences in order
    starts SEQUENCE OF INTEGER ,  -- start OFFSETS in ids order within segs
    lens SEQUENCE OF INTEGER ,    -- lengths in ids order within segs
                                  -- NOTE(review): the worked example above
                                  -- gives one length per segment, not one
                                  -- per id; confirm and reword this comment
    strands SEQUENCE OF Na-strand OPTIONAL ,
    scores SEQUENCE OF Score OPTIONAL }  -- score for each seg

-- Packed-seg: multiway alignment in which each sequence has start offsets
-- for the whole alignment, and the present field records, per segment,
-- whether each sequence takes part. dim defaults to 2 when absent.
Packed-seg ::= SEQUENCE {         -- for (multiway) global or partial alignments
    dim INTEGER DEFAULT 2 ,       -- dimensionality
    numseg INTEGER ,              -- number of segments here
    ids SEQUENCE OF Seq-id ,      -- sequences in order
    starts SEQUENCE OF INTEGER ,  -- start OFFSETS in ids order for whole alignment
    present OCTET STRING ,        -- Boolean if each sequence present or absent in
                                  --   each segment
    lens SEQUENCE OF INTEGER ,    -- length of each segment
    strands SEQUENCE OF Na-strand OPTIONAL ,
    scores SEQUENCE OF Score OPTIONAL }  -- score for each segment

-- Std-seg: one alignment segment expressed as a list of Seq-loc.
-- Only loc is mandatory; dim defaults to 2 when absent.
Std-seg ::= SEQUENCE {
    dim INTEGER DEFAULT 2 ,       -- dimensionality
    ids SEQUENCE OF Seq-id OPTIONAL ,
    loc SEQUENCE OF Seq-loc ,
    scores SET OF Score OPTIONAL }


-- Spliced-seg: alignment of a product (transcript or protein) to a genomic
-- sequence, given as an ordered list of exons.
-- product-type and exons are mandatory; everything else is OPTIONAL.
-- The id and strand fields here have same-named counterparts in
-- Spliced-exon.
Spliced-seg ::= SEQUENCE {
    -- product is either protein or transcript (cDNA)
    product-id Seq-id OPTIONAL,
    genomic-id Seq-id OPTIONAL,

    -- should be 'plus' or 'minus'
    product-strand Na-strand OPTIONAL ,
    genomic-strand Na-strand OPTIONAL ,
    
    product-type ENUMERATED {
        transcript(0),
        protein(1)
    },

    -- set of segments involved
    -- each segment corresponds to one exon
    -- exons are always in biological order
    exons SEQUENCE OF Spliced-exon ,

    -- start of poly(A) tail on the transcript
    -- For sense transcripts:
    --   aligned product positions < poly-a <= product-length
    --   poly-a == product-length indicates inferred poly(A) tail at transcript's end
    -- For antisense transcripts:
    --   -1 <= poly-a < aligned product positions
    --   poly-a == -1 indicates inferred poly(A) tail at transcript's start
    poly-a INTEGER OPTIONAL,

    -- length of the product, in bases/residues
    -- from this (or from poly-a if present), a 3' unaligned length can be extracted
    product-length INTEGER OPTIONAL,

    -- alignment descriptors / modifiers
    -- this provides us a set for extension
    modifiers SET OF Spliced-seg-modifier OPTIONAL
}

-- Spliced-seg-modifier: one flag describing a Spliced-seg.
-- Each modifier carries exactly one of the alternatives below; several
-- modifiers are combined through Spliced-seg.modifiers.
Spliced-seg-modifier ::= CHOICE {
    -- protein aligns from the start and the first codon
    -- on both product and genomic is a start codon
    start-codon-found BOOLEAN,
    
    -- protein aligns to its end and there is a stop codon
    -- on the genomic right after the alignment
    stop-codon-found BOOLEAN
}


-- Spliced-exon: a complete or partial exon.
-- Two consecutive Spliced-exons may belong to one exon.
-- The four start/end coordinates are mandatory; all other fields are
-- OPTIONAL.
Spliced-exon ::= SEQUENCE {
    -- product-end >= product-start
    product-start Product-pos ,
    product-end Product-pos ,

    -- genomic-end >= genomic-start
    genomic-start INTEGER ,
    genomic-end INTEGER ,

    -- product is either protein or transcript (cDNA)
    product-id Seq-id OPTIONAL ,
    genomic-id Seq-id OPTIONAL ,

    -- should be 'plus' or 'minus'
    product-strand Na-strand OPTIONAL ,
    
    -- genomic-strand represents the strand of translation
    genomic-strand Na-strand OPTIONAL ,

    -- basic segments are always in biological order
    parts SEQUENCE OF Spliced-exon-chunk OPTIONAL ,

    -- scores for this exon
    scores Score-set OPTIONAL ,

    -- splice sites
    acceptor-before-exon Splice-site OPTIONAL,
    donor-after-exon Splice-site OPTIONAL,
    
    -- flag: is this exon complete or partial?
    partial BOOLEAN OPTIONAL,

    --extra info
    ext SEQUENCE OF User-object OPTIONAL
}


-- Product-pos: a position on the product, either as a plain integer
-- (nucpos) or as a Prot-pos (protpos).
Product-pos ::= CHOICE {
    nucpos INTEGER,
    protpos Prot-pos
}


-- Prot-pos: position on protein (1/3 of amino-acid resolution),
-- expressed as an amino-acid index plus a position within the codon.
Prot-pos ::= SEQUENCE {
    -- amino-acid position (0-based)
    amin INTEGER ,

    -- position within codon (1-based)
    -- 0 = not set (meaning 1)
    -- 0 is also the DEFAULT, so an absent frame reads as "not set"
    frame INTEGER DEFAULT 0
}


-- Spliced-exon-chunk: piece of an exon
-- lengths are given in nucleotide bases (1/3 of aminoacid when product is a
-- protein)
-- Each chunk is exactly one of the alternatives below, and its INTEGER
-- value is the length of that chunk.
Spliced-exon-chunk ::= CHOICE {
    -- both sequences represented, product and genomic sequences match
    match INTEGER ,

    -- both sequences represented, product and genomic sequences do not match
    mismatch INTEGER ,

    -- both sequences are represented, there is sufficient similarity 
    -- between product and genomic sequences. Can be used to replace stretches
    -- of matches and mismatches, mostly for protein to genomic where 
    -- definition of match or mismatch depends on translation table
    diag INTEGER ,

     -- insertion in product sequence (i.e. gap in the genomic sequence)
    product-ins INTEGER ,

     -- insertion in genomic sequence (i.e. gap in the product sequence)
    genomic-ins INTEGER
}


-- Splice-site: site involved in splice.
-- Used for Spliced-exon.acceptor-before-exon and donor-after-exon.
Splice-site ::= SEQUENCE {
    -- typically two bases in the intronic region, always
    -- in IUPAC format
    bases VisibleString
}


-- ==========================================================================
--
-- Sparse-seg follows the semantics of dense-seg and is more optimal for
-- representing sparse multiple alignments
--
-- ==========================================================================


-- Sparse-seg: a multiple alignment stored as a set of pairwise
-- alignments (rows). Only rows is mandatory.
Sparse-seg ::= SEQUENCE {
    master-id Seq-id OPTIONAL,

    -- pairwise alignments constituting this multiple alignment
    rows SET OF Sparse-align,

    -- per-row scores
    row-scores SET OF Score OPTIONAL,

    -- index of extra items
    ext  SET OF Sparse-seg-ext OPTIONAL
}

-- Sparse-align: one pairwise alignment (a row of Sparse-seg) between
-- first-id and second-id, as numseg segments.
-- The three per-segment arrays are annotated [numseg], i.e. they are
-- expected to have numseg entries each.
-- A strand list is provided for the second sequence only.
Sparse-align ::= SEQUENCE {
    first-id Seq-id,
    second-id Seq-id,

    numseg INTEGER,                      --number of segments
    first-starts SEQUENCE OF INTEGER ,   --starts on the first sequence [numseg]
    second-starts SEQUENCE OF INTEGER ,  --starts on the second sequence [numseg]
    lens SEQUENCE OF INTEGER ,           --lengths of segments [numseg]
    second-strands SEQUENCE OF Na-strand OPTIONAL ,

    -- per-segment scores
    seg-scores SET OF Score OPTIONAL
}

-- Sparse-seg-ext: extension record for Sparse-seg.ext.
-- Only the index field is active. The richer form (an index plus a
-- User-field) is kept below as a commented-out sketch and is not part
-- of the schema.
Sparse-seg-ext ::= SEQUENCE {
    --seg-ext SET OF {
    --    index INTEGER,
    --    data User-field
    -- }
    index INTEGER
}


-- Score: a single score value, optionally labelled by an Object-id.
-- The value is either a REAL or an INTEGER.
-- use of Score is discouraged for external ASN.1 specifications
-- (Score-set is the type recommended for that purpose)
Score ::= SEQUENCE {
    id Object-id OPTIONAL ,
    value CHOICE {
        real REAL ,
        int INTEGER
    }
}

-- Score-set: unordered collection of Score values.
-- use of Score-set is encouraged for external ASN.1 specifications
Score-set ::= SET OF Score

END 


-- seqblock.asn
--$Revision: 6.0 $
--*********************************************************************
--
-- 1990 - J.Ostell
-- Version 3.0 - June 1994
--
--*********************************************************************
--*********************************************************************
--
--  EMBL specific data
--  This block of specifications was developed by Reiner Fuchs of EMBL
--  Updated by J.Ostell, 1994
--
--*********************************************************************

EMBL-General DEFINITIONS ::=
BEGIN

EXPORTS EMBL-dbname, EMBL-xref, EMBL-block;

IMPORTS Date, Object-id FROM NCBI-General;

-- EMBL-dbname: names a database either by one of the fixed codes below
-- or by a free-text name.
EMBL-dbname ::= CHOICE {
    code ENUMERATED {
        embl(0),
        genbank(1),
        ddbj(2),
        geninfo(3),
        medline(4),
        swissprot(5),
        pir(6),
        pdb(7),
        epd(8),
        ecd(9),
        tfd(10),
        flybase(11),
        prosite(12),
        enzyme(13),
        mim(14),
        ecoseq(15),
        hiv(16) ,
        other (255) } ,
    name    VisibleString }      -- free-text alternative to code

-- EMBL-xref: cross-reference made of a database name and an ordered
-- list of ids in that database.
EMBL-xref ::= SEQUENCE {
    dbname EMBL-dbname,
    id SEQUENCE OF Object-id }

-- EMBL-block: EMBL specific descriptive data for an entry.
-- creation-date and update-date are mandatory; class defaults to
-- standard when absent; the remaining fields are OPTIONAL.
EMBL-block ::= SEQUENCE {
    class ENUMERATED {
        not-set(0),
        standard(1),
        unannotated(2),
        other(255) } DEFAULT standard,
    div ENUMERATED {             -- three-letter division code
        fun(0),
        inv(1),
        mam(2),
        org(3),
        phg(4),
        pln(5),
        pri(6),
        pro(7),
        rod(8),
        syn(9),
        una(10),
        vrl(11),
        vrt(12),
        pat(13),
        est(14),
        sts(15),
        other (255) } OPTIONAL,
    creation-date Date,
    update-date Date,
    extra-acc SEQUENCE OF VisibleString OPTIONAL,
    keywords SEQUENCE OF VisibleString OPTIONAL,
    xref SEQUENCE OF EMBL-xref OPTIONAL }

END

--*********************************************************************
--
--  SWISSPROT specific data
--  This block of specifications was developed by Mark Cavanaugh of
--      NCBI working with Amos Bairoch of SWISSPROT
--
--*********************************************************************

SP-General DEFINITIONS ::=
BEGIN

EXPORTS SP-block;

IMPORTS Date, Dbtag FROM NCBI-General
        Seq-id FROM NCBI-Seqloc;

-- SP-block: SWISSPROT specific descriptive data for an entry.
-- class is the only mandatory field; imeth defaults to FALSE when absent.
SP-block ::= SEQUENCE {         -- SWISSPROT specific descriptions
    class ENUMERATED {
        not-set (0) ,
        standard (1) ,      -- conforms to all SWISSPROT checks
        prelim (2) ,        -- only seq and biblio checked
        other (255) } ,
    extra-acc SET OF VisibleString OPTIONAL ,  -- old SWISSPROT ids
    imeth BOOLEAN DEFAULT FALSE ,  -- seq known to start with Met
    plasnm SET OF VisibleString OPTIONAL,  -- plasmid names carrying gene
    seqref SET OF Seq-id OPTIONAL,         -- xref to other sequences
    dbref SET OF Dbtag OPTIONAL ,          -- xref to non-sequence dbases
    keywords SET OF VisibleString OPTIONAL , -- keywords
    created Date OPTIONAL ,         -- creation date
    sequpd Date OPTIONAL ,          -- sequence update
    annotupd Date OPTIONAL }        -- annotation update

END

--*********************************************************************
--
--  PIR specific data
--  This block of specifications was developed by Jim Ostell of
--      NCBI
--
--*********************************************************************

PIR-General DEFINITIONS ::=
BEGIN

EXPORTS PIR-block;

IMPORTS Seq-id FROM NCBI-Seqloc;

-- PIR-block: PIR specific descriptive data for an entry.
-- Every field is OPTIONAL. Most are free text (VisibleString),
-- including date, which is not a structured Date.
PIR-block ::= SEQUENCE {          -- PIR specific descriptions
    had-punct BOOLEAN OPTIONAL ,      -- had punctuation in sequence ?
    host VisibleString OPTIONAL ,
    source VisibleString OPTIONAL ,     -- source line
    summary VisibleString OPTIONAL ,
    genetic VisibleString OPTIONAL ,
    includes VisibleString OPTIONAL ,
    placement VisibleString OPTIONAL ,
    superfamily VisibleString OPTIONAL ,
    keywords SEQUENCE OF VisibleString OPTIONAL ,
    cross-reference VisibleString OPTIONAL ,
    date VisibleString OPTIONAL ,
    seq-raw VisibleString OPTIONAL ,  -- seq with punctuation
    seqref SET OF Seq-id OPTIONAL }         -- xref to other sequences

END

--*********************************************************************
--
--  GenBank specific data
--  This block of specifications was developed by Jim Ostell of
--      NCBI
--
--*********************************************************************

GenBank-General DEFINITIONS ::=
BEGIN

EXPORTS GB-block;

IMPORTS Date FROM NCBI-General;

-- GB-block: GenBank specific descriptive data for an entry.
-- Every field is OPTIONAL. The free-text date field is obsolete and
-- superseded by the structured entry-date.
GB-block ::= SEQUENCE {          -- GenBank specific descriptions
    extra-accessions SEQUENCE OF VisibleString OPTIONAL ,
    source VisibleString OPTIONAL ,     -- source line
    keywords SEQUENCE OF VisibleString OPTIONAL ,
    origin VisibleString OPTIONAL,
    date VisibleString OPTIONAL ,       -- OBSOLETE old form Entry Date
    entry-date Date OPTIONAL ,          -- replaces date
    div VisibleString OPTIONAL ,        -- GenBank division
    taxonomy VisibleString OPTIONAL }   -- continuation line of organism

END

--**********************************************************************
-- PRF specific definition
--    PRF is a protein sequence database created and maintained by
--    Protein Research Foundation, Minoo-city, Osaka, Japan.
--
--    Written by A.Ogiwara, Inst.Chem.Res. (Dr.Kanehisa's Lab),
--            Kyoto Univ., Japan
--
--**********************************************************************

PRF-General DEFINITIONS ::=
BEGIN

EXPORTS PRF-block;

-- PRF-block: PRF specific descriptive data for an entry.
-- Both fields are OPTIONAL.
PRF-block ::= SEQUENCE {
      extra-src       PRF-ExtraSrc OPTIONAL,
      keywords        SEQUENCE OF VisibleString OPTIONAL
}

-- PRF-ExtraSrc: additional free-text source information, used as
-- PRF-block.extra-src. Every field is OPTIONAL.
PRF-ExtraSrc ::= SEQUENCE {
      host    VisibleString OPTIONAL,
      part    VisibleString OPTIONAL,
      state   VisibleString OPTIONAL,
      strain  VisibleString OPTIONAL,
      taxon   VisibleString OPTIONAL
}

END

--*********************************************************************
--
--  PDB specific data
--  This block of specifications was developed by Jim Ostell and
--      Steve Bryant of NCBI
--
--*********************************************************************

PDB-General DEFINITIONS ::=
BEGIN

EXPORTS PDB-block;

IMPORTS Date FROM NCBI-General;

-- PDB-block: PDB specific descriptive data for an entry.
-- deposition, class, compound and source are mandatory;
-- exp-method and replace are OPTIONAL.
PDB-block ::= SEQUENCE {          -- PDB specific descriptions
    deposition Date ,         -- deposition date  month,year
    class VisibleString ,
    compound SEQUENCE OF VisibleString ,
    source SEQUENCE OF VisibleString ,
    exp-method VisibleString OPTIONAL ,  -- present if NOT X-ray diffraction
    replace PDB-replace OPTIONAL } -- replacement history

-- PDB-replace: replacement history record, used as PDB-block.replace.
PDB-replace ::= SEQUENCE {
    date Date ,
    ids SEQUENCE OF VisibleString }   -- entry ids replaced by this one

END


-- seqcode.asn
--$Revision: 6.0 $
--  *********************************************************************
--
--  These are code and conversion tables for NCBI sequence codes
--  ASN.1 for the sequences themselves is defined in seq.asn
--
--  Seq-map-table and Seq-code-table REQUIRE that codes start with 0
--    and increase continuously.  So IUPAC codes, which are upper case
--    letters will always have 65 0 cells before the codes begin.  This
--    allows all codes to do indexed lookups for things
--
--  Valid names for code tables are:
--    IUPACna
--    IUPACaa
--    IUPACeaa
--    IUPACaa3     3 letter amino acid codes : parallels IUPACeaa
--                   display only, not a data exchange type
--    NCBI2na
--    NCBI4na
--    NCBI8na
--    NCBI8aa
--    NCBIstdaa
--     probability types map to IUPAC types for display as characters

NCBI-SeqCode DEFINITIONS ::=
BEGIN

EXPORTS Seq-code-table, Seq-map-table, Seq-code-set;

-- Seq-code-type: names one sequence code.
-- Used by Seq-map-table (from, to) and Seq-code-table (code).
-- Numbering starts at 1, and iupacaa3 (10) is for display only.
Seq-code-type ::= ENUMERATED {              -- sequence representations
    iupacna (1) ,              -- IUPAC 1 letter nuc acid code
    iupacaa (2) ,              -- IUPAC 1 letter amino acid code
    ncbi2na (3) ,              -- 2 bit nucleic acid code
    ncbi4na (4) ,              -- 4 bit nucleic acid code
    ncbi8na (5) ,              -- 8 bit extended nucleic acid code
    ncbipna (6) ,              -- nucleic acid probabilities
    ncbi8aa (7) ,              -- 8 bit extended amino acid codes
    ncbieaa (8) ,              -- extended ASCII 1 letter aa codes
    ncbipaa (9) ,              -- amino acid probabilities
    iupacaa3 (10) ,            -- 3 letter code only for display
    ncbistdaa (11) }           -- consecutive codes for std aas, 0-25

-- Seq-map-table: lookup table that converts values of one sequence code
-- (from) into another (to). start-at defaults to 0 when absent.
Seq-map-table ::= SEQUENCE { -- for tables of sequence mappings 
    from Seq-code-type ,      -- code to map from
    to Seq-code-type ,        -- code to map to
    num INTEGER ,             -- number of rows in table
    start-at INTEGER DEFAULT 0 ,   -- index offset of first element
    table SEQUENCE OF INTEGER }  -- table of values, in from-to order

-- Seq-code-table: for one sequence code, the printable symbol and the
-- explanatory name of each coded value, plus an optional complement
-- table. start-at defaults to 0 when absent.
Seq-code-table ::= SEQUENCE { -- for names of coded values
    code Seq-code-type ,      -- name of code
    num INTEGER ,             -- number of rows in table
    one-letter BOOLEAN ,   -- symbol is ALWAYS 1 letter?
    start-at INTEGER DEFAULT 0 ,   -- index offset of first element
    table SEQUENCE OF
        SEQUENCE {
            symbol VisibleString ,      -- the printed symbol or letter
            name VisibleString } ,      -- an explanatory name or string
    comps SEQUENCE OF INTEGER OPTIONAL } -- pointers to complement nuc acid

-- Seq-code-set: bundle of code tables and mapping tables.
-- Both members are OPTIONAL.
Seq-code-set ::= SEQUENCE {    -- for distribution
    codes SET OF Seq-code-table OPTIONAL ,
    maps SET OF Seq-map-table OPTIONAL }

END


-- seqfeat.asn
--$Revision: 545131 $
--**********************************************************************
--
--  NCBI Sequence Feature elements
--  by James Ostell, 1990
--  Version 3.0 - June 1994
--
--**********************************************************************

NCBI-Seqfeat DEFINITIONS ::=
BEGIN

EXPORTS Seq-feat, Feat-id, Genetic-code, ModelEvidenceSupport;

IMPORTS Gene-ref FROM NCBI-Gene
        Prot-ref FROM NCBI-Protein
        Org-ref FROM NCBI-Organism
        Variation-ref FROM NCBI-Variation
        BioSource FROM NCBI-BioSource
        RNA-ref FROM NCBI-RNA
        Seq-id, Seq-loc, Giimport-id FROM NCBI-Seqloc
        Pubdesc, Numbering, Heterogen FROM NCBI-Sequence
        Rsite-ref FROM NCBI-Rsite
        Txinit FROM NCBI-TxInit
        DOI, PubMedId FROM NCBI-Biblio
        Pub-set FROM NCBI-Pub
        Object-id, Dbtag, User-object FROM NCBI-General;

--*** Feature identifiers ********************************
--*

-- Feat-id: identifier of a feature, in exactly one of four forms.
Feat-id ::= CHOICE {
    gibb INTEGER ,            -- geninfo backbone
    giim Giimport-id ,        -- geninfo import
    local Object-id ,         -- for local software use
    general Dbtag }           -- for use by various databases

--*** Seq-feat *******************************************
--*  sequence feature generalization

-- Seq-feat: one sequence feature.
-- data (what the feature is) and location (where it is) are mandatory;
-- every other field is OPTIONAL.
-- The singular id and ext fields are marked for replacement by the
-- plural ids and exts fields.
Seq-feat ::= SEQUENCE {
    id Feat-id OPTIONAL ,
    data SeqFeatData ,           -- the specific data
    partial BOOLEAN OPTIONAL ,    -- incomplete in some way?
    except BOOLEAN OPTIONAL ,     -- something funny about this?
    comment VisibleString OPTIONAL ,
    product Seq-loc OPTIONAL ,    -- product of process
    location Seq-loc ,            -- feature made from
    qual SEQUENCE OF Gb-qual OPTIONAL ,  -- qualifiers
    title VisibleString OPTIONAL ,   -- for user defined label
    ext User-object OPTIONAL ,    -- user defined structure extension
    cit Pub-set OPTIONAL ,        -- citations for this feature
    exp-ev ENUMERATED {           -- evidence for existence of feature
        experimental (1) ,        -- any reasonable experimental check
        not-experimental (2) } OPTIONAL , -- similarity, pattern, etc
    xref SET OF SeqFeatXref OPTIONAL ,   -- cite other relevant features
    dbxref SET OF Dbtag OPTIONAL ,  -- support for xref to other databases
    pseudo BOOLEAN OPTIONAL ,     -- annotated on pseudogene?
    except-text VisibleString OPTIONAL , -- explain if except=TRUE
    ids SET OF Feat-id OPTIONAL ,       -- set of Ids; will replace 'id' field
    exts SET OF User-object OPTIONAL , -- set of extensions; will replace 'ext' field
    support SeqFeatSupport OPTIONAL  -- will replace /experiment, /inference, model-evidence
}

-- SeqFeatData: the kind-specific payload of a feature.
-- Exactly one alternative is present. Used by Seq-feat.data and
-- SeqFeatXref.data.
SeqFeatData ::= CHOICE {
    gene Gene-ref ,
    org Org-ref ,
    cdregion Cdregion ,
    prot Prot-ref ,
    rna RNA-ref ,
    pub Pubdesc ,              -- publication applies to this seq
    seq Seq-loc ,              -- to annotate origin from another seq
    imp Imp-feat ,
    region VisibleString,      -- named region (globin locus)
    comment NULL ,             -- just a comment
    bond ENUMERATED {          -- kind of bond
        disulfide (1) ,
        thiolester (2) ,
        xlink (3) ,
        thioether (4) ,
        other (255) } ,
    site ENUMERATED {          -- kind of site
        active (1) ,
        binding (2) ,
        cleavage (3) ,
        inhibit (4) ,
        modified (5),
        glycosylation (6) ,
        myristoylation (7) ,
        mutagenized (8) ,
        metal-binding (9) ,
        phosphorylation (10) ,
        acetylation (11) ,
        amidation (12) ,
        methylation (13) ,
        hydroxylation (14) ,
        sulfatation (15) ,
        oxidative-deamination (16) ,
        pyrrolidone-carboxylic-acid (17) ,
        gamma-carboxyglutamic-acid (18) ,
        blocked (19) ,
        lipid-binding (20) ,
        np-binding (21) ,
        dna-binding (22) ,
        signal-peptide (23) ,
        transit-peptide (24) ,
        transmembrane-region (25) ,
        nitrosylation (26) ,
        other (255) } ,
    rsite Rsite-ref ,       -- restriction site  (for maps really)
    user User-object ,      -- user defined structure
    txinit Txinit ,         -- transcription initiation
    num Numbering ,         -- a numbering system
    psec-str ENUMERATED {   -- protein secondary structure
        helix (1) ,         -- any helix
        sheet (2) ,         -- beta sheet
        turn  (3) } ,       -- beta or gamma turn
    non-std-residue VisibleString ,  -- non-standard residue here in seq
    het Heterogen ,         -- cofactor, prosthetic grp, etc, bound to seq
    biosrc BioSource,
    clone Clone-ref,
    variation Variation-ref
}

-- SeqFeatXref: reference from one feature to another, by id, by a copy
-- of its data, or both. Used as the element type of Seq-feat.xref.
SeqFeatXref ::= SEQUENCE {       -- both optional because can have one or both
    id Feat-id OPTIONAL ,        -- the feature copied
    data SeqFeatData OPTIONAL }  -- the specific data

-- SeqFeatSupport: evidence supporting a feature, grouped into three
-- optional, unordered collections. Used as Seq-feat.support.
SeqFeatSupport ::= SEQUENCE {
  experiment SET OF ExperimentSupport OPTIONAL ,
  inference SET OF InferenceSupport OPTIONAL ,
  model-evidence SET OF ModelEvidenceSupport OPTIONAL
}

-- EvidenceCategory: named values for the category field of
-- ExperimentSupport and InferenceSupport.
EvidenceCategory ::= INTEGER {
  not-set (0) ,
  coordinates (1) ,
  description (2) ,
  existence (3)
}

-- ExperimentSupport: experimental evidence for a feature.
-- explanation is the only mandatory field; literature references may
-- be given as PubMed ids and/or DOIs.
ExperimentSupport ::= SEQUENCE {
  category EvidenceCategory OPTIONAL ,
  explanation VisibleString ,
  pmids SET OF PubMedId OPTIONAL ,
  dois SET OF DOI OPTIONAL
}

-- Program-id: a program identified by name, with an optional version
-- string. Used as the element type of EvidenceBasis.programs.
Program-id ::= SEQUENCE {
  name VisibleString ,
  version VisibleString OPTIONAL
}

-- EvidenceBasis: what an inference rests on, as a set of programs
-- and/or a set of sequence ids. Both members are OPTIONAL.
-- Used as InferenceSupport.basis.
EvidenceBasis ::= SEQUENCE {
  programs SET OF Program-id OPTIONAL ,
  accessions SET OF Seq-id OPTIONAL
}

-- InferenceSupport: non-experimental (inferred) evidence for a feature.
-- basis is the only mandatory field. type defaults to not-set and
-- same-species defaults to FALSE when absent.
InferenceSupport ::= SEQUENCE {
  category EvidenceCategory OPTIONAL ,
  type INTEGER {
    not-set (0) ,
    similar-to-sequence (1) ,
    similar-to-aa (2) ,
    similar-to-dna (3) ,
    similar-to-rna (4) ,
    similar-to-mrna (5) ,
    -- NOTE(review): "similiar" below looks like a misspelling of
    -- "similar". The identifier appears in text encodings, so
    -- renaming it would break existing data. Leave it unchanged.
    similiar-to-est (6) ,
    similar-to-other-rna (7) ,
    profile (8) ,
    nucleotide-motif (9) ,
    protein-motif (10) ,
    ab-initio-prediction (11) ,
    alignment (12) ,
    other (255)
  } DEFAULT not-set ,
  other-type VisibleString OPTIONAL ,  -- presumably names the type when type is other; confirm
  same-species BOOLEAN DEFAULT FALSE ,
  basis EvidenceBasis ,
  pmids SET OF PubMedId OPTIONAL ,
  dois SET OF DOI OPTIONAL
}

-- ModelEvidenceItem: one sequence supporting a model, with optional
-- exon statistics. id is the only mandatory field; both BOOLEAN flags
-- default to FALSE when absent.
-- Used as the element type of the mrna, est and protein sets of
-- ModelEvidenceSupport.
ModelEvidenceItem ::= SEQUENCE {
  id Seq-id ,
  exon-count INTEGER OPTIONAL ,
  exon-length INTEGER OPTIONAL ,
  full-length BOOLEAN DEFAULT FALSE ,
  supports-all-exon-combo BOOLEAN DEFAULT FALSE
}

-- ModelEvidenceSupport: model-based evidence for a feature, with
-- supporting items grouped into mrna, est and protein sets.
-- No field is mandatory; both BOOLEAN flags default to FALSE.
-- The last four fields repeat those of ModelEvidenceItem at the level
-- of the whole record.
ModelEvidenceSupport ::= SEQUENCE {
  method VisibleString OPTIONAL ,
  mrna SET OF ModelEvidenceItem OPTIONAL ,
  est SET OF ModelEvidenceItem OPTIONAL ,
  protein SET OF ModelEvidenceItem OPTIONAL ,
  identification Seq-id OPTIONAL ,
  dbxref SET OF Dbtag OPTIONAL ,
  exon-count INTEGER OPTIONAL ,
  exon-length INTEGER OPTIONAL ,
  full-length BOOLEAN DEFAULT FALSE ,
  supports-all-exon-combo BOOLEAN DEFAULT FALSE
}

--*** CdRegion ***********************************************
--*
--*  Instructions to translate from a nucleic acid to a peptide
--*    conflict means it's supposed to translate but doesn't
--*


-- Cdregion: instructions for translating a nucleic acid into a peptide.
-- No field is mandatory; frame defaults to not-set when absent.
-- gaps, mismatch and stops are counts that qualify a conflict/exception.
Cdregion ::= SEQUENCE {
    orf BOOLEAN OPTIONAL ,             -- just an ORF ?
    frame ENUMERATED {
        not-set (0) ,                  -- not set, code uses one
        one (1) ,
        two (2) ,
        three (3) } DEFAULT not-set ,      -- reading frame
    conflict BOOLEAN OPTIONAL ,        -- conflict
    gaps INTEGER OPTIONAL ,            -- number of gaps on conflict/except
    mismatch INTEGER OPTIONAL ,        -- number of mismatches on above
    code Genetic-code OPTIONAL ,       -- genetic code used
    code-break SEQUENCE OF Code-break OPTIONAL ,   -- individual exceptions
    stops INTEGER OPTIONAL }           -- number of stop codons on above

                    -- each code is 64 cells long, in the order where
                    -- T=0,C=1,A=2,G=3, TTT=0, TTC=1, TCA=4, etc
                    -- NOTE: this order does NOT correspond to a Seq-data
                    -- encoding.  It is "natural" to codon usage instead.
                    -- the value in each cell is the AA coded for
                    -- start= AA coded only if first in peptide
                    --   in start array, if codon is not a legitimate start
                    --   codon, that cell will have the "gap" symbol for
                    --   that alphabet.  Otherwise it will have the AA
                    --   encoded when that codon is used at the start.

-- Genetic-code: an unordered set of elements, each describing one aspect
-- of a genetic code: its name, its database id, a translation table in
-- one of three alphabets, or a start-codon table (the "s" prefixed
-- alternatives) in one of the same three alphabets.
Genetic-code ::= SET OF CHOICE {
    name VisibleString ,               -- name of a code
    id INTEGER ,                       -- id in dbase
    ncbieaa VisibleString ,            -- indexed to IUPAC extended
    ncbi8aa OCTET STRING ,             -- indexed to NCBI8aa
    ncbistdaa OCTET STRING ,           -- indexed to NCBIstdaa
    sncbieaa VisibleString ,            -- start, indexed to IUPAC extended
    sncbi8aa OCTET STRING ,             -- start, indexed to NCBI8aa
    sncbistdaa OCTET STRING }           -- start, indexed to NCBIstdaa

-- Code-break: a specific codon exception, giving the location and the
-- amino acid to use there, expressed in one of three alphabets.
-- Used as the element type of Cdregion.code-break.
Code-break ::= SEQUENCE {              -- specific codon exceptions
    loc Seq-loc ,                      -- location of exception
    aa CHOICE {                        -- the amino acid
        ncbieaa INTEGER ,              -- ASCII value of NCBIeaa code
        ncbi8aa INTEGER ,              -- NCBI8aa code
        ncbistdaa INTEGER } }           -- NCBIstdaa code

-- Genetic-code-table: unordered collection of Genetic-code entries.
Genetic-code-table ::= SET OF Genetic-code     -- table of genetic codes

--*** Import ***********************************************
--*
--*  Features imported from other databases
--*

-- Imp-feat: a feature imported from another database, kept as text.
-- key is mandatory; loc and descr are OPTIONAL.
Imp-feat ::= SEQUENCE {
    key VisibleString ,
    loc VisibleString OPTIONAL ,         -- original location string
    descr VisibleString OPTIONAL }       -- text description

-- Gb-qual: a qualifier as a (qual, val) pair of strings, both mandatory.
-- Used as the element type of Seq-feat.qual.
Gb-qual ::= SEQUENCE {
    qual VisibleString ,
    val VisibleString }


--*** Clone-ref ***********************************************
--*
--*  Specification of clone features
--*

-- Clone-ref: describes a clone feature.
-- name is the only mandatory field; concordant and unique default to
-- FALSE when absent. The "OPTIONAL?" remarks record an open question
-- about whether those two flags should be OPTIONAL instead.
Clone-ref ::= SEQUENCE {
    name VisibleString,        -- Official clone symbol
    library VisibleString OPTIONAL,     -- Library name

    concordant BOOLEAN DEFAULT FALSE, -- OPTIONAL?
    unique BOOLEAN DEFAULT FALSE, -- OPTIONAL?
    placement-method INTEGER {
        end-seq (0),           -- Clone placed by end sequence
        insert-alignment (1),  -- Clone placed by insert alignment
        sts (2),               -- Clone placed by STS
        fish (3),
        fingerprint (4),
        end-seq-insert-alignment (5), -- combined end-seq and insert align
        external (253),           -- Placement provided externally
        curated (254),            -- Human placed or approved
        other (255)
    } OPTIONAL,
    clone-seq Clone-seq-set OPTIONAL
}

-- Clone-seq-set: an unordered collection (SET OF) of Clone-seq records,
-- referenced from Clone-ref.clone-seq.
Clone-seq-set ::= SET OF Clone-seq


-- Clone-seq: one sequence associated with a clone.  type and location are
-- required; confidence, seq, align-id and support are OPTIONAL.
Clone-seq ::= SEQUENCE {
    -- kind of clone sequence (required)
    type INTEGER {
        insert (0),
        end (1),
        other (255)
    },
    -- confidence classification of the hit(s) for this sequence
    confidence INTEGER {
        multiple (0),        -- Multiple hits
        na (1),              -- Unspecified
        nohit-rep (2),       -- No hits, end flagged repetitive
        nohitnorep (3),      -- No hits, end not flagged repetitive
        other-chrm (4),      -- Hit on different chromosome
        unique (5),
        virtual (6),         -- Virtual (hasn't been sequenced)
        multiple-rep (7),    -- Multiple hits, end flagged repetitive
        multiplenorep (8),   -- Multiple hits, end not flagged repetitive
        no-hit (9),          -- No hits
        other (255)
    } OPTIONAL,
    location Seq-loc,        -- location on sequence
    seq Seq-loc OPTIONAL,    -- clone sequence location
    align-id Dbtag OPTIONAL, -- internal alignment identifier
    -- how this sequence relates to the clone placement
    support INTEGER {
        prototype (0),       -- sequence used to place clone
        supporting (1),      -- sequence supports placement
        supports-other(2),   -- supports a different placement
        non-supporting (3)   -- does not support any placement
    } OPTIONAL
}

END


--*** Variation-ref ***********************************************
--*
--*  Specification of variation features
--*

NCBI-Variation DEFINITIONS ::=
BEGIN

EXPORTS Variation-ref, Variation-inst, VariantProperties,
        Population-data, Phenotype;

IMPORTS Int-fuzz, User-object, Object-id, Dbtag FROM NCBI-General
        Seq-literal FROM NCBI-Sequence
        SubSource FROM NCBI-BioSource
        Seq-loc FROM NCBI-Seqloc
        Pub FROM NCBI-Pub;


-- --------------------------------------------------------------------------
-- Historically, the dbSNP definitions document data structures used in the
-- processing and annotation of variations by the dbSNP group.  The intention
-- is to provide clients with information that reflects internal information
-- produced during the mapping of SNPs.
-- --------------------------------------------------------------------------

-- VariantProperties: attribute flags and annotations for a variation.
-- Only version is required; every other member is OPTIONAL.  Most INTEGER
-- members are bit-fields (see the NOTE below); map-weight and allele-state
-- are explicitly marked as plain enumerations.
VariantProperties ::= SEQUENCE {
    version INTEGER,

    -- NOTE:
    -- The format for most of these values is as an integer
    -- Unless otherwise noted, these integers represent a bitwise OR (= simple
    -- sum) of the possible values, and as such, these values represent the
    -- specific bit flags that may be set for each of the possible attributes
    -- here.

    resource-link INTEGER {
        preserved        (1), -- Clinical, Pubmed, Cited, (0x01)
        provisional      (2), -- Provisional Third Party Annotations (0x02)
        has3D            (4), -- Has 3D structure SNP3D table (0x04)
        submitterLinkout (8), -- SNP->SubSNP->Batch link_out (0x08)
        clinical        (16), -- Clinical if LSDB, OMIM, TPA, Diagnostic (0x10)
        genotypeKit     (32)  -- Marker exists on high density genotyping kit
                              -- (0x20)
    } OPTIONAL,

    gene-location INTEGER {
        in-gene         (1), -- Sequence intervals covered by a gene ID but not
                             -- having an aligned transcript (0x01)
        near-gene-5     (2), -- Within 2kb of the 5' end of a gene feature
        near-gene-3     (4), -- Within 0.5kb of the 3' end of a gene feature
        intron          (8), -- In Intron (0x08)
        donor          (16), -- In donor splice-site (0x10)
        acceptor       (32), -- In acceptor splice-site (0x20)
        utr-5          (64), -- In 5' UTR (0x40)
        utr-3         (128), -- In 3' UTR (0x80)
        in-start-codon(256), -- the variant is observed in a start codon
                             -- (0x100)
        in-stop-codon (512), -- the variant is observed in a stop codon
                             -- (0x200)
        intergenic   (1024), -- variant located between genes (0x400)
        conserved-noncoding(2048) -- variant is located in a conserved
                                  -- non-coding region (0x800)
    } OPTIONAL,

    effect INTEGER {
        no-change      (0), -- known to cause no functional changes
                            -- since 0 does not combine with any other bit
                            -- value, 'no-change' specifically implies that
                            -- there are no consequences
        synonymous     (1), -- one allele in the set does not change the encoded
                            -- amino acid (0x1)
        nonsense       (2), -- one allele in the set changes to STOP codon
                            -- (TER).  (0x2)
        missense       (4), -- one allele in the set changes protein peptide
                            -- (0x4)
        frameshift     (8), -- one allele in the set changes all downstream
                            -- amino acids (0x8)

        up-regulator  (16), -- the variant causes increased transcription
                            -- (0x10)
        down-regulator(32), -- the variant causes decreased transcription
                            -- (0x20)
        methylation   (64),
        stop-gain     (128), -- reference codon is not stop codon, but the snp
                             -- variant allele changes the codon to a
                             -- terminating codon.
        stop-loss     (256)  -- reverse of STOP-GAIN: reference codon is a
                             -- stop codon, but a snp variant allele changes
                             -- the codon to a non-terminating codon.
    } OPTIONAL,

    mapping INTEGER {
        has-other-snp         (1), -- Another SNP has the same mapped positions
                                   -- on reference assembly (0x01)
        has-assembly-conflict (2), -- Weight 1 or 2 SNPs that map to different
                                   -- chromosomes on different assemblies (0x02)
        is-assembly-specific  (4)  -- Only maps to 1 assembly (0x04)
    } OPTIONAL,

    -- map-weight captures specificity of placement
    -- NOTE: This is *NOT* a bitfield
    map-weight INTEGER {
        is-uniquely-placed(1),
        placed-twice-on-same-chrom(2),
        placed-twice-on-diff-chrom(3),
        many-placements(10)
    } OPTIONAL,

    frequency-based-validation INTEGER {
        is-mutation       (1), -- low frequency variation that is cited in
                               -- journal or other reputable sources (0x01)
        above-5pct-all    (2), -- >5% minor allele freq in each and all
                               -- populations (0x02)
        above-5pct-1plus  (4), -- >5% minor allele freq in 1+ populations (0x04)
        validated         (8), -- Bit is set if the variant has a minor allele
                               -- observed in two or more separate chromosomes
        above-1pct-all   (16), -- >1% minor allele freq in each and all
                               -- populations (0x10)
        above-1pct-1plus (32)  -- >1% minor allele freq in 1+ populations (0x20)
    } OPTIONAL,

    genotype INTEGER {
        in-haplotype-set (1), -- Exists in a haplotype tagging set (0x01)
        has-genotypes    (2)  -- SNP has individual genotype (0x02)
    } OPTIONAL,

    -- project IDs are IDs from BioProjects
    -- in order to report information about project relationships, we
    -- require projects to be registered
    -- This field in many ways duplicates dbxrefs; however, the
    -- intention of this field is to more adequately reflect
    -- ownership and data source
    --
    -- 11/9/2010: DO NOT USE
    -- This field was changed in the spec in a breaking way; using it will
    -- break clients.  We are officially suppressing / abandoning this field.
    -- Clients who need to use this should instead place the data in
    -- Seq-feat.dbxref, using the db name 'BioProject'
    project-data SET OF INTEGER OPTIONAL,

    quality-check INTEGER {
        contig-allele-missing   (1), -- Reference sequence allele at the mapped
                                     -- position is not present in the SNP
                                     -- allele list, adjusted for orientation
                                     -- (0x01)
        withdrawn-by-submitter  (2), -- One member SS is withdrawn by submitter
                                     -- (0x02)
        non-overlapping-alleles (4), -- RS set has 2+ alleles from different
                                     -- submissions and these sets share no
                                     -- alleles in common (0x04)
        strain-specific         (8), -- Strain specific fixed difference (0x08)
        genotype-conflict      (16)  -- Has Genotype Conflict (0x10)
    } OPTIONAL,

    -- NOTE(review): the values 0 / 1 / 255 look like an enumeration rather
    -- than bit flags, but the spec does not say so here; confirm.
    confidence INTEGER {
        unknown         (0),
        likely-artifact (1),
        other           (255)
    } OPTIONAL,

    -- has this variant been validated?
    -- While a boolean flag offers no subtle distinctions of validation
    -- methods, occasionally it is only known as a single boolean value
    -- NOTE: this flag is redundant and should be omitted if more comprehensive
    -- validation information is present
    other-validation BOOLEAN OPTIONAL,

    -- origin of this allele, if known
    -- note that these are powers-of-two, and represent bits; thus, we can
    -- represent more than one state simultaneously through a bitwise OR
    allele-origin INTEGER {
        unknown         (0),
        germline        (1),
        somatic         (2),
        inherited       (4),
        paternal        (8),
        maternal        (16),
        de-novo         (32),
        biparental      (64),
        uniparental     (128),
        not-tested      (256),
        tested-inconclusive (512),
        not-reported   (1024),

        -- stopper: 1073741824 = 2^30 (the original note said 2^31)
        other           (1073741824)
    } OPTIONAL,

    -- observed allele state, if known
    -- NOTE: THIS IS NOT A BITFIELD!
    allele-state INTEGER {
        unknown         (0),
        homozygous      (1),
        heterozygous    (2),
        hemizygous      (3),
        nullizygous     (4),
        other           (255)
    } OPTIONAL,

    -- NOTE:
    -- 'allele-frequency' here refers to the minor allele frequency of the
    -- default population
    allele-frequency REAL OPTIONAL,

    -- is this variant the ancestral allele?
    is-ancestral-allele BOOLEAN OPTIONAL
}

-- Phenotype: a phenotype annotation for a variation.  Every member is
-- OPTIONAL: free-text source and term, a set of database cross-references,
-- and a clinical-significance code.
Phenotype ::= SEQUENCE {
    source VisibleString OPTIONAL,
    term VisibleString OPTIONAL,
    xref SET OF Dbtag OPTIONAL,

    -- does this variant have known clinical significance?
    -- the values are consecutive codes (0..7 plus 255), not bit flags
    clinical-significance INTEGER {
        unknown                 (0),
        untested                (1),
        non-pathogenic          (2),
        probable-non-pathogenic (3),
        probable-pathogenic     (4),
        pathogenic              (5),
        drug-response           (6),
        histocompatibility      (7),
        other                   (255)
    } OPTIONAL
}

-- Population-data: per-population observations for a variation.  Only the
-- population name is required; all other members are OPTIONAL.
-- Note that Variation-ref.population-data, which uses this type, is marked
-- DEPRECATED in this spec.
Population-data ::= SEQUENCE {
    -- assayed population (e.g. HAPMAP-CEU)
    population VisibleString,
    genotype-frequency REAL OPTIONAL,
    chromosomes-tested INTEGER OPTIONAL,
    sample-ids SET OF Object-id OPTIONAL,
    allele-frequency REAL OPTIONAL,

    -- This field is an explicit bit-field
    -- Valid values should be a bitwise combination (= simple sum)
    -- of any of the values below
    flags INTEGER {
        is-default-population   (1),
        is-minor-allele         (2),
        is-rare-allele          (4)
    } OPTIONAL
}

-- Ext-loc: an identifier paired with a location; both members are required.
-- Used by Variation-ref.ext-locs, which is marked DEPRECATED in this spec.
Ext-loc ::= SEQUENCE {
    id Object-id,
    location Seq-loc
}


-- Variation-ref: description of a variation feature.  The only required
-- member is data (a CHOICE); everything else is OPTIONAL.  The type is
-- recursive: data.set.variations and consequence.variation both contain
-- Variation-ref.  Several members are marked DEPRECATED below and point to
-- their replacements in VariantProperties or Seq-feat.
Variation-ref ::= SEQUENCE {
    -- ids (i.e., SNP rsid / ssid, dbVar nsv/nssv)
    -- expected values include 'dbSNP|rs12334', 'dbSNP|ss12345', 'dbVar|nsv1'
    --
    -- we relate three kinds of IDs here:
    --  - our current object's id
    --  - the id of this object's parent, if it exists
    --  - the sample ID that this item originates from
    id        Dbtag OPTIONAL,
    parent-id Dbtag OPTIONAL,
    sample-id Object-id OPTIONAL,
    other-ids SET OF Dbtag OPTIONAL,

    -- names and synonyms
    -- some variants have well-known canonical names and possible accepted
    -- synonyms
    name VisibleString OPTIONAL,
    synonyms SET OF VisibleString OPTIONAL,

    -- tag for comment and descriptions
    description VisibleString OPTIONAL,

    -- phenotype
    phenotype SET OF Phenotype OPTIONAL,

    -- sequencing / acquisition method
    method SET OF INTEGER {
        unknown             (0),
        bac-acgh            (1),
        computational       (2),
        curated             (3),
        digital-array       (4),
        expression-array    (5),
        fish                (6),
        flanking-sequence   (7),
        maph                (8),
        mcd-analysis        (9),
        mlpa                (10),
        oea-assembly        (11),
        oligo-acgh          (12),
        paired-end          (13),
        pcr                 (14),
        qpcr                (15),
        read-depth          (16),
        roma                (17),
        rt-pcr              (18),
        sage                (19),
        sequence-alignment  (20),
        sequencing          (21),
        snp-array           (22),
        snp-genoytyping     (23), -- sic: the identifier is spelled this way
        southern            (24),
        western             (25),
        optical-mapping     (26),

        other               (255)
    } OPTIONAL,

    -- Note about SNP representation and pertinent fields: allele-frequency,
    -- population, quality-codes:
    -- The case of multiple alleles for a SNP would be described by
    -- parent-feature of type Variation-set.diff-alleles, where the child
    -- features of type Variation-inst, all at the same location, would
    -- describe individual alleles.

    -- population data
    -- DEPRECATED - do not use
    population-data SET OF Population-data OPTIONAL,

    -- variant properties bit fields
    variant-prop VariantProperties OPTIONAL,

    -- has this variant been validated?
    -- DEPRECATED: new field = VariantProperties.other-validation
    validated BOOLEAN OPTIONAL,

    -- link-outs to GeneTests database
    -- DEPRECATED - do not use
    clinical-test SET OF Dbtag OPTIONAL,

    -- origin of this allele, if known
    -- note that these are powers-of-two, and represent bits; thus, we can
    -- represent more than one state simultaneously through a bitwise OR
    -- DEPRECATED: new field = VariantProperties.allele-origin
    -- NOTE(review): unlike VariantProperties.allele-origin, this list does
    -- not name not-reported (1024).
    allele-origin INTEGER {
        unknown         (0),
        germline        (1),
        somatic         (2),
        inherited       (4),
        paternal        (8),
        maternal        (16),
        de-novo         (32),
        biparental      (64),
        uniparental     (128),
        not-tested      (256),
        tested-inconclusive (512),

        -- stopper: 1073741824 = 2^30 (the original note said 2^31)
        other           (1073741824)
    } OPTIONAL,

    -- observed allele state, if known
    -- DEPRECATED: new field = VariantProperties.allele-state
    allele-state INTEGER {
        unknown         (0),
        homozygous      (1),
        heterozygous    (2),
        hemizygous      (3),
        nullizygous     (4),
        other           (255)
    } OPTIONAL,

    -- NOTE:
    -- 'allele-frequency' here refers to the minor allele frequency of the
    -- default population
    -- DEPRECATED: new field = VariantProperties.allele-frequency
    allele-frequency REAL OPTIONAL,

    -- is this variant the ancestral allele?
    -- DEPRECATED: new field = VariantProperties.is-ancestral-allele
    is-ancestral-allele BOOLEAN OPTIONAL,

    -- publication support.
    -- Note: made this pub instead of pub-equiv, since
    -- Pub can be pub-equiv and pub-equiv is a set of pubs, but it looks like
    -- Pub is more often used as top-level container
    -- DEPRECATED - do not use; use Seq-feat.dbxref instead
    pub Pub OPTIONAL,

    -- the variation itself (required): exactly one of the alternatives below
    data CHOICE {
        unknown NULL,
        note    VisibleString, --free-form
        uniparental-disomy NULL,

        -- actual sequence-edit at feat.location
        instance        Variation-inst,

        -- Set of related Variations.
        -- Location of the set equals to the union of member locations
        set SEQUENCE {
            type INTEGER {
                unknown     (0),
                compound    (1), -- complex change at the same location on the
                                 -- same molecule
                products    (2), -- different products arising from the same
                                 -- variation in a precursor, e.g. r.[13g>a,
                                 -- 13_88del]
                haplotype   (3), -- changes on the same allele, e.g.
                                 -- r.[13g>a;15u>c]
                genotype    (4), -- changes on different alleles in the same
                                 -- genotype, e.g. g.[476C>T]+[476C>T]
                mosaic      (5), -- different genotypes in the same individual
                individual  (6), -- same organism; allele relationship unknown,
                                 -- e.g. g.[476C>T(+)183G>C]
                population  (7), -- population
                alleles     (8), -- set represents a set of observed alleles
                package     (9), -- set represents a package of observations at
                                 -- a given location, generally containing
                                 -- asserted + reference
                other       (255)
            },
            variations SET OF Variation-ref,
            name  VisibleString OPTIONAL
        },

        -- variant is a complex and undescribed change at the location
        -- This type of variant is known to occur in dbVar submissions
        complex NULL
    },

    -- zero or more consequences of this variation, each one of the
    -- alternatives below
    consequence SET OF CHOICE {
        unknown     NULL,
        splicing    NULL, --some effect on splicing
        note        VisibleString,  --freeform

        -- Describe resulting variation in the product, e.g. missense,
        -- nonsense, silent, neutral, etc in a protein, that arises from
        -- THIS variation.
        variation   Variation-ref,

        -- see http://www.hgvs.org/mutnomen/recs-prot.html
        frameshift SEQUENCE {
            phase INTEGER OPTIONAL,
            x-length INTEGER OPTIONAL
        },

        loss-of-heterozygosity SEQUENCE {
            -- In germline comparison, it will be reference genome assembly
            -- (default) or reference/normal population. In somatic mutation,
            -- it will be a name of the normal tissue.
            reference VisibleString OPTIONAL,

            -- Name of the testing subject type or the testing tissue.
            test VisibleString OPTIONAL
        }
    } OPTIONAL,

    -- Observed location, if different from the parent set or feature.location.
    -- DEPRECATED - do not use
    location        Seq-loc OPTIONAL,

    -- reference other locs, e.g. mapped source
    -- DEPRECATED - do not use
    ext-locs SET OF Ext-loc OPTIONAL,

    -- DEPRECATED - do not use; use Seq-feat.exts instead
    ext             User-object OPTIONAL,

    somatic-origin SET OF SEQUENCE {
        -- description of the somatic origin itself
        source SubSource OPTIONAL,
        -- condition related to this origin's type
        condition SEQUENCE {
            description VisibleString OPTIONAL,
            -- reference to BioTerm / other descriptive database
            object-id SET OF Dbtag OPTIONAL
        } OPTIONAL
    } OPTIONAL

}


-- Delta-item: one step of a sequence edit, used as the element type of
-- Variation-inst.delta.  Every member is optional; action falls back to
-- morph when absent (DEFAULT morph).
Delta-item ::= SEQUENCE {
    -- the sequence the action works with: a literal, a location, or the
    -- location of the enclosing variation itself
    seq CHOICE {
        literal Seq-literal,
        loc Seq-loc,
        this NULL --same location as variation-ref itself
    } OPTIONAL,

    -- Multiplier allows representing a tandem, e.g.  ATATAT as AT*3
    -- This allows describing CNV/SSR where delta=self  with a
    -- multiplier which specifies the count of the repeat unit.

    multiplier          INTEGER OPTIONAL, --assumed 1 if not specified.
    multiplier-fuzz     Int-fuzz OPTIONAL,

    action INTEGER {

        -- replace len(seq) positions starting with location.start with seq
        morph      (0),

        -- go downstream by distance specified by multiplier (upstream if < 0),
        -- in genomic context.
        offset     (1),

        -- excise sequence at location
        -- if multiplier is specified, delete len(location)*multiplier
        -- positions downstream
        del-at     (2),

        -- insert seq before the location.start
        ins-before (3)

    } DEFAULT morph
}


-- Variation instance: a concrete sequence edit.  type and delta are
-- required; observation is OPTIONAL.  The comment beside each type value
-- states the shape of delta expected for that type.
Variation-inst ::= SEQUENCE {
    type INTEGER {
        unknown         (0),    -- delta=[]
        identity        (1),    -- delta=[]
        inv             (2),    -- delta=[del, ins.seq=
                                -- RevComp(variation-location)]
        snv             (3),    -- delta=[morph of length 1]
                                -- NOTE: this is snV not snP; the latter
                                -- requires frequency-based validation to be
                                -- established in VariantProperties
                                -- the strict definition of SNP is an SNV with
                                -- an established population frequency of at
                                -- least 1% in at least 1 population
        mnp             (4),    -- delta=[morph of length >1]
        delins          (5),    -- delta=[del, ins]
        del             (6),    -- delta=[del]
        ins             (7),    -- delta=[ins]
        microsatellite  (8),    -- delta=[del, ins.seq= repeat-unit with fuzzy
                                -- multiplier]
                                -- variation-location is the microsat expansion
                                -- on the sequence
        transposon      (9),    -- delta=[del, ins.seq= known donor or 'this']
                                -- variation-location is equiv of transposon
                                -- locs.
        cnv             (10),   -- delta=[del, ins= 'this' with fuzzy
                                -- multiplier]
        direct-copy     (11),   -- delta=[ins.seq= upstream location on the
                                -- same strand]
        rev-direct-copy (12),   -- delta=[ins.seq= downstream location on the
                                -- same strand]
        inverted-copy   (13),   -- delta=[ins.seq= upstream location on the
                                -- opposite strand]
        everted-copy    (14),   -- delta=[ins.seq= downstream location on the
                                -- opposite strand]
        translocation   (15),   -- delta=like delins
        prot-missense   (16),   -- delta=[morph of length 1]
        prot-nonsense   (17),   -- delta=[del]; variation-location is the tail
                                -- of the protein being truncated
        prot-neutral    (18),   -- delta=[morph of length 1]
        prot-silent     (19),   -- delta=[morph of length 1, same AA as at
                                -- variation-location]
        prot-other      (20),   -- delta=any

        other           (255)   -- delta=any
    },

    -- Sequence that replaces the location, in biological order.
    delta SEQUENCE OF Delta-item,

    -- 'observation' is used to label items in a Variation-ref package
    -- This field is explicitly a bit-field, so the bitwise OR (= sum) of any
    -- of the values may be observed.
    observation INTEGER {
        asserted        (1),   -- inst represents the asserted base at a
                               -- position
        reference       (2),   -- inst represents the reference base at the
                               -- position
        variant         (4)    -- inst represents the observed variant at a
                               -- given position
    } OPTIONAL
}

END


--**********************************************************************
--
--  NCBI Restriction Sites
--  by James Ostell, 1990
--  version 0.8
--
--**********************************************************************

NCBI-Rsite DEFINITIONS ::=
BEGIN

EXPORTS Rsite-ref;

IMPORTS Dbtag FROM NCBI-General;

-- Rsite-ref: reference to a restriction site, given either as a plain
-- string or as a Dbtag pointing into a restriction site database.
Rsite-ref ::= CHOICE {
    str VisibleString ,     -- may be unparsable
    db  Dbtag }             -- pointer to a restriction site database

END

--**********************************************************************
--
--  NCBI RNAs
--  by James Ostell, 1990
--  version 0.8
--
--**********************************************************************

NCBI-RNA DEFINITIONS ::=
BEGIN

EXPORTS RNA-ref, Trna-ext, RNA-gen, RNA-qual, RNA-qual-set;

IMPORTS Seq-loc FROM NCBI-Seqloc;

--*** rnas ***********************************************
--*
--*  various rnas
--*
                         -- minimal RNA sequence
-- RNA-ref: description of an RNA feature.  type is required (ENUMERATED, so
-- only the listed values are valid); pseudo and ext are OPTIONAL.  ext is a
-- CHOICE whose alternative depends on the kind of RNA, as noted per line.
RNA-ref ::= SEQUENCE {
    type ENUMERATED {            -- type of RNA feature
        unknown (0) ,
        premsg (1) ,
        mRNA (2) ,
        tRNA (3) ,
        rRNA (4) ,
        snRNA (5) ,              -- will become ncRNA, with RNA-gen.class = snRNA
        scRNA (6) ,              -- will become ncRNA, with RNA-gen.class = scRNA
        snoRNA (7) ,             -- will become ncRNA, with RNA-gen.class = snoRNA
        ncRNA (8) ,              -- non-coding RNA; subsumes snRNA, scRNA, snoRNA
        tmRNA (9) ,
        miscRNA (10) ,
        other (255) } ,
    pseudo BOOLEAN OPTIONAL ,
    ext CHOICE {
        name VisibleString ,        -- for naming "other" type
        tRNA Trna-ext ,             -- for tRNAs
        gen RNA-gen } OPTIONAL      -- generic fields for ncRNA, tmRNA, miscRNA
    }

-- Trna-ext: all three members are OPTIONAL.  aa is a CHOICE, so at most one
-- of the four integer amino-acid encodings is present.
Trna-ext ::= SEQUENCE {                 -- tRNA feature extensions
    aa CHOICE {                         -- aa this carries
        iupacaa INTEGER ,
        ncbieaa INTEGER ,
        ncbi8aa INTEGER ,
        ncbistdaa INTEGER } OPTIONAL ,
    codon SET OF INTEGER OPTIONAL ,     -- codon(s) as in Genetic-code
    anticodon Seq-loc OPTIONAL }        -- location of anticodon

-- RNA-gen: generic RNA fields, used as the gen alternative of RNA-ref.ext
-- (for ncRNA, tmRNA, miscRNA).  All members are OPTIONAL.
RNA-gen ::= SEQUENCE {
    class VisibleString OPTIONAL ,      -- for ncRNAs, the class of non-coding RNA:
                                        -- examples: antisense_RNA, guide_RNA, snRNA
    product VisibleString OPTIONAL ,
    quals RNA-qual-set OPTIONAL         -- e.g., tag_peptide qualifier for tmRNAs
}

-- RNA-qual: one tag/value pair; both strings are required.  Collected into
-- RNA-qual-set.
RNA-qual ::= SEQUENCE {                 -- Additional data values for RNA-gen,
    qual VisibleString ,                -- in a tag (qual), value (val) format
    val VisibleString }

-- RNA-qual-set: an ordered list (SEQUENCE OF) of RNA-qual pairs, referenced
-- from RNA-gen.quals.
RNA-qual-set ::= SEQUENCE OF RNA-qual

END

--**********************************************************************
--
--  NCBI Genes
--  by James Ostell, 1990
--  version 0.8
--
--**********************************************************************

NCBI-Gene DEFINITIONS ::=
BEGIN

EXPORTS Gene-ref, Gene-nomenclature;

IMPORTS Dbtag FROM NCBI-General;

--*** Gene ***********************************************
--*
--*  reference to a gene
--*

-- Gene-ref: reference to a gene.  No member is required: pseudo is FALSE
-- when absent (DEFAULT FALSE) and every other member is OPTIONAL.
Gene-ref ::= SEQUENCE {
    locus VisibleString OPTIONAL ,        -- Official gene symbol
    allele VisibleString OPTIONAL ,       -- Official allele designation
    desc VisibleString OPTIONAL ,         -- descriptive name
    maploc VisibleString OPTIONAL ,       -- descriptive map location
    pseudo BOOLEAN DEFAULT FALSE ,        -- pseudogene
    db SET OF Dbtag OPTIONAL ,            -- ids in other dbases
    syn SET OF VisibleString OPTIONAL ,   -- synonyms for locus
    locus-tag VisibleString OPTIONAL ,    -- systematic gene name (e.g., MI0001, ORF0069)
    formal-name Gene-nomenclature OPTIONAL
}

-- Gene-nomenclature: the type of Gene-ref.formal-name.  status is required
-- (ENUMERATED, so only the three listed values are valid); symbol, name and
-- source are OPTIONAL.
Gene-nomenclature ::= SEQUENCE {
    status ENUMERATED {
        unknown (0) ,
        official (1) ,
        interim (2)
    } ,
    symbol VisibleString OPTIONAL ,
    name VisibleString OPTIONAL ,
    source Dbtag OPTIONAL
}

END


--**********************************************************************
--
--  NCBI Organism
--  by James Ostell, 1994
--  version 3.0
--
--**********************************************************************

NCBI-Organism DEFINITIONS ::=
BEGIN

EXPORTS Org-ref;

IMPORTS Dbtag FROM NCBI-General;

--*** Org-ref ***********************************************
--*
--*  Reference to an organism
--*     defines only the organism.. lower levels of detail for biological
--*     molecules are provided by the Source object
--*

-- Org-ref: reference to an organism.  Every member is OPTIONAL; structured
-- naming detail lives in orgname (an OrgName).
Org-ref ::= SEQUENCE {
    taxname VisibleString OPTIONAL ,   -- preferred formal name
    common VisibleString OPTIONAL ,    -- common name
    mod SET OF VisibleString OPTIONAL , -- unstructured modifiers
    db SET OF Dbtag OPTIONAL ,         -- ids in taxonomic or culture dbases
    syn SET OF VisibleString OPTIONAL ,  -- synonyms for taxname or common
    orgname OrgName OPTIONAL }


-- OrgName: structured organism name.  Every member is OPTIONAL.  name is a
-- CHOICE between the naming forms below; mod is an ordered list of OrgMod
-- records.  The type is recursive through hybrid (MultiOrgName is a
-- SEQUENCE OF OrgName).
OrgName ::= SEQUENCE {
    name CHOICE {
        binomial BinomialOrgName ,         -- genus/species type name
        virus VisibleString ,              -- virus names are different
        hybrid MultiOrgName ,              -- hybrid between organisms
        namedhybrid BinomialOrgName ,      -- some hybrids have genus x species name
        partial PartialOrgName } OPTIONAL , -- when genus not known
    attrib VisibleString OPTIONAL ,        -- attribution of name
    mod SEQUENCE OF OrgMod OPTIONAL ,
    lineage VisibleString OPTIONAL ,       -- lineage with semicolon separators
    gcode INTEGER OPTIONAL ,               -- genetic code (see CdRegion)
    mgcode INTEGER OPTIONAL ,              -- mitochondrial genetic code
    div VisibleString OPTIONAL ,           -- GenBank division code
    pgcode INTEGER OPTIONAL }              -- plastid genetic code


-- OrgMod: one structured organism modifier: a subtype code plus its text
-- (subname), with an optional attribution.  subtype and subname are
-- required.  Named subtype values start at 2 and run to 39; 253..255 are
-- the old-lineage / old-name / other codes.
OrgMod ::= SEQUENCE {
    subtype INTEGER {
        strain (2) ,
        substrain (3) ,
        type (4) ,
        subtype (5) ,
        variety (6) ,
        serotype (7) ,
        serogroup (8) ,
        serovar (9) ,
        cultivar (10) ,
        pathovar (11) ,
        chemovar (12) ,
        biovar (13) ,
        biotype (14) ,
        group (15) ,
        subgroup (16) ,
        isolate (17) ,
        common (18) ,
        acronym (19) ,
        dosage (20) ,          -- chromosome dosage of hybrid
        nat-host (21) ,        -- natural host of this specimen
        sub-species (22) ,
        specimen-voucher (23) ,
        authority (24) ,
        forma (25) ,
        forma-specialis (26) ,
        ecotype (27) ,
        synonym (28) ,
        anamorph (29) ,
        teleomorph (30) ,
        breed (31) ,
        gb-acronym (32) ,       -- used by taxonomy database
        gb-anamorph (33) ,      -- used by taxonomy database
        gb-synonym (34) ,       -- used by taxonomy database
        culture-collection (35) ,
        bio-material (36) ,
        metagenome-source (37) ,
        type-material (38) ,
        nomenclature (39) ,     -- code of nomenclature in subname (B,P,V,Z or combination)
        old-lineage (253) ,
        old-name (254) ,
        other (255) } ,         -- old ASN5 note said old-name (254) would be added; it is now defined above
    subname VisibleString ,
    attrib VisibleString OPTIONAL }  -- attribution/source of name

-- BinomialOrgName: genus / species / subspecies name parts.  Used by the
-- binomial and namedhybrid alternatives of OrgName.name.  The rule that
-- species must be present when subspecies is used is stated only in the
-- comment; the ASN.1 itself marks both as OPTIONAL.
BinomialOrgName ::= SEQUENCE {
    genus VisibleString ,               -- required
    species VisibleString OPTIONAL ,    -- species required if subspecies used
    subspecies VisibleString OPTIONAL }

-- MultiOrgName: an ordered list of OrgName, used by OrgName.name.hybrid.
-- Order matters, as the trailing comment explains.
MultiOrgName ::= SEQUENCE OF OrgName   -- the first will be used to assign division

-- PartialOrgName: an ordered list of TaxElement, used by
-- OrgName.name.partial.
PartialOrgName ::= SEQUENCE OF TaxElement  -- when we don't know the genus

-- TaxElement: one named taxonomic level.  fixed-level and name are
-- required.  When fixed-level is other (0) the level is given as text in
-- the optional level string instead.
TaxElement ::= SEQUENCE {
    fixed-level INTEGER {
       other (0) ,                     -- level must be set in string
       family (1) ,
       order (2) ,
       class (3) } ,
    level VisibleString OPTIONAL ,
    name VisibleString }

END


--**********************************************************************
--
--  NCBI BioSource
--  by James Ostell, 1994
--  version 3.0
--
--**********************************************************************

NCBI-BioSource DEFINITIONS ::=
BEGIN

EXPORTS BioSource, SubSource;

IMPORTS Org-ref FROM NCBI-Organism;

--********************************************************************
--
-- BioSource gives the source of the biological material
--   for sequences
--
--********************************************************************

-- Source of the biological material for a sequence.
-- Only org is mandatory: genome and origin both DEFAULT to unknown and
-- every other component is OPTIONAL.
BioSource ::= SEQUENCE {
    genome INTEGER {         -- biological context
        unknown (0) ,
        genomic (1) ,
        chloroplast (2) ,
        chromoplast (3) ,
        kinetoplast (4) ,
        mitochondrion (5) ,
        plastid (6) ,
        macronuclear (7) ,
        extrachrom (8) ,
        plasmid (9) ,
        transposon (10) ,
        insertion-seq (11) ,
        cyanelle (12) ,
        proviral (13) ,
        virion (14) ,
        nucleomorph (15) ,
        apicoplast (16) ,
        leucoplast (17) ,
        proplastid (18) ,
        endogenous-virus (19) ,
        hydrogenosome (20) ,
        chromosome (21) ,
        chromatophore (22) ,
        plasmid-in-mitochondrion (23) ,
        plasmid-in-plastid (24)
      } DEFAULT unknown ,
    origin INTEGER {                   -- see the per-value notes below
      unknown (0) ,
      natural (1) ,                    -- normal biological entity
      natmut (2) ,                     -- naturally occurring mutant
      mut (3) ,                        -- artificially mutagenized
      artificial (4) ,                 -- artificially engineered
      synthetic (5) ,                  -- purely synthetic
      other (255)
    } DEFAULT unknown ,
    org Org-ref ,                      -- the only mandatory component
    subtype SEQUENCE OF SubSource OPTIONAL ,  -- ordered list of SubSource entries
    is-focus NULL OPTIONAL ,           -- to distinguish biological focus
                                       -- (a flag: presence alone carries the meaning)
    pcr-primers PCRReactionSet OPTIONAL }

-- Unordered collection of PCRReaction entries (used by BioSource.pcr-primers).
PCRReactionSet ::= SET OF PCRReaction

-- One PCR reaction: a forward and a reverse primer set.
-- Both components are OPTIONAL, so either side may be absent.
PCRReaction ::= SEQUENCE {
    forward PCRPrimerSet OPTIONAL ,
    reverse PCRPrimerSet OPTIONAL }

-- Unordered collection of PCRPrimer entries.
PCRPrimerSet ::= SET OF PCRPrimer

-- A single primer, described by its sequence and/or its name.
-- Both components are OPTIONAL in the schema.
PCRPrimer ::= SEQUENCE {
    seq PCRPrimerSeq OPTIONAL ,
    name PCRPrimerName OPTIONAL }

-- Primer sequence, carried as a plain VisibleString (used by PCRPrimer.seq).
PCRPrimerSeq ::= VisibleString

-- Primer name, carried as a plain VisibleString (used by PCRPrimer.name).
PCRPrimerName ::= VisibleString

-- One typed qualifier of a BioSource: the subtype code says what kind of
-- value the name string holds. subtype and name are mandatory; attrib is
-- OPTIONAL. Codes 1 to 43 are assigned consecutively; 255 is the catch-all.
SubSource ::= SEQUENCE {
    subtype INTEGER {
        chromosome (1) ,
        map (2) ,
        clone (3) ,
        subclone (4) ,
        haplotype (5) ,
        genotype (6) ,
        sex (7) ,
        cell-line (8) ,
        cell-type (9) ,
        tissue-type (10) ,
        clone-lib (11) ,
        dev-stage (12) ,
        frequency (13) ,
        germline (14) ,
        rearranged (15) ,
        lab-host (16) ,
        pop-variant (17) ,
        tissue-lib (18) ,
        plasmid-name (19) ,
        transposon-name (20) ,
        insertion-seq-name (21) ,
        plastid-name (22) ,
        country (23) ,
        segment (24) ,
        endogenous-virus-name (25) ,
        transgenic (26) ,
        environmental-sample (27) ,
        isolation-source (28) ,
        lat-lon (29) ,          -- +/- decimal degrees
        collection-date (30) ,  -- DD-MMM-YYYY format
        collected-by (31) ,     -- name of person who collected the sample
        identified-by (32) ,    -- name of person who identified the sample
        fwd-primer-seq (33) ,   -- sequence (possibly more than one; semicolon-separated)
        rev-primer-seq (34) ,   -- sequence (possibly more than one; semicolon-separated)
        fwd-primer-name (35) ,
        rev-primer-name (36) ,
        metagenomic (37) ,
        mating-type (38) ,
        linkage-group (39) ,
        haplogroup (40) ,
        whole-replicon (41) ,
        phenotype (42) ,
        altitude (43) ,
        other (255) } ,
    name VisibleString ,               -- the qualifier value, as text
    attrib VisibleString OPTIONAL }    -- attribution/source of this name

END

--**********************************************************************
--
--  NCBI Protein
--  by James Ostell, 1990
--  version 0.8
--
--**********************************************************************

NCBI-Protein DEFINITIONS ::=
BEGIN

EXPORTS Prot-ref;

IMPORTS Dbtag FROM NCBI-General;

--*** Prot-ref ***********************************************
--*
--*  Reference to a protein name
--*

-- Reference to a protein. No component is mandatory: every field is
-- OPTIONAL except processed, which DEFAULTs to not-set.
Prot-ref ::= SEQUENCE {
    name SET OF VisibleString OPTIONAL ,      -- protein name
    desc VisibleString OPTIONAL ,      -- description (instead of name)
    ec SET OF VisibleString OPTIONAL , -- E.C. number(s)
    activity SET OF VisibleString OPTIONAL ,  -- activities
    db SET OF Dbtag OPTIONAL ,         -- ids in other dbases
    processed ENUMERATED {             -- processing status
       not-set (0) ,
       preprotein (1) ,
       mature (2) ,
       signal-peptide (3) ,
       transit-peptide (4) ,
       propeptide (5) } DEFAULT not-set }

END
--********************************************************************
--
--  Transcription Initiation Site Feature Data Block
--  James Ostell, 1991
--  Philip Bucher, David Ghosh
--  version 1.1
--
--
--
--********************************************************************

NCBI-TxInit DEFINITIONS ::=
BEGIN

EXPORTS Txinit;

IMPORTS Gene-ref FROM NCBI-Gene
        Prot-ref FROM NCBI-Protein
        Org-ref FROM NCBI-Organism;

-- Description of a transcription initiation site.
-- Mandatory components: name and txsystem. The two BOOLEAN flags DEFAULT
-- to FALSE; everything else is OPTIONAL.
-- Note: "rna" is used both as a component name and as a txsystem value;
-- these live in separate namespaces and do not clash.
Txinit ::= SEQUENCE {
    name VisibleString ,    -- descriptive name of initiation site
    syn SEQUENCE OF VisibleString OPTIONAL ,   -- synonyms
    gene SEQUENCE OF Gene-ref OPTIONAL ,  -- gene(s) transcribed
    protein SEQUENCE OF Prot-ref OPTIONAL ,   -- protein(s) produced
    rna SEQUENCE OF VisibleString OPTIONAL ,  -- rna(s) produced
    expression VisibleString OPTIONAL ,  -- tissue/time of expression
    txsystem ENUMERATED {       -- transcription apparatus used at this site
        unknown (0) ,
        pol1 (1) ,      -- eukaryotic Pol I
        pol2 (2) ,      -- eukaryotic Pol II
        pol3 (3) ,      -- eukaryotic Pol III
        bacterial (4) ,
        viral (5) ,
        rna (6) ,       -- RNA replicase
        organelle (7) ,
        other (255) } ,
    txdescr VisibleString OPTIONAL ,   -- modifiers on txsystem
    txorg Org-ref OPTIONAL ,  -- organism supplying transcription apparatus
    mapping-precise BOOLEAN DEFAULT FALSE ,  -- mapping precise or approx
    location-accurate BOOLEAN DEFAULT FALSE , -- does Seq-loc reflect mapping
    inittype ENUMERATED {
        unknown (0) ,
        single (1) ,
        multiple (2) ,
        region (3) } OPTIONAL ,
    evidence SET OF Tx-evidence OPTIONAL }   -- unordered Tx-evidence records

-- One piece of experimental evidence for a Txinit site.
-- exp-code is mandatory; expression-system DEFAULTs to physiological and
-- both BOOLEAN flags DEFAULT to FALSE.
Tx-evidence ::= SEQUENCE {
    exp-code ENUMERATED {
        unknown (0) ,
        rna-seq (1) ,   -- direct RNA sequencing
        rna-size (2) ,  -- RNA length measurement
        np-map (3) ,    -- nuclease protection mapping with homologous sequence ladder
        np-size (4) ,   -- nuclease protected fragment length measurement
        pe-seq (5) ,    -- dideoxy RNA sequencing
        cDNA-seq (6) ,  -- full-length cDNA sequencing
        pe-map (7) ,    -- primer extension mapping with homologous sequence ladder
        pe-size (8) ,   -- primer extension product length measurement
        pseudo-seq (9) , -- full-length processed pseudogene sequencing
        rev-pe-map (10) ,   -- see NOTE (1) below
        other (255) } ,
    expression-system ENUMERATED {
        unknown (0) ,
        physiological (1) ,
        in-vitro (2) ,
        oocyte (3) ,
        transfection (4) ,
        transgenic (5) ,
        other (255) } DEFAULT physiological ,
    low-prec-data BOOLEAN DEFAULT FALSE ,
    from-homolog BOOLEAN DEFAULT FALSE }     -- experiment actually done on
                                             --  close homolog

    -- NOTE (1) length measurement of a reverse direction primer-extension
    --          product (blocked by RNA 5' end) by comparison with a
    --          homologous sequence ladder (J. Mol. Biol. 199, 587)

END


-- seqloc.asn
--$Revision: 182653 $
--**********************************************************************
--
--  NCBI Sequence location and identifier elements
--  by James Ostell, 1990
--
--  Version 3.0 - 1994
--
--**********************************************************************

NCBI-Seqloc DEFINITIONS ::=
BEGIN

EXPORTS Seq-id, Seq-loc, Seq-interval, Packed-seqint, Seq-point, Packed-seqpnt,
        Na-strand, Giimport-id;

IMPORTS Object-id, Int-fuzz, Dbtag, Date FROM NCBI-General
        Id-pat FROM NCBI-Biblio
        Feat-id FROM NCBI-Seqfeat;

--*** Sequence identifiers ********************************
--*

-- Sequence identifier: exactly one of the alternatives below.
-- Most database alternatives share the Textseq-id structure; gibbsq,
-- gibbmt and gi are bare INTEGERs.
Seq-id ::= CHOICE {
    local Object-id ,            -- local use
    gibbsq INTEGER ,             -- Geninfo backbone seqid
    gibbmt INTEGER ,             -- Geninfo backbone moltype
    giim Giimport-id ,           -- Geninfo import id
    genbank Textseq-id ,
    embl Textseq-id ,
    pir Textseq-id ,
    swissprot Textseq-id ,
    patent Patent-seq-id ,
    other Textseq-id ,           -- for historical reasons, 'other' = 'refseq'
    general Dbtag ,              -- for other databases
    gi INTEGER ,                 -- GenInfo Integrated Database
    ddbj Textseq-id ,            -- DDBJ
    prf Textseq-id ,             -- PRF SEQDB
    pdb PDB-seq-id ,             -- PDB sequence
    tpg Textseq-id ,             -- Third Party Annot/Seq Genbank
    tpe Textseq-id ,             -- Third Party Annot/Seq EMBL
    tpd Textseq-id ,             -- Third Party Annot/Seq DDBJ
    gpipe Textseq-id ,           -- Internal NCBI genome pipeline processing ID
    named-annot-track Textseq-id -- Internal named annotation tracking ID
}

-- Unordered collection of Seq-id values.
Seq-id-set ::= SET OF Seq-id


-- Identifier of a sequence within a patent; both components are mandatory.
Patent-seq-id ::= SEQUENCE {
    seqid INTEGER ,         -- number of sequence in patent
    cit Id-pat }           -- patent citation

-- Text-based sequence identifier shared by many Seq-id alternatives.
-- Every component is OPTIONAL, so the schema itself does not require any
-- particular one to be present.
Textseq-id ::= SEQUENCE {
    name VisibleString OPTIONAL ,
    accession VisibleString OPTIONAL ,
    release VisibleString OPTIONAL ,
    version INTEGER OPTIONAL }

-- Geninfo import identifier (used by Seq-id.giim).
-- Only id is mandatory.
Giimport-id ::= SEQUENCE {
    id INTEGER ,                     -- the id to use here
    db VisibleString OPTIONAL ,      -- dbase used in
    release VisibleString OPTIONAL } -- the release

-- PDB sequence identifier (used by Seq-id.pdb).
-- chain holds a character code as an INTEGER; its DEFAULT of 32 is the
-- ASCII code of the space character.
PDB-seq-id ::= SEQUENCE {
    mol PDB-mol-id ,           -- the molecule name
    chain INTEGER DEFAULT 32 , -- a single ASCII character, chain id
    rel Date OPTIONAL }        -- release date, month and year

-- Molecule name used by PDB-seq-id.mol. The 4-character length is a
-- convention stated in the comment; no size constraint is declared.
PDB-mol-id ::= VisibleString  -- name of mol, 4 chars
    
--*** Sequence locations **********************************
--*

-- A location on one or more sequences: exactly one of the alternatives
-- below. The type is recursive through Seq-loc-mix and Seq-loc-equiv,
-- which both contain Seq-loc values.
Seq-loc ::= CHOICE {
    null NULL ,           -- not placed
    empty Seq-id ,        -- to NULL one Seq-id in a collection
    whole Seq-id ,        -- whole sequence
    int Seq-interval ,    -- from to
    packed-int Packed-seqint ,
    pnt Seq-point ,
    packed-pnt Packed-seqpnt ,
    mix Seq-loc-mix ,
    equiv Seq-loc-equiv ,  -- equivalent sets of locations
    bond Seq-bond ,
    feat Feat-id }         -- indirect, through a Seq-feat
    

-- An interval on a single sequence.
-- from, to and id are mandatory; strand and the two fuzz components are
-- OPTIONAL.
Seq-interval ::= SEQUENCE {
    from INTEGER ,
    to INTEGER ,
    strand Na-strand OPTIONAL ,
    id Seq-id ,    -- WARNING: this used to be optional
    fuzz-from Int-fuzz OPTIONAL ,
    fuzz-to Int-fuzz OPTIONAL }

-- Ordered list of Seq-interval values; each element carries its own id.
Packed-seqint ::= SEQUENCE OF Seq-interval

-- A single position on a sequence.
-- point and id are mandatory; strand and fuzz are OPTIONAL.
Seq-point ::= SEQUENCE {
    point INTEGER ,
    strand Na-strand OPTIONAL ,
    id Seq-id ,     -- WARNING: this used to be optional
    fuzz Int-fuzz OPTIONAL }

-- Several positions sharing one id, strand and fuzz.
-- Unlike Packed-seqint, the id is stored once for the whole list.
Packed-seqpnt ::= SEQUENCE {
    strand Na-strand OPTIONAL ,
    id Seq-id ,
    fuzz Int-fuzz OPTIONAL ,
    points SEQUENCE OF INTEGER }   -- ordered list of positions

-- Strand designation used by the location types in this module.
-- Values 0 to 4 are assigned consecutively; 255 is the catch-all.
Na-strand ::= ENUMERATED {          -- strand of nucleic acid
    unknown (0) ,
    plus (1) ,
    minus (2) ,
    both (3) ,                -- in forward orientation
    both-rev (4) ,            -- in reverse orientation
    other (255) }

-- A bond described by its end points: a is mandatory, b is OPTIONAL.
Seq-bond ::= SEQUENCE {         -- bond between residues
    a Seq-point ,           -- connection to at least one residue
    b Seq-point OPTIONAL }  -- other end may not be available

-- Ordered list of arbitrary Seq-loc values (used by Seq-loc.mix).
Seq-loc-mix ::= SEQUENCE OF Seq-loc   -- this will hold anything

-- Unordered collection of Seq-loc values (used by Seq-loc.equiv).
Seq-loc-equiv ::= SET OF Seq-loc      -- for a set of equivalent locations

END
    

-- seqres.asn
--$Revision: 6.0 $
--**********************************************************************
--
--  NCBI Sequence Analysis Results (other than alignments)
--  by James Ostell, 1990
--
--**********************************************************************

NCBI-Seqres DEFINITIONS ::=
BEGIN

EXPORTS Seq-graph;

IMPORTS Seq-loc FROM NCBI-Seqloc;

--*** Sequence Graph ********************************
--*
--*   for values mapped by residue or range to sequence
--*

-- A graph of values mapped onto a sequence region.
-- Mandatory components: loc, numval and graph. The graph CHOICE selects
-- the storage type of the values (REAL, INTEGER or one byte each).
Seq-graph ::= SEQUENCE {
    title VisibleString OPTIONAL ,
    comment VisibleString OPTIONAL ,
    loc Seq-loc ,                       -- region this applies to
    title-x VisibleString OPTIONAL ,    -- title for x-axis
    title-y VisibleString OPTIONAL ,
    comp INTEGER OPTIONAL ,             -- compression (residues/value)
    a REAL OPTIONAL ,                   -- for scaling values
    b REAL OPTIONAL ,                   -- display = (a x value) + b
    numval INTEGER ,                    -- number of values in graph
    graph CHOICE {
        real Real-graph ,
        int Int-graph ,
        byte Byte-graph } }

-- Graph data stored as REAL values; all four components are mandatory.
Real-graph ::= SEQUENCE {
    max REAL ,                          -- top of graph
    min REAL ,                          -- bottom of graph
    axis REAL ,                         -- value to draw axis on
    values SEQUENCE OF REAL }

-- Graph data stored as INTEGER values; same layout as Real-graph with
-- INTEGER in place of REAL. All four components are mandatory.
Int-graph ::= SEQUENCE {
    max INTEGER ,
    min INTEGER ,
    axis INTEGER ,
    values SEQUENCE OF INTEGER } 

-- Graph data packed into an OCTET STRING, while max, min and axis remain
-- INTEGERs. All four components are mandatory.
Byte-graph ::= SEQUENCE {              -- integer from 0-255
    max INTEGER ,
    min INTEGER ,
    axis INTEGER ,
    values OCTET STRING }

END


-- seqset.asn
--$Revision: 279709 $
--**********************************************************************
--
--  NCBI Sequence Collections
--  by James Ostell, 1990
--
--  Version 3.0 - 1994
--
--**********************************************************************

NCBI-Seqset DEFINITIONS ::=
BEGIN

EXPORTS Bioseq-set, Seq-entry;

IMPORTS Bioseq, Seq-annot, Seq-descr FROM NCBI-Sequence
        Object-id, Dbtag, Date FROM NCBI-General;

--*** Sequence Collections ********************************
--*

-- A collection of Seq-entry values with optional identification,
-- classification and annotation. seq-set is the only mandatory component;
-- class DEFAULTs to not-set. Because Seq-entry may itself be a Bioseq-set,
-- sets can nest.
Bioseq-set ::= SEQUENCE {      -- just a collection
    id Object-id OPTIONAL ,
    coll Dbtag OPTIONAL ,          -- to identify a collection
    level INTEGER OPTIONAL ,       -- nesting level
    class ENUMERATED {
        not-set (0) ,
        nuc-prot (1) ,              -- nuc acid and coded proteins
        segset (2) ,                -- segmented sequence + parts
        conset (3) ,                -- constructed sequence + parts
        parts (4) ,                 -- parts for 2 or 3
        gibb (5) ,                  -- geninfo backbone
        gi (6) ,                    -- geninfo
        genbank (7) ,               -- converted genbank
        pir (8) ,                   -- converted pir
        pub-set (9) ,               -- all the seqs from a single publication
        equiv (10) ,                -- a set of equivalent maps or seqs
        swissprot (11) ,            -- converted SWISSPROT
        pdb-entry (12) ,            -- a complete PDB entry
        mut-set (13) ,              -- set of mutations
        pop-set (14) ,              -- population study
        phy-set (15) ,              -- phylogenetic study
        eco-set (16) ,              -- ecological sample study
        gen-prod-set (17) ,         -- genomic products, chrom+mRNA+protein
        wgs-set (18) ,              -- whole genome shotgun project
        named-annot (19) ,          -- named annotation set
        named-annot-prod (20) ,     -- with instantiated mRNA+protein
        read-set (21) ,             -- set from a single read
        paired-end-reads (22) ,     -- paired sequences within a read-set
        small-genome-set (23) ,     -- viral segments or mitochondrial minicircles
        other (255) } DEFAULT not-set ,
    release VisibleString OPTIONAL ,
    date Date OPTIONAL ,
    descr Seq-descr OPTIONAL ,
    seq-set SEQUENCE OF Seq-entry ,   -- the members, in order
    annot SET OF Seq-annot OPTIONAL }

-- Either a single Bioseq or a Bioseq-set. Together with
-- Bioseq-set.seq-set this makes the two types mutually recursive.
Seq-entry ::= CHOICE {
        seq Bioseq ,
        set Bioseq-set }

END


-- seqsplit.asn
--$Revision: 464672 $
--********************************************************************
--
--  Network Id server network access
--  Vasilchenko 2003
--
--
--*********************************************************************
--
--  seqsplit.asn
--
--     representation of split sequences
--
--*********************************************************************

NCBI-Seq-split DEFINITIONS ::=
BEGIN

EXPORTS ID2S-Chunk-Id, ID2S-Seq-annot-Info;

IMPORTS Seq-id                                      FROM NCBI-Seqloc
        Seq-entry                                   FROM NCBI-Seqset
        Bioseq, Seq-annot, Seq-descr, Seq-literal   FROM NCBI-Sequence
        Seq-align                                   FROM NCBI-Seqalign;

----------------------------------------------------------------------------
-- Blob split info types
----------------------------------------------------------------------------

----------------------------------------------------------------------------
-- Chunks split description


-- Top-level description of a split blob: the list of chunks (mandatory),
-- plus optional per-Bioseq information and an optional skeleton Seq-entry.
ID2S-Split-Info ::= SEQUENCE {
        bioseqs-info    SET OF ID2S-Bioseqs-Info OPTIONAL,
        chunks          SET OF ID2S-Chunk-Info,
        skeleton        Seq-entry OPTIONAL
}


-- Pairs one ID2S-Bioseq-Info record with the set of Bioseq ids it is
-- attached to. Both components are mandatory.
ID2S-Bioseqs-Info ::= SEQUENCE {
        info            ID2S-Bioseq-Info,
        bioseqs         ID2S-Bioseq-Ids
}


-- Summary properties of a Bioseq; both components are OPTIONAL.
-- NOTE(review): the exact meaning of each field is not stated in this
-- spec; the names suggest a gap count and a "sequence map contains
-- references" flag. Confirm against the consuming code.
ID2S-Bioseq-Info ::= SEQUENCE {
        gap-count       INTEGER OPTIONAL,
        seq-map-has-ref BOOLEAN OPTIONAL
}


-- Describes one chunk: its id and the set of content descriptions it
-- covers. Both components are mandatory.
ID2S-Chunk-Info ::= SEQUENCE {
        id              ID2S-Chunk-Id,
        content         SET OF ID2S-Chunk-Content
}


-- Description of information in this chunk
-- Place means id of Bioseq or Bioseq-set
-- Exactly one alternative is present per value; a chunk lists several
-- kinds of content through the SET OF in ID2S-Chunk-Info.content.
ID2S-Chunk-Content ::= CHOICE {
        -- place of Seq-descrs
        seq-descr       ID2S-Seq-descr-Info,

        -- locations and types of annotations
        seq-annot       ID2S-Seq-annot-Info,

        -- place of assembly history
        seq-assembly    ID2S-Seq-assembly-Info,

        -- place of sequence map
        seq-map         ID2S-Seq-map-Info,
        
        -- place of sequence data
        seq-data        ID2S-Seq-data-Info,

        -- place of Seq-annots
        seq-annot-place ID2S-Seq-annot-place-Info,

        -- place of Bioseqs
        bioseq-place    SET OF ID2S-Bioseq-place-Info,

        -- ids of features
        feat-ids        SET OF ID2S-Seq-feat-Ids-Info
}


-- Says which Seq-descr types a chunk holds and where they belong.
-- type-mask is mandatory; the two place lists are OPTIONAL.
ID2S-Seq-descr-Info ::= SEQUENCE {
        type-mask       INTEGER, -- mask of Seq-descr types
        bioseqs         ID2S-Bioseq-Ids OPTIONAL,
        bioseq-sets     ID2S-Bioseq-set-Ids OPTIONAL
}


-- Describes the annotations in a chunk by kind and location.
-- Every component is OPTIONAL. align and graph are NULL flags, so their
-- presence alone carries the information.
ID2S-Seq-annot-Info ::= SEQUENCE {
        -- name is set if this is a named annot
        -- name may be empty, which differs from an unnamed annot
        name            VisibleString OPTIONAL,
        align           NULL OPTIONAL,
        graph           NULL OPTIONAL,
        feat            SET OF ID2S-Feat-type-Info OPTIONAL,
        seq-loc         ID2S-Seq-loc OPTIONAL
}


-- Place of Seq-annots: an optional annotation name plus the Bioseq and
-- Bioseq-set ids they attach to. Every component is OPTIONAL.
ID2S-Seq-annot-place-Info ::= SEQUENCE {
        name            VisibleString OPTIONAL,
        bioseqs         ID2S-Bioseq-Ids OPTIONAL,
        bioseq-sets     ID2S-Bioseq-set-Ids OPTIONAL
}


-- Feature ids contained in a chunk, given as integer and/or string local
-- ids together with the feature types involved.
-- Every component is OPTIONAL.
ID2S-Seq-feat-Ids-Info ::= SEQUENCE {
        feat-types      SET OF ID2S-Feat-type-Info OPTIONAL,
        xref-types      SET OF ID2S-Feat-type-Info OPTIONAL,
        local-ids       SET OF INTEGER OPTIONAL,
        local-str-ids   SET OF VisibleString OPTIONAL
}


-- A feature type code with an optional set of subtype codes.
-- Both are bare INTEGERs; this module does not define their values.
ID2S-Feat-type-Info ::=  SEQUENCE {
        type            INTEGER,
        subtypes        SET OF INTEGER OPTIONAL
}


-- Place of assembly history: the Bioseq ids it applies to (mandatory).
ID2S-Seq-assembly-Info ::= SEQUENCE {
        bioseqs         ID2S-Bioseq-Ids
}


-- Place of sequence map, expressed directly as an ID2S-Seq-loc.
ID2S-Seq-map-Info ::= ID2S-Seq-loc


-- Place of sequence data, expressed directly as an ID2S-Seq-loc.
ID2S-Seq-data-Info ::= ID2S-Seq-loc


-- Place of Bioseqs: the integer id of the containing Bioseq-set and the
-- ids of the Bioseqs placed in it. Both components are mandatory.
ID2S-Bioseq-place-Info ::= SEQUENCE {
        bioseq-set      INTEGER,
        seq-ids         ID2S-Bioseq-Ids
}


-- The payload of one chunk: an unordered set of ID2S-Chunk-Data records.
ID2S-Chunk ::= SEQUENCE {
        data            SET OF ID2S-Chunk-Data
}


-- Data to insert at one place (a Bioseq or a Bioseq-set).
-- id is mandatory and selects the place; every payload component is
-- OPTIONAL, and the per-field comments say which kind of place each
-- applies to.
ID2S-Chunk-Data ::= SEQUENCE {
        -- place of data to insert
        id              CHOICE {
                -- Bioseq-set id
                bioseq-set      INTEGER,
                -- Bioseq id
                gi              INTEGER,
                -- Bioseq id
                seq-id          Seq-id
        },
        -- Seq-descr, for Bioseq and Bioseq-set
        descr           Seq-descr OPTIONAL,
        -- Seq-annot, for Bioseq and Bioseq-set
        annots          SET OF Seq-annot OPTIONAL,
        -- assembly history Seq-align, for Bioseq
        assembly        SET OF Seq-align OPTIONAL,
        -- sequence map, for Bioseq
        seq-map         SEQUENCE OF ID2S-Sequence-Piece OPTIONAL,
        -- sequence data, for Bioseq
        seq-data        SEQUENCE OF ID2S-Sequence-Piece OPTIONAL,
        -- Bioseq, for Bioseq-set
        bioseqs         SET OF Bioseq OPTIONAL
}


-- A run of Seq-literal values anchored at a start position.
-- Both components are mandatory.
ID2S-Sequence-Piece ::= SEQUENCE {
        start           INTEGER,  -- start position on sequence
        data            SEQUENCE OF Seq-literal
}


----------------------------------------------------------------------------
-- utility types
----------------------------------------------------------------------------


-- Identifier of a chunk (used by ID2S-Chunk-Info.id); a plain INTEGER.
ID2S-Chunk-Id ::= INTEGER


-- Unordered set of integer Bioseq-set ids.
ID2S-Bioseq-set-Ids ::= SET OF INTEGER


-- Unordered set of Bioseq ids. Each element is one of: a single gi, a
-- full Seq-id, or a range of sequential gis.
ID2S-Bioseq-Ids ::= SET OF CHOICE {
        gi              INTEGER,
        seq-id          Seq-id,
        gi-range        ID2S-Gi-Range
}


-- A run of sequential gis. count DEFAULTs to 1, so a value with only
-- start denotes a single gi.
ID2S-Gi-Range ::= SEQUENCE {
        start           INTEGER,             -- start gi in this gi range
        count           INTEGER DEFAULT 1    -- number of sequential gis
}


-- ID2S-Seq-loc is used to represent unordered and unstranded
-- set of intervals on set of sequences.
-- It's optimized for compact encoding of several common cases:
--    Seq-ids of type gi,
--    intervals covering whole sequences,
--    whole sequences with sequential gis,
--    set of intervals on the same sequence (Seq-id sharing).
-- The type is recursive through the loc-set alternative.
ID2S-Seq-loc ::= CHOICE {
        whole-gi        INTEGER,             -- whole sequence by gi
        whole-seq-id    Seq-id,              -- whole sequence by Seq-id
        whole-gi-range  ID2S-Gi-Range,       -- set of whole sequences by gis
        gi-interval     ID2S-Gi-Interval,    -- interval on sequence by gi
        seq-id-interval ID2S-Seq-id-Interval,-- interval on sequence by Seq-id
        gi-ints         ID2S-Gi-Ints,        -- set of intervals on the same gi
        seq-id-ints     ID2S-Seq-id-Ints,    -- set of intervals on the same id
        loc-set         SET OF ID2S-Seq-loc  -- combination of locations
}


-- One interval on the sequence identified by gi, given as start plus
-- length. length DEFAULTs to 1.
ID2S-Gi-Interval ::= SEQUENCE {
        gi              INTEGER,
        start           INTEGER,
        length          INTEGER DEFAULT 1
}


-- One interval on the sequence identified by a Seq-id, given as start
-- plus length. length DEFAULTs to 1.
ID2S-Seq-id-Interval ::= SEQUENCE {
        seq-id          Seq-id,
        start           INTEGER,
        length          INTEGER DEFAULT 1
}


-- An interval without its own sequence id (start plus length, with
-- length DEFAULTing to 1). Used by ID2S-Gi-Ints and ID2S-Seq-id-Ints,
-- which supply the shared id.
ID2S-Interval ::= SEQUENCE {
        start           INTEGER,
        length          INTEGER DEFAULT 1
}


-- Several intervals on one sequence identified by gi; the gi is stored
-- once for the whole set.
ID2S-Gi-Ints ::= SEQUENCE {
        gi              INTEGER,
        ints            SET OF ID2S-Interval
}


-- Several intervals on one sequence identified by a Seq-id; the Seq-id is
-- stored once for the whole set.
ID2S-Seq-id-Ints ::= SEQUENCE {
        seq-id          Seq-id,
        ints            SET OF ID2S-Interval
}


END

-- seqtable.asn
--$Revision: 386776 $
--  ----------------------------------------------------------------------------
--
--                            PUBLIC DOMAIN NOTICE
--                National Center for Biotechnology Information
--
--  This software/database is a "United States Government Work" under the terms
--  of the United States Copyright Act.  It was written as part of the author's
--  official duties as a United States Government employee and thus cannot be
--  copyrighted.  This software/database is freely available to the public for
--  use.  The National Library of Medicine and the U.S. Government have not
--  placed any restriction on its use or reproduction.
--
--  Although all reasonable efforts have been taken to ensure the accuracy and
--  reliability of the software and data, the NLM and the U.S. Government do not
--  and cannot warrant the performance or results that may be obtained by using
--  this software or data.  The NLM and the U.S. Government disclaim all
--  warranties, express or implied, including warranties of performance,
--  merchantability or fitness for any particular purpose.
--
--  Please cite the authors in any work or product based on this material.
--
--  ----------------------------------------------------------------------------
--
--  Authors: Mike DiCuccio, Eugene Vasilchenko
--
--  ASN.1 interface to table readers
--
--  ----------------------------------------------------------------------------

NCBI-SeqTable DEFINITIONS ::=

BEGIN

EXPORTS
    SeqTable-column-info, SeqTable-column, Seq-table;
    
IMPORTS
    Seq-id, Seq-loc, Seq-interval   FROM NCBI-Seqloc;


-- Describes what a table column holds. Every component is OPTIONAL: a
-- column can be identified by the numeric field-id, by the textual
-- field-name, or both. field-id codes are grouped in tens:
-- 0 to 7 location, 10 to 17 product, 20 to 27 main feature fields,
-- 30 to 32 data fields, 40 to 44 extra fields.
SeqTable-column-info ::= SEQUENCE {
    -- user friendly column name, can be skipped
    title VisibleString OPTIONAL,

    -- identification of the column data in the objects described by the table
    field-id INTEGER { -- known column data types
        -- position types
        location        (0), -- location as Seq-loc
        location-id     (1), -- location Seq-id
        location-gi     (2), -- gi
        location-from   (3), -- interval from
        location-to     (4), -- interval to
        location-strand (5), -- location strand
        location-fuzz-from-lim (6),
        location-fuzz-to-lim   (7),

        product         (10), -- product as Seq-loc
        product-id      (11), -- product Seq-id
        product-gi      (12), -- product gi
        product-from    (13), -- product interval from
        product-to      (14), -- product interval to
        product-strand  (15), -- product strand
        product-fuzz-from-lim (16),
        product-fuzz-to-lim   (17),
        
        -- main feature fields
        id-local        (20), -- id.local.id
        xref-id-local   (21), -- xref.id.local.id
        partial         (22),
        comment         (23),
        title           (24),
        ext             (25), -- field-name must be "E.xxx", see below
        qual            (26), -- field-name must be "Q.xxx", see below
        dbxref          (27), -- field-name must be "D.xxx", see below

        -- various data fields
        data-imp-key        (30),
        data-region         (31),
        data-cdregion-frame (32),

        -- extra fields, see also special values for str below
        ext-type        (40),
        qual-qual       (41),
        qual-val        (42),
        dbxref-db       (43),
        dbxref-tag      (44)
    } OPTIONAL,

    -- any column can be identified by ASN.1 text locator string
    -- with omitted object type.
    -- examples:
    --   "data.gene.locus" for Seq-feat.data.gene.locus
    --   "data.imp.key" for Seq-feat.data.imp.key
    --   "qual.qual"
    --    - Seq-feat.qual is SEQUENCE so several columns are allowed
    --      see also "Q.xxx" special value for shorter qual representation
    --   "ext.type.str"
    --   "ext.data.label.str"
    --   "ext.data.data.int"
    --      see also "E.xxx" special value for shorter ext representation
    -- special values start with capital letter:
    --   "E.xxx" - ext.data.label.str = xxx, ext.data.data = data
    --    - Seq-feat.ext.data is SEQUENCE so several columns are allowed
    --   "Q.xxx" - qual.qual = xxx, qual.val = data
    --    - Seq-feat.qual is SEQUENCE so several columns are allowed
    --   "D.xxx" - dbxref.id = xxx, dbxref.tag = data
    --    - Seq-feat.dbxref is SET so several columns are allowed
    field-name  VisibleString OPTIONAL
}


-- Column data stored as a list of distinct strings plus a list of
-- indexes. Both components are mandatory.
-- NOTE(review): presumably there is one index per row and each index
-- points into strings; the index base is not stated here, so confirm.
CommonString-table ::= SEQUENCE {
    -- set of possible values
    strings     SEQUENCE OF UTF8String,

    -- indexes of values
    indexes     SEQUENCE OF INTEGER
}


-- Same layout as CommonString-table, with byte arrays instead of
-- strings. Both components are mandatory.
-- NOTE(review): presumably there is one index per row and each index
-- points into bytes; the index base is not stated here, so confirm.
CommonBytes-table ::= SEQUENCE {
    -- set of possible values
    bytes       SEQUENCE OF OCTET STRING,

    -- indexes of values
    indexes     SEQUENCE OF INTEGER
}


-- The row data of one column: exactly one storage form is chosen for the
-- whole column. The single-value counterpart is SeqTable-single-data.
SeqTable-multi-data ::= CHOICE {
    -- a set of integers, one per row
    int         SEQUENCE OF INTEGER,
    
    -- a set of reals, one per row
    real        SEQUENCE OF REAL,

    -- a set of strings, one per row
    string      SEQUENCE OF UTF8String,

    -- a set of byte arrays, one per row
    bytes       SEQUENCE OF OCTET STRING,

    -- a set of strings with a small set of possible values
    common-string   CommonString-table,

    -- a set of byte arrays with a small set of possible values
    common-bytes    CommonBytes-table,

    -- a set of bits, one per row
    -- this uses bm::bvector<> as its storage mechanism
    bit         OCTET STRING,

    -- a set of locations, one per row
    loc         SEQUENCE OF Seq-loc,
    id          SEQUENCE OF Seq-id,
    interval    SEQUENCE OF Seq-interval
}


-- A single value of one of the supported column types. Used by
-- SeqTable-column for its default and sparse-other components.
-- Unlike SeqTable-multi-data, bit is a BOOLEAN and there are no
-- common-string or common-bytes forms.
SeqTable-single-data ::= CHOICE {
    -- integer
    int         INTEGER,
    
    -- real
    real        REAL,

    -- string
    string      UTF8String,

    -- byte array
    bytes       OCTET STRING,

    -- bit
    bit         BOOLEAN,

    -- location
    loc         Seq-loc,
    id          Seq-id,
    interval    Seq-interval
}


-- Says which rows of a sparse column carry values, either as an explicit
-- list of row indexes or as a packed bitset.
SeqTable-sparse-index ::= CHOICE {
    -- indexes of rows with values
    indexes SEQUENCE OF INTEGER,

    -- bitset of rows with values
    bit-set OCTET STRING
}


-- One column of a Seq-table. Only header is mandatory; the row data, the
-- sparse index and both fallback values are OPTIONAL.
-- Note: the component named "default" is an ordinary lowercase identifier
-- and is distinct from the uppercase DEFAULT keyword.
SeqTable-column ::= SEQUENCE {
    -- column description or reference to previously defined info
    header      SeqTable-column-info,   -- information about data

    -- row data
    data        SeqTable-multi-data OPTIONAL,

    -- in case not all rows contain data this field will contain sparse info
    sparse      SeqTable-sparse-index OPTIONAL,

    -- default value for sparse table, or if row data is too short
    default     SeqTable-single-data OPTIONAL,

    -- single value for indexes not listed in sparse table
    sparse-other SeqTable-single-data OPTIONAL
}


-- A table of features stored column by column.
-- feat-type, num-rows and columns are mandatory; feat-subtype is OPTIONAL.
Seq-table ::= SEQUENCE {
    -- type of features in this table, equal to Seq-feat.data variant index
    feat-type   INTEGER,

    -- subtype of features in this table, defined in header SeqFeatData.hpp
    feat-subtype INTEGER OPTIONAL,

    -- number of rows
    num-rows    INTEGER,

    -- data in columns
    columns     SEQUENCE OF SeqTable-column
}


END

-- submit.asn
--$Revision: 6.1 $
--********************************************************************
--
--  Direct Submission of Sequence Data
--  James Ostell, 1991
--
--  This is a trial specification for direct submission of sequence
--    data worked out between NCBI and EMBL
--  Later revised to reflect work with GenBank and Integrated database
--
--  Version 3.0, 1994
--    This is the official NCBI sequence submission format now.
--
--********************************************************************

NCBI-Submit DEFINITIONS ::=
BEGIN

EXPORTS Seq-submit, Contact-info;

IMPORTS Cit-sub, Author FROM NCBI-Biblio
        Date, Object-id FROM NCBI-General
        Seq-annot FROM NCBI-Sequence
        Seq-id FROM NCBI-Seqloc
        Seq-entry FROM NCBI-Seqset;

-- A direct submission: a Submit-block plus exactly one kind of payload
-- (entries, annotations, or ids to delete). Both components are mandatory.
Seq-submit ::= SEQUENCE {
    sub Submit-block ,
    data CHOICE {
        entrys  SET OF Seq-entry ,  -- sequence(s)
        annots  SET OF Seq-annot ,  -- annotation(s)
        delete  SET OF Seq-id } } -- deletions of entries

-- Administrative data about a submission.
-- contact and cit are mandatory; hup DEFAULTs to FALSE; everything else
-- is OPTIONAL.
Submit-block ::= SEQUENCE {
    contact Contact-info ,        -- who to contact
    cit Cit-sub ,                 -- citation for this submission
    hup BOOLEAN DEFAULT FALSE ,   -- hold until publish
    reldate Date OPTIONAL ,       -- release by date
    subtype INTEGER {             -- type of submission
        new (1) ,                 -- new data
        update (2) ,              -- update by author
        revision (3) ,            -- 3rd party (non-author) update
        other (255) } OPTIONAL ,
    tool VisibleString OPTIONAL,  -- tool used to make submission
    user-tag VisibleString OPTIONAL, -- user supplied id for this submission
    comment VisibleString OPTIONAL } -- user comments/advice to database

-- Contact details for a submission. Every component is OPTIONAL.
-- Per the inline notes, name is superseded by the structured name
-- fields, and contact (an Author) is intended to replace the fields
-- above it.
Contact-info ::= SEQUENCE {      -- who to contact to discuss the submission
    name VisibleString OPTIONAL ,        -- OBSOLETE: will be removed
    address SEQUENCE OF VisibleString OPTIONAL ,
    phone VisibleString OPTIONAL ,
    fax VisibleString OPTIONAL ,
    email VisibleString OPTIONAL ,
    telex VisibleString OPTIONAL ,
    owner-id Object-id OPTIONAL ,         -- for owner accounts
    password OCTET STRING OPTIONAL ,
    last-name VisibleString OPTIONAL ,  -- structured to replace name above
    first-name VisibleString OPTIONAL ,
    middle-initial VisibleString OPTIONAL ,
    contact Author OPTIONAL }           -- WARNING: this will replace the above

END


-- tinyseq.asn
--$Revision: 6.1 $
--**********************************************************************
--
--  ASN.1 for a tiny Bioseq in XML
--    basically a structured FASTA file with a few extras
--    in this case we drop all modularity of components
--      All ids are Optional - simpler structure, less checking
--      Components of organism are hard coded - can't easily add or change
--      sequence is just string whether DNA or protein
--  by James Ostell, 2000
--
--**********************************************************************

NCBI-TSeq DEFINITIONS ::=
BEGIN

-- A flat, self-contained sequence record.
-- Mandatory components: seqtype, defline, length and sequence. All the
-- identifier and organism fields are OPTIONAL. The sequence is carried
-- as a plain string for both seqtype values.
TSeq ::= SEQUENCE {
	seqtype ENUMERATED {
		nucleotide (1),
		protein (2) },
	gi INTEGER OPTIONAL,
	accver VisibleString OPTIONAL,
	sid VisibleString OPTIONAL,
	local VisibleString OPTIONAL,
	taxid INTEGER OPTIONAL,
	orgname VisibleString OPTIONAL,
	defline VisibleString,
	length INTEGER,
	sequence VisibleString }

-- Ordered list of TSeq records.
TSeqSet ::= SEQUENCE OF TSeq    -- a bunch of them

END