-- access.asn
--$Revision: 6.0 $
--*********************************************************************
--
--  access.asn
--
--     messages for data access
--
--*********************************************************************

NCBI-Access DEFINITIONS ::=
BEGIN

EXPORTS Link-set;

    -- links between same class = neighbors
    -- links between other classes = links

Link-set ::= SEQUENCE {
    num INTEGER ,                            -- number of links to this doc type
    uids SEQUENCE OF INTEGER OPTIONAL ,      -- the links
    weights SEQUENCE OF INTEGER OPTIONAL }   -- the weights

END

-- biblio.asn
--$Revision: 6.2 $
--****************************************************************
--
--  NCBI Bibliographic data elements
--  by James Ostell, 1990
--
--  Taken from the American National Standard for
--      Bibliographic References
--      ANSI Z39.29-1977
--  Version 3.0 - June 1994
--  PubMedId added in 1996
--  ArticleIds and eprint elements added in 1999
--
--****************************************************************

NCBI-Biblio DEFINITIONS ::=
BEGIN

EXPORTS Cit-art, Cit-jour, Cit-book, Cit-pat, Cit-let, Id-pat, Cit-gen,
        Cit-proc, Cit-sub, Title, Author, PubMedId;

IMPORTS Person-id, Date, Dbtag FROM NCBI-General;

    -- Article Ids

ArticleId ::= CHOICE {         -- can be many ids for an article
    pubmed PubMedId ,          -- see types below
    medline MedlineUID ,
    doi DOI ,
    pii PII ,
    pmcid PmcID ,
    pmcpid PmcPid ,
    pmpid PmPid ,
    other Dbtag }              -- generic catch all

PubMedId ::= INTEGER           -- Id from the PubMed database at NCBI
MedlineUID ::= INTEGER         -- Id from MEDLINE
DOI ::= VisibleString          -- Document Object Identifier
PII ::= VisibleString          -- Controlled Publisher Identifier
PmcID ::= INTEGER              -- PubMed Central Id
PmcPid ::= VisibleString       -- Publisher Id supplied to PubMed Central
PmPid ::= VisibleString        -- Publisher Id supplied to PubMed

    -- set of ArticleId values (type of Cit-art.ids)
ArticleIdSet ::= SET OF ArticleId

    -- Status Dates

PubStatus ::= INTEGER {        -- points of publication
    received  (1) ,            -- date manuscript received for review
    accepted  (2) ,            -- accepted for publication
    epublish  (3) ,            -- published electronically by publisher
    ppublish  (4) ,            -- published in print by publisher
    revised   (5) ,            -- article revised by publisher/author
    pmc       (6) ,            -- article first appeared in PubMed Central
    pmcr      (7) ,            -- article revision in PubMed Central
    pubmed    (8) ,            -- article citation first appeared in PubMed
    pubmedr   (9) ,            -- article citation revision in PubMed
    aheadofprint (10),         -- epublish, but will be followed by print
    premedline (11),           -- date into PreMedline status
    medline    (12),           -- date made a MEDLINE record
    other    (255) }

PubStatusDate ::= SEQUENCE {   -- done as a structure so fields can be added
    pubstatus PubStatus ,
    date Date }                -- time may be added later

    -- set of PubStatusDate values (type of Imprint.history)
PubStatusDateSet ::= SET OF PubStatusDate

    -- Citation Types

Cit-art ::= SEQUENCE {                  -- article in journal or book
    title Title OPTIONAL ,              -- title of paper (ANSI requires)
    authors Auth-list OPTIONAL ,        -- authors (ANSI requires)
    from CHOICE {                       -- journal or book
        journal Cit-jour ,
        book Cit-book ,
        proc Cit-proc } ,
    ids ArticleIdSet OPTIONAL }         -- lots of ids

Cit-jour ::= SEQUENCE {                 -- Journal citation
    title Title ,                       -- title of journal
    imp Imprint }

Cit-book ::= SEQUENCE {                 -- Book citation
    title Title ,                       -- Title of book
    coll Title OPTIONAL ,               -- part of a collection
    authors Auth-list,                  -- authors
    imp Imprint }

Cit-proc ::= SEQUENCE {                 -- Meeting proceedings
    book Cit-book ,                     -- citation to meeting
    meet Meeting }                      -- time and location of meeting

    -- Patent number and date-issue were made optional in 1997 to
    --   support patent applications being issued from the USPTO
    --   Semantically a Cit-pat must have either a patent number or
    --   an application number (or both) to be valid

Cit-pat ::= SEQUENCE {                  -- patent citation
    title VisibleString ,
    authors Auth-list,                  -- author/inventor
    country VisibleString ,             -- Patent Document Country
    doc-type VisibleString ,            -- Patent Document Type
    number VisibleString OPTIONAL,      -- Patent Document Number
    date-issue Date OPTIONAL,           -- Patent Issue/Pub Date
    class SEQUENCE OF VisibleString OPTIONAL ,      -- Patent Doc Class Code
    app-number VisibleString OPTIONAL , -- Patent Doc Appl Number
    app-date Date OPTIONAL ,            -- Patent Appl File Date
    applicants Auth-list OPTIONAL ,     -- Applicants
    assignees Auth-list OPTIONAL ,      -- Assignees
    priority SEQUENCE OF Patent-priority OPTIONAL , -- Priorities
    abstract VisibleString OPTIONAL }   -- abstract of patent

    -- one element of Cit-pat.priority
Patent-priority ::= SEQUENCE {
    country VisibleString ,             -- Patent country code
    number VisibleString ,              -- number assigned in that country
    date Date }                         -- date of application

Id-pat ::= SEQUENCE {                   -- just to identify a patent
    country VisibleString ,             -- Patent Document Country
    id CHOICE {
        number VisibleString ,          -- Patent Document Number
        app-number VisibleString } ,    -- Patent Doc Appl Number
    doc-type VisibleString OPTIONAL }   -- Patent Doc Type

Cit-let ::= SEQUENCE {                  -- letter, thesis, or manuscript
    cit Cit-book ,                      -- same fields as a book
    man-id VisibleString OPTIONAL ,     -- Manuscript identifier
    type ENUMERATED {
        manuscript (1) ,
        letter (2) ,
        thesis (3) } OPTIONAL }

                                -- NOTE: this is just to cite a
                                -- direct data submission, see NCBI-Submit
                                -- for the form of a sequence submission
Cit-sub ::= SEQUENCE {                  -- citation for a direct submission
    authors Auth-list ,                 -- not necessarily authors of the paper
    imp Imprint OPTIONAL ,              -- this only used to get date.. will go
    medium ENUMERATED {                 -- medium of submission
        paper   (1) ,
        tape    (2) ,
        floppy  (3) ,
        email   (4) ,
        other   (255) } OPTIONAL ,
    date Date OPTIONAL ,                -- replaces imp, will become required
    descr VisibleString OPTIONAL }      -- description of changes for public view

Cit-gen ::= SEQUENCE {                  -- NOT from ANSI, this is a catchall
    cit VisibleString OPTIONAL ,        -- anything, not parsable
    authors Auth-list OPTIONAL ,
    muid INTEGER OPTIONAL ,             -- medline uid
    journal Title OPTIONAL ,
    volume VisibleString OPTIONAL ,
    issue VisibleString OPTIONAL ,
    pages VisibleString OPTIONAL ,
    date Date OPTIONAL ,
    serial-number INTEGER OPTIONAL ,    -- for GenBank style references
    title VisibleString OPTIONAL ,      -- eg. cit="unpublished",title="title"
    pmid PubMedId OPTIONAL }            -- PubMed Id

    -- Authorship Group

Auth-list ::= SEQUENCE {
    names CHOICE {
        std SEQUENCE OF Author ,            -- full citations
        ml SEQUENCE OF VisibleString ,      -- MEDLINE, semi-structured
        str SEQUENCE OF VisibleString } ,   -- free for all
    affil Affil OPTIONAL }                  -- author affiliation

Author ::= SEQUENCE {
    name Person-id ,                    -- Author, Primary or Secondary
    level ENUMERATED {
        primary (1),
        secondary (2) } OPTIONAL ,
    role ENUMERATED {                   -- Author Role Indicator
        compiler (1),
        editor (2),
        patent-assignee (3),
        translator (4) } OPTIONAL ,
    affil Affil OPTIONAL ,
    is-corr BOOLEAN OPTIONAL }          -- TRUE if corresponding author

Affil ::= CHOICE {
    str VisibleString ,                     -- unparsed string
    std SEQUENCE {                          -- std representation
        affil VisibleString OPTIONAL ,      -- Author Affiliation, Name
        div VisibleString OPTIONAL ,        -- Author Affiliation, Division
        city VisibleString OPTIONAL ,       -- Author Affiliation, City
        sub VisibleString OPTIONAL ,        -- Author Affiliation, County Sub
        country VisibleString OPTIONAL ,    -- Author Affiliation, Country
        street VisibleString OPTIONAL ,     -- street address, not ANSI
        email VisibleString OPTIONAL ,
        fax VisibleString OPTIONAL ,
        phone VisibleString OPTIONAL ,
        postal-code VisibleString OPTIONAL }}

    -- Title Group
    --   Valid for = A = Analytic (Cit-art)
    --               J = Journals (Cit-jour)
    --               B = Book (Cit-book)
    --                                              Valid for:
Title ::= SET OF CHOICE {
    name VisibleString ,        -- Title, Anal,Coll,Mono    AJB
    tsub VisibleString ,        -- Title, Subordinate       A B
    trans VisibleString ,       -- Title, Translated        AJB
    jta VisibleString ,         -- Title, Abbreviated        J
    iso-jta VisibleString ,     -- specifically ISO jta      J
    ml-jta VisibleString ,      -- specifically MEDLINE jta  J
    coden VisibleString ,       -- a coden                   J
    issn VisibleString ,        -- ISSN                      J
    abr VisibleString ,         -- Title, Abbreviated         B
    isbn VisibleString }        -- ISBN                       B

Imprint ::= SEQUENCE {                      -- Imprint group
    date Date ,                             -- date of publication
    volume VisibleString OPTIONAL ,
    issue VisibleString OPTIONAL ,
    pages VisibleString OPTIONAL ,
    section VisibleString OPTIONAL ,
    pub Affil OPTIONAL,                     -- publisher, required for book
    cprt Date OPTIONAL,                     -- copyright date, required for book
    part-sup VisibleString OPTIONAL ,       -- part/sup of volume
    language VisibleString DEFAULT "ENG" ,  -- put here for simplicity
    prepub ENUMERATED {                     -- for prepublication citations
        submitted (1) ,                     -- submitted, not accepted
        in-press (2) ,                      -- accepted, not published
        other (255) } OPTIONAL ,
    part-supi VisibleString OPTIONAL ,      -- part/sup on issue
    retract CitRetract OPTIONAL ,           -- retraction info
    pubstatus PubStatus OPTIONAL ,          -- current status of this publication
    history PubStatusDateSet OPTIONAL }     -- dates for this record

CitRetract ::= SEQUENCE {
    type ENUMERATED {                   -- retraction of an entry
        retracted (1) ,                 -- this citation retracted
        notice (2) ,                    -- this citation is a retraction notice
        in-error (3) ,                  -- an erratum was published about this
        erratum (4) } ,                 -- this is a published erratum
    exp VisibleString OPTIONAL }        -- citation and/or explanation

    -- meeting details (type of Cit-proc.meet)
Meeting ::= SEQUENCE {
    number VisibleString ,
    date Date ,
    place Affil OPTIONAL }

END

-- biotree.asn
--$Revision: 1.4 $
--*********************************************************************
--
--  biotree.asn
--
--     BioTree ASN
--     Anatoliy Kuznetsov
--
--*********************************************************************

NCBI-BioTree DEFINITIONS ::=
BEGIN

EXPORTS BioTreeContainer, DistanceMatrix;

BioTreeContainer ::= SEQUENCE {
    treetype  VisibleString OPTIONAL,   -- hint on what kind of tree this is
    fdict     FeatureDictSet,           -- features dictionary
    nodes     NodeSet                   -- set of nodes with encoded topology
}

NodeSet ::= SET OF Node

Node ::= SEQUENCE {
    id        INTEGER,                  -- node uid
    parent    INTEGER OPTIONAL,         -- parent node id
    features  NodeFeatureSet OPTIONAL
}

NodeFeatureSet ::= SET OF NodeFeature

    -- one feature value attached to a node, keyed by integer featureid
NodeFeature ::= SEQUENCE {
    featureid  INTEGER,
    value      VisibleString
}

FeatureDictSet ::= SET OF FeatureDescr

FeatureDescr ::= SEQUENCE {
    id    INTEGER,                      -- feature id
    name  VisibleString                 -- feature name
}

DistanceMatrix ::= SEQUENCE {
    labels     SEQUENCE OF VisibleString,   -- n labels
    distances  SEQUENCE OF REAL             -- n(n-1)/2 pairwise distances
                                            -- (0, 1)...(0, n), (1, 2)...(1, n)...
}

END

-- blast.asn
-- ----------------------------------------------------------------------------
--
--                            PUBLIC DOMAIN NOTICE
--               National Center for Biotechnology Information
--
-- This software/database is a "United States Government Work" under the terms
-- of the United States Copyright Act. It was written as part of the author's
-- official duties as a United States Government employee and thus cannot be
-- copyrighted. This software/database is freely available to the public for
-- use. The National Library of Medicine and the U.S. Government have not
-- placed any restriction on its use or reproduction.
--
-- Although all reasonable efforts have been taken to ensure the accuracy and
-- reliability of the software and data, the NLM and the U.S. Government do not
-- and cannot warrant the performance or results that may be obtained by using
-- this software or data. The NLM and the U.S. Government disclaim all
-- warranties, express or implied, including warranties of performance,
-- merchantability or fitness for any particular purpose.
--
-- Please cite the authors in any work or product based on this material.
--
-- ----------------------------------------------------------------------------
--
-- Authors: Tom Madden, Tim Boemker
--
-- ASN.1 interface to BLAST.
--
-- ----------------------------------------------------------------------------

NCBI-Blast4 DEFINITIONS ::=
BEGIN

EXPORTS
    Blast4-ka-block,
    Blast4-value,
    Blast4-parameter,
    Blast4-parameters;

IMPORTS
    Bioseq                  FROM NCBI-Sequence
    Seq-data                FROM NCBI-Sequence
    Bioseq-set              FROM NCBI-Seqset
    PssmWithParameters      FROM NCBI-ScoreMat
    Seq-id,
    Seq-loc                 FROM NCBI-Seqloc
    Seq-align,
    Seq-align-set           FROM NCBI-Seqalign;

-- --------------------------------------------------------------------
--
-- Requests
--
-- --------------------------------------------------------------------

-- Top-level request: an optional ident string plus exactly one request body.
Blast4-request ::= SEQUENCE {
    ident                   VisibleString OPTIONAL,
    body                    Blast4-request-body
}

-- One alternative per request kind; the NULL alternatives carry no arguments.
Blast4-request-body ::= CHOICE {
    finish-params           Blast4-finish-params-request,
    get-databases           NULL,
    get-matrices            NULL,
    get-parameters          NULL,
    get-paramsets           NULL,
    get-programs            NULL,
    get-search-results      Blast4-get-search-results-request,
    get-sequences           Blast4-get-sequences-request,
    queue-search            Blast4-queue-search-request,
    get-request-info        Blast4-get-request-info-request,
    get-sequence-parts      Blast4-get-seq-parts-request
}

Blast4-finish-params-request ::= SEQUENCE {
    program                 VisibleString,
    service                 VisibleString,
    paramset                VisibleString OPTIONAL,
    params                  Blast4-parameters OPTIONAL
}

Blast4-get-search-results-request ::= SEQUENCE {
    request-id              VisibleString
}

Blast4-get-sequences-request ::= SEQUENCE {
    database                Blast4-database,
    seq-ids                 SEQUENCE OF Seq-id
}

-- If a PSSM is used (ie. for PSI-Blast), it must contain a "query"
-- for formatting purposes.  Bioseq-set may contain any number of
-- queries, specified as data.  Seq-loc-list may contain only the
-- "whole" or "interval" types.  In the case of "whole", any number of
-- queries may be used; in the case of "interval", there should be
-- exactly one query.  (This is limited by the BlastObject.)

Blast4-queries ::= CHOICE {
    pssm                    PssmWithParameters,
    seq-loc-list            SEQUENCE OF Seq-loc,
    bioseq-set              Bioseq-set
}

-- Options have been broken down into two groups as part of the BLAST
-- API work.
-- The algorithm options essentially correspond to those
-- options available via the CBlastOptions class.

-- algorithm-options: Options for BLAST (ie. seq comparison) algorithm.
-- program-options:   Other options, such as which seqs. to compare.

Blast4-queue-search-request ::= SEQUENCE {
    program                 VisibleString,
    service                 VisibleString,
    queries                 Blast4-queries,
    subject                 Blast4-subject,
    paramset                VisibleString OPTIONAL,
    algorithm-options       Blast4-parameters OPTIONAL,
    program-options         Blast4-parameters OPTIONAL
}

-- Fetch information about the search request.

Blast4-get-request-info-request ::= SEQUENCE {
    request-id              VisibleString
}

Blast4-get-request-info-reply ::= SEQUENCE {
    database                Blast4-database,
    program                 VisibleString,
    service                 VisibleString,
    created-by              VisibleString,
    queries                 Blast4-queries,
    algorithm-options       Blast4-parameters,
    program-options         Blast4-parameters
}

-- Fetch parts of a sequence a la carte.

Blast4-get-seq-parts-request ::= SEQUENCE {
    database                Blast4-database,
    id                      Seq-id,
    need-meta-data          BOOLEAN,

    -- If end is 0, no data will be fetched.  If end is past the
    -- length of the sequence, it will be adjusted to the end of
    -- the sequence (this allows fetching of the first chunk in
    -- cases where the length is not yet known).

    start                   INTEGER OPTIONAL,
    end                     INTEGER OPTIONAL
}

-- --------------------------------------------------------------------
--
-- Replies
--
-- --------------------------------------------------------------------

-- Top-level reply: an optional list of errors plus exactly one reply body.
Blast4-reply ::= SEQUENCE {
    errors                  SEQUENCE OF Blast4-error OPTIONAL,
    body                    Blast4-reply-body
}

-- NOTE(review): get-queries has no matching alternative in
-- Blast4-request-body; confirm whether that is intentional.
Blast4-reply-body ::= CHOICE {
    finish-params           Blast4-finish-params-reply,
    get-databases           Blast4-get-databases-reply,
    get-matrices            Blast4-get-matrices-reply,
    get-parameters          Blast4-get-parameters-reply,
    get-paramsets           Blast4-get-paramsets-reply,
    get-programs            Blast4-get-programs-reply,
    get-search-results      Blast4-get-search-results-reply,
    get-sequences           Blast4-get-sequences-reply,
    queue-search            Blast4-queue-search-reply,
    get-queries             Blast4-get-queries-reply,
    get-request-info        Blast4-get-request-info-reply,
    get-sequence-parts      Blast4-get-seq-parts-reply
}

Blast4-finish-params-reply ::= Blast4-parameters

Blast4-get-databases-reply ::= SEQUENCE OF Blast4-database-info

Blast4-get-matrices-reply ::= SEQUENCE OF Blast4-matrix-id

Blast4-get-parameters-reply ::= SEQUENCE OF Blast4-parameter-info

Blast4-get-paramsets-reply ::= SEQUENCE OF Blast4-paramset-info

Blast4-get-programs-reply ::= SEQUENCE OF Blast4-program-info

Blast4-get-search-results-reply ::= SEQUENCE {
    alignments              Seq-align-set OPTIONAL,
    phi-alignments          Blast4-phi-alignments OPTIONAL,

    -- Masking locations for the query sequence(s). Each element of this set
    -- corresponds to a single query's translation frame as appropriate.
    masks                   SEQUENCE OF Blast4-mask OPTIONAL,

    ka-blocks               SEQUENCE OF Blast4-ka-block OPTIONAL,
    search-stats            SEQUENCE OF VisibleString OPTIONAL,
    pssm                    PssmWithParameters OPTIONAL
}

Blast4-get-sequences-reply ::= SEQUENCE OF Bioseq

Blast4-queue-search-reply ::= SEQUENCE {
    request-id              VisibleString OPTIONAL
}

Blast4-get-queries-reply ::= SEQUENCE {
    queries                 Blast4-queries
}

Blast4-get-seq-parts-reply ::= SEQUENCE {
    bioseq                  Bioseq OPTIONAL,
    ids                     SEQUENCE OF Seq-id OPTIONAL,
    length                  INTEGER OPTIONAL,
    data                    Seq-data OPTIONAL
}

-- --------------------------------------------------------------------
--
-- Errors
--
-- --------------------------------------------------------------------

-- NOTE(review): code is a plain INTEGER; presumably it carries values
-- from Blast4-error-code below. Confirm against the server code.
Blast4-error ::= SEQUENCE {
    code                    INTEGER,
    message                 VisibleString OPTIONAL
}

Blast4-error-flags ::= ENUMERATED {
    warning                 (1024),
    error                   (2048)
}

Blast4-error-code ::= INTEGER {
    -- warnings
    conversion-warning      (1024),

    -- errors
    internal-error          (2048),
    not-implemented         (2049),
    not-allowed             (2050),
    bad-request             (2051),
    bad-request-id          (2052),
    search-pending          (2053)
}

-- --------------------------------------------------------------------
--
-- Other types in alphabetical order
--
-- --------------------------------------------------------------------

Blast4-cutoff ::= CHOICE {
    e-value                 REAL,
    raw-score               INTEGER
}

Blast4-database ::= SEQUENCE {
    name                    VisibleString,
    type                    Blast4-residue-type
}

-- Borrowed from seq.asn
Blast4-seqtech ::= INTEGER {
    unknown (0) ,
    standard (1) ,          -- standard sequencing
    est (2) ,               -- Expressed Sequence Tag
    sts (3) ,               -- Sequence Tagged Site
    survey (4) ,            -- one-pass genomic sequence
    genemap (5) ,           -- from genetic mapping techniques
    physmap (6) ,           -- from physical mapping techniques
    derived (7) ,           -- derived from other data, not a primary entity
    concept-trans (8) ,     -- conceptual translation
    seq-pept (9) ,          -- peptide was sequenced
    both (10) ,             -- concept transl. w/ partial pept. seq.
    seq-pept-overlap (11) , -- sequenced peptide, ordered by overlap
    seq-pept-homol (12) ,   -- sequenced peptide, ordered by homology
    concept-trans-a (13) ,  -- conceptual transl. supplied by author
    htgs-1 (14) ,           -- unordered High Throughput sequence contig
    htgs-2 (15) ,           -- ordered High Throughput sequence contig
    htgs-3 (16) ,           -- finished High Throughput sequence
    fli-cdna (17) ,         -- full length insert cDNA
    htgs-0 (18) ,           -- single genomic reads for coordination
    htc (19) ,              -- high throughput cDNA
    wgs (20) ,              -- whole genome shotgun sequencing
    other (255)             -- use Source.techexp
}

Blast4-database-info ::= SEQUENCE {
    database                Blast4-database,
    description             VisibleString,
    last-updated            VisibleString,
    total-length            BigInt,
    num-sequences           BigInt,
    seqtech                 Blast4-seqtech,
    taxid                   INTEGER
}

Blast4-frame-type ::= ENUMERATED {
    notset                  (0),
    plus1                   (1),
    plus2                   (2),
    plus3                   (3),
    minus1                  (4),
    minus2                  (5),
    minus3                  (6)
}

Blast4-ka-block ::= SEQUENCE {
    lambda                  REAL,
    k                       REAL,
    h                       REAL,
    gapped                  BOOLEAN
}

-- Masking locations for a query's frame. The locations field is a single
-- Seq-loc of type Packed-int, which contains all the masking locations for the
-- translation frame specified by the frame field.
-- Notes:
-- On input (i.e.: when the client specifies masking locations as a
-- Blast4-parameter), in the case of protein queries, the frame field must
-- always be notset, in the case of nucleotide queries (regardless of whether
-- the search will translate these or not), the frame must be plus1. Masking
-- locations in the translated encoding are not permitted.
-- On output (i.e.: when blast 4 server encodes these as part of the
-- Blast4-get-search-results-reply), the same conventions as above apply for
-- non-translated protein and nucleotide queries, but in the case of translated
-- nucleotide queries, the frame field can be specified in any of the
-- translation frames as appropriate.
-- NOTE(review): the description preceding this type speaks of a single
-- Seq-loc, while locations is declared SEQUENCE OF Seq-loc; confirm which
-- one is intended.
Blast4-mask ::= SEQUENCE {
    locations               SEQUENCE OF Seq-loc,
    frame                   Blast4-frame-type
}

Blast4-matrix-id ::= SEQUENCE {
    residue-type            Blast4-residue-type,
    name                    VisibleString
}

Blast4-parameter ::= SEQUENCE {
    name                    VisibleString,
    value                   Blast4-value
}

Blast4-parameter-info ::= SEQUENCE {
    name                    VisibleString,
    type                    VisibleString
}

Blast4-paramset-info ::= SEQUENCE {
    program                 VisibleString,
    name                    VisibleString
}

Blast4-program-info ::= SEQUENCE {
    program                 VisibleString,
    services                SEQUENCE OF VisibleString
}

Blast4-residue-type ::= ENUMERATED {
    unknown                 (0),
    protein                 (1),
    nucleotide              (2)
}

Blast4-strand-type ::= ENUMERATED {
    forward-strand          (1),
    reverse-strand          (2),
    both-strands            (3)
}

Blast4-subject ::= CHOICE {
    database                VisibleString,
    sequences               SEQUENCE OF Bioseq
}

Blast4-parameters ::= SEQUENCE OF Blast4-parameter

Blast4-phi-alignments ::= SEQUENCE {
    num-alignments          INTEGER,
    seq-locs                SEQUENCE OF Seq-loc
}

Blast4-value ::= CHOICE {
    -- scalar types
    big-integer             BigInt,
    bioseq                  Bioseq,
    boolean                 BOOLEAN,
    cutoff                  Blast4-cutoff,
    integer                 INTEGER,
    matrix                  PssmWithParameters,
    real                    REAL,
    seq-align               Seq-align,
    seq-id                  Seq-id,
    seq-loc                 Seq-loc,
    strand-type             Blast4-strand-type,
    string                  VisibleString,

    -- lists of scalar types
    big-integer-list        SEQUENCE OF BigInt,
    bioseq-list             SEQUENCE OF Bioseq,
    boolean-list            SEQUENCE OF BOOLEAN,
    cutoff-list             SEQUENCE OF Blast4-cutoff,
    integer-list            SEQUENCE OF INTEGER,
    matrix-list             SEQUENCE OF PssmWithParameters,
    real-list               SEQUENCE OF REAL,
    seq-align-list          SEQUENCE OF Seq-align,
    seq-id-list             SEQUENCE OF Seq-id,
    seq-loc-list            SEQUENCE OF Seq-loc,
    strand-type-list        SEQUENCE OF Blast4-strand-type,
    string-list             SEQUENCE OF VisibleString,

    -- imported collection types
    bioseq-set              Bioseq-set,
    seq-align-set           Seq-align-set,

    -- Intended to represent user-provided masking locations for a single query
    -- sequence (name field in Blast4-parameter should be "LCaseMask").
    -- Multiple Blast4-parameters of this type are needed to specify masking
    -- locations for multiple queries.
    query-mask              Blast4-mask
}

END

-- blastdb.asn
--$Id: blastdb.asn 100080 2007-03-12 16:05:35Z kazimird $
--
-- Notes:
--
-- taxonomy: an integer is proposed, which would require some sort of
-- table (or network connection) to do the conversions from integer
-- to various names.  This could save quite a bit of space for databases
-- that are predominantly of one organism (e.g., human in htgs).
-- I've proposed here that table contain scientific-, common-, and
-- blast-names at the advice of Scott Federhen.  Scott also was in
-- favor of having the complete lineage in the file, but it seems like
-- this would be seldom used and we could have a view with a link back
-- to the taxonomy page for anyone needing it.  Since one file would
-- suffice for all blast databases, it seems like this should be a new file.
--
-- memberships: a sequence of integers is proposed.  Each bit of an integer
-- would indicate membership in some (virtual) blast database (e.g., pdb,
-- swissprot) or some classification (e.g., mRNA, genomic).
--
-- links: a sequence of integers is proposed.  Each bit of an integer would
-- indicate a link that could be established based upon the gi of the
-- database sequence.
--

NCBI-BlastDL DEFINITIONS ::=
BEGIN

EXPORTS Blast-def-line-set, Blast-def-line;

IMPORTS Seq-id FROM NCBI-Seqloc;

Blast-def-line-set ::= SEQUENCE OF Blast-def-line   -- all deflines for an entry

Blast-def-line ::= SEQUENCE {
    title       VisibleString OPTIONAL,         -- simple title
    seqid       SEQUENCE OF Seq-id,             -- Regular NCBI Seq-Id
    taxid       INTEGER OPTIONAL,               -- taxonomy id
    memberships SEQUENCE OF INTEGER OPTIONAL,   -- bit arrays
    links       SEQUENCE OF INTEGER OPTIONAL,   -- bit arrays
    other-info  SEQUENCE OF INTEGER OPTIONAL    -- for future use (probably genomic sequences)
}

END

-- blastxml.asn
--$Id: blastxml.asn 120927 2008-02-28 18:57:30Z ucko $

NCBI-BlastOutput DEFINITIONS ::=
BEGIN

BlastOutput ::= SEQUENCE {
    program VisibleString ,             -- BLAST program: blastp, tblastx etc.
    version VisibleString ,             -- Program version
    reference VisibleString ,           -- Steven, David, Tom and others
    db VisibleString ,                  -- BLAST Database name
    query-ID VisibleString ,            -- SeqId of query
    query-def VisibleString ,           -- Definition line of query
    query-len INTEGER ,                 -- length of query sequence
    query-seq VisibleString OPTIONAL ,  -- query sequence itself
    param Parameters,                   -- search parameters
    iterations SEQUENCE OF Iteration,
    mbstat Statistics OPTIONAL          -- Mega BLAST search statistics
}

Iteration ::= SEQUENCE {
    iter-num INTEGER ,                  -- iteration number
    query-ID VisibleString OPTIONAL,    -- SeqId of query
    query-def VisibleString OPTIONAL,   -- Definition line of query
    query-len INTEGER OPTIONAL ,        -- length of query sequence
    hits SEQUENCE OF Hit OPTIONAL,      -- Hits one for every db sequence
    stat Statistics OPTIONAL,           -- search statistics
    message VisibleString OPTIONAL      -- Some (error?) information
}

Parameters ::= SEQUENCE {
    matrix VisibleString OPTIONAL ,     -- Matrix used (-M)
    expect REAL ,                       -- Expectation threshold (-e)
    include REAL OPTIONAL ,             -- Inclusion threshold (-h)
    sc-match INTEGER OPTIONAL ,         -- match score for NT (-r)
    sc-mismatch INTEGER OPTIONAL ,      -- mismatch score for NT (-q)
    gap-open INTEGER ,                  -- Gap opening cost (-G)
    gap-extend INTEGER ,                -- Gap extension cost (-E)
    filter VisibleString OPTIONAL,      -- Filtering options (-F)
    pattern VisibleString OPTIONAL,     -- PHI-BLAST pattern
    entrez-query VisibleString OPTIONAL -- Limit of request to Entrez query
}

Statistics ::= SEQUENCE {
    db-num INTEGER ,                    -- Number of sequences in BLAST db
    db-len BigInt ,                     -- Length of BLAST db
    hsp-len INTEGER ,                   -- Effective HSP length
    eff-space REAL,                     -- Effective search space
    kappa REAL,                         -- Karlin-Altschul parameter K
    lambda REAL,                        -- Karlin-Altschul parameter Lambda
    entropy REAL                        -- Karlin-Altschul parameter H
}

Hit ::= SEQUENCE {
    num INTEGER ,                       -- hit number
    id VisibleString ,                  -- SeqId of subject
    def VisibleString ,                 -- definition line of subject
    accession VisibleString ,           -- accession
    len INTEGER ,                       -- length of subject
    hsps SEQUENCE OF Hsp OPTIONAL       -- all HSP regions for the given subject
}

Hsp ::= SEQUENCE {
    num INTEGER ,                       -- HSP number
    bit-score REAL ,                    -- score (in bits) of HSP
    score REAL ,                        -- score of HSP
    evalue REAL ,                       -- e-value of HSP
    query-from INTEGER ,                -- start of HSP in query
    query-to INTEGER ,                  -- end of HSP
    hit-from INTEGER,                   -- start of HSP in subject
    hit-to INTEGER ,                    -- end of HSP in subject
    pattern-from INTEGER OPTIONAL ,     -- start of PHI-BLAST pattern
    pattern-to INTEGER OPTIONAL ,       -- end of PHI-BLAST pattern
    query-frame INTEGER OPTIONAL ,      -- translation frame of query
    hit-frame INTEGER OPTIONAL ,        -- translation frame of subject
    identity INTEGER OPTIONAL ,         -- number of identities in HSP
    positive INTEGER OPTIONAL ,         -- number of positives in HSP
    gaps INTEGER OPTIONAL ,             -- number of gaps in HSP
    align-len INTEGER OPTIONAL ,        -- length of the alignment used
    density INTEGER OPTIONAL ,          -- score density
    qseq VisibleString ,                -- alignment string for the query (with gaps)
    hseq VisibleString,                 -- alignment string for subject (with gaps)
    midline VisibleString OPTIONAL      -- formatting middle line
}

END

-- cdd.asn
--$Revision: 109707 $
--**********************************************************************
--
--  Definitions for CDD's
--
--  NCBI Structure Group
--
--  National Center for Biotechnology Information
--  National Institutes of Health
--  Bethesda, MD 20894 USA
--
--  October 1999
--
-- asntool -m cdd.asn -w 100 -o cdd.h
-- asntool -B objcdd -m cdd.asn -G -w 100 -I objseq.h objsset.h -K cdd.h \
--         -M asn.all
--**********************************************************************

NCBI-Cdd DEFINITIONS ::=
-- NCBI Conserved Domain Definition

BEGIN

EXPORTS Cdd-id, Cdd-id-set, Cdd, Cdd-set, Cdd-tree, Cdd-tree-set,
        Cdd-pref-nodes, Cdd-Project;

IMPORTS Date                    FROM NCBI-General
        Pub                     FROM NCBI-Pub
        Biostruc-annot-set      FROM MMDB
        Bioseq                  FROM NCBI-Sequence
        Seq-annot               FROM NCBI-Sequence
        Seq-entry               FROM NCBI-Seqset
        Org-ref                 FROM NCBI-Organism
        Seq-id                  FROM NCBI-Seqloc
        Seq-interval            FROM NCBI-Seqloc
        Seq-loc                 FROM NCBI-Seqloc
        Seq-feat                FROM NCBI-Seqfeat
        Score-set               FROM NCBI-Seqalign
        Cn3d-style-dictionary,
        Cn3d-user-annotations   FROM NCBI-Cn3d
        PssmWithParameters      FROM NCBI-ScoreMat;

-- dealing with lists of preferred tax-nodes

Cdd-org-ref ::= SEQUENCE {
  reference     Org-ref,
  active        BOOLEAN DEFAULT TRUE,
  parent-tax-id INTEGER OPTIONAL,
  rank          VisibleString OPTIONAL
}

Cdd-org-ref-set ::= SET OF Cdd-org-ref

Cdd-pref-node-descr ::= CHOICE {
  create-date   Date,
  description   VisibleString
}

Cdd-pref-node-descr-set ::= SET OF Cdd-pref-node-descr

Cdd-pref-nodes ::= SEQUENCE {
  preferred-nodes Cdd-org-ref-set,
  model-organisms Cdd-org-ref-set OPTIONAL,
  optional-nodes  Cdd-org-ref-set OPTIONAL,
  description     Cdd-pref-node-descr-set OPTIONAL
}

-- Cdd's should not exist without a unique accession, but alternative id's may
-- be present as well. It is conceivable that a CD which is created as a merged
-- product of two highly redundant CDs will retain the source ids in addition
-- to its new unique id

Global-id ::= SEQUENCE {
  accession   VisibleString,           -- SMART, Pfam, LOAD or CD accession
  release     VisibleString OPTIONAL,  -- to hold CD-Database release number
                                       -- if desired, currently not used
  version     INTEGER OPTIONAL,        -- version 0 is the seed, version
                                       -- numbers increase with update/curate
                                       -- cycles
  database    VisibleString OPTIONAL   -- this is NOT the source!, rather the
}                                      -- database the object resides in
                                       -- currently not in use

Cdd-id ::= CHOICE {
  uid         INTEGER,                 -- for synchronization with Entrez
                                       -- holds PSSM-Ids
  gid         Global-id                -- holds accession/version pairs
}

Cdd-id-set ::= SEQUENCE OF Cdd-id

Cdd-repeat ::= SEQUENCE {              -- record whether the CD contains
                                       -- repeated sequence/structure motifs
  count       INTEGER,                 -- number of tandem repeats in the CD
  location    Seq-loc OPTIONAL,        -- location on the representative
  avglen      INTEGER OPTIONAL         -- average repeat length
}

Cdd-book-ref ::= SEQUENCE {                  -- record a link to Entrez Books
  bookname      VisibleString,               -- abbreviated book title
  textelement   ENUMERATED { unassigned(0),  -- type of element
                             section(1),     -- a section or paragraph
                             figgrp(2),      -- a figure or set of figures
                             table(3),       -- a table
                             chapter(4),     -- a whole chapter
                             biblist(5),     -- a list of references
                             box(6),         -- an inserted box
                             glossary(7),    -- glossary
                             appendix(8),    -- appendix
                             other(255) },
  elementid     INTEGER OPTIONAL,            -- numerical address of the text-element
  subelementid  INTEGER OPTIONAL,            -- exact address, used with section
  celementid    VisibleString OPTIONAL,      -- address of the text element, if character string
  csubelementid VisibleString OPTIONAL       -- exact address, if character string
}

-- The description of CDD's refers to the specific set of aligned sequences,
-- the region that is being aligned and the information contained in the
-- alignment. It may contain a lengthy comment
-- describing the function of the domain as well as its origin and all
-- other anecdotal information that can't be pressed into a rigid scheme.
-- Crosslinks to reference papers available in PubMed are possible as well.
-- There can be as many of these as you want in the CDD.

Cdd-descr ::= CHOICE {
  othername     VisibleString,       -- alternative names for the CDD
                                     -- if domain has several common names
  category      VisibleString,       -- intracellular, extracellular, etc.
                                     -- to record spatial and/or temporal
                                     -- expression in free-text format
  comment       VisibleString,       -- this is where descriptions go
  reference     Pub,                 -- a citation describing the domain
  create-date   Date,                -- Date of first creation/dump
  tax-source    Org-ref,             -- holds the highest common tax node
  source        VisibleString,       -- the database the seeds were created
                                     -- from, e.g. SMART, PFAM, etc..
  status        INTEGER { unassigned(0),
                          finished-ok(1),      -- a public curated CD
                          pending-release(2),  -- needs work done, not yet released
                          other-asis(3),       -- imported as-is, immediate release
                          matrix-only(4),      -- CD holds a Psi-Blast PSSM only,
                                               -- does not contain alignment data
                          update-running(5),   -- has been flagged for
                                               -- update (in queue)
                          auto-updated(6),     -- update finished, no
                                               -- work necessary
                          claimed(7),          -- is earmarked for curation
                          curated-complete(8), -- public curated member of a
                                               -- completed family
                          other(255) },        -- for CD production?
  update-date   Date,                -- Date of last version change
  scrapbook     SEQUENCE OF VisibleString, -- for storing curation notes
                                           -- those won't make it into public
                                           -- distributions
  source-id     Cdd-id-set,          -- for linking back to source db
  repeats       Cdd-repeat,          -- to record repeat counts
  old-root      Cdd-id-set,          -- to record short-term history
  curation-status INTEGER { unassigned(0), -- to record curation status
                            prein   (1),   -- when CD is checked out from
                            ofc     (2),   -- the tracking database, for
                            iac     (3),   -- use within curation software
                            ofv1    (4),
                            iav1    (5),
                            ofv2    (6),
                            iav2    (7),
                            postin  (8),
                            other (255) },
  readonly-status INTEGER { unassigned(0), -- to record read-only status
                            readonly  (1), -- when CD is checked out from
                            readwrite (2), -- the tracking database, for
                            other   (255) }, -- use within curation software
  book-ref      Cdd-book-ref,        -- links to Entrez/books
  attribution   Pub                  -- add citations and/or author names
}

Cdd-descr-set ::= SET OF Cdd-descr

-- the Cdd-tree stores the hierarchy of CDDs.
These objects are stored separate -- from the CDs to allow for fast retrieval and use as an 'index' into CDs -- all the components in a CD-tree match components in the full-sized CD -- and should be synchronized Cdd-tree ::= SEQUENCE { name VisibleString, -- short name copied from CD id Cdd-id-set, -- IDs copied from CD description Cdd-descr-set OPTIONAL, -- description copied from CD parent Cdd-id OPTIONAL, -- CD is the result of a split/merge children Cdd-id-set OPTIONAL, -- this CD has been split siblings Cdd-id-set OPTIONAL, -- related CDs (have common hits) neighbors Cdd-id-set OPTIONAL -- co-occurring CDs (non-overlapping -- hits to same sequences) } Cdd-tree-set ::= SEQUENCE OF Cdd-tree -- Matrix definitions, these are supposed to store PSSMs and corresponding -- matrices of relative residue frequencies. -- the number of columns and rows is listed explicitly, values in columns -- are stored column by column, i.e. in groups of nrows values for each column Matrix ::= SEQUENCE { ncolumns INTEGER, nrows INTEGER, row-labels SEQUENCE OF VisibleString OPTIONAL, scale-factor INTEGER, columns SEQUENCE OF INTEGER } -- definition for matrix of pairwise "distances", stored as the upper -- triangle of a squared n x n matrix (excluding the diagonal), this is -- supposed to store pairwise percentages of identical residues, pairwise -- alignment scores or E-values from pairwise BLAST sequence comparisons Triangle ::= SEQUENCE { nelements INTEGER, scores Score-set OPTIONAL, div-ranks SEQUENCE OF INTEGER OPTIONAL } -- Update-align is supposed to contain alignments that still need some work -- done to fit into the CD-proper alignment. 
These originate from the -- CD update process (generated by Blast, for example) or may be created in -- an editing session to save its state Update-comment ::= CHOICE { comment VisibleString, -- free text to describe nature of -- Update-align addthis Seq-loc, -- suggestion for inclusion in the CD -- without corresponding alignment replaces Seq-loc, -- if one or several alignment rows are -- to be replaced by the Update-align reject-loc Seq-loc, -- if used with Reject-id, specify a -- location on a sequence which should -- not be used reference Pub -- if update alignment imported from -- citation and for whenever it seems -- necessary to cite } -- Both fields are optional, as the Update-align may be a Seq-annot without -- description, or a suggestion to add a sequence without the corresponding -- alignment Update-align ::= SEQUENCE { description SEQUENCE OF Update-comment OPTIONAL, seqannot Seq-annot OPTIONAL, -- contains the SeqAlign type INTEGER { unassigned(0), update(1), update-3d(2), demoted(51), demoted-3d(52), other(255)} } Reject-id ::= SEQUENCE { description SEQUENCE OF Update-comment OPTIONAL, ids SET OF Seq-id } Feature-evidence ::= CHOICE { comment VisibleString, -- so we can spell out what doesn't -- fit in any other category reference Pub, -- evidence via a literature reference bsannot Biostruc-annot-set, -- evidence via Biostruc-features, such -- as structure superpositions seqfeat Seq-feat, -- evidence is a Sequence feature found -- elsewhere book-ref Cdd-book-ref -- evidence is a book chapter or figure } Align-annot ::= SEQUENCE { location Seq-loc, -- points to a location in one of the -- aligned sequences, usually the -- master/representative description VisibleString OPTIONAL, -- to hold descriptions/names like -- "Heme binding site" or "catalytic -- triad" etc., something that should -- be used for labels in visualization evidence SEQUENCE OF Feature-evidence OPTIONAL, -- evidence we can -- compute with type INTEGER OPTIONAL -- for typing 
annotated features } Align-annot-set ::= SEQUENCE OF Align-annot -- the Domain-parent records an evolutionary relationship which may not be -- as simple as a classical parent-child relationship in a typical hierarchy, -- i.e. where a CD is merely a specific subgroup ("child") of a more general -- diverse alignment model ("parent"). A CD alignment model may be the result -- of an ancient fusion event, combining two or more domains into a bigger unit -- which has subsequently undergone a divergent evolutionary process similar to -- what may have happened to a single "domain". A CD alignment model may -- also reflect the result of a deletion event, where a specific subgroup -- lacks part of a (set of) domain(s), but where the part present is found to -- be highly similar to a putative "parent", with some added evidence for -- an actual deletion, like from the distribution of truncated copies in phylogenetic -- lineages. Deletion events which affect different parts of a set of -- duplicated domain architectures may be indistinguishable from actual -- fission events, which means that we may want to represent the latter as -- deletions after duplication and do not need a special case for fissions. Domain-parent ::= SEQUENCE { parent-type INTEGER { classical (0), -- the classification of parent child relations fusion (1), deletion (2), permutation (3), other (255) }, parentid Cdd-id, -- identify the section parent by accession seqannot Seq-annot OPTIONAL -- contains the sequence alignment linking -- CD alignment models, should align the -- masters/representatives of each CD } -- record sequence trees generated by a suitable algorithm. 
-- a sequence tree: the algorithm settings that produced it plus its root node
Sequence-tree ::= SEQUENCE {
    cdAccession VisibleString OPTIONAL,
    algorithm   Algorithm-type,
    isAnnotated BOOLEAN DEFAULT FALSE,
    root        SeqTree-node
}

-- one node of a sequence tree; "children" holds either further nodes or a
-- footprint (a sequence interval with an optional row id)
SeqTree-node ::= SEQUENCE {
    isAnnotated BOOLEAN DEFAULT FALSE,
    name        VisibleString OPTIONAL,
    distance    REAL OPTIONAL,
    children    CHOICE {
        children  SEQUENCE OF SeqTree-node,
        footprint SEQUENCE {
            seqRange Seq-interval,
            rowId    INTEGER OPTIONAL
        }
    },
    annotation  Node-annotation OPTIONAL
}

-- scoring scheme, clustering method and optional parameters of a tree
Algorithm-type ::= SEQUENCE {
    scoring-Scheme INTEGER {
        unassigned           (0),
        percent-id           (1),
        kimura-corrected     (2),
        aligned-score        (3),
        aligned-score-ext    (4),
        aligned-score-filled (5),
        blast-footprint      (6),
        blast-full           (7),
        hybrid-aligned-score (8),
        other                (255)
    },
    clustering-Method INTEGER {
        unassigned             (0),
        single-linkage         (1),
        neighbor-joining       (2),
        fast-minimum-evolution (3),
        other                  (255)
    },
    score-Matrix INTEGER {
        unassigned (0),
        blosum45   (1),
        blosum62   (2),
        blosum80   (3),
        pam30      (4),
        pam70      (5),
        pam250     (6),
        other      (255)
    } OPTIONAL,
    gapOpen        INTEGER OPTIONAL,
    gapExtend      INTEGER OPTIONAL,
    gapScaleFactor INTEGER OPTIONAL,
    nTerminalExt   INTEGER OPTIONAL,
    cTerminalExt   INTEGER OPTIONAL,
    tree-scope INTEGER {
        allDescendants       (0),
        immediateChildrenOnly(1),
        selfOnly             (2),
        other                (255)
    } OPTIONAL,
    coloring-scope INTEGER {
        allDescendants        (0),
        immediateChildrenOnly (1),
        other                 (255)
    } OPTIONAL
}

-- free-text annotation attached to a tree node
Node-annotation ::= SEQUENCE {
    presentInChildCD VisibleString OPTIONAL,
    note             VisibleString OPTIONAL
}

-- the Cdd is the basic ASN.1 object storing an annotated and curated set of
-- alignments (formulated as a set of pairwise master-slave alignments).
-- The alignment data are contained in Seq-annots, and a special type of
-- object, the Update-align, contains additional alignment data from unfinished
-- editing sessions and update processes. The Biostruc-annot-set holds
-- structure superposition information for multiple structure-derived rows in
-- the alignment.
-- Version numbers in Global-ids are meant to be updated every time the Cdd is
-- changed in a way that does not require Global-ids to be changed (sequences
-- added in update cycle, annotation changed, alignment errors fixed)

Cdd ::= SEQUENCE {
    -- a short name (can be the accession..)
    name VisibleString,
    -- this CD's Ids
    id Cdd-id-set,
    -- status, references, etc.
    description Cdd-descr-set OPTIONAL,
    -- contains the CD alignment
    seqannot SEQUENCE OF Seq-annot OPTIONAL,
    -- contains structure alignment data or "core" definitions
    features Biostruc-annot-set OPTIONAL,
    -- store as bioseq-set inside seq-entry
    sequences Seq-entry OPTIONAL,
    -- profile for this region only; also stores the Seq-id of the master
    profile-range Seq-interval OPTIONAL,
    -- holds the truncated master, which may be something like a consensus,
    -- uses the same sequence coordinate frame as the profile-range
    trunc-master Bioseq OPTIONAL,
    -- relative residue frequencies
    posfreq Matrix OPTIONAL,
    -- Position dependent score matrix
    scoremat Matrix OPTIONAL,
    -- pairwise distances for all seqs.
    distance Triangle OPTIONAL,
    -- this CD is the result of a split
    parent Cdd-id OPTIONAL,
    -- this CD has been split, not used
    children Cdd-id-set OPTIONAL,
    -- related CDs (common hits), not used
    siblings Cdd-id-set OPTIONAL,
    -- co-occurring CDs, not used
    neighbors Cdd-id-set OPTIONAL,
    -- contains alignments from update or "lower panel"
    pending SEQUENCE OF Update-align OPTIONAL,
    -- SeqIds of rejected CD-members, ignore in update
    rejects SEQUENCE OF Reject-id OPTIONAL,
    -- record if CD has a 3D representative
    master3d SET OF Seq-id OPTIONAL,
    -- alignment annotation
    alignannot Align-annot-set OPTIONAL,
    -- record rendering styles
    style-dictionary Cn3d-style-dictionary OPTIONAL,
    -- user annotations in Cn3D
    user-annotations Cn3d-user-annotations OPTIONAL,
    -- list of parents
    ancestors SEQUENCE OF Domain-parent OPTIONAL,
    scoreparams PssmWithParameters OPTIONAL,
    seqtree Sequence-tree OPTIONAL
}

Cdd-set ::= SET OF Cdd

-- Cdd projects store a set of CDs, typically related to each other
-- relationships would be specified using the ancestors fields in the
-- individual CD objects. For use with CD-Tree, a program to visualize
-- curated CD hierarchies and evidence for hierarchical family structures.
-- rectangle occupied by a viewer
Cdd-Viewer-Rect ::= SEQUENCE {
    top    INTEGER,   -- top coordinate
    left   INTEGER,   -- left coordinate
    width  INTEGER,   -- width
    height INTEGER    -- height
}

-- one viewer: its type, optional rectangle and the accessions it shows
Cdd-Viewer ::= SEQUENCE {
    -- viewer type
    ctrl INTEGER {
        unassigned         (0),
        cd-info            (1),
        align-annot        (2),
        seq-list           (3),
        seq-tree           (4),
        merge-preview      (5),
        cross-hits         (6),
        notes              (7),
        tax-tree           (8),
        dart               (9),
        dart-selected-rows (10),
        other              (255)
    },
    -- viewer rectangle
    rect Cdd-Viewer-Rect OPTIONAL,
    -- list of accessions associated with a viewer
    accessions SEQUENCE OF VisibleString
}

Cdd-Script ::= SEQUENCE {
    type INTEGER {
        unassigned       (0),
        user-recorded    (1),
        server-generated (2),
        other            (255)
    } OPTIONAL,
    -- user assigned name/description
    name VisibleString OPTIONAL,
    -- actual script commands
    commands VisibleString
}

-- cd colors are as: 0000FF for red, 00FF00 for green, FF0000 for blue
-- (i.e. the hexadecimal digits read blue, green, red)

Cdd-Project ::= SEQUENCE {
    cds     SEQUENCE OF Cdd,                 -- cds
    cdcolor SEQUENCE OF INTEGER,             -- colors
    viewers SEQUENCE OF Cdd-Viewer,          -- Sequence viewers
    log     VisibleString,                   -- log
    scripts SEQUENCE OF Cdd-Script OPTIONAL  -- command scripts
}

END

-- cn3d.asn
--$Revision: 1.15 $
--**********************************************************************
--
--  Definitions for Cn3D-specific data (rendering settings,
--    user annotations, etc.)
--
--  by Paul Thiessen
--
--  National Center for Biotechnology Information
--  National Institutes of Health
--  Bethesda, MD 20894 USA
--
-- asntool -m cn3d.asn -w 100 -o cn3d.h
-- asntool -B objcn3d -m cn3d.asn -G -w 100 -K cn3d.h -I mapcn3d.h \
--   -M ../mmdb1.asn,../mmdb2.asn,../mmdb3.asn
--**********************************************************************

NCBI-Cn3d DEFINITIONS ::=
-- Cn3D-specific information

BEGIN

EXPORTS Cn3d-style-dictionary, Cn3d-user-annotations;

IMPORTS Biostruc-id FROM MMDB
        Molecule-id, Residue-id FROM MMDB-Chemical-graph;

-- values of enumerations must match those in cn3d/style_manager.hpp!
-- for different types of backbones
Cn3d-backbone-type ::= ENUMERATED {
    off      (1),
    trace    (2),
    partial  (3),
    complete (4)
}

-- atom/bond/object rendering styles
Cn3d-drawing-style ::= ENUMERATED {
    -- for atoms and bonds
    wire           (1),
    tubes          (2),
    ball-and-stick (3),
    space-fill     (4),
    wire-worm      (5),
    tube-worm      (6),
    -- for 3d-objects
    with-arrows    (7),
    without-arrows (8)
}

-- available color schemes (not all necessarily applicable to all objects)
Cn3d-color-scheme ::= ENUMERATED {
    element             (1),
    object              (2),
    molecule            (3),
    domain              (4),
    residue             (20),
    secondary-structure (5),
    user-select         (6),
    -- different alignment conservation coloring (currently only for proteins)
    aligned             (7),
    identity            (8),
    variety             (9),
    weighted-variety    (10),
    information-content (11),
    fit                 (12),
    block-fit           (17),
    block-z-fit         (18),
    block-row-fit       (19),
    -- other schemes
    temperature         (13),
    hydrophobicity      (14),
    charge              (15),
    rainbow             (16)
}

-- RGB triplet, interpreted (after division by the scale-factor) as floating
-- point values which should range from [0..1]. The default scale-factor is
-- 255, so that one can conveniently set integer byte values [0..255] for
-- colors with the scale-factor already set appropriately to map to [0..1].
-- An alpha value is allowed, but is currently ignored by Cn3D.
Cn3d-color ::= SEQUENCE {
    scale-factor INTEGER DEFAULT 255,
    red          INTEGER,
    green        INTEGER,
    blue         INTEGER,
    alpha        INTEGER DEFAULT 255
}

-- style blob for backbones only
Cn3d-backbone-style ::= SEQUENCE {
    type         Cn3d-backbone-type,
    style        Cn3d-drawing-style,
    color-scheme Cn3d-color-scheme,
    user-color   Cn3d-color
}

-- style blob for other objects
Cn3d-general-style ::= SEQUENCE {
    is-on        BOOLEAN,
    style        Cn3d-drawing-style,
    color-scheme Cn3d-color-scheme,
    user-color   Cn3d-color
}

-- style blob for backbone labels
Cn3d-backbone-label-style ::= SEQUENCE {
    -- zero means none
    spacing INTEGER,
    type ENUMERATED {
        one-letter   (1),
        three-letter (2)
    },
    number ENUMERATED {
        none       (0),
        sequential (1),   -- from 1, by residues present, to match sequence
        pdb        (2)    -- use number assigned by PDB
    },
    termini BOOLEAN,
    -- all white, or (if false) color of alpha carbon
    white BOOLEAN
}

-- rendering settings for Cn3D (mirrors StyleSettings class)
Cn3d-style-settings ::= SEQUENCE {
    -- a name (for favorites)
    name VisibleString OPTIONAL,
    -- backbone styles
    protein-backbone    Cn3d-backbone-style,
    nucleotide-backbone Cn3d-backbone-style,
    -- styles for other stuff
    protein-sidechains    Cn3d-general-style,
    nucleotide-sidechains Cn3d-general-style,
    heterogens            Cn3d-general-style,
    solvents              Cn3d-general-style,
    connections           Cn3d-general-style,
    helix-objects         Cn3d-general-style,
    strand-objects        Cn3d-general-style,
    -- virtual disulfides
    virtual-disulfides-on   BOOLEAN,
    virtual-disulfide-color Cn3d-color,
    -- hydrogens
    hydrogens-on BOOLEAN,
    -- background
    background-color Cn3d-color,
    -- floating point parameters - scale-factor applies to all the following:
    scale-factor          INTEGER,
    space-fill-proportion INTEGER,
    ball-radius           INTEGER,
    stick-radius          INTEGER,
    tube-radius           INTEGER,
    tube-worm-radius      INTEGER,
    helix-radius          INTEGER,
    strand-width          INTEGER,
    strand-thickness      INTEGER,
    -- backbone labels (no labels if not present)
    protein-labels    Cn3d-backbone-label-style OPTIONAL,
    nucleotide-labels Cn3d-backbone-label-style OPTIONAL,
    -- ion labels
    ion-labels BOOLEAN OPTIONAL
}

Cn3d-style-settings-set ::= SET OF Cn3d-style-settings

Cn3d-style-table-id ::= INTEGER

Cn3d-style-table-item ::= SEQUENCE {
    id    Cn3d-style-table-id,
    style Cn3d-style-settings
}

-- the global settings, and a lookup table of styles for user annotations.
Cn3d-style-dictionary ::= SEQUENCE {
    global-style Cn3d-style-settings,
    style-table  SEQUENCE OF Cn3d-style-table-item OPTIONAL
}

-- a range of residues in a chain, identified by MMDB residue-id
-- (e.g., numbered from 1)
Cn3d-residue-range ::= SEQUENCE {
    from Residue-id,
    to   Residue-id
}

-- set of locations on a particular chain
Cn3d-molecule-location ::= SEQUENCE {
    -- MMDB molecule id
    molecule-id Molecule-id,
    -- which residues; whole molecule implied if absent
    residues SEQUENCE OF Cn3d-residue-range OPTIONAL
}

-- set of locations on a particular structure object (e.g., a PDB/MMDB
-- structure), which may include multiple ranges of residues each on
-- multiple chains.
Cn3d-object-location ::= SEQUENCE {
    structure-id Biostruc-id,
    residues     SEQUENCE OF Cn3d-molecule-location
}

-- information for an individual user annotation
Cn3d-user-annotation ::= SEQUENCE {
    -- a (short) name for this annotation
    name VisibleString,
    -- an optional longer description
    description VisibleString OPTIONAL,
    -- how to draw this annotation
    style-id Cn3d-style-table-id,
    -- which residues to cover
    residues SEQUENCE OF Cn3d-object-location,
    -- whether this annotation is to be turned on in Cn3D
    is-on BOOLEAN
}

-- a GL-ordered transformation matrix
Cn3d-GL-matrix ::= SEQUENCE {
    m0  REAL, m1  REAL, m2  REAL, m3  REAL,
    m4  REAL, m5  REAL, m6  REAL, m7  REAL,
    m8  REAL, m9  REAL, m10 REAL, m11 REAL,
    m12 REAL, m13 REAL, m14 REAL, m15 REAL
}

-- a floating point 3d vector
Cn3d-vector ::= SEQUENCE {
    x REAL,
    y REAL,
    z REAL
}

-- parameters used to set up the camera in Cn3D
Cn3d-view-settings ::= SEQUENCE {
    -- camera on +Z axis this distance from origin
    camera-distance REAL,
    -- camera angle
    camera-angle-rad REAL,
    -- X,Y of point in Z=0 plane camera points at
    camera-look-at-X REAL,
    camera-look-at-Y REAL,
    -- distance of clipping planes from camera
    camera-clip-near REAL,
    camera-clip-far  REAL,
    -- transformation of objects in the scene
    matrix Cn3d-GL-matrix,
    -- center of rotation of whole scene
    rotation-center Cn3d-vector
}

-- The list of annotations for a given CDD/mime. If residue regions overlap
-- between annotations that are turned on, the last annotation in this list
-- that contains these residues will be used as the display style for these
-- residues.
-- Also contains the current viewpoint, so that user's camera angle
-- can be stored and reproduced, for illustrations, on-line figures, etc.
Cn3d-user-annotations ::= SEQUENCE {
    annotations SEQUENCE OF Cn3d-user-annotation OPTIONAL,
    view        Cn3d-view-settings OPTIONAL
}

END

-- docsum.asn
-- ============================================
-- ::DATATOOL:: Generated from "docsum_3.0.xsd"
-- ::DATATOOL:: by application DATATOOL version 1.8.6
-- ::DATATOOL:: on 05/02/2008 10:59:28
-- ============================================

-- edited with XMLSPY v5 rel. 4 U (http://www.xmlspy.com) by Michael Kholodov (National Library of Medicine)
-- edited with XMLSpy v2005 rel. 3 U (http://www.altova.com) by Michael Feolo (NCBI/NLM/NIH)

Docsum-3-0 DEFINITIONS ::=
BEGIN

Assay ::= SEQUENCE {
    attlist SET {
        handle  VisibleString OPTIONAL,
        batch   VisibleString OPTIONAL,
        batchId INTEGER OPTIONAL,
        batchType ENUMERATED {
            snpassay   (1),
            validation (2),
            doublehit  (3)
        } OPTIONAL,
        molType ENUMERATED {
            genomic (1),
            cDNA    (2),
            mito    (3),
            chloro  (4)
        } OPTIONAL,
        sampleSize INTEGER OPTIONAL,
        population VisibleString OPTIONAL,
        linkoutUrl VisibleString OPTIONAL
    },
    method SEQUENCE {
        eMethod SEQUENCE {
            attlist SET {
                -- Submitter's method identifier
                name VisibleString OPTIONAL,
                -- dbSNP method identifier
                id VisibleString OPTIONAL
            },
            -- description of deviation from/addition to given method
            exception VisibleString
        } OPTIONAL
    },
    taxonomy SEQUENCE {
        attlist SET {
            -- NCBI taxonomy ID for variation
            id INTEGER,
            organism VisibleString OPTIONAL
        },
        taxonomy NULL
    },
    strains  SEQUENCE OF VisibleString OPTIONAL,
    comment  VisibleString OPTIONAL,
    citation SEQUENCE OF VisibleString OPTIONAL
}

-- A collection of genome sequence records (curated gene regions (NG's),
-- contigs (NWNT's) and chromosomes (NC/AC's)) produced by a genome sequence
-- project. Structure is populated from ContigInfo tables.
Assembly ::= SEQUENCE {
    attlist SET {
        -- dbSNP build number defining the rsid set aligned to this assembly
        dbSnpBuild INTEGER,
        -- assembly build number with possible 'subbuild' version numbers to
        -- reflect updates in gene annotation (human e.g. 34_3, 35_1, 36_1)
        genomeBuild VisibleString,
        -- High-level classification of the assembly to distinguish reference
        -- projects from alternate solutions. GroupLabel field from
        -- organism/build-specific ContigInfo tables. "reference" is
        -- occasionally used as the preferred assembly; standards will converge
        -- as additional organism genome projects are finished. Note that some
        -- organism assembly names include extended characters like '~' and '/'
        -- that may be incompatible with OS filename conventions.
        groupLabel VisibleString OPTIONAL,
        -- Name of the group(s) or organization(s) that generated the assembly
        assemblySource VisibleString OPTIONAL,
        -- Marks the current genomic assembly
        current BOOLEAN OPTIONAL,
        reference BOOLEAN OPTIONAL
    },
    component SEQUENCE OF Component OPTIONAL,
    snpStat SEQUENCE {
        attlist SET {
            -- summary measure of placement precision in the assembly
            mapWeight ENUMERATED {
                unmapped           (1),
                unique-in-contig   (2),
                two-hits-in-contig (3),
                less-10-hits       (4),
                multiple-hits      (5)
            },
            -- number of distinct chromosomes in the mapset
            chromCount INTEGER OPTIONAL,
            -- number of distinct contigs [ gi | accession[.version] ] in the mapset
            placedContigCount INTEGER OPTIONAL,
            -- number of sequence positions to a contig with unknown
            -- chromosomal assignment
            unplacedContigCount INTEGER OPTIONAL,
            -- total number of sequence positions in the mapset
            seqlocCount INTEGER OPTIONAL,
            -- Number of hits to alternative genomic haplotypes (e.g. HLA DR
            -- region, KIR, or pseudo-autosomal regions like PAR) within the
            -- assembly mapset. Note that positions on haplotypes defined in
            -- other assemblies (a different assembly_group_label value) will
            -- not be counted in this value.
            hapCount INTEGER OPTIONAL
        },
        snpStat NULL
    }
}

-- URL value from dbSNP_main.BaseURL links table. Attributes provide context
-- information and URL id that is referenced within individual refSNP objects.
BaseURL ::= SEQUENCE {
    attlist SET {
        -- Resource identifier from dbSNP_main.baseURL.
        urlId INTEGER OPTIONAL,
        -- Name of linked resource
        resourceName VisibleString OPTIONAL,
        -- identifier expected by resource for URL
        resourceId VisibleString OPTIONAL
    },
    baseURL VisibleString
}

Component ::= SEQUENCE {
    attlist SET {
        -- type of component: chromosome, contig, gene_region, etc.
        componentType ENUMERATED {
            contig (1),
            mrna   (2)
        } OPTIONAL,
        -- dbSNP contig_id used to join on contig hit / mapset data to these
        -- assembly properties
        ctgId INTEGER OPTIONAL,
        -- Accession[.version] for the sequence component
        accession VisibleString OPTIONAL,
        -- contig name defined as either a submitter local id, element of a
        -- whole genome assembly set, or internal NCBI local id
        name VisibleString OPTIONAL,
        -- Organism appropriate chromosome tag, 'Un' reserved for default case
        -- of unplaced components
        chromosome VisibleString OPTIONAL,
        -- component starting position on the chromosome (base 0 inclusive)
        start INTEGER OPTIONAL,
        -- component ending position on the chromosome (base 0 inclusive)
        end INTEGER OPTIONAL,
        -- orientation of this component to chromosome, forward (fwd) = 0,
        -- reverse (rev) = 1, unknown = NULL in ContigInfo.orient.
        orientation ENUMERATED {
            fwd     (1),
            rev     (2),
            unknown (3)
        } OPTIONAL,
        -- NCBI gi for component sequence (equivalent to accession.version)
        -- for nucleotide sequence.
        gi VisibleString OPTIONAL,
        -- Identifier label for the genome assembly that defines the contigs in
        -- this mapset and their placement within the organism genome.
        groupTerm VisibleString OPTIONAL,
        -- Display label for component
        contigLabel VisibleString OPTIONAL
    },
    mapLoc SEQUENCE OF MapLoc
}

-- Set of dbSNP refSNP docsums
ExchangeSet ::= SEQUENCE {
    attlist SET {
        -- set-type: full dump; from query; single refSNP
        setType VisibleString OPTIONAL,
        -- content depth: brief XML (only refSNP properties and summary subSNP
        -- element content); full XML (full refSNP, full subSNP content; all
        -- flanking sequences)
        setDepth VisibleString OPTIONAL,
        -- version number of docsum.asn/docsum.dtd specification
        specVersion VisibleString OPTIONAL,
        -- build number of database for this export
        dbSnpBuild INTEGER OPTIONAL,
        -- Generated date
        generated VisibleString OPTIONAL
    },
    sourceDatabase SEQUENCE {
        attlist SET {
            -- NCBI taxonomy ID for variation
            taxId INTEGER,
            -- common name for species used as part of database name.
            organism VisibleString,
            -- organism abbreviation used in dbSNP.
            dbSnpOrgAbbr VisibleString OPTIONAL,
            -- organism abbreviation used within NCBI genome pipeline data dumps.
            gpipeOrgAbbr VisibleString OPTIONAL
        },
        sourceDatabase NULL
    },
    rs    SEQUENCE OF Rs OPTIONAL,
    assay Assay OPTIONAL,
    query SEQUENCE {
        attlist SET {
            -- yyyy-mm-dd
            date VisibleString OPTIONAL,
            -- Query terms or search constraints
            string VisibleString OPTIONAL
        },
        query NULL
    } OPTIONAL,
    summary SEQUENCE {
        attlist SET {
            -- Total number of refsnp-ids in this exchange set
            numRsIds INTEGER OPTIONAL,
            -- Total length of exemplar flanking sequences
            totalSeqLength INTEGER OPTIONAL,
            -- Total number of contig locations from SNPContigLoc
            numContigHits INTEGER OPTIONAL,
            -- Total number of locus ids from SNPContigLocusId
            numGeneHits INTEGER OPTIONAL,
            -- Total number of gi hits from MapLink
            numGiHits INTEGER OPTIONAL,
            -- Total number of 3D structures from SNP3D
            num3dStructs INTEGER OPTIONAL,
            -- Total number of allele frequencies from SubPopAllele
            numAlleleFreqs INTEGER OPTIONAL,
            -- Total number of STS hits from SnpInSts
            numStsHits INTEGER OPTIONAL,
            -- Total number of unigene cluster ids from UnigeneSnp
            numUnigeneCids INTEGER OPTIONAL
        },
        summary NULL
    },
    baseURL SEQUENCE OF BaseURL
}

-- functional relationship of SNP (and possibly alleles) to genes at contig
-- location as defined in organism-specific bxxx_SNPContigLocusId_xxx tables.
FxnSet ::= SEQUENCE {
    attlist SET {
        -- gene-id of gene as aligned to contig
        geneId INTEGER OPTIONAL,
        -- symbol (official if present in Entrez Gene) of gene
        symbol VisibleString OPTIONAL,
        -- mRNA accession if variation in transcript
        mrnaAcc VisibleString OPTIONAL,
        -- mRNA sequence version if variation is in transcript
        mrnaVer INTEGER OPTIONAL,
        -- protein accession if variation in protein
        protAcc VisibleString OPTIONAL,
        -- protein version if variation is in protein
        protVer INTEGER OPTIONAL,
        -- variation in region of gene, but not in transcript - deprecated
        -- synonymous change
        -- nonsynonymous change - deprecated
        -- untranslated region - deprecated
        -- splice-site - deprecated
        -- contig reference
        -- deprecated
        -- coding: synonymy unknown
        -- In gene segment with null mrna and protein. ex. IGLV4-69. geneId=28784
        -- within 3' 0.5kb to a gene.
        -- changes to STOP codon.
        -- alters codon to make an altered amino acid in protein product.
        -- indel snp causing frameshift.
        -- 3 prime untranslated region
        -- 5 prime untranslated region
        -- 3 prime acceptor dinucleotide
        -- 5 prime donor dinucleotide
        fxnClass ENUMERATED {
            locus-region         (1),
            coding-unknown       (2),
            coding-synonymous    (3),
            coding-nonsynonymous (4),
            mrna-utr             (5),
            intron               (6),
            splice-site          (7),
            reference            (8),
            coding-exception     (9),
            synonymy-unknown     (10),
            gene-segment         (11),
            near-gene-3          (12),
            near-gene-5          (13),
            nonsense             (14),
            missense             (15),
            frameshift           (16),
            utr-3                (17),
            utr-5                (18),
            splice-3             (19),
            splice-5             (20)
        } OPTIONAL,
        readingFrame INTEGER OPTIONAL,
        -- variation allele: * suffix indicates allele of contig at this location
        allele VisibleString OPTIONAL,
        -- translated amino acid residue for allele
        residue VisibleString OPTIONAL,
        -- position of the variant residue in peptide sequence
        aaPosition INTEGER OPTIONAL
    },
    -- functional relationship of SNP (and possibly alleles) to genes at contig
    -- location as defined in organism-specific bxxx_SNPContigLocusId_xxx tables.
    fxnSet NULL
}

-- Position of a single hit of a variation on a contig
MapLoc ::= SEQUENCE {
    attlist SET {
        -- beginning of variation as feature on contig
        asnFrom INTEGER,
        -- end position of variation as feature on contig
        asnTo INTEGER,
        -- defines the seq-loc symbol if asn_from != asn_to
        -- insertion on contig
        -- asn-from = asn-to write as 'asn-from'
        -- deletion on contig
        locType ENUMERATED {
            insertion   (1),
            exact       (2),
            deletion    (3),
            range-ins   (4),
            range-exact (5),
            range-del   (6)
        },
        -- alignment quality
        alnQuality REAL OPTIONAL,
        -- orientation of refSNP sequence to contig sequence
        orient ENUMERATED {
            forward (1),
            reverse (2)
        } OPTIONAL,
        -- chromosome position as integer for sorting
        physMapInt INTEGER OPTIONAL,
        -- nearest aligned position in 5' flanking sequence of snp
        leftFlankNeighborPos INTEGER OPTIONAL,
        -- nearest aligned position in 3' flanking sequence of snp
        rightFlankNeighborPos INTEGER OPTIONAL,
        -- nearest aligned position in 5' contig alignment of snp
        leftContigNeighborPos INTEGER OPTIONAL,
        -- nearest aligned position in 3' contig alignment of snp
        rightContigNeighborPos INTEGER OPTIONAL,
        -- number of Mismatched positions in this alignment
        numberOfMismatches INTEGER OPTIONAL,
        -- number of deletions in this alignment
        numberOfDeletions INTEGER OPTIONAL,
        -- number of insertions in this alignment
        numberOfInsertions INTEGER OPTIONAL
    },
    fxnSet SEQUENCE OF FxnSet OPTIONAL
}

PrimarySequence ::= SEQUENCE {
    attlist SET {
        dbSnpBuild INTEGER,
        gi         INTEGER,
        source ENUMERATED {
            submitter (1),
            blastmb   (2),
            xm        (3)
        } OPTIONAL,
        accession VisibleString OPTIONAL
    },
    mapLoc SEQUENCE OF MapLoc
}

-- defines the docsum structure for refSNP clusters, where a refSNP cluster
-- (rs) is a grouping of individual dbSNP submissions that all refer to the
-- same variation. The refsnp provides a single unified record for annotation
-- of NCBI resources such as reference genome sequence.
Rs ::= SEQUENCE { attlist SET { rsId INTEGER, --refSNP (rs) number snpClass ENUMERATED { snp (1), in-del (2), heterozygous (3), microsatellite (4), named-locus (5), no-variation (6), mixed (7), multinucleotide-polymorphism (8) }, snpType ENUMERATED { notwithdrawn (1), artifact (2), gene-duplication (3), duplicate-submission (4), notspecified (5), ambiguous-location (6), low-map-quality (7) }, molType ENUMERATED { genomic (1), cDNA (2), mito (3), chloro (4), unknown (5) }, validProbMin INTEGER OPTIONAL, --minimum reported success rate of all submissions in cluster validProbMax INTEGER OPTIONAL, --maximum reported success rate of all submissions in cluster genotype BOOLEAN OPTIONAL, --at least one genotype reported for this refSNP bitField VisibleString OPTIONAL }, het SEQUENCE { attlist SET { type ENUMERATED { est (1), obs (2) }, --Est=Estimated average het from allele frequencies, Obs=Observed from genotype data value REAL, --Heterozygosity stdError REAL OPTIONAL --Standard error of Het estimate }, het NULL } OPTIONAL, validation SEQUENCE { attlist SET { byCluster BOOLEAN OPTIONAL, --at least one subsnp in cluster has frequency data submitted byFrequency BOOLEAN OPTIONAL, --cluster has 2+ submissions, with 1+ submissions assayed with a non-computational method byOtherPop BOOLEAN OPTIONAL, by2Hit2Allele BOOLEAN OPTIONAL, --cluster has 2+ submissions, with 1+ submissions assayed with a non-computational method byHapMap BOOLEAN OPTIONAL --TBD }, otherPopBatchId SEQUENCE OF INTEGER OPTIONAL, --dbSNP batch-id's for other pop snp validation data. twoHit2AlleleBatchId SEQUENCE OF INTEGER OPTIONAL --dbSNP batch-id's for double-hit snp validation data. Use batch-id to get methods, etc. 
}, --date the refsnp cluster was instantiated create SEQUENCE { --date the refsnp cluster was instantiated attlist SET { build INTEGER OPTIONAL, --build number when the cluster was created date VisibleString OPTIONAL --yyyy-mm-dd }, --date the refsnp cluster was instantiated create NULL }, --date the refsnp cluster was instantiated --most recent date the cluster was updated (member added or deleted) update SEQUENCE { --most recent date the cluster was updated (member added or deleted) attlist SET { build INTEGER OPTIONAL, --build number when the cluster was updated date VisibleString OPTIONAL --yyyy-mm-dd }, --most recent date the cluster was updated (member added or deleted) update NULL } OPTIONAL, --most recent date the cluster was updated (member added or deleted) sequence SEQUENCE { attlist SET { exemplarSs INTEGER --dbSNP ss# selected as source of refSNP flanking sequence, ss# part of ss-list below }, --5' sequence that flanks the variation --5' sequence that flanks the variation seq5 VisibleString OPTIONAL, --list of all nucleotide alleles observed in ss-list members, correcting for reverse complementation of memebers reported in reverse orientation --list of all nucleotide alleles observed in ss-list members, correcting for reverse complementation of memebers reported in reverse orientation observed VisibleString, --3' sequence that flanks the variation --3' sequence that flanks the variation seq3 VisibleString OPTIONAL }, ss SEQUENCE OF Ss, assembly SEQUENCE OF Assembly OPTIONAL, primarySequence SEQUENCE OF PrimarySequence OPTIONAL, rsStruct SEQUENCE OF RsStruct OPTIONAL, rsLinkout SEQUENCE OF RsLinkout OPTIONAL, mergeHistory SEQUENCE OF SEQUENCE { attlist SET { rsId INTEGER, --previously issued rs id whose member assays have now been merged buildId INTEGER OPTIONAL, --build id when rs id was merged into parent rs orientFlip BOOLEAN OPTIONAL --TRUE if strand of rs id is reverse to parent object's current strand }, mergeHistory NULL } OPTIONAL, hgvs SEQUENCE 
OF VisibleString OPTIONAL  -- HGVS name list
}

--link data for another resource
RsLinkout ::= SEQUENCE {
    attlist SET {
        resourceId VisibleString,  --BaseURLList.url_id
        linkValue VisibleString  --value to append to ResourceURL.base-url for complete link
    },
    --link data for another resource
    rsLinkout NULL
}

--structure information for SNP
RsStruct ::= SEQUENCE {
    attlist SET {
        protAcc VisibleString OPTIONAL,  --accession of the protein with variation
        protGi INTEGER OPTIONAL,  --GI of the protein with variation
        protLoc INTEGER OPTIONAL,  --position of the residue for the protein GI
        protResidue VisibleString OPTIONAL,  --residue specified for protein at prot-loc location
        rsResidue VisibleString OPTIONAL,  --alternative residue specified by variation sequence
        structGi INTEGER OPTIONAL,  --GI of the structure neighbor
        structLoc INTEGER OPTIONAL,  --position of the residue for the structure GI
        structResidue VisibleString OPTIONAL  --residue specified for protein at struct-loc location
    },
    --structure information for SNP
    rsStruct NULL
}

--data for an individual submission to dbSNP
Ss ::= SEQUENCE {
    attlist SET {
        ssId INTEGER,  --dbSNP accession number for submission
        handle VisibleString,  --Tag for the submitting laboratory
        batchId INTEGER,  --dbSNP number for batch submission

        --submission (ss#)
        --submitter ID
        locSnpId VisibleString OPTIONAL,
        subSnpClass ENUMERATED {
            snp (1),
            in-del (2),
            heterozygous (3),
            microsatellite (4),
            named-locus (5),
            no-variation (6),
            mixed (7),
            multinucleotide-polymorphism (8)
        } OPTIONAL,  --SubSNP classification by type of variation

        --orientation of refsnp cluster members to refsnp cluster sequence
        --ss flanking sequence is in same orientation as seq-ss-exemplar
        --flanking sequence and alleles are reverse complement of refSNP as defined by ss exemplar
        orient ENUMERATED {
            forward (1),
            reverse (2)
        } OPTIONAL,
        strand ENUMERATED {
            top (1),
            bottom (2)
        } OPTIONAL,  --strand is defined as TOP/BOTTOM by nature of flanking nucleotide sequence
        molType ENUMERATED {
            genomic (1),
            cDNA (2),
            mito (3),
            chloro (4),
            unknown (5)
        } OPTIONAL,  --moltype from Batch table
        buildId INTEGER OPTIONAL,  --dbSNP build number when ss# was added to a refSNP (rs#) cluster

        --class of method used to assay for the variation
        --Denaturing High Pressure Liquid Chromatography used to detect SNP
        --a hybridization method (e.g. chip) was used to assay for variation
        --variation was mined from sequence alignment with software
        --samples were sequenced and resulting alignment used to define variation
        methodClass ENUMERATED {
            dHPLC (1),
            hybridize (2),
            computed (3),
            sSCP (4),
            other (5),
            unknown (6),
            rFLP (7),
            sequence (8)
        } OPTIONAL,

        --subsnp has been experimentally validated by submitter
        --subsnp has frequency data submitted
        --has 2+ submissions, with 1+ submission assayed with a non-computational method
        validated ENUMERATED {
            by-submitter (1),
            by-frequency (2),
            by-cluster (3)
        } OPTIONAL,
        linkoutUrl VisibleString OPTIONAL  --append loc-snp-id to this base URL to construct a pointer to submitter data.
    },
    sequence SEQUENCE {
        --5' sequence that flanks the variation
        seq5 VisibleString OPTIONAL,

        --list of all nucleotide alleles observed in ss-list members, correcting for
        --reverse complementation of members reported in reverse orientation
        observed VisibleString,

        --3' sequence that flanks the variation
        seq3 VisibleString OPTIONAL
    }
}

END

-- entrez2.asn
--$Revision: 1.12 $********************************************
--
--  entrez2.asn
--   Version 1
--
--   API to Entrez Engine (1999)
--   Retrieval of sequence done through ID1 module
--     Also, SeqId queries
--   Retrieval of PubMed records through PubMed module
--   Retrieval of Structures through PubStruct module
--   Retrieval of Genomes through Genomes module
--
--***************************************************************

NCBI-Entrez2 DEFINITIONS ::=
BEGIN

--**************************************
--  Entrez2 common elements
--**************************************

Entrez2-dt ::= INTEGER              -- a date/time stamp
Entrez2-db-id ::= VisibleString     -- database name
Entrez2-field-id ::= VisibleString  -- field name
Entrez2-link-id ::= VisibleString   -- link name

Entrez2-id-list ::= SEQUENCE {      -- list of record UIDs
    db Entrez2-db-id ,              -- the database
    num INTEGER ,                   -- number of uids
    uids OCTET STRING OPTIONAL }    -- coded uids

--****************************************
--  The structured form of the boolean is the same in a request or
--    return so that it is easy to modify a query. This means some
--    fields are only considered in a return value, like counts
--    by term. They are ignored in a request.
--  The structured boolean supports specific boolean components,
--    an unparsed string in query syntax, and UID lists as
--    elements of a boolean. This makes it possible to submit
--    a single string, a fully structured query, or a mixture.
--    The UID list feature means one can also perform refinements
--    on UID lists from links, neighbors, or other operations.
--    UID list query now returns a history key for subsequent use.
--*****************************************

Entrez2-boolean-exp ::= SEQUENCE {
    db Entrez2-db-id ,                        -- database for this query
    exp SEQUENCE OF Entrez2-boolean-element , -- the Boolean
    limits Entrez2-limits OPTIONAL }          -- date bounds

Entrez2-boolean-element ::= CHOICE {
    str VisibleString ,           -- unparsed query string
    op Entrez2-operator ,         -- logical operator
    term Entrez2-boolean-term ,   -- fielded term
    ids Entrez2-id-list ,         -- list of UIDs - returns history key in reply
    key VisibleString }           -- history key for uploaded UID list or other query

--*****************************************
--  the term is both sent and received as parts of
--    queries and replies. The attributes can be filled in
--    by either, but may be ignored by one or the other. Flags are
--    shown if a real value is only of use in the query (Q), only
--    in the reply (R), or used in both (B)
--  do-not-explode and do-not-translate are only set by
--    the query. However, they retain those settings in the
--    return value so they can be resent with a new query
--******************************************

Entrez2-boolean-term ::= SEQUENCE {
    field Entrez2-field-id ,                 -- B
    term VisibleString ,                     -- B
    term-count INTEGER OPTIONAL,             -- R count of records with term
    do-not-explode BOOLEAN DEFAULT FALSE,    -- Q do not explode term
    do-not-translate BOOLEAN DEFAULT FALSE } -- Q do not use synonyms

Entrez2-operator ::= INTEGER {
    and (1) ,
    or (2) ,
    butnot (3) ,
    range (4) ,
    left-paren (5) ,
    right-paren (6) }

--***************************************
--  Entrez2 Request types
--***************************************

--****************************************
--  The basic request wrapper leaves space for a version which
--    allows the server to support older clients
--  The tool parameter allows us to log the client types for
--    debugging and tuning
--  The cookie is a session ID returned by the first Entrez2-reply
--****************************************

Entrez2-request ::= SEQUENCE {         -- a standard request
    request E2Request ,                -- the actual request
    version INTEGER ,                  -- ASN1 spec version
    tool VisibleString OPTIONAL ,      -- tool making request
    cookie VisibleString OPTIONAL ,    -- history session cookie
    use-history BOOLEAN DEFAULT FALSE } -- request should use history

E2Request ::= CHOICE {                       -- request types
    get-info NULL ,                          -- ask for info block
    eval-boolean Entrez2-eval-boolean ,      -- Boolean lookup
    get-docsum Entrez2-id-list ,             -- get the DocSums
    get-term-pos Entrez2-term-query,         -- get position in term list
    get-term-list Entrez2-term-pos ,         -- get Term list by position
    get-term-hierarchy Entrez2-hier-query,   -- get a hierarchy from a term
    get-links Entrez2-get-links ,            -- get specific links from a UID list
    get-linked Entrez2-get-links ,           -- get subset of UID list which has links
    get-link-counts Entrez2-id }             -- get all links from one UID

--****************************************
--  When evaluating a boolean query the count of hits is always
--   returned.
--   In addition, you can request the UIDs of the hits or
--   the parsed query in structured form (with counts by term),
--   or both.
--****************************************

Entrez2-eval-boolean ::= SEQUENCE {       -- evaluate Boolean query
    return-UIDs BOOLEAN DEFAULT FALSE,    -- return UID list?
    return-parse BOOLEAN DEFAULT FALSE,   -- return parsed query?
    query Entrez2-boolean-exp }           -- the actual query

Entrez2-dt-filter ::= SEQUENCE {
    begin-date Entrez2-dt,
    end-date Entrez2-dt,
    type-date Entrez2-field-id }

Entrez2-limits ::= SEQUENCE {                 -- date limits
    filter-date Entrez2-dt-filter OPTIONAL,
    max-UIDs INTEGER OPTIONAL,                -- max UIDs to return in list
    offset-UIDs INTEGER OPTIONAL }            -- start partway into UID list

Entrez2-id ::= SEQUENCE {                     -- a single UID
    db Entrez2-db-id ,
    uid INTEGER }

Entrez2-term-query ::= SEQUENCE {
    db Entrez2-db-id ,
    field Entrez2-field-id ,
    term VisibleString }

Entrez2-hier-query ::= SEQUENCE {
    db Entrez2-db-id ,
    field Entrez2-field-id ,
    term VisibleString OPTIONAL ,             -- query with either term
    txid INTEGER OPTIONAL }                   -- or Taxonomy ID

Entrez2-term-pos ::= SEQUENCE {               -- request portions of term list
    db Entrez2-db-id ,
    field Entrez2-field-id ,
    first-term-pos INTEGER ,
    number-of-terms INTEGER OPTIONAL }        -- optional for hierarchy only

Entrez2-get-links ::= SEQUENCE {              -- request links of one type
    uids Entrez2-id-list ,                    -- docs to link from
    linktype Entrez2-link-id ,                -- type of link
    max-UIDS INTEGER OPTIONAL ,               -- maximum number of links to return
    count-only BOOLEAN OPTIONAL ,             -- return only the counts
    parents-persist BOOLEAN OPTIONAL }        -- allow original uids in list

--**********************************************************
--  Replies from the Entrez server
--   all replies contain the date/time stamp when they were executed
--   to do regular date bounded searches use this value+1 to search
--   again later instead of recording the date/time on the client machine
--   the cookie allows a simple key string to represent UID lists in the history
--**********************************************************

Entrez2-reply ::= SEQUENCE {
    reply E2Reply ,                     -- the actual reply
    dt Entrez2-dt ,                     -- date/time stamp from server
    server VisibleString ,              -- server version info
    msg VisibleString OPTIONAL ,        -- possibly a message to the user
    key VisibleString OPTIONAL ,        -- history key for query
    cookie VisibleString OPTIONAL }     -- history session cookie

E2Reply ::= CHOICE {
    error VisibleString ,                  -- if nothing can be returned
    get-info Entrez2-info ,                -- the database info
    eval-boolean Entrez2-boolean-reply,    -- result of boolean query
    get-docsum Entrez2-docsum-list,
    get-term-pos INTEGER,                  -- position of the term
    get-term-list Entrez2-term-list,
    get-term-hierarchy Entrez2-hier-node,
    get-links Entrez2-link-set,
    get-linked Entrez2-id-list,
    get-link-counts Entrez2-link-count-list }

Entrez2-info ::= SEQUENCE {                  -- describes all the databases
    db-count INTEGER ,                       -- number of databases
    build-date Entrez2-dt ,                  -- build date of databases
    db-info SEQUENCE OF Entrez2-db-info }    -- info by database

Entrez2-db-info ::= SEQUENCE {               -- info for one database
    db-name Entrez2-db-id ,                  -- internal name
    db-menu VisibleString ,                  -- short name for menu
    db-descr VisibleString ,                 -- longer explanatory name
    doc-count INTEGER ,                      -- total number of records
    field-count INTEGER ,                    -- number of field types
    fields SEQUENCE OF Entrez2-field-info,
    link-count INTEGER ,                     -- number of link types
    links SEQUENCE OF Entrez2-link-info,
    docsum-field-count INTEGER,
    docsum-fields SEQUENCE OF Entrez2-docsum-field-info }

Entrez2-field-info ::= SEQUENCE {            -- info about one field
    field-name Entrez2-field-id ,            -- the internal name
    field-menu VisibleString ,               -- short string suitable for menu
    field-descr VisibleString ,              -- longer, explanatory name
    term-count INTEGER ,                     -- number of terms in field
    is-date BOOLEAN OPTIONAL ,
    is-numerical BOOLEAN OPTIONAL ,
    single-token BOOLEAN OPTIONAL ,
    hierarchy-avail BOOLEAN OPTIONAL ,
    is-rangable BOOLEAN OPTIONAL ,
    is-truncatable BOOLEAN OPTIONAL }
Entrez2-link-info ::= SEQUENCE {             -- info about one link
    link-name Entrez2-link-id ,
    link-menu VisibleString ,
    link-descr VisibleString ,
    db-to Entrez2-db-id ,                    -- database it links to
    data-size INTEGER OPTIONAL }             -- size of link data element

Entrez2-docsum-field-type ::= INTEGER {
    string (1) ,
    int (2) ,
    float (3) ,
    date-pubmed (4) }

Entrez2-docsum-field-info ::= SEQUENCE {
    field-name VisibleString,
    field-description VisibleString,
    field-type Entrez2-docsum-field-type }

Entrez2-boolean-reply ::= SEQUENCE {
    count INTEGER ,                          -- records hit
    uids Entrez2-id-list OPTIONAL,           -- if uids requested
    query Entrez2-boolean-exp OPTIONAL }     -- if parsed query requested

Entrez2-docsum-list ::= SEQUENCE {
    count INTEGER ,                          -- number of docsums
    list SEQUENCE OF Entrez2-docsum }

Entrez2-docsum ::= SEQUENCE {
    uid INTEGER ,                            -- primary uid (gi, pubmedid)
    docsum-data SEQUENCE OF Entrez2-docsum-data }

Entrez2-docsum-data::= SEQUENCE {
    field-name VisibleString,
    field-value VisibleString }

Entrez2-term-list ::= SEQUENCE {
    pos INTEGER,                             -- position of first term in list
    num INTEGER,                             -- number of terms in list
    list SEQUENCE OF Entrez2-term }

Entrez2-term ::= SEQUENCE {
    term VisibleString ,
    txid INTEGER OPTIONAL,
    count INTEGER ,                          -- count of records with this term
    is-leaf-node BOOLEAN OPTIONAL }          -- used for hierarchy only

Entrez2-hier-node ::= SEQUENCE {                  -- for hierarchical index
    cannonical-form VisibleString ,               -- the official name
    lineage-count INTEGER ,                       -- number of strings in lineage
    lineage SEQUENCE OF Entrez2-term OPTIONAL ,   -- strings up the lineage
    child-count INTEGER ,                         -- number of children of this node
    children SEQUENCE OF Entrez2-term ,           -- the children
    is-ambiguous BOOLEAN OPTIONAL }               -- used for hierarchy only

--*******************************************
--  Links are returned in sets also using OCTET STRINGS
--*******************************************

Entrez2-link-set ::= SEQUENCE {              -- set of links
    ids Entrez2-id-list ,
    data-size INTEGER OPTIONAL ,             -- size of data elements
    data OCTET STRING OPTIONAL }             -- coded scores

Entrez2-link-count-list ::= SEQUENCE {       -- all links from 1 uid
    link-type-count INTEGER ,                -- number of types of links
    links SEQUENCE OF Entrez2-link-count }

Entrez2-link-count ::= SEQUENCE {            -- link count of one type
    link-type Entrez2-link-id ,
    link-count INTEGER }

END

-- entrezgene.asn
--$Revision: 1.29 $
--**********************************************************************
--
--  NCBI Entrezgene
--  by James Ostell, 2001
--
--  Generic "Gene" object for Entrez Genes
--    This object is designed to incorporate a subset of information from
--    LocusLink and from records in Entrez Genomes to provide indexing,
--    linkage, and a useful summary report in Entrez for "Genes"
--
--**********************************************************************

NCBI-Entrezgene DEFINITIONS ::=
BEGIN

EXPORTS Entrezgene, Entrezgene-Set;

IMPORTS Gene-ref FROM NCBI-Gene
        Prot-ref FROM NCBI-Protein
        BioSource FROM NCBI-BioSource
        RNA-ref FROM NCBI-RNA
        Dbtag, Date FROM NCBI-General
        Seq-loc FROM NCBI-Seqloc
        Pub FROM NCBI-Pub;

--********************************************
-- Entrezgene is the "document" indexed in Entrez
--  and presented in the full display
-- It also contains the Entrez ID and date information
--*******************************************

Entrezgene ::= SEQUENCE {
    track-info Gene-track OPTIONAL ,  -- not in submission, but in retrieval
    type INTEGER {                    -- type of Gene
        unknown (0) ,
        tRNA (1) ,
        rRNA (2) ,
        snRNA (3) ,
        scRNA (4) ,
        snoRNA (5) ,
        protein-coding (6) ,
        pseudo (7) ,
        transposon (8) ,
        miscRNA (9) ,
        other (255) } ,
    source BioSource ,
    gene Gene-ref ,                   -- for locus-tag see note 3
    prot Prot-ref OPTIONAL ,
    rna RNA-ref OPTIONAL ,
    summary VisibleString OPTIONAL ,  -- short summary
    location SEQUENCE OF Maps OPTIONAL,
    gene-source Gene-source OPTIONAL ,            -- NCBI source to Entrez
    locus SEQUENCE OF Gene-commentary OPTIONAL ,  -- location of gene on chromosome (if known)
                                                  -- and all information about products
                                                  -- (mRNA, proteins and so on)
    properties SEQUENCE OF Gene-commentary OPTIONAL ,
    refgene SEQUENCE OF Gene-commentary OPTIONAL ,  -- NG for this?
    homology SEQUENCE OF Gene-commentary OPTIONAL ,
    comments SEQUENCE OF Gene-commentary OPTIONAL ,
    unique-keys SEQUENCE OF Dbtag OPTIONAL ,               -- see note 3
    xtra-index-terms SEQUENCE OF VisibleString OPTIONAL ,  -- see note 2
    xtra-properties SEQUENCE OF Xtra-Terms OPTIONAL ,      -- see note 2
    xtra-iq SEQUENCE OF Xtra-Terms OPTIONAL,               -- see note 2
    non-unique-keys SEQUENCE OF Dbtag OPTIONAL }

Entrezgene-Set ::= SET OF Entrezgene

Gene-track ::= SEQUENCE {
    geneid INTEGER ,        -- required unique document id
    status INTEGER {
        live (0) ,
        secondary (1) ,     -- synonym with merged
        discontinued (2),   -- 'deleted', still index and display to public
        newentry (3)        -- for GeneRif submission
    } DEFAULT live ,
    current-id SEQUENCE OF Dbtag OPTIONAL ,  -- see note 1 below
    create-date Date ,                       -- date created in Entrez
    update-date Date ,                       -- last date updated in Entrez
    discontinue-date Date OPTIONAL }         --

Gene-source ::= SEQUENCE {
    src VisibleString ,                   -- key to the source within NCBI locuslink, Ecoli, etc
    src-int INTEGER OPTIONAL ,            -- eg. locuslink id
    src-str1 VisibleString OPTIONAL ,     -- eg. chromosome1
    src-str2 VisibleString OPTIONAL ,     -- see note 3
    gene-display BOOLEAN DEFAULT FALSE ,  -- do we have a URL for gene display?
    locus-display BOOLEAN DEFAULT FALSE , -- do we have a URL for map/locus display?
    extra-terms BOOLEAN DEFAULT FALSE }   -- do we have a URL for extra indexing terms?

Gene-commentary ::= SEQUENCE {
    type INTEGER {              -- type of Gene Commentary
        genomic (1) ,
        pre-RNA (2) ,
        mRNA (3) ,
        rRNA (4) ,
        tRNA (5) ,
        snRNA (6) ,
        scRNA (7) ,
        peptide (8) ,
        other-genetic (9) ,
        genomic-mRNA (10) ,
        cRNA (11) ,
        mature-peptide (12) ,
        pre-protein (13) ,
        miscRNA (14) ,
        snoRNA (15) ,
        property (16) ,         -- used to display tag/value pair
                                -- for this type label is used as property tag, text is used as property value,
                                -- other fields are not used.
        reference (17),         -- currently not used
        generif (18),           -- to include generif in the main blob
        phenotype(19),          -- to display phenotype information
        complex (20),           -- used (but not limited) to identify resulting
                                -- interaction complexes
        compound (21),          -- pubchem entities
        comment (254) ,
        other (255) } ,
    heading VisibleString OPTIONAL ,      -- appears above text
    label VisibleString OPTIONAL ,        -- occurs to left of text
                                          -- for protein and RNA types it is a name
                                          -- for property type it is a property tag
    text VisibleString OPTIONAL ,         -- block of text
                                          -- for property type it is a property value
    accession VisibleString OPTIONAL ,    -- accession for the gi in the seqloc, see note 3
    version INTEGER OPTIONAL ,            -- version for the accession above
    xtra-properties SEQUENCE OF Xtra-Terms OPTIONAL ,  -- see note 2
    refs SEQUENCE OF Pub OPTIONAL ,                    -- refs for this
    source SEQUENCE OF Other-source OPTIONAL ,         -- links and refs
    genomic-coords SEQUENCE OF Seq-loc OPTIONAL ,      -- referenced sequences in genomic coords
    seqs SEQUENCE OF Seq-loc OPTIONAL ,                -- referenced sequences in non-genomic coords
    products SEQUENCE OF Gene-commentary OPTIONAL ,
    properties SEQUENCE OF Gene-commentary OPTIONAL ,
    comment SEQUENCE OF Gene-commentary OPTIONAL ,
    create-date Date OPTIONAL ,
    update-date Date OPTIONAL }

Other-source ::= SEQUENCE {
    src Dbtag OPTIONAL ,                 -- key to non-ncbi source
    pre-text VisibleString OPTIONAL ,    -- text before anchor
    anchor VisibleString OPTIONAL ,      -- text to show as highlight
    url VisibleString OPTIONAL ,         -- if present, use this URL not Dbtag and database
    post-text VisibleString OPTIONAL }   -- text after anchor

Maps::= SEQUENCE {
    display-str VisibleString ,
    method CHOICE {
        proxy VisibleString ,        --url to non mapviewer mapviewing resource
        map-type ENUMERATED {        -- units used in display-str to query mapviewer
            cyto (0) ,
            bp (1) ,
            cM (2) ,
            cR (3) ,
            min (4)}}}

Xtra-Terms ::= SEQUENCE {            -- see note 2
    tag VisibleString ,
    value VisibleString }

END
--**********************************************************************
--
--  Comments, notes, etc.
--
--  1) Ignored unless status = secondary.  This is where gene_ids (db = "GeneID")
--     are placed toward which the interface will direct users.  It is also
--     available for placing other source-db specific tags (i.e., db = "LocusID").
--
--  2) These 'xtra' objects are for submitting data for Entrez indexing
--     that might not fit anywhere in the Entrezgene specification but
--     are considered by the data source submitter to be important.
--        xtra-index-terms is any string.
--        xtra-properties are tag/value pairs of properties/fields as
--           defined in the Entrez database (i.e.: UNIGENE/Hs.74561)
--        xtra-iq are tag/value pairs of Entrez database/UID as defined
--           in the Entrezgene indexing code (i.e.: NUCLEOTIDE/20270626)
--
--  3) Locus-tag and src-str2 are expected to be unique per organism (tax_id).
--     Protein accessions and the tag-value pairs in unique-keys
--     are expected to be unique over all organisms.
--**********************************************************************

-- featdef.asn
--$Revision: 6.0 $
--**********************************************************************
--
--  NCBI Sequence Feature Definition Module
--  by James Ostell, 1994
--
--**********************************************************************

NCBI-FeatDef DEFINITIONS ::=
BEGIN

EXPORTS FeatDef, FeatDefSet, FeatDispGroup, FeatDispGroupSet;

FeatDef ::= SEQUENCE {
    typelabel VisibleString ,   -- short label for type eg "CDS"
    menulabel VisibleString ,   -- label for a menu eg "Coding Region"
    featdef-key INTEGER ,       -- unique for this feature definition
    seqfeat-key INTEGER ,       -- SeqFeat.data.choice from objfeat.h
    entrygroup INTEGER ,        -- Group for data entry
    displaygroup INTEGER ,      -- Group for data display
    molgroup FeatMolType        -- Type of Molecule used for
}

FeatMolType ::= ENUMERATED {
    aa (1),        -- proteins
    na (2),        -- nucleic acids
    both (3) }     -- both

FeatDefSet ::= SEQUENCE OF FeatDef   -- collections of definitions

FeatDispGroup ::= SEQUENCE {
    groupkey INTEGER ,
    groupname VisibleString }

FeatDispGroupSet ::= SEQUENCE OF FeatDispGroup

FeatDefGroupSet ::= SEQUENCE {
    groups FeatDispGroupSet ,
    defs FeatDefSet }

END

-- gbseq.asn
--$Revision: 6.6 $
--*********************************************************
--
-- ASN.1 and XML for the components of a GenBank format sequence
-- J.Ostell 2002
-- Updated 14 December 2005
--
--*********************************************************

NCBI-GBSeq DEFINITIONS ::=
BEGIN

--********
--  GBSeq represents the elements in a GenBank style report
--    of a sequence with some small additions to structure and support
--    for protein (GenPept) versions of GenBank format as seen in
--    Entrez. While this represents the simplification, reduction of
--    detail, and flattening to a single sequence perspective of GenBank
--    format (compared with the full ASN.1 or XML from which GenBank and
--    this format is derived at NCBI), it is presented in ASN.1 or XML for
--    automated parsing and processing. It is hoped that this compromise
--    will be useful for those bulk processing at the GenBank format level
--    of detail today. Since it is a compromise, a number of pragmatic
--    decisions have been made.
--
--  In pursuit of simplicity and familiarity a number of
--    fields do not have full substructure defined here where there is
--    already a standard GenBank format string. For example:
--
--    Date  DD-Mon-YYYY
--    Authors   LastName, Initials (with periods)
--    Journal   JournalName Volume (issue), page-range (year)
--    FeatureLocations as per GenBank feature table, but FeatureIntervals
--      may also be provided as a convenience
--    FeatureQualifiers  as per GenBank feature table
--    Primary has a string that represents a table to construct
--      a third party (TPA) sequence.
--    other-seqids can have strings with the "vertical bar format" sequence
--      identifiers used in BLAST for example, when they are non-genbank types.
--      Currently in GenBank format you only see GI, but there are others, like
--      patents, submitter clone names, etc which will appear here, as they
--      always have in the ASN.1 format, and full XML format.
--    source-db is a formatted text block for peptides in GenPept format that
--      carries information from the source protein database.
--
--  There are also a number of elements that could have been
--    more exactly specified, but in the interest of simplicity
--    have been simply left as options. For example..
--
--  accession and accession.version will always appear in a GenBank record
--    they are optional because this format can also be used for non-GenBank
--    sequences, and in that case will have only "other-seqids".
--
--  sequences will normally all have "sequence" filled in. But contig records
--    will have a "join" statement in the "contig" slot, and no "sequence".
--    We also may consider a retrieval option with no sequence of any kind
--    and no feature table to quickly check minimal values.
--
--  a reference may have an author list, or be from a consortium, or both.
--
--  some fields, such as taxonomy, do appear as separate elements in GenBank
--    format but without a specific linetype (in GenBank format this comes
--    under ORGANISM). Another example is the separation of primary accession
--    from the list of secondary accessions. In GenBank format primary
--    accession is just the first one on the list that includes all secondaries
--    after it.
--
--  create-date deserves special comment. The date you see on the right hand
--    side of the LOCUS line in GenBank format is actually the last date the
--    record was modified (or the update-date). The date the record was
--    first submitted to GenBank appears in the first submission citation in
--    the reference section. Internally in the databases and ASN.1 NCBI keeps
--    the first date the record was released into the sequence database at
--    NCBI as create-date. For records from EMBL, which supports create-date,
--    it is the date provided by EMBL. For DDBJ records, which do not supply
--    a create-date (same as GenBank format) the create-date is the first date
--    NCBI saw the record from DDBJ. For older GenBank records, before NCBI
--    took responsibility for GenBank, it is just the first date NCBI saw the
--    record. Create-date can be very useful, so we expose it here, but users
--    must understand it is only an approximation and comes from many sources,
--    and with many exceptions and caveats. It does NOT tell you the first
--    date the public might have seen this record and thus is NOT an accurate
--    measure for legal issues of precedence.
--
--********

GBSeq ::= SEQUENCE {
    locus VisibleString ,
    length INTEGER ,
    strandedness VisibleString OPTIONAL ,
    moltype VisibleString ,
    topology VisibleString OPTIONAL ,
    division VisibleString ,
    update-date VisibleString ,
    create-date VisibleString OPTIONAL ,
    update-release VisibleString OPTIONAL ,
    create-release VisibleString OPTIONAL ,
    definition VisibleString ,
    primary-accession VisibleString OPTIONAL ,
    entry-version VisibleString OPTIONAL ,
    accession-version VisibleString OPTIONAL ,
    other-seqids SEQUENCE OF GBSeqid OPTIONAL ,
    secondary-accessions SEQUENCE OF GBSecondary-accn OPTIONAL,
    project VisibleString OPTIONAL ,
    keywords SEQUENCE OF GBKeyword OPTIONAL ,
    segment VisibleString OPTIONAL ,
    source VisibleString OPTIONAL ,
    organism VisibleString OPTIONAL ,
    taxonomy VisibleString OPTIONAL ,
    references SEQUENCE OF GBReference OPTIONAL ,
    comment VisibleString OPTIONAL ,
    primary VisibleString OPTIONAL ,
    source-db VisibleString OPTIONAL ,
    database-reference VisibleString OPTIONAL ,
    feature-table SEQUENCE OF GBFeature OPTIONAL ,
    sequence VisibleString OPTIONAL ,  -- Optional for other dump forms
    contig VisibleString OPTIONAL }

GBSecondary-accn ::= VisibleString

GBSeqid ::= VisibleString

GBKeyword ::= VisibleString

GBReference ::= SEQUENCE {
    reference VisibleString ,
    position VisibleString OPTIONAL ,
    authors SEQUENCE OF GBAuthor OPTIONAL ,
    consortium VisibleString OPTIONAL ,
    title VisibleString OPTIONAL ,
    journal VisibleString ,
    xref SET OF GBXref OPTIONAL ,
    pubmed INTEGER OPTIONAL ,
    remark VisibleString OPTIONAL }

GBAuthor ::= VisibleString

GBXref ::= SEQUENCE {
    dbname VisibleString ,
    id VisibleString }

GBFeature ::= SEQUENCE {
    key VisibleString ,
    location VisibleString ,
    intervals SEQUENCE OF GBInterval OPTIONAL ,
    operator VisibleString OPTIONAL ,
    partial5 BOOLEAN OPTIONAL ,
    partial3 BOOLEAN OPTIONAL ,
    quals SEQUENCE OF GBQualifier OPTIONAL }

GBInterval ::= SEQUENCE {
    from INTEGER OPTIONAL ,
    to INTEGER OPTIONAL ,
    point INTEGER OPTIONAL ,
    iscomp BOOLEAN OPTIONAL ,
    interbp BOOLEAN OPTIONAL ,
    accession VisibleString }

GBQualifier ::= SEQUENCE {
    name VisibleString ,
    value VisibleString OPTIONAL }

GBSet ::= SEQUENCE OF GBSeq

END

-- general.asn
--$Revision: 99135 $
--**********************************************************************
--
--  NCBI General Data elements
--  by James Ostell, 1990
--  Version 3.0 - June 1994
--
--**********************************************************************

NCBI-General DEFINITIONS ::=
BEGIN

EXPORTS Date, Person-id, Object-id, Dbtag, Int-fuzz, User-object, User-field;

-- StringStore is really a VisibleString.  It is used to define very
--   long strings which may need to be stored by the receiving program
--   in special structures, such as a ByteStore, but it's just a hint.
--   AsnTool stores StringStores in ByteStore structures.
-- OCTET STRINGs are also stored in ByteStores by AsnTool
--
-- typedef struct bsunit {             /* for building multiline strings */
--    Nlm_Handle str;            /* the string piece */
--    Nlm_Int2 len_avail,
--        len;
--    struct bsunit PNTR next; }       /* the next one */
-- Nlm_BSUnit, PNTR Nlm_BSUnitPtr;
--
-- typedef struct bytestore {
--    Nlm_Int4 seekptr,       /* current position */
--        totlen,             /* total stored data length in bytes */
--        chain_offset;       /* offset in ByteStore of first byte in curchain */
--    Nlm_BSUnitPtr chain,       /* chain of elements */
--        curchain;           /* the BSUnit containing seekptr */
-- } Nlm_ByteStore, PNTR Nlm_ByteStorePtr;
--
-- AsnTool incorporates this as a primitive type, so the definition
--   is here just for completeness
--
--  StringStore ::= [APPLICATION 1] IMPLICIT OCTET STRING
--

-- BigInt is really an INTEGER. It is used to warn the receiving code to expect
--   a value bigger than Int4 (actually Int8). It will be stored in DataVal.bigintvalue
--
--   Like StringStore, AsnTool incorporates it as a primitive. The definition would be:
--   BigInt ::= [APPLICATION 2] IMPLICIT INTEGER
--

-- Date is used to replace the (overly complex) UTCTime, GeneralizedTime
--  of ASN.1
--  It stores only a date
--

Date ::= CHOICE {
    str VisibleString ,        -- for those unparsed dates
    std Date-std }             -- use this if you can

Date-std ::= SEQUENCE {              -- NOTE: this is NOT a unix tm struct
    year INTEGER ,                   -- full year (including 1900)
    month INTEGER OPTIONAL ,         -- month (1-12)
    day INTEGER OPTIONAL ,           -- day of month (1-31)
    season VisibleString OPTIONAL ,  -- for "spring", "may-june", etc
    hour INTEGER OPTIONAL ,          -- hour of day (0-23)
    minute INTEGER OPTIONAL ,        -- minute of hour (0-59)
    second INTEGER OPTIONAL }        -- second of minute (0-59)

-- Dbtag is generalized for tagging
-- eg. { "Social Security", str "023-79-8841" }
-- or  { "member", id 8882224 }

Dbtag ::= SEQUENCE {
    db VisibleString ,          -- name of database or system
    tag Object-id }             -- appropriate tag

-- Object-id can tag or name anything
--

Object-id ::= CHOICE {
    id INTEGER ,
    str VisibleString }

-- Person-id is to define a std element for people
--

Person-id ::= CHOICE {
    dbtag Dbtag ,               -- any defined database tag
    name Name-std ,             -- structured name
    ml VisibleString ,          -- MEDLINE name (semi-structured)
                                --    eg. "Jones RM"
    str VisibleString,          -- unstructured name
    consortium VisibleString }  -- consortium name

Name-std ::= SEQUENCE {               -- Structured names
    last VisibleString ,
    first VisibleString OPTIONAL ,
    middle VisibleString OPTIONAL ,
    full VisibleString OPTIONAL ,     -- full name eg. "J. John Smith, Esq"
    initials VisibleString OPTIONAL,  -- first + middle initials
    suffix VisibleString OPTIONAL ,   -- Jr, Sr, III
    title VisibleString OPTIONAL }    -- Dr., Sister, etc

--**** Int-fuzz **********************************************
--*
--*   uncertainties in integer values

Int-fuzz ::= CHOICE {
    p-m INTEGER ,                    -- plus or minus fixed amount
    range SEQUENCE {                 -- max to min
        max INTEGER ,
        min INTEGER } ,
    pct INTEGER ,                    -- % plus or minus (x10) 0-1000
    lim ENUMERATED {                 -- some limit value
        unk (0) ,                    -- unknown
        gt (1) ,                     -- greater than
        lt (2) ,                     -- less than
        tr (3) ,                     -- space to right of position
        tl (4) ,                     -- space to left of position
        circle (5) ,                 -- artificial break at origin of circle
        other (255) } ,              -- something else
    alt SET OF INTEGER }             -- set of alternatives for the integer

--**** User-object **********************************************
--*
--*   a general object for a user defined structured data item
--*    used by Seq-feat and Seq-descr

User-object ::= SEQUENCE {
    class VisibleString OPTIONAL ,   -- endeavor which designed this object
    type Object-id ,                 -- type of object within class
    data SEQUENCE OF User-field }    -- the object itself

User-field ::= SEQUENCE {
    label Object-id ,                -- field label
    num INTEGER OPTIONAL ,           -- required for strs, ints, reals, oss
    data CHOICE {                    -- field contents
        str VisibleString ,
        int INTEGER ,
        real REAL ,
        bool BOOLEAN ,
        os OCTET STRING ,
        object User-object ,         -- for using other definitions
        strs SEQUENCE OF VisibleString ,
        ints SEQUENCE OF INTEGER ,
        reals SEQUENCE OF REAL ,
        oss SEQUENCE OF OCTET STRING ,
        fields SEQUENCE OF User-field ,
        objects SEQUENCE OF User-object } }

END

-- homologene.asn
HomoloGene DEFINITIONS ::=
BEGIN

IMPORTS Date FROM NCBI-General
        Seq-loc FROM NCBI-Seqloc
        Seq-align FROM NCBI-Seqalign;

-- HomoloGeneEntry taxid is the tax id of the group node, which can
-- be the same as the Gene tax id in case of singletons

HG-EntrySet ::= SEQUENCE {
    entries SET OF HG-Entry   -- homologene entry
}

HG-Entry ::= SEQUENCE {
    hg-id INTEGER,
    version INTEGER OPTIONAL,
    title VisibleString OPTIONAL,
    caption VisibleString OPTIONAL,
    taxid INTEGER OPTIONAL,
    genes SET OF HG-Gene OPTIONAL,
    cr-date Date OPTIONAL,
    up-date Date OPTIONAL,
    distances SET OF HG-Stats OPTIONAL,
    commentaries SET OF HG-CommentarySet OPTIONAL,
    warnings SET OF VisibleString OPTIONAL,
    node HG-Node OPTIONAL
}

HG-Gene ::= SEQUENCE {
    geneid INTEGER,
    otherid INTEGER OPTIONAL,       -- internal use only!!!!!
    symbol VisibleString OPTIONAL,
    aliases SET OF VisibleString OPTIONAL,
    title VisibleString,
    taxid INTEGER,                  --taxid of gene node
    prot-gi INTEGER OPTIONAL,
    prot-acc VisibleString OPTIONAL,
    prot-len INTEGER OPTIONAL,
    nuc-gi INTEGER OPTIONAL,
    nuc-acc VisibleString OPTIONAL,
    gene-links SET OF HG-Link OPTIONAL,
    prot-links SET OF HG-Link OPTIONAL,
    domains SET OF HG-Domain OPTIONAL,
    chr VisibleString OPTIONAL,
    location Seq-loc OPTIONAL,      -- location on the genome
    locus-tag VisibleString OPTIONAL
}

HG-Stats ::= SEQUENCE {
    gi1 INTEGER,
    gi2 INTEGER,
    nuc-change REAL,
    nuc-change-jc REAL,
    prot-change REAL,
    ka REAL,
    ks REAL,
    knr REAL,
    knc REAL,
    recip-best BOOLEAN OPTIONAL
}

HG-Commentary ::= SEQUENCE {
    link HG-Link,
    description VisibleString OPTIONAL,  -- main description
    caption VisibleString OPTIONAL,      -- extra text
    provider VisibleString OPTIONAL,
    other-links SET OF HG-Link OPTIONAL,
    other-commentaries SET OF HG-Commentary OPTIONAL,
    taxid INTEGER OPTIONAL,
    geneid INTEGER OPTIONAL
}

HG-CommentarySet ::= SEQUENCE {
    hg-id INTEGER OPTIONAL,
    title VisibleString,
    commentaries SET OF HG-Commentary
}

HG-CommentaryContainer ::= SET OF HG-CommentarySet

HG-Link ::= SEQUENCE {
    hypertext VisibleString,
    url VisibleString OPTIONAL
}

HG-Domain ::= SEQUENCE {
    begin INTEGER,
    end INTEGER,
    pssm-id INTEGER OPTIONAL,            -- entrez uid
    cdd-id VisibleString OPTIONAL,
    cdd-name VisibleString OPTIONAL
}

HG-Node ::= SEQUENCE {
    type ENUMERATED {
        family(0),
        ortholog(1),
        paralog(2),
        leaf(3)
    },
    id HG-Node-id,
    caption VisibleString OPTIONAL,
    current-node BOOLEAN DEFAULT FALSE,
    children SET OF HG-Node OPTIONAL,
    branch-len INTEGER OPTIONAL
}

HG-Node-id ::= SEQUENCE {
    id INTEGER OPTIONAL,
    id-type ENUMERATED {
        none(0),
        geneid(1),
        hid(2)
    }
}

HG-Alignment ::= SEQUENCE {
    hg-id INTEGER,
    alignment Seq-align
}

HG-AlignmentSet ::= SET OF HG-Alignment

END

-- id1.asn
--$Revision: 1.12 $
--********************************************************************
--
--  Network Id server network access
--  Yaschenko 1996
--
--
--*********************************************************************
--
--  ID1.asn
--
--     messages for id server network access
--
--*********************************************************************

NCBI-ID1Access DEFINITIONS ::=
BEGIN

IMPORTS Seq-id FROM NCBI-Seqloc
        Seq-entry FROM NCBI-Seqset
        Seq-hist FROM NCBI-Sequence;

--**********************************
-- requests
--

ID1server-request ::= CHOICE {
    init NULL ,                          -- DlInit
    getgi Seq-id ,                       -- get a gi given a Seq-id
    getsefromgi ID1server-maxcomplex ,   -- given a gi, get the Seq-entry
    fini NULL,                           -- DlFini
    getseqidsfromgi INTEGER,             --get all Seq-ids of given gi
    getgihist INTEGER,                   --get an historical list of gis
    getgirev INTEGER,                    --get a revision history of gi
    getgistate INTEGER,                  --get a state of gi
    getsewithinfo ID1server-maxcomplex,
    getblobinfo ID1server-maxcomplex
}

-- Complexity stuff will be for ID1

ID1server-maxcomplex ::= SEQUENCE {
    maxplex Entry-complexities ,
    gi INTEGER ,
    ent INTEGER OPTIONAL,            -- needed when you want to retrieve a given ent
    sat VisibleString OPTIONAL       -- satellite 0-id,1-dbEST
}

Entry-complexities ::= INTEGER {
    entry (0) ,          -- the "natural" entry for this (nuc-prot)
    bioseq (1) ,         -- only the bioseq identified
    bioseq-set (2) ,     -- any seg-set it may be part of
    nuc-prot (3) ,       -- any nuc-prot it may be part of
    pub-set (4)
}

ID1Seq-hist ::= SEQUENCE {
    hist Seq-hist
}

ID1server-back ::= CHOICE {
    init NULL ,                      -- DlInit
    error INTEGER ,
    gotgi INTEGER ,
    gotseqentry Seq-entry,           -- live
    gotdeadseqentry Seq-entry,       -- dead
    fini NULL,                       -- DlFini
    gistate INTEGER,
    ids SET OF Seq-id,
    gihist SET OF
ID1Seq-hist, -- because hand crafted Seq-hist does not follow -- same conventions girevhist SET OF ID1Seq-hist, gotsewithinfo ID1SeqEntry-info, gotblobinfo ID1blob-info } ID1server-debug ::= SET OF ID1server-back ID1blob-info ::= SEQUENCE { gi INTEGER , sat INTEGER, sat-key INTEGER, satname VisibleString, suppress INTEGER, withdrawn INTEGER, confidential INTEGER, -- blob-state now contains blob version info. -- it's actually minutes from 01/01/1970 -- and it's negative if blob is dead. blob-state INTEGER, comment VisibleString OPTIONAL, -- public comment for withdrawn record extfeatmask INTEGER OPTIONAL -- mask for external features (SNP,...) } ID1SeqEntry-info ::= SEQUENCE { blob-info ID1blob-info, blob Seq-entry OPTIONAL } END -- id2.asn --$Revision: 112545 $ --******************************************************************** -- -- Network Id server network access -- Vasilchenko 2003 -- -- --********************************************************************* -- -- ID2.asn -- -- messages for id server network access -- --********************************************************************* NCBI-ID2Access DEFINITIONS ::= BEGIN IMPORTS Seq-id, Seq-loc FROM NCBI-Seqloc ID2S-Chunk-Id, ID2S-Seq-annot-Info FROM NCBI-Seq-split; --********************************************************************* -- request types --********************************************************************* -- Requests are sent in packets to allow sending several requests at once -- to avoid network latency, without possiblity of deadlock with server. -- Server will not start sending replies until it will read the whole packet. 
ID2-Request-Packet ::= SEQUENCE OF ID2-Request

ID2-Request ::= SEQUENCE {
    -- request's serial number, can be used in asynchronous clients
    -- server should copy it to corresponding field in reply
    serial-number INTEGER OPTIONAL,

    params ID2-Params OPTIONAL,

    -- exactly one kind of request is carried per ID2-Request
    request CHOICE {
        init NULL,
        get-packages ID2-Request-Get-Packages,
        get-seq-id ID2-Request-Get-Seq-id,
        get-blob-id ID2-Request-Get-Blob-Id,
        get-blob-info ID2-Request-Get-Blob-Info,
        reget-blob ID2-Request-ReGet-Blob,
        get-chunks ID2S-Request-Get-Chunks
    }
}

-- Request for set of params packages known by server.
-- Packages can be used to abbreviate parameters of request.
ID2-Request-Get-Packages ::= SEQUENCE {
    -- return known packages from this list
    -- if unset - return all known packages
    names SEQUENCE OF VisibleString OPTIONAL,

    -- return packages' names only
    no-contents NULL OPTIONAL
}

-- Requested sequence ID, can be any string or Seq-id.
-- This request will be replied with one or more ID2-Reply-Get-Seq-id.
ID2-Request-Get-Seq-id ::= SEQUENCE {
    seq-id ID2-Seq-id,
    -- NOTE(review): the named values (1, 2, 4, all = 127, label = 128)
    -- look like combinable bit flags; confirm against the server code.
    seq-id-type INTEGER {
        any (0),                         -- return any qualified Seq-id
        gi (1),                          -- gi is preferred
        text (2),                        -- text Seq-id (accession etc) is preferred
        general (4),                     -- general Seq-id is preferred
        all (127),                       -- return all qualified Seq-ids of the sequence
        label (128)                      -- return a sequence string label as general id
    } DEFAULT any
}

ID2-Seq-id ::= CHOICE {
    string VisibleString,
    seq-id Seq-id
}

-- Return blob-id with specified seq-id.
-- This request will be replied with one or more ID2-Reply-Get-Blob-Id.
ID2-Request-Get-Blob-Id ::= SEQUENCE {
    -- id can be supplied by inner request
    seq-id ID2-Request-Get-Seq-id,

    -- return id of blob with sequence
    sources SEQUENCE OF VisibleString OPTIONAL,

    -- return Blob-Ids with external features on this Seq-id
    external NULL OPTIONAL
}

-- Return some information related to the blob.
-- This request will be replied with one or more of:
--   ID2-Reply-Get-Blob-Seq-ids - if requested by get-seq-ids field
--   ID2-Reply-Get-Blob         - if requested by get-data field
--   ID2S-Reply-Get-Split-Info
--   ID2S-Reply-Get-Chunk
-- Last two can be sent in addition to ID2-Reply-Get-Blob
-- if the blob is split on the server.
-- The replies are made separate to allow server to create replies easier
-- from precalculated data. Each of these replies has an ID2-Reply-Data field.
ID2-Request-Get-Blob-Info ::= SEQUENCE {
    -- id can be supplied by inner request
    blob-id CHOICE {
        -- id can be supplied by inner request
        blob-id ID2-Blob-Id,

        -- generate blob-ids from request
        resolve SEQUENCE {
            request ID2-Request-Get-Blob-Id,

            -- server will not send blobs listed here
            exclude-blobs SEQUENCE OF ID2-Blob-Id OPTIONAL
        }
    },

    -- return in addition list of Seq-ids also resolving to this blob
    get-seq-ids NULL OPTIONAL,

    -- level of details requested immediately
    -- server will send relevant chunks if blob is split
    get-data ID2-Get-Blob-Details OPTIONAL
}

-- This is similar to FTP reget command.
-- It may be unsupported by server.
-- It's defined only for plain blobs (returned in ID2-Reply-Get-Blob)
-- as all split data comes in small chunks, so reget doesn't make sense.
ID2-Request-ReGet-Blob ::= SEQUENCE {
    blob-id ID2-Blob-Id,

    -- blob split version to resend
    split-version INTEGER,

    -- start offset of data to get
    offset INTEGER
}

-- Request for specific chunks.
-- Server will reply with one or more ID2S-Reply-Get-Chunk.
ID2S-Request-Get-Chunks ::= SEQUENCE {
    blob-id ID2-Blob-Id,

    -- requests for specific chunks of split blob
    chunks SEQUENCE OF ID2S-Chunk-Id,

    -- blob split version
    split-version INTEGER OPTIONAL
}

-- The following structure describes what parts of blob are required
-- immediately after ID2-Request-Get-Blob-Info in case blob is split.
-- Seq-entry level will have probably the same values as Entry-complexities.
ID2-Get-Blob-Details ::= SEQUENCE { -- reference location for details - can be only part of sequence location Seq-loc OPTIONAL, -- Seq-entry level for all data except descriptors (sequnence, annots) seq-class-level INTEGER DEFAULT 1, -- Seq-entry level for descriptors descr-level INTEGER DEFAULT 1, -- mask of descriptor types - see Seqdesc for variants' values descr-type-mas