sparql-examples

A set of SPARQL examples that are used in different SIB resources

View the Project on GitHub sib-swiss/sparql-examples

107_uniprot_sequences_and_mark_which_is_cannonical_for_human

List all human UniProtKB entries and their sequences, marking whether each listed sequence is the canonical sequence of the matching entry.

Use at

PREFIX taxon: <http://purl.uniprot.org/taxonomy/>
PREFIX up: <http://purl.uniprot.org/core/>

# For every human UniProtKB entry, list each of its sequences and flag
# whether that sequence is the canonical sequence of the entry.
# ?isCanonical is always bound to an explicit true or false.
SELECT ?entry ?sequence ?isCanonical
WHERE {
  # We don't want to look into the UniParc graph, which would
  # confuse matters
  GRAPH <http://sparql.uniprot.org/uniprot> {
    # We need the UniProt entries that are human
    ?entry a up:Protein ;
           up:organism taxon:9606 ;
           up:sequence ?sequence .
    # If the sequence is a "Simple_Sequence" it is likely to be the
    # canonical sequence
    OPTIONAL {
      ?sequence a up:Simple_Sequence .
      BIND(true AS ?likelyIsCanonical)
    }
    # unless we are dealing with an external isoform
    # see https://www.uniprot.org/help/canonical_and_isoforms
    # The FILTER acts as the condition of this OPTIONAL, so the block can
    # only match for sequences already marked as likely canonical.
    OPTIONAL {
      FILTER(?likelyIsCanonical)
      ?sequence a up:External_Sequence .
      BIND(true AS ?isComplicated)
    }
    # If it is an external isoform, its id would not match the
    # entry primary accession.
    # SUBSTR(..., 34) drops the first 33 characters of the sequence IRI
    # (presumably the isoform namespace prefix - verify against the data);
    # STRBEFORE(..., '-') then keeps the part before the isoform number.
    # Both helper variables are only ever bound to true or left unbound.
    # Using an unbound variable as the IF condition raises an error and
    # would leave ?isCanonical unbound, so test them with BOUND() instead.
    BIND(
      IF(BOUND(?isComplicated),
         STRENDS(STR(?entry), STRBEFORE(SUBSTR(STR(?sequence), 34), '-')),
         BOUND(?likelyIsCanonical))
      AS ?isCanonical)
  }
}
graph TD
%% Flowchart of the SPARQL query: v-nodes are variables, c-nodes are IRI
%% constants, bind-nodes are BIND expressions. Green nodes are projected.
%% ?isComplicated and ?likelyIsCanonical need distinct node ids; sharing
%% one id merges them into a single node carrying only the last label.
classDef projected fill:lightgreen;
classDef literal fill:orange;
classDef iri fill:yellow;
  v2("?entry"):::projected
  v5("?isCanonical"):::projected
  v4("?isComplicated")
  v6("?likelyIsCanonical")
  v3("?sequence"):::projected
  c8(["up:External_Sequence"]):::iri
  c2(["up:Protein"]):::iri
  c7(["up:Simple_Sequence"]):::iri
  c5(["taxon:9606"]):::iri
  v2 --"a"-->  c2
  v2 --"up:organism"-->  c5
  v2 --"up:sequence"-->  v3
  %% First OPTIONAL: a Simple_Sequence sets ?likelyIsCanonical
  subgraph optional0["(optional)"]
  style optional0 fill:#bbf,stroke-dasharray: 5 5;
    v3 -."a".->  c7
    bind0[/"'true^^xsd:boolean'"/]
    bind0 --as--o v6
  end
  %% Second OPTIONAL: an External_Sequence sets ?isComplicated
  subgraph optional1["(optional)"]
  style optional1 fill:#bbf,stroke-dasharray: 5 5;
    v3 -."a".->  c8
    bind1[/"'true^^xsd:boolean'"/]
    bind1 --as--o v4
  end
  %% Final BIND combines both flags with ?entry and ?sequence
  bind2[/"if(?isComplicated,ends-with(str(?entry),substring-before(substring(str(?sequence),'34^^xsd:integer'),'-')),?likelyIsCanonical)"/]
  v4 --o bind2
  v2 --o bind2
  v3 --o bind2
  v6 --o bind2
  bind2 --as--o v5