sparql-examples

A set of SPARQL examples that are used in different SIB resources

View the Project on GitHub sib-swiss/sparql-examples

neXtProt/NXQ_00126

Peptides that are potential neo N-termini from undescribed isoforms

Use at

# NXQ_00126 - Peptides that are potential neo N-termini from undescribed isoforms
#
# Returns the distinct (?entry, ?pep) pairs for which a proteotypic peptide
# mapped on an isoform of ?entry:
#   * starts after position 2 of the isoform sequence,
#   * is immediately preceded by an M, or itself starts with an M,
#   * is not preceded by K or R, but ends with K or R,
#   * does not start with one of D, E, F, I, K, L, R, Y,
#   * is not the beginning (with or without its first residue) of a mature
#     protein longer than 30 residues annotated on any isoform of the same entry.
#
# Results are ordered by peptide sequence.
PREFIX : <http://nextprot.org/rdf/>

SELECT DISTINCT ?entry ?pep
WHERE {
  # Proteotypic peptide mappings and the sequence of the isoform they map on.
  ?entry :isoform ?iso .
  ?iso :peptideMapping ?pm ;
       :sequence / :chain ?chain .
  ?pm :start ?p1 ;
      :end ?p2 ;
      :proteotypic true .

  FILTER (?p1 > 2)  # must not be already N-terminal

  # SUBSTR positions are 1-based, like the mapping coordinates used here.
  BIND (SUBSTR(?chain, ?p1, ?p2 - ?p1 + 1) AS ?pep)
  BIND (SUBSTR(?chain, ?p1 - 1, 1) AS ?prevAA)   # residue just before the peptide
  BIND (SUBSTR(?chain, ?p1, 1) AS ?firstAA)      # first residue of the peptide
  BIND (SUBSTR(?chain, ?p2, 1) AS ?lastAA)       # last residue of the peptide

  FILTER (!REGEX(?prevAA, '[KR]'))                         # must be semi-tryptic in N-ter
  FILTER (REGEX(?prevAA, 'M') || REGEX(?firstAA, 'M'))     # must be N-terminal
  FILTER (!REGEX(?firstAA, '[DEFIKLRY]'))                  # plausible 2nd AA
  FILTER (REGEX(?lastAA, '[KR]'))                          # must be tryptic in C-ter

  # The candidate peptide must not already exist as N-ter in a described isoform.
  FILTER NOT EXISTS {
    ?entry :isoform ?iso2 .
    ?iso2 :sequence / :chain ?chain2 ;
          :matureProtein [ :start ?mstart ; :end ?mend ] .
    BIND (SUBSTR(?chain2, ?mstart, ?mend - ?mstart + 1) AS ?mat2)
    FILTER (STRLEN(?mat2) > 30)
    # Same mature sequence without its first residue.
    BIND (SUBSTR(?mat2, 2) AS ?mat22)
    # Literal prefix test: STRSTARTS avoids building a regular expression
    # from ?pep for every candidate row.
    FILTER (STRSTARTS(?mat2, ?pep) || STRSTARTS(?mat22, ?pep))
  }
}
ORDER BY ?pep

# The result set is overestimated; additional filter(s) are needed.
# Still, for instance AELEEVTLDGKPLQALR and AELEEVTLDGKPLQALRVTDLKAALEQR in Q9UKV3
# are N-acetylated and good markers for an additional isoform starting at M-59.
# NOTE(review): the original note listed AELEEVTLDGKPLQALRVTDLKAALEQR twice;
# a third, different peptide may have been intended - confirm.
graph TD
%% Flowchart of the NXQ_00126 SPARQL query.
%%   rounded nodes   : query variables (green = projected)
%%   circle nodes    : anonymous nodes (property-path step / blank node)
%%   stadium nodes   : literals
%%   double boxes    : FILTER expressions, linked to the variables they read
%%   slanted boxes   : BIND expressions; "--o" marks inputs, "--as--o" the bound variable
%% Every variable has its own node id: ?pep=v17, ?prevAA=v18, ?firstAA=v19,
%% ?lastAA=v20, ?mat2=v9, ?mat22=v10. Ids must be unique, otherwise the last
%% declaration overwrites the label and distinct variables merge into one node.
classDef projected fill:lightgreen;
classDef literal fill:orange;
classDef iri fill:yellow;
  v16("?chain")
  v6("?chain2")
  v4("?entry"):::projected 
  v19("?firstAA")
  v13("?iso")
  v5("?iso2")
  v20("?lastAA")
  v9("?mat2")
  v10("?mat22")
  v8("?mend")
  v7("?mstart")
  v12("?p1")
  v15("?p2")
  v17("?pep"):::projected 
  v14("?pm")
  v18("?prevAA")
  a3((" "))
  a1((" "))
  a2((" "))
  c15(["true^^xsd:boolean"]):::literal 
  f0[["not  "]]
  %% Pattern evaluated by FILTER NOT EXISTS.
  subgraph f0e0["Exists Clause"]
    e0f0[["(regex(?mat2,concat('^',?pep)) || regex(?mat22,concat('^',?pep)))"]]
    e0f0 --> e0v9
    e0f0 --> e0v2
    e0f0 --> e0v10
    e0f1[["string-length(?mat2) > '30^^xsd:integer'"]]
    e0f1 --> e0v9
    e0v4 --":isoform"-->  e0v5
    e0v5 --":sequence"-->  e0a1
    e0a1 --":chain"-->  e0v6
    e0a2 --":start"-->  e0v7
    e0a2 --":end"-->  e0v8
    e0v5 --":matureProtein"-->  e0a2
    e0bind2[/"substring(?chain2,?mstart,?mend - ?mstart + '1^^xsd:integer')"/]
    e0v6 --o e0bind2
    e0v7 --o e0bind2
    e0v8 --o e0bind2
    e0bind2 --as--o e0v9
    e0bind3[/"substring(?mat2,'2^^xsd:integer',string-length(?mat2) - '1^^xsd:integer')"/]
    e0v9 --o e0bind3
    e0bind3 --as--o e0v10
    e0v6("?chain2"):::projected 
    e0v4("?entry"):::projected 
    e0v5("?iso2"):::projected 
    e0v9("?mat2"):::projected 
    e0v10("?mat22"):::projected 
    e0v8("?mend"):::projected 
    e0v7("?mstart"):::projected 
    e0v2("?pep"):::projected 
    e0a1((" ")):::projected 
    e0a2((" ")):::projected 
  end
  f0--EXISTS--> f0e0
  f0 --> v9
  f0 --> v17
  f0 --> v10
  f0 --> v4
  %% NOTE(review): c3 to c8 are referenced below but never declared, so they
  %% render as boxes labelled with their raw ids. They presumably stand for the
  %% predicates used inside the exists clause - confirm against the generator.
  f0 --> c3
  f0 --> v5
  f0 --> c4
  f0 --> a1
  f0 --> c5
  f0 --> v6
  f0 --> a2
  f0 --> c6
  f0 --> v7
  f0 --> c7
  f0 --> v8
  f0 --> c8
  %% NOTE(review): the filters, triples and binds from here to bind4 repeat the
  %% exists clause outside its subgraph - kept as generated.
  f1[["(regex(?mat2,concat('^',?pep)) || regex(?mat22,concat('^',?pep)))"]]
  f1 --> v9
  f1 --> v17
  f1 --> v10
  f2[["string-length(?mat2) > '30^^xsd:integer'"]]
  f2 --> v9
  v4 --":isoform"-->  v5
  v5 --":sequence"-->  a1
  a1 --":chain"-->  v6
  a2 --":start"-->  v7
  a2 --":end"-->  v8
  v5 --":matureProtein"-->  a2
  bind3[/"substring(?chain2,?mstart,?mend - ?mstart + '1^^xsd:integer')"/]
  v6 --o bind3
  v7 --o bind3
  v8 --o bind3
  bind3 --as--o v9
  bind4[/"substring(?mat2,'2^^xsd:integer',string-length(?mat2) - '1^^xsd:integer')"/]
  v9 --o bind4
  bind4 --as--o v10
  %% Filters of the main pattern.
  f5[["regex(?lastAA,'#91;KR#93;')"]]
  f5 --> v20
  f6[["not regex(?firstAA,'#91;DEFIKLRY#93;')"]]
  f6 --> v19
  f7[["(regex(?prevAA,'M') || regex(?firstAA,'M'))"]]
  f7 --> v18
  f7 --> v19
  f8[["not regex(?prevAA,'#91;KR#93;')"]]
  f8 --> v18
  f9[["?p1 > '2^^xsd:integer'"]]
  f9 --> v12
  %% Triple patterns of the main pattern.
  v4 --":isoform"-->  v13
  v13 --":peptideMapping"-->  v14
  v14 --":start"-->  v12
  v14 --":end"-->  v15
  v14 --":proteotypic"-->  c15
  v13 --":sequence"-->  a3
  a3 --":chain"-->  v16
  %% BIND expressions of the main pattern: ?pep, ?prevAA, ?firstAA, ?lastAA.
  bind10[/"substring(?chain,?p1,?p2 - ?p1 + '1^^xsd:integer')"/]
  v16 --o bind10
  v12 --o bind10
  v15 --o bind10
  bind10 --as--o v17
  bind11[/"substring(?chain,?p1 + '-1^^xsd:integer','1^^xsd:integer')"/]
  v16 --o bind11
  v12 --o bind11
  bind11 --as--o v18
  bind12[/"substring(?chain,?p1,'1^^xsd:integer')"/]
  v16 --o bind12
  v12 --o bind12
  bind12 --as--o v19
  bind13[/"substring(?chain,?p2,'1^^xsd:integer')"/]
  v16 --o bind13
  v15 --o bind13
  bind13 --as--o v20