LOD-Analysis-Statistics

From STLab

(Redirected from LOD-KCAP2011-Statistics)
Jump to: navigation, search


All KCAP 2011 'Recognizing Knowledge Patterns in LOD' pages

SPARQL

General remarks

All queries were performed on the SPARQL endpoint at http://wit.istc.cnr.it:8893/sparql

The <dataset> placeholder denotes one of:

  • <lmdb_dataset> for LinkedMDB
  • <dbtune_peel> for DBTune John Peel Sessions
  • <jamendo_dataset> for DBTune Jamendo

All queries share the following prefix mapping:

# Prefix declarations shared by every query on this page.
PREFIX laprop: <http://www.ontologydesignpatterns.org/ont/lod-analysis-properties.owl#>
PREFIX lapath: <http://www.ontologydesignpatterns.org/ont/lod-analysis-path.owl#>
PREFIX ladata: <http://www.ontologydesignpatterns.org/ont/lod-analysis-properties-data.owl#>
# rdf: and rdfs: are referenced by the FILTERs of the path discovery queries;
# declaring them keeps those queries valid on endpoints that do not predefine them.
PREFIX rdf:    <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs:   <http://www.w3.org/2000/01/rdf-schema#>

Path discovery queries

Query for discovering paths of length 2 in a dataset:

# Path discovery, length 2: for every chain  ?x -p1-> ?y -p2-> ?z  in <dataset>,
# counts the matches per (type1, p1, type2, p2, type3) combination.
# Node types are OPTIONAL, so chains through untyped nodes are still counted,
# with the corresponding ?typeN left unbound.
SELECT ?type1 ?p1 ?type2 ?p2 ?type3 (COUNT(*) AS ?c)
FROM <dataset>
WHERE {
  ?x ?p1 ?y .
  ?y ?p2 ?z .
  OPTIONAL { ?x a ?type1 }
  OPTIONAL { ?y a ?type2 }
  OPTIONAL { ?z a ?type3 }
  # Typing and class-hierarchy triples are excluded from being path steps.
  FILTER (
       ?p1 != rdfs:subClassOf && ?p1 != rdf:type
    && ?p2 != rdfs:subClassOf && ?p2 != rdf:type
  )
}
# Explicit grouping (required by SPARQL 1.1 for the COUNT) already yields one row
# per combination, so DISTINCT is not needed.
GROUP BY ?type1 ?p1 ?type2 ?p2 ?type3
ORDER BY DESC(?c)
LIMIT 10000


Query for discovering paths of length 3 in a dataset:

# Path discovery, length 3: for every chain  ?x -p1-> ?y -p2-> ?z -p3-> ?w  in <dataset>,
# counts the matches per (type1, p1, type2, p2, type3, p3, type4) combination.
# Node types are OPTIONAL, so chains through untyped nodes are still counted,
# with the corresponding ?typeN left unbound.
SELECT ?type1 ?p1 ?type2 ?p2 ?type3 ?p3 ?type4 (COUNT(*) AS ?c)
FROM <dataset>
WHERE {
  ?x ?p1 ?y .
  ?y ?p2 ?z .
  ?z ?p3 ?w .
  OPTIONAL { ?x a ?type1 }
  OPTIONAL { ?y a ?type2 }
  OPTIONAL { ?z a ?type3 }
  OPTIONAL { ?w a ?type4 }
  # Typing and class-hierarchy triples are excluded from being path steps.
  FILTER (
       ?p1 != rdfs:subClassOf && ?p1 != rdf:type
    && ?p2 != rdfs:subClassOf && ?p2 != rdf:type
    && ?p3 != rdfs:subClassOf && ?p3 != rdf:type
  )
}
# Explicit grouping (required by SPARQL 1.1 for the COUNT) already yields one row
# per combination, so DISTINCT is not needed.
GROUP BY ?type1 ?p1 ?type2 ?p2 ?type3 ?p3 ?type4
ORDER BY DESC(?c)
LIMIT 10000


Query for discovering paths of length 4 in a dataset:

# Path discovery, length 4: for every chain
#   ?x -p1-> ?y -p2-> ?z -p3-> ?w -p4-> ?q
# in <dataset>, counts the matches per
# (type1, p1, type2, p2, type3, p3, type4, p4, type5) combination.
# Node types are OPTIONAL, so chains through untyped nodes are still counted,
# with the corresponding ?typeN left unbound.
SELECT ?type1 ?p1 ?type2 ?p2 ?type3 ?p3 ?type4 ?p4 ?type5 (COUNT(*) AS ?c)
FROM <dataset>
WHERE {
  ?x ?p1 ?y .
  ?y ?p2 ?z .
  ?z ?p3 ?w .
  ?w ?p4 ?q .
  OPTIONAL { ?x a ?type1 }
  OPTIONAL { ?y a ?type2 }
  OPTIONAL { ?z a ?type3 }
  OPTIONAL { ?w a ?type4 }
  OPTIONAL { ?q a ?type5 }
  # Typing and class-hierarchy triples are excluded from being path steps.
  FILTER (
       ?p1 != rdfs:subClassOf && ?p1 != rdf:type
    && ?p2 != rdfs:subClassOf && ?p2 != rdf:type
    && ?p3 != rdfs:subClassOf && ?p3 != rdf:type
    && ?p4 != rdfs:subClassOf && ?p4 != rdf:type
  )
}
# Explicit grouping (required by SPARQL 1.1 for the COUNT) already yields one row
# per combination, so DISTINCT is not needed.
GROUP BY ?type1 ?p1 ?type2 ?p2 ?type3 ?p3 ?type4 ?p4 ?type5
ORDER BY DESC(?c)
LIMIT 10000

Queries for stats gathering

Query for obtaining absolute (i.e. non-normalized) property betweenness values across all datasets (not dataset-specific):

# Absolute property betweenness over the whole <lod_stats> graph:
# for each property ?p, counts the (path, path element) matches whose element uses ?p.
# NOTE(review): a path that uses ?p in several of its elements contributes once per
# element, not once per path — confirm this matches the intended betweenness definition.
SELECT ?p (COUNT(?path) AS ?bet)
FROM <lod_stats>
WHERE {
  ?path lapath:hasPathElement ?pe .
  ?pe   lapath:hasProperty    ?p .
}
# Grouping already yields one row per property, so DISTINCT is not needed.
GROUP BY ?p
ORDER BY ?p
LIMIT 1000


Query for obtaining absolute (i.e. non-normalized) property betweenness values for each dataset:

# Absolute property betweenness per dataset:
# for each (dataset, property) pair, counts the (path, path element) matches whose
# element uses ?p, restricted to paths that have a PathOccurrencesInDataset record
# for that dataset.
# NOTE(review): a path that uses ?p in several of its elements contributes once per
# element, not once per path — confirm this matches the intended betweenness definition.
SELECT ?dataset ?p (COUNT(?path) AS ?bet)
FROM <lod_stats>
WHERE {
  ?path lapath:hasPathElement ?pe .
  ?pe   lapath:hasProperty    ?p .
  ?pocc a lapath:PathOccurrencesInDataset ;
        lapath:hasPath   ?path ;
        lapath:inDataset ?dataset .
}
# Grouping already yields one row per (dataset, property), so DISTINCT is not needed.
GROUP BY ?dataset ?p
# Within each dataset, highest betweenness first.
ORDER BY ?dataset DESC(?bet)
LIMIT 1000


Number of triples per dataset:

# Number of triples per dataset: sums laprop:numberOfTriples over every
# PropertyUsageInDataset record attached to the dataset.
SELECT ?dataset (SUM(?n) AS ?triples)
FROM <lod_stats>
WHERE {
  ?pocc a laprop:PropertyUsageInDataset ;
        # ?p is not projected; the pattern only requires the record to name a property.
        laprop:isPropertyUsageOf ?p ;
        lapath:inDataset         ?dataset ;
        laprop:numberOfTriples   ?n .
}
# Grouping already yields one row per dataset, so DISTINCT is not needed.
GROUP BY ?dataset
LIMIT 5000


Number of properties used per dataset:

# Number of properties used per dataset: counts the property bindings of the
# PropertyUsageInDataset records attached to each dataset.
# NOTE(review): this counts usage records, not distinct properties; it equals the
# number of distinct properties only if there is one record per (dataset, property) —
# presumably the case, verify against the data.
SELECT ?dataset (COUNT(?p) AS ?nprops)
FROM <lod_stats>
WHERE {
  ?pocc a laprop:PropertyUsageInDataset ;
        laprop:isPropertyUsageOf ?p ;
        lapath:inDataset         ?dataset .
}
# Grouping already yields one row per dataset, so DISTINCT is not needed.
GROUP BY ?dataset
LIMIT 5000


Type usage per dataset:

# Type usage per dataset: every type that appears as the subject type or the
# object type of a path element, paired with each dataset in which a path
# containing that element is recorded.
SELECT DISTINCT ?dataset ?type
FROM <lod_stats>
WHERE {
  ?occurrence lapath:inDataset ?dataset ;
              lapath:hasPath   ?route .
  ?route lapath:hasPathElement ?element .
  # A type qualifies whether it sits on the subject side or the object side.
  { ?element lapath:hasPathElementSubjectType ?type }
  UNION
  { ?element lapath:hasPathElementObjectType ?type }
}
LIMIT 5000


Property usage per dataset:

# Property usage per dataset: the distinct (dataset, property) pairs for which
# a PropertyUsageInDataset record exists.
SELECT DISTINCT ?dataset ?p
FROM <lod_stats>
WHERE {
  ?usage a laprop:PropertyUsageInDataset ;
         lapath:inDataset         ?dataset ;
         laprop:isPropertyUsageOf ?p .
}
LIMIT 5000


Occurrences of paths in datasets (example for paths of length 2):

# Occurrences of paths per dataset, here for paths of length 2:
# sums lapath:hasNumberOfOccurrences over all occurrence records of length-2 paths
# in each dataset. Change the hasLength literal to query another path length.
SELECT ?dataset (SUM(?n) AS ?occurrences)
FROM <lod_stats>
WHERE {
  ?pocc lapath:hasNumberOfOccurrences ?n ;
        lapath:hasPath                ?path ;
        lapath:inDataset              ?dataset .
  ?path lapath:hasLength 2 .
}
# Grouping already yields one row per dataset, so DISTINCT is not needed.
GROUP BY ?dataset
LIMIT 5000


Query for dataset pattern extraction:

# Dataset pattern extraction: for every path of length 1 recorded in a dataset,
# returns the dataset (?dat) together with the subject type (?ct), property (?pr)
# and object type (?rt) of the path's element.
SELECT DISTINCT ?dat ?ct ?pr ?rt
FROM <lod_stats>
WHERE {
  ?step lapath:hasPathElementSubjectType ?ct ;
        lapath:hasProperty               ?pr ;
        lapath:hasPathElementObjectType  ?rt .
  ?route lapath:hasPathElement ?step ;
         lapath:hasLength      1 .
  ?occurrence a lapath:PathOccurrencesInDataset ;
              lapath:hasPath   ?route ;
              lapath:inDataset ?dat .
}
LIMIT 5000

Personal tools