Wikidata Graph Split

From BITPlan cr Wiki
Revision as of 08:17, 12 July 2024 by Th (talk | contribs) (→‎Example)
(diff) ← Older revision | Latest revision (diff) | Newer revision → (diff)
Jump to navigation Jump to search

Technical details on the Split

See: https://m.wikidata.org/wiki/Wikidata_talk:SPARQL_query_service/WDQS_graph_split

Scholarly graph:
   1. Find subjects whose P31 is a scholarly article (Q13442814) and find all quads whose context matches those subjects.
   2. Find the references for the elements in 1.
   3. Get the values for the elements in 1 and 2.
   4. Add those together to produce the scholarly graph.
   Main ("non-scholarly") graph:
   5. From the full graph, subtract the items identified in 1.
   6. Then remove from 5 the references and values that are only attached to the scholarly graph, but keep any other references or values.

Endpoints:

wikidata-main
https://query-main-experimental.wikidata.org/
wikidata-scholarly
https://query-scholarly-experimental.wikidata.org/


Which entities are where?

  • The current endpoints only include entities of type scholarly article
    • the proposed additional subtypes/additional types are not included
  • If an entity has two entity classes, e.g. scholarly article and an additional one, the entity is still included in main
    • I do not know if this behavior is intended

Example

# Counts how often each P921 value label occurs among the selected entities.
SELECT DISTINCT ?valueLabel (COUNT(?valueLabel) AS ?count)
WHERE {
  # Entities linked to Q112895606 via P5008 whose class is Q1266946 or one of its subclasses;
  # ?prop is the P921 statement node of each such entity.
  ?entity wdt:P5008 wd:Q112895606 ;
          wdt:P31/wdt:P279* wd:Q1266946 ;
          p:P921 ?prop .
  # The statement value is optional, so statements without a ps:P921 value are kept.
  OPTIONAL { ?prop ps:P921 ?value . }
  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en" . }
}
GROUP BY ?valueLabel
ORDER BY DESC(?count)


  • ?entity is of type thesis, which is on the candidate list of entity types to be split out. Thus, the query is currently not affected, but it may be in the future
  • ?prop is of type subject and is in the main graph, thus a federated query needs to be used


Federated Query for unknown location of ?entity

# Federated variant for an unknown location of ?entity: the same pattern is matched
# on the endpoint that receives the query and on the scholarly endpoint, then unioned.
SELECT DISTINCT ?valueLabel (COUNT(?valueLabel) AS ?count)
WHERE {
  {
    # Branch 1: match on the endpoint the query is sent to.
    ?entity wdt:P5008 wd:Q112895606 ;
            wdt:P31/wdt:P279* wd:Q1266946 ;
            p:P921 ?prop .
  }
  UNION
  {
    # Branch 2: match the identical pattern on the scholarly endpoint.
    SERVICE <https://query-scholarly-experimental.wikidata.org/sparql> {
      ?entity wdt:P5008 wd:Q112895606 ;
              wdt:P31/wdt:P279* wd:Q1266946 ;
              p:P921 ?prop .
    }
  }

  # The statement value is looked up outside both branches and stays optional.
  OPTIONAL { ?prop ps:P921 ?value . }
  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en" . }
}
GROUP BY ?valueLabel
ORDER BY DESC(?count)

Federated Query for known location of ?entity (location=scholarly subgraph)

# Federated variant for a known location of ?entity: the entity pattern is matched
# only on the scholarly endpoint.
SELECT DISTINCT ?valueLabel (COUNT(?valueLabel) AS ?count)
WHERE {
  SERVICE <https://query-scholarly-experimental.wikidata.org/sparql> {
    ?entity wdt:P5008 wd:Q112895606 ;
            wdt:P31/wdt:P279* wd:Q1266946 ;
            p:P921 ?prop .
  }
  # The statement value is looked up outside the SERVICE call and stays optional.
  OPTIONAL { ?prop ps:P921 ?value . }
  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en" . }
}
GROUP BY ?valueLabel
ORDER BY DESC(?count)


WikidataThesisToolkit Queries

Query Name Original Adapted for Graph Split
wikidata.org/WikidataThesisToolkit/5aHL
PREFIX bd: <http://www.bigdata.com/rdf#>
PREFIX schema: <http://schema.org/>
PREFIX wd: <http://www.wikidata.org/entity/>
PREFIX wdt: <http://www.wikidata.org/prop/direct/>
PREFIX wikibase: <http://wikiba.se/ontology#>
# Lists theses (class Q1266946 or a subclass) whose P953 URL starts with
# "http://etheses.lse.ac.uk", with their authors and, where one exists,
# the author's English Wikipedia page.
SELECT ?thesis ?thesisDescription ?thesisLabel ?authorLabel ?authorwp ?lse_url
WHERE {
  ?thesis wdt:P31/wdt:P279* wd:Q1266946 ;
          wdt:P953 ?lse_url .
  # Keep only URLs under the given prefix.
  FILTER(STRSTARTS(STR(?lse_url), "http://etheses.lse.ac.uk"))
  # Author (P50) is optional; the Wikipedia page is optional per author.
  OPTIONAL {
    ?thesis wdt:P50 ?author .
    OPTIONAL {
      ?authorwp schema:about ?author ;
                schema:isPartOf <https://en.wikipedia.org/> .
    }
  }
  SERVICE wikibase:label { bd:serviceParam wikibase:language "en" . }
}
ORDER BY ?thesisDescription
PREFIX bd: <http://www.bigdata.com/rdf#>
PREFIX schema: <http://schema.org/>
PREFIX wd: <http://www.wikidata.org/entity/>
PREFIX wdt: <http://www.wikidata.org/prop/direct/>
PREFIX wikibase: <http://wikiba.se/ontology#>
# Graph-split variant: lists theses (class Q1266946 or a subclass) whose P953 URL
# starts with "http://etheses.lse.ac.uk", with their authors and, where one exists,
# the author's English Wikipedia page. The thesis pattern is matched both on the
# endpoint that receives the query and on the scholarly endpoint.
SELECT ?thesis ?thesisDescription ?thesisLabel ?authorLabel ?authorwp ?lse_url WHERE {
  {
    # Branch 1: everything is matched on the endpoint the query is sent to.
    ?thesis (wdt:P31/(wdt:P279*)) wd:Q1266946;
      wdt:P953 ?lse_url.
    FILTER(STRSTARTS(STR(?lse_url), "http://etheses.lse.ac.uk"))
    OPTIONAL {
      ?thesis wdt:P50 ?author.
      OPTIONAL {
        ?authorwp schema:about ?author;
          schema:isPartOf <https://en.wikipedia.org/>.
      }
    }
  }
  UNION
  {
    # Branch 2: the thesis is matched on the scholarly endpoint.
    SERVICE <https://query-scholarly-experimental.wikidata.org/sparql> {
      ?thesis (wdt:P31/(wdt:P279*)) wd:Q1266946;
        wdt:P953 ?lse_url.
      FILTER(STRSTARTS(STR(?lse_url), "http://etheses.lse.ac.uk"))
      OPTIONAL {
        ?thesis wdt:P50 ?author.
        # The OPTIONAL has to wrap the SERVICE call. A SERVICE group that contains
        # only an OPTIONAL has an empty left-hand side, so the remote pattern would
        # be evaluated unconstrained instead of being optional per ?author.
        OPTIONAL {
          SERVICE <https://query-main-experimental.wikidata.org/sparql> {
            ?authorwp schema:about ?author;
              schema:isPartOf <https://en.wikipedia.org/>.
          }
        }
      }
    }
  }
  SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
}
ORDER BY (?thesisDescription)
wikidata.org/WikidataThesisToolkit/5aG5
PREFIX bd: <http://www.bigdata.com/rdf#>
PREFIX p: <http://www.wikidata.org/prop/>
PREFIX ps: <http://www.wikidata.org/prop/statement/>
PREFIX wd: <http://www.wikidata.org/entity/>
PREFIX wdt: <http://www.wikidata.org/prop/direct/>
PREFIX wikibase: <http://wikiba.se/ontology#>
#defaultView:BubbleChart
# Counts how often each P921 value label occurs among the selected theses.
SELECT DISTINCT ?valueLabel (COUNT(?valueLabel) AS ?count)
WHERE {
  # Theses (class Q1266946 or a subclass) linked to Q112895606 via P5008,
  # together with their P921 statement nodes.
  ?thesis wdt:P5008 wd:Q112895606 ;
          wdt:P31/wdt:P279* wd:Q1266946 ;
          p:P921 ?subjectStatement .
  # The statement value is optional, so statements without one are kept.
  OPTIONAL { ?subjectStatement ps:P921 ?value . }
  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en" . }
}
GROUP BY ?valueLabel
ORDER BY DESC(?count)
PREFIX bd:  <http://www.bigdata.com/rdf#>
PREFIX p:  <http://www.wikidata.org/prop/>
PREFIX ps:  <http://www.wikidata.org/prop/statement/>
PREFIX wd:  <http://www.wikidata.org/entity/>
PREFIX wdt:  <http://www.wikidata.org/prop/direct/>
PREFIX wikibase:  <http://wikiba.se/ontology#>
#defaultView:BubbleChart
# Graph-split variant: counts how often each P921 value label occurs among the
# selected theses. The thesis pattern is matched both on the endpoint that
# receives the query and on the scholarly endpoint.
# The defaultView directive above is carried over from the original query.
SELECT DISTINCT ?valueLabel (COUNT(?valueLabel) AS ?count) WHERE {
  {
    # Branch 1: everything is matched on the endpoint the query is sent to.
    ?entity wdt:P5008 wd:Q112895606;
            (wdt:P31/(wdt:P279*)) wd:Q1266946;
            p:P921 ?prop.
    OPTIONAL { ?prop ps:P921 ?value. }
  }
  UNION
  {
    # Branch 2: the thesis and its P921 statement are matched on the scholarly endpoint.
    SERVICE <https://query-scholarly-experimental.wikidata.org/sparql> {
      ?entity wdt:P5008 wd:Q112895606;
              (wdt:P31/(wdt:P279*)) wd:Q1266946;
              p:P921 ?prop.
      # The OPTIONAL has to wrap the SERVICE call. A SERVICE group that contains
      # only an OPTIONAL has an empty left-hand side, so the remote pattern would
      # be evaluated unconstrained instead of being optional per ?prop.
      OPTIONAL {
        SERVICE <https://query-main-experimental.wikidata.org/sparql> {
          ?prop ps:P921 ?value.
        }
      }
    }
  }
  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
}
GROUP BY ?valueLabel
ORDER BY DESC (?count)
wikidata.org/WikidataThesisToolkit/7Kiz
PREFIX bd: <http://www.bigdata.com/rdf#>
PREFIX p: <http://www.wikidata.org/prop/>
PREFIX ps: <http://www.wikidata.org/prop/statement/>
PREFIX wd: <http://www.wikidata.org/entity/>
PREFIX wdt: <http://www.wikidata.org/prop/direct/>
PREFIX wikibase: <http://wikiba.se/ontology#>
#defaultView:BubbleChart
# Counts, per label of the P31 class of each P921 value, how often it occurs
# among the selected theses.
SELECT DISTINCT ?instanceLabel (COUNT(?instanceLabel) AS ?count)
WHERE {
  # Theses (class Q1266946 or a subclass) linked to Q112895606 via P5008,
  # together with their P921 statement nodes.
  ?thesis wdt:P5008 wd:Q112895606 ;
          wdt:P31/wdt:P279* wd:Q1266946 ;
          p:P921 ?subjectStatement .
  # Both the statement value and its P31 class are optional.
  OPTIONAL { ?subjectStatement ps:P921 ?subject . }
  OPTIONAL { ?subject wdt:P31 ?instance . }
  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en" . }
}
GROUP BY ?instanceLabel
ORDER BY DESC(?count)
PREFIX bd: <http://www.bigdata.com/rdf#>
PREFIX p: <http://www.wikidata.org/prop/>
PREFIX ps: <http://www.wikidata.org/prop/statement/>
PREFIX wd: <http://www.wikidata.org/entity/>
PREFIX wdt: <http://www.wikidata.org/prop/direct/>
PREFIX wikibase: <http://wikiba.se/ontology#>
#defaultView:BubbleChart
# Graph-split variant: counts, per label of the P31 class of each P921 value,
# how often it occurs among the selected theses. The thesis pattern is matched
# both on the endpoint that receives the query and on the scholarly endpoint.
SELECT DISTINCT ?instanceLabel (COUNT(?instanceLabel) AS ?count)
WHERE {
  {
    # Branch 1: match on the endpoint the query is sent to.
    ?thesis wdt:P5008 wd:Q112895606 ;
            wdt:P31/wdt:P279* wd:Q1266946 ;
            p:P921 ?subjectStatement .
  }
  UNION
  {
    # Branch 2: match the identical pattern on the scholarly endpoint.
    SERVICE <https://query-scholarly-experimental.wikidata.org/sparql> {
      ?thesis wdt:P5008 wd:Q112895606 ;
              wdt:P31/wdt:P279* wd:Q1266946 ;
              p:P921 ?subjectStatement .
    }
  }
  # Both the statement value and its P31 class are optional and looked up
  # outside the two branches.
  OPTIONAL { ?subjectStatement ps:P921 ?subject . }
  OPTIONAL { ?subject wdt:P31 ?instance . }
  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en" . }
}
GROUP BY ?instanceLabel
ORDER BY DESC(?count)
wikidata.org/WikidataThesisToolkit/5ckW
PREFIX bd: <http://www.bigdata.com/rdf#>
PREFIX wd: <http://www.wikidata.org/entity/>
PREFIX wdt: <http://www.wikidata.org/prop/direct/>
PREFIX wikibase: <http://wikiba.se/ontology#>
# Lists the selected theses together with each work they reference via P2860.
SELECT ?thesis ?thesisLabel ?cites_work ?cites_workLabel
WHERE {
  # Theses (class Q1266946 or a subclass) linked to Q112895606 via P5008.
  ?thesis wdt:P5008 wd:Q112895606 ;
          wdt:P31/wdt:P279* wd:Q1266946 ;
          wdt:P2860 ?cites_work .
  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en" . }
}
PREFIX bd: <http://www.bigdata.com/rdf#>
PREFIX wd: <http://www.wikidata.org/entity/>
PREFIX wdt: <http://www.wikidata.org/prop/direct/>
PREFIX wikibase: <http://wikiba.se/ontology#>
# Graph-split variant: lists the selected theses together with each work they
# reference via P2860. The pattern is matched both on the endpoint that receives
# the query and on the scholarly endpoint.
SELECT ?thesis ?thesisLabel ?cites_work ?cites_workLabel
WHERE {
  {
    # Branch 1: match on the endpoint the query is sent to.
    ?thesis wdt:P5008 wd:Q112895606 ;
            wdt:P31/wdt:P279* wd:Q1266946 ;
            wdt:P2860 ?cites_work .
  }
  UNION
  {
    # Branch 2: match the identical pattern on the scholarly endpoint.
    SERVICE <https://query-scholarly-experimental.wikidata.org/sparql> {
      ?thesis wdt:P5008 wd:Q112895606 ;
              wdt:P31/wdt:P279* wd:Q1266946 ;
              wdt:P2860 ?cites_work .
    }
  }
  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en" . }
}
wikidata.org/WikidataThesisToolkit/5eTJ
PREFIX bd: <http://www.bigdata.com/rdf#>
PREFIX wd: <http://www.wikidata.org/entity/>
PREFIX wdt: <http://www.wikidata.org/prop/direct/>
PREFIX wikibase: <http://wikiba.se/ontology#>
# Lists items that reference, via P2860, an item linked to Q112895606 via P5008.
SELECT ?item ?itemLabel ?cites_thesis ?cites_thesisLabel
WHERE {
  ?cites_thesis wdt:P5008 wd:Q112895606 .
  ?item wdt:P2860 ?cites_thesis .
  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en" . }
}
PREFIX bd: <http://www.bigdata.com/rdf#>
PREFIX wd: <http://www.wikidata.org/entity/>
PREFIX wdt: <http://www.wikidata.org/prop/direct/>
PREFIX wikibase: <http://wikiba.se/ontology#>
# Graph-split variant: lists items that reference, via P2860, an item linked to
# Q112895606 via P5008. The pattern is matched both on the endpoint that
# receives the query and on the scholarly endpoint.
SELECT ?item ?itemLabel ?cites_thesis ?cites_thesisLabel
WHERE {
  {
    # Branch 1: match on the endpoint the query is sent to.
    ?cites_thesis wdt:P5008 wd:Q112895606 .
    ?item wdt:P2860 ?cites_thesis .
  }
  UNION
  {
    # Branch 2: match the identical pattern on the scholarly endpoint.
    SERVICE <https://query-scholarly-experimental.wikidata.org/sparql> {
      ?cites_thesis wdt:P5008 wd:Q112895606 .
      ?item wdt:P2860 ?cites_thesis .
    }
  }
  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en" . }
}
wikidata.org/WikidataThesisToolkit/jwZ
# Counts items of class Q187685 per label of their P4101 value (?institution),
# largest count first.
SELECT (COUNT(?thesis) AS ?count) ?institutionLabel
WHERE {
  ?thesis wdt:P31 wd:Q187685 ;
          wdt:P4101 ?institution .
  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en" . }
}
GROUP BY ?institutionLabel
ORDER BY DESC(?count)
# Graph-split variant: counts items of class Q187685 per label of their P4101
# value (?institution), largest count first. The pattern is matched both on the
# endpoint that receives the query and on the scholarly endpoint.
SELECT (COUNT(?thesis) AS ?count) ?institutionLabel
WHERE {
  {
    # Branch 1: match on the endpoint the query is sent to.
    ?thesis wdt:P31 wd:Q187685 ;
            wdt:P4101 ?institution .
  }
  UNION
  {
    # Branch 2: match the identical pattern on the scholarly endpoint.
    SERVICE <https://query-scholarly-experimental.wikidata.org/sparql> {
      ?thesis wdt:P31 wd:Q187685 ;
              wdt:P4101 ?institution .
    }
  }
  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en" . }
}
GROUP BY ?institutionLabel
ORDER BY DESC(?count)