CEUR-WS Wikidata Contributions
Jump to navigation
Jump to search
Workshops
# Select every item whose P31 ("instance of") value is Q40444998,
# the class this page lists under "Workshops".
# No PREFIX lines are given: wd: and wdt: are predefined by the
# Wikidata Query Service.
SELECT ?item
WHERE
{
?item wdt:P31 wd:Q40444998.
}
Conferences
# Select every item whose P31 ("instance of") value is Q2020153,
# the class this page lists under "Conferences".
# No PREFIX lines are given: wd: and wdt: are predefined by the
# Wikidata Query Service.
SELECT ?item
WHERE
{
?item wdt:P31 wd:Q2020153.
}
Proceedings
# Select every item whose P31 ("instance of") value is Q1143604,
# the class this page lists under "Proceedings".
# No PREFIX lines are given: wd: and wdt: are predefined by the
# Wikidata Query Service.
SELECT ?item
WHERE
{
?item wdt:P31 wd:Q1143604.
}
Source code
class EnhancedJSONEncoder(json.JSONEncoder):
    """JSON encoder that additionally serializes dataclass instances.

    Dataclass instances are converted to plain dicts with
    ``dataclasses.asdict``; every other object is handed to
    ``json.JSONEncoder.default``, which raises ``TypeError`` for types it
    cannot serialize.
    """

    def default(self, o):
        """Return a JSON-serializable replacement for *o*.

        Args:
            o: the object the standard encoder could not serialize.

        Returns:
            A dict of the field values when *o* is a dataclass instance.

        Raises:
            TypeError: if *o* is not a dataclass instance and the base
                encoder does not support it either.
        """
        # is_dataclass() is also true for dataclass *classes*, but asdict()
        # only accepts instances, so classes are left to the base encoder.
        if dataclasses.is_dataclass(o) and not isinstance(o, type):
            return dataclasses.asdict(o)
        return super().default(o)
@dataclass
class HistoryRecord:
    """Summary of the edit history of one Wikidata item."""

    # Item id, i.e. the entity URI without the
    # "http://www.wikidata.org/entity/" prefix.
    qid: str
    # User reported by PageHistory.getFirstUser() for the item's page.
    creator: str
    # Distinct users found in the revisions of the item's page.
    editors: typing.List[str]
class WdItemCreator:
    """Collect and plot who created the Wikidata items of an entity class.

    ``extract_item_creators`` queries Wikidata for all instances of a class,
    fetches the page history of each item and stores the result as JSON.
    ``plot_history_records`` reads that JSON file back and shows a pie chart
    of the item creators.
    """

    # SPARQL endpoint that is queried for the items.
    SPARQL_ENDPOINT = "https://query.wikidata.org/sparql"
    # Prefix of the item URIs in the query result.
    ENTITY_PREFIX = "http://www.wikidata.org/entity/"
    # Label per entity class; used as JSON key and, when plotting, in the
    # chart title.
    ENTITY_LABELS = {
        "Q40444998": "workshops",
        "Q2020153": "conferences",
        "Q1143604": "proceedings",
    }
    # Label for entity classes that are not listed in ENTITY_LABELS.
    DEFAULT_ENTITY_LABEL = "workshops"
    # After every PAUSE_EVERY-th item the extraction sleeps PAUSE_SECONDS
    # (presumably to stay below request limits - TODO confirm).
    PAUSE_EVERY = 100
    PAUSE_SECONDS = 30
    # Creators with fewer items than this are merged into the "others" slice.
    MIN_SLICE_COUNT = 15
    # Creators whose slice is pulled out of the pie.
    HIGHLIGHTED_CREATORS = ("Tholzheim", "WolfgangFahl", "Seppl2013")

    @staticmethod
    def _records_path(entity_class: str) -> str:
        """Return the JSON file name used for *entity_class*."""
        return f"history_records_{entity_class}.json"

    @classmethod
    def _qid_from_uri(cls, uri: str) -> str:
        """Strip the Wikidata entity prefix from *uri* if it is present."""
        if uri.startswith(cls.ENTITY_PREFIX):
            return uri[len(cls.ENTITY_PREFIX):]
        return uri

    @classmethod
    def _creator_distribution(cls, creators, min_count=None) -> dict:
        """Count items per creator, merging rare creators into "others".

        Args:
            creators: iterable with one creator name per item.
            min_count: creators with fewer items are added to "others";
                defaults to ``MIN_SLICE_COUNT``.

        Returns:
            dict mapping label to item count. "others" is always the first
            key; the remaining creators follow by descending count.
        """
        if min_count is None:
            min_count = cls.MIN_SLICE_COUNT
        distribution = {"others": 0}
        for creator, count in Counter(creators).most_common():
            if count < min_count:
                distribution["others"] += count
            else:
                distribution[creator] = count
        return distribution

    def extract_item_creators(self, entity_class: str) -> None:
        """Fetch the page history of all instances of *entity_class*.

        The records are written to ``history_records_<entity_class>.json``
        and the number of created items per user is printed.

        Args:
            entity_class: Wikidata id of the class, e.g. "Q40444998".
        """
        query = f"""
            SELECT ?item
            WHERE
            {{
              ?item wdt:P31 wd:{entity_class}.
            }}
            """
        wd = SPARQL(self.SPARQL_ENDPOINT)
        lod = wd.queryAsListOfDicts(query)
        qids = [self._qid_from_uri(d["item"]) for d in lod if d.get("item")]
        history_records = []
        total = len(qids)
        for i, qid in enumerate(qids, 1):
            print(f"{i:04}/{total:04}…", end="")
            if i % self.PAUSE_EVERY == 0:
                time.sleep(self.PAUSE_SECONDS)
            try:
                page_history = PageHistory(pageTitle=qid, wikiId="wikidata")
                hr = HistoryRecord(
                    qid=qid,
                    creator=page_history.getFirstUser(),
                    # dict.fromkeys de-duplicates while keeping the order of
                    # the revisions, so the stored list is reproducible.
                    editors=list(
                        dict.fromkeys(pr.user for pr in page_history.revisions)
                    ),
                )
            except Exception as e:
                # Best effort: a failing item is reported and skipped so
                # that the remaining items are still processed.
                print(f"failed ({e})")
            else:
                history_records.append(hr)
                print("✓")
        label = self.ENTITY_LABELS.get(entity_class, self.DEFAULT_ENTITY_LABEL)
        path = self._records_path(entity_class)
        with open(path, mode="w", encoding="utf-8") as fp:
            json.dump(
                {label: history_records}, fp, cls=EnhancedJSONEncoder, indent=4
            )
        print(Counter(hr.creator for hr in history_records).most_common())

    def plot_history_records(self, entity_class: str) -> None:
        """Show a pie chart of the item creators stored for *entity_class*.

        Reads ``history_records_<entity_class>.json`` as written by
        ``extract_item_creators`` and shows one chart per key of that file.

        Args:
            entity_class: Wikidata id of the class, e.g. "Q40444998".
        """
        path = self._records_path(entity_class)
        with open(path, mode="r", encoding="utf-8") as fp:
            record = json.load(fp)
        for entity_type, lod in record.items():
            history_records = [HistoryRecord(**d) for d in lod]
            creators = [hr.creator for hr in history_records]
            print(Counter(creators).most_common())
            if not creators:
                # Nothing to draw: every slice of the pie would be zero.
                continue
            distribution = self._creator_distribution(creators)
            labels = list(distribution)
            sizes = list(distribution.values())
            explode = [
                0.1 if label in self.HIGHLIGHTED_CREATORS else 0
                for label in labels
            ]
            _, ax1 = plt.subplots()
            ax1.pie(
                sizes,
                explode=explode,
                labels=labels,
                autopct='%1.1f%%',
                startangle=90,
            )
            # Equal aspect ratio ensures that the pie is drawn as a circle.
            ax1.axis('equal')
            plt.title(f"Distribution of Item Creators of {entity_type.title()}\n (As of 2023-03-11 with {len(lod)} {entity_type})")
            plt.legend(loc="lower right")
            plt.tight_layout()
            plt.show()