CEUR-WS Wikidata Contributions: Difference between revisions
Jump to navigation
Jump to search
(Created page with "= Workshops = 400px <source lang="sparql"> SELECT ?item WHERE { ?item wdt:P31 wd:Q40444998. } </source> = Conferences =...") |
No edit summary |
||
| Line 28: | Line 28: | ||
} | } | ||
</source> | </source> | ||
= Sourcecode = | |||
<syntaxhighlight lang="python"> | |||
class EnhancedJSONEncoder(json.JSONEncoder): | |||
def default(self, o): | |||
if dataclasses.is_dataclass(o): | |||
return dataclasses.asdict(o) | |||
return super().default(o) | |||
@dataclass | |||
class HistoryRecord: | |||
qid: str | |||
creator: str | |||
editors: typing.List[str] | |||
class WdItemCreator: | |||
def extract_item_creators(self, entity_class: str): | |||
query = f""" | |||
SELECT ?item | |||
WHERE | |||
{{ | |||
?item wdt:P31 wd:{entity_class}. | |||
}} | |||
""" | |||
wd = SPARQL("https://query.wikidata.org/sparql") | |||
lod = wd.queryAsListOfDicts(query) | |||
qids = [d.get("item")[len("http://www.wikidata.org/entity/"):] for d in lod] | |||
history_records = [] | |||
total = len(qids) | |||
for i, qid in enumerate(qids, 1): | |||
print(f"{i:04}/{total:04}…", end="") | |||
if i % 100 == 0: | |||
time.sleep(30) | |||
try: | |||
page_history = PageHistory(pageTitle=qid, wikiId="wikidata") | |||
hr = HistoryRecord( | |||
qid=qid, | |||
creator=page_history.getFirstUser(), | |||
editors=list({pr.user for pr in page_history.revisions}) | |||
) | |||
history_records.append(hr) | |||
print("✓") | |||
except Exception as e: | |||
print(f"failed ({e})") | |||
with open(f"history_records_{entity_class}.json", mode="w") as fp: | |||
json.dump({"workshops":history_records}, fp, cls=EnhancedJSONEncoder, indent=4) | |||
print(Counter([hr.creator for hr in history_records]).most_common()) | |||
def plot_history_records(self, entity_class: str): | |||
logs = "history_records_{entity_class}.json" | |||
with open(logs, mode="r") as fp: | |||
record = json.load(fp) | |||
for entity_type, lod in record.items(): | |||
history_records = [HistoryRecord(**d) for d in lod] | |||
distribution = {"others":0} | |||
print(Counter([hr.creator for hr in history_records]).most_common()) | |||
for label, count in Counter([hr.creator for hr in history_records]).most_common(): | |||
if count < 15: | |||
distribution["others"] += count | |||
else: | |||
distribution[label] = count | |||
labels = distribution.keys() | |||
sizes = distribution.values() | |||
fig1, ax1 = plt.subplots() | |||
explode = [0.1 if label in ["Tholzheim", "WolfgangFahl", "Seppl2013"]else 0 for label in labels] | |||
ax1.pie(sizes, explode=explode, labels=labels, autopct='%1.1f%%', startangle=90) | |||
ax1.axis('equal') # Equal aspect ratio ensures that pie is drawn as a circle. | |||
plt.title(f"Distribution of Item Creators of {entity_type.title()}\n (As of 2023-03-11 with {len(lod)} {entity_type})") | |||
plt.legend(loc="lower right") | |||
plt.tight_layout() | |||
plt.show() | |||
</syntaxhighlight> | |||
[[Category:Text2KG]] | [[Category:Text2KG]] | ||
Latest revision as of 09:14, 29 May 2023
Workshops
SELECT ?item
WHERE
{
?item wdt:P31 wd:Q40444998.
}
Conferences
SELECT ?item
WHERE
{
?item wdt:P31 wd:Q2020153.
}
Proceedings
SELECT ?item
WHERE
{
?item wdt:P31 wd:Q1143604.
}
Sourcecode
class EnhancedJSONEncoder(json.JSONEncoder):
    """JSON encoder that serialises dataclass objects as plain dictionaries."""

    def default(self, o):
        """Return a JSON-serialisable stand-in for ``o``.

        Dataclass objects are converted with ``dataclasses.asdict``; every
        other unsupported object is delegated to ``json.JSONEncoder.default``.
        """
        if not dataclasses.is_dataclass(o):
            return super().default(o)
        return dataclasses.asdict(o)
@dataclass
class HistoryRecord:
    """Creator and editor summary of a single item's page history."""

    # Identifier of the item (the code below fills it with a Wikidata QID).
    qid: str
    # User name reported as the first user of the page history.
    creator: str
    # User names collected from the page revisions, without duplicates.
    editors: typing.List[str]
class WdItemCreator:
    """Collect and plot who created the Wikidata items of a given class.

    ``extract_item_creators`` queries the Wikidata SPARQL endpoint for all
    instances of a class, looks up the page history of each item and stores
    the result as JSON. ``plot_history_records`` reads that JSON file back and
    draws one pie chart per stored entity type.
    """

    # Prefix stripped from the item URIs returned by the query to get the QID.
    ENTITY_URI_PREFIX = "http://www.wikidata.org/entity/"
    # Creators with fewer created items than this are merged into "others".
    MIN_SLICE_COUNT = 15
    # Creators whose slices are pulled out of the pie for emphasis.
    HIGHLIGHTED_CREATORS = ("Tholzheim", "WolfgangFahl", "Seppl2013")

    @staticmethod
    def _records_path(entity_class: str) -> str:
        """Return the JSON file name used to store records of ``entity_class``."""
        return f"history_records_{entity_class}.json"

    def extract_item_creators(self, entity_class: str, entity_type: str = "workshops"):
        """Fetch creator/editor data for all instances of ``entity_class``.

        Args:
            entity_class: QID of the class whose instances (``wdt:P31``) are
                queried, e.g. ``"Q40444998"``.
            entity_type: Key under which the records are stored in the JSON
                file; ``plot_history_records`` uses it in the chart title.
                Defaults to ``"workshops"``, the key that was previously
                hard-coded.

        Side effects:
            Writes ``history_records_<entity_class>.json`` to the current
            directory and prints progress plus the creator frequencies.
        """
        query = f"""
            SELECT ?item
            WHERE
            {{
              ?item wdt:P31 wd:{entity_class}.
            }}
        """
        wd = SPARQL("https://query.wikidata.org/sparql")
        lod = wd.queryAsListOfDicts(query)
        prefix_length = len(self.ENTITY_URI_PREFIX)
        qids = [d.get("item")[prefix_length:] for d in lod]
        history_records = []
        total = len(qids)
        for i, qid in enumerate(qids, 1):
            # Flush so the counter is visible while the history is being fetched.
            print(f"{i:04}/{total:04}…", end="", flush=True)
            if i % 100 == 0:
                # Pause after every 100 items (presumably to stay within the
                # wiki's request limits -- TODO confirm).
                time.sleep(30)
            try:
                page_history = PageHistory(pageTitle=qid, wikiId="wikidata")
                hr = HistoryRecord(
                    qid=qid,
                    creator=page_history.getFirstUser(),
                    editors=list({pr.user for pr in page_history.revisions}),
                )
                history_records.append(hr)
                print("✓")
            except Exception as e:
                # Deliberately best-effort: a single failing item is reported
                # and skipped so the remaining items are still processed.
                print(f"failed ({e})")
        with open(self._records_path(entity_class), mode="w", encoding="utf-8") as fp:
            json.dump({entity_type: history_records}, fp, cls=EnhancedJSONEncoder, indent=4)
        print(Counter([hr.creator for hr in history_records]).most_common())

    def plot_history_records(self, entity_class: str):
        """Plot the creator distribution stored for ``entity_class``.

        Reads ``history_records_<entity_class>.json`` (as written by
        ``extract_item_creators``) and shows one pie chart per top-level key
        of that file.

        Args:
            entity_class: QID of the class whose stored records are plotted.

        Raises:
            FileNotFoundError: If no records file exists for ``entity_class``.
        """
        # The original built this name without the f-string prefix and thus
        # always looked for the literal "history_records_{entity_class}.json".
        logs = self._records_path(entity_class)
        with open(logs, mode="r", encoding="utf-8") as fp:
            record = json.load(fp)
        for entity_type, lod in record.items():
            history_records = [HistoryRecord(**d) for d in lod]
            creator_counts = Counter([hr.creator for hr in history_records]).most_common()
            print(creator_counts)
            # "others" comes first so that it is always the first slice.
            distribution = {"others": 0}
            for label, count in creator_counts:
                if count < self.MIN_SLICE_COUNT:
                    distribution["others"] += count
                else:
                    distribution[label] = count
            # Plain lists rather than dict views are handed to matplotlib.
            labels = list(distribution)
            sizes = list(distribution.values())
            explode = [0.1 if label in self.HIGHLIGHTED_CREATORS else 0 for label in labels]
            fig1, ax1 = plt.subplots()
            ax1.pie(sizes, explode=explode, labels=labels, autopct='%1.1f%%', startangle=90)
            ax1.axis('equal')  # Equal aspect ratio ensures that pie is drawn as a circle.
            # NOTE(review): the "As of" date in the title is hard-coded.
            plt.title(f"Distribution of Item Creators of {entity_type.title()}\n (As of 2023-03-11 with {len(lod)} {entity_type})")
            plt.legend(loc="lower right")
            plt.tight_layout()
            plt.show()