Difference between revisions of "CEUR-WS Wikidata Contributions"
Jump to navigation
Jump to search
(Created page with "= Workshops = 400px <source lang="sparql"> SELECT ?item WHERE { ?item wdt:P31 wd:Q40444998. } </source> = Conferences =...") |
|||
Line 28: | Line 28: | ||
} | } | ||
</source> | </source> | ||
+ | |||
+ | = Sourcecode = | ||
+ | |||
+ | <syntaxhighlight lang="python"> | ||
+ | |||
+ | class EnhancedJSONEncoder(json.JSONEncoder): | ||
+ | def default(self, o): | ||
+ | if dataclasses.is_dataclass(o): | ||
+ | return dataclasses.asdict(o) | ||
+ | return super().default(o) | ||
+ | |||
+ | @dataclass | ||
+ | class HistoryRecord: | ||
+ | qid: str | ||
+ | creator: str | ||
+ | editors: typing.List[str] | ||
+ | |||
+ | |||
+ | class WdItemCreator: | ||
+ | |||
+ | def extract_item_creators(self, entity_class: str): | ||
+ | query = f""" | ||
+ | SELECT ?item | ||
+ | WHERE | ||
+ | {{ | ||
+ | ?item wdt:P31 wd:{entity_class}. | ||
+ | }} | ||
+ | """ | ||
+ | wd = SPARQL("https://query.wikidata.org/sparql") | ||
+ | lod = wd.queryAsListOfDicts(query) | ||
+ | qids = [d.get("item")[len("http://www.wikidata.org/entity/"):] for d in lod] | ||
+ | history_records = [] | ||
+ | total = len(qids) | ||
+ | for i, qid in enumerate(qids, 1): | ||
+ | print(f"{i:04}/{total:04}…", end="") | ||
+ | if i % 100 == 0: | ||
+ | time.sleep(30) | ||
+ | try: | ||
+ | page_history = PageHistory(pageTitle=qid, wikiId="wikidata") | ||
+ | hr = HistoryRecord( | ||
+ | qid=qid, | ||
+ | creator=page_history.getFirstUser(), | ||
+ | editors=list({pr.user for pr in page_history.revisions}) | ||
+ | ) | ||
+ | history_records.append(hr) | ||
+ | print("✓") | ||
+ | except Exception as e: | ||
+ | print(f"failed ({e})") | ||
+ | with open(f"history_records_{entity_class}.json", mode="w") as fp: | ||
+ | json.dump({"workshops":history_records}, fp, cls=EnhancedJSONEncoder, indent=4) | ||
+ | print(Counter([hr.creator for hr in history_records]).most_common()) | ||
+ | |||
+ | def plot_history_records(self, entity_class: str): | ||
+ | logs = "history_records_{entity_class}.json" | ||
+ | with open(logs, mode="r") as fp: | ||
+ | record = json.load(fp) | ||
+ | for entity_type, lod in record.items(): | ||
+ | history_records = [HistoryRecord(**d) for d in lod] | ||
+ | distribution = {"others":0} | ||
+ | print(Counter([hr.creator for hr in history_records]).most_common()) | ||
+ | for label, count in Counter([hr.creator for hr in history_records]).most_common(): | ||
+ | if count < 15: | ||
+ | distribution["others"] += count | ||
+ | else: | ||
+ | distribution[label] = count | ||
+ | labels = distribution.keys() | ||
+ | sizes = distribution.values() | ||
+ | |||
+ | fig1, ax1 = plt.subplots() | ||
+ | explode = [0.1 if label in ["Tholzheim", "WolfgangFahl", "Seppl2013"]else 0 for label in labels] | ||
+ | ax1.pie(sizes, explode=explode, labels=labels, autopct='%1.1f%%', startangle=90) | ||
+ | ax1.axis('equal') # Equal aspect ratio ensures that pie is drawn as a circle. | ||
+ | plt.title(f"Distribution of Item Creators of {entity_type.title()}\n (As of 2023-03-11 with {len(lod)} {entity_type})") | ||
+ | plt.legend(loc="lower right") | ||
+ | plt.tight_layout() | ||
+ | plt.show() | ||
+ | </syntaxhighlight> | ||
+ | |||
[[Category:Text2KG]] | [[Category:Text2KG]] |
Latest revision as of 10:14, 29 May 2023
Workshops
SELECT ?item
WHERE
{
?item wdt:P31 wd:Q40444998.
}
Conferences
SELECT ?item
WHERE
{
?item wdt:P31 wd:Q2020153.
}
Proceedings
SELECT ?item
WHERE
{
?item wdt:P31 wd:Q1143604.
}
Sourcecode
class EnhancedJSONEncoder(json.JSONEncoder):
    """JSON encoder that additionally serializes dataclass instances.

    Pass it as ``cls=EnhancedJSONEncoder`` to ``json.dump`` / ``json.dumps``.
    """

    def default(self, o):
        """Return a JSON-serializable stand-in for *o*.

        Dataclass instances are converted to plain dicts with
        ``dataclasses.asdict`` (which recurses into nested dataclasses).
        Everything else is delegated to ``json.JSONEncoder.default``, which
        raises ``TypeError`` for objects it cannot serialize.
        """
        # is_dataclass() is also true for dataclass *types*, but asdict()
        # only accepts instances -- let types fall through to the standard
        # "not JSON serializable" error instead.
        if dataclasses.is_dataclass(o) and not isinstance(o, type):
            return dataclasses.asdict(o)
        return super().default(o)
@dataclass
class HistoryRecord:
    """Creator and editors of one Wikidata item, taken from its page history."""

    # Wikidata item id (the part of the entity URI after ".../entity/")
    qid: str
    # user name reported as first user of the item's page history
    creator: str
    # distinct user names over all revisions of the item page (no defined order)
    editors: typing.List[str]
class WdItemCreator:
    """Collect and plot which users created the Wikidata items of a class.

    ``extract_item_creators`` writes ``history_records_<entity_class>.json``;
    ``plot_history_records`` reads that file back and draws one pie chart per
    top-level key in it.
    """

    def extract_item_creators(self, entity_class: str, entity_type: str = "workshops"):
        """Query all instances of *entity_class* and record their creators.

        Args:
            entity_class: Wikidata QID of the class (used as ``wd:<QID>`` in
                the ``wdt:P31`` query and in the output file name).
            entity_type: key under which the records are stored in the JSON
                file; ``plot_history_records`` uses it in the chart title.
                Defaults to ``"workshops"`` (the previously hard-coded key).

        Side effects:
            Prints progress per item, writes
            ``history_records_<entity_class>.json`` and prints the creator
            frequencies. Items whose page history cannot be fetched are
            reported and skipped.
        """
        query = f"""
            SELECT ?item
            WHERE
            {{
              ?item wdt:P31 wd:{entity_class}.
            }}
            """
        wd = SPARQL("https://query.wikidata.org/sparql")
        lod = wd.queryAsListOfDicts(query)
        # entity URIs look like http://www.wikidata.org/entity/Q123 -> keep "Q123"
        qids = [d["item"].rsplit("/", 1)[-1] for d in lod]
        history_records = []
        total = len(qids)
        for i, qid in enumerate(qids, 1):
            print(f"{i:04}/{total:04}…", end="")
            if i % 100 == 0:
                # pause after every 100 items -- presumably to avoid being
                # throttled by the wiki API (TODO confirm the required delay)
                time.sleep(30)
            try:
                page_history = PageHistory(pageTitle=qid, wikiId="wikidata")
                hr = HistoryRecord(
                    qid=qid,
                    creator=page_history.getFirstUser(),
                    # set comprehension de-duplicates users with several revisions
                    editors=list({pr.user for pr in page_history.revisions}),
                )
                history_records.append(hr)
                print("✓")
            except Exception as e:
                # best effort: one failing item must not abort the whole run
                print(f"failed ({e})")
        with open(f"history_records_{entity_class}.json", mode="w", encoding="utf-8") as fp:
            json.dump({entity_type: history_records}, fp, cls=EnhancedJSONEncoder, indent=4)
        print(Counter(hr.creator for hr in history_records).most_common())

    def plot_history_records(
        self,
        entity_class: str,
        min_count: int = 15,
        highlight: typing.Sequence[str] = ("Tholzheim", "WolfgangFahl", "Seppl2013"),
    ):
        """Plot the creator distribution stored for *entity_class*.

        Args:
            entity_class: Wikidata QID; selects
                ``history_records_<entity_class>.json`` as input.
            min_count: creators with fewer items than this are merged into
                the ``"others"`` wedge.
            highlight: user names whose wedges are pulled out of the pie.

        Raises:
            FileNotFoundError: if the records file has not been written yet.
        """
        # must be an f-string so the file written by extract_item_creators is found
        logs = f"history_records_{entity_class}.json"
        with open(logs, mode="r", encoding="utf-8") as fp:
            record = json.load(fp)
        highlighted = set(highlight)
        for entity_type, lod in record.items():
            history_records = [HistoryRecord(**d) for d in lod]
            creator_counts = Counter(hr.creator for hr in history_records).most_common()
            print(creator_counts)
            if not creator_counts:
                # nothing to draw; an all-zero pie is meaningless
                continue
            distribution = {"others": 0}
            for label, count in creator_counts:
                if count < min_count:
                    distribution["others"] += count
                else:
                    distribution[label] = count
            if distribution["others"] == 0:
                # avoid an empty, overlapping "others 0.0%" label
                del distribution["others"]
            labels = list(distribution.keys())
            sizes = list(distribution.values())

            _, ax1 = plt.subplots()
            explode = [0.1 if label in highlighted else 0 for label in labels]
            ax1.pie(sizes, explode=explode, labels=labels, autopct='%1.1f%%', startangle=90)
            ax1.axis('equal')  # Equal aspect ratio ensures that pie is drawn as a circle.
            # NOTE(review): the "As of" date is fixed text, not the extraction date
            plt.title(f"Distribution of Item Creators of {entity_type.title()}\n (As of 2023-03-11 with {len(lod)} {entity_type})")
            plt.legend(loc="lower right")
            plt.tight_layout()
            plt.show()