CEUR-WS Wikidata Contributions


Tim Holzheim

Workshops

Distribution of workshop wd creators.png

# Select every item that is an instance of (wdt:P31) wd:Q40444998,
# the class this page uses for workshops.
SELECT ?item 
WHERE 
{
   ?item wdt:P31 wd:Q40444998.
}

Conferences

Distribution of conference wd creators.png

# Select every item that is an instance of (wdt:P31) wd:Q2020153,
# the class this page uses for conferences.
SELECT ?item 
WHERE 
{
   ?item wdt:P31 wd:Q2020153.
}

Proceedings

Distribution of proceedings wd creators.png

# Select every item that is an instance of (wdt:P31) wd:Q1143604,
# the class this page uses for proceedings.
SELECT ?item 
WHERE 
{
   ?item wdt:P31 wd:Q1143604.
}

Sourcecode

class EnhancedJSONEncoder(json.JSONEncoder):
    """JSON encoder that serializes dataclass objects as plain dictionaries."""

    def default(self, o):
        """Return a JSON-serializable stand-in for *o*.

        Dataclass objects are converted with ``dataclasses.asdict``; every
        other object is handed to ``json.JSONEncoder.default``.
        """
        if not dataclasses.is_dataclass(o):
            # Not a dataclass: let the base encoder decide (it raises TypeError).
            return super().default(o)
        return dataclasses.asdict(o)

@dataclass
class HistoryRecord:
    """Creator and editors of one Wikidata item, taken from its page history."""

    # Q-identifier of the item (entity URI with the prefix stripped)
    qid: str
    # user returned by PageHistory.getFirstUser() for the item's page
    creator: str
    # distinct users found in the page's revisions (built from a set, so unordered)
    editors: typing.List[str]


class WdItemCreator:
    """Collect and plot who created the Wikidata items of a given class."""

    def extract_item_creators(self, entity_class: str, entity_type: str = "workshops"):
        """Fetch the page history of every instance of *entity_class* and store it.

        All items with ``wdt:P31 wd:<entity_class>`` are queried from the
        Wikidata SPARQL endpoint. For each item the creator and the distinct
        editors are read from its page history. The records are written to
        ``history_records_<entity_class>.json`` and the creator counts are
        printed.

        Args:
            entity_class: Q-identifier of the class whose instances are
                examined, e.g. ``"Q40444998"``.
            entity_type: key under which the records are stored in the JSON
                file; ``plot_history_records`` uses it in the plot title.
                Defaults to ``"workshops"``, the key that was previously
                hard-coded.
        """
        query = f"""
        SELECT ?item 
        WHERE 
        {{
          ?item wdt:P31 wd:{entity_class}.
        }}
        """
        wd = SPARQL("https://query.wikidata.org/sparql")
        lod = wd.queryAsListOfDicts(query)
        # The endpoint returns full entity URIs; keep only the trailing Q-id.
        entity_prefix = "http://www.wikidata.org/entity/"
        qids = [d["item"][len(entity_prefix):] for d in lod]
        history_records = []
        total = len(qids)
        for i, qid in enumerate(qids, 1):
            print(f"{i:04}/{total:04}…", end="")
            if i % 100 == 0:
                # Pause after every 100 items (presumably to stay below the
                # wiki's request limits -- confirm).
                time.sleep(30)
            try:
                page_history = PageHistory(pageTitle=qid, wikiId="wikidata")
                hr = HistoryRecord(
                    qid=qid,
                    creator=page_history.getFirstUser(),
                    editors=list({pr.user for pr in page_history.revisions}),
                )
            except Exception as e:
                # Best effort: report the failing item and continue with the rest.
                print(f"failed ({e})")
            else:
                history_records.append(hr)
                print("✓")
        with open(f"history_records_{entity_class}.json", mode="w", encoding="utf-8") as fp:
            json.dump({entity_type: history_records}, fp, cls=EnhancedJSONEncoder, indent=4)
        print(Counter([hr.creator for hr in history_records]).most_common())

    def plot_history_records(self, entity_class: str):
        """Plot the creator distribution stored for *entity_class* as pie charts.

        Reads ``history_records_<entity_class>.json`` as written by
        ``extract_item_creators`` and shows one pie chart per top-level key
        of that file. Creators with fewer than 15 items are merged into the
        ``"others"`` slice.

        Args:
            entity_class: Q-identifier that was passed to
                ``extract_item_creators``.

        Raises:
            FileNotFoundError: if no record file exists for *entity_class*.
        """
        # Must be an f-string so the file written by extract_item_creators is found.
        logs = f"history_records_{entity_class}.json"
        with open(logs, mode="r", encoding="utf-8") as fp:
            record = json.load(fp)
        highlighted = ("Tholzheim", "WolfgangFahl", "Seppl2013")
        for entity_type, lod in record.items():
            history_records = [HistoryRecord(**d) for d in lod]
            creator_counts = Counter([hr.creator for hr in history_records]).most_common()
            print(creator_counts)
            distribution = {"others": 0}
            for label, count in creator_counts:
                if count < 15:
                    # Too small for a readable slice of its own.
                    distribution["others"] += count
                else:
                    distribution[label] = count
            labels = list(distribution)
            sizes = list(distribution.values())

            _, ax1 = plt.subplots()
            # Pull the highlighted users' slices out of the pie.
            explode = [0.1 if label in highlighted else 0 for label in labels]
            ax1.pie(sizes, explode=explode, labels=labels, autopct='%1.1f%%', startangle=90)
            ax1.axis('equal')  # Equal aspect ratio ensures that pie is drawn as a circle.
            # The date in the title is a fixed snapshot date, not computed.
            plt.title(f"Distribution of Item Creators of {entity_type.title()}\n (As of 2023-03-11 with {len(lod)} {entity_type})")
            plt.legend(loc="lower right")
            plt.tight_layout()
            plt.show()

Workshops

Distribution of workshop wd creators.png

# Select every item that is an instance of (wdt:P31) wd:Q40444998,
# the class this page uses for workshops.
SELECT ?item 
WHERE 
{
   ?item wdt:P31 wd:Q40444998.
}

Conferences

Distribution of conference wd creators.png

# Select every item that is an instance of (wdt:P31) wd:Q2020153,
# the class this page uses for conferences.
SELECT ?item 
WHERE 
{
   ?item wdt:P31 wd:Q2020153.
}

Proceedings

Distribution of proceedings wd creators.png

# Select every item that is an instance of (wdt:P31) wd:Q1143604,
# the class this page uses for proceedings.
SELECT ?item 
WHERE 
{
   ?item wdt:P31 wd:Q1143604.
}

Sourcecode

class EnhancedJSONEncoder(json.JSONEncoder):
    """JSON encoder that serializes dataclass objects as plain dictionaries."""

    def default(self, o):
        """Return a JSON-serializable stand-in for *o*.

        Dataclass objects are converted with ``dataclasses.asdict``; every
        other object is handed to ``json.JSONEncoder.default``.
        """
        if not dataclasses.is_dataclass(o):
            # Not a dataclass: let the base encoder decide (it raises TypeError).
            return super().default(o)
        return dataclasses.asdict(o)

@dataclass
class HistoryRecord:
    """Creator and editors of one Wikidata item, taken from its page history."""

    # Q-identifier of the item (entity URI with the prefix stripped)
    qid: str
    # user returned by PageHistory.getFirstUser() for the item's page
    creator: str
    # distinct users found in the page's revisions (built from a set, so unordered)
    editors: typing.List[str]


class WdItemCreator:
    """Collect and plot who created the Wikidata items of a given class."""

    def extract_item_creators(self, entity_class: str, entity_type: str = "workshops"):
        """Fetch the page history of every instance of *entity_class* and store it.

        All items with ``wdt:P31 wd:<entity_class>`` are queried from the
        Wikidata SPARQL endpoint. For each item the creator and the distinct
        editors are read from its page history. The records are written to
        ``history_records_<entity_class>.json`` and the creator counts are
        printed.

        Args:
            entity_class: Q-identifier of the class whose instances are
                examined, e.g. ``"Q40444998"``.
            entity_type: key under which the records are stored in the JSON
                file; ``plot_history_records`` uses it in the plot title.
                Defaults to ``"workshops"``, the key that was previously
                hard-coded.
        """
        query = f"""
        SELECT ?item 
        WHERE 
        {{
          ?item wdt:P31 wd:{entity_class}.
        }}
        """
        wd = SPARQL("https://query.wikidata.org/sparql")
        lod = wd.queryAsListOfDicts(query)
        # The endpoint returns full entity URIs; keep only the trailing Q-id.
        entity_prefix = "http://www.wikidata.org/entity/"
        qids = [d["item"][len(entity_prefix):] for d in lod]
        history_records = []
        total = len(qids)
        for i, qid in enumerate(qids, 1):
            print(f"{i:04}/{total:04}…", end="")
            if i % 100 == 0:
                # Pause after every 100 items (presumably to stay below the
                # wiki's request limits -- confirm).
                time.sleep(30)
            try:
                page_history = PageHistory(pageTitle=qid, wikiId="wikidata")
                hr = HistoryRecord(
                    qid=qid,
                    creator=page_history.getFirstUser(),
                    editors=list({pr.user for pr in page_history.revisions}),
                )
            except Exception as e:
                # Best effort: report the failing item and continue with the rest.
                print(f"failed ({e})")
            else:
                history_records.append(hr)
                print("✓")
        with open(f"history_records_{entity_class}.json", mode="w", encoding="utf-8") as fp:
            json.dump({entity_type: history_records}, fp, cls=EnhancedJSONEncoder, indent=4)
        print(Counter([hr.creator for hr in history_records]).most_common())

    def plot_history_records(self, entity_class: str):
        """Plot the creator distribution stored for *entity_class* as pie charts.

        Reads ``history_records_<entity_class>.json`` as written by
        ``extract_item_creators`` and shows one pie chart per top-level key
        of that file. Creators with fewer than 15 items are merged into the
        ``"others"`` slice.

        Args:
            entity_class: Q-identifier that was passed to
                ``extract_item_creators``.

        Raises:
            FileNotFoundError: if no record file exists for *entity_class*.
        """
        # Must be an f-string so the file written by extract_item_creators is found.
        logs = f"history_records_{entity_class}.json"
        with open(logs, mode="r", encoding="utf-8") as fp:
            record = json.load(fp)
        highlighted = ("Tholzheim", "WolfgangFahl", "Seppl2013")
        for entity_type, lod in record.items():
            history_records = [HistoryRecord(**d) for d in lod]
            creator_counts = Counter([hr.creator for hr in history_records]).most_common()
            print(creator_counts)
            distribution = {"others": 0}
            for label, count in creator_counts:
                if count < 15:
                    # Too small for a readable slice of its own.
                    distribution["others"] += count
                else:
                    distribution[label] = count
            labels = list(distribution)
            sizes = list(distribution.values())

            _, ax1 = plt.subplots()
            # Pull the highlighted users' slices out of the pie.
            explode = [0.1 if label in highlighted else 0 for label in labels]
            ax1.pie(sizes, explode=explode, labels=labels, autopct='%1.1f%%', startangle=90)
            ax1.axis('equal')  # Equal aspect ratio ensures that pie is drawn as a circle.
            # The date in the title is a fixed snapshot date, not computed.
            plt.title(f"Distribution of Item Creators of {entity_type.title()}\n (As of 2023-03-11 with {len(lod)} {entity_type})")
            plt.legend(loc="lower right")
            plt.tight_layout()
            plt.show()
🖨 🚪