Difference between revisions of "Workdocumentation 2021-05-03"
(→Word) |
|||
Line 215: | Line 215: | ||
That P is the most common first letter could be since the word "Proceedings" starts with "P" and might be one of the most common words | That P is the most common first letter could be since the word "Proceedings" starts with "P" and might be one of the most common words | ||
== Word == | == Word == | ||
− | + | === first Letter === | |
+ | {| class="wikitable" style="text-align: left;" | ||
+ | |+ <!-- caption --> | ||
+ | |- | ||
+ | ! # !! key !! align="right"| count !! align="right"| % | ||
+ | |- | ||
+ | | total || 90 || align="right"| 809260 || align="right"| | ||
+ | |- | ||
+ | | 1 || 2 || align="right"| 96484 || align="right"| 11.92 | ||
+ | |- | ||
+ | | 2 || I || align="right"| 65214 || align="right"| 8.06 | ||
+ | |- | ||
+ | | 3 || C || align="right"| 62697 || align="right"| 7.75 | ||
+ | |- | ||
+ | | 4 || S || align="right"| 59363 || align="right"| 7.34 | ||
+ | |- | ||
+ | | 5 || P || align="right"| 50577 || align="right"| 6.25 | ||
+ | |- | ||
+ | | 6 || o || align="right"| 47073 || align="right"| 5.82 | ||
+ | |- | ||
+ | | 7 || A || align="right"| 40291 || align="right"| 4.98 | ||
+ | |- | ||
+ | | 8 || 1 || align="right"| 33935 || align="right"| 4.19 | ||
+ | |- | ||
+ | | 9 || a || align="right"| 26085 || align="right"| 3.22 | ||
+ | |- | ||
+ | | 10 || M || align="right"| 25474 || align="right"| 3.15 | ||
+ | |- | ||
+ | | 11 || T || align="right"| 19690 || align="right"| 2.43 | ||
+ | |- | ||
+ | | 12 || W || align="right"| 19391 || align="right"| 2.40 | ||
+ | |- | ||
+ | | 13 || t || align="right"| 18726 || align="right"| 2.31 | ||
+ | |- | ||
+ | | 14 || D || align="right"| 17672 || align="right"| 2.18 | ||
+ | |- | ||
+ | | 15 || E || align="right"| 16201 || align="right"| 2.00 | ||
+ | |- | ||
+ | | 16 || U || align="right"| 15969 || align="right"| 1.97 | ||
+ | |- | ||
+ | | 17 || J || align="right"| 15688 || align="right"| 1.94 | ||
+ | |- | ||
+ | | 18 || N || align="right"| 15309 || align="right"| 1.89 | ||
+ | |- | ||
+ | | 19 || - || align="right"| 14558 || align="right"| 1.80 | ||
+ | |- | ||
+ | | 20 || F || align="right"| 13717 || align="right"| 1.70 | ||
+ | |- | ||
+ | | 21 || R || align="right"| 13104 || align="right"| 1.62 | ||
+ | |- | ||
+ | | 22 || B || align="right"| 11044 || align="right"| 1.36 | ||
+ | |- | ||
+ | | 23 || L || align="right"| 10255 || align="right"| 1.27 | ||
+ | |- | ||
+ | | 24 || G || align="right"| 10170 || align="right"| 1.26 | ||
+ | |- | ||
+ | | 25 || O || align="right"| 9972 || align="right"| 1.23 | ||
+ | |- | ||
+ | | 26 || V || align="right"| 8218 || align="right"| 1.02 | ||
+ | |- | ||
+ | | 27 || H || align="right"| 8078 || align="right"| 1.00 | ||
+ | |- | ||
+ | | 28 || i || align="right"| 7781 || align="right"| 0.96 | ||
+ | |- | ||
+ | | 29 || 3 || align="right"| 5577 || align="right"| 0.69 | ||
+ | |- | ||
+ | | 30 || f || align="right"| 4769 || align="right"| 0.59 | ||
+ | |- | ||
+ | | 31 || ( || align="right"| 4738 || align="right"| 0.59 | ||
+ | |- | ||
+ | | 32 || K || align="right"| 4666 || align="right"| 0.58 | ||
+ | |- | ||
+ | | 33 || 4 || align="right"| 3501 || align="right"| 0.43 | ||
+ | |- | ||
+ | | 34 || 5 || align="right"| 3107 || align="right"| 0.38 | ||
+ | |- | ||
+ | | 35 || 6 || align="right"| 2910 || align="right"| 0.36 | ||
+ | |- | ||
+ | | 36 || 8 || align="right"| 2875 || align="right"| 0.36 | ||
+ | |- | ||
+ | | 37 || 7 || align="right"| 2832 || align="right"| 0.35 | ||
+ | |- | ||
+ | | 38 || 9 || align="right"| 2826 || align="right"| 0.35 | ||
+ | |- | ||
+ | | 39 || ' || align="right"| 2311 || align="right"| 0.29 | ||
+ | |- | ||
+ | | 40 || w || align="right"| 2124 || align="right"| 0.26 | ||
+ | |- | ||
+ | | 41 || d || align="right"| 2000 || align="right"| 0.25 | ||
+ | |- | ||
+ | | 42 || c || align="right"| 1614 || align="right"| 0.20 | ||
+ | |- | ||
+ | | 43 || Q || align="right"| 1210 || align="right"| 0.15 | ||
+ | |- | ||
+ | | 44 || e || align="right"| 975 || align="right"| 0.12 | ||
+ | |- | ||
+ | | 45 || & || align="right"| 929 || align="right"| 0.11 | ||
+ | |- | ||
+ | | 46 || X || align="right"| 802 || align="right"| 0.10 | ||
+ | |- | ||
+ | | 47 || u || align="right"| 695 || align="right"| 0.09 | ||
+ | |- | ||
+ | | 48 || Y || align="right"| 688 || align="right"| 0.09 | ||
+ | |- | ||
+ | | 49 || Z || align="right"| 686 || align="right"| 0.08 | ||
+ | |- | ||
+ | | 50 || 0 || align="right"| 632 || align="right"| 0.08 | ||
+ | |} | ||
+ | === word === | ||
+ | {| class="wikitable" style="text-align: left;" | ||
+ | |+ <!-- caption --> | ||
+ | |- | ||
+ | ! # !! key !! align="right"| count !! align="right"| % | ||
+ | |- | ||
+ | | total || 30492 || align="right"| 809260 || align="right"| | ||
+ | |- | ||
+ | | 1 || International || align="right"| 26360 || align="right"| 3.26 | ||
+ | |- | ||
+ | | 2 || on || align="right"| 25486 || align="right"| 3.15 | ||
+ | |- | ||
+ | | 3 || and || align="right"| 24329 || align="right"| 3.01 | ||
+ | |- | ||
+ | | 4 || Proceedings || align="right"| 22995 || align="right"| 2.84 | ||
+ | |- | ||
+ | | 5 || of || align="right"| 21438 || align="right"| 2.65 | ||
+ | |- | ||
+ | | 6 || the || align="right"| 17733 || align="right"| 2.19 | ||
+ | |- | ||
+ | | 7 || Conference || align="right"| 14916 || align="right"| 1.84 | ||
+ | |- | ||
+ | | 8 || - || align="right"| 14527 || align="right"| 1.80 | ||
+ | |- | ||
+ | | 9 || USA, || align="right"| 9163 || align="right"| 1.13 | ||
+ | |- | ||
+ | | 10 || Conference, || align="right"| 8668 || align="right"| 1.07 | ||
+ | |- | ||
+ | | 11 || Workshop || align="right"| 7152 || align="right"| 0.88 | ||
+ | |- | ||
+ | | 12 || in || align="right"| 7106 || align="right"| 0.88 | ||
+ | |- | ||
+ | | 13 || September || align="right"| 6424 || align="right"| 0.79 | ||
+ | |- | ||
+ | | 14 || June || align="right"| 5651 || align="right"| 0.70 | ||
+ | |- | ||
+ | | 15 || October || align="right"| 4955 || align="right"| 0.61 | ||
+ | |- | ||
+ | | 16 || IEEE || align="right"| 4731 || align="right"| 0.58 | ||
+ | |- | ||
+ | | 17 || Symposium || align="right"| 4426 || align="right"| 0.55 | ||
+ | |- | ||
+ | | 18 || July || align="right"| 4349 || align="right"| 0.54 | ||
+ | |- | ||
+ | | 19 || Information || align="right"| 4170 || align="right"| 0.52 | ||
+ | |- | ||
+ | | 20 || November || align="right"| 3972 || align="right"| 0.49 | ||
+ | |- | ||
+ | | 21 || for || align="right"| 3798 || align="right"| 0.47 | ||
+ | |- | ||
+ | | 22 || August || align="right"| 3756 || align="right"| 0.46 | ||
+ | |- | ||
+ | | 23 || Computer || align="right"| 3685 || align="right"| 0.46 | ||
+ | |- | ||
+ | | 24 || Systems || align="right"| 3634 || align="right"| 0.45 | ||
+ | |- | ||
+ | | 25 || Papers || align="right"| 3411 || align="right"| 0.42 | ||
+ | |- | ||
+ | | 26 || Systems, || align="right"| 3373 || align="right"| 0.42 | ||
+ | |- | ||
+ | | 27 || May || align="right"| 3254 || align="right"| 0.40 | ||
+ | |- | ||
+ | | 28 || 2018, || align="right"| 3248 || align="right"| 0.40 | ||
+ | |- | ||
+ | | 29 || 2017, || align="right"| 3036 || align="right"| 0.38 | ||
+ | |- | ||
+ | | 30 || 2019, || align="right"| 2983 || align="right"| 0.37 | ||
+ | |- | ||
+ | | 31 || 2016, || align="right"| 2956 || align="right"| 0.37 | ||
+ | |- | ||
+ | | 32 || Revised || align="right"| 2948 || align="right"| 0.36 | ||
+ | |- | ||
+ | | 33 || Selected || align="right"| 2881 || align="right"| 0.36 | ||
+ | |- | ||
+ | | 34 || December || align="right"| 2879 || align="right"| 0.36 | ||
+ | |- | ||
+ | | 35 || Workshop, || align="right"| 2827 || align="right"| 0.35 | ||
+ | |- | ||
+ | | 36 || 2015, || align="right"| 2794 || align="right"| 0.35 | ||
+ | |- | ||
+ | | 37 || Software || align="right"| 2704 || align="right"| 0.33 | ||
+ | |- | ||
+ | | 38 || ACM || align="right"| 2687 || align="right"| 0.33 | ||
+ | |- | ||
+ | | 39 || April || align="right"| 2652 || align="right"| 0.33 | ||
+ | |- | ||
+ | | 40 || Computing || align="right"| 2373 || align="right"| 0.29 | ||
+ | |- | ||
+ | | 41 || China, || align="right"| 2339 || align="right"| 0.29 | ||
+ | |- | ||
+ | | 42 || 2014, || align="right"| 2321 || align="right"| 0.29 | ||
+ | |- | ||
+ | | 43 || Germany, || align="right"| 2319 || align="right"| 0.29 | ||
+ | |- | ||
+ | | 44 || Part || align="right"| 2240 || align="right"| 0.28 | ||
+ | |- | ||
+ | | 45 || 2013, || align="right"| 2214 || align="right"| 0.27 | ||
+ | |- | ||
+ | | 46 || 2011, || align="right"| 2207 || align="right"| 0.27 | ||
+ | |- | ||
+ | | 47 || 2010, || align="right"| 2106 || align="right"| 0.26 | ||
+ | |- | ||
+ | | 48 || 2015 || align="right"| 2101 || align="right"| 0.26 | ||
+ | |- | ||
+ | | 49 || Italy, || align="right"| 2072 || align="right"| 0.26 | ||
+ | |- | ||
+ | | 50 || 2009, || align="right"| 2056 || align="right"| 0.25 | ||
+ | |} | ||
+ | === Ordinal === | ||
+ | {| class="wikitable" style="text-align: left;" | ||
+ | |+ <!-- caption --> | ||
+ | |- | ||
+ | ! # !! align="right"| key !! align="right"| count !! align="right"| % | ||
+ | |- | ||
+ | | total || align="right"| 93 || align="right"| 809260 || align="right"| | ||
+ | |- | ||
+ | | 1 || align="right"| || align="right"| 781321 || align="right"| 96.55 | ||
+ | |- | ||
+ | | 2 || align="right"| 2 || align="right"| 2337 || align="right"| 0.29 | ||
+ | |- | ||
+ | | 3 || align="right"| 3 || align="right"| 2152 || align="right"| 0.27 | ||
+ | |- | ||
+ | | 4 || align="right"| 1 || align="right"| 2099 || align="right"| 0.26 | ||
+ | |- | ||
+ | | 5 || align="right"| 4 || align="right"| 1955 || align="right"| 0.24 | ||
+ | |- | ||
+ | | 6 || align="right"| 5 || align="right"| 1865 || align="right"| 0.23 | ||
+ | |- | ||
+ | | 7 || align="right"| 6 || align="right"| 1716 || align="right"| 0.21 | ||
+ | |- | ||
+ | | 8 || align="right"| 7 || align="right"| 1622 || align="right"| 0.20 | ||
+ | |- | ||
+ | | 9 || align="right"| 8 || align="right"| 1490 || align="right"| 0.18 | ||
+ | |- | ||
+ | | 10 || align="right"| 9 || align="right"| 1451 || align="right"| 0.18 | ||
+ | |- | ||
+ | | 11 || align="right"| 10 || align="right"| 1380 || align="right"| 0.17 | ||
+ | |- | ||
+ | | 12 || align="right"| 14 || align="right"| 979 || align="right"| 0.12 | ||
+ | |- | ||
+ | | 13 || align="right"| 15 || align="right"| 873 || align="right"| 0.11 | ||
+ | |- | ||
+ | | 14 || align="right"| 16 || align="right"| 748 || align="right"| 0.09 | ||
+ | |- | ||
+ | | 15 || align="right"| 17 || align="right"| 680 || align="right"| 0.08 | ||
+ | |- | ||
+ | | 16 || align="right"| 18 || align="right"| 637 || align="right"| 0.08 | ||
+ | |- | ||
+ | | 17 || align="right"| 19 || align="right"| 592 || align="right"| 0.07 | ||
+ | |- | ||
+ | | 18 || align="right"| 20 || align="right"| 509 || align="right"| 0.06 | ||
+ | |- | ||
+ | | 19 || align="right"| 21 || align="right"| 480 || align="right"| 0.06 | ||
+ | |- | ||
+ | | 20 || align="right"| 22 || align="right"| 392 || align="right"| 0.05 | ||
+ | |- | ||
+ | | 21 || align="right"| 23 || align="right"| 379 || align="right"| 0.05 | ||
+ | |- | ||
+ | | 22 || align="right"| 24 || align="right"| 353 || align="right"| 0.04 | ||
+ | |- | ||
+ | | 23 || align="right"| 25 || align="right"| 332 || align="right"| 0.04 | ||
+ | |- | ||
+ | | 24 || align="right"| 26 || align="right"| 289 || align="right"| 0.04 | ||
+ | |- | ||
+ | | 25 || align="right"| 27 || align="right"| 245 || align="right"| 0.03 | ||
+ | |- | ||
+ | | 26 || align="right"| 28 || align="right"| 230 || align="right"| 0.03 | ||
+ | |- | ||
+ | | 27 || align="right"| 30 || align="right"| 198 || align="right"| 0.02 | ||
+ | |- | ||
+ | | 28 || align="right"| 29 || align="right"| 183 || align="right"| 0.02 | ||
+ | |- | ||
+ | | 29 || align="right"| 31 || align="right"| 157 || align="right"| 0.02 | ||
+ | |- | ||
+ | | 30 || align="right"| 11 || align="right"| 130 || align="right"| 0.02 | ||
+ | |- | ||
+ | | 31 || align="right"| 32 || align="right"| 114 || align="right"| 0.01 | ||
+ | |- | ||
+ | | 32 || align="right"| 12 || align="right"| 104 || align="right"| 0.01 | ||
+ | |- | ||
+ | | 33 || align="right"| 13 || align="right"| 100 || align="right"| 0.01 | ||
+ | |- | ||
+ | | 34 || align="right"| 34 || align="right"| 98 || align="right"| 0.01 | ||
+ | |- | ||
+ | | 35 || align="right"| 33 || align="right"| 94 || align="right"| 0.01 | ||
+ | |- | ||
+ | | 36 || align="right"| 35 || align="right"| 83 || align="right"| 0.01 | ||
+ | |- | ||
+ | | 37 || align="right"| 36 || align="right"| 81 || align="right"| 0.01 | ||
+ | |- | ||
+ | | 38 || align="right"| 37 || align="right"| 79 || align="right"| 0.01 | ||
+ | |- | ||
+ | | 39 || align="right"| 38 || align="right"| 77 || align="right"| 0.01 | ||
+ | |- | ||
+ | | 40 || align="right"| 39 || align="right"| 64 || align="right"| 0.01 | ||
+ | |- | ||
+ | | 41 || align="right"| 40 || align="right"| 60 || align="right"| 0.01 | ||
+ | |- | ||
+ | | 42 || align="right"| 60 || align="right"| 55 || align="right"| 0.01 | ||
+ | |- | ||
+ | | 43 || align="right"| 41 || align="right"| 52 || align="right"| 0.01 | ||
+ | |- | ||
+ | | 44 || align="right"| 42 || align="right"| 41 || align="right"| 0.01 | ||
+ | |- | ||
+ | | 45 || align="right"| 44 || align="right"| 31 || align="right"| 0.00 | ||
+ | |- | ||
+ | | 46 || align="right"| 43 || align="right"| 29 || align="right"| 0.00 | ||
+ | |- | ||
+ | | 47 || align="right"| 46 || align="right"| 27 || align="right"| 0.00 | ||
+ | |- | ||
+ | | 48 || align="right"| 47 || align="right"| 25 || align="right"| 0.00 | ||
+ | |- | ||
+ | | 49 || align="right"| 49 || align="right"| 24 || align="right"| 0.00 | ||
+ | |- | ||
+ | | 50 || align="right"| 45 || align="right"| 23 || align="right"| 0.00 | ||
+ | |} | ||
[[Category:Research]] | [[Category:Research]] | ||
[[Category:Experiment]] | [[Category:Experiment]] |
Revision as of 12:55, 3 May 2021
Introduction
The Relevance Matrix approach has been discussed in https://rq.bitplan.com/index.php/Hackathon_2021-04-27#Relevance_Matrix. It might be feasible to create a systematic analysis approach/design and solution approach based on this idea.
By walking the path from 1st decile down the dependency tree at each cell an observation is made:
- how many items fall in this cell?
- what is the category of this cell, if it is not known in advance? E.g. in the Country/Region/City hierarchy we assumed that the categories were known in advance. For the Proceedings Title Parsing Problem we'll try out the approach as if we didn't know the categories yet and start with general parsing categories only.
If we find an element in a cell we'll then categorize it.
Question
What happens if the relevance matrix approach is applied to proceedings title parsing (later: parsing in general)?
Assumption
Following a hierarchy of letter, token, grammatical structure and sentence along the relevance matrix path column first (depth first) leads to interesting observations.
Experiment
Hierarchy of: - Letter - Token - Grammatical structure - Sentence
Input: Proceedings titles of dblp conference entries.
Letter
def testMostCommonFirstLetter(self):
'''
get the most common first letters
'''
dblp,foundEvents=self.getEvents()
self.assertTrue(foundEvents>43950)
# collect first letters
counter=Counter()
total=0
for eventId in dblp.em.events:
if eventId.startswith("conf"):
event=dblp.em.events[eventId]
first=ord(event.title[0])
counter[first]+=1
total+=1
bins=len(counter.keys())
print(f"found {bins} different first letters in {total} titles")
for o,count in counter.most_common(bins):
c=chr(o)
print (f"{c}: {count:5} {count/total*100:4.1f} %")
read 43976 Events from dblp in 0.2 s found 46 different first letters in 43398 titles P: 12599 29.0 % 2: 3526 8.1 % I: 3515 8.1 % A: 3296 7.6 % C: 2333 5.4 % S: 2260 5.2 % 1: 2105 4.9 % T: 1559 3.6 % M: 1312 3.0 % E: 1252 2.9 % F: 1246 2.9 % D: 1177 2.7 % R: 624 1.4 % H: 578 1.3 % N: 566 1.3 % 3: 564 1.3 % W: 522 1.2 % L: 502 1.2 % G: 501 1.2 % B: 479 1.1 % 4: 354 0.8 % V: 334 0.8 % K: 257 0.6 % O: 255 0.6 % 5: 252 0.6 % U: 236 0.5 % 9: 215 0.5 % 6: 211 0.5 % 7: 199 0.5 % 8: 187 0.4 % J: 150 0.3 % X: 88 0.2 % Q: 76 0.2 % e: 19 0.0 % Z: 13 0.0 % i: 12 0.0 % p: 7 0.0 % «: 5 0.0 % (: 3 0.0 % ": 2 0.0 % d: 2 0.0 % f: 1 0.0 % t: 1 0.0 % s: 1 0.0 % ': 1 0.0 % Y: 1 0.0 % ---------------------------------------------------------------------- Ran 1 test in 0.557s
Most common first letters
# | key | count | % |
---|---|---|---|
total | 46 | 43398 | |
1 | P | 12599 | 29.03 |
2 | 2 | 3526 | 8.12 |
3 | I | 3515 | 8.10 |
4 | A | 3296 | 7.59 |
5 | C | 2333 | 5.38 |
6 | S | 2260 | 5.21 |
7 | 1 | 2105 | 4.85 |
8 | T | 1559 | 3.59 |
9 | M | 1312 | 3.02 |
10 | E | 1252 | 2.88 |
11 | F | 1246 | 2.87 |
12 | D | 1177 | 2.71 |
13 | R | 624 | 1.44 |
14 | H | 578 | 1.33 |
15 | N | 566 | 1.30 |
16 | 3 | 564 | 1.30 |
17 | W | 522 | 1.20 |
18 | L | 502 | 1.16 |
19 | G | 501 | 1.15 |
20 | B | 479 | 1.10 |
21 | 4 | 354 | 0.82 |
22 | V | 334 | 0.77 |
23 | K | 257 | 0.59 |
24 | O | 255 | 0.59 |
25 | 5 | 252 | 0.58 |
26 | U | 236 | 0.54 |
27 | 9 | 215 | 0.50 |
28 | 6 | 211 | 0.49 |
29 | 7 | 199 | 0.46 |
30 | 8 | 187 | 0.43 |
31 | J | 150 | 0.35 |
32 | X | 88 | 0.20 |
33 | Q | 76 | 0.18 |
34 | e | 19 | 0.04 |
35 | Z | 13 | 0.03 |
36 | i | 12 | 0.03 |
37 | p | 7 | 0.02 |
38 | « | 5 | 0.01 |
39 | ( | 3 | 0.01 |
40 | " | 2 | 0.00 |
41 | d | 2 | 0.00 |
42 | f | 1 | 0.00 |
43 | t | 1 | 0.00 |
44 | s | 1 | 0.00 |
45 | ' | 1 | 0.00 |
46 | Y | 1 | 0.00 |
Observation for Letter
Top categories: Letter and Digit.
Relevance Matrix
top 10% | top 20% | top 30% | |
---|---|---|---|
Letter | 1:P | 1:P | 2: P, 2 |
Token | |||
Grammar structure |
Interpretation for Letter
P is probably the most common first letter because the word "Proceedings" starts with "P" and might be one of the most common words.
Word
first Letter
# | key | count | % |
---|---|---|---|
total | 90 | 809260 | |
1 | 2 | 96484 | 11.92 |
2 | I | 65214 | 8.06 |
3 | C | 62697 | 7.75 |
4 | S | 59363 | 7.34 |
5 | P | 50577 | 6.25 |
6 | o | 47073 | 5.82 |
7 | A | 40291 | 4.98 |
8 | 1 | 33935 | 4.19 |
9 | a | 26085 | 3.22 |
10 | M | 25474 | 3.15 |
11 | T | 19690 | 2.43 |
12 | W | 19391 | 2.40 |
13 | t | 18726 | 2.31 |
14 | D | 17672 | 2.18 |
15 | E | 16201 | 2.00 |
16 | U | 15969 | 1.97 |
17 | J | 15688 | 1.94 |
18 | N | 15309 | 1.89 |
19 | - | 14558 | 1.80 |
20 | F | 13717 | 1.70 |
21 | R | 13104 | 1.62 |
22 | B | 11044 | 1.36 |
23 | L | 10255 | 1.27 |
24 | G | 10170 | 1.26 |
25 | O | 9972 | 1.23 |
26 | V | 8218 | 1.02 |
27 | H | 8078 | 1.00 |
28 | i | 7781 | 0.96 |
29 | 3 | 5577 | 0.69 |
30 | f | 4769 | 0.59 |
31 | ( | 4738 | 0.59 |
32 | K | 4666 | 0.58 |
33 | 4 | 3501 | 0.43 |
34 | 5 | 3107 | 0.38 |
35 | 6 | 2910 | 0.36 |
36 | 8 | 2875 | 0.36 |
37 | 7 | 2832 | 0.35 |
38 | 9 | 2826 | 0.35 |
39 | ' | 2311 | 0.29 |
40 | w | 2124 | 0.26 |
41 | d | 2000 | 0.25 |
42 | c | 1614 | 0.20 |
43 | Q | 1210 | 0.15 |
44 | e | 975 | 0.12 |
45 | & | 929 | 0.11 |
46 | X | 802 | 0.10 |
47 | u | 695 | 0.09 |
48 | Y | 688 | 0.09 |
49 | Z | 686 | 0.08 |
50 | 0 | 632 | 0.08 |
word
# | key | count | % |
---|---|---|---|
total | 30492 | 809260 | |
1 | International | 26360 | 3.26 |
2 | on | 25486 | 3.15 |
3 | and | 24329 | 3.01 |
4 | Proceedings | 22995 | 2.84 |
5 | of | 21438 | 2.65 |
6 | the | 17733 | 2.19 |
7 | Conference | 14916 | 1.84 |
8 | - | 14527 | 1.80 |
9 | USA, | 9163 | 1.13 |
10 | Conference, | 8668 | 1.07 |
11 | Workshop | 7152 | 0.88 |
12 | in | 7106 | 0.88 |
13 | September | 6424 | 0.79 |
14 | June | 5651 | 0.70 |
15 | October | 4955 | 0.61 |
16 | IEEE | 4731 | 0.58 |
17 | Symposium | 4426 | 0.55 |
18 | July | 4349 | 0.54 |
19 | Information | 4170 | 0.52 |
20 | November | 3972 | 0.49 |
21 | for | 3798 | 0.47 |
22 | August | 3756 | 0.46 |
23 | Computer | 3685 | 0.46 |
24 | Systems | 3634 | 0.45 |
25 | Papers | 3411 | 0.42 |
26 | Systems, | 3373 | 0.42 |
27 | May | 3254 | 0.40 |
28 | 2018, | 3248 | 0.40 |
29 | 2017, | 3036 | 0.38 |
30 | 2019, | 2983 | 0.37 |
31 | 2016, | 2956 | 0.37 |
32 | Revised | 2948 | 0.36 |
33 | Selected | 2881 | 0.36 |
34 | December | 2879 | 0.36 |
35 | Workshop, | 2827 | 0.35 |
36 | 2015, | 2794 | 0.35 |
37 | Software | 2704 | 0.33 |
38 | ACM | 2687 | 0.33 |
39 | April | 2652 | 0.33 |
40 | Computing | 2373 | 0.29 |
41 | China, | 2339 | 0.29 |
42 | 2014, | 2321 | 0.29 |
43 | Germany, | 2319 | 0.29 |
44 | Part | 2240 | 0.28 |
45 | 2013, | 2214 | 0.27 |
46 | 2011, | 2207 | 0.27 |
47 | 2010, | 2106 | 0.26 |
48 | 2015 | 2101 | 0.26 |
49 | Italy, | 2072 | 0.26 |
50 | 2009, | 2056 | 0.25 |
Ordinal
# | key | count | % |
---|---|---|---|
total | 93 | 809260 | |
1 | 781321 | 96.55 | |
2 | 2 | 2337 | 0.29 |
3 | 3 | 2152 | 0.27 |
4 | 1 | 2099 | 0.26 |
5 | 4 | 1955 | 0.24 |
6 | 5 | 1865 | 0.23 |
7 | 6 | 1716 | 0.21 |
8 | 7 | 1622 | 0.20 |
9 | 8 | 1490 | 0.18 |
10 | 9 | 1451 | 0.18 |
11 | 10 | 1380 | 0.17 |
12 | 14 | 979 | 0.12 |
13 | 15 | 873 | 0.11 |
14 | 16 | 748 | 0.09 |
15 | 17 | 680 | 0.08 |
16 | 18 | 637 | 0.08 |
17 | 19 | 592 | 0.07 |
18 | 20 | 509 | 0.06 |
19 | 21 | 480 | 0.06 |
20 | 22 | 392 | 0.05 |
21 | 23 | 379 | 0.05 |
22 | 24 | 353 | 0.04 |
23 | 25 | 332 | 0.04 |
24 | 26 | 289 | 0.04 |
25 | 27 | 245 | 0.03 |
26 | 28 | 230 | 0.03 |
27 | 30 | 198 | 0.02 |
28 | 29 | 183 | 0.02 |
29 | 31 | 157 | 0.02 |
30 | 11 | 130 | 0.02 |
31 | 32 | 114 | 0.01 |
32 | 12 | 104 | 0.01 |
33 | 13 | 100 | 0.01 |
34 | 34 | 98 | 0.01 |
35 | 33 | 94 | 0.01 |
36 | 35 | 83 | 0.01 |
37 | 36 | 81 | 0.01 |
38 | 37 | 79 | 0.01 |
39 | 38 | 77 | 0.01 |
40 | 39 | 64 | 0.01 |
41 | 40 | 60 | 0.01 |
42 | 60 | 55 | 0.01 |
43 | 41 | 52 | 0.01 |
44 | 42 | 41 | 0.01 |
45 | 44 | 31 | 0.00 |
46 | 43 | 29 | 0.00 |
47 | 46 | 27 | 0.00 |
48 | 47 | 25 | 0.00 |
49 | 49 | 24 | 0.00 |
50 | 45 | 23 | 0.00 |