Mailing List Archive

[MediaWiki-commits] [Gerrit] pywikibot/core[master]: [IMPR] Make preloading generators work with arbitrary entity...
Matěj Suchánek has uploaded a new change for review. ( https://gerrit.wikimedia.org/r/342588 )

Change subject: [IMPR] Make preloading generators work with arbitrary entity types
......................................................................

[IMPR] Make preloading generators work with arbitrary entity types

Bug: T160397
Change-Id: Ie068ca3427063ff13ba4545544a1b3965ab7d88d
---
M pywikibot/pagegenerators.py
M pywikibot/site.py
2 files changed, 47 insertions(+), 19 deletions(-)


git pull ssh://gerrit.wikimedia.org:29418/pywikibot/core refs/changes/88/342588/2

diff --git a/pywikibot/pagegenerators.py b/pywikibot/pagegenerators.py
index 13618af..fab81fc 100644
--- a/pywikibot/pagegenerators.py
+++ b/pywikibot/pagegenerators.py
@@ -14,7 +14,7 @@
&params;
"""
#
-# (C) Pywikibot team, 2008-2017
+# (C) Pywikibot team, 2008-2018
#
# Distributed under the terms of the MIT license.
#
@@ -488,7 +488,7 @@
dupfiltergen, self.subpage_max_depth)

if self.claimfilter_list:
- dupfiltergen = PreloadingItemGenerator(dupfiltergen)
+ dupfiltergen = PreloadingEntityGenerator(dupfiltergen)
for claim in self.claimfilter_list:
dupfiltergen = ItemClaimFilterPageGenerator(dupfiltergen,
claim[0], claim[1],
@@ -1948,11 +1948,11 @@


@deprecated_args(step='groupsize')
-def PreloadingItemGenerator(generator, groupsize=50):
+def PreloadingEntityGenerator(generator, groupsize=50):
"""
Yield preloaded pages taken from another generator.

- Function basically is copied from above, but for ItemPage's
+ Function basically is copied from above, but for Wikibase entities.

@param generator: pages to iterate over
@param groupsize: how many pages to preload at once
@@ -1960,26 +1960,16 @@
"""
sites = {}
for page in generator:
- if not isinstance(page, pywikibot.page.WikibasePage):
- datasite = page.site.data_repository()
- if page.namespace() != datasite.item_namespace:
- pywikibot.output(
- u'PreloadingItemGenerator skipping %s as it is not in %s'
- % (page, datasite.item_namespace))
- continue
-
- page = pywikibot.ItemPage(datasite, page.title())
-
site = page.site
sites.setdefault(site, []).append(page)
if len(sites[site]) >= groupsize:
# if this site is at the groupsize, process it
group = sites.pop(site)
- for i in site.preloaditempages(group, groupsize):
+ for i in site.preload_entities(group, groupsize):
yield i
for site, pages in sites.items():
# process any leftover sites that never reached the groupsize
- for i in site.preloaditempages(pages, groupsize):
+ for i in site.preload_entities(pages, groupsize):
yield i


@@ -2880,6 +2870,8 @@
yield page


+PreloadingItemGenerator = redirect_func(PreloadingEntityGenerator,
+ old_name='PreloadingItemGenerator')
# Deprecated old names available for compatibility with compat.
ImageGenerator = redirect_func(PageClassGenerator, old_name='ImageGenerator')
FileGenerator = redirect_func(PageClassGenerator, old_name='FileGenerator')
diff --git a/pywikibot/site.py b/pywikibot/site.py
index c715902..7451931 100644
--- a/pywikibot/site.py
+++ b/pywikibot/site.py
@@ -6,7 +6,7 @@
groups of wikis on the same topic in different languages.
"""
#
-# (C) Pywikibot team, 2008-2017
+# (C) Pywikibot team, 2008-2018
#
# Distributed under the terms of the MIT license.
#
@@ -7056,6 +7056,11 @@

class DataSite(APISite):

+ type_to_class = {
+ 'item': pywikibot.page.ItemPage,
+ 'property': pywikibot.page.PropertyPage,
+ }
+
"""Wikibase data capable site."""

def __init__(self, *args, **kwargs):
@@ -7255,9 +7260,9 @@
raise api.APIError(data['errors'])
return data['entities']

- def preloaditempages(self, pagelist, groupsize=50):
+ def preload_entities(self, pagelist, groupsize=50):
"""
- Yield ItemPages with content prefilled.
+ Yield subclasses of WikibasePage with content prefilled.

Note that pages will be iterated in a different order
than in the underlying pagelist.
@@ -7275,6 +7280,37 @@
for key in ident:
req[key].append(ident[key])
else:
+ if p.site == self and p.namespace() in (
+ self.item_namespace, self.property_namespace):
+ req['ids'].append(p.title(withNamespace=False))
+ else:
+ assert p.site.has_data_repository, \
+ 'Site must have a data repository'
+ req['sites'].append(p.site.dbName())
+ req['titles'].append(p._link._text)
+
+ req = self._simple_request(action='wbgetentities', **req)
+ data = req.submit()
+ for entity in data['entities']:
+ if 'missing' in data['entities'][entity]:
+ continue
+ cls = self.type_to_class[data['entities'][entity]['type']]
+ page = cls(self, entity)
+ page._content = data['entities'][entity]
+ page.get()
+ yield page
+
+ @deprecated('DataSite.preload_entities')
+ def preloaditempages(self, pagelist, groupsize=50):
+ """DEPRECATED"""
+ for sublist in itergroup(pagelist, groupsize):
+ req = {'ids': [], 'titles': [], 'sites': []}
+ for p in sublist:
+ if isinstance(p, pywikibot.page.WikibasePage):
+ ident = p._defined_by()
+ for key in ident:
+ req[key].append(ident[key])
+ else:
assert p.site.has_data_repository, \
'Site must have a data repository'
if (p.site == p.site.data_repository() and

--
To view, visit https://gerrit.wikimedia.org/r/342588
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Ie068ca3427063ff13ba4545544a1b3965ab7d88d
Gerrit-PatchSet: 2
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: Matěj Suchánek <matejsuchanek97@gmail.com>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits