Mailing List Archive

[MediaWiki-commits] [Gerrit] mediawiki...Wikibase[master]: Rearrange UsageDeduplicator implementation
Thiemo Kreuz (WMDE) has uploaded a new change for review. ( https://gerrit.wikimedia.org/r/405896 )

Change subject: Rearrange UsageDeduplicator implementation
......................................................................

Rearrange UsageDeduplicator implementation

This is basically the exact same implementation, just moved around a
bit. I found the way the code was previously arranged not that optimal.
The code was spread out quite a lot.

Significant changes are:
* The actual deduplication intentionally replaces an array of usages
with a single usage. This makes the later array_walk_recursive run
faster because it does have less 1-element arrays to iterate.
* The actual deduplication use &-references to manipulate the one array,
instead of constantly constructing new ones.
* Structuring is done in one step instead of two. $array[$foo][$bar][]
works just fine here.
* Note that labels and descriptions are not mentioned any more! The same
deduplication logic is applied to all aspects, not only labels and
descriptions. This might be a mistake, but no test fails. If you think
this is a mistake, please add a test that demonstrates why.

Bug: T178079
Change-Id: Ic8ec86088d49b5979a78e99d2cafce5f6c86f94d
---
M client/includes/Usage/UsageDeduplicator.php
1 file changed, 37 insertions(+), 47 deletions(-)


git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/Wikibase refs/changes/96/405896/1

diff --git a/client/includes/Usage/UsageDeduplicator.php b/client/includes/Usage/UsageDeduplicator.php
index 8c7e4ce..a9126dc 100644
--- a/client/includes/Usage/UsageDeduplicator.php
+++ b/client/includes/Usage/UsageDeduplicator.php
@@ -12,81 +12,71 @@

/**
* @param EntityUsage[] $usages
+ *
* @return EntityUsage[]
*/
public function deduplicate( array $usages ) {
- $structuredUsages = $this->structureUsages( $usages );
-
- foreach ( $structuredUsages as $entityId => $usages ) {
- $structuredUsages[$entityId] = $this->deduplicateUsagesPerEntity( $usages );
- }
-
- // Flatten the structured array
- $return = [];
- array_walk_recursive(
- $structuredUsages,
- function ( EntityUsage $usage ) use ( &$return ) {
- $return[$usage->getIdentityString()] = $usage;
- }
+ return $this->flattenStructuredUsages(
+ $this->deduplicateStructuredUsages(
+ $this->structureUsages( $usages )
+ )
);
- return $return;
}

/**
* @param EntityUsage[] $usages
- * @return array[]
+ *
+ * @return array[] three-dimensional array of
+ * [ $entityId => [ $aspectKey => [ EntityUsage $usage, … ], … ], … ]
*/
private function structureUsages( array $usages ) {
$structuredUsages = [];
- foreach ( $usages as $usage ) {
- $entityId = $usage->getEntityId();
- $structuredUsages[$entityId->getSerialization()][] = $usage;
- }

- return array_map( [ $this, 'structureUsagesPerEntity' ], $structuredUsages );
- }
-
- /**
- * @param EntityUsage[] $usages
- * @return array[]
- */
- private function structureUsagesPerEntity( array $usages ) {
- $structuredUsages = [
- EntityUsage::DESCRIPTION_USAGE => [],
- EntityUsage::LABEL_USAGE => [],
- ];
foreach ( $usages as $usage ) {
+ $entityId = $usage->getEntityId()->getSerialization();
$aspect = $usage->getAspect();
- $structuredUsages[$aspect][] = $usage;
+ $structuredUsages[$entityId][$aspect][] = $usage;
}

return $structuredUsages;
}

/**
- * @param array[] $usages
+ * @param array[] $structuredUsages
+ *
* @return array[]
*/
- private function deduplicateUsagesPerEntity( array $usages ) {
- $usages[EntityUsage::DESCRIPTION_USAGE] = $this->deduplicatePerType(
- $usages[EntityUsage::DESCRIPTION_USAGE]
- );
- $usages[EntityUsage::LABEL_USAGE] = $this->deduplicatePerType(
- $usages[EntityUsage::LABEL_USAGE]
- );
- return $usages;
+ private function deduplicateStructuredUsages( array $structuredUsages ) {
+ foreach ( $structuredUsages as &$usagesPerEntity ) {
+ /** @var EntityUsage[] $usagesPerType */
+ foreach ( $usagesPerEntity as &$usagesPerType ) {
+ foreach ( $usagesPerType as $usage ) {
+ if ( $usage->getModifier() === null ) {
+ // This intentionally flattens the array to a single value
+ $usagesPerType = $usage;
+ break;
+ }
+ }
+ }
+ }
+
+ return $structuredUsages;
}

/**
- * @param EntityUsage[] $usages
+ * @param array[] $structuredUsages
+ *
* @return EntityUsage[]
*/
- private function deduplicatePerType( array $usages ) {
- foreach ( $usages as $usage ) {
- if ( $usage->getModifier() === null ) {
- return [ $usage ];
+ private function flattenStructuredUsages( array $structuredUsages ) {
+ $usages = [];
+
+ array_walk_recursive(
+ $structuredUsages,
+ function ( EntityUsage $usage ) use ( &$usages ) {
+ $usages[$usage->getIdentityString()] = $usage;
}
- }
+ );

return $usages;
}

--
To view, visit https://gerrit.wikimedia.org/r/405896
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Ic8ec86088d49b5979a78e99d2cafce5f6c86f94d
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/Wikibase
Gerrit-Branch: master
Gerrit-Owner: Thiemo Kreuz (WMDE) <thiemo.kreuz@wikimedia.de>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits
[MediaWiki-commits] [Gerrit] mediawiki...Wikibase[master]: Rearrange UsageDeduplicator implementation [ In reply to ]
jenkins-bot has submitted this change and it was merged. ( https://gerrit.wikimedia.org/r/405896 )

Change subject: Rearrange UsageDeduplicator implementation
......................................................................


Rearrange UsageDeduplicator implementation

This is basically the exact same implementation, just moved around a
bit. I found the way the code was previously arranged not that optimal.
The code was spread out quite a lot.

Significant changes are:
* The actual deduplication intentionally replaces an array of usages
with a single usage. This makes the later array_walk_recursive run
faster because it does have less 1-element arrays to iterate.
* The actual deduplication use &-references to manipulate the one array,
instead of constantly constructing new ones.
* Structuring is done in one step instead of two. $array[$foo][$bar][]
works just fine here.
* Note that labels and descriptions are not mentioned any more! The same
deduplication logic is applied to all aspects, not only labels and
descriptions. This might be a mistake, but no test fails. If you think
this is a mistake, please add a test that demonstrates why.

Bug: T178079
Change-Id: Ic8ec86088d49b5979a78e99d2cafce5f6c86f94d
---
M client/includes/Usage/UsageDeduplicator.php
1 file changed, 39 insertions(+), 45 deletions(-)

Approvals:
Ladsgroup: Looks good to me, approved
jenkins-bot: Verified



diff --git a/client/includes/Usage/UsageDeduplicator.php b/client/includes/Usage/UsageDeduplicator.php
index 8c7e4ce..8a5fe66 100644
--- a/client/includes/Usage/UsageDeduplicator.php
+++ b/client/includes/Usage/UsageDeduplicator.php
@@ -12,81 +12,75 @@

/**
* @param EntityUsage[] $usages
+ *
* @return EntityUsage[]
*/
public function deduplicate( array $usages ) {
$structuredUsages = $this->structureUsages( $usages );
-
- foreach ( $structuredUsages as $entityId => $usages ) {
- $structuredUsages[$entityId] = $this->deduplicateUsagesPerEntity( $usages );
- }
-
- // Flatten the structured array
- $return = [];
- array_walk_recursive(
- $structuredUsages,
- function ( EntityUsage $usage ) use ( &$return ) {
- $return[$usage->getIdentityString()] = $usage;
- }
- );
- return $return;
+ $structuredUsages = $this->deduplicateStructuredUsages( $structuredUsages );
+ return $this->flattenStructuredUsages( $structuredUsages );
}

/**
* @param EntityUsage[] $usages
- * @return array[]
+ *
+ * @return array[][] three-dimensional array of
+ * [ $entityId => [ $aspectKey => [ EntityUsage $usage, … ], … ], … ]
*/
private function structureUsages( array $usages ) {
$structuredUsages = [];
- foreach ( $usages as $usage ) {
- $entityId = $usage->getEntityId();
- $structuredUsages[$entityId->getSerialization()][] = $usage;
- }

- return array_map( [ $this, 'structureUsagesPerEntity' ], $structuredUsages );
- }
-
- /**
- * @param EntityUsage[] $usages
- * @return array[]
- */
- private function structureUsagesPerEntity( array $usages ) {
- $structuredUsages = [
- EntityUsage::DESCRIPTION_USAGE => [],
- EntityUsage::LABEL_USAGE => [],
- ];
foreach ( $usages as $usage ) {
+ $entityId = $usage->getEntityId()->getSerialization();
$aspect = $usage->getAspect();
- $structuredUsages[$aspect][] = $usage;
+ $structuredUsages[$entityId][$aspect][] = $usage;
}

return $structuredUsages;
}

/**
- * @param array[] $usages
+ * @param array[][] $structuredUsages
+ *
* @return array[]
*/
- private function deduplicateUsagesPerEntity( array $usages ) {
- $usages[EntityUsage::DESCRIPTION_USAGE] = $this->deduplicatePerType(
- $usages[EntityUsage::DESCRIPTION_USAGE]
- );
- $usages[EntityUsage::LABEL_USAGE] = $this->deduplicatePerType(
- $usages[EntityUsage::LABEL_USAGE]
- );
- return $usages;
+ private function deduplicateStructuredUsages( array $structuredUsages ) {
+ foreach ( $structuredUsages as &$usagesPerEntity ) {
+ foreach ( $usagesPerEntity as &$usagesPerAspect ) {
+ $this->deduplicatePerAspect( $usagesPerAspect );
+ }
+ }
+
+ return $structuredUsages;
}

/**
- * @param EntityUsage[] $usages
- * @return EntityUsage[]
+ * @param EntityUsage[] &$usages
*/
- private function deduplicatePerType( array $usages ) {
+ private function deduplicatePerAspect( array &$usages ) {
foreach ( $usages as $usage ) {
if ( $usage->getModifier() === null ) {
- return [ $usage ];
+ // This intentionally flattens the array to a single value
+ $usages = $usage;
+ return;
}
}
+ }
+
+ /**
+ * @param array[] $structuredUsages
+ *
+ * @return EntityUsage[]
+ */
+ private function flattenStructuredUsages( array $structuredUsages ) {
+ $usages = [];
+
+ array_walk_recursive(
+ $structuredUsages,
+ function ( EntityUsage $usage ) use ( &$usages ) {
+ $usages[$usage->getIdentityString()] = $usage;
+ }
+ );

return $usages;
}

--
To view, visit https://gerrit.wikimedia.org/r/405896
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: Ic8ec86088d49b5979a78e99d2cafce5f6c86f94d
Gerrit-PatchSet: 2
Gerrit-Project: mediawiki/extensions/Wikibase
Gerrit-Branch: master
Gerrit-Owner: Thiemo Kreuz (WMDE) <thiemo.kreuz@wikimedia.de>
Gerrit-Reviewer: Ladsgroup <Ladsgroup@gmail.com>
Gerrit-Reviewer: Thiemo Kreuz (WMDE) <thiemo.kreuz@wikimedia.de>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits