Mailing List Archive

[MediaWiki-commits] [Gerrit] mediawiki...Wikibase[master]: Deduplicate entity usages when returning from ParserOutputUs...
Ladsgroup has uploaded a new change for review. ( https://gerrit.wikimedia.org/r/405013 )

Change subject: Deduplicate entity usages when returning from ParserOutputUsageAccumulator
......................................................................

Deduplicate entity usages when returning from ParserOutputUsageAccumulator

Bug: T178079
Change-Id: Ic101f58f6d6f700cd9da02844b518973d2a891d8
---
M client/autoload.php
M client/includes/Usage/ParserOutputUsageAccumulator.php
A client/includes/Usage/UsageDeduplicator.php
M client/tests/phpunit/includes/Usage/UsageAccumulatorContractTester.php
A client/tests/phpunit/includes/Usage/UsageDeduplicatorTest.php
5 files changed, 153 insertions(+), 3 deletions(-)


git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/Wikibase refs/changes/13/405013/1

diff --git a/client/autoload.php b/client/autoload.php
index 91bd32b..223e319 100644
--- a/client/autoload.php
+++ b/client/autoload.php
@@ -112,6 +112,7 @@
'Wikibase\\Client\\Usage\\SubscriptionManager' => __DIR__ . '/includes/Usage/SubscriptionManager.php',
'Wikibase\\Client\\Usage\\UsageAccumulator' => __DIR__ . '/includes/Usage/UsageAccumulator.php',
'Wikibase\\Client\\Usage\\UsageAspectTransformer' => __DIR__ . '/includes/Usage/UsageAspectTransformer.php',
+ 'Wikibase\\Client\\Usage\\UsageDeduplicator' => __DIR__ . '/includes/Usage/UsageDeduplicator.php',
'Wikibase\\Client\\Usage\\UsageLookup' => __DIR__ . '/includes/Usage/UsageLookup.php',
'Wikibase\\Client\\Usage\\UsageTracker' => __DIR__ . '/includes/Usage/UsageTracker.php',
'Wikibase\\Client\\Usage\\UsageTrackerException' => __DIR__ . '/includes/Usage/UsageTrackerException.php',
diff --git a/client/includes/Usage/ParserOutputUsageAccumulator.php b/client/includes/Usage/ParserOutputUsageAccumulator.php
index 819ef59..9d5d579 100644
--- a/client/includes/Usage/ParserOutputUsageAccumulator.php
+++ b/client/includes/Usage/ParserOutputUsageAccumulator.php
@@ -42,7 +42,10 @@
*/
public function getUsages() {
$usages = $this->parserOutput->getExtensionData( 'wikibase-entity-usage' );
- return $usages ?: [];
+ if ( $usages ) {
+ return ( new UsageDeduplicator() )->deduplicate( $usages );
+ }
+ return [];
}

}
diff --git a/client/includes/Usage/UsageDeduplicator.php b/client/includes/Usage/UsageDeduplicator.php
new file mode 100644
index 0000000..8ba3a55
--- /dev/null
+++ b/client/includes/Usage/UsageDeduplicator.php
@@ -0,0 +1,96 @@
+<?php
+
+namespace Wikibase\Client\Usage;
+
+/**
+ * This class de-duplicates entity usages for performance and storage reasons
+ *
+ * @license GPL-2.0+
+ * @author Amir Sarabadani
+ */
+class UsageDeduplicator {
+
+ /**
+ * @param EntityUsage[] $usages
+ */
+ public function deduplicate( array $usages ) {
+ $structuredUsages = $this->structureUsages( $usages );
+
+ foreach ( $structuredUsages as $entityId => $usages ) {
+ $structuredUsages[$entityId] = $this->deduplicateUsagesPerEntity( $usages );
+ }
+
+ // Flatten the structured array
+ $return = [];
+ array_walk_recursive( $structuredUsages,
+ function( $a ) use ( &$return ) {
+ $return[] = $a;
+ }
+ );
+ return $return;
+ }
+
+ /**
+ * @param EntityUsage[] $usages
+ */
+ private function structureUsages( array $usages ) {
+ $structuredUsages = [];
+ foreach ( $usages as $usage ) {
+ $entityId = $usage->getEntityId();
+ if ( isset( $structuredUsages[$entityId->getSerialization()] ) ) {
+ $structuredUsages[$entityId->getSerialization()][] = $usage;
+ } else {
+ $structuredUsages[$entityId->getSerialization()] = [ $usage ];
+ }
+ }
+
+ $reallyStructuredUsages = [];
+ foreach ( $structuredUsages as $entityId => $usages ) {
+ $reallyStructuredUsages[$entityId] = $this->structureUsagesPerEntity( $usages );
+ }
+
+ return $reallyStructuredUsages;
+ }
+
+ private function structureUsagesPerEntity( array $usages ) {
+ $structuredUsages = [
+ EntityUsage::DESCRIPTION_USAGE => [],
+ EntityUsage::LABEL_USAGE => [],
+ ];
+ foreach ( $usages as $usage ) {
+ $aspect = $usage->getAspect();
+ if ( isset( $structuredUsages[$aspect] ) ) {
+ $structuredUsages[$aspect][] = $usage;
+ } else {
+ $structuredUsages[$aspect] = [ $usage ];
+ }
+ }
+
+ return $structuredUsages;
+ }
+
+ private function deduplicateUsagesPerEntity( $usages ) {
+ $usages[EntityUsage::DESCRIPTION_USAGE] = $this->deduplicatePerType(
+ $usages[EntityUsage::DESCRIPTION_USAGE]
+ );
+ $usages[EntityUsage::LABEL_USAGE] = $this->deduplicatePerType(
+ $usages[EntityUsage::LABEL_USAGE]
+ );
+ return $usages;
+ }
+
+ /**
+ * @param EntityUsage[] $usages
+ * @return EntityUsage[]
+ */
+ private function deduplicatePerType( array $usages ) {
+ foreach ( $usages as $usage ) {
+ if ( $usage->getModifier() === null ) {
+ return [ $usage ];
+ }
+ }
+
+ return $usages;
+ }
+
+}
diff --git a/client/tests/phpunit/includes/Usage/UsageAccumulatorContractTester.php b/client/tests/phpunit/includes/Usage/UsageAccumulatorContractTester.php
index 6c40387..13b1d29 100644
--- a/client/tests/phpunit/includes/Usage/UsageAccumulatorContractTester.php
+++ b/client/tests/phpunit/includes/Usage/UsageAccumulatorContractTester.php
@@ -45,11 +45,11 @@
$q3 = new ItemId( 'Q3' );
$expected = [
new EntityUsage( $q2, EntityUsage::LABEL_USAGE, 'xx' ),
- new EntityUsage( $q3, EntityUsage::DESCRIPTION_USAGE, 'ru' ),
new EntityUsage( $q2, EntityUsage::TITLE_USAGE ),
- new EntityUsage( $q3, EntityUsage::STATEMENT_USAGE, 'P42' ),
new EntityUsage( $q2, EntityUsage::SITELINK_USAGE ),
new EntityUsage( $q2, EntityUsage::OTHER_USAGE ),
+ new EntityUsage( $q3, EntityUsage::DESCRIPTION_USAGE, 'ru' ),
+ new EntityUsage( $q3, EntityUsage::STATEMENT_USAGE, 'P42' ),
new EntityUsage( $q3, EntityUsage::ALL_USAGE ),
];

diff --git a/client/tests/phpunit/includes/Usage/UsageDeduplicatorTest.php b/client/tests/phpunit/includes/Usage/UsageDeduplicatorTest.php
new file mode 100644
index 0000000..2cb2140
--- /dev/null
+++ b/client/tests/phpunit/includes/Usage/UsageDeduplicatorTest.php
@@ -0,0 +1,50 @@
+<?php
+
+namespace Wikibase\Client\Tests\Usage;
+
+use Wikibase\Client\Usage\EntityUsage;
+use Wikibase\Client\Usage\UsageDeduplicator;
+use Wikibase\DataModel\Entity\ItemId;
+
+/**
+ * @covers Wikibase\Client\Usage\UsageDeduplicator
+ *
+ * @group Wikibase
+ * @group WikibaseClient
+ * @group WikibaseUsageTracking
+ *
+ * @license GPL-2.0+
+ * @author Amir Sarabadani
+ */
+class UsageDeduplicatorTest extends \PHPUnit_Framework_TestCase {
+
+ public function provideDeduplicate() {
+ $q1 = new ItemId( 'Q1' );
+ $q1Label = new EntityUsage( $q1, EntityUsage::LABEL_USAGE );
+ $q1LabelEn = new EntityUsage( $q1, EntityUsage::LABEL_USAGE, 'en' );
+ $q1All = new EntityUsage( $q1, EntityUsage::ALL_USAGE );
+ $q1Statement = new EntityUsage( $q1, EntityUsage::LABEL_USAGE, 'en' );
+
+ $q2 = new ItemId( 'Q2' );
+ $q2Label = new EntityUsage( $q2, EntityUsage::LABEL_USAGE );
+ $q2Description = new EntityUsage( $q2, EntityUsage::DESCRIPTION_USAGE );
+ $q2DescriptionFa = new EntityUsage( $q2, EntityUsage::DESCRIPTION_USAGE, 'fa' );
+
+ return [
+ [ [ $q1LabelEn, $q1Label ], [ $q1Label ] ],
+ [ [ $q1LabelEn ], [ $q1LabelEn ] ],
+ [ [ $q1LabelEn, $q1Label, $q2Description, $q1All ], [ $q1Label, $q1All, $q2Description ] ],
+ [ [ $q1LabelEn, $q2Label, $q1Statement ], [ $q1LabelEn, $q1Statement, $q2Label ] ],
+ [ [ $q2DescriptionFa, $q2Description, $q1All ], [ $q2Description, $q1All ] ],
+ ];
+ }
+
+ /**
+ * @covers \Wikibase\Client\Usage\UsageDeduplicator::deduplicate
+ * @dataProvider provideDeduplicate
+ */
+ public function testDeduplicate( $usages, $expected ) {
+ $this->assertEquals( $expected, ( new UsageDeduplicator() )->deduplicate( $usages ) );
+ }
+
+}

--
To view, visit https://gerrit.wikimedia.org/r/405013
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Ic101f58f6d6f700cd9da02844b518973d2a891d8
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/Wikibase
Gerrit-Branch: master
Gerrit-Owner: Ladsgroup <Ladsgroup@gmail.com>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits
[MediaWiki-commits] [Gerrit] mediawiki...Wikibase[master]: Deduplicate entity usages when returning from ParserOutputUs... [ In reply to ]
jenkins-bot has submitted this change and it was merged. ( https://gerrit.wikimedia.org/r/405013 )

Change subject: Deduplicate entity usages when returning from ParserOutputUsageAccumulator
......................................................................


Deduplicate entity usages when returning from ParserOutputUsageAccumulator

Bug: T178079
Change-Id: Ic101f58f6d6f700cd9da02844b518973d2a891d8
---
M client/autoload.php
M client/includes/Usage/ParserOutputUsageAccumulator.php
A client/includes/Usage/UsageDeduplicator.php
M client/tests/phpunit/includes/Usage/UsageAccumulatorContractTester.php
A client/tests/phpunit/includes/Usage/UsageDeduplicatorTest.php
5 files changed, 156 insertions(+), 1 deletion(-)

Approvals:
jenkins-bot: Verified
Thiemo Kreuz (WMDE): Looks good to me, approved



diff --git a/client/autoload.php b/client/autoload.php
index 91bd32b..223e319 100644
--- a/client/autoload.php
+++ b/client/autoload.php
@@ -112,6 +112,7 @@
'Wikibase\\Client\\Usage\\SubscriptionManager' => __DIR__ . '/includes/Usage/SubscriptionManager.php',
'Wikibase\\Client\\Usage\\UsageAccumulator' => __DIR__ . '/includes/Usage/UsageAccumulator.php',
'Wikibase\\Client\\Usage\\UsageAspectTransformer' => __DIR__ . '/includes/Usage/UsageAspectTransformer.php',
+ 'Wikibase\\Client\\Usage\\UsageDeduplicator' => __DIR__ . '/includes/Usage/UsageDeduplicator.php',
'Wikibase\\Client\\Usage\\UsageLookup' => __DIR__ . '/includes/Usage/UsageLookup.php',
'Wikibase\\Client\\Usage\\UsageTracker' => __DIR__ . '/includes/Usage/UsageTracker.php',
'Wikibase\\Client\\Usage\\UsageTrackerException' => __DIR__ . '/includes/Usage/UsageTrackerException.php',
diff --git a/client/includes/Usage/ParserOutputUsageAccumulator.php b/client/includes/Usage/ParserOutputUsageAccumulator.php
index 819ef59..9d5d579 100644
--- a/client/includes/Usage/ParserOutputUsageAccumulator.php
+++ b/client/includes/Usage/ParserOutputUsageAccumulator.php
@@ -42,7 +42,10 @@
*/
public function getUsages() {
$usages = $this->parserOutput->getExtensionData( 'wikibase-entity-usage' );
- return $usages ?: [];
+ if ( $usages ) {
+ return ( new UsageDeduplicator() )->deduplicate( $usages );
+ }
+ return [];
}

}
diff --git a/client/includes/Usage/UsageDeduplicator.php b/client/includes/Usage/UsageDeduplicator.php
new file mode 100644
index 0000000..10c5feb
--- /dev/null
+++ b/client/includes/Usage/UsageDeduplicator.php
@@ -0,0 +1,95 @@
+<?php
+
+namespace Wikibase\Client\Usage;
+
+/**
+ * This class de-duplicates entity usages for performance and storage reasons
+ *
+ * @license GPL-2.0+
+ * @author Amir Sarabadani
+ */
+class UsageDeduplicator {
+
+ /**
+ * @param EntityUsage[] $usages
+ * @return EntityUsage[]
+ */
+ public function deduplicate( array $usages ) {
+ $structuredUsages = $this->structureUsages( $usages );
+
+ foreach ( $structuredUsages as $entityId => $usages ) {
+ $structuredUsages[$entityId] = $this->deduplicateUsagesPerEntity( $usages );
+ }
+
+ // Flatten the structured array
+ $return = [];
+ array_walk_recursive(
+ $structuredUsages,
+ function( $a ) use ( &$return ) {
+ /* @var EntityUsage $a */
+ $return[$a->getIdentityString()] = $a;
+ }
+ );
+ return $return;
+ }
+
+ /**
+ * @param EntityUsage[] $usages
+ * @return array[]
+ */
+ private function structureUsages( array $usages ) {
+ $structuredUsages = [];
+ foreach ( $usages as $usage ) {
+ $entityId = $usage->getEntityId();
+ $structuredUsages[$entityId->getSerialization()][] = $usage;
+ }
+
+ return array_map( [ $this, 'structureUsagesPerEntity' ], $structuredUsages );
+ }
+
+ /**
+ * @param EntityUsage[] $usages
+ * @return array[]
+ */
+ private function structureUsagesPerEntity( array $usages ) {
+ $structuredUsages = [
+ EntityUsage::DESCRIPTION_USAGE => [],
+ EntityUsage::LABEL_USAGE => [],
+ ];
+ foreach ( $usages as $usage ) {
+ $aspect = $usage->getAspect();
+ $structuredUsages[$aspect][] = $usage;
+ }
+
+ return $structuredUsages;
+ }
+
+ /**
+ * @param EntityUsage[] $usages
+ * @return EntityUsage[]
+ */
+ private function deduplicateUsagesPerEntity( array $usages ) {
+ $usages[EntityUsage::DESCRIPTION_USAGE] = $this->deduplicatePerType(
+ $usages[EntityUsage::DESCRIPTION_USAGE]
+ );
+ $usages[EntityUsage::LABEL_USAGE] = $this->deduplicatePerType(
+ $usages[EntityUsage::LABEL_USAGE]
+ );
+ return $usages;
+ }
+
+ /**
+ * @param EntityUsage[] $usages
+ * @return EntityUsage[]
+ */
+ private function deduplicatePerType( array $usages ) {
+ foreach ( $usages as $usage ) {
+ if ( $usage->getModifier() === null ) {
+ return [ $usage ];
+ }
+ }
+
+ return $usages;
+ }
+
+}
diff --git a/client/tests/phpunit/includes/Usage/UsageAccumulatorContractTester.php b/client/tests/phpunit/includes/Usage/UsageAccumulatorContractTester.php
index 6c40387..6283971 100644
--- a/client/tests/phpunit/includes/Usage/UsageAccumulatorContractTester.php
+++ b/client/tests/phpunit/includes/Usage/UsageAccumulatorContractTester.php
@@ -156,6 +156,8 @@
private function assertSameUsages( array $expected, array $actual, $message = '' ) {
$expected = $this->getIdentityStrings( $expected );
$actual = $this->getIdentityStrings( $actual );
+ sort( $expected );
+ sort( $actual );

Assert::assertEquals( $expected, $actual, $message );
}
diff --git a/client/tests/phpunit/includes/Usage/UsageDeduplicatorTest.php b/client/tests/phpunit/includes/Usage/UsageDeduplicatorTest.php
new file mode 100644
index 0000000..cf2041e
--- /dev/null
+++ b/client/tests/phpunit/includes/Usage/UsageDeduplicatorTest.php
@@ -0,0 +1,54 @@
+<?php
+
+namespace Wikibase\Client\Tests\Usage;
+
+use Wikibase\Client\Usage\EntityUsage;
+use Wikibase\Client\Usage\UsageDeduplicator;
+use Wikibase\DataModel\Entity\ItemId;
+
+/**
+ * @covers Wikibase\Client\Usage\UsageDeduplicator
+ *
+ * @group Wikibase
+ * @group WikibaseClient
+ * @group WikibaseUsageTracking
+ *
+ * @license GPL-2.0+
+ * @author Amir Sarabadani
+ */
+class UsageDeduplicatorTest extends \PHPUnit_Framework_TestCase {
+
+ public function provideDeduplicate() {
+ $q1 = new ItemId( 'Q1' );
+ $q1Label = new EntityUsage( $q1, EntityUsage::LABEL_USAGE );
+ $q1LabelEn = new EntityUsage( $q1, EntityUsage::LABEL_USAGE, 'en' );
+ $q1All = new EntityUsage( $q1, EntityUsage::ALL_USAGE );
+ $q1Statement = new EntityUsage( $q1, EntityUsage::STATEMENT_USAGE, 'P15' );
+
+ $q2 = new ItemId( 'Q2' );
+ $q2Label = new EntityUsage( $q2, EntityUsage::LABEL_USAGE );
+ $q2Description = new EntityUsage( $q2, EntityUsage::DESCRIPTION_USAGE );
+ $q2DescriptionFa = new EntityUsage( $q2, EntityUsage::DESCRIPTION_USAGE, 'fa' );
+
+ return [
+ [ [ $q1LabelEn, $q1Label ], [ $q1Label ] ],
+ [ [ $q1LabelEn ], [ $q1LabelEn ] ],
+ [ [ $q1LabelEn, $q1Label, $q2Description, $q1All ], [ $q1Label, $q1All, $q2Description ] ],
+ [ [ $q1LabelEn, $q2Label, $q1Statement ], [ $q1LabelEn, $q1Statement, $q2Label ] ],
+ [ [ $q2DescriptionFa, $q2Description, $q1All ], [ $q2Description, $q1All ] ],
+ ];
+ }
+
+ /**
+ * @covers \Wikibase\Client\Usage\UsageDeduplicator::deduplicate
+ * @dataProvider provideDeduplicate
+ */
+ public function testDeduplicate( $usages, $output ) {
+ $expected = [];
+ foreach ( $output as $usage ) {
+ $expected[$usage->getIdentityString()] = $usage;
+ }
+ $this->assertEquals( $expected, ( new UsageDeduplicator() )->deduplicate( $usages ) );
+ }
+
+}

--
To view, visit https://gerrit.wikimedia.org/r/405013
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: Ic101f58f6d6f700cd9da02844b518973d2a891d8
Gerrit-PatchSet: 5
Gerrit-Project: mediawiki/extensions/Wikibase
Gerrit-Branch: master
Gerrit-Owner: Ladsgroup <Ladsgroup@gmail.com>
Gerrit-Reviewer: Eranroz <eranroz89@gmail.com>
Gerrit-Reviewer: Hoo man <hoo@online.de>
Gerrit-Reviewer: Ladsgroup <Ladsgroup@gmail.com>
Gerrit-Reviewer: Lucas Werkmeister (WMDE) <lucas.werkmeister@wikimedia.de>
Gerrit-Reviewer: Thiemo Kreuz (WMDE) <thiemo.kreuz@wikimedia.de>
Gerrit-Reviewer: WMDE-leszek <leszek.manicki@wikimedia.de>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits