Mailing List Archive

[MediaWiki-commits] [Gerrit] mediawiki...parsoid[master]: Consolidate bailing out from ext/wiki links
Arlolra has uploaded a new change for review. ( https://gerrit.wikimedia.org/r/406126 )

Change subject: Consolidate bailing out from ext/wiki links
......................................................................

Consolidate bailing out from ext/wiki links

So that extlinks get the same expanded attribute treatment.

Fixes the regression in,
http://localhost:8000/ar.wikipedia.org/v3/page/html/%D8%B0%D8%B1%D8%A9_%D8%A7%D9%84%D9%87%D9%8A%D8%AF%D8%B1%D9%88%D8%AC%D9%8A%D9%86/25945469

Follows up to 217c68a by adding typeof "mw:ExpandedAttrs" to the nodes
where xmlish attributes contain extension tags for expanding.

Change-Id: I188f1c903619320052b0502ba13c7b3be7040e6f
---
M lib/wt2html/tt/AttributeExpander.js
M lib/wt2html/tt/LinkHandler.js
M tests/parserTests.txt
3 files changed, 75 insertions(+), 102 deletions(-)


git pull ssh://gerrit.wikimedia.org:29418/mediawiki/services/parsoid refs/changes/26/406126/1

diff --git a/lib/wt2html/tt/AttributeExpander.js b/lib/wt2html/tt/AttributeExpander.js
index 59d88e8..bb7a751 100644
--- a/lib/wt2html/tt/AttributeExpander.js
+++ b/lib/wt2html/tt/AttributeExpander.js
@@ -124,7 +124,6 @@
* ---------------------------------------------------------- */
function stripMetaTags(env, tokens, wrapTemplates) {
var buf = [];
- var isPushed = false;
var hasGeneratedContent = false;

for (var i = 0, l = tokens.length; i < l; i++) {
@@ -141,10 +140,12 @@
return prev + next.textContent;
}, '');
buf.push(str);
+ hasGeneratedContent = true;
+ continue;
// TODO: Maybe cleanup the remaining about sibbling wrappers
// but the sanitizer will drop them anyways
}
- isPushed = false;
+
if (wrapTemplates) {
// Strip all meta tags.
var type = t.getAttribute("typeof");
@@ -154,12 +155,12 @@
hasGeneratedContent = true;
}
} else {
- isPushed = true;
buf.push(t);
+ continue;
}
}

- if (!isPushed && t.name !== "meta") {
+ if (t.name !== "meta") {
// Dont strip token if it is not a meta-tag
buf.push(t);
}
diff --git a/lib/wt2html/tt/LinkHandler.js b/lib/wt2html/tt/LinkHandler.js
index 7397b3c..a9aa56c 100644
--- a/lib/wt2html/tt/LinkHandler.js
+++ b/lib/wt2html/tt/LinkHandler.js
@@ -210,13 +210,71 @@
});
};

+var bailTokens = function(env, token, isExtLink) {
+ var count = isExtLink ? 1 : 2;
+ var tokens = ["[".repeat(count)];
+
+ if (/mw:ExpandedAttrs/.test(token.getAttribute("typeof"))) {
+ var dataMW = JSON.parse(token.getAttribute("data-mw")).attribs;
+ var html;
+ for (var i = 0; i < dataMW.length; i++) {
+ if (dataMW[i][0].txt === "href") {
+ html = dataMW[i][1].html;
+ break;
+ }
+ }
+
+ // Since we are splicing off '['s and ']'s from the incoming token,
+ // adjust TSR of the DOM-fragment by `count` each on both end.
+ var tsr = token.dataAttribs && token.dataAttribs.tsr;
+ if (tsr && typeof (tsr[0]) === 'number' && typeof (tsr[1]) === 'number') {
+ tsr = [tsr[0] + count, tsr[1] - count];
+ } else {
+ tsr = null;
+ }
+
+ var body = DU.ppToDOM(html);
+ var dft = DU.buildDOMFragmentTokens(env, token, body, null, {
+ noPWrapping: true,
+ tsr: tsr,
+ });
+
+ tokens = tokens.concat(dft);
+ } else {
+ // FIXME: Duplicate work
+ tokens = tokens.concat(token.getAttribute("href"));
+ }
+
+ // Append rest of the attributes
+ if (isExtLink) {
+ var content = Util.lookup(token.attribs, 'mw:content');
+
+ // FIXME: Use this attribute in regular extline
+ // cases to rt spaces correctly maybe? Unsure
+ // it is worth it.
+ var spaces = token.getAttribute('spaces') || '';
+
+ tokens = tokens.concat(spaces, content);
+ } else {
+ token.attribs.forEach(function(a) {
+ if (a.k === "mw:maybeContent") {
+ tokens = tokens.concat("|", a.v);
+ }
+ });
+ }
+
+ tokens.push("]".repeat(count));
+
+ return tokens;
+};
+
/**
* Handle a mw:WikiLink token.
*/
WikiLinkHandler.prototype.onWikiLink = function(token, frame, cb) {
var env = this.manager.env;
var hrefKV = Util.lookupKV(token.attribs, 'href');
- var target, tokens, tsr;
+ var target;

try {
target = this.getWikiLinkTargetInfo(token, hrefKV);
@@ -226,47 +284,7 @@
}

if (!target) {
- tokens = ["[["];
- if (/mw:ExpandedAttrs/.test(token.getAttribute("typeof"))) {
- var dataMW = JSON.parse(token.getAttribute("data-mw")).attribs;
- var html;
- for (var i = 0; i < dataMW.length; i++) {
- if (dataMW[i][0].txt === "href") {
- html = dataMW[i][1].html;
- break;
- }
- }
-
- // Since we are splicing off '[[' and ']]' from the incoming token,
- // adjust TSR of the DOM-fragment by 2 each on both end.
- tsr = token.dataAttribs && token.dataAttribs.tsr;
- if (tsr && typeof (tsr[0]) === 'number' && typeof (tsr[1]) === 'number') {
- tsr = [tsr[0] + 2, tsr[1] - 2];
- } else {
- tsr = null;
- }
-
- var body = DU.ppToDOM(html);
- var dft = DU.buildDOMFragmentTokens(env, token, body, null, {
- noPWrapping: true,
- tsr: tsr,
- });
-
- tokens = tokens.concat(dft);
- } else {
- // FIXME: Duplicate work
- tokens = tokens.concat(token.getAttribute("href"));
- }
-
- // Append rest of the attributes
- token.attribs.forEach(function(a) {
- if (a.k === "mw:maybeContent") {
- tokens = tokens.concat("|", a.v);
- }
- });
-
- tokens.push("]]");
- cb({ tokens: tokens });
+ cb({ tokens: bailTokens(env, token, false) });
return;
}

@@ -293,13 +311,13 @@

var extToks = this.urlParser.tokenizeExtlink(src);
if (extToks) {
- tsr = token.dataAttribs && token.dataAttribs.tsr;
+ var tsr = token.dataAttribs && token.dataAttribs.tsr;
Util.shiftTokenTSR(extToks, 1 + (tsr ? tsr[0] : 0));
} else {
extToks = src;
}

- tokens = ['['].concat(extToks, ']');
+ var tokens = ['['].concat(extToks, ']');
tokens.rank = this.rank - 0.002; // Magic rank, since extlink is -0.001
cb({ tokens: tokens });
return;
@@ -308,7 +326,6 @@
// Ok, it looks like we have a sane href. Figure out which handler to use.
var isRedirect = !!token.getAttribute('redirect');
var handler = this.getWikiLinkHandler(token, target, isRedirect);
- // and call it.
handler(token, frame, cb, target);
};

@@ -2137,51 +2154,7 @@
});
} else {
// Not a link, convert href to plain text.
- var tokens = ['['];
- var closingTok = null;
- var spaces = token.getAttribute('spaces') || '';
-
- if ((token.getAttribute("typeof") || "").match(/mw:ExpandedAttrs/)) {
- // The token 'non-url' came from a template.
- // Introduce a span and capture the original source for RT purposes.
- var da = token.dataAttribs;
- // targetOff covers all spaces before content
- // and we need src without those spaces.
- var tsr0b = da.tsr[0] + 1;
- var tsr1b = da.targetOff - spaces.length;
- var span = Util.placeholder({}, { // {} !== null
- tsr: [tsr0b, tsr1b],
- src: env.page.src.substring(tsr0b, tsr1b),
- })[0]; // We only want the first token.
- tokens.push(span);
- closingTok = new EndTagTk('span');
- }
-
- var hrefText = token.getAttribute("href");
- if (Array.isArray(hrefText)) {
- tokens = tokens.concat(hrefText);
- } else {
- tokens.push(hrefText);
- }
-
- if (closingTok) {
- tokens.push(closingTok);
- }
-
- // FIXME: Use this attribute in regular extline
- // cases to rt spaces correctly maybe? Unsure
- // it is worth it.
- if (spaces) {
- tokens.push(spaces);
- }
-
- if (content.length) {
- tokens = tokens.concat(content);
- }
-
- tokens.push(']');
-
- cb({ tokens: tokens });
+ cb({ tokens: bailTokens(env, token, true) });
}
};

diff --git a/tests/parserTests.txt b/tests/parserTests.txt
index fd05393..7d87a78 100644
--- a/tests/parserTests.txt
+++ b/tests/parserTests.txt
@@ -5888,12 +5888,12 @@
[foo <i>bar</i>]
[fool's] errand
[fool's errand]
-[<span typeof="mw:Placeholder" data-parsoid='{"src":"{{echo|foo}}"}'>foo</span>]
-[<span typeof="mw:Placeholder" data-parsoid='{"src":"{{echo|foo}}"}'>foo</span> bar]
-[<span typeof="mw:Placeholder" data-parsoid='{"src":"{{echo|foo}}"}'>foo</span> <i>bar</i>]
-[<span typeof="mw:Placeholder" data-parsoid='{"src":"{{echo|foo}}l&#39;s"}'>fool's</span>] errand
-[<span typeof="mw:Placeholder" data-parsoid='{"src":"{{echo|foo}}l&#39;s"}'>fool's</span> errand]
-[<span typeof="mw:Placeholder" data-parsoid='{"src":"url={{echo|foo}}"}'>url=foo</span>]
+[<span about="#mwt19" typeof="mw:Transclusion" data-mw='{"parts":[{"template":{"target":{"wt":"echo","href":"./Template:Echo"},"params":{"1":{"wt":"foo"}},"i":0}}]}'>foo</span>]
+[<span about="#mwt20" typeof="mw:Transclusion" data-mw='{"parts":[{"template":{"target":{"wt":"echo","href":"./Template:Echo"},"params":{"1":{"wt":"foo"}},"i":0}}]}'>foo</span> bar]
+[<span about="#mwt21" typeof="mw:Transclusion" data-mw='{"parts":[{"template":{"target":{"wt":"echo","href":"./Template:Echo"},"params":{"1":{"wt":"foo"}},"i":0}}]}'>foo</span> <i>bar</i>]
+[<span about="#mwt22" typeof="mw:Transclusion" data-mw='{"parts":[{"template":{"target":{"wt":"echo","href":"./Template:Echo"},"params":{"1":{"wt":"foo"}},"i":0}}]}'>foo</span>l's] errand
+[<span about="#mwt23" typeof="mw:Transclusion" data-mw='{"parts":[{"template":{"target":{"wt":"echo","href":"./Template:Echo"},"params":{"1":{"wt":"foo"}},"i":0}}]}'>foo</span>l's errand]
+[url=<span about="#mwt24" typeof="mw:Transclusion" data-mw='{"parts":[{"template":{"target":{"wt":"echo","href":"./Template:Echo"},"params":{"1":{"wt":"foo"}},"i":0}}]}'>foo</span>]
[url=<a rel="mw:ExtLink" href="http://example.com">http://example.com</a>]
[http:// bare protocols don't count]</p>
!! end
@@ -17397,13 +17397,12 @@
<div id="title=" data-parsoid='{"stx":"html"}'>HTML rocks</div>
!! end

-## QUESTION: Should this have ExpandedAttrs?
!! test
Extension tag in attribute value
!! wikitext
<span title="<translate>123</translate>">ok</span>
!! html/parsoid
-<p><span title="123" data-parsoid='{"stx":"html","a":{"title":"123"},"sa":{"title":"&lt;translate>123&lt;/translate>"}}'>ok</span></p>
+<p><span title="123" about="#mwt4" typeof="mw:ExpandedAttrs" data-parsoid='{"stx":"html","a":{"title":"123"},"sa":{"title":"&lt;translate>123&lt;/translate>"}}' data-mw='{"attribs":[[{"txt":"title"},{"html":"&lt;translate typeof=\"mw:Extension/translate\" about=\"#mwt3\" data-parsoid=&apos;{\"dsr\":[13,39,2,2]}&apos; data-mw=&apos;{\"name\":\"translate\",\"attrs\":{},\"body\":{\"extsrc\":\"123\"}}&apos;>123&lt;/translate>"}]]}'>ok</span></p>
!! end

!! test

--
To view, visit https://gerrit.wikimedia.org/r/406126
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I188f1c903619320052b0502ba13c7b3be7040e6f
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/services/parsoid
Gerrit-Branch: master
Gerrit-Owner: Arlolra <abreault@wikimedia.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits