Commit c278016f authored by Alain Takoudjou

erp5_web_mynij_search: Prevent the build from fetching all URLs present in the HTML response.

When the HTML result is loaded into an element, the browser fetches every image, link, stylesheet, etc. referenced in the HTML string. We now remove all URLs before loading the HTML into the element.

Also clean up textContent so that it does not contain useless content.
parent 32a5fd6c
......@@ -374,6 +374,7 @@
showError(my_error);
} else if (my_error.target.status === 500 && attempt <= 3) {
return gadget.get_sitemap_item(link, index_name, attempt + 1);
console.log("Retry " + (attempt + 1) + " on url: " + link);
}
else
showError(my_error);
......@@ -396,12 +397,12 @@
if (regex) {
tmp_div = document.createElement("div");
tmp_div.innerHTML = regex[0]
// remove all <script> sections
.replace(/(<script[^>]*>[^>]*.*<\/script>)/gm, '');
// remove all <script>, <link>, href, img, style and comments sections
.replace(/(<script[^>]*>[^>]*.*<\/script>)|(<meta [^>]*.*\/?>)|(<link [^>]*.*\/?>)|(<!--[^>]*.*-->)|(<style[^>]*>[^>]*.*<\/style>)|(<source[^>]*>[^>]*.*<\/source>)|(<img [^>]*.*\/?>)|(href="[^"]*")/gm, '');
// remove footers, headers
tmp_div.querySelectorAll(
'link, style, script, footer, header, noscript, picture')
.forEach(function (item) {
'link, style, script, footer, header, noscript, picture, img, video, label'
).forEach(function (item) {
item.remove();
});
}
......
......@@ -242,7 +242,7 @@
</item>
<item>
<key> <string>serial</string> </key>
<value> <string>994.32596.8054.51882</string> </value>
<value> <string>994.61246.30369.5734</string> </value>
</item>
<item>
<key> <string>state</string> </key>
......@@ -260,7 +260,7 @@
</tuple>
<state>
<tuple>
<float>1630513271.74</float>
<float>1632233029.49</float>
<string>UTC</string>
</tuple>
</state>
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment