Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
M
Mynij-test
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Mynij
Mynij-test
Commits
42119f6a
Commit
42119f6a
authored
Sep 04, 2019
by
Alexandra Rogova
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
index received from db is bad
parent
3ff3b171
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
199 additions
and
56 deletions
+199
-56
external/jsSHA
external/jsSHA
+1
-0
external/rusha
external/rusha
+1
-0
src/html/gadget_model.html
src/html/gadget_model.html
+2
-1
src/js/gadget_loader.js
src/js/gadget_loader.js
+19
-30
src/js/gadget_model.js
src/js/gadget_model.js
+172
-23
src/js/gadget_parser.js
src/js/gadget_parser.js
+4
-2
No files found.
jsSHA
@
766f8ff7
Subproject commit 766f8ff7d926347b008a252a41b06565df747ac5
rusha
@
7dc20211
Subproject commit 7dc2021195219e54b2696af524141696f35694dd
src/html/gadget_model.html
View file @
42119f6a
...
...
@@ -6,7 +6,8 @@
<script
src=
"../../external/jio/dist/jio-latest.js"
></script>
<script
src=
"../../external/renderjs/dist/renderjs-latest.js"
></script>
<script
src=
"../../external/flexsearch/flexsearch.js"
></script>
<script
src=
"../../external/msgpack-lite/dist/msgpack.min.js"
></script>
<script
src=
"../../external/msgpack-lite/dist/msgpack.min.js"
></script>
<script
src=
"../../external/jsSHA/src/sha1.js"
></script>
<script
src=
"../js/gadget_model.js"
></script>
</head>
<body>
...
...
src/js/gadget_loader.js
View file @
42119f6a
...
...
@@ -18,31 +18,30 @@
.
setState
({
to_load
:
[
"
44_svt.xml
"
,
//135 urls
// "allemandfacile.xml", //650 urls
// "anglaisfacile.xml", //567 urls
// "bescherelle.xml", //60 urls
// "codeacademy.xml", //28 urls
// "francaisfacile.xml", //1119 urls
//"44_svt.xml", //135 urls
//"allemandfacile.xml", //650 urls
//"anglaisfacile.xml", //567 urls
"
codeacademy.xml
"
,
//28 urls
//"francaisfacile.xml", //1119 urls
//"hgeo_college.xml", //227 urls
//"histoirencours.xml", //1415 urls
//
"italienfacile.xml", //1477
urls
//
"jerevise.xml", //918
urls
//
"histoirencours.xml", //1415 urls
//
"italienfacile.xml", //1478
urls
//
"jerevise.xml", //919
urls
// "junior_science_et_vie.xml", //532 urls
//
"kmusic.xml", //107 urls
//"kmusic.xml", //107 urls
// "larousse.xml", //4563 urls
// //"letudiant.xml", //41649 urls
// "lewebpedagogique.xml", //298 urls
// //"livrespourtous.xml", //12061 urls
// "mathovore.xml", //222
1
urls
//
"monanneeaucollege.xml", //121 urls
// "mathovore.xml", //222
2
urls
//"monanneeaucollege.xml", //121 urls
// "nosdevoirs.xml", //462 urls
//
"physagreg.xml", //150 urls
//
"physique_chimie_college.xml", //282 urls
//
"reviser_brevet.xml", //229 urls
//"physagreg.xml", //150 urls
//"physique_chimie_college.xml", //282 urls
//"reviser_brevet.xml", //229 urls
// "soutien67.xml", //1604 urls
// //"superprof.xml", //12296 urls
//
"technologieaucollege27.xml", //129 urls
//"technologieaucollege27.xml", //129 urls
// "espagnolfacile.xml", //3352 urls
// "vivelessvt.xml", //1257 urls
// // TEST SITEMAPS TO FILL INDEX
...
...
@@ -67,17 +66,17 @@
// "vox.xml", //1194 urls
// "cbsnews.xml", //1260 urls
// "mirror.xml", //3528 urls
// "abcnews.xml", //
1077
urls
// "abcnews.xml", //
971
urls
// "lequipe.xml", //3455 urls
//
"rugbyrama.xml", //1817 urls
//"rugbyrama.xml", //1817 urls
// "elle.xml", //3532 urls
// "figaro.xml", //2965 urls
// "lepoint.xml", //3747 urls
// "telerama.xml", //2593 urls
// "liberation.xml", //819 urls
// "lemonde.xml", //3517 urls
//
"leparisien.xml", //2189 urls
//
"latribune.xml" //3190 urls
//"leparisien.xml", //2189 urls
//"latribune.xml" //3190 urls
]
})
...
...
@@ -118,16 +117,6 @@
return
RSVP
.
all
(
promise_list
);
}
});
/* return gadget.is_db_empty()
.push(function(empty){
if (empty) {
for (var i=0; i<gadget.state.to_load.length; i+=1){
promise_list.push(gadget.load_file("../../../crawler_test/" + gadget.state.to_load[i]));
}
return RSVP.all(promise_list);
}
});*/
})
.
declareMethod
(
"
load_file
"
,
function
(
file_path
,
file_name
){
//OK
...
...
src/js/gadget_model.js
View file @
42119f6a
...
...
@@ -2,23 +2,18 @@
/*global window, RSVP, rJS, jIO*/
(
function
(
window
,
document
,
RSVP
,
rJS
,
jIO
)
{
"
use strict
"
;
rJS
(
window
)
.
ready
(
function
(){
var
index
,
db
;
var
index
=
FlexSearch
.
create
(
"
memory
"
);
index
=
FlexSearch
.
create
(
"
memory
"
);
db
=
jIO
.
createJIO
(
{
type
:
"
query
"
,
sub_storage
:
{
type
:
"
uuid
"
,
sub_storage
:
{
type
:
"
indexeddb
"
,
database
:
"
mynij
"
}
}
type
:
"
indexeddb
"
,
database
:
"
mynij
"
}
);
...
...
@@ -45,24 +40,24 @@
})
.
declareMethod
(
"
add_page
"
,
function
(
page_info
){
//page_info = {link, title, description, item}
console
.
log
(
this
.
state
.
index
.
info
());
var
gadget
=
this
,
tmp
;
tmp
=
page_info
;
tmp
.
portal_type
=
"
page
"
;
return
gadget
.
state
.
db
.
put
(
page_info
.
link
,
tmp
)
.
push
(
function
(){
var
defer
=
RSVP
.
defer
();
/*
var defer = RSVP.defer();
gadget.state.index.add(page_info.link, page_info.title + " " + page_info.item, defer.resolve.bind(defer));
return
defer
.
promise
;
})
.
push
(
function
(){
return
gadget
.
_save_index
();
return defer.promise;*/
return
gadget
.
state
.
index
.
add
(
page_info
.
link
,
page_info
.
title
+
"
"
+
page_info
.
item
);
});
})
.
declareMethod
(
"
loaded
"
,
function
(
file_name
){
var
gadget
=
this
,
var
gadget
=
this
,
id
;
console
.
log
(
file_name
+
"
done loading
"
);
return
gadget
.
state
.
db
.
get
(
"
loaded
"
)
.
push
(
function
(
result
){
...
...
@@ -71,29 +66,183 @@
return
gadget
.
state
.
db
.
put
(
"
loaded
"
,
tmp
);
})
.
push
(
undefined
,
function
(
my_error
)
{
console
.
log
(
my_error
);
var
tmp
=
{};
tmp
[
file_name
]
=
true
;
return
gadget
.
state
.
db
.
put
(
"
loaded
"
,
tmp
);
})
.
always
(
function
(){
return
gadget
.
_save_index
();
});
})
.
declareMethod
(
"
_save_index
"
,
function
(){
//OK
.
declareMethod
(
"
_save_index
"
,
function
(){
/* var gadget = this;
console.log("getting index");
var serialized = this.state.index.export_test();
return
this
.
state
.
db
.
put
(
"
index
"
,
serialized
);
console.log("Index received. Saving");
return this.state.db.put("index", this.state.index.info())
.push(function(){
console.log(1);
//return gadget.state.db.putAttachment("index", "ids", new Blob([serialized.ids], {type : "application/json"}));
return gadget.state.db.putAttachment("index", "ids", new Blob([serialized.ids], {type : "text/plain"}));
})
.push(function(){
console.log(2);
//return gadget.state.db.putAttachment("index", "map", new Blob([serialized.map], {type : "application/json"}));
var shaObj = new jsSHA("SHA-1", "TEXT");
shaObj.update(serialized.map);
console.log("map sha1 : " + shaObj.getHash("HEX"));
return gadget.state.db.putAttachment("index", "map", new Blob([serialized.map], {type : "text/plain"}));
})
.push(function(){
console.log(3);
//return gadget.state.db.putAttachment("index", "ctx", new Blob([serialized.ctx], {type : "application/json"}));
return gadget.state.db.putAttachment("index", "ctx", new Blob([serialized.ctx], {type : "text/plain"}));
})
.push(function(){
console.log("index saved");
})
.push(undefined, function (my_error) {
throw my_error;
//console.log(my_error);
});*/
var
gadget
=
this
,
serialized
,
i
,
j
=
0
,
promise_list
=
[];
console
.
log
(
"
getting index
"
);
serialized
=
this
.
state
.
index
.
export_test
();
console
.
log
(
"
Index received. Saving
"
);
return
this
.
state
.
db
.
put
(
"
index_map
"
,
{})
.
push
(
function
(){
console
.
log
(
serialized
.
map
.
slice
(
0
,
1000
));
for
(
i
=
0
;
i
<
serialized
.
map
.
length
;
i
+=
10000
){
promise_list
.
push
(
gadget
.
state
.
db
.
putAttachment
(
"
index_map
"
,
j
+
""
,
new
Blob
([
serialized
.
map
.
slice
(
i
,
i
+
10000
)],
{
type
:
"
application/json
"
})));
j
+=
1
;
}
j
=
0
;
return
RSVP
.
all
(
promise_list
);
})
.
push
(
function
(){
return
gadget
.
state
.
db
.
put
(
"
index_ids
"
,
{});
})
.
push
(
function
(){
promise_list
=
[];
for
(
i
=
0
;
i
<
serialized
.
map
.
length
;
i
+=
10000
){
promise_list
.
push
(
gadget
.
state
.
db
.
putAttachment
(
"
index_ids
"
,
j
+
""
,
new
Blob
([
serialized
.
ids
.
slice
(
i
,
i
+
10000
)],
{
type
:
"
application/json
"
})));
j
+=
1
;
}
j
=
0
;
return
RSVP
.
all
(
promise_list
);
})
.
push
(
function
(){
return
gadget
.
state
.
db
.
put
(
"
index_ctx
"
,
{});
})
.
push
(
function
(){
promise_list
=
[];
for
(
i
=
0
;
i
<
serialized
.
ctx
.
length
;
i
+=
10000
){
promise_list
.
push
(
gadget
.
state
.
db
.
putAttachment
(
"
index_ctx
"
,
j
+
""
,
new
Blob
([
serialized
.
map
.
slice
(
i
,
i
+
10000
)],
{
type
:
"
application/json
"
})));
j
+=
1
;
}
return
RSVP
.
all
(
promise_list
);
})
.
push
(
function
(){
console
.
log
(
"
index saved
"
);
})
.
push
(
undefined
,
function
(
my_error
)
{
console
.
log
(
my_error
);
});
})
.
declareMethod
(
"
search
"
,
function
(
query
){
return
this
.
state
.
index
.
search
(
query
);
})
.
declareMethod
(
"
_load_index
"
,
function
(
msgpack
){
//OK
var
gadget
=
this
;
return
gadget
.
state
.
db
.
get
(
"
index
"
)
.
push
(
function
(
index
){
gadget
.
state
.
index
.
import_test
(
index
.
ids
,
index
.
map
,
index
.
ctx
);
.
declareMethod
(
"
_load_index
"
,
function
(
msgpack
){
/* var gadget = this,
ids,
map;
return gadget.state.db.getAttachment("index", "ids", {"format": "text"})
.push(function(result){
ids = result;
return gadget.state.db.getAttachment("index", "map", {"format": "text"});
})
.push(function(result){
map = result;
var shaObj = new jsSHA("SHA-1", "TEXT");
shaObj.update(map);
console.log("map sha1 : " + shaObj.getHash("HEX"));
return gadget.state.db.getAttachment("index", "ctx", {"format": "text"});
})
.push(function(result){
gadget.state.index.import_test(ids, map, result);
console.log("index imported from memory");
})
.push(undefined, function (my_error) {console.log(my_error)});*/
var
gadget
=
this
,
ids
,
map
,
ctx
,
promise_list
=
[],
i
;
return
gadget
.
state
.
db
.
allAttachments
(
"
index_ids
"
)
.
push
(
function
(
result
){
if
(
Object
.
keys
(
result
).
length
!==
0
){
for
(
i
=
0
;
i
<
Object
.
keys
(
result
).
length
;
i
+=
1
){
promise_list
.
push
(
gadget
.
state
.
db
.
getAttachment
(
"
index_ids
"
,
i
+
""
,
{
format
:
"
text
"
}));
}
return
RSVP
.
all
(
promise_list
);
}
else
{
return
null
;
}
})
.
push
(
function
(
result
){
if
(
result
===
null
)
ids
=
""
;
else
ids
=
result
.
join
(
""
);
return
gadget
.
state
.
db
.
allAttachments
(
"
index_map
"
);
})
.
push
(
function
(
result
){
if
(
Object
.
keys
(
result
).
length
!==
0
){
for
(
i
=
0
;
i
<
Object
.
keys
(
result
).
length
;
i
+=
1
){
promise_list
.
push
(
gadget
.
state
.
db
.
getAttachment
(
"
index_map
"
,
i
+
""
,
{
format
:
"
text
"
}));
}
return
RSVP
.
all
(
promise_list
);
}
else
{
return
null
;
}
})
.
push
(
function
(
result
){
console
.
log
(
result
[
0
]);
if
(
result
===
null
)
map
=
""
;
else
map
=
result
.
join
(
""
);
console
.
log
(
map
.
slice
(
0
,
1000
));
return
gadget
.
state
.
db
.
allAttachments
(
"
index_ctx
"
);
})
.
push
(
function
(
result
){
if
(
Object
.
keys
(
result
).
length
!==
0
){
for
(
i
=
0
;
i
<
Object
.
keys
(
result
).
length
;
i
+=
1
){
promise_list
.
push
(
gadget
.
state
.
db
.
getAttachment
(
"
index_ctx
"
,
i
+
""
,
{
format
:
"
text
"
}));
}
return
RSVP
.
all
(
promise_list
);
}
else
{
return
null
;
}
})
.
push
(
function
(
result
){
if
(
result
===
null
)
ctx
=
""
;
else
ctx
=
result
.
join
(
""
);
return
gadget
.
add_index
(
ids
,
map
,
ctx
);
})
.
push
(
function
(
result
){
console
.
log
(
"
index imported from memory
"
);
})
.
push
(
undefined
,
function
(
my_error
)
{});
.
push
(
undefined
,
function
(
my_error
)
{
console
.
log
(
my_error
);
});
})
.
declareMethod
(
"
is_empty
"
,
function
(){
...
...
src/js/gadget_parser.js
View file @
42119f6a
...
...
@@ -14,6 +14,7 @@
return
;
}
if
(
argument_list
.
length
>
0
)
{
console
.
log
(
argument_list
.
length
);
function_used
.
apply
(
context
,
argument_list
.
shift
())
.
then
(
function
(
result
)
{
pushAndExecute
(
global_defer
);
...
...
@@ -47,11 +48,12 @@
links
=
new
DOMParser
().
parseFromString
(
links_file
,
"
text/xml
"
).
getElementsByTagName
(
"
url
"
),
links_modified
=
[],
i
;
console
.
log
(
file_name
+
"
:
"
+
links
.
length
);
for
(
i
=
0
;
i
<
links
.
length
;
i
+=
1
){
links_modified
[
i
]
=
[
links
[
i
].
getElementsByTagName
(
'
loc
'
)[
0
].
textContent
];
}
return
new
RSVP
.
Queue
().
push
(
function
()
{
return
dispatchQueue
(
gadget
,
gadget
.
_get
,
links_modified
,
1
);
return
dispatchQueue
(
gadget
,
gadget
.
_get
,
links_modified
,
3
);
})
.
push
(
function
(){
return
gadget
.
add_file
(
file_name
);
...
...
@@ -62,7 +64,7 @@
var
gadget
=
this
;
return
new
RSVP
.
Queue
()
.
push
(
function
(){
var
rng
=
Math
.
floor
(
Math
.
random
()
*
Math
.
floor
(
10
));
var
rng
=
Math
.
floor
(
Math
.
random
()
*
Math
.
floor
(
10
));
if
(
rng
%
2
===
0
)
return
jIO
.
util
.
ajax
({
url
:
"
https://softinst116265.host.vifib.net/erp5/ERP5Site_getHTTPResource?url=
"
+
link
});
else
return
jIO
.
util
.
ajax
({
url
:
"
https://softinst116446.host.vifib.net/erp5/ERP5Site_getHTTPResource?url=
"
+
link
});
})
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment