Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
C
cpython
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
cpython
Commits
a50d201b
Commit
a50d201b
authored
May 26, 2006
by
Fredrik Lundh
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
needforspeed: stringlib refactoring (in progress)
parent
877ab9bc
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
111 additions
and
179 deletions
+111
-179
Objects/stringlib/README.txt
Objects/stringlib/README.txt
+5
-0
Objects/stringlib/fastsearch.h
Objects/stringlib/fastsearch.h
+97
-0
Objects/stringobject.c
Objects/stringobject.c
+7
-92
Objects/unicodeobject.c
Objects/unicodeobject.c
+2
-87
No files found.
Objects/stringlib/README.txt
0 → 100644
View file @
a50d201b
bits shared by the stringobject and unicodeobject implementations (and
possibly other modules, in a not too distant future).
the stuff in here is included into relevant places; see the individual
source files for details.
Objects/stringlib/fastsearch.h
0 → 100644
View file @
a50d201b
/* stringlib: fastsearch implementation */
#ifndef STRINGLIB_FASTSEARCH_H
#define STRINGLIB_FASTSEARCH_H
/* fast search/count implementation, based on a mix between boyer-
moore and horspool, with a few more bells and whistles on the top.
for some more background, see: http://effbot.org/stringlib */
/* note: fastsearch may access s[n], which isn't a problem when using
Python's ordinary string types, but may cause problems if you're
using this code in other contexts. also, the count mode returns -1
if there cannot possible be a match in the target string, and 0 if
it has actually checked for matches, but didn't find any. callers
beware! */
#define FAST_COUNT 0
#define FAST_SEARCH 1
Py_LOCAL
(
Py_ssize_t
)
fastsearch
(
const
STRINGLIB_CHAR
*
s
,
Py_ssize_t
n
,
const
STRINGLIB_CHAR
*
p
,
Py_ssize_t
m
,
int
mode
)
{
long
mask
;
Py_ssize_t
skip
,
count
=
0
;
Py_ssize_t
i
,
j
,
mlast
,
w
;
w
=
n
-
m
;
if
(
w
<
0
)
return
-
1
;
/* look for special cases */
if
(
m
<=
1
)
{
if
(
m
<=
0
)
return
-
1
;
/* use special case for 1-character strings */
if
(
mode
==
FAST_COUNT
)
{
for
(
i
=
0
;
i
<
n
;
i
++
)
if
(
s
[
i
]
==
p
[
0
])
count
++
;
return
count
;
}
else
{
for
(
i
=
0
;
i
<
n
;
i
++
)
if
(
s
[
i
]
==
p
[
0
])
return
i
;
}
return
-
1
;
}
mlast
=
m
-
1
;
/* create compressed boyer-moore delta 1 table */
skip
=
mlast
-
1
;
/* process pattern[:-1] */
for
(
mask
=
i
=
0
;
i
<
mlast
;
i
++
)
{
mask
|=
(
1
<<
(
p
[
i
]
&
0x1F
));
if
(
p
[
i
]
==
p
[
mlast
])
skip
=
mlast
-
i
-
1
;
}
/* process pattern[-1] outside the loop */
mask
|=
(
1
<<
(
p
[
mlast
]
&
0x1F
));
for
(
i
=
0
;
i
<=
w
;
i
++
)
{
/* note: using mlast in the skip path slows things down on x86 */
if
(
s
[
i
+
m
-
1
]
==
p
[
m
-
1
])
{
/* candidate match */
for
(
j
=
0
;
j
<
mlast
;
j
++
)
if
(
s
[
i
+
j
]
!=
p
[
j
])
break
;
if
(
j
==
mlast
)
{
/* got a match! */
if
(
mode
!=
FAST_COUNT
)
return
i
;
count
++
;
i
=
i
+
mlast
;
continue
;
}
/* miss: check if next character is part of pattern */
if
(
!
(
mask
&
(
1
<<
(
s
[
i
+
m
]
&
0x1F
))))
i
=
i
+
m
;
else
i
=
i
+
skip
;
}
else
{
/* skip: check if next character is part of pattern */
if
(
!
(
mask
&
(
1
<<
(
s
[
i
+
m
]
&
0x1F
))))
i
=
i
+
m
;
}
}
if
(
mode
!=
FAST_COUNT
)
return
-
1
;
return
count
;
}
#endif
Objects/stringobject.c
View file @
a50d201b
...
...
@@ -765,102 +765,17 @@ PyString_AsStringAndSize(register PyObject *obj,
}
/* -------------------------------------------------------------------- */
/*
Helper
s */
/*
stringlib component
s */
#define USE_FAST
/* experimental fast search implementation */
#define USE_FAST
/* XXX - this code is copied from unicodeobject.c. we really should
refactor the core implementations (see _sre.c for how this can be
done), but that'll have to wait -- fredrik */
/* fast search/count implementation, based on a mix between boyer-
moore and horspool, with a few more bells and whistles on the top.
for some more background, see: http://effbot.org/stringlib */
/* note: fastsearch may access s[n], which isn't a problem when using
Python's ordinary string types, but may cause problems if you're
using this code in other contexts. also, the count mode returns -1
if there cannot possibly be a match in the target string, and 0 if
it has actually checked for matches, but didn't find any. callers
beware! */
#define FAST_COUNT 0
#define FAST_SEARCH 1
Py_LOCAL
(
Py_ssize_t
)
fastsearch
(
const
char
*
s
,
Py_ssize_t
n
,
const
char
*
p
,
Py_ssize_t
m
,
int
mode
)
{
long
mask
;
Py_ssize_t
skip
,
count
=
0
;
Py_ssize_t
i
,
j
,
mlast
,
w
;
w
=
n
-
m
;
if
(
w
<
0
)
return
-
1
;
#ifdef USE_FAST
/* look for special cases */
if
(
m
<=
1
)
{
if
(
m
<=
0
)
return
-
1
;
/* use special case for 1-character strings */
if
(
mode
==
FAST_COUNT
)
{
for
(
i
=
0
;
i
<
n
;
i
++
)
if
(
s
[
i
]
==
p
[
0
])
count
++
;
return
count
;
}
else
{
for
(
i
=
0
;
i
<
n
;
i
++
)
if
(
s
[
i
]
==
p
[
0
])
return
i
;
}
return
-
1
;
}
#define STRINGLIB_CHAR char
mlast
=
m
-
1
;
/* create compressed boyer-moore delta 1 table */
skip
=
mlast
-
1
;
/* process pattern[:-1] */
for
(
mask
=
i
=
0
;
i
<
mlast
;
i
++
)
{
mask
|=
(
1
<<
(
p
[
i
]
&
0x1F
));
if
(
p
[
i
]
==
p
[
mlast
])
skip
=
mlast
-
i
-
1
;
}
/* process pattern[-1] outside the loop */
mask
|=
(
1
<<
(
p
[
mlast
]
&
0x1F
));
for
(
i
=
0
;
i
<=
w
;
i
++
)
{
/* note: using mlast in the skip path slows things down on x86 */
if
(
s
[
i
+
m
-
1
]
==
p
[
m
-
1
])
{
/* candidate match */
for
(
j
=
0
;
j
<
mlast
;
j
++
)
if
(
s
[
i
+
j
]
!=
p
[
j
])
break
;
if
(
j
==
mlast
)
{
/* got a match! */
if
(
mode
!=
FAST_COUNT
)
return
i
;
count
++
;
i
=
i
+
mlast
;
continue
;
}
/* miss: check if next character is part of pattern */
if
(
!
(
mask
&
(
1
<<
(
s
[
i
+
m
]
&
0x1F
))))
i
=
i
+
m
;
else
i
=
i
+
skip
;
}
else
{
/* skip: check if next character is part of pattern */
if
(
!
(
mask
&
(
1
<<
(
s
[
i
+
m
]
&
0x1F
))))
i
=
i
+
m
;
}
}
#include "stringlib/fastsearch.h"
if
(
mode
!=
FAST_COUNT
)
return
-
1
;
return
count
;
}
#endif
/* -------------------------------------------------------------------- */
/* Methods */
...
...
@@ -2416,7 +2331,7 @@ string_count(PyStringObject *self, PyObject *args)
#else
r
=
0
;
while
(
i
<
m
)
{
const
char
*
t
const
char
*
t
;
if
(
!
memcmp
(
s
+
i
,
sub
,
n
))
{
r
++
;
i
+=
n
;
...
...
Objects/unicodeobject.c
View file @
a50d201b
...
...
@@ -3854,94 +3854,9 @@ int PyUnicode_EncodeDecimal(Py_UNICODE *s,
/* --- Helpers ------------------------------------------------------------ */
/* fast search/count implementation, based on a mix between boyer-
moore and horspool, with a few more bells and whistles on the top.
for some more background, see: http://effbot.org/stringlib */
#define STRINGLIB_CHAR Py_UNICODE
/* note: fastsearch may access s[n], which isn't a problem when using
Python's ordinary string types, but may cause problems if you're
using this code in other contexts. also, the count mode returns -1
if there cannot possible be a match in the target string, and 0 if
it has actually checked for matches, but didn't find any. callers
beware! */
#define FAST_COUNT 0
#define FAST_SEARCH 1
Py_LOCAL
(
Py_ssize_t
)
fastsearch
(
Py_UNICODE
*
s
,
Py_ssize_t
n
,
Py_UNICODE
*
p
,
Py_ssize_t
m
,
int
mode
)
{
long
mask
;
Py_ssize_t
skip
,
count
=
0
;
Py_ssize_t
i
,
j
,
mlast
,
w
;
w
=
n
-
m
;
if
(
w
<
0
)
return
-
1
;
/* look for special cases */
if
(
m
<=
1
)
{
if
(
m
<=
0
)
return
-
1
;
/* use special case for 1-character strings */
if
(
mode
==
FAST_COUNT
)
{
for
(
i
=
0
;
i
<
n
;
i
++
)
if
(
s
[
i
]
==
p
[
0
])
count
++
;
return
count
;
}
else
{
for
(
i
=
0
;
i
<
n
;
i
++
)
if
(
s
[
i
]
==
p
[
0
])
return
i
;
}
return
-
1
;
}
mlast
=
m
-
1
;
/* create compressed boyer-moore delta 1 table */
skip
=
mlast
-
1
;
/* process pattern[:-1] */
for
(
mask
=
i
=
0
;
i
<
mlast
;
i
++
)
{
mask
|=
(
1
<<
(
p
[
i
]
&
0x1F
));
if
(
p
[
i
]
==
p
[
mlast
])
skip
=
mlast
-
i
-
1
;
}
/* process pattern[-1] outside the loop */
mask
|=
(
1
<<
(
p
[
mlast
]
&
0x1F
));
for
(
i
=
0
;
i
<=
w
;
i
++
)
{
/* note: using mlast in the skip path slows things down on x86 */
if
(
s
[
i
+
m
-
1
]
==
p
[
m
-
1
])
{
/* candidate match */
for
(
j
=
0
;
j
<
mlast
;
j
++
)
if
(
s
[
i
+
j
]
!=
p
[
j
])
break
;
if
(
j
==
mlast
)
{
/* got a match! */
if
(
mode
!=
FAST_COUNT
)
return
i
;
count
++
;
i
=
i
+
mlast
;
continue
;
}
/* miss: check if next character is part of pattern */
if
(
!
(
mask
&
(
1
<<
(
s
[
i
+
m
]
&
0x1F
))))
i
=
i
+
m
;
else
i
=
i
+
skip
;
}
else
{
/* skip: check if next character is part of pattern */
if
(
!
(
mask
&
(
1
<<
(
s
[
i
+
m
]
&
0x1F
))))
i
=
i
+
m
;
}
}
if
(
mode
!=
FAST_COUNT
)
return
-
1
;
return
count
;
}
#include "stringlib/fastsearch.h"
Py_LOCAL
(
Py_ssize_t
)
count
(
PyUnicodeObject
*
self
,
Py_ssize_t
start
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment