Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
C
cpython
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
cpython
Commits
fb81d3cb
Commit
fb81d3cb
authored
May 05, 2016
by
Serhiy Storchaka
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Issue #26765: Moved common code for the replace() method of bytes and bytearray
to a template file.
parent
9fc38574
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
527 additions
and
1062 deletions
+527
-1062
Objects/bytearrayobject.c
Objects/bytearrayobject.c
+3
-500
Objects/bytesobject.c
Objects/bytesobject.c
+3
-505
Objects/stringlib/transmogrify.h
Objects/stringlib/transmogrify.h
+521
-57
No files found.
Objects/bytearrayobject.c
View file @
fb81d3cb
...
...
@@ -1307,503 +1307,6 @@ bytearray_maketrans_impl(Py_buffer *frm, Py_buffer *to)
}
/* find and count characters and substrings */
/* findchar(target, target_len, c): thin wrapper around memchr() that casts
   the result to char *.  It evaluates to a pointer to the first byte equal
   to c within the first target_len bytes of target, or NULL if there is no
   such byte. */
#define findchar(target, target_len, c) \
((char *)memchr((const void *)(target), c, target_len))
/* Used by the replace helpers when nothing has to change.  A bytearray
   operation never hands back the receiver itself: a brand new bytearray
   holding a copy of self's bytes is built and returned instead. */
Py_LOCAL(PyByteArrayObject *)
return_self(PyByteArrayObject *self)
{
    const char *data = PyByteArray_AS_STRING(self);
    Py_ssize_t size = PyByteArray_GET_SIZE(self);

    return (PyByteArrayObject *)PyByteArray_FromStringAndSize(data, size);
}
/* Count the bytes equal to c in target[0:target_len].

   Counting stops as soon as the running total reaches maxcount.  The
   comparison is made after the increment, so a buffer containing at least
   one match yields a result of at least 1 whatever maxcount is. */
Py_LOCAL_INLINE(Py_ssize_t)
countchar(const char *target, Py_ssize_t target_len, char c,
          Py_ssize_t maxcount)
{
    const char *limit = target + target_len;
    const char *hit;
    Py_ssize_t found = 0;

    for (hit = findchar(target, target_len, c);
         hit != NULL;
         hit = findchar(hit + 1, limit - (hit + 1), c))
    {
        if (++found >= maxcount) {
            break;
        }
    }
    return found;
}
/* Algorithms for different cases of string replacement */
/* Insert the 'to' bytes in front of the leading bytes of self and, when
   maxcount allows, once more at the very end
   (b"Python".replace(b"", b".") gives b".P.y.t.h.o.n.").

   Preconditions set up by replace(): len(self)>=1, from="", len(to)>=1,
   maxcount>=1.

   self     - the bytearray whose bytes are interleaved with 'to'
   to_s     - the bytes to insert
   to_len   - number of bytes at to_s
   maxcount - upper bound on the number of insertions

   Returns a newly created bytearray.  Returns NULL with OverflowError set
   if the result length would exceed PY_SSIZE_T_MAX, or NULL if creating
   the result object fails. */
Py_LOCAL(PyByteArrayObject *)
replace_interleave(PyByteArrayObject *self,
                   const char *to_s, Py_ssize_t to_len,
                   Py_ssize_t maxcount)
{
    const Py_ssize_t self_len = PyByteArray_GET_SIZE(self);
    Py_ssize_t inserts, pairs, k;
    const char *src;
    char *dst;
    PyByteArrayObject *result;

    /* One insertion before every byte plus one at the end:
       inserts = min(maxcount, self_len + 1).  The addition cannot
       overflow because self_len + 1 <= maxcount <= PY_SSIZE_T_MAX. */
    inserts = (maxcount <= self_len) ? maxcount : self_len + 1;

    /* Refuse a result length (inserts * to_len + self_len) that does not
       fit in a Py_ssize_t. */
    assert(inserts > 0);
    if (to_len > (PY_SSIZE_T_MAX - self_len) / inserts) {
        /* Same wording as the other bytearray replace helpers. */
        PyErr_SetString(PyExc_OverflowError,
                        "replace bytes is too long");
        return NULL;
    }

    result = (PyByteArrayObject *)
        PyByteArray_FromStringAndSize(NULL, inserts * to_len + self_len);
    if (result == NULL) {
        return NULL;
    }

    src = PyByteArray_AS_STRING(self);
    dst = PyByteArray_AS_STRING(result);

    /* The first insertion always happens; every remaining one is preceded
       by exactly one byte copied from self. */
    pairs = inserts - 1;
    if (to_len > 1) {
        Py_MEMCPY(dst, to_s, to_len);
        dst += to_len;
        for (k = 0; k < pairs; k++) {
            *dst++ = *src++;
            Py_MEMCPY(dst, to_s, to_len);
            dst += to_len;
        }
    }
    else {
        /* Single-byte 'to': plain stores instead of Py_MEMCPY. */
        *dst = to_s[0];
        dst += to_len;
        for (k = 0; k < pairs; k++) {
            *dst++ = *src++;
            *dst = to_s[0];
            dst += to_len;
        }
    }

    /* 'pairs' bytes of self were consumed above; copy whatever is left. */
    Py_MEMCPY(dst, src, self_len - pairs);

    return result;
}
/* Remove up to maxcount occurrences of the single byte from_c.

   Preconditions set up by replace(): len(self)>=1, len(from)==1, to="",
   maxcount>=1.  When from_c does not occur, the result of return_self()
   is returned.  NULL is returned if creating the result object fails. */
Py_LOCAL(PyByteArrayObject *)
replace_delete_single_character(PyByteArrayObject *self,
                                char from_c, Py_ssize_t maxcount)
{
    Py_ssize_t self_len = PyByteArray_GET_SIZE(self);
    char *cursor = PyByteArray_AS_STRING(self);
    char *const stop = cursor + self_len;
    Py_ssize_t hits, new_len, chunk;
    char *match, *out;
    PyByteArrayObject *result;

    hits = countchar(cursor, self_len, from_c, maxcount);
    if (hits == 0) {
        return return_self(self);
    }

    /* Every deleted occurrence shortens the result by one byte. */
    new_len = self_len - hits;
    assert(new_len >= 0);

    result = (PyByteArrayObject *)
        PyByteArray_FromStringAndSize(NULL, new_len);
    if (result == NULL) {
        return NULL;
    }
    out = PyByteArray_AS_STRING(result);

    /* Copy the stretch in front of each occurrence, then step over it. */
    for (; hits > 0; hits--) {
        match = findchar(cursor, stop - cursor, from_c);
        if (match == NULL) {
            break;
        }
        chunk = match - cursor;
        Py_MEMCPY(out, cursor, chunk);
        out += chunk;
        cursor = match + 1;
    }
    /* Tail after the last deleted byte. */
    Py_MEMCPY(out, cursor, stop - cursor);

    return result;
}
/* Remove the occurrences of the from_len-byte sequence from_s; the number
   removed is the count reported by stringlib_count() for maxcount.

   Preconditions set up by replace(): len(self)>=1, len(from)>=2, to="",
   maxcount>=1.  When the count is zero, the result of return_self() is
   returned.  NULL is returned if creating the result object fails. */
Py_LOCAL(PyByteArrayObject *)
replace_delete_substring(PyByteArrayObject *self,
                         const char *from_s, Py_ssize_t from_len,
                         Py_ssize_t maxcount)
{
    Py_ssize_t self_len = PyByteArray_GET_SIZE(self);
    char *cursor = PyByteArray_AS_STRING(self);
    char *const stop = cursor + self_len;
    Py_ssize_t hits, new_len, gap;
    char *out;
    PyByteArrayObject *result;

    hits = stringlib_count(cursor, self_len, from_s, from_len, maxcount);
    if (hits == 0) {
        /* nothing to delete */
        return return_self(self);
    }

    new_len = self_len - (hits * from_len);
    assert(new_len >= 0);

    result = (PyByteArrayObject *)
        PyByteArray_FromStringAndSize(NULL, new_len);
    if (result == NULL) {
        return NULL;
    }
    out = PyByteArray_AS_STRING(result);

    /* Copy the gap in front of each match, then skip the match itself. */
    for (; hits > 0; hits--) {
        gap = stringlib_find(cursor, stop - cursor, from_s, from_len, 0);
        if (gap == -1) {
            break;
        }
        Py_MEMCPY(out, cursor, gap);
        out += gap;
        cursor += gap + from_len;
    }
    /* Tail after the last removed match. */
    Py_MEMCPY(out, cursor, stop - cursor);

    return result;
}
/* Replace up to maxcount occurrences of the byte from_c with to_c.  The
   result has the same length as self, so self is copied once and the copy
   is patched in place.

   Preconditions set up by replace(): len(self)>=1, len(from)==len(to)==1,
   maxcount>=1.  When from_c does not occur, the result of return_self()
   is returned.  NULL is returned if creating the result object fails. */
Py_LOCAL(PyByteArrayObject *)
replace_single_character_in_place(PyByteArrayObject *self,
                                  char from_c, char to_c,
                                  Py_ssize_t maxcount)
{
    char *self_s = PyByteArray_AS_STRING(self);
    Py_ssize_t self_len = PyByteArray_GET_SIZE(self);
    char *first, *buf, *stop, *spot;
    PyByteArrayObject *result;

    first = findchar(self_s, self_len, from_c);
    if (first == NULL) {
        /* no occurrence at all */
        return return_self(self);
    }

    /* At least one byte changes, so a new object is needed. */
    result = (PyByteArrayObject *)
        PyByteArray_FromStringAndSize(NULL, self_len);
    if (result == NULL) {
        return NULL;
    }
    buf = PyByteArray_AS_STRING(result);
    Py_MEMCPY(buf, self_s, self_len);
    stop = buf + self_len;

    /* Patch the copy, starting at the position of the first occurrence;
       later occurrences are searched for in the copy itself. */
    spot = buf + (first - self_s);
    for (;;) {
        *spot = to_c;
        if (--maxcount <= 0) {
            break;
        }
        spot = findchar(spot + 1, stop - (spot + 1), from_c);
        if (spot == NULL) {
            break;
        }
    }

    return result;
}
/* Replace up to maxcount occurrences of from_s with the equally long to_s.
   The result has the same length as self, so self is copied once and the
   copy is patched in place.  Only from_len is used as the copy length;
   the caller guarantees to_len == from_len.

   Preconditions set up by replace(): len(self)>=1, len(from)==len(to)>=2,
   maxcount>=1.  When from_s does not occur, the result of return_self()
   is returned.  NULL is returned if creating the result object fails. */
Py_LOCAL(PyByteArrayObject *)
replace_substring_in_place(PyByteArrayObject *self,
                           const char *from_s, Py_ssize_t from_len,
                           const char *to_s, Py_ssize_t to_len,
                           Py_ssize_t maxcount)
{
    char *self_s = PyByteArray_AS_STRING(self);
    Py_ssize_t self_len = PyByteArray_GET_SIZE(self);
    Py_ssize_t offset;
    char *cursor, *stop;
    PyByteArrayObject *result;

    offset = stringlib_find(self_s, self_len, from_s, from_len, 0);
    if (offset == -1) {
        /* no occurrence at all */
        return return_self(self);
    }

    /* At least one match changes, so a new object is needed. */
    result = (PyByteArrayObject *)
        PyByteArray_FromStringAndSize(NULL, self_len);
    if (result == NULL) {
        return NULL;
    }
    cursor = PyByteArray_AS_STRING(result);
    Py_MEMCPY(cursor, self_s, self_len);
    stop = cursor + self_len;

    /* Overwrite the match 'offset' bytes ahead of the cursor, move past
       it, and keep searching in the copy while maxcount allows. */
    for (;;) {
        cursor += offset;
        Py_MEMCPY(cursor, to_s, from_len);
        cursor += from_len;
        if (--maxcount <= 0) {
            break;
        }
        offset = stringlib_find(cursor, stop - cursor, from_s, from_len, 0);
        if (offset == -1) {
            break;
        }
    }

    return result;
}
/* Replace up to maxcount occurrences of the byte from_c with the to_len
   bytes at to_s.

   Preconditions set up by replace(): len(self)>=1, len(from)==1,
   len(to)>=2, maxcount>=1.  When from_c does not occur, the result of
   return_self() is returned.  Returns NULL with OverflowError set if the
   result length would exceed PY_SSIZE_T_MAX, or NULL if creating the
   result object fails. */
Py_LOCAL(PyByteArrayObject *)
replace_single_character(PyByteArrayObject *self,
                         char from_c,
                         const char *to_s, Py_ssize_t to_len,
                         Py_ssize_t maxcount)
{
    char *cursor = PyByteArray_AS_STRING(self);
    Py_ssize_t self_len = PyByteArray_GET_SIZE(self);
    char *const stop = cursor + self_len;
    Py_ssize_t hits, chunk;
    char *match, *out;
    PyByteArrayObject *result;

    hits = countchar(cursor, self_len, from_c, maxcount);
    if (hits == 0) {
        /* nothing to replace */
        return return_self(self);
    }

    /* Each replacement grows the result by (to_len - 1) bytes; make sure
       self_len + hits * (to_len - 1) fits in a Py_ssize_t. */
    assert(hits > 0);
    if (to_len - 1 > (PY_SSIZE_T_MAX - self_len) / hits) {
        PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
        return NULL;
    }

    result = (PyByteArrayObject *)
        PyByteArray_FromStringAndSize(NULL, self_len + hits * (to_len - 1));
    if (result == NULL) {
        return NULL;
    }
    out = PyByteArray_AS_STRING(result);

    for (; hits > 0; hits--) {
        match = findchar(cursor, stop - cursor, from_c);
        if (match == NULL) {
            break;
        }
        if (match != cursor) {
            /* unchanged bytes in front of the occurrence */
            chunk = match - cursor;
            Py_MEMCPY(out, cursor, chunk);
            out += chunk;
        }
        /* the replacement itself */
        Py_MEMCPY(out, to_s, to_len);
        out += to_len;
        cursor = match + 1;
    }
    /* Whatever follows the last replaced occurrence. */
    Py_MEMCPY(out, cursor, stop - cursor);

    return result;
}
/* General case: replace occurrences of the from_len bytes at from_s with
   the to_len bytes at to_s; the number replaced is the count reported by
   stringlib_count() for maxcount.

   Preconditions set up by replace(): len(self)>=1, len(from)>=2,
   len(to)>=1 with len(to)!=len(from), maxcount>=1.  When the count is
   zero, the result of return_self() is returned.  Returns NULL with
   OverflowError set if the result length would exceed PY_SSIZE_T_MAX, or
   NULL if creating the result object fails. */
Py_LOCAL(PyByteArrayObject *)
replace_substring(PyByteArrayObject *self,
                  const char *from_s, Py_ssize_t from_len,
                  const char *to_s, Py_ssize_t to_len,
                  Py_ssize_t maxcount)
{
    char *cursor = PyByteArray_AS_STRING(self);
    Py_ssize_t self_len = PyByteArray_GET_SIZE(self);
    char *const stop = cursor + self_len;
    Py_ssize_t hits, gap;
    char *out;
    PyByteArrayObject *result;

    hits = stringlib_count(cursor, self_len, from_s, from_len, maxcount);
    if (hits == 0) {
        /* nothing to replace */
        return return_self(self);
    }

    /* Make sure self_len + hits * (to_len - from_len) fits in a
       Py_ssize_t before computing it. */
    assert(hits > 0);
    if (to_len - from_len > (PY_SSIZE_T_MAX - self_len) / hits) {
        PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
        return NULL;
    }

    result = (PyByteArrayObject *)
        PyByteArray_FromStringAndSize(
            NULL, self_len + hits * (to_len - from_len));
    if (result == NULL) {
        return NULL;
    }
    out = PyByteArray_AS_STRING(result);

    for (; hits > 0; hits--) {
        gap = stringlib_find(cursor, stop - cursor, from_s, from_len, 0);
        if (gap == -1) {
            break;
        }
        if (gap != 0) {
            /* unchanged bytes in front of the match */
            Py_MEMCPY(out, cursor, gap);
            out += gap;
        }
        /* the replacement itself */
        Py_MEMCPY(out, to_s, to_len);
        out += to_len;
        cursor += gap + from_len;
    }
    /* Whatever follows the last replaced match. */
    Py_MEMCPY(out, cursor, stop - cursor);

    return result;
}
/* Entry point of bytearray replacement: choose the specialised helper that
   matches the lengths of 'from' and 'to'.

   self             - the bytearray being searched
   from_s, from_len - the bytes to look for
   to_s, to_len     - the bytes to put in their place
   maxcount         - maximum number of replacements; a negative value
                      means "no limit"

   Returns whatever the selected helper (or return_self()) returns. */
Py_LOCAL(PyByteArrayObject *)
replace(PyByteArrayObject *self,
        const char *from_s, Py_ssize_t from_len,
        const char *to_s, Py_ssize_t to_len,
        Py_ssize_t maxcount)
{
    if (maxcount < 0) {
        maxcount = PY_SSIZE_T_MAX;
    }
    else if (maxcount == 0 || PyByteArray_GET_SIZE(self) == 0) {
        /* an explicit limit with nothing to do: hand back a copy */
        return return_self(self);
    }

    /* From here on maxcount is never 0.  Replacing "" with "" changes
       nothing either. */
    if (from_len == 0 && to_len == 0) {
        return return_self(self);
    }

    if (from_len == 0) {
        /* Empty 'from': insert 'to' everywhere, e.g.
           "Python".replace("", ".") == ".P.y.t.h.o.n." */
        return replace_interleave(self, to_s, to_len, maxcount);
    }

    /* Only the empty-'from' case above can turn an empty self into
       something non-empty; the helpers below all expect len(self)>=1. */
    if (PyByteArray_GET_SIZE(self) == 0) {
        return return_self(self);
    }

    if (to_len == 0) {
        /* Empty 'to': delete the occurrences of 'from'. */
        return (from_len == 1)
            ? replace_delete_single_character(self, from_s[0], maxcount)
            : replace_delete_substring(self, from_s, from_len, maxcount);
    }

    if (from_len == to_len) {
        /* Equal lengths: the result has the size of self. */
        return (from_len == 1)
            ? replace_single_character_in_place(self, from_s[0], to_s[0],
                                                maxcount)
            : replace_substring_in_place(self, from_s, from_len,
                                         to_s, to_len, maxcount);
    }

    /* Different, non-zero lengths: the generic algorithms. */
    if (from_len == 1) {
        return replace_single_character(self, from_s[0],
                                        to_s, to_len, maxcount);
    }
    /* len('from')>=2, len('to')>=1 */
    return replace_substring(self, from_s, from_len,
                             to_s, to_len, maxcount);
}
/*[clinic input]
bytearray.replace
...
...
@@ -1825,9 +1328,9 @@ bytearray_replace_impl(PyByteArrayObject *self, Py_buffer *old,
Py_buffer
*
new
,
Py_ssize_t
count
)
/*[clinic end generated code: output=d39884c4dc59412a input=aa379d988637c7fb]*/
{
return
(
PyObject
*
)
replace
((
PyByteArrayObject
*
)
self
,
old
->
buf
,
old
->
len
,
new
->
buf
,
new
->
len
,
count
);
return
stringlib_replace
((
PyObject
*
)
self
,
(
const
char
*
)
old
->
buf
,
old
->
len
,
(
const
char
*
)
new
->
buf
,
new
->
len
,
count
);
}
/*[clinic input]
...
...
Objects/bytesobject.c
View file @
fb81d3cb
...
...
@@ -2198,508 +2198,6 @@ bytes_maketrans_impl(Py_buffer *frm, Py_buffer *to)
return
_Py_bytes_maketrans
(
frm
,
to
);
}
/* find and count characters and substrings */
/* findchar(target, target_len, c): thin wrapper around memchr() that casts
   the result to char *.  It evaluates to a pointer to the first byte equal
   to c within the first target_len bytes of target, or NULL if there is no
   such byte. */
#define findchar(target, target_len, c) \
((char *)memchr((const void *)(target), c, target_len))
/* Used by the replace helpers when nothing has to change.  An object that
   passes PyBytes_CheckExact() is returned as is, with its reference count
   incremented; for anything else (a subclass instance) a new bytes object
   holding a copy of the data is created. */
Py_LOCAL(PyBytesObject *)
return_self(PyBytesObject *self)
{
    if (!PyBytes_CheckExact(self)) {
        return (PyBytesObject *)PyBytes_FromStringAndSize(
            PyBytes_AS_STRING(self),
            PyBytes_GET_SIZE(self));
    }
    Py_INCREF(self);
    return self;
}
/* Count the bytes equal to c in target[0:target_len].

   Counting stops as soon as the running total reaches maxcount.  The
   comparison is made after the increment, so a buffer containing at least
   one match yields a result of at least 1 whatever maxcount is. */
Py_LOCAL_INLINE(Py_ssize_t)
countchar(const char *target, Py_ssize_t target_len, char c,
          Py_ssize_t maxcount)
{
    const char *limit = target + target_len;
    const char *hit;
    Py_ssize_t found = 0;

    for (hit = findchar(target, target_len, c);
         hit != NULL;
         hit = findchar(hit + 1, limit - (hit + 1), c))
    {
        if (++found >= maxcount) {
            break;
        }
    }
    return found;
}
/* Algorithms for different cases of string replacement */
/* Insert the 'to' bytes in front of the leading bytes of self and, when
   maxcount allows, once more at the very end
   (b"Python".replace(b"", b".") gives b".P.y.t.h.o.n.").

   Preconditions set up by replace(): len(self)>=1, from="", len(to)>=1,
   maxcount>=1.  Returns NULL with OverflowError set if the result length
   would exceed PY_SSIZE_T_MAX, or NULL if creating the result fails. */
Py_LOCAL(PyBytesObject *)
replace_interleave(PyBytesObject *self,
                   const char *to_s, Py_ssize_t to_len,
                   Py_ssize_t maxcount)
{
    const Py_ssize_t self_len = PyBytes_GET_SIZE(self);
    Py_ssize_t inserts, pairs, k;
    const char *src;
    char *dst;
    PyBytesObject *result;

    /* One insertion before every byte plus one at the end:
       inserts = min(maxcount, self_len + 1).  The addition cannot
       overflow because self_len + 1 <= maxcount <= PY_SSIZE_T_MAX. */
    inserts = (maxcount <= self_len) ? maxcount : self_len + 1;

    /* Refuse a result length (inserts * to_len + self_len) that does not
       fit in a Py_ssize_t. */
    assert(inserts > 0);
    if (to_len > (PY_SSIZE_T_MAX - self_len) / inserts) {
        PyErr_SetString(PyExc_OverflowError,
                        "replacement bytes are too long");
        return NULL;
    }

    result = (PyBytesObject *)
        PyBytes_FromStringAndSize(NULL, inserts * to_len + self_len);
    if (result == NULL) {
        return NULL;
    }

    src = PyBytes_AS_STRING(self);
    dst = PyBytes_AS_STRING(result);

    /* The first insertion always happens; every remaining one is preceded
       by exactly one byte copied from self. */
    pairs = inserts - 1;
    if (to_len > 1) {
        Py_MEMCPY(dst, to_s, to_len);
        dst += to_len;
        for (k = 0; k < pairs; k++) {
            *dst++ = *src++;
            Py_MEMCPY(dst, to_s, to_len);
            dst += to_len;
        }
    }
    else {
        /* Single-byte 'to': plain stores instead of Py_MEMCPY. */
        *dst = to_s[0];
        dst += to_len;
        for (k = 0; k < pairs; k++) {
            *dst++ = *src++;
            *dst = to_s[0];
            dst += to_len;
        }
    }

    /* 'pairs' bytes of self were consumed above; copy whatever is left. */
    Py_MEMCPY(dst, src, self_len - pairs);

    return result;
}
/* Remove up to maxcount occurrences of the single byte from_c.

   Preconditions set up by replace(): len(self)>=1, len(from)==1, to="",
   maxcount>=1.  When from_c does not occur, the result of return_self()
   is returned.  NULL is returned if creating the result object fails. */
Py_LOCAL(PyBytesObject *)
replace_delete_single_character(PyBytesObject *self,
                                char from_c, Py_ssize_t maxcount)
{
    Py_ssize_t self_len = PyBytes_GET_SIZE(self);
    char *cursor = PyBytes_AS_STRING(self);
    char *const stop = cursor + self_len;
    Py_ssize_t hits, new_len, chunk;
    char *match, *out;
    PyBytesObject *result;

    hits = countchar(cursor, self_len, from_c, maxcount);
    if (hits == 0) {
        return return_self(self);
    }

    /* Every deleted occurrence shortens the result by one byte. */
    new_len = self_len - hits;
    assert(new_len >= 0);

    result = (PyBytesObject *)PyBytes_FromStringAndSize(NULL, new_len);
    if (result == NULL) {
        return NULL;
    }
    out = PyBytes_AS_STRING(result);

    /* Copy the stretch in front of each occurrence, then step over it. */
    for (; hits > 0; hits--) {
        match = findchar(cursor, stop - cursor, from_c);
        if (match == NULL) {
            break;
        }
        chunk = match - cursor;
        Py_MEMCPY(out, cursor, chunk);
        out += chunk;
        cursor = match + 1;
    }
    /* Tail after the last deleted byte. */
    Py_MEMCPY(out, cursor, stop - cursor);

    return result;
}
/* Remove the occurrences of the from_len-byte sequence from_s; the number
   removed is the count reported by stringlib_count() for maxcount.

   Preconditions set up by replace(): len(self)>=1, len(from)>=2, to="",
   maxcount>=1.  When the count is zero, the result of return_self() is
   returned.  NULL is returned if creating the result object fails. */
Py_LOCAL(PyBytesObject *)
replace_delete_substring(PyBytesObject *self,
                         const char *from_s, Py_ssize_t from_len,
                         Py_ssize_t maxcount)
{
    Py_ssize_t self_len = PyBytes_GET_SIZE(self);
    char *cursor = PyBytes_AS_STRING(self);
    char *const stop = cursor + self_len;
    Py_ssize_t hits, new_len, gap;
    char *out;
    PyBytesObject *result;

    hits = stringlib_count(cursor, self_len, from_s, from_len, maxcount);
    if (hits == 0) {
        /* nothing to delete */
        return return_self(self);
    }

    new_len = self_len - (hits * from_len);
    assert(new_len >= 0);

    result = (PyBytesObject *)PyBytes_FromStringAndSize(NULL, new_len);
    if (result == NULL) {
        return NULL;
    }
    out = PyBytes_AS_STRING(result);

    /* Copy the gap in front of each match, then skip the match itself. */
    for (; hits > 0; hits--) {
        gap = stringlib_find(cursor, stop - cursor, from_s, from_len, 0);
        if (gap == -1) {
            break;
        }
        Py_MEMCPY(out, cursor, gap);
        out += gap;
        cursor += gap + from_len;
    }
    /* Tail after the last removed match. */
    Py_MEMCPY(out, cursor, stop - cursor);

    return result;
}
/* Replace up to maxcount occurrences of the byte from_c with to_c.  The
   result has the same length as self, so self is copied once and the copy
   is patched in place.

   Preconditions set up by replace(): len(self)>=1, len(from)==len(to)==1,
   maxcount>=1.  When from_c does not occur, the result of return_self()
   is returned.  NULL is returned if creating the result object fails. */
Py_LOCAL(PyBytesObject *)
replace_single_character_in_place(PyBytesObject *self,
                                  char from_c, char to_c,
                                  Py_ssize_t maxcount)
{
    char *self_s = PyBytes_AS_STRING(self);
    Py_ssize_t self_len = PyBytes_GET_SIZE(self);
    char *first, *buf, *stop, *spot;
    PyBytesObject *result;

    first = findchar(self_s, self_len, from_c);
    if (first == NULL) {
        /* no occurrence at all */
        return return_self(self);
    }

    /* At least one byte changes, so a new object is needed. */
    result = (PyBytesObject *)PyBytes_FromStringAndSize(NULL, self_len);
    if (result == NULL) {
        return NULL;
    }
    buf = PyBytes_AS_STRING(result);
    Py_MEMCPY(buf, self_s, self_len);
    stop = buf + self_len;

    /* Patch the copy, starting at the position of the first occurrence;
       later occurrences are searched for in the copy itself. */
    spot = buf + (first - self_s);
    for (;;) {
        *spot = to_c;
        if (--maxcount <= 0) {
            break;
        }
        spot = findchar(spot + 1, stop - (spot + 1), from_c);
        if (spot == NULL) {
            break;
        }
    }

    return result;
}
/* Replace up to maxcount occurrences of from_s with the equally long to_s.
   The result has the same length as self, so self is copied once and the
   copy is patched in place.  Only from_len is used as the copy length;
   the caller guarantees to_len == from_len.

   Preconditions set up by replace(): len(self)>=1, len(from)==len(to)>=2,
   maxcount>=1.  When from_s does not occur, the result of return_self()
   is returned.  NULL is returned if creating the result object fails. */
Py_LOCAL(PyBytesObject *)
replace_substring_in_place(PyBytesObject *self,
                           const char *from_s, Py_ssize_t from_len,
                           const char *to_s, Py_ssize_t to_len,
                           Py_ssize_t maxcount)
{
    char *self_s = PyBytes_AS_STRING(self);
    Py_ssize_t self_len = PyBytes_GET_SIZE(self);
    Py_ssize_t offset;
    char *cursor, *stop;
    PyBytesObject *result;

    offset = stringlib_find(self_s, self_len, from_s, from_len, 0);
    if (offset == -1) {
        /* no occurrence at all */
        return return_self(self);
    }

    /* At least one match changes, so a new object is needed. */
    result = (PyBytesObject *)PyBytes_FromStringAndSize(NULL, self_len);
    if (result == NULL) {
        return NULL;
    }
    cursor = PyBytes_AS_STRING(result);
    Py_MEMCPY(cursor, self_s, self_len);
    stop = cursor + self_len;

    /* Overwrite the match 'offset' bytes ahead of the cursor, move past
       it, and keep searching in the copy while maxcount allows. */
    for (;;) {
        cursor += offset;
        Py_MEMCPY(cursor, to_s, from_len);
        cursor += from_len;
        if (--maxcount <= 0) {
            break;
        }
        offset = stringlib_find(cursor, stop - cursor, from_s, from_len, 0);
        if (offset == -1) {
            break;
        }
    }

    return result;
}
/* Replace up to maxcount occurrences of the byte from_c with the to_len
   bytes at to_s.

   Preconditions set up by replace(): len(self)>=1, len(from)==1,
   len(to)>=2, maxcount>=1.  When from_c does not occur, the result of
   return_self() is returned.  Returns NULL with OverflowError set if the
   result length would exceed PY_SSIZE_T_MAX, or NULL if creating the
   result object fails. */
Py_LOCAL(PyBytesObject *)
replace_single_character(PyBytesObject *self,
                         char from_c,
                         const char *to_s, Py_ssize_t to_len,
                         Py_ssize_t maxcount)
{
    char *cursor = PyBytes_AS_STRING(self);
    Py_ssize_t self_len = PyBytes_GET_SIZE(self);
    char *const stop = cursor + self_len;
    Py_ssize_t hits, chunk;
    char *match, *out;
    PyBytesObject *result;

    hits = countchar(cursor, self_len, from_c, maxcount);
    if (hits == 0) {
        /* nothing to replace */
        return return_self(self);
    }

    /* Each replacement grows the result by (to_len - 1) bytes; make sure
       self_len + hits * (to_len - 1) fits in a Py_ssize_t. */
    assert(hits > 0);
    if (to_len - 1 > (PY_SSIZE_T_MAX - self_len) / hits) {
        PyErr_SetString(PyExc_OverflowError,
                        "replacement bytes are too long");
        return NULL;
    }

    result = (PyBytesObject *)
        PyBytes_FromStringAndSize(NULL, self_len + hits * (to_len - 1));
    if (result == NULL) {
        return NULL;
    }
    out = PyBytes_AS_STRING(result);

    for (; hits > 0; hits--) {
        match = findchar(cursor, stop - cursor, from_c);
        if (match == NULL) {
            break;
        }
        if (match != cursor) {
            /* unchanged bytes in front of the occurrence */
            chunk = match - cursor;
            Py_MEMCPY(out, cursor, chunk);
            out += chunk;
        }
        /* the replacement itself */
        Py_MEMCPY(out, to_s, to_len);
        out += to_len;
        cursor = match + 1;
    }
    /* Whatever follows the last replaced occurrence. */
    Py_MEMCPY(out, cursor, stop - cursor);

    return result;
}
/* General case: replace occurrences of the from_len bytes at from_s with
   the to_len bytes at to_s; the number replaced is the count reported by
   stringlib_count() for maxcount.

   Preconditions set up by replace(): len(self)>=1, len(from)>=2,
   len(to)>=1 with len(to)!=len(from), maxcount>=1.  When the count is
   zero, the result of return_self() is returned.  Returns NULL with
   OverflowError set if the result length would exceed PY_SSIZE_T_MAX, or
   NULL if creating the result object fails. */
Py_LOCAL(PyBytesObject *)
replace_substring(PyBytesObject *self,
                  const char *from_s, Py_ssize_t from_len,
                  const char *to_s, Py_ssize_t to_len,
                  Py_ssize_t maxcount)
{
    char *cursor = PyBytes_AS_STRING(self);
    Py_ssize_t self_len = PyBytes_GET_SIZE(self);
    char *const stop = cursor + self_len;
    Py_ssize_t hits, gap;
    char *out;
    PyBytesObject *result;

    hits = stringlib_count(cursor, self_len, from_s, from_len, maxcount);
    if (hits == 0) {
        /* nothing to replace */
        return return_self(self);
    }

    /* Make sure self_len + hits * (to_len - from_len) fits in a
       Py_ssize_t before computing it. */
    assert(hits > 0);
    if (to_len - from_len > (PY_SSIZE_T_MAX - self_len) / hits) {
        PyErr_SetString(PyExc_OverflowError,
                        "replacement bytes are too long");
        return NULL;
    }

    result = (PyBytesObject *)
        PyBytes_FromStringAndSize(
            NULL, self_len + hits * (to_len - from_len));
    if (result == NULL) {
        return NULL;
    }
    out = PyBytes_AS_STRING(result);

    for (; hits > 0; hits--) {
        gap = stringlib_find(cursor, stop - cursor, from_s, from_len, 0);
        if (gap == -1) {
            break;
        }
        if (gap != 0) {
            /* unchanged bytes in front of the match */
            Py_MEMCPY(out, cursor, gap);
            out += gap;
        }
        /* the replacement itself */
        Py_MEMCPY(out, to_s, to_len);
        out += to_len;
        cursor += gap + from_len;
    }
    /* Whatever follows the last replaced match. */
    Py_MEMCPY(out, cursor, stop - cursor);

    return result;
}
/* Entry point of bytes replacement: choose the specialised helper that
   matches the lengths of 'from' and 'to'.

   self             - the bytes object being searched
   from_s, from_len - the bytes to look for
   to_s, to_len     - the bytes to put in their place
   maxcount         - maximum number of replacements; a negative value
                      means "no limit"

   Returns whatever the selected helper (or return_self()) returns. */
Py_LOCAL(PyBytesObject *)
replace(PyBytesObject *self,
        const char *from_s, Py_ssize_t from_len,
        const char *to_s, Py_ssize_t to_len,
        Py_ssize_t maxcount)
{
    if (maxcount < 0) {
        maxcount = PY_SSIZE_T_MAX;
    }
    else if (maxcount == 0 || PyBytes_GET_SIZE(self) == 0) {
        /* an explicit limit with nothing to do */
        return return_self(self);
    }

    /* From here on maxcount is never 0.  Replacing "" with "" changes
       nothing either. */
    if (from_len == 0 && to_len == 0) {
        return return_self(self);
    }

    if (from_len == 0) {
        /* Empty 'from': insert 'to' everywhere, e.g.
           "Python".replace("", ".") == ".P.y.t.h.o.n." */
        return replace_interleave(self, to_s, to_len, maxcount);
    }

    /* Only the empty-'from' case above can turn an empty self into
       something non-empty; the helpers below all expect len(self)>=1. */
    if (PyBytes_GET_SIZE(self) == 0) {
        return return_self(self);
    }

    if (to_len == 0) {
        /* Empty 'to': delete the occurrences of 'from'. */
        return (from_len == 1)
            ? replace_delete_single_character(self, from_s[0], maxcount)
            : replace_delete_substring(self, from_s, from_len, maxcount);
    }

    if (from_len == to_len) {
        /* Equal lengths: the result has the size of self. */
        return (from_len == 1)
            ? replace_single_character_in_place(self, from_s[0], to_s[0],
                                                maxcount)
            : replace_substring_in_place(self, from_s, from_len,
                                         to_s, to_len, maxcount);
    }

    /* Different, non-zero lengths: the generic algorithms. */
    if (from_len == 1) {
        return replace_single_character(self, from_s[0],
                                        to_s, to_len, maxcount);
    }
    /* len('from')>=2, len('to')>=1 */
    return replace_substring(self, from_s, from_len,
                             to_s, to_len, maxcount);
}
/*[clinic input]
bytes.replace
...
...
@@ -2722,9 +2220,9 @@ bytes_replace_impl(PyBytesObject *self, Py_buffer *old, Py_buffer *new,
Py_ssize_t
count
)
/*[clinic end generated code: output=994fa588b6b9c104 input=b2fbbf0bf04de8e5]*/
{
return
(
PyObject
*
)
replace
((
PyBytesObject
*
)
self
,
(
const
char
*
)
old
->
buf
,
old
->
len
,
(
const
char
*
)
new
->
buf
,
new
->
len
,
count
);
return
stringlib_replace
((
PyObject
*
)
self
,
(
const
char
*
)
old
->
buf
,
old
->
len
,
(
const
char
*
)
new
->
buf
,
new
->
len
,
count
);
}
/** End DALKE **/
...
...
Objects/stringlib/transmogrify.h
View file @
fb81d3cb
...
...
@@ -4,6 +4,18 @@
/* the more complicated methods. parts of these should be pulled out into the
shared code in bytes_methods.c to cut down on duplicate code bloat. */
/* Shared "nothing changed" result for the stringlib methods.  When
   STRINGLIB_MUTABLE is true a copy of self is always created.  Otherwise
   an object passing STRINGLIB_CHECK_EXACT() is returned as is, with its
   reference count incremented, and anything else is copied. */
Py_LOCAL_INLINE(PyObject *)
return_self(PyObject *self)
{
#if STRINGLIB_MUTABLE
    return STRINGLIB_NEW(STRINGLIB_STR(self), STRINGLIB_LEN(self));
#else
    if (!STRINGLIB_CHECK_EXACT(self)) {
        return STRINGLIB_NEW(STRINGLIB_STR(self), STRINGLIB_LEN(self));
    }
    Py_INCREF(self);
    return self;
#endif
}
static
PyObject
*
stringlib_expandtabs
(
PyObject
*
self
,
PyObject
*
args
,
PyObject
*
kwds
)
{
...
...
@@ -87,28 +99,20 @@ pad(PyObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
if
(
right
<
0
)
right
=
0
;
if
(
left
==
0
&&
right
==
0
&&
STRINGLIB_CHECK_EXACT
(
self
))
{
#if STRINGLIB_MUTABLE
/* We're defined as returning a copy; If the object is mutable
* that means we must make an identical copy. */
return
STRINGLIB_NEW
(
STRINGLIB_STR
(
self
),
STRINGLIB_LEN
(
self
));
#else
Py_INCREF
(
self
);
return
(
PyObject
*
)
self
;
#endif
/* STRINGLIB_MUTABLE */
if
(
left
==
0
&&
right
==
0
)
{
return
return_self
(
self
);
}
u
=
STRINGLIB_NEW
(
NULL
,
left
+
STRINGLIB_LEN
(
self
)
+
right
);
u
=
STRINGLIB_NEW
(
NULL
,
left
+
STRINGLIB_LEN
(
self
)
+
right
);
if
(
u
)
{
if
(
left
)
memset
(
STRINGLIB_STR
(
u
),
fill
,
left
);
Py_MEMCPY
(
STRINGLIB_STR
(
u
)
+
left
,
STRINGLIB_STR
(
self
),
STRINGLIB_LEN
(
self
));
STRINGLIB_STR
(
self
),
STRINGLIB_LEN
(
self
));
if
(
right
)
memset
(
STRINGLIB_STR
(
u
)
+
left
+
STRINGLIB_LEN
(
self
),
fill
,
right
);
fill
,
right
);
}
return
u
;
...
...
@@ -123,15 +127,8 @@ stringlib_ljust(PyObject *self, PyObject *args)
if
(
!
PyArg_ParseTuple
(
args
,
"n|c:ljust"
,
&
width
,
&
fillchar
))
return
NULL
;
if
(
STRINGLIB_LEN
(
self
)
>=
width
&&
STRINGLIB_CHECK_EXACT
(
self
))
{
#if STRINGLIB_MUTABLE
/* We're defined as returning a copy; If the object is mutable
* that means we must make an identical copy. */
return
STRINGLIB_NEW
(
STRINGLIB_STR
(
self
),
STRINGLIB_LEN
(
self
));
#else
Py_INCREF
(
self
);
return
(
PyObject
*
)
self
;
#endif
if
(
STRINGLIB_LEN
(
self
)
>=
width
)
{
return
return_self
(
self
);
}
return
pad
(
self
,
0
,
width
-
STRINGLIB_LEN
(
self
),
fillchar
);
...
...
@@ -147,15 +144,8 @@ stringlib_rjust(PyObject *self, PyObject *args)
if
(
!
PyArg_ParseTuple
(
args
,
"n|c:rjust"
,
&
width
,
&
fillchar
))
return
NULL
;
if
(
STRINGLIB_LEN
(
self
)
>=
width
&&
STRINGLIB_CHECK_EXACT
(
self
))
{
#if STRINGLIB_MUTABLE
/* We're defined as returning a copy; If the object is mutable
* that means we must make an identical copy. */
return
STRINGLIB_NEW
(
STRINGLIB_STR
(
self
),
STRINGLIB_LEN
(
self
));
#else
Py_INCREF
(
self
);
return
(
PyObject
*
)
self
;
#endif
if
(
STRINGLIB_LEN
(
self
)
>=
width
)
{
return
return_self
(
self
);
}
return
pad
(
self
,
width
-
STRINGLIB_LEN
(
self
),
0
,
fillchar
);
...
...
@@ -172,15 +162,8 @@ stringlib_center(PyObject *self, PyObject *args)
if
(
!
PyArg_ParseTuple
(
args
,
"n|c:center"
,
&
width
,
&
fillchar
))
return
NULL
;
if
(
STRINGLIB_LEN
(
self
)
>=
width
&&
STRINGLIB_CHECK_EXACT
(
self
))
{
#if STRINGLIB_MUTABLE
/* We're defined as returning a copy; If the object is mutable
* that means we must make an identical copy. */
return
STRINGLIB_NEW
(
STRINGLIB_STR
(
self
),
STRINGLIB_LEN
(
self
));
#else
Py_INCREF
(
self
);
return
(
PyObject
*
)
self
;
#endif
if
(
STRINGLIB_LEN
(
self
)
>=
width
)
{
return
return_self
(
self
);
}
marg
=
width
-
STRINGLIB_LEN
(
self
);
...
...
@@ -201,21 +184,7 @@ stringlib_zfill(PyObject *self, PyObject *args)
return
NULL
;
if
(
STRINGLIB_LEN
(
self
)
>=
width
)
{
if
(
STRINGLIB_CHECK_EXACT
(
self
))
{
#if STRINGLIB_MUTABLE
/* We're defined as returning a copy; If the object is mutable
* that means we must make an identical copy. */
return
STRINGLIB_NEW
(
STRINGLIB_STR
(
self
),
STRINGLIB_LEN
(
self
));
#else
Py_INCREF
(
self
);
return
(
PyObject
*
)
self
;
#endif
}
else
return
STRINGLIB_NEW
(
STRINGLIB_STR
(
self
),
STRINGLIB_LEN
(
self
)
);
return
return_self
(
self
);
}
fill
=
width
-
STRINGLIB_LEN
(
self
);
...
...
@@ -232,5 +201,500 @@ stringlib_zfill(PyObject *self, PyObject *args)
p
[
fill
]
=
'0'
;
}
return
(
PyObject
*
)
s
;
return
s
;
}
/* find and count characters and substrings */
/* findchar(target, target_len, c): thin wrapper around memchr() that looks
   for the byte c in the first target_len bytes of target and yields the
   match as a (char *), or NULL when memchr() finds nothing.  The cast drops
   any const qualifier the caller's pointer had. */
#define findchar(target, target_len, c) \
((char *)memchr((const void *)(target), c, target_len))
/* Count how many times the byte `c` occurs in target[0..target_len),
   giving up as soon as `maxcount` occurrences have been seen.  The limit is
   only tested after a hit has been recorded, so one match is still counted
   when maxcount is less than 1. */
Py_LOCAL_INLINE(Py_ssize_t)
countchar(const char *target, Py_ssize_t target_len, char c,
          Py_ssize_t maxcount)
{
    const char *const limit = target + target_len;
    const char *cursor = target;
    Py_ssize_t hits = 0;

    for (;;) {
        cursor = findchar(cursor, limit - cursor, c);
        if (cursor == NULL) {
            return hits;
        }
        hits++;
        if (hits >= maxcount) {
            return hits;
        }
        /* Resume the scan just past the byte that matched. */
        cursor++;
    }
}
/* Algorithms for different cases of string replacement */
/* Insert `to` in front of every byte of self and once more at the end,
   performing at most maxcount insertions, e.g.
       b"Python".replace(b"", b".") == b".P.y.t.h.o.n."
   Expected by the code: from="", len(to)>=1, maxcount>=1.  self may be
   empty, in which case the result is a single copy of `to`.
   Returns a new object.  Returns NULL with OverflowError set when the
   result length would not fit in Py_ssize_t, and NULL when STRINGLIB_NEW
   fails. */
Py_LOCAL(PyObject *)
stringlib_replace_interleave(PyObject *self,
                             const char *to_s, Py_ssize_t to_len,
                             Py_ssize_t maxcount)
{
    const char *src;
    char *dst;
    Py_ssize_t src_len, result_len;
    Py_ssize_t count, pending, copied;
    PyObject *result;

    src_len = STRINGLIB_LEN(self);

    /* One insertion before each byte plus one at the very end, capped by
       maxcount: count = min(maxcount, src_len + 1).  The addition is only
       evaluated when src_len + 1 <= maxcount <= PY_SSIZE_T_MAX, so it
       cannot overflow. */
    count = (maxcount <= src_len) ? maxcount : src_len + 1;

    /* Reject sizes for which count * to_len + src_len would overflow. */
    assert(count > 0);
    if (to_len > (PY_SSIZE_T_MAX - src_len) / count) {
        PyErr_SetString(PyExc_OverflowError,
                        "replace bytes are too long");
        return NULL;
    }
    result_len = count * to_len + src_len;

    result = STRINGLIB_NEW(NULL, result_len);
    if (result == NULL) {
        return NULL;
    }

    src = STRINGLIB_STR(self);
    dst = STRINGLIB_STR(result);

    /* The leading insertion always happens; each of the remaining ones
       follows exactly one byte copied from self. */
    pending = count - 1;
    copied = 0;
    if (to_len > 1) {
        Py_MEMCPY(dst, to_s, to_len);
        dst += to_len;
        while (copied < pending) {
            *dst++ = *src++;
            Py_MEMCPY(dst, to_s, to_len);
            dst += to_len;
            copied++;
        }
    }
    else {
        /* Short replacement: store the byte directly instead of calling
           Py_MEMCPY. */
        dst[0] = to_s[0];
        dst += to_len;
        while (copied < pending) {
            *dst++ = *src++;
            dst[0] = to_s[0];
            dst += to_len;
            copied++;
        }
    }

    /* Whatever is left of self goes after the last insertion. */
    Py_MEMCPY(dst, src, src_len - copied);

    return result;
}
/* Delete up to maxcount occurrences of the single byte from_c.
   Expected by the code: len(self)>=1, len(from)==1, to="", maxcount>=1.
   Returns return_self(self) when from_c does not occur at all, otherwise a
   new object with the matches removed; NULL when STRINGLIB_NEW fails. */
Py_LOCAL(PyObject *)
stringlib_replace_delete_single_character(PyObject *self,
                                          char from_c, Py_ssize_t maxcount)
{
    const char *src, *cursor, *hit, *limit;
    char *dst;
    Py_ssize_t src_len, result_len;
    Py_ssize_t count, gap;
    PyObject *result;

    src_len = STRINGLIB_LEN(self);
    src = STRINGLIB_STR(self);

    count = countchar(src, src_len, from_c, maxcount);
    if (count == 0) {
        return return_self(self);
    }

    /* Every match removes exactly one byte (from_len == 1). */
    result_len = src_len - count;
    assert(result_len >= 0);

    result = STRINGLIB_NEW(NULL, result_len);
    if (result == NULL) {
        return NULL;
    }
    dst = STRINGLIB_STR(result);

    cursor = src;
    limit = src + src_len;
    for (; count > 0; count--) {
        hit = findchar(cursor, limit - cursor, from_c);
        if (hit == NULL) {
            break;
        }
        /* Keep the bytes in front of the match and skip the match itself. */
        gap = hit - cursor;
        Py_MEMCPY(dst, cursor, gap);
        dst += gap;
        cursor = hit + 1;
    }
    /* Tail after the last deleted byte. */
    Py_MEMCPY(dst, cursor, limit - cursor);

    return result;
}
/* Delete up to maxcount occurrences of the byte sequence from_s.
   Expected by the code: len(self)>=1, len(from)>=2, to="", maxcount>=1.
   Returns return_self(self) when stringlib_count() reports no match,
   otherwise a new object with the matches removed; NULL when STRINGLIB_NEW
   fails. */
Py_LOCAL(PyObject *)
stringlib_replace_delete_substring(PyObject *self,
                                   const char *from_s, Py_ssize_t from_len,
                                   Py_ssize_t maxcount)
{
    const char *src, *cursor, *hit, *limit;
    char *dst;
    Py_ssize_t src_len, result_len;
    Py_ssize_t count, offset, gap;
    PyObject *result;

    src_len = STRINGLIB_LEN(self);
    src = STRINGLIB_STR(self);

    count = stringlib_count(src, src_len,
                            from_s, from_len,
                            maxcount);
    if (count == 0) {
        /* Nothing to delete. */
        return return_self(self);
    }

    /* Every match removes from_len bytes. */
    result_len = src_len - (count * from_len);
    assert(result_len >= 0);

    result = STRINGLIB_NEW(NULL, result_len);
    if (result == NULL) {
        return NULL;
    }
    dst = STRINGLIB_STR(result);

    cursor = src;
    limit = src + src_len;
    for (; count > 0; count--) {
        offset = stringlib_find(cursor, limit - cursor,
                                from_s, from_len,
                                0);
        if (offset == -1) {
            break;
        }
        hit = cursor + offset;

        /* Keep the bytes in front of the match and skip the match itself. */
        gap = hit - cursor;
        Py_MEMCPY(dst, cursor, gap);
        dst += gap;
        cursor = hit + from_len;
    }
    /* Tail after the last deleted occurrence. */
    Py_MEMCPY(dst, cursor, limit - cursor);

    return result;
}
/* Replace up to maxcount occurrences of the byte from_c with the byte to_c.
   Expected by the code: len(self)>=1, len(from)==len(to)==1, maxcount>=1.
   The result has the same length as self, so a copy of self is made and
   patched in place.  Returns return_self(self) when from_c does not occur;
   NULL when STRINGLIB_NEW fails. */
Py_LOCAL(PyObject *)
stringlib_replace_single_character_in_place(PyObject *self,
                                            char from_c, char to_c,
                                            Py_ssize_t maxcount)
{
    const char *src, *limit;
    char *dst, *cursor, *hit;
    Py_ssize_t src_len;
    PyObject *result;

    src = STRINGLIB_STR(self);
    src_len = STRINGLIB_LEN(self);

    /* Look for a first match before allocating anything. */
    hit = findchar(src, src_len, from_c);
    if (hit == NULL) {
        return return_self(self);
    }

    /* At least one byte changes: work on a fresh copy. */
    result = STRINGLIB_NEW(NULL, src_len);
    if (result == NULL) {
        return NULL;
    }
    dst = STRINGLIB_STR(result);
    Py_MEMCPY(dst, src, src_len);

    /* Translate the first match into the copy and overwrite it there. */
    cursor = dst + (hit - src);
    *cursor++ = to_c;
    limit = dst + src_len;

    /* The first replacement is already done; spend the remaining budget. */
    for (maxcount--; maxcount > 0; maxcount--) {
        hit = findchar(cursor, limit - cursor, from_c);
        if (hit == NULL) {
            break;
        }
        *hit = to_c;
        cursor = hit + 1;
    }

    return result;
}
/* Replace up to maxcount occurrences of from_s with the equally long to_s.
   Expected by the code: len(self)>=1, len(from)==len(to)>=2, maxcount>=1.
   The result has the same length as self, so a copy of self is made and
   patched in place; from_len bytes of to_s are written over each match.
   Returns return_self(self) when stringlib_find() finds no match; NULL when
   STRINGLIB_NEW fails. */
Py_LOCAL(PyObject *)
stringlib_replace_substring_in_place(PyObject *self,
                                     const char *from_s, Py_ssize_t from_len,
                                     const char *to_s, Py_ssize_t to_len,
                                     Py_ssize_t maxcount)
{
    const char *src, *limit;
    char *dst, *cursor;
    Py_ssize_t src_len, offset;
    PyObject *result;

    src = STRINGLIB_STR(self);
    src_len = STRINGLIB_LEN(self);

    /* Look for a first match before allocating anything. */
    offset = stringlib_find(src, src_len,
                            from_s, from_len,
                            0);
    if (offset == -1) {
        return return_self(self);
    }

    /* At least one occurrence changes: work on a fresh copy. */
    result = STRINGLIB_NEW(NULL, src_len);
    if (result == NULL) {
        return NULL;
    }
    dst = STRINGLIB_STR(result);
    Py_MEMCPY(dst, src, src_len);

    /* Overwrite the first match inside the copy and step past it. */
    cursor = dst + offset;
    Py_MEMCPY(cursor, to_s, from_len);
    cursor += from_len;
    limit = dst + src_len;

    /* The first replacement is already done; spend the remaining budget. */
    for (maxcount--; maxcount > 0; maxcount--) {
        offset = stringlib_find(cursor, limit - cursor,
                                from_s, from_len,
                                0);
        if (offset == -1) {
            break;
        }
        Py_MEMCPY(cursor + offset, to_s, from_len);
        cursor += offset + from_len;
    }

    return result;
}
/* Replace up to maxcount occurrences of the byte from_c with the longer
   sequence to_s.
   Expected by the code: len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1.
   Returns return_self(self) when from_c does not occur, otherwise a new
   object.  Returns NULL with OverflowError set when the result length would
   not fit in Py_ssize_t, and NULL when STRINGLIB_NEW fails. */
Py_LOCAL(PyObject *)
stringlib_replace_single_character(PyObject *self,
                                   char from_c,
                                   const char *to_s, Py_ssize_t to_len,
                                   Py_ssize_t maxcount)
{
    const char *src, *cursor, *hit, *limit;
    char *dst;
    Py_ssize_t src_len, result_len;
    Py_ssize_t count, gap;
    PyObject *result;

    src = STRINGLIB_STR(self);
    src_len = STRINGLIB_LEN(self);

    count = countchar(src, src_len, from_c, maxcount);
    if (count == 0) {
        /* Nothing to replace. */
        return return_self(self);
    }

    /* Each replacement grows the result by to_len - 1 bytes, hence
       result_len = src_len + count * (to_len - 1); reject sizes for which
       that would overflow. */
    assert(count > 0);
    if (to_len - 1 > (PY_SSIZE_T_MAX - src_len) / count) {
        PyErr_SetString(PyExc_OverflowError,
                        "replace bytes is too long");
        return NULL;
    }
    result_len = src_len + count * (to_len - 1);

    result = STRINGLIB_NEW(NULL, result_len);
    if (result == NULL) {
        return NULL;
    }
    dst = STRINGLIB_STR(result);

    cursor = src;
    limit = src + src_len;
    for (; count > 0; count--) {
        hit = findchar(cursor, limit - cursor, from_c);
        if (hit == NULL) {
            break;
        }

        /* Copy any untouched bytes in front of the match ... */
        gap = hit - cursor;
        if (gap > 0) {
            Py_MEMCPY(dst, cursor, gap);
            dst += gap;
        }
        /* ... then lay down the replacement and step over the match. */
        Py_MEMCPY(dst, to_s, to_len);
        dst += to_len;
        cursor = hit + 1;
    }
    /* Tail after the last replaced byte. */
    Py_MEMCPY(dst, cursor, limit - cursor);

    return result;
}
/* General case: replace up to maxcount occurrences of from_s with to_s when
   the two differ in length.
   Expected by the code: len(self)>=1, len(from)>=2, len(to)>=1, maxcount>=1.
   Returns return_self(self) when stringlib_count() reports no match,
   otherwise a new object.  Returns NULL with OverflowError set when the
   result length would not fit in Py_ssize_t, and NULL when STRINGLIB_NEW
   fails. */
Py_LOCAL(PyObject *)
stringlib_replace_substring(PyObject *self,
                            const char *from_s, Py_ssize_t from_len,
                            const char *to_s, Py_ssize_t to_len,
                            Py_ssize_t maxcount)
{
    const char *src, *cursor, *hit, *limit;
    char *dst;
    Py_ssize_t src_len, result_len;
    Py_ssize_t count, offset, gap;
    PyObject *result;

    src = STRINGLIB_STR(self);
    src_len = STRINGLIB_LEN(self);

    count = stringlib_count(src, src_len,
                            from_s, from_len,
                            maxcount);
    if (count == 0) {
        /* Nothing to replace. */
        return return_self(self);
    }

    /* Each replacement changes the length by to_len - from_len, hence
       result_len = src_len + count * (to_len - from_len); reject sizes for
       which that would overflow. */
    assert(count > 0);
    if (to_len - from_len > (PY_SSIZE_T_MAX - src_len) / count) {
        PyErr_SetString(PyExc_OverflowError,
                        "replace bytes is too long");
        return NULL;
    }
    result_len = src_len + count * (to_len - from_len);

    result = STRINGLIB_NEW(NULL, result_len);
    if (result == NULL) {
        return NULL;
    }
    dst = STRINGLIB_STR(result);

    cursor = src;
    limit = src + src_len;
    for (; count > 0; count--) {
        offset = stringlib_find(cursor, limit - cursor,
                                from_s, from_len,
                                0);
        if (offset == -1) {
            break;
        }
        hit = cursor + offset;

        /* Copy any untouched bytes in front of the match ... */
        gap = hit - cursor;
        if (gap > 0) {
            Py_MEMCPY(dst, cursor, gap);
            dst += gap;
        }
        /* ... then lay down the replacement and step over the match. */
        Py_MEMCPY(dst, to_s, to_len);
        dst += to_len;
        cursor = hit + from_len;
    }
    /* Tail after the last replaced occurrence. */
    Py_MEMCPY(dst, cursor, limit - cursor);

    return result;
}
/* Entry point for replace(): pick the specialised routine that matches the
   lengths of `from` and `to`.

   self      object whose bytes are searched
   from_s    bytes to look for (from_len of them)
   to_s      bytes to substitute (to_len of them)
   maxcount  maximum number of replacements; a negative value means
             "no limit" and is turned into PY_SSIZE_T_MAX

   Returns whatever the selected routine returns, or return_self(self) when
   there is nothing to do. */
Py_LOCAL(PyObject *)
stringlib_replace(PyObject *self,
                  const char *from_s, Py_ssize_t from_len,
                  const char *to_s, Py_ssize_t to_len,
                  Py_ssize_t maxcount)
{
    if (maxcount < 0) {
        maxcount = PY_SSIZE_T_MAX;
    }
    else if (maxcount == 0 || STRINGLIB_LEN(self) == 0) {
        /* Nothing to do.  Note that the empty-self test is only reached for
           a non-negative maxcount. */
        return return_self(self);
    }

    /* Empty pattern. */
    if (from_len == 0) {
        if (to_len == 0) {
            /* Replacing nothing with nothing. */
            return return_self(self);
        }
        /* Insert the 'to' bytes everywhere:
           >>> b"Python".replace(b"", b".")
           b'.P.y.t.h.o.n.' */
        return stringlib_replace_interleave(self, to_s, to_len, maxcount);
    }

    /* Apart from b"".replace(b"", b"A") == b"A", handled above, an empty
       self can never produce a non-empty result.  Returning here lets all
       routines below assume a non-empty self. */
    if (STRINGLIB_LEN(self) == 0) {
        return return_self(self);
    }

    /* Empty replacement: delete every occurrence of 'from'. */
    if (to_len == 0) {
        if (from_len == 1) {
            return stringlib_replace_delete_single_character(
                self, from_s[0], maxcount);
        }
        return stringlib_replace_delete_substring(
            self, from_s, from_len, maxcount);
    }

    /* Equal lengths: the result is as long as self and can be patched in
       place after copying. */
    if (from_len == to_len) {
        if (from_len == 1) {
            return stringlib_replace_single_character_in_place(
                self, from_s[0], to_s[0], maxcount);
        }
        return stringlib_replace_substring_in_place(
            self, from_s, from_len, to_s, to_len, maxcount);
    }

    /* Different lengths: use the generic algorithms. */
    if (from_len == 1) {
        return stringlib_replace_single_character(
            self, from_s[0], to_s, to_len, maxcount);
    }
    /* len('from')>=2, len('to')>=1 */
    return stringlib_replace_substring(
        self, from_s, from_len, to_s, to_len, maxcount);
}
#undef findchar
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment