@Mendelt Siebenga
字符串是编程语言中的基石对象。不同的语言以不同的方式实现字符串。的实现join()
强烈依赖于字符串的底层实现。伪代码不反映底层实现。
join()
在 Python 中考虑。它可以很容易地使用:
print ", ".join(["Alpha", "Beta", "Gamma"])
# Alpha, Beta, Gamma
它可以很容易地实现如下:
def join(seq, sep=" "):
if not seq: return ""
elif len(seq) == 1: return seq[0]
return reduce(lambda x, y: x + sep + y, seq)
print join(["Alpha", "Beta", "Gamma"], ", ")
# Alpha, Beta, Gamma
以及如何join()
在 C 中实现方法(取自trunk):
PyDoc_STRVAR(join__doc__,
"S.join(sequence) -> string\n\
\n\
Return a string which is the concatenation of the strings in the\n\
sequence. The separator between elements is S.");
static PyObject *
string_join(PyStringObject *self, PyObject *orig)
{
char *sep = PyString_AS_STRING(self);
const Py_ssize_t seplen = PyString_GET_SIZE(self);
PyObject *res = NULL;
char *p;
Py_ssize_t seqlen = 0;
size_t sz = 0;
Py_ssize_t i;
PyObject *seq, *item;
seq = PySequence_Fast(orig, "");
if (seq == NULL) {
return NULL;
}
seqlen = PySequence_Size(seq);
if (seqlen == 0) {
Py_DECREF(seq);
return PyString_FromString("");
}
if (seqlen == 1) {
item = PySequence_Fast_GET_ITEM(seq, 0);
if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) {
Py_INCREF(item);
Py_DECREF(seq);
return item;
}
}
/* There are at least two things to join, or else we have a subclass
* of the builtin types in the sequence.
* Do a pre-pass to figure out the total amount of space we'll
* need (sz), see whether any argument is absurd, and defer to
* the Unicode join if appropriate.
*/
for (i = 0; i < seqlen; i++) {
const size_t old_sz = sz;
item = PySequence_Fast_GET_ITEM(seq, i);
if (!PyString_Check(item)){
#ifdef Py_USING_UNICODE
if (PyUnicode_Check(item)) {
/* Defer to Unicode join.
* CAUTION: There's no gurantee that the
* original sequence can be iterated over
* again, so we must pass seq here.
*/
PyObject *result;
result = PyUnicode_Join((PyObject *)self, seq);
Py_DECREF(seq);
return result;
}
#endif
PyErr_Format(PyExc_TypeError,
"sequence item %zd: expected string,"
" %.80s found",
i, Py_TYPE(item)->tp_name);
Py_DECREF(seq);
return NULL;
}
sz += PyString_GET_SIZE(item);
if (i != 0)
sz += seplen;
if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
PyErr_SetString(PyExc_OverflowError,
"join() result is too long for a Python string");
Py_DECREF(seq);
return NULL;
}
}
/* Allocate result space. */
res = PyString_FromStringAndSize((char*)NULL, sz);
if (res == NULL) {
Py_DECREF(seq);
return NULL;
}
/* Catenate everything. */
p = PyString_AS_STRING(res);
for (i = 0; i < seqlen; ++i) {
size_t n;
item = PySequence_Fast_GET_ITEM(seq, i);
n = PyString_GET_SIZE(item);
Py_MEMCPY(p, PyString_AS_STRING(item), n);
p += n;
if (i < seqlen - 1) {
Py_MEMCPY(p, sep, seplen);
p += seplen;
}
}
Py_DECREF(seq);
return res;
}
请注意,上面的Catenate everything.
代码只是整个函数的一小部分。
在伪代码中:
/* Catenate everything. */
for each item in sequence
copy-assign item
if not last item
copy-assign separator