Commit f30c12fd authored by Linus Torvalds's avatar Linus Torvalds

Merge bk://kernel.bkbits.net/davem/net-2.6

into ppc970.osdl.org:/home/torvalds/v2.6/linux
parents 4b1dbe33 9f3cd7bd
......@@ -229,6 +229,7 @@ Khazad algorithm contributors:
Whirlpool algorithm contributors:
Aaron Grothe
Jean-Luc Cooke
Generic scatterwalk code by Adam J. Richter <adam@yggdrasil.com>
......
......@@ -67,16 +67,13 @@ config CRYPTO_SHA512
This code also includes SHA-384, a 384 bit hash with 192 bits
of security against collision attacks.
config CRYPTO_WHIRLPOOL
tristate "Whirlpool digest algorithm"
config CRYPTO_WP512
tristate "Whirlpool digest algorithms"
depends on CRYPTO
help
Whirlpool hash algorithm.
Whirlpool is part of the NESSIE cryptographic primtives.
Whirlpool works on messages shorter than 2^256 bits and
produces a 512 bit hash.
Whirlpool hash algorithm 512, 384 and 256-bit hashes
Whirlpool-512 is part of the NESSIE cryptographic primtives.
Whirlpool will be part of the ISO/IEC 10118-3:2003(E) standard
See also:
......
......@@ -14,7 +14,7 @@ obj-$(CONFIG_CRYPTO_MD5) += md5.o
obj-$(CONFIG_CRYPTO_SHA1) += sha1.o
obj-$(CONFIG_CRYPTO_SHA256) += sha256.o
obj-$(CONFIG_CRYPTO_SHA512) += sha512.o
obj-$(CONFIG_CRYPTO_WHIRLPOOL) += whirlpool.o
obj-$(CONFIG_CRYPTO_WP512) += wp512.o
obj-$(CONFIG_CRYPTO_DES) += des.o
obj-$(CONFIG_CRYPTO_BLOWFISH) += blowfish.o
obj-$(CONFIG_CRYPTO_TWOFISH) += twofish.o
......
......@@ -63,7 +63,7 @@ static char *check[] = {
"des", "md5", "des3_ede", "rot13", "sha1", "sha256", "blowfish",
"twofish", "serpent", "sha384", "sha512", "md4", "aes", "cast6",
"arc4", "michael_mic", "deflate", "crc32c", "tea", "xtea",
"whirlpool", NULL
"wp512", "wp384", "wp256", NULL
};
static void
......@@ -682,7 +682,9 @@ do_test(void)
test_hash("sha384", sha384_tv_template, SHA384_TEST_VECTORS);
test_hash("sha512", sha512_tv_template, SHA512_TEST_VECTORS);
test_hash("whirlpool", whirlpool_tv_template, WHIRLPOOL_TEST_VECTORS);
test_hash("wp512", wp512_tv_template, WP512_TEST_VECTORS);
test_hash("wp384", wp384_tv_template, WP384_TEST_VECTORS);
test_hash("wp256", wp256_tv_template, WP256_TEST_VECTORS);
test_deflate();
test_crc32c();
#ifdef CONFIG_CRYPTO_HMAC
......@@ -795,7 +797,15 @@ do_test(void)
break;
case 22:
test_hash("whirlpool", whirlpool_tv_template, WHIRLPOOL_TEST_VECTORS);
test_hash("wp512", wp512_tv_template, WP512_TEST_VECTORS);
break;
case 23:
test_hash("wp384", wp384_tv_template, WP384_TEST_VECTORS);
break;
case 24:
test_hash("wp256", wp256_tv_template, WP256_TEST_VECTORS);
break;
......
......@@ -307,9 +307,9 @@ struct hash_testvec sha512_tv_template[] = {
* by Vincent Rijmen and Paulo S. L. M. Barreto as part of the NESSIE
* submission
*/
#define WHIRLPOOL_TEST_VECTORS 8
#define WP512_TEST_VECTORS 8
struct hash_testvec whirlpool_tv_template[] = {
struct hash_testvec wp512_tv_template[] = {
{
.plaintext = "",
.psize = 0,
......@@ -405,6 +405,155 @@ struct hash_testvec whirlpool_tv_template[] = {
},
};
#define WP384_TEST_VECTORS 8
struct hash_testvec wp384_tv_template[] = {
{
.plaintext = "",
.psize = 0,
.digest = { 0x19, 0xFA, 0x61, 0xD7, 0x55, 0x22, 0xA4, 0x66,
0x9B, 0x44, 0xE3, 0x9C, 0x1D, 0x2E, 0x17, 0x26,
0xC5, 0x30, 0x23, 0x21, 0x30, 0xD4, 0x07, 0xF8,
0x9A, 0xFE, 0xE0, 0x96, 0x49, 0x97, 0xF7, 0xA7,
0x3E, 0x83, 0xBE, 0x69, 0x8B, 0x28, 0x8F, 0xEB,
0xCF, 0x88, 0xE3, 0xE0, 0x3C, 0x4F, 0x07, 0x57 },
}, {
.plaintext = "a",
.psize = 1,
.digest = { 0x8A, 0xCA, 0x26, 0x02, 0x79, 0x2A, 0xEC, 0x6F,
0x11, 0xA6, 0x72, 0x06, 0x53, 0x1F, 0xB7, 0xD7,
0xF0, 0xDF, 0xF5, 0x94, 0x13, 0x14, 0x5E, 0x69,
0x73, 0xC4, 0x50, 0x01, 0xD0, 0x08, 0x7B, 0x42,
0xD1, 0x1B, 0xC6, 0x45, 0x41, 0x3A, 0xEF, 0xF6,
0x3A, 0x42, 0x39, 0x1A, 0x39, 0x14, 0x5A, 0x59 },
}, {
.plaintext = "abc",
.psize = 3,
.digest = { 0x4E, 0x24, 0x48, 0xA4, 0xC6, 0xF4, 0x86, 0xBB,
0x16, 0xB6, 0x56, 0x2C, 0x73, 0xB4, 0x02, 0x0B,
0xF3, 0x04, 0x3E, 0x3A, 0x73, 0x1B, 0xCE, 0x72,
0x1A, 0xE1, 0xB3, 0x03, 0xD9, 0x7E, 0x6D, 0x4C,
0x71, 0x81, 0xEE, 0xBD, 0xB6, 0xC5, 0x7E, 0x27,
0x7D, 0x0E, 0x34, 0x95, 0x71, 0x14, 0xCB, 0xD6 },
}, {
.plaintext = "message digest",
.psize = 14,
.digest = { 0x37, 0x8C, 0x84, 0xA4, 0x12, 0x6E, 0x2D, 0xC6,
0xE5, 0x6D, 0xCC, 0x74, 0x58, 0x37, 0x7A, 0xAC,
0x83, 0x8D, 0x00, 0x03, 0x22, 0x30, 0xF5, 0x3C,
0xE1, 0xF5, 0x70, 0x0C, 0x0F, 0xFB, 0x4D, 0x3B,
0x84, 0x21, 0x55, 0x76, 0x59, 0xEF, 0x55, 0xC1,
0x06, 0xB4, 0xB5, 0x2A, 0xC5, 0xA4, 0xAA, 0xA6 },
}, {
.plaintext = "abcdefghijklmnopqrstuvwxyz",
.psize = 26,
.digest = { 0xF1, 0xD7, 0x54, 0x66, 0x26, 0x36, 0xFF, 0xE9,
0x2C, 0x82, 0xEB, 0xB9, 0x21, 0x2A, 0x48, 0x4A,
0x8D, 0x38, 0x63, 0x1E, 0xAD, 0x42, 0x38, 0xF5,
0x44, 0x2E, 0xE1, 0x3B, 0x80, 0x54, 0xE4, 0x1B,
0x08, 0xBF, 0x2A, 0x92, 0x51, 0xC3, 0x0B, 0x6A,
0x0B, 0x8A, 0xAE, 0x86, 0x17, 0x7A, 0xB4, 0xA6 },
}, {
.plaintext = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
"abcdefghijklmnopqrstuvwxyz0123456789",
.psize = 62,
.digest = { 0xDC, 0x37, 0xE0, 0x08, 0xCF, 0x9E, 0xE6, 0x9B,
0xF1, 0x1F, 0x00, 0xED, 0x9A, 0xBA, 0x26, 0x90,
0x1D, 0xD7, 0xC2, 0x8C, 0xDE, 0xC0, 0x66, 0xCC,
0x6A, 0xF4, 0x2E, 0x40, 0xF8, 0x2F, 0x3A, 0x1E,
0x08, 0xEB, 0xA2, 0x66, 0x29, 0x12, 0x9D, 0x8F,
0xB7, 0xCB, 0x57, 0x21, 0x1B, 0x92, 0x81, 0xA6 },
}, {
.plaintext = "1234567890123456789012345678901234567890"
"1234567890123456789012345678901234567890",
.psize = 80,
.digest = { 0x46, 0x6E, 0xF1, 0x8B, 0xAB, 0xB0, 0x15, 0x4D,
0x25, 0xB9, 0xD3, 0x8A, 0x64, 0x14, 0xF5, 0xC0,
0x87, 0x84, 0x37, 0x2B, 0xCC, 0xB2, 0x04, 0xD6,
0x54, 0x9C, 0x4A, 0xFA, 0xDB, 0x60, 0x14, 0x29,
0x4D, 0x5B, 0xD8, 0xDF, 0x2A, 0x6C, 0x44, 0xE5,
0x38, 0xCD, 0x04, 0x7B, 0x26, 0x81, 0xA5, 0x1A },
}, {
.plaintext = "abcdbcdecdefdefgefghfghighijhijk",
.psize = 32,
.digest = { 0x2A, 0x98, 0x7E, 0xA4, 0x0F, 0x91, 0x70, 0x61,
0xF5, 0xD6, 0xF0, 0xA0, 0xE4, 0x64, 0x4F, 0x48,
0x8A, 0x7A, 0x5A, 0x52, 0xDE, 0xEE, 0x65, 0x62,
0x07, 0xC5, 0x62, 0xF9, 0x88, 0xE9, 0x5C, 0x69,
0x16, 0xBD, 0xC8, 0x03, 0x1B, 0xC5, 0xBE, 0x1B,
0x7B, 0x94, 0x76, 0x39, 0xFE, 0x05, 0x0B, 0x56 },
},
};
#define WP256_TEST_VECTORS 8
struct hash_testvec wp256_tv_template[] = {
{
.plaintext = "",
.psize = 0,
.digest = { 0x19, 0xFA, 0x61, 0xD7, 0x55, 0x22, 0xA4, 0x66,
0x9B, 0x44, 0xE3, 0x9C, 0x1D, 0x2E, 0x17, 0x26,
0xC5, 0x30, 0x23, 0x21, 0x30, 0xD4, 0x07, 0xF8,
0x9A, 0xFE, 0xE0, 0x96, 0x49, 0x97, 0xF7, 0xA7 },
}, {
.plaintext = "a",
.psize = 1,
.digest = { 0x8A, 0xCA, 0x26, 0x02, 0x79, 0x2A, 0xEC, 0x6F,
0x11, 0xA6, 0x72, 0x06, 0x53, 0x1F, 0xB7, 0xD7,
0xF0, 0xDF, 0xF5, 0x94, 0x13, 0x14, 0x5E, 0x69,
0x73, 0xC4, 0x50, 0x01, 0xD0, 0x08, 0x7B, 0x42 },
}, {
.plaintext = "abc",
.psize = 3,
.digest = { 0x4E, 0x24, 0x48, 0xA4, 0xC6, 0xF4, 0x86, 0xBB,
0x16, 0xB6, 0x56, 0x2C, 0x73, 0xB4, 0x02, 0x0B,
0xF3, 0x04, 0x3E, 0x3A, 0x73, 0x1B, 0xCE, 0x72,
0x1A, 0xE1, 0xB3, 0x03, 0xD9, 0x7E, 0x6D, 0x4C },
}, {
.plaintext = "message digest",
.psize = 14,
.digest = { 0x37, 0x8C, 0x84, 0xA4, 0x12, 0x6E, 0x2D, 0xC6,
0xE5, 0x6D, 0xCC, 0x74, 0x58, 0x37, 0x7A, 0xAC,
0x83, 0x8D, 0x00, 0x03, 0x22, 0x30, 0xF5, 0x3C,
0xE1, 0xF5, 0x70, 0x0C, 0x0F, 0xFB, 0x4D, 0x3B },
}, {
.plaintext = "abcdefghijklmnopqrstuvwxyz",
.psize = 26,
.digest = { 0xF1, 0xD7, 0x54, 0x66, 0x26, 0x36, 0xFF, 0xE9,
0x2C, 0x82, 0xEB, 0xB9, 0x21, 0x2A, 0x48, 0x4A,
0x8D, 0x38, 0x63, 0x1E, 0xAD, 0x42, 0x38, 0xF5,
0x44, 0x2E, 0xE1, 0x3B, 0x80, 0x54, 0xE4, 0x1B },
}, {
.plaintext = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
"abcdefghijklmnopqrstuvwxyz0123456789",
.psize = 62,
.digest = { 0xDC, 0x37, 0xE0, 0x08, 0xCF, 0x9E, 0xE6, 0x9B,
0xF1, 0x1F, 0x00, 0xED, 0x9A, 0xBA, 0x26, 0x90,
0x1D, 0xD7, 0xC2, 0x8C, 0xDE, 0xC0, 0x66, 0xCC,
0x6A, 0xF4, 0x2E, 0x40, 0xF8, 0x2F, 0x3A, 0x1E },
}, {
.plaintext = "1234567890123456789012345678901234567890"
"1234567890123456789012345678901234567890",
.psize = 80,
.digest = { 0x46, 0x6E, 0xF1, 0x8B, 0xAB, 0xB0, 0x15, 0x4D,
0x25, 0xB9, 0xD3, 0x8A, 0x64, 0x14, 0xF5, 0xC0,
0x87, 0x84, 0x37, 0x2B, 0xCC, 0xB2, 0x04, 0xD6,
0x54, 0x9C, 0x4A, 0xFA, 0xDB, 0x60, 0x14, 0x29 },
}, {
.plaintext = "abcdbcdecdefdefgefghfghighijhijk",
.psize = 32,
.digest = { 0x2A, 0x98, 0x7E, 0xA4, 0x0F, 0x91, 0x70, 0x61,
0xF5, 0xD6, 0xF0, 0xA0, 0xE4, 0x64, 0x4F, 0x48,
0x8A, 0x7A, 0x5A, 0x52, 0xDE, 0xEE, 0x65, 0x62,
0x07, 0xC5, 0x62, 0xF9, 0x88, 0xE9, 0x5C, 0x69 },
},
};
#ifdef CONFIG_CRYPTO_HMAC
/*
* HMAC-MD5 test vectors from RFC2202
......
......@@ -25,18 +25,21 @@
#include <asm/scatterlist.h>
#include <linux/crypto.h>
#define WHIRLPOOL_DIGEST_SIZE 64
#define WHIRLPOOL_BLOCK_SIZE 64
#define WHIRLPOOL_LENGTHBYTES 32
#define WP512_DIGEST_SIZE 64
#define WP384_DIGEST_SIZE 48
#define WP256_DIGEST_SIZE 32
#define WP512_BLOCK_SIZE 64
#define WP512_LENGTHBYTES 32
#define WHIRLPOOL_ROUNDS 10
struct whirlpool_ctx {
u8 bitLength[WHIRLPOOL_LENGTHBYTES];
u8 buffer[WHIRLPOOL_BLOCK_SIZE];
struct wp512_ctx {
u8 bitLength[WP512_LENGTHBYTES];
u8 buffer[WP512_BLOCK_SIZE];
int bufferBits;
int bufferPos;
u64 hash[WHIRLPOOL_DIGEST_SIZE/8];
u64 hash[WP512_DIGEST_SIZE/8];
};
/*
......@@ -769,7 +772,7 @@ static const u64 rc[WHIRLPOOL_ROUNDS + 1] = {
* The core Whirlpool transform.
*/
static void whirlpool_process_buffer(struct whirlpool_ctx *wctx) {
static void wp512_process_buffer(struct wp512_ctx *wctx) {
int i, r;
u64 K[8]; /* the round key */
u64 block[8]; /* mu(buffer) */
......@@ -985,9 +988,9 @@ static void whirlpool_process_buffer(struct whirlpool_ctx *wctx) {
}
static void whirlpool_init (void *ctx) {
static void wp512_init (void *ctx) {
int i;
struct whirlpool_ctx *wctx = ctx;
struct wp512_ctx *wctx = ctx;
memset(wctx->bitLength, 0, 32);
wctx->bufferBits = wctx->bufferPos = 0;
......@@ -997,10 +1000,10 @@ static void whirlpool_init (void *ctx) {
}
}
static void whirlpool_update(void *ctx, const u8 *source, unsigned int len)
static void wp512_update(void *ctx, const u8 *source, unsigned int len)
{
struct whirlpool_ctx *wctx = ctx;
struct wp512_ctx *wctx = ctx;
int sourcePos = 0;
unsigned int bits_len = len * 8; // convert to number of bits
int sourceGap = (8 - ((int)bits_len & 7)) & 7;
......@@ -1024,8 +1027,8 @@ static void whirlpool_update(void *ctx, const u8 *source, unsigned int len)
((source[sourcePos + 1] & 0xff) >> (8 - sourceGap));
buffer[bufferPos++] |= (u8)(b >> bufferRem);
bufferBits += 8 - bufferRem;
if (bufferBits == WHIRLPOOL_DIGEST_SIZE * 8) {
whirlpool_process_buffer(wctx);
if (bufferBits == WP512_BLOCK_SIZE * 8) {
wp512_process_buffer(wctx);
bufferBits = bufferPos = 0;
}
buffer[bufferPos] = b << (8 - bufferRem);
......@@ -1045,8 +1048,8 @@ static void whirlpool_update(void *ctx, const u8 *source, unsigned int len)
bufferPos++;
bufferBits += 8 - bufferRem;
bits_len -= 8 - bufferRem;
if (bufferBits == WHIRLPOOL_DIGEST_SIZE * 8) {
whirlpool_process_buffer(wctx);
if (bufferBits == WP512_BLOCK_SIZE * 8) {
wp512_process_buffer(wctx);
bufferBits = bufferPos = 0;
}
buffer[bufferPos] = b << (8 - bufferRem);
......@@ -1058,9 +1061,9 @@ static void whirlpool_update(void *ctx, const u8 *source, unsigned int len)
}
static void whirlpool_final(void *ctx, u8 *out)
static void wp512_final(void *ctx, u8 *out)
{
struct whirlpool_ctx *wctx = ctx;
struct wp512_ctx *wctx = ctx;
int i;
u8 *buffer = wctx->buffer;
u8 *bitLength = wctx->bitLength;
......@@ -1070,22 +1073,22 @@ static void whirlpool_final(void *ctx, u8 *out)
buffer[bufferPos] |= 0x80U >> (bufferBits & 7);
bufferPos++;
if (bufferPos > WHIRLPOOL_BLOCK_SIZE - WHIRLPOOL_LENGTHBYTES) {
if (bufferPos < WHIRLPOOL_BLOCK_SIZE) {
memset(&buffer[bufferPos], 0, WHIRLPOOL_BLOCK_SIZE - bufferPos);
if (bufferPos > WP512_BLOCK_SIZE - WP512_LENGTHBYTES) {
if (bufferPos < WP512_BLOCK_SIZE) {
memset(&buffer[bufferPos], 0, WP512_BLOCK_SIZE - bufferPos);
}
whirlpool_process_buffer(wctx);
wp512_process_buffer(wctx);
bufferPos = 0;
}
if (bufferPos < WHIRLPOOL_BLOCK_SIZE - WHIRLPOOL_LENGTHBYTES) {
if (bufferPos < WP512_BLOCK_SIZE - WP512_LENGTHBYTES) {
memset(&buffer[bufferPos], 0,
(WHIRLPOOL_BLOCK_SIZE - WHIRLPOOL_LENGTHBYTES) - bufferPos);
(WP512_BLOCK_SIZE - WP512_LENGTHBYTES) - bufferPos);
}
bufferPos = WHIRLPOOL_BLOCK_SIZE - WHIRLPOOL_LENGTHBYTES;
memcpy(&buffer[WHIRLPOOL_BLOCK_SIZE - WHIRLPOOL_LENGTHBYTES],
bitLength, WHIRLPOOL_LENGTHBYTES);
whirlpool_process_buffer(wctx);
for (i = 0; i < WHIRLPOOL_DIGEST_SIZE/8; i++) {
bufferPos = WP512_BLOCK_SIZE - WP512_LENGTHBYTES;
memcpy(&buffer[WP512_BLOCK_SIZE - WP512_LENGTHBYTES],
bitLength, WP512_LENGTHBYTES);
wp512_process_buffer(wctx);
for (i = 0; i < WP512_DIGEST_SIZE/8; i++) {
digest[0] = (u8)(wctx->hash[i] >> 56);
digest[1] = (u8)(wctx->hash[i] >> 48);
digest[2] = (u8)(wctx->hash[i] >> 40);
......@@ -1100,30 +1103,104 @@ static void whirlpool_final(void *ctx, u8 *out)
wctx->bufferPos = bufferPos;
}
static struct crypto_alg alg = {
.cra_name = "whirlpool",
static void wp384_final(void *ctx, u8 *out)
{
struct wp512_ctx *wctx = ctx;
u8 D[64];
wp512_final (wctx, D);
memcpy (out, D, WP384_DIGEST_SIZE);
memset (D, 0, WP512_DIGEST_SIZE);
}
static void wp256_final(void *ctx, u8 *out)
{
struct wp512_ctx *wctx = ctx;
u8 D[64];
wp512_final (wctx, D);
memcpy (out, D, WP256_DIGEST_SIZE);
memset (D, 0, WP512_DIGEST_SIZE);
}
static struct crypto_alg wp512 = {
.cra_name = "wp512",
.cra_flags = CRYPTO_ALG_TYPE_DIGEST,
.cra_blocksize = WP512_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct wp512_ctx),
.cra_module = THIS_MODULE,
.cra_list = LIST_HEAD_INIT(wp512.cra_list),
.cra_u = { .digest = {
.dia_digestsize = WP512_DIGEST_SIZE,
.dia_init = wp512_init,
.dia_update = wp512_update,
.dia_final = wp512_final } }
};
static struct crypto_alg wp384 = {
.cra_name = "wp384",
.cra_flags = CRYPTO_ALG_TYPE_DIGEST,
.cra_blocksize = WHIRLPOOL_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct whirlpool_ctx),
.cra_blocksize = WP512_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct wp512_ctx),
.cra_module = THIS_MODULE,
.cra_list = LIST_HEAD_INIT(alg.cra_list),
.cra_list = LIST_HEAD_INIT(wp384.cra_list),
.cra_u = { .digest = {
.dia_digestsize = WHIRLPOOL_DIGEST_SIZE,
.dia_init = whirlpool_init,
.dia_update = whirlpool_update,
.dia_final = whirlpool_final } }
.dia_digestsize = WP384_DIGEST_SIZE,
.dia_init = wp512_init,
.dia_update = wp512_update,
.dia_final = wp384_final } }
};
static struct crypto_alg wp256 = {
.cra_name = "wp256",
.cra_flags = CRYPTO_ALG_TYPE_DIGEST,
.cra_blocksize = WP512_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct wp512_ctx),
.cra_module = THIS_MODULE,
.cra_list = LIST_HEAD_INIT(wp256.cra_list),
.cra_u = { .digest = {
.dia_digestsize = WP256_DIGEST_SIZE,
.dia_init = wp512_init,
.dia_update = wp512_update,
.dia_final = wp256_final } }
};
static int __init init(void)
{
return crypto_register_alg(&alg);
int ret = 0;
ret = crypto_register_alg(&wp512);
if (ret < 0)
goto out;
ret = crypto_register_alg(&wp384);
if (ret < 0)
{
crypto_unregister_alg(&wp512);
goto out;
}
ret = crypto_register_alg(&wp256);
if (ret < 0)
{
crypto_unregister_alg(&wp512);
crypto_unregister_alg(&wp384);
}
out:
return ret;
}
static void __exit fini(void)
{
crypto_unregister_alg(&alg);
crypto_unregister_alg(&wp512);
crypto_unregister_alg(&wp384);
crypto_unregister_alg(&wp256);
}
MODULE_ALIAS("wp384");
MODULE_ALIAS("wp256");
module_init(init);
module_exit(fini);
......
......@@ -18,7 +18,7 @@
#include <asm/bitops.h>
#include <linux/smp_lock.h>
struct proc_dir_entry *proc_net, *proc_bus, *proc_root_fs, *proc_root_driver;
struct proc_dir_entry *proc_net, *proc_net_stat, *proc_bus, *proc_root_fs, *proc_root_driver;
#ifdef CONFIG_SYSCTL
struct proc_dir_entry *proc_sys_root;
......@@ -53,6 +53,8 @@ void __init proc_root_init(void)
}
proc_misc_init();
proc_net = proc_mkdir("net", NULL);
proc_net_stat = proc_mkdir("net/stat", NULL);
#ifdef CONFIG_SYSVIPC
proc_mkdir("sysvipc", NULL);
#endif
......@@ -157,5 +159,6 @@ EXPORT_SYMBOL(remove_proc_entry);
EXPORT_SYMBOL(proc_root);
EXPORT_SYMBOL(proc_root_fs);
EXPORT_SYMBOL(proc_net);
EXPORT_SYMBOL(proc_net_stat);
EXPORT_SYMBOL(proc_bus);
EXPORT_SYMBOL(proc_root_driver);
......@@ -79,6 +79,7 @@ struct kcore_list {
extern struct proc_dir_entry proc_root;
extern struct proc_dir_entry *proc_root_fs;
extern struct proc_dir_entry *proc_net;
extern struct proc_dir_entry *proc_net_stat;
extern struct proc_dir_entry *proc_bus;
extern struct proc_dir_entry *proc_root_driver;
extern struct proc_dir_entry *proc_root_kcore;
......
......@@ -7,6 +7,11 @@
* Authors:
* Pedro Roque <roque@di.fc.ul.pt>
* Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
*
* Changes:
*
* Harald Welte: <laforge@gnumonks.org>
* - Add neighbour cache statistics like rtstat
*/
/* The following flags & states are exported to user space,
......@@ -90,12 +95,25 @@ struct neigh_parms
struct neigh_statistics
{
unsigned long allocs;
unsigned long res_failed;
unsigned long rcv_probes_mcast;
unsigned long rcv_probes_ucast;
unsigned long allocs; /* number of allocated neighs */
unsigned long destroys; /* number of destroyed neighs */
unsigned long hash_grows; /* number of hash resizes */
unsigned long res_failed; /* nomber of failed resolutions */
unsigned long lookups; /* number of lookups */
unsigned long hits; /* number of hits (among lookups) */
unsigned long rcv_probes_mcast; /* number of received mcast ipv6 */
unsigned long rcv_probes_ucast; /* number of received ucast ipv6 */
unsigned long periodic_gc_runs; /* number of periodic GC runs */
unsigned long forced_gc_runs; /* number of forced GC runs */
};
#define NEIGH_CACHE_STAT_INC(tbl, field) \
(per_cpu_ptr((tbl)->stats, smp_processor_id())->field++)
struct neighbour
{
struct neighbour *next;
......@@ -172,12 +190,15 @@ struct neigh_table
unsigned long last_rand;
struct neigh_parms *parms_list;
kmem_cache_t *kmem_cachep;
struct neigh_statistics stats;
struct neigh_statistics *stats;
struct neighbour **hash_buckets;
unsigned int hash_mask;
__u32 hash_rnd;
unsigned int hash_chain_gc;
struct pneigh_entry **phash_buckets;
#ifdef CONFIG_PROC_FS
struct proc_dir_entry *pde;
#endif
};
/* flags for neigh_update() */
......
......@@ -80,6 +80,7 @@ struct Qdisc
int padded;
struct Qdisc_ops *ops;
u32 handle;
u32 parent;
atomic_t refcnt;
struct sk_buff_head q;
struct net_device *dev;
......
......@@ -961,7 +961,8 @@ extern void tcp_clear_xmit_timers(struct sock *);
extern void tcp_delete_keepalive_timer (struct sock *);
extern void tcp_reset_keepalive_timer (struct sock *, unsigned long);
extern int tcp_sync_mss(struct sock *sk, u32 pmtu);
extern unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu);
extern unsigned int tcp_current_mss(struct sock *sk, int large);
extern const char timer_bug_msg[];
......@@ -1035,37 +1036,6 @@ static inline void tcp_reset_xmit_timer(struct sock *sk, int what, unsigned long
};
}
/* Compute the current effective MSS, taking SACKs and IP options,
* and even PMTU discovery events into account.
*
* LARGESEND note: !urg_mode is overkill, only frames up to snd_up
* cannot be large. However, taking into account rare use of URG, this
* is not a big flaw.
*/
static inline unsigned int tcp_current_mss(struct sock *sk, int large)
{
struct tcp_opt *tp = tcp_sk(sk);
struct dst_entry *dst = __sk_dst_get(sk);
int do_large, mss_now;
do_large = (large &&
(sk->sk_route_caps & NETIF_F_TSO) &&
!tp->urg_mode);
mss_now = do_large ? tp->mss_cache : tp->mss_cache_std;
if (dst) {
u32 mtu = dst_pmtu(dst);
if (mtu != tp->pmtu_cookie ||
tp->ext2_header_len != dst->header_len)
mss_now = tcp_sync_mss(sk, mtu);
}
if (tp->eff_sacks)
mss_now -= (TCPOLEN_SACK_BASE_ALIGNED +
(tp->eff_sacks * TCPOLEN_SACK_PERBLOCK));
return mss_now;
}
/* Initialize RCV_MSS value.
* RCV_MSS is an our guess about MSS used by the peer.
* We haven't any direct information about the MSS.
......@@ -1180,7 +1150,8 @@ struct tcp_skb_cb {
__u16 urg_ptr; /* Valid w/URG flags is set. */
__u32 ack_seq; /* Sequence number ACK'd */
__u32 tso_factor;
__u16 tso_factor; /* If > 1, TSO frame */
__u16 tso_mss; /* MSS that FACTOR's in terms of*/
};
#define TCP_SKB_CB(__skb) ((struct tcp_skb_cb *)&((__skb)->cb[0]))
......
......@@ -12,6 +12,7 @@
*
* Fixes:
* Vitaly E. Lavrov releasing NULL neighbor in neigh_add.
* Harald Welte Add neighbour cache statistics like rtstat
*/
#include <linux/config.h>
......@@ -21,6 +22,7 @@
#include <linux/socket.h>
#include <linux/sched.h>
#include <linux/netdevice.h>
#include <linux/proc_fs.h>
#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
#endif
......@@ -59,6 +61,7 @@ void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev);
static int neigh_glbl_allocs;
static struct neigh_table *neigh_tables;
static struct file_operations neigh_stat_seq_fops;
/*
Neighbour hash table buckets are protected with rwlock tbl->lock.
......@@ -116,6 +119,8 @@ static int neigh_forced_gc(struct neigh_table *tbl)
int shrunk = 0;
int i;
NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs);
write_lock_bh(&tbl->lock);
for (i = 0; i <= tbl->hash_mask; i++) {
struct neighbour *n, **np;
......@@ -273,7 +278,8 @@ static struct neighbour *neigh_alloc(struct neigh_table *tbl)
init_timer(&n->timer);
n->timer.function = neigh_timer_handler;
n->timer.data = (unsigned long)n;
tbl->stats.allocs++;
NEIGH_CACHE_STAT_INC(tbl, allocs);
neigh_glbl_allocs++;
tbl->entries++;
n->tbl = tbl;
......@@ -315,6 +321,8 @@ static void neigh_hash_grow(struct neigh_table *tbl, unsigned long new_entries)
struct neighbour **new_hash, **old_hash;
unsigned int i, new_hash_mask, old_entries;
NEIGH_CACHE_STAT_INC(tbl, hash_grows);
BUG_ON(new_entries & (new_entries - 1));
new_hash = neigh_hash_alloc(new_entries);
if (!new_hash)
......@@ -350,11 +358,14 @@ struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
struct neighbour *n;
int key_len = tbl->key_len;
u32 hash_val = tbl->hash(pkey, dev) & tbl->hash_mask;
NEIGH_CACHE_STAT_INC(tbl, lookups);
read_lock_bh(&tbl->lock);
for (n = tbl->hash_buckets[hash_val]; n; n = n->next) {
if (dev == n->dev && !memcmp(n->primary_key, pkey, key_len)) {
neigh_hold(n);
NEIGH_CACHE_STAT_INC(tbl, hits);
break;
}
}
......@@ -368,10 +379,13 @@ struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, const void *pkey)
int key_len = tbl->key_len;
u32 hash_val = tbl->hash(pkey, NULL) & tbl->hash_mask;
NEIGH_CACHE_STAT_INC(tbl, lookups);
read_lock_bh(&tbl->lock);
for (n = tbl->hash_buckets[hash_val]; n; n = n->next) {
if (!memcmp(n->primary_key, pkey, key_len)) {
neigh_hold(n);
NEIGH_CACHE_STAT_INC(tbl, hits);
break;
}
}
......@@ -556,6 +570,8 @@ void neigh_destroy(struct neighbour *neigh)
{
struct hh_cache *hh;
NEIGH_CACHE_STAT_INC(neigh->tbl, destroys);
if (!neigh->dead) {
printk(KERN_WARNING
"Destroying alive neighbour %p\n", neigh);
......@@ -631,6 +647,8 @@ static void neigh_periodic_timer(unsigned long arg)
struct neighbour *n, **np;
unsigned long expire, now = jiffies;
NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs);
write_lock(&tbl->lock);
/*
......@@ -762,7 +780,7 @@ static void neigh_timer_handler(unsigned long arg)
neigh->nud_state = NUD_FAILED;
notify = 1;
neigh->tbl->stats.res_failed++;
NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed);
NEIGH_PRINTK2("neigh %p is failed.\n", neigh);
/* It is very thin place. report_unreachable is very complicated
......@@ -1311,6 +1329,18 @@ void neigh_table_init(struct neigh_table *tbl)
if (!tbl->kmem_cachep)
panic("cannot create neighbour cache");
tbl->stats = alloc_percpu(struct neigh_statistics);
if (!tbl->stats)
panic("cannot create neighbour cache statistics");
#ifdef CONFIG_PROC_FS
tbl->pde = create_proc_entry(tbl->id, 0, proc_net_stat);
if (!tbl->pde)
panic("cannot create neighbour proc dir entry");
tbl->pde->proc_fops = &neigh_stat_seq_fops;
tbl->pde->data = tbl;
#endif
tbl->hash_mask = 1;
tbl->hash_buckets = neigh_hash_alloc(tbl->hash_mask + 1);
......@@ -1857,6 +1887,106 @@ void neigh_seq_stop(struct seq_file *seq, void *v)
}
EXPORT_SYMBOL(neigh_seq_stop);
/* statistics via seq_file */
static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos)
{
struct proc_dir_entry *pde = seq->private;
struct neigh_table *tbl = pde->data;
int cpu;
if (*pos == 0)
return SEQ_START_TOKEN;
for (cpu = *pos-1; cpu < NR_CPUS; ++cpu) {
if (!cpu_possible(cpu))
continue;
*pos = cpu+1;
return per_cpu_ptr(tbl->stats, cpu);
}
return NULL;
}
static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
struct proc_dir_entry *pde = seq->private;
struct neigh_table *tbl = pde->data;
int cpu;
for (cpu = *pos; cpu < NR_CPUS; ++cpu) {
if (!cpu_possible(cpu))
continue;
*pos = cpu+1;
return per_cpu_ptr(tbl->stats, cpu);
}
return NULL;
}
static void neigh_stat_seq_stop(struct seq_file *seq, void *v)
{
}
static int neigh_stat_seq_show(struct seq_file *seq, void *v)
{
struct proc_dir_entry *pde = seq->private;
struct neigh_table *tbl = pde->data;
struct neigh_statistics *st = v;
if (v == SEQ_START_TOKEN) {
seq_printf(seq, "entries allocs destroys hash_grows lookups hits res_failed rcv_probes_mcast rcv_probes_ucast periodic_gc_runs forced_gc_runs forced_gc_goal_miss\n");
return 0;
}
seq_printf(seq, "%08x %08lx %08lx %08lx %08lx %08lx %08lx "
"%08lx %08lx %08lx %08lx\n",
tbl->entries,
st->allocs,
st->destroys,
st->hash_grows,
st->lookups,
st->hits,
st->res_failed,
st->rcv_probes_mcast,
st->rcv_probes_ucast,
st->periodic_gc_runs,
st->forced_gc_runs
);
return 0;
}
static struct seq_operations neigh_stat_seq_ops = {
.start = neigh_stat_seq_start,
.next = neigh_stat_seq_next,
.stop = neigh_stat_seq_stop,
.show = neigh_stat_seq_show,
};
static int neigh_stat_seq_open(struct inode *inode, struct file *file)
{
int ret = seq_open(file, &neigh_stat_seq_ops);
if (!ret) {
struct seq_file *sf = file->private_data;
sf->private = PDE(inode);
}
return ret;
};
static struct file_operations neigh_stat_seq_fops = {
.owner = THIS_MODULE,
.open = neigh_stat_seq_open,
.read = seq_read,
.llseek = seq_lseek,
.release = seq_release,
};
#endif /* CONFIG_PROC_FS */
#ifdef CONFIG_ARPD
......
......@@ -688,6 +688,12 @@ unsigned int ip_conntrack_in(unsigned int hooknum,
int set_reply;
int ret;
/* Previously seen (loopback or untracked)? Ignore. */
if ((*pskb)->nfct) {
CONNTRACK_STAT_INC(ignore);
return NF_ACCEPT;
}
/* Never happen */
if ((*pskb)->nh.iph->frag_off & htons(IP_OFFSET)) {
if (net_ratelimit()) {
......@@ -715,12 +721,6 @@ unsigned int ip_conntrack_in(unsigned int hooknum,
}
#endif
/* Previously seen (loopback or untracked)? Ignore. */
if ((*pskb)->nfct) {
CONNTRACK_STAT_INC(ignore);
return NF_ACCEPT;
}
proto = ip_ct_find_proto((*pskb)->nh.iph->protocol);
/* It may be an special packet, error, unclean...
......
......@@ -268,10 +268,13 @@ static void *ct_cpu_seq_start(struct seq_file *seq, loff_t *pos)
{
int cpu;
for (cpu = *pos; cpu < NR_CPUS; ++cpu) {
if (*pos == 0)
return SEQ_START_TOKEN;
for (cpu = *pos-1; cpu < NR_CPUS; ++cpu) {
if (!cpu_possible(cpu))
continue;
*pos = cpu;
*pos = cpu+1;
return &per_cpu(ip_conntrack_stat, cpu);
}
......@@ -282,10 +285,10 @@ static void *ct_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
int cpu;
for (cpu = *pos + 1; cpu < NR_CPUS; ++cpu) {
for (cpu = *pos; cpu < NR_CPUS; ++cpu) {
if (!cpu_possible(cpu))
continue;
*pos = cpu;
*pos = cpu+1;
return &per_cpu(ip_conntrack_stat, cpu);
}
......@@ -301,6 +304,11 @@ static int ct_cpu_seq_show(struct seq_file *seq, void *v)
unsigned int nr_conntracks = atomic_read(&ip_conntrack_count);
struct ip_conntrack_stat *st = v;
if (v == SEQ_START_TOKEN) {
seq_printf(seq, "entries searched found new invalid ignore delete delete_list insert insert_failed drop early_drop icmp_error expect_new expect_create expect_delete\n");
return 0;
}
seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x "
"%08x %08x %08x %08x %08x %08x %08x %08x \n",
nr_conntracks,
......@@ -735,10 +743,11 @@ static int init_or_cleanup(int init)
&exp_file_ops);
if (!proc_exp) goto cleanup_proc;
proc_stat = proc_net_fops_create("ip_conntrack_stat", S_IRUGO,
&ct_cpu_seq_fops);
proc_stat = create_proc_entry("ip_conntrack", S_IRUGO, proc_net_stat);
if (!proc_stat)
goto cleanup_proc_exp;
proc_stat->proc_fops = &ct_cpu_seq_fops;
proc_stat->owner = THIS_MODULE;
#endif
......
......@@ -356,10 +356,13 @@ static void *rt_cpu_seq_start(struct seq_file *seq, loff_t *pos)
{
int cpu;
for (cpu = *pos; cpu < NR_CPUS; ++cpu) {
if (*pos == 0)
return SEQ_START_TOKEN;
for (cpu = *pos-1; cpu < NR_CPUS; ++cpu) {
if (!cpu_possible(cpu))
continue;
*pos = cpu;
*pos = cpu+1;
return per_cpu_ptr(rt_cache_stat, cpu);
}
return NULL;
......@@ -369,10 +372,10 @@ static void *rt_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
int cpu;
for (cpu = *pos + 1; cpu < NR_CPUS; ++cpu) {
for (cpu = *pos; cpu < NR_CPUS; ++cpu) {
if (!cpu_possible(cpu))
continue;
*pos = cpu;
*pos = cpu+1;
return per_cpu_ptr(rt_cache_stat, cpu);
}
return NULL;
......@@ -387,6 +390,11 @@ static void rt_cpu_seq_stop(struct seq_file *seq, void *v)
static int rt_cpu_seq_show(struct seq_file *seq, void *v)
{
struct rt_cache_stat *st = v;
if (v == SEQ_START_TOKEN) {
seq_printf(seq, "entries in_hit in_slow_tot in_no_route in_brd in_martian_dst in_martian_src out_hit out_slow_tot out_slow_mc gc_total gc_ignored gc_goal_miss gc_dst_overflow in_hlist_search out_hlist_search\n");
return 0;
}
seq_printf(seq,"%08x %08x %08x %08x %08x %08x %08x %08x "
" %08x %08x %08x %08x %08x %08x %08x %08x %08x \n",
......@@ -2783,12 +2791,16 @@ int __init ip_rt_init(void)
add_timer(&rt_secret_timer);
#ifdef CONFIG_PROC_FS
{
struct proc_dir_entry *rtstat_pde = NULL; /* keep gcc happy */
if (!proc_net_fops_create("rt_cache", S_IRUGO, &rt_cache_seq_fops) ||
!proc_net_fops_create("rt_cache_stat", S_IRUGO, &rt_cpu_seq_fops)) {
!(rtstat_pde = create_proc_entry("rt_cache", S_IRUGO,
proc_net_stat))) {
free_percpu(rt_cache_stat);
return -ENOMEM;
}
rtstat_pde->proc_fops = &rt_cpu_seq_fops;
}
#ifdef CONFIG_NET_CLS_ROUTE
create_proc_read_entry("rt_acct", 0, proc_net, ip_rt_acct_read, NULL);
#endif
......
......@@ -802,10 +802,10 @@ __u32 tcp_init_cwnd(struct tcp_opt *tp, struct dst_entry *dst)
__u32 cwnd = (dst ? dst_metric(dst, RTAX_INITCWND) : 0);
if (!cwnd) {
if (tp->mss_cache > 1460)
if (tp->mss_cache_std > 1460)
cwnd = 2;
else
cwnd = (tp->mss_cache > 1095) ? 3 : 4;
cwnd = (tp->mss_cache_std > 1095) ? 3 : 4;
}
return min_t(__u32, cwnd, tp->snd_cwnd_clamp);
}
......@@ -2355,6 +2355,86 @@ static __inline__ void tcp_ack_packets_out(struct sock *sk, struct tcp_opt *tp)
}
}
/* There is one downside to this scheme. Although we keep the
* ACK clock ticking, adjusting packet counters and advancing
* congestion window, we do not liberate socket send buffer
* space.
*
* Mucking with skb->truesize and sk->sk_wmem_alloc et al.
* then making a write space wakeup callback is a possible
* future enhancement. WARNING: it is not trivial to make.
*/
static int tcp_tso_acked(struct tcp_opt *tp, struct sk_buff *skb,
__u32 now, __s32 *seq_rtt)
{
struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
__u32 mss = scb->tso_mss;
__u32 snd_una = tp->snd_una;
__u32 seq = scb->seq;
__u32 packets_acked = 0;
int acked = 0;
/* If we get here, the whole TSO packet has not been
* acked.
*/
BUG_ON(!after(scb->end_seq, snd_una));
while (!after(seq + mss, snd_una)) {
packets_acked++;
seq += mss;
}
if (packets_acked) {
__u8 sacked = scb->sacked;
/* We adjust scb->seq but we do not pskb_pull() the
* SKB. We let tcp_retransmit_skb() handle this case
* by checking skb->len against the data sequence span.
* This way, we avoid the pskb_pull() work unless we
* actually need to retransmit the SKB.
*/
scb->seq = seq;
acked |= FLAG_DATA_ACKED;
if (sacked) {
if (sacked & TCPCB_RETRANS) {
if (sacked & TCPCB_SACKED_RETRANS)
tcp_dec_pcount_explicit(&tp->retrans_out,
packets_acked);
acked |= FLAG_RETRANS_DATA_ACKED;
*seq_rtt = -1;
} else if (*seq_rtt < 0)
*seq_rtt = now - scb->when;
if (sacked & TCPCB_SACKED_ACKED)
tcp_dec_pcount_explicit(&tp->sacked_out,
packets_acked);
if (sacked & TCPCB_LOST)
tcp_dec_pcount_explicit(&tp->lost_out,
packets_acked);
if (sacked & TCPCB_URG) {
if (tp->urg_mode &&
!before(scb->seq, tp->snd_up))
tp->urg_mode = 0;
}
} else if (*seq_rtt < 0)
*seq_rtt = now - scb->when;
if (tcp_get_pcount(&tp->fackets_out)) {
__u32 dval = min(tcp_get_pcount(&tp->fackets_out),
packets_acked);
tcp_dec_pcount_explicit(&tp->fackets_out, dval);
}
tcp_dec_pcount_explicit(&tp->packets_out, packets_acked);
scb->tso_factor -= packets_acked;
BUG_ON(scb->tso_factor == 0);
BUG_ON(!before(scb->seq, scb->end_seq));
}
return acked;
}
/* Remove acknowledged frames from the retransmission queue. */
static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p)
{
......@@ -2373,8 +2453,12 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p)
* discard it as it's confirmed to have arrived at
* the other end.
*/
if (after(scb->end_seq, tp->snd_una))
if (after(scb->end_seq, tp->snd_una)) {
if (scb->tso_factor > 1)
acked |= tcp_tso_acked(tp, skb,
now, &seq_rtt);
break;
}
/* Initial outgoing SYN's get put onto the write_queue
* just like anything else we transmit. It is not
......
......@@ -436,6 +436,7 @@ void tcp_set_skb_tso_factor(struct sk_buff *skb, unsigned int mss_std)
factor /= mss_std;
TCP_SKB_CB(skb)->tso_factor = factor;
}
TCP_SKB_CB(skb)->tso_mss = mss_std;
}
/* Function to create two new TCP segments. Shrinks the given segment
......@@ -552,7 +553,7 @@ unsigned char * __pskb_trim_head(struct sk_buff *skb, int len)
return skb->tail;
}
static int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
static int __tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
{
if (skb_cloned(skb) &&
pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
......@@ -565,11 +566,20 @@ static int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
return -ENOMEM;
}
TCP_SKB_CB(skb)->seq += len;
skb->ip_summed = CHECKSUM_HW;
return 0;
}
static inline int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
{
int err = __tcp_trim_head(sk, skb, len);
if (!err)
TCP_SKB_CB(skb)->seq += len;
return err;
}
/* This function synchronize snd mss to current pmtu/exthdr set.
tp->user_mss is mss set by user by TCP_MAXSEG. It does NOT counts
......@@ -593,7 +603,7 @@ static int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
this function. --ANK (980731)
*/
int tcp_sync_mss(struct sock *sk, u32 pmtu)
unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu)
{
struct tcp_opt *tp = tcp_sk(sk);
struct dst_entry *dst = __sk_dst_get(sk);
......@@ -629,14 +639,45 @@ int tcp_sync_mss(struct sock *sk, u32 pmtu)
tp->pmtu_cookie = pmtu;
tp->mss_cache = tp->mss_cache_std = mss_now;
if (sk->sk_route_caps & NETIF_F_TSO) {
return mss_now;
}
/* Compute the current effective MSS, taking SACKs and IP options,
* and even PMTU discovery events into account.
*
* LARGESEND note: !urg_mode is overkill, only frames up to snd_up
* cannot be large. However, taking into account rare use of URG, this
* is not a big flaw.
*/
unsigned int tcp_current_mss(struct sock *sk, int large)
{
struct tcp_opt *tp = tcp_sk(sk);
struct dst_entry *dst = __sk_dst_get(sk);
int do_large, mss_now;
mss_now = tp->mss_cache_std;
if (dst) {
u32 mtu = dst_pmtu(dst);
if (mtu != tp->pmtu_cookie ||
tp->ext2_header_len != dst->header_len)
mss_now = tcp_sync_mss(sk, mtu);
}
do_large = (large &&
(sk->sk_route_caps & NETIF_F_TSO) &&
!tp->urg_mode);
if (do_large) {
int large_mss, factor;
large_mss = 65535 - tp->af_specific->net_header_len -
tp->ext_header_len - tp->ext2_header_len - tp->tcp_header_len;
tp->ext_header_len - tp->ext2_header_len -
tp->tcp_header_len;
if (tp->max_window && large_mss > (tp->max_window>>1))
large_mss = max((tp->max_window>>1), 68U - tp->tcp_header_len);
large_mss = max((tp->max_window>>1),
68U - tp->tcp_header_len);
/* Always keep large mss multiple of real mss, but
* do not exceed congestion window.
......@@ -646,12 +687,16 @@ int tcp_sync_mss(struct sock *sk, u32 pmtu)
factor = tp->snd_cwnd;
tp->mss_cache = mss_now * factor;
mss_now = tp->mss_cache;
}
if (tp->eff_sacks)
mss_now -= (TCPOLEN_SACK_BASE_ALIGNED +
(tp->eff_sacks * TCPOLEN_SACK_PERBLOCK));
return mss_now;
}
/* This routine writes packets to the network. It advances the
* send_head. This happens as incoming acks open up the remote
* window for us.
......@@ -949,6 +994,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
{
struct tcp_opt *tp = tcp_sk(sk);
unsigned int cur_mss = tcp_current_mss(sk, 0);
__u32 data_seq, data_end_seq;
int err;
/* Do not sent more than we queued. 1/4 is reserved for possible
......@@ -958,6 +1004,24 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
min(sk->sk_wmem_queued + (sk->sk_wmem_queued >> 2), sk->sk_sndbuf))
return -EAGAIN;
/* What is going on here? When TSO packets are partially ACK'd,
* we adjust the TCP_SKB_CB(skb)->seq value forward but we do
* not adjust the data area of the SKB. We defer that to here
* so that we can avoid the work unless we really retransmit
* the packet.
*/
data_seq = TCP_SKB_CB(skb)->seq;
data_end_seq = TCP_SKB_CB(skb)->end_seq;
if (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN)
data_end_seq--;
if (skb->len > (data_end_seq - data_seq)) {
u32 to_trim = skb->len - (data_end_seq - data_seq);
if (__tcp_trim_head(sk, skb, to_trim))
return -ENOMEM;
}
if (before(TCP_SKB_CB(skb)->seq, tp->snd_una)) {
if (before(TCP_SKB_CB(skb)->end_seq, tp->snd_una))
BUG();
......@@ -1191,6 +1255,7 @@ void tcp_send_fin(struct sock *sk)
TCP_SKB_CB(skb)->flags = (TCPCB_FLAG_ACK | TCPCB_FLAG_FIN);
TCP_SKB_CB(skb)->sacked = 0;
TCP_SKB_CB(skb)->tso_factor = 1;
TCP_SKB_CB(skb)->tso_mss = tp->mss_cache_std;
/* FIN eats a sequence byte, write_seq advanced by tcp_queue_skb(). */
TCP_SKB_CB(skb)->seq = tp->write_seq;
......@@ -1223,6 +1288,7 @@ void tcp_send_active_reset(struct sock *sk, int priority)
TCP_SKB_CB(skb)->flags = (TCPCB_FLAG_ACK | TCPCB_FLAG_RST);
TCP_SKB_CB(skb)->sacked = 0;
TCP_SKB_CB(skb)->tso_factor = 1;
TCP_SKB_CB(skb)->tso_mss = tp->mss_cache_std;
/* Send it off. */
TCP_SKB_CB(skb)->seq = tcp_acceptable_seq(sk, tp);
......@@ -1304,6 +1370,7 @@ struct sk_buff * tcp_make_synack(struct sock *sk, struct dst_entry *dst,
TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq + 1;
TCP_SKB_CB(skb)->sacked = 0;
TCP_SKB_CB(skb)->tso_factor = 1;
TCP_SKB_CB(skb)->tso_mss = tp->mss_cache_std;
th->seq = htonl(TCP_SKB_CB(skb)->seq);
th->ack_seq = htonl(req->rcv_isn + 1);
if (req->rcv_wnd == 0) { /* ignored for retransmitted syns */
......@@ -1406,6 +1473,7 @@ int tcp_connect(struct sock *sk)
TCP_ECN_send_syn(sk, tp, buff);
TCP_SKB_CB(buff)->sacked = 0;
TCP_SKB_CB(buff)->tso_factor = 1;
TCP_SKB_CB(buff)->tso_mss = tp->mss_cache_std;
buff->csum = 0;
TCP_SKB_CB(buff)->seq = tp->write_seq++;
TCP_SKB_CB(buff)->end_seq = tp->write_seq;
......@@ -1506,6 +1574,7 @@ void tcp_send_ack(struct sock *sk)
TCP_SKB_CB(buff)->flags = TCPCB_FLAG_ACK;
TCP_SKB_CB(buff)->sacked = 0;
TCP_SKB_CB(buff)->tso_factor = 1;
TCP_SKB_CB(buff)->tso_mss = tp->mss_cache_std;
/* Send it off, this clears delayed acks for us. */
TCP_SKB_CB(buff)->seq = TCP_SKB_CB(buff)->end_seq = tcp_acceptable_seq(sk, tp);
......@@ -1541,6 +1610,7 @@ static int tcp_xmit_probe_skb(struct sock *sk, int urgent)
TCP_SKB_CB(skb)->flags = TCPCB_FLAG_ACK;
TCP_SKB_CB(skb)->sacked = urgent;
TCP_SKB_CB(skb)->tso_factor = 1;
TCP_SKB_CB(skb)->tso_mss = tp->mss_cache_std;
/* Use a previous sequence. This should cause the other
* end to send an ack. Don't queue or clone SKB, just
......
......@@ -314,9 +314,11 @@ static int ipv6_rthdr_rcv(struct sk_buff **skbp, unsigned int *nhoffp)
dst_release(xchg(&skb->dst, NULL));
ip6_route_input(skb);
if (skb->dst->error) {
skb_push(skb, skb->data - skb->nh.raw);
dst_input(skb);
return -1;
}
if (skb->dst->dev->flags&IFF_LOOPBACK) {
if (skb->nh.ipv6h->hop_limit <= 1) {
IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS);
......@@ -329,6 +331,7 @@ static int ipv6_rthdr_rcv(struct sk_buff **skbp, unsigned int *nhoffp)
goto looped_back;
}
skb_push(skb, skb->data - skb->nh.raw);
dst_input(skb);
return -1;
}
......
......@@ -802,9 +802,9 @@ static void ndisc_recv_ns(struct sk_buff *skb)
}
if (inc)
nd_tbl.stats.rcv_probes_mcast++;
NEIGH_CACHE_STAT_INC(&nd_tbl, rcv_probes_mcast);
else
nd_tbl.stats.rcv_probes_ucast++;
NEIGH_CACHE_STAT_INC(&nd_tbl, rcv_probes_ucast);
/*
* update / create cache entry
......
......@@ -1003,11 +1003,12 @@ static void tcp_v6_send_reset(struct sk_buff *skb)
* and then put it into the queue to be sent.
*/
buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr), GFP_ATOMIC);
buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + sizeof(struct tcphdr),
GFP_ATOMIC);
if (buff == NULL)
return;
skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr));
skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + sizeof(struct tcphdr));
t1 = (struct tcphdr *) skb_push(buff,sizeof(struct tcphdr));
......@@ -1065,14 +1066,15 @@ static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32
struct flowi fl;
int tot_len = sizeof(struct tcphdr);
buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr), GFP_ATOMIC);
if (ts)
tot_len += 3*4;
buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
GFP_ATOMIC);
if (buff == NULL)
return;
skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr));
if (ts)
tot_len += 3*4;
skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);
t1 = (struct tcphdr *) skb_push(buff,tot_len);
......
......@@ -1621,6 +1621,7 @@ static int ipx_getname(struct socket *sock, struct sockaddr *uaddr,
sipx.sipx_family = AF_IPX;
sipx.sipx_type = ipxs->type;
sipx.sipx_zero = 0;
memcpy(uaddr, &sipx, sizeof(sipx));
rc = 0;
......@@ -1808,6 +1809,7 @@ static int ipx_recvmsg(struct kiocb *iocb, struct socket *sock,
memcpy(sipx->sipx_node, ipx->ipx_source.node, IPX_NODE_LEN);
sipx->sipx_network = IPX_SKB_CB(skb)->ipx_source_net;
sipx->sipx_type = ipx->ipx_type;
sipx->sipx_zero = 0;
}
rc = copied;
......
......@@ -536,12 +536,31 @@ void netlink_detachskb(struct sock *sk, struct sk_buff *skb)
sock_put(sk);
}
static inline void netlink_trim(struct sk_buff *skb, int allocation)
{
int delta = skb->end - skb->tail;
/* If the packet is charged to a socket, the modification
* of truesize below is illegal and will corrupt socket
* buffer accounting state.
*/
BUG_ON(skb->list != NULL);
if (delta * 2 < skb->truesize)
return;
if (pskb_expand_head(skb, 0, -delta, allocation))
return;
skb->truesize -= delta;
}
int netlink_unicast(struct sock *ssk, struct sk_buff *skb, u32 pid, int nonblock)
{
struct sock *sk;
int err;
long timeo;
netlink_trim(skb, gfp_any());
timeo = sock_sndtimeo(ssk, nonblock);
retry:
sk = netlink_getsockbypid(ssk, pid);
......@@ -588,6 +607,8 @@ int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 pid,
int protocol = ssk->sk_protocol;
int failure = 0, delivered = 0;
netlink_trim(skb, allocation);
/* While we sleep in clone, do not allow to change socket list */
netlink_lock_table();
......@@ -1220,7 +1241,6 @@ MODULE_ALIAS_NETPROTO(PF_NETLINK);
EXPORT_SYMBOL(netlink_ack);
EXPORT_SYMBOL(netlink_broadcast);
EXPORT_SYMBOL(netlink_broadcast_deliver);
EXPORT_SYMBOL(netlink_dump_start);
EXPORT_SYMBOL(netlink_kernel_create);
EXPORT_SYMBOL(netlink_register_notifier);
......
......@@ -371,6 +371,8 @@ int qdisc_graft(struct net_device *dev, struct Qdisc *parent, u32 classid,
unsigned long cl = cops->get(parent, classid);
if (cl) {
err = cops->graft(parent, cl, new, old);
if (new)
new->parent = classid;
cops->put(parent, cl);
}
}
......@@ -821,7 +823,7 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
q_idx++;
continue;
}
if (tc_fill_qdisc(skb, q, 0, NETLINK_CB(cb->skb).pid,
if (tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).pid,
cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0) {
read_unlock_bh(&qdisc_tree_lock);
goto done;
......
......@@ -372,7 +372,6 @@ static void sfq_perturbation(unsigned long arg)
struct sfq_sched_data *q = qdisc_priv(sch);
q->perturbation = net_random()&0x1F;
q->perturb_timer.expires = jiffies + q->perturb_period;
if (q->perturb_period) {
q->perturb_timer.expires = jiffies + q->perturb_period;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment