数据校验是对数据进行检查、验证和清理的过程,主要目的是确保数据的准确性、完整性和一致性。在通讯或者数据仓库中,数据校验的重要性体现在提高数据质量、保证数据一致性和提高数据可靠性等方面。下面记录几种常用的数据校验方法。
1.和校验
原理:
将数据的各个部分(通常按字节或字)相加,生成一个校验值(checksum),接收端重新计算校验值并与收到的校验值比较,以验证数据完整性。
使用场景:
网络通信(TCP/IP、HTTP/FTP)、文件校验、嵌入式系统等。
代码实现:
#include <stdio.h>
#include <stdint.h>
/**
 * Compute the 8-bit additive checksum of a buffer.
 *
 * Every byte is added into an 8-bit accumulator, so the result is the byte
 * sum modulo 256 (carries out of bit 7 are discarded).
 *
 * @param data  bytes to sum; only read, never modified
 * @param len   number of bytes in data; 0 yields a checksum of 0
 * @return      low 8 bits of the sum of all bytes
 */
uint8_t calculate_checksum(const uint8_t *data, size_t len) {
    uint8_t sum = 0;
    for (size_t i = 0; i < len; i++) {
        /* Explicit narrowing: the addition is done in int, keep the low byte. */
        sum = (uint8_t)(sum + data[i]);
    }
    return sum;
}
/**
 * Check a received checksum against one recomputed from the data.
 *
 * @param data               bytes the checksum was computed over
 * @param len                number of bytes in data
 * @param received_checksum  checksum value to compare against
 * @return                   1 when the recomputed checksum equals
 *                           received_checksum, 0 otherwise
 */
int verify_checksum(uint8_t *data, size_t len, uint8_t received_checksum) {
    if (calculate_checksum(data, len) != received_checksum) {
        return 0;
    }
    return 1;
}
// Test driver: prints a sample buffer, its checksum, and the verification result.
int main() {
    uint8_t data[] = {0x12, 0x34, 0x56, 0x78};
    size_t len = sizeof(data);
    uint8_t checksum = calculate_checksum(data, len);

    printf("Data: ");
    size_t idx = 0;
    while (idx < len) {
        printf("0x%02X ", data[idx]);
        idx++;
    }
    printf("\nChecksum: 0x%02X\n", checksum);

    /* Verifying against the checksum just computed is expected to succeed. */
    const char *verdict = verify_checksum(data, len, checksum) ? "Valid" : "Invalid";
    printf("Verification: %s\n", verdict);
    return 0;
}
2.异或校验
原理:
异或校验通过对数据的每个字节逐一进行异或(XOR)运算生成校验值。发送端计算校验值并附加到数据,接收端对数据(含校验值)再次异或,若结果为0,则数据无误。
使用场景:
串口通信、嵌入式系统、简单协议小数据传输
代码实现:
uint8_t CheckXOR(uint8_t *Buf, uint8_t Len)
{
uint8_t i = 0;
uint8_t x = 0;
for(i=0; i { x = x^(*(Buf+i)); } return x; } 3.奇偶校验 原理: 奇偶校验通过在数据末尾添加一位校验位,确保数据中1的总数符合奇数(奇校验)或偶数(偶校验)。发送端计算校验位,接收端重新计算并比较,若不符则表示有错误。 使用场景: 串口通信、存储设备、低速通信、嵌入式系统 代码实现: #include #include uint8_t calculate_parity(uint8_t data) { uint8_t ones = 0; while (data) { ones += data & 1; data >>= 1; } return ones % 2; // 返回0(偶数个1)或1(奇数个1) } int check_parity(uint8_t data, uint8_t parity_bit) { return calculate_parity(data) == parity_bit; } int main() { uint8_t data = 0b10110010; uint8_t parity = calculate_parity(data); printf("Data: 0x%02X, Parity Bit: %d\n", data, parity); printf("Parity Check: %s\n", check_parity(data, parity) ? "Valid" : "Invalid"); return 0; } 4.冗余校验 原理: 冗余校验通过在数据中添加冗余信息(如额外位、字节或数据块)来检测错误。常见形式包括重复数据、校验位或编码(如汉明码)。发送端生成冗余信息,接收端检查其一致性,若不符则表示错误。能检测甚至纠正错误,视冗余程度而定。 使用场景: 通信系统、存储设备、网络协议、航空航天 代码实现: #include #include #define CRC16_POLY 0x8005 uint16_t calculate_crc16(uint8_t *data, size_t len) { uint16_t crc = 0xFFFF; for (size_t i = 0; i < len; i++) { crc ^= (uint16_t)data[i] << 8; for (int j = 0; j < 8; j++) { if (crc & 0x8000) { crc = (crc << 1) ^ CRC16_POLY; } else { crc <<= 1; } } } return crc; } int main() { uint8_t data[] = {0x12, 0x34, 0x56}; uint16_t crc = calculate_crc16(data, sizeof(data)); printf("CRC-16: 0x%04X\n", crc); return 0; } 5.Luhn校验 原理: Luhn算法用于验证数字序列(如信用卡号)的有效性。从右到左,奇数位数字直接相加,偶数位数字乘2(若结果大于9则减9)后相加。所有数字之和若能被10整除,则校验通过。简单高效,主要检测输入错误。 使用场景: 金融行业、身份验证、数据录入、电子商务 代码实现: #include #include #include int luhn_check(const char *number) { int sum = 0, is_even = 0; for (int i = strlen(number) - 1; i >= 0; i--) { if (!isdigit(number[i])) continue; int digit = number[i] - '0'; if (is_even) { digit *= 2; if (digit > 9) digit -= 9; } sum += digit; is_even = !is_even; } return sum % 10 == 0; } int main() { const char *number = "4532015112830366"; // 卡号 printf("Number: %s\n", number); printf("Luhn Check: %s\n", luhn_check(number) ? 
"Valid" : "Invalid"); return 0; } 6.MD5 原理: MD5(Message Digest Algorithm 5)是一种哈希算法,将任意长度的数据输入映射为固定128位(16字节)的哈希值。核心步骤包括数据填充、分块处理、循环运算(通过位运算和非线性函数),生成唯一性强的校验值。数据微小变化会导致哈希值完全不同,适合验证数据完整性,但不适合加密(因碰撞风险)。 使用场景: 文件校验、数据完整性、软件分发、版本控制 代码实现: #include #include #include // MD5结构体 typedef struct { uint32_t state[4]; uint32_t count[2]; unsigned char buffer[64]; } MD5_CTX; //MD5转换规则 #define S11 7 #define S12 12 #define S13 17 #define S14 22 #define S21 5 #define S22 9 #define S23 14 #define S24 20 #define S31 4 #define S32 11 #define S33 16 #define S34 23 #define S41 6 #define S42 10 #define S43 15 #define S44 21 static unsigned char PADDING[64] = { 0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; #define F(x, y, z) (((x) & (y)) | ((~x) & (z))) #define G(x, y, z) (((x) & (z)) | ((y) & (~z))) #define H(x, y, z) ((x) ^ (y) ^ (z)) #define I(x, y, z) ((y) ^ ((x) | (~z))) #define ROTATE_LEFT(x, n) (((x) << (n)) | ((x) >> (32-(n)))) #define FF(a, b, c, d, x, s, ac) { \ (a) += F ((b), (c), (d)) + (x) + (uint32_t)(ac); \ (a) = ROTATE_LEFT ((a), (s)); \ (a) += (b); \ } #define GG(a, b, c, d, x, s, ac) { \ (a) += G ((b), (c), (d)) + (x) + (uint32_t)(ac); \ (a) = ROTATE_LEFT ((a), (s)); \ (a) += (b); \ } #define HH(a, b, c, d, x, s, ac) { \ (a) += H ((b), (c), (d)) + (x) + (uint32_t)(ac); \ (a) = ROTATE_LEFT ((a), (s)); \ (a) += (b); \ } #define II(a, b, c, d, x, s, ac) { \ (a) += I ((b), (c), (d)) + (x) + (uint32_t)(ac); \ (a) = ROTATE_LEFT ((a), (s)); \ (a) += (b); \ } static void Encode (unsigned char *output, uint32_t *input, unsigned int len) { unsigned int i, j; for (i = 0, j = 0; j < len; i++, j += 4) { output[j] = (unsigned char)(input[i] & 0xff); output[j+1] = (unsigned char)((input[i] >> 8) & 0xff); output[j+2] = (unsigned char)((input[i] >> 16) & 0xff); output[j+3] = (unsigned char)((input[i] >> 24) & 0xff); } } static void Decode 
(uint32_t *output, unsigned char *input, unsigned int len) { unsigned int i, j; for (i = 0, j = 0; j < len; i++, j += 4) output[i] = ((uint32_t)input[j]) | (((uint32_t)input[j+1]) << 8) | (((uint32_t)input[j+2]) << 16) | (((uint32_t)input[j+3]) << 24); } static void MD5Transform (uint32_t state[4], unsigned char block[64]) { uint32_t a = state[0], b = state[1], c = state[2], d = state[3], x[16]; Decode (x, block, 64); //Round 1 FF (a, b, c, d, x[ 0], S11, 0xd76aa478); /* 1 */ FF (d, a, b, c, x[ 1], S12, 0xe8c7b756); /* 2 */ FF (c, d, a, b, x[ 2], S13, 0x242070db); /* 3 */ FF (b, c, d, a, x[ 3], S14, 0xc1bdceee); /* 4 */ FF (a, b, c, d, x[ 4], S11, 0xf57c0faf); /* 5 */ FF (d, a, b, c, x[ 5], S12, 0x4787c62a); /* 6 */ FF (c, d, a, b, x[ 6], S13, 0xa8304613); /* 7 */ FF (b, c, d, a, x[ 7], S14, 0xfd469501); /* 8 */ FF (a, b, c, d, x[ 8], S11, 0x698098d8); /* 9 */ FF (d, a, b, c, x[ 9], S12, 0x8b44f7af); /* 10 */ FF (c, d, a, b, x[10], S13, 0xfffa3942); /* 11 */ FF (b, c, d, a, x[11], S14, 0x8771f681); /* 12 */ FF (a, b, c, d, x[12], S11, 0x6d9d6122); /* 13 */ FF (d, a, b, c, x[13], S12, 0xfde5380c); /* 14 */ FF (c, d, a, b, x[14], S13, 0xa4beea44); /* 15 */ FF (b, c, d, a, x[15], S14, 0x4bdecfa9); /* 16 */ // Round 2 GG (a, b, c, d, x[ 1], S21, 0xf6bb4b60); /* 17 */ GG (d, a, b, c, x[ 6], S22, 0xbebfbc70); /* 18 */ GG (c, d, a, b, x[11], S23, 0x289b7ec6); /* 19 */ GG (b, c, d, a, x[ 0], S24, 0xeaa127fa); /* 20 */ GG (a, b, c, d, x[ 5], S21, 0xd4ef3085); /* 21 */ GG (d, a, b, c, x[10], S22, 0x04881d05); /* 22 */ GG (c, d, a, b, x[15], S23, 0xd9d4d039); /* 23 */ GG (b, c, d, a, x[ 4], S24, 0xe6db99e5); /* 24 */ GG (a, b, c, d, x[ 9], S21, 0x1fa27cf8); /* 25 */ GG (d, a, b, c, x[14], S22, 0xc4ac5665); /* 26 */ GG (c, d, a, b, x[ 3], S23, 0xf4292244); /* 27 */ GG (b, c, d, a, x[ 8], S24, 0x432aff97); /* 28 */ GG (a, b, c, d, x[13], S21, 0xab9423a7); /* 29 */ GG (d, a, b, c, x[ 2], S22, 0xfc93a039); /* 30 */ GG (c, d, a, b, x[ 7], S23, 0x655b59c3); /* 31 */ GG (b, c, d, a, 
x[12], S24, 0x8f0ccc92); /* 32 */ //Round 3 HH (a, b, c, d, x[ 5], S31, 0xfffa3942); /* 33 */ HH (d, a, b, c, x[ 8], S32, 0x8771f681); /* 34 */ HH (c, d, a, b, x[11], S33, 0x6d9d6122); /* 35 */ HH (b, c, d, a, x[14], S34, 0xfde5380c); /* 36 */ HH (a, b, c, d, x[ 1], S31, 0xa4beea44); /* 37 */ HH (d, a, b, c, x[ 4], S32, 0x4bdecfa9); /* 38 */ HH (c, d, a, b, x[ 7], S33, 0xf6bb4b60); /* 39 */ HH (b, c, d, a, x[10], S34, 0xbebfbc70); /* 40 */ HH (a, b, c, d, x[13], S31, 0x289b7ec6); /* 41 */ HH (d, a, b, c, x[ 0], S32, 0xeaa127fa); /* 42 */ HH (c, d, a, b, x[ 3], S33, 0xd4ef3085); /* 43 */ HH (b, c, d, a, x[ 6], S34, 0x04881d05); /* 44 */ HH (a, b, c, d, x[ 9], S31, 0xd9d4d039); /* 45 */ HH (d, a, b, c, x[12], S32, 0xe6db99e5); /* 46 */ HH (c, d, a, b, x[15], S33, 0x1fa27cf8); /* 47 */ HH (b, c, d, a, x[ 2], S34, 0xc4ac5665); /* 48 */ //Round 4 II (a, b, c, d, x[ 0], S41, 0xf4292244); /* 49 */ II (d, a, b, c, x[ 7], S42, 0x432aff97); /* 50 */ II (c, d, a, b, x[14], S43, 0xab9423a7); /* 51 */ II (b, c, d, a, x[ 5], S44, 0xfc93a039); /* 52 */ II (a, b, c, d, x[12], S41, 0x655b59c3); /* 53 */ II (d, a, b, c, x[ 3], S42, 0x8f0ccc92); /* 54 */ II (c, d, a, b, x[10], S43, 0xfffa3942); /* 55 */ II (b, c, d, a, x[ 1], S44, 0x8771f681); /* 56 */ II (a, b, c, d, x[ 8], S41, 0x6d9d6122); /* 57 */ II (d, a, b, c, x[15], S42, 0xfde5380c); /* 58 */ II (c, d, a, b, x[ 6], S43, 0xa4beea44); /* 59 */ II (b, c, d, a, x[13], S44, 0x4bdecfa9); /* 60 */ II (a, b, c, d, x[ 4], S41, 0xf6bb4b60); /* 61 */ II (d, a, b, c, x[11], S42, 0xbebfbc70); /* 62 */ II (c, d, a, b, x[ 2], S43, 0x289b7ec6); /* 63 */ II (b, c, d, a, x[ 9], S44, 0xeaa127fa); /* 64 */ state[0] += a; state[1] += b; state[2] += c; state[3] += d; } // MD5初始化 void MD5Init (MD5_CTX *context) { context->count[0] = context->count[1] = 0; // Load magic initialization constants. 
context->state[0] = 0x67452301; context->state[1] = 0xefcdab89; context->state[2] = 0x98badcfe; context->state[3] = 0x10325476; } void MD5Update (MD5_CTX *context, unsigned char *input, unsigned int inputLen) { unsigned int i, index, partLen; index = (unsigned int)((context->count[0] >> 3) & 0x3F); if ((context->count[0] += ((uint32_t)inputLen << 3)) < ((uint32_t)inputLen << 3)) context->count[1]++; context->count[1] += ((uint32_t)inputLen >> 29); partLen = 64 - index; if (inputLen >= partLen) { memcpy (&context->buffer[index], input, partLen); MD5Transform (context->state, context->buffer); for (i = partLen; i + 63 < inputLen; i += 64) MD5Transform (context->state, &input[i]); index = 0; memcpy (context->buffer, &input[i], inputLen - i); } else memcpy (&context->buffer[index], input, inputLen); } // MD5 完成 void MD5Final (unsigned char digest[16], MD5_CTX *context) { unsigned int index, padLen; unsigned char bits[8]; index = (unsigned int)((context->count[0] >> 3) & 0x3f); padLen = (index < 56) ? (56 - index) : (120 - index); MD5Update (context, PADDING, padLen); Encode (bits, context->count, 8); MD5Update (context, bits, 8); Encode (digest, context->state, 16); memset (context, 0, sizeof (*context)); } //计算MD5哈希值 void MD5(const char *string, unsigned char digest[16]) { MD5_CTX context; MD5Init(&context); MD5Update(&context, (unsigned char*)string, strlen(string)); MD5Final(digest, &context); } //测试 int main(int argc, char *argv[]) { if (argc != 2) { printf("Usage: %s return 1; } unsigned char digest[16]; MD5(argv[1], digest); printf("MD5 (\"%s\") = ", argv[1]); for (int i = 0; i < 16; i++) printf("%02x", digest[i]); printf("\n"); return 0; }