整理几种常用的数据校验方法

数据校验是对数据进行检查、验证和清理的过程,主要目的是确保数据的准确性、完整性和一致性。在通讯或者数据仓库中,数据校验的重要性体现在提高数据质量、保证数据一致性和提高数据可靠性等方面。下面记录几种常用的数据校验方法。

1.和校验

原理:

数据的各个部分(通常按字节或字)相加,生成一个校验值(checksum),然后在接收端重新计算并比较校验值以验证数据完整性。

使用场景:

网络通信(TCP/IP、HTTP/FTP)、文件校验、嵌入式系统等。

代码实现:

#include <stdio.h>

#include <stdint.h>

// Compute an 8-bit additive checksum.
//
// Adds every byte of data[0..len) into an 8-bit accumulator; the sum wraps
// modulo 256. Returns 0 when len is 0.
uint8_t calculate_checksum(uint8_t *data, size_t len) {
    uint8_t total = 0;
    const uint8_t *cursor = data;
    size_t remaining = len;

    while (remaining > 0) {
        total = (uint8_t)(total + *cursor);
        cursor++;
        remaining--;
    }
    return total;
}

// Check a received checksum against the data it is supposed to cover.
//
// Recomputes the checksum of data[0..len) with calculate_checksum() and
// returns 1 if it equals received_checksum, 0 otherwise.
int verify_checksum(uint8_t *data, size_t len, uint8_t received_checksum) {
    if (calculate_checksum(data, len) != received_checksum) {
        return 0;
    }
    return 1;
}

// Test: checksum a fixed 4-byte sample, print the bytes and the checksum,
// then confirm that verification against that same checksum succeeds.
int main() {
    uint8_t data[] = {0x12, 0x34, 0x56, 0x78};
    const size_t len = sizeof(data);
    const uint8_t checksum = calculate_checksum(data, len);

    printf("Data: ");
    size_t pos = 0;
    while (pos < len) {
        printf("0x%02X ", data[pos]);
        pos++;
    }
    printf("\nChecksum: 0x%02X\n", checksum);

    const char *verdict = verify_checksum(data, len, checksum) ? "Valid" : "Invalid";
    printf("Verification: %s\n", verdict);

    return 0;
}

2.异或校验

原理:

异或校验通过对数据的每个字节逐一进行*异或(XOR)*运算生成校验值。发送端计算校验值并附加到数据,接收端对数据(含校验值)再次异或,若结果为0,则数据无误。

使用场景:

串口通信、嵌入式系统、简单协议小数据传输

代码实现:

/*
 * Compute the XOR checksum of a buffer.
 *
 * Buf: bytes to fold together.
 * Len: number of bytes to process (0..255, limited by the uint8_t type).
 *
 * Returns the XOR of Buf[0..Len), or 0 when Len is 0. XOR-ing a buffer
 * together with its own checksum byte therefore yields 0.
 */
uint8_t CheckXOR(uint8_t *Buf, uint8_t Len)
{
    uint8_t x = 0;

    /* The loop bound and increment were missing from the original text. */
    for (uint8_t i = 0; i < Len; i++)
    {
        x = (uint8_t)(x ^ Buf[i]);
    }

    return x;
}

3.奇偶校验

原理:

奇偶校验通过在数据末尾添加一位校验位,确保数据中1的总数符合奇数(奇校验)或偶数(偶校验)。发送端计算校验位,接收端重新计算并比较,若不符则表示有错误。

使用场景:

串口通信、存储设备、低速通信、嵌入式系统

代码实现:

#include <stdio.h>

#include <stdint.h>

// Compute the parity of one byte.
//
// Returns 1 when data contains an odd number of 1 bits and 0 when the number
// of 1 bits is even (including data == 0).
uint8_t calculate_parity(uint8_t data) {
    uint8_t parity = 0;

    // Toggle the result once per set bit instead of counting and taking % 2.
    for (uint8_t bits = data; bits != 0; bits >>= 1) {
        parity ^= (uint8_t)(bits & 1u);
    }
    return parity;
}

// Verify a parity bit.
//
// Returns 1 if parity_bit equals the parity computed by calculate_parity()
// for data, 0 otherwise.
int check_parity(uint8_t data, uint8_t parity_bit) {
    const uint8_t expected = calculate_parity(data);
    return expected == parity_bit;
}

// Demo: compute the parity bit of a sample byte, print it, and verify it.
int main() {
    // 0xB2 is 1011 0010 in binary. Written in hex because the 0b prefix is
    // only standard from C23; older compilers accept it merely as an extension.
    uint8_t data = 0xB2;

    uint8_t parity = calculate_parity(data);

    printf("Data: 0x%02X, Parity Bit: %d\n", data, parity);
    printf("Parity Check: %s\n", check_parity(data, parity) ? "Valid" : "Invalid");

    return 0;
}

4.冗余校验

原理:

冗余校验通过在数据中添加冗余信息(如额外位、字节或数据块)来检测错误。常见形式包括重复数据、校验位或编码(如汉明码)。发送端生成冗余信息,接收端检查其一致性,若不符则表示错误。能检测甚至纠正错误,视冗余程度而定。下面的示例代码实现的是其中最常用的一种:循环冗余校验(CRC-16,多项式 0x8005,初始值 0xFFFF,高位在前)。

使用场景:

通信系统、存储设备、网络协议、航空航天

代码实现:

#include <stdio.h>

#include <stdint.h>

// Generator polynomial used by calculate_crc16().
#define CRC16_POLY 0x8005

// Compute a 16-bit CRC over data[0..len).
//
// The register starts at 0xFFFF. Each input byte is XOR-ed into the high byte
// of the register, then the register is shifted left eight times; whenever
// the bit shifted out is 1, CRC16_POLY is XOR-ed in. No final XOR is applied,
// so an empty input returns 0xFFFF.
uint16_t calculate_crc16(uint8_t *data, size_t len) {
    uint16_t reg = 0xFFFF;
    const uint8_t *byte = data;

    for (size_t remaining = len; remaining > 0; remaining--, byte++) {
        reg ^= (uint16_t)(*byte << 8);

        for (int bit = 8; bit > 0; bit--) {
            const int msb_set = (reg & 0x8000) != 0;

            reg = (uint16_t)(reg << 1);
            if (msb_set) {
                reg ^= CRC16_POLY;
            }
        }
    }
    return reg;
}

// Demo: print the CRC-16 of a fixed 3-byte sample as four hex digits.
int main() {
    uint8_t data[] = {0x12, 0x34, 0x56};
    const size_t len = sizeof(data);
    const uint16_t crc = calculate_crc16(data, len);

    printf("CRC-16: 0x%04X\n", crc);

    return 0;
}

5.Luhn校验

原理:

Luhn算法用于验证数字序列(如信用卡号)的有效性。从右到左,奇数位数字直接相加,偶数位数字乘2(若结果大于9则减9)后相加。所有数字之和若能被10整除,则校验通过。简单高效,主要检测输入错误。

使用场景:

金融行业、身份验证、数据录入、电子商务

代码实现:

#include <stdio.h>

#include <string.h>

#include <ctype.h>

/*
 * Validate a digit string with the Luhn algorithm.
 *
 * number: NUL-terminated string. Characters that are not decimal digits
 *         (spaces, dashes, ...) are skipped, so "4532-0151-..." is accepted.
 *
 * Walking from the rightmost digit to the left, every second digit is
 * doubled (minus 9 if the result exceeds 9) and all digits are summed.
 *
 * Returns 1 when the string contains at least one digit and the sum is a
 * multiple of 10; returns 0 otherwise, including for NULL, an empty string,
 * or a string without any digit.
 */
int luhn_check(const char *number) {
    if (number == NULL) {
        return 0;
    }

    int sum = 0;
    int double_next = 0;   /* set for every second digit, counted from the right */
    int digits_seen = 0;

    /* Count down with size_t so the string length is never narrowed to int. */
    for (size_t i = strlen(number); i > 0; i--) {
        /* <ctype.h> functions require a value representable as unsigned char. */
        const unsigned char ch = (unsigned char)number[i - 1];

        if (!isdigit(ch)) {
            continue;
        }

        int digit = ch - '0';

        if (double_next) {
            digit *= 2;
            if (digit > 9) {
                digit -= 9;
            }
        }

        /* Only the sum modulo 10 matters; reducing here keeps it bounded. */
        sum = (sum + digit) % 10;
        double_next = !double_next;
        digits_seen = 1;
    }

    return digits_seen && sum == 0;
}

// Demo: run the Luhn check on a sample card number and print the verdict.
int main() {
    const char *number = "4532015112830366"; // card number

    printf("Number: %s\n", number);

    const char *verdict = luhn_check(number) ? "Valid" : "Invalid";
    printf("Luhn Check: %s\n", verdict);

    return 0;
}

6.MD5

原理:

MD5(Message Digest Algorithm 5)是一种哈希(摘要)算法,将任意长度的数据输入映射为固定128位(16字节)的哈希值。核心步骤包括数据填充、分块处理、循环运算(通过位运算和非线性函数),生成区分度很高的校验值。数据微小变化会导致哈希值完全不同,适合检测数据是否被意外损坏;但 MD5 已存在实用的碰撞攻击,因此不适合用于数字签名、密码存储等安全相关的场景。

使用场景:

文件校验、数据完整性、软件分发、版本控制

代码实现:

#include <stdio.h>

#include <string.h>

#include <stdint.h>

// MD5 hashing context.
typedef struct {
    uint32_t state[4];        // running digest words A, B, C, D
    uint32_t count[2];        // number of message bits processed, low word first
    unsigned char buffer[64]; // input bytes that do not yet fill a 64-byte block
} MD5_CTX;

// Left-rotation amounts for the four rounds of MD5Transform (RFC 1321).
#define S11 7
#define S12 12
#define S13 17
#define S14 22
#define S21 5
#define S22 9
#define S23 14
#define S24 20
#define S31 4
#define S32 11
#define S33 16
#define S34 23
#define S41 6
#define S42 10
#define S43 15
#define S44 21

// Padding appended by MD5Final: a single 0x80 byte followed by zeros.
static const unsigned char PADDING[64] = { 0x80 };

// The four basic MD5 functions. Every argument is parenthesized so that
// expression arguments expand correctly.
#define F(x, y, z) (((x) & (y)) | ((~(x)) & (z)))
#define G(x, y, z) (((x) & (z)) | ((y) & (~(z))))
#define H(x, y, z) ((x) ^ (y) ^ (z))
#define I(x, y, z) ((y) ^ ((x) | (~(z))))

// Rotate the 32-bit value x left by n bits (0 < n < 32).
#define ROTATE_LEFT(x, n) (((x) << (n)) | ((x) >> (32 - (n))))

// One step of rounds 1-4. Wrapped in do { } while (0) so each use behaves as
// a single statement.
#define FF(a, b, c, d, x, s, ac) do { \
    (a) += F((b), (c), (d)) + (x) + (uint32_t)(ac); \
    (a) = ROTATE_LEFT((a), (s)); \
    (a) += (b); \
} while (0)

#define GG(a, b, c, d, x, s, ac) do { \
    (a) += G((b), (c), (d)) + (x) + (uint32_t)(ac); \
    (a) = ROTATE_LEFT((a), (s)); \
    (a) += (b); \
} while (0)

#define HH(a, b, c, d, x, s, ac) do { \
    (a) += H((b), (c), (d)) + (x) + (uint32_t)(ac); \
    (a) = ROTATE_LEFT((a), (s)); \
    (a) += (b); \
} while (0)

#define II(a, b, c, d, x, s, ac) do { \
    (a) += I((b), (c), (d)) + (x) + (uint32_t)(ac); \
    (a) = ROTATE_LEFT((a), (s)); \
    (a) += (b); \
} while (0)

// Serialize 32-bit words into bytes, least significant byte first.
// len is the number of output bytes and must be a multiple of 4.
static void Encode(unsigned char *output, const uint32_t *input, unsigned int len) {
    unsigned int i, j;

    for (i = 0, j = 0; j < len; i++, j += 4) {
        output[j]     = (unsigned char)(input[i] & 0xff);
        output[j + 1] = (unsigned char)((input[i] >> 8) & 0xff);
        output[j + 2] = (unsigned char)((input[i] >> 16) & 0xff);
        output[j + 3] = (unsigned char)((input[i] >> 24) & 0xff);
    }
}

// Assemble bytes into 32-bit words, least significant byte first.
// len is the number of input bytes and must be a multiple of 4.
static void Decode(uint32_t *output, const unsigned char *input, unsigned int len) {
    unsigned int i, j;

    for (i = 0, j = 0; j < len; i++, j += 4) {
        output[i] = ((uint32_t)input[j]) |
                    (((uint32_t)input[j + 1]) << 8) |
                    (((uint32_t)input[j + 2]) << 16) |
                    (((uint32_t)input[j + 3]) << 24);
    }
}

// Core MD5 compression: mix one 64-byte block into state.
// The additive constants are the table T[1..64] from RFC 1321.
static void MD5Transform(uint32_t state[4], const unsigned char block[64]) {
    uint32_t a = state[0], b = state[1], c = state[2], d = state[3], x[16];

    Decode(x, block, 64);

    // Round 1
    FF(a, b, c, d, x[ 0], S11, 0xd76aa478); /* 1 */
    FF(d, a, b, c, x[ 1], S12, 0xe8c7b756); /* 2 */
    FF(c, d, a, b, x[ 2], S13, 0x242070db); /* 3 */
    FF(b, c, d, a, x[ 3], S14, 0xc1bdceee); /* 4 */
    FF(a, b, c, d, x[ 4], S11, 0xf57c0faf); /* 5 */
    FF(d, a, b, c, x[ 5], S12, 0x4787c62a); /* 6 */
    FF(c, d, a, b, x[ 6], S13, 0xa8304613); /* 7 */
    FF(b, c, d, a, x[ 7], S14, 0xfd469501); /* 8 */
    FF(a, b, c, d, x[ 8], S11, 0x698098d8); /* 9 */
    FF(d, a, b, c, x[ 9], S12, 0x8b44f7af); /* 10 */
    FF(c, d, a, b, x[10], S13, 0xffff5bb1); /* 11 */
    FF(b, c, d, a, x[11], S14, 0x895cd7be); /* 12 */
    FF(a, b, c, d, x[12], S11, 0x6b901122); /* 13 */
    FF(d, a, b, c, x[13], S12, 0xfd987193); /* 14 */
    FF(c, d, a, b, x[14], S13, 0xa679438e); /* 15 */
    FF(b, c, d, a, x[15], S14, 0x49b40821); /* 16 */

    // Round 2
    GG(a, b, c, d, x[ 1], S21, 0xf61e2562); /* 17 */
    GG(d, a, b, c, x[ 6], S22, 0xc040b340); /* 18 */
    GG(c, d, a, b, x[11], S23, 0x265e5a51); /* 19 */
    GG(b, c, d, a, x[ 0], S24, 0xe9b6c7aa); /* 20 */
    GG(a, b, c, d, x[ 5], S21, 0xd62f105d); /* 21 */
    GG(d, a, b, c, x[10], S22, 0x02441453); /* 22 */
    GG(c, d, a, b, x[15], S23, 0xd8a1e681); /* 23 */
    GG(b, c, d, a, x[ 4], S24, 0xe7d3fbc8); /* 24 */
    GG(a, b, c, d, x[ 9], S21, 0x21e1cde6); /* 25 */
    GG(d, a, b, c, x[14], S22, 0xc33707d6); /* 26 */
    GG(c, d, a, b, x[ 3], S23, 0xf4d50d87); /* 27 */
    GG(b, c, d, a, x[ 8], S24, 0x455a14ed); /* 28 */
    GG(a, b, c, d, x[13], S21, 0xa9e3e905); /* 29 */
    GG(d, a, b, c, x[ 2], S22, 0xfcefa3f8); /* 30 */
    GG(c, d, a, b, x[ 7], S23, 0x676f02d9); /* 31 */
    GG(b, c, d, a, x[12], S24, 0x8d2a4c8a); /* 32 */

    // Round 3
    HH(a, b, c, d, x[ 5], S31, 0xfffa3942); /* 33 */
    HH(d, a, b, c, x[ 8], S32, 0x8771f681); /* 34 */
    HH(c, d, a, b, x[11], S33, 0x6d9d6122); /* 35 */
    HH(b, c, d, a, x[14], S34, 0xfde5380c); /* 36 */
    HH(a, b, c, d, x[ 1], S31, 0xa4beea44); /* 37 */
    HH(d, a, b, c, x[ 4], S32, 0x4bdecfa9); /* 38 */
    HH(c, d, a, b, x[ 7], S33, 0xf6bb4b60); /* 39 */
    HH(b, c, d, a, x[10], S34, 0xbebfbc70); /* 40 */
    HH(a, b, c, d, x[13], S31, 0x289b7ec6); /* 41 */
    HH(d, a, b, c, x[ 0], S32, 0xeaa127fa); /* 42 */
    HH(c, d, a, b, x[ 3], S33, 0xd4ef3085); /* 43 */
    HH(b, c, d, a, x[ 6], S34, 0x04881d05); /* 44 */
    HH(a, b, c, d, x[ 9], S31, 0xd9d4d039); /* 45 */
    HH(d, a, b, c, x[12], S32, 0xe6db99e5); /* 46 */
    HH(c, d, a, b, x[15], S33, 0x1fa27cf8); /* 47 */
    HH(b, c, d, a, x[ 2], S34, 0xc4ac5665); /* 48 */

    // Round 4
    II(a, b, c, d, x[ 0], S41, 0xf4292244); /* 49 */
    II(d, a, b, c, x[ 7], S42, 0x432aff97); /* 50 */
    II(c, d, a, b, x[14], S43, 0xab9423a7); /* 51 */
    II(b, c, d, a, x[ 5], S44, 0xfc93a039); /* 52 */
    II(a, b, c, d, x[12], S41, 0x655b59c3); /* 53 */
    II(d, a, b, c, x[ 3], S42, 0x8f0ccc92); /* 54 */
    II(c, d, a, b, x[10], S43, 0xffeff47d); /* 55 */
    II(b, c, d, a, x[ 1], S44, 0x85845dd1); /* 56 */
    II(a, b, c, d, x[ 8], S41, 0x6fa87e4f); /* 57 */
    II(d, a, b, c, x[15], S42, 0xfe2ce6e0); /* 58 */
    II(c, d, a, b, x[ 6], S43, 0xa3014314); /* 59 */
    II(b, c, d, a, x[13], S44, 0x4e0811a1); /* 60 */
    II(a, b, c, d, x[ 4], S41, 0xf7537e82); /* 61 */
    II(d, a, b, c, x[11], S42, 0xbd3af235); /* 62 */
    II(c, d, a, b, x[ 2], S43, 0x2ad7d2bb); /* 63 */
    II(b, c, d, a, x[ 9], S44, 0xeb86d391); /* 64 */

    state[0] += a;
    state[1] += b;
    state[2] += c;
    state[3] += d;
}

// MD5 initialization: reset the bit count and load the initial state words.
void MD5Init(MD5_CTX *context) {
    context->count[0] = context->count[1] = 0;

    // Load magic initialization constants.
    context->state[0] = 0x67452301;
    context->state[1] = 0xefcdab89;
    context->state[2] = 0x98badcfe;
    context->state[3] = 0x10325476;
}

// Feed inputLen bytes of input into the hash.
//
// Updates the bit count, runs MD5Transform on every complete 64-byte block
// (first topping up any partially filled buffer), and keeps the leftover
// bytes in context->buffer for the next call. May be called repeatedly.
void MD5Update(MD5_CTX *context, const unsigned char *input, unsigned int inputLen) {
    unsigned int i, index, partLen;

    // Number of bytes already waiting in the buffer (byte count mod 64).
    index = (unsigned int)((context->count[0] >> 3) & 0x3F);

    // Add inputLen * 8 to the 64-bit bit counter, carrying into the high word.
    if ((context->count[0] += ((uint32_t)inputLen << 3)) < ((uint32_t)inputLen << 3)) {
        context->count[1]++;
    }
    context->count[1] += ((uint32_t)inputLen >> 29);

    partLen = 64 - index;

    if (inputLen >= partLen) {
        // Complete the buffered block, then consume whole blocks directly
        // from the input.
        memcpy(&context->buffer[index], input, partLen);
        MD5Transform(context->state, context->buffer);

        for (i = partLen; i + 63 < inputLen; i += 64) {
            MD5Transform(context->state, &input[i]);
        }

        // Buffer whatever is left over.
        memcpy(context->buffer, &input[i], inputLen - i);
    } else {
        memcpy(&context->buffer[index], input, inputLen);
    }
}

// MD5 finalization: pad the message, append its length, write the 16-byte
// digest, and zero the context.
void MD5Final(unsigned char digest[16], MD5_CTX *context) {
    unsigned int index, padLen;
    unsigned char bits[8];

    // Save the message length in bits BEFORE padding: MD5Update below also
    // advances context->count, and the appended length must not include the
    // padding bytes.
    Encode(bits, context->count, 8);

    // Pad out to 56 bytes mod 64.
    index = (unsigned int)((context->count[0] >> 3) & 0x3f);
    padLen = (index < 56) ? (56 - index) : (120 - index);
    MD5Update(context, PADDING, padLen);

    // Append the saved length; this completes the final 64-byte block.
    MD5Update(context, bits, 8);

    // Store the state as the digest.
    Encode(digest, context->state, 16);

    memset(context, 0, sizeof(*context));
}

// Compute the MD5 hash of a NUL-terminated string (terminator excluded) and
// store the 16 raw digest bytes in digest.
void MD5(const char *string, unsigned char digest[16]) {
    // MD5Update takes an unsigned int length, so feed the input in bounded
    // chunks instead of narrowing strlen()'s size_t result.
    enum { MD5_MAX_CHUNK = 1 << 30 };

    MD5_CTX context;
    const unsigned char *cursor = (const unsigned char *)string;
    size_t remaining = strlen(string);

    MD5Init(&context);

    while (remaining > 0) {
        unsigned int chunk = (remaining > (size_t)MD5_MAX_CHUNK)
                                 ? (unsigned int)MD5_MAX_CHUNK
                                 : (unsigned int)remaining;

        MD5Update(&context, cursor, chunk);
        cursor += chunk;
        remaining -= chunk;
    }

    MD5Final(digest, &context);
}

// Test: print the MD5 digest of the single command-line argument as 32
// lowercase hex digits. Prints a usage line and returns 1 when the argument
// count is wrong.
int main(int argc, char *argv[]) {
    if (argc != 2) {
        // argv[0] is not guaranteed to be set when argc is 0.
        printf("Usage: %s <string>\n", argc > 0 ? argv[0] : "md5");
        return 1;
    }

    unsigned char digest[16];

    MD5(argv[1], digest);

    printf("MD5 (\"%s\") = ", argv[1]);
    for (int i = 0; i < 16; i++) {
        printf("%02x", digest[i]);
    }
    printf("\n");

    return 0;
}