背景
最近在做项目的过程中,有一个需要是连续读5个以上的 500MB+的文件,对齐进行解压缩。常见的做法是通过 std::ifstream 或者 fopen 来进行读取。但是在第二次读取的时候文件就一直为0。
代码
main.cpp
cpp
#include <mutex>
#include <unordered_map>
#include <cstring>
#include <vector>
#include <iostream>
#include <fstream>
#include <sstream>
#include <chrono>
#include <unistd.h>
#include <dirent.h>
#include <sys/stat.h>
#include <algorithm>
#include "md5.h"
using namespace std;
static inline unsigned char hex2char(unsigned char hex)
{
if (hex >= 0 && hex <= 9)
hex += '0';
else
hex += 'a' - 10;
return hex;
}
static std::string md5_digest(unsigned char *buf, size_t len)
{
int i = 0;
unsigned char digest[16];
unsigned char md5_string[256];
struct MD5Context ctx;
MD5Init(&ctx);
MD5Update(&ctx, buf, len);
MD5Final(digest, &ctx);
for (i = 0; i < 16; i++)
{
md5_string[i * 2] = hex2char((digest[i] >> 4) & 0xf);
md5_string[i * 2 + 1] = hex2char(digest[i] & 0xf);
}
md5_string[i * 2] = 0;
snprintf((char *)digest, 16, "%ld", len);
strncat((char *)md5_string, (char *)digest, 16);
return std::string((char *)md5_string);
}
int main(int argc, char* argv[]) {
if (argc != 2) {
cout << "Usage: " << argv[0] << " <filename>" << endl;
return 1;
}
std::ifstream input_file(argv[1], std::ios::binary);
if (!input_file) {
std::cout<<"Failed to open input file.\n"<<std::endl;
return -1;
}
// 获取文件大小
input_file.seekg(0, std::ios::end);
auto input_size = input_file.tellg();
input_file.seekg(0, std::ios::beg);
char* input_data = new (std::nothrow) char[input_size];
if (input_data == nullptr) {
// 内存分配失败
std::cout<<"ZIP Failed to allocate memory input_data"<<std::endl;
return -1;
}
memset(input_data,0,input_size);
/*
long long read_size = fread(input_data, sizeof(char), input_size, input_file);
if(read_size < input_size)
{
delete[] input_data;
ALOGE("ZIP model_md5 input_size read error!,read_size : %lld",read_size);
return -1;
}
*/
input_file.read(input_data, input_size);
input_file.close();
/
std::ifstream input_file2(argv[1], std::ios::binary);
if (!input_file2) {
std::cout<<"Failed to open input file.\n"<<std::endl;
return -1;
}
// 获取文件大小
input_file2.seekg(0, std::ios::end);
auto input_size = input_file2.tellg(); //这里获取的 size 一直都是0,很奇怪
input_file2.seekg(0, std::ios::beg);
//
std::string md5_model = md5_digest((unsigned char*)input_data, input_size);
std::cout<< "model name : "<< argv[1] <<"model md5 : "<<md5_model.c_str()<<endl;
delete[] input_data;
return 0;
}
md5.h
cpp
// Copyright 2007 Google Inc. All Rights Reserved.
// Author: liuli@google.com (Liu Li)
#ifndef COMMON_MD5_H__
#define COMMON_MD5_H__
#include <stdint.h>
typedef uint32_t u32;
typedef uint8_t u8;
struct MD5Context {
u32 buf[4];
u32 bits[2];
u8 in[64];
};
void MD5Init(struct MD5Context *ctx);
void MD5Update(struct MD5Context *ctx, unsigned char const *buf, size_t len);
void MD5Final(unsigned char digest[16], struct MD5Context *ctx);
#endif // COMMON_MD5_H__
md5.cpp
cpp
/*
* written by Colin Plumb in 1993, no copyright is claimed.
* This code is in the public domain; do with it what you wish.
*
* Equivalent code is available from RSA Data Security, Inc.
* This code has been tested against that, and is equivalent,
* except that you don't need to include two pages of legalese
* with every copy.
*
* To compute the message digest of a chunk of bytes, declare an
* MD5Context structure, pass it to MD5Init, call MD5Update as
* needed on buffers full of bytes, and then call MD5Final, which
* will fill a supplied 16-byte array with the digest.
*/
#include <string.h>
#include "md5.h"
#ifndef WORDS_BIGENDIAN
#define byteReverse(buf, len) /* Nothing */
#else
/*
* Note: this code is harmless on little-endian machines.
*/
static void byteReverse(unsigned char *buf, unsigned longs)
{
u32 t;
do {
t = (u32) ((unsigned) buf[3] << 8 | buf[2]) << 16 |
((unsigned) buf[1] << 8 | buf[0]);
*(u32 *) buf = t;
buf += 4;
} while (--longs);
}
#endif
static void MD5Transform(u32 buf[4], u32 const in[16]);
/*
* Start MD5 accumulation. Set bit count to 0 and buffer to mysterious
* initialization constants.
*/
void MD5Init(struct MD5Context *ctx)
{
ctx->buf[0] = 0x67452301;
ctx->buf[1] = 0xefcdab89;
ctx->buf[2] = 0x98badcfe;
ctx->buf[3] = 0x10325476;
ctx->bits[0] = 0;
ctx->bits[1] = 0;
}
/*
* Update context to reflect the concatenation of another buffer full
* of bytes.
*/
void MD5Update(struct MD5Context *ctx, unsigned char const *buf, size_t len)
{
u32 t;
/* Update bitcount */
t = ctx->bits[0];
if ((ctx->bits[0] = t + ((u32) len << 3)) < t)
ctx->bits[1]++; /* Carry from low to high */
ctx->bits[1] += len >> 29;
t = (t >> 3) & 0x3f; /* Bytes already in shsInfo->data */
/* Handle any leading odd-sized chunks */
if (t) {
unsigned char *p = (unsigned char *) ctx->in + t;
t = 64 - t;
if (len < t) {
memcpy(p, buf, len);
return;
}
memcpy(p, buf, t);
byteReverse(ctx->in, 16);
MD5Transform(ctx->buf, (u32 *) ctx->in);
buf += t;
len -= t;
}
/* Process data in 64-byte chunks */
while (len >= 64) {
memcpy(ctx->in, buf, 64);
byteReverse(ctx->in, 16);
MD5Transform(ctx->buf, (u32 *) ctx->in);
buf += 64;
len -= 64;
}
/* Handle any remaining bytes of data. */
memcpy(ctx->in, buf, len);
}
/*
* Final wrapup - pad to 64-byte boundary with the bit pattern
* 1 0* (64-bit count of bits processed, MSB-first)
*/
void MD5Final(unsigned char digest[16], struct MD5Context *ctx)
{
unsigned count;
unsigned char *p;
/* Compute number of bytes mod 64 */
count = (ctx->bits[0] >> 3) & 0x3F;
/* Set the first char of padding to 0x80. This is safe since there is
always at least one byte free */
p = ctx->in + count;
*p++ = 0x80;
/* Bytes of padding needed to make 64 bytes */
count = 64 - 1 - count;
/* Pad out to 56 mod 64 */
if (count < 8) {
/* Two lots of padding: Pad the first block to 64 bytes */
memset(p, 0, count);
byteReverse(ctx->in, 16);
MD5Transform(ctx->buf, (u32 *) ctx->in);
/* Now fill the next block with 56 bytes */
memset(ctx->in, 0, 56);
} else {
/* Pad block to 56 bytes */
memset(p, 0, count - 8);
}
byteReverse(ctx->in, 14);
/* Append length in bits and transform */
memcpy(&ctx->in[14], &ctx->bits[0], sizeof(u32));
memcpy(&ctx->in[15], &ctx->bits[1], sizeof(u32));
MD5Transform(ctx->buf, (u32 *) ctx->in);
byteReverse((unsigned char *) ctx->buf, 4);
memcpy(digest, ctx->buf, 16);
memset(ctx, 0, sizeof(*ctx)); /* In case it's sensitive */
}
/* The four core functions - F1 is optimized somewhat */
/* #define F1(x, y, z) (x & y | ~x & z) */
#define F1(x, y, z) (z ^ (x & (y ^ z)))
#define F2(x, y, z) F1(z, x, y)
#define F3(x, y, z) (x ^ y ^ z)
#define F4(x, y, z) (y ^ (x | ~z))
/* This is the central step in the MD5 algorithm. */
#define MD5STEP(f, w, x, y, z, data, s) \
( w += f(x, y, z) + data, w = w<<s | w>>(32-s), w += x )
/*
* The core of the MD5 algorithm, this alters an existing MD5 hash to
* reflect the addition of 16 longwords of new data. MD5Update blocks
* the data and converts bytes into longwords for this routine.
*/
__attribute__((no_sanitize("unsigned-integer-overflow"))) static void MD5Transform(u32 buf[4], u32 const in[16])
{
u32 a, b, c, d;
a = buf[0];
b = buf[1];
c = buf[2];
d = buf[3];
MD5STEP(F1, a, b, c, d, in[0] + 0xd76aa478, 7);
MD5STEP(F1, d, a, b, c, in[1] + 0xe8c7b756, 12);
MD5STEP(F1, c, d, a, b, in[2] + 0x242070db, 17);
MD5STEP(F1, b, c, d, a, in[3] + 0xc1bdceee, 22);
MD5STEP(F1, a, b, c, d, in[4] + 0xf57c0faf, 7);
MD5STEP(F1, d, a, b, c, in[5] + 0x4787c62a, 12);
MD5STEP(F1, c, d, a, b, in[6] + 0xa8304613, 17);
MD5STEP(F1, b, c, d, a, in[7] + 0xfd469501, 22);
MD5STEP(F1, a, b, c, d, in[8] + 0x698098d8, 7);
MD5STEP(F1, d, a, b, c, in[9] + 0x8b44f7af, 12);
MD5STEP(F1, c, d, a, b, in[10] + 0xffff5bb1, 17);
MD5STEP(F1, b, c, d, a, in[11] + 0x895cd7be, 22);
MD5STEP(F1, a, b, c, d, in[12] + 0x6b901122, 7);
MD5STEP(F1, d, a, b, c, in[13] + 0xfd987193, 12);
MD5STEP(F1, c, d, a, b, in[14] + 0xa679438e, 17);
MD5STEP(F1, b, c, d, a, in[15] + 0x49b40821, 22);
MD5STEP(F2, a, b, c, d, in[1] + 0xf61e2562, 5);
MD5STEP(F2, d, a, b, c, in[6] + 0xc040b340, 9);
MD5STEP(F2, c, d, a, b, in[11] + 0x265e5a51, 14);
MD5STEP(F2, b, c, d, a, in[0] + 0xe9b6c7aa, 20);
MD5STEP(F2, a, b, c, d, in[5] + 0xd62f105d, 5);
MD5STEP(F2, d, a, b, c, in[10] + 0x02441453, 9);
MD5STEP(F2, c, d, a, b, in[15] + 0xd8a1e681, 14);
MD5STEP(F2, b, c, d, a, in[4] + 0xe7d3fbc8, 20);
MD5STEP(F2, a, b, c, d, in[9] + 0x21e1cde6, 5);
MD5STEP(F2, d, a, b, c, in[14] + 0xc33707d6, 9);
MD5STEP(F2, c, d, a, b, in[3] + 0xf4d50d87, 14);
MD5STEP(F2, b, c, d, a, in[8] + 0x455a14ed, 20);
MD5STEP(F2, a, b, c, d, in[13] + 0xa9e3e905, 5);
MD5STEP(F2, d, a, b, c, in[2] + 0xfcefa3f8, 9);
MD5STEP(F2, c, d, a, b, in[7] + 0x676f02d9, 14);
MD5STEP(F2, b, c, d, a, in[12] + 0x8d2a4c8a, 20);
MD5STEP(F3, a, b, c, d, in[5] + 0xfffa3942, 4);
MD5STEP(F3, d, a, b, c, in[8] + 0x8771f681, 11);
MD5STEP(F3, c, d, a, b, in[11] + 0x6d9d6122, 16);
MD5STEP(F3, b, c, d, a, in[14] + 0xfde5380c, 23);
MD5STEP(F3, a, b, c, d, in[1] + 0xa4beea44, 4);
MD5STEP(F3, d, a, b, c, in[4] + 0x4bdecfa9, 11);
MD5STEP(F3, c, d, a, b, in[7] + 0xf6bb4b60, 16);
MD5STEP(F3, b, c, d, a, in[10] + 0xbebfbc70, 23);
MD5STEP(F3, a, b, c, d, in[13] + 0x289b7ec6, 4);
MD5STEP(F3, d, a, b, c, in[0] + 0xeaa127fa, 11);
MD5STEP(F3, c, d, a, b, in[3] + 0xd4ef3085, 16);
MD5STEP(F3, b, c, d, a, in[6] + 0x04881d05, 23);
MD5STEP(F3, a, b, c, d, in[9] + 0xd9d4d039, 4);
MD5STEP(F3, d, a, b, c, in[12] + 0xe6db99e5, 11);
MD5STEP(F3, c, d, a, b, in[15] + 0x1fa27cf8, 16);
MD5STEP(F3, b, c, d, a, in[2] + 0xc4ac5665, 23);
MD5STEP(F4, a, b, c, d, in[0] + 0xf4292244, 6);
MD5STEP(F4, d, a, b, c, in[7] + 0x432aff97, 10);
MD5STEP(F4, c, d, a, b, in[14] + 0xab9423a7, 15);
MD5STEP(F4, b, c, d, a, in[5] + 0xfc93a039, 21);
MD5STEP(F4, a, b, c, d, in[12] + 0x655b59c3, 6);
MD5STEP(F4, d, a, b, c, in[3] + 0x8f0ccc92, 10);
MD5STEP(F4, c, d, a, b, in[10] + 0xffeff47d, 15);
MD5STEP(F4, b, c, d, a, in[1] + 0x85845dd1, 21);
MD5STEP(F4, a, b, c, d, in[8] + 0x6fa87e4f, 6);
MD5STEP(F4, d, a, b, c, in[15] + 0xfe2ce6e0, 10);
MD5STEP(F4, c, d, a, b, in[6] + 0xa3014314, 15);
MD5STEP(F4, b, c, d, a, in[13] + 0x4e0811a1, 21);
MD5STEP(F4, a, b, c, d, in[4] + 0xf7537e82, 6);
MD5STEP(F4, d, a, b, c, in[11] + 0xbd3af235, 10);
MD5STEP(F4, c, d, a, b, in[2] + 0x2ad7d2bb, 15);
MD5STEP(F4, b, c, d, a, in[9] + 0xeb86d391, 21);
buf[0] += a;
buf[1] += b;
buf[2] += c;
buf[3] += d;
}
解决办法
https://www.cnblogs.com/schips/p/linux_c_read_write_file_size_over_2g.html
cpp
fpos_t pos;
fseek(fp,0,SEEK_END);
fgetpos(fp,&pos);
fseek( fp, 0, SEEK_SET);
把获取文件大小改成下面的方式就可以了:
cpp
for(int i=0;i<model_infos.models.size();i++)
{
FILE* input_file = fopen((odm_path + model_infos.models[i].name).c_str(), "rb");
if (!input_file) {
ALOGE("ZIP Failed to open input file.\n");
return NULL;
}
fpos_t pos;
fseek(input_file,0,SEEK_END);
fgetpos(input_file,&pos);
fseek( input_file, 0, SEEK_SET);
auto input_size = pos;
if(input_size == 0)
{
ALOGE("ZIP pos one Failed , pos is 0.\n");
return NULL;
}
input_max_size = std::max<long long>(input_size,input_max_size);
decompressed__max_size =std::max<long long>(decompressed__max_size,model_infos.models[i].size);
fclose(input_file);
}