From 6fddd46708252975a5f5f34e8e8bbf81bd286cd8 Mon Sep 17 00:00:00 2001 From: djm34 Date: Fri, 10 Apr 2020 18:26:01 +0300 Subject: [PATCH] fix high cpu usage for mtp-tcr solo and mtp-classic --- compat/ccminer-config.h | 2 +- configure.ac | 2 +- cuda_mtp/cuda_mtp_forlib.cu | 22 ++-- cuda_mtp/mtp-tcr.cu | 64 +++++------- cuda_mtp/mtp.cu | 75 ++++++-------- merkletree/mtp.cpp | 199 +++++++++++++++++++++++++++++++++++- merkletree/mtp.h | 6 +- run-tcr.cmd | 3 +- 8 files changed, 274 insertions(+), 99 deletions(-) diff --git a/compat/ccminer-config.h b/compat/ccminer-config.h index 8f0d6db..e988cfd 100644 --- a/compat/ccminer-config.h +++ b/compat/ccminer-config.h @@ -164,7 +164,7 @@ #define PACKAGE_URL "http://github.com/zcoinofficial/ccminer" /* Define to the version of this package. */ -#define PACKAGE_VERSION "1.2.9-djm34-beta" +#define PACKAGE_VERSION "1.2.9-djm34" /* If using the C implementation of alloca, define if you know the direction of stack growth for your system; otherwise it will be diff --git a/configure.ac b/configure.ac index aaf8706..51bb16b 100644 --- a/configure.ac +++ b/configure.ac @@ -1,4 +1,4 @@ -AC_INIT([ccminer], [1.2.9L-djm34-beta], [], [ccminer], [http://github.com/zcoinofficial/ccminer]) +AC_INIT([ccminer], [1.2.9L-djm34], [], [ccminer], [http://github.com/zcoinofficial/ccminer]) AC_PREREQ([2.59c]) AC_CANONICAL_SYSTEM diff --git a/cuda_mtp/cuda_mtp_forlib.cu b/cuda_mtp/cuda_mtp_forlib.cu index fd8b48a..be2e6dd 100644 --- a/cuda_mtp/cuda_mtp_forlib.cu +++ b/cuda_mtp/cuda_mtp_forlib.cu @@ -986,7 +986,7 @@ uint32_t get_tpb_mtp(int thr_id) __host__ -void mtp_setBlockTarget(int thr_id, const void* pDataIn, const void *pTargetIn, const void * zElement) +void mtp_setBlockTarget_old(int thr_id, const void* pDataIn, const void *pTargetIn, const void * zElement) { // cudaSetDevice(device_map[thr_id]); @@ -997,7 +997,7 @@ void mtp_setBlockTarget(int thr_id, const void* pDataIn, const void *pTargetIn, } __host__ -void mtp_setBlockTarget_test(int thr_id, const void* pDataIn, const void *pTargetIn, const void * zElement,cudaStream_t s0) +void mtp_setBlockTarget(int thr_id, const void* pDataIn, const void *pTargetIn, const void * zElement,cudaStream_t s0) { // cudaSetDevice(device_map[thr_id]); @@ -1024,12 +1024,11 @@ void mtp_fill(uint32_t dev_id, const uint64_t *Block, uint32_t offset, uint32_t } __host__ -uint32_t mtp_cpu_hash_32(int thr_id, uint32_t threads, uint32_t startNounce) +uint32_t mtp_cpu_hash_32(int thr_id, uint32_t threads, uint32_t startNounce,cudaStream_t s0) { // cudaSetDevice(device_map[thr_id]); uint32_t result = UINT32_MAX; - CUDA_SAFE_CALL(cudaMemset(d_MinNonces[thr_id], 0xff, sizeof(uint32_t))); -// int dev_id = device_map[thr_id % MAX_GPUS]; + cudaMemsetAsync(d_MinNonces[thr_id], 0xff, sizeof(uint32_t), s0); uint32_t tpb = TPB_MTP; //TPB52; @@ -1040,11 +1039,10 @@ uint32_t mtp_cpu_hash_32(int thr_id, uint32_t threads, uint32_t startNounce) dim3 blockyloop(tpb); //yloop_init <<>>(thr_id, threads, startNounce, GYLocal[thr_id]); + cudaStreamSynchronize(s0); mtp_yloop << < gridyloop, blockyloop >> >(thr_id, threads, startNounce, (Type*)HBlock[thr_id], d_MinNonces[thr_id]); - - - CUDA_SAFE_CALL(cudaMemcpy(h_MinNonces[thr_id], d_MinNonces[thr_id], sizeof(uint32_t), cudaMemcpyDeviceToHost)); + cudaStreamSynchronize(s0); result = *h_MinNonces[thr_id]; return result; @@ -1785,11 +1783,11 @@ __global__ void mtp_fc2(uint32_t threads, uint4 * DBlock, uint2 *a) { -__host__ void get_tree(int thr_id, uint8_t* d) { +__host__ void get_tree_old(int thr_id, uint8_t* d) { CUDA_SAFE_CALL(cudaMemcpy(d, buffer_a[thr_id], sizeof(uint2) * 2 * 1048576 * 4, cudaMemcpyDeviceToHost)); } -__host__ void get_tree_test(int thr_id, uint8_t* d, cudaStream_t s0) { +__host__ void get_tree(int thr_id, uint8_t* d, cudaStream_t s0) { CUDA_SAFE_CALL(cudaMemcpyAsync(d, buffer_a[thr_id], sizeof(uint2) * 2 * 1048576 * 4, cudaMemcpyDeviceToHost, s0)); } @@ -1913,7 +1911,7 @@ void mtp_fill_1b(int thr_id, uint64_t *Block, uint32_t block_nr) } __host__ -void mtp_fill_1c_test(int thr_id, uint64_t *Block, uint32_t block_nr, cudaStream_t s0) +void mtp_fill_1c(int thr_id, uint64_t *Block, uint32_t block_nr, cudaStream_t s0) { // cudaSetDevice(device_map[thr_id]); // uint4 *Blockptr = &HBlock[thr_id][block_nr * 64]; @@ -1934,7 +1932,7 @@ void mtp_fill_1c_test(int thr_id, uint64_t *Block, uint32_t block_nr, cudaStream } __host__ -void mtp_fill_1c(int thr_id, uint64_t *Block, uint32_t block_nr) +void mtp_fill_1c_old(int thr_id, uint64_t *Block, uint32_t block_nr) { // cudaSetDevice(device_map[thr_id]); // uint4 *Blockptr = &HBlock[thr_id][block_nr * 64]; diff --git a/cuda_mtp/mtp-tcr.cu b/cuda_mtp/mtp-tcr.cu index 2ac2f0d..569c09c 100644 --- a/cuda_mtp/mtp-tcr.cu +++ b/cuda_mtp/mtp-tcr.cu @@ -13,22 +13,13 @@ #define memcost 4*1024*1024 extern void mtp_cpu_init(int thr_id, uint32_t threads); - extern uint32_t mtptcr_cpu_hash_32(int thr_id, uint32_t threads, uint32_t startNounce, cudaStream_t s0); - -extern void mtp_setBlockTarget(int thr_id,const void* pDataIn, const void *pTargetIn, const void * zElement); -extern void mtp_setBlockTarget_test(int thr_id, const void* pDataIn, const void *pTargetIn, const void * zElement,cudaStream_t s0); -extern void mtp_fill(uint32_t d, const uint64_t *Block, uint32_t offset, uint32_t datachunk); -extern void mtp_fill_1b(int thr_id, uint64_t *Block, uint32_t block_nr); +extern void mtp_setBlockTarget(int thr_id, const void* pDataIn, const void *pTargetIn, const void * zElement,cudaStream_t s0); extern uint32_t get_tpb_mtp(int thr_id); - -extern void mtp_fill_1c(int thr_id, uint64_t *Block, uint32_t block_nr); -extern void mtp_fill_1c_test(int thr_id, uint64_t *Block, uint32_t block_nr, cudaStream_t s0); -//extern void mtp_i_cpu(int thr_id, uint32_t *block_header); -extern void mtp_i_cpu(int thr_id, uint32_t *block_header, cudaStream_t s0); +extern void mtp_fill_1c(int thr_id, uint64_t *Block, uint32_t block_nr, cudaStream_t s0); extern void mtp_i_cpu2(int thr_id, uint32_t *block_header, cudaStream_t s0); -void get_tree(int thr_id, uint8_t* d); -void get_tree_test(int thr_id, uint8_t* d, cudaStream_t s0); +void get_tree(int thr_id, uint8_t* d, cudaStream_t s0); + #define HASHLEN 32 #define SALTLEN 16 #define PWD "password" @@ -60,7 +51,7 @@ extern "C" int scanhash_mtptcr(int nthreads,int thr_id, struct work* work, uint3 //if (JobId==0) // pthread_barrier_init(&barrier, NULL, nthreads); - cudaStream_t s0; + cudaStream_t s0; uint32_t *pdata = work->data; uint32_t *ptarget = work->target; @@ -131,18 +122,18 @@ if (JobId[thr_id] != work->data[16] || XtraNonce2[thr_id] != ((uint64_t*)work->x context[thr_id] = init_argon2d_param((const char*)endiandata); argon2_ctx_from_mtp(&context[thr_id], &instance[thr_id]); - mtp_fill_1c_test(thr_id, instance[thr_id].memory[0 + 0].v, 0 + 0,s0); - mtp_fill_1c_test(thr_id, instance[thr_id].memory[0 + 1].v, 0 + 1,s0); - mtp_fill_1c_test(thr_id, instance[thr_id].memory[2 + 0].v, 1048576 + 0,s0); - mtp_fill_1c_test(thr_id, instance[thr_id].memory[2 + 1].v, 1048576 + 1,s0); - mtp_fill_1c_test(thr_id, instance[thr_id].memory[4 + 0].v, 2097152 + 0,s0); - mtp_fill_1c_test(thr_id, instance[thr_id].memory[4 + 1].v, 2097152 + 1,s0); - mtp_fill_1c_test(thr_id, instance[thr_id].memory[6 + 0].v, 3145728 + 0,s0); - mtp_fill_1c_test(thr_id, instance[thr_id].memory[6 + 1].v, 3145728 + 1,s0); + mtp_fill_1c(thr_id, instance[thr_id].memory[0 + 0].v, 0 + 0,s0); + mtp_fill_1c(thr_id, instance[thr_id].memory[0 + 1].v, 0 + 1,s0); + mtp_fill_1c(thr_id, instance[thr_id].memory[2 + 0].v, 1048576 + 0,s0); + mtp_fill_1c(thr_id, instance[thr_id].memory[2 + 1].v, 1048576 + 1,s0); + mtp_fill_1c(thr_id, instance[thr_id].memory[4 + 0].v, 2097152 + 0,s0); + mtp_fill_1c(thr_id, instance[thr_id].memory[4 + 1].v, 2097152 + 1,s0); + mtp_fill_1c(thr_id, instance[thr_id].memory[6 + 0].v, 3145728 + 0,s0); + mtp_fill_1c(thr_id, instance[thr_id].memory[6 + 1].v, 3145728 + 1,s0); mtp_i_cpu2(thr_id, instance[thr_id].block_header,s0); - get_tree_test(thr_id,dx[thr_id],s0); + get_tree(thr_id,dx[thr_id],s0); cudaStreamSynchronize(s0); @@ -154,7 +145,7 @@ argon2_ctx_from_mtp(&context[thr_id], &instance[thr_id]); std::copy(root.begin(), root.end(), TheMerkleRoot[thr_id]); - mtp_setBlockTarget_test(thr_id, endiandata, ptarget, &TheMerkleRoot[thr_id],s0); + mtp_setBlockTarget(thr_id, endiandata, ptarget, &TheMerkleRoot[thr_id],s0); root.resize(0); } @@ -185,7 +176,7 @@ argon2_ctx_from_mtp(&context[thr_id], &instance[thr_id]); blockS nBlockMTP[MTP_L *2] = {0}; unsigned char nProofMTP[MTP_L * 3 * 353 ] = {0}; - uint32_t is_sol = mtptcr_solver_test(thr_id,foundNonce, &instance[thr_id], nBlockMTP,nProofMTP, TheMerkleRoot[thr_id], mtpHashValue, *ordered_tree[thr_id], endiandata,TheUint256Target[0],s0); + uint32_t is_sol = mtptcr_solver(thr_id,foundNonce, &instance[thr_id], nBlockMTP,nProofMTP, TheMerkleRoot[thr_id], mtpHashValue, *ordered_tree[thr_id], endiandata,TheUint256Target[0],s0); if (JobId[thr_id] != work->data[16] || XtraNonce2[thr_id] != ((uint64_t*)work->xnonce2)[0]) return 0; // if work has changed stop and go back to the initialization @@ -318,19 +309,20 @@ extern "C" int scanhash_mtptcr_solo(int nthreads, int thr_id, struct work* work, argon2_ctx_from_mtp(&context[thr_id], &instance[thr_id]); - mtp_fill_1c(thr_id, instance[thr_id].memory[0 + 0].v, 0 + 0); - mtp_fill_1c(thr_id, instance[thr_id].memory[0 + 1].v, 0 + 1); - mtp_fill_1c(thr_id, instance[thr_id].memory[2 + 0].v, 1048576 + 0); - mtp_fill_1c(thr_id, instance[thr_id].memory[2 + 1].v, 1048576 + 1); - mtp_fill_1c(thr_id, instance[thr_id].memory[4 + 0].v, 2097152 + 0); - mtp_fill_1c(thr_id, instance[thr_id].memory[4 + 1].v, 2097152 + 1); - mtp_fill_1c(thr_id, instance[thr_id].memory[6 + 0].v, 3145728 + 0); - mtp_fill_1c(thr_id, instance[thr_id].memory[6 + 1].v, 3145728 + 1); + mtp_fill_1c(thr_id, instance[thr_id].memory[0 + 0].v, 0 + 0,s0); + mtp_fill_1c(thr_id, instance[thr_id].memory[0 + 1].v, 0 + 1,s0); + mtp_fill_1c(thr_id, instance[thr_id].memory[2 + 0].v, 1048576 + 0,s0); + mtp_fill_1c(thr_id, instance[thr_id].memory[2 + 1].v, 1048576 + 1,s0); + mtp_fill_1c(thr_id, instance[thr_id].memory[4 + 0].v, 2097152 + 0,s0); + mtp_fill_1c(thr_id, instance[thr_id].memory[4 + 1].v, 2097152 + 1,s0); + mtp_fill_1c(thr_id, instance[thr_id].memory[6 + 0].v, 3145728 + 0,s0); + mtp_fill_1c(thr_id, instance[thr_id].memory[6 + 1].v, 3145728 + 1,s0); mtp_i_cpu2(thr_id, instance[thr_id].block_header,s0); - get_tree(thr_id, dx[thr_id]); + get_tree(thr_id, dx[thr_id],s0); + cudaStreamSynchronize(s0); // printf("Step 2 : Compute the root Φ of the Merkle hash tree \n"); // sleep(10); @@ -342,7 +334,7 @@ extern "C" int scanhash_mtptcr_solo(int nthreads, int thr_id, struct work* work, std::copy(root.begin(), root.end(), TheMerkleRoot[thr_id]); - mtp_setBlockTarget(thr_id, endiandata, ptarget, &TheMerkleRoot[thr_id]); + mtp_setBlockTarget(thr_id, endiandata, ptarget, &TheMerkleRoot[thr_id],s0); root.resize(0); } @@ -371,7 +363,7 @@ extern "C" int scanhash_mtptcr_solo(int nthreads, int thr_id, struct work* work, blockS nBlockMTP[MTP_L * 2] = { 0 }; unsigned char nProofMTP[MTP_L * 3 * 353] = { 0 }; - uint32_t is_sol = mtptcr_solver(thr_id, foundNonce, &instance[thr_id], nBlockMTP, nProofMTP, TheMerkleRoot[thr_id], mtpHashValue, *ordered_tree[thr_id], endiandata, TheUint256Target[0]); + uint32_t is_sol = mtptcr_solver(thr_id, foundNonce, &instance[thr_id], nBlockMTP, nProofMTP, TheMerkleRoot[thr_id], mtpHashValue, *ordered_tree[thr_id], endiandata, TheUint256Target[0],s0); if (is_sol == 1 /*&& fulltest(vhash64, ptarget)*/) { diff --git a/cuda_mtp/mtp.cu b/cuda_mtp/mtp.cu index 17d4bec..7be0335 100644 --- a/cuda_mtp/mtp.cu +++ b/cuda_mtp/mtp.cu @@ -10,20 +10,13 @@ #define memcost 4*1024*1024 extern void mtp_cpu_init(int thr_id, uint32_t threads); - -extern uint32_t mtp_cpu_hash_32(int thr_id, uint32_t threads, uint32_t startNounce); - -extern void mtp_setBlockTarget(int thr_id,const void* pDataIn, const void *pTargetIn, const void * zElement); -extern void mtp_fill(uint32_t d, const uint64_t *Block, uint32_t offset, uint32_t datachunk); -extern void mtp_fill_1b(int thr_id, uint64_t *Block, uint32_t block_nr); +extern uint32_t mtp_cpu_hash_32(int thr_id, uint32_t threads, uint32_t startNounce, cudaStream_t s0); +extern void mtp_setBlockTarget(int thr_id,const void* pDataIn, const void *pTargetIn, const void * zElement, cudaStream_t s0); extern uint32_t get_tpb_mtp(int thr_id); - -extern void mtp_fill_1c(int thr_id, uint64_t *Block, uint32_t block_nr); - -//extern void mtp_i_cpu(int thr_id, uint32_t *block_header); -extern void mtp_i_cpu(int thr_id, uint32_t *block_header, cudaStream_t s0); +extern void mtp_fill_1c(int thr_id, uint64_t *Block, uint32_t block_nr, cudaStream_t s0); extern void mtp_i_cpu2(int thr_id, uint32_t *block_header, cudaStream_t s0); -void get_tree(int thr_id, uint8_t* d); +void get_tree(int thr_id, uint8_t* d, cudaStream_t s0); + #define HASHLEN 32 #define SALTLEN 16 #define PWD "password" @@ -51,11 +44,11 @@ extern "C" int scanhash_mtp(int nthreads,int thr_id, struct work* work, uint32_t { unsigned char mtpHashValue[32]; - cudaStream_t s0; + //if (JobId==0) // pthread_barrier_init(&barrier, NULL, nthreads); - + cudaStream_t s0; uint32_t *pdata = work->data; uint32_t *ptarget = work->target; @@ -124,22 +117,20 @@ if (JobId[thr_id] != work->data[16] || XtraNonce2[thr_id] != ((uint64_t*)work->x context[thr_id] = init_argon2d_param((const char*)endiandata); argon2_ctx_from_mtp(&context[thr_id], &instance[thr_id]); - mtp_fill_1c(thr_id, instance[thr_id].memory[0 + 0].v, 0 + 0); - mtp_fill_1c(thr_id, instance[thr_id].memory[0 + 1].v, 0 + 1); - mtp_fill_1c(thr_id, instance[thr_id].memory[2 + 0].v, 1048576 + 0); - mtp_fill_1c(thr_id, instance[thr_id].memory[2 + 1].v, 1048576 + 1); - mtp_fill_1c(thr_id, instance[thr_id].memory[4 + 0].v, 2097152 + 0); - mtp_fill_1c(thr_id, instance[thr_id].memory[4 + 1].v, 2097152 + 1); - mtp_fill_1c(thr_id, instance[thr_id].memory[6 + 0].v, 3145728 + 0); - mtp_fill_1c(thr_id, instance[thr_id].memory[6 + 1].v, 3145728 + 1); + mtp_fill_1c(thr_id, instance[thr_id].memory[0 + 0].v, 0 + 0,s0); + mtp_fill_1c(thr_id, instance[thr_id].memory[0 + 1].v, 0 + 1,s0); + mtp_fill_1c(thr_id, instance[thr_id].memory[2 + 0].v, 1048576 + 0,s0); + mtp_fill_1c(thr_id, instance[thr_id].memory[2 + 1].v, 1048576 + 1,s0); + mtp_fill_1c(thr_id, instance[thr_id].memory[4 + 0].v, 2097152 + 0,s0); + mtp_fill_1c(thr_id, instance[thr_id].memory[4 + 1].v, 2097152 + 1,s0); + mtp_fill_1c(thr_id, instance[thr_id].memory[6 + 0].v, 3145728 + 0,s0); + mtp_fill_1c(thr_id, instance[thr_id].memory[6 + 1].v, 3145728 + 1,s0); mtp_i_cpu2(thr_id, instance[thr_id].block_header,s0); + get_tree(thr_id,dx[thr_id],s0); - - get_tree(thr_id,dx[thr_id]); - - + cudaStreamSynchronize(s0); // sleep(10); ordered_tree[thr_id] = new MerkleTree(dx[thr_id], true); @@ -150,7 +141,7 @@ argon2_ctx_from_mtp(&context[thr_id], &instance[thr_id]); std::copy(root.begin(), root.end(), TheMerkleRoot[thr_id]); - mtp_setBlockTarget(thr_id, endiandata, ptarget, &TheMerkleRoot[thr_id]); + mtp_setBlockTarget(thr_id, endiandata, ptarget, &TheMerkleRoot[thr_id],s0); root.resize(0); } @@ -162,7 +153,7 @@ argon2_ctx_from_mtp(&context[thr_id], &instance[thr_id]); // cudaProfilerStart(); // cudaProfilerStop(); *hashes_done = pdata[19] - first_nonce + throughput; - foundNonce = mtp_cpu_hash_32(thr_id, throughput, pdata[19]); + foundNonce = mtp_cpu_hash_32(thr_id, throughput, pdata[19],s0); // cudaProfilerStop(); uint32_t _ALIGN(64) vhash64[8]; if (foundNonce != UINT32_MAX) @@ -181,7 +172,7 @@ argon2_ctx_from_mtp(&context[thr_id], &instance[thr_id]); blockS nBlockMTP[MTP_L *2] = {0}; unsigned char nProofMTP[MTP_L * 3 * 353 ] = {0}; - uint32_t is_sol = mtp_solver(thr_id,foundNonce, &instance[thr_id], nBlockMTP,nProofMTP, TheMerkleRoot[thr_id], mtpHashValue, *ordered_tree[thr_id], endiandata,TheUint256Target[0]); + uint32_t is_sol = mtp_solver(thr_id,foundNonce, &instance[thr_id], nBlockMTP,nProofMTP, TheMerkleRoot[thr_id], mtpHashValue, *ordered_tree[thr_id], endiandata,TheUint256Target[0],s0); if (is_sol==1 /*&& fulltest(vhash64, ptarget)*/) { @@ -315,22 +306,22 @@ extern "C" int scanhash_mtp_solo(int nthreads, int thr_id, struct work* work, ui argon2_ctx_from_mtp(&context[thr_id], &instance[thr_id]); - mtp_fill_1c(thr_id, instance[thr_id].memory[0 + 0].v, 0 + 0); - mtp_fill_1c(thr_id, instance[thr_id].memory[0 + 1].v, 0 + 1); - mtp_fill_1c(thr_id, instance[thr_id].memory[2 + 0].v, 1048576 + 0); - mtp_fill_1c(thr_id, instance[thr_id].memory[2 + 1].v, 1048576 + 1); - mtp_fill_1c(thr_id, instance[thr_id].memory[4 + 0].v, 2097152 + 0); - mtp_fill_1c(thr_id, instance[thr_id].memory[4 + 1].v, 2097152 + 1); - mtp_fill_1c(thr_id, instance[thr_id].memory[6 + 0].v, 3145728 + 0); - mtp_fill_1c(thr_id, instance[thr_id].memory[6 + 1].v, 3145728 + 1); + mtp_fill_1c(thr_id, instance[thr_id].memory[0 + 0].v, 0 + 0,s0); + mtp_fill_1c(thr_id, instance[thr_id].memory[0 + 1].v, 0 + 1,s0); + mtp_fill_1c(thr_id, instance[thr_id].memory[2 + 0].v, 1048576 + 0,s0); + mtp_fill_1c(thr_id, instance[thr_id].memory[2 + 1].v, 1048576 + 1,s0); + mtp_fill_1c(thr_id, instance[thr_id].memory[4 + 0].v, 2097152 + 0,s0); + mtp_fill_1c(thr_id, instance[thr_id].memory[4 + 1].v, 2097152 + 1,s0); + mtp_fill_1c(thr_id, instance[thr_id].memory[6 + 0].v, 3145728 + 0,s0); + mtp_fill_1c(thr_id, instance[thr_id].memory[6 + 1].v, 3145728 + 1,s0); mtp_i_cpu2(thr_id, instance[thr_id].block_header,s0); - get_tree(thr_id, dx[thr_id]); + get_tree(thr_id, dx[thr_id],s0); // printf("Step 2 : Compute the root Φ of the Merkle hash tree \n"); // sleep(10); - + cudaStreamSynchronize(s0); ordered_tree[thr_id] = new MerkleTree(dx[thr_id], true); JobId[thr_id] = work->data[17]; @@ -339,7 +330,7 @@ extern "C" int scanhash_mtp_solo(int nthreads, int thr_id, struct work* work, ui std::copy(root.begin(), root.end(), TheMerkleRoot[thr_id]); - mtp_setBlockTarget(thr_id, endiandata, ptarget, &TheMerkleRoot[thr_id]); + mtp_setBlockTarget(thr_id, endiandata, ptarget, &TheMerkleRoot[thr_id],s0); root.resize(0); } @@ -355,7 +346,7 @@ extern "C" int scanhash_mtp_solo(int nthreads, int thr_id, struct work* work, ui uint32_t foundNonce; *hashes_done = pdata[19] - first_nonce + throughput; - foundNonce = mtp_cpu_hash_32(thr_id, throughput, pdata[19]); + foundNonce = mtp_cpu_hash_32(thr_id, throughput, pdata[19],s0); uint32_t _ALIGN(64) vhash64[8]; if (foundNonce != UINT32_MAX) @@ -368,7 +359,7 @@ extern "C" int scanhash_mtp_solo(int nthreads, int thr_id, struct work* work, ui blockS nBlockMTP[MTP_L * 2] = { 0 }; unsigned char nProofMTP[MTP_L * 3 * 353] = { 0 }; - uint32_t is_sol = mtp_solver(thr_id, foundNonce, &instance[thr_id], nBlockMTP, nProofMTP, TheMerkleRoot[thr_id], mtpHashValue, *ordered_tree[thr_id], endiandata, TheUint256Target[0]); + uint32_t is_sol = mtp_solver(thr_id, foundNonce, &instance[thr_id], nBlockMTP, nProofMTP, TheMerkleRoot[thr_id], mtpHashValue, *ordered_tree[thr_id], endiandata, TheUint256Target[0],s0); if (is_sol == 1 /*&& fulltest(vhash64, ptarget)*/) { diff --git a/merkletree/mtp.cpp b/merkletree/mtp.cpp index 64113e0..30b53a9 100644 --- a/merkletree/mtp.cpp +++ b/merkletree/mtp.cpp @@ -697,7 +697,7 @@ MerkleTree TheTree,uint32_t* input, uint256 hashTarget) { -int mtp_solver(int thr_id, uint32_t TheNonce, argon2_instance_t *instance, +int mtp_solver_old(int thr_id, uint32_t TheNonce, argon2_instance_t *instance, blockS *nBlockMTP /*[72 * 2][128]*/, unsigned char* nProofMTP, unsigned char* resultMerkleRoot, unsigned char* mtpHashValue, MerkleTree TheTree, uint32_t* input, uint256 hashTarget) { @@ -895,7 +895,7 @@ int mtp_solver(int thr_id, uint32_t TheNonce, argon2_instance_t *instance, -int mtptcr_solver(int thr_id, uint32_t TheNonce, argon2_instance_t *instance, +int mtptcr_solver_old(int thr_id, uint32_t TheNonce, argon2_instance_t *instance, blockS *nBlockMTP /*[72 * 2][128]*/, unsigned char* nProofMTP, unsigned char* resultMerkleRoot, unsigned char* mtpHashValue, MerkleTree TheTree, uint32_t* input, uint256 hashTarget) { @@ -1090,10 +1090,203 @@ int mtptcr_solver(int thr_id, uint32_t TheNonce, argon2_instance_t *instance, return 0; } +int mtp_solver(int thr_id, uint32_t TheNonce, argon2_instance_t *instance, + blockS *nBlockMTP /*[72 * 2][128]*/, unsigned char* nProofMTP, unsigned char* resultMerkleRoot, unsigned char* mtpHashValue, + MerkleTree TheTree, uint32_t* input, uint256 hashTarget, cudaStream_t s0) { + + const uint8_t L = 64; + + if (instance != NULL) { + // input[19]=0x01000000; + uint256 Y[L + 1]; + // std::string proof_blocks[L * 3]; + memset(&Y, 0, sizeof(Y)); + uint8_t zero[32] = { 0 }; + ablake2b_state BlakeHash; + ablake2b_init(&BlakeHash, 32); + + + + + ablake2b_update(&BlakeHash, (unsigned char*)&input[0], 80); + ablake2b_update(&BlakeHash, (unsigned char*)&resultMerkleRoot[0], 16); + ablake2b_update(&BlakeHash, &TheNonce, sizeof(unsigned int)); + ablake2b_final(&BlakeHash, (unsigned char*)&Y[0], 32); + + + + blockS blocks[L * 2]; + + /////////////////////////////// + bool init_blocks = false; + bool unmatch_block = false; + unsigned char proof_ser[1000] = { 0 }; + unsigned int proof_size; + for (uint8_t j = 1; j <= L; j++) { + + uint32_t ij = (((uint32_t*)(&Y[j - 1]))[0]) % (instance->context_ptr->m_cost); + uint32_t except_index = (uint32_t)(instance->context_ptr->m_cost / instance->context_ptr->lanes); + if (ij %except_index == 0 || ij%except_index == 1) { + init_blocks = true; + break; + } + + uint32_t prev_index; + uint32_t ref_index; + getblockindex_test(thr_id, ij, instance, &prev_index, &ref_index,s0); + + // copy_blockS(&nBlockMTP[j * 2 - 2], &instance->memory[prev_index]); + get_block_test(thr_id, /*(uint8_t*)*/nBlockMTP[j * 2 - 2].v, prev_index,s0); + //ref block + // copy_blockS(&nBlockMTP[j * 2 - 1], &instance->memory[ref_index]); + get_block_test(thr_id, /*(uint8_t*)*/nBlockMTP[j * 2 - 1].v, ref_index,s0); + block blockhash; + uint8_t blockhash_bytes[ARGON2_BLOCK_SIZE]; + // copy_block(&blockhash, &instance->memory[ij]); + get_block_test(thr_id, /*(uint8_t*)*/&blockhash.v, ij,s0); + + store_block(&blockhash_bytes, &blockhash); + ablake2b_state BlakeHash2; + ablake2b_init(&BlakeHash2, 32); + ablake2b_update(&BlakeHash2, &Y[j - 1], sizeof(uint256)); + ablake2b_update(&BlakeHash2, blockhash_bytes, ARGON2_BLOCK_SIZE); + ablake2b_final(&BlakeHash2, (unsigned char*)&Y[j], 32); + //////////////////////////////////////////////////////////////// + // current block + clear_internal_memory(blockhash.v, ARGON2_BLOCK_SIZE); + clear_internal_memory(blockhash_bytes, ARGON2_BLOCK_SIZE); -int mtptcr_solver_test(int thr_id, uint32_t TheNonce, argon2_instance_t *instance, + unsigned char curr[32] = { 0 }; + block blockhash_curr; + uint8_t blockhash_curr_bytes[ARGON2_BLOCK_SIZE]; + // copy_block(&blockhash_curr, &instance->memory[ij]); + get_block_test(thr_id, /*(uint8_t*)*/&blockhash_curr.v, ij,s0); + store_block(&blockhash_curr_bytes, &blockhash_curr); + ablake2b_state state_curr; + ablake2b_init(&state_curr, MERKLE_TREE_ELEMENT_SIZE_B); + ablake2b4rounds_update(&state_curr, blockhash_curr_bytes, ARGON2_BLOCK_SIZE); + uint8_t digest_curr[MERKLE_TREE_ELEMENT_SIZE_B]; + ablake2b4rounds_final(&state_curr, digest_curr, sizeof(digest_curr)); + MerkleTree::Buffer hash_curr = MerkleTree::Buffer(digest_curr, digest_curr + sizeof(digest_curr)); + clear_internal_memory(blockhash_curr.v, ARGON2_BLOCK_SIZE); + clear_internal_memory(blockhash_curr_bytes, ARGON2_BLOCK_SIZE); + + + std::deque> zProofMTP = TheTree.getProofOrdered(hash_curr, ij + 1); + + nProofMTP[(j * 3 - 3) * 353] = (unsigned char)(zProofMTP.size()); + + int k1 = 0; + for (const std::vector &mtpData : zProofMTP) { + std::copy(mtpData.begin(), mtpData.end(), nProofMTP + ((j * 3 - 3) * 353 + 1 + k1 * mtpData.size())); + k1++; + } + + //prev proof + unsigned char prev[32] = { 0 }; + block blockhash_prev; + uint8_t blockhash_prev_bytes[ARGON2_BLOCK_SIZE]; + // copy_block(&blockhash_prev, &instance->memory[prev_index]); + get_block_test(thr_id, /*(uint8_t*)*/&blockhash_prev.v, prev_index,s0); + store_block(&blockhash_prev_bytes, &blockhash_prev); + ablake2b_state state_prev; + ablake2b_init(&state_prev, MERKLE_TREE_ELEMENT_SIZE_B); + ablake2b4rounds_update(&state_prev, blockhash_prev_bytes, ARGON2_BLOCK_SIZE); + uint8_t digest_prev[MERKLE_TREE_ELEMENT_SIZE_B]; + + + ablake2b4rounds_final(&state_prev, digest_prev, sizeof(digest_prev)); + + + MerkleTree::Buffer hash_prev = MerkleTree::Buffer(digest_prev, digest_prev + sizeof(digest_prev)); + clear_internal_memory(blockhash_prev.v, ARGON2_BLOCK_SIZE); + clear_internal_memory(blockhash_prev_bytes, ARGON2_BLOCK_SIZE); + + std::deque> zProofMTP2 = TheTree.getProofOrdered(hash_prev, prev_index + 1); + + nProofMTP[(j * 3 - 2) * 353] = (unsigned char)(zProofMTP2.size()); + + int k2 = 0; + for (const std::vector &mtpData : zProofMTP2) { + std::copy(mtpData.begin(), mtpData.end(), nProofMTP + ((j * 3 - 2) * 353 + 1 + k2 * mtpData.size())); + k2++; + } + + + //ref proof + unsigned char ref[32] = { 0 }; + block blockhash_ref; + uint8_t blockhash_ref_bytes[ARGON2_BLOCK_SIZE]; + // copy_block(&blockhash_ref, &instance->memory[ref_index]); + get_block_test(thr_id, /*(uint8_t*)*/&blockhash_ref.v, ref_index,s0); + store_block(&blockhash_ref_bytes, &blockhash_ref); + ablake2b_state state_ref; + ablake2b_init(&state_ref, MERKLE_TREE_ELEMENT_SIZE_B); + ablake2b4rounds_update(&state_ref, blockhash_ref_bytes, ARGON2_BLOCK_SIZE); + uint8_t digest_ref[MERKLE_TREE_ELEMENT_SIZE_B]; + ablake2b4rounds_final(&state_ref, digest_ref, sizeof(digest_ref)); + MerkleTree::Buffer hash_ref = MerkleTree::Buffer(digest_ref, digest_ref + sizeof(digest_ref)); + clear_internal_memory(blockhash_ref.v, ARGON2_BLOCK_SIZE); + clear_internal_memory(blockhash_ref_bytes, ARGON2_BLOCK_SIZE); + + std::deque> zProofMTP3 = TheTree.getProofOrdered(hash_ref, ref_index + 1); + + nProofMTP[(j * 3 - 1) * 353] = (unsigned char)(zProofMTP3.size()); + + int k3 = 0; + for (const std::vector &mtpData : zProofMTP3) { + std::copy(mtpData.begin(), mtpData.end(), nProofMTP + ((j * 3 - 1) * 353 + 1 + k3 * mtpData.size())); + k3++; + } + + + ///////////////////////////////////////////////////////////////////// + ///////////////////////////////////////////////////////////////////// + } + + if (init_blocks) { + + return 0; + } + + + char hex_tmp[64]; + + if (Y[L] > hashTarget) { + // Found a solution + printf("False positive. Nonce=%08x Hash:", TheNonce); + for (int n = 0; n < 32; n++) { + printf("%02x", ((unsigned char*)&Y[0])[n]); + } + printf("\n"); + } + else { + for (int i = 0; i<32; i++) + mtpHashValue[i] = (((unsigned char*)(&Y[L]))[i]); + + // Found a solution + /* + printf("Found a solution. Nonce=%08x Hash:", TheNonce); + for (int n = 0; n < 32; n++) { + printf("%02x", ((unsigned char*)&Y[L])[n]); + } + printf("\n"); + */ + return 1; + + + } + + } + + + return 0; +} + + +int mtptcr_solver(int thr_id, uint32_t TheNonce, argon2_instance_t *instance, blockS *nBlockMTP /*[72 * 2][128]*/, unsigned char* nProofMTP, unsigned char* resultMerkleRoot, unsigned char* mtpHashValue, MerkleTree TheTree, uint32_t* input, uint256 hashTarget,cudaStream_t s0 ) { diff --git a/merkletree/mtp.h b/merkletree/mtp.h index fe62ff2..b7a5027 100644 --- a/merkletree/mtp.h +++ b/merkletree/mtp.h @@ -69,17 +69,17 @@ int mtp_solver_orig(uint32_t TheNonce, argon2_instance_t *instance, int mtp_solver(int thr_id, uint32_t TheNonce, argon2_instance_t *instance, blockS *nBlockMTP /*[72 * 2][128]*/, unsigned char *nProofMTP, unsigned char* resultMerkleRoot, unsigned char* mtpHashValue, - MerkleTree TheTree, uint32_t* input, uint256 hashTarget); + MerkleTree TheTree, uint32_t* input, uint256 hashTarget,cudaStream_t s0); //int mtp_solver_test(int thr_id, uint32_t TheNonce, argon2_instance_t *instance, // blockS *nBlockMTP /*[72 * 2][128]*/, unsigned char *nProofMTP, unsigned char* resultMerkleRoot, unsigned char* mtpHashValue, // MerkleTree TheTree, uint32_t* input, uint256 hashTarget); -int mtptcr_solver(int thr_id, uint32_t TheNonce, argon2_instance_t *instance, +int mtptcr_solver_old(int thr_id, uint32_t TheNonce, argon2_instance_t *instance, blockS *nBlockMTP /*[72 * 2][128]*/, unsigned char* nProofMTP, unsigned char* resultMerkleRoot, unsigned char* mtpHashValue, MerkleTree TheTree, uint32_t* input, uint256 hashTarget); -int mtptcr_solver_test(int thr_id, uint32_t TheNonce, argon2_instance_t *instance, +int mtptcr_solver(int thr_id, uint32_t TheNonce, argon2_instance_t *instance, blockS *nBlockMTP /*[72 * 2][128]*/, unsigned char* nProofMTP, unsigned char* resultMerkleRoot, unsigned char* mtpHashValue, MerkleTree TheTree, uint32_t* input, uint256 hashTarget,cudaStream_t s0); diff --git a/run-tcr.cmd b/run-tcr.cmd index 4a5df59..ec2483f 100644 --- a/run-tcr.cmd +++ b/run-tcr.cmd @@ -1,8 +1,9 @@ rem solo mining: rem x64\Release\ccminer -a mtp-tcr -o http://192.168.0.171:8382 -u djm34 -p password --coinbase-addr TPkxM1Aw872FL9gs4udCDzy5hAG7M7sVSE --no-getwork --no-stratum --quiet -i 20 rem pool mining: - x64\Release\ccminer -a mtp-tcr -o stratum+tcp://pool.tecracoin.io:4556 -u TPkxM1Aw872FL9gs4udCDzy5hAG7M7sVSE -p 0,minpayout=0.6 -i 26 + x64\Release\ccminer -a mtp-tcr -o stratum+tcp://pool.tecracoin.io:4556 -u TPkxM1Aw872FL9gs4udCDzy5hAG7M7sVSE -p 0,minpayout=0.6 -i 26 rem x64\Release\ccminer -a mtp-tcr -o stratum+tcp://dev.pool.tecracoin.io:4557 -u GJVkKsPFdBsavo5wn5WGiCSScgFZE2F27C -p 0,d=0.128,minpayout=0.6 --cpu-affinity 2 +