[Discussion] How to improve C++ SDK performance?

classic Classic list List threaded Threaded
1 message Options
Reply | Threaded
Open this post in threaded view
|

[Discussion] How to improve C++ SDK performance?

xubo245

When we test read performance with testReadNextBatchRow in PR2816, using the following loop:
```
 for (int j = 0; j < length; j++) {
                i++;
                jobject row = env->GetObjectArrayElement(batch, j);
                if (i > 1 && i % printNum == 0) {
                    gettimeofday(&read, NULL);

                    double hasNextTime = 1000000 *
(startReadNextBatchRow.tv_sec - startHasNext.tv_sec) +
                                         startReadNextBatchRow.tv_usec -
startHasNext.tv_usec;

                    double readNextBatchTime = 1000000 *
(endReadNextBatchRow.tv_sec - startReadNextBatchRow.tv_sec) +
                                               endReadNextBatchRow.tv_usec -
startReadNextBatchRow.tv_usec;

                    time = 1000000 * (read.tv_sec - startHasNext.tv_sec) +
read.tv_usec - startHasNext.tv_usec;
                    printf("%d: time is %lf s, speed is %lf records/s,
hasNext time is %lf s,readNextBatchRow time is %lf s ",
                           i, time / 1000000.0, printNum / (time /
1000000.0), hasNextTime / 1000000.0,
                           readNextBatchTime / 1000000.0);
                    gettimeofday(&startHasNext, NULL);
                    printf("%s\t", carbonRow.getString(0));
                    printf("%s\t", carbonRow.getString(1));
                    printf("%s\t", carbonRow.getString(2));
                    printf("%s\t", carbonRow.getString(3));
                    printf("%ld\t", carbonRow.getLong(4));
                    printf("%ld\t", carbonRow.getLong(5));
                    printf("\n");
                }
                env->DeleteLocalRef(row);
            }
        } else {
            i = i + length;
        }
        env->DeleteLocalRef(batch);
    }
```
The read performance is 1.6 million records/s.
But if I add
```
 carbonRow.setCarbonRow(row);
                carbonRow.getString(0);
                carbonRow.getString(1);
                carbonRow.getString(2);
                carbonRow.getString(3);
                carbonRow.getLong(4);
                carbonRow.getLong(5);
```
for each row, the performance drops to 0.3 million records/s. The full loop then looks like this:
```
 for (int j = 0; j < length; j++) {
                i++;
                jobject row = env->GetObjectArrayElement(batch, j);
                carbonRow.setCarbonRow(row);
                carbonRow.getString(0);
                carbonRow.getString(1);
                carbonRow.getString(2);
                carbonRow.getString(3);
                carbonRow.getLong(4);
                carbonRow.getLong(5);
                if (i > 1 && i % printNum == 0) {
                    gettimeofday(&read, NULL);

                    double hasNextTime = 1000000 *
(startReadNextBatchRow.tv_sec - startHasNext.tv_sec) +
                                         startReadNextBatchRow.tv_usec -
startHasNext.tv_usec;

                    double readNextBatchTime = 1000000 *
(endReadNextBatchRow.tv_sec - startReadNextBatchRow.tv_sec) +
                                               endReadNextBatchRow.tv_usec -
startReadNextBatchRow.tv_usec;

                    time = 1000000 * (read.tv_sec - startHasNext.tv_sec) +
read.tv_usec - startHasNext.tv_usec;
                    printf("%d: time is %lf s, speed is %lf records/s,
hasNext time is %lf s,readNextBatchRow time is %lf s ",
                           i, time / 1000000.0, printNum / (time /
1000000.0), hasNextTime / 1000000.0,
                           readNextBatchTime / 1000000.0);
                    gettimeofday(&startHasNext, NULL);
                    printf("%s\t", carbonRow.getString(0));
                    printf("%s\t", carbonRow.getString(1));
                    printf("%s\t", carbonRow.getString(2));
                    printf("%s\t", carbonRow.getString(3));
                    printf("%ld\t", carbonRow.getLong(4));
                    printf("%ld\t", carbonRow.getLong(5));
                    printf("\n");
                }
                env->DeleteLocalRef(row);
            }
        } else {
            i = i + length;
        }
        env->DeleteLocalRef(batch);
    }
```

When we read string or int values through carbonRow.getString/getInt,
each call goes through JNI, which takes some time.
So how can we improve the C++ SDK performance? Are there any good ideas? Do you
know a better way to convert an object to a primitive data type such as int or
float?



--
Sent from: http://apache-carbondata-dev-mailing-list-archive.1130556.n5.nabble.com/