feat(webstatement): implement batch processing for data jobs
- Tambahkan pemrosesan data secara bertahap (chunking) dengan konstanta `CHUNK_SIZE` untuk mengurangi penggunaan memori dan menangani data dalam jumlah besar.
- Perbarui `ProcessArrangementDataJob`:
  - Tambahkan properti `arrangementBatch` untuk menyimpan batch data sementara.
  - Implementasikan metode `addToBatch` untuk menambahkan data ke batch.
  - Implementasikan metode `saveBatch` untuk menyimpan data batch ke database menggunakan metode bulk `TempArrangement::upsert`.
  - Tambahkan logging untuk melacak progress pemrosesan data per chunk.
  - Reset batch setelah penyimpanan atau ketika terjadi error untuk menghindari pemrosesan ulang data yang gagal.
- Perbarui `ProcessBillDetailDataJob`:
  - Tambahkan properti `billDetailBatch` untuk menyimpan batch data sementara.
  - Implementasikan metode `addToBatch` untuk menambahkan data ke batch.
  - Implementasikan metode `saveBatch` untuk menyimpan data batch ke database menggunakan metode bulk `TempBillDetail::upsert`.
  - Tambahkan logging untuk melacak progress pemrosesan data per chunk.
  - Reset batch setelah penyimpanan atau ketika terjadi error untuk menghindari pemrosesan ulang data yang gagal.
- Perbaiki penghitungan error count dengan menambahkannya saat terjadi error pada pemrosesan batch.
- Tambahkan timestamp (`created_at` dan `updated_at`) pada setiap record dalam batch sebelum disimpan ke database.
- Lakukan cleanup batch secara otomatis setelah pemrosesan selesai.
This commit is contained in:
@@ -20,10 +20,12 @@
|
|||||||
private const MAX_EXECUTION_TIME = 86400; // 24 hours in seconds
|
private const MAX_EXECUTION_TIME = 86400; // 24 hours in seconds
|
||||||
private const FILENAME = 'ST.AA.ARRANGEMENT.csv';
|
private const FILENAME = 'ST.AA.ARRANGEMENT.csv';
|
||||||
private const DISK_NAME = 'sftpStatement';
|
private const DISK_NAME = 'sftpStatement';
|
||||||
|
private const CHUNK_SIZE = 1000; // Process data in chunks to reduce memory usage
|
||||||
|
|
||||||
private string $period = '';
|
private string $period = '';
|
||||||
private int $processedCount = 0;
|
private int $processedCount = 0;
|
||||||
private int $errorCount = 0;
|
private int $errorCount = 0;
|
||||||
|
private array $arrangementBatch = [];
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Create a new job instance.
|
* Create a new job instance.
|
||||||
@@ -61,6 +63,7 @@
|
|||||||
set_time_limit(self::MAX_EXECUTION_TIME);
|
set_time_limit(self::MAX_EXECUTION_TIME);
|
||||||
$this->processedCount = 0;
|
$this->processedCount = 0;
|
||||||
$this->errorCount = 0;
|
$this->errorCount = 0;
|
||||||
|
$this->arrangementBatch = [];
|
||||||
}
|
}
|
||||||
|
|
||||||
private function processPeriod()
|
private function processPeriod()
|
||||||
@@ -111,10 +114,23 @@
|
|||||||
|
|
||||||
$headers = (new TempArrangement())->getFillable();
|
$headers = (new TempArrangement())->getFillable();
|
||||||
$rowCount = 0;
|
$rowCount = 0;
|
||||||
|
$chunkCount = 0;
|
||||||
|
|
||||||
while (($row = fgetcsv($handle, 0, self::CSV_DELIMITER)) !== false) {
|
while (($row = fgetcsv($handle, 0, self::CSV_DELIMITER)) !== false) {
|
||||||
$rowCount++;
|
$rowCount++;
|
||||||
$this->processRow($row, $headers, $rowCount, $filePath);
|
$this->processRow($row, $headers, $rowCount, $filePath);
|
||||||
|
|
||||||
|
// Process in chunks to avoid memory issues
|
||||||
|
if (count($this->arrangementBatch) >= self::CHUNK_SIZE) {
|
||||||
|
$this->saveBatch();
|
||||||
|
$chunkCount++;
|
||||||
|
Log::info("Processed chunk $chunkCount ({$this->processedCount} records so far)");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Process any remaining records
|
||||||
|
if (!empty($this->arrangementBatch)) {
|
||||||
|
$this->saveBatch();
|
||||||
}
|
}
|
||||||
|
|
||||||
fclose($handle);
|
fclose($handle);
|
||||||
@@ -127,22 +143,29 @@
|
|||||||
if (count($headers) !== count($row)) {
|
if (count($headers) !== count($row)) {
|
||||||
Log::warning("Row $rowCount in $filePath has incorrect column count. Expected: " .
|
Log::warning("Row $rowCount in $filePath has incorrect column count. Expected: " .
|
||||||
count($headers) . ", Got: " . count($row));
|
count($headers) . ", Got: " . count($row));
|
||||||
|
$this->errorCount++;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
$data = array_combine($headers, $row);
|
$data = array_combine($headers, $row);
|
||||||
$this->saveRecord($data, $rowCount, $filePath);
|
$this->addToBatch($data, $rowCount, $filePath);
|
||||||
}
|
}
|
||||||
|
|
||||||
private function saveRecord(array $data, int $rowCount, string $filePath)
|
/**
|
||||||
|
* Add record to batch instead of saving immediately
|
||||||
|
*/
|
||||||
|
private function addToBatch(array $data, int $rowCount, string $filePath)
|
||||||
: void
|
: void
|
||||||
{
|
{
|
||||||
try {
|
try {
|
||||||
if ($data['arrangement_id'] !== 'arrangement_id') {
|
if ($data['arrangement_id'] !== 'arrangement_id') {
|
||||||
TempArrangement::updateOrCreate(
|
// Add timestamp fields
|
||||||
['arrangement_id' => $data['arrangement_id']], // key to find existing record
|
$now = now();
|
||||||
$data // data to update or create
|
$data['created_at'] = $now;
|
||||||
);
|
$data['updated_at'] = $now;
|
||||||
|
|
||||||
|
// Add to batch
|
||||||
|
$this->arrangementBatch[] = $data;
|
||||||
$this->processedCount++;
|
$this->processedCount++;
|
||||||
}
|
}
|
||||||
} catch (Exception $e) {
|
} catch (Exception $e) {
|
||||||
@@ -151,6 +174,32 @@
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Save batched records to the database
|
||||||
|
*/
|
||||||
|
private function saveBatch()
|
||||||
|
: void
|
||||||
|
{
|
||||||
|
try {
|
||||||
|
if (!empty($this->arrangementBatch)) {
|
||||||
|
// Bulk insert/update arrangements
|
||||||
|
TempArrangement::upsert(
|
||||||
|
$this->arrangementBatch,
|
||||||
|
['arrangement_id'], // Unique key
|
||||||
|
array_diff((new TempArrangement())->getFillable(), ['arrangement_id']) // Update columns
|
||||||
|
);
|
||||||
|
|
||||||
|
// Reset batch after processing
|
||||||
|
$this->arrangementBatch = [];
|
||||||
|
}
|
||||||
|
} catch (Exception $e) {
|
||||||
|
Log::error("Error in saveBatch: " . $e->getMessage());
|
||||||
|
$this->errorCount += count($this->arrangementBatch);
|
||||||
|
// Reset batch even if there's an error to prevent reprocessing the same failed records
|
||||||
|
$this->arrangementBatch = [];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
private function cleanup(string $tempFilePath)
|
private function cleanup(string $tempFilePath)
|
||||||
: void
|
: void
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -20,10 +20,12 @@
|
|||||||
private const MAX_EXECUTION_TIME = 86400; // 24 hours in seconds
|
private const MAX_EXECUTION_TIME = 86400; // 24 hours in seconds
|
||||||
private const FILENAME = 'ST.AA.BILL.DETAILS.csv';
|
private const FILENAME = 'ST.AA.BILL.DETAILS.csv';
|
||||||
private const DISK_NAME = 'sftpStatement';
|
private const DISK_NAME = 'sftpStatement';
|
||||||
|
private const CHUNK_SIZE = 1000; // Process data in chunks to reduce memory usage
|
||||||
|
|
||||||
private string $period = '';
|
private string $period = '';
|
||||||
private int $processedCount = 0;
|
private int $processedCount = 0;
|
||||||
private int $errorCount = 0;
|
private int $errorCount = 0;
|
||||||
|
private array $billDetailBatch = [];
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Create a new job instance.
|
* Create a new job instance.
|
||||||
@@ -61,6 +63,7 @@
|
|||||||
set_time_limit(self::MAX_EXECUTION_TIME);
|
set_time_limit(self::MAX_EXECUTION_TIME);
|
||||||
$this->processedCount = 0;
|
$this->processedCount = 0;
|
||||||
$this->errorCount = 0;
|
$this->errorCount = 0;
|
||||||
|
$this->billDetailBatch = [];
|
||||||
}
|
}
|
||||||
|
|
||||||
private function processPeriod()
|
private function processPeriod()
|
||||||
@@ -111,10 +114,23 @@
|
|||||||
|
|
||||||
$headers = (new TempBillDetail())->getFillable();
|
$headers = (new TempBillDetail())->getFillable();
|
||||||
$rowCount = 0;
|
$rowCount = 0;
|
||||||
|
$chunkCount = 0;
|
||||||
|
|
||||||
while (($row = fgetcsv($handle, 0, self::CSV_DELIMITER)) !== false) {
|
while (($row = fgetcsv($handle, 0, self::CSV_DELIMITER)) !== false) {
|
||||||
$rowCount++;
|
$rowCount++;
|
||||||
$this->processRow($row, $headers, $rowCount, $filePath);
|
$this->processRow($row, $headers, $rowCount, $filePath);
|
||||||
|
|
||||||
|
// Process in chunks to avoid memory issues
|
||||||
|
if (count($this->billDetailBatch) >= self::CHUNK_SIZE) {
|
||||||
|
$this->saveBatch();
|
||||||
|
$chunkCount++;
|
||||||
|
Log::info("Processed chunk $chunkCount ({$this->processedCount} records so far)");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Process any remaining records
|
||||||
|
if (!empty($this->billDetailBatch)) {
|
||||||
|
$this->saveBatch();
|
||||||
}
|
}
|
||||||
|
|
||||||
fclose($handle);
|
fclose($handle);
|
||||||
@@ -127,22 +143,29 @@
|
|||||||
if (count($headers) !== count($row)) {
|
if (count($headers) !== count($row)) {
|
||||||
Log::warning("Row $rowCount in $filePath has incorrect column count. Expected: " .
|
Log::warning("Row $rowCount in $filePath has incorrect column count. Expected: " .
|
||||||
count($headers) . ", Got: " . count($row));
|
count($headers) . ", Got: " . count($row));
|
||||||
|
$this->errorCount++;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
$data = array_combine($headers, $row);
|
$data = array_combine($headers, $row);
|
||||||
$this->saveRecord($data, $rowCount, $filePath);
|
$this->addToBatch($data, $rowCount, $filePath);
|
||||||
}
|
}
|
||||||
|
|
||||||
private function saveRecord(array $data, int $rowCount, string $filePath)
|
/**
|
||||||
|
* Add record to batch instead of saving immediately
|
||||||
|
*/
|
||||||
|
private function addToBatch(array $data, int $rowCount, string $filePath)
|
||||||
: void
|
: void
|
||||||
{
|
{
|
||||||
try {
|
try {
|
||||||
if (isset($data['_id']) && $data['_id'] !== '_id') {
|
if (isset($data['_id']) && $data['_id'] !== '_id') {
|
||||||
TempBillDetail::updateOrCreate(
|
// Add timestamp fields
|
||||||
['_id' => $data['_id']],
|
$now = now();
|
||||||
$data
|
$data['created_at'] = $now;
|
||||||
);
|
$data['updated_at'] = $now;
|
||||||
|
|
||||||
|
// Add to batch
|
||||||
|
$this->billDetailBatch[] = $data;
|
||||||
$this->processedCount++;
|
$this->processedCount++;
|
||||||
}
|
}
|
||||||
} catch (Exception $e) {
|
} catch (Exception $e) {
|
||||||
@@ -151,6 +174,32 @@
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Save batched records to the database
|
||||||
|
*/
|
||||||
|
private function saveBatch()
|
||||||
|
: void
|
||||||
|
{
|
||||||
|
try {
|
||||||
|
if (!empty($this->billDetailBatch)) {
|
||||||
|
// Bulk insert/update bill details
|
||||||
|
TempBillDetail::upsert(
|
||||||
|
$this->billDetailBatch,
|
||||||
|
['_id'], // Unique key
|
||||||
|
array_diff((new TempBillDetail())->getFillable(), ['_id']) // Update columns
|
||||||
|
);
|
||||||
|
|
||||||
|
// Reset batch after processing
|
||||||
|
$this->billDetailBatch = [];
|
||||||
|
}
|
||||||
|
} catch (Exception $e) {
|
||||||
|
Log::error("Error in saveBatch: " . $e->getMessage());
|
||||||
|
$this->errorCount += count($this->billDetailBatch);
|
||||||
|
// Reset batch even if there's an error to prevent reprocessing the same failed records
|
||||||
|
$this->billDetailBatch = [];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
private function cleanup(string $tempFilePath)
|
private function cleanup(string $tempFilePath)
|
||||||
: void
|
: void
|
||||||
{
|
{
|
||||||
|
|||||||
Reference in New Issue
Block a user