feat(webstatement): implement batch processing for data jobs
- Tambahkan pemrosesan data secara bertahap (chunking) dengan konstanta `CHUNK_SIZE` untuk mengurangi penggunaan memori dan menangani data dalam jumlah besar.
- Perbarui `ProcessArrangementDataJob`:
  - Tambahkan properti `arrangementBatch` untuk menyimpan batch data sementara.
  - Implementasikan metode `addToBatch` untuk menambahkan data ke batch.
  - Implementasikan metode `saveBatch` untuk menyimpan data batch ke database menggunakan metode bulk `TempArrangement::upsert`.
  - Tambahkan logging untuk melacak progress pemrosesan data per chunk.
  - Reset batch setelah penyimpanan atau ketika terjadi error untuk menghindari pemrosesan ulang data yang gagal.
- Perbarui `ProcessBillDetailDataJob`:
  - Tambahkan properti `billDetailBatch` untuk menyimpan batch data sementara.
  - Implementasikan metode `addToBatch` untuk menambahkan data ke batch.
  - Implementasikan metode `saveBatch` untuk menyimpan data batch ke database menggunakan metode bulk `TempBillDetail::upsert`.
  - Tambahkan logging untuk melacak progress pemrosesan data per chunk.
  - Reset batch setelah penyimpanan atau ketika terjadi error untuk menghindari pemrosesan ulang data yang gagal.
- Perbaiki penghitungan error count dengan menambahkannya saat terjadi error pada pemrosesan batch.
- Tambahkan timestamp (`created_at` dan `updated_at`) pada setiap record dalam batch sebelum disimpan ke database.
- Lakukan cleanup batch secara otomatis setelah pemrosesan selesai.
This commit is contained in:
@@ -20,10 +20,12 @@
|
||||
private const MAX_EXECUTION_TIME = 86400; // 24 hours in seconds
|
||||
private const FILENAME = 'ST.AA.ARRANGEMENT.csv';
|
||||
private const DISK_NAME = 'sftpStatement';
|
||||
private const CHUNK_SIZE = 1000; // Process data in chunks to reduce memory usage
|
||||
|
||||
private string $period = '';
|
||||
private int $processedCount = 0;
|
||||
private int $errorCount = 0;
|
||||
private array $arrangementBatch = [];
|
||||
|
||||
/**
|
||||
* Create a new job instance.
|
||||
@@ -61,6 +63,7 @@
|
||||
set_time_limit(self::MAX_EXECUTION_TIME);
|
||||
$this->processedCount = 0;
|
||||
$this->errorCount = 0;
|
||||
$this->arrangementBatch = [];
|
||||
}
|
||||
|
||||
private function processPeriod()
|
||||
@@ -111,10 +114,23 @@
|
||||
|
||||
$headers = (new TempArrangement())->getFillable();
|
||||
$rowCount = 0;
|
||||
$chunkCount = 0;
|
||||
|
||||
while (($row = fgetcsv($handle, 0, self::CSV_DELIMITER)) !== false) {
|
||||
$rowCount++;
|
||||
$this->processRow($row, $headers, $rowCount, $filePath);
|
||||
|
||||
// Process in chunks to avoid memory issues
|
||||
if (count($this->arrangementBatch) >= self::CHUNK_SIZE) {
|
||||
$this->saveBatch();
|
||||
$chunkCount++;
|
||||
Log::info("Processed chunk $chunkCount ({$this->processedCount} records so far)");
|
||||
}
|
||||
}
|
||||
|
||||
// Process any remaining records
|
||||
if (!empty($this->arrangementBatch)) {
|
||||
$this->saveBatch();
|
||||
}
|
||||
|
||||
fclose($handle);
|
||||
@@ -127,22 +143,29 @@
|
||||
if (count($headers) !== count($row)) {
|
||||
Log::warning("Row $rowCount in $filePath has incorrect column count. Expected: " .
|
||||
count($headers) . ", Got: " . count($row));
|
||||
$this->errorCount++;
|
||||
return;
|
||||
}
|
||||
|
||||
$data = array_combine($headers, $row);
|
||||
$this->saveRecord($data, $rowCount, $filePath);
|
||||
$this->addToBatch($data, $rowCount, $filePath);
|
||||
}
|
||||
|
||||
private function saveRecord(array $data, int $rowCount, string $filePath)
|
||||
/**
|
||||
* Add record to batch instead of saving immediately
|
||||
*/
|
||||
private function addToBatch(array $data, int $rowCount, string $filePath)
|
||||
: void
|
||||
{
|
||||
try {
|
||||
if ($data['arrangement_id'] !== 'arrangement_id') {
|
||||
TempArrangement::updateOrCreate(
|
||||
['arrangement_id' => $data['arrangement_id']], // key to find existing record
|
||||
$data // data to update or create
|
||||
);
|
||||
// Add timestamp fields
|
||||
$now = now();
|
||||
$data['created_at'] = $now;
|
||||
$data['updated_at'] = $now;
|
||||
|
||||
// Add to batch
|
||||
$this->arrangementBatch[] = $data;
|
||||
$this->processedCount++;
|
||||
}
|
||||
} catch (Exception $e) {
|
||||
@@ -151,6 +174,32 @@
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Flush the buffered arrangement rows to the database with one bulk upsert.
 *
 * Rows are matched on `arrangement_id`; every other fillable column is
 * overwritten when a matching row already exists. The buffer is emptied
 * afterwards whether or not the write succeeded, so a failing chunk is
 * never retried on the next flush.
 *
 * addToBatch() increments the processed counter when a row is buffered,
 * not when it is written. When the write fails, the whole chunk is
 * therefore moved from the processed counter to the error counter so the
 * same rows are not reported under both.
 */
private function saveBatch(): void
{
    if (empty($this->arrangementBatch)) {
        return;
    }

    // Counted before de-duplication: every buffered row was tallied as processed.
    $bufferedCount = count($this->arrangementBatch);

    try {
        // Keep only the last row seen for each arrangement_id. A single
        // upsert statement that touches the same key twice is rejected by
        // some databases (e.g. PostgreSQL), and "last row wins" matches the
        // outcome of applying the rows one after another.
        $rows = array_values(
            array_column($this->arrangementBatch, null, 'arrangement_id')
        );

        $updateColumns = array_values(
            array_diff((new TempArrangement())->getFillable(), ['arrangement_id'])
        );

        TempArrangement::upsert($rows, ['arrangement_id'], $updateColumns);
    } catch (Exception $e) {
        Log::error("Error in saveBatch: " . $e->getMessage());
        $this->errorCount += $bufferedCount;
        $this->processedCount = max(0, $this->processedCount - $bufferedCount);
    } finally {
        // Always drop the chunk so failed rows are not reprocessed.
        $this->arrangementBatch = [];
    }
}
|
||||
|
||||
private function cleanup(string $tempFilePath)
|
||||
: void
|
||||
{
|
||||
|
||||
@@ -20,10 +20,12 @@
|
||||
private const MAX_EXECUTION_TIME = 86400; // 24 hours in seconds
|
||||
private const FILENAME = 'ST.AA.BILL.DETAILS.csv';
|
||||
private const DISK_NAME = 'sftpStatement';
|
||||
private const CHUNK_SIZE = 1000; // Process data in chunks to reduce memory usage
|
||||
|
||||
private string $period = '';
|
||||
private int $processedCount = 0;
|
||||
private int $errorCount = 0;
|
||||
private array $billDetailBatch = [];
|
||||
|
||||
/**
|
||||
* Create a new job instance.
|
||||
@@ -61,6 +63,7 @@
|
||||
set_time_limit(self::MAX_EXECUTION_TIME);
|
||||
$this->processedCount = 0;
|
||||
$this->errorCount = 0;
|
||||
$this->billDetailBatch = [];
|
||||
}
|
||||
|
||||
private function processPeriod()
|
||||
@@ -111,10 +114,23 @@
|
||||
|
||||
$headers = (new TempBillDetail())->getFillable();
|
||||
$rowCount = 0;
|
||||
$chunkCount = 0;
|
||||
|
||||
while (($row = fgetcsv($handle, 0, self::CSV_DELIMITER)) !== false) {
|
||||
$rowCount++;
|
||||
$this->processRow($row, $headers, $rowCount, $filePath);
|
||||
|
||||
// Process in chunks to avoid memory issues
|
||||
if (count($this->billDetailBatch) >= self::CHUNK_SIZE) {
|
||||
$this->saveBatch();
|
||||
$chunkCount++;
|
||||
Log::info("Processed chunk $chunkCount ({$this->processedCount} records so far)");
|
||||
}
|
||||
}
|
||||
|
||||
// Process any remaining records
|
||||
if (!empty($this->billDetailBatch)) {
|
||||
$this->saveBatch();
|
||||
}
|
||||
|
||||
fclose($handle);
|
||||
@@ -127,22 +143,29 @@
|
||||
if (count($headers) !== count($row)) {
|
||||
Log::warning("Row $rowCount in $filePath has incorrect column count. Expected: " .
|
||||
count($headers) . ", Got: " . count($row));
|
||||
$this->errorCount++;
|
||||
return;
|
||||
}
|
||||
|
||||
$data = array_combine($headers, $row);
|
||||
$this->saveRecord($data, $rowCount, $filePath);
|
||||
$this->addToBatch($data, $rowCount, $filePath);
|
||||
}
|
||||
|
||||
private function saveRecord(array $data, int $rowCount, string $filePath)
|
||||
/**
|
||||
* Add record to batch instead of saving immediately
|
||||
*/
|
||||
private function addToBatch(array $data, int $rowCount, string $filePath)
|
||||
: void
|
||||
{
|
||||
try {
|
||||
if (isset($data['_id']) && $data['_id'] !== '_id') {
|
||||
TempBillDetail::updateOrCreate(
|
||||
['_id' => $data['_id']],
|
||||
$data
|
||||
);
|
||||
// Add timestamp fields
|
||||
$now = now();
|
||||
$data['created_at'] = $now;
|
||||
$data['updated_at'] = $now;
|
||||
|
||||
// Add to batch
|
||||
$this->billDetailBatch[] = $data;
|
||||
$this->processedCount++;
|
||||
}
|
||||
} catch (Exception $e) {
|
||||
@@ -151,6 +174,32 @@
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Flush the buffered bill-detail rows to the database with one bulk upsert.
 *
 * Rows are matched on `_id`; every other fillable column is overwritten
 * when a matching row already exists. The buffer is emptied afterwards
 * whether or not the write succeeded, so a failing chunk is never retried
 * on the next flush.
 *
 * addToBatch() increments the processed counter when a row is buffered,
 * not when it is written. When the write fails, the whole chunk is
 * therefore moved from the processed counter to the error counter so the
 * same rows are not reported under both.
 */
private function saveBatch(): void
{
    if (empty($this->billDetailBatch)) {
        return;
    }

    // Counted before de-duplication: every buffered row was tallied as processed.
    $bufferedCount = count($this->billDetailBatch);

    try {
        // Keep only the last row seen for each _id. A single upsert
        // statement that touches the same key twice is rejected by some
        // databases (e.g. PostgreSQL), and "last row wins" matches the
        // outcome of applying the rows one after another.
        $rows = array_values(
            array_column($this->billDetailBatch, null, '_id')
        );

        $updateColumns = array_values(
            array_diff((new TempBillDetail())->getFillable(), ['_id'])
        );

        TempBillDetail::upsert($rows, ['_id'], $updateColumns);
    } catch (Exception $e) {
        Log::error("Error in saveBatch: " . $e->getMessage());
        $this->errorCount += $bufferedCount;
        $this->processedCount = max(0, $this->processedCount - $bufferedCount);
    } finally {
        // Always drop the chunk so failed rows are not reprocessed.
        $this->billDetailBatch = [];
    }
}
|
||||
|
||||
private function cleanup(string $tempFilePath)
|
||||
: void
|
||||
{
|
||||
|
||||
Reference in New Issue
Block a user