Dawn/src/file/csv.c

222 lines
5.9 KiB
C

/**
* Copyright (c) 2021 Dominic Masters
*
* This software is released under the MIT License.
* https://opensource.org/licenses/MIT
*/
#include "csv.h"
/**
 * Streams a CSV asset and fires a callback for every completed cell.
 *
 * The asset is consumed in chunks of up to CSV_BUFFER_SIZE bytes via
 * assetBufferRead. Parsing rules:
 *  - '\r' is dropped.
 *  - A lone '"' toggles encapsulation; two consecutive '"' emit one literal
 *    quote character into the cell.
 *  - ',' and '\n' outside of encapsulation end the current cell; '\n' also
 *    ends the current row.
 *  - A NUL byte always ends the current cell, even inside encapsulation.
 *  - Cells longer than CSV_CELL_SIZE_MAX - 1 characters are truncated so the
 *    cell buffer (including its terminator) can never be overrun.
 *  - A trailing cell that is not followed by '\n' or NUL is still delivered
 *    once the end of the data is reached.
 *
 * @param asset Asset buffer to read the CSV bytes from.
 * @param callback Optional (may be NULL) callback fired per cell with the
 *   row index, column index and NUL terminated cell text. Returning false
 *   stops parsing immediately.
 * @param user Opaque pointer handed through to the callback.
 * @return Counts of the cells, rows and widest column count seen so far. If
 *   the callback aborts, the counts do not include the cell it aborted on.
 */
csvbufferresult_t csvBuffer(
  assetbuffer_t *asset, csvbuffercallback_t *callback, void *user
) {
  int32_t cellChar, read, i, currentColumnCount;
  char buffer[CSV_BUFFER_SIZE];
  char cell[CSV_CELL_SIZE_MAX];
  char c;
  bool insideEncapsulation = false;
  // True when the previous byte was a '"' whose meaning (escape or toggle)
  // depends on the byte that follows it. Carrying this as state instead of
  // peeking at buffer[i+1] keeps us inside the bytes actually read and makes
  // escaped quotes work across chunk boundaries.
  bool pendingQuote = false;
  csvbufferresult_t result = {
    .cellCount = 0,
    .rowCount = 0,
    .columnCount = 0
  };

  // Init the cell char index and the column counter for the current row.
  cellChar = 0;
  currentColumnCount = 0;

  // Begin buffering.
  while(true) {
    // Read up to CSV_BUFFER_SIZE bytes into our buffer.
    read = assetBufferRead(asset, buffer, CSV_BUFFER_SIZE);

    // Now walk the bytes that were read.
    for(i = 0; i < read; i++) {
      c = buffer[i];

      // Resolve a quote seen on the previous byte.
      if(pendingQuote) {
        pendingQuote = false;
        if(c == '"') {
          // "" means a single literal quote (double-escaped).
          if(cellChar < CSV_CELL_SIZE_MAX - 1) cell[cellChar++] = c;
          continue;
        }
        // Lone quote; it opened or closed an encapsulated section.
        insideEncapsulation = !insideEncapsulation;
      }

      // Characters we flat out ignore.
      if(c == '\r') continue;

      // Defer the quote decision until we have seen the next byte.
      if(c == '"') {
        pendingQuote = true;
        continue;
      }

      // Is this the end of a cell/row?
      if(c == '\0' || (!insideEncapsulation && (c == ',' || c == '\n'))) {
        cell[cellChar] = '\0';// Terminate cell string

        // Fire off the callback, stop parsing if it asks us to.
        if(callback != NULL) {
          if(!callback(asset, user, result.rowCount, currentColumnCount, cell)) {
            return result;
          }
        }

        // Prepare for next row/cell
        currentColumnCount++;
        result.columnCount = mathMax(currentColumnCount, result.columnCount);
        if(c == '\n') {
          result.rowCount++;
          currentColumnCount = 0;
        }
        result.cellCount++;
        cellChar = 0;
        continue;
      }

      // Add character to the cell, always leaving room for the terminator.
      if(cellChar < CSV_CELL_SIZE_MAX - 1) cell[cellChar++] = c;
    }

    // A short read means there is nothing left to read.
    if(read < CSV_BUFFER_SIZE) break;
  }

  // A quote still pending here could only toggle encapsulation, which no
  // longer matters. Deliver a final cell that had no terminator after it.
  if(cellChar > 0) {
    cell[cellChar] = '\0';
    if(callback != NULL) {
      if(!callback(asset, user, result.rowCount, currentColumnCount, cell)) {
        return result;
      }
    }
    currentColumnCount++;
    result.columnCount = mathMax(currentColumnCount, result.columnCount);
    result.cellCount++;
  }

  // If this is an empty row we don't count it, otherwise we do.
  if(currentColumnCount != 0) result.rowCount++;
  return result;
}
/**
 * Per-cell callback used by csvBufferRow to assemble cells into rows.
 *
 * Each cell is copied into the slot (column * CSV_CELL_SIZE_MAX) of the row's
 * character buffer. When the row index changes, the previously assembled row
 * is delivered to the row callback before the new row is started.
 *
 * @param asset Asset buffer being parsed, passed through to the row callback.
 * @param user Pointer to the csvbufferrowdata_t holding the parser state.
 * @param row Index of the row the cell belongs to.
 * @param column Index of the column the cell belongs to.
 * @param data NUL terminated cell text.
 * @return True to continue parsing, false if the row callback asked to stop.
 */
bool _csvBufferRowParserCallback(
  assetbuffer_t *asset, void *user, int32_t row, int32_t column, char *data
) {
  csvbufferrowdata_t *rowData = (csvbufferrowdata_t *)user;

  // Now did we change rows?
  if(row != rowData->row) {
    // Yes we did, let's deliver the previous row.
    if(rowData->callback != NULL) {
      if(!rowData->callback(
        asset, rowData->user, rowData->row, &rowData->rowCurrent
      )) {
        // The row was already delivered and the callback asked to stop. Mark
        // it as empty so csvBufferRow's end-of-parse flush (which fires for
        // any row with columnCount > 0) does not deliver the same row again.
        rowData->rowCurrent.columnCount = 0;
        return false;
      }
    }

    // Begin next row
    rowData->row = row;
    rowData->rowCurrent.columnCount = 0;
  }

  // Determine string info for the cell
  // NOTE(review): column is not range checked here; the capacities of
  // rowCurrent.data and rowCurrent.columns are not visible from this file, so
  // confirm that rows wider than those capacities cannot reach this point.
  int32_t length = (int32_t)strlen(data);
  int32_t offset = (column * CSV_CELL_SIZE_MAX);

  // Now copy the string data (including the terminator) to the buffer
  arrayCopy(sizeof(char), data, length + 1, rowData->rowCurrent.data + offset);

  // Update the pointer to the string
  rowData->rowCurrent.columns[column] = rowData->rowCurrent.data + offset;
  rowData->rowCurrent.columnCount++;
  return true;
}
/**
 * Streams a CSV asset row by row.
 *
 * Runs csvBuffer with _csvBufferRowParserCallback as the per-cell callback,
 * then delivers whatever row is still held in the parser state, since the
 * parser callback only delivers a row once a cell from another row arrives.
 *
 * @param asset Asset buffer to read the CSV from.
 * @param callback Optional (may be NULL) callback fired with each row.
 * @param user Opaque pointer handed through to the callback.
 * @return The counts reported by csvBuffer.
 */
csvbufferresult_t csvBufferRow(
  assetbuffer_t *asset, csvbufferrowcallback_t *callback, void *user
) {
  csvbufferrowdata_t state;
  csvbufferresult_t parsed;

  // Fresh parser state: first row, nothing buffered yet.
  state.callback = callback;
  state.user = user;
  state.row = 0;
  state.rowCurrent.columnCount = 0;

  // Parse cell by cell, letting the parser callback assemble the rows.
  parsed = csvBuffer(asset, &_csvBufferRowParserCallback, &state);

  // Nothing left to deliver, or nobody to deliver it to.
  if(callback == NULL || state.rowCurrent.columnCount <= 0) return parsed;

  // Deliver the row still held in the state. Its return value only controls
  // whether parsing continues, and parsing is already over.
  callback(asset, user, state.row, &state.rowCurrent);
  return parsed;
}
/**
 * Per-row callback used by csvBufferRowWithHeaders.
 *
 * Row 0 is copied into the header row held in the user data and is not
 * forwarded. Every other row is forwarded to the wrapped callback together
 * with that header row.
 *
 * @param asset Asset buffer being parsed, passed through to the callback.
 * @param user Pointer to the csvbufferrowwithheadersdata_t state.
 * @param row Index of the row being delivered.
 * @param csv The row being delivered.
 * @return True to continue parsing, false to stop.
 */
bool _csvBufferRowWithHeadersCallback(
  assetbuffer_t *asset, void *user, int32_t row, csvrow_t *csv
) {
  csvbufferrowwithheadersdata_t *data = (csvbufferrowwithheadersdata_t *)user;

  // Take the headers for row 0
  if(row == 0) {
    csvRowPopulate(csv, &data->headerRow);
    return true;
  }

  // csvBuffer and csvBufferRow both accept a NULL callback; do the same here
  // and simply keep parsing rather than calling through a NULL pointer.
  if(data->callback == NULL) return true;

  // Fire the callback
  return data->callback(asset, data->user, row, &data->headerRow, csv);
}
/**
 * Streams a CSV asset row by row, treating the first row as the headers.
 *
 * Wraps csvBufferRow with _csvBufferRowWithHeadersCallback, which keeps row 0
 * as the header row and forwards every later row alongside it.
 *
 * @param asset Asset buffer to read the CSV from.
 * @param callback Callback fired for each row after the header row.
 * @param user Opaque pointer handed through to the callback.
 * @return The counts reported by csvBufferRow.
 */
csvbufferresult_t csvBufferRowWithHeaders(
  assetbuffer_t *asset, csvbufferrowwitheaderscallback_t *callback, void *user
) {
  csvbufferrowwithheadersdata_t context;
  csvbufferresult_t parsed;

  // The header row inside the context is filled in when row 0 arrives.
  context.callback = callback;
  context.user = user;

  parsed = csvBufferRow(asset, &_csvBufferRowWithHeadersCallback, &context);
  return parsed;
}
/**
 * Copies one CSV row into another.
 *
 * The whole character buffer (CSV_ROW_CHARACTERS_MAX chars) is copied, then
 * the destination column pointers are rebuilt so they point into the
 * destination's own buffer, one slot of CSV_CELL_SIZE_MAX chars per column.
 *
 * @param source Row to copy from.
 * @param dest Row to copy into.
 */
void csvRowPopulate(csvrow_t *source, csvrow_t *dest) {
  int32_t column = 0;

  dest->columnCount = source->columnCount;

  // Copy the raw characters from the source buffer.
  arrayCopy(sizeof(char), source->data, CSV_ROW_CHARACTERS_MAX, dest->data);

  // Re-point every column at its slot within the destination buffer.
  while(column < source->columnCount) {
    dest->columns[column] = dest->data + (column * CSV_CELL_SIZE_MAX);
    column++;
  }
}
/**
 * Per-row callback used by csvHeadersGet. Copies the delivered row into the
 * csvrow_t passed as user data and then stops the parse.
 *
 * @param asset Asset buffer being parsed (unused).
 * @param user Pointer to the csvrow_t that receives the row.
 * @param row Index of the delivered row (unused).
 * @param current The row being delivered.
 * @return Always false, so parsing ends after the first delivered row.
 */
bool _csvHeadersGetCallback(
  assetbuffer_t *asset, void *user, int32_t row, csvrow_t *current
) {
  csvRowPopulate(current, (csvrow_t *)user);

  // False to break the loop
  return false;
}
/**
 * Reads the first delivered row of a CSV asset into the given row.
 *
 * @param asset Asset buffer to read the CSV from.
 * @param row Row that receives a copy of the first delivered row.
 * @return The counts reported by csvBufferRow for the (stopped) parse.
 */
csvbufferresult_t csvHeadersGet(assetbuffer_t *asset, csvrow_t *row) {
  csvbufferresult_t outcome;
  outcome = csvBufferRow(asset, &_csvHeadersGetCallback, row);
  return outcome;
}
/**
 * Looks up a key among the column strings of a row.
 *
 * @param row Row whose columns are searched (typically a header row).
 * @param key String to look for.
 * @return Whatever arrayFindString reports for the key within the row's
 *   columnCount column strings.
 */
int32_t csvColumnGetIndex(csvrow_t *row, char *key) {
  int32_t index = arrayFindString(row->columns, row->columnCount, key);
  return index;
}
/**
 * Per-row callback used by csvRowSearch. Compares one column of the delivered
 * row against the searched value and, on a match, copies the row out and
 * records its index.
 *
 * @param asset Asset buffer being parsed (unused).
 * @param user Pointer to the csvsearchdata_t describing the search.
 * @param row Index of the delivered row.
 * @param csv The row being delivered.
 * @return True to keep searching, false once a match has been found.
 */
bool _csvRowSearchCallback(
  assetbuffer_t *asset, void *user, int32_t row, csvrow_t *csv
) {
  csvsearchdata_t *data = (csvsearchdata_t *)user;

  // A row that does not have the searched column cannot match. Entries of
  // columns[] at or beyond columnCount were not set for this row, so they
  // must not be handed to strcmp.
  if(data->column < 0 || data->column >= csv->columnCount) return true;

  // Does the search match?
  if(strcmp(csv->columns[data->column], data->value) != 0) return true;

  // Matched, copy and end.
  csvRowPopulate(csv, data->row);
  data->rowIndex = row;
  return false;
}
/**
 * Searches a CSV asset for the first row whose given column equals a value.
 *
 * @param asset Asset buffer to read the CSV from.
 * @param row Row that receives a copy of the matching row, if any.
 * @param column Index of the column to compare.
 * @param value String the column must equal.
 * @return Index of the matching row, or -1 when no row matched.
 */
int32_t csvRowSearch(
  assetbuffer_t *asset, csvrow_t *row, int32_t column, char *value
) {
  // rowIndex stays at -1 unless the search callback records a match.
  csvsearchdata_t search = {
    .rowIndex = -1,
    .row = row,
    .column = column,
    .value = value
  };

  csvBufferRow(asset, &_csvRowSearchCallback, &search);
  return search.rowIndex;
}