Чтение файла .CSV в C

У меня есть CSV-файл:

lp;imie;nazwisko;ulica;numer;kod;miejscowosc;telefon;email;data_ur
1;Jan;Kowalski;ul. Nowa;1a;11-234;Budry;123-123-456;[email protected];1980.05.13
2;Jerzy;Nowak;ul. Konopnicka;13a/3;00-900;Lichowice;(55)333-44-55;[email protected];1990.03.23

и мне нужно прочитать это в C. У меня есть код, но только для подключения.

Ответ 1

Надеюсь, это поможет вам начать работу

Работающий пример можно посмотреть здесь: http://ideone.com/l23He (использует stdin)

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Return the num-th (1-based) ';'-separated field of line, or NULL when the
 * record has fewer than num fields, line is NULL, or num < 1.
 *
 * The line is split in place: the ';' or '\n' that ends the returned field is
 * overwritten with '\0', so pass a scratch copy if the original text is still
 * needed. A '\n' ends the record.
 *
 * Empty fields are counted ("1;;3" has three fields, the second being "").
 * strtok() cannot be used for that because it merges adjacent separators.
 */
const char* getfield(char* line, int num)
{
    char* field = line;

    if (line == NULL || num < 1)
        return NULL;

    for (;;)
    {
        size_t len = strcspn(field, ";\n");
        char stop = field[len];

        if (--num == 0)
        {
            field[len] = '\0';
            return field;
        }
        if (stop != ';')
            return NULL;    /* end of record reached before field num */
        field += len + 1;   /* step over the separator */
    }
}

/*
 * Demo driver: prints the third field of every line of the file "input".
 * Returns EXIT_FAILURE when the file cannot be opened, 0 otherwise.
 */
int main()
{
    FILE* stream = fopen("input", "r");
    if (stream == NULL)
    {
        perror("input");
        return EXIT_FAILURE;
    }

    char line[1024];
    while (fgets(line, sizeof line, stream))
    {
        /* getfield() splits its argument in place, so hand it a scratch copy */
        char tmp[sizeof line];
        memcpy(tmp, line, strlen(line) + 1);

        const char* field = getfield(tmp, 3);
        /* passing NULL for %s is undefined behaviour, so substitute a marker */
        printf("Field 3 would be %s\n", field ? field : "(none)");
    }

    fclose(stream);
    return 0;
}

Вывод:

Field 3 would be nazwisko
Field 3 would be Kowalski
Field 3 would be Nowak

Ответ 2

Следующий код написан на чистом C и обрабатывает пробелы. Он выделяет память только один раз, поэтому для каждой обработанной строки нужен один вызов free().

http://ideone.com/mSCgPM

/* Tiny CSV Reader */
/* Copyright (C) 2015, Deligiannidis Konstantinos

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program.  If not, see <http://w...content-available-to-author-only...u.org/licenses/>.  */


#include <stdio.h>
#include <string.h>
#include <stdlib.h>


/*
 * Split line into tokens separated by the (possibly multi-character) string
 * delim. There is no limit on the number of tokens.
 *
 * On success *out_storage receives ONE malloc()ed block that holds
 *   [ ptr1, ptr2, ..., ptrN, NULL, copy of line ]
 * where each ptr points into the copy and the first byte of every delimiter
 * occurrence in the copy has been replaced by '\0'. A single
 * free( *out_storage ) releases everything. line itself is not modified.
 *
 * *out_storage must be NULL on entry; this guards against overwriting (and
 * leaking) an earlier result.
 *
 * Returns the number of tokens (always >= 1; adjacent delimiters produce
 * empty tokens) or a negative code:
 *   -1  line, delim or out_storage is NULL
 *   -2  delim is the empty string
 *   -3  out of memory, or more tokens than an int can count
 *   -4  *out_storage is not NULL on entry
 */
int getcols( const char * const line, const char * const delim, char ***out_storage )
{
    const char *scan, *hit;
    char **out, *text, *field, *sep;
    size_t delim_size, line_size, size_ptr_region;
    size_t tokens_found = 1;    /* a line without any delimiter is one token */
    size_t i = 0;

    if ( !out_storage )                         return -1;
    if ( *out_storage != NULL )                 return -4;
    if ( !line || !delim )                      return -1;
    if ( (delim_size = strlen( delim )) == 0 )  return -2;

    /* Pass 1: count the tokens so the pointer table can be sized exactly. */
    scan = line;
    while ( ( hit = strstr( scan, delim ) ) ) {
        tokens_found++;
        scan = hit + delim_size;
    }
    line_size = (size_t)( scan - line ) + strlen( scan );

    /* The count is returned as an int; refuse input that cannot be reported. */
    if ( tokens_found > (size_t)( (unsigned int)-1 / 2 ) ) return -3;

    /* One block: (tokens + 1) pointers, then the text with its terminator. */
    size_ptr_region = ( tokens_found + 1 ) * sizeof( char* );
    out = malloc( size_ptr_region + line_size + 1 );
    if ( !out ) return -3;

    text = (char*) out + size_ptr_region;
    memcpy( text, line, line_size + 1 );

    /* Pass 2: cut the private copy at every delimiter and record the starts.
       Writing '\0' behind the search position does not affect later matches,
       so this finds exactly the tokens counted in pass 1. */
    field = text;
    while ( ( sep = strstr( field, delim ) ) ) {
        out[ i++ ] = field;
        *sep = '\0';
        field = sep + delim_size;
    }
    out[ i++ ] = field;     /* last token runs to the end of the line */
    out[ i ] = NULL;        /* sentinel, so callers may iterate without the count */

    *out_storage = out;
    return (int) tokens_found;
}


/*
 * Demo driver for getcols().
 * Example 1 splits one line on a two-character delimiter.
 * Example 2 splits an in-memory "file" into rows and each row into columns.
 * Returns 1 as soon as a getcols() call reports an error, 0 otherwise.
 */
int main()
{
    char in_line[] = "Arg1;;Th;s is not Del;m;ter;;Arg3;;;;Final";
    char delim[] = ";;";
    char **columns = NULL;  /* must be NULL on entry, otherwise getcols() returns -4 */
    int i, j;

    printf("Example1:\n");

    int cols_found = getcols( in_line, delim, &columns );
    if ( cols_found < 0 ) {
        fprintf( stderr, "getcols failed with code %d\n", cols_found );
        return 1;
    }
    for ( i = 0; i < cols_found; i++ ) printf("Column[ %d ] = %s\n", i, columns[ i ] );

    free( columns );    /* one free() releases the pointers and the text */
    columns = NULL;

    printf("\n\nExample2, Nested:\n\n");

    char example_file[] = "ID;Day;Month;Year;Telephone;email;Date of registration\n"
            "1;Sunday;january;2009;123-124-456;[email protected];2015-05-13\n"
            "2;Monday;March;2011;(+30)333-22-55;[email protected];2009-05-23";

    char **rows = NULL;
    int rows_found = getcols( example_file, "\n", &rows );
    if ( rows_found < 0 ) {
        /* rows is still NULL here; iterating over it would dereference NULL */
        fprintf( stderr, "getcols failed with code %d\n", rows_found );
        return 1;
    }

    for ( i = 0; rows[ i ]; i++ ) {
        char **columnX = NULL;
        int n;

        printf("Line[ %d ] = %s\n", i, rows[ i ] );
        n = getcols( rows[ i ], ";", &columnX );
        if ( n < 0 ) {
            fprintf( stderr, "getcols failed with code %d\n", n );
            free( rows );
            return 1;
        }
        for ( j = 0; columnX[ j ]; j++ ) printf("  Col[ %d ] = %s\n", j, columnX[ j ] );
        free( columnX );
    }

    free( rows );
    rows = NULL;

    return 0;
}

Ответ 3

/* csv - read write comma separated value format
 * Copyright (c) 2003 Michael B. Allen <mba2000 ioplex.com>
 *
 * The MIT License
 * 
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 * 
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 * 
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <ctype.h>
#include <errno.h>
#include <wchar.h>
#include <wctype.h>

#include "mba/msgno.h"
#include "mba/csv.h"

/* Parser states used by csv_parse_str() and csv_parse_wcs(). */
#define ST_START     1  /* at the start of a field, before its first significant character */
#define ST_COLLECT   2  /* accumulating the characters of a field */
#define ST_TAILSPACE 3  /* whitespace seen after the closing quote of a quoted field */
#define ST_END_QUOTE 4  /* a '"' was just read inside a quoted field: closing quote or first half of "" */

/* Byte-oriented input source consumed one character at a time by snextch(). */
struct sinput {
    FILE *in;                   /* when non-NULL, characters are read from this stream with fgetc() */
    const unsigned char *src;   /* used when in is NULL: next byte of an in-memory buffer */
    size_t sn;                  /* used when in is NULL: bytes remaining at src */
    size_t count;               /* number of characters handed out so far */
};
/* Wide-character in-memory input source consumed by wnextch(). */
struct winput {
    const wchar_t *src;     /* next wide character to read */
    size_t sn;              /* wide characters remaining at src */
    size_t count;           /* number of wide characters handed out so far */
};

/*
 * Fetch the next character from a byte source.
 *
 * Reads from the FILE when in->in is set, otherwise from the in-memory
 * buffer in->src / in->sn. Every character delivered bumps in->count.
 *
 * Returns the character, 0 when the source is exhausted, or -1 when the
 * stream reports a read error.
 */
static int
snextch(struct sinput *in)
{
    int c;

    if (in->in == NULL) {
        /* in-memory source */
        if (in->sn == 0) {
            return 0;
        }
        c = *in->src;
        in->src++;
        in->sn--;
    } else {
        /* stream source */
        c = fgetc(in->in);
        if (c == EOF) {
            if (!ferror(in->in)) {
                return 0;       /* plain end of file */
            }
            PMNO(errno);
            return -1;
        }
    }
    in->count++;

    return c;
}
/*
 * Fetch the next wide character from an in-memory source.
 *
 * Returns the character converted to int, or 0 when in->sn has reached
 * zero. Every character delivered bumps in->count.
 */
static int
wnextch(struct winput *in)
{
    int c;

    if (in->sn != 0) {
        c = *in->src;
        in->src++;
        in->sn--;
        in->count++;
        return c;
    }

    return 0;
}

/*
 * Parse one CSV record from `in` into NUL-terminated fields.
 *
 * Field text is written into buf (capacity bn bytes); row[0], row[1], ...
 * receive pointers to the start of each field inside buf. All rn slots of
 * row are cleared first and at most rn fields are produced. `sep` is the
 * field separator and an unquoted '\n' ends the record.
 *
 * flags is tested against CSV_TRIM (drop leading and trailing whitespace of
 * unquoted fields) and CSV_QUOTES (recognise "quoted" fields, with "" as an
 * escaped quote).
 *
 * Returns in->count, the number of characters consumed from the source, or
 * -1 on error: errno is EILSEQ for a misplaced or unterminated quote and
 * E2BIG when buf has been used up. -1 is also returned when snextch()
 * reports a read error.
 *
 * NOTE(review): in->count is a size_t but the return type is int.
 */
static int
csv_parse_str(struct sinput *in,
            unsigned char *buf,
            size_t bn,
            unsigned char *row[],
            int rn,
            int sep,
            int flags)
{
    /* j: write index within the current field
     * t: length of the field to keep (index just past the last character
     *    that counts, so trailing whitespace can be cut when trimming)
     * r: index of the next free slot in row */
    int trim, quotes, ch, state, r, j, t, inquotes;

    trim = flags & CSV_TRIM;
    quotes = flags & CSV_QUOTES;
    state = ST_START;
    inquotes = 0;
    ch = r = j = t = 0;

    memset(row, 0, sizeof(unsigned char *) * rn);

    /* Stop when the row is full, the buffer is used up, or the source
     * returns 0 (exhausted) or -1 (error). */
    while (rn && bn && (ch = snextch(in)) > 0) {
        switch (state) {
            case ST_START:
                /* leading whitespace other than the separator or newline */
                if (ch != '\n' && ch != sep && isspace(ch)) {
                    if (!trim) {
                        buf[j++] = ch; bn--;
                        t = j;
                    }
                    break;
                } else if (quotes && ch == '"') {
                    /* opening quote: discard any whitespace stored before it */
                    j = t = 0;
                    state = ST_COLLECT;
                    inquotes = 1;
                    break;
                }
                state = ST_COLLECT;
                /* fall through: this character is handled as field content */
            case ST_COLLECT:
                if (inquotes) {
                    if (ch == '"') {
                        /* either the closing quote or the first half of "" */
                        state = ST_END_QUOTE;
                        break;
                    }
                } else if (ch == sep || ch == '\n') {
                    /* end of an unquoted field */
                    row[r++] = buf; rn--;
                    if (ch == '\n' && t && buf[t - 1] == '\r') {
                        t--; bn++; /* crlf -> lf */
                    }
                    buf[t] = '\0'; bn--;
                    buf += t + 1;   /* the next field starts right after the terminator */
                    j = t = 0;
                    state = ST_START;
                    inquotes = 0;
                    if (ch == '\n') {
                        rn = 0;     /* end of record: leave the loop */
                    }
                    break;
                } else if (quotes && ch == '"') {
                    PMNF(errno = EILSEQ, ": unexpected quote in element %d", (r + 1));
                    return -1;
                }
                buf[j++] = ch; bn--;
                /* when trimming, t only advances past non-whitespace */
                if (!trim || isspace(ch) == 0) {
                    t = j;
                }
                break;
            case ST_TAILSPACE:
            case ST_END_QUOTE:
                if (ch == sep || ch == '\n') {
                    /* end of a quoted field; it is terminated at j */
                    row[r++] = buf; rn--;
                    buf[j] = '\0'; bn--;
                    buf += j + 1;
                    j = t =  0;
                    state = ST_START;
                    inquotes = 0;
                    if (ch == '\n') {
                        rn = 0;
                    }
                    break;
                } else if (quotes && ch == '"' && state != ST_TAILSPACE) {
                    buf[j++] = '"'; bn--;        /* nope, just an escaped quote */
                    t = j;
                    state = ST_COLLECT;
                    break;
                } else if (isspace(ch)) {
                    /* whitespace between the closing quote and the separator is skipped */
                    state = ST_TAILSPACE;
                    break;
                }
                /* anything else after a closing quote is malformed */
                errno = EILSEQ;
                PMNF(errno, ": bad end quote in element %d", (r + 1));
                return -1;
        }
    }
    if (ch == -1) {
        /* snextch() reported a read error */
        AMSG("");
        return -1;
    }
    if (bn == 0) {
        PMNO(errno = E2BIG);
        return -1;
    }
    if (rn) {
        /* the source ran out before a newline: finish the field in progress */
        if (inquotes && state != ST_END_QUOTE) {
            /* quoted field was never closed */
            PMNO(errno = EILSEQ);
            return -1;
        }
        row[r] = buf;
        buf[t] = '\0';
    }

    return in->count;
}
/*
 * Wide-character counterpart of csv_parse_str(): parse one CSV record from
 * `in` into L'\0'-terminated fields.
 *
 * Field text is written into buf (capacity bn wide characters); row[0],
 * row[1], ... receive pointers to the start of each field inside buf. All rn
 * slots of row are cleared first and at most rn fields are produced. `sep`
 * is the field separator and an unquoted L'\n' ends the record. flags is
 * tested against CSV_TRIM and CSV_QUOTES.
 *
 * Unlike csv_parse_str(), a '\r' preceding the newline is not stripped.
 *
 * Returns in->count, the number of wide characters consumed, or -1 on error:
 * errno is EILSEQ for a misplaced or unterminated quote and E2BIG when buf
 * has been used up.
 *
 * NOTE(review): in->count is a size_t but the return type is int.
 */
static int
csv_parse_wcs(struct winput *in, wchar_t *buf, size_t bn, wchar_t *row[], int rn, wint_t sep, int flags)
{
    /* j: write index within the current field
     * t: length of the field to keep (excludes trailing whitespace when trimming)
     * r: index of the next free slot in row */
    int trim, quotes, state, r, j, t, inquotes;
    wint_t ch;

    trim = flags & CSV_TRIM;
    quotes = flags & CSV_QUOTES;
    state = ST_START;
    inquotes = 0;
    ch = r = j = t = 0;

    memset(row, 0, sizeof(wchar_t *) * rn);

    /* Stop when the row is full, the buffer is used up, or the source is exhausted. */
    while (rn && bn && (ch = wnextch(in)) > 0) {
        switch (state) {
            case ST_START:
                /* leading whitespace other than the separator or newline */
                if (ch != L'\n' && ch != sep && iswspace(ch)) {
                    if (!trim) {
                        buf[j++] = ch; bn--;
                        t = j;
                    }
                    break;
                } else if (quotes && ch == L'"') {
                    /* opening quote: discard any whitespace stored before it */
                    j = t = 0;
                    state = ST_COLLECT;
                    inquotes = 1;
                    break;
                }
                state = ST_COLLECT;
                /* fall through: this character is handled as field content */
            case ST_COLLECT:
                if (inquotes) {
                    if (ch == L'"') {
                        /* either the closing quote or the first half of "" */
                        state = ST_END_QUOTE;
                        break;
                    }
                } else if (ch == sep || ch == L'\n') {
                    /* end of an unquoted field */
                    row[r++] = buf; rn--;
                    buf[t] = L'\0'; bn--;
                    buf += t + 1;   /* the next field starts right after the terminator */
                    j = t = 0;
                    state = ST_START;
                    inquotes = 0;
                    if (ch == L'\n') {
                        rn = 0;     /* end of record: leave the loop */
                    }
                    break;
                } else if (quotes && ch == L'"') {
                    PMNF(errno = EILSEQ, ": unexpected quote in element %d", (r + 1));
                    return -1;
                }
                buf[j++] = ch; bn--;
                /* when trimming, t only advances past non-whitespace */
                if (!trim || iswspace(ch) == 0) {
                    t = j;
                }
                break;
            case ST_TAILSPACE:
            case ST_END_QUOTE:
                if (ch == sep || ch == L'\n') {
                    /* end of a quoted field; it is terminated at j */
                    row[r++] = buf; rn--;
                    buf[j] = L'\0'; bn--;
                    buf += j + 1;
                    j = t =  0;
                    state = ST_START;
                    inquotes = 0;
                    if (ch == L'\n') {
                        rn = 0;
                    }
                    break;
                } else if (quotes && ch == L'"' && state != ST_TAILSPACE) {
                    buf[j++] = L'"'; bn--;       /* nope, just an escaped quote */
                    t = j;
                    state = ST_COLLECT;
                    break;
                } else if (iswspace(ch)) {
                    /* whitespace between the closing quote and the separator is skipped */
                    state = ST_TAILSPACE;
                    break;
                }
                /* anything else after a closing quote is malformed */
                PMNF(errno = EILSEQ, ": bad end quote in element %d", (r + 1));
                return -1;
        }
    }
    /* NOTE(review): wnextch() as visible in this file returns only 0 or a
     * character, never -1 - confirm whether this branch can be reached. */
    if (ch == (wint_t)-1) {
        AMSG("");
        return -1;
    }
    if (bn == 0) {
        PMNO(errno = E2BIG);
        return -1;
    }
    if (rn) {
        /* the source ran out before a newline: finish the field in progress */
        if (inquotes && state != ST_END_QUOTE) {
            /* quoted field was never closed */
            PMNO(errno = EILSEQ);
            return -1;
        }
        row[r] = buf;
        buf[t] = L'\0';
    }

    return in->count;
}
int
csv_row_parse_wcs(const wchar_t *src, size_t sn, wchar_t *buf, size_t bn, wchar_t *row[], int rn, int sep, int trim)
{
    struct winput input;
    input.src = src;
    input.sn = sn;
    input.count = 0;
    return csv_parse_wcs(&input, buf, bn, row, rn, (wint_t)sep, trim);
}
int
csv_row_parse_str(const unsigned char *src, size_t sn, unsigned char *buf, size_t bn, unsigned char *row[], int rn, int sep, int trim)
{
    struct sinput input;
    input.in = NULL;
    input.src = src;
    input.sn = sn;
    input.count = 0;
    return csv_parse_str(&input, buf, bn, row, rn, sep, trim);
}
/*
 * Parse one CSV record read from the stream `in`.
 *
 * Wraps the stream in a sinput source and forwards everything to
 * csv_parse_str(); numcols is the capacity of row and the `trim` argument is
 * passed on as its flags parameter. Returns whatever csv_parse_str() returns.
 *
 * The in-memory members are set to an empty buffer. snextch() falls back to
 * them when `in` is NULL, and they were previously left uninitialized, so a
 * NULL stream is now read as empty input instead of indeterminate memory.
 */
int
csv_row_fread(FILE *in, unsigned char *buf, size_t bn, unsigned char *row[], int numcols, int sep, int trim)
{
    struct sinput input;
    input.in = in;
    input.src = NULL;
    input.sn = 0;
    input.count = 0;
    return csv_parse_str(&input, buf, bn, row, numcols, sep, trim);
}

Ответ 4

// NOTE: this fragment is C++ (iostreams), not C.
// NOTE(review): `filenema` is not declared in this fragment and is presumably
// a misspelling of `filename` - confirm before using.
ifstream fs(filenema);
string line = "";
// read the file one line (one record) at a time
while (getline(fs, line))
{
    stringstream linestream(line);
    string token = "";
    // split the record into fields on ';'
    while (getline(linestream, token, ';')) 
    {
        ...    
    }
}

Ответ 5

Полный пример, который оставляет поля в исходном входном буфере в виде строк, завершающихся нулевым символом ('\0'), и даёт доступ к ним через массив указателей char. Проверено, что этот обработчик CSV работает с полями, заключёнными в "двойные кавычки", и игнорирует символы-разделители внутри них.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

// adjust BUFFER_SIZE to suit longest line
// (parenthesised so the macro stays one value inside larger expressions,
// e.g. x / BUFFER_SIZE or BUFFER_SIZE % n)
#define BUFFER_SIZE (1024 * 1024)
#define NUM_FIELDS 10       // number of fields expected on every data line
#define MAXERRS 5           // loadFile() stops once more than this many lines failed
#define RET_OK 0
#define RET_FAIL 1
#define FALSE 0
#define TRUE 1

// char* array will point to fields
// (filled by loadValues(); the pointers refer into the line buffer it was given)
char *pFields[NUM_FIELDS];
// field offsets into pFields array:
#define LP          0
#define IMIE        1
#define NAZWISKo    2
#define ULICA       3
#define NUMER       4
#define KOD         5
#define MIEJSCOw    6
#define TELEFON     7
#define EMAIL       8
#define DATA_UR     9

long loadFile(FILE *pFile, long *errcount);
static int  loadValues(char *line, long lineno);
// field delimiter, set from the command line in main()
static char delim;

/*
 * Read a delimited text file line by line, skip the header line and split
 * every data line into pFields via loadValues().
 *
 * pFile     open input stream
 * errcount  incremented for every line loadValues() rejects; reading stops
 *           once it exceeds MAXERRS
 *
 * Returns the number of lines read (header and blank lines included), or
 * RET_FAIL when pFile or errcount is NULL.
 * NOTE: RET_FAIL has the value 1, so that error return cannot be told apart
 * from "one line read"; it is kept for compatibility with existing callers.
 */
long loadFile(FILE *pFile, long *errcount){

    // static on purpose: 1 MiB is too large for many thread/process stacks,
    // and pFields keeps pointing into this buffer after we return
    static char sInputBuf [BUFFER_SIZE];
    long lineno = 0L;

    if(pFile == NULL || errcount == NULL)
        return RET_FAIL;

    // fgets() returns NULL at end of file and on error, no feof() test needed
    while (fgets(sInputBuf, sizeof sInputBuf, pFile) != NULL) {

        // skip first line (headers)
        if(++lineno==1)
            continue;

        // jump over blank lines: fgets() keeps the line terminator, so a
        // blank line is one with nothing in front of the CR/LF
        if(strcspn(sInputBuf, "\r\n")==0)
            continue;

        // set pFields array pointers to null-terminated string fields in sInputBuf
        if(loadValues(sInputBuf,lineno)==RET_FAIL){
           (*errcount)++;
            if(*errcount > MAXERRS)
                break;
        } else {
            // On return pFields array pointers point to loaded fields ready for load into DB or whatever
            // Fields can be accessed via pFields, e.g.
            printf("lp=%s, imie=%s, data_ur=%s\n", pFields[LP], pFields[IMIE], pFields[DATA_UR]);
        }
    }
    return lineno;
}


/*
 * Split line in place into exactly NUM_FIELDS fields separated by delim.
 *
 * Each separator is overwritten with '\0' and pFields[i] is pointed at the
 * start of field i inside line. A field may be wrapped in "double quotes":
 * the quotes are removed and separators between them are kept as data.
 *
 * line    text of one record; up to two trailing CR/LF characters are removed
 * lineno  line number, used only in the error messages
 *
 * Returns RET_OK, or RET_FAIL (after a message on stderr) when line is NULL
 * or the record has more or fewer than NUM_FIELDS fields.
 */
static int  loadValues(char *line, long lineno){
    if(line == NULL)
        return RET_FAIL;

    // chop up to two trailing CR/LF characters (e.g. Windows file loaded in a
    // Unix env.); the len > 0 test keeps us from reading line[-1] on ""
    size_t len = strlen(line);
    for(int n = 0; n < 2 && len > 0 && (line[len-1] == '\r' || line[len-1] == '\n'); n++)
        line[--len] = '\0';

    int fld = 0;
    int inquote = FALSE;

    pFields[fld]=line;
    for(char *cptr = line; *cptr != '\0'; cptr++){
        char ch = *cptr;

        if(ch == '"') {
            if(! inquote)
                pFields[fld]=cptr+1;        // field starts after the opening "
            else
                *cptr = '\0';               // zero out closing " to end the field
            inquote = ! inquote;
        } else if(ch == delim && ! inquote){
            *cptr = '\0';                   // end of field, null terminate it
            if(fld == NUM_FIELDS-1){
                // one more field would be stored past the end of pFields
                fprintf(stderr, "Expected field count (%d) exceeded on line %ld\n", NUM_FIELDS, lineno);
                return RET_FAIL;
            }
            pFields[++fld]=cptr+1;
        }
    }
    if (fld < NUM_FIELDS-1){
        fprintf(stderr, "Expected field count (%d) not reached on line %ld\n", NUM_FIELDS, lineno);
        return RET_FAIL;
    }
    return RET_OK;
}

/*
 * Usage: program csvfilepath delimiter
 *
 * Loads the file with loadFile() using the first character of argv[2] as the
 * field delimiter, then prints the line and error counts.
 * Returns RET_OK when no line was rejected, RET_FAIL otherwise or on bad
 * arguments / an unreadable file.
 */
int main(int argc, char **argv)
{
   FILE *fp;
   long errcount = 0L;
   long lines = 0L;

   if(argc!=3){
       // argv[0] is printed as given: basename() needs <libgen.h>, which is
       // not included here, and argv[0] may be NULL when argc is 0
       printf("Usage: %s csvfilepath delimiter\n", argc > 0 ? argv[0] : "csvload");
       return (RET_FAIL);
   }
   if((delim=argv[2][0])=='\0'){
       fprintf(stderr,"delimiter must be specified\n");
       return (RET_FAIL);
   }
   fp = fopen(argv[1] , "r");
   if(fp == NULL) {
      // perror() appends the system's reason without needing <errno.h>
      perror("Error opening file");
      return(RET_FAIL);
   }
   lines=loadFile(fp,&errcount);
   fclose(fp);
   printf("Processed %ld lines, encountered %ld error(s)\n", lines, errcount);
   if(errcount>0)
        return(RET_FAIL);
    return(RET_OK);
}

Ответ 6

Решил поделиться этим кодом. Он довольно простой, но эффективный. Он обрабатывает файлы с разделителями-запятыми, в том числе со значениями в кавычках. Вы можете легко изменить его под свои нужды.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>


/*
 * Echo a comma-separated file to stdout, skipping its first lines.
 *
 *   argv[1] path to csv file
 *   argv[2] number of lines to skip
 *   argv[3] length of longest value (in characters)
 *
 * A comma between "double quotes" is part of the value, not a separator; the
 * quotes themselves are kept in the output. A newline always ends the line.
 *
 * Returns EXIT_SUCCESS, or EXIT_FAILURE on bad arguments, when the file
 * cannot be opened or read, when memory cannot be allocated, or when a value
 * turns out to be longer than argv[3] characters.
 */
int main(int argc, char *argv[])
{
  FILE *pfinput;
  unsigned long nSkipLines, currentLine, lenLongestValue;
  char *pTempValHolder;
  char *end;
  size_t capacity;      //size of pTempValHolder, including the terminator
  size_t vcpm;          //value character marker
  int c;
  int QuotationOnOff;   //0 - off, 1 - on
  int midLine;          //1 while the current line has output not yet ended by '\n'
  int status = EXIT_SUCCESS;

  if(argc != 4) {
    fprintf(stderr, "Usage: %s csvfile lines_to_skip longest_value_length\n",
            argc > 0 ? argv[0] : "csvread");
    return EXIT_FAILURE;
  }

  //strtoul instead of atoi so that non-numeric input is rejected
  nSkipLines = strtoul(argv[2], &end, 10);
  if(end == argv[2] || *end != '\0') {
    fprintf(stderr, "invalid number of lines to skip: %s\n", argv[2]);
    return EXIT_FAILURE;
  }
  lenLongestValue = strtoul(argv[3], &end, 10);
  if(end == argv[3] || *end != '\0' || lenLongestValue >= (size_t)-1) {
    fprintf(stderr, "invalid longest value length: %s\n", argv[3]);
    return EXIT_FAILURE;
  }

  //one extra byte for the terminating '\0'
  capacity = (size_t)lenLongestValue + 1;
  pTempValHolder = malloc(capacity);
  if(pTempValHolder == NULL) {
    perror("malloc");
    return EXIT_FAILURE;
  }

  pfinput = fopen(argv[1],"r");
  if(pfinput == NULL) {
    perror(argv[1]);
    free(pTempValHolder);
    return EXIT_FAILURE;
  }

  currentLine = 1;
  vcpm = 0;
  QuotationOnOff = 0;
  midLine = 0;

  //currentLine > nSkipLines condition ignores the first argv[2] lines
  while( (c = fgetc(pfinput)) != EOF)
  {
    if(c == '\n') {
      if(currentLine > nSkipLines)
      {
        pTempValHolder[vcpm] = '\0';
        printf("%s\n",pTempValHolder);
        vcpm = 0;
      }
      currentLine++;
      midLine = 0;
      continue;
    }

    if(currentLine <= nSkipLines)
      continue;

    midLine = 1;

    if(c == ',' && !QuotationOnOff) {
      //separator: flush the value collected so far
      pTempValHolder[vcpm] = '\0';
      printf("%s,",pTempValHolder);
      vcpm = 0;
      continue;
    }

    if(c == '\"')
      QuotationOnOff = !QuotationOnOff;

    //everything else, including quotes and quoted commas, is value text
    if(vcpm + 1 >= capacity) {
      fprintf(stderr, "line %lu: value longer than %lu characters\n",
              currentLine, lenLongestValue);
      status = EXIT_FAILURE;
      break;
    }
    pTempValHolder[vcpm++] = (char)c;
  }

  if(status == EXIT_SUCCESS && ferror(pfinput)) {
    perror(argv[1]);
    status = EXIT_FAILURE;
  }

  //last line without a trailing newline: do not lose its final value
  if(status == EXIT_SUCCESS && midLine) {
    pTempValHolder[vcpm] = '\0';
    printf("%s\n",pTempValHolder);
  }

  fclose(pfinput);
  free(pTempValHolder);

  return status;
}