Why Gemfury? Push, build, and install  RubyGems npm packages Python packages Maven artifacts PHP packages Go Modules Debian packages RPM packages NuGet packages

Repository URL to install this package:

Details    
fpc-src / usr / share / fpcsrc / 3.0.0 / packages / fcl-base / src / csvreadwrite.pp
Size: Mime:
{
  CSV Parser, Builder classes.
  Version 0.5 2014-10-25

  Copyright (C) 2010-2014 Vladimir Zhirov <vvzh.home@gmail.com>

  Contributors:
    Luiz Americo Pereira Camara
    Mattias Gaertner
    Reinier Olislagers

  This library is free software; you can redistribute it and/or modify it
  under the terms of the GNU Library General Public License as published by
  the Free Software Foundation; either version 2 of the License, or (at your
  option) any later version with the following modification:

  As a special exception, the copyright holders of this library give you
  permission to link this library with independent modules to produce an
  executable, regardless of the license terms of these independent modules,and
  to copy and distribute the resulting executable under terms of your choice,
  provided that you also meet, for each linked independent module, the terms
  and conditions of the license of that module. An independent module is a
  module which is not derived from or based on this library. If you modify
  this library, you may extend this exception to your version of the library,
  but you are not obligated to do so. If you do not wish to do so, delete this
  exception statement from your version.

  This program is distributed in the hope that it will be useful, but WITHOUT
  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public License
  for more details.

  You should have received a copy of the GNU Library General Public License
  along with this library; if not, write to the Free Software Foundation,
  Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
}

unit csvreadwrite;

{$mode objfpc}{$H+}

interface

uses
  Classes, SysUtils, strutils;

Type
  TCSVChar = Char;

  { TCSVHandler }

  TCSVHandler = class(TPersistent)
  private
    procedure SetDelimiter(const AValue: TCSVChar);
    procedure SetQuoteChar(const AValue: TCSVChar);
    procedure UpdateCachedChars;
  protected
    // special chars
    FDelimiter: TCSVChar;
    FQuoteChar: TCSVChar;
    FLineEnding: String;
    // cached values to speed up special chars operations
    FSpecialChars: TSysCharSet;
    FDoubleQuote: String;
    // parser settings
    FIgnoreOuterWhitespace: Boolean;
    // builder settings
    FQuoteOuterWhitespace: Boolean;
    // document settings
    FEqualColCountPerRow: Boolean;
  public
    constructor Create; virtual;
    procedure Assign(ASource: TPersistent); override;
    procedure AssignCSVProperties(ASource: TCSVHandler);
    // Delimiter that separates the field, e.g. comma, semicolon, tab
    property Delimiter: TCSVChar read FDelimiter write SetDelimiter;
    // Character used to quote "problematic" data
    // (e.g. with delimiters or spaces in them)
    // A common quotechar is "
    property QuoteChar: TCSVChar read FQuoteChar write SetQuoteChar;
    // String at the end of the line of data (e.g. CRLF)
    property LineEnding: String read FLineEnding write FLineEnding;
    // Ignore whitespace between delimiters and field data
    property IgnoreOuterWhitespace: Boolean read FIgnoreOuterWhitespace write FIgnoreOuterWhitespace;
    // Use quotes when outer whitespace is found
    property QuoteOuterWhitespace: Boolean read FQuoteOuterWhitespace write FQuoteOuterWhitespace;
    // When reading and writing: make sure every line has the same column count, create empty cells in the end of row if required
    property EqualColCountPerRow: Boolean read FEqualColCountPerRow write FEqualColCountPerRow;
  end;

  // Sequential input from CSV stream

  { TCSVParser }

  TCSVParser = class(TCSVHandler)
  private
    FFreeStream: Boolean;
    // fields
    FSourceStream: TStream;
    FStrStreamWrapper: TStringStream;
    // parser state
    EndOfFile: Boolean;
    EndOfLine: Boolean;
    FCurrentChar: TCSVChar;
    FCurrentRow: Integer;
    FCurrentCol: Integer;
    FMaxColCount: Integer;
    // output buffers
    FCellBuffer: String;
    FWhitespaceBuffer: String;
    procedure ClearOutput;
    // basic parsing
    procedure SkipEndOfLine;
    procedure SkipDelimiter;
    procedure SkipWhitespace;
    procedure NextChar;
    // complex parsing
    procedure ParseCell;
    procedure ParseQuotedValue;
    // simple parsing
    procedure ParseValue;
  public
    constructor Create;
    destructor Destroy; override;
    // Source data stream
    procedure SetSource(AStream: TStream); overload;
    // Source data string.
    procedure SetSource(const AString: String); overload;
    // Rewind to beginning of data
    procedure ResetParser;
    // Read next cell data; return false if end of file reached
    function  ParseNextCell: Boolean;
    // Current row (0 based)
    property CurrentRow: Integer read FCurrentRow;
    // Current column (0 based); -1 if invalid/before beginning of file
    property CurrentCol: Integer read FCurrentCol;
    // Data in current cell
    property CurrentCellText: String read FCellBuffer;
    // The maximum number of columns found in the stream:
    property MaxColCount: Integer read FMaxColCount;
    // Does the parser own the stream ? If true, a previous stream is freed when set or when parser is destroyed.
    Property FreeStream : Boolean Read FFreeStream Write FFreeStream;
  end;

  // Sequential output to CSV stream
  TCSVBuilder = class(TCSVHandler)
  private
    FOutputStream: TStream;
    FDefaultOutput: TMemoryStream;
    FNeedLeadingDelimiter: Boolean;
    function GetDefaultOutputAsString: String;
  protected
    procedure AppendStringToStream(const AString: String; AStream: TStream);
    function  QuoteCSVString(const AValue: String): String;
  public
    constructor Create;
    destructor Destroy; override;
    // Set output/destination stream.
    // If not called, output is sent to DefaultOutput
    procedure SetOutput(AStream: TStream);
    // If using default stream, reset output to beginning.
    // If using user-defined stream, user should reposition stream himself
    procedure ResetBuilder;
    // Add a cell to the output with data AValue
    procedure AppendCell(const AValue: String);
    // Write end of row to the output, starting a new row
    procedure AppendRow;
    // Default output as memorystream (if output not set using SetOutput)
    property DefaultOutput: TMemoryStream read FDefaultOutput;
    // Default output in string format (if output not set using SetOutput)
    property DefaultOutputAsString: String read GetDefaultOutputAsString;
  end;

function ChangeLineEndings(const AString, ALineEnding: String): String;

implementation

const
  CsvCharSize = SizeOf(TCSVChar);
  CR    = #13;
  LF    = #10;
  HTAB  = #9;
  SPACE = #32;
  WhitespaceChars = [HTAB, SPACE];
  LineEndingChars = [CR, LF];

// The following implementation of ChangeLineEndings function originates from
// Lazarus CodeTools library by Mattias Gaertner. It was explicitly allowed
// by Mattias to relicense it under modified LGPL and include into CsvDocument.

function ChangeLineEndings(const AString, ALineEnding: String): String;
var
  I: Integer;
  Src: PChar;
  Dest: PChar;
  DestLength: Integer;
  EndingLength: Integer;
  EndPos: PChar;
begin
  if AString = '' then
    Exit(AString);
  EndingLength := Length(ALineEnding);
  DestLength := Length(AString);

  Src := PChar(AString);
  EndPos := Src + DestLength;
  while Src < EndPos do
  begin
    if (Src^ = CR) then
    begin
      Inc(Src);
      if (Src^ = LF) then
      begin
        Inc(Src);
        Inc(DestLength, EndingLength - 2);
      end else
        Inc(DestLength, EndingLength - 1);
    end else
    begin
      if (Src^ = LF) then
        Inc(DestLength, EndingLength - 1);
      Inc(Src);
    end;
  end;

  SetLength(Result, DestLength);
  Src := PChar(AString);
  Dest := PChar(Result);
  EndPos := Dest + DestLength;
  while (Dest < EndPos) do
  begin
    if Src^ in LineEndingChars then
    begin
      for I := 1 to EndingLength do
      begin
        Dest^ := ALineEnding[I];
        Inc(Dest);
      end;
      if (Src^ = CR) and (Src[1] = LF) then
        Inc(Src, 2)
      else
        Inc(Src);
    end else
    begin
      Dest^ := Src^;
      Inc(Src);
      Inc(Dest);
    end;
  end;
end;

{ TCSVHandler }

procedure TCSVHandler.SetDelimiter(const AValue: TCSVChar);
begin
  if FDelimiter <> AValue then
  begin
    FDelimiter := AValue;
    UpdateCachedChars;
  end;
end;

procedure TCSVHandler.SetQuoteChar(const AValue: TCSVChar);
begin
  if FQuoteChar <> AValue then
  begin
    FQuoteChar := AValue;
    UpdateCachedChars;
  end;
end;

procedure TCSVHandler.UpdateCachedChars;
begin
  FDoubleQuote := FQuoteChar + FQuoteChar;
  FSpecialChars := [CR, LF, FDelimiter, FQuoteChar];
end;

constructor TCSVHandler.Create;
begin
  inherited Create;
  FDelimiter := ',';
  FQuoteChar := '"';
  FLineEnding := sLineBreak;
  FIgnoreOuterWhitespace := False;
  FQuoteOuterWhitespace := True;
  FEqualColCountPerRow := True;
  UpdateCachedChars;
end;

procedure TCSVHandler.Assign(ASource: TPersistent);
begin
  if (ASource is TCSVHandler) then
    AssignCSVProperties(ASource as TCSVHandler)
  else
    inherited Assign(ASource);
end;

procedure TCSVHandler.AssignCSVProperties(ASource: TCSVHandler);
begin
  FDelimiter := ASource.FDelimiter;
  FQuoteChar := ASource.FQuoteChar;
  FLineEnding := ASource.FLineEnding;
  FIgnoreOuterWhitespace := ASource.FIgnoreOuterWhitespace;
  FQuoteOuterWhitespace := ASource.FQuoteOuterWhitespace;
  FEqualColCountPerRow := ASource.FEqualColCountPerRow;
  UpdateCachedChars;
end;

{ TCSVParser }

procedure TCSVParser.ClearOutput;
begin
  FCellBuffer := '';
  FWhitespaceBuffer := '';
  FCurrentRow := 0;
  FCurrentCol := -1;
  FMaxColCount := 0;
end;

procedure TCSVParser.SkipEndOfLine;
begin
  // treat LF+CR as two linebreaks, not one
  if (FCurrentChar = CR) then
    NextChar;
  if (FCurrentChar = LF) then
    NextChar;
end;

procedure TCSVParser.SkipDelimiter;
begin
  if FCurrentChar = FDelimiter then
    NextChar;
end;

procedure TCSVParser.SkipWhitespace;
begin
  while FCurrentChar = SPACE do
    NextChar;
end;

procedure TCSVParser.NextChar;
begin
  if FSourceStream.Read(FCurrentChar, CsvCharSize) < CsvCharSize then
  begin
    FCurrentChar := #0;
    EndOfFile := True;
  end;
  EndOfLine := FCurrentChar in LineEndingChars;
end;

procedure TCSVParser.ParseCell;
begin
  FCellBuffer := '';
  if FIgnoreOuterWhitespace then
    SkipWhitespace;
  if FCurrentChar = FQuoteChar then
    ParseQuotedValue
  else
    ParseValue;
end;

procedure TCSVParser.ParseQuotedValue;
var
  QuotationEnd: Boolean;
begin
  NextChar; // skip opening quotation char
  repeat
    // read value up to next quotation char
    while not ((FCurrentChar = FQuoteChar) or EndOfFile) do
    begin
      if EndOfLine then
      begin
        AppendStr(FCellBuffer, FLineEnding);
        SkipEndOfLine;
      end else
      begin
        AppendStr(FCellBuffer, FCurrentChar);
        NextChar;
      end;
    end;
    // skip quotation char (closing or escaping)
    if not EndOfFile then
      NextChar;
    // check if it was escaping
    if FCurrentChar = FQuoteChar then
    begin
      AppendStr(FCellBuffer, FCurrentChar);
      QuotationEnd := False;
      NextChar;
    end else
      QuotationEnd := True;
  until QuotationEnd;
  // read the rest of the value until separator or new line
  ParseValue;
end;

procedure TCSVParser.ParseValue;
begin
  while not ((FCurrentChar = FDelimiter) or EndOfLine or EndOfFile) do
  begin
    AppendStr(FWhitespaceBuffer, FCurrentChar);
    NextChar;
  end;
  // merge whitespace buffer
  if FIgnoreOuterWhitespace then
    RemoveTrailingChars(FWhitespaceBuffer, WhitespaceChars);
  AppendStr(FCellBuffer, FWhitespaceBuffer);
  FWhitespaceBuffer := '';
end;

constructor TCSVParser.Create;
begin
  inherited Create;
  ClearOutput;
  FStrStreamWrapper := nil;
  EndOfFile := True;
end;

destructor TCSVParser.Destroy;
begin
  if FFreeStream and (FSourceStream<>FStrStreamWrapper) then
     FreeAndNil(FSourceStream);
  FreeAndNil(FStrStreamWrapper);
  inherited Destroy;
end;

procedure TCSVParser.SetSource(AStream: TStream);
begin
  If FSourceStream=AStream then exit;
  if FFreeStream and (FSourceStream<>FStrStreamWrapper) then
     FreeAndNil(FSourceStream);
  FSourceStream := AStream;
  ResetParser;
end;

procedure TCSVParser.SetSource(const AString: String); overload;
begin
  FreeAndNil(FStrStreamWrapper);
  FStrStreamWrapper := TStringStream.Create(AString);
  SetSource(FStrStreamWrapper);
end;

procedure TCSVParser.ResetParser;
begin
  ClearOutput;
  FSourceStream.Seek(0, soFromBeginning);
  EndOfFile := False;
  NextChar;
end;

// Parses next cell; returns True if there are more cells in the input stream.
function TCSVParser.ParseNextCell: Boolean;
var
  LineColCount: Integer;
begin
  if EndOfLine or EndOfFile then
  begin
    // Having read the previous line, adjust column count if necessary:
    LineColCount := FCurrentCol + 1;
    if LineColCount > FMaxColCount then
      FMaxColCount := LineColCount;
  end;

  if EndOfFile then
    Exit(False);

  // Handle line ending
  if EndOfLine then
  begin
    SkipEndOfLine;
    if EndOfFile then
      Exit(False);
    FCurrentCol := 0;
    Inc(FCurrentRow);
  end else
    Inc(FCurrentCol);

  // Skipping a delimiter should be immediately followed by parsing a cell
  // without checking for line break first, otherwise we miss last empty cell.
  // But 0th cell does not start with delimiter unlike other cells, so
  // the following check is required not to miss the first empty cell:
  if FCurrentCol > 0 then
    SkipDelimiter;
  ParseCell;
  Result := True;
end;

{ TCSVBuilder }

function TCSVBuilder.GetDefaultOutputAsString: String;
var
  StreamSize: Integer;
begin
  Result := '';
  StreamSize := FDefaultOutput.Size;
  if StreamSize > 0 then
  begin
    SetLength(Result, StreamSize);
    FDefaultOutput.ReadBuffer(Result[1], StreamSize);
  end;
end;

procedure TCSVBuilder.AppendStringToStream(const AString: String; AStream: TStream);
var
  StrLen: Integer;
begin
  StrLen := Length(AString);
  if StrLen > 0 then
    AStream.WriteBuffer(AString[1], StrLen);
end;

function TCSVBuilder.QuoteCSVString(const AValue: String): String;
var
  I: Integer;
  ValueLen: Integer;
  NeedQuotation: Boolean;
begin
  ValueLen := Length(AValue);

  NeedQuotation := (AValue <> '') and FQuoteOuterWhitespace
    and ((AValue[1] in WhitespaceChars) or (AValue[ValueLen] in WhitespaceChars));

  if not NeedQuotation then
    for I := 1 to ValueLen do
    begin
      if AValue[I] in FSpecialChars then
      begin
        NeedQuotation := True;
        Break;
      end;
    end;

  if NeedQuotation then
  begin
    // double existing quotes
    Result := FDoubleQuote;
    Insert(StringReplace(AValue, FQuoteChar, FDoubleQuote, [rfReplaceAll]),
      Result, 2);
  end else
    Result := AValue;
end;

constructor TCSVBuilder.Create;
begin
  inherited Create;
  FDefaultOutput := TMemoryStream.Create;
  FOutputStream := FDefaultOutput;
end;

destructor TCSVBuilder.Destroy;
begin
  FreeAndNil(FDefaultOutput);
  inherited Destroy;
end;

procedure TCSVBuilder.SetOutput(AStream: TStream);
begin
  if Assigned(AStream) then
    FOutputStream := AStream
  else
    FOutputStream := FDefaultOutput;

  ResetBuilder;
end;

procedure TCSVBuilder.ResetBuilder;
begin
  if FOutputStream = FDefaultOutput then
    FDefaultOutput.Clear;

  // Do not clear external FOutputStream because it may be pipe stream
  // or something else that does not support size and position.
  // To clear external output is up to the user of TCSVBuilder.

  FNeedLeadingDelimiter := False;
end;

procedure TCSVBuilder.AppendCell(const AValue: String);
var
  CellValue: String;
begin
  if FNeedLeadingDelimiter then
    FOutputStream.WriteBuffer(FDelimiter, CsvCharSize);

  CellValue := ChangeLineEndings(AValue, FLineEnding);
  CellValue := QuoteCSVString(CellValue);
  AppendStringToStream(CellValue, FOutputStream);

  FNeedLeadingDelimiter := True;
end;

procedure TCSVBuilder.AppendRow;
begin
  AppendStringToStream(FLineEnding, FOutputStream);
  FNeedLeadingDelimiter := False;
end;

end.