Repository URL to install this package:
Version:
3.0.0 ▾
|
{
This file is part of the Free Component Library (FCL)
Copyright (c) 2012 by the Free Pascal development team
Memory database
See the file COPYING.FPC, included in this distribution,
for details about the copyright.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
**********************************************************************}
unit memindexdb;
{$mode objfpc}{$H+}
interface
uses
Classes, SysUtils, fpindexer, contnrs;
Type
TMatch = Class;
{ TDescrItem }
TDescrItem = Class(TCollectionItem)
private
FDescription: String;
FTheID: Integer;
Protected
Function BlockSize : Integer; virtual;
Procedure WriteStringToStream(Astream : TStream; S : String);
Procedure WriteToStream(S : TStream); virtual;
Procedure WriteRefToStream(AStream : Tstream; AItem : TDescrItem);
Function ReadStringFromStream(Astream : TStream) : String;
Function ReadFromStream(S : TStream) : Integer; virtual;
Published
// The description
Property Description : String Read FDescription Write FDescription;
// ID. Not used during work. Only used when loading/saving,
// it then equals the Index. (index is slow, uses a linear search)
Property TheID : Integer Read FTheID Write FTheID;
end;
{ TDescrCollection }
TDescrCollection = Class(TCollection)
private
FHash : TFPhashList;
FLoadCount : Integer;
function GetD(AIndex : Integer): TDescrItem;
procedure SetD(AIndex : Integer; AValue: TDescrItem);
Protected
Procedure RebuildHash;
Function Resolve(AIndex : Integer) : TDescrItem;
Public
destructor destroy; override;
Procedure BeginLoading;
Procedure EndLoading;
Procedure AllocateIDS;
Function FindDescr(Const ADescription : String) : TDescrItem;
Function AddDescr(Const ADescription : String) : TDescrItem;
Property Descr [AIndex : Integer] : TDescrItem Read GetD Write SetD; default;
end;
TLanguageItem = Class(TDescrItem);
TMatchedItem = Class(TDescrItem)
Private
FList : TFPList;
function GetMatch(AIndex : Integer): TMatch;
function GetMatchCount: Integer;
Protected
Function AddMatch(AMatch : TMatch) : Integer;
Procedure RemoveMatch(AMatch : TMatch);
Public
Constructor Create(ACollection : TCollection); override;
Destructor Destroy; override;
Property Matches [AIndex : Integer] : TMatch Read GetMatch; default;
Property MatchCount : Integer Read GetMatchCount;
end;
{ TWordItem }
TWordItem = Class(TMatchedItem);
{ TURLItem }
TURLItem = Class(TMatchedItem)
private
FLanguage: TLanguageItem;
FURLDate: TDateTime;
FLangID : Integer;
protected
Function BlockSize : Integer; override;
Procedure WriteToStream(S : TStream); override;
Function ReadFromStream(S : TStream) : Integer; override;
Procedure Resolve(Languages: TDescrCollection);
Public
Property URLDate : TDateTime Read FURLDate Write FURLDate;
Property Language : TLanguageItem Read FLanguage Write FLanguage;
end;
{ TMatch }
TMatch = Class(TDescrItem)
private
FPosition: Int64;
FURL: TURLItem;
FWord: TWordItem;
FWordID : Integer;
FURLID : Integer;
function GetContext: String;
procedure SetContext(AValue: String);
procedure SetURL(AValue: TURLItem);
procedure SetWord(AValue: TWordItem);
protected
Function BlockSize : Integer; override;
Procedure WriteToStream(S : TStream); override;
Procedure Resolve(Words, URLS : TDescrCollection);
Function ReadFromStream(S : TStream) : Integer; override;
Public
Property Word : TWordItem Read FWord Write SetWord;
Property URL : TURLItem Read FURL Write SetURL;
Property Position : Int64 Read FPosition Write FPosition;
Property Context : String Read GetContext Write SetContext;
end;
{ TMatches }
TMatches = Class(TCollection)
private
function GetM(AIndex : Integer): TMatch;
procedure SetM(AIndex : Integer; AValue: TMatch);
Public
Function AddMatch(AWord : TWordItem; AURL : TURLItem) : TMatch;
Property Matches[AIndex : Integer] : TMatch Read GetM Write SetM; default;
end;
{ TMemIndexDB }
TMemIndexDB = class(TCustomIndexDB)
Private
FStream: TStream;
FURLS : TDescrCollection;
FLanguages : TDescrCollection;
FWords : TDescrCollection;
FMatches : TMatches;
procedure GetMatches(AWord: String; SearchOptions: TSearchOptions; AList: TFPList);
procedure IntersectMatches(ListA, ListB: TFPList);
procedure UnionMatches(ListA, ListB: TFPList);
protected
Procedure LoadFromStream; virtual; abstract;
Procedure SaveToStream; virtual; abstract;
procedure Clear;virtual;
public
Constructor Create(AOwner : TComponent); override;
Destructor Destroy; override;
procedure Connect; override;
procedure DisConnect; override;
procedure CommitTrans; override;
procedure BeginTrans; override;
procedure CompactDB; override;
procedure DeleteWordsFromFile(URL: string); override;
procedure AddSearchData(ASearchData: TSearchWordData); override;
procedure FindSearchData(SearchWord: TWordParser; FPSearch: TFPSearch; SearchOptions: TSearchOptions); override;
procedure CreateIndexerTables; override;
Property Stream : TStream Read FStream Write FStream;
end;
{ TFileIndexDB }
TFileIndexDB = Class(TMemIndexDB)
private
FFIleName: String;
FWriteOnCommit: Boolean;
Protected
Procedure LoadFromStream; override;
Procedure SaveToStream; override;
Public
procedure Connect; override;
procedure DisConnect; override;
procedure CommitTrans; override;
Property FileName : String Read FFIleName Write FFileName;
Property WriteOnCommit : Boolean Read FWriteOnCommit Write FWriteOnCommit;
end;
implementation
uses bufstream;
{ TMemIndexDB }
Resourcestring
SErrNoStream = 'No stream assigned';
SInvalidStreamData = 'Invalid data at offset %d. Got %d, expected %d.';
{ TFileIndexDB }
Const
FileVersion = 1;
LanguageBlock = 1;
URLBlock = 2;
WordBlock = 3;
MatchBlock = 4;
{ TURLItem }
function TURLItem.BlockSize: Integer;
begin
Result:=inherited BlockSize;
Result:=Result+sizeOf(FURLDate)+SizeOf(Integer);
end;
procedure TURLItem.WriteToStream(S: TStream);
Var
I : Integer;
begin
inherited WriteToStream(S);
S.WriteBuffer(FURLDate,SizeOf(FURLDate));
WriteRefToStream(S,FLanguage);
end;
function TURLItem.ReadFromStream(S: TStream): Integer;
begin
Result:=inherited ReadFromStream(S);
S.ReadBuffer(FURLDate,SizeOf(FURLDate));
S.ReadBuffer(FLangID,SizeOf(FLangID));
end;
procedure TURLItem.Resolve(Languages: TDescrCollection);
begin
FLanguage:=TLanguageItem(Languages.Resolve(FLangID));
end;
{ TDescrItem }
function TDescrItem.BlockSize: Integer;
begin
Result:=Sizeof(Integer)+Length(FDescription)*SizeOf(Char);
end;
procedure TDescrItem.WriteStringToStream(Astream: TStream; S: String);
Var
L : Integer;
begin
L:=Length(S);
AStream.WriteBuffer(L,SizeOf(L));
if (L>0) then
AStream.WriteBuffer(S[1],L*SizeOf(Char));
end;
procedure TDescrItem.WriteToStream(S: TStream);
begin
S.WriteDWord(BlockSize);
WriteStringToStream(S,FDescription);
end;
procedure TDescrItem.WriteRefToStream(AStream : Tstream; AItem: TDescrItem);
Var
I : Integer;
begin
If AItem=Nil then
I:=0
else
I:=AItem.TheID;
AStream.WriteBuffer(I,SizeOf(I));
end;
function TDescrItem.ReadStringFromStream(Astream: TStream): String;
Var
L : Integer;
begin
AStream.ReadBuffer(L,SizeOf(L));
SetLength(Result,L);
if (L>0) then
AStream.ReadBuffer(Pointer(Result)^,L*SizeOf(Char));
end;
function TDescrItem.ReadFromStream(S: TStream) : Integer;
begin
S.ReadBuffer(Result,SizeOf(Result));
Description:=ReadStringFromStream(S);
end;
procedure TFileIndexDB.LoadFromStream;
Var
I,S,L : Integer;
U : TURLItem;
W : TWordItem;
M : TMatch;
Li : TLanguageItem;
begin
Clear;
L:=Stream.ReadDWord;
if (L<>FileVersion) then
Raise EFPIndexEr.CreateFmt(SInvalidStreamData,[Stream.Position,L,FileVersion]);
L:=Stream.ReadDWord;
if (L<>LanguageBlock) then
Raise EFPIndexEr.CreateFmt(SInvalidStreamData,[Stream.Position,L,LanguageBlock]);
L:=Stream.ReadDWord;
FLanguages.BeginLoading;
For I:=0 to L-1 do
begin
Li:=TLanguageItem(FLanguages.Add);
S:=Li.ReadFromStream(Stream);
if (S<>Li.BlockSize) then
Raise EFPIndexEr.CreateFmt(SInvalidStreamData,[Stream.Position,S,Li.BlockSize]);
end;
FLanguages.EndLoading;
L:=Stream.ReadDWord;
if (L<>URLBlock) then
Raise EFPIndexEr.CreateFmt(SInvalidStreamData,[Stream.Position,L,URLBlock]);
L:=Stream.ReadDWord;
FURLS.BeginLoading;
For I:=0 to L-1 do
begin
U:=TURLItem(FURLS.AddDescr(''));
S:=U.ReadFromStream(Stream);
if (S<>U.BlockSize) then
Raise EFPIndexEr.CreateFmt(SInvalidStreamData,[Stream.Position,S,U.BlockSize]);
U.Resolve(FLanguages);
end;
FURLS.EndLoading;
L:=Stream.ReadDWord;
if (L<>WordBlock) then
Raise EFPIndexEr.CreateFmt(SInvalidStreamData,[Stream.Position,L,WordBlock]);
L:=Stream.ReadDWord;
FWords.BeginLoading;
For I:=0 to L-1 do
begin
W:=TWordItem(FWords.AddDescr(''));
S:=W.ReadFromStream(Stream);
if (S<>W.BlockSize) then
Raise EFPIndexEr.CreateFmt(SInvalidStreamData,[Stream.Position,S,W.BlockSize]);
end;
FWords.EndLoading;
L:=Stream.ReadDWord;
if (L<>MatchBlock) then
Raise EFPIndexEr.CreateFmt(SInvalidStreamData,[Stream.Position,L,MatchBlock]);
L:=Stream.ReadDWord;
For I:=0 to L-1 do
begin
M:=TMatch(FMatches.Add);
S:=M.ReadFromStream(Stream);
M.Resolve(FWords,FURLS);
if (S<>M.BlockSize) then
Raise EFPIndexEr.CreateFmt(SInvalidStreamData,[Stream.Position,S,M.BlockSize]);
end;
end;
procedure TFileIndexDB.SaveToStream;
Var
I : Integer;
L : Integer;
U : TURLItem;
begin
Stream.WriteDWord(FileVersion);
Stream.WriteDWord(LanguageBlock);
Stream.WriteDWord(FLanguages.Count);
For I:=0 to FLanguages.Count-1 do
FLanguages[i].WriteToStream(Stream);
Stream.WriteDWord(URLBlock);
Stream.WriteDWord(FURLS.Count);
For I:=0 to FURLS.Count-1 do
FURLS[i].WriteToStream(Stream);
Stream.WriteDWord(WordBlock);
Stream.WriteDWord(FWords.Count);
For I:=0 to FWords.Count-1 do
FWords[i].WriteToStream(Stream);
Stream.WriteDWord(MatchBlock);
Stream.WriteDWord(FMatches.Count);
For I:=0 to FMatches.Count-1 do
FMatches[i].WriteToStream(Stream);
end;
procedure TFileIndexDB.Connect;
Var
F : TFileStream;
B : TReadBufStream;
begin
B:=Nil;
F:=Nil;
if FileExists(FileName) then
begin
F:=TFileStream.Create(FileName,fmOpenRead);
B:=TReadBufStream.Create(F,1000000);
B.SourceOwner:=True;
end;
try
Stream:=B;
inherited Connect;
finally
Stream.Free;
end;
end;
procedure TFileIndexDB.DisConnect;
Var
F : TFileStream;
B : TWriteBufStream;
begin
F:=TFileStream.Create(FileName,fmCreate);
B:=TWriteBufStream.Create(F,1000000);
B.SourceOwner:=True;
try
Stream:=B;
inherited DisConnect;
finally
Stream.Free;
Stream:=Nil;
end;
end;
procedure TFileIndexDB.CommitTrans;
begin
If WriteOnCommit and (FileName<>'') then
Disconnect;
end;
procedure TMemIndexDB.CompactDB;
begin
// Do nothing
end;
procedure TMemIndexDB.BeginTrans;
begin
// Do nothing
end;
procedure TMemIndexDB.CommitTrans;
begin
// Do nothing
end;
procedure TMemIndexDB.Clear;
begin
FMatches.Clear;
FWords.Clear;
FURLS.Clear;
FLanguages.Clear;
end;
constructor TMemIndexDB.Create(AOwner: TComponent);
begin
inherited Create(AOwner);
FURLS:=TDescrCollection.Create(TURLItem);
FWords:=TDescrCollection.Create(TWordItem);
FLanguages:=TDescrCollection.Create(TLanguageItem);
FMatches:=TMatches.Create(TMatch);
end;
destructor TMemIndexDB.Destroy;
begin
Clear;
FreeAndNil(FMatches);
FreeAndNil(FWords);
FreeAndNil(FURLS);
FreeAndNil(FLanguages);
inherited Destroy;
end;
procedure TMemIndexDB.Connect;
begin
if Assigned(Stream) then
LoadFromStream;
end;
procedure TMemIndexDB.DisConnect;
begin
if Assigned(Stream) then
begin
FLanguages.AllocateIDs;
FURLS.AllocateIDs;
FWords.AllocateIDs;
SaveToStream
end;
end;
procedure TMemIndexDB.DeleteWordsFromFile(URL: string);
begin
// inherited DeleteWordsFromFile(URL);
end;
procedure TMemIndexDB.AddSearchData(ASearchData: TSearchWordData);
Var
AURL : TURLItem;
AWord : TWordItem;
M : TMatch;
L : TLanguageItem;
begin
// Writeln('Adding search data : ',ASearchData.URL,' : ',ASearchData.SearchWord);
AURL:=TURLItem(FURLs.FindDescr(ASearchData.URL));
If (AURL=Nil) then
begin
L:=TLanguageItem(FLanguages.FindDescr(ASearchData.Language));
If (L=Nil) then
begin
// Writeln('adding language : ',ASearchData.Language);
L:=TLanguageItem(FLanguages.AddDescr(ASearchData.Language));
end;
// Writeln('adding URL : ',ASearchData.URL);
AURL:=TURLItem(FURLS.AddDescr(ASearchData.URL));
AURL.URLDate:=ASearchData.FileDate;
AURL.Language:=L;
end;
AWord:=TWordItem(FWords.FindDescr(ASearchData.SearchWord));
If (AWord=Nil) then
begin
// Writeln('adding Word : ',ASearchData.SearchWord);
AWord:=TWordItem(FWords.AddDescr(ASearchData.SearchWord));
end;
// Writeln('Adding match : ',ASearchData.Position, ' ',ASearchData.Context);
M:=FMatches.AddMatch(AWord,AURL);
M.Position:=ASearchData.Position;
M.Context:=ASearchData.Context;
end;
procedure TMemIndexDB.UnionMatches(ListA,ListB : TFPList);
begin
ListA.AddList(ListB);
end;
procedure TMemIndexDB.IntersectMatches(ListA,ListB : TFPList);
Var
L : TFPList;
URL : TURLItem;
I,J : Integer;
OK : Boolean;
begin
For I:=ListA.Count-1 downto 0 do
begin
URL:=TMatch(ListA[i]).URL;
OK:=False;
J:=ListB.Count-1;
While (J>=0) and (TMatch(ListB[i]).URL<>URL) do
Dec(J);
if (J=-1) then
ListA.Delete(I);
end;
end;
procedure TMemIndexDB.GetMatches(AWord : String; SearchOptions: TSearchOptions; AList : TFPList);
Procedure AddMatches(W : TWordItem);
Var
I : Integer;
begin
For I:=0 to W.MatchCount-1 do
AList.Add(W.Matches[i]);
end;
Var
W : TWordItem;
I : Integer;
begin
If (AWord='') then exit;
if (AWord[1]='''') then
Delete(AWord,1,1);
I:=Length(AWord);
if (AWord[i]='''') then
Delete(AWord,i,1);
AWord:=LowerCase(AWord);
if soContains in SearchOptions then
begin
For I:=0 to FWords.Count-1 do
begin
W:=TWordItem(FWords[i]);
If Pos(Aword,W.Description)<>0 then
AddMatches(W);
end
end
else
begin
W:=TWordItem(FWords.FindDescr(AWord));
if (W<>Nil) then
AddMatches(W);
end;
end;
procedure TMemIndexDB.FindSearchData(SearchWord: TWordParser;
FPSearch: TFPSearch; SearchOptions: TSearchOptions);
Var
L,W : TFPList;
S : String;
I : Integer;
M : TMatch;
WD : TSearchWordData;
begin
L:=TFPList.Create;
try
W:=TFPList.Create;
for I:=0 to SearchWord.Count-1 do
begin
Case SearchWord.Token[i].TokenType of
wtWord : begin
S:=LowerCase(SearchWord.Token[i].Value);
if (I=0) then
GetMatches(S,SearchOptions,L)
else
GetMatches(S,SearchOptions,W);
end;
wtOr :
UnionMatches(L,W);
wtAnd :
InterSectMatches(L,W);
end;
end;
For I:=0 to L.Count-1 do
begin
M:=TMatch(L[i]);
WD.SearchWord:=M.Word.Description;
WD.Context:=M.Context;
WD.FileDate:=M.URL.URLDate;
WD.URL:=M.URL.Description;
WD.Position:=M.Position;
WD.Language:=M.URL.Language.Description;
FPSearch.AddResult(i,WD);
end;
finally
L.Free;
end;
end;
procedure TMemIndexDB.CreateIndexerTables;
begin
Clear;
end;
{ TMatches }
function TMatches.GetM(AIndex : Integer): TMatch;
begin
Result:=TMatch(Items[AIndex]);
end;
procedure TMatches.SetM(AIndex : Integer; AValue: TMatch);
begin
Items[AIndex]:=AValue;
end;
function TMatches.AddMatch(AWord: TWordItem; AURL: TURLItem): TMatch;
begin
Result:=TMatch(Add);
Result.URL:=AURl;
Result.Word:=AWord;
end;
{ TMatch }
procedure TMatch.SetURL(AValue: TURLItem);
begin
if FURL=AValue then exit;
If (FURL<>Nil) then
FURL.RemoveMatch(Self);
FURL:=AValue;
If (FURL<>Nil) then
FURL.AddMatch(Self);
end;
function TMatch.GetContext: String;
begin
Result:=Description;
end;
procedure TMatch.SetContext(AValue: String);
begin
Description:=AValue;
end;
procedure TMatch.SetWord(AValue: TWordItem);
begin
if FWord=AValue then exit;
If (FWord<>Nil) then
FWord.RemoveMatch(Self);
FWord:=AValue;
If (FWord<>Nil) then
FWord.AddMatch(Self);
end;
function TMatch.BlockSize: Integer;
begin
Result:=inherited BlockSize;
Result:=Result+SizeOf(FPosition)+2*SizeOf(Integer);
end;
procedure TMatch.WriteToStream(S: TStream);
Var
L : Integer;
begin
inherited WriteToStream(S);
S.WriteBuffer(FPosition,Sizeof(FPosition));
WriteRefToStream(S,FWord);
WriteRefToStream(S,FUrl);
end;
procedure TMatch.Resolve(Words, URLS: TDescrCollection);
begin
Word:=TWordItem(Words.Resolve(FWordID));
URL:=TURLItem(URLS.Resolve(FURLID));
end;
function TMatch.ReadFromStream(S: TStream): Integer;
begin
Result:=inherited ReadFromStream(S);
S.ReadBuffer(FPosition,Sizeof(FPosition));
S.ReadBuffer(FWordID,SizeOf(FWordID));
S.ReadBuffer(FURLID,SizeOf(FURLID));
end;
{ TDescrCollection }
function TDescrCollection.GetD(AIndex : Integer): TDescrItem;
begin
Result:=TDescrItem(Items[AIndex])
end;
procedure TDescrCollection.SetD(AIndex : Integer; AValue: TDescrItem);
begin
Items[AIndex]:=AValue;
end;
procedure TDescrCollection.RebuildHash;
Var
I : Integer;
D : TDescrItem;
begin
if FHash<>Nil then
FHash.Clear
else
FHash:=TFPhashList.Create;
For I:=0 to Count-1 do
begin
D:=GetD(I);
FHash.Add(D.Description,D);
end;
end;
function TDescrCollection.Resolve(AIndex: Integer): TDescrItem;
begin
If (Aindex=-1) or (AIndex>=Count) then
Result:=Nil
else
Result:=TDescrItem(Items[AIndex]);
end;
destructor TDescrCollection.destroy;
begin
FreeAndNil(FHash);
inherited destroy;
end;
procedure TDescrCollection.BeginLoading;
begin
Inc(FLoadCount);
end;
procedure TDescrCollection.EndLoading;
begin
if (FLoadCount>0) then
begin
Dec(FLoadCount);
If (FLoadCount=0) then
RebuildHash;
end;
end;
procedure TDescrCollection.AllocateIDS;
Var
I : Integer;
begin
For I:=0 to Count-1 do
GetD(i).TheID:=I;
end;
function TDescrCollection.FindDescr(const ADescription: String): TDescrItem;
begin
If FHash=Nil then
Result:=Nil
else
Result:=TDescrItem(FHash.Find(ADescription));
end;
function TDescrCollection.AddDescr(const ADescription: String): TDescrItem;
begin
Result:=Add as TDescrItem;
Result.Description:=ADescription;
if (FLoadCount=0) then
begin
If FHash=Nil then
ReBuildHash
else
FHash.Add(ADescription,Result);
end;
end;
{ TWordItem }
function TMatchedItem.GetMatch(AIndex : Integer): TMatch;
begin
Result:=TMatch(FList[AIndex]);
end;
function TMatchedItem.GetMatchCount: Integer;
begin
Result:=FList.Count;
end;
function TMatchedItem.AddMatch(AMatch: TMatch): Integer;
begin
FList.Add(AMatch);
end;
procedure TMatchedItem.RemoveMatch(AMatch: TMatch);
begin
Flist.Remove(AMatch);
end;
constructor TMatchedItem.Create(ACollection: TCollection);
begin
inherited Create(ACollection);
FList:=TFPList.Create;
end;
destructor TMatchedItem.Destroy;
begin
FreeAndNil(Flist);
inherited Destroy;
end;
end.