PowerBASIC Forums
  Source Code
  LZ78

Post New Topic  Post A Reply
profile | register | preferences | faq | search

UBBFriend: Email This Page to Someone! next newest topic | next oldest topic
Author Topic:   LZ78
Semen Matusovski
Member
posted May 26, 2002 10:15 AM     Click Here to See the Profile for Semen Matusovski     Edit/Delete Message   Reply w/Quote
"Clean" LZ78 algorithm (like I understand it).
Written from "zero" base (using mathematical description only).
Some notes:
1) This is exactly LZ78 (not patented).
I especially didn't read LZW description, but when I wrote a first variant and then read, I found that I did LZW (which I didn't want).
The main difference - LZW uses pre-allocated one-letter words (for all ASCII) and didn't write them into the file.
When I wrote a program, I understood that this way is very natural.
Don't know, how it was possible to patent such moment (it's like patent for digits 0 & 1 ) ...
Anyway I returned a program to canonic LZ78 form (initial dictionary is empty) and I need to output "roots".
Really, not a big problem - I use a big dictionary, and if to compress a file of hundreds KB, we increase compressed size per 1-2% only.

2) LZ77/LZSS compresses better, but LZ78 works much faster.
3) Unlike Huffman's algo, LZ encodes seriously slowly, but decodes extremely fast.
4) "Serious" arhivators combine many methods together and use LZSS, not LZ78.
So don't expect the same ratio as in pkzip/arj/rar.
Probably, i'll try to construct advanced algo (and I saw C implementations, such as LZHUF by LHA author), but it's not so simple to understand them, using a code only.


#Compile Exe
#Register None
#Dim All

$SourceFile = "C:\System.1st" ' <-- Change me
$TargetFile = "C:\System.Lz"
%nTest = 1 ' to have correct timing

%LZ78_DICTIONARY = 65535?? ' < 64K
%LZ78_NIL = %LZ78_DICTIONARY

Function DeCompress_LZ(InBuf As String, OutBuf As String) As Long
Dim Parents(%LZ78_DICTIONARY - 1) As Local Word
Dim abc(%LZ78_DICTIONARY - 1) As Local Byte
Dim abcPos As Local Word, abcSize As Local Word

Dim InStreamSize As Local Word, InStreamPos As Local Word
Dim OutBufPos As Local Dword, rOutBufPos As Dword, OutBufSize As Local Dword
Dim InBufPos As Local Dword, InBufSize As Local Dword
Dim LastInBufPos As Local Dword
Dim Parent As Local Word, rParent As Local Word
Dim Initialized As Local Long

OutBuf = "": InBufSize = Len(InBuf)
Do
If Initialized = 0 Then
If LastInBufPos >= InBufSize Then Exit Do
If LastInBufPos = 0 Then
OutBufSize = CvDwd(InBuf, 1): LastInBufPos = 4
OutBuf = Space$(OutBufSize)
ReDim bOutBuf(0) As Local Byte At StrPtr(OutBuf) - 1
End If
InStreamSize = CvDwd(InBuf, LastInBufPos + 1)
ReDim InStream(0) As Local Word At StrPtr(InBuf) + LastInBufPos + 2
LastInBufPos = LastInBufPos + 2 + InStreamSize * 2
abcSize = CvDwd(InBuf, LastInBufPos + 1)
ReDim abc(0) As Local Byte At StrPtr(InBuf) + LastInBufPos + 2
LastInBufPos = LastInBufPos + 2 + abcSize
If (abcSize Mod 2) Then Incr LastInBufPos

InStreamPos = 0: abcPos = 0
Initialized = 1
' MsgBox Str$(abcSize) + Str$(InStreamSize) + Str$(LastInBufPos)
End If

Parent = InStream(InStreamPos)
If abcPos < abcSize Then
Parents(abcPos) = Parent
Parent = abcPos
Incr abcPos
End If
OutBufPos = rOutBufPos
rParent = Parent
While rParent <> %LZ78_NIL
Incr OutBufPos
rParent = Parents(rParent)
Wend
While Parent <> %LZ78_NIL
Incr rOutBufPos
bOutBuf(OutBufPos) = abc(Parent)
Decr OutBufPos
Parent = Parents(Parent)
Wend

Incr InstreamPos
If InStreamPos >= InStreamSize Then Initialized = 0
Loop

End Function

Function Compress_LZ (InBuf As String, OutBuf As String) As Long

Dim Parents(%LZ78_DICTIONARY - 1) As Local Word
Dim Childs(%LZ78_DICTIONARY - 1) As Word
Dim FirstChild(%LZ78_NIL) As Word
Dim abc(%LZ78_DICTIONARY - 1) As Local Byte
Dim OutStream(%LZ78_DICTIONARY - 1) As Local Word
Dim Top(255) As Local Word
Dim OutStreamPos As Local Word
Dim abcPos As Local Word
Dim InBufSize As Local Dword, InBufPos As Dword
Dim Parent As Local Word, rParent As Local Word, tParent As Local Word
Dim Symbol As Local Byte, AlignMent As Local String
Dim Initialized As Local Long

OutBuf = ""
InBufSize = Len(InBuf): If InBufSize = 0 Then Function = -1: Exit Function
ReDim bInBuf(0) As Byte At StrPtr(InBuf) - 1

For InBufPos = 1 To InBufSize
If Initialized = 0 Then
For tParent = 0 To 255: Top(tParent) = %LZ78_NIL: Next
FirstChild(%LZ78_NIL) = %LZ78_NIL: Parent = %LZ78_NIL
abcPos = 0: OutStreamPos = 0
Initialized = 1
End If

Symbol = bInBuf(InBufPos)
If Parent = %LZ78_NIL Then
If Top(Symbol) <> %LZ78_NIL Then tParent = Top(Symbol) Else tParent = %LZ78_NIL
Else
tParent = FirstChild(Parent)
While tParent <> %LZ78_NIL
If abc(tParent) = Symbol Then Exit Do Else tParent = Childs(tParent)
Wend
End If

If tParent = %LZ78_NIL Then
If Parent = %LZ78_NIL Then Top(Symbol) = abcPos
abc(abcPos) = Symbol
Parents(abcPos) = Parent
rParent = FirstChild(Parent)
Childs(abcPos) = rParent
FirstChild(Parent) = abcPos
FirstChild(abcPos) = %LZ78_NIL
OutStream(OutStreamPos) = Parent: Incr OutStreamPos
Incr abcPos
Parent = %LZ78_NIL
Else
Parent = tParent
End If

If (abcPos >= %LZ78_DICTIONARY) Or (InBufPos = InBufSize) Then
If Parent <> %LZ78_NIL Then OutStream(OutStreamPos) = Parent: Incr OutStreamPos
If (abcPos Mod 2) Then AlignMent = " " Else AlignMent = ""
If OutBuf = "" Then OutBuf = MkDwd$(InBufSize) ' header
OutBuf = OutBuf + MkWrd$(OutStreamPos) + Peek$(VarPtr(OutStream(0)), OutStreamPos * 2) + _
MkWrd$(abcPos) + Peek$(VarPtr(abc(0)), abcPos) + AlignMent
Initialized = 0
End If
Next

End Function

Function PbMain
Local InBuf As String, OutBuf As String, InBufC As String
Local i As Long, f As Long, t1 As Single, t2 As Single, t3 As Single

f = FreeFile: ErrClear
Open $SourceFile For Binary As #f
If Err = 0 Then Get$ #f, Lof(f), InBuf
Close #f
If Err Then MsgBox "Can't read the source file": Exit Function


t1 = Timer
For i = 1 To %nTest
Compress_LZ InBuf, OutBuf
Next
t2 = Timer
For i = 1 To %nTest
DeCompress_LZ OutBuf, InBufC
Next
t3 = Timer

f = FreeFile: ErrClear
Open $TargetFile For Output As #f
If Err = 0 Then Print #f, OutBuf
Close #f
If Err Then MsgBox "Can't write the target file": Exit Function

MsgBox "Compress: " + Format$(1000 * (t2 - t1) / %nTest, "#.#") + " ms" + $CrLf + _
"Decompress: " + Format$(1000 * (t3 - t2) / %nTest, "#.#") + "ms" + $CrLf + _
"Ratio : " + Format$(Len(InBuf)) + " -> " + Format$(Len(OutBuf))

If InBuf <> InBufC Then MsgBox Str$(Len(InBufC)) + "troubles"
End Function

------------------
E-MAIL: matus@perevozki.ru

[This message has been edited by Semen Matusovski (edited May 26, 2002).]

IP: Logged

All times are EasternTime (US)

next newest topic | next oldest topic

Administrative Options: Close Topic | Archive/Move | Delete Topic
Post New Topic  Post A Reply
Hop to:

Contact Us | PowerBASIC BASIC Compilers

Copyright © 1999-2005 PowerBASIC, Inc. All Rights Reserved.


Ultimate Bulletin Board 5.45c