PowerBASIC Forums
  Source Code
  lz77 compression routines

Post New Topic  Post A Reply
profile | register | preferences | faq | search

UBBFriend: Email This Page to Someone! next newest topic | next oldest topic
Author Topic:   lz77 compression routines
Don Dickinson
Member
posted November 25, 1999 09:52 AM     Click Here to See the Profile for Don Dickinson     Edit/Delete Message   Reply w/Quote

'
' pb_comp.bas
'
' A variation of Microsoft's version of LZ77 compression.
' that can be compiles with either PBDLL6 or PBCC2.
' With modifications, it could also be compiled with
' PBCC 1 or PBDLL 5.
'
' By Don Dickinson
' ddickinson@basicguru.com
' www.basicguru.com/dickinson
'
'
' NOTE
'
' LZ77 Compression routines. Similar to (but does not compress with the same
' as and not compatible with) Microsoft's LZ77 compression routines used in
' it's compress.exe and lzexpand.dll routines. The compression routines are
' MUCH slower than M$'s compress.exe, but the decompression routines are
' very fast - almost instantaneous with most data. There is great room
' for improvement in the speed of the main compression routine (compress_buffer)
' Feel free to email me any improvements you may have!
'
'
' CREDITS
'
' Windows Undocumented File Formats by Pete Davis and Mike Wallace
' Microsoft's implementation of LZ77 is described within.
'
' The Data Compression Book by Mark Nelson and Jean-Loup Gailly
' This is the "bible" of data compression. If you want to learn
' about data compression (all types), read it!
'
'
' DISCLAIMER
'
' Use it as you see fit. By using or compiling this code or derivative thereof,
' you are consenting to the hold the author, Don Dickinson, harmless from
' all effects or side-effects its use.
'
'
' FUNCTIONS
'
' Sub compress_buffer(inBuffer As String, outBuffer As String) Export
' Compresses the input buffer and puts the resulting string
' inthe outBuffer string variable.
'
' Function compress_file(fileIn As String, fileOut As String) Export As Long
' Compresses the fileIn file and puts the result in the fileOut file.
' Returns %True if successful, %False if not.
'
' Sub decompress_buffer(inBuffer As String, outBuffer As String) Export
' Decompresses the inBuffer and puts the result in outBuffer
'
' Function decompress_file(fileIn As String, fileOut As String) Export As Long
' Decompresses the fileIn file and puts the result in the fileOut file.
' Returns %True if successful, %False if not.
'
'
' ERRORS
'
' If an error occurs (file compression routines), the reason for that
' error can be obtained by reading the global g_pbcomp_LastError
' string variable.
'
'=============================================================================
#If Not %Def(%PB_COMP_BAS)
%PB_COMP_BAS = %True

'
'
' Constants
'=============================================================================
' These are microsoft's LZ constants. I use the same except for %MAGIC_3
' which I give my own value to (&hDD). They are stored in various
' members of the MS_LZ_TYPE structure when saving data to a file.
'
%MAGIC_1 = &h44445A53
%MAGIC_2 = &h3327F088
'%MAGIC_3 = &h41 ' Microsoft uses this one.
%MAGIC_3 = &hDD

%WINDOW_SIZE = 4096

'
' Structures
'=============================================================================
Type MS_LZ_TYPE
Magic1 As Long
Magic2 As Long
Magic3 As Byte
FileFix As Byte
DecompSize As Long
End Type

'
' Globals
'=============================================================================
Global g_pbcomp_bas_LastError As String

'
' Function Prototypes
'=============================================================================
Declare Function lz_get_offset(x1 As Integer, x2 As Integer) As Integer
Declare Function lz_get_len(i2 As Integer) As Integer
Declare Function lz_get_comp_code(iLen As Integer, iOff As Integer) As String

Declare Function compress_file(fileIn As String, fileOut As String) As Long
Declare Sub compress_buffer(inBuffer As String, outBuffer As String)
Declare Sub decompress_buffer(inBuffer As String, outBuffer As String)
Declare Function decompress_file(fileIn As String, fileOut As String) As Long

'Declare Sub get_best_match( ByVal ptrBuffer As Long, ByVal ptrWindow As Long, _
' ByVal inLen As Long, ByVal bufferIndex As Long, _
' iBestOffset As Integer, iBestLen As Integer)


'=============================================================================
'=============================================================================
'
' lz_get_offset
' Retrieves the offset encoded in the compression code
'=============================================================================
Function lz_get_offset(x1 As Integer, x2 As Integer) As Integer
Dim i As Integer

i = (x2 And &hF0)
Shift Right i, 4
Function =( (i * &h0100 + x1) And &h0FFF ) + &h0010

End Function

'
' lz_get_len
' Retrieves the length from a compression code
'=============================================================================
Function lz_get_len(i2 As Integer) As Integer

Function = (i2 And &h0F) + 3

End Function

'
' Encodes the length and offset of the compressed bytes in a two byte string
'=============================================================================
Function lz_get_comp_code(iLen As Integer, iOff As Integer) As String

Dim Part1 As Integer
Dim Part2 As Integer
Dim Part3 As Integer

Part1 = iLen - 3
Part1 = Part1 And &h0F
Shift Left Part1, 8

Part2 = iOff - 16
Part2 = Part2 And &h0F00
Shift Left part2, 4

Part3 = iOff - 16
Part3 = Part3 And &h00FF

Function = Mki$(Part1 + Part2 + Part3)

End Function

'++++++++++ EXPERIMENTAL CODE Remm'ed out ++++++++++++++++++++++++++++++++++++
#If 0
'=============================================================================
Sub get_best_match( ByVal ptrBuffer As Long, ByVal ptrWindow As Long, _
ByVal inLen As Long, ByVal bufferIndex As Long, _
iBestOffset As Integer, iBestLen As Integer)

Dim i As Long
Dim j As Long
Dim idxBuf As Long
Dim idxWindow As Long
Dim iPosBuf As Long
Dim iposWin As Long
Dim iLen As Long
Dim iOff As Long
Dim ptrBuf As Byte Ptr
Dim ptrWin As Byte Ptr

iBestOffset = 0
iBestLen = 0

For iPosWin = 0 To %WINDOW_SIZE
iPosBuf = 0
ptrBuf = ptrBuffer + bufferIndex - 1
ptrWin = ptrWindow + iPosWin
If @ptrBuf = @ptrWin Then
iLen = 1
If iLen > iBestLen Then
iBestLen = 1
iBestOffset = iPosWin + 1
End If
For i = 1 To 17
Incr ptrBuf
Incr ptrWin
If bufferIndex + i > inLen Then
Exit For
End If
If @ptrBuf = @ptrWin Then
iLen = i + 1
Else
Exit For
End If
Next i
If iLen > iBestLen Then
iBestLen = iLen
iBestOffset = iPosWin + 1
If iBestLen > 17 Then Exit For
End If

End If
Next iPosWin

End Sub
#EndIf
'+++++++++ END OF EXPERIMENTAL CODE ++++++++++++++++++++++++++++++++++++++++++

'=============================================================================
Sub compress_buffer(inBuffer As String, outBuffer As String) Export

Dim ptrWindow As Byte Ptr
Dim ptrBuffer As Byte Ptr
Dim ptrData As Byte Ptr
Dim ptrOutBuffer As Byte Ptr
Dim BitMask As Byte
Dim iLen As Integer
Dim iOff As Integer
Dim iBestLen As Integer
Dim iBestOff As Integer
Dim i As Long
Dim j As Long
Dim inLen As Long
Dim iPos As Long
Dim windowPointer As Long
Dim bufferIndex As Long
Dim iOutCount As Long
Dim pBuffer As Long
Dim pWindow As Long
Dim sWindow As String
Dim sAccum As String
Dim sData As String
Dim sNewData As String

inLen = Len(inBuffer)
sWindow = Space$(%WINDOW_SIZE * 2)
ptrWindow = StrPtr(sWindow)
ptrBuffer = StrPtr(inBuffer)
bufferIndex = 1
windowPointer = 0
outBuffer = ""
iOutCount = 0

outBuffer = Space$(Len(inBuffer) * 1.2)
ptrOutBuffer = StrPtr(outBuffer)

Do

If bufferIndex > inLen Then Exit Do

sData = ""
BitMask = 0
For i = 0 To 7

'- Find the best match in the window
iBestLen = 0
iBestOff = 0
sAccum = ""
iPos = 1

pBuffer = ptrBuffer
pWindow = ptrWindow


'
' get_best_match
' performs the same function as the for loop below, but
' it is bigger, harder to understand, and not much
' faster so I don't use it.
'
'get_best_match pBuffer, pWindow, inLen, bufferIndex, iBestOff, iBestLen

For j = 0 To 17
If bufferIndex + j > inLen Then Exit For
sAccum = sAccum + Chr$(@ptrBuffer[bufferIndex + j - 1]) 'Mid$(inBuffer, bufferIndex + j, 1)
iPos = Instr(iPos, sWindow, sAccum)
If iPos < 1 Then Exit For
iBestLen = j + 1
iBestOff = iPos
iPos = iBestOff
Next i

'- If it's less than 3 bytes then we store it
' otherwise, compress it.
'
If iBestLen < 3 Then

Bit Set BitMask, i

'- Build the data string
sNewData = Chr$(@ptrBuffer[bufferIndex - 1])
ptrData = StrPtr(sNewData)

'- Update the window
windowPointer = windowPointer + 1
If windowPointer > %WINDOW_SIZE Then
windowPointer = 1
End If
@ptrWindow[windowPointer - 1] = @ptrData
@ptrWindow[windowPointer - 1 + %WINDOW_SIZE] = @ptrData

bufferIndex = bufferIndex + 1
Else

'- Get the compressed information
sNewData = lz_get_comp_code(iBestLen, iBestOff)

For j = 0 To iBestLen - 1
windowPointer = windowPointer + 1
If windowPointer > %WINDOW_SIZE Then
windowPointer = 1
End If
@ptrWindow[windowPointer - 1] = @ptrBuffer[bufferIndex - 1 + j]
@ptrWindow[windowPointer - 1 + %WINDOW_SIZE] = @ptrWindow[windowPointer - 1]
Next j
bufferIndex = bufferIndex + iBestLen
End If
sData = sData + sNewData
If bufferIndex > inLen Then Exit For
Next i

'- Write the compressed data to the output buffer
@ptrOutBuffer[iOutCount] = BitMask
Incr iOutCount

For i = 1 To Len(sData)
@ptrOutBuffer[iOutCount] = Asc(Mid$(sData, i, 1))
Incr iOutCount
Next i

If bufferIndex > inLen Then Exit Do
Loop

outBuffer = Left$(outBuffer, iOutCount)

End Sub

'
' decompress_buffer
'=============================================================================
Sub decompress_buffer(inBuffer As String, outBuffer As String) Export

Dim ptrBuffer As Byte Ptr
Dim ptrWindow As Byte Ptr
Dim ptrOut As Byte Ptr
Dim ptrOutBuffer As Byte Ptr
Dim BitMask As Byte
Dim Byte1 As Byte
Dim Byte2 As Byte
Dim iOffset As Integer
Dim iLen As Integer
Dim i As Long
Dim j As Long
Dim inLen As Long
Dim windowPointer As Long
Dim bufferIndex As Long
Dim iOutCount As Long
Dim iAlloc As Long
Dim dataOut As String
Dim sWindow As String
Dim saveBuffer As String
Dim rHeader As MS_LZ_TYPE

inLen = Len(inBuffer)

sWindow = Space$(%WINDOW_SIZE * 2)
windowPointer = 0
ptrWindow = StrPtr(sWindow)
ptrBuffer = StrPtr(inBuffer)

%ALLOC_SIZE = 65535
iOutCount = 0
iAlloc = %ALLOC_SIZE
outBuffer = Space$(%ALLOC_SIZE)
ptrOutBuffer = StrPtr(outBuffer)


bufferIndex = 1
Do Until bufferIndex > inLen

'- Get more memory if needed
' (when within %ALLOC_SIZE bytes of the end)
'
If iOutCount > iAlloc - %ALLOC_SIZE Then
outBuffer = outBuffer + Space$(%ALLOC_SIZE)
ptrOutBuffer = StrPtr(outBuffer)
iAlloc = iAlloc + %ALLOC_SIZE
End If

'- This byte determines whether
' or not the next 8 terms are
' compressed. If the bit is set,
' the term is not compressed.
' Otherwise the term is two bytes
' long and is encoded with a 4-bit
' length and a 12-bit offset into
' the window.
'
BitMask = @ptrBuffer[bufferIndex-1]
Incr bufferIndex
If bufferIndex > inLen Then Exit Do
For i = 0 To 7

'- This is not a compressed byte
' so just Write it out.
'
If Bit(BitMask, i) Then

'- Read the byte
ptrOut = ptrBuffer + bufferIndex - 1

'- Set the window
windowPointer = windowPointer + 1
If windowPointer > %WINDOW_SIZE Then
windowPointer = 1
End If

@ptrWindow[windowPointer - 1] = @ptrOut
@ptrWindow[windowPointer - 1 + %WINDOW_SIZE] = @ptrOut

'- Move the pointer to the next bytes
Incr bufferIndex
Incr iOutCount
@ptrOutBuffer[iOutCount - 1] = @ptrOut

'- This byte is compressed,
' so decode And Write out.
'
Else

Byte1 = @ptrBuffer[bufferIndex - 1]
Byte2 = @ptrBuffer[bufferIndex]
iOffset = lz_get_offset(Int(Byte1), Int(Byte2))
iLen = lz_get_len(Int(Byte2))

dataOut = Mid$(sWindow, iOffset, iLen)
ptrOut = StrPtr(dataOut)
For j = 1 To iLen
windowPointer = windowPointer + 1
If windowPointer > %WINDOW_SIZE Then
windowPointer = 1
End If
@ptrWindow[windowPointer - 1] = @ptrOut 'Asc(Mid$(dataOut, j, 1))
@ptrWindow[windowPointer - 1 + %WINDOW_SIZE] = @ptrOut '@ptrWindow[windowPointer - 1]

Incr iOutCount
@ptrOutBuffer[iOutCount - 1] = @ptrOut
Incr ptrOut
Next i

bufferIndex = bufferIndex + 2
End If

If bufferIndex > inLen Then Exit For
Next i
If bufferIndex > inLen Then Exit Do
Loop
outBuffer = Left$(outBuffer, iOutCount)

End Sub

'
' decompress_file
'=============================================================================
Function decompress_file(fileIn As String, fileOut As String) Export As Long

Dim iIn As Long
Dim iOut As Long
Dim inBuffer As String
Dim outBuffer As String
Dim rHeader As MS_LZ_TYPE

'- Input file must exist
' and output file can't exist.
'
If Dir$(fileIn) = "" Then
g_pbcomp_bas_LastError = "Input file doesn't exist: " + fileIn
Function = %False
Exit Function
End If

If Dir$(fileOut) <> "" Then
g_pbcomp_bas_LastError = "Output file exists: " + fileOut
Function = %False
Exit Function
End If

'- Must be able to open both
iIn = FreeFile
Open fileIn For Binary Shared As #iIn
If Err Then
g_pbcomp_bas_LastError = "Can't open input: " + fileIn
Function = %False
Exit Function
End If

iOut = FreeFile
Open fileOut For Binary As #iOut
If Err Then
g_pbcomp_bas_LastError = "Can't open output: " + fileOut
Function = %False
Close #iIn
Exit Function
End If

'- Read all of the data from the input file.
inBuffer = Space$(Lof(iIn))
Get #iIn,, inBuffer
Close #iIn

'- Strip off the file header
inBuffer = Mid$(inBuffer, Len(rHeader) + 1)

'- Do the de-compression
decompress_buffer inBuffer, outBuffer

'- Write out the decompressed file
Put #iOut,, outBuffer
Close #iOut

Function = %True

End Function

'
' compress_file
'=============================================================================
Function compress_file(fileIn As String, fileOut As String) Export As Long

Dim iIn As Long
Dim iOut As Long
Dim inBuffer As String
Dim outBuffer As String
Dim rHeader As MS_LZ_TYPE

'- Input file must exist
' and output file can't exist.
'
If Dir$(fileIn) = "" Then
g_pbcomp_bas_LastError = "Input file doesn't exist: " + fileIn
Function = %False
Exit Function
End If

If Dir$(fileOut) <> "" Then
g_pbcomp_bas_LastError = "Output file exists: " + fileOut
Function = %False
Exit Function
End If

'- Must be able to open both
iIn = FreeFile
Open fileIn For Binary Shared As #iIn
If Err Then
g_pbcomp_bas_LastError = "Can't open input: " + fileIn
Function = %False
Exit Function
End If

iOut = FreeFile
Open fileOut For Binary As #iOut
If Err Then
g_pbcomp_bas_LastError = "Can't open output: " + fileOut
Function = %False
Close #iIn
Exit Function
End If

'- Read all of the data from the input file.
inBuffer = Space$(Lof(iIn))
Get #iIn,, inBuffer
Close #iIn

'- Strip off the file header
inBuffer = Mid$(inBuffer, Len(rHeader) + 1)

'- Do the compression
compress_buffer inBuffer, outBuffer

'- Write out the decompressed file
rHeader.Magic1 = %MAGIC_1
rHeader.Magic2 = %MAGIC_2
rHeader.Magic3 = %MAGIC_3
rHeader.FileFix = 0
rHeader.DecompSize = Len(inBuffer)

Put #iOut,, rHeader
Put #iOut,, outBuffer
Close #iOut

Function = %True

End Function

#EndIf


IP: Logged

All times are Pacific Time (US)

next newest topic | next oldest topic

Administrative Options: Close Topic | Archive/Move | Delete Topic
Post New Topic  Post A Reply
Hop to:

Contact Us | PowerBASIC on the Net

Copyright © 1999-2002 PowerBASIC, Inc. All Rights Reserved.


Ultimate Bulletin Board 5.45c