; wx.xm: Word eXtract unicum
; /AJK 27.Aug.81, 27.Aug.81

;    _______
;   |      /
;   |     /
;   |    /    Copyright (c) 1981 by Knowlogy
;   |   //\                         PO Box 283
;   |  //  \                        Wilsonville, Oregon  97070
;   | //    \
;   |//______\

	uses LIB2800
	uses LIB2801

	db	'WX V1: COPYRIGHT (C) 1981 BY KNOWLOGY',13,10,26,0

Length	equ	256		; max line/word length
BufSiz	equ	8192		; buffer size

	entry wx
wx:
	HEAhea [hl=0100h]	; initialize stack and heap
	USKini []		; scan command
	USKflg [hl=flgtbl]	; process flags
	USKdef [stk="-",stk=0]	; supply default arguments
	USKall [hl=Length]->[(line)=hl] ; allocate line buffer
	USKall [hl=Length]->[(word)=hl] ; allocate word buffer

; Open the next file (heterogenous).
wx1:
	USKtnf [stk=(nflg),stk=(vflg)]->[hl=stk,bc=stk,de=stk]+C-a
	jp	c,wx11		; branch when files exhausted
	BBIopn [stk=hl,stk=RO+OldOnly+Text,stk=BufSiz]->[(inch)=a]+C
	jr	nc,wx2		; branch on successful open
	ERRMSG [a=a,b=1,c=0,hl=hl]
	ld	(code),a	; store return code
	jr	wx1

; Read the next line of characters from the file.
wx2:
	BBIgl [stk=(inch),stk=(line),stk=Length]->[]+C-a
	jr	c,wx10		; at EOF, advance to next file
	ld	ix,(line)	; IX -> start of line
	ld	a,(iflg)	; see if we ignore format commands
	and	a
	jr	z,wx3		; if not, don't check for them
	ld	a,(ix+0)	; A = first character of line
	cp	'.'		; does line start with period?
	jr	z,wx2		; if so, it's a format command, skip it

; Scan through the line.  Pass alphanumerics to the word buffer;
; non-alphanumerics cause word buffer to be dumped.
wx3:				; here to scan a new word
	ld	iy,(word)	; IY -> start of word
	ld	(iy+0),0
wx4:				; scan a character
	ld	a,(ix+0)	; A = next char from line
	and	07Fh		; clear any parity
	inc	ix
	ld	hl,pflg		; see if we should preserve case
	bit	0,(hl)
	jr	nz,wx5		; branch if so
	cp	'A'		; convert upper case to lower
	jr	c,wx5
	cp	'Z'+1
	jr	nc,wx5
	add	a,'a'-'A'
wx5:
	ld	hl,dflg		; see if digits are considered word chars
	bit	0,(hl)
	jr	z,wx6		; branch if not
	cp	'0'		; see if it's a digit
	jr	c,wx8		; branch if not
	cp	'9'+1
	jr	c,wx7		; branch if it is
wx6:
	cp	'A'		; see if it's an upper case letter
	jr	c,wx8		; branch if not
	cp	'Z'+1
	jr	c,wx7		; branch if it is
	cp	'a'		; see if it's a lower case letter
	jr	c,wx8		; branch if not
	cp	'z'+1
	jr	nc,wx8		; branch if not, must be delimiter

; Got an alphanumeric; append it to word buffer.
wx7:
	ld	(iy+0),a
	inc	iy
	jr	wx4

; Got a non-alphanumeric; flush the word buffer.
wx8:
	ld	hl,(word)	; see if there's anything in the word buffer
	ld	a,(hl)
	and	a
	jr	z,wx9		; branch if not, nothing to write
	ld	(iy+0),0	; null-terminate the word
	LIOpl [stk=1,stk=(word)]; write the word
wx9:
	ld	a,(ix-1)	; get the separator back
	and	a		; was it null (end-of-line)?
	jr	nz,wx3		; if not, continue to next character
	jp	wx2		; else go read another line

; Here on file EOF.
wx10:
	BBIcls [stk=(inch)]	; close this file
	jp	wx1		; go open another file

; Here when all files exhausted.
wx11:
	SHLexi [a=(code)]

; Flags
flgtbl:
	db 'd',0,0,0,0,0,0,0,0,0,0,0,0
dflg:	dw 0
	db 'i',0,0,0,0,0,0,0,0,0,0,0,0
iflg:	dw 0
	db 'n',0,0,0,0,0,0,0,0,0,0,0,0
nflg:	dw 0
	db 'p',0,0,0,0,0,0,0,0,0,0,0,0
pflg:	dw 0
	db 'v',0,0,0,0,0,0,0,0,0,0,0,0
vflg:	dw 0
	db 0

code:	db	0		; return code, reflects any error

inch:	ds	1		; input channel
line:	ds	2		; -> line buffer
word:	ds	2		; -> word buffer

	end wx
