getwrd.pro (Documentation for /home/davin/idl/socware/)

;-------------------------------------------------------------
;+
; NAME:
;       GETWRD
; PURPOSE:
;       Return the n'th word from a text string.
; CATEGORY:
; CALLING SEQUENCE:
;       wrd = getwrd(txt, n, [m])
; INPUTS:
;       txt = text string to extract from.         in
;         txt is allowed to be an array.
;       n = word number to get (first = 0 = def).  in
;       m = optional last word number to get.      in
; KEYWORD PARAMETERS:
;       Keywords:
;         LOCATION = l.  Return word n string location.
;         DELIMITER = d. Set word delimiter (def = space & tab).
;         /LAST means n is offset from last word.  So n=0 gives
;           last word, n=-1 gives next to last, ...
;           If n=-2 and m=0 then last 3 words are returned.
;         /NOTRIM suppresses whitespace trimming on ends.
;         NWORDS=n.  Returns number of words in string.
;         /KEEP_LEADING_DEL  Keep any leading delimiter in result.
;         /KEEP_TRAILING_DEL Keep any trailing delimiter in result.
;         /RESET Reinitialize even if a null string is given.
; OUTPUTS:
;       wrd = returned word or words.              out
; COMMON BLOCKS:
;       getwrd_com
; NOTES:
;       Note: If a NULL string is given (txt="") then the last string
;             given is used.  This saves finding the words again.
;             If m > n wrd will be a string of words from word n to
;             word m.  If no m is given wrd will be a single word.
;             n<0 returns text starting at word abs(n) to string end
;             If n is out of range then a null string is returned.
;             See also nwrds.
; MODIFICATION HISTORY:
;       Ray Sterner,  6 Jan, 1985.
;       R. Sterner, Fall 1989 --- converted to SUN.
;       R. Sterner, Jan 1990 --- added delimiter.
;       R. Sterner, 18 Mar, 1990 --- added /LAST.
;       R. Sterner, 31 Jan, 1991 --- added /NOTRIM.
;       R. Sterner, 20 May, 1991 --- Added common and NULL string.
;       R. Sterner, 13 Dec, 1992 --- Made tabs equivalent to spaces.
;       R. Sterner,  4 Jan, 1993 --- Added NWORDS keyword.
;       R. Sterner, 2001 Jan 15 --- Fixed to use first element if not a scalar.
;       R. Sterner, 2006 Mar 07 --- Added /KEEP_LEADING_DEL, /KEEP_TRAILING_DEL.
;       Also cleaned up some.
;       R. Sterner, 2010 Apr 29 --- Converted arrays from () to [].
;       R. Sterner, 2010 Jun 17 --- Added the /KEEP_* keywords to the help text.
;       R. Sterner, 2011 Mar 23 --- Allowed txt to be an array.
;       R. Sterner, 2011 Dec 12 --- Added /RESET keyword.
;       R. Sterner, 2013 Feb 01 --- Made loop use long int.
;
; Copyright (C) 1985, Johns Hopkins University/Applied Physics Laboratory
; This software may be used, copied, or redistributed as long as it is not
; sold and this copyright notice is reproduced on each copy made.  This
; routine is provided as is without any express or implied warranties
; whatsoever.  Other limitations apply as described in the file disclaimer.txt.
;-
;-------------------------------------------------------------
 
 
	FUNCTION GETWRD, TXTSTR, NTH, MTH, help=hlp, location=ll,$
	   delimiter=delim, notrim=notrim, last=last, nwords=nwords, $
	   keep_leading_del=keep_lead, keep_trailing_del=keep_trail, $
	   reset=reset
 
	;-------------------------------------------------------------
	;  Return word NTH (or words NTH..MTH) of TXTSTR.
	;
	;  The word table for the most recently parsed string is kept
	;  in common getwrd_com so that a later call with a null string
	;  can reuse it without re-parsing:
	;    txtstr0 = parsed string        nwds = number of words
	;    loc     = word start offsets   len  = word lengths
	;    ddel    = delimiter in use
	;    pre/post= leading/trailing delimiter of txtstr0 ('' if none)
	;
	;  Returns a string (string array for array input), '' when the
	;  requested word is out of range, or -1 for help / first-call
	;  error.  NTH and MTH are never modified for the caller.
	;-------------------------------------------------------------
	common getwrd_com, txtstr0, nwds, loc, len, ddel, pre, post
 
	if (n_params(0) lt 1) or keyword_set(hlp) then begin
	  print," Return the n'th word from a text string."
	  print,' wrd = getwrd(txt, n, [m])'
	  print,'   txt = text string to extract from.         in'
	  print,'     txt is allowed to be an array.'
	  print,'   n = word number to get (first = 0 = def).  in'
	  print,'   m = optional last word number to get.      in'
	  print,'   wrd = returned word or words.              out'
	  print,' Keywords:'
	  print,'   LOCATION = l.  Return word n string location.'
	  print,'   DELIMITER = d. Set word delimiter (def = space & tab).'
	  print,'   /LAST means n is offset from last word.  So n=0 gives'
	  print,'     last word, n=-1 gives next to last, ...'
	  print,'     If n=-2 and m=0 then last 3 words are returned.'
	  print,'   /NOTRIM suppresses whitespace trimming on ends.'
	  print,'   NWORDS=n.  Returns number of words in string.'
	  print,'   /KEEP_LEADING_DEL  Keep any leading delimiter in result.'
	  print,'   /KEEP_TRAILING_DEL Keep any trailing delimiter in result.'
	  print,'   /RESET Reinitialize even if a null string is given.'
	  print,'Note: If a NULL string is given (txt="") then the last string'
	  print,'      given is used.  This saves finding the words again.'
	  print,'      With /RESET a null string has no special meaning.'
	  print,'      If m > n wrd will be a string of words from word n to'
	  print,'      word m.  If no m is given wrd will be a single word.'
	  print,'      n<0 returns text starting at word abs(n) to string end'
	  print,'      If n is out of range then a null string is returned.'
	  print,'      See also nwrds.'
	  return, -1
	endif
 
	;-------------------------------------
	;  Defaults
	;-------------------------------------
	if n_params(0) lt 2 then nth = 0		; Def is first word.
	if n_params(0) lt 3 then mth = nth		; Def is one word.
 
	;-------------------------------------
	;  Handle input if an array
	;-------------------------------------
	;  Each element is processed by a recursive scalar call.
	;  LOCATION and NWORDS end up holding the values for the
	;  last element.  RESET is forwarded so that null elements
	;  are treated the same way as for a scalar call.
	n_txtstr = n_elements(txtstr)
	if n_txtstr gt 1 then begin
	  txtout = strarr(n_txtstr)
	  for i=0L,n_txtstr-1L do begin
	    txtout1 = getwrd(txtstr[i], nth, mth, location=ll,$
	      delimiter=delim, notrim=notrim, last=last, nwords=nwords, $
	      keep_leading_del=keep_lead, keep_trailing_del=keep_trail, $
	      reset=reset)
	    txtout[i] = txtout1
	  endfor
	  return, txtout
	endif
 
	;-------------------------------------
	;  Initialize
	;-------------------------------------
	if (strlen(txtstr[0]) gt 0) or $
	   keyword_set(reset) then begin		; Non-null arg or /RESET.
	  ddel = ' '					; Def del is a space.
	  if n_elements(delim) ne 0 then ddel = delim	; Use given delimiter.
	  tst = (byte(ddel))[0]				; Del to byte value.
	  tb = byte(txtstr[0])				; String to bytes.
	  if ddel eq ' ' then begin			; Check for tabs?
	    w = where(tb eq 9B, cnt)			; Yes.
	    if cnt gt 0 then tb[w] = 32B		; Convert any to space.
	  endif
	  x = tb NE tst					; Non-delchar (=words).
	  x = [0,x,0]					; 0s at ends.
 
	  ;  A 0->1 step in x marks a word start, a 1->0 step a word end.
	  y = (x-shift(x,1)) eq 1			; Diff=1: word start.
	  z = where(shift(y,-1) eq 1)			; Word start locations.
	  y2 = (x-shift(x,-1)) eq 1			; Diff=1: word end.
	  z2 = where(shift(y2,1) eq 1)			; Word end locations.
 
	  txtstr0 = txtstr[0]				; Move string to common.
	  nwds = LONG(total(y))				; Number of words.
	  loc = z					; Word start locations.
	  len = z2 - z - 1				; Word lengths.
 
	  ;-----  Deal with /keep_* keywords -----
	  pre = ''					; Prefix.
	  post = ''					; Postfix.
	  if strmid(txtstr0,0,1) eq ddel then pre=ddel	; Leading delimiter?
	  if strmid(txtstr0,strlen(txtstr0)-1,1) $	; Trailing delimiter?
	    eq ddel then post=ddel
	endif else begin
	  if n_elements(nwds) eq 0 then begin		; Check if first call.
	    print,' Error in getwrd: must give a '+$
	      'non-NULL string on the first call.'
	    return, -1					; -1 = error flag.
	  endif
	endelse
 
	nwords = nwds					; Set nwords
 
	;-------------------------------------
	;  Offset from last word
	;-------------------------------------
	if keyword_set(last) then begin			; Offset from last.
	  lst = nwds - 1
	  in = lst + nth				; Nth word.
	  im = lst + mth				; Mth word.
	  if (in lt 0) and (im lt 0) then return, ''	; Out of range.
	  in = in > 0					; Smaller of in and im
	  im = im > 0					;  to zero.
	  if (in gt lst) and (im gt lst) then return,'' ; Out of range.
	  in = in < lst					; Larger of in and im
	  im = im < lst					;  to be last.
	  ll = loc[in]					; Nth word start.
	  out = strmid(txtstr0,ll,loc[im]-loc[in]+len[im])
	  ;-----  Deal with /keep_* keywords -----
	  if in gt 0 then pre2=ddel else pre2=pre	; Not at first word.
	  if im lt lst then post2=ddel else post2=post	; Not at last word.
	  if keyword_set(keep_lead) then out=pre2+out
	  if keyword_set(keep_trail) then out=out+post2
	  if keyword_set(notrim) then return, out
	  return, strtrim(out,2)
	endif
 
	;-------------------------------------
	;  Offset from first word
	;-------------------------------------
	n = abs(nth)					; Allow nth<0.
	if n gt nwds-1 then return,''			; out of range, null.
	ll = loc[n]					; N'th word position.
	;  Clamp into a local: IDL passes named variables by reference,
	;  so assigning to MTH here would alter the caller's variable
	;  (and the range used for later elements of an array input).
	m = mth < (nwds-1)				; Words to end.
	if nth lt 0 then begin				; Handle nth<0.
	  out = strmid(txtstr0,ll)			; No length: to string end.
	endif else begin				; nth=>0
	  out = strmid(txtstr0,ll,loc[m]-loc[nth]+len[m])
	endelse
	;-----  Deal with /keep_* keywords -----
	if n gt 0 then pre2=ddel else pre2=pre		; Not at first word.
	if m lt (nwds-1) then post2=ddel else post2=post ; Not at last word.
	if keyword_set(keep_lead) then out=pre2+out
	if keyword_set(keep_trail) then out=out+post2
	if keyword_set(notrim) then return, out
	return, strtrim(out,2)
 
	end