Zsh Mailing List Archive
Messages sorted by:
Reverse Date,
Date,
Thread,
Author
Tip of the day: grep2awk zle function
- X-seq: zsh-users 20409
- From: Joep van Delft <joepvandelft@xxxxxxxxx>
- To: zsh-users@xxxxxxx
- Subject: Tip of the day: grep2awk zle function
- Date: Sat, 15 Aug 2015 01:01:57 +0200
- List-help: <mailto:zsh-users-help@zsh.org>
- List-id: Zsh Users List <zsh-users.zsh.org>
- List-post: <mailto:zsh-users@zsh.org>
- Mailing-list: contact zsh-users-help@xxxxxxx; run by ezmlm
Hi!
This summer's side project has resulted in grep2awk.
grep2awk is a zle function that turns the first grep command in the
current command line into its awk equivalent, leaving the cursor
right where you want it. This operation includes a transformation of
Basic Regular Expression to Extended Regular Expression.
Possibly also of interest is an adaptation of the zsh test suite,
with which zle functions can be tested against the currently
installed zsh, outside of the zsh source directory and its
Makefile.
Code, installation instructions, and some details can be found at
https://github.com/joepvd/grep2awk.
Kind regards,
Joep
# grep2awk:
# A zle convenience function that converts a `grep` invocation on the
# command line into the corresponding `awk` invocation.
# How to use:
# Drop this file somewhere in your $fpath. And configure your shell to
# make use of it:
#
# autoload grep2awk
# zle -N grep2awk
# bindkey "^X^A" grep2awk
#
# This makes the widget available under <CTRL-X><CTRL-A>.
#
# The files t/comptest and t/ztst.zsh are taken (and modified) from the
# zsh project. This project can be used under the same terms as the zsh
# project.
#
# By Joep van Delft, 2015
# File-level setup, executed whenever this file itself is run (e.g. when it
# is autoloaded): switch to zsh's default options, restored on return (-L),
# and enable extended globbing.
emulate -L zsh
setopt extendedglob
# Make split-shell-arguments available; grep2awk calls it to split the
# command line into the `reply` array.
autoload -Uz split-shell-arguments
grep2awk() {
# zle widget: rewrite the first grep/egrep/fgrep command on the command line
# as the equivalent awk command and place the cursor inside the awk program.
#
# Arguments: none when used as a widget. For debugging, $1 may name one of
#            the helper functions defined below and $2 its argument.
#
# The file-level `emulate -L zsh` only covers the run of the file body
# itself. Once grep2awk has been defined, later calls enter here directly,
# so the option setup is established again for every call.
emulate -L zsh
setopt extendedglob
# Everything the widget assigns to is declared local, so nothing leaks into
# the interactive shell. That includes the loop counters (i, l), the current
# option letter (o) and the configured awk command (awk).
local replacement_line REPLY REPLY2 flag cursor
local i l o awk
local -a reply
typeset -A opt
# The helper functions are defined first:
# - regex2awkprog
# - fixed2ere
# - bre2ere
regex2awkprog() {
  # Translate one grep pattern argument into a complete, single-quoted awk
  # program and print it on stdout (no trailing newline).
  #
  # Arguments: $1 - the pattern exactly as it appears on the command line,
  #                 shell quoting still in place.
  # Reads:     opt - associative array holding the grep options seen so far
  #                  (filled in by the caller).
  # Returns:   1 if the options contradict each other or a helper fails to
  #            convert the pattern; 0 otherwise.
  #
  # Escaping convention: the finished program is emitted through printf's
  # *format* argument, which turns every doubled backslash into a single
  # one. The helpers fixed2ere and bre2ere produce their output with that
  # extra level of escaping, so this final step has to stay a format pass.
  local re prelude action prog quoted squote

  # Reject contradictory option sets before doing any work. Each case uses
  # a single message so the explanation is not replaced by a follow-up one.
  if (( opt[E] + opt[F] + opt[G] + opt[P] > 1 )); then
    zle -M "grep2awk: Options E, F, G and P are mutually exclusive. Exiting."
    return 1
  fi
  # c, H, l and L all redefine the print action; only one may be given.
  if (( opt[H] + opt[l] + opt[L] + opt[c] > 1 )); then
    zle -M "grep2awk: Options c, H, l and L are mutually exclusive. Exiting."
    return 1
  fi

  # Remove one level of quoting, so we have what grep would receive.
  re=${(Q)1}

  if (( opt[F] > 0 )); then
    re=$(fixed2ere "$re") || return 1
  fi
  if (( opt[G] == 1 || opt[E] + opt[F] + opt[P] == 0 )); then
    # No explicit syntax option (or -G): it is a Basic Regular Expression.
    re=$(bre2ere "$re") || return 1
  fi

  (( opt[w] > 0 )) && re="\\<($re)\\>"
  (( opt[x] > 0 )) && re="^($re)$"

  # Replace any forward slash with an escaped one, as the regex is wrapped
  # in slashes below.
  re=${re//\//\\/}
  # A single quote cannot appear inside the single-quoted program, so it is
  # written as the escape \47. The backslash is doubled here because the
  # printf format pass below would otherwise decode \47 back into a quote.
  squote='\\47'
  re=/${re//\'/$squote}/

  # Negate the pattern if told so.
  (( opt[v] > 0 )) && re="!$re"

  (( opt[i] + opt[y] > 0 )) && prelude+="BEGIN{IGNORECASE=1}; "

  # The print action, prefixed with file name and/or line number on request.
  action="{print ${opt[H]:+FILENAME\":\"}${opt[n]:+NR\":\"}\$0}"

  if (( opt[c] > 0 )); then
    # This is incompatible with `grep -c`, as files where the count is 0
    # are not displayed. This is saner behavior, IMHO.
    prelude+='BEGINFILE{c=0};ENDFILE{if(c>0)print FILENAME":"c}; '
    action='{c++}'
  fi
  if (( opt[l] > 0 )); then
    action="{print FILENAME; nextfile}"
  fi
  if (( opt[L] > 0 )); then
    prelude+="BEGINFILE{flag=0};ENDFILE{if (flag==0)print FILENAME}; "
    action="{flag=1; nextfile}"
  fi

  prog="${prelude}${re} $action"
  quoted=${(qq)prog}
  # The program is used as printf's format (see the convention above), so a
  # literal % in the pattern must be doubled or it is read as a directive.
  printf -- "${quoted//[%]/%%}"
}
fixed2ere() {
  # Convert a fixed string (grep -F) into an extended regular expression
  # that matches it literally; print the result on stdout without newline.
  #
  # Arguments: $1 - the fixed string, shell quoting already removed.
  #
  # The characters in the first line are rewritten to the sequences in the
  # second line:
  #
  #    +  |  {  }  [  ]  (  )  *  .  ?  $  ^
  #   \+ \| \{ \} \[ \] \( \) \* \. \? \$ \^
  #
  # A backslash is written as FOUR backslashes. The callers pass this output
  # through one round of backslash processing (printf's format, or echo),
  # which halves them to the two backslashes awk needs for one literal
  # backslash. Fewer than four would let that pass combine the leftover
  # backslash with the next character (e.g. into \b or \n).
  local str ere c
  local -i i
  str=$1
  ere=""
  for (( i = 1; i <= ${#str}; i++ )); do
    c=$str[$i]
    case $c in
      ('+'|'|'|'{'|'}'|'['|']'|'('|')'|'*'|'.'|'?'|'$'|'^')
        ere+="\\$c"
        ;;
      ($'\n')
        # Newline-separated strings become alternatives.
        # This works after arrowing up into history, but not directly
        # after writing it. Is there a way to detect if this is the case?
        ere+='|'
        ;;
      ('\')
        ere+='\\\\'
        ;;
      (*)
        ere+=$c
        ;;
    esac
  done
  printf -- '%s' "$ere"
}
bre2ere() {
  # Basic Regular Expression to Extended Regular Expression.
  #
  # Arguments: $1 - the BRE, shell quoting already removed.
  # Outputs:   the ERE on stdout, without trailing newline.
  #
  # What needs to be done:
  #   BRE  \+ \? \{ \} \| \( \)   ->  ERE operators  + ? { } | ( )
  #   BRE   + ?  {  }  |  (  )    ->  ERE literals  \+ \? \{ \} \| \( \)
  # plus the position-dependent meaning of ^, $ and *.
  #
  # Escaping convention: every backslash that must reach awk is written
  # twice, because the callers send this output through one round of
  # backslash processing (printf's format, or echo) that halves them.
  #
  # Debugging: if the zstyle ':grep2awk:bre2ere:' debug is set to a file
  # name, the state is appended to that file for every character.
  local bre ere c debug
  # Variables tracking states:
  local s_brack s_brack_start s_esc s_altern s_altern_start s_char_cls
  local -i i
  bre=$1
  ere=""
  s_esc=off            # previous character was an unescaped backslash
  s_altern=on          # at the start of the pattern/alternative/subexpression
  s_altern_start=off   # \| or \( was seen in this very iteration
  s_brack=off          # inside a bracket expression
  s_brack_start=off    # at the first position(s) of a bracket expression
  s_char_cls=off       # inside a nested [ ... ] within a bracket expression

  zstyle -s ':grep2awk:bre2ere:' debug debug
  if [[ -n $debug ]]; then
    rm "$debug" 2>/dev/null
    debug() {
      # The state is passed as data, never as the printf format.
      printf '%s' "bre[$i]=$bre[$i] s_esc=$s_esc " >> "$debug"
      printf '%s' "s_brack=$s_brack s_brack_start=$s_brack_start " >> "$debug"
      printf '%s\n' "s_altern=$s_altern s_alter_now=$s_altern_start" >> "$debug"
    }
  fi

  for (( i = 1; i <= ${#bre}; i++ )); do
    c=$bre[$i]
    [[ -n $debug ]] && debug

    if [[ $s_brack == on ]]; then
      # Bracket expressions
      # -------------------
      # Copied verbatim; only the end of the expression has to be found.
      if [[ $c == '[' ]]; then
        # Nested [ ... ] accounting, in order to keep track of the end of
        # the bracket expression.
        ere+=$c
        s_char_cls=on
      elif [[ $c == ']' && $s_char_cls == on ]]; then
        ere+=$c
        s_char_cls=off
      elif [[ $s_brack_start == on ]]; then
        # Directly after [ or [^ a ] is an ordinary member (POSIX.2).
        ere+=$c
        [[ $c == '^' ]] || s_brack_start=off
      elif [[ $c == ']' ]]; then
        ere+=$c
        s_brack=off
      else
        ere+=$c
      fi

    elif [[ $s_esc == on ]]; then
      # Character preceded by a backslash
      # ---------------------------------
      case $c in
        ('|'|'(')
          # Start of a new alternative/subexpression; needed for the
          # special casing of *, ^ and $. The escape sign is dropped.
          s_altern=on
          s_altern_start=on
          ere+=$c
          ;;
        ('+'|'?'|'{'|'}'|')')
          # Escaped in BRE means operator; in ERE the bare character is.
          ere+=$c
          ;;
        ('/')
          # Left bare: the caller escapes slashes itself.
          ere+=$c
          ;;
        ('\')
          # One literal backslash: \\ for awk, doubled for the convention.
          ere+='\\\\'
          ;;
        (*)
          # Anything else keeps its escape (e.g. \. \* \[ \^ \$ stay
          # literal in the ERE as well).
          ere+="\\\\$c"
          ;;
      esac
      s_esc=off

    else
      # Unescaped character outside a bracket expression
      # ------------------------------------------------
      case $c in
        ('[')
          ere+=$c
          s_brack=on
          s_brack_start=on
          ;;
        ('\')
          s_esc=on
          ;;
        ('^')
          # An anchor only at the start of an alternative/subexpression,
          # an ordinary character elsewhere.
          if [[ $s_altern == on ]]; then
            ere+='^'
          else
            ere+='\\^'
          fi
          ;;
        ('*')
          # When * is at the start of an alternation/subexpression it is an
          # ordinary character: escape it.
          if [[ $s_altern == on ]]; then
            ere+='\\*'
          else
            ere+='*'
          fi
          ;;
        ('$')
          # Dollar is a special character when it is at the end of the
          # string, of an alternation, or of a subexpression; otherwise it
          # is ordinary. A lookahead at the next two characters tells which.
          if (( i == ${#bre} )) || [[ ${bre:$i:2} =~ '\\(\||\))' ]]; then
            ere+='$'
          else
            ere+='\\$'
          fi
          ;;
        ('+'|'?'|'|'|'{'|'}'|'('|')')
          # Ordinary in BRE, operators in ERE: escape them.
          ere+="\\\\$c"
          ;;
        (*)
          ere+=$c
          ;;
      esac
    fi

    # The "start of alternative" state ends with the first character that
    # is neither the opener itself (\| or \() nor a ^.
    if [[ $s_altern == on && $s_altern_start == off && $c != '^' ]]; then
      s_altern=off
    fi
    s_altern_start=off
  done
  printf -- '%s' "$ere"
}
# Debugging/testing hook.
# If grep2awk is autoloaded, the regex replacement functions can be called
# directly by naming the helper in $1 and passing its argument in $2:
#
#   grep2awk bre2ere 'a[^a-z]\(c$\|d\)'
#
# See the test suite for some examples.
[[ -n $1 ]] && {
  echo $($1 "$2")
  return $?
}

# The awk command to insert can be configured with the zstyle
# ':grep2awk:' awk; it defaults to `awk --`.
zstyle -s ':grep2awk:' awk awk
: ${awk:=awk --}

# Store the command line as $reply[@]. Odd elements hold the whitespace
# between words, even elements hold the words themselves.
split-shell-arguments

# `flag` keeps the program state while looping through the split shell
# words. Its semantics:
#   flag==0: No 'grep', 'egrep' or 'fgrep' encountered yet.
#   flag==1: The grep command has been replaced by the awk command; the
#            next word is an option or the regex.
#   flag==2: End of options reached; the next word is the regex.
#   flag==3: The regex has been replaced; the rest is copied unchanged.
flag=0
cursor=0
for (( i=1; i<=$#reply; i++ )); do
  if (( i%2==1 && flag < 3 )); then
    # Whitespace only moves the cursor as long as the regex has not been
    # replaced by the awk program yet (flag < 3).
    (( cursor += $#reply[$i] ))
  elif (( flag == 0 )); then
    case $reply[$i] in
      (grep)
        flag=1
        reply[$i]="$awk"
        ;;
      (egrep)
        flag=1
        reply[$i]="$awk"
        opt[E]=1
        ;;
      (fgrep)
        flag=1
        reply[$i]="$awk"
        opt[F]=1
        ;;
    esac
    (( cursor += $#reply[$i] ))
  elif [[ $flag == 1 && $reply[$i] == -* ]]; then
    if [[ $reply[$i] == "--" ]]; then
      # The End of Options marker: whatever follows is the regex.
      flag=2
    elif [[ $reply[$i] =~ ^--. ]]; then
      zle -M "grep2awk: Long options to grep are not supported"
      return 1
    else
      # A cluster of short options. Start from an empty option letter so a
      # bare "-" cannot reuse the letter left over from an earlier word.
      o=""
      for (( l=1; l<$#reply[$i]; l++ )); do
        o=${reply[$i]:$l:1}
        if [[ $o == [cEeFGHilLnvwxy] ]]; then
          opt[$o]=1
        else
          zle -M "grep2awk: Option $o is not supported"
          return 1
        fi
      done
      # A trailing -e announces the regex as the next word.
      [[ $o == "e" ]] && flag=2
    fi
    # Delete the options and the now superfluous whitespace, and leave the
    # cursor pointer where it was.
    reply[$i]=""
    reply[$i+1]=""
  elif (( flag==1 || flag==2 )); then
    # This word is the regex. If it cannot be converted, stop before the
    # command line is modified instead of inserting an empty program.
    reply[$i]=$(regex2awkprog $reply[$i]) || {
      zle -M "grep2awk: could not convert this grep invocation"
      return 1
    }
    # Put the cursor just in front of the closing brace of the awk action.
    (( cursor += $#reply[$i] - 2 ))
    flag=3
  fi
  # Append the current version of $reply[$i] to replacement_line:
  replacement_line+=$reply[$i]
done

# Without a grep command there is nothing to change: keep the line and the
# cursor position as they are.
if (( flag == 0 )); then
  return 0
fi

# Now we are done. Apply the changes to the BUFFER:
BUFFER="$replacement_line"
CURSOR=$cursor
}
# Call the function defined above with the arguments this file was run with.
# NOTE(review): presumably this makes the first, autoloaded invocation behave
# like later direct calls of grep2awk -- confirm against the autoload setup.
grep2awk "$@"
Messages sorted by:
Reverse Date,
Date,
Thread,
Author