We have found these regular expressions useful or interesting:
# Swap the first two whitespace-separated words of $_, keeping the
# whitespace that separated them in place.
s/(\S+)(\s+)(\S+)/$3$2$1/
# Match a "keyword = value" line; whitespace around the "=" and at the
# end of the line is excluded from the captures.
m/^(\w+)\s*=\s*(.*?)\s*$/ # keyword is $1, value is $2
# Two ways to test whether $_ holds a line of 80 or more characters.
# `.` does not match a newline, so the regex form counts characters
# within a single line; length() counts the whole string.
m/.{80,}/;
length( ) >= 80;    # ok, not a regex
# Capture six numeric fields from a "N/N/N N:N:N" timestamp into $1..$6
# (presumably MM/DD/YY HH:MM:SS -- the pattern itself does not fix the
# order of the date fields).
m|(\d+)/(\d+)/(\d+) (\d+):(\d+):(\d+)|
# Replace every occurrence of /usr/bin with /usr/local/bin.
s(/usr/bin)(/usr/local/bin)g
# Decode each %XX escape (two hex digits) into the character with that
# code; /e evaluates the replacement as Perl code.
s/%([0-9A-Fa-f][0-9A-Fa-f])/chr(hex($1))/ge
# Remove C-style /* ... */ comments from $_ (imperfectly: comment markers
# that appear inside string literals are not recognised).
# /s lets `.` cross newlines so multi-line comments are removed; each
# comment is replaced by a single space so the tokens on either side of
# it stay separated.
s{
    /\*     # Match the opening delimiter
    .*?     # Match a minimal number of characters
    \*/     # Match the closing delimiter
}{ }gsx;
# Trim leading, then trailing, whitespace from $_.
s/^\s+//; s/\s+$//;
# Turn each literal backslash-n pair into a real newline character.
s/\\n/\n/g;
# Strip everything up to and including the last "::" (the match is
# greedy), e.g. the package part of a fully qualified name.
s/^.*:://
# Match a dotted-quad IPv4 address: four octets, each in the range 0..255,
# captured in $1..$4.
# XXX: fails on legal IPs 127.1 and 2130706433.
m{
    ^   ( \d | [01]?\d\d | 2[0-4]\d | 25[0-5] )
    \.  ( \d | [01]?\d\d | 2[0-4]\d | 25[0-5] )
    \.  ( \d | [01]?\d\d | 2[0-4]\d | 25[0-5] )
    \.  ( \d | [01]?\d\d | 2[0-4]\d | 25[0-5] )
    $
}x;
# Remove the leading directory path from $_, leaving only the final
# component. The replacement is empty so no stray blank is left in
# front of the name.
s{^.*/}{};
# Take the column count from the ":co#N:" entry of $ENV{TERMCAP}, falling
# back to 80 when there is none. The `|| " "` keeps the match from being
# applied to an unset variable.
$cols = ( ($ENV{TERMCAP} || " ") =~ m/:co#(\d+):/ ) ? $1 : 80;
# Build " program arg1 arg2 ..." and, for every word that starts with "/",
# drop the directory components up to its last slash.
($name = " $0 @ARGV") =~ s{ /\S+/}{ }g;
# Abort unless the operating-system name in $^O contains "linux"
# (case-insensitive).
die "This isn't Linux" unless $^O =~ m/linux/i;
# Join continuation lines: a newline followed by whitespace becomes a
# single space.
s/\n\s+/ /g
# Collect every unsigned decimal number in $_: digits with an optional
# fractional part, or a bare fraction such as ".5".
@nums = m/(\d+\.?\d*|\.\d+)/g;
# Collect words made up solely of upper-case letters.
@capwords = m/(\b\p{ Upper-case Letter }+\b)/g;
# Collect words made up solely of lower-case letters.
@lowords = m/(\b\p{ Lower-case Letter }+\b)/g;
# Collect words that begin with one upper-case or title-case letter
# followed only by lower-case letters (possibly none).
@icwords = m{ ( \b [\p{ Upper-case Letter }\p{ Title-case Letter }] \p{ Lower-case Letter } * \b ) }gx;
# Collect the HREF values of <A ...> tags (case-insensitive). The value may
# be quoted or bare, and the pattern expects HREF to be the last attribute
# before the closing ">".
@links = m/<A[^>]+?HREF\s*=\s*["']?([^'" >]+?)['"]?\s*>/ig;
# First character of the second word, provided a third word follows
# (e.g. a middle initial); empty string otherwise.
$initial = /^\S+\s+(\S)\S*\s+\S/ ? $1 : "";
# Convert "straight" double-quoted spans in $_ into paired typographic
# quotes. The two statements are alternatives: once the first has run no
# straight quotes remain, so the second becomes a no-op.
s/"([^"]*)"/``$1''/g;                   # old way
# next is unicode only
# U+201C and U+201D are each already a *double* quotation mark, so a
# single code point is used on each side.
s/"([^"]*)"/\x{201C}$1\x{201D}/g;
# Read input a paragraph at a time and append each sentence to @sentences.
# A sentence starts at a non-blank character, ends at the first "!", "?"
# or "." that is followed by two spaces or the end of the paragraph.
{
    local $/ = "";                      # paragraph mode
    while (<>) {
        s/\n/ /g;                       # fold the paragraph onto one line
        # Squeeze longer runs of blanks down to exactly TWO spaces: the
        # lookahead below relies on a two-space gap to mark a sentence
        # boundary, so squeezing to one space would merge sentences.
        s/ {3,}/  /g;
        push @sentences, m/(\S.*?[!?.])(?= {2}|\Z)/g;
    }
}
# Match a date written as four digits, two digits, two digits joined by
# hyphens and delimited by word boundaries. The digits are not range-checked.
m/\b(\d{4})-(\d\d)-(\d\d)\b/ # YYYY in $1, MM in $2, DD in $3
# Match a North-American style telephone number occupying the whole of $_.
# When the area code is followed by a separator captured in group 1, the
# separator after the prefix must be either whitespace or that same
# character.
m/
    ^
    (?:
        1 \s (?: \d\d\d \s)?        # 1, or 1 and area code
      |                             # ... or ...
        \(\d\d\d\) \s               # area code with parens
      |                             # ... or ...
        (?: \+\d\d?\d? \s)?         # optional +country code
        \d\d\d ([\s\-])             # and area code
    )
    \d\d\d (\s|\1)                  # prefix (and area code separator)
    \d\d\d\d                        # exchange
    $
/x;
# Match, case-insensitively, "oh my" followed by one of: god, gods,
# goddess, goddesses, goodness or gosh. An "h" may follow the initial "g"
# of that last word.
m/\boh\s+my\s+gh?o(d(dess(es)?|s?)|odness|sh)\b/i
# Peel terminated lines off the front of $input one at a time, appending
# the text of each (without its terminator) to @lines. The matched line
# and terminator are deleted from $input, so the loop ends when no
# terminator remains; trailing text without a terminator stays in $input.
push(@lines, $1)
    while $input =~ s{
        ^               # gobble from front
        (
            .           # begin $1: any single char (/s)
            *?          # but minimally matching even none
        )
        (?:             # make capturing if saving terminators
            \x0D \x0A   # CRLF
          | \x0A        # LF
          | \x0D        # CR
          | \x0C        # FF
                        # (see http://www.unicode.org/reports/tr13/tr13-9.html)
          | \x{2028}    # Unicode LS
          | \x{2029}    # Unicode PS
        )
    }{}sx;              # consumes $input
Or use split:
# Split $input into @lines on any of the recognised line terminators.
# CRLF is listed first so that it is consumed as one terminator rather
# than as a CR followed by an LF.
@lines = split m{
    (?:             # make capturing if saving terminators
        \x0D \x0A   # CRLF
      | \x0A        # LF
      | \x0D        # CR
      | \x0C        # FF
                    # (see http://www.unicode.org/reports/tr13/tr13-9.html)
      | \x{2028}    # Unicode LS
      | \x{2029}    # Unicode PS
    )
}x, $input;