I had been thinking about this for quite some time...
Why do I always "shell out" to `grep` and `sed` to edit Linux configuration files, knowing full well that it's not good from a security standpoint, and that it's neither clean nor efficient code! 🤔
Maybe it's time. Last week, while sitting at a dental clinic, I quickly wrote a minimal version of `grep` supporting just the most basic operations, the equivalent of:
grep -E or egrep (extended POSIX regex match)
grep -q (quiet 🤫)
grep -F (fixed string match), and
grep -c (count the number of matches)
All I needed was a quick file read (getline) plus POSIX regex match - a.k.a `regcomp()`, `regexec()` and this was it:
(later I decided to throw in "show line number" and "stop on first match" also 😀 why not!? not a big change)
#define _GNU_SOURCE /* for strcasestr */ | |
#include <stdio.h> | |
#include <inttypes.h> | |
#include <stdint.h> | |
#include <stdbool.h> | |
#include <sys/types.h> | |
#include <stdarg.h> | |
#include <stdlib.h> | |
#include <regex.h> | |
#include <string.h> | |
#include <errno.h> | |
#define MGREP_VERBOSE_DEBUG   (1<<0) /* reserved; not consulted by min_grep() */
#define MGREP_FIXED_STRING    (1<<1) /* -F / fgrep */
#define MGREP_IGNORE_CASE     (1<<2) /* -i */
#define MGREP_EXTENDED_REGEX  (1<<3) /* -E / egrep */
#define MGREP_QUIET           (1<<4) /* -q */
#define MGREP_FIRST_MATCH     (1<<5) /* -m1 */
#define MGREP_COUNT_ONLY      (1<<6) /* -c */
#define MGREP_FILE_LINNUM     (1<<7) /* -nH */

/**
 * min_grep - search a file line by line for a pattern.
 *
 * @filename: path of the file to read.
 * @flags:    bitwise OR of MGREP_* options.
 * @match_re: POSIX regex (basic, or extended with MGREP_EXTENDED_REGEX),
 *            or a literal substring when MGREP_FIXED_STRING is set.
 * @...:      unused; kept so existing callers keep compiling.
 *
 * Matching lines are printed to stdout (prefixed with "file:line:" when
 * MGREP_FILE_LINNUM is set) unless MGREP_QUIET or MGREP_COUNT_ONLY is given.
 * With MGREP_COUNT_ONLY the number of matching lines is written to stderr
 * (no trailing newline) after the scan. MGREP_FIRST_MATCH stops the scan at
 * the first matching line.
 *
 * Returns true when at least one line matched; false when nothing matched
 * or on any error (bad arguments, unreadable file, conflicting flags,
 * invalid regex). Error messages go to stderr.
 */
bool
min_grep (const char *filename,
          uint16_t flags,
          const char *match_re,
          ...)
{
    const bool fixed      = (flags & MGREP_FIXED_STRING) != 0;
    const bool icase      = (flags & MGREP_IGNORE_CASE) != 0;
    const bool quiet      = (flags & (MGREP_QUIET | MGREP_COUNT_ONLY)) != 0;
    const bool first_only = (flags & MGREP_FIRST_MATCH) != 0;
    const bool show_loc   = (flags & MGREP_FILE_LINNUM) != 0;
    FILE *fp = NULL;
    char *line = NULL;
    size_t cap = 0, linnum = 0;
    ssize_t nread;
    int match_count = 0, rc, re_cflags = REG_NOSUB; /* offsets are never needed */
    bool re_ready = false;  /* true once re_id holds a compiled regex */
    regex_t re_id;

    if (!filename || !match_re) {
        fprintf(stderr, "No filename|match-regex!\n");
        return false;
    }

    if (!(fp = fopen(filename, "r"))) {
        fprintf(stderr, "Failed to open file %s (%s)\n", filename, strerror(errno));
        return false;
    }

    if (fixed && (flags & MGREP_EXTENDED_REGEX)) {
        fprintf(stderr, "Cannot do both regex and plain string match!\n");
        goto cleanup;
    }

    if (!fixed) {
        if (icase) {
            re_cflags |= REG_ICASE;
        }
        if (flags & MGREP_EXTENDED_REGEX) {
            re_cflags |= REG_EXTENDED;
        }
        if ((rc = regcomp(&re_id, match_re, re_cflags)) != 0) {
            char ebuf[1024] = {0};
            regerror(rc, &re_id, ebuf, sizeof ebuf);
            fprintf(stderr, "Failed to compile regex! (%s)\n", ebuf);
            goto cleanup;
        }
        re_ready = true;
    }

    while ((nread = getline(&line, &cap, fp)) != -1) {
        bool hit;

        linnum++;

        /*
         * getline() keeps the line terminator. Strip it so that '$' anchors
         * at the end of the line and '.' cannot swallow the newline.
         */
        if (nread > 0 && line[nread - 1] == '\n') {
            line[nread - 1] = '\0';
        }

        if (fixed) {
            hit = (icase ? strcasestr(line, match_re)
                         : strstr(line, match_re)) != NULL;
        } else {
            hit = regexec(&re_id, line, 0, NULL, 0) == 0;
        }
        if (!hit) {
            continue;
        }

        match_count++;
        if (!quiet) {
            if (show_loc) {
                printf("%s:%zu:", filename, linnum);
            }
            printf("%s\n", line);
        }
        if (first_only) {
            break;  /* leave through the common cleanup path */
        }
    }

    if (flags & MGREP_COUNT_ONLY) {
        fprintf(stderr, "%d", match_count);
    }

cleanup:
    if (re_ready) {
        regfree(&re_id);  /* only valid after a successful regcomp() */
    }
    free(line);
    fclose(fp);
    return match_count > 0;
}
int main (int argc, char *argv[]) | |
{ | |
bool ret = min_grep("/etc/hosts", MGREP_EXTENDED_REGEX, "^127[.]"); | |
printf("\nret: %d\n", ret); | |
return 0; | |
} |
Emboldened by this, I attempted a minimal `sed` to do "in-place replace" (my most common use!)
The only additional work I needed was to find the match offsets and, of course, handle captures and the replacement:
#include <stdio.h>
#include <inttypes.h>
#include <stdint.h>
#include <stdbool.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <stdarg.h>
#include <string.h>
#include <stdlib.h>
#include <regex.h>
#include <limits.h>
#include <unistd.h>
#include <errno.h>
#define MSED_VERBOSE_DEBUG    (1<<0) /* reserved; not consulted by min_sed() */
#define MSED_IGNORE_CASE      (1<<1) /* case-insensitive match (REG_ICASE) */
#define MSED_EXTENDED_REGEX   (1<<2) /* extended POSIX regex (REG_EXTENDED) */
#define MSED_REPLACE_INPLACE  (1<<3) /* rewrite the file instead of streaming */
#define MSED_FOLLOW_SYMLINKS  (1<<4) /* in-place: edit the symlink's target */

/* Write LEN bytes starting at BUF to OUT; true when everything was written. */
static bool
msed_write (FILE *out, const char *buf, size_t len)
{
    return len == 0 || fwrite(buf, 1, len, out) == len;
}

/*
 * Emit the replacement text for one match. The first '&' in REPLACE_STR is
 * expanded to capture group 1 (GROUP) when that group took part in the
 * match; otherwise REPLACE_STR is written verbatim.
 */
static bool
msed_emit_replacement (FILE *out, const char *replace_str,
                       const char *line, const regmatch_t *group)
{
    const char *amp = strchr(replace_str, '&');

    if (!amp || group->rm_so == -1) {
        return msed_write(out, replace_str, strlen(replace_str));
    }
    return msed_write(out, replace_str, (size_t)(amp - replace_str))
        && msed_write(out, line + group->rm_so,
                      (size_t)(group->rm_eo - group->rm_so))
        && msed_write(out, amp + 1, strlen(amp + 1));
}

/**
 * min_sed - replace the first regex match on every line of a file.
 *
 * @filename:    path of the file to read.
 * @flags:       bitwise OR of MSED_* options.
 * @match_re:    POSIX regex (basic, or extended with MSED_EXTENDED_REGEX).
 * @replace_str: replacement text; its first '&' expands to capture group 1
 *               when the pattern has one that matched, else it stays literal.
 * @...:         unused; kept so existing callers keep compiling.
 *
 * Each line is matched without its trailing newline, so '$' anchors at the
 * end of the line; the newline is written back afterwards. Text before and
 * after the match is preserved.
 *
 * Without MSED_REPLACE_INPLACE the edited text is streamed to stderr and the
 * return value is true when at least one line matched.
 *
 * With MSED_REPLACE_INPLACE the result is written to "<path>~", given the
 * permission bits of the original, and renamed over the original when at
 * least one line matched (otherwise the temporary file is removed). With
 * MSED_FOLLOW_SYMLINKS, <path> is the resolved target of @filename, so the
 * link itself is left intact. The return value is then true when the final
 * rename/unlink succeeded.
 *
 * Returns false on any error (bad arguments, unreadable file, invalid regex,
 * read/write failure); the original file is left untouched in that case.
 */
bool
min_sed (const char *filename,
         uint16_t flags,
         const char *match_re,
         const char *replace_str,
         ...)
{
    const bool in_place = (flags & MSED_REPLACE_INPLACE) != 0;
    FILE *ifp = NULL, *ofp = NULL;
    FILE *out = stderr;            /* destination of the edited text */
    const char *target = filename; /* file that is finally replaced */
    char *resolved = NULL;         /* realpath() result, when requested */
    char *tmp_path = NULL;         /* "<target>~" */
    char *line = NULL;
    size_t cap = 0;
    ssize_t nread;
    int match_count = 0, rc, re_cflags = 0;
    bool re_ready = false;         /* true once re_id holds a compiled regex */
    bool ok = false;               /* true once all input was processed */
    regex_t re_id;
    regmatch_t pmatch[2];          /* [0] whole match, [1] first capture */

    if (!filename || !match_re) {
        fprintf(stderr, "No filename|match-regex!\n");
        return false;
    }
    if (!replace_str) {
        fprintf(stderr, "No replacement string!\n");
        return false;
    }

    ifp = fopen(filename, "r");
    if (ifp == NULL) {
        fprintf(stderr, "Failed to open file %s (%s)\n", filename, strerror(errno));
        return false;
    }

    /* Compile first so a bad pattern never creates a temporary file. */
    if (flags & MSED_IGNORE_CASE)    { re_cflags |= REG_ICASE; }
    if (flags & MSED_EXTENDED_REGEX) { re_cflags |= REG_EXTENDED; }
    if ((rc = regcomp(&re_id, match_re, re_cflags)) != 0) {
        char ebuf[1024] = {0};
        regerror(rc, &re_id, ebuf, sizeof ebuf);
        fprintf(stderr, "Failed to compile regex! (%s)\n", ebuf);
        goto out;
    }
    re_ready = true;

    if (in_place) {
        struct stat st;
        size_t tmp_len;

        if (flags & MSED_FOLLOW_SYMLINKS) {
            resolved = realpath(filename, NULL);
            if (!resolved) {
                fprintf(stderr, "Failed to read link %s (%s)\n", filename, strerror(errno));
                goto out;
            }
            target = resolved;
        }

        tmp_len = strlen(target) + 2;  /* '~' and the terminating NUL */
        tmp_path = malloc(tmp_len);
        if (!tmp_path) {
            fprintf(stderr, "Out of memory\n");
            goto out;
        }
        snprintf(tmp_path, tmp_len, "%s~", target);

        ofp = fopen(tmp_path, "w+");
        if (ofp == NULL) {
            fprintf(stderr, "Failed to open file %s (%s)\n", tmp_path, strerror(errno));
            goto out;
        }

        /*
         * Copy the permission bits before any content is written, so a
         * restricted config file does not become readable through the
         * temporary copy or after the rename.
         */
        if (stat(target, &st) != 0 || chmod(tmp_path, st.st_mode & 07777) != 0) {
            fprintf(stderr, "Failed to copy permissions to %s (%s)\n", tmp_path, strerror(errno));
            goto out;
        }
        out = ofp;
    }

    while ((nread = getline(&line, &cap, ifp)) != -1) {
        size_t n = (size_t)nread;
        const bool had_nl = (n > 0 && line[n - 1] == '\n');
        bool wrote;

        /* Match on the line without its terminator; it is re-added below. */
        if (had_nl) {
            line[--n] = '\0';
        }

        if (regexec(&re_id, line, sizeof pmatch / sizeof pmatch[0], pmatch, 0) == 0) {
            match_count++;
            wrote = msed_write(out, line, (size_t)pmatch[0].rm_so)
                 && msed_emit_replacement(out, replace_str, line, &pmatch[1])
                 && msed_write(out, line + pmatch[0].rm_eo,
                               n - (size_t)pmatch[0].rm_eo);
        } else {
            wrote = msed_write(out, line, n);
        }
        if (wrote && had_nl) {
            wrote = (fputc('\n', out) != EOF);
        }
        if (!wrote) {
            fprintf(stderr, "Failed to write output (%s)\n", strerror(errno));
            goto out;
        }
    }
    if (ferror(ifp)) {
        fprintf(stderr, "Failed to read file %s (%s)\n", filename, strerror(errno));
        goto out;
    }
    ok = true;

out:
    if (re_ready) {
        regfree(&re_id);
    }
    free(line);
    fclose(ifp);

    if (ofp) {
        /* fclose() flushes; a failure means the copy is incomplete. */
        if (fclose(ofp) != 0) {
            fprintf(stderr, "Failed to write file %s (%s)\n", tmp_path, strerror(errno));
            ok = false;
        }
        if (ok && match_count > 0) {
            if (rename(tmp_path, target) != 0) {
                fprintf(stderr, "Failed to replace file %s (%s)\n", target, strerror(errno));
                unlink(tmp_path);
                ok = false;
            }
        } else if (unlink(tmp_path) != 0) {
            ok = false;
        }
    }
    free(tmp_path);
    free(resolved);

    if (!ok) {
        return false;
    }
    return in_place ? true : (match_count > 0);
}
int main (int argc, char *argv[]) | |
{ | |
bool ret = min_sed("/etc/fail2ban/fail2ban.conf", | |
MSED_EXTENDED_REGEX|MSED_REPLACE_INPLACE, | |
"^#?ignoreip\\s+=\\s+(.*)", "ignoreip = & 9.9.9.9/18"); | |
printf("\nret: %d\n", ret); | |
return 0; | |
} | |
I am saving only two sets of match offsets: one for the entire match (implicit, in `pmatch[0]`) and one for the capture group, if the user specifies one (`pmatch[1]`).
Note: I know `&` means "all that matched" and not same as `\1` but since I am not going to add support for multiple captures and replace, this is it!