Sunday, March 09, 2025

Minimal implementations of grep and sed

 I had been thinking about it from quite some time...

Why do I always do a "shell-out" to `grep` and `sed` to edit Linux configuration files, knowing full well that its not good from security standpoint, and not a clean/efficient code! 🤔

Maybe its time, last week while sitting at a Dental clinic, I quickly wrote a minimal version of `grep`, just supporting the most basic operations, the equivalent of:

grep -e or egrep (extended POSIX regex match)

grep -q (quiet 🤫)

grep -F (fixed string match), and

grep -c (count the number of matches)

All I needed was a quick file read (getline) plus POSIX regex match - a.k.a `regcomp()`, `regexec()` and this was it:


(later I decided to throw in "show line number" and "stop on first match" also 😀 why not!? not a big change)

#define _GNU_SOURCE /* for strcasestr */
#include <stdio.h>
#include <inttypes.h>
#include <stdint.h>
#include <stdbool.h>
#include <sys/types.h>
#include <stdarg.h>
#include <stdlib.h>
#include <regex.h>
#include <string.h>
#include <errno.h>
#define MGREP_VERBOSE_DEBUG (1<<0)
#define MGREP_FIXED_STRING (1<<1) /* -F / fgrep */
#define MGREP_IGNORE_CASE (1<<2) /* -i */
#define MGREP_EXTENDED_REGEX (1<<3) /* -E / egrep */
#define MGREP_QUIET (1<<4) /* -q */
#define MGREP_FIRST_MATCH (1<<5) /* -m1 */
#define MGREP_COUNT_ONLY (1<<6) /* -c */
#define MGREP_FILE_LINNUM (1<<7) /* -nH */
bool
min_grep (const char *filename,
uint16_t flags,
const char *match_re,
...)
{
FILE *fp = NULL;
char *line = NULL;
size_t len = 0, linnum = 1;
ssize_t nread = 0;
int match_count = 0, rc = 0,re_cflags = 0;
bool opt_plain_str_match = false;
bool opt_silent = false;
bool opt_stop_at_m1 = false;
bool opt_show_fileline = false;
bool opt_ignore_case = false;
regex_t re_id;
if (!filename || !match_re) {
fprintf(stderr, "No filename|match-regex!\n");
return false;
}
if (!(fp = fopen(filename, "r"))) {
fprintf(stderr, "Failed to open file %s (%s)\n", filename, strerror(errno));
return false;
}
opt_plain_str_match = (flags & MGREP_FIXED_STRING) ? true : false;
opt_silent = (flags & MGREP_COUNT_ONLY) ? true : false;
opt_silent = (flags & MGREP_QUIET) ? true : opt_silent;
opt_show_fileline = (flags & MGREP_FILE_LINNUM) ? true : false;
opt_stop_at_m1= (flags & MGREP_FIRST_MATCH) ? true : false;
if (flags & MGREP_IGNORE_CASE) { re_cflags |= REG_ICASE; opt_ignore_case = true; }
if (flags & MGREP_EXTENDED_REGEX) {
if (opt_plain_str_match) {
fprintf(stderr, "Cannot do both regex and plain string match!\n");
return false;
}
re_cflags |= REG_EXTENDED;
}
if (!opt_plain_str_match && ((rc = regcomp(&re_id, match_re, re_cflags)) != 0)) {
char ebuf[1024] = {0};
regerror(rc, &re_id, ebuf, sizeof ebuf);
fprintf(stderr, "Failed to compile regex! (%s)\n", ebuf);
goto err;
}
while ((nread = getline(&line, &len, fp)) != -1) {
if (opt_plain_str_match) {
if ((opt_ignore_case ? strcasestr : strstr )(line, match_re)) {
if (!opt_silent && opt_show_fileline) printf("%s:%zu:", filename, linnum);
if (!opt_silent) printf("%s", line);
match_count++;
if (opt_stop_at_m1) return true;
}
} else {
if (regexec(&re_id, line, 0, NULL, 0) == 0) {
if (!opt_silent && opt_show_fileline) printf("%s:%zu:", filename, linnum);
if (!opt_silent) printf("%s", line);
match_count++;
if (opt_stop_at_m1) return true;
}
}
linnum++;
}
err:
if (!opt_plain_str_match) regfree(&re_id);
free(line);
fclose(fp);
if (flags & MGREP_COUNT_ONLY) fprintf(stderr, "%d", match_count);
return (match_count > 0) ? true : false;
}
int main (int argc, char *argv[])
{
bool ret = min_grep("/etc/hosts", MGREP_EXTENDED_REGEX, "^127[.]");
printf("\nret: %d\n", ret);
return 0;
}
view raw min_grep.c hosted with ❤ by GitHub

Emboldened by this, I attempted a minimal `sed` to do "inline-replace" (my most common use !)

The only additional checks I needed, was to find the match offsets, and of course captures and replace:

#include <stdio.h>
#include <inttypes.h>
#include <stdint.h>
#include <stdbool.h>
#include <sys/types.h>
#include <stdarg.h>
#include <string.h>
#include <stdlib.h>
#include <regex.h>
#include <limits.h>
#include <unistd.h>
#include <errno.h>
#define MSED_VERBOSE_DEBUG (1<<0)
#define MSED_IGNORE_CASE (1<<1)
#define MSED_EXTENDED_REGEX (1<<2)
#define MSED_REPLACE_INPLACE (1<<3)
#define MSED_FOLLOW_SYMLINKS (1<<4)
bool
min_sed (const char *filename,
uint16_t flags,
const char *match_re,
const char *replace_str,
...)
{
FILE *ifp = NULL, *ofp = NULL;
char *line = NULL;
size_t len = 0;
ssize_t nread;
int match_count = 0, rc = 0;
int re_cflags = 0;
char ofname[PATH_MAX] = {0};
bool opt_replace_inplace = false;
char replacebuf[BUFSIZ] = {0};
regex_t re_id;
regmatch_t pmatch[2];
if (!filename || !match_re) {
fprintf(stderr, "No filename|match-regex!\n");
return false;
}
ifp = fopen(filename, "r");
if (ifp == NULL) {
fprintf(stderr, "Failed to open file %s (%s)\n", filename, strerror(errno));
return false;
}
if (flags & MSED_REPLACE_INPLACE) opt_replace_inplace = true;
if (opt_replace_inplace) {
char linkname[PATH_MAX] = {0};
if (flags & MSED_FOLLOW_SYMLINKS) {
if (readlink(filename, linkname, sizeof linkname) != 0) {
fprintf(stderr, "Failed to read link %s (%s)\n", ofname, strerror(errno));
} else {
// TODO
}
}
sprintf(ofname, "%s~", filename);
ofp = fopen(ofname, "w+");
if (ofp == NULL) {
fprintf(stderr, "Failed to open file %s (%s)\n", ofname, strerror(errno));
return false;
}
}
if (flags & MSED_IGNORE_CASE) { re_cflags |= REG_ICASE; }
if (flags & MSED_EXTENDED_REGEX) { re_cflags |= REG_EXTENDED; }
if ((rc = regcomp(&re_id, match_re, re_cflags)) != 0) {
char ebuf[1024] = {0};
regerror(rc, &re_id, ebuf, sizeof ebuf);
fprintf(stderr, "Failed to compile regex! (%s)\n", ebuf);
}
/* int n; */
while ((nread = getline(&line, &len, ifp)) != -1) {
if (regexec(&re_id, line, ARR_SIZ(pmatch), pmatch, 0) == 0) {
char *rp = strchr(replace_str, '&');
if (rp && (pmatch[1].rm_eo > 0)) {
char new_replace[BUFSIZ] = {0};
snprintf(new_replace, BUFSIZ, "%.*s%.*s%s", (int)(rp - replace_str), replace_str,
(int)(pmatch[1].rm_eo - pmatch[1].rm_so - 1), line + pmatch[1].rm_so ,
rp + 1);
fprintf(opt_replace_inplace ? ofp : stderr, "%s", new_replace);
} else {
if ((pmatch[0].rm_so == 0) && (pmatch[0].rm_eo == nread)) {
/* entire line replace */
fprintf(opt_replace_inplace ? ofp : stderr, "%s\n", replace_str);
} else {
/* part replace */
memset(replacebuf, 0, BUFSIZ);
snprintf(replacebuf, BUFSIZ, "%.*s%s%s", pmatch[0].rm_so, line, replace_str, line + pmatch[0].rm_eo);
fprintf(opt_replace_inplace ? ofp : stderr, "%s", replacebuf);
}
}
match_count++;
} else {
fprintf(opt_replace_inplace ? ofp : stderr, "%s", line);
}
}
regfree(&re_id);
free(line);
fclose(ifp);
if (opt_replace_inplace) {
fflush(ofp);
fclose(ofp);
if (match_count > 0) {
rc = rename(ofname, filename);
} else {
rc = unlink(ofname);
}
return rc == 0 ? true : false;
}
return (match_count > 0) ? true : false;
}
int main (int argc, char *argv[])
{
bool ret = min_sed("/etc/fail2ban/fail2ban.conf",
MSED_EXTENDED_REGEX|MSED_REPLACE_INPLACE,
"^#?ignoreip\\s+=\\s+(.*)", "ignoreip = & 9.9.9.9/18");
printf("\nret: %d\n", ret);
return 0;
}
view raw min_sed.c hosted with ❤ by GitHub


I am saving only 2 set of captures, one for the entire match (implicit, in `pmatch[0]`) and one if the user specifies a capture (`pmatch[1]`)

Note: I know `&` means  "all that matched" and not same as `\1` but since I am not going to add support for multiple captures and replace, this is it!

Thursday, February 27, 2025

 Switch case on strings!


Interesting solution:    https://stackoverflow.com/a/51816519/112687 


#ifndef __SSWITCH_H__

#define __SSWITCH_H__

#include <string.h>

    #ifndef SWITCH_CASE_INIT

    #define SWITCH_CASE_INIT

    #define SWITCH(X)  for(char* __switch_p__=X,__switch_next__=1;__switch_p__;__switch_p__=0,__switch_next__=1) {{

    #define CASE(X)    } if (!__switch_next__ || !(__switch_next__ = strcmp(__switch_p__, X))) {

    #define DEFAULT    } {

    #define END        }}

    #endif

#endif