/*
 * All the code here is free to use!
 * Warning! I have not confirmed that this code is bug-free. It is likely old, and while I am slowly gaining the experience to spot these things, these are little tools I make for myself. Please be aware of the risks before downloading anything off the internet.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdbool.h>
#include <stdint.h>
#include <inttypes.h>
#include <limits.h>
// Starting capacity, in bytes, of the buffer that stdin is read into.
#define INITIAL_BUFFER_SIZE 128
// Size of the buffer that holds one "&#NNNNNNN;" escape code, terminator included.
#define MAX_ESCAPE_CODE_LENGTH 12
// Upper bound for buffer sizes. Parenthesized so the macro expands safely
// inside larger expressions such as `x / SIZE_LIMIT`.
#define SIZE_LIMIT (SIZE_MAX / 2)
/**
 * Returns the number of bytes in the UTF-8 sequence introduced by lead byte c.
 *
 * @param c  First byte of a (presumed) UTF-8 sequence.
 * @return   1 for ASCII, 2-4 for a valid multi-byte lead byte, and 1 for any
 *           byte that cannot start a sequence (a stray continuation byte
 *           0x80-0xBF or an invalid lead 0xF8-0xFF), so that callers step over
 *           exactly the offending byte instead of three unrelated ones.
 */
int utf8_char_length(unsigned char c) {
    if (c < 0x80) return 1;            // 0xxxxxxx: ASCII
    if ((c & 0xE0) == 0xC0) return 2;  // 110xxxxx
    if ((c & 0xF0) == 0xE0) return 3;  // 1110xxxx
    if ((c & 0xF8) == 0xF0) return 4;  // 11110xxx
    return 1;                          // not a valid lead byte
}
/**
 * Decodes the UTF-8 sequence starting at character[0] into a code point.
 *
 * Every trailing byte is checked to be a continuation byte (10xxxxxx) before
 * the next one is read. Because a NUL byte is not a continuation byte, a
 * sequence truncated by the end of a NUL-terminated string is never read
 * past its terminator.
 *
 * @param character  Pointer to the first byte of the sequence; the data must
 *                   be NUL-terminated.
 * @return           The decoded code point, or U+FFFD (replacement character)
 *                   when the lead byte is invalid or a continuation byte is
 *                   missing. Overlong encodings and surrogates are not rejected.
 */
uint32_t get_unicode_code_point(unsigned char* character) {
    static const uint32_t replacement = 0xFFFD;
    const unsigned char lead = character[0];
    const int length = utf8_char_length(lead);

    if (length == 1) {
        // Only ASCII is a valid single-byte sequence.
        return lead < 0x80 ? (uint32_t)lead : replacement;
    }

    uint32_t code_point;
    if (length == 2) {
        code_point = lead & 0x1Fu;
    } else if (length == 3) {
        code_point = lead & 0x0Fu;
    } else if (length == 4 && (lead & 0xF8) == 0xF0) {
        code_point = lead & 0x07u;
    } else {
        return replacement;
    }

    for (int i = 1; i < length; i++) {
        const unsigned char next = character[i];
        if ((next & 0xC0) != 0x80) {
            return replacement;  // malformed or truncated; stop before reading further
        }
        code_point = (code_point << 6) | (next & 0x3Fu);
    }
    return code_point;
}
/**
 * Builds the HTML numeric character reference ("&#<decimal>;") for the
 * character that starts at input.
 *
 * @param input  Pointer to the first byte of the character to escape.
 * @return       A heap-allocated, NUL-terminated string that the caller must
 *               free, or NULL if allocation fails or the formatted text does
 *               not fit in MAX_ESCAPE_CODE_LENGTH bytes.
 */
char* get_char_code(unsigned char* input) {
    char* code = calloc(MAX_ESCAPE_CODE_LENGTH, 1);
    if (code == NULL) {
        perror("malloc");
        return NULL;
    }

    const uint32_t code_point = get_unicode_code_point(input);
    const int written = snprintf(code, MAX_ESCAPE_CODE_LENGTH, "&#%" PRIu32 ";", code_point);
    if (written >= 0 && written < MAX_ESCAPE_CODE_LENGTH) {
        return code;
    }

    // Formatting failed or the result was truncated.
    free(code);
    return NULL;
}
/**
 * Appends the escape code for the character at input[*input_readpoint] to the
 * output buffer and advances both cursors.
 *
 * The output buffer is grown (by repeated doubling) until the escape code and
 * its terminating NUL fit. The process exits with EXIT_FAILURE if the escape
 * code cannot be built or the buffer cannot be grown.
 *
 * @param input             NUL-terminated input text.
 * @param output            Address of the heap-allocated output buffer; may be
 *                          replaced by realloc.
 * @param output_length     Address of the output buffer's capacity in bytes;
 *                          updated when the buffer grows.
 * @param output_writepoint Address of the next write offset in *output;
 *                          advanced by the length of the escape code.
 * @param input_readpoint   Address of the current read offset in input;
 *                          advanced past the bytes that belong to the escaped
 *                          character.
 */
void merging_handler(
    char* input,
    char** output,
    size_t* output_length,
    size_t* output_writepoint,
    size_t* input_readpoint) {
    const unsigned char* sequence = (const unsigned char*)&input[*input_readpoint];
    const int len = utf8_char_length(sequence[0]);

    char* escape_char = get_char_code((unsigned char*)&input[*input_readpoint]);
    if (!escape_char) exit(EXIT_FAILURE);
    const size_t escape_len = strlen(escape_char);

    // Room needed for everything written so far, the escape code, and its NUL.
    const size_t required = *output_writepoint + escape_len + 1;
    if (required > *output_length) {
        // One doubling is not always enough (e.g. capacity 1), so keep going.
        size_t new_length = *output_length ? *output_length : required;
        while (new_length < required) {
            if (new_length > SIZE_MAX / 2) {
                new_length = required;  // doubling would overflow; ask for the exact size
                break;
            }
            new_length *= 2;
        }
        char* temp = realloc(*output, new_length);
        if (!temp) {
            perror("realloc");
            free(escape_char);
            exit(EXIT_FAILURE);
        }
        *output = temp;
        *output_length = new_length;
    }

    memcpy(*output + *output_writepoint, escape_char, escape_len + 1);
    *output_writepoint += escape_len;
    free(escape_char);

    // Consume the lead byte plus only the continuation bytes that are really
    // there, so a malformed or truncated sequence does not swallow the bytes
    // that follow it. The NUL terminator is not a continuation byte, so this
    // never scans past the end of the input.
    size_t consumed = 1;
    while (consumed < (size_t)len && (sequence[consumed] & 0xC0) == 0x80) {
        consumed++;
    }
    *input_readpoint += consumed;
}
int main(int argc, char* argv[]) {
// leave 16 bytes free allocator (ptmalloc)
bool escape_space = false; // -s
bool escape_tab = false; // -t
bool escape_html_operators = false; // -h
bool escape_new_line = false; // -n
bool escape_extended_characters = false; // -e
// "-thens" should work for all
for (int i = 1; i < argc; i++) {
for (size_t j = 0; j < strlen(argv[i]); j++) {
if (argv[i][j] == 's') {
escape_space = true;
} else if (argv[i][j] == 't') {
escape_tab = true;
} else if (argv[i][j] == 'h') {
escape_html_operators = true;
} else if (argv[i][j] == 'n') {
escape_new_line = true;
} else if (argv[i][j] == 'e') {
escape_extended_characters = true;
}
}
}
size_t buffer_size = INITIAL_BUFFER_SIZE;
char* buffer = calloc(buffer_size, 1);
if (!buffer) {
perror("malloc");
return EXIT_FAILURE;
}
size_t total_length = 0;
while (fgets(buffer + total_length, buffer_size - total_length, stdin)) {
total_length += strlen(buffer + total_length);
if (buffer[total_length - 1] != '\n' && !feof(stdin)) {
buffer_size *= 2;
char* temp = realloc(buffer, buffer_size);
if (!temp) {
perror("realloc");
free(buffer);
exit(EXIT_FAILURE);
}
buffer = temp;
}
}
buffer[total_length] = '\0';
char* output = calloc(total_length, 1);
if (!output) {
perror("malloc");
return EXIT_FAILURE;
}
size_t output_writepoint = 0;
size_t new_length = total_length;
for (size_t input_readpoint = 0; input_readpoint < total_length; ) {
if (
(
(
(unsigned char)buffer[input_readpoint] > 127 ||
buffer[input_readpoint] == 38
) &&
escape_extended_characters
) ||
(
buffer[input_readpoint] == 32 &&
escape_space
) ||
(
buffer[input_readpoint] == 9 &&
escape_tab
) ||
(
(
buffer[input_readpoint] == 34 ||
buffer[input_readpoint] == 60 ||
buffer[input_readpoint] == 62
) &&
escape_html_operators
)
) {
merging_handler(
buffer,
&output,
&new_length,
&output_writepoint,
&input_readpoint);
} else {
output[output_writepoint++] = buffer[input_readpoint++];
}
}
output[output_writepoint] = '\0';
printf("%s", output);
free(buffer);
free(output);
return EXIT_SUCCESS;
}