made formatting more consistent

This commit is contained in:
frosty
2026-03-05 04:50:32 +00:00
parent 5ed5a6ecc7
commit 24cec7a350
16 changed files with 1363 additions and 1363 deletions

View File

@@ -8,108 +8,108 @@
/*
 * HTML trace of the most recent evaluate() call: one <div> per binary
 * operation, in the order the operations were carried out. This is shared
 * file-level state, so evaluate() is not reentrant.
 */
static char logic_log[4096];

/* Cursor over the NUL-terminated expression being parsed. */
typedef struct {
  const char *buffer; /* expression text; not owned by the parser */
  int pos;            /* index of the next unread character */
} Parser;

static double parse_expression(Parser *p);

/* Advance the cursor past any spaces and tabs. */
static void skip_ws(Parser *p) {
  while (p->buffer[p->pos] == ' ' || p->buffer[p->pos] == '\t') p->pos++;
}

/*
 * Append "lhs op rhs = result" to logic_log as one <div>.
 *
 * The entry is written only when it fits completely, so a full log never
 * ends in a half-written tag.
 */
static void log_step(double lhs, char op, double rhs, double result) {
  char step[256];
  int n = snprintf(step, sizeof(step), "<div>%g %c %g = <b>%g</b></div>", lhs,
                   op, rhs, result);
  if (n < 0 || (size_t)n >= sizeof(step)) return;

  size_t used = strlen(logic_log);
  if ((size_t)n >= sizeof(logic_log) - used) return;
  memcpy(logic_log + used, step, (size_t)n + 1);
}

/*
 * factor := '-'* ( '(' expression ')' | number )
 *
 * Leading minus signs are folded in a loop rather than by recursion, so a
 * long run of '-' cannot deepen the call stack. A missing ')' is tolerated.
 * When no number can be read, strtod() consumes nothing and yields 0.
 */
static double parse_factor(Parser *p) {
  int negate = 0;
  for (;;) {
    skip_ws(p);
    if (p->buffer[p->pos] != '-') break;
    negate = !negate;
    p->pos++;
  }

  double value;
  if (p->buffer[p->pos] == '(') {
    p->pos++;
    value = parse_expression(p);
    if (p->buffer[p->pos] == ')') p->pos++;
  } else {
    char *endptr;
    value = strtod(&p->buffer[p->pos], &endptr);
    p->pos = (int)(endptr - p->buffer);
  }
  return negate ? -value : value;
}

/* term := factor ( ('*' | '/') factor )*   -- left-associative, logged. */
static double parse_term(Parser *p) {
  double left = parse_factor(p);
  for (;;) {
    skip_ws(p);
    char op = p->buffer[p->pos];
    if (op != '*' && op != '/') break;
    p->pos++;

    double right = parse_factor(p);
    double result = (op == '*') ? left * right : left / right;
    log_step(left, op, right, result);
    left = result;
  }
  return left;
}

/* expression := term ( ('+' | '-') term )*   -- left-associative, logged. */
static double parse_expression(Parser *p) {
  double left = parse_term(p);
  for (;;) {
    skip_ws(p);
    char op = p->buffer[p->pos];
    if (op != '+' && op != '-') break;
    p->pos++;

    double right = parse_term(p);
    double result = (op == '+') ? left + right : left - right;
    log_step(left, op, right, result);
    left = result;
  }
  return left;
}

/*
 * Evaluate an arithmetic expression with + - * /, unary minus and
 * parentheses.
 *
 * Resets logic_log, then fills it with one entry per operation performed.
 * Returns 0.0 for a NULL or empty expression. Parsing stops silently at the
 * first character that does not fit the grammar.
 */
double evaluate(const char *expr) {
  logic_log[0] = '\0';
  if (!expr || expr[0] == '\0') return 0.0;

  Parser p = {expr, 0};
  return parse_expression(&p);
}
InfoBox fetch_calc_data(char *math_input) { InfoBox fetch_calc_data(char *math_input) {
InfoBox info = {NULL, NULL, NULL, NULL}; InfoBox info = {NULL, NULL, NULL, NULL};
if (!math_input) return info; if (!math_input) return info;
double result = evaluate(math_input); double result = evaluate(math_input);
char html_output[5120]; char html_output[5120];
snprintf(html_output, sizeof(html_output), snprintf(html_output, sizeof(html_output),
"<div class='calc-container' style='line-height: 1.6;'>" "<div class='calc-container' style='line-height: 1.6;'>"
"%s" "%s"
"<div style='margin-top: 8px; border-top: 1px solid #eee; " "<div style='margin-top: 8px; border-top: 1px solid #eee; "
"padding-top: 8px; font-size: 1.2em;'>" "padding-top: 8px; font-size: 1.2em;'>"
"<b>%g</b>" "<b>%g</b>"
"</div>" "</div>"
"</div>", "</div>",
strlen(logic_log) > 0 ? logic_log : "<div>Constant value</div>", strlen(logic_log) > 0 ? logic_log : "<div>Constant value</div>",
result); result);
info.title = strdup("Calculation"); info.title = strdup("Calculation");
info.extract = strdup(html_output); info.extract = strdup(html_output);
info.thumbnail_url = info.thumbnail_url =
strdup("/static/calculation.svg"); strdup("/static/calculation.svg");
info.url = strdup("#"); info.url = strdup("#");
return info; return info;
} }

View File

@@ -11,239 +11,239 @@
#include <ctype.h> #include <ctype.h>
/*
 * Lead-in phrases that mark a dictionary query, NULL-terminated.
 *
 * They are compared case-insensitively against the start of the query and
 * the first match wins, so a longer phrase must come before any shorter
 * phrase it begins with (e.g. "what is the definition of " before
 * "what is ").
 */
static const char *PREFIXES[] = {
  "what is the definition of ", "what's the definition of ",
  "what is the meaning of ", "what's the meaning of ",
  "what does the word ", "definition of ", "meaning of ", "def of ",
  "define ", "definition ", "define:", "def ", "def:",
  "what does ", "what is ", "what's ", "whats ",
  "meaning ", "dictionary ", "dict ", NULL
};
/*
 * Trailing phrases that mark a dictionary query, NULL-terminated.
 *
 * Every entry starts with a space and is searched for case-insensitively
 * anywhere in the text.
 */
static const char *SUFFIXES[] = {
  " definition", " def", " meaning", " mean", " means",
  " dictionary", " dict", " define", " defined",
  " definition?", " def?", " meaning?", " mean?", " means?",
  " in english", " in english?", NULL
};
/* Filler words dropped from the front of the extracted term, NULL-terminated. */
static const char *SKIP_WORDS[] = {"of ", "the ", "a ", "an ", NULL};
/*
 * Case-insensitive substring search.
 *
 * Returns a pointer to the first occurrence of needle inside haystack, or
 * NULL when there is none. When haystack is NULL, or needle is NULL or
 * empty, haystack itself is returned unchanged.
 */
static const char *strcasestr_impl(const char *haystack, const char *needle) {
  if (!haystack || !needle || needle[0] == '\0') return haystack;

  size_t needle_len = strlen(needle);
  for (const char *cursor = haystack; *cursor != '\0'; cursor++) {
    if (strncasecmp(cursor, needle, needle_len) == 0) return cursor;
  }
  return NULL;
}
/* Growable buffer that accumulates a response body and stays NUL-terminated. */
struct MemStruct {
  char *memory; /* heap buffer holding size + 1 bytes, or NULL before the first write */
  size_t size;  /* payload bytes stored, not counting the terminator */
};

/*
 * Write callback (registered via CURLOPT_WRITEFUNCTION): append
 * size * nmemb bytes from contents to the MemStruct passed as userp.
 *
 * Returns the number of bytes consumed. Returns 0 when the buffer cannot
 * grow or the requested size would overflow size_t; for a non-empty chunk
 * that differs from the byte count handed in. The buffer accumulated so far
 * is left intact in that case.
 */
static size_t WriteCallback(void *contents, size_t size, size_t nmemb, void *userp) {
  struct MemStruct *mem = userp;
  const size_t size_max = (size_t)-1;

  /* Reject size * nmemb and mem->size + realsize + 1 when they would wrap. */
  if (size != 0 && nmemb > size_max / size) return 0;
  size_t realsize = size * nmemb;
  if (realsize >= size_max - mem->size) return 0;

  char *grown = realloc(mem->memory, mem->size + realsize + 1);
  if (!grown) return 0;

  mem->memory = grown;
  memcpy(grown + mem->size, contents, realsize);
  mem->size += realsize;
  mem->memory[mem->size] = '\0';
  return realsize;
}
/*
 * Evaluate an XPath expression against doc and return the text content of
 * the first matching node.
 *
 * The result is copied with strdup(), so the caller releases it with
 * free(). Returns NULL when the expression matches nothing, the node has no
 * content, or an allocation fails.
 */
static char *xpath_text(xmlDocPtr doc, const char *xpath) {
  xmlXPathContextPtr ctx = xmlXPathNewContext(doc);
  if (!ctx) return NULL;

  xmlXPathObjectPtr obj = xmlXPathEvalExpression((const xmlChar *)xpath, ctx);
  xmlXPathFreeContext(ctx);

  char *result = NULL;
  if (obj && obj->nodesetval && obj->nodesetval->nodeNr > 0) {
    xmlChar *content = xmlNodeGetContent(obj->nodesetval->nodeTab[0]);
    if (content) {
      result = strdup((char *)content);
      xmlFree(content);
    }
  }

  /* xmlXPathFreeObject() ignores NULL, so no guard is needed here. */
  xmlXPathFreeObject(obj);
  return result;
}
/* Minimal growable string used only while assembling the dictionary HTML. */
struct html_buf {
  char *data; /* NUL-terminated once anything has been appended */
  size_t len; /* bytes used, not counting the terminator */
  size_t cap; /* bytes allocated */
  int failed; /* sticky flag: set when an allocation fails */
};

/* Append n bytes of s, doubling the buffer as needed. */
static void html_buf_append(struct html_buf *b, const char *s, size_t n) {
  if (b->failed) return;

  if (n >= b->cap - b->len) { /* need room for n bytes plus the terminator */
    size_t want = b->cap ? b->cap : 256;
    while (n >= want - b->len) {
      if (want > (size_t)-1 / 2) {
        b->failed = 1;
        return;
      }
      want *= 2;
    }
    char *grown = realloc(b->data, want);
    if (!grown) {
      b->failed = 1;
      return;
    }
    b->data = grown;
    b->cap = want;
  }

  memcpy(b->data + b->len, s, n);
  b->len += n;
  b->data[b->len] = '\0';
}

/* Append a NUL-terminated string verbatim (trusted markup only). */
static void html_buf_puts(struct html_buf *b, const char *s) {
  html_buf_append(b, s, strlen(s));
}

/* Append text with the HTML-significant characters escaped. */
static void html_buf_put_escaped(struct html_buf *b, const char *s) {
  for (; *s != '\0'; s++) {
    switch (*s) {
      case '&':  html_buf_puts(b, "&amp;");  break;
      case '<':  html_buf_puts(b, "&lt;");   break;
      case '>':  html_buf_puts(b, "&gt;");   break;
      case '"':  html_buf_puts(b, "&quot;"); break;
      case '\'': html_buf_puts(b, "&#39;");  break;
      default:   html_buf_append(b, s, 1);   break;
    }
  }
}

/* Append open + escaped(text) + close; does nothing when text is NULL. */
static void html_buf_field(struct html_buf *b, const char *open,
                           const char *text, const char *close) {
  if (!text) return;
  html_buf_puts(b, open);
  html_buf_put_escaped(b, text);
  html_buf_puts(b, close);
}

/*
 * Render a dictionary entry as an HTML fragment.
 *
 * Any of word, pron, pos, def and ex may be NULL; a NULL field is left out.
 * The field text comes from a scraped page, so it is HTML-escaped before it
 * is inserted. The output grows as needed: the former fixed 4096-byte
 * buffer could be overrun, because snprintf() returns the length it wanted
 * to write and that value was added to the write offset unchecked.
 *
 * Returns a heap string the caller must free(), or NULL when out of memory.
 */
static char *build_html(const char *word, const char *pron, const char *pos,
                        const char *def, const char *ex) {
  struct html_buf b = {NULL, 0, 0, 0};

  html_buf_puts(&b, "<div class='dict-container' style='line-height: 1.6;'>");
  html_buf_field(&b,
                 "<div style='font-size: 1.3em; font-weight: bold; margin-bottom: 4px;'>",
                 word, "</div>");
  html_buf_field(&b, "<div style='color: #666; margin-bottom: 8px;'>/", pron,
                 "/</div>");
  html_buf_field(&b,
                 "<div style='font-style: italic; color: #888; margin-bottom: 8px;'>",
                 pos, "</div>");
  html_buf_field(&b, "<div style='margin-bottom: 8px;'>", def, "</div>");
  html_buf_field(&b,
                 "<div style='color: #555; font-style: italic; margin-top: 8px;'>\"",
                 ex, "\"</div>");
  html_buf_puts(&b, "</div>");

  if (b.failed) {
    free(b.data);
    return NULL;
  }
  return b.data;
}
/*
 * Pull the single word to look up out of a free-form query.
 *
 * Steps: drop the first matching entry of PREFIXES, drop leading filler
 * words (SKIP_WORDS), cut the text at any SUFFIXES phrase, trim trailing
 * spaces and ?!. characters, lowercase, and keep only the first word.
 *
 * Returns a heap string the caller must free(), or NULL when query is NULL,
 * nothing is left to look up, or allocation fails.
 */
static char *extract_word(const char *query) {
  if (!query) return NULL;

  /* Only the first matching prefix is removed; table order decides which. */
  const char *start = query;
  for (int i = 0; PREFIXES[i]; i++) {
    size_t len = strlen(PREFIXES[i]);
    if (strncasecmp(start, PREFIXES[i], len) == 0) {
      start += len;
      break;
    }
  }
  while (*start == ' ') start++;

  char *word = strdup(start);
  if (!word) return NULL;

  /* Strip filler words until none is left at the front. */
  int changed = 1;
  while (changed) {
    changed = 0;
    for (int i = 0; SKIP_WORDS[i]; i++) {
      size_t len = strlen(SKIP_WORDS[i]);
      if (strncasecmp(word, SKIP_WORDS[i], len) == 0) {
        /* Also swallow any extra spaces after the filler word. Otherwise
         * "a  dog" becomes " dog" and the first-word cut below would
         * return an empty string. */
        const char *rest = word + len;
        while (*rest == ' ') rest++;
        memmove(word, rest, strlen(rest) + 1);
        changed = 1;
        break;
      }
    }
  }

  /* Cut the text at every suffix phrase that still occurs in it. */
  changed = 1;
  while (changed) {
    changed = 0;
    for (int i = 0; SUFFIXES[i]; i++) {
      const char *found = strcasestr_impl(word, SUFFIXES[i]);
      if (found) {
        word[found - word] = '\0';
        changed = 1;
        break;
      }
    }
  }

  size_t len = strlen(word);
  while (len > 0 && (word[len - 1] == ' ' || word[len - 1] == '?' ||
                     word[len - 1] == '!' || word[len - 1] == '.')) {
    word[--len] = '\0';
  }
  if (len == 0) {
    free(word);
    return NULL;
  }

  for (size_t i = 0; i < len; i++) word[i] = tolower((unsigned char)word[i]);

  /* Only the first word is looked up. */
  char *space = strchr(word, ' ');
  if (space) *space = '\0';

  if (word[0] == '\0') {
    free(word);
    return NULL;
  }
  return word;
}
/*
 * Decide whether a search query is asking for a word definition.
 *
 * Returns 1 when
 *   - the query starts with an explicit dictionary prefix from PREFIXES
 *     and has something after it, or
 *   - a SUFFIXES phrase occurs within the first 100 characters (but not at
 *     the very start) and is followed only by spaces or ?!. characters, or
 *   - the query is "what is X" / "what's X" / "whats X" where X is a single
 *     word that does not start with an article or determiner.
 * Returns 0 otherwise, including for a NULL query.
 *
 * The three generic "what is" prefixes are also listed in PREFIXES (the
 * word extraction needs them there). They are skipped in the prefix loop
 * here: matching them there returned 1 for every "what is ..." query and
 * left the stricter single-word check below unreachable.
 */
int is_dictionary_query(const char *query) {
  if (!query) return 0;

  static const char *const generic[] = {"what is ", "what's ", "whats ", NULL};

  for (int i = 0; PREFIXES[i]; i++) {
    int is_generic = 0;
    for (int g = 0; generic[g]; g++) {
      if (strcmp(PREFIXES[i], generic[g]) == 0) {
        is_generic = 1;
        break;
      }
    }
    if (is_generic) continue;

    size_t len = strlen(PREFIXES[i]);
    if (strncasecmp(query, PREFIXES[i], len) == 0) {
      const char *after = query + len;
      while (*after == ' ') after++;
      if (*after != '\0') return 1;
    }
  }

  for (int i = 0; SUFFIXES[i]; i++) {
    const char *pos = strcasestr_impl(query, SUFFIXES[i]);
    if (pos) {
      const char *after = pos + strlen(SUFFIXES[i]);
      while (*after == ' ' || *after == '?' || *after == '!' || *after == '.') after++;
      if (*after == '\0' && pos > query && (pos - query) < 100) return 1;
    }
  }

  for (int g = 0; generic[g]; g++) {
    size_t len = strlen(generic[g]);
    if (strncasecmp(query, generic[g], len) != 0) continue;

    const char *word = query + len;
    while (*word == ' ') word++;
    if (*word == '\0') return 0; /* bare "what is" has nothing to define */

    /* "what is the ...", "what is my ..." are ordinary questions. */
    const char *articles[] = {"the ", "your ", "my ", "his ", "her ", "their ",
                              "our ", "this ", "that ", "these ", "those ", "a ", "an ", NULL};
    for (int i = 0; articles[i]; i++) {
      if (strncasecmp(word, articles[i], strlen(articles[i])) == 0) return 0;
    }

    /* Accept a single word, optionally followed by one space and/or '?'. */
    const char *space = strchr(word, ' ');
    if (!space || *(space + 1) == '\0' || *(space + 1) == '?') return 1;
    return 0;
  }

  return 0;
}
/*
 * Build the Cambridge Dictionary URL for the word named in query.
 *
 * The word is taken from the query with extract_word() and URL-escaped.
 * Returns a heap string the caller must free(), or NULL when no word can be
 * extracted or a curl/allocation step fails.
 */
char *construct_dictionary_url(const char *query) {
  char *word = extract_word(query);
  if (!word) return NULL;

  char *url = NULL;
  CURL *curl = curl_easy_init();
  if (curl) {
    /* curl_easy_escape() returns NULL on failure; check before strlen(). */
    char *escaped = curl_easy_escape(curl, word, 0);
    if (escaped) {
      static const char base[] = "https://dictionary.cambridge.org/dictionary/english/";
      size_t base_len = sizeof(base) - 1;
      size_t escaped_len = strlen(escaped);

      url = malloc(base_len + escaped_len + 1);
      if (url) {
        memcpy(url, base, base_len);
        memcpy(url + base_len, escaped, escaped_len + 1);
      }
      curl_free(escaped);
    }
    curl_easy_cleanup(curl);
  }

  free(word);
  return url;
}
InfoBox fetch_dictionary_data(const char *query) { InfoBox fetch_dictionary_data(const char *query) {
InfoBox info = {NULL, NULL, NULL, NULL}; InfoBox info = {NULL, NULL, NULL, NULL};
char *url = construct_dictionary_url(query); char *url = construct_dictionary_url(query);
if (!url) return info; if (!url) return info;
CURL *curl = curl_easy_init(); CURL *curl = curl_easy_init();
if (!curl) { free(url); return info; } if (!curl) { free(url); return info; }
struct MemStruct chunk = {malloc(1), 0}; struct MemStruct chunk = {malloc(1), 0};
curl_easy_setopt(curl, CURLOPT_URL, url); curl_easy_setopt(curl, CURLOPT_URL, url);
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, WriteCallback); curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, WriteCallback);
curl_easy_setopt(curl, CURLOPT_WRITEDATA, &chunk); curl_easy_setopt(curl, CURLOPT_WRITEDATA, &chunk);
curl_easy_setopt(curl, CURLOPT_USERAGENT, "Mozilla/5.0"); curl_easy_setopt(curl, CURLOPT_USERAGENT, "Mozilla/5.0");
curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L); curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L);
apply_proxy_settings(curl); apply_proxy_settings(curl);
if (curl_easy_perform(curl) == CURLE_OK && chunk.size > 0) { if (curl_easy_perform(curl) == CURLE_OK && chunk.size > 0) {
htmlDocPtr doc = htmlReadMemory(chunk.memory, chunk.size, url, NULL, htmlDocPtr doc = htmlReadMemory(chunk.memory, chunk.size, url, NULL,
HTML_PARSE_RECOVER | HTML_PARSE_NOERROR | HTML_PARSE_NOWARNING); HTML_PARSE_RECOVER | HTML_PARSE_NOERROR | HTML_PARSE_NOWARNING);
if (doc) { if (doc) {
char *word = xpath_text(doc, "//span[@class='hw dhw']"); char *word = xpath_text(doc, "//span[@class='hw dhw']");
char *pron = xpath_text(doc, "//span[@class='us dpron-i']//span[@class='ipa dipa lpr-2 lpl-1']"); char *pron = xpath_text(doc, "//span[@class='us dpron-i']//span[@class='ipa dipa lpr-2 lpl-1']");
char *pos = xpath_text(doc, "//span[@class='pos dpos']"); char *pos = xpath_text(doc, "//span[@class='pos dpos']");
char *def = xpath_text(doc, "(//div[@class='def ddef_d db'])[1]"); char *def = xpath_text(doc, "(//div[@class='def ddef_d db'])[1]");
char *ex = xpath_text(doc, "(//span[@class='eg deg'])[1]"); char *ex = xpath_text(doc, "(//span[@class='eg deg'])[1]");
if (word && def) { if (word && def) {
info.title = strdup("Dictionary"); info.title = strdup("Dictionary");
info.extract = build_html(word, pron, pos, def, ex); info.extract = build_html(word, pron, pos, def, ex);
info.thumbnail_url = strdup("/static/dictionary.jpg"); info.thumbnail_url = strdup("/static/dictionary.jpg");
info.url = strdup(url); info.url = strdup(url);
} }
free(word); free(pron); free(pos); free(def); free(ex); free(word); free(pron); free(pos); free(def); free(ex);
xmlFreeDoc(doc); xmlFreeDoc(doc);
}
} }
}
curl_easy_cleanup(curl); curl_easy_cleanup(curl);
free(chunk.memory); free(chunk.memory);
free(url); free(url);
return info; return info;
} }

View File

@@ -2,12 +2,12 @@
#include <stdlib.h> #include <stdlib.h>
/*
 * Release the four strings owned by an InfoBox.
 *
 * The struct itself is not freed. Each field is reset to NULL afterwards,
 * so calling this twice on the same box is harmless. A NULL info is
 * ignored.
 */
void free_infobox(InfoBox *info) {
  if (!info) return;

  /* free(NULL) is a no-op, so the fields need no individual guards. */
  free(info->title);
  free(info->thumbnail_url);
  free(info->extract);
  free(info->url);

  info->title = NULL;
  info->thumbnail_url = NULL;
  info->extract = NULL;
  info->url = NULL;
}

View File

@@ -2,10 +2,10 @@
#define INFOBOX_H #define INFOBOX_H
/*
 * One info box shown alongside search results.
 *
 * Each field is either NULL or a heap-allocated string owned by the box.
 */
typedef struct {
  char *title;         /* heading, e.g. "Calculation" or "Dictionary" */
  char *thumbnail_url; /* image shown with the box */
  char *extract;       /* body of the box, an HTML fragment */
  char *url;           /* link target for the box */
} InfoBox;

/* Free the strings held by info; the struct itself is not freed. */
void free_infobox(InfoBox *info);

View File

@@ -82,24 +82,24 @@ static const UnitDef *find_unit(const char *str) {
size_t j = 0; size_t j = 0;
for (size_t i = 0; i < len && j < 63; i++) { for (size_t i = 0; i < len && j < 63; i++) {
if ((unsigned char)str[i] == 0xC2 && (unsigned char)str[i+1] == 0xB0) { if ((unsigned char)str[i] == 0xC2 && (unsigned char)str[i+1] == 0xB0) {
i++; i++;
continue; continue;
} }
if (str[i] == '^' && i + 1 < len && str[i + 1] == '2') { if (str[i] == '^' && i + 1 < len && str[i + 1] == '2') {
normalized[j++] = '2'; normalized[j++] = '2';
i++; i++;
continue; continue;
} }
normalized[j++] = tolower((unsigned char)str[i]); normalized[j++] = tolower((unsigned char)str[i]);
} }
normalized[j] = '\0'; normalized[j] = '\0';
for (int i = 0; i < UNIT_COUNT; i++) { for (int i = 0; i < UNIT_COUNT; i++) {
if (strcmp(normalized, UNITS[i].name) == 0) return &UNITS[i]; if (strcmp(normalized, UNITS[i].name) == 0) return &UNITS[i];
for (int k = 0; k < 4 && UNITS[i].alias[k]; k++) { for (int k = 0; k < 4 && UNITS[i].alias[k]; k++) {
if (strcmp(normalized, UNITS[i].alias[k]) == 0) return &UNITS[i]; if (strcmp(normalized, UNITS[i].alias[k]) == 0) return &UNITS[i];
} }
} }
return NULL; return NULL;
} }
@@ -108,37 +108,37 @@ int is_unit_conv_query(const char *query) {
if (!query) return 0; if (!query) return 0;
const char *patterns[] = { const char *patterns[] = {
" to ", " in ", " into ", " to ", " in ", " into ",
" = ", " equals ", " equal ", " = ", " equals ", " equal ",
" convert ", " conversion ", " convert ", " conversion ",
" -> ", "", " -> ", "",
NULL NULL
}; };
int has_pattern = 0; int has_pattern = 0;
for (int i = 0; patterns[i]; i++) { for (int i = 0; patterns[i]; i++) {
if (strstr(query, patterns[i])) { if (strstr(query, patterns[i])) {
has_pattern = 1; has_pattern = 1;
break; break;
} }
} }
if (!has_pattern) { if (!has_pattern) {
const char *last_space = strrchr(query, ' '); const char *last_space = strrchr(query, ' ');
if (last_space) { if (last_space) {
const UnitDef *u = find_unit(last_space + 1); const UnitDef *u = find_unit(last_space + 1);
if (u) { if (u) {
const char *before = query; const char *before = query;
while (*before && is_whitespace(*before)) before++; while (*before && is_whitespace(*before)) before++;
const char *num_end = before; const char *num_end = before;
while (*num_end && while (*num_end &&
(isdigit(*num_end) || *num_end == '.' || *num_end == '-' || (isdigit(*num_end) || *num_end == '.' || *num_end == '-' ||
*num_end == '+' || *num_end == '/' || *num_end == '\'' || *num_end == '"')) { *num_end == '+' || *num_end == '/' || *num_end == '\'' || *num_end == '"')) {
num_end++; num_end++;
}
if (num_end > before) has_pattern = 1;
}
} }
if (num_end > before) has_pattern = 1;
}
}
} }
return has_pattern; return has_pattern;
@@ -153,58 +153,58 @@ static double parse_value(const char **ptr) {
if (*p == '-' || *p == '+') p++; if (*p == '-' || *p == '+') p++;
while (*p >= '0' && *p <= '9') { while (*p >= '0' && *p <= '9') {
value = value * 10 + (*p - '0'); value = value * 10 + (*p - '0');
has_num = 1;
p++;
}
if (*p == '.') {
p++;
double frac = 0.1;
while (*p >= '0' && *p <= '9') {
value += (*p - '0') * frac;
frac *= 0.1;
has_num = 1; has_num = 1;
p++; p++;
} }
}
if (*p == '/' && has_num) {
p++;
double denom = 0.0;
int has_denom = 0;
while (*p >= '0' && *p <= '9') {
denom = denom * 10 + (*p - '0');
has_denom = 1;
p++;
}
if (has_denom && denom > 0) {
value = value / denom;
}
}
while (*p == '\'' || *p == '"') {
double extra = 0.0;
p++;
while (*p >= '0' && *p <= '9') {
extra = extra * 10 + (*p - '0');
p++;
}
if (*p == '.') { if (*p == '.') {
p++; p++;
double frac = 0.1; double frac = 0.1;
while (*p >= '0' && *p <= '9') { while (*p >= '0' && *p <= '9') {
value += (*p - '0') * frac; extra += (*p - '0') * frac;
frac *= 0.1; frac *= 0.1;
has_num = 1; p++;
p++;
} }
} }
if (*p == '\'' || *p == '"') p++;
if (*p == '/' && has_num) { value += extra * (p[-1] == '\'' ? 0.3048 : 0.0254);
p++;
double denom = 0.0;
int has_denom = 0;
while (*p >= '0' && *p <= '9') {
denom = denom * 10 + (*p - '0');
has_denom = 1;
p++;
}
if (has_denom && denom > 0) {
value = value / denom;
}
}
while (*p == '\'' || *p == '"') {
double extra = 0.0;
p++;
while (*p >= '0' && *p <= '9') {
extra = extra * 10 + (*p - '0');
p++;
}
if (*p == '.') {
p++;
double frac = 0.1;
while (*p >= '0' && *p <= '9') {
extra += (*p - '0') * frac;
frac *= 0.1;
p++;
}
}
if (*p == '\'' || *p == '"') p++;
value += extra * (p[-1] == '\'' ? 0.3048 : 0.0254);
} }
if (!has_num) { if (!has_num) {
*ptr = p; *ptr = p;
return 0.0; return 0.0;
} }
*ptr = p; *ptr = p;
@@ -235,29 +235,29 @@ static int parse_conversion_query(const char *query, double *value, const UnitDe
const char *to_pos = NULL; const char *to_pos = NULL;
size_t keyword_len = 0; size_t keyword_len = 0;
for (int i = 0; to_keywords[i]; i++) { for (int i = 0; to_keywords[i]; i++) {
const char *found = strstr(p, to_keywords[i]); const char *found = strstr(p, to_keywords[i]);
if (found) { if (found) {
to_pos = found + strlen(to_keywords[i]); to_pos = found + strlen(to_keywords[i]);
keyword_len = strlen(to_keywords[i]); keyword_len = strlen(to_keywords[i]);
break; break;
} }
} }
if (!to_pos) { if (!to_pos) {
const char *last_space = strrchr(p, ' '); const char *last_space = strrchr(p, ' ');
if (last_space && last_space > p) { if (last_space && last_space > p) {
char from_part[64] = {0}; char from_part[64] = {0};
size_t len = last_space - p; size_t len = last_space - p;
if (len < 63) { if (len < 63) {
strncpy(from_part, p, len); strncpy(from_part, p, len);
*from_unit = find_unit(from_part); *from_unit = find_unit(from_part);
if (*from_unit) { if (*from_unit) {
*to_unit = find_unit(last_space + 1); *to_unit = find_unit(last_space + 1);
return *to_unit ? 1 : 0; return *to_unit ? 1 : 0;
}
}
} }
return 0; }
}
return 0;
} }
char from_part[64] = {0}; char from_part[64] = {0};
@@ -271,20 +271,20 @@ static int parse_conversion_query(const char *query, double *value, const UnitDe
*from_unit = find_unit(from_part); *from_unit = find_unit(from_part);
if (!*from_unit) { if (!*from_unit) {
char *end = from_part + strlen(from_part); char *end = from_part + strlen(from_part);
while (end > from_part) { while (end > from_part) {
while (end > from_part && is_whitespace(end[-1])) end--; while (end > from_part && is_whitespace(end[-1])) end--;
if (end <= from_part) break; if (end <= from_part) break;
char *start = end; char *start = end;
while (start > from_part && !is_whitespace(start[-1])) start--; while (start > from_part && !is_whitespace(start[-1])) start--;
size_t word_len = end - start; size_t word_len = end - start;
memmove(from_part + word_len + 1, from_part, start - from_part); memmove(from_part + word_len + 1, from_part, start - from_part);
from_part[word_len] = ' '; from_part[word_len] = ' ';
from_part[word_len + 1] = '\0'; from_part[word_len + 1] = '\0';
*from_unit = find_unit(from_part); *from_unit = find_unit(from_part);
if (*from_unit) break; if (*from_unit) break;
end = start; end = start;
} }
} }
if (!*from_unit) return 0; if (!*from_unit) return 0;
@@ -297,30 +297,30 @@ static int parse_conversion_query(const char *query, double *value, const UnitDe
size_t to_len = 0; size_t to_len = 0;
const char *tp = to_pos; const char *tp = to_pos;
while (*tp && !is_separator(*tp) && to_len < 63) { while (*tp && !is_separator(*tp) && to_len < 63) {
to_part[to_len++] = *tp++; to_part[to_len++] = *tp++;
} }
to_part[to_len] = '\0'; to_part[to_len] = '\0';
*to_unit = find_unit(to_part); *to_unit = find_unit(to_part);
if (!*to_unit) { if (!*to_unit) {
const char *try_ptr = to_pos; const char *try_ptr = to_pos;
while (*try_ptr && is_whitespace(*try_ptr)) try_ptr++; while (*try_ptr && is_whitespace(*try_ptr)) try_ptr++;
char try_buf[64] = {0}; char try_buf[64] = {0};
size_t try_len = 0; size_t try_len = 0;
while (*try_ptr && try_len < 63) { while (*try_ptr && try_len < 63) {
try_buf[try_len++] = *try_ptr++; try_buf[try_len++] = *try_ptr++;
} }
while (try_len > 0) { while (try_len > 0) {
*to_unit = find_unit(try_buf); *to_unit = find_unit(try_buf);
if (*to_unit) { if (*to_unit) {
strcpy(to_part, try_buf); strcpy(to_part, try_buf);
break; break;
}
char *last_space = strrchr(try_buf, ' ');
if (!last_space) break;
*last_space = '\0';
try_len = strlen(try_buf);
} }
char *last_space = strrchr(try_buf, ' ');
if (!last_space) break;
*last_space = '\0';
try_len = strlen(try_buf);
}
} }
return *to_unit ? 1 : 0; return *to_unit ? 1 : 0;
@@ -343,7 +343,7 @@ static double convert_value(double value, const UnitDef *from, const UnitDef *to
if (from->type != to->type) return 0; if (from->type != to->type) return 0;
if (from->type == UNIT_TEMP) { if (from->type == UNIT_TEMP) {
return convert_temp(value, from, to); return convert_temp(value, from, to);
} }
double base_value = value * from->to_base; double base_value = value * from->to_base;
@@ -353,23 +353,23 @@ static double convert_value(double value, const UnitDef *from, const UnitDef *to
/*
 * Format a conversion result for display.
 *
 *   0                      -> "0"
 *   |val| < 0.01           -> two significant digits ("%.2g")
 *   within 0.0001 of a
 *   whole number, |val|>=1 -> no decimals ("%.0f")
 *   anything else          -> two decimals with trailing zeros removed
 *
 * The whole-number test works on the magnitude. The previous
 * fmod(val + 0.0001, 1.0) < 0.0002 test was true for every negative value
 * of magnitude >= 1, so e.g. -17.78 was printed as "-18".
 *
 * buf receives at most bufsize bytes including the terminator; nothing is
 * written when bufsize is 0.
 */
static void format_number(double val, char *buf, size_t bufsize) {
  if (bufsize == 0) return;
  if (val == 0) {
    snprintf(buf, bufsize, "0");
    return;
  }

  double mag = fabs(val);
  if (mag < 0.01) {
    snprintf(buf, bufsize, "%.2g", val);
    return;
  }

  if (mag >= 1) {
    int whole;
    if (mag >= 9007199254740992.0) {
      /* From 2^53 upwards every double is integral (this also covers
       * infinity), and the integer cast below would not be safe. */
      whole = 1;
    } else {
      double frac = mag - (double)(long long)mag;
      whole = frac < 0.0001 || frac >= 0.9999;
    }
    if (whole) {
      snprintf(buf, bufsize, "%.0f", val);
      return;
    }
  }

  snprintf(buf, bufsize, "%.2f", val);

  /* Drop trailing zeros and a dangling decimal point. Skip this when there
   * is no decimal point (truncated output, "nan"), so integer digits are
   * never eaten and an empty string is never indexed at -1. */
  if (!strchr(buf, '.')) return;
  char *p = buf + strlen(buf) - 1;
  while (p > buf && *p == '0') *p-- = '\0';
  if (*p == '.') *p = '\0';
}
@@ -383,74 +383,74 @@ static const char *pluralize(const char *unit, double value, char *buf, size_t b
buf[bufsize - 1] = '\0'; buf[bufsize - 1] = '\0';
if (strcmp(unit, "foot") == 0 || strcmp(unit, "square foot") == 0) { if (strcmp(unit, "foot") == 0 || strcmp(unit, "square foot") == 0) {
if (is_one) strcpy(buf, unit); if (is_one) strcpy(buf, unit);
else strcpy(buf, strcmp(unit, "square foot") == 0 ? "square feet" : "feet"); else strcpy(buf, strcmp(unit, "square foot") == 0 ? "square feet" : "feet");
return buf; return buf;
} }
if (strcmp(unit, "inch") == 0 || strcmp(unit, "square inch") == 0) { if (strcmp(unit, "inch") == 0 || strcmp(unit, "square inch") == 0) {
if (is_one) strcpy(buf, unit); if (is_one) strcpy(buf, unit);
else strcpy(buf, strcmp(unit, "square inch") == 0 ? "square inches" : "inches"); else strcpy(buf, strcmp(unit, "square inch") == 0 ? "square inches" : "inches");
return buf; return buf;
} }
if (strcmp(unit, "stone") == 0) { if (strcmp(unit, "stone") == 0) {
if (is_one) strcpy(buf, "stone"); if (is_one) strcpy(buf, "stone");
else strcpy(buf, "stones"); else strcpy(buf, "stones");
return buf; return buf;
} }
if (strcmp(unit, "celsius") == 0 || if (strcmp(unit, "celsius") == 0 ||
strcmp(unit, "fahrenheit") == 0 || strcmp(unit, "fahrenheit") == 0 ||
strcmp(unit, "kelvin") == 0) { strcmp(unit, "kelvin") == 0) {
strcpy(buf, unit); strcpy(buf, unit);
return buf; return buf;
} }
if (unit[len-1] == 's' || if (unit[len-1] == 's' ||
unit[len-1] == 'x' || unit[len-1] == 'x' ||
unit[len-1] == 'z' || unit[len-1] == 'z' ||
(len >= 2 && unit[len-2] == 'c' && unit[len-1] == 'h') || (len >= 2 && unit[len-2] == 'c' && unit[len-1] == 'h') ||
(len >= 2 && unit[len-2] == 's' && unit[len-1] == 'h')) { (len >= 2 && unit[len-2] == 's' && unit[len-1] == 'h')) {
if (!is_one) { if (!is_one) {
buf[len] = 'e'; buf[len] = 'e';
buf[len+1] = '\0'; buf[len+1] = '\0';
} }
} else if (unit[len-1] == 'y' && len >= 2 && } else if (unit[len-1] == 'y' && len >= 2 &&
!(unit[len-2] == 'a' || unit[len-2] == 'e' || !(unit[len-2] == 'a' || unit[len-2] == 'e' ||
unit[len-2] == 'i' || unit[len-2] == 'o' || unit[len-2] == 'i' || unit[len-2] == 'o' ||
unit[len-2] == 'u')) { unit[len-2] == 'u')) {
if (is_one) { if (is_one) {
buf[len-1] = '\0'; buf[len-1] = '\0';
} else {
buf[len] = 's';
buf[len+1] = '\0';
}
} else if (len >= 2 && unit[len-2] == 'f' && unit[len-1] == 'e') {
if (is_one) {
buf[len-2] = '\0';
} else {
buf[len-1] = 's';
buf[len] = '\0';
}
} else if (unit[len-1] == 'f' && len >= 1) {
if (is_one) {
buf[len-1] = '\0';
} else {
buf[len-1] = 'v';
buf[len] = 'e';
buf[len+1] = 's';
buf[len+2] = '\0';
}
} else if (unit[len-1] == 'e' && len >= 2 && unit[len-2] == 'f') {
if (is_one) {
buf[len-2] = '\0';
} else {
buf[len-1] = 's';
buf[len] = '\0';
}
} else { } else {
if (!is_one) { buf[len] = 's';
buf[len] = 's'; buf[len+1] = '\0';
buf[len+1] = '\0'; }
} } else if (len >= 2 && unit[len-2] == 'f' && unit[len-1] == 'e') {
if (is_one) {
buf[len-2] = '\0';
} else {
buf[len-1] = 's';
buf[len] = '\0';
}
} else if (unit[len-1] == 'f' && len >= 1) {
if (is_one) {
buf[len-1] = '\0';
} else {
buf[len-1] = 'v';
buf[len] = 'e';
buf[len+1] = 's';
buf[len+2] = '\0';
}
} else if (unit[len-1] == 'e' && len >= 2 && unit[len-2] == 'f') {
if (is_one) {
buf[len-2] = '\0';
} else {
buf[len-1] = 's';
buf[len] = '\0';
}
} else {
if (!is_one) {
buf[len] = 's';
buf[len+1] = '\0';
}
} }
return buf; return buf;
@@ -466,12 +466,12 @@ static char *build_html(double value, const UnitDef *from, double result, const
pluralize(to->name, result, to_name_buf, sizeof(to_name_buf)); pluralize(to->name, result, to_name_buf, sizeof(to_name_buf));
int n = snprintf(html, sizeof(html), int n = snprintf(html, sizeof(html),
"<div class='unit-conv-container' style='line-height: 1.6;'>" "<div class='unit-conv-container' style='line-height: 1.6;'>"
"<div style='font-size: 1.3em; margin-bottom: 8px;'>" "<div style='font-size: 1.3em; margin-bottom: 8px;'>"
"<b>%s %s</b> = <b>%s %s</b>" "<b>%s %s</b> = <b>%s %s</b>"
"</div>", "</div>",
val_buf, from_name_buf, val_buf, from_name_buf,
res_buf, to_name_buf); res_buf, to_name_buf);
snprintf(html + n, sizeof(html) - n, "</div>"); snprintf(html + n, sizeof(html) - n, "</div>");
return html; return html;
} }

View File

@@ -23,32 +23,32 @@ static void shorten_summary(char **extract_ptr, int max_chars) {
int end_pos = max_chars; int end_pos = max_chars;
for (int i = max_chars; i > (max_chars / 2); i--) { for (int i = max_chars; i > (max_chars / 2); i--) {
if (text[i] == '.' || text[i] == '!' || text[i] == '?') { if (text[i] == '.' || text[i] == '!' || text[i] == '?') {
end_pos = i + 1; end_pos = i + 1;
break; break;
} }
} }
char *new_text = (char *)malloc(end_pos + 4); char *new_text = (char *)malloc(end_pos + 4);
if (new_text) { if (new_text) {
strncpy(new_text, text, end_pos); strncpy(new_text, text, end_pos);
new_text[end_pos] = '\0'; new_text[end_pos] = '\0';
strcat(new_text, "..."); strcat(new_text, "...");
free(*extract_ptr); free(*extract_ptr);
*extract_ptr = new_text; *extract_ptr = new_text;
} }
} }
static size_t WikiWriteMemoryCallback(void *contents, size_t size, size_t nmemb, static size_t WikiWriteMemoryCallback(void *contents, size_t size, size_t nmemb,
void *userp) { void *userp) {
size_t realsize = size * nmemb; size_t realsize = size * nmemb;
struct WikiMemoryStruct *mem = (struct WikiMemoryStruct *)userp; struct WikiMemoryStruct *mem = (struct WikiMemoryStruct *)userp;
char *ptr = realloc(mem->memory, mem->size + realsize + 1); char *ptr = realloc(mem->memory, mem->size + realsize + 1);
if (ptr == NULL) { if (ptr == NULL) {
fprintf(stderr, "Not enough memory (realloc returned NULL)\n"); fprintf(stderr, "Not enough memory (realloc returned NULL)\n");
return 0; return 0;
} }
mem->memory = ptr; mem->memory = ptr;
@@ -63,48 +63,48 @@ static void extract_wiki_info(xmlNode *node, InfoBox *info) {
xmlNode *cur_node = NULL; xmlNode *cur_node = NULL;
for (cur_node = node; cur_node; cur_node = cur_node->next) { for (cur_node = node; cur_node; cur_node = cur_node->next) {
if (cur_node->type == XML_ELEMENT_NODE) { if (cur_node->type == XML_ELEMENT_NODE) {
if (strcmp((const char *)cur_node->name, "page") == 0) { if (strcmp((const char *)cur_node->name, "page") == 0) {
xmlChar *title = xmlGetProp(cur_node, (const xmlChar *)"title"); xmlChar *title = xmlGetProp(cur_node, (const xmlChar *)"title");
if (title) { if (title) {
info->title = strdup((const char *)title); info->title = strdup((const char *)title);
const char *base_article_url = "https://en.wikipedia.org/wiki/"; const char *base_article_url = "https://en.wikipedia.org/wiki/";
char *formatted_title = strdup((const char *)title); char *formatted_title = strdup((const char *)title);
for (int i = 0; formatted_title[i]; i++) { for (int i = 0; formatted_title[i]; i++) {
if (formatted_title[i] == ' ') formatted_title[i] = '_'; if (formatted_title[i] == ' ') formatted_title[i] = '_';
}
info->url =
malloc(strlen(base_article_url) + strlen(formatted_title) + 1);
if (info->url) {
strcpy(info->url, base_article_url);
strcat(info->url, formatted_title);
}
free(formatted_title);
xmlFree(title);
}
} }
if (strcmp((const char *)cur_node->name, "thumbnail") == 0) { info->url =
xmlChar *source = xmlGetProp(cur_node, (const xmlChar *)"source"); malloc(strlen(base_article_url) + strlen(formatted_title) + 1);
if (source) { if (info->url) {
info->thumbnail_url = strdup((const char *)source); strcpy(info->url, base_article_url);
xmlFree(source); strcat(info->url, formatted_title);
}
}
if (strcmp((const char *)cur_node->name, "extract") == 0) {
xmlChar *content = xmlNodeGetContent(cur_node);
if (content) {
info->extract = strdup((const char *)content);
shorten_summary(&(info->extract), 300);
xmlFree(content);
}
} }
free(formatted_title);
xmlFree(title);
} }
extract_wiki_info(cur_node->children, info); }
if (strcmp((const char *)cur_node->name, "thumbnail") == 0) {
xmlChar *source = xmlGetProp(cur_node, (const xmlChar *)"source");
if (source) {
info->thumbnail_url = strdup((const char *)source);
xmlFree(source);
}
}
if (strcmp((const char *)cur_node->name, "extract") == 0) {
xmlChar *content = xmlNodeGetContent(cur_node);
if (content) {
info->extract = strdup((const char *)content);
shorten_summary(&(info->extract), 300);
xmlFree(content);
}
}
}
extract_wiki_info(cur_node->children, info);
} }
} }
@@ -120,27 +120,27 @@ InfoBox fetch_wiki_data(char *api_url) {
curl_handle = curl_easy_init(); curl_handle = curl_easy_init();
if (curl_handle) { if (curl_handle) {
curl_easy_setopt(curl_handle, CURLOPT_URL, api_url); curl_easy_setopt(curl_handle, CURLOPT_URL, api_url);
curl_easy_setopt(curl_handle, CURLOPT_WRITEFUNCTION, curl_easy_setopt(curl_handle, CURLOPT_WRITEFUNCTION,
WikiWriteMemoryCallback); WikiWriteMemoryCallback);
curl_easy_setopt(curl_handle, CURLOPT_WRITEDATA, (void *)&chunk); curl_easy_setopt(curl_handle, CURLOPT_WRITEDATA, (void *)&chunk);
curl_easy_setopt(curl_handle, CURLOPT_USERAGENT, "libcurl-agent/1.0"); curl_easy_setopt(curl_handle, CURLOPT_USERAGENT, "libcurl-agent/1.0");
apply_proxy_settings(curl_handle); apply_proxy_settings(curl_handle);
res = curl_easy_perform(curl_handle); res = curl_easy_perform(curl_handle);
if (res == CURLE_OK) { if (res == CURLE_OK) {
xmlDocPtr doc = xmlDocPtr doc =
xmlReadMemory(chunk.memory, chunk.size, "noname.xml", NULL, 0); xmlReadMemory(chunk.memory, chunk.size, "noname.xml", NULL, 0);
if (doc != NULL) { if (doc != NULL) {
xmlNode *root_element = xmlDocGetRootElement(doc); xmlNode *root_element = xmlDocGetRootElement(doc);
extract_wiki_info(root_element, &info); extract_wiki_info(root_element, &info);
xmlFreeDoc(doc); xmlFreeDoc(doc);
}
} }
}
curl_easy_cleanup(curl_handle); curl_easy_cleanup(curl_handle);
free(chunk.memory); free(chunk.memory);
} }
return info; return info;
@@ -152,14 +152,14 @@ char *construct_wiki_url(const char *search_term) {
char *escaped_term = curl_easy_escape(curl, search_term, 0); char *escaped_term = curl_easy_escape(curl, search_term, 0);
const char *base = const char *base =
"https://en.wikipedia.org/w/" "https://en.wikipedia.org/w/"
"api.php?action=query&prop=extracts|pageimages&exintro&" "api.php?action=query&prop=extracts|pageimages&exintro&"
"explaintext&pithumbsize=400&format=xml&origin=*&titles="; "explaintext&pithumbsize=400&format=xml&origin=*&titles=";
char *full_url = malloc(strlen(base) + strlen(escaped_term) + 1); char *full_url = malloc(strlen(base) + strlen(escaped_term) + 1);
if (full_url) { if (full_url) {
strcpy(full_url, base); strcpy(full_url, base);
strcat(full_url, escaped_term); strcat(full_url, escaped_term);
} }
curl_free(escaped_term); curl_free(escaped_term);

View File

@@ -14,9 +14,9 @@
#include "Routes/Search.h" #include "Routes/Search.h"
/*
 * Route handler for the OpenSearch description document.
 * The request parameters are ignored; the static file "opensearch.xml"
 * is served with the OpenSearch description MIME type. Always returns 0.
 */
int handle_opensearch(UrlParams *params) {
    const char *descriptor_file = "opensearch.xml";
    const char *descriptor_mime = "application/opensearchdescription+xml";

    (void)params; /* unused, kept for the handler signature */
    serve_static_file_with_mime(descriptor_file, descriptor_mime);
    return 0;
}
int main() { int main() {
@@ -31,32 +31,32 @@ int main() {
curl_global_init(CURL_GLOBAL_DEFAULT); curl_global_init(CURL_GLOBAL_DEFAULT);
Config config = { Config config = {
.host = "0.0.0.0", .host = "0.0.0.0",
.port = 5000, .port = 5000,
.proxy = "", .proxy = "",
.proxy_list_file = "", .proxy_list_file = "",
.max_proxy_retries = 3, .max_proxy_retries = 3,
.randomize_username = 0, .randomize_username = 0,
.randomize_password = 0 .randomize_password = 0
}; };
if (load_config("config.ini", &config) != 0) { if (load_config("config.ini", &config) != 0) {
fprintf(stderr, "Warning: Could not load config file, using defaults\n"); fprintf(stderr, "Warning: Could not load config file, using defaults\n");
} }
if (config.proxy_list_file[0] != '\0') { if (config.proxy_list_file[0] != '\0') {
if (load_proxy_list(config.proxy_list_file) < 0) { if (load_proxy_list(config.proxy_list_file) < 0) {
fprintf(stderr, "Warning: Failed to load proxy list, continuing without proxies\n"); fprintf(stderr, "Warning: Failed to load proxy list, continuing without proxies\n");
} }
} }
max_proxy_retries = config.max_proxy_retries; max_proxy_retries = config.max_proxy_retries;
set_proxy_config(config.proxy, config.randomize_username, config.randomize_password); set_proxy_config(config.proxy, config.randomize_username, config.randomize_password);
if (proxy_url[0] != '\0') { if (proxy_url[0] != '\0') {
fprintf(stderr, "Using proxy: %s\n", proxy_url); fprintf(stderr, "Using proxy: %s\n", proxy_url);
} else if (proxy_count > 0) { } else if (proxy_count > 0) {
fprintf(stderr, "Using %d proxies from %s\n", proxy_count, config.proxy_list_file); fprintf(stderr, "Using %d proxies from %s\n", proxy_count, config.proxy_list_file);
} }
set_handler("/", home_handler); set_handler("/", home_handler);
@@ -70,10 +70,10 @@ int main() {
int result = beaker_run(config.host, config.port); int result = beaker_run(config.host, config.port);
if (result != 0) { if (result != 0) {
fprintf(stderr, "Error: Beaker server failed to start.\n"); fprintf(stderr, "Error: Beaker server failed to start.\n");
curl_global_cleanup(); curl_global_cleanup();
xmlCleanupParser(); xmlCleanupParser();
return EXIT_FAILURE; return EXIT_FAILURE;
} }
curl_global_cleanup(); curl_global_cleanup();

View File

@@ -17,15 +17,15 @@ static const char RAND_CHARS[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRST
/*
 * Fill buf with len - 1 characters picked from RAND_CHARS via rand(),
 * followed by a terminating NUL.
 *
 * buf: destination buffer of at least len bytes.
 * len: total buffer size including the terminator; a NULL buf or a
 *      len of 0 leaves the buffer untouched.
 *
 * NOTE(review): rand() is not a cryptographic generator; confirm that is
 * acceptable for however callers use these strings.
 */
static void generate_random_string(char *buf, size_t len) {
    /* len is unsigned, so "len - 1" would wrap to SIZE_MAX for len == 0
     * and the loop would run far past the end of buf. */
    if (!buf || len == 0) return;

    const size_t alphabet_size = sizeof(RAND_CHARS) - 1; /* exclude the NUL */
    for (size_t i = 0; i + 1 < len; i++) {
        buf[i] = RAND_CHARS[(size_t)rand() % alphabet_size];
    }
    buf[len - 1] = '\0';
}
void set_proxy_config(const char *proxy_str, int rand_user, int rand_pass) { void set_proxy_config(const char *proxy_str, int rand_user, int rand_pass) {
if (proxy_str && proxy_str[0]) { if (proxy_str && proxy_str[0]) {
strncpy(proxy_url, proxy_str, sizeof(proxy_url) - 1); strncpy(proxy_url, proxy_str, sizeof(proxy_url) - 1);
proxy_url[sizeof(proxy_url) - 1] = '\0'; proxy_url[sizeof(proxy_url) - 1] = '\0';
} }
randomize_username = rand_user; randomize_username = rand_user;
randomize_password = rand_pass; randomize_password = rand_pass;
@@ -40,49 +40,49 @@ static Proxy parse_proxy_line(const char *line) {
if (len == 0) return proxy; if (len == 0) return proxy;
if (strncmp(line, "http://", 7) == 0) { if (strncmp(line, "http://", 7) == 0) {
proxy.type = PROXY_HTTP; proxy.type = PROXY_HTTP;
host_start = line + 7; host_start = line + 7;
} else if (strncmp(line, "socks5://", 9) == 0) { } else if (strncmp(line, "socks5://", 9) == 0) {
proxy.type = PROXY_SOCKS5; proxy.type = PROXY_SOCKS5;
host_start = line + 9; host_start = line + 9;
} else if (strncmp(line, "socks4://", 9) == 0) { } else if (strncmp(line, "socks4://", 9) == 0) {
proxy.type = PROXY_SOCKS4; proxy.type = PROXY_SOCKS4;
host_start = line + 9; host_start = line + 9;
} else { } else {
host_start = line; host_start = line;
} }
const char *at = strchr(host_start, '@'); const char *at = strchr(host_start, '@');
if (at) { if (at) {
char cred_buf[128]; char cred_buf[128];
size_t cred_len = at - host_start; size_t cred_len = at - host_start;
if (cred_len >= sizeof(cred_buf)) cred_len = sizeof(cred_buf) - 1; if (cred_len >= sizeof(cred_buf)) cred_len = sizeof(cred_buf) - 1;
strncpy(cred_buf, host_start, cred_len); strncpy(cred_buf, host_start, cred_len);
cred_buf[cred_len] = '\0'; cred_buf[cred_len] = '\0';
char *colon = strchr(cred_buf, ':'); char *colon = strchr(cred_buf, ':');
if (colon) { if (colon) {
size_t user_len = colon - cred_buf; size_t user_len = colon - cred_buf;
if (user_len >= sizeof(proxy.username)) user_len = sizeof(proxy.username) - 1; if (user_len >= sizeof(proxy.username)) user_len = sizeof(proxy.username) - 1;
strncpy(proxy.username, cred_buf, user_len); strncpy(proxy.username, cred_buf, user_len);
proxy.username[user_len] = '\0'; proxy.username[user_len] = '\0';
strncpy(proxy.password, colon + 1, sizeof(proxy.password) - 1); strncpy(proxy.password, colon + 1, sizeof(proxy.password) - 1);
proxy.password[sizeof(proxy.password) - 1] = '\0'; proxy.password[sizeof(proxy.password) - 1] = '\0';
} }
host_start = at + 1; host_start = at + 1;
} }
port_start = strchr(host_start, ':'); port_start = strchr(host_start, ':');
if (port_start) { if (port_start) {
char host_buf[256]; char host_buf[256];
size_t host_len = port_start - host_start; size_t host_len = port_start - host_start;
if (host_len >= sizeof(host_buf)) host_len = sizeof(host_buf) - 1; if (host_len >= sizeof(host_buf)) host_len = sizeof(host_buf) - 1;
strncpy(host_buf, host_start, host_len); strncpy(host_buf, host_start, host_len);
host_buf[host_len] = '\0'; host_buf[host_len] = '\0';
snprintf(proxy.host, sizeof(proxy.host), "%.*s", (int)host_len, host_buf); snprintf(proxy.host, sizeof(proxy.host), "%.*s", (int)host_len, host_buf);
proxy.port = atoi(port_start + 1); proxy.port = atoi(port_start + 1);
} else { } else {
snprintf(proxy.host, sizeof(proxy.host), "%s", host_start); snprintf(proxy.host, sizeof(proxy.host), "%s", host_start);
} }
return proxy; return proxy;
@@ -90,71 +90,71 @@ static Proxy parse_proxy_line(const char *line) {
int load_proxy_list(const char *filename) { int load_proxy_list(const char *filename) {
if (!filename || filename[0] == '\0') { if (!filename || filename[0] == '\0') {
return 0; return 0;
} }
pthread_mutex_lock(&proxy_mutex); pthread_mutex_lock(&proxy_mutex);
if (proxy_list) { if (proxy_list) {
free(proxy_list); free(proxy_list);
proxy_list = NULL; proxy_list = NULL;
} }
proxy_count = 0; proxy_count = 0;
FILE *file = fopen(filename, "r"); FILE *file = fopen(filename, "r");
if (!file) { if (!file) {
pthread_mutex_unlock(&proxy_mutex); pthread_mutex_unlock(&proxy_mutex);
fprintf(stderr, "[WARN] Could not open proxy list file: %s\n", filename); fprintf(stderr, "[WARN] Could not open proxy list file: %s\n", filename);
return -1; return -1;
} }
int capacity = 16; int capacity = 16;
proxy_list = (Proxy *)malloc(capacity * sizeof(Proxy)); proxy_list = (Proxy *)malloc(capacity * sizeof(Proxy));
if (!proxy_list) { if (!proxy_list) {
fclose(file); fclose(file);
return -1; return -1;
} }
proxy_count = 0; proxy_count = 0;
char line[512]; char line[512];
while (fgets(line, sizeof(line), file)) { while (fgets(line, sizeof(line), file)) {
line[strcspn(line, "\r\n")] = 0; line[strcspn(line, "\r\n")] = 0;
if (line[0] == '\0' || line[0] == '#') { if (line[0] == '\0' || line[0] == '#') {
continue; continue;
}
char *p = line;
while (*p == ' ' || *p == '\t') p++;
char *end = p + strlen(p) - 1;
while (end > p && (*end == ' ' || *end == '\t')) {
*end = '\0';
end--;
}
if (p[0] == '\0') continue;
Proxy proxy = parse_proxy_line(p);
if (proxy.port == 0) {
continue;
}
if (proxy_count >= capacity) {
capacity *= 2;
Proxy *new_list = (Proxy *)realloc(proxy_list, capacity * sizeof(Proxy));
if (!new_list) {
free(proxy_list);
proxy_list = NULL;
proxy_count = 0;
fclose(file);
pthread_mutex_unlock(&proxy_mutex);
return -1;
} }
proxy_list = new_list;
}
char *p = line; proxy_list[proxy_count++] = proxy;
while (*p == ' ' || *p == '\t') p++;
char *end = p + strlen(p) - 1;
while (end > p && (*end == ' ' || *end == '\t')) {
*end = '\0';
end--;
}
if (p[0] == '\0') continue;
Proxy proxy = parse_proxy_line(p);
if (proxy.port == 0) {
continue;
}
if (proxy_count >= capacity) {
capacity *= 2;
Proxy *new_list = (Proxy *)realloc(proxy_list, capacity * sizeof(Proxy));
if (!new_list) {
free(proxy_list);
proxy_list = NULL;
proxy_count = 0;
fclose(file);
pthread_mutex_unlock(&proxy_mutex);
return -1;
}
proxy_list = new_list;
}
proxy_list[proxy_count++] = proxy;
} }
fclose(file); fclose(file);
@@ -166,8 +166,8 @@ int load_proxy_list(const char *filename) {
void free_proxy_list(void) { void free_proxy_list(void) {
pthread_mutex_lock(&proxy_mutex); pthread_mutex_lock(&proxy_mutex);
if (proxy_list) { if (proxy_list) {
free(proxy_list); free(proxy_list);
proxy_list = NULL; proxy_list = NULL;
} }
proxy_count = 0; proxy_count = 0;
pthread_mutex_unlock(&proxy_mutex); pthread_mutex_unlock(&proxy_mutex);
@@ -176,8 +176,8 @@ void free_proxy_list(void) {
Proxy *get_random_proxy(void) { Proxy *get_random_proxy(void) {
pthread_mutex_lock(&proxy_mutex); pthread_mutex_lock(&proxy_mutex);
if (proxy_count == 0) { if (proxy_count == 0) {
pthread_mutex_unlock(&proxy_mutex); pthread_mutex_unlock(&proxy_mutex);
return NULL; return NULL;
} }
int start = rand() % proxy_count; int start = rand() % proxy_count;
@@ -185,19 +185,19 @@ Proxy *get_random_proxy(void) {
Proxy *selected = NULL; Proxy *selected = NULL;
while (checked < proxy_count) { while (checked < proxy_count) {
int idx = (start + checked) % proxy_count; int idx = (start + checked) % proxy_count;
if (proxy_list[idx].failures < max_proxy_retries) { if (proxy_list[idx].failures < max_proxy_retries) {
selected = &proxy_list[idx]; selected = &proxy_list[idx];
break; break;
} }
checked++; checked++;
} }
if (!selected) { if (!selected) {
for (int i = 0; i < proxy_count; i++) { for (int i = 0; i < proxy_count; i++) {
proxy_list[i].failures = 0; proxy_list[i].failures = 0;
} }
selected = &proxy_list[rand() % proxy_count]; selected = &proxy_list[rand() % proxy_count];
} }
pthread_mutex_unlock(&proxy_mutex); pthread_mutex_unlock(&proxy_mutex);
@@ -213,45 +213,45 @@ void record_proxy_failure(Proxy *proxy) {
/*
 * Apply the configured proxy to a curl easy handle.
 *
 * When proxy_url is non-empty it is used as the proxy; the proxy type is
 * chosen from its "socks5://" / "socks4://" prefix (anything else is
 * treated as HTTP), and if username or password randomization is enabled
 * a "user:password" pair is generated and set on the handle.
 *
 * Otherwise, when proxy_count is positive, an entry is taken from
 * get_random_proxy() and its host, port, type and (if present) stored
 * credentials are applied.
 *
 * With neither configured the handle is left unchanged.
 */
void apply_proxy_settings(CURL *curl) {
    if (proxy_url[0] == '\0') {
        /* No fixed proxy: fall back to the proxy list, if one is loaded. */
        if (proxy_count <= 0) return;

        Proxy *picked = get_random_proxy();
        if (!picked) return;

        char endpoint[512];
        snprintf(endpoint, sizeof(endpoint), "%s:%d", picked->host, picked->port);
        curl_easy_setopt(curl, CURLOPT_PROXY, endpoint);

        switch (picked->type) {
        case PROXY_HTTP:
            curl_easy_setopt(curl, CURLOPT_PROXYTYPE, CURLPROXY_HTTP);
            break;
        case PROXY_SOCKS4:
            curl_easy_setopt(curl, CURLOPT_PROXYTYPE, CURLPROXY_SOCKS4A);
            break;
        default:
            curl_easy_setopt(curl, CURLOPT_PROXYTYPE, CURLPROXY_SOCKS5);
            break;
        }

        int has_credentials =
            picked->username[0] != '\0' || picked->password[0] != '\0';
        if (has_credentials) {
            char auth[128];
            snprintf(auth, sizeof(auth), "%s:%s", picked->username, picked->password);
            curl_easy_setopt(curl, CURLOPT_PROXYUSERPWD, auth);
        }
        return;
    }

    /* Fixed proxy from configuration. */
    curl_easy_setopt(curl, CURLOPT_PROXY, proxy_url);

    int is_socks5 = strncmp(proxy_url, "socks5://", 9) == 0;
    int is_socks4 = !is_socks5 && strncmp(proxy_url, "socks4://", 9) == 0;
    if (is_socks5) {
        curl_easy_setopt(curl, CURLOPT_PROXYTYPE, CURLPROXY_SOCKS5);
    } else if (is_socks4) {
        curl_easy_setopt(curl, CURLOPT_PROXYTYPE, CURLPROXY_SOCKS4A);
    } else {
        curl_easy_setopt(curl, CURLOPT_PROXYTYPE, CURLPROXY_HTTP);
    }

    if (!randomize_username && !randomize_password) return;

    /* A part that is not randomized stays empty in the "user:password" pair. */
    char rand_user[32] = {0};
    char rand_pass[32] = {0};
    if (randomize_username) generate_random_string(rand_user, sizeof(rand_user));
    if (randomize_password) generate_random_string(rand_pass, sizeof(rand_pass));

    char auth[256];
    snprintf(auth, sizeof(auth), "%s:%s", rand_user, rand_pass);
    curl_easy_setopt(curl, CURLOPT_PROXYUSERPWD, auth);
}

View File

@@ -2,13 +2,13 @@
#include <stdlib.h> #include <stdlib.h>
/*
 * Route handler for the home page.
 * Renders "home.html" with a freshly created template context, sends the
 * rendered text as the response, then releases the rendered buffer and the
 * context. The request parameters are not used. Always returns 0.
 */
int home_handler(UrlParams *params) {
    (void)params;

    TemplateContext home_ctx = new_context();
    char *page = render_template("home.html", &home_ctx);

    send_response(page);

    /* Both the rendered buffer and the context are released here. */
    free_context(&home_ctx);
    free(page);
    return 0;
}

View File

@@ -16,9 +16,9 @@ typedef struct {
static int is_allowed_domain(const char *url) { static int is_allowed_domain(const char *url) {
const char *protocol = strstr(url, "://"); const char *protocol = strstr(url, "://");
if (!protocol) { if (!protocol) {
protocol = url; protocol = url;
} else { } else {
protocol += 3; protocol += 3;
} }
const char *path = strchr(protocol, '/'); const char *path = strchr(protocol, '/');
@@ -26,49 +26,49 @@ static int is_allowed_domain(const char *url) {
char host[256] = {0}; char host[256] = {0};
if (host_len >= sizeof(host)) { if (host_len >= sizeof(host)) {
host_len = sizeof(host) - 1; host_len = sizeof(host) - 1;
} }
strncpy(host, protocol, host_len); strncpy(host, protocol, host_len);
const char *allowed_domains[] = { const char *allowed_domains[] = {
"mm.bing.net", "mm.bing.net",
"th.bing.com", "th.bing.com",
NULL NULL
}; };
for (int i = 0; allowed_domains[i] != NULL; i++) { for (int i = 0; allowed_domains[i] != NULL; i++) {
size_t domain_len = strlen(allowed_domains[i]); size_t domain_len = strlen(allowed_domains[i]);
size_t host_str_len = strlen(host); size_t host_str_len = strlen(host);
if (host_str_len >= domain_len) { if (host_str_len >= domain_len) {
const char *suffix = host + host_str_len - domain_len; const char *suffix = host + host_str_len - domain_len;
if (strcmp(suffix, allowed_domains[i]) == 0) { if (strcmp(suffix, allowed_domains[i]) == 0) {
return 1; return 1;
}
} }
} }
}
return 0; return 0;
} }
static size_t write_callback(void *contents, size_t size, size_t nmemb, static size_t write_callback(void *contents, size_t size, size_t nmemb,
void *userp) { void *userp) {
size_t realsize = size * nmemb; size_t realsize = size * nmemb;
MemoryBuffer *buf = (MemoryBuffer *)userp; MemoryBuffer *buf = (MemoryBuffer *)userp;
if (buf->size + realsize > MAX_IMAGE_SIZE) { if (buf->size + realsize > MAX_IMAGE_SIZE) {
return 0; return 0;
} }
if (buf->size + realsize > buf->capacity) { if (buf->size + realsize > buf->capacity) {
size_t new_capacity = buf->capacity * 2; size_t new_capacity = buf->capacity * 2;
if (new_capacity < buf->size + realsize) { if (new_capacity < buf->size + realsize) {
new_capacity = buf->size + realsize; new_capacity = buf->size + realsize;
} }
char *new_data = realloc(buf->data, new_capacity); char *new_data = realloc(buf->data, new_capacity);
if (!new_data) return 0; if (!new_data) return 0;
buf->data = new_data; buf->data = new_data;
buf->capacity = new_capacity; buf->capacity = new_capacity;
} }
memcpy(buf->data + buf->size, contents, realsize); memcpy(buf->data + buf->size, contents, realsize);
@@ -79,38 +79,38 @@ static size_t write_callback(void *contents, size_t size, size_t nmemb,
int image_proxy_handler(UrlParams *params) { int image_proxy_handler(UrlParams *params) {
const char *url = NULL; const char *url = NULL;
for (int i = 0; i < params->count; i++) { for (int i = 0; i < params->count; i++) {
if (strcmp(params->params[i].key, "url") == 0) { if (strcmp(params->params[i].key, "url") == 0) {
url = params->params[i].value; url = params->params[i].value;
break; break;
} }
} }
if (!url || strlen(url) == 0) { if (!url || strlen(url) == 0) {
send_response("Missing 'url' parameter"); send_response("Missing 'url' parameter");
return 0; return 0;
} }
if (!is_allowed_domain(url)) { if (!is_allowed_domain(url)) {
send_response("Domain not allowed"); send_response("Domain not allowed");
return 0; return 0;
} }
CURL *curl = curl_easy_init(); CURL *curl = curl_easy_init();
if (!curl) { if (!curl) {
send_response("Failed to initialize curl"); send_response("Failed to initialize curl");
return 0; return 0;
} }
MemoryBuffer buf = { MemoryBuffer buf = {
.data = malloc(8192), .data = malloc(8192),
.size = 0, .size = 0,
.capacity = 8192 .capacity = 8192
}; };
if (!buf.data) { if (!buf.data) {
curl_easy_cleanup(curl); curl_easy_cleanup(curl);
send_response("Memory allocation failed"); send_response("Memory allocation failed");
return 0; return 0;
} }
curl_easy_setopt(curl, CURLOPT_URL, url); curl_easy_setopt(curl, CURLOPT_URL, url);
@@ -130,15 +130,15 @@ int image_proxy_handler(UrlParams *params) {
char content_type[64] = {0}; char content_type[64] = {0};
if (content_type_ptr) { if (content_type_ptr) {
strncpy(content_type, content_type_ptr, sizeof(content_type) - 1); strncpy(content_type, content_type_ptr, sizeof(content_type) - 1);
} }
curl_easy_cleanup(curl); curl_easy_cleanup(curl);
if (res != CURLE_OK || response_code != 200) { if (res != CURLE_OK || response_code != 200) {
free(buf.data); free(buf.data);
send_response("Failed to fetch image"); send_response("Failed to fetch image");
return 0; return 0;
} }
const char *mime_type = strlen(content_type) > 0 ? content_type : "image/jpeg"; const char *mime_type = strlen(content_type) > 0 ? content_type : "image/jpeg";

View File

@@ -17,12 +17,12 @@ struct MemoryBlock {
}; };
static size_t ImageWriteCallback(void *data, size_t size, size_t nmemb, static size_t ImageWriteCallback(void *data, size_t size, size_t nmemb,
void *userp) { void *userp) {
size_t realsize = size * nmemb; size_t realsize = size * nmemb;
struct MemoryBlock *mem = (struct MemoryBlock *)userp; struct MemoryBlock *mem = (struct MemoryBlock *)userp;
char *ptr = (char *)realloc(mem->response, mem->size + realsize + 1); char *ptr = (char *)realloc(mem->response, mem->size + realsize + 1);
if (ptr == NULL) { if (ptr == NULL) {
return 0; return 0;
} }
mem->response = ptr; mem->response = ptr;
memcpy(&(mem->response[mem->size]), data, realsize); memcpy(&(mem->response[mem->size]), data, realsize);
@@ -35,30 +35,30 @@ static char *fetch_images_html(const char *url) {
CURL *curl_handle; CURL *curl_handle;
struct MemoryBlock chunk = {.response = malloc(1), .size = 0}; struct MemoryBlock chunk = {.response = malloc(1), .size = 0};
if (!chunk.response) { if (!chunk.response) {
return NULL; return NULL;
} }
curl_handle = curl_easy_init(); curl_handle = curl_easy_init();
if (!curl_handle) { if (!curl_handle) {
free(chunk.response); free(chunk.response);
return NULL; return NULL;
} }
curl_easy_setopt(curl_handle, CURLOPT_URL, url); curl_easy_setopt(curl_handle, CURLOPT_URL, url);
curl_easy_setopt(curl_handle, CURLOPT_WRITEFUNCTION, ImageWriteCallback); curl_easy_setopt(curl_handle, CURLOPT_WRITEFUNCTION, ImageWriteCallback);
curl_easy_setopt(curl_handle, CURLOPT_WRITEDATA, (void *)&chunk); curl_easy_setopt(curl_handle, CURLOPT_WRITEDATA, (void *)&chunk);
curl_easy_setopt( curl_easy_setopt(
curl_handle, CURLOPT_USERAGENT, curl_handle, CURLOPT_USERAGENT,
"Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko"); "Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko");
curl_easy_setopt(curl_handle, CURLOPT_FOLLOWLOCATION, 1L); curl_easy_setopt(curl_handle, CURLOPT_FOLLOWLOCATION, 1L);
curl_easy_setopt(curl_handle, CURLOPT_TIMEOUT, 10L); curl_easy_setopt(curl_handle, CURLOPT_TIMEOUT, 10L);
apply_proxy_settings(curl_handle); apply_proxy_settings(curl_handle);
CURLcode res = curl_easy_perform(curl_handle); CURLcode res = curl_easy_perform(curl_handle);
if (res != CURLE_OK) { if (res != CURLE_OK) {
free(chunk.response); free(chunk.response);
curl_easy_cleanup(curl_handle); curl_easy_cleanup(curl_handle);
return NULL; return NULL;
} }
curl_easy_cleanup(curl_handle); curl_easy_cleanup(curl_handle);
@@ -71,15 +71,15 @@ int images_handler(UrlParams *params) {
int page = 1; int page = 1;
if (params) { if (params) {
for (int i = 0; i < params->count; i++) { for (int i = 0; i < params->count; i++) {
if (strcmp(params->params[i].key, "q") == 0) { if (strcmp(params->params[i].key, "q") == 0) {
raw_query = params->params[i].value; raw_query = params->params[i].value;
} else if (strcmp(params->params[i].key, "p") == 0) { } else if (strcmp(params->params[i].key, "p") == 0) {
int parsed = atoi(params->params[i].value); int parsed = atoi(params->params[i].value);
if (parsed > 1) page = parsed; if (parsed > 1) page = parsed;
}
} }
} }
}
context_set(&ctx, "query", raw_query); context_set(&ctx, "query", raw_query);
@@ -87,7 +87,7 @@ int images_handler(UrlParams *params) {
snprintf(page_str, sizeof(page_str), "%d", page); snprintf(page_str, sizeof(page_str), "%d", page);
snprintf(prev_str, sizeof(prev_str), "%d", page > 1 ? page - 1 : 0); snprintf(prev_str, sizeof(prev_str), "%d", page > 1 ? page - 1 : 0);
snprintf(next_str, sizeof(next_str), "%d", page + 1); snprintf(next_str, sizeof(next_str), "%d", page + 1);
context_set(&ctx, "page", page_str); context_set(&ctx, "page", page_str);
context_set(&ctx, "prev_page", prev_str); context_set(&ctx, "prev_page", prev_str);
context_set(&ctx, "next_page", next_str); context_set(&ctx, "next_page", next_str);
@@ -95,198 +95,198 @@ int images_handler(UrlParams *params) {
context_set(&ctx, "query", display_query); context_set(&ctx, "query", display_query);
if (!raw_query || strlen(raw_query) == 0) { if (!raw_query || strlen(raw_query) == 0) {
send_response("<h1>No query provided</h1>"); send_response("<h1>No query provided</h1>");
if (display_query) free(display_query); if (display_query) free(display_query);
free_context(&ctx); free_context(&ctx);
return -1; return -1;
} }
CURL *tmp = curl_easy_init(); CURL *tmp = curl_easy_init();
if (!tmp) { if (!tmp) {
send_response("<h1>Error initializing curl</h1>"); send_response("<h1>Error initializing curl</h1>");
if (display_query) free(display_query); if (display_query) free(display_query);
free_context(&ctx); free_context(&ctx);
return -1; return -1;
} }
char *encoded_query = curl_easy_escape(tmp, raw_query, 0); char *encoded_query = curl_easy_escape(tmp, raw_query, 0);
curl_easy_cleanup(tmp); curl_easy_cleanup(tmp);
if (!encoded_query) { if (!encoded_query) {
send_response("<h1>Error encoding query</h1>"); send_response("<h1>Error encoding query</h1>");
if (display_query) free(display_query); if (display_query) free(display_query);
free_context(&ctx); free_context(&ctx);
return -1; return -1;
} }
char url[1024]; char url[1024];
int first = (page - 1) * 32 + 1; int first = (page - 1) * 32 + 1;
snprintf(url, sizeof(url), snprintf(url, sizeof(url),
"https://www.bing.com/images/search?q=%s&first=%d", encoded_query, first); "https://www.bing.com/images/search?q=%s&first=%d", encoded_query, first);
char *html = fetch_images_html(url); char *html = fetch_images_html(url);
if (!html) { if (!html) {
send_response("<h1>Error fetching images</h1>"); send_response("<h1>Error fetching images</h1>");
free(encoded_query); free(encoded_query);
free(display_query); free(display_query);
free_context(&ctx); free_context(&ctx);
return -1; return -1;
} }
htmlDocPtr doc = htmlReadMemory(html, (int)strlen(html), NULL, NULL, htmlDocPtr doc = htmlReadMemory(html, (int)strlen(html), NULL, NULL,
HTML_PARSE_RECOVER | HTML_PARSE_NOERROR); HTML_PARSE_RECOVER | HTML_PARSE_NOERROR);
if (!doc) { if (!doc) {
free(html); free(html);
free(encoded_query); free(encoded_query);
free(display_query); free(display_query);
free_context(&ctx); free_context(&ctx);
return -1; return -1;
} }
xmlXPathContextPtr xpathCtx = xmlXPathNewContext(doc); xmlXPathContextPtr xpathCtx = xmlXPathNewContext(doc);
if (!xpathCtx) { if (!xpathCtx) {
xmlFreeDoc(doc); xmlFreeDoc(doc);
free(html); free(html);
free(encoded_query); free(encoded_query);
free(display_query); free(display_query);
free_context(&ctx); free_context(&ctx);
return -1; return -1;
} }
xmlXPathObjectPtr xpathObj = xmlXPathObjectPtr xpathObj =
xmlXPathEvalExpression((const xmlChar *)"//div[@class='item']", xpathCtx); xmlXPathEvalExpression((const xmlChar *)"//div[@class='item']", xpathCtx);
int image_count = 0; int image_count = 0;
char ***image_matrix = NULL; char ***image_matrix = NULL;
int *inner_counts = NULL; int *inner_counts = NULL;
if (xpathObj && xpathObj->nodesetval) { if (xpathObj && xpathObj->nodesetval) {
int nodes = xpathObj->nodesetval->nodeNr; int nodes = xpathObj->nodesetval->nodeNr;
int max_images = (nodes < 32) ? nodes : 32; int max_images = (nodes < 32) ? nodes : 32;
image_matrix = malloc(sizeof(char **) * max_images); image_matrix = malloc(sizeof(char **) * max_images);
inner_counts = malloc(sizeof(int) * max_images); inner_counts = malloc(sizeof(int) * max_images);
for (int i = 0; i < nodes; i++) { for (int i = 0; i < nodes; i++) {
if (image_count >= 32) break; if (image_count >= 32) break;
xmlNodePtr node = xpathObj->nodesetval->nodeTab[i]; xmlNodePtr node = xpathObj->nodesetval->nodeTab[i];
xmlNodePtr img_node = NULL; xmlNodePtr img_node = NULL;
xmlNodePtr tit_node = NULL; xmlNodePtr tit_node = NULL;
xmlNodePtr des_node = NULL; xmlNodePtr des_node = NULL;
xmlNodePtr thumb_link = NULL; xmlNodePtr thumb_link = NULL;
for (xmlNodePtr child = node->children; child; child = child->next) { for (xmlNodePtr child = node->children; child; child = child->next) {
if (child->type != XML_ELEMENT_NODE) continue; if (child->type != XML_ELEMENT_NODE) continue;
if (xmlStrcmp(child->name, (const xmlChar *)"a") == 0) { if (xmlStrcmp(child->name, (const xmlChar *)"a") == 0) {
xmlChar *class = xmlGetProp(child, (const xmlChar *)"class"); xmlChar *class = xmlGetProp(child, (const xmlChar *)"class");
if (class) { if (class) {
if (xmlStrstr(class, (const xmlChar *)"thumb") != NULL) { if (xmlStrstr(class, (const xmlChar *)"thumb") != NULL) {
thumb_link = child; thumb_link = child;
for (xmlNodePtr thumb_child = child->children; thumb_child; thumb_child = thumb_child->next) { for (xmlNodePtr thumb_child = child->children; thumb_child; thumb_child = thumb_child->next) {
if (xmlStrcmp(thumb_child->name, (const xmlChar *)"div") == 0) { if (xmlStrcmp(thumb_child->name, (const xmlChar *)"div") == 0) {
xmlChar *div_class = xmlGetProp(thumb_child, (const xmlChar *)"class"); xmlChar *div_class = xmlGetProp(thumb_child, (const xmlChar *)"class");
if (div_class && xmlStrcmp(div_class, (const xmlChar *)"cico") == 0) { if (div_class && xmlStrcmp(div_class, (const xmlChar *)"cico") == 0) {
for (xmlNodePtr cico_child = thumb_child->children; cico_child; cico_child = cico_child->next) { for (xmlNodePtr cico_child = thumb_child->children; cico_child; cico_child = cico_child->next) {
if (xmlStrcmp(cico_child->name, (const xmlChar *)"img") == 0) { if (xmlStrcmp(cico_child->name, (const xmlChar *)"img") == 0) {
img_node = cico_child; img_node = cico_child;
break; break;
}
}
}
if (div_class) xmlFree(div_class);
}
}
} else if (xmlStrstr(class, (const xmlChar *)"tit") != NULL) {
tit_node = child;
}
xmlFree(class);
}
} else if (xmlStrcmp(child->name, (const xmlChar *)"div") == 0) {
xmlChar *class = xmlGetProp(child, (const xmlChar *)"class");
if (class && xmlStrcmp(class, (const xmlChar *)"meta") == 0) {
for (xmlNodePtr meta_child = child->children; meta_child; meta_child = meta_child->next) {
if (xmlStrcmp(meta_child->name, (const xmlChar *)"div") == 0) {
xmlChar *div_class = xmlGetProp(meta_child, (const xmlChar *)"class");
if (div_class) {
if (xmlStrcmp(div_class, (const xmlChar *)"des") == 0) {
des_node = meta_child;
}
xmlFree(div_class);
}
} else if (xmlStrcmp(meta_child->name, (const xmlChar *)"a") == 0) {
xmlChar *a_class = xmlGetProp(meta_child, (const xmlChar *)"class");
if (a_class && xmlStrstr(a_class, (const xmlChar *)"tit") != NULL) {
tit_node = meta_child;
}
if (a_class) xmlFree(a_class);
}
} }
} }
if (class) xmlFree(class); }
if (div_class) xmlFree(div_class);
}
}
} else if (xmlStrstr(class, (const xmlChar *)"tit") != NULL) {
tit_node = child;
}
xmlFree(class);
}
} else if (xmlStrcmp(child->name, (const xmlChar *)"div") == 0) {
xmlChar *class = xmlGetProp(child, (const xmlChar *)"class");
if (class && xmlStrcmp(class, (const xmlChar *)"meta") == 0) {
for (xmlNodePtr meta_child = child->children; meta_child; meta_child = meta_child->next) {
if (xmlStrcmp(meta_child->name, (const xmlChar *)"div") == 0) {
xmlChar *div_class = xmlGetProp(meta_child, (const xmlChar *)"class");
if (div_class) {
if (xmlStrcmp(div_class, (const xmlChar *)"des") == 0) {
des_node = meta_child;
}
xmlFree(div_class);
}
} else if (xmlStrcmp(meta_child->name, (const xmlChar *)"a") == 0) {
xmlChar *a_class = xmlGetProp(meta_child, (const xmlChar *)"class");
if (a_class && xmlStrstr(a_class, (const xmlChar *)"tit") != NULL) {
tit_node = meta_child;
}
if (a_class) xmlFree(a_class);
} }
} }
xmlChar *iurl = img_node ? xmlGetProp(img_node, (const xmlChar *)"src") : NULL;
xmlChar *full_url = thumb_link ? xmlGetProp(thumb_link, (const xmlChar *)"href") : NULL;
xmlChar *title = des_node ? xmlNodeGetContent(des_node) : (tit_node ? xmlNodeGetContent(tit_node) : NULL);
xmlChar *rurl = tit_node ? xmlGetProp(tit_node, (const xmlChar *)"href") : NULL;
if (iurl && strlen((char *)iurl) > 0) {
char *proxy_url = NULL;
CURL *esc_curl = curl_easy_init();
if (esc_curl) {
char *encoded = curl_easy_escape(esc_curl, (char *)iurl, 0);
if (encoded) {
size_t proxy_len = strlen("/proxy?url=") + strlen(encoded) + 1;
proxy_url = malloc(proxy_len);
if (proxy_url) {
snprintf(proxy_url, proxy_len, "/proxy?url=%s", encoded);
}
curl_free(encoded);
}
curl_easy_cleanup(esc_curl);
}
image_matrix[image_count] = malloc(sizeof(char *) * 4);
image_matrix[image_count][0] = proxy_url ? strdup(proxy_url) : strdup((char *)iurl);
image_matrix[image_count][1] = strdup(title ? (char *)title : "Image");
image_matrix[image_count][2] = strdup(rurl ? (char *)rurl : "#");
image_matrix[image_count][3] = strdup(full_url ? (char *)full_url : "#");
inner_counts[image_count] = 4;
image_count++;
} }
if (class) xmlFree(class);
if (iurl) xmlFree(iurl);
if (title) xmlFree(title);
if (rurl) xmlFree(rurl);
if (full_url) xmlFree(full_url);
} }
}
xmlChar *iurl = img_node ? xmlGetProp(img_node, (const xmlChar *)"src") : NULL;
xmlChar *full_url = thumb_link ? xmlGetProp(thumb_link, (const xmlChar *)"href") : NULL;
xmlChar *title = des_node ? xmlNodeGetContent(des_node) : (tit_node ? xmlNodeGetContent(tit_node) : NULL);
xmlChar *rurl = tit_node ? xmlGetProp(tit_node, (const xmlChar *)"href") : NULL;
if (iurl && strlen((char *)iurl) > 0) {
char *proxy_url = NULL;
CURL *esc_curl = curl_easy_init();
if (esc_curl) {
char *encoded = curl_easy_escape(esc_curl, (char *)iurl, 0);
if (encoded) {
size_t proxy_len = strlen("/proxy?url=") + strlen(encoded) + 1;
proxy_url = malloc(proxy_len);
if (proxy_url) {
snprintf(proxy_url, proxy_len, "/proxy?url=%s", encoded);
}
curl_free(encoded);
}
curl_easy_cleanup(esc_curl);
}
image_matrix[image_count] = malloc(sizeof(char *) * 4);
image_matrix[image_count][0] = proxy_url ? strdup(proxy_url) : strdup((char *)iurl);
image_matrix[image_count][1] = strdup(title ? (char *)title : "Image");
image_matrix[image_count][2] = strdup(rurl ? (char *)rurl : "#");
image_matrix[image_count][3] = strdup(full_url ? (char *)full_url : "#");
inner_counts[image_count] = 4;
image_count++;
}
if (iurl) xmlFree(iurl);
if (title) xmlFree(title);
if (rurl) xmlFree(rurl);
if (full_url) xmlFree(full_url);
}
} }
context_set_array_of_arrays(&ctx, "images", image_matrix, image_count, context_set_array_of_arrays(&ctx, "images", image_matrix, image_count,
inner_counts); inner_counts);
char *rendered = render_template("images.html", &ctx); char *rendered = render_template("images.html", &ctx);
if (rendered) { if (rendered) {
send_response(rendered); send_response(rendered);
free(rendered); free(rendered);
} else { } else {
send_response("<h1>Error rendering image results</h1>"); send_response("<h1>Error rendering image results</h1>");
} }
if (image_matrix) { if (image_matrix) {
for (int i = 0; i < image_count; i++) { for (int i = 0; i < image_count; i++) {
for (int j = 0; j < 4; j++) { for (int j = 0; j < 4; j++) {
free(image_matrix[i][j]); free(image_matrix[i][j]);
}
free(image_matrix[i]);
} }
free(image_matrix); free(image_matrix[i]);
}
free(image_matrix);
} }
if (inner_counts) { if (inner_counts) {
free(inner_counts); free(inner_counts);
} }
if (xpathObj) xmlXPathFreeObject(xpathObj); if (xpathObj) xmlXPathFreeObject(xpathObj);

View File

@@ -23,13 +23,13 @@ static void *wiki_thread_func(void *arg) {
InfoBoxThreadData *data = (InfoBoxThreadData *)arg; InfoBoxThreadData *data = (InfoBoxThreadData *)arg;
char *dynamic_url = construct_wiki_url(data->query); char *dynamic_url = construct_wiki_url(data->query);
if (dynamic_url) { if (dynamic_url) {
data->result = fetch_wiki_data(dynamic_url); data->result = fetch_wiki_data(dynamic_url);
data->success = data->success =
(data->result.title != NULL && data->result.extract != NULL && (data->result.title != NULL && data->result.extract != NULL &&
strlen(data->result.extract) > 10); strlen(data->result.extract) > 10);
free(dynamic_url); free(dynamic_url);
} else { } else {
data->success = 0; data->success = 0;
} }
return NULL; return NULL;
} }
@@ -41,43 +41,43 @@ static int is_calculator_query(const char *query) {
int has_math_operator = 0; int has_math_operator = 0;
for (const char *p = query; *p; p++) { for (const char *p = query; *p; p++) {
if (isdigit(*p) || *p == '.') { if (isdigit(*p) || *p == '.') {
has_digit = 1; has_digit = 1;
} }
if (*p == '+' || *p == '-' || *p == '*' || *p == '/' || *p == '^') { if (*p == '+' || *p == '-' || *p == '*' || *p == '/' || *p == '^') {
has_math_operator = 1; has_math_operator = 1;
} }
} }
if (!has_digit || !has_math_operator) return 0; if (!has_digit || !has_math_operator) return 0;
int len = strlen(query); int len = strlen(query);
for (int i = 0; i < len; i++) { for (int i = 0; i < len; i++) {
char c = query[i]; char c = query[i];
if (c == '+' || c == '-' || c == '*' || c == '/' || c == '^') { if (c == '+' || c == '-' || c == '*' || c == '/' || c == '^') {
int has_num_before = 0; int has_num_before = 0;
int has_num_after = 0; int has_num_after = 0;
for (int j = i - 1; j >= 0; j--) { for (int j = i - 1; j >= 0; j--) {
if (isdigit(query[j]) || query[j] == '.') { if (isdigit(query[j]) || query[j] == '.') {
has_num_before = 1; has_num_before = 1;
break; break;
}
if (query[j] != ' ') break;
}
for (int j = i + 1; j < len; j++) {
if (isdigit(query[j]) || query[j] == '.') {
has_num_after = 1;
break;
}
if (query[j] != ' ') break;
}
if (has_num_before || has_num_after) {
return 1;
}
} }
if (query[j] != ' ') break;
}
for (int j = i + 1; j < len; j++) {
if (isdigit(query[j]) || query[j] == '.') {
has_num_after = 1;
break;
}
if (query[j] != ' ') break;
}
if (has_num_before || has_num_after) {
return 1;
}
}
} }
return 0; return 0;
@@ -87,11 +87,11 @@ static void *calc_thread_func(void *arg) {
InfoBoxThreadData *data = (InfoBoxThreadData *)arg; InfoBoxThreadData *data = (InfoBoxThreadData *)arg;
if (is_calculator_query(data->query)) { if (is_calculator_query(data->query)) {
data->result = fetch_calc_data((char *)data->query); data->result = fetch_calc_data((char *)data->query);
data->success = data->success =
(data->result.title != NULL && data->result.extract != NULL); (data->result.title != NULL && data->result.extract != NULL);
} else { } else {
data->success = 0; data->success = 0;
} }
return NULL; return NULL;
@@ -101,11 +101,11 @@ static void *dict_thread_func(void *arg) {
InfoBoxThreadData *data = (InfoBoxThreadData *)arg; InfoBoxThreadData *data = (InfoBoxThreadData *)arg;
if (is_dictionary_query(data->query)) { if (is_dictionary_query(data->query)) {
data->result = fetch_dictionary_data(data->query); data->result = fetch_dictionary_data(data->query);
data->success = data->success =
(data->result.title != NULL && data->result.extract != NULL); (data->result.title != NULL && data->result.extract != NULL);
} else { } else {
data->success = 0; data->success = 0;
} }
return NULL; return NULL;
@@ -115,22 +115,22 @@ static void *unit_thread_func(void *arg) {
InfoBoxThreadData *data = (InfoBoxThreadData *)arg; InfoBoxThreadData *data = (InfoBoxThreadData *)arg;
if (is_unit_conv_query(data->query)) { if (is_unit_conv_query(data->query)) {
data->result = fetch_unit_conv_data(data->query); data->result = fetch_unit_conv_data(data->query);
data->success = data->success =
(data->result.title != NULL && data->result.extract != NULL); (data->result.title != NULL && data->result.extract != NULL);
} else { } else {
data->success = 0; data->success = 0;
} }
return NULL; return NULL;
} }
static int add_infobox_to_collection(InfoBox *infobox, char ****collection, static int add_infobox_to_collection(InfoBox *infobox, char ****collection,
int **inner_counts, int current_count) { int **inner_counts, int current_count) {
*collection = *collection =
(char ***)realloc(*collection, sizeof(char **) * (current_count + 1)); (char ***)realloc(*collection, sizeof(char **) * (current_count + 1));
*inner_counts = *inner_counts =
(int *)realloc(*inner_counts, sizeof(int) * (current_count + 1)); (int *)realloc(*inner_counts, sizeof(int) * (current_count + 1));
(*collection)[current_count] = (char **)malloc(sizeof(char *) * 4); (*collection)[current_count] = (char **)malloc(sizeof(char *) * 4);
(*collection)[current_count][0] = infobox->title ? strdup(infobox->title) : NULL; (*collection)[current_count][0] = infobox->title ? strdup(infobox->title) : NULL;
@@ -148,15 +148,15 @@ int results_handler(UrlParams *params) {
int page = 1; int page = 1;
if (params) { if (params) {
for (int i = 0; i < params->count; i++) { for (int i = 0; i < params->count; i++) {
if (strcmp(params->params[i].key, "q") == 0) { if (strcmp(params->params[i].key, "q") == 0) {
raw_query = params->params[i].value; raw_query = params->params[i].value;
} else if (strcmp(params->params[i].key, "p") == 0) { } else if (strcmp(params->params[i].key, "p") == 0) {
int parsed = atoi(params->params[i].value); int parsed = atoi(params->params[i].value);
if (parsed > 1) page = parsed; if (parsed > 1) page = parsed;
}
} }
} }
}
context_set(&ctx, "query", raw_query); context_set(&ctx, "query", raw_query);
@@ -164,14 +164,14 @@ int results_handler(UrlParams *params) {
snprintf(page_str, sizeof(page_str), "%d", page); snprintf(page_str, sizeof(page_str), "%d", page);
snprintf(prev_str, sizeof(prev_str), "%d", page > 1 ? page - 1 : 0); snprintf(prev_str, sizeof(prev_str), "%d", page > 1 ? page - 1 : 0);
snprintf(next_str, sizeof(next_str), "%d", page + 1); snprintf(next_str, sizeof(next_str), "%d", page + 1);
context_set(&ctx, "page", page_str); context_set(&ctx, "page", page_str);
context_set(&ctx, "prev_page", prev_str); context_set(&ctx, "prev_page", prev_str);
context_set(&ctx, "next_page", next_str); context_set(&ctx, "next_page", next_str);
if (!raw_query || strlen(raw_query) == 0) { if (!raw_query || strlen(raw_query) == 0) {
send_response("<h1>No query provided</h1>"); send_response("<h1>No query provided</h1>");
free_context(&ctx); free_context(&ctx);
return -1; return -1;
} }
pthread_t wiki_tid, calc_tid, dict_tid, unit_tid; pthread_t wiki_tid, calc_tid, dict_tid, unit_tid;
@@ -181,36 +181,36 @@ int results_handler(UrlParams *params) {
InfoBoxThreadData unit_data = {.query = raw_query, .success = 0}; InfoBoxThreadData unit_data = {.query = raw_query, .success = 0};
if (page == 1) { if (page == 1) {
pthread_create(&wiki_tid, NULL, wiki_thread_func, &wiki_data); pthread_create(&wiki_tid, NULL, wiki_thread_func, &wiki_data);
pthread_create(&calc_tid, NULL, calc_thread_func, &calc_data); pthread_create(&calc_tid, NULL, calc_thread_func, &calc_data);
pthread_create(&dict_tid, NULL, dict_thread_func, &dict_data); pthread_create(&dict_tid, NULL, dict_thread_func, &dict_data);
pthread_create(&unit_tid, NULL, unit_thread_func, &unit_data); pthread_create(&unit_tid, NULL, unit_thread_func, &unit_data);
} }
ScrapeJob jobs[ENGINE_COUNT]; ScrapeJob jobs[ENGINE_COUNT];
SearchResult *all_results[ENGINE_COUNT]; SearchResult *all_results[ENGINE_COUNT];
for (int i = 0; i < ENGINE_COUNT; i++) { for (int i = 0; i < ENGINE_COUNT; i++) {
all_results[i] = NULL; all_results[i] = NULL;
jobs[i].engine = &ENGINE_REGISTRY[i]; jobs[i].engine = &ENGINE_REGISTRY[i];
jobs[i].query = raw_query; jobs[i].query = raw_query;
jobs[i].out_results = &all_results[i]; jobs[i].out_results = &all_results[i];
jobs[i].max_results = 10; jobs[i].max_results = 10;
jobs[i].results_count = 0; jobs[i].results_count = 0;
jobs[i].page = page; jobs[i].page = page;
jobs[i].handle = NULL; jobs[i].handle = NULL;
jobs[i].response.memory = NULL; jobs[i].response.memory = NULL;
jobs[i].response.size = 0; jobs[i].response.size = 0;
jobs[i].response.capacity = 0; jobs[i].response.capacity = 0;
} }
scrape_engines_parallel(jobs, ENGINE_COUNT); scrape_engines_parallel(jobs, ENGINE_COUNT);
if (page == 1) { if (page == 1) {
pthread_join(wiki_tid, NULL); pthread_join(wiki_tid, NULL);
pthread_join(calc_tid, NULL); pthread_join(calc_tid, NULL);
pthread_join(dict_tid, NULL); pthread_join(dict_tid, NULL);
pthread_join(unit_tid, NULL); pthread_join(unit_tid, NULL);
} }
char ***infobox_matrix = NULL; char ***infobox_matrix = NULL;
@@ -218,118 +218,118 @@ int results_handler(UrlParams *params) {
int infobox_count = 0; int infobox_count = 0;
if (page == 1) { if (page == 1) {
if (dict_data.success) { if (dict_data.success) {
infobox_count = add_infobox_to_collection(&dict_data.result, &infobox_matrix, infobox_count = add_infobox_to_collection(&dict_data.result, &infobox_matrix,
&infobox_inner_counts, infobox_count); &infobox_inner_counts, infobox_count);
} }
if (calc_data.success) { if (calc_data.success) {
infobox_count = add_infobox_to_collection(&calc_data.result, &infobox_matrix, infobox_count = add_infobox_to_collection(&calc_data.result, &infobox_matrix,
&infobox_inner_counts, infobox_count); &infobox_inner_counts, infobox_count);
} }
if (unit_data.success) { if (unit_data.success) {
infobox_count = add_infobox_to_collection(&unit_data.result, &infobox_matrix, infobox_count = add_infobox_to_collection(&unit_data.result, &infobox_matrix,
&infobox_inner_counts, infobox_count); &infobox_inner_counts, infobox_count);
} }
if (wiki_data.success) { if (wiki_data.success) {
infobox_count = add_infobox_to_collection(&wiki_data.result, &infobox_matrix, infobox_count = add_infobox_to_collection(&wiki_data.result, &infobox_matrix,
&infobox_inner_counts, infobox_count); &infobox_inner_counts, infobox_count);
} }
} }
if (infobox_count > 0) { if (infobox_count > 0) {
context_set_array_of_arrays(&ctx, "infoboxes", infobox_matrix, context_set_array_of_arrays(&ctx, "infoboxes", infobox_matrix,
infobox_count, infobox_inner_counts); infobox_count, infobox_inner_counts);
for (int i = 0; i < infobox_count; i++) { for (int i = 0; i < infobox_count; i++) {
for (int j = 0; j < 4; j++) free(infobox_matrix[i][j]); for (int j = 0; j < 4; j++) free(infobox_matrix[i][j]);
free(infobox_matrix[i]); free(infobox_matrix[i]);
} }
free(infobox_matrix); free(infobox_matrix);
free(infobox_inner_counts); free(infobox_inner_counts);
} }
int total_results = 0; int total_results = 0;
for (int i = 0; i < ENGINE_COUNT; i++) { for (int i = 0; i < ENGINE_COUNT; i++) {
total_results += jobs[i].results_count; total_results += jobs[i].results_count;
} }
if (total_results > 0) { if (total_results > 0) {
char ***results_matrix = (char ***)malloc(sizeof(char **) * total_results); char ***results_matrix = (char ***)malloc(sizeof(char **) * total_results);
int *results_inner_counts = (int *)malloc(sizeof(int) * total_results); int *results_inner_counts = (int *)malloc(sizeof(int) * total_results);
char **seen_urls = (char **)malloc(sizeof(char *) * total_results); char **seen_urls = (char **)malloc(sizeof(char *) * total_results);
int unique_count = 0; int unique_count = 0;
for (int i = 0; i < ENGINE_COUNT; i++) { for (int i = 0; i < ENGINE_COUNT; i++) {
for (int j = 0; j < jobs[i].results_count; j++) { for (int j = 0; j < jobs[i].results_count; j++) {
char *display_url = all_results[i][j].url; char *display_url = all_results[i][j].url;
int is_duplicate = 0; int is_duplicate = 0;
for (int k = 0; k < unique_count; k++) { for (int k = 0; k < unique_count; k++) {
if (strcmp(seen_urls[k], display_url) == 0) { if (strcmp(seen_urls[k], display_url) == 0) {
is_duplicate = 1; is_duplicate = 1;
break; break;
}
}
if (is_duplicate) {
free(all_results[i][j].url);
free(all_results[i][j].title);
free(all_results[i][j].snippet);
continue;
}
seen_urls[unique_count] = strdup(display_url);
results_matrix[unique_count] = (char **)malloc(sizeof(char *) * 4);
char *pretty_url = pretty_display_url(display_url);
results_matrix[unique_count][0] = strdup(display_url);
results_matrix[unique_count][1] = strdup(pretty_url);
results_matrix[unique_count][2] = all_results[i][j].title ? strdup(all_results[i][j].title) : strdup("Untitled");
results_matrix[unique_count][3] = all_results[i][j].snippet ? strdup(all_results[i][j].snippet) : strdup("");
results_inner_counts[unique_count] = 4;
free(pretty_url);
free(all_results[i][j].url);
free(all_results[i][j].title);
free(all_results[i][j].snippet);
unique_count++;
} }
free(all_results[i]);
} }
context_set_array_of_arrays(&ctx, "results", results_matrix, unique_count, results_inner_counts); if (is_duplicate) {
free(all_results[i][j].url);
char *html = render_template("results.html", &ctx); free(all_results[i][j].title);
if (html) { free(all_results[i][j].snippet);
send_response(html); continue;
free(html);
} }
for (int i = 0; i < unique_count; i++) { seen_urls[unique_count] = strdup(display_url);
for (int j = 0; j < 4; j++) free(results_matrix[i][j]); results_matrix[unique_count] = (char **)malloc(sizeof(char *) * 4);
free(results_matrix[i]); char *pretty_url = pretty_display_url(display_url);
free(seen_urls[i]);
results_matrix[unique_count][0] = strdup(display_url);
results_matrix[unique_count][1] = strdup(pretty_url);
results_matrix[unique_count][2] = all_results[i][j].title ? strdup(all_results[i][j].title) : strdup("Untitled");
results_matrix[unique_count][3] = all_results[i][j].snippet ? strdup(all_results[i][j].snippet) : strdup("");
results_inner_counts[unique_count] = 4;
free(pretty_url);
free(all_results[i][j].url);
free(all_results[i][j].title);
free(all_results[i][j].snippet);
unique_count++;
} }
free(seen_urls); free(all_results[i]);
free(results_matrix); }
free(results_inner_counts);
context_set_array_of_arrays(&ctx, "results", results_matrix, unique_count, results_inner_counts);
char *html = render_template("results.html", &ctx);
if (html) {
send_response(html);
free(html);
}
for (int i = 0; i < unique_count; i++) {
for (int j = 0; j < 4; j++) free(results_matrix[i][j]);
free(results_matrix[i]);
free(seen_urls[i]);
}
free(seen_urls);
free(results_matrix);
free(results_inner_counts);
} else { } else {
char *html = render_template("results.html", &ctx); char *html = render_template("results.html", &ctx);
if (html) { if (html) {
send_response(html); send_response(html);
free(html); free(html);
} }
} }
if (page == 1) { if (page == 1) {
if (wiki_data.success) free_infobox(&wiki_data.result); if (wiki_data.success) free_infobox(&wiki_data.result);
if (calc_data.success) free_infobox(&calc_data.result); if (calc_data.success) free_infobox(&calc_data.result);
if (dict_data.success) free_infobox(&dict_data.result); if (dict_data.success) free_infobox(&dict_data.result);
if (unit_data.success) free_infobox(&unit_data.result); if (unit_data.success) free_infobox(&unit_data.result);
} }
free_context(&ctx); free_context(&ctx);

View File

@@ -11,21 +11,21 @@
#include <unistd.h> #include <unistd.h>
static size_t WriteMemoryCallback(void *contents, size_t size, size_t nmemb, static size_t WriteMemoryCallback(void *contents, size_t size, size_t nmemb,
void *userp) { void *userp) {
size_t realsize = size * nmemb; size_t realsize = size * nmemb;
MemoryBuffer *mem = (MemoryBuffer *)userp; MemoryBuffer *mem = (MemoryBuffer *)userp;
if (mem->size + realsize + 1 > mem->capacity) { if (mem->size + realsize + 1 > mem->capacity) {
size_t new_cap = mem->capacity == 0 ? 16384 : mem->capacity * 2; size_t new_cap = mem->capacity == 0 ? 16384 : mem->capacity * 2;
while (new_cap < mem->size + realsize + 1) new_cap *= 2; while (new_cap < mem->size + realsize + 1) new_cap *= 2;
char *ptr = (char *)realloc(mem->memory, new_cap); char *ptr = (char *)realloc(mem->memory, new_cap);
if (!ptr) { if (!ptr) {
return 0; return 0;
} }
mem->memory = ptr; mem->memory = ptr;
mem->capacity = new_cap; mem->capacity = new_cap;
} }
memcpy(&(mem->memory[mem->size]), contents, realsize); memcpy(&(mem->memory[mem->size]), contents, realsize);
@@ -37,37 +37,37 @@ static size_t WriteMemoryCallback(void *contents, size_t size, size_t nmemb,
/**
 * Pick one User-Agent string from a fixed pool of desktop browser
 * identities.
 *
 * The choice is made with rand(), so the sequence of agents returned
 * depends on the state of the C library's pseudo-random generator.
 *
 * Returns a pointer to a string literal with static storage duration; the
 * caller must not modify or free it.
 */
static const char *get_random_user_agent() {
  static const char *agents[] = {
      "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, "
      "like Gecko) Chrome/120.0.0.0 Safari/537.36",
      "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 "
      "(KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36",
      /* A stray backtick after the Chrome version used to be embedded in
       * this entry, producing a malformed header whenever it was chosen. */
      "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like "
      "Gecko) "
      "Chrome/120.0.0.0 Safari/537.36",
      "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 "
      "Firefox/121.0",
      "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 "
      "(KHTML, like Gecko) Version/17.2 Safari/605.1.15"};

  /* Derive the pool size from the table itself so that adding or removing
   * an entry can never leave the modulus out of step with the array. */
  const size_t agent_count = sizeof(agents) / sizeof(agents[0]);

  /* rand() is non-negative, so the conversion to size_t is value-preserving. */
  return agents[(size_t)rand() % agent_count];
}
static int parse_ddg_lite(const char *engine_name, xmlDocPtr doc, static int parse_ddg_lite(const char *engine_name, xmlDocPtr doc,
SearchResult **out_results, int max_results) { SearchResult **out_results, int max_results) {
(void)engine_name; (void)engine_name;
int found_count = 0; int found_count = 0;
xmlXPathContextPtr xpathCtx = xmlXPathNewContext(doc); xmlXPathContextPtr xpathCtx = xmlXPathNewContext(doc);
if (!xpathCtx) { if (!xpathCtx) {
return 0; return 0;
} }
const char *link_xpath = "//tr[not(contains(@class, 'result-sponsored'))]//a[@class='result-link']"; const char *link_xpath = "//tr[not(contains(@class, 'result-sponsored'))]//a[@class='result-link']";
xmlXPathObjectPtr xpathObj = xmlXPathObjectPtr xpathObj =
xmlXPathEvalExpression((xmlChar *)link_xpath, xpathCtx); xmlXPathEvalExpression((xmlChar *)link_xpath, xpathCtx);
if (!xpathObj || !xpathObj->nodesetval || xpathObj->nodesetval->nodeNr == 0) { if (!xpathObj || !xpathObj->nodesetval || xpathObj->nodesetval->nodeNr == 0) {
if (xpathObj) xmlXPathFreeObject(xpathObj); if (xpathObj) xmlXPathFreeObject(xpathObj);
xmlXPathFreeContext(xpathCtx); xmlXPathFreeContext(xpathCtx);
return 0; return 0;
} }
int num_links = xpathObj->nodesetval->nodeNr; int num_links = xpathObj->nodesetval->nodeNr;
@@ -75,49 +75,49 @@ static int parse_ddg_lite(const char *engine_name, xmlDocPtr doc,
int actual_alloc = (num_links < max_results) ? num_links : max_results; int actual_alloc = (num_links < max_results) ? num_links : max_results;
*out_results = (SearchResult *)calloc(actual_alloc, sizeof(SearchResult)); *out_results = (SearchResult *)calloc(actual_alloc, sizeof(SearchResult));
if (!*out_results) { if (!*out_results) {
xmlXPathFreeObject(xpathObj); xmlXPathFreeObject(xpathObj);
xmlXPathFreeContext(xpathCtx); xmlXPathFreeContext(xpathCtx);
return 0; return 0;
} }
for (int i = 0; i < num_links && found_count < max_results; i++) { for (int i = 0; i < num_links && found_count < max_results; i++) {
xmlNodePtr linkNode = xpathObj->nodesetval->nodeTab[i]; xmlNodePtr linkNode = xpathObj->nodesetval->nodeTab[i];
char *title = (char *)xmlNodeGetContent(linkNode); char *title = (char *)xmlNodeGetContent(linkNode);
char *url = (char *)xmlGetProp(linkNode, (xmlChar *)"href"); char *url = (char *)xmlGetProp(linkNode, (xmlChar *)"href");
char *snippet_text = NULL; char *snippet_text = NULL;
xmlNodePtr current = linkNode->parent; xmlNodePtr current = linkNode->parent;
while (current && xmlStrcasecmp(current->name, (const xmlChar *)"tr") != 0) while (current && xmlStrcasecmp(current->name, (const xmlChar *)"tr") != 0)
current = current->parent; current = current->parent;
if (current && current->next) { if (current && current->next) {
xmlNodePtr snippetRow = current->next; xmlNodePtr snippetRow = current->next;
while (snippetRow && while (snippetRow &&
xmlStrcasecmp(snippetRow->name, (const xmlChar *)"tr") != 0) xmlStrcasecmp(snippetRow->name, (const xmlChar *)"tr") != 0)
snippetRow = snippetRow->next; snippetRow = snippetRow->next;
if (snippetRow) { if (snippetRow) {
xpathCtx->node = snippetRow; xpathCtx->node = snippetRow;
xmlXPathObjectPtr sObj = xmlXPathEvalExpression( xmlXPathObjectPtr sObj = xmlXPathEvalExpression(
(xmlChar *)".//td[@class='result-snippet']", xpathCtx); (xmlChar *)".//td[@class='result-snippet']", xpathCtx);
if (sObj && sObj->nodesetval && sObj->nodesetval->nodeNr > 0) { if (sObj && sObj->nodesetval && sObj->nodesetval->nodeNr > 0) {
snippet_text = (char *)xmlNodeGetContent(sObj->nodesetval->nodeTab[0]); snippet_text = (char *)xmlNodeGetContent(sObj->nodesetval->nodeTab[0]);
}
if (sObj) xmlXPathFreeObject(sObj);
xpathCtx->node = NULL;
}
} }
if (sObj) xmlXPathFreeObject(sObj);
xpathCtx->node = NULL;
(*out_results)[found_count].url = unescape_search_url(url); }
(*out_results)[found_count].title = strdup(title ? title : "No Title"); }
(*out_results)[found_count].snippet = strdup(snippet_text ? snippet_text : "");
found_count++; (*out_results)[found_count].url = unescape_search_url(url);
(*out_results)[found_count].title = strdup(title ? title : "No Title");
(*out_results)[found_count].snippet = strdup(snippet_text ? snippet_text : "");
if (title) xmlFree(title); found_count++;
if (url) xmlFree(url);
if (snippet_text) xmlFree(snippet_text); if (title) xmlFree(title);
if (url) xmlFree(url);
if (snippet_text) xmlFree(snippet_text);
} }
xmlXPathFreeObject(xpathObj); xmlXPathFreeObject(xpathObj);
@@ -126,22 +126,22 @@ static int parse_ddg_lite(const char *engine_name, xmlDocPtr doc,
} }
static int parse_startpage(const char *engine_name, xmlDocPtr doc, static int parse_startpage(const char *engine_name, xmlDocPtr doc,
SearchResult **out_results, int max_results) { SearchResult **out_results, int max_results) {
(void)engine_name; (void)engine_name;
int found_count = 0; int found_count = 0;
xmlXPathContextPtr xpathCtx = xmlXPathNewContext(doc); xmlXPathContextPtr xpathCtx = xmlXPathNewContext(doc);
if (!xpathCtx) { if (!xpathCtx) {
return 0; return 0;
} }
const char *container_xpath = "//div[contains(@class, 'result')]"; const char *container_xpath = "//div[contains(@class, 'result')]";
xmlXPathObjectPtr xpathObj = xmlXPathObjectPtr xpathObj =
xmlXPathEvalExpression((xmlChar *)container_xpath, xpathCtx); xmlXPathEvalExpression((xmlChar *)container_xpath, xpathCtx);
if (!xpathObj || !xpathObj->nodesetval || xpathObj->nodesetval->nodeNr == 0) { if (!xpathObj || !xpathObj->nodesetval || xpathObj->nodesetval->nodeNr == 0) {
if (xpathObj) xmlXPathFreeObject(xpathObj); if (xpathObj) xmlXPathFreeObject(xpathObj);
xmlXPathFreeContext(xpathCtx); xmlXPathFreeContext(xpathCtx);
return 0; return 0;
} }
int num_results = xpathObj->nodesetval->nodeNr; int num_results = xpathObj->nodesetval->nodeNr;
@@ -149,52 +149,52 @@ static int parse_startpage(const char *engine_name, xmlDocPtr doc,
int actual_alloc = (num_results < max_results) ? num_results : max_results; int actual_alloc = (num_results < max_results) ? num_results : max_results;
*out_results = (SearchResult *)calloc(actual_alloc, sizeof(SearchResult)); *out_results = (SearchResult *)calloc(actual_alloc, sizeof(SearchResult));
if (!*out_results) { if (!*out_results) {
xmlXPathFreeObject(xpathObj); xmlXPathFreeObject(xpathObj);
xmlXPathFreeContext(xpathCtx); xmlXPathFreeContext(xpathCtx);
return 0; return 0;
} }
for (int i = 0; i < num_results && found_count < max_results; i++) { for (int i = 0; i < num_results && found_count < max_results; i++) {
xmlNodePtr resultNode = xpathObj->nodesetval->nodeTab[i]; xmlNodePtr resultNode = xpathObj->nodesetval->nodeTab[i];
xpathCtx->node = resultNode; xpathCtx->node = resultNode;
xmlXPathObjectPtr linkObj = xmlXPathEvalExpression( xmlXPathObjectPtr linkObj = xmlXPathEvalExpression(
(xmlChar *)".//a[contains(@class, 'result-link')]", xpathCtx); (xmlChar *)".//a[contains(@class, 'result-link')]", xpathCtx);
char *url = char *url =
(linkObj && linkObj->nodesetval && linkObj->nodesetval->nodeNr > 0) (linkObj && linkObj->nodesetval && linkObj->nodesetval->nodeNr > 0)
? (char *)xmlGetProp(linkObj->nodesetval->nodeTab[0], ? (char *)xmlGetProp(linkObj->nodesetval->nodeTab[0],
(xmlChar *)"href") (xmlChar *)"href")
: NULL; : NULL;
xmlXPathObjectPtr titleObj = xmlXPathEvalExpression( xmlXPathObjectPtr titleObj = xmlXPathEvalExpression(
(xmlChar *)".//h2[contains(@class, 'wgl-title')]", xpathCtx); (xmlChar *)".//h2[contains(@class, 'wgl-title')]", xpathCtx);
char *title = char *title =
(titleObj && titleObj->nodesetval && titleObj->nodesetval->nodeNr > 0) (titleObj && titleObj->nodesetval && titleObj->nodesetval->nodeNr > 0)
? (char *)xmlNodeGetContent(titleObj->nodesetval->nodeTab[0]) ? (char *)xmlNodeGetContent(titleObj->nodesetval->nodeTab[0])
: NULL; : NULL;
xmlXPathObjectPtr snippetObj = xmlXPathEvalExpression( xmlXPathObjectPtr snippetObj = xmlXPathEvalExpression(
(xmlChar *)".//p[contains(@class, 'description')]", xpathCtx); (xmlChar *)".//p[contains(@class, 'description')]", xpathCtx);
char *snippet_text = char *snippet_text =
(snippetObj && snippetObj->nodesetval && (snippetObj && snippetObj->nodesetval &&
snippetObj->nodesetval->nodeNr > 0) snippetObj->nodesetval->nodeNr > 0)
? (char *)xmlNodeGetContent(snippetObj->nodesetval->nodeTab[0]) ? (char *)xmlNodeGetContent(snippetObj->nodesetval->nodeTab[0])
: NULL; : NULL;
if (url && title) { if (url && title) {
(*out_results)[found_count].url = strdup(url); (*out_results)[found_count].url = strdup(url);
(*out_results)[found_count].title = strdup(title); (*out_results)[found_count].title = strdup(title);
(*out_results)[found_count].snippet = (*out_results)[found_count].snippet =
strdup(snippet_text ? snippet_text : ""); strdup(snippet_text ? snippet_text : "");
found_count++; found_count++;
} }
if (title) xmlFree(title); if (title) xmlFree(title);
if (url) xmlFree(url); if (url) xmlFree(url);
if (snippet_text) xmlFree(snippet_text); if (snippet_text) xmlFree(snippet_text);
if (linkObj) xmlXPathFreeObject(linkObj); if (linkObj) xmlXPathFreeObject(linkObj);
if (titleObj) xmlXPathFreeObject(titleObj); if (titleObj) xmlXPathFreeObject(titleObj);
if (snippetObj) xmlXPathFreeObject(snippetObj); if (snippetObj) xmlXPathFreeObject(snippetObj);
} }
xpathCtx->node = NULL; xpathCtx->node = NULL;
@@ -205,22 +205,22 @@ static int parse_startpage(const char *engine_name, xmlDocPtr doc,
} }
static int parse_yahoo(const char *engine_name, xmlDocPtr doc, static int parse_yahoo(const char *engine_name, xmlDocPtr doc,
SearchResult **out_results, int max_results) { SearchResult **out_results, int max_results) {
(void)engine_name; (void)engine_name;
int found_count = 0; int found_count = 0;
xmlXPathContextPtr xpathCtx = xmlXPathNewContext(doc); xmlXPathContextPtr xpathCtx = xmlXPathNewContext(doc);
if (!xpathCtx) { if (!xpathCtx) {
return 0; return 0;
} }
const char *container_xpath = "//div[contains(@class, 'algo-sr')]"; const char *container_xpath = "//div[contains(@class, 'algo-sr')]";
xmlXPathObjectPtr xpathObj = xmlXPathObjectPtr xpathObj =
xmlXPathEvalExpression((xmlChar *)container_xpath, xpathCtx); xmlXPathEvalExpression((xmlChar *)container_xpath, xpathCtx);
if (!xpathObj || !xpathObj->nodesetval || xpathObj->nodesetval->nodeNr == 0) { if (!xpathObj || !xpathObj->nodesetval || xpathObj->nodesetval->nodeNr == 0) {
if (xpathObj) xmlXPathFreeObject(xpathObj); if (xpathObj) xmlXPathFreeObject(xpathObj);
xmlXPathFreeContext(xpathCtx); xmlXPathFreeContext(xpathCtx);
return 0; return 0;
} }
int num_results = xpathObj->nodesetval->nodeNr; int num_results = xpathObj->nodesetval->nodeNr;
@@ -228,53 +228,53 @@ static int parse_yahoo(const char *engine_name, xmlDocPtr doc,
int actual_alloc = (num_results < max_results) ? num_results : max_results; int actual_alloc = (num_results < max_results) ? num_results : max_results;
*out_results = (SearchResult *)calloc(actual_alloc, sizeof(SearchResult)); *out_results = (SearchResult *)calloc(actual_alloc, sizeof(SearchResult));
if (!*out_results) { if (!*out_results) {
xmlXPathFreeObject(xpathObj); xmlXPathFreeObject(xpathObj);
xmlXPathFreeContext(xpathCtx); xmlXPathFreeContext(xpathCtx);
return 0; return 0;
} }
for (int i = 0; i < num_results && found_count < max_results; i++) { for (int i = 0; i < num_results && found_count < max_results; i++) {
xmlNodePtr resultNode = xpathObj->nodesetval->nodeTab[i]; xmlNodePtr resultNode = xpathObj->nodesetval->nodeTab[i];
xpathCtx->node = resultNode; xpathCtx->node = resultNode;
xmlXPathObjectPtr linkObj = xmlXPathEvalExpression( xmlXPathObjectPtr linkObj = xmlXPathEvalExpression(
(xmlChar *)".//div[contains(@class, 'compTitle')]//a[@target='_blank']", (xmlChar *)".//div[contains(@class, 'compTitle')]//a[@target='_blank']",
xpathCtx); xpathCtx);
char *url = char *url =
(linkObj && linkObj->nodesetval && linkObj->nodesetval->nodeNr > 0) (linkObj && linkObj->nodesetval && linkObj->nodesetval->nodeNr > 0)
? (char *)xmlGetProp(linkObj->nodesetval->nodeTab[0], ? (char *)xmlGetProp(linkObj->nodesetval->nodeTab[0],
(xmlChar *)"href") (xmlChar *)"href")
: NULL; : NULL;
xmlXPathObjectPtr titleObj = xmlXPathEvalExpression( xmlXPathObjectPtr titleObj = xmlXPathEvalExpression(
(xmlChar *)".//h3[contains(@class, 'title')]", xpathCtx); (xmlChar *)".//h3[contains(@class, 'title')]", xpathCtx);
char *title = char *title =
(titleObj && titleObj->nodesetval && titleObj->nodesetval->nodeNr > 0) (titleObj && titleObj->nodesetval && titleObj->nodesetval->nodeNr > 0)
? (char *)xmlNodeGetContent(titleObj->nodesetval->nodeTab[0]) ? (char *)xmlNodeGetContent(titleObj->nodesetval->nodeTab[0])
: NULL; : NULL;
xmlXPathObjectPtr snippetObj = xmlXPathEvalExpression( xmlXPathObjectPtr snippetObj = xmlXPathEvalExpression(
(xmlChar *)".//div[contains(@class, 'compText')]//p", xpathCtx); (xmlChar *)".//div[contains(@class, 'compText')]//p", xpathCtx);
char *snippet_text = char *snippet_text =
(snippetObj && snippetObj->nodesetval && (snippetObj && snippetObj->nodesetval &&
snippetObj->nodesetval->nodeNr > 0) snippetObj->nodesetval->nodeNr > 0)
? (char *)xmlNodeGetContent(snippetObj->nodesetval->nodeTab[0]) ? (char *)xmlNodeGetContent(snippetObj->nodesetval->nodeTab[0])
: NULL; : NULL;
if (url && title) { if (url && title) {
(*out_results)[found_count].url = unescape_search_url(url); (*out_results)[found_count].url = unescape_search_url(url);
(*out_results)[found_count].title = strdup(title); (*out_results)[found_count].title = strdup(title);
(*out_results)[found_count].snippet = (*out_results)[found_count].snippet =
strdup(snippet_text ? snippet_text : ""); strdup(snippet_text ? snippet_text : "");
found_count++; found_count++;
} }
if (title) xmlFree(title); if (title) xmlFree(title);
if (url) xmlFree(url); if (url) xmlFree(url);
if (snippet_text) xmlFree(snippet_text); if (snippet_text) xmlFree(snippet_text);
if (linkObj) xmlXPathFreeObject(linkObj); if (linkObj) xmlXPathFreeObject(linkObj);
if (titleObj) xmlXPathFreeObject(titleObj); if (titleObj) xmlXPathFreeObject(titleObj);
if (snippetObj) xmlXPathFreeObject(snippetObj); if (snippetObj) xmlXPathFreeObject(snippetObj);
} }
xpathCtx->node = NULL; xpathCtx->node = NULL;
@@ -284,36 +284,36 @@ static int parse_yahoo(const char *engine_name, xmlDocPtr doc,
} }
const SearchEngine ENGINE_REGISTRY[] = { const SearchEngine ENGINE_REGISTRY[] = {
{.name = "DuckDuckGo Lite", {.name = "DuckDuckGo Lite",
.base_url = "https://lite.duckduckgo.com/lite/?q=", .base_url = "https://lite.duckduckgo.com/lite/?q=",
.host_header = "lite.duckduckgo.com", .host_header = "lite.duckduckgo.com",
.referer = "https://lite.duckduckgo.com/", .referer = "https://lite.duckduckgo.com/",
.page_param = "s", .page_param = "s",
.page_multiplier = 30, .page_multiplier = 30,
.page_base = 0, .page_base = 0,
.parser = parse_ddg_lite}, .parser = parse_ddg_lite},
{.name = "Startpage", {.name = "Startpage",
.base_url = "https://www.startpage.com/sp/search?q=", .base_url = "https://www.startpage.com/sp/search?q=",
.host_header = "www.startpage.com", .host_header = "www.startpage.com",
.referer = "https://www.startpage.com/", .referer = "https://www.startpage.com/",
.page_param = "page", .page_param = "page",
.page_multiplier = 1, .page_multiplier = 1,
.page_base = 1, .page_base = 1,
.parser = parse_startpage}, .parser = parse_startpage},
{.name = "Yahoo", {.name = "Yahoo",
.base_url = "https://search.yahoo.com/search?p=", .base_url = "https://search.yahoo.com/search?p=",
.host_header = "search.yahoo.com", .host_header = "search.yahoo.com",
.referer = "https://search.yahoo.com/", .referer = "https://search.yahoo.com/",
.page_param = "b", .page_param = "b",
.page_multiplier = 10, .page_multiplier = 10,
.page_base = 1, .page_base = 1,
.parser = parse_yahoo}}; .parser = parse_yahoo}};
const int ENGINE_COUNT = sizeof(ENGINE_REGISTRY) / sizeof(SearchEngine); const int ENGINE_COUNT = sizeof(ENGINE_REGISTRY) / sizeof(SearchEngine);
static void configure_curl_handle(CURL *curl, const char *full_url, static void configure_curl_handle(CURL *curl, const char *full_url,
MemoryBuffer *chunk, MemoryBuffer *chunk,
struct curl_slist *headers) { struct curl_slist *headers) {
curl_easy_setopt(curl, CURLOPT_URL, full_url); curl_easy_setopt(curl, CURLOPT_URL, full_url);
curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers); curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, WriteMemoryCallback); curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, WriteMemoryCallback);
@@ -340,62 +340,62 @@ int scrape_engines_parallel(ScrapeJob *jobs, int num_jobs) {
retry: retry:
CURLM *multi_handle = curl_multi_init(); CURLM *multi_handle = curl_multi_init();
if (!multi_handle) { if (!multi_handle) {
return -1; return -1;
} }
for (int i = 0; i < num_jobs; i++) { for (int i = 0; i < num_jobs; i++) {
ScrapeJob *job = &jobs[i]; ScrapeJob *job = &jobs[i];
if (job->handle) { if (job->handle) {
curl_easy_cleanup(job->handle); curl_easy_cleanup(job->handle);
job->handle = NULL; job->handle = NULL;
} }
if (job->response.memory) { if (job->response.memory) {
free(job->response.memory); free(job->response.memory);
} }
job->handle = curl_easy_init(); job->handle = curl_easy_init();
if (!job->handle) { if (!job->handle) {
continue; continue;
} }
job->response.memory = (char *)malloc(16384); job->response.memory = (char *)malloc(16384);
job->response.size = 0; job->response.size = 0;
job->response.capacity = 16384; job->response.capacity = 16384;
char full_url[1024]; char full_url[1024];
char *encoded_query = curl_easy_escape(job->handle, job->query, 0); char *encoded_query = curl_easy_escape(job->handle, job->query, 0);
if (!encoded_query) { if (!encoded_query) {
curl_easy_cleanup(job->handle); curl_easy_cleanup(job->handle);
job->handle = NULL; job->handle = NULL;
continue; continue;
} }
int page = (job->page < 1) ? 1 : job->page; int page = (job->page < 1) ? 1 : job->page;
int page_value = (page - 1) * job->engine->page_multiplier + job->engine->page_base; int page_value = (page - 1) * job->engine->page_multiplier + job->engine->page_base;
snprintf(full_url, sizeof(full_url), "%s%s&%s=%d", snprintf(full_url, sizeof(full_url), "%s%s&%s=%d",
job->engine->base_url, job->engine->base_url,
encoded_query, encoded_query,
job->engine->page_param, job->engine->page_param,
page_value); page_value);
curl_free(encoded_query); curl_free(encoded_query);
struct curl_slist *headers = NULL; struct curl_slist *headers = NULL;
char host_buf[256], ref_buf[256]; char host_buf[256], ref_buf[256];
snprintf(host_buf, sizeof(host_buf), "Host: %s", job->engine->host_header); snprintf(host_buf, sizeof(host_buf), "Host: %s", job->engine->host_header);
snprintf(ref_buf, sizeof(ref_buf), "Referer: %s", job->engine->referer); snprintf(ref_buf, sizeof(ref_buf), "Referer: %s", job->engine->referer);
headers = curl_slist_append(headers, host_buf); headers = curl_slist_append(headers, host_buf);
headers = curl_slist_append(headers, ref_buf); headers = curl_slist_append(headers, ref_buf);
headers = curl_slist_append(headers, "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"); headers = curl_slist_append(headers, "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8");
headers = curl_slist_append(headers, "Accept-Language: en-US,en;q=0.5"); headers = curl_slist_append(headers, "Accept-Language: en-US,en;q=0.5");
headers = curl_slist_append(headers, "DNT: 1"); headers = curl_slist_append(headers, "DNT: 1");
configure_curl_handle(job->handle, full_url, &job->response, headers); configure_curl_handle(job->handle, full_url, &job->response, headers);
curl_easy_setopt(job->handle, CURLOPT_PRIVATE, headers); curl_easy_setopt(job->handle, CURLOPT_PRIVATE, headers);
curl_multi_add_handle(multi_handle, job->handle); curl_multi_add_handle(multi_handle, job->handle);
} }
usleep(100000 + (rand() % 100000)); usleep(100000 + (rand() % 100000));
@@ -404,86 +404,86 @@ retry:
curl_multi_perform(multi_handle, &still_running); curl_multi_perform(multi_handle, &still_running);
do { do {
int numfds = 0; int numfds = 0;
CURLMcode mc = curl_multi_wait(multi_handle, NULL, 0, 1000, &numfds); CURLMcode mc = curl_multi_wait(multi_handle, NULL, 0, 1000, &numfds);
if (mc != CURLM_OK) { if (mc != CURLM_OK) {
break; break;
} }
curl_multi_perform(multi_handle, &still_running); curl_multi_perform(multi_handle, &still_running);
} while (still_running); } while (still_running);
CURLMsg *msg; CURLMsg *msg;
int msgs_left; int msgs_left;
while ((msg = curl_multi_info_read(multi_handle, &msgs_left))) { while ((msg = curl_multi_info_read(multi_handle, &msgs_left))) {
if (msg->msg == CURLMSG_DONE) { if (msg->msg == CURLMSG_DONE) {
CURL *handle = msg->easy_handle; CURL *handle = msg->easy_handle;
for (int i = 0; i < num_jobs; i++) { for (int i = 0; i < num_jobs; i++) {
if (jobs[i].handle && jobs[i].handle == handle) { if (jobs[i].handle && jobs[i].handle == handle) {
ScrapeJob *job = &jobs[i]; ScrapeJob *job = &jobs[i];
long response_code; long response_code;
curl_easy_getinfo(handle, CURLINFO_RESPONSE_CODE, &response_code); curl_easy_getinfo(handle, CURLINFO_RESPONSE_CODE, &response_code);
if (msg->data.result == CURLE_OK && job->response.size > 0) { if (msg->data.result == CURLE_OK && job->response.size > 0) {
xmlDocPtr doc = htmlReadMemory( xmlDocPtr doc = htmlReadMemory(
job->response.memory, job->response.size, NULL, NULL, job->response.memory, job->response.size, NULL, NULL,
HTML_PARSE_RECOVER | HTML_PARSE_NOERROR | HTML_PARSE_NOWARNING); HTML_PARSE_RECOVER | HTML_PARSE_NOERROR | HTML_PARSE_NOWARNING);
if (doc) { if (doc) {
job->results_count = job->engine->parser( job->results_count = job->engine->parser(
job->engine->name, doc, job->out_results, job->max_results); job->engine->name, doc, job->out_results, job->max_results);
xmlFreeDoc(doc); xmlFreeDoc(doc);
}
} else {
job->results_count = 0;
}
struct curl_slist *headers;
curl_easy_getinfo(handle, CURLINFO_PRIVATE, &headers);
if (headers) curl_slist_free_all(headers);
free(job->response.memory);
job->response.memory = NULL;
curl_multi_remove_handle(multi_handle, handle);
if (handle) curl_easy_cleanup(handle);
job->handle = NULL;
break;
}
} }
} else {
job->results_count = 0;
}
struct curl_slist *headers;
curl_easy_getinfo(handle, CURLINFO_PRIVATE, &headers);
if (headers) curl_slist_free_all(headers);
free(job->response.memory);
job->response.memory = NULL;
curl_multi_remove_handle(multi_handle, handle);
if (handle) curl_easy_cleanup(handle);
job->handle = NULL;
break;
} }
}
}
} }
curl_multi_cleanup(multi_handle); curl_multi_cleanup(multi_handle);
if (retries < max_proxy_retries && proxy_count > 0) { if (retries < max_proxy_retries && proxy_count > 0) {
int any_failed = 0; int any_failed = 0;
for (int i = 0; i < num_jobs; i++) { for (int i = 0; i < num_jobs; i++) {
if (jobs[i].results_count == 0 && jobs[i].response.size == 0) { if (jobs[i].results_count == 0 && jobs[i].response.size == 0) {
any_failed = 1; any_failed = 1;
break; break;
}
}
if (any_failed) {
retries++;
goto retry;
} }
} }
if (any_failed) {
retries++;
goto retry;
}
}
return 0; return 0;
} }
int scrape_engine(const SearchEngine *engine, const char *query, int scrape_engine(const SearchEngine *engine, const char *query,
SearchResult **out_results, int max_results) { SearchResult **out_results, int max_results) {
ScrapeJob job = { ScrapeJob job = {
.engine = engine, .engine = engine,
.query = (char *)query, .query = (char *)query,
.out_results = out_results, .out_results = out_results,
.max_results = max_results, .max_results = max_results,
.results_count = 0, .results_count = 0,
.page = 1 .page = 1
}; };
scrape_engines_parallel(&job, 1); scrape_engines_parallel(&job, 1);

View File

@@ -11,7 +11,7 @@ typedef struct {
} SearchResult; } SearchResult;
typedef int (*ParserFunc)(const char *engine_name, xmlDocPtr doc, typedef int (*ParserFunc)(const char *engine_name, xmlDocPtr doc,
SearchResult **out_results, int max_results); SearchResult **out_results, int max_results);
typedef struct { typedef struct {
const char *name; const char *name;
@@ -20,8 +20,8 @@ typedef struct {
const char *referer; const char *referer;
const char *page_param; const char *page_param;
int page_multiplier; int page_multiplier;
int page_base; int page_base;
ParserFunc parser; ParserFunc parser;
} SearchEngine; } SearchEngine;
@@ -46,7 +46,7 @@ extern const SearchEngine ENGINE_REGISTRY[];
extern const int ENGINE_COUNT; extern const int ENGINE_COUNT;
int scrape_engine(const SearchEngine *engine, const char *query, int scrape_engine(const SearchEngine *engine, const char *query,
SearchResult **out_results, int max_results); SearchResult **out_results, int max_results);
int scrape_engines_parallel(ScrapeJob *jobs, int num_jobs); int scrape_engines_parallel(ScrapeJob *jobs, int num_jobs);

View File

@@ -5,42 +5,42 @@
#include <strings.h> #include <strings.h>
/*
 * Render a URL as a lower-case "breadcrumb" string for display.
 *
 * Processing steps:
 *   1. Everything up to and including the first "://" is dropped.
 *   2. A leading "www." (any letter case) is dropped.
 *   3. At most 511 characters of what remains are rendered; anything
 *      beyond that is silently discarded.
 *   4. A single trailing '/' is removed when the URL was not truncated.
 *   5. Every remaining '/' becomes " > " and every other character is
 *      passed through tolower().
 *
 * Returns a newly malloc()ed, NUL-terminated string that the caller must
 * free(), or NULL when input is NULL or the allocation fails.
 */
char *pretty_display_url(const char *input) {
  /* Maximum number of source characters that are rendered. */
  const size_t max_display_chars = 511;

  if (!input) return NULL;

  const char *start = input;
  const char *protocol_pos = strstr(input, "://");
  if (protocol_pos) {
    start = protocol_pos + 3;
  }

  if (strncasecmp(start, "www.", 4) == 0) {
    start += 4;
  }

  /*
   * Clamp the length before it is ever used as an index, so arbitrarily
   * long URLs cannot cause an out-of-bounds access. The trailing slash is
   * only stripped when it really is the end of the URL, i.e. when nothing
   * was cut off by the clamp.
   */
  size_t len = strlen(start);
  if (len > max_display_chars) {
    len = max_display_chars;
  } else if (len > 0 && start[len - 1] == '/') {
    len--;
  }

  /* Worst case: every character is a '/' that expands to three bytes. */
  char *output = (char *)malloc(len * 3 + 1);
  if (!output) return NULL;

  size_t j = 0;
  for (size_t i = 0; i < len; i++) {
    if (start[i] == '/') {
      output[j++] = ' ';
      output[j++] = '>';
      output[j++] = ' ';
    } else {
      /* Cast keeps tolower() defined for bytes >= 0x80. */
      output[j++] = (char)tolower((unsigned char)start[i]);
    }
  }
  output[j] = '\0';

  return output;
}

View File

@@ -1,8 +1,8 @@
#include "Utility.h" #include "Utility.h"
/*
 * Convert one hexadecimal digit character to its numeric value.
 *
 * Accepts '0'-'9', 'a'-'f' and 'A'-'F'; returns the value 0..15 for
 * those, or -1 for any other character.
 */
int hex_to_int(char c) {
  int value = -1;

  switch (c) {
    case '0': case '1': case '2': case '3': case '4':
    case '5': case '6': case '7': case '8': case '9':
      value = c - '0';
      break;
    case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
      value = 10 + (c - 'a');
      break;
    case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
      value = 10 + (c - 'A');
      break;
    default:
      break;
  }

  return value;
}