those who commit

This commit is contained in:
frosty
2026-03-10 02:32:51 -04:00
parent 24cec7a350
commit a11bf8bb6c
19 changed files with 1537 additions and 1284 deletions

View File

@@ -24,7 +24,8 @@ int load_config(const char *filename, Config *config) {
char *end = strchr(line, ']'); char *end = strchr(line, ']');
if (end) { if (end) {
*end = '\0'; *end = '\0';
snprintf(section, sizeof(section), "%.*s", (int)(sizeof(section) - 1), line + 1); snprintf(section, sizeof(section), "%.*s", (int)(sizeof(section) - 1),
line + 1);
section[sizeof(section) - 1] = '\0'; section[sizeof(section) - 1] = '\0';
} }
continue; continue;
@@ -36,8 +37,10 @@ int load_config(const char *filename, Config *config) {
char *key = line; char *key = line;
char *value = delimiter + 1; char *value = delimiter + 1;
while (*key == ' ' || *key == '\t') key++; while (*key == ' ' || *key == '\t')
while (*value == ' ' || *value == '\t') value++; key++;
while (*value == ' ' || *value == '\t')
value++;
char *key_end = key + strlen(key) - 1; char *key_end = key + strlen(key) - 1;
while (key_end > key && (*key_end == ' ' || *key_end == '\t')) { while (key_end > key && (*key_end == ' ' || *key_end == '\t')) {
@@ -46,12 +49,14 @@ int load_config(const char *filename, Config *config) {
} }
char *value_end = value + strlen(value) - 1; char *value_end = value + strlen(value) - 1;
while (value_end > value && (*value_end == ' ' || *value_end == '\t' || *value_end == '"' || *value_end == '\'')) { while (value_end > value && (*value_end == ' ' || *value_end == '\t' ||
*value_end == '"' || *value_end == '\'')) {
*value_end = '\0'; *value_end = '\0';
value_end--; value_end--;
} }
while (*value == '"' || *value == '\'') value++; while (*value == '"' || *value == '\'')
value++;
if (strcmp(section, "server") == 0) { if (strcmp(section, "server") == 0) {
if (strcmp(key, "host") == 0) { if (strcmp(key, "host") == 0) {
@@ -65,7 +70,8 @@ int load_config(const char *filename, Config *config) {
strncpy(config->proxy, value, sizeof(config->proxy) - 1); strncpy(config->proxy, value, sizeof(config->proxy) - 1);
config->proxy[sizeof(config->proxy) - 1] = '\0'; config->proxy[sizeof(config->proxy) - 1] = '\0';
} else if (strcmp(key, "list_file") == 0) { } else if (strcmp(key, "list_file") == 0) {
strncpy(config->proxy_list_file, value, sizeof(config->proxy_list_file) - 1); strncpy(config->proxy_list_file, value,
sizeof(config->proxy_list_file) - 1);
config->proxy_list_file[sizeof(config->proxy_list_file) - 1] = '\0'; config->proxy_list_file[sizeof(config->proxy_list_file) - 1] = '\0';
} else if (strcmp(key, "max_retries") == 0) { } else if (strcmp(key, "max_retries") == 0) {
config->max_proxy_retries = atoi(value); config->max_proxy_retries = atoi(value);

View File

@@ -1,9 +1,9 @@
#include "Calculator.h" #include "Calculator.h"
#include <ctype.h>
#include <math.h>
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
#include <string.h> #include <string.h>
#include <math.h>
#include <ctype.h>
static char logic_log[4096]; static char logic_log[4096];
@@ -15,7 +15,8 @@ typedef struct {
static double parse_expression(Parser *p); static double parse_expression(Parser *p);
static void skip_ws(Parser *p) { static void skip_ws(Parser *p) {
while (p->buffer[p->pos] == ' ') p->pos++; while (p->buffer[p->pos] == ' ')
p->pos++;
} }
static double parse_factor(Parser *p) { static double parse_factor(Parser *p) {
@@ -27,7 +28,8 @@ static double parse_factor(Parser *p) {
if (p->buffer[p->pos] == '(') { if (p->buffer[p->pos] == '(') {
p->pos++; p->pos++;
double res = parse_expression(p); double res = parse_expression(p);
if (p->buffer[p->pos] == ')') p->pos++; if (p->buffer[p->pos] == ')')
p->pos++;
return res; return res;
} }
char *endptr; char *endptr;
@@ -82,14 +84,16 @@ static double parse_expression(Parser *p) {
double evaluate(const char *expr) { double evaluate(const char *expr) {
logic_log[0] = '\0'; logic_log[0] = '\0';
if (!expr || strlen(expr) == 0) return 0.0; if (!expr || strlen(expr) == 0)
return 0.0;
Parser p = {expr, 0}; Parser p = {expr, 0};
return parse_expression(&p); return parse_expression(&p);
} }
InfoBox fetch_calc_data(char *math_input) { InfoBox fetch_calc_data(char *math_input) {
InfoBox info = {NULL, NULL, NULL, NULL}; InfoBox info = {NULL, NULL, NULL, NULL};
if (!math_input) return info; if (!math_input)
return info;
double result = evaluate(math_input); double result = evaluate(math_input);
@@ -107,8 +111,7 @@ InfoBox fetch_calc_data(char *math_input) {
info.title = strdup("Calculation"); info.title = strdup("Calculation");
info.extract = strdup(html_output); info.extract = strdup(html_output);
info.thumbnail_url = info.thumbnail_url = strdup("/static/calculation.svg");
strdup("/static/calculation.svg");
info.url = strdup("#"); info.url = strdup("#");
return info; return info;

View File

@@ -1,6 +1,7 @@
#include "Dictionary.h" #include "Dictionary.h"
#include "../Proxy/Proxy.h" #include "../Proxy/Proxy.h"
#include "../Scraping/Scraping.h" #include "../Scraping/Scraping.h"
#include <ctype.h>
#include <curl/curl.h> #include <curl/curl.h>
#include <libxml/HTMLparser.h> #include <libxml/HTMLparser.h>
#include <libxml/xpath.h> #include <libxml/xpath.h>
@@ -8,42 +9,60 @@
#include <stdlib.h> #include <stdlib.h>
#include <string.h> #include <string.h>
#include <strings.h> #include <strings.h>
#include <ctype.h>
static const char *PREFIXES[] = { static const char *PREFIXES[] = {"what is the definition of ",
"what is the definition of ", "what's the definition of ", "what's the definition of ",
"what is the meaning of ", "what's the meaning of ", "what is the meaning of ",
"what does the word ", "definition of ", "meaning of ", "def of ", "what's the meaning of ",
"define ", "definition ", "define:", "def ", "def:", "what does the word ",
"what does ", "what is ", "what's ", "whats ", "definition of ",
"meaning ", "dictionary ", "dict ", NULL "meaning of ",
}; "def of ",
"define ",
"definition ",
"define:",
"def ",
"def:",
"what does ",
"what is ",
"what's ",
"whats ",
"meaning ",
"dictionary ",
"dict ",
NULL};
static const char *SUFFIXES[] = { static const char *SUFFIXES[] = {
" definition", " def", " meaning", " mean", " means", " definition", " def", " meaning", " mean", " means",
" dictionary", " dict", " define", " defined", " dictionary", " dict", " define", " defined", " definition?",
" definition?", " def?", " meaning?", " mean?", " means?", " def?", " meaning?", " mean?", " means?", " in english",
" in english", " in english?", NULL " in english?", NULL};
};
static const char *SKIP_WORDS[] = {"of ", "the ", "a ", "an ", NULL}; static const char *SKIP_WORDS[] = {"of ", "the ", "a ", "an ", NULL};
static const char *strcasestr_impl(const char *haystack, const char *needle) { static const char *strcasestr_impl(const char *haystack, const char *needle) {
if (!haystack || !needle || !*needle) return haystack; if (!haystack || !needle || !*needle)
return haystack;
size_t len = strlen(needle); size_t len = strlen(needle);
for (const char *h = haystack; *h; h++) { for (const char *h = haystack; *h; h++) {
if (strncasecmp(h, needle, len) == 0) return h; if (strncasecmp(h, needle, len) == 0)
return h;
} }
return NULL; return NULL;
} }
struct MemStruct { char *memory; size_t size; }; struct MemStruct {
char *memory;
size_t size;
};
static size_t WriteCallback(void *contents, size_t size, size_t nmemb, void *userp) { static size_t WriteCallback(void *contents, size_t size, size_t nmemb,
void *userp) {
size_t realsize = size * nmemb; size_t realsize = size * nmemb;
struct MemStruct *mem = (struct MemStruct *)userp; struct MemStruct *mem = (struct MemStruct *)userp;
char *ptr = realloc(mem->memory, mem->size + realsize + 1); char *ptr = realloc(mem->memory, mem->size + realsize + 1);
if (!ptr) return 0; if (!ptr)
return 0;
mem->memory = ptr; mem->memory = ptr;
memcpy(&(mem->memory[mem->size]), contents, realsize); memcpy(&(mem->memory[mem->size]), contents, realsize);
mem->size += realsize; mem->size += realsize;
@@ -53,16 +72,19 @@ static size_t WriteCallback(void *contents, size_t size, size_t nmemb, void *use
static char *xpath_text(xmlDocPtr doc, const char *xpath) { static char *xpath_text(xmlDocPtr doc, const char *xpath) {
xmlXPathContextPtr ctx = xmlXPathNewContext(doc); xmlXPathContextPtr ctx = xmlXPathNewContext(doc);
if (!ctx) return NULL; if (!ctx)
return NULL;
xmlXPathObjectPtr obj = xmlXPathEvalExpression((const xmlChar *)xpath, ctx); xmlXPathObjectPtr obj = xmlXPathEvalExpression((const xmlChar *)xpath, ctx);
xmlXPathFreeContext(ctx); xmlXPathFreeContext(ctx);
if (!obj || !obj->nodesetval || obj->nodesetval->nodeNr == 0) { if (!obj || !obj->nodesetval || obj->nodesetval->nodeNr == 0) {
if (obj) xmlXPathFreeObject(obj); if (obj)
xmlXPathFreeObject(obj);
return NULL; return NULL;
} }
xmlChar *content = xmlNodeGetContent(obj->nodesetval->nodeTab[0]); xmlChar *content = xmlNodeGetContent(obj->nodesetval->nodeTab[0]);
char *result = content ? strdup((char *)content) : NULL; char *result = content ? strdup((char *)content) : NULL;
if (content) xmlFree(content); if (content)
xmlFree(content);
xmlXPathFreeObject(obj); xmlXPathFreeObject(obj);
return result; return result;
} }
@@ -70,23 +92,37 @@ static char *xpath_text(xmlDocPtr doc, const char *xpath) {
static char *build_html(const char *word, const char *pron, const char *pos, static char *build_html(const char *word, const char *pron, const char *pos,
const char *def, const char *ex) { const char *def, const char *ex) {
char html[4096]; char html[4096];
int n = snprintf(html, sizeof(html), "<div class='dict-container' style='line-height: 1.6;'>"); int n = snprintf(html, sizeof(html),
if (word) n += snprintf(html + n, sizeof(html) - n, "<div class='dict-container' style='line-height: 1.6;'>");
"<div style='font-size: 1.3em; font-weight: bold; margin-bottom: 4px;'>%s</div>", word); if (word)
if (pron) n += snprintf(html + n, sizeof(html) - n, n += snprintf(html + n, sizeof(html) - n,
"<div style='color: #666; margin-bottom: 8px;'>/%s/</div>", pron); "<div style='font-size: 1.3em; font-weight: bold; "
if (pos) n += snprintf(html + n, sizeof(html) - n, "margin-bottom: 4px;'>%s</div>",
"<div style='font-style: italic; color: #888; margin-bottom: 8px;'>%s</div>", pos); word);
if (def) n += snprintf(html + n, sizeof(html) - n, if (pron)
n += snprintf(html + n, sizeof(html) - n,
"<div style='color: #666; margin-bottom: 8px;'>/%s/</div>",
pron);
if (pos)
n += snprintf(html + n, sizeof(html) - n,
"<div style='font-style: italic; color: #888; margin-bottom: "
"8px;'>%s</div>",
pos);
if (def)
n += snprintf(html + n, sizeof(html) - n,
"<div style='margin-bottom: 8px;'>%s</div>", def); "<div style='margin-bottom: 8px;'>%s</div>", def);
if (ex) n += snprintf(html + n, sizeof(html) - n, if (ex)
"<div style='color: #555; font-style: italic; margin-top: 8px;'>\"%s\"</div>", ex); n += snprintf(html + n, sizeof(html) - n,
"<div style='color: #555; font-style: italic; margin-top: "
"8px;'>\"%s\"</div>",
ex);
snprintf(html + n, sizeof(html) - n, "</div>"); snprintf(html + n, sizeof(html) - n, "</div>");
return strdup(html); return strdup(html);
} }
static char *extract_word(const char *query) { static char *extract_word(const char *query) {
if (!query) return NULL; if (!query)
return NULL;
const char *start = query; const char *start = query;
@@ -98,9 +134,11 @@ static char *extract_word(const char *query) {
} }
} }
while (*start == ' ') start++; while (*start == ' ')
start++;
char *word = strdup(start); char *word = strdup(start);
if (!word) return NULL; if (!word)
return NULL;
int changed = 1; int changed = 1;
while (changed) { while (changed) {
@@ -135,24 +173,32 @@ static char *extract_word(const char *query) {
word[--len] = '\0'; word[--len] = '\0';
} }
if (len == 0) { free(word); return NULL; } if (len == 0) {
free(word);
return NULL;
}
for (size_t i = 0; i < len; i++) word[i] = tolower((unsigned char)word[i]); for (size_t i = 0; i < len; i++)
word[i] = tolower((unsigned char)word[i]);
char *space = strchr(word, ' '); char *space = strchr(word, ' ');
if (space) *space = '\0'; if (space)
*space = '\0';
return word; return word;
} }
int is_dictionary_query(const char *query) { int is_dictionary_query(const char *query) {
if (!query) return 0; if (!query)
return 0;
for (int i = 0; PREFIXES[i]; i++) { for (int i = 0; PREFIXES[i]; i++) {
size_t len = strlen(PREFIXES[i]); size_t len = strlen(PREFIXES[i]);
if (strncasecmp(query, PREFIXES[i], len) == 0) { if (strncasecmp(query, PREFIXES[i], len) == 0) {
const char *after = query + len; const char *after = query + len;
while (*after == ' ') after++; while (*after == ' ')
if (*after != '\0') return 1; after++;
if (*after != '\0')
return 1;
} }
} }
@@ -160,23 +206,29 @@ int is_dictionary_query(const char *query) {
const char *pos = strcasestr_impl(query, SUFFIXES[i]); const char *pos = strcasestr_impl(query, SUFFIXES[i]);
if (pos) { if (pos) {
const char *after = pos + strlen(SUFFIXES[i]); const char *after = pos + strlen(SUFFIXES[i]);
while (*after == ' ' || *after == '?' || *after == '!' || *after == '.') after++; while (*after == ' ' || *after == '?' || *after == '!' || *after == '.')
if (*after == '\0' && pos > query && (pos - query) < 100) return 1; after++;
if (*after == '\0' && pos > query && (pos - query) < 100)
return 1;
} }
} }
if (strncasecmp(query, "what is ", 8) == 0 || if (strncasecmp(query, "what is ", 8) == 0 ||
strncasecmp(query, "what's ", 7) == 0 || strncasecmp(query, "what's ", 7) == 0 ||
strncasecmp(query, "whats ", 6) == 0) { strncasecmp(query, "whats ", 6) == 0) {
const char *word = query + (strncasecmp(query, "what is ", 8) == 0 ? 8 : const char *word = query + (strncasecmp(query, "what is ", 8) == 0 ? 8
strncasecmp(query, "what's ", 7) == 0 ? 7 : 6); : strncasecmp(query, "what's ", 7) == 0 ? 7
const char *articles[] = {"the ", "your ", "my ", "his ", "her ", "their ", : 6);
"our ", "this ", "that ", "these ", "those ", "a ", "an ", NULL}; const char *articles[] = {"the ", "your ", "my ", "his ", "her ",
"their ", "our ", "this ", "that ", "these ",
"those ", "a ", "an ", NULL};
for (int i = 0; articles[i]; i++) { for (int i = 0; articles[i]; i++) {
if (strncasecmp(word, articles[i], strlen(articles[i])) == 0) return 0; if (strncasecmp(word, articles[i], strlen(articles[i])) == 0)
return 0;
} }
const char *space = strchr(word, ' '); const char *space = strchr(word, ' ');
if (!space || *(space + 1) == '\0' || *(space + 1) == '?') return 1; if (!space || *(space + 1) == '\0' || *(space + 1) == '?')
return 1;
} }
return 0; return 0;
@@ -184,10 +236,14 @@ int is_dictionary_query(const char *query) {
char *construct_dictionary_url(const char *query) { char *construct_dictionary_url(const char *query) {
char *word = extract_word(query); char *word = extract_word(query);
if (!word) return NULL; if (!word)
return NULL;
CURL *curl = curl_easy_init(); CURL *curl = curl_easy_init();
if (!curl) { free(word); return NULL; } if (!curl) {
free(word);
return NULL;
}
char *escaped = curl_easy_escape(curl, word, 0); char *escaped = curl_easy_escape(curl, word, 0);
const char *base = "https://dictionary.cambridge.org/dictionary/english/"; const char *base = "https://dictionary.cambridge.org/dictionary/english/";
@@ -207,10 +263,14 @@ InfoBox fetch_dictionary_data(const char *query) {
InfoBox info = {NULL, NULL, NULL, NULL}; InfoBox info = {NULL, NULL, NULL, NULL};
char *url = construct_dictionary_url(query); char *url = construct_dictionary_url(query);
if (!url) return info; if (!url)
return info;
CURL *curl = curl_easy_init(); CURL *curl = curl_easy_init();
if (!curl) { free(url); return info; } if (!curl) {
free(url);
return info;
}
struct MemStruct chunk = {malloc(1), 0}; struct MemStruct chunk = {malloc(1), 0};
curl_easy_setopt(curl, CURLOPT_URL, url); curl_easy_setopt(curl, CURLOPT_URL, url);
@@ -222,10 +282,13 @@ InfoBox fetch_dictionary_data(const char *query) {
if (curl_easy_perform(curl) == CURLE_OK && chunk.size > 0) { if (curl_easy_perform(curl) == CURLE_OK && chunk.size > 0) {
htmlDocPtr doc = htmlReadMemory(chunk.memory, chunk.size, url, NULL, htmlDocPtr doc = htmlReadMemory(chunk.memory, chunk.size, url, NULL,
HTML_PARSE_RECOVER | HTML_PARSE_NOERROR | HTML_PARSE_NOWARNING); HTML_PARSE_RECOVER | HTML_PARSE_NOERROR |
HTML_PARSE_NOWARNING);
if (doc) { if (doc) {
char *word = xpath_text(doc, "//span[@class='hw dhw']"); char *word = xpath_text(doc, "//span[@class='hw dhw']");
char *pron = xpath_text(doc, "//span[@class='us dpron-i']//span[@class='ipa dipa lpr-2 lpl-1']"); char *pron = xpath_text(
doc,
"//span[@class='us dpron-i']//span[@class='ipa dipa lpr-2 lpl-1']");
char *pos = xpath_text(doc, "//span[@class='pos dpos']"); char *pos = xpath_text(doc, "//span[@class='pos dpos']");
char *def = xpath_text(doc, "(//div[@class='def ddef_d db'])[1]"); char *def = xpath_text(doc, "(//div[@class='def ddef_d db'])[1]");
char *ex = xpath_text(doc, "(//span[@class='eg deg'])[1]"); char *ex = xpath_text(doc, "(//span[@class='eg deg'])[1]");
@@ -237,7 +300,11 @@ InfoBox fetch_dictionary_data(const char *query) {
info.url = strdup(url); info.url = strdup(url);
} }
free(word); free(pron); free(pos); free(def); free(ex); free(word);
free(pron);
free(pos);
free(def);
free(ex);
xmlFreeDoc(doc); xmlFreeDoc(doc);
} }
} }

View File

@@ -24,9 +24,18 @@ typedef struct {
static const UnitDef UNITS[] = { static const UnitDef UNITS[] = {
{"metre", {"m", "metres", "meter", "meters"}, UNIT_LENGTH, 1.0}, {"metre", {"m", "metres", "meter", "meters"}, UNIT_LENGTH, 1.0},
{"kilometre", {"km", "kilometres", "kilometer", "kilometers"}, UNIT_LENGTH, 1000.0}, {"kilometre",
{"centimetre", {"cm", "centimetres", "centimeter", "centimeters"}, UNIT_LENGTH, 0.01}, {"km", "kilometres", "kilometer", "kilometers"},
{"millimetre", {"mm", "millimetres", "millimeter", "millimeters"}, UNIT_LENGTH, 0.001}, UNIT_LENGTH,
1000.0},
{"centimetre",
{"cm", "centimetres", "centimeter", "centimeters"},
UNIT_LENGTH,
0.01},
{"millimetre",
{"mm", "millimetres", "millimeter", "millimeters"},
UNIT_LENGTH,
0.001},
{"mile", {"mi", "miles"}, UNIT_LENGTH, 1609.344}, {"mile", {"mi", "miles"}, UNIT_LENGTH, 1609.344},
{"yard", {"yd", "yards"}, UNIT_LENGTH, 0.9144}, {"yard", {"yd", "yards"}, UNIT_LENGTH, 0.9144},
{"foot", {"ft", "feet", "'"}, UNIT_LENGTH, 0.3048}, {"foot", {"ft", "feet", "'"}, UNIT_LENGTH, 0.3048},
@@ -41,11 +50,17 @@ static const UnitDef UNITS[] = {
{"stone", {"st", "stones"}, UNIT_WEIGHT, 6.35029}, {"stone", {"st", "stones"}, UNIT_WEIGHT, 6.35029},
{"celsius", {"c", "°c", "degrees celsius", "degrees c"}, UNIT_TEMP, 1.0}, {"celsius", {"c", "°c", "degrees celsius", "degrees c"}, UNIT_TEMP, 1.0},
{"fahrenheit", {"f", "°f", "degrees fahrenheit", "degrees f"}, UNIT_TEMP, 1.0}, {"fahrenheit",
{"f", "°f", "degrees fahrenheit", "degrees f"},
UNIT_TEMP,
1.0},
{"kelvin", {"k", "degrees kelvin", "degrees k"}, UNIT_TEMP, 1.0}, {"kelvin", {"k", "degrees kelvin", "degrees k"}, UNIT_TEMP, 1.0},
{"litre", {"l", "litres", "liter", "liters"}, UNIT_VOLUME, 1.0}, {"litre", {"l", "litres", "liter", "liters"}, UNIT_VOLUME, 1.0},
{"millilitre", {"ml", "millilitres", "milliliter", "milliliters"}, UNIT_VOLUME, 0.001}, {"millilitre",
{"ml", "millilitres", "milliliter", "milliliters"},
UNIT_VOLUME,
0.001},
{"gallon", {"gal", "gallons"}, UNIT_VOLUME, 3.78541}, {"gallon", {"gal", "gallons"}, UNIT_VOLUME, 3.78541},
{"quart", {"qt", "quarts"}, UNIT_VOLUME, 0.946353}, {"quart", {"qt", "quarts"}, UNIT_VOLUME, 0.946353},
{"pint", {"pt", "pints"}, UNIT_VOLUME, 0.473176}, {"pint", {"pt", "pints"}, UNIT_VOLUME, 0.473176},
@@ -53,9 +68,18 @@ static const UnitDef UNITS[] = {
{"fluid ounce", {"fl oz", "fluid ounces"}, UNIT_VOLUME, 0.0295735}, {"fluid ounce", {"fl oz", "fluid ounces"}, UNIT_VOLUME, 0.0295735},
{"square metre", {"sqm", "sq m", "m2", "square metres"}, UNIT_AREA, 1.0}, {"square metre", {"sqm", "sq m", "m2", "square metres"}, UNIT_AREA, 1.0},
{"square foot", {"sqft", "sq ft", "ft2", "square feet"}, UNIT_AREA, 0.092903}, {"square foot",
{"square kilometre", {"sqkm", "sq km", "km2", "square kilometres"}, UNIT_AREA, 1000000.0}, {"sqft", "sq ft", "ft2", "square feet"},
{"square mile", {"sqmi", "sq mi", "mi2", "square miles"}, UNIT_AREA, 2589988.0}, UNIT_AREA,
0.092903},
{"square kilometre",
{"sqkm", "sq km", "km2", "square kilometres"},
UNIT_AREA,
1000000.0},
{"square mile",
{"sqmi", "sq mi", "mi2", "square miles"},
UNIT_AREA,
2589988.0},
{"acre", {"acres"}, UNIT_AREA, 4046.86}, {"acre", {"acres"}, UNIT_AREA, 4046.86},
{"hectare", {"ha", "hectares"}, UNIT_AREA, 10000.0}, {"hectare", {"ha", "hectares"}, UNIT_AREA, 10000.0},
@@ -75,7 +99,8 @@ static int is_whitespace(char c) {
} }
static const UnitDef *find_unit(const char *str) { static const UnitDef *find_unit(const char *str) {
if (!str || !*str) return NULL; if (!str || !*str)
return NULL;
size_t len = strlen(str); size_t len = strlen(str);
char normalized[64] = {0}; char normalized[64] = {0};
@@ -96,24 +121,23 @@ static const UnitDef *find_unit(const char *str) {
normalized[j] = '\0'; normalized[j] = '\0';
for (int i = 0; i < UNIT_COUNT; i++) { for (int i = 0; i < UNIT_COUNT; i++) {
if (strcmp(normalized, UNITS[i].name) == 0) return &UNITS[i]; if (strcmp(normalized, UNITS[i].name) == 0)
return &UNITS[i];
for (int k = 0; k < 4 && UNITS[i].alias[k]; k++) { for (int k = 0; k < 4 && UNITS[i].alias[k]; k++) {
if (strcmp(normalized, UNITS[i].alias[k]) == 0) return &UNITS[i]; if (strcmp(normalized, UNITS[i].alias[k]) == 0)
return &UNITS[i];
} }
} }
return NULL; return NULL;
} }
int is_unit_conv_query(const char *query) { int is_unit_conv_query(const char *query) {
if (!query) return 0; if (!query)
return 0;
const char *patterns[] = { const char *patterns[] = {" to ", " in ", " into ", " = ",
" to ", " in ", " into ", " equals ", " equal ", " convert ", " conversion ",
" = ", " equals ", " equal ", " -> ", " ", NULL};
" convert ", " conversion ",
" -> ", "",
NULL
};
int has_pattern = 0; int has_pattern = 0;
for (int i = 0; patterns[i]; i++) { for (int i = 0; patterns[i]; i++) {
@@ -129,14 +153,17 @@ int is_unit_conv_query(const char *query) {
const UnitDef *u = find_unit(last_space + 1); const UnitDef *u = find_unit(last_space + 1);
if (u) { if (u) {
const char *before = query; const char *before = query;
while (*before && is_whitespace(*before)) before++; while (*before && is_whitespace(*before))
before++;
const char *num_end = before; const char *num_end = before;
while (*num_end && while (*num_end &&
(isdigit(*num_end) || *num_end == '.' || *num_end == '-' || (isdigit(*num_end) || *num_end == '.' || *num_end == '-' ||
*num_end == '+' || *num_end == '/' || *num_end == '\'' || *num_end == '"')) { *num_end == '+' || *num_end == '/' || *num_end == '\'' ||
*num_end == '"')) {
num_end++; num_end++;
} }
if (num_end > before) has_pattern = 1; if (num_end > before)
has_pattern = 1;
} }
} }
} }
@@ -146,12 +173,14 @@ int is_unit_conv_query(const char *query) {
static double parse_value(const char **ptr) { static double parse_value(const char **ptr) {
const char *p = *ptr; const char *p = *ptr;
while (*p && is_whitespace(*p)) p++; while (*p && is_whitespace(*p))
p++;
double value = 0.0; double value = 0.0;
int has_num = 0; int has_num = 0;
if (*p == '-' || *p == '+') p++; if (*p == '-' || *p == '+')
p++;
while (*p >= '0' && *p <= '9') { while (*p >= '0' && *p <= '9') {
value = value * 10 + (*p - '0'); value = value * 10 + (*p - '0');
has_num = 1; has_num = 1;
@@ -198,7 +227,8 @@ static double parse_value(const char **ptr) {
p++; p++;
} }
} }
if (*p == '\'' || *p == '"') p++; if (*p == '\'' || *p == '"')
p++;
value += extra * (p[-1] == '\'' ? 0.3048 : 0.0254); value += extra * (p[-1] == '\'' ? 0.3048 : 0.0254);
} }
@@ -212,10 +242,13 @@ static double parse_value(const char **ptr) {
} }
static int is_separator(char c) { static int is_separator(char c) {
return is_whitespace(c) || c == ',' || c == '.' || c == '(' || c == ')' || c == '\0'; return is_whitespace(c) || c == ',' || c == '.' || c == '(' || c == ')' ||
c == '\0';
} }
static int parse_conversion_query(const char *query, double *value, const UnitDef **from_unit, const UnitDef **to_unit) { static int parse_conversion_query(const char *query, double *value,
const UnitDef **from_unit,
const UnitDef **to_unit) {
*value = 0; *value = 0;
*from_unit = NULL; *from_unit = NULL;
*to_unit = NULL; *to_unit = NULL;
@@ -223,15 +256,19 @@ static int parse_conversion_query(const char *query, double *value, const UnitDe
const char *value_end = query; const char *value_end = query;
*value = parse_value(&value_end); *value = parse_value(&value_end);
if (value_end == query) return 0; if (value_end == query)
return 0;
const char *p = value_end; const char *p = value_end;
while (*p && is_whitespace(*p)) p++; while (*p && is_whitespace(*p))
p++;
size_t remaining = strlen(p); size_t remaining = strlen(p);
if (remaining < 2) return 0; if (remaining < 2)
return 0;
const char *to_keywords[] = {" to ", " in ", " into ", " -> ", "", " = ", NULL}; const char *to_keywords[] = {" to ", " in ", " into ", " -> ",
"", " = ", NULL};
const char *to_pos = NULL; const char *to_pos = NULL;
size_t keyword_len = 0; size_t keyword_len = 0;
for (int i = 0; to_keywords[i]; i++) { for (int i = 0; to_keywords[i]; i++) {
@@ -262,36 +299,45 @@ static int parse_conversion_query(const char *query, double *value, const UnitDe
char from_part[64] = {0}; char from_part[64] = {0};
size_t from_len = to_pos - p - keyword_len; size_t from_len = to_pos - p - keyword_len;
if (from_len > 63) from_len = 63; if (from_len > 63)
from_len = 63;
strncpy(from_part, p, from_len); strncpy(from_part, p, from_len);
char *end_from = from_part + from_len; char *end_from = from_part + from_len;
while (end_from > from_part && is_whitespace(end_from[-1])) end_from--; while (end_from > from_part && is_whitespace(end_from[-1]))
end_from--;
*end_from = '\0'; *end_from = '\0';
*from_unit = find_unit(from_part); *from_unit = find_unit(from_part);
if (!*from_unit) { if (!*from_unit) {
char *end = from_part + strlen(from_part); char *end = from_part + strlen(from_part);
while (end > from_part) { while (end > from_part) {
while (end > from_part && is_whitespace(end[-1])) end--; while (end > from_part && is_whitespace(end[-1]))
if (end <= from_part) break; end--;
if (end <= from_part)
break;
char *start = end; char *start = end;
while (start > from_part && !is_whitespace(start[-1])) start--; while (start > from_part && !is_whitespace(start[-1]))
start--;
size_t word_len = end - start; size_t word_len = end - start;
memmove(from_part + word_len + 1, from_part, start - from_part); memmove(from_part + word_len + 1, from_part, start - from_part);
from_part[word_len] = ' '; from_part[word_len] = ' ';
from_part[word_len + 1] = '\0'; from_part[word_len + 1] = '\0';
*from_unit = find_unit(from_part); *from_unit = find_unit(from_part);
if (*from_unit) break; if (*from_unit)
break;
end = start; end = start;
} }
} }
if (!*from_unit) return 0; if (!*from_unit)
return 0;
while (*to_pos && is_whitespace(*to_pos)) to_pos++; while (*to_pos && is_whitespace(*to_pos))
to_pos++;
if (!*to_pos) return 0; if (!*to_pos)
return 0;
char to_part[64] = {0}; char to_part[64] = {0};
size_t to_len = 0; size_t to_len = 0;
@@ -304,7 +350,8 @@ static int parse_conversion_query(const char *query, double *value, const UnitDe
*to_unit = find_unit(to_part); *to_unit = find_unit(to_part);
if (!*to_unit) { if (!*to_unit) {
const char *try_ptr = to_pos; const char *try_ptr = to_pos;
while (*try_ptr && is_whitespace(*try_ptr)) try_ptr++; while (*try_ptr && is_whitespace(*try_ptr))
try_ptr++;
char try_buf[64] = {0}; char try_buf[64] = {0};
size_t try_len = 0; size_t try_len = 0;
while (*try_ptr && try_len < 63) { while (*try_ptr && try_len < 63) {
@@ -317,7 +364,8 @@ static int parse_conversion_query(const char *query, double *value, const UnitDe
break; break;
} }
char *last_space = strrchr(try_buf, ' '); char *last_space = strrchr(try_buf, ' ');
if (!last_space) break; if (!last_space)
break;
*last_space = '\0'; *last_space = '\0';
try_len = strlen(try_buf); try_len = strlen(try_buf);
} }
@@ -326,21 +374,30 @@ static int parse_conversion_query(const char *query, double *value, const UnitDe
return *to_unit ? 1 : 0; return *to_unit ? 1 : 0;
} }
static double convert_temp(double value, const UnitDef *from, const UnitDef *to) { static double convert_temp(double value, const UnitDef *from,
const UnitDef *to) {
double celsius = 0; double celsius = 0;
if (strcmp(from->name, "celsius") == 0) celsius = value; if (strcmp(from->name, "celsius") == 0)
else if (strcmp(from->name, "fahrenheit") == 0) celsius = (value - 32) * 5.0 / 9.0; celsius = value;
else if (strcmp(from->name, "kelvin") == 0) celsius = value - 273.15; else if (strcmp(from->name, "fahrenheit") == 0)
celsius = (value - 32) * 5.0 / 9.0;
else if (strcmp(from->name, "kelvin") == 0)
celsius = value - 273.15;
if (strcmp(to->name, "celsius") == 0) return celsius; if (strcmp(to->name, "celsius") == 0)
else if (strcmp(to->name, "fahrenheit") == 0) return celsius * 9.0 / 5.0 + 32; return celsius;
else if (strcmp(to->name, "kelvin") == 0) return celsius + 273.15; else if (strcmp(to->name, "fahrenheit") == 0)
return celsius * 9.0 / 5.0 + 32;
else if (strcmp(to->name, "kelvin") == 0)
return celsius + 273.15;
return 0; return 0;
} }
static double convert_value(double value, const UnitDef *from, const UnitDef *to) { static double convert_value(double value, const UnitDef *from,
if (from->type != to->type) return 0; const UnitDef *to) {
if (from->type != to->type)
return 0;
if (from->type == UNIT_TEMP) { if (from->type == UNIT_TEMP) {
return convert_temp(value, from, to); return convert_temp(value, from, to);
@@ -351,7 +408,8 @@ static double convert_value(double value, const UnitDef *from, const UnitDef *to
} }
static void format_number(double val, char *buf, size_t bufsize) { static void format_number(double val, char *buf, size_t bufsize) {
if (bufsize == 0) return; if (bufsize == 0)
return;
if (val == 0) { if (val == 0) {
snprintf(buf, bufsize, "0"); snprintf(buf, bufsize, "0");
return; return;
@@ -361,52 +419,62 @@ static void format_number(double val, char *buf, size_t bufsize) {
} else if (fabs(val) < 1) { } else if (fabs(val) < 1) {
snprintf(buf, bufsize, "%.2f", val); snprintf(buf, bufsize, "%.2f", val);
char *p = buf + strlen(buf) - 1; char *p = buf + strlen(buf) - 1;
while (p > buf && *p == '0') *p-- = '\0'; while (p > buf && *p == '0')
if (*p == '.') *p = '\0'; *p-- = '\0';
if (*p == '.')
*p = '\0';
} else if (fmod(val + 0.0001, 1.0) < 0.0002) { } else if (fmod(val + 0.0001, 1.0) < 0.0002) {
snprintf(buf, bufsize, "%.0f", val); snprintf(buf, bufsize, "%.0f", val);
} else { } else {
snprintf(buf, bufsize, "%.2f", val); snprintf(buf, bufsize, "%.2f", val);
char *p = buf + strlen(buf) - 1; char *p = buf + strlen(buf) - 1;
while (p > buf && *p == '0') *p-- = '\0'; while (p > buf && *p == '0')
if (*p == '.') *p = '\0'; *p-- = '\0';
if (*p == '.')
*p = '\0';
} }
} }
static const char *pluralize(const char *unit, double value, char *buf, size_t bufsize) { static const char *pluralize(const char *unit, double value, char *buf,
size_t bufsize) {
int is_one = (fabs(value - 1.0) < 0.0001 || fabs(value + 1.0) < 0.0001); int is_one = (fabs(value - 1.0) < 0.0001 || fabs(value + 1.0) < 0.0001);
size_t len = strlen(unit); size_t len = strlen(unit);
if (len == 0 || bufsize == 0) return unit; if (len == 0 || bufsize == 0)
return unit;
strncpy(buf, unit, bufsize - 1); strncpy(buf, unit, bufsize - 1);
buf[bufsize - 1] = '\0'; buf[bufsize - 1] = '\0';
if (strcmp(unit, "foot") == 0 || strcmp(unit, "square foot") == 0) { if (strcmp(unit, "foot") == 0 || strcmp(unit, "square foot") == 0) {
if (is_one) strcpy(buf, unit); if (is_one)
else strcpy(buf, strcmp(unit, "square foot") == 0 ? "square feet" : "feet"); strcpy(buf, unit);
else
strcpy(buf, strcmp(unit, "square foot") == 0 ? "square feet" : "feet");
return buf; return buf;
} }
if (strcmp(unit, "inch") == 0 || strcmp(unit, "square inch") == 0) { if (strcmp(unit, "inch") == 0 || strcmp(unit, "square inch") == 0) {
if (is_one) strcpy(buf, unit); if (is_one)
else strcpy(buf, strcmp(unit, "square inch") == 0 ? "square inches" : "inches"); strcpy(buf, unit);
else
strcpy(buf,
strcmp(unit, "square inch") == 0 ? "square inches" : "inches");
return buf; return buf;
} }
if (strcmp(unit, "stone") == 0) { if (strcmp(unit, "stone") == 0) {
if (is_one) strcpy(buf, "stone"); if (is_one)
else strcpy(buf, "stones"); strcpy(buf, "stone");
else
strcpy(buf, "stones");
return buf; return buf;
} }
if (strcmp(unit, "celsius") == 0 || if (strcmp(unit, "celsius") == 0 || strcmp(unit, "fahrenheit") == 0 ||
strcmp(unit, "fahrenheit") == 0 ||
strcmp(unit, "kelvin") == 0) { strcmp(unit, "kelvin") == 0) {
strcpy(buf, unit); strcpy(buf, unit);
return buf; return buf;
} }
if (unit[len-1] == 's' || if (unit[len - 1] == 's' || unit[len - 1] == 'x' || unit[len - 1] == 'z' ||
unit[len-1] == 'x' ||
unit[len-1] == 'z' ||
(len >= 2 && unit[len - 2] == 'c' && unit[len - 1] == 'h') || (len >= 2 && unit[len - 2] == 'c' && unit[len - 1] == 'h') ||
(len >= 2 && unit[len - 2] == 's' && unit[len - 1] == 'h')) { (len >= 2 && unit[len - 2] == 's' && unit[len - 1] == 'h')) {
if (!is_one) { if (!is_one) {
@@ -456,7 +524,8 @@ static const char *pluralize(const char *unit, double value, char *buf, size_t b
return buf; return buf;
} }
static char *build_html(double value, const UnitDef *from, double result, const UnitDef *to) { static char *build_html(double value, const UnitDef *from, double result,
const UnitDef *to) {
static char html[4096]; static char html[4096];
char val_buf[64], res_buf[64], from_name_buf[64], to_name_buf[64]; char val_buf[64], res_buf[64], from_name_buf[64], to_name_buf[64];
format_number(value, val_buf, sizeof(val_buf)); format_number(value, val_buf, sizeof(val_buf));
@@ -470,26 +539,30 @@ static char *build_html(double value, const UnitDef *from, double result, const
"<div style='font-size: 1.3em; margin-bottom: 8px;'>" "<div style='font-size: 1.3em; margin-bottom: 8px;'>"
"<b>%s %s</b> = <b>%s %s</b>" "<b>%s %s</b> = <b>%s %s</b>"
"</div>", "</div>",
val_buf, from_name_buf, val_buf, from_name_buf, res_buf, to_name_buf);
res_buf, to_name_buf);
snprintf(html + n, sizeof(html) - n, "</div>"); snprintf(html + n, sizeof(html) - n, "</div>");
return html; return html;
} }
InfoBox fetch_unit_conv_data(const char *query) { InfoBox fetch_unit_conv_data(const char *query) {
InfoBox info = {NULL, NULL, NULL, NULL}; InfoBox info = {NULL, NULL, NULL, NULL};
if (!query) return info; if (!query)
return info;
double value = 0; double value = 0;
const UnitDef *from = NULL; const UnitDef *from = NULL;
const UnitDef *to = NULL; const UnitDef *to = NULL;
if (!parse_conversion_query(query, &value, &from, &to)) return info; if (!parse_conversion_query(query, &value, &from, &to))
if (!from || !to) return info; return info;
if (from->type != to->type) return info; if (!from || !to)
return info;
if (from->type != to->type)
return info;
double result = convert_value(value, from, to); double result = convert_value(value, from, to);
if (result == 0 && value != 0 && from->type != UNIT_TEMP) return info; if (result == 0 && value != 0 && from->type != UNIT_TEMP)
return info;
info.title = strdup("Unit Conversion"); info.title = strdup("Unit Conversion");
info.extract = strdup(build_html(value, from, result, to)); info.extract = strdup(build_html(value, from, result, to));

View File

@@ -14,12 +14,14 @@ struct WikiMemoryStruct {
}; };
static void shorten_summary(char **extract_ptr, int max_chars) { static void shorten_summary(char **extract_ptr, int max_chars) {
if (!extract_ptr || !*extract_ptr) return; if (!extract_ptr || !*extract_ptr)
return;
char *text = *extract_ptr; char *text = *extract_ptr;
int len = strlen(text); int len = strlen(text);
if (len <= max_chars) return; if (len <= max_chars)
return;
int end_pos = max_chars; int end_pos = max_chars;
for (int i = max_chars; i > (max_chars / 2); i--) { for (int i = max_chars; i > (max_chars / 2); i--) {
@@ -72,7 +74,8 @@ static void extract_wiki_info(xmlNode *node, InfoBox *info) {
const char *base_article_url = "https://en.wikipedia.org/wiki/"; const char *base_article_url = "https://en.wikipedia.org/wiki/";
char *formatted_title = strdup((const char *)title); char *formatted_title = strdup((const char *)title);
for (int i = 0; formatted_title[i]; i++) { for (int i = 0; formatted_title[i]; i++) {
if (formatted_title[i] == ' ') formatted_title[i] = '_'; if (formatted_title[i] == ' ')
formatted_title[i] = '_';
} }
info->url = info->url =
@@ -148,11 +151,11 @@ InfoBox fetch_wiki_data(char *api_url) {
char *construct_wiki_url(const char *search_term) { char *construct_wiki_url(const char *search_term) {
CURL *curl = curl_easy_init(); CURL *curl = curl_easy_init();
if (!curl) return NULL; if (!curl)
return NULL;
char *escaped_term = curl_easy_escape(curl, search_term, 0); char *escaped_term = curl_easy_escape(curl, search_term, 0);
const char *base = const char *base = "https://en.wikipedia.org/w/"
"https://en.wikipedia.org/w/"
"api.php?action=query&prop=extracts|pageimages&exintro&" "api.php?action=query&prop=extracts|pageimages&exintro&"
"explaintext&pithumbsize=400&format=xml&origin=*&titles="; "explaintext&pithumbsize=400&format=xml&origin=*&titles=";

View File

@@ -7,15 +7,16 @@
#include "Config.h" #include "Config.h"
#include "Proxy/Proxy.h" #include "Proxy/Proxy.h"
#include "Scraping/Scraping.h"
#include "Routes/Home.h" #include "Routes/Home.h"
#include "Routes/Images.h"
#include "Routes/ImageProxy.h" #include "Routes/ImageProxy.h"
#include "Routes/Images.h"
#include "Routes/Search.h" #include "Routes/Search.h"
#include "Scraping/Scraping.h"
int handle_opensearch(UrlParams *params) { int handle_opensearch(UrlParams *params) {
(void)params; (void)params;
serve_static_file_with_mime("opensearch.xml", "application/opensearchdescription+xml"); serve_static_file_with_mime("opensearch.xml",
"application/opensearchdescription+xml");
return 0; return 0;
} }
@@ -30,15 +31,13 @@ int main() {
curl_global_init(CURL_GLOBAL_DEFAULT); curl_global_init(CURL_GLOBAL_DEFAULT);
Config config = { Config config = {.host = "0.0.0.0",
.host = "0.0.0.0",
.port = 5000, .port = 5000,
.proxy = "", .proxy = "",
.proxy_list_file = "", .proxy_list_file = "",
.max_proxy_retries = 3, .max_proxy_retries = 3,
.randomize_username = 0, .randomize_username = 0,
.randomize_password = 0 .randomize_password = 0};
};
if (load_config("config.ini", &config) != 0) { if (load_config("config.ini", &config) != 0) {
fprintf(stderr, "Warning: Could not load config file, using defaults\n"); fprintf(stderr, "Warning: Could not load config file, using defaults\n");
@@ -46,17 +45,21 @@ int main() {
if (config.proxy_list_file[0] != '\0') { if (config.proxy_list_file[0] != '\0') {
if (load_proxy_list(config.proxy_list_file) < 0) { if (load_proxy_list(config.proxy_list_file) < 0) {
fprintf(stderr, "Warning: Failed to load proxy list, continuing without proxies\n"); fprintf(
stderr,
"Warning: Failed to load proxy list, continuing without proxies\n");
} }
} }
max_proxy_retries = config.max_proxy_retries; max_proxy_retries = config.max_proxy_retries;
set_proxy_config(config.proxy, config.randomize_username, config.randomize_password); set_proxy_config(config.proxy, config.randomize_username,
config.randomize_password);
if (proxy_url[0] != '\0') { if (proxy_url[0] != '\0') {
fprintf(stderr, "Using proxy: %s\n", proxy_url); fprintf(stderr, "Using proxy: %s\n", proxy_url);
} else if (proxy_count > 0) { } else if (proxy_count > 0) {
fprintf(stderr, "Using %d proxies from %s\n", proxy_count, config.proxy_list_file); fprintf(stderr, "Using %d proxies from %s\n", proxy_count,
config.proxy_list_file);
} }
set_handler("/", home_handler); set_handler("/", home_handler);

View File

@@ -1,9 +1,9 @@
#include "Proxy.h" #include "Proxy.h"
#include <pthread.h>
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
#include <string.h> #include <string.h>
#include <time.h> #include <time.h>
#include <pthread.h>
Proxy *proxy_list = NULL; Proxy *proxy_list = NULL;
int proxy_count = 0; int proxy_count = 0;
@@ -13,7 +13,8 @@ int randomize_password = 0;
char proxy_url[512] = {0}; char proxy_url[512] = {0};
static pthread_mutex_t proxy_mutex = PTHREAD_MUTEX_INITIALIZER; static pthread_mutex_t proxy_mutex = PTHREAD_MUTEX_INITIALIZER;
static const char RAND_CHARS[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"; static const char RAND_CHARS[] =
"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
static void generate_random_string(char *buf, size_t len) { static void generate_random_string(char *buf, size_t len) {
for (size_t i = 0; i < len - 1; i++) { for (size_t i = 0; i < len - 1; i++) {
@@ -32,12 +33,17 @@ void set_proxy_config(const char *proxy_str, int rand_user, int rand_pass) {
} }
static Proxy parse_proxy_line(const char *line) { static Proxy parse_proxy_line(const char *line) {
Proxy proxy = {.type = PROXY_SOCKS5, .port = 0, .username[0] = '\0', .password[0] = '\0', .failures = 0}; Proxy proxy = {.type = PROXY_SOCKS5,
.port = 0,
.username[0] = '\0',
.password[0] = '\0',
.failures = 0};
const char *host_start = NULL; const char *host_start = NULL;
const char *port_start = NULL; const char *port_start = NULL;
size_t len = strlen(line); size_t len = strlen(line);
if (len == 0) return proxy; if (len == 0)
return proxy;
if (strncmp(line, "http://", 7) == 0) { if (strncmp(line, "http://", 7) == 0) {
proxy.type = PROXY_HTTP; proxy.type = PROXY_HTTP;
@@ -56,14 +62,16 @@ static Proxy parse_proxy_line(const char *line) {
if (at) { if (at) {
char cred_buf[128]; char cred_buf[128];
size_t cred_len = at - host_start; size_t cred_len = at - host_start;
if (cred_len >= sizeof(cred_buf)) cred_len = sizeof(cred_buf) - 1; if (cred_len >= sizeof(cred_buf))
cred_len = sizeof(cred_buf) - 1;
strncpy(cred_buf, host_start, cred_len); strncpy(cred_buf, host_start, cred_len);
cred_buf[cred_len] = '\0'; cred_buf[cred_len] = '\0';
char *colon = strchr(cred_buf, ':'); char *colon = strchr(cred_buf, ':');
if (colon) { if (colon) {
size_t user_len = colon - cred_buf; size_t user_len = colon - cred_buf;
if (user_len >= sizeof(proxy.username)) user_len = sizeof(proxy.username) - 1; if (user_len >= sizeof(proxy.username))
user_len = sizeof(proxy.username) - 1;
strncpy(proxy.username, cred_buf, user_len); strncpy(proxy.username, cred_buf, user_len);
proxy.username[user_len] = '\0'; proxy.username[user_len] = '\0';
strncpy(proxy.password, colon + 1, sizeof(proxy.password) - 1); strncpy(proxy.password, colon + 1, sizeof(proxy.password) - 1);
@@ -76,7 +84,8 @@ static Proxy parse_proxy_line(const char *line) {
if (port_start) { if (port_start) {
char host_buf[256]; char host_buf[256];
size_t host_len = port_start - host_start; size_t host_len = port_start - host_start;
if (host_len >= sizeof(host_buf)) host_len = sizeof(host_buf) - 1; if (host_len >= sizeof(host_buf))
host_len = sizeof(host_buf) - 1;
strncpy(host_buf, host_start, host_len); strncpy(host_buf, host_start, host_len);
host_buf[host_len] = '\0'; host_buf[host_len] = '\0';
snprintf(proxy.host, sizeof(proxy.host), "%.*s", (int)host_len, host_buf); snprintf(proxy.host, sizeof(proxy.host), "%.*s", (int)host_len, host_buf);
@@ -125,7 +134,8 @@ int load_proxy_list(const char *filename) {
} }
char *p = line; char *p = line;
while (*p == ' ' || *p == '\t') p++; while (*p == ' ' || *p == '\t')
p++;
char *end = p + strlen(p) - 1; char *end = p + strlen(p) - 1;
while (end > p && (*end == ' ' || *end == '\t')) { while (end > p && (*end == ' ' || *end == '\t')) {
@@ -133,7 +143,8 @@ int load_proxy_list(const char *filename) {
end--; end--;
} }
if (p[0] == '\0') continue; if (p[0] == '\0')
continue;
Proxy proxy = parse_proxy_line(p); Proxy proxy = parse_proxy_line(p);
if (proxy.port == 0) { if (proxy.port == 0) {
@@ -205,7 +216,8 @@ Proxy *get_random_proxy(void) {
} }
void record_proxy_failure(Proxy *proxy) { void record_proxy_failure(Proxy *proxy) {
if (!proxy) return; if (!proxy)
return;
pthread_mutex_lock(&proxy_mutex); pthread_mutex_lock(&proxy_mutex);
proxy->failures++; proxy->failures++;
pthread_mutex_unlock(&proxy_mutex); pthread_mutex_unlock(&proxy_mutex);
@@ -227,8 +239,10 @@ void apply_proxy_settings(CURL *curl) {
char username[32] = {0}; char username[32] = {0};
char password[32] = {0}; char password[32] = {0};
if (randomize_username) generate_random_string(username, sizeof(username)); if (randomize_username)
if (randomize_password) generate_random_string(password, sizeof(password)); generate_random_string(username, sizeof(username));
if (randomize_password)
generate_random_string(password, sizeof(password));
snprintf(userpwd, sizeof(userpwd), "%s:%s", username, password); snprintf(userpwd, sizeof(userpwd), "%s:%s", username, password);
curl_easy_setopt(curl, CURLOPT_PROXYUSERPWD, userpwd); curl_easy_setopt(curl, CURLOPT_PROXYUSERPWD, userpwd);
@@ -237,7 +251,8 @@ void apply_proxy_settings(CURL *curl) {
Proxy *proxy = get_random_proxy(); Proxy *proxy = get_random_proxy();
if (proxy) { if (proxy) {
char proxy_url_buf[512]; char proxy_url_buf[512];
snprintf(proxy_url_buf, sizeof(proxy_url_buf), "%s:%d", proxy->host, proxy->port); snprintf(proxy_url_buf, sizeof(proxy_url_buf), "%s:%d", proxy->host,
proxy->port);
curl_easy_setopt(curl, CURLOPT_PROXY, proxy_url_buf); curl_easy_setopt(curl, CURLOPT_PROXY, proxy_url_buf);
if (proxy->type == PROXY_HTTP) { if (proxy->type == PROXY_HTTP) {
curl_easy_setopt(curl, CURLOPT_PROXYTYPE, CURLPROXY_HTTP); curl_easy_setopt(curl, CURLOPT_PROXYTYPE, CURLPROXY_HTTP);
@@ -249,7 +264,8 @@ void apply_proxy_settings(CURL *curl) {
if (proxy->username[0] != '\0' || proxy->password[0] != '\0') { if (proxy->username[0] != '\0' || proxy->password[0] != '\0') {
char userpwd[128]; char userpwd[128];
snprintf(userpwd, sizeof(userpwd), "%s:%s", proxy->username, proxy->password); snprintf(userpwd, sizeof(userpwd), "%s:%s", proxy->username,
proxy->password);
curl_easy_setopt(curl, CURLOPT_PROXYUSERPWD, userpwd); curl_easy_setopt(curl, CURLOPT_PROXYUSERPWD, userpwd);
} }
} }

View File

@@ -30,11 +30,7 @@ static int is_allowed_domain(const char *url) {
} }
strncpy(host, protocol, host_len); strncpy(host, protocol, host_len);
const char *allowed_domains[] = { const char *allowed_domains[] = {"mm.bing.net", "th.bing.com", NULL};
"mm.bing.net",
"th.bing.com",
NULL
};
for (int i = 0; allowed_domains[i] != NULL; i++) { for (int i = 0; allowed_domains[i] != NULL; i++) {
size_t domain_len = strlen(allowed_domains[i]); size_t domain_len = strlen(allowed_domains[i]);
@@ -66,7 +62,8 @@ static size_t write_callback(void *contents, size_t size, size_t nmemb,
new_capacity = buf->size + realsize; new_capacity = buf->size + realsize;
} }
char *new_data = realloc(buf->data, new_capacity); char *new_data = realloc(buf->data, new_capacity);
if (!new_data) return 0; if (!new_data)
return 0;
buf->data = new_data; buf->data = new_data;
buf->capacity = new_capacity; buf->capacity = new_capacity;
} }
@@ -101,11 +98,7 @@ int image_proxy_handler(UrlParams *params) {
return 0; return 0;
} }
MemoryBuffer buf = { MemoryBuffer buf = {.data = malloc(8192), .size = 0, .capacity = 8192};
.data = malloc(8192),
.size = 0,
.capacity = 8192
};
if (!buf.data) { if (!buf.data) {
curl_easy_cleanup(curl); curl_easy_cleanup(curl);
@@ -141,7 +134,8 @@ int image_proxy_handler(UrlParams *params) {
return 0; return 0;
} }
const char *mime_type = strlen(content_type) > 0 ? content_type : "image/jpeg"; const char *mime_type =
strlen(content_type) > 0 ? content_type : "image/jpeg";
serve_data(buf.data, buf.size, mime_type); serve_data(buf.data, buf.size, mime_type);
free(buf.data); free(buf.data);

View File

@@ -1,7 +1,7 @@
#include "Images.h" #include "Images.h"
#include "../Utility/Unescape.h"
#include "../Proxy/Proxy.h" #include "../Proxy/Proxy.h"
#include "../Scraping/Scraping.h" #include "../Scraping/Scraping.h"
#include "../Utility/Unescape.h"
#include <curl/curl.h> #include <curl/curl.h>
#include <libxml/HTMLparser.h> #include <libxml/HTMLparser.h>
@@ -76,7 +76,8 @@ int images_handler(UrlParams *params) {
raw_query = params->params[i].value; raw_query = params->params[i].value;
} else if (strcmp(params->params[i].key, "p") == 0) { } else if (strcmp(params->params[i].key, "p") == 0) {
int parsed = atoi(params->params[i].value); int parsed = atoi(params->params[i].value);
if (parsed > 1) page = parsed; if (parsed > 1)
page = parsed;
} }
} }
} }
@@ -96,7 +97,8 @@ int images_handler(UrlParams *params) {
if (!raw_query || strlen(raw_query) == 0) { if (!raw_query || strlen(raw_query) == 0) {
send_response("<h1>No query provided</h1>"); send_response("<h1>No query provided</h1>");
if (display_query) free(display_query); if (display_query)
free(display_query);
free_context(&ctx); free_context(&ctx);
return -1; return -1;
} }
@@ -104,7 +106,8 @@ int images_handler(UrlParams *params) {
CURL *tmp = curl_easy_init(); CURL *tmp = curl_easy_init();
if (!tmp) { if (!tmp) {
send_response("<h1>Error initializing curl</h1>"); send_response("<h1>Error initializing curl</h1>");
if (display_query) free(display_query); if (display_query)
free(display_query);
free_context(&ctx); free_context(&ctx);
return -1; return -1;
} }
@@ -113,15 +116,16 @@ int images_handler(UrlParams *params) {
if (!encoded_query) { if (!encoded_query) {
send_response("<h1>Error encoding query</h1>"); send_response("<h1>Error encoding query</h1>");
if (display_query) free(display_query); if (display_query)
free(display_query);
free_context(&ctx); free_context(&ctx);
return -1; return -1;
} }
char url[1024]; char url[1024];
int first = (page - 1) * 32 + 1; int first = (page - 1) * 32 + 1;
snprintf(url, sizeof(url), snprintf(url, sizeof(url), "https://www.bing.com/images/search?q=%s&first=%d",
"https://www.bing.com/images/search?q=%s&first=%d", encoded_query, first); encoded_query, first);
char *html = fetch_images_html(url); char *html = fetch_images_html(url);
if (!html) { if (!html) {
@@ -168,7 +172,8 @@ int images_handler(UrlParams *params) {
inner_counts = malloc(sizeof(int) * max_images); inner_counts = malloc(sizeof(int) * max_images);
for (int i = 0; i < nodes; i++) { for (int i = 0; i < nodes; i++) {
if (image_count >= 32) break; if (image_count >= 32)
break;
xmlNodePtr node = xpathObj->nodesetval->nodeTab[i]; xmlNodePtr node = xpathObj->nodesetval->nodeTab[i];
xmlNodePtr img_node = NULL; xmlNodePtr img_node = NULL;
@@ -177,25 +182,32 @@ int images_handler(UrlParams *params) {
xmlNodePtr thumb_link = NULL; xmlNodePtr thumb_link = NULL;
for (xmlNodePtr child = node->children; child; child = child->next) { for (xmlNodePtr child = node->children; child; child = child->next) {
if (child->type != XML_ELEMENT_NODE) continue; if (child->type != XML_ELEMENT_NODE)
continue;
if (xmlStrcmp(child->name, (const xmlChar *)"a") == 0) { if (xmlStrcmp(child->name, (const xmlChar *)"a") == 0) {
xmlChar *class = xmlGetProp(child, (const xmlChar *)"class"); xmlChar *class = xmlGetProp(child, (const xmlChar *)"class");
if (class) { if (class) {
if (xmlStrstr(class, (const xmlChar *)"thumb") != NULL) { if (xmlStrstr(class, (const xmlChar *)"thumb") != NULL) {
thumb_link = child; thumb_link = child;
for (xmlNodePtr thumb_child = child->children; thumb_child; thumb_child = thumb_child->next) { for (xmlNodePtr thumb_child = child->children; thumb_child;
thumb_child = thumb_child->next) {
if (xmlStrcmp(thumb_child->name, (const xmlChar *)"div") == 0) { if (xmlStrcmp(thumb_child->name, (const xmlChar *)"div") == 0) {
xmlChar *div_class = xmlGetProp(thumb_child, (const xmlChar *)"class"); xmlChar *div_class =
if (div_class && xmlStrcmp(div_class, (const xmlChar *)"cico") == 0) { xmlGetProp(thumb_child, (const xmlChar *)"class");
for (xmlNodePtr cico_child = thumb_child->children; cico_child; cico_child = cico_child->next) { if (div_class &&
if (xmlStrcmp(cico_child->name, (const xmlChar *)"img") == 0) { xmlStrcmp(div_class, (const xmlChar *)"cico") == 0) {
for (xmlNodePtr cico_child = thumb_child->children;
cico_child; cico_child = cico_child->next) {
if (xmlStrcmp(cico_child->name, (const xmlChar *)"img") ==
0) {
img_node = cico_child; img_node = cico_child;
break; break;
} }
} }
} }
if (div_class) xmlFree(div_class); if (div_class)
xmlFree(div_class);
} }
} }
} else if (xmlStrstr(class, (const xmlChar *)"tit") != NULL) { } else if (xmlStrstr(class, (const xmlChar *)"tit") != NULL) {
@@ -206,32 +218,44 @@ int images_handler(UrlParams *params) {
} else if (xmlStrcmp(child->name, (const xmlChar *)"div") == 0) { } else if (xmlStrcmp(child->name, (const xmlChar *)"div") == 0) {
xmlChar *class = xmlGetProp(child, (const xmlChar *)"class"); xmlChar *class = xmlGetProp(child, (const xmlChar *)"class");
if (class && xmlStrcmp(class, (const xmlChar *)"meta") == 0) { if (class && xmlStrcmp(class, (const xmlChar *)"meta") == 0) {
for (xmlNodePtr meta_child = child->children; meta_child; meta_child = meta_child->next) { for (xmlNodePtr meta_child = child->children; meta_child;
meta_child = meta_child->next) {
if (xmlStrcmp(meta_child->name, (const xmlChar *)"div") == 0) { if (xmlStrcmp(meta_child->name, (const xmlChar *)"div") == 0) {
xmlChar *div_class = xmlGetProp(meta_child, (const xmlChar *)"class"); xmlChar *div_class =
xmlGetProp(meta_child, (const xmlChar *)"class");
if (div_class) { if (div_class) {
if (xmlStrcmp(div_class, (const xmlChar *)"des") == 0) { if (xmlStrcmp(div_class, (const xmlChar *)"des") == 0) {
des_node = meta_child; des_node = meta_child;
} }
xmlFree(div_class); xmlFree(div_class);
} }
} else if (xmlStrcmp(meta_child->name, (const xmlChar *)"a") == 0) { } else if (xmlStrcmp(meta_child->name, (const xmlChar *)"a") ==
xmlChar *a_class = xmlGetProp(meta_child, (const xmlChar *)"class"); 0) {
if (a_class && xmlStrstr(a_class, (const xmlChar *)"tit") != NULL) { xmlChar *a_class =
xmlGetProp(meta_child, (const xmlChar *)"class");
if (a_class &&
xmlStrstr(a_class, (const xmlChar *)"tit") != NULL) {
tit_node = meta_child; tit_node = meta_child;
} }
if (a_class) xmlFree(a_class); if (a_class)
xmlFree(a_class);
} }
} }
} }
if (class) xmlFree(class); if (class)
xmlFree(class);
} }
} }
xmlChar *iurl = img_node ? xmlGetProp(img_node, (const xmlChar *)"src") : NULL; xmlChar *iurl =
xmlChar *full_url = thumb_link ? xmlGetProp(thumb_link, (const xmlChar *)"href") : NULL; img_node ? xmlGetProp(img_node, (const xmlChar *)"src") : NULL;
xmlChar *title = des_node ? xmlNodeGetContent(des_node) : (tit_node ? xmlNodeGetContent(tit_node) : NULL); xmlChar *full_url =
xmlChar *rurl = tit_node ? xmlGetProp(tit_node, (const xmlChar *)"href") : NULL; thumb_link ? xmlGetProp(thumb_link, (const xmlChar *)"href") : NULL;
xmlChar *title = des_node
? xmlNodeGetContent(des_node)
: (tit_node ? xmlNodeGetContent(tit_node) : NULL);
xmlChar *rurl =
tit_node ? xmlGetProp(tit_node, (const xmlChar *)"href") : NULL;
if (iurl && strlen((char *)iurl) > 0) { if (iurl && strlen((char *)iurl) > 0) {
char *proxy_url = NULL; char *proxy_url = NULL;
@@ -250,18 +274,24 @@ int images_handler(UrlParams *params) {
} }
image_matrix[image_count] = malloc(sizeof(char *) * 4); image_matrix[image_count] = malloc(sizeof(char *) * 4);
image_matrix[image_count][0] = proxy_url ? strdup(proxy_url) : strdup((char *)iurl); image_matrix[image_count][0] =
proxy_url ? strdup(proxy_url) : strdup((char *)iurl);
image_matrix[image_count][1] = strdup(title ? (char *)title : "Image"); image_matrix[image_count][1] = strdup(title ? (char *)title : "Image");
image_matrix[image_count][2] = strdup(rurl ? (char *)rurl : "#"); image_matrix[image_count][2] = strdup(rurl ? (char *)rurl : "#");
image_matrix[image_count][3] = strdup(full_url ? (char *)full_url : "#"); image_matrix[image_count][3] =
strdup(full_url ? (char *)full_url : "#");
inner_counts[image_count] = 4; inner_counts[image_count] = 4;
image_count++; image_count++;
} }
if (iurl) xmlFree(iurl); if (iurl)
if (title) xmlFree(title); xmlFree(iurl);
if (rurl) xmlFree(rurl); if (title)
if (full_url) xmlFree(full_url); xmlFree(title);
if (rurl)
xmlFree(rurl);
if (full_url)
xmlFree(full_url);
} }
} }
@@ -289,9 +319,12 @@ int images_handler(UrlParams *params) {
free(inner_counts); free(inner_counts);
} }
if (xpathObj) xmlXPathFreeObject(xpathObj); if (xpathObj)
if (xpathCtx) xmlXPathFreeContext(xpathCtx); xmlXPathFreeObject(xpathObj);
if (doc) xmlFreeDoc(doc); if (xpathCtx)
xmlXPathFreeContext(xpathCtx);
if (doc)
xmlFreeDoc(doc);
free(html); free(html);
curl_free(encoded_query); curl_free(encoded_query);
free(display_query); free(display_query);

View File

@@ -1,8 +1,8 @@
#include "Search.h" #include "Search.h"
#include "../Infobox/Wikipedia.h"
#include "../Infobox/Calculator.h" #include "../Infobox/Calculator.h"
#include "../Infobox/Dictionary.h" #include "../Infobox/Dictionary.h"
#include "../Infobox/UnitConversion.h" #include "../Infobox/UnitConversion.h"
#include "../Infobox/Wikipedia.h"
#include "../Scraping/Scraping.h" #include "../Scraping/Scraping.h"
#include "../Utility/Display.h" #include "../Utility/Display.h"
#include "../Utility/Unescape.h" #include "../Utility/Unescape.h"
@@ -35,7 +35,8 @@ static void *wiki_thread_func(void *arg) {
} }
static int is_calculator_query(const char *query) { static int is_calculator_query(const char *query) {
if (!query) return 0; if (!query)
return 0;
int has_digit = 0; int has_digit = 0;
int has_math_operator = 0; int has_math_operator = 0;
@@ -49,7 +50,8 @@ static int is_calculator_query(const char *query) {
} }
} }
if (!has_digit || !has_math_operator) return 0; if (!has_digit || !has_math_operator)
return 0;
int len = strlen(query); int len = strlen(query);
for (int i = 0; i < len; i++) { for (int i = 0; i < len; i++) {
@@ -63,7 +65,8 @@ static int is_calculator_query(const char *query) {
has_num_before = 1; has_num_before = 1;
break; break;
} }
if (query[j] != ' ') break; if (query[j] != ' ')
break;
} }
for (int j = i + 1; j < len; j++) { for (int j = i + 1; j < len; j++) {
@@ -71,7 +74,8 @@ static int is_calculator_query(const char *query) {
has_num_after = 1; has_num_after = 1;
break; break;
} }
if (query[j] != ' ') break; if (query[j] != ' ')
break;
} }
if (has_num_before || has_num_after) { if (has_num_before || has_num_after) {
@@ -133,9 +137,12 @@ static int add_infobox_to_collection(InfoBox *infobox, char ****collection,
(int *)realloc(*inner_counts, sizeof(int) * (current_count + 1)); (int *)realloc(*inner_counts, sizeof(int) * (current_count + 1));
(*collection)[current_count] = (char **)malloc(sizeof(char *) * 4); (*collection)[current_count] = (char **)malloc(sizeof(char *) * 4);
(*collection)[current_count][0] = infobox->title ? strdup(infobox->title) : NULL; (*collection)[current_count][0] =
(*collection)[current_count][1] = infobox->thumbnail_url ? strdup(infobox->thumbnail_url) : NULL; infobox->title ? strdup(infobox->title) : NULL;
(*collection)[current_count][2] = infobox->extract ? strdup(infobox->extract) : NULL; (*collection)[current_count][1] =
infobox->thumbnail_url ? strdup(infobox->thumbnail_url) : NULL;
(*collection)[current_count][2] =
infobox->extract ? strdup(infobox->extract) : NULL;
(*collection)[current_count][3] = infobox->url ? strdup(infobox->url) : NULL; (*collection)[current_count][3] = infobox->url ? strdup(infobox->url) : NULL;
(*inner_counts)[current_count] = 4; (*inner_counts)[current_count] = 4;
@@ -153,7 +160,8 @@ int results_handler(UrlParams *params) {
raw_query = params->params[i].value; raw_query = params->params[i].value;
} else if (strcmp(params->params[i].key, "p") == 0) { } else if (strcmp(params->params[i].key, "p") == 0) {
int parsed = atoi(params->params[i].value); int parsed = atoi(params->params[i].value);
if (parsed > 1) page = parsed; if (parsed > 1)
page = parsed;
} }
} }
} }
@@ -219,22 +227,26 @@ int results_handler(UrlParams *params) {
if (page == 1) { if (page == 1) {
if (dict_data.success) { if (dict_data.success) {
infobox_count = add_infobox_to_collection(&dict_data.result, &infobox_matrix, infobox_count =
add_infobox_to_collection(&dict_data.result, &infobox_matrix,
&infobox_inner_counts, infobox_count); &infobox_inner_counts, infobox_count);
} }
if (calc_data.success) { if (calc_data.success) {
infobox_count = add_infobox_to_collection(&calc_data.result, &infobox_matrix, infobox_count =
add_infobox_to_collection(&calc_data.result, &infobox_matrix,
&infobox_inner_counts, infobox_count); &infobox_inner_counts, infobox_count);
} }
if (unit_data.success) { if (unit_data.success) {
infobox_count = add_infobox_to_collection(&unit_data.result, &infobox_matrix, infobox_count =
add_infobox_to_collection(&unit_data.result, &infobox_matrix,
&infobox_inner_counts, infobox_count); &infobox_inner_counts, infobox_count);
} }
if (wiki_data.success) { if (wiki_data.success) {
infobox_count = add_infobox_to_collection(&wiki_data.result, &infobox_matrix, infobox_count =
add_infobox_to_collection(&wiki_data.result, &infobox_matrix,
&infobox_inner_counts, infobox_count); &infobox_inner_counts, infobox_count);
} }
} }
@@ -243,7 +255,8 @@ int results_handler(UrlParams *params) {
context_set_array_of_arrays(&ctx, "infoboxes", infobox_matrix, context_set_array_of_arrays(&ctx, "infoboxes", infobox_matrix,
infobox_count, infobox_inner_counts); infobox_count, infobox_inner_counts);
for (int i = 0; i < infobox_count; i++) { for (int i = 0; i < infobox_count; i++) {
for (int j = 0; j < 4; j++) free(infobox_matrix[i][j]); for (int j = 0; j < 4; j++)
free(infobox_matrix[i][j]);
free(infobox_matrix[i]); free(infobox_matrix[i]);
} }
free(infobox_matrix); free(infobox_matrix);
@@ -286,8 +299,12 @@ int results_handler(UrlParams *params) {
results_matrix[unique_count][0] = strdup(display_url); results_matrix[unique_count][0] = strdup(display_url);
results_matrix[unique_count][1] = strdup(pretty_url); results_matrix[unique_count][1] = strdup(pretty_url);
results_matrix[unique_count][2] = all_results[i][j].title ? strdup(all_results[i][j].title) : strdup("Untitled"); results_matrix[unique_count][2] = all_results[i][j].title
results_matrix[unique_count][3] = all_results[i][j].snippet ? strdup(all_results[i][j].snippet) : strdup(""); ? strdup(all_results[i][j].title)
: strdup("Untitled");
results_matrix[unique_count][3] =
all_results[i][j].snippet ? strdup(all_results[i][j].snippet)
: strdup("");
results_inner_counts[unique_count] = 4; results_inner_counts[unique_count] = 4;
@@ -301,7 +318,8 @@ int results_handler(UrlParams *params) {
free(all_results[i]); free(all_results[i]);
} }
context_set_array_of_arrays(&ctx, "results", results_matrix, unique_count, results_inner_counts); context_set_array_of_arrays(&ctx, "results", results_matrix, unique_count,
results_inner_counts);
char *html = render_template("results.html", &ctx); char *html = render_template("results.html", &ctx);
if (html) { if (html) {
@@ -310,7 +328,8 @@ int results_handler(UrlParams *params) {
} }
for (int i = 0; i < unique_count; i++) { for (int i = 0; i < unique_count; i++) {
for (int j = 0; j < 4; j++) free(results_matrix[i][j]); for (int j = 0; j < 4; j++)
free(results_matrix[i][j]);
free(results_matrix[i]); free(results_matrix[i]);
free(seen_urls[i]); free(seen_urls[i]);
} }
@@ -326,10 +345,14 @@ int results_handler(UrlParams *params) {
} }
if (page == 1) { if (page == 1) {
if (wiki_data.success) free_infobox(&wiki_data.result); if (wiki_data.success)
if (calc_data.success) free_infobox(&calc_data.result); free_infobox(&wiki_data.result);
if (dict_data.success) free_infobox(&dict_data.result); if (calc_data.success)
if (unit_data.success) free_infobox(&unit_data.result); free_infobox(&calc_data.result);
if (dict_data.success)
free_infobox(&dict_data.result);
if (unit_data.success)
free_infobox(&unit_data.result);
} }
free_context(&ctx); free_context(&ctx);

View File

@@ -18,7 +18,8 @@ static size_t WriteMemoryCallback(void *contents, size_t size, size_t nmemb,
if (mem->size + realsize + 1 > mem->capacity) { if (mem->size + realsize + 1 > mem->capacity) {
size_t new_cap = mem->capacity == 0 ? 16384 : mem->capacity * 2; size_t new_cap = mem->capacity == 0 ? 16384 : mem->capacity * 2;
while (new_cap < mem->size + realsize + 1) new_cap *= 2; while (new_cap < mem->size + realsize + 1)
new_cap *= 2;
char *ptr = (char *)realloc(mem->memory, new_cap); char *ptr = (char *)realloc(mem->memory, new_cap);
if (!ptr) { if (!ptr) {
@@ -60,12 +61,14 @@ static int parse_ddg_lite(const char *engine_name, xmlDocPtr doc,
return 0; return 0;
} }
const char *link_xpath = "//tr[not(contains(@class, 'result-sponsored'))]//a[@class='result-link']"; const char *link_xpath = "//tr[not(contains(@class, "
"'result-sponsored'))]//a[@class='result-link']";
xmlXPathObjectPtr xpathObj = xmlXPathObjectPtr xpathObj =
xmlXPathEvalExpression((xmlChar *)link_xpath, xpathCtx); xmlXPathEvalExpression((xmlChar *)link_xpath, xpathCtx);
if (!xpathObj || !xpathObj->nodesetval || xpathObj->nodesetval->nodeNr == 0) { if (!xpathObj || !xpathObj->nodesetval || xpathObj->nodesetval->nodeNr == 0) {
if (xpathObj) xmlXPathFreeObject(xpathObj); if (xpathObj)
xmlXPathFreeObject(xpathObj);
xmlXPathFreeContext(xpathCtx); xmlXPathFreeContext(xpathCtx);
return 0; return 0;
} }
@@ -101,23 +104,28 @@ static int parse_ddg_lite(const char *engine_name, xmlDocPtr doc,
xmlXPathObjectPtr sObj = xmlXPathEvalExpression( xmlXPathObjectPtr sObj = xmlXPathEvalExpression(
(xmlChar *)".//td[@class='result-snippet']", xpathCtx); (xmlChar *)".//td[@class='result-snippet']", xpathCtx);
if (sObj && sObj->nodesetval && sObj->nodesetval->nodeNr > 0) { if (sObj && sObj->nodesetval && sObj->nodesetval->nodeNr > 0) {
snippet_text = (char *)xmlNodeGetContent(sObj->nodesetval->nodeTab[0]); snippet_text =
(char *)xmlNodeGetContent(sObj->nodesetval->nodeTab[0]);
} }
if (sObj) xmlXPathFreeObject(sObj); if (sObj)
xmlXPathFreeObject(sObj);
xpathCtx->node = NULL; xpathCtx->node = NULL;
} }
} }
(*out_results)[found_count].url = unescape_search_url(url); (*out_results)[found_count].url = unescape_search_url(url);
(*out_results)[found_count].title = strdup(title ? title : "No Title"); (*out_results)[found_count].title = strdup(title ? title : "No Title");
(*out_results)[found_count].snippet = strdup(snippet_text ? snippet_text : ""); (*out_results)[found_count].snippet =
strdup(snippet_text ? snippet_text : "");
found_count++; found_count++;
if (title) xmlFree(title); if (title)
if (url) xmlFree(url); xmlFree(title);
if (snippet_text) xmlFree(snippet_text); if (url)
xmlFree(url);
if (snippet_text)
xmlFree(snippet_text);
} }
xmlXPathFreeObject(xpathObj); xmlXPathFreeObject(xpathObj);
@@ -139,7 +147,8 @@ static int parse_startpage(const char *engine_name, xmlDocPtr doc,
xmlXPathEvalExpression((xmlChar *)container_xpath, xpathCtx); xmlXPathEvalExpression((xmlChar *)container_xpath, xpathCtx);
if (!xpathObj || !xpathObj->nodesetval || xpathObj->nodesetval->nodeNr == 0) { if (!xpathObj || !xpathObj->nodesetval || xpathObj->nodesetval->nodeNr == 0) {
if (xpathObj) xmlXPathFreeObject(xpathObj); if (xpathObj)
xmlXPathFreeObject(xpathObj);
xmlXPathFreeContext(xpathCtx); xmlXPathFreeContext(xpathCtx);
return 0; return 0;
} }
@@ -189,12 +198,18 @@ static int parse_startpage(const char *engine_name, xmlDocPtr doc,
found_count++; found_count++;
} }
if (title) xmlFree(title); if (title)
if (url) xmlFree(url); xmlFree(title);
if (snippet_text) xmlFree(snippet_text); if (url)
if (linkObj) xmlXPathFreeObject(linkObj); xmlFree(url);
if (titleObj) xmlXPathFreeObject(titleObj); if (snippet_text)
if (snippetObj) xmlXPathFreeObject(snippetObj); xmlFree(snippet_text);
if (linkObj)
xmlXPathFreeObject(linkObj);
if (titleObj)
xmlXPathFreeObject(titleObj);
if (snippetObj)
xmlXPathFreeObject(snippetObj);
} }
xpathCtx->node = NULL; xpathCtx->node = NULL;
@@ -218,7 +233,8 @@ static int parse_yahoo(const char *engine_name, xmlDocPtr doc,
xmlXPathEvalExpression((xmlChar *)container_xpath, xpathCtx); xmlXPathEvalExpression((xmlChar *)container_xpath, xpathCtx);
if (!xpathObj || !xpathObj->nodesetval || xpathObj->nodesetval->nodeNr == 0) { if (!xpathObj || !xpathObj->nodesetval || xpathObj->nodesetval->nodeNr == 0) {
if (xpathObj) xmlXPathFreeObject(xpathObj); if (xpathObj)
xmlXPathFreeObject(xpathObj);
xmlXPathFreeContext(xpathCtx); xmlXPathFreeContext(xpathCtx);
return 0; return 0;
} }
@@ -269,12 +285,18 @@ static int parse_yahoo(const char *engine_name, xmlDocPtr doc,
found_count++; found_count++;
} }
if (title) xmlFree(title); if (title)
if (url) xmlFree(url); xmlFree(title);
if (snippet_text) xmlFree(snippet_text); if (url)
if (linkObj) xmlXPathFreeObject(linkObj); xmlFree(url);
if (titleObj) xmlXPathFreeObject(titleObj); if (snippet_text)
if (snippetObj) xmlXPathFreeObject(snippetObj); xmlFree(snippet_text);
if (linkObj)
xmlXPathFreeObject(linkObj);
if (titleObj)
xmlXPathFreeObject(titleObj);
if (snippetObj)
xmlXPathFreeObject(snippetObj);
} }
xpathCtx->node = NULL; xpathCtx->node = NULL;
@@ -372,13 +394,11 @@ retry:
} }
int page = (job->page < 1) ? 1 : job->page; int page = (job->page < 1) ? 1 : job->page;
int page_value = (page - 1) * job->engine->page_multiplier + job->engine->page_base; int page_value =
(page - 1) * job->engine->page_multiplier + job->engine->page_base;
snprintf(full_url, sizeof(full_url), "%s%s&%s=%d", snprintf(full_url, sizeof(full_url), "%s%s&%s=%d", job->engine->base_url,
job->engine->base_url, encoded_query, job->engine->page_param, page_value);
encoded_query,
job->engine->page_param,
page_value);
curl_free(encoded_query); curl_free(encoded_query);
struct curl_slist *headers = NULL; struct curl_slist *headers = NULL;
@@ -387,7 +407,10 @@ retry:
snprintf(ref_buf, sizeof(ref_buf), "Referer: %s", job->engine->referer); snprintf(ref_buf, sizeof(ref_buf), "Referer: %s", job->engine->referer);
headers = curl_slist_append(headers, host_buf); headers = curl_slist_append(headers, host_buf);
headers = curl_slist_append(headers, ref_buf); headers = curl_slist_append(headers, ref_buf);
headers = curl_slist_append(headers, "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"); headers = curl_slist_append(
headers,
"Accept: "
"text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8");
headers = curl_slist_append(headers, "Accept-Language: en-US,en;q=0.5"); headers = curl_slist_append(headers, "Accept-Language: en-US,en;q=0.5");
headers = curl_slist_append(headers, "DNT: 1"); headers = curl_slist_append(headers, "DNT: 1");
@@ -443,12 +466,14 @@ retry:
struct curl_slist *headers; struct curl_slist *headers;
curl_easy_getinfo(handle, CURLINFO_PRIVATE, &headers); curl_easy_getinfo(handle, CURLINFO_PRIVATE, &headers);
if (headers) curl_slist_free_all(headers); if (headers)
curl_slist_free_all(headers);
free(job->response.memory); free(job->response.memory);
job->response.memory = NULL; job->response.memory = NULL;
curl_multi_remove_handle(multi_handle, handle); curl_multi_remove_handle(multi_handle, handle);
if (handle) curl_easy_cleanup(handle); if (handle)
curl_easy_cleanup(handle);
job->handle = NULL; job->handle = NULL;
break; break;
} }
@@ -477,14 +502,12 @@ retry:
int scrape_engine(const SearchEngine *engine, const char *query, int scrape_engine(const SearchEngine *engine, const char *query,
SearchResult **out_results, int max_results) { SearchResult **out_results, int max_results) {
ScrapeJob job = { ScrapeJob job = {.engine = engine,
.engine = engine,
.query = (char *)query, .query = (char *)query,
.out_results = out_results, .out_results = out_results,
.max_results = max_results, .max_results = max_results,
.results_count = 0, .results_count = 0,
.page = 1 .page = 1};
};
scrape_engines_parallel(&job, 1); scrape_engines_parallel(&job, 1);
return job.results_count; return job.results_count;

View File

@@ -1,8 +1,8 @@
#ifndef SCRAPING_H #ifndef SCRAPING_H
#define SCRAPING_H #define SCRAPING_H
#include <libxml/HTMLparser.h>
#include <curl/curl.h> #include <curl/curl.h>
#include <libxml/HTMLparser.h>
typedef struct { typedef struct {
char *url; char *url;

View File

@@ -5,7 +5,8 @@
#include <strings.h> #include <strings.h>
char *pretty_display_url(const char *input) { char *pretty_display_url(const char *input) {
if (!input) return NULL; if (!input)
return NULL;
const char *start = input; const char *start = input;
@@ -28,7 +29,8 @@ char *pretty_display_url(const char *input) {
} }
char *output = (char *)malloc(strlen(temp) * 3 + 1); char *output = (char *)malloc(strlen(temp) * 3 + 1);
if (!output) return NULL; if (!output)
return NULL;
size_t j = 0; size_t j = 0;
for (size_t i = 0; temp[i] != '\0'; i++) { for (size_t i = 0; temp[i] != '\0'; i++) {

View File

@@ -4,7 +4,8 @@
#include <string.h> #include <string.h>
char *unescape_search_url(const char *input) { char *unescape_search_url(const char *input) {
if (!input) return NULL; if (!input)
return NULL;
const char *key = NULL; const char *key = NULL;
const char *start = NULL; const char *start = NULL;
@@ -14,7 +15,8 @@ char *unescape_search_url(const char *input) {
if (strstr(input, "uddg=")) { if (strstr(input, "uddg=")) {
key = "uddg="; key = "uddg=";
start = strstr(input, key); start = strstr(input, key);
if (!start) return NULL; if (!start)
return NULL;
start += strlen(key); start += strlen(key);
end = strchr(start, '&'); end = strchr(start, '&');
len = end ? (size_t)(end - start) : strlen(start); len = end ? (size_t)(end - start) : strlen(start);
@@ -23,7 +25,8 @@ char *unescape_search_url(const char *input) {
else if (strstr(input, "RU=")) { else if (strstr(input, "RU=")) {
key = "RU="; key = "RU=";
start = strstr(input, key); start = strstr(input, key);
if (!start) return strdup(input); if (!start)
return strdup(input);
start += strlen(key); start += strlen(key);
end = strchr(start, '/'); end = strchr(start, '/');
len = end ? (size_t)(end - start) : strlen(start); len = end ? (size_t)(end - start) : strlen(start);
@@ -34,7 +37,8 @@ char *unescape_search_url(const char *input) {
} }
char *output = (char *)malloc(len * 3 + 1); char *output = (char *)malloc(len * 3 + 1);
if (!output) return NULL; if (!output)
return NULL;
size_t i = 0, j = 0; size_t i = 0, j = 0;
while (i < len) { while (i < len) {
@@ -60,7 +64,8 @@ char *unescape_search_url(const char *input) {
} }
char *url_decode_query(const char *src) { char *url_decode_query(const char *src) {
if (!src) return NULL; if (!src)
return NULL;
char *res = strdup(src); char *res = strdup(src);
char *p = res; char *p = res;
while (*src) { while (*src) {

View File

@@ -7,4 +7,3 @@ char *unescape_search_url(const char *input);
char *url_decode_query(const char *src); char *url_decode_query(const char *src);
#endif #endif

View File

@@ -1,8 +1,11 @@
#include "Utility.h" #include "Utility.h"
int hex_to_int(char c) { int hex_to_int(char c) {
if (c >= '0' && c <= '9') return c - '0'; if (c >= '0' && c <= '9')
if (c >= 'a' && c <= 'f') return c - 'a' + 10; return c - '0';
if (c >= 'A' && c <= 'F') return c - 'A' + 10; if (c >= 'a' && c <= 'f')
return c - 'a' + 10;
if (c >= 'A' && c <= 'F')
return c - 'A' + 10;
return -1; return -1;
} }