those who commit
This commit is contained in:
20
src/Config.c
20
src/Config.c
@@ -24,7 +24,8 @@ int load_config(const char *filename, Config *config) {
|
|||||||
char *end = strchr(line, ']');
|
char *end = strchr(line, ']');
|
||||||
if (end) {
|
if (end) {
|
||||||
*end = '\0';
|
*end = '\0';
|
||||||
snprintf(section, sizeof(section), "%.*s", (int)(sizeof(section) - 1), line + 1);
|
snprintf(section, sizeof(section), "%.*s", (int)(sizeof(section) - 1),
|
||||||
|
line + 1);
|
||||||
section[sizeof(section) - 1] = '\0';
|
section[sizeof(section) - 1] = '\0';
|
||||||
}
|
}
|
||||||
continue;
|
continue;
|
||||||
@@ -36,8 +37,10 @@ int load_config(const char *filename, Config *config) {
|
|||||||
char *key = line;
|
char *key = line;
|
||||||
char *value = delimiter + 1;
|
char *value = delimiter + 1;
|
||||||
|
|
||||||
while (*key == ' ' || *key == '\t') key++;
|
while (*key == ' ' || *key == '\t')
|
||||||
while (*value == ' ' || *value == '\t') value++;
|
key++;
|
||||||
|
while (*value == ' ' || *value == '\t')
|
||||||
|
value++;
|
||||||
|
|
||||||
char *key_end = key + strlen(key) - 1;
|
char *key_end = key + strlen(key) - 1;
|
||||||
while (key_end > key && (*key_end == ' ' || *key_end == '\t')) {
|
while (key_end > key && (*key_end == ' ' || *key_end == '\t')) {
|
||||||
@@ -46,12 +49,14 @@ int load_config(const char *filename, Config *config) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
char *value_end = value + strlen(value) - 1;
|
char *value_end = value + strlen(value) - 1;
|
||||||
while (value_end > value && (*value_end == ' ' || *value_end == '\t' || *value_end == '"' || *value_end == '\'')) {
|
while (value_end > value && (*value_end == ' ' || *value_end == '\t' ||
|
||||||
|
*value_end == '"' || *value_end == '\'')) {
|
||||||
*value_end = '\0';
|
*value_end = '\0';
|
||||||
value_end--;
|
value_end--;
|
||||||
}
|
}
|
||||||
|
|
||||||
while (*value == '"' || *value == '\'') value++;
|
while (*value == '"' || *value == '\'')
|
||||||
|
value++;
|
||||||
|
|
||||||
if (strcmp(section, "server") == 0) {
|
if (strcmp(section, "server") == 0) {
|
||||||
if (strcmp(key, "host") == 0) {
|
if (strcmp(key, "host") == 0) {
|
||||||
@@ -65,7 +70,8 @@ int load_config(const char *filename, Config *config) {
|
|||||||
strncpy(config->proxy, value, sizeof(config->proxy) - 1);
|
strncpy(config->proxy, value, sizeof(config->proxy) - 1);
|
||||||
config->proxy[sizeof(config->proxy) - 1] = '\0';
|
config->proxy[sizeof(config->proxy) - 1] = '\0';
|
||||||
} else if (strcmp(key, "list_file") == 0) {
|
} else if (strcmp(key, "list_file") == 0) {
|
||||||
strncpy(config->proxy_list_file, value, sizeof(config->proxy_list_file) - 1);
|
strncpy(config->proxy_list_file, value,
|
||||||
|
sizeof(config->proxy_list_file) - 1);
|
||||||
config->proxy_list_file[sizeof(config->proxy_list_file) - 1] = '\0';
|
config->proxy_list_file[sizeof(config->proxy_list_file) - 1] = '\0';
|
||||||
} else if (strcmp(key, "max_retries") == 0) {
|
} else if (strcmp(key, "max_retries") == 0) {
|
||||||
config->max_proxy_retries = atoi(value);
|
config->max_proxy_retries = atoi(value);
|
||||||
@@ -80,4 +86,4 @@ int load_config(const char *filename, Config *config) {
|
|||||||
|
|
||||||
fclose(file);
|
fclose(file);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -13,4 +13,4 @@ typedef struct {
|
|||||||
|
|
||||||
int load_config(const char *filename, Config *config);
|
int load_config(const char *filename, Config *config);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@@ -1,9 +1,9 @@
|
|||||||
#include "Calculator.h"
|
#include "Calculator.h"
|
||||||
|
#include <ctype.h>
|
||||||
|
#include <math.h>
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include <math.h>
|
|
||||||
#include <ctype.h>
|
|
||||||
|
|
||||||
static char logic_log[4096];
|
static char logic_log[4096];
|
||||||
|
|
||||||
@@ -15,7 +15,8 @@ typedef struct {
|
|||||||
static double parse_expression(Parser *p);
|
static double parse_expression(Parser *p);
|
||||||
|
|
||||||
static void skip_ws(Parser *p) {
|
static void skip_ws(Parser *p) {
|
||||||
while (p->buffer[p->pos] == ' ') p->pos++;
|
while (p->buffer[p->pos] == ' ')
|
||||||
|
p->pos++;
|
||||||
}
|
}
|
||||||
|
|
||||||
static double parse_factor(Parser *p) {
|
static double parse_factor(Parser *p) {
|
||||||
@@ -27,7 +28,8 @@ static double parse_factor(Parser *p) {
|
|||||||
if (p->buffer[p->pos] == '(') {
|
if (p->buffer[p->pos] == '(') {
|
||||||
p->pos++;
|
p->pos++;
|
||||||
double res = parse_expression(p);
|
double res = parse_expression(p);
|
||||||
if (p->buffer[p->pos] == ')') p->pos++;
|
if (p->buffer[p->pos] == ')')
|
||||||
|
p->pos++;
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
char *endptr;
|
char *endptr;
|
||||||
@@ -50,7 +52,7 @@ static double parse_term(Parser *p) {
|
|||||||
char step[256];
|
char step[256];
|
||||||
|
|
||||||
snprintf(step, sizeof(step), "<div>%g %c %g = <b>%g</b></div>", old, op,
|
snprintf(step, sizeof(step), "<div>%g %c %g = <b>%g</b></div>", old, op,
|
||||||
right, left);
|
right, left);
|
||||||
strncat(logic_log, step, sizeof(logic_log) - strlen(logic_log) - 1);
|
strncat(logic_log, step, sizeof(logic_log) - strlen(logic_log) - 1);
|
||||||
} else
|
} else
|
||||||
break;
|
break;
|
||||||
@@ -72,7 +74,7 @@ static double parse_expression(Parser *p) {
|
|||||||
char step[256];
|
char step[256];
|
||||||
|
|
||||||
snprintf(step, sizeof(step), "<div>%g %c %g = <b>%g</b></div>", old, op,
|
snprintf(step, sizeof(step), "<div>%g %c %g = <b>%g</b></div>", old, op,
|
||||||
right, left);
|
right, left);
|
||||||
strncat(logic_log, step, sizeof(logic_log) - strlen(logic_log) - 1);
|
strncat(logic_log, step, sizeof(logic_log) - strlen(logic_log) - 1);
|
||||||
} else
|
} else
|
||||||
break;
|
break;
|
||||||
@@ -82,33 +84,34 @@ static double parse_expression(Parser *p) {
|
|||||||
|
|
||||||
double evaluate(const char *expr) {
|
double evaluate(const char *expr) {
|
||||||
logic_log[0] = '\0';
|
logic_log[0] = '\0';
|
||||||
if (!expr || strlen(expr) == 0) return 0.0;
|
if (!expr || strlen(expr) == 0)
|
||||||
|
return 0.0;
|
||||||
Parser p = {expr, 0};
|
Parser p = {expr, 0};
|
||||||
return parse_expression(&p);
|
return parse_expression(&p);
|
||||||
}
|
}
|
||||||
|
|
||||||
InfoBox fetch_calc_data(char *math_input) {
|
InfoBox fetch_calc_data(char *math_input) {
|
||||||
InfoBox info = {NULL, NULL, NULL, NULL};
|
InfoBox info = {NULL, NULL, NULL, NULL};
|
||||||
if (!math_input) return info;
|
if (!math_input)
|
||||||
|
return info;
|
||||||
|
|
||||||
double result = evaluate(math_input);
|
double result = evaluate(math_input);
|
||||||
|
|
||||||
char html_output[5120];
|
char html_output[5120];
|
||||||
snprintf(html_output, sizeof(html_output),
|
snprintf(html_output, sizeof(html_output),
|
||||||
"<div class='calc-container' style='line-height: 1.6;'>"
|
"<div class='calc-container' style='line-height: 1.6;'>"
|
||||||
"%s"
|
"%s"
|
||||||
"<div style='margin-top: 8px; border-top: 1px solid #eee; "
|
"<div style='margin-top: 8px; border-top: 1px solid #eee; "
|
||||||
"padding-top: 8px; font-size: 1.2em;'>"
|
"padding-top: 8px; font-size: 1.2em;'>"
|
||||||
"<b>%g</b>"
|
"<b>%g</b>"
|
||||||
"</div>"
|
"</div>"
|
||||||
"</div>",
|
"</div>",
|
||||||
strlen(logic_log) > 0 ? logic_log : "<div>Constant value</div>",
|
strlen(logic_log) > 0 ? logic_log : "<div>Constant value</div>",
|
||||||
result);
|
result);
|
||||||
|
|
||||||
info.title = strdup("Calculation");
|
info.title = strdup("Calculation");
|
||||||
info.extract = strdup(html_output);
|
info.extract = strdup(html_output);
|
||||||
info.thumbnail_url =
|
info.thumbnail_url = strdup("/static/calculation.svg");
|
||||||
strdup("/static/calculation.svg");
|
|
||||||
info.url = strdup("#");
|
info.url = strdup("#");
|
||||||
|
|
||||||
return info;
|
return info;
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
#include "Dictionary.h"
|
#include "Dictionary.h"
|
||||||
#include "../Proxy/Proxy.h"
|
#include "../Proxy/Proxy.h"
|
||||||
#include "../Scraping/Scraping.h"
|
#include "../Scraping/Scraping.h"
|
||||||
|
#include <ctype.h>
|
||||||
#include <curl/curl.h>
|
#include <curl/curl.h>
|
||||||
#include <libxml/HTMLparser.h>
|
#include <libxml/HTMLparser.h>
|
||||||
#include <libxml/xpath.h>
|
#include <libxml/xpath.h>
|
||||||
@@ -8,42 +9,60 @@
|
|||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include <strings.h>
|
#include <strings.h>
|
||||||
#include <ctype.h>
|
|
||||||
|
|
||||||
static const char *PREFIXES[] = {
|
static const char *PREFIXES[] = {"what is the definition of ",
|
||||||
"what is the definition of ", "what's the definition of ",
|
"what's the definition of ",
|
||||||
"what is the meaning of ", "what's the meaning of ",
|
"what is the meaning of ",
|
||||||
"what does the word ", "definition of ", "meaning of ", "def of ",
|
"what's the meaning of ",
|
||||||
"define ", "definition ", "define:", "def ", "def:",
|
"what does the word ",
|
||||||
"what does ", "what is ", "what's ", "whats ",
|
"definition of ",
|
||||||
"meaning ", "dictionary ", "dict ", NULL
|
"meaning of ",
|
||||||
};
|
"def of ",
|
||||||
|
"define ",
|
||||||
|
"definition ",
|
||||||
|
"define:",
|
||||||
|
"def ",
|
||||||
|
"def:",
|
||||||
|
"what does ",
|
||||||
|
"what is ",
|
||||||
|
"what's ",
|
||||||
|
"whats ",
|
||||||
|
"meaning ",
|
||||||
|
"dictionary ",
|
||||||
|
"dict ",
|
||||||
|
NULL};
|
||||||
|
|
||||||
static const char *SUFFIXES[] = {
|
static const char *SUFFIXES[] = {
|
||||||
" definition", " def", " meaning", " mean", " means",
|
" definition", " def", " meaning", " mean", " means",
|
||||||
" dictionary", " dict", " define", " defined",
|
" dictionary", " dict", " define", " defined", " definition?",
|
||||||
" definition?", " def?", " meaning?", " mean?", " means?",
|
" def?", " meaning?", " mean?", " means?", " in english",
|
||||||
" in english", " in english?", NULL
|
" in english?", NULL};
|
||||||
};
|
|
||||||
|
|
||||||
static const char *SKIP_WORDS[] = {"of ", "the ", "a ", "an ", NULL};
|
static const char *SKIP_WORDS[] = {"of ", "the ", "a ", "an ", NULL};
|
||||||
|
|
||||||
static const char *strcasestr_impl(const char *haystack, const char *needle) {
|
static const char *strcasestr_impl(const char *haystack, const char *needle) {
|
||||||
if (!haystack || !needle || !*needle) return haystack;
|
if (!haystack || !needle || !*needle)
|
||||||
|
return haystack;
|
||||||
size_t len = strlen(needle);
|
size_t len = strlen(needle);
|
||||||
for (const char *h = haystack; *h; h++) {
|
for (const char *h = haystack; *h; h++) {
|
||||||
if (strncasecmp(h, needle, len) == 0) return h;
|
if (strncasecmp(h, needle, len) == 0)
|
||||||
|
return h;
|
||||||
}
|
}
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
struct MemStruct { char *memory; size_t size; };
|
struct MemStruct {
|
||||||
|
char *memory;
|
||||||
|
size_t size;
|
||||||
|
};
|
||||||
|
|
||||||
static size_t WriteCallback(void *contents, size_t size, size_t nmemb, void *userp) {
|
static size_t WriteCallback(void *contents, size_t size, size_t nmemb,
|
||||||
|
void *userp) {
|
||||||
size_t realsize = size * nmemb;
|
size_t realsize = size * nmemb;
|
||||||
struct MemStruct *mem = (struct MemStruct *)userp;
|
struct MemStruct *mem = (struct MemStruct *)userp;
|
||||||
char *ptr = realloc(mem->memory, mem->size + realsize + 1);
|
char *ptr = realloc(mem->memory, mem->size + realsize + 1);
|
||||||
if (!ptr) return 0;
|
if (!ptr)
|
||||||
|
return 0;
|
||||||
mem->memory = ptr;
|
mem->memory = ptr;
|
||||||
memcpy(&(mem->memory[mem->size]), contents, realsize);
|
memcpy(&(mem->memory[mem->size]), contents, realsize);
|
||||||
mem->size += realsize;
|
mem->size += realsize;
|
||||||
@@ -53,40 +72,57 @@ static size_t WriteCallback(void *contents, size_t size, size_t nmemb, void *use
|
|||||||
|
|
||||||
static char *xpath_text(xmlDocPtr doc, const char *xpath) {
|
static char *xpath_text(xmlDocPtr doc, const char *xpath) {
|
||||||
xmlXPathContextPtr ctx = xmlXPathNewContext(doc);
|
xmlXPathContextPtr ctx = xmlXPathNewContext(doc);
|
||||||
if (!ctx) return NULL;
|
if (!ctx)
|
||||||
|
return NULL;
|
||||||
xmlXPathObjectPtr obj = xmlXPathEvalExpression((const xmlChar *)xpath, ctx);
|
xmlXPathObjectPtr obj = xmlXPathEvalExpression((const xmlChar *)xpath, ctx);
|
||||||
xmlXPathFreeContext(ctx);
|
xmlXPathFreeContext(ctx);
|
||||||
if (!obj || !obj->nodesetval || obj->nodesetval->nodeNr == 0) {
|
if (!obj || !obj->nodesetval || obj->nodesetval->nodeNr == 0) {
|
||||||
if (obj) xmlXPathFreeObject(obj);
|
if (obj)
|
||||||
|
xmlXPathFreeObject(obj);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
xmlChar *content = xmlNodeGetContent(obj->nodesetval->nodeTab[0]);
|
xmlChar *content = xmlNodeGetContent(obj->nodesetval->nodeTab[0]);
|
||||||
char *result = content ? strdup((char *)content) : NULL;
|
char *result = content ? strdup((char *)content) : NULL;
|
||||||
if (content) xmlFree(content);
|
if (content)
|
||||||
|
xmlFree(content);
|
||||||
xmlXPathFreeObject(obj);
|
xmlXPathFreeObject(obj);
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
static char *build_html(const char *word, const char *pron, const char *pos,
|
static char *build_html(const char *word, const char *pron, const char *pos,
|
||||||
const char *def, const char *ex) {
|
const char *def, const char *ex) {
|
||||||
char html[4096];
|
char html[4096];
|
||||||
int n = snprintf(html, sizeof(html), "<div class='dict-container' style='line-height: 1.6;'>");
|
int n = snprintf(html, sizeof(html),
|
||||||
if (word) n += snprintf(html + n, sizeof(html) - n,
|
"<div class='dict-container' style='line-height: 1.6;'>");
|
||||||
"<div style='font-size: 1.3em; font-weight: bold; margin-bottom: 4px;'>%s</div>", word);
|
if (word)
|
||||||
if (pron) n += snprintf(html + n, sizeof(html) - n,
|
n += snprintf(html + n, sizeof(html) - n,
|
||||||
"<div style='color: #666; margin-bottom: 8px;'>/%s/</div>", pron);
|
"<div style='font-size: 1.3em; font-weight: bold; "
|
||||||
if (pos) n += snprintf(html + n, sizeof(html) - n,
|
"margin-bottom: 4px;'>%s</div>",
|
||||||
"<div style='font-style: italic; color: #888; margin-bottom: 8px;'>%s</div>", pos);
|
word);
|
||||||
if (def) n += snprintf(html + n, sizeof(html) - n,
|
if (pron)
|
||||||
"<div style='margin-bottom: 8px;'>%s</div>", def);
|
n += snprintf(html + n, sizeof(html) - n,
|
||||||
if (ex) n += snprintf(html + n, sizeof(html) - n,
|
"<div style='color: #666; margin-bottom: 8px;'>/%s/</div>",
|
||||||
"<div style='color: #555; font-style: italic; margin-top: 8px;'>\"%s\"</div>", ex);
|
pron);
|
||||||
|
if (pos)
|
||||||
|
n += snprintf(html + n, sizeof(html) - n,
|
||||||
|
"<div style='font-style: italic; color: #888; margin-bottom: "
|
||||||
|
"8px;'>%s</div>",
|
||||||
|
pos);
|
||||||
|
if (def)
|
||||||
|
n += snprintf(html + n, sizeof(html) - n,
|
||||||
|
"<div style='margin-bottom: 8px;'>%s</div>", def);
|
||||||
|
if (ex)
|
||||||
|
n += snprintf(html + n, sizeof(html) - n,
|
||||||
|
"<div style='color: #555; font-style: italic; margin-top: "
|
||||||
|
"8px;'>\"%s\"</div>",
|
||||||
|
ex);
|
||||||
snprintf(html + n, sizeof(html) - n, "</div>");
|
snprintf(html + n, sizeof(html) - n, "</div>");
|
||||||
return strdup(html);
|
return strdup(html);
|
||||||
}
|
}
|
||||||
|
|
||||||
static char *extract_word(const char *query) {
|
static char *extract_word(const char *query) {
|
||||||
if (!query) return NULL;
|
if (!query)
|
||||||
|
return NULL;
|
||||||
|
|
||||||
const char *start = query;
|
const char *start = query;
|
||||||
|
|
||||||
@@ -98,9 +134,11 @@ static char *extract_word(const char *query) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
while (*start == ' ') start++;
|
while (*start == ' ')
|
||||||
|
start++;
|
||||||
char *word = strdup(start);
|
char *word = strdup(start);
|
||||||
if (!word) return NULL;
|
if (!word)
|
||||||
|
return NULL;
|
||||||
|
|
||||||
int changed = 1;
|
int changed = 1;
|
||||||
while (changed) {
|
while (changed) {
|
||||||
@@ -130,29 +168,37 @@ static char *extract_word(const char *query) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
size_t len = strlen(word);
|
size_t len = strlen(word);
|
||||||
while (len > 0 && (word[len-1] == ' ' || word[len-1] == '?' ||
|
while (len > 0 && (word[len - 1] == ' ' || word[len - 1] == '?' ||
|
||||||
word[len-1] == '!' || word[len-1] == '.')) {
|
word[len - 1] == '!' || word[len - 1] == '.')) {
|
||||||
word[--len] = '\0';
|
word[--len] = '\0';
|
||||||
}
|
}
|
||||||
|
|
||||||
if (len == 0) { free(word); return NULL; }
|
if (len == 0) {
|
||||||
|
free(word);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
for (size_t i = 0; i < len; i++) word[i] = tolower((unsigned char)word[i]);
|
for (size_t i = 0; i < len; i++)
|
||||||
|
word[i] = tolower((unsigned char)word[i]);
|
||||||
char *space = strchr(word, ' ');
|
char *space = strchr(word, ' ');
|
||||||
if (space) *space = '\0';
|
if (space)
|
||||||
|
*space = '\0';
|
||||||
|
|
||||||
return word;
|
return word;
|
||||||
}
|
}
|
||||||
|
|
||||||
int is_dictionary_query(const char *query) {
|
int is_dictionary_query(const char *query) {
|
||||||
if (!query) return 0;
|
if (!query)
|
||||||
|
return 0;
|
||||||
|
|
||||||
for (int i = 0; PREFIXES[i]; i++) {
|
for (int i = 0; PREFIXES[i]; i++) {
|
||||||
size_t len = strlen(PREFIXES[i]);
|
size_t len = strlen(PREFIXES[i]);
|
||||||
if (strncasecmp(query, PREFIXES[i], len) == 0) {
|
if (strncasecmp(query, PREFIXES[i], len) == 0) {
|
||||||
const char *after = query + len;
|
const char *after = query + len;
|
||||||
while (*after == ' ') after++;
|
while (*after == ' ')
|
||||||
if (*after != '\0') return 1;
|
after++;
|
||||||
|
if (*after != '\0')
|
||||||
|
return 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -160,23 +206,29 @@ int is_dictionary_query(const char *query) {
|
|||||||
const char *pos = strcasestr_impl(query, SUFFIXES[i]);
|
const char *pos = strcasestr_impl(query, SUFFIXES[i]);
|
||||||
if (pos) {
|
if (pos) {
|
||||||
const char *after = pos + strlen(SUFFIXES[i]);
|
const char *after = pos + strlen(SUFFIXES[i]);
|
||||||
while (*after == ' ' || *after == '?' || *after == '!' || *after == '.') after++;
|
while (*after == ' ' || *after == '?' || *after == '!' || *after == '.')
|
||||||
if (*after == '\0' && pos > query && (pos - query) < 100) return 1;
|
after++;
|
||||||
|
if (*after == '\0' && pos > query && (pos - query) < 100)
|
||||||
|
return 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (strncasecmp(query, "what is ", 8) == 0 ||
|
if (strncasecmp(query, "what is ", 8) == 0 ||
|
||||||
strncasecmp(query, "what's ", 7) == 0 ||
|
strncasecmp(query, "what's ", 7) == 0 ||
|
||||||
strncasecmp(query, "whats ", 6) == 0) {
|
strncasecmp(query, "whats ", 6) == 0) {
|
||||||
const char *word = query + (strncasecmp(query, "what is ", 8) == 0 ? 8 :
|
const char *word = query + (strncasecmp(query, "what is ", 8) == 0 ? 8
|
||||||
strncasecmp(query, "what's ", 7) == 0 ? 7 : 6);
|
: strncasecmp(query, "what's ", 7) == 0 ? 7
|
||||||
const char *articles[] = {"the ", "your ", "my ", "his ", "her ", "their ",
|
: 6);
|
||||||
"our ", "this ", "that ", "these ", "those ", "a ", "an ", NULL};
|
const char *articles[] = {"the ", "your ", "my ", "his ", "her ",
|
||||||
|
"their ", "our ", "this ", "that ", "these ",
|
||||||
|
"those ", "a ", "an ", NULL};
|
||||||
for (int i = 0; articles[i]; i++) {
|
for (int i = 0; articles[i]; i++) {
|
||||||
if (strncasecmp(word, articles[i], strlen(articles[i])) == 0) return 0;
|
if (strncasecmp(word, articles[i], strlen(articles[i])) == 0)
|
||||||
|
return 0;
|
||||||
}
|
}
|
||||||
const char *space = strchr(word, ' ');
|
const char *space = strchr(word, ' ');
|
||||||
if (!space || *(space + 1) == '\0' || *(space + 1) == '?') return 1;
|
if (!space || *(space + 1) == '\0' || *(space + 1) == '?')
|
||||||
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
@@ -184,10 +236,14 @@ int is_dictionary_query(const char *query) {
|
|||||||
|
|
||||||
char *construct_dictionary_url(const char *query) {
|
char *construct_dictionary_url(const char *query) {
|
||||||
char *word = extract_word(query);
|
char *word = extract_word(query);
|
||||||
if (!word) return NULL;
|
if (!word)
|
||||||
|
return NULL;
|
||||||
|
|
||||||
CURL *curl = curl_easy_init();
|
CURL *curl = curl_easy_init();
|
||||||
if (!curl) { free(word); return NULL; }
|
if (!curl) {
|
||||||
|
free(word);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
char *escaped = curl_easy_escape(curl, word, 0);
|
char *escaped = curl_easy_escape(curl, word, 0);
|
||||||
const char *base = "https://dictionary.cambridge.org/dictionary/english/";
|
const char *base = "https://dictionary.cambridge.org/dictionary/english/";
|
||||||
@@ -207,10 +263,14 @@ InfoBox fetch_dictionary_data(const char *query) {
|
|||||||
InfoBox info = {NULL, NULL, NULL, NULL};
|
InfoBox info = {NULL, NULL, NULL, NULL};
|
||||||
|
|
||||||
char *url = construct_dictionary_url(query);
|
char *url = construct_dictionary_url(query);
|
||||||
if (!url) return info;
|
if (!url)
|
||||||
|
return info;
|
||||||
|
|
||||||
CURL *curl = curl_easy_init();
|
CURL *curl = curl_easy_init();
|
||||||
if (!curl) { free(url); return info; }
|
if (!curl) {
|
||||||
|
free(url);
|
||||||
|
return info;
|
||||||
|
}
|
||||||
|
|
||||||
struct MemStruct chunk = {malloc(1), 0};
|
struct MemStruct chunk = {malloc(1), 0};
|
||||||
curl_easy_setopt(curl, CURLOPT_URL, url);
|
curl_easy_setopt(curl, CURLOPT_URL, url);
|
||||||
@@ -222,10 +282,13 @@ InfoBox fetch_dictionary_data(const char *query) {
|
|||||||
|
|
||||||
if (curl_easy_perform(curl) == CURLE_OK && chunk.size > 0) {
|
if (curl_easy_perform(curl) == CURLE_OK && chunk.size > 0) {
|
||||||
htmlDocPtr doc = htmlReadMemory(chunk.memory, chunk.size, url, NULL,
|
htmlDocPtr doc = htmlReadMemory(chunk.memory, chunk.size, url, NULL,
|
||||||
HTML_PARSE_RECOVER | HTML_PARSE_NOERROR | HTML_PARSE_NOWARNING);
|
HTML_PARSE_RECOVER | HTML_PARSE_NOERROR |
|
||||||
|
HTML_PARSE_NOWARNING);
|
||||||
if (doc) {
|
if (doc) {
|
||||||
char *word = xpath_text(doc, "//span[@class='hw dhw']");
|
char *word = xpath_text(doc, "//span[@class='hw dhw']");
|
||||||
char *pron = xpath_text(doc, "//span[@class='us dpron-i']//span[@class='ipa dipa lpr-2 lpl-1']");
|
char *pron = xpath_text(
|
||||||
|
doc,
|
||||||
|
"//span[@class='us dpron-i']//span[@class='ipa dipa lpr-2 lpl-1']");
|
||||||
char *pos = xpath_text(doc, "//span[@class='pos dpos']");
|
char *pos = xpath_text(doc, "//span[@class='pos dpos']");
|
||||||
char *def = xpath_text(doc, "(//div[@class='def ddef_d db'])[1]");
|
char *def = xpath_text(doc, "(//div[@class='def ddef_d db'])[1]");
|
||||||
char *ex = xpath_text(doc, "(//span[@class='eg deg'])[1]");
|
char *ex = xpath_text(doc, "(//span[@class='eg deg'])[1]");
|
||||||
@@ -237,7 +300,11 @@ InfoBox fetch_dictionary_data(const char *query) {
|
|||||||
info.url = strdup(url);
|
info.url = strdup(url);
|
||||||
}
|
}
|
||||||
|
|
||||||
free(word); free(pron); free(pos); free(def); free(ex);
|
free(word);
|
||||||
|
free(pron);
|
||||||
|
free(pos);
|
||||||
|
free(def);
|
||||||
|
free(ex);
|
||||||
xmlFreeDoc(doc);
|
xmlFreeDoc(doc);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -246,4 +313,4 @@ InfoBox fetch_dictionary_data(const char *query) {
|
|||||||
free(chunk.memory);
|
free(chunk.memory);
|
||||||
free(url);
|
free(url);
|
||||||
return info;
|
return info;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -7,4 +7,4 @@ InfoBox fetch_dictionary_data(const char *word);
|
|||||||
char *construct_dictionary_url(const char *word);
|
char *construct_dictionary_url(const char *word);
|
||||||
int is_dictionary_query(const char *query);
|
int is_dictionary_query(const char *query);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@@ -23,49 +23,73 @@ typedef struct {
|
|||||||
} UnitDef;
|
} UnitDef;
|
||||||
|
|
||||||
static const UnitDef UNITS[] = {
|
static const UnitDef UNITS[] = {
|
||||||
{"metre", {"m", "metres", "meter", "meters"}, UNIT_LENGTH, 1.0},
|
{"metre", {"m", "metres", "meter", "meters"}, UNIT_LENGTH, 1.0},
|
||||||
{"kilometre", {"km", "kilometres", "kilometer", "kilometers"}, UNIT_LENGTH, 1000.0},
|
{"kilometre",
|
||||||
{"centimetre", {"cm", "centimetres", "centimeter", "centimeters"}, UNIT_LENGTH, 0.01},
|
{"km", "kilometres", "kilometer", "kilometers"},
|
||||||
{"millimetre", {"mm", "millimetres", "millimeter", "millimeters"}, UNIT_LENGTH, 0.001},
|
UNIT_LENGTH,
|
||||||
{"mile", {"mi", "miles"}, UNIT_LENGTH, 1609.344},
|
1000.0},
|
||||||
{"yard", {"yd", "yards"}, UNIT_LENGTH, 0.9144},
|
{"centimetre",
|
||||||
{"foot", {"ft", "feet", "'"}, UNIT_LENGTH, 0.3048},
|
{"cm", "centimetres", "centimeter", "centimeters"},
|
||||||
{"inch", {"in", "inches", "\""}, UNIT_LENGTH, 0.0254},
|
UNIT_LENGTH,
|
||||||
|
0.01},
|
||||||
|
{"millimetre",
|
||||||
|
{"mm", "millimetres", "millimeter", "millimeters"},
|
||||||
|
UNIT_LENGTH,
|
||||||
|
0.001},
|
||||||
|
{"mile", {"mi", "miles"}, UNIT_LENGTH, 1609.344},
|
||||||
|
{"yard", {"yd", "yards"}, UNIT_LENGTH, 0.9144},
|
||||||
|
{"foot", {"ft", "feet", "'"}, UNIT_LENGTH, 0.3048},
|
||||||
|
{"inch", {"in", "inches", "\""}, UNIT_LENGTH, 0.0254},
|
||||||
|
|
||||||
{"kilogram", {"kg", "kilograms", "kilo", "kilos"}, UNIT_WEIGHT, 1.0},
|
{"kilogram", {"kg", "kilograms", "kilo", "kilos"}, UNIT_WEIGHT, 1.0},
|
||||||
{"gram", {"g", "grams"}, UNIT_WEIGHT, 0.001},
|
{"gram", {"g", "grams"}, UNIT_WEIGHT, 0.001},
|
||||||
{"milligram", {"mg", "milligrams"}, UNIT_WEIGHT, 0.000001},
|
{"milligram", {"mg", "milligrams"}, UNIT_WEIGHT, 0.000001},
|
||||||
{"pound", {"lb", "lbs", "pounds"}, UNIT_WEIGHT, 0.453592},
|
{"pound", {"lb", "lbs", "pounds"}, UNIT_WEIGHT, 0.453592},
|
||||||
{"ounce", {"oz", "ounces"}, UNIT_WEIGHT, 0.0283495},
|
{"ounce", {"oz", "ounces"}, UNIT_WEIGHT, 0.0283495},
|
||||||
{"tonne", {"tonnes", "tons", "ton"}, UNIT_WEIGHT, 1000.0},
|
{"tonne", {"tonnes", "tons", "ton"}, UNIT_WEIGHT, 1000.0},
|
||||||
{"stone", {"st", "stones"}, UNIT_WEIGHT, 6.35029},
|
{"stone", {"st", "stones"}, UNIT_WEIGHT, 6.35029},
|
||||||
|
|
||||||
{"celsius", {"c", "°c", "degrees celsius", "degrees c"}, UNIT_TEMP, 1.0},
|
{"celsius", {"c", "°c", "degrees celsius", "degrees c"}, UNIT_TEMP, 1.0},
|
||||||
{"fahrenheit", {"f", "°f", "degrees fahrenheit", "degrees f"}, UNIT_TEMP, 1.0},
|
{"fahrenheit",
|
||||||
{"kelvin", {"k", "degrees kelvin", "degrees k"}, UNIT_TEMP, 1.0},
|
{"f", "°f", "degrees fahrenheit", "degrees f"},
|
||||||
|
UNIT_TEMP,
|
||||||
|
1.0},
|
||||||
|
{"kelvin", {"k", "degrees kelvin", "degrees k"}, UNIT_TEMP, 1.0},
|
||||||
|
|
||||||
{"litre", {"l", "litres", "liter", "liters"}, UNIT_VOLUME, 1.0},
|
{"litre", {"l", "litres", "liter", "liters"}, UNIT_VOLUME, 1.0},
|
||||||
{"millilitre", {"ml", "millilitres", "milliliter", "milliliters"}, UNIT_VOLUME, 0.001},
|
{"millilitre",
|
||||||
{"gallon", {"gal", "gallons"}, UNIT_VOLUME, 3.78541},
|
{"ml", "millilitres", "milliliter", "milliliters"},
|
||||||
{"quart", {"qt", "quarts"}, UNIT_VOLUME, 0.946353},
|
UNIT_VOLUME,
|
||||||
{"pint", {"pt", "pints"}, UNIT_VOLUME, 0.473176},
|
0.001},
|
||||||
{"cup", {"cups"}, UNIT_VOLUME, 0.236588},
|
{"gallon", {"gal", "gallons"}, UNIT_VOLUME, 3.78541},
|
||||||
{"fluid ounce", {"fl oz", "fluid ounces"}, UNIT_VOLUME, 0.0295735},
|
{"quart", {"qt", "quarts"}, UNIT_VOLUME, 0.946353},
|
||||||
|
{"pint", {"pt", "pints"}, UNIT_VOLUME, 0.473176},
|
||||||
|
{"cup", {"cups"}, UNIT_VOLUME, 0.236588},
|
||||||
|
{"fluid ounce", {"fl oz", "fluid ounces"}, UNIT_VOLUME, 0.0295735},
|
||||||
|
|
||||||
{"square metre", {"sqm", "sq m", "m2", "square metres"}, UNIT_AREA, 1.0},
|
{"square metre", {"sqm", "sq m", "m2", "square metres"}, UNIT_AREA, 1.0},
|
||||||
{"square foot", {"sqft", "sq ft", "ft2", "square feet"}, UNIT_AREA, 0.092903},
|
{"square foot",
|
||||||
{"square kilometre", {"sqkm", "sq km", "km2", "square kilometres"}, UNIT_AREA, 1000000.0},
|
{"sqft", "sq ft", "ft2", "square feet"},
|
||||||
{"square mile", {"sqmi", "sq mi", "mi2", "square miles"}, UNIT_AREA, 2589988.0},
|
UNIT_AREA,
|
||||||
{"acre", {"acres"}, UNIT_AREA, 4046.86},
|
0.092903},
|
||||||
{"hectare", {"ha", "hectares"}, UNIT_AREA, 10000.0},
|
{"square kilometre",
|
||||||
|
{"sqkm", "sq km", "km2", "square kilometres"},
|
||||||
|
UNIT_AREA,
|
||||||
|
1000000.0},
|
||||||
|
{"square mile",
|
||||||
|
{"sqmi", "sq mi", "mi2", "square miles"},
|
||||||
|
UNIT_AREA,
|
||||||
|
2589988.0},
|
||||||
|
{"acre", {"acres"}, UNIT_AREA, 4046.86},
|
||||||
|
{"hectare", {"ha", "hectares"}, UNIT_AREA, 10000.0},
|
||||||
|
|
||||||
{"second", {"sec", "seconds", "s"}, UNIT_TIME, 1.0},
|
{"second", {"sec", "seconds", "s"}, UNIT_TIME, 1.0},
|
||||||
{"minute", {"min", "minutes"}, UNIT_TIME, 60.0},
|
{"minute", {"min", "minutes"}, UNIT_TIME, 60.0},
|
||||||
{"hour", {"hr", "hours", "h"}, UNIT_TIME, 3600.0},
|
{"hour", {"hr", "hours", "h"}, UNIT_TIME, 3600.0},
|
||||||
{"day", {"days", "d"}, UNIT_TIME, 86400.0},
|
{"day", {"days", "d"}, UNIT_TIME, 86400.0},
|
||||||
{"week", {"weeks", "wk"}, UNIT_TIME, 604800.0},
|
{"week", {"weeks", "wk"}, UNIT_TIME, 604800.0},
|
||||||
{"month", {"months", "mo"}, UNIT_TIME, 2629746.0},
|
{"month", {"months", "mo"}, UNIT_TIME, 2629746.0},
|
||||||
{"year", {"years", "yr"}, UNIT_TIME, 31556952.0},
|
{"year", {"years", "yr"}, UNIT_TIME, 31556952.0},
|
||||||
};
|
};
|
||||||
|
|
||||||
static const int UNIT_COUNT = sizeof(UNITS) / sizeof(UNITS[0]);
|
static const int UNIT_COUNT = sizeof(UNITS) / sizeof(UNITS[0]);
|
||||||
@@ -75,70 +99,73 @@ static int is_whitespace(char c) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
static const UnitDef *find_unit(const char *str) {
|
static const UnitDef *find_unit(const char *str) {
|
||||||
if (!str || !*str) return NULL;
|
if (!str || !*str)
|
||||||
|
return NULL;
|
||||||
|
|
||||||
size_t len = strlen(str);
|
size_t len = strlen(str);
|
||||||
char normalized[64] = {0};
|
char normalized[64] = {0};
|
||||||
size_t j = 0;
|
size_t j = 0;
|
||||||
|
|
||||||
for (size_t i = 0; i < len && j < 63; i++) {
|
for (size_t i = 0; i < len && j < 63; i++) {
|
||||||
if ((unsigned char)str[i] == 0xC2 && (unsigned char)str[i+1] == 0xB0) {
|
if ((unsigned char)str[i] == 0xC2 && (unsigned char)str[i + 1] == 0xB0) {
|
||||||
i++;
|
i++;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if (str[i] == '^' && i + 1 < len && str[i + 1] == '2') {
|
if (str[i] == '^' && i + 1 < len && str[i + 1] == '2') {
|
||||||
normalized[j++] = '2';
|
normalized[j++] = '2';
|
||||||
i++;
|
i++;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
normalized[j++] = tolower((unsigned char)str[i]);
|
normalized[j++] = tolower((unsigned char)str[i]);
|
||||||
}
|
}
|
||||||
normalized[j] = '\0';
|
normalized[j] = '\0';
|
||||||
|
|
||||||
for (int i = 0; i < UNIT_COUNT; i++) {
|
for (int i = 0; i < UNIT_COUNT; i++) {
|
||||||
if (strcmp(normalized, UNITS[i].name) == 0) return &UNITS[i];
|
if (strcmp(normalized, UNITS[i].name) == 0)
|
||||||
for (int k = 0; k < 4 && UNITS[i].alias[k]; k++) {
|
return &UNITS[i];
|
||||||
if (strcmp(normalized, UNITS[i].alias[k]) == 0) return &UNITS[i];
|
for (int k = 0; k < 4 && UNITS[i].alias[k]; k++) {
|
||||||
}
|
if (strcmp(normalized, UNITS[i].alias[k]) == 0)
|
||||||
|
return &UNITS[i];
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
int is_unit_conv_query(const char *query) {
|
int is_unit_conv_query(const char *query) {
|
||||||
if (!query) return 0;
|
if (!query)
|
||||||
|
return 0;
|
||||||
|
|
||||||
const char *patterns[] = {
|
const char *patterns[] = {" to ", " in ", " into ", " = ",
|
||||||
" to ", " in ", " into ",
|
" equals ", " equal ", " convert ", " conversion ",
|
||||||
" = ", " equals ", " equal ",
|
" -> ", " → ", NULL};
|
||||||
" convert ", " conversion ",
|
|
||||||
" -> ", " → ",
|
|
||||||
NULL
|
|
||||||
};
|
|
||||||
|
|
||||||
int has_pattern = 0;
|
int has_pattern = 0;
|
||||||
for (int i = 0; patterns[i]; i++) {
|
for (int i = 0; patterns[i]; i++) {
|
||||||
if (strstr(query, patterns[i])) {
|
if (strstr(query, patterns[i])) {
|
||||||
has_pattern = 1;
|
has_pattern = 1;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!has_pattern) {
|
if (!has_pattern) {
|
||||||
const char *last_space = strrchr(query, ' ');
|
const char *last_space = strrchr(query, ' ');
|
||||||
if (last_space) {
|
if (last_space) {
|
||||||
const UnitDef *u = find_unit(last_space + 1);
|
const UnitDef *u = find_unit(last_space + 1);
|
||||||
if (u) {
|
if (u) {
|
||||||
const char *before = query;
|
const char *before = query;
|
||||||
while (*before && is_whitespace(*before)) before++;
|
while (*before && is_whitespace(*before))
|
||||||
const char *num_end = before;
|
before++;
|
||||||
while (*num_end &&
|
const char *num_end = before;
|
||||||
(isdigit(*num_end) || *num_end == '.' || *num_end == '-' ||
|
while (*num_end &&
|
||||||
*num_end == '+' || *num_end == '/' || *num_end == '\'' || *num_end == '"')) {
|
(isdigit(*num_end) || *num_end == '.' || *num_end == '-' ||
|
||||||
num_end++;
|
*num_end == '+' || *num_end == '/' || *num_end == '\'' ||
|
||||||
|
*num_end == '"')) {
|
||||||
|
num_end++;
|
||||||
|
}
|
||||||
|
if (num_end > before)
|
||||||
|
has_pattern = 1;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if (num_end > before) has_pattern = 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return has_pattern;
|
return has_pattern;
|
||||||
@@ -146,65 +173,68 @@ int is_unit_conv_query(const char *query) {
|
|||||||
|
|
||||||
static double parse_value(const char **ptr) {
|
static double parse_value(const char **ptr) {
|
||||||
const char *p = *ptr;
|
const char *p = *ptr;
|
||||||
while (*p && is_whitespace(*p)) p++;
|
while (*p && is_whitespace(*p))
|
||||||
|
p++;
|
||||||
|
|
||||||
double value = 0.0;
|
double value = 0.0;
|
||||||
int has_num = 0;
|
int has_num = 0;
|
||||||
|
|
||||||
if (*p == '-' || *p == '+') p++;
|
if (*p == '-' || *p == '+')
|
||||||
|
p++;
|
||||||
while (*p >= '0' && *p <= '9') {
|
while (*p >= '0' && *p <= '9') {
|
||||||
value = value * 10 + (*p - '0');
|
value = value * 10 + (*p - '0');
|
||||||
has_num = 1;
|
|
||||||
p++;
|
|
||||||
}
|
|
||||||
if (*p == '.') {
|
|
||||||
p++;
|
|
||||||
double frac = 0.1;
|
|
||||||
while (*p >= '0' && *p <= '9') {
|
|
||||||
value += (*p - '0') * frac;
|
|
||||||
frac *= 0.1;
|
|
||||||
has_num = 1;
|
has_num = 1;
|
||||||
p++;
|
p++;
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
if (*p == '/' && has_num) {
|
|
||||||
p++;
|
|
||||||
double denom = 0.0;
|
|
||||||
int has_denom = 0;
|
|
||||||
while (*p >= '0' && *p <= '9') {
|
|
||||||
denom = denom * 10 + (*p - '0');
|
|
||||||
has_denom = 1;
|
|
||||||
p++;
|
|
||||||
}
|
|
||||||
if (has_denom && denom > 0) {
|
|
||||||
value = value / denom;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
while (*p == '\'' || *p == '"') {
|
|
||||||
double extra = 0.0;
|
|
||||||
p++;
|
|
||||||
while (*p >= '0' && *p <= '9') {
|
|
||||||
extra = extra * 10 + (*p - '0');
|
|
||||||
p++;
|
|
||||||
}
|
|
||||||
if (*p == '.') {
|
if (*p == '.') {
|
||||||
p++;
|
p++;
|
||||||
double frac = 0.1;
|
double frac = 0.1;
|
||||||
while (*p >= '0' && *p <= '9') {
|
while (*p >= '0' && *p <= '9') {
|
||||||
extra += (*p - '0') * frac;
|
value += (*p - '0') * frac;
|
||||||
frac *= 0.1;
|
frac *= 0.1;
|
||||||
p++;
|
has_num = 1;
|
||||||
|
p++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (*p == '\'' || *p == '"') p++;
|
|
||||||
value += extra * (p[-1] == '\'' ? 0.3048 : 0.0254);
|
if (*p == '/' && has_num) {
|
||||||
|
p++;
|
||||||
|
double denom = 0.0;
|
||||||
|
int has_denom = 0;
|
||||||
|
while (*p >= '0' && *p <= '9') {
|
||||||
|
denom = denom * 10 + (*p - '0');
|
||||||
|
has_denom = 1;
|
||||||
|
p++;
|
||||||
|
}
|
||||||
|
if (has_denom && denom > 0) {
|
||||||
|
value = value / denom;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
while (*p == '\'' || *p == '"') {
|
||||||
|
double extra = 0.0;
|
||||||
|
p++;
|
||||||
|
while (*p >= '0' && *p <= '9') {
|
||||||
|
extra = extra * 10 + (*p - '0');
|
||||||
|
p++;
|
||||||
|
}
|
||||||
|
if (*p == '.') {
|
||||||
|
p++;
|
||||||
|
double frac = 0.1;
|
||||||
|
while (*p >= '0' && *p <= '9') {
|
||||||
|
extra += (*p - '0') * frac;
|
||||||
|
frac *= 0.1;
|
||||||
|
p++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (*p == '\'' || *p == '"')
|
||||||
|
p++;
|
||||||
|
value += extra * (p[-1] == '\'' ? 0.3048 : 0.0254);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!has_num) {
|
if (!has_num) {
|
||||||
*ptr = p;
|
*ptr = p;
|
||||||
return 0.0;
|
return 0.0;
|
||||||
}
|
}
|
||||||
|
|
||||||
*ptr = p;
|
*ptr = p;
|
||||||
@@ -212,10 +242,13 @@ static double parse_value(const char **ptr) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
static int is_separator(char c) {
|
static int is_separator(char c) {
|
||||||
return is_whitespace(c) || c == ',' || c == '.' || c == '(' || c == ')' || c == '\0';
|
return is_whitespace(c) || c == ',' || c == '.' || c == '(' || c == ')' ||
|
||||||
|
c == '\0';
|
||||||
}
|
}
|
||||||
|
|
||||||
static int parse_conversion_query(const char *query, double *value, const UnitDef **from_unit, const UnitDef **to_unit) {
|
static int parse_conversion_query(const char *query, double *value,
|
||||||
|
const UnitDef **from_unit,
|
||||||
|
const UnitDef **to_unit) {
|
||||||
*value = 0;
|
*value = 0;
|
||||||
*from_unit = NULL;
|
*from_unit = NULL;
|
||||||
*to_unit = NULL;
|
*to_unit = NULL;
|
||||||
@@ -223,127 +256,151 @@ static int parse_conversion_query(const char *query, double *value, const UnitDe
|
|||||||
const char *value_end = query;
|
const char *value_end = query;
|
||||||
*value = parse_value(&value_end);
|
*value = parse_value(&value_end);
|
||||||
|
|
||||||
if (value_end == query) return 0;
|
if (value_end == query)
|
||||||
|
return 0;
|
||||||
|
|
||||||
const char *p = value_end;
|
const char *p = value_end;
|
||||||
while (*p && is_whitespace(*p)) p++;
|
while (*p && is_whitespace(*p))
|
||||||
|
p++;
|
||||||
|
|
||||||
size_t remaining = strlen(p);
|
size_t remaining = strlen(p);
|
||||||
if (remaining < 2) return 0;
|
if (remaining < 2)
|
||||||
|
return 0;
|
||||||
|
|
||||||
const char *to_keywords[] = {" to ", " in ", " into ", " -> ", " → ", " = ", NULL};
|
const char *to_keywords[] = {" to ", " in ", " into ", " -> ",
|
||||||
|
" → ", " = ", NULL};
|
||||||
const char *to_pos = NULL;
|
const char *to_pos = NULL;
|
||||||
size_t keyword_len = 0;
|
size_t keyword_len = 0;
|
||||||
for (int i = 0; to_keywords[i]; i++) {
|
for (int i = 0; to_keywords[i]; i++) {
|
||||||
const char *found = strstr(p, to_keywords[i]);
|
const char *found = strstr(p, to_keywords[i]);
|
||||||
if (found) {
|
if (found) {
|
||||||
to_pos = found + strlen(to_keywords[i]);
|
to_pos = found + strlen(to_keywords[i]);
|
||||||
keyword_len = strlen(to_keywords[i]);
|
keyword_len = strlen(to_keywords[i]);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!to_pos) {
|
if (!to_pos) {
|
||||||
const char *last_space = strrchr(p, ' ');
|
const char *last_space = strrchr(p, ' ');
|
||||||
if (last_space && last_space > p) {
|
if (last_space && last_space > p) {
|
||||||
char from_part[64] = {0};
|
char from_part[64] = {0};
|
||||||
size_t len = last_space - p;
|
size_t len = last_space - p;
|
||||||
if (len < 63) {
|
if (len < 63) {
|
||||||
strncpy(from_part, p, len);
|
strncpy(from_part, p, len);
|
||||||
*from_unit = find_unit(from_part);
|
*from_unit = find_unit(from_part);
|
||||||
if (*from_unit) {
|
if (*from_unit) {
|
||||||
*to_unit = find_unit(last_space + 1);
|
*to_unit = find_unit(last_space + 1);
|
||||||
return *to_unit ? 1 : 0;
|
return *to_unit ? 1 : 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
return 0;
|
||||||
}
|
|
||||||
return 0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
char from_part[64] = {0};
|
char from_part[64] = {0};
|
||||||
size_t from_len = to_pos - p - keyword_len;
|
size_t from_len = to_pos - p - keyword_len;
|
||||||
if (from_len > 63) from_len = 63;
|
if (from_len > 63)
|
||||||
|
from_len = 63;
|
||||||
strncpy(from_part, p, from_len);
|
strncpy(from_part, p, from_len);
|
||||||
|
|
||||||
char *end_from = from_part + from_len;
|
char *end_from = from_part + from_len;
|
||||||
while (end_from > from_part && is_whitespace(end_from[-1])) end_from--;
|
while (end_from > from_part && is_whitespace(end_from[-1]))
|
||||||
|
end_from--;
|
||||||
*end_from = '\0';
|
*end_from = '\0';
|
||||||
|
|
||||||
*from_unit = find_unit(from_part);
|
*from_unit = find_unit(from_part);
|
||||||
if (!*from_unit) {
|
if (!*from_unit) {
|
||||||
char *end = from_part + strlen(from_part);
|
char *end = from_part + strlen(from_part);
|
||||||
while (end > from_part) {
|
while (end > from_part) {
|
||||||
while (end > from_part && is_whitespace(end[-1])) end--;
|
while (end > from_part && is_whitespace(end[-1]))
|
||||||
if (end <= from_part) break;
|
end--;
|
||||||
char *start = end;
|
if (end <= from_part)
|
||||||
while (start > from_part && !is_whitespace(start[-1])) start--;
|
break;
|
||||||
size_t word_len = end - start;
|
char *start = end;
|
||||||
memmove(from_part + word_len + 1, from_part, start - from_part);
|
while (start > from_part && !is_whitespace(start[-1]))
|
||||||
from_part[word_len] = ' ';
|
start--;
|
||||||
from_part[word_len + 1] = '\0';
|
size_t word_len = end - start;
|
||||||
*from_unit = find_unit(from_part);
|
memmove(from_part + word_len + 1, from_part, start - from_part);
|
||||||
if (*from_unit) break;
|
from_part[word_len] = ' ';
|
||||||
end = start;
|
from_part[word_len + 1] = '\0';
|
||||||
}
|
*from_unit = find_unit(from_part);
|
||||||
|
if (*from_unit)
|
||||||
|
break;
|
||||||
|
end = start;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!*from_unit) return 0;
|
if (!*from_unit)
|
||||||
|
return 0;
|
||||||
|
|
||||||
while (*to_pos && is_whitespace(*to_pos)) to_pos++;
|
while (*to_pos && is_whitespace(*to_pos))
|
||||||
|
to_pos++;
|
||||||
|
|
||||||
if (!*to_pos) return 0;
|
if (!*to_pos)
|
||||||
|
return 0;
|
||||||
|
|
||||||
char to_part[64] = {0};
|
char to_part[64] = {0};
|
||||||
size_t to_len = 0;
|
size_t to_len = 0;
|
||||||
const char *tp = to_pos;
|
const char *tp = to_pos;
|
||||||
while (*tp && !is_separator(*tp) && to_len < 63) {
|
while (*tp && !is_separator(*tp) && to_len < 63) {
|
||||||
to_part[to_len++] = *tp++;
|
to_part[to_len++] = *tp++;
|
||||||
}
|
}
|
||||||
to_part[to_len] = '\0';
|
to_part[to_len] = '\0';
|
||||||
|
|
||||||
*to_unit = find_unit(to_part);
|
*to_unit = find_unit(to_part);
|
||||||
if (!*to_unit) {
|
if (!*to_unit) {
|
||||||
const char *try_ptr = to_pos;
|
const char *try_ptr = to_pos;
|
||||||
while (*try_ptr && is_whitespace(*try_ptr)) try_ptr++;
|
while (*try_ptr && is_whitespace(*try_ptr))
|
||||||
char try_buf[64] = {0};
|
try_ptr++;
|
||||||
size_t try_len = 0;
|
char try_buf[64] = {0};
|
||||||
while (*try_ptr && try_len < 63) {
|
size_t try_len = 0;
|
||||||
try_buf[try_len++] = *try_ptr++;
|
while (*try_ptr && try_len < 63) {
|
||||||
}
|
try_buf[try_len++] = *try_ptr++;
|
||||||
while (try_len > 0) {
|
}
|
||||||
*to_unit = find_unit(try_buf);
|
while (try_len > 0) {
|
||||||
if (*to_unit) {
|
*to_unit = find_unit(try_buf);
|
||||||
strcpy(to_part, try_buf);
|
if (*to_unit) {
|
||||||
break;
|
strcpy(to_part, try_buf);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
char *last_space = strrchr(try_buf, ' ');
|
||||||
|
if (!last_space)
|
||||||
|
break;
|
||||||
|
*last_space = '\0';
|
||||||
|
try_len = strlen(try_buf);
|
||||||
}
|
}
|
||||||
char *last_space = strrchr(try_buf, ' ');
|
|
||||||
if (!last_space) break;
|
|
||||||
*last_space = '\0';
|
|
||||||
try_len = strlen(try_buf);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return *to_unit ? 1 : 0;
|
return *to_unit ? 1 : 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static double convert_temp(double value, const UnitDef *from, const UnitDef *to) {
|
static double convert_temp(double value, const UnitDef *from,
|
||||||
|
const UnitDef *to) {
|
||||||
double celsius = 0;
|
double celsius = 0;
|
||||||
|
|
||||||
if (strcmp(from->name, "celsius") == 0) celsius = value;
|
if (strcmp(from->name, "celsius") == 0)
|
||||||
else if (strcmp(from->name, "fahrenheit") == 0) celsius = (value - 32) * 5.0 / 9.0;
|
celsius = value;
|
||||||
else if (strcmp(from->name, "kelvin") == 0) celsius = value - 273.15;
|
else if (strcmp(from->name, "fahrenheit") == 0)
|
||||||
|
celsius = (value - 32) * 5.0 / 9.0;
|
||||||
|
else if (strcmp(from->name, "kelvin") == 0)
|
||||||
|
celsius = value - 273.15;
|
||||||
|
|
||||||
if (strcmp(to->name, "celsius") == 0) return celsius;
|
if (strcmp(to->name, "celsius") == 0)
|
||||||
else if (strcmp(to->name, "fahrenheit") == 0) return celsius * 9.0 / 5.0 + 32;
|
return celsius;
|
||||||
else if (strcmp(to->name, "kelvin") == 0) return celsius + 273.15;
|
else if (strcmp(to->name, "fahrenheit") == 0)
|
||||||
|
return celsius * 9.0 / 5.0 + 32;
|
||||||
|
else if (strcmp(to->name, "kelvin") == 0)
|
||||||
|
return celsius + 273.15;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static double convert_value(double value, const UnitDef *from, const UnitDef *to) {
|
static double convert_value(double value, const UnitDef *from,
|
||||||
if (from->type != to->type) return 0;
|
const UnitDef *to) {
|
||||||
|
if (from->type != to->type)
|
||||||
|
return 0;
|
||||||
|
|
||||||
if (from->type == UNIT_TEMP) {
|
if (from->type == UNIT_TEMP) {
|
||||||
return convert_temp(value, from, to);
|
return convert_temp(value, from, to);
|
||||||
}
|
}
|
||||||
|
|
||||||
double base_value = value * from->to_base;
|
double base_value = value * from->to_base;
|
||||||
@@ -351,112 +408,124 @@ static double convert_value(double value, const UnitDef *from, const UnitDef *to
|
|||||||
}
|
}
|
||||||
|
|
||||||
static void format_number(double val, char *buf, size_t bufsize) {
|
static void format_number(double val, char *buf, size_t bufsize) {
|
||||||
if (bufsize == 0) return;
|
if (bufsize == 0)
|
||||||
|
return;
|
||||||
if (val == 0) {
|
if (val == 0) {
|
||||||
snprintf(buf, bufsize, "0");
|
snprintf(buf, bufsize, "0");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
if (fabs(val) < 0.01 && fabs(val) > 0) {
|
if (fabs(val) < 0.01 && fabs(val) > 0) {
|
||||||
snprintf(buf, bufsize, "%.2g", val);
|
snprintf(buf, bufsize, "%.2g", val);
|
||||||
} else if (fabs(val) < 1) {
|
} else if (fabs(val) < 1) {
|
||||||
snprintf(buf, bufsize, "%.2f", val);
|
snprintf(buf, bufsize, "%.2f", val);
|
||||||
char *p = buf + strlen(buf) - 1;
|
char *p = buf + strlen(buf) - 1;
|
||||||
while (p > buf && *p == '0') *p-- = '\0';
|
while (p > buf && *p == '0')
|
||||||
if (*p == '.') *p = '\0';
|
*p-- = '\0';
|
||||||
|
if (*p == '.')
|
||||||
|
*p = '\0';
|
||||||
} else if (fmod(val + 0.0001, 1.0) < 0.0002) {
|
} else if (fmod(val + 0.0001, 1.0) < 0.0002) {
|
||||||
snprintf(buf, bufsize, "%.0f", val);
|
snprintf(buf, bufsize, "%.0f", val);
|
||||||
} else {
|
} else {
|
||||||
snprintf(buf, bufsize, "%.2f", val);
|
snprintf(buf, bufsize, "%.2f", val);
|
||||||
char *p = buf + strlen(buf) - 1;
|
char *p = buf + strlen(buf) - 1;
|
||||||
while (p > buf && *p == '0') *p-- = '\0';
|
while (p > buf && *p == '0')
|
||||||
if (*p == '.') *p = '\0';
|
*p-- = '\0';
|
||||||
|
if (*p == '.')
|
||||||
|
*p = '\0';
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static const char *pluralize(const char *unit, double value, char *buf, size_t bufsize) {
|
static const char *pluralize(const char *unit, double value, char *buf,
|
||||||
|
size_t bufsize) {
|
||||||
int is_one = (fabs(value - 1.0) < 0.0001 || fabs(value + 1.0) < 0.0001);
|
int is_one = (fabs(value - 1.0) < 0.0001 || fabs(value + 1.0) < 0.0001);
|
||||||
|
|
||||||
size_t len = strlen(unit);
|
size_t len = strlen(unit);
|
||||||
if (len == 0 || bufsize == 0) return unit;
|
if (len == 0 || bufsize == 0)
|
||||||
|
return unit;
|
||||||
|
|
||||||
strncpy(buf, unit, bufsize - 1);
|
strncpy(buf, unit, bufsize - 1);
|
||||||
buf[bufsize - 1] = '\0';
|
buf[bufsize - 1] = '\0';
|
||||||
|
|
||||||
if (strcmp(unit, "foot") == 0 || strcmp(unit, "square foot") == 0) {
|
if (strcmp(unit, "foot") == 0 || strcmp(unit, "square foot") == 0) {
|
||||||
if (is_one) strcpy(buf, unit);
|
if (is_one)
|
||||||
else strcpy(buf, strcmp(unit, "square foot") == 0 ? "square feet" : "feet");
|
strcpy(buf, unit);
|
||||||
return buf;
|
else
|
||||||
|
strcpy(buf, strcmp(unit, "square foot") == 0 ? "square feet" : "feet");
|
||||||
|
return buf;
|
||||||
}
|
}
|
||||||
if (strcmp(unit, "inch") == 0 || strcmp(unit, "square inch") == 0) {
|
if (strcmp(unit, "inch") == 0 || strcmp(unit, "square inch") == 0) {
|
||||||
if (is_one) strcpy(buf, unit);
|
if (is_one)
|
||||||
else strcpy(buf, strcmp(unit, "square inch") == 0 ? "square inches" : "inches");
|
strcpy(buf, unit);
|
||||||
return buf;
|
else
|
||||||
|
strcpy(buf,
|
||||||
|
strcmp(unit, "square inch") == 0 ? "square inches" : "inches");
|
||||||
|
return buf;
|
||||||
}
|
}
|
||||||
if (strcmp(unit, "stone") == 0) {
|
if (strcmp(unit, "stone") == 0) {
|
||||||
if (is_one) strcpy(buf, "stone");
|
if (is_one)
|
||||||
else strcpy(buf, "stones");
|
strcpy(buf, "stone");
|
||||||
return buf;
|
else
|
||||||
|
strcpy(buf, "stones");
|
||||||
|
return buf;
|
||||||
}
|
}
|
||||||
if (strcmp(unit, "celsius") == 0 ||
|
if (strcmp(unit, "celsius") == 0 || strcmp(unit, "fahrenheit") == 0 ||
|
||||||
strcmp(unit, "fahrenheit") == 0 ||
|
strcmp(unit, "kelvin") == 0) {
|
||||||
strcmp(unit, "kelvin") == 0) {
|
strcpy(buf, unit);
|
||||||
strcpy(buf, unit);
|
return buf;
|
||||||
return buf;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (unit[len-1] == 's' ||
|
if (unit[len - 1] == 's' || unit[len - 1] == 'x' || unit[len - 1] == 'z' ||
|
||||||
unit[len-1] == 'x' ||
|
(len >= 2 && unit[len - 2] == 'c' && unit[len - 1] == 'h') ||
|
||||||
unit[len-1] == 'z' ||
|
(len >= 2 && unit[len - 2] == 's' && unit[len - 1] == 'h')) {
|
||||||
(len >= 2 && unit[len-2] == 'c' && unit[len-1] == 'h') ||
|
if (!is_one) {
|
||||||
(len >= 2 && unit[len-2] == 's' && unit[len-1] == 'h')) {
|
buf[len] = 'e';
|
||||||
if (!is_one) {
|
buf[len + 1] = '\0';
|
||||||
buf[len] = 'e';
|
}
|
||||||
buf[len+1] = '\0';
|
} else if (unit[len - 1] == 'y' && len >= 2 &&
|
||||||
}
|
!(unit[len - 2] == 'a' || unit[len - 2] == 'e' ||
|
||||||
} else if (unit[len-1] == 'y' && len >= 2 &&
|
unit[len - 2] == 'i' || unit[len - 2] == 'o' ||
|
||||||
!(unit[len-2] == 'a' || unit[len-2] == 'e' ||
|
unit[len - 2] == 'u')) {
|
||||||
unit[len-2] == 'i' || unit[len-2] == 'o' ||
|
if (is_one) {
|
||||||
unit[len-2] == 'u')) {
|
buf[len - 1] = '\0';
|
||||||
if (is_one) {
|
} else {
|
||||||
buf[len-1] = '\0';
|
buf[len] = 's';
|
||||||
|
buf[len + 1] = '\0';
|
||||||
|
}
|
||||||
|
} else if (len >= 2 && unit[len - 2] == 'f' && unit[len - 1] == 'e') {
|
||||||
|
if (is_one) {
|
||||||
|
buf[len - 2] = '\0';
|
||||||
|
} else {
|
||||||
|
buf[len - 1] = 's';
|
||||||
|
buf[len] = '\0';
|
||||||
|
}
|
||||||
|
} else if (unit[len - 1] == 'f' && len >= 1) {
|
||||||
|
if (is_one) {
|
||||||
|
buf[len - 1] = '\0';
|
||||||
|
} else {
|
||||||
|
buf[len - 1] = 'v';
|
||||||
|
buf[len] = 'e';
|
||||||
|
buf[len + 1] = 's';
|
||||||
|
buf[len + 2] = '\0';
|
||||||
|
}
|
||||||
|
} else if (unit[len - 1] == 'e' && len >= 2 && unit[len - 2] == 'f') {
|
||||||
|
if (is_one) {
|
||||||
|
buf[len - 2] = '\0';
|
||||||
|
} else {
|
||||||
|
buf[len - 1] = 's';
|
||||||
|
buf[len] = '\0';
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
buf[len] = 's';
|
if (!is_one) {
|
||||||
buf[len+1] = '\0';
|
buf[len] = 's';
|
||||||
}
|
buf[len + 1] = '\0';
|
||||||
} else if (len >= 2 && unit[len-2] == 'f' && unit[len-1] == 'e') {
|
}
|
||||||
if (is_one) {
|
|
||||||
buf[len-2] = '\0';
|
|
||||||
} else {
|
|
||||||
buf[len-1] = 's';
|
|
||||||
buf[len] = '\0';
|
|
||||||
}
|
|
||||||
} else if (unit[len-1] == 'f' && len >= 1) {
|
|
||||||
if (is_one) {
|
|
||||||
buf[len-1] = '\0';
|
|
||||||
} else {
|
|
||||||
buf[len-1] = 'v';
|
|
||||||
buf[len] = 'e';
|
|
||||||
buf[len+1] = 's';
|
|
||||||
buf[len+2] = '\0';
|
|
||||||
}
|
|
||||||
} else if (unit[len-1] == 'e' && len >= 2 && unit[len-2] == 'f') {
|
|
||||||
if (is_one) {
|
|
||||||
buf[len-2] = '\0';
|
|
||||||
} else {
|
|
||||||
buf[len-1] = 's';
|
|
||||||
buf[len] = '\0';
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
if (!is_one) {
|
|
||||||
buf[len] = 's';
|
|
||||||
buf[len+1] = '\0';
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return buf;
|
return buf;
|
||||||
}
|
}
|
||||||
|
|
||||||
static char *build_html(double value, const UnitDef *from, double result, const UnitDef *to) {
|
static char *build_html(double value, const UnitDef *from, double result,
|
||||||
|
const UnitDef *to) {
|
||||||
static char html[4096];
|
static char html[4096];
|
||||||
char val_buf[64], res_buf[64], from_name_buf[64], to_name_buf[64];
|
char val_buf[64], res_buf[64], from_name_buf[64], to_name_buf[64];
|
||||||
format_number(value, val_buf, sizeof(val_buf));
|
format_number(value, val_buf, sizeof(val_buf));
|
||||||
@@ -466,30 +535,34 @@ static char *build_html(double value, const UnitDef *from, double result, const
|
|||||||
pluralize(to->name, result, to_name_buf, sizeof(to_name_buf));
|
pluralize(to->name, result, to_name_buf, sizeof(to_name_buf));
|
||||||
|
|
||||||
int n = snprintf(html, sizeof(html),
|
int n = snprintf(html, sizeof(html),
|
||||||
"<div class='unit-conv-container' style='line-height: 1.6;'>"
|
"<div class='unit-conv-container' style='line-height: 1.6;'>"
|
||||||
"<div style='font-size: 1.3em; margin-bottom: 8px;'>"
|
"<div style='font-size: 1.3em; margin-bottom: 8px;'>"
|
||||||
"<b>%s %s</b> = <b>%s %s</b>"
|
"<b>%s %s</b> = <b>%s %s</b>"
|
||||||
"</div>",
|
"</div>",
|
||||||
val_buf, from_name_buf,
|
val_buf, from_name_buf, res_buf, to_name_buf);
|
||||||
res_buf, to_name_buf);
|
|
||||||
snprintf(html + n, sizeof(html) - n, "</div>");
|
snprintf(html + n, sizeof(html) - n, "</div>");
|
||||||
return html;
|
return html;
|
||||||
}
|
}
|
||||||
|
|
||||||
InfoBox fetch_unit_conv_data(const char *query) {
|
InfoBox fetch_unit_conv_data(const char *query) {
|
||||||
InfoBox info = {NULL, NULL, NULL, NULL};
|
InfoBox info = {NULL, NULL, NULL, NULL};
|
||||||
if (!query) return info;
|
if (!query)
|
||||||
|
return info;
|
||||||
|
|
||||||
double value = 0;
|
double value = 0;
|
||||||
const UnitDef *from = NULL;
|
const UnitDef *from = NULL;
|
||||||
const UnitDef *to = NULL;
|
const UnitDef *to = NULL;
|
||||||
|
|
||||||
if (!parse_conversion_query(query, &value, &from, &to)) return info;
|
if (!parse_conversion_query(query, &value, &from, &to))
|
||||||
if (!from || !to) return info;
|
return info;
|
||||||
if (from->type != to->type) return info;
|
if (!from || !to)
|
||||||
|
return info;
|
||||||
|
if (from->type != to->type)
|
||||||
|
return info;
|
||||||
|
|
||||||
double result = convert_value(value, from, to);
|
double result = convert_value(value, from, to);
|
||||||
if (result == 0 && value != 0 && from->type != UNIT_TEMP) return info;
|
if (result == 0 && value != 0 && from->type != UNIT_TEMP)
|
||||||
|
return info;
|
||||||
|
|
||||||
info.title = strdup("Unit Conversion");
|
info.title = strdup("Unit Conversion");
|
||||||
info.extract = strdup(build_html(value, from, result, to));
|
info.extract = strdup(build_html(value, from, result, to));
|
||||||
|
|||||||
@@ -14,41 +14,43 @@ struct WikiMemoryStruct {
|
|||||||
};
|
};
|
||||||
|
|
||||||
static void shorten_summary(char **extract_ptr, int max_chars) {
|
static void shorten_summary(char **extract_ptr, int max_chars) {
|
||||||
if (!extract_ptr || !*extract_ptr) return;
|
if (!extract_ptr || !*extract_ptr)
|
||||||
|
return;
|
||||||
|
|
||||||
char *text = *extract_ptr;
|
char *text = *extract_ptr;
|
||||||
int len = strlen(text);
|
int len = strlen(text);
|
||||||
|
|
||||||
if (len <= max_chars) return;
|
if (len <= max_chars)
|
||||||
|
return;
|
||||||
|
|
||||||
int end_pos = max_chars;
|
int end_pos = max_chars;
|
||||||
for (int i = max_chars; i > (max_chars / 2); i--) {
|
for (int i = max_chars; i > (max_chars / 2); i--) {
|
||||||
if (text[i] == '.' || text[i] == '!' || text[i] == '?') {
|
if (text[i] == '.' || text[i] == '!' || text[i] == '?') {
|
||||||
end_pos = i + 1;
|
end_pos = i + 1;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
char *new_text = (char *)malloc(end_pos + 4);
|
char *new_text = (char *)malloc(end_pos + 4);
|
||||||
|
|
||||||
if (new_text) {
|
if (new_text) {
|
||||||
strncpy(new_text, text, end_pos);
|
strncpy(new_text, text, end_pos);
|
||||||
new_text[end_pos] = '\0';
|
new_text[end_pos] = '\0';
|
||||||
strcat(new_text, "...");
|
strcat(new_text, "...");
|
||||||
free(*extract_ptr);
|
free(*extract_ptr);
|
||||||
*extract_ptr = new_text;
|
*extract_ptr = new_text;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static size_t WikiWriteMemoryCallback(void *contents, size_t size, size_t nmemb,
|
static size_t WikiWriteMemoryCallback(void *contents, size_t size, size_t nmemb,
|
||||||
void *userp) {
|
void *userp) {
|
||||||
size_t realsize = size * nmemb;
|
size_t realsize = size * nmemb;
|
||||||
struct WikiMemoryStruct *mem = (struct WikiMemoryStruct *)userp;
|
struct WikiMemoryStruct *mem = (struct WikiMemoryStruct *)userp;
|
||||||
|
|
||||||
char *ptr = realloc(mem->memory, mem->size + realsize + 1);
|
char *ptr = realloc(mem->memory, mem->size + realsize + 1);
|
||||||
if (ptr == NULL) {
|
if (ptr == NULL) {
|
||||||
fprintf(stderr, "Not enough memory (realloc returned NULL)\n");
|
fprintf(stderr, "Not enough memory (realloc returned NULL)\n");
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
mem->memory = ptr;
|
mem->memory = ptr;
|
||||||
@@ -63,48 +65,49 @@ static void extract_wiki_info(xmlNode *node, InfoBox *info) {
|
|||||||
xmlNode *cur_node = NULL;
|
xmlNode *cur_node = NULL;
|
||||||
|
|
||||||
for (cur_node = node; cur_node; cur_node = cur_node->next) {
|
for (cur_node = node; cur_node; cur_node = cur_node->next) {
|
||||||
if (cur_node->type == XML_ELEMENT_NODE) {
|
if (cur_node->type == XML_ELEMENT_NODE) {
|
||||||
if (strcmp((const char *)cur_node->name, "page") == 0) {
|
if (strcmp((const char *)cur_node->name, "page") == 0) {
|
||||||
xmlChar *title = xmlGetProp(cur_node, (const xmlChar *)"title");
|
xmlChar *title = xmlGetProp(cur_node, (const xmlChar *)"title");
|
||||||
if (title) {
|
if (title) {
|
||||||
info->title = strdup((const char *)title);
|
info->title = strdup((const char *)title);
|
||||||
|
|
||||||
const char *base_article_url = "https://en.wikipedia.org/wiki/";
|
const char *base_article_url = "https://en.wikipedia.org/wiki/";
|
||||||
char *formatted_title = strdup((const char *)title);
|
char *formatted_title = strdup((const char *)title);
|
||||||
for (int i = 0; formatted_title[i]; i++) {
|
for (int i = 0; formatted_title[i]; i++) {
|
||||||
if (formatted_title[i] == ' ') formatted_title[i] = '_';
|
if (formatted_title[i] == ' ')
|
||||||
|
formatted_title[i] = '_';
|
||||||
|
}
|
||||||
|
|
||||||
|
info->url =
|
||||||
|
malloc(strlen(base_article_url) + strlen(formatted_title) + 1);
|
||||||
|
if (info->url) {
|
||||||
|
strcpy(info->url, base_article_url);
|
||||||
|
strcat(info->url, formatted_title);
|
||||||
|
}
|
||||||
|
free(formatted_title);
|
||||||
|
xmlFree(title);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
info->url =
|
if (strcmp((const char *)cur_node->name, "thumbnail") == 0) {
|
||||||
malloc(strlen(base_article_url) + strlen(formatted_title) + 1);
|
xmlChar *source = xmlGetProp(cur_node, (const xmlChar *)"source");
|
||||||
if (info->url) {
|
if (source) {
|
||||||
strcpy(info->url, base_article_url);
|
info->thumbnail_url = strdup((const char *)source);
|
||||||
strcat(info->url, formatted_title);
|
xmlFree(source);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
free(formatted_title);
|
|
||||||
xmlFree(title);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (strcmp((const char *)cur_node->name, "thumbnail") == 0) {
|
if (strcmp((const char *)cur_node->name, "extract") == 0) {
|
||||||
xmlChar *source = xmlGetProp(cur_node, (const xmlChar *)"source");
|
xmlChar *content = xmlNodeGetContent(cur_node);
|
||||||
if (source) {
|
if (content) {
|
||||||
info->thumbnail_url = strdup((const char *)source);
|
info->extract = strdup((const char *)content);
|
||||||
xmlFree(source);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (strcmp((const char *)cur_node->name, "extract") == 0) {
|
shorten_summary(&(info->extract), 300);
|
||||||
xmlChar *content = xmlNodeGetContent(cur_node);
|
xmlFree(content);
|
||||||
if (content) {
|
}
|
||||||
info->extract = strdup((const char *)content);
|
}
|
||||||
|
|
||||||
shorten_summary(&(info->extract), 300);
|
|
||||||
xmlFree(content);
|
|
||||||
}
|
}
|
||||||
}
|
extract_wiki_info(cur_node->children, info);
|
||||||
}
|
|
||||||
extract_wiki_info(cur_node->children, info);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -120,27 +123,27 @@ InfoBox fetch_wiki_data(char *api_url) {
|
|||||||
curl_handle = curl_easy_init();
|
curl_handle = curl_easy_init();
|
||||||
|
|
||||||
if (curl_handle) {
|
if (curl_handle) {
|
||||||
curl_easy_setopt(curl_handle, CURLOPT_URL, api_url);
|
curl_easy_setopt(curl_handle, CURLOPT_URL, api_url);
|
||||||
curl_easy_setopt(curl_handle, CURLOPT_WRITEFUNCTION,
|
curl_easy_setopt(curl_handle, CURLOPT_WRITEFUNCTION,
|
||||||
WikiWriteMemoryCallback);
|
WikiWriteMemoryCallback);
|
||||||
curl_easy_setopt(curl_handle, CURLOPT_WRITEDATA, (void *)&chunk);
|
curl_easy_setopt(curl_handle, CURLOPT_WRITEDATA, (void *)&chunk);
|
||||||
curl_easy_setopt(curl_handle, CURLOPT_USERAGENT, "libcurl-agent/1.0");
|
curl_easy_setopt(curl_handle, CURLOPT_USERAGENT, "libcurl-agent/1.0");
|
||||||
apply_proxy_settings(curl_handle);
|
apply_proxy_settings(curl_handle);
|
||||||
|
|
||||||
res = curl_easy_perform(curl_handle);
|
res = curl_easy_perform(curl_handle);
|
||||||
|
|
||||||
if (res == CURLE_OK) {
|
if (res == CURLE_OK) {
|
||||||
xmlDocPtr doc =
|
xmlDocPtr doc =
|
||||||
xmlReadMemory(chunk.memory, chunk.size, "noname.xml", NULL, 0);
|
xmlReadMemory(chunk.memory, chunk.size, "noname.xml", NULL, 0);
|
||||||
if (doc != NULL) {
|
if (doc != NULL) {
|
||||||
xmlNode *root_element = xmlDocGetRootElement(doc);
|
xmlNode *root_element = xmlDocGetRootElement(doc);
|
||||||
extract_wiki_info(root_element, &info);
|
extract_wiki_info(root_element, &info);
|
||||||
xmlFreeDoc(doc);
|
xmlFreeDoc(doc);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
curl_easy_cleanup(curl_handle);
|
curl_easy_cleanup(curl_handle);
|
||||||
free(chunk.memory);
|
free(chunk.memory);
|
||||||
}
|
}
|
||||||
|
|
||||||
return info;
|
return info;
|
||||||
@@ -148,18 +151,18 @@ InfoBox fetch_wiki_data(char *api_url) {
|
|||||||
|
|
||||||
char *construct_wiki_url(const char *search_term) {
|
char *construct_wiki_url(const char *search_term) {
|
||||||
CURL *curl = curl_easy_init();
|
CURL *curl = curl_easy_init();
|
||||||
if (!curl) return NULL;
|
if (!curl)
|
||||||
|
return NULL;
|
||||||
|
|
||||||
char *escaped_term = curl_easy_escape(curl, search_term, 0);
|
char *escaped_term = curl_easy_escape(curl, search_term, 0);
|
||||||
const char *base =
|
const char *base = "https://en.wikipedia.org/w/"
|
||||||
"https://en.wikipedia.org/w/"
|
"api.php?action=query&prop=extracts|pageimages&exintro&"
|
||||||
"api.php?action=query&prop=extracts|pageimages&exintro&"
|
"explaintext&pithumbsize=400&format=xml&origin=*&titles=";
|
||||||
"explaintext&pithumbsize=400&format=xml&origin=*&titles=";
|
|
||||||
|
|
||||||
char *full_url = malloc(strlen(base) + strlen(escaped_term) + 1);
|
char *full_url = malloc(strlen(base) + strlen(escaped_term) + 1);
|
||||||
if (full_url) {
|
if (full_url) {
|
||||||
strcpy(full_url, base);
|
strcpy(full_url, base);
|
||||||
strcat(full_url, escaped_term);
|
strcat(full_url, escaped_term);
|
||||||
}
|
}
|
||||||
|
|
||||||
curl_free(escaped_term);
|
curl_free(escaped_term);
|
||||||
|
|||||||
53
src/Main.c
53
src/Main.c
@@ -7,15 +7,16 @@
|
|||||||
|
|
||||||
#include "Config.h"
|
#include "Config.h"
|
||||||
#include "Proxy/Proxy.h"
|
#include "Proxy/Proxy.h"
|
||||||
#include "Scraping/Scraping.h"
|
|
||||||
#include "Routes/Home.h"
|
#include "Routes/Home.h"
|
||||||
#include "Routes/Images.h"
|
|
||||||
#include "Routes/ImageProxy.h"
|
#include "Routes/ImageProxy.h"
|
||||||
|
#include "Routes/Images.h"
|
||||||
#include "Routes/Search.h"
|
#include "Routes/Search.h"
|
||||||
|
#include "Scraping/Scraping.h"
|
||||||
|
|
||||||
int handle_opensearch(UrlParams *params) {
|
int handle_opensearch(UrlParams *params) {
|
||||||
(void)params;
|
(void)params;
|
||||||
serve_static_file_with_mime("opensearch.xml", "application/opensearchdescription+xml");
|
serve_static_file_with_mime("opensearch.xml",
|
||||||
|
"application/opensearchdescription+xml");
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -30,33 +31,35 @@ int main() {
|
|||||||
|
|
||||||
curl_global_init(CURL_GLOBAL_DEFAULT);
|
curl_global_init(CURL_GLOBAL_DEFAULT);
|
||||||
|
|
||||||
Config config = {
|
Config config = {.host = "0.0.0.0",
|
||||||
.host = "0.0.0.0",
|
.port = 5000,
|
||||||
.port = 5000,
|
.proxy = "",
|
||||||
.proxy = "",
|
.proxy_list_file = "",
|
||||||
.proxy_list_file = "",
|
.max_proxy_retries = 3,
|
||||||
.max_proxy_retries = 3,
|
.randomize_username = 0,
|
||||||
.randomize_username = 0,
|
.randomize_password = 0};
|
||||||
.randomize_password = 0
|
|
||||||
};
|
|
||||||
|
|
||||||
if (load_config("config.ini", &config) != 0) {
|
if (load_config("config.ini", &config) != 0) {
|
||||||
fprintf(stderr, "Warning: Could not load config file, using defaults\n");
|
fprintf(stderr, "Warning: Could not load config file, using defaults\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
if (config.proxy_list_file[0] != '\0') {
|
if (config.proxy_list_file[0] != '\0') {
|
||||||
if (load_proxy_list(config.proxy_list_file) < 0) {
|
if (load_proxy_list(config.proxy_list_file) < 0) {
|
||||||
fprintf(stderr, "Warning: Failed to load proxy list, continuing without proxies\n");
|
fprintf(
|
||||||
}
|
stderr,
|
||||||
|
"Warning: Failed to load proxy list, continuing without proxies\n");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
max_proxy_retries = config.max_proxy_retries;
|
max_proxy_retries = config.max_proxy_retries;
|
||||||
set_proxy_config(config.proxy, config.randomize_username, config.randomize_password);
|
set_proxy_config(config.proxy, config.randomize_username,
|
||||||
|
config.randomize_password);
|
||||||
|
|
||||||
if (proxy_url[0] != '\0') {
|
if (proxy_url[0] != '\0') {
|
||||||
fprintf(stderr, "Using proxy: %s\n", proxy_url);
|
fprintf(stderr, "Using proxy: %s\n", proxy_url);
|
||||||
} else if (proxy_count > 0) {
|
} else if (proxy_count > 0) {
|
||||||
fprintf(stderr, "Using %d proxies from %s\n", proxy_count, config.proxy_list_file);
|
fprintf(stderr, "Using %d proxies from %s\n", proxy_count,
|
||||||
|
config.proxy_list_file);
|
||||||
}
|
}
|
||||||
|
|
||||||
set_handler("/", home_handler);
|
set_handler("/", home_handler);
|
||||||
@@ -70,14 +73,14 @@ int main() {
|
|||||||
int result = beaker_run(config.host, config.port);
|
int result = beaker_run(config.host, config.port);
|
||||||
|
|
||||||
if (result != 0) {
|
if (result != 0) {
|
||||||
fprintf(stderr, "Error: Beaker server failed to start.\n");
|
fprintf(stderr, "Error: Beaker server failed to start.\n");
|
||||||
curl_global_cleanup();
|
curl_global_cleanup();
|
||||||
xmlCleanupParser();
|
xmlCleanupParser();
|
||||||
return EXIT_FAILURE;
|
return EXIT_FAILURE;
|
||||||
}
|
}
|
||||||
|
|
||||||
curl_global_cleanup();
|
curl_global_cleanup();
|
||||||
xmlCleanupParser();
|
xmlCleanupParser();
|
||||||
free_proxy_list();
|
free_proxy_list();
|
||||||
return EXIT_SUCCESS;
|
return EXIT_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,9 +1,9 @@
|
|||||||
#include "Proxy.h"
|
#include "Proxy.h"
|
||||||
|
#include <pthread.h>
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include <time.h>
|
#include <time.h>
|
||||||
#include <pthread.h>
|
|
||||||
|
|
||||||
Proxy *proxy_list = NULL;
|
Proxy *proxy_list = NULL;
|
||||||
int proxy_count = 0;
|
int proxy_count = 0;
|
||||||
@@ -13,76 +13,85 @@ int randomize_password = 0;
|
|||||||
char proxy_url[512] = {0};
|
char proxy_url[512] = {0};
|
||||||
static pthread_mutex_t proxy_mutex = PTHREAD_MUTEX_INITIALIZER;
|
static pthread_mutex_t proxy_mutex = PTHREAD_MUTEX_INITIALIZER;
|
||||||
|
|
||||||
static const char RAND_CHARS[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
|
static const char RAND_CHARS[] =
|
||||||
|
"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
|
||||||
|
|
||||||
static void generate_random_string(char *buf, size_t len) {
|
static void generate_random_string(char *buf, size_t len) {
|
||||||
for (size_t i = 0; i < len - 1; i++) {
|
for (size_t i = 0; i < len - 1; i++) {
|
||||||
buf[i] = RAND_CHARS[rand() % (sizeof(RAND_CHARS) - 1)];
|
buf[i] = RAND_CHARS[rand() % (sizeof(RAND_CHARS) - 1)];
|
||||||
}
|
}
|
||||||
buf[len - 1] = '\0';
|
buf[len - 1] = '\0';
|
||||||
}
|
}
|
||||||
|
|
||||||
void set_proxy_config(const char *proxy_str, int rand_user, int rand_pass) {
|
void set_proxy_config(const char *proxy_str, int rand_user, int rand_pass) {
|
||||||
if (proxy_str && proxy_str[0]) {
|
if (proxy_str && proxy_str[0]) {
|
||||||
strncpy(proxy_url, proxy_str, sizeof(proxy_url) - 1);
|
strncpy(proxy_url, proxy_str, sizeof(proxy_url) - 1);
|
||||||
proxy_url[sizeof(proxy_url) - 1] = '\0';
|
proxy_url[sizeof(proxy_url) - 1] = '\0';
|
||||||
}
|
}
|
||||||
randomize_username = rand_user;
|
randomize_username = rand_user;
|
||||||
randomize_password = rand_pass;
|
randomize_password = rand_pass;
|
||||||
}
|
}
|
||||||
|
|
||||||
static Proxy parse_proxy_line(const char *line) {
|
static Proxy parse_proxy_line(const char *line) {
|
||||||
Proxy proxy = {.type = PROXY_SOCKS5, .port = 0, .username[0] = '\0', .password[0] = '\0', .failures = 0};
|
Proxy proxy = {.type = PROXY_SOCKS5,
|
||||||
|
.port = 0,
|
||||||
|
.username[0] = '\0',
|
||||||
|
.password[0] = '\0',
|
||||||
|
.failures = 0};
|
||||||
const char *host_start = NULL;
|
const char *host_start = NULL;
|
||||||
const char *port_start = NULL;
|
const char *port_start = NULL;
|
||||||
|
|
||||||
size_t len = strlen(line);
|
size_t len = strlen(line);
|
||||||
if (len == 0) return proxy;
|
if (len == 0)
|
||||||
|
return proxy;
|
||||||
|
|
||||||
if (strncmp(line, "http://", 7) == 0) {
|
if (strncmp(line, "http://", 7) == 0) {
|
||||||
proxy.type = PROXY_HTTP;
|
proxy.type = PROXY_HTTP;
|
||||||
host_start = line + 7;
|
host_start = line + 7;
|
||||||
} else if (strncmp(line, "socks5://", 9) == 0) {
|
} else if (strncmp(line, "socks5://", 9) == 0) {
|
||||||
proxy.type = PROXY_SOCKS5;
|
proxy.type = PROXY_SOCKS5;
|
||||||
host_start = line + 9;
|
host_start = line + 9;
|
||||||
} else if (strncmp(line, "socks4://", 9) == 0) {
|
} else if (strncmp(line, "socks4://", 9) == 0) {
|
||||||
proxy.type = PROXY_SOCKS4;
|
proxy.type = PROXY_SOCKS4;
|
||||||
host_start = line + 9;
|
host_start = line + 9;
|
||||||
} else {
|
} else {
|
||||||
host_start = line;
|
host_start = line;
|
||||||
}
|
}
|
||||||
|
|
||||||
const char *at = strchr(host_start, '@');
|
const char *at = strchr(host_start, '@');
|
||||||
if (at) {
|
if (at) {
|
||||||
char cred_buf[128];
|
char cred_buf[128];
|
||||||
size_t cred_len = at - host_start;
|
size_t cred_len = at - host_start;
|
||||||
if (cred_len >= sizeof(cred_buf)) cred_len = sizeof(cred_buf) - 1;
|
if (cred_len >= sizeof(cred_buf))
|
||||||
strncpy(cred_buf, host_start, cred_len);
|
cred_len = sizeof(cred_buf) - 1;
|
||||||
cred_buf[cred_len] = '\0';
|
strncpy(cred_buf, host_start, cred_len);
|
||||||
|
cred_buf[cred_len] = '\0';
|
||||||
char *colon = strchr(cred_buf, ':');
|
|
||||||
if (colon) {
|
char *colon = strchr(cred_buf, ':');
|
||||||
size_t user_len = colon - cred_buf;
|
if (colon) {
|
||||||
if (user_len >= sizeof(proxy.username)) user_len = sizeof(proxy.username) - 1;
|
size_t user_len = colon - cred_buf;
|
||||||
strncpy(proxy.username, cred_buf, user_len);
|
if (user_len >= sizeof(proxy.username))
|
||||||
proxy.username[user_len] = '\0';
|
user_len = sizeof(proxy.username) - 1;
|
||||||
strncpy(proxy.password, colon + 1, sizeof(proxy.password) - 1);
|
strncpy(proxy.username, cred_buf, user_len);
|
||||||
proxy.password[sizeof(proxy.password) - 1] = '\0';
|
proxy.username[user_len] = '\0';
|
||||||
}
|
strncpy(proxy.password, colon + 1, sizeof(proxy.password) - 1);
|
||||||
host_start = at + 1;
|
proxy.password[sizeof(proxy.password) - 1] = '\0';
|
||||||
|
}
|
||||||
|
host_start = at + 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
port_start = strchr(host_start, ':');
|
port_start = strchr(host_start, ':');
|
||||||
if (port_start) {
|
if (port_start) {
|
||||||
char host_buf[256];
|
char host_buf[256];
|
||||||
size_t host_len = port_start - host_start;
|
size_t host_len = port_start - host_start;
|
||||||
if (host_len >= sizeof(host_buf)) host_len = sizeof(host_buf) - 1;
|
if (host_len >= sizeof(host_buf))
|
||||||
strncpy(host_buf, host_start, host_len);
|
host_len = sizeof(host_buf) - 1;
|
||||||
host_buf[host_len] = '\0';
|
strncpy(host_buf, host_start, host_len);
|
||||||
snprintf(proxy.host, sizeof(proxy.host), "%.*s", (int)host_len, host_buf);
|
host_buf[host_len] = '\0';
|
||||||
proxy.port = atoi(port_start + 1);
|
snprintf(proxy.host, sizeof(proxy.host), "%.*s", (int)host_len, host_buf);
|
||||||
|
proxy.port = atoi(port_start + 1);
|
||||||
} else {
|
} else {
|
||||||
snprintf(proxy.host, sizeof(proxy.host), "%s", host_start);
|
snprintf(proxy.host, sizeof(proxy.host), "%s", host_start);
|
||||||
}
|
}
|
||||||
|
|
||||||
return proxy;
|
return proxy;
|
||||||
@@ -90,71 +99,73 @@ static Proxy parse_proxy_line(const char *line) {
|
|||||||
|
|
||||||
int load_proxy_list(const char *filename) {
|
int load_proxy_list(const char *filename) {
|
||||||
if (!filename || filename[0] == '\0') {
|
if (!filename || filename[0] == '\0') {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
pthread_mutex_lock(&proxy_mutex);
|
pthread_mutex_lock(&proxy_mutex);
|
||||||
|
|
||||||
if (proxy_list) {
|
if (proxy_list) {
|
||||||
free(proxy_list);
|
free(proxy_list);
|
||||||
proxy_list = NULL;
|
proxy_list = NULL;
|
||||||
}
|
}
|
||||||
proxy_count = 0;
|
proxy_count = 0;
|
||||||
|
|
||||||
FILE *file = fopen(filename, "r");
|
FILE *file = fopen(filename, "r");
|
||||||
if (!file) {
|
if (!file) {
|
||||||
pthread_mutex_unlock(&proxy_mutex);
|
pthread_mutex_unlock(&proxy_mutex);
|
||||||
fprintf(stderr, "[WARN] Could not open proxy list file: %s\n", filename);
|
fprintf(stderr, "[WARN] Could not open proxy list file: %s\n", filename);
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
int capacity = 16;
|
int capacity = 16;
|
||||||
proxy_list = (Proxy *)malloc(capacity * sizeof(Proxy));
|
proxy_list = (Proxy *)malloc(capacity * sizeof(Proxy));
|
||||||
if (!proxy_list) {
|
if (!proxy_list) {
|
||||||
fclose(file);
|
fclose(file);
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
proxy_count = 0;
|
proxy_count = 0;
|
||||||
|
|
||||||
char line[512];
|
char line[512];
|
||||||
while (fgets(line, sizeof(line), file)) {
|
while (fgets(line, sizeof(line), file)) {
|
||||||
line[strcspn(line, "\r\n")] = 0;
|
line[strcspn(line, "\r\n")] = 0;
|
||||||
|
|
||||||
if (line[0] == '\0' || line[0] == '#') {
|
if (line[0] == '\0' || line[0] == '#') {
|
||||||
continue;
|
continue;
|
||||||
}
|
|
||||||
|
|
||||||
char *p = line;
|
|
||||||
while (*p == ' ' || *p == '\t') p++;
|
|
||||||
|
|
||||||
char *end = p + strlen(p) - 1;
|
|
||||||
while (end > p && (*end == ' ' || *end == '\t')) {
|
|
||||||
*end = '\0';
|
|
||||||
end--;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (p[0] == '\0') continue;
|
|
||||||
|
|
||||||
Proxy proxy = parse_proxy_line(p);
|
|
||||||
if (proxy.port == 0) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (proxy_count >= capacity) {
|
|
||||||
capacity *= 2;
|
|
||||||
Proxy *new_list = (Proxy *)realloc(proxy_list, capacity * sizeof(Proxy));
|
|
||||||
if (!new_list) {
|
|
||||||
free(proxy_list);
|
|
||||||
proxy_list = NULL;
|
|
||||||
proxy_count = 0;
|
|
||||||
fclose(file);
|
|
||||||
pthread_mutex_unlock(&proxy_mutex);
|
|
||||||
return -1;
|
|
||||||
}
|
}
|
||||||
proxy_list = new_list;
|
|
||||||
}
|
|
||||||
|
|
||||||
proxy_list[proxy_count++] = proxy;
|
char *p = line;
|
||||||
|
while (*p == ' ' || *p == '\t')
|
||||||
|
p++;
|
||||||
|
|
||||||
|
char *end = p + strlen(p) - 1;
|
||||||
|
while (end > p && (*end == ' ' || *end == '\t')) {
|
||||||
|
*end = '\0';
|
||||||
|
end--;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (p[0] == '\0')
|
||||||
|
continue;
|
||||||
|
|
||||||
|
Proxy proxy = parse_proxy_line(p);
|
||||||
|
if (proxy.port == 0) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (proxy_count >= capacity) {
|
||||||
|
capacity *= 2;
|
||||||
|
Proxy *new_list = (Proxy *)realloc(proxy_list, capacity * sizeof(Proxy));
|
||||||
|
if (!new_list) {
|
||||||
|
free(proxy_list);
|
||||||
|
proxy_list = NULL;
|
||||||
|
proxy_count = 0;
|
||||||
|
fclose(file);
|
||||||
|
pthread_mutex_unlock(&proxy_mutex);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
proxy_list = new_list;
|
||||||
|
}
|
||||||
|
|
||||||
|
proxy_list[proxy_count++] = proxy;
|
||||||
}
|
}
|
||||||
|
|
||||||
fclose(file);
|
fclose(file);
|
||||||
@@ -166,8 +177,8 @@ int load_proxy_list(const char *filename) {
|
|||||||
void free_proxy_list(void) {
|
void free_proxy_list(void) {
|
||||||
pthread_mutex_lock(&proxy_mutex);
|
pthread_mutex_lock(&proxy_mutex);
|
||||||
if (proxy_list) {
|
if (proxy_list) {
|
||||||
free(proxy_list);
|
free(proxy_list);
|
||||||
proxy_list = NULL;
|
proxy_list = NULL;
|
||||||
}
|
}
|
||||||
proxy_count = 0;
|
proxy_count = 0;
|
||||||
pthread_mutex_unlock(&proxy_mutex);
|
pthread_mutex_unlock(&proxy_mutex);
|
||||||
@@ -176,8 +187,8 @@ void free_proxy_list(void) {
|
|||||||
Proxy *get_random_proxy(void) {
|
Proxy *get_random_proxy(void) {
|
||||||
pthread_mutex_lock(&proxy_mutex);
|
pthread_mutex_lock(&proxy_mutex);
|
||||||
if (proxy_count == 0) {
|
if (proxy_count == 0) {
|
||||||
pthread_mutex_unlock(&proxy_mutex);
|
pthread_mutex_unlock(&proxy_mutex);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
int start = rand() % proxy_count;
|
int start = rand() % proxy_count;
|
||||||
@@ -185,19 +196,19 @@ Proxy *get_random_proxy(void) {
|
|||||||
Proxy *selected = NULL;
|
Proxy *selected = NULL;
|
||||||
|
|
||||||
while (checked < proxy_count) {
|
while (checked < proxy_count) {
|
||||||
int idx = (start + checked) % proxy_count;
|
int idx = (start + checked) % proxy_count;
|
||||||
if (proxy_list[idx].failures < max_proxy_retries) {
|
if (proxy_list[idx].failures < max_proxy_retries) {
|
||||||
selected = &proxy_list[idx];
|
selected = &proxy_list[idx];
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
checked++;
|
checked++;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!selected) {
|
if (!selected) {
|
||||||
for (int i = 0; i < proxy_count; i++) {
|
for (int i = 0; i < proxy_count; i++) {
|
||||||
proxy_list[i].failures = 0;
|
proxy_list[i].failures = 0;
|
||||||
}
|
}
|
||||||
selected = &proxy_list[rand() % proxy_count];
|
selected = &proxy_list[rand() % proxy_count];
|
||||||
}
|
}
|
||||||
|
|
||||||
pthread_mutex_unlock(&proxy_mutex);
|
pthread_mutex_unlock(&proxy_mutex);
|
||||||
@@ -205,7 +216,8 @@ Proxy *get_random_proxy(void) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void record_proxy_failure(Proxy *proxy) {
|
void record_proxy_failure(Proxy *proxy) {
|
||||||
if (!proxy) return;
|
if (!proxy)
|
||||||
|
return;
|
||||||
pthread_mutex_lock(&proxy_mutex);
|
pthread_mutex_lock(&proxy_mutex);
|
||||||
proxy->failures++;
|
proxy->failures++;
|
||||||
pthread_mutex_unlock(&proxy_mutex);
|
pthread_mutex_unlock(&proxy_mutex);
|
||||||
@@ -213,45 +225,49 @@ void record_proxy_failure(Proxy *proxy) {
|
|||||||
|
|
||||||
void apply_proxy_settings(CURL *curl) {
|
void apply_proxy_settings(CURL *curl) {
|
||||||
if (proxy_url[0] != '\0') {
|
if (proxy_url[0] != '\0') {
|
||||||
curl_easy_setopt(curl, CURLOPT_PROXY, proxy_url);
|
curl_easy_setopt(curl, CURLOPT_PROXY, proxy_url);
|
||||||
if (strncmp(proxy_url, "socks5://", 9) == 0) {
|
if (strncmp(proxy_url, "socks5://", 9) == 0) {
|
||||||
curl_easy_setopt(curl, CURLOPT_PROXYTYPE, CURLPROXY_SOCKS5);
|
curl_easy_setopt(curl, CURLOPT_PROXYTYPE, CURLPROXY_SOCKS5);
|
||||||
} else if (strncmp(proxy_url, "socks4://", 9) == 0) {
|
} else if (strncmp(proxy_url, "socks4://", 9) == 0) {
|
||||||
curl_easy_setopt(curl, CURLOPT_PROXYTYPE, CURLPROXY_SOCKS4A);
|
curl_easy_setopt(curl, CURLOPT_PROXYTYPE, CURLPROXY_SOCKS4A);
|
||||||
} else {
|
|
||||||
curl_easy_setopt(curl, CURLOPT_PROXYTYPE, CURLPROXY_HTTP);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (randomize_username || randomize_password) {
|
|
||||||
char userpwd[256];
|
|
||||||
char username[32] = {0};
|
|
||||||
char password[32] = {0};
|
|
||||||
|
|
||||||
if (randomize_username) generate_random_string(username, sizeof(username));
|
|
||||||
if (randomize_password) generate_random_string(password, sizeof(password));
|
|
||||||
|
|
||||||
snprintf(userpwd, sizeof(userpwd), "%s:%s", username, password);
|
|
||||||
curl_easy_setopt(curl, CURLOPT_PROXYUSERPWD, userpwd);
|
|
||||||
}
|
|
||||||
} else if (proxy_count > 0) {
|
|
||||||
Proxy *proxy = get_random_proxy();
|
|
||||||
if (proxy) {
|
|
||||||
char proxy_url_buf[512];
|
|
||||||
snprintf(proxy_url_buf, sizeof(proxy_url_buf), "%s:%d", proxy->host, proxy->port);
|
|
||||||
curl_easy_setopt(curl, CURLOPT_PROXY, proxy_url_buf);
|
|
||||||
if (proxy->type == PROXY_HTTP) {
|
|
||||||
curl_easy_setopt(curl, CURLOPT_PROXYTYPE, CURLPROXY_HTTP);
|
|
||||||
} else if (proxy->type == PROXY_SOCKS4) {
|
|
||||||
curl_easy_setopt(curl, CURLOPT_PROXYTYPE, CURLPROXY_SOCKS4A);
|
|
||||||
} else {
|
} else {
|
||||||
curl_easy_setopt(curl, CURLOPT_PROXYTYPE, CURLPROXY_SOCKS5);
|
curl_easy_setopt(curl, CURLOPT_PROXYTYPE, CURLPROXY_HTTP);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (proxy->username[0] != '\0' || proxy->password[0] != '\0') {
|
if (randomize_username || randomize_password) {
|
||||||
char userpwd[128];
|
char userpwd[256];
|
||||||
snprintf(userpwd, sizeof(userpwd), "%s:%s", proxy->username, proxy->password);
|
char username[32] = {0};
|
||||||
curl_easy_setopt(curl, CURLOPT_PROXYUSERPWD, userpwd);
|
char password[32] = {0};
|
||||||
|
|
||||||
|
if (randomize_username)
|
||||||
|
generate_random_string(username, sizeof(username));
|
||||||
|
if (randomize_password)
|
||||||
|
generate_random_string(password, sizeof(password));
|
||||||
|
|
||||||
|
snprintf(userpwd, sizeof(userpwd), "%s:%s", username, password);
|
||||||
|
curl_easy_setopt(curl, CURLOPT_PROXYUSERPWD, userpwd);
|
||||||
|
}
|
||||||
|
} else if (proxy_count > 0) {
|
||||||
|
Proxy *proxy = get_random_proxy();
|
||||||
|
if (proxy) {
|
||||||
|
char proxy_url_buf[512];
|
||||||
|
snprintf(proxy_url_buf, sizeof(proxy_url_buf), "%s:%d", proxy->host,
|
||||||
|
proxy->port);
|
||||||
|
curl_easy_setopt(curl, CURLOPT_PROXY, proxy_url_buf);
|
||||||
|
if (proxy->type == PROXY_HTTP) {
|
||||||
|
curl_easy_setopt(curl, CURLOPT_PROXYTYPE, CURLPROXY_HTTP);
|
||||||
|
} else if (proxy->type == PROXY_SOCKS4) {
|
||||||
|
curl_easy_setopt(curl, CURLOPT_PROXYTYPE, CURLPROXY_SOCKS4A);
|
||||||
|
} else {
|
||||||
|
curl_easy_setopt(curl, CURLOPT_PROXYTYPE, CURLPROXY_SOCKS5);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (proxy->username[0] != '\0' || proxy->password[0] != '\0') {
|
||||||
|
char userpwd[128];
|
||||||
|
snprintf(userpwd, sizeof(userpwd), "%s:%s", proxy->username,
|
||||||
|
proxy->password);
|
||||||
|
curl_easy_setopt(curl, CURLOPT_PROXYUSERPWD, userpwd);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -16,9 +16,9 @@ typedef struct {
|
|||||||
static int is_allowed_domain(const char *url) {
|
static int is_allowed_domain(const char *url) {
|
||||||
const char *protocol = strstr(url, "://");
|
const char *protocol = strstr(url, "://");
|
||||||
if (!protocol) {
|
if (!protocol) {
|
||||||
protocol = url;
|
protocol = url;
|
||||||
} else {
|
} else {
|
||||||
protocol += 3;
|
protocol += 3;
|
||||||
}
|
}
|
||||||
|
|
||||||
const char *path = strchr(protocol, '/');
|
const char *path = strchr(protocol, '/');
|
||||||
@@ -26,49 +26,46 @@ static int is_allowed_domain(const char *url) {
|
|||||||
|
|
||||||
char host[256] = {0};
|
char host[256] = {0};
|
||||||
if (host_len >= sizeof(host)) {
|
if (host_len >= sizeof(host)) {
|
||||||
host_len = sizeof(host) - 1;
|
host_len = sizeof(host) - 1;
|
||||||
}
|
}
|
||||||
strncpy(host, protocol, host_len);
|
strncpy(host, protocol, host_len);
|
||||||
|
|
||||||
const char *allowed_domains[] = {
|
const char *allowed_domains[] = {"mm.bing.net", "th.bing.com", NULL};
|
||||||
"mm.bing.net",
|
|
||||||
"th.bing.com",
|
|
||||||
NULL
|
|
||||||
};
|
|
||||||
|
|
||||||
for (int i = 0; allowed_domains[i] != NULL; i++) {
|
for (int i = 0; allowed_domains[i] != NULL; i++) {
|
||||||
size_t domain_len = strlen(allowed_domains[i]);
|
size_t domain_len = strlen(allowed_domains[i]);
|
||||||
size_t host_str_len = strlen(host);
|
size_t host_str_len = strlen(host);
|
||||||
|
|
||||||
if (host_str_len >= domain_len) {
|
if (host_str_len >= domain_len) {
|
||||||
const char *suffix = host + host_str_len - domain_len;
|
const char *suffix = host + host_str_len - domain_len;
|
||||||
if (strcmp(suffix, allowed_domains[i]) == 0) {
|
if (strcmp(suffix, allowed_domains[i]) == 0) {
|
||||||
return 1;
|
return 1;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static size_t write_callback(void *contents, size_t size, size_t nmemb,
|
static size_t write_callback(void *contents, size_t size, size_t nmemb,
|
||||||
void *userp) {
|
void *userp) {
|
||||||
size_t realsize = size * nmemb;
|
size_t realsize = size * nmemb;
|
||||||
MemoryBuffer *buf = (MemoryBuffer *)userp;
|
MemoryBuffer *buf = (MemoryBuffer *)userp;
|
||||||
|
|
||||||
if (buf->size + realsize > MAX_IMAGE_SIZE) {
|
if (buf->size + realsize > MAX_IMAGE_SIZE) {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (buf->size + realsize > buf->capacity) {
|
if (buf->size + realsize > buf->capacity) {
|
||||||
size_t new_capacity = buf->capacity * 2;
|
size_t new_capacity = buf->capacity * 2;
|
||||||
if (new_capacity < buf->size + realsize) {
|
if (new_capacity < buf->size + realsize) {
|
||||||
new_capacity = buf->size + realsize;
|
new_capacity = buf->size + realsize;
|
||||||
}
|
}
|
||||||
char *new_data = realloc(buf->data, new_capacity);
|
char *new_data = realloc(buf->data, new_capacity);
|
||||||
if (!new_data) return 0;
|
if (!new_data)
|
||||||
buf->data = new_data;
|
return 0;
|
||||||
buf->capacity = new_capacity;
|
buf->data = new_data;
|
||||||
|
buf->capacity = new_capacity;
|
||||||
}
|
}
|
||||||
|
|
||||||
memcpy(buf->data + buf->size, contents, realsize);
|
memcpy(buf->data + buf->size, contents, realsize);
|
||||||
@@ -79,38 +76,34 @@ static size_t write_callback(void *contents, size_t size, size_t nmemb,
|
|||||||
int image_proxy_handler(UrlParams *params) {
|
int image_proxy_handler(UrlParams *params) {
|
||||||
const char *url = NULL;
|
const char *url = NULL;
|
||||||
for (int i = 0; i < params->count; i++) {
|
for (int i = 0; i < params->count; i++) {
|
||||||
if (strcmp(params->params[i].key, "url") == 0) {
|
if (strcmp(params->params[i].key, "url") == 0) {
|
||||||
url = params->params[i].value;
|
url = params->params[i].value;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!url || strlen(url) == 0) {
|
if (!url || strlen(url) == 0) {
|
||||||
send_response("Missing 'url' parameter");
|
send_response("Missing 'url' parameter");
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!is_allowed_domain(url)) {
|
if (!is_allowed_domain(url)) {
|
||||||
send_response("Domain not allowed");
|
send_response("Domain not allowed");
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
CURL *curl = curl_easy_init();
|
CURL *curl = curl_easy_init();
|
||||||
if (!curl) {
|
if (!curl) {
|
||||||
send_response("Failed to initialize curl");
|
send_response("Failed to initialize curl");
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
MemoryBuffer buf = {
|
MemoryBuffer buf = {.data = malloc(8192), .size = 0, .capacity = 8192};
|
||||||
.data = malloc(8192),
|
|
||||||
.size = 0,
|
|
||||||
.capacity = 8192
|
|
||||||
};
|
|
||||||
|
|
||||||
if (!buf.data) {
|
if (!buf.data) {
|
||||||
curl_easy_cleanup(curl);
|
curl_easy_cleanup(curl);
|
||||||
send_response("Memory allocation failed");
|
send_response("Memory allocation failed");
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
curl_easy_setopt(curl, CURLOPT_URL, url);
|
curl_easy_setopt(curl, CURLOPT_URL, url);
|
||||||
@@ -130,18 +123,19 @@ int image_proxy_handler(UrlParams *params) {
|
|||||||
|
|
||||||
char content_type[64] = {0};
|
char content_type[64] = {0};
|
||||||
if (content_type_ptr) {
|
if (content_type_ptr) {
|
||||||
strncpy(content_type, content_type_ptr, sizeof(content_type) - 1);
|
strncpy(content_type, content_type_ptr, sizeof(content_type) - 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
curl_easy_cleanup(curl);
|
curl_easy_cleanup(curl);
|
||||||
|
|
||||||
if (res != CURLE_OK || response_code != 200) {
|
if (res != CURLE_OK || response_code != 200) {
|
||||||
free(buf.data);
|
free(buf.data);
|
||||||
send_response("Failed to fetch image");
|
send_response("Failed to fetch image");
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
const char *mime_type = strlen(content_type) > 0 ? content_type : "image/jpeg";
|
const char *mime_type =
|
||||||
|
strlen(content_type) > 0 ? content_type : "image/jpeg";
|
||||||
serve_data(buf.data, buf.size, mime_type);
|
serve_data(buf.data, buf.size, mime_type);
|
||||||
|
|
||||||
free(buf.data);
|
free(buf.data);
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
#include "Images.h"
|
#include "Images.h"
|
||||||
#include "../Utility/Unescape.h"
|
|
||||||
#include "../Proxy/Proxy.h"
|
#include "../Proxy/Proxy.h"
|
||||||
#include "../Scraping/Scraping.h"
|
#include "../Scraping/Scraping.h"
|
||||||
|
#include "../Utility/Unescape.h"
|
||||||
|
|
||||||
#include <curl/curl.h>
|
#include <curl/curl.h>
|
||||||
#include <libxml/HTMLparser.h>
|
#include <libxml/HTMLparser.h>
|
||||||
@@ -17,12 +17,12 @@ struct MemoryBlock {
|
|||||||
};
|
};
|
||||||
|
|
||||||
static size_t ImageWriteCallback(void *data, size_t size, size_t nmemb,
|
static size_t ImageWriteCallback(void *data, size_t size, size_t nmemb,
|
||||||
void *userp) {
|
void *userp) {
|
||||||
size_t realsize = size * nmemb;
|
size_t realsize = size * nmemb;
|
||||||
struct MemoryBlock *mem = (struct MemoryBlock *)userp;
|
struct MemoryBlock *mem = (struct MemoryBlock *)userp;
|
||||||
char *ptr = (char *)realloc(mem->response, mem->size + realsize + 1);
|
char *ptr = (char *)realloc(mem->response, mem->size + realsize + 1);
|
||||||
if (ptr == NULL) {
|
if (ptr == NULL) {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
mem->response = ptr;
|
mem->response = ptr;
|
||||||
memcpy(&(mem->response[mem->size]), data, realsize);
|
memcpy(&(mem->response[mem->size]), data, realsize);
|
||||||
@@ -35,30 +35,30 @@ static char *fetch_images_html(const char *url) {
|
|||||||
CURL *curl_handle;
|
CURL *curl_handle;
|
||||||
struct MemoryBlock chunk = {.response = malloc(1), .size = 0};
|
struct MemoryBlock chunk = {.response = malloc(1), .size = 0};
|
||||||
if (!chunk.response) {
|
if (!chunk.response) {
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
curl_handle = curl_easy_init();
|
curl_handle = curl_easy_init();
|
||||||
if (!curl_handle) {
|
if (!curl_handle) {
|
||||||
free(chunk.response);
|
free(chunk.response);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
curl_easy_setopt(curl_handle, CURLOPT_URL, url);
|
curl_easy_setopt(curl_handle, CURLOPT_URL, url);
|
||||||
curl_easy_setopt(curl_handle, CURLOPT_WRITEFUNCTION, ImageWriteCallback);
|
curl_easy_setopt(curl_handle, CURLOPT_WRITEFUNCTION, ImageWriteCallback);
|
||||||
curl_easy_setopt(curl_handle, CURLOPT_WRITEDATA, (void *)&chunk);
|
curl_easy_setopt(curl_handle, CURLOPT_WRITEDATA, (void *)&chunk);
|
||||||
curl_easy_setopt(
|
curl_easy_setopt(
|
||||||
curl_handle, CURLOPT_USERAGENT,
|
curl_handle, CURLOPT_USERAGENT,
|
||||||
"Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko");
|
"Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko");
|
||||||
curl_easy_setopt(curl_handle, CURLOPT_FOLLOWLOCATION, 1L);
|
curl_easy_setopt(curl_handle, CURLOPT_FOLLOWLOCATION, 1L);
|
||||||
curl_easy_setopt(curl_handle, CURLOPT_TIMEOUT, 10L);
|
curl_easy_setopt(curl_handle, CURLOPT_TIMEOUT, 10L);
|
||||||
apply_proxy_settings(curl_handle);
|
apply_proxy_settings(curl_handle);
|
||||||
|
|
||||||
CURLcode res = curl_easy_perform(curl_handle);
|
CURLcode res = curl_easy_perform(curl_handle);
|
||||||
if (res != CURLE_OK) {
|
if (res != CURLE_OK) {
|
||||||
free(chunk.response);
|
free(chunk.response);
|
||||||
curl_easy_cleanup(curl_handle);
|
curl_easy_cleanup(curl_handle);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
curl_easy_cleanup(curl_handle);
|
curl_easy_cleanup(curl_handle);
|
||||||
@@ -71,15 +71,16 @@ int images_handler(UrlParams *params) {
|
|||||||
int page = 1;
|
int page = 1;
|
||||||
|
|
||||||
if (params) {
|
if (params) {
|
||||||
for (int i = 0; i < params->count; i++) {
|
for (int i = 0; i < params->count; i++) {
|
||||||
if (strcmp(params->params[i].key, "q") == 0) {
|
if (strcmp(params->params[i].key, "q") == 0) {
|
||||||
raw_query = params->params[i].value;
|
raw_query = params->params[i].value;
|
||||||
} else if (strcmp(params->params[i].key, "p") == 0) {
|
} else if (strcmp(params->params[i].key, "p") == 0) {
|
||||||
int parsed = atoi(params->params[i].value);
|
int parsed = atoi(params->params[i].value);
|
||||||
if (parsed > 1) page = parsed;
|
if (parsed > 1)
|
||||||
|
page = parsed;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
context_set(&ctx, "query", raw_query);
|
context_set(&ctx, "query", raw_query);
|
||||||
|
|
||||||
@@ -87,7 +88,7 @@ int images_handler(UrlParams *params) {
|
|||||||
snprintf(page_str, sizeof(page_str), "%d", page);
|
snprintf(page_str, sizeof(page_str), "%d", page);
|
||||||
snprintf(prev_str, sizeof(prev_str), "%d", page > 1 ? page - 1 : 0);
|
snprintf(prev_str, sizeof(prev_str), "%d", page > 1 ? page - 1 : 0);
|
||||||
snprintf(next_str, sizeof(next_str), "%d", page + 1);
|
snprintf(next_str, sizeof(next_str), "%d", page + 1);
|
||||||
context_set(&ctx, "page", page_str);
|
context_set(&ctx, "page", page_str);
|
||||||
context_set(&ctx, "prev_page", prev_str);
|
context_set(&ctx, "prev_page", prev_str);
|
||||||
context_set(&ctx, "next_page", next_str);
|
context_set(&ctx, "next_page", next_str);
|
||||||
|
|
||||||
@@ -95,207 +96,239 @@ int images_handler(UrlParams *params) {
|
|||||||
context_set(&ctx, "query", display_query);
|
context_set(&ctx, "query", display_query);
|
||||||
|
|
||||||
if (!raw_query || strlen(raw_query) == 0) {
|
if (!raw_query || strlen(raw_query) == 0) {
|
||||||
send_response("<h1>No query provided</h1>");
|
send_response("<h1>No query provided</h1>");
|
||||||
if (display_query) free(display_query);
|
if (display_query)
|
||||||
free_context(&ctx);
|
free(display_query);
|
||||||
return -1;
|
free_context(&ctx);
|
||||||
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
CURL *tmp = curl_easy_init();
|
CURL *tmp = curl_easy_init();
|
||||||
if (!tmp) {
|
if (!tmp) {
|
||||||
send_response("<h1>Error initializing curl</h1>");
|
send_response("<h1>Error initializing curl</h1>");
|
||||||
if (display_query) free(display_query);
|
if (display_query)
|
||||||
free_context(&ctx);
|
free(display_query);
|
||||||
return -1;
|
free_context(&ctx);
|
||||||
|
return -1;
|
||||||
}
|
}
|
||||||
char *encoded_query = curl_easy_escape(tmp, raw_query, 0);
|
char *encoded_query = curl_easy_escape(tmp, raw_query, 0);
|
||||||
curl_easy_cleanup(tmp);
|
curl_easy_cleanup(tmp);
|
||||||
|
|
||||||
if (!encoded_query) {
|
if (!encoded_query) {
|
||||||
send_response("<h1>Error encoding query</h1>");
|
send_response("<h1>Error encoding query</h1>");
|
||||||
if (display_query) free(display_query);
|
if (display_query)
|
||||||
free_context(&ctx);
|
free(display_query);
|
||||||
return -1;
|
free_context(&ctx);
|
||||||
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
char url[1024];
|
char url[1024];
|
||||||
int first = (page - 1) * 32 + 1;
|
int first = (page - 1) * 32 + 1;
|
||||||
snprintf(url, sizeof(url),
|
snprintf(url, sizeof(url), "https://www.bing.com/images/search?q=%s&first=%d",
|
||||||
"https://www.bing.com/images/search?q=%s&first=%d", encoded_query, first);
|
encoded_query, first);
|
||||||
|
|
||||||
char *html = fetch_images_html(url);
|
char *html = fetch_images_html(url);
|
||||||
if (!html) {
|
if (!html) {
|
||||||
send_response("<h1>Error fetching images</h1>");
|
send_response("<h1>Error fetching images</h1>");
|
||||||
free(encoded_query);
|
free(encoded_query);
|
||||||
free(display_query);
|
free(display_query);
|
||||||
free_context(&ctx);
|
free_context(&ctx);
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
htmlDocPtr doc = htmlReadMemory(html, (int)strlen(html), NULL, NULL,
|
htmlDocPtr doc = htmlReadMemory(html, (int)strlen(html), NULL, NULL,
|
||||||
HTML_PARSE_RECOVER | HTML_PARSE_NOERROR);
|
HTML_PARSE_RECOVER | HTML_PARSE_NOERROR);
|
||||||
if (!doc) {
|
if (!doc) {
|
||||||
free(html);
|
free(html);
|
||||||
free(encoded_query);
|
free(encoded_query);
|
||||||
free(display_query);
|
free(display_query);
|
||||||
free_context(&ctx);
|
free_context(&ctx);
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
xmlXPathContextPtr xpathCtx = xmlXPathNewContext(doc);
|
xmlXPathContextPtr xpathCtx = xmlXPathNewContext(doc);
|
||||||
|
|
||||||
if (!xpathCtx) {
|
if (!xpathCtx) {
|
||||||
xmlFreeDoc(doc);
|
xmlFreeDoc(doc);
|
||||||
free(html);
|
free(html);
|
||||||
free(encoded_query);
|
free(encoded_query);
|
||||||
free(display_query);
|
free(display_query);
|
||||||
free_context(&ctx);
|
free_context(&ctx);
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
xmlXPathObjectPtr xpathObj =
|
xmlXPathObjectPtr xpathObj =
|
||||||
xmlXPathEvalExpression((const xmlChar *)"//div[@class='item']", xpathCtx);
|
xmlXPathEvalExpression((const xmlChar *)"//div[@class='item']", xpathCtx);
|
||||||
|
|
||||||
int image_count = 0;
|
int image_count = 0;
|
||||||
char ***image_matrix = NULL;
|
char ***image_matrix = NULL;
|
||||||
int *inner_counts = NULL;
|
int *inner_counts = NULL;
|
||||||
|
|
||||||
if (xpathObj && xpathObj->nodesetval) {
|
if (xpathObj && xpathObj->nodesetval) {
|
||||||
int nodes = xpathObj->nodesetval->nodeNr;
|
int nodes = xpathObj->nodesetval->nodeNr;
|
||||||
|
|
||||||
int max_images = (nodes < 32) ? nodes : 32;
|
int max_images = (nodes < 32) ? nodes : 32;
|
||||||
image_matrix = malloc(sizeof(char **) * max_images);
|
image_matrix = malloc(sizeof(char **) * max_images);
|
||||||
inner_counts = malloc(sizeof(int) * max_images);
|
inner_counts = malloc(sizeof(int) * max_images);
|
||||||
|
|
||||||
for (int i = 0; i < nodes; i++) {
|
for (int i = 0; i < nodes; i++) {
|
||||||
if (image_count >= 32) break;
|
if (image_count >= 32)
|
||||||
|
break;
|
||||||
|
|
||||||
xmlNodePtr node = xpathObj->nodesetval->nodeTab[i];
|
xmlNodePtr node = xpathObj->nodesetval->nodeTab[i];
|
||||||
xmlNodePtr img_node = NULL;
|
xmlNodePtr img_node = NULL;
|
||||||
xmlNodePtr tit_node = NULL;
|
xmlNodePtr tit_node = NULL;
|
||||||
xmlNodePtr des_node = NULL;
|
xmlNodePtr des_node = NULL;
|
||||||
xmlNodePtr thumb_link = NULL;
|
xmlNodePtr thumb_link = NULL;
|
||||||
|
|
||||||
for (xmlNodePtr child = node->children; child; child = child->next) {
|
for (xmlNodePtr child = node->children; child; child = child->next) {
|
||||||
if (child->type != XML_ELEMENT_NODE) continue;
|
if (child->type != XML_ELEMENT_NODE)
|
||||||
|
continue;
|
||||||
|
|
||||||
if (xmlStrcmp(child->name, (const xmlChar *)"a") == 0) {
|
if (xmlStrcmp(child->name, (const xmlChar *)"a") == 0) {
|
||||||
xmlChar *class = xmlGetProp(child, (const xmlChar *)"class");
|
xmlChar *class = xmlGetProp(child, (const xmlChar *)"class");
|
||||||
if (class) {
|
if (class) {
|
||||||
if (xmlStrstr(class, (const xmlChar *)"thumb") != NULL) {
|
if (xmlStrstr(class, (const xmlChar *)"thumb") != NULL) {
|
||||||
thumb_link = child;
|
thumb_link = child;
|
||||||
for (xmlNodePtr thumb_child = child->children; thumb_child; thumb_child = thumb_child->next) {
|
for (xmlNodePtr thumb_child = child->children; thumb_child;
|
||||||
if (xmlStrcmp(thumb_child->name, (const xmlChar *)"div") == 0) {
|
thumb_child = thumb_child->next) {
|
||||||
xmlChar *div_class = xmlGetProp(thumb_child, (const xmlChar *)"class");
|
if (xmlStrcmp(thumb_child->name, (const xmlChar *)"div") == 0) {
|
||||||
if (div_class && xmlStrcmp(div_class, (const xmlChar *)"cico") == 0) {
|
xmlChar *div_class =
|
||||||
for (xmlNodePtr cico_child = thumb_child->children; cico_child; cico_child = cico_child->next) {
|
xmlGetProp(thumb_child, (const xmlChar *)"class");
|
||||||
if (xmlStrcmp(cico_child->name, (const xmlChar *)"img") == 0) {
|
if (div_class &&
|
||||||
img_node = cico_child;
|
xmlStrcmp(div_class, (const xmlChar *)"cico") == 0) {
|
||||||
break;
|
for (xmlNodePtr cico_child = thumb_child->children;
|
||||||
|
cico_child; cico_child = cico_child->next) {
|
||||||
|
if (xmlStrcmp(cico_child->name, (const xmlChar *)"img") ==
|
||||||
|
0) {
|
||||||
|
img_node = cico_child;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (div_class)
|
||||||
|
xmlFree(div_class);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else if (xmlStrstr(class, (const xmlChar *)"tit") != NULL) {
|
||||||
|
tit_node = child;
|
||||||
|
}
|
||||||
|
xmlFree(class);
|
||||||
|
}
|
||||||
|
} else if (xmlStrcmp(child->name, (const xmlChar *)"div") == 0) {
|
||||||
|
xmlChar *class = xmlGetProp(child, (const xmlChar *)"class");
|
||||||
|
if (class && xmlStrcmp(class, (const xmlChar *)"meta") == 0) {
|
||||||
|
for (xmlNodePtr meta_child = child->children; meta_child;
|
||||||
|
meta_child = meta_child->next) {
|
||||||
|
if (xmlStrcmp(meta_child->name, (const xmlChar *)"div") == 0) {
|
||||||
|
xmlChar *div_class =
|
||||||
|
xmlGetProp(meta_child, (const xmlChar *)"class");
|
||||||
|
if (div_class) {
|
||||||
|
if (xmlStrcmp(div_class, (const xmlChar *)"des") == 0) {
|
||||||
|
des_node = meta_child;
|
||||||
|
}
|
||||||
|
xmlFree(div_class);
|
||||||
|
}
|
||||||
|
} else if (xmlStrcmp(meta_child->name, (const xmlChar *)"a") ==
|
||||||
|
0) {
|
||||||
|
xmlChar *a_class =
|
||||||
|
xmlGetProp(meta_child, (const xmlChar *)"class");
|
||||||
|
if (a_class &&
|
||||||
|
xmlStrstr(a_class, (const xmlChar *)"tit") != NULL) {
|
||||||
|
tit_node = meta_child;
|
||||||
|
}
|
||||||
|
if (a_class)
|
||||||
|
xmlFree(a_class);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if (class)
|
||||||
|
xmlFree(class);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
xmlChar *iurl =
|
||||||
|
img_node ? xmlGetProp(img_node, (const xmlChar *)"src") : NULL;
|
||||||
|
xmlChar *full_url =
|
||||||
|
thumb_link ? xmlGetProp(thumb_link, (const xmlChar *)"href") : NULL;
|
||||||
|
xmlChar *title = des_node
|
||||||
|
? xmlNodeGetContent(des_node)
|
||||||
|
: (tit_node ? xmlNodeGetContent(tit_node) : NULL);
|
||||||
|
xmlChar *rurl =
|
||||||
|
tit_node ? xmlGetProp(tit_node, (const xmlChar *)"href") : NULL;
|
||||||
|
|
||||||
|
if (iurl && strlen((char *)iurl) > 0) {
|
||||||
|
char *proxy_url = NULL;
|
||||||
|
CURL *esc_curl = curl_easy_init();
|
||||||
|
if (esc_curl) {
|
||||||
|
char *encoded = curl_easy_escape(esc_curl, (char *)iurl, 0);
|
||||||
|
if (encoded) {
|
||||||
|
size_t proxy_len = strlen("/proxy?url=") + strlen(encoded) + 1;
|
||||||
|
proxy_url = malloc(proxy_len);
|
||||||
|
if (proxy_url) {
|
||||||
|
snprintf(proxy_url, proxy_len, "/proxy?url=%s", encoded);
|
||||||
|
}
|
||||||
|
curl_free(encoded);
|
||||||
}
|
}
|
||||||
if (div_class) xmlFree(div_class);
|
curl_easy_cleanup(esc_curl);
|
||||||
}
|
}
|
||||||
}
|
|
||||||
} else if (xmlStrstr(class, (const xmlChar *)"tit") != NULL) {
|
|
||||||
tit_node = child;
|
|
||||||
}
|
|
||||||
xmlFree(class);
|
|
||||||
}
|
|
||||||
} else if (xmlStrcmp(child->name, (const xmlChar *)"div") == 0) {
|
|
||||||
xmlChar *class = xmlGetProp(child, (const xmlChar *)"class");
|
|
||||||
if (class && xmlStrcmp(class, (const xmlChar *)"meta") == 0) {
|
|
||||||
for (xmlNodePtr meta_child = child->children; meta_child; meta_child = meta_child->next) {
|
|
||||||
if (xmlStrcmp(meta_child->name, (const xmlChar *)"div") == 0) {
|
|
||||||
xmlChar *div_class = xmlGetProp(meta_child, (const xmlChar *)"class");
|
|
||||||
if (div_class) {
|
|
||||||
if (xmlStrcmp(div_class, (const xmlChar *)"des") == 0) {
|
|
||||||
des_node = meta_child;
|
|
||||||
}
|
|
||||||
xmlFree(div_class);
|
|
||||||
}
|
|
||||||
} else if (xmlStrcmp(meta_child->name, (const xmlChar *)"a") == 0) {
|
|
||||||
xmlChar *a_class = xmlGetProp(meta_child, (const xmlChar *)"class");
|
|
||||||
if (a_class && xmlStrstr(a_class, (const xmlChar *)"tit") != NULL) {
|
|
||||||
tit_node = meta_child;
|
|
||||||
}
|
|
||||||
if (a_class) xmlFree(a_class);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (class) xmlFree(class);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
xmlChar *iurl = img_node ? xmlGetProp(img_node, (const xmlChar *)"src") : NULL;
|
image_matrix[image_count] = malloc(sizeof(char *) * 4);
|
||||||
xmlChar *full_url = thumb_link ? xmlGetProp(thumb_link, (const xmlChar *)"href") : NULL;
|
image_matrix[image_count][0] =
|
||||||
xmlChar *title = des_node ? xmlNodeGetContent(des_node) : (tit_node ? xmlNodeGetContent(tit_node) : NULL);
|
proxy_url ? strdup(proxy_url) : strdup((char *)iurl);
|
||||||
xmlChar *rurl = tit_node ? xmlGetProp(tit_node, (const xmlChar *)"href") : NULL;
|
image_matrix[image_count][1] = strdup(title ? (char *)title : "Image");
|
||||||
|
image_matrix[image_count][2] = strdup(rurl ? (char *)rurl : "#");
|
||||||
if (iurl && strlen((char *)iurl) > 0) {
|
image_matrix[image_count][3] =
|
||||||
char *proxy_url = NULL;
|
strdup(full_url ? (char *)full_url : "#");
|
||||||
CURL *esc_curl = curl_easy_init();
|
inner_counts[image_count] = 4;
|
||||||
if (esc_curl) {
|
image_count++;
|
||||||
char *encoded = curl_easy_escape(esc_curl, (char *)iurl, 0);
|
|
||||||
if (encoded) {
|
|
||||||
size_t proxy_len = strlen("/proxy?url=") + strlen(encoded) + 1;
|
|
||||||
proxy_url = malloc(proxy_len);
|
|
||||||
if (proxy_url) {
|
|
||||||
snprintf(proxy_url, proxy_len, "/proxy?url=%s", encoded);
|
|
||||||
}
|
}
|
||||||
curl_free(encoded);
|
|
||||||
}
|
|
||||||
curl_easy_cleanup(esc_curl);
|
|
||||||
}
|
|
||||||
|
|
||||||
image_matrix[image_count] = malloc(sizeof(char *) * 4);
|
if (iurl)
|
||||||
image_matrix[image_count][0] = proxy_url ? strdup(proxy_url) : strdup((char *)iurl);
|
xmlFree(iurl);
|
||||||
image_matrix[image_count][1] = strdup(title ? (char *)title : "Image");
|
if (title)
|
||||||
image_matrix[image_count][2] = strdup(rurl ? (char *)rurl : "#");
|
xmlFree(title);
|
||||||
image_matrix[image_count][3] = strdup(full_url ? (char *)full_url : "#");
|
if (rurl)
|
||||||
inner_counts[image_count] = 4;
|
xmlFree(rurl);
|
||||||
image_count++;
|
if (full_url)
|
||||||
|
xmlFree(full_url);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (iurl) xmlFree(iurl);
|
|
||||||
if (title) xmlFree(title);
|
|
||||||
if (rurl) xmlFree(rurl);
|
|
||||||
if (full_url) xmlFree(full_url);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
context_set_array_of_arrays(&ctx, "images", image_matrix, image_count,
|
context_set_array_of_arrays(&ctx, "images", image_matrix, image_count,
|
||||||
inner_counts);
|
inner_counts);
|
||||||
|
|
||||||
char *rendered = render_template("images.html", &ctx);
|
char *rendered = render_template("images.html", &ctx);
|
||||||
if (rendered) {
|
if (rendered) {
|
||||||
send_response(rendered);
|
send_response(rendered);
|
||||||
free(rendered);
|
free(rendered);
|
||||||
} else {
|
} else {
|
||||||
send_response("<h1>Error rendering image results</h1>");
|
send_response("<h1>Error rendering image results</h1>");
|
||||||
}
|
}
|
||||||
|
|
||||||
if (image_matrix) {
|
if (image_matrix) {
|
||||||
for (int i = 0; i < image_count; i++) {
|
for (int i = 0; i < image_count; i++) {
|
||||||
for (int j = 0; j < 4; j++) {
|
for (int j = 0; j < 4; j++) {
|
||||||
free(image_matrix[i][j]);
|
free(image_matrix[i][j]);
|
||||||
|
}
|
||||||
|
free(image_matrix[i]);
|
||||||
}
|
}
|
||||||
free(image_matrix[i]);
|
free(image_matrix);
|
||||||
}
|
|
||||||
free(image_matrix);
|
|
||||||
}
|
}
|
||||||
if (inner_counts) {
|
if (inner_counts) {
|
||||||
free(inner_counts);
|
free(inner_counts);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (xpathObj) xmlXPathFreeObject(xpathObj);
|
if (xpathObj)
|
||||||
if (xpathCtx) xmlXPathFreeContext(xpathCtx);
|
xmlXPathFreeObject(xpathObj);
|
||||||
if (doc) xmlFreeDoc(doc);
|
if (xpathCtx)
|
||||||
|
xmlXPathFreeContext(xpathCtx);
|
||||||
|
if (doc)
|
||||||
|
xmlFreeDoc(doc);
|
||||||
free(html);
|
free(html);
|
||||||
curl_free(encoded_query);
|
curl_free(encoded_query);
|
||||||
free(display_query);
|
free(display_query);
|
||||||
free_context(&ctx);
|
free_context(&ctx);
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,8 +1,8 @@
|
|||||||
#include "Search.h"
|
#include "Search.h"
|
||||||
#include "../Infobox/Wikipedia.h"
|
|
||||||
#include "../Infobox/Calculator.h"
|
#include "../Infobox/Calculator.h"
|
||||||
#include "../Infobox/Dictionary.h"
|
#include "../Infobox/Dictionary.h"
|
||||||
#include "../Infobox/UnitConversion.h"
|
#include "../Infobox/UnitConversion.h"
|
||||||
|
#include "../Infobox/Wikipedia.h"
|
||||||
#include "../Scraping/Scraping.h"
|
#include "../Scraping/Scraping.h"
|
||||||
#include "../Utility/Display.h"
|
#include "../Utility/Display.h"
|
||||||
#include "../Utility/Unescape.h"
|
#include "../Utility/Unescape.h"
|
||||||
@@ -23,62 +23,66 @@ static void *wiki_thread_func(void *arg) {
|
|||||||
InfoBoxThreadData *data = (InfoBoxThreadData *)arg;
|
InfoBoxThreadData *data = (InfoBoxThreadData *)arg;
|
||||||
char *dynamic_url = construct_wiki_url(data->query);
|
char *dynamic_url = construct_wiki_url(data->query);
|
||||||
if (dynamic_url) {
|
if (dynamic_url) {
|
||||||
data->result = fetch_wiki_data(dynamic_url);
|
data->result = fetch_wiki_data(dynamic_url);
|
||||||
data->success =
|
data->success =
|
||||||
(data->result.title != NULL && data->result.extract != NULL &&
|
(data->result.title != NULL && data->result.extract != NULL &&
|
||||||
strlen(data->result.extract) > 10);
|
strlen(data->result.extract) > 10);
|
||||||
free(dynamic_url);
|
free(dynamic_url);
|
||||||
} else {
|
} else {
|
||||||
data->success = 0;
|
data->success = 0;
|
||||||
}
|
}
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int is_calculator_query(const char *query) {
|
static int is_calculator_query(const char *query) {
|
||||||
if (!query) return 0;
|
if (!query)
|
||||||
|
return 0;
|
||||||
|
|
||||||
int has_digit = 0;
|
int has_digit = 0;
|
||||||
int has_math_operator = 0;
|
int has_math_operator = 0;
|
||||||
|
|
||||||
for (const char *p = query; *p; p++) {
|
for (const char *p = query; *p; p++) {
|
||||||
if (isdigit(*p) || *p == '.') {
|
if (isdigit(*p) || *p == '.') {
|
||||||
has_digit = 1;
|
has_digit = 1;
|
||||||
}
|
}
|
||||||
if (*p == '+' || *p == '-' || *p == '*' || *p == '/' || *p == '^') {
|
if (*p == '+' || *p == '-' || *p == '*' || *p == '/' || *p == '^') {
|
||||||
has_math_operator = 1;
|
has_math_operator = 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!has_digit || !has_math_operator) return 0;
|
if (!has_digit || !has_math_operator)
|
||||||
|
return 0;
|
||||||
|
|
||||||
int len = strlen(query);
|
int len = strlen(query);
|
||||||
for (int i = 0; i < len; i++) {
|
for (int i = 0; i < len; i++) {
|
||||||
char c = query[i];
|
char c = query[i];
|
||||||
if (c == '+' || c == '-' || c == '*' || c == '/' || c == '^') {
|
if (c == '+' || c == '-' || c == '*' || c == '/' || c == '^') {
|
||||||
int has_num_before = 0;
|
int has_num_before = 0;
|
||||||
int has_num_after = 0;
|
int has_num_after = 0;
|
||||||
|
|
||||||
for (int j = i - 1; j >= 0; j--) {
|
for (int j = i - 1; j >= 0; j--) {
|
||||||
if (isdigit(query[j]) || query[j] == '.') {
|
if (isdigit(query[j]) || query[j] == '.') {
|
||||||
has_num_before = 1;
|
has_num_before = 1;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
if (query[j] != ' ') break;
|
if (query[j] != ' ')
|
||||||
}
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
for (int j = i + 1; j < len; j++) {
|
for (int j = i + 1; j < len; j++) {
|
||||||
if (isdigit(query[j]) || query[j] == '.') {
|
if (isdigit(query[j]) || query[j] == '.') {
|
||||||
has_num_after = 1;
|
has_num_after = 1;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
if (query[j] != ' ') break;
|
if (query[j] != ' ')
|
||||||
}
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
if (has_num_before || has_num_after) {
|
if (has_num_before || has_num_after) {
|
||||||
return 1;
|
return 1;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
@@ -87,11 +91,11 @@ static void *calc_thread_func(void *arg) {
|
|||||||
InfoBoxThreadData *data = (InfoBoxThreadData *)arg;
|
InfoBoxThreadData *data = (InfoBoxThreadData *)arg;
|
||||||
|
|
||||||
if (is_calculator_query(data->query)) {
|
if (is_calculator_query(data->query)) {
|
||||||
data->result = fetch_calc_data((char *)data->query);
|
data->result = fetch_calc_data((char *)data->query);
|
||||||
data->success =
|
data->success =
|
||||||
(data->result.title != NULL && data->result.extract != NULL);
|
(data->result.title != NULL && data->result.extract != NULL);
|
||||||
} else {
|
} else {
|
||||||
data->success = 0;
|
data->success = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
return NULL;
|
return NULL;
|
||||||
@@ -101,11 +105,11 @@ static void *dict_thread_func(void *arg) {
|
|||||||
InfoBoxThreadData *data = (InfoBoxThreadData *)arg;
|
InfoBoxThreadData *data = (InfoBoxThreadData *)arg;
|
||||||
|
|
||||||
if (is_dictionary_query(data->query)) {
|
if (is_dictionary_query(data->query)) {
|
||||||
data->result = fetch_dictionary_data(data->query);
|
data->result = fetch_dictionary_data(data->query);
|
||||||
data->success =
|
data->success =
|
||||||
(data->result.title != NULL && data->result.extract != NULL);
|
(data->result.title != NULL && data->result.extract != NULL);
|
||||||
} else {
|
} else {
|
||||||
data->success = 0;
|
data->success = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
return NULL;
|
return NULL;
|
||||||
@@ -115,27 +119,30 @@ static void *unit_thread_func(void *arg) {
|
|||||||
InfoBoxThreadData *data = (InfoBoxThreadData *)arg;
|
InfoBoxThreadData *data = (InfoBoxThreadData *)arg;
|
||||||
|
|
||||||
if (is_unit_conv_query(data->query)) {
|
if (is_unit_conv_query(data->query)) {
|
||||||
data->result = fetch_unit_conv_data(data->query);
|
data->result = fetch_unit_conv_data(data->query);
|
||||||
data->success =
|
data->success =
|
||||||
(data->result.title != NULL && data->result.extract != NULL);
|
(data->result.title != NULL && data->result.extract != NULL);
|
||||||
} else {
|
} else {
|
||||||
data->success = 0;
|
data->success = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int add_infobox_to_collection(InfoBox *infobox, char ****collection,
|
static int add_infobox_to_collection(InfoBox *infobox, char ****collection,
|
||||||
int **inner_counts, int current_count) {
|
int **inner_counts, int current_count) {
|
||||||
*collection =
|
*collection =
|
||||||
(char ***)realloc(*collection, sizeof(char **) * (current_count + 1));
|
(char ***)realloc(*collection, sizeof(char **) * (current_count + 1));
|
||||||
*inner_counts =
|
*inner_counts =
|
||||||
(int *)realloc(*inner_counts, sizeof(int) * (current_count + 1));
|
(int *)realloc(*inner_counts, sizeof(int) * (current_count + 1));
|
||||||
|
|
||||||
(*collection)[current_count] = (char **)malloc(sizeof(char *) * 4);
|
(*collection)[current_count] = (char **)malloc(sizeof(char *) * 4);
|
||||||
(*collection)[current_count][0] = infobox->title ? strdup(infobox->title) : NULL;
|
(*collection)[current_count][0] =
|
||||||
(*collection)[current_count][1] = infobox->thumbnail_url ? strdup(infobox->thumbnail_url) : NULL;
|
infobox->title ? strdup(infobox->title) : NULL;
|
||||||
(*collection)[current_count][2] = infobox->extract ? strdup(infobox->extract) : NULL;
|
(*collection)[current_count][1] =
|
||||||
|
infobox->thumbnail_url ? strdup(infobox->thumbnail_url) : NULL;
|
||||||
|
(*collection)[current_count][2] =
|
||||||
|
infobox->extract ? strdup(infobox->extract) : NULL;
|
||||||
(*collection)[current_count][3] = infobox->url ? strdup(infobox->url) : NULL;
|
(*collection)[current_count][3] = infobox->url ? strdup(infobox->url) : NULL;
|
||||||
(*inner_counts)[current_count] = 4;
|
(*inner_counts)[current_count] = 4;
|
||||||
|
|
||||||
@@ -148,15 +155,16 @@ int results_handler(UrlParams *params) {
|
|||||||
int page = 1;
|
int page = 1;
|
||||||
|
|
||||||
if (params) {
|
if (params) {
|
||||||
for (int i = 0; i < params->count; i++) {
|
for (int i = 0; i < params->count; i++) {
|
||||||
if (strcmp(params->params[i].key, "q") == 0) {
|
if (strcmp(params->params[i].key, "q") == 0) {
|
||||||
raw_query = params->params[i].value;
|
raw_query = params->params[i].value;
|
||||||
} else if (strcmp(params->params[i].key, "p") == 0) {
|
} else if (strcmp(params->params[i].key, "p") == 0) {
|
||||||
int parsed = atoi(params->params[i].value);
|
int parsed = atoi(params->params[i].value);
|
||||||
if (parsed > 1) page = parsed;
|
if (parsed > 1)
|
||||||
|
page = parsed;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
context_set(&ctx, "query", raw_query);
|
context_set(&ctx, "query", raw_query);
|
||||||
|
|
||||||
@@ -164,14 +172,14 @@ int results_handler(UrlParams *params) {
|
|||||||
snprintf(page_str, sizeof(page_str), "%d", page);
|
snprintf(page_str, sizeof(page_str), "%d", page);
|
||||||
snprintf(prev_str, sizeof(prev_str), "%d", page > 1 ? page - 1 : 0);
|
snprintf(prev_str, sizeof(prev_str), "%d", page > 1 ? page - 1 : 0);
|
||||||
snprintf(next_str, sizeof(next_str), "%d", page + 1);
|
snprintf(next_str, sizeof(next_str), "%d", page + 1);
|
||||||
context_set(&ctx, "page", page_str);
|
context_set(&ctx, "page", page_str);
|
||||||
context_set(&ctx, "prev_page", prev_str);
|
context_set(&ctx, "prev_page", prev_str);
|
||||||
context_set(&ctx, "next_page", next_str);
|
context_set(&ctx, "next_page", next_str);
|
||||||
|
|
||||||
if (!raw_query || strlen(raw_query) == 0) {
|
if (!raw_query || strlen(raw_query) == 0) {
|
||||||
send_response("<h1>No query provided</h1>");
|
send_response("<h1>No query provided</h1>");
|
||||||
free_context(&ctx);
|
free_context(&ctx);
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
pthread_t wiki_tid, calc_tid, dict_tid, unit_tid;
|
pthread_t wiki_tid, calc_tid, dict_tid, unit_tid;
|
||||||
@@ -181,36 +189,36 @@ int results_handler(UrlParams *params) {
|
|||||||
InfoBoxThreadData unit_data = {.query = raw_query, .success = 0};
|
InfoBoxThreadData unit_data = {.query = raw_query, .success = 0};
|
||||||
|
|
||||||
if (page == 1) {
|
if (page == 1) {
|
||||||
pthread_create(&wiki_tid, NULL, wiki_thread_func, &wiki_data);
|
pthread_create(&wiki_tid, NULL, wiki_thread_func, &wiki_data);
|
||||||
pthread_create(&calc_tid, NULL, calc_thread_func, &calc_data);
|
pthread_create(&calc_tid, NULL, calc_thread_func, &calc_data);
|
||||||
pthread_create(&dict_tid, NULL, dict_thread_func, &dict_data);
|
pthread_create(&dict_tid, NULL, dict_thread_func, &dict_data);
|
||||||
pthread_create(&unit_tid, NULL, unit_thread_func, &unit_data);
|
pthread_create(&unit_tid, NULL, unit_thread_func, &unit_data);
|
||||||
}
|
}
|
||||||
|
|
||||||
ScrapeJob jobs[ENGINE_COUNT];
|
ScrapeJob jobs[ENGINE_COUNT];
|
||||||
SearchResult *all_results[ENGINE_COUNT];
|
SearchResult *all_results[ENGINE_COUNT];
|
||||||
|
|
||||||
for (int i = 0; i < ENGINE_COUNT; i++) {
|
for (int i = 0; i < ENGINE_COUNT; i++) {
|
||||||
all_results[i] = NULL;
|
all_results[i] = NULL;
|
||||||
jobs[i].engine = &ENGINE_REGISTRY[i];
|
jobs[i].engine = &ENGINE_REGISTRY[i];
|
||||||
jobs[i].query = raw_query;
|
jobs[i].query = raw_query;
|
||||||
jobs[i].out_results = &all_results[i];
|
jobs[i].out_results = &all_results[i];
|
||||||
jobs[i].max_results = 10;
|
jobs[i].max_results = 10;
|
||||||
jobs[i].results_count = 0;
|
jobs[i].results_count = 0;
|
||||||
jobs[i].page = page;
|
jobs[i].page = page;
|
||||||
jobs[i].handle = NULL;
|
jobs[i].handle = NULL;
|
||||||
jobs[i].response.memory = NULL;
|
jobs[i].response.memory = NULL;
|
||||||
jobs[i].response.size = 0;
|
jobs[i].response.size = 0;
|
||||||
jobs[i].response.capacity = 0;
|
jobs[i].response.capacity = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
scrape_engines_parallel(jobs, ENGINE_COUNT);
|
scrape_engines_parallel(jobs, ENGINE_COUNT);
|
||||||
|
|
||||||
if (page == 1) {
|
if (page == 1) {
|
||||||
pthread_join(wiki_tid, NULL);
|
pthread_join(wiki_tid, NULL);
|
||||||
pthread_join(calc_tid, NULL);
|
pthread_join(calc_tid, NULL);
|
||||||
pthread_join(dict_tid, NULL);
|
pthread_join(dict_tid, NULL);
|
||||||
pthread_join(unit_tid, NULL);
|
pthread_join(unit_tid, NULL);
|
||||||
}
|
}
|
||||||
|
|
||||||
char ***infobox_matrix = NULL;
|
char ***infobox_matrix = NULL;
|
||||||
@@ -218,120 +226,135 @@ int results_handler(UrlParams *params) {
|
|||||||
int infobox_count = 0;
|
int infobox_count = 0;
|
||||||
|
|
||||||
if (page == 1) {
|
if (page == 1) {
|
||||||
if (dict_data.success) {
|
if (dict_data.success) {
|
||||||
infobox_count = add_infobox_to_collection(&dict_data.result, &infobox_matrix,
|
infobox_count =
|
||||||
&infobox_inner_counts, infobox_count);
|
add_infobox_to_collection(&dict_data.result, &infobox_matrix,
|
||||||
}
|
&infobox_inner_counts, infobox_count);
|
||||||
|
}
|
||||||
|
|
||||||
if (calc_data.success) {
|
if (calc_data.success) {
|
||||||
infobox_count = add_infobox_to_collection(&calc_data.result, &infobox_matrix,
|
infobox_count =
|
||||||
&infobox_inner_counts, infobox_count);
|
add_infobox_to_collection(&calc_data.result, &infobox_matrix,
|
||||||
}
|
&infobox_inner_counts, infobox_count);
|
||||||
|
}
|
||||||
|
|
||||||
if (unit_data.success) {
|
if (unit_data.success) {
|
||||||
infobox_count = add_infobox_to_collection(&unit_data.result, &infobox_matrix,
|
infobox_count =
|
||||||
&infobox_inner_counts, infobox_count);
|
add_infobox_to_collection(&unit_data.result, &infobox_matrix,
|
||||||
}
|
&infobox_inner_counts, infobox_count);
|
||||||
|
}
|
||||||
|
|
||||||
if (wiki_data.success) {
|
if (wiki_data.success) {
|
||||||
infobox_count = add_infobox_to_collection(&wiki_data.result, &infobox_matrix,
|
infobox_count =
|
||||||
&infobox_inner_counts, infobox_count);
|
add_infobox_to_collection(&wiki_data.result, &infobox_matrix,
|
||||||
}
|
&infobox_inner_counts, infobox_count);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (infobox_count > 0) {
|
if (infobox_count > 0) {
|
||||||
context_set_array_of_arrays(&ctx, "infoboxes", infobox_matrix,
|
context_set_array_of_arrays(&ctx, "infoboxes", infobox_matrix,
|
||||||
infobox_count, infobox_inner_counts);
|
infobox_count, infobox_inner_counts);
|
||||||
for (int i = 0; i < infobox_count; i++) {
|
for (int i = 0; i < infobox_count; i++) {
|
||||||
for (int j = 0; j < 4; j++) free(infobox_matrix[i][j]);
|
for (int j = 0; j < 4; j++)
|
||||||
free(infobox_matrix[i]);
|
free(infobox_matrix[i][j]);
|
||||||
}
|
free(infobox_matrix[i]);
|
||||||
free(infobox_matrix);
|
}
|
||||||
free(infobox_inner_counts);
|
free(infobox_matrix);
|
||||||
|
free(infobox_inner_counts);
|
||||||
}
|
}
|
||||||
|
|
||||||
int total_results = 0;
|
int total_results = 0;
|
||||||
for (int i = 0; i < ENGINE_COUNT; i++) {
|
for (int i = 0; i < ENGINE_COUNT; i++) {
|
||||||
total_results += jobs[i].results_count;
|
total_results += jobs[i].results_count;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (total_results > 0) {
|
if (total_results > 0) {
|
||||||
char ***results_matrix = (char ***)malloc(sizeof(char **) * total_results);
|
char ***results_matrix = (char ***)malloc(sizeof(char **) * total_results);
|
||||||
int *results_inner_counts = (int *)malloc(sizeof(int) * total_results);
|
int *results_inner_counts = (int *)malloc(sizeof(int) * total_results);
|
||||||
char **seen_urls = (char **)malloc(sizeof(char *) * total_results);
|
char **seen_urls = (char **)malloc(sizeof(char *) * total_results);
|
||||||
int unique_count = 0;
|
int unique_count = 0;
|
||||||
|
|
||||||
for (int i = 0; i < ENGINE_COUNT; i++) {
|
for (int i = 0; i < ENGINE_COUNT; i++) {
|
||||||
for (int j = 0; j < jobs[i].results_count; j++) {
|
for (int j = 0; j < jobs[i].results_count; j++) {
|
||||||
char *display_url = all_results[i][j].url;
|
char *display_url = all_results[i][j].url;
|
||||||
|
|
||||||
int is_duplicate = 0;
|
int is_duplicate = 0;
|
||||||
for (int k = 0; k < unique_count; k++) {
|
for (int k = 0; k < unique_count; k++) {
|
||||||
if (strcmp(seen_urls[k], display_url) == 0) {
|
if (strcmp(seen_urls[k], display_url) == 0) {
|
||||||
is_duplicate = 1;
|
is_duplicate = 1;
|
||||||
break;
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (is_duplicate) {
|
||||||
|
free(all_results[i][j].url);
|
||||||
|
free(all_results[i][j].title);
|
||||||
|
free(all_results[i][j].snippet);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
seen_urls[unique_count] = strdup(display_url);
|
||||||
|
results_matrix[unique_count] = (char **)malloc(sizeof(char *) * 4);
|
||||||
|
char *pretty_url = pretty_display_url(display_url);
|
||||||
|
|
||||||
|
results_matrix[unique_count][0] = strdup(display_url);
|
||||||
|
results_matrix[unique_count][1] = strdup(pretty_url);
|
||||||
|
results_matrix[unique_count][2] = all_results[i][j].title
|
||||||
|
? strdup(all_results[i][j].title)
|
||||||
|
: strdup("Untitled");
|
||||||
|
results_matrix[unique_count][3] =
|
||||||
|
all_results[i][j].snippet ? strdup(all_results[i][j].snippet)
|
||||||
|
: strdup("");
|
||||||
|
|
||||||
|
results_inner_counts[unique_count] = 4;
|
||||||
|
|
||||||
|
free(pretty_url);
|
||||||
|
free(all_results[i][j].url);
|
||||||
|
free(all_results[i][j].title);
|
||||||
|
free(all_results[i][j].snippet);
|
||||||
|
|
||||||
|
unique_count++;
|
||||||
}
|
}
|
||||||
|
free(all_results[i]);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (is_duplicate) {
|
context_set_array_of_arrays(&ctx, "results", results_matrix, unique_count,
|
||||||
free(all_results[i][j].url);
|
results_inner_counts);
|
||||||
free(all_results[i][j].title);
|
|
||||||
free(all_results[i][j].snippet);
|
char *html = render_template("results.html", &ctx);
|
||||||
continue;
|
if (html) {
|
||||||
|
send_response(html);
|
||||||
|
free(html);
|
||||||
}
|
}
|
||||||
|
|
||||||
seen_urls[unique_count] = strdup(display_url);
|
for (int i = 0; i < unique_count; i++) {
|
||||||
results_matrix[unique_count] = (char **)malloc(sizeof(char *) * 4);
|
for (int j = 0; j < 4; j++)
|
||||||
char *pretty_url = pretty_display_url(display_url);
|
free(results_matrix[i][j]);
|
||||||
|
free(results_matrix[i]);
|
||||||
results_matrix[unique_count][0] = strdup(display_url);
|
free(seen_urls[i]);
|
||||||
results_matrix[unique_count][1] = strdup(pretty_url);
|
|
||||||
results_matrix[unique_count][2] = all_results[i][j].title ? strdup(all_results[i][j].title) : strdup("Untitled");
|
|
||||||
results_matrix[unique_count][3] = all_results[i][j].snippet ? strdup(all_results[i][j].snippet) : strdup("");
|
|
||||||
|
|
||||||
results_inner_counts[unique_count] = 4;
|
|
||||||
|
|
||||||
free(pretty_url);
|
|
||||||
free(all_results[i][j].url);
|
|
||||||
free(all_results[i][j].title);
|
|
||||||
free(all_results[i][j].snippet);
|
|
||||||
|
|
||||||
unique_count++;
|
|
||||||
}
|
}
|
||||||
free(all_results[i]);
|
free(seen_urls);
|
||||||
}
|
free(results_matrix);
|
||||||
|
free(results_inner_counts);
|
||||||
context_set_array_of_arrays(&ctx, "results", results_matrix, unique_count, results_inner_counts);
|
|
||||||
|
|
||||||
char *html = render_template("results.html", &ctx);
|
|
||||||
if (html) {
|
|
||||||
send_response(html);
|
|
||||||
free(html);
|
|
||||||
}
|
|
||||||
|
|
||||||
for (int i = 0; i < unique_count; i++) {
|
|
||||||
for (int j = 0; j < 4; j++) free(results_matrix[i][j]);
|
|
||||||
free(results_matrix[i]);
|
|
||||||
free(seen_urls[i]);
|
|
||||||
}
|
|
||||||
free(seen_urls);
|
|
||||||
free(results_matrix);
|
|
||||||
free(results_inner_counts);
|
|
||||||
} else {
|
} else {
|
||||||
char *html = render_template("results.html", &ctx);
|
char *html = render_template("results.html", &ctx);
|
||||||
if (html) {
|
if (html) {
|
||||||
send_response(html);
|
send_response(html);
|
||||||
free(html);
|
free(html);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (page == 1) {
|
if (page == 1) {
|
||||||
if (wiki_data.success) free_infobox(&wiki_data.result);
|
if (wiki_data.success)
|
||||||
if (calc_data.success) free_infobox(&calc_data.result);
|
free_infobox(&wiki_data.result);
|
||||||
if (dict_data.success) free_infobox(&dict_data.result);
|
if (calc_data.success)
|
||||||
if (unit_data.success) free_infobox(&unit_data.result);
|
free_infobox(&calc_data.result);
|
||||||
|
if (dict_data.success)
|
||||||
|
free_infobox(&dict_data.result);
|
||||||
|
if (unit_data.success)
|
||||||
|
free_infobox(&unit_data.result);
|
||||||
}
|
}
|
||||||
free_context(&ctx);
|
free_context(&ctx);
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -11,21 +11,22 @@
|
|||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
|
|
||||||
static size_t WriteMemoryCallback(void *contents, size_t size, size_t nmemb,
|
static size_t WriteMemoryCallback(void *contents, size_t size, size_t nmemb,
|
||||||
void *userp) {
|
void *userp) {
|
||||||
size_t realsize = size * nmemb;
|
size_t realsize = size * nmemb;
|
||||||
MemoryBuffer *mem = (MemoryBuffer *)userp;
|
MemoryBuffer *mem = (MemoryBuffer *)userp;
|
||||||
|
|
||||||
if (mem->size + realsize + 1 > mem->capacity) {
|
if (mem->size + realsize + 1 > mem->capacity) {
|
||||||
|
|
||||||
size_t new_cap = mem->capacity == 0 ? 16384 : mem->capacity * 2;
|
size_t new_cap = mem->capacity == 0 ? 16384 : mem->capacity * 2;
|
||||||
while (new_cap < mem->size + realsize + 1) new_cap *= 2;
|
while (new_cap < mem->size + realsize + 1)
|
||||||
|
new_cap *= 2;
|
||||||
|
|
||||||
char *ptr = (char *)realloc(mem->memory, new_cap);
|
char *ptr = (char *)realloc(mem->memory, new_cap);
|
||||||
if (!ptr) {
|
if (!ptr) {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
mem->memory = ptr;
|
mem->memory = ptr;
|
||||||
mem->capacity = new_cap;
|
mem->capacity = new_cap;
|
||||||
}
|
}
|
||||||
|
|
||||||
memcpy(&(mem->memory[mem->size]), contents, realsize);
|
memcpy(&(mem->memory[mem->size]), contents, realsize);
|
||||||
@@ -37,37 +38,39 @@ static size_t WriteMemoryCallback(void *contents, size_t size, size_t nmemb,
|
|||||||
|
|
||||||
static const char *get_random_user_agent() {
|
static const char *get_random_user_agent() {
|
||||||
static const char *agents[] = {
|
static const char *agents[] = {
|
||||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, "
|
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, "
|
||||||
"like Gecko) Chrome/120.0.0.0 Safari/537.36",
|
"like Gecko) Chrome/120.0.0.0 Safari/537.36",
|
||||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 "
|
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 "
|
||||||
"(KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36",
|
"(KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36",
|
||||||
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like "
|
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like "
|
||||||
"Gecko) "
|
"Gecko) "
|
||||||
"Chrome/120.0.0.0` Safari/537.36",
|
"Chrome/120.0.0.0` Safari/537.36",
|
||||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 "
|
"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 "
|
||||||
"Firefox/121.0",
|
"Firefox/121.0",
|
||||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 "
|
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 "
|
||||||
"(KHTML, like Gecko) Version/17.2 Safari/605.1.15"};
|
"(KHTML, like Gecko) Version/17.2 Safari/605.1.15"};
|
||||||
return agents[rand() % 5];
|
return agents[rand() % 5];
|
||||||
}
|
}
|
||||||
|
|
||||||
static int parse_ddg_lite(const char *engine_name, xmlDocPtr doc,
|
static int parse_ddg_lite(const char *engine_name, xmlDocPtr doc,
|
||||||
SearchResult **out_results, int max_results) {
|
SearchResult **out_results, int max_results) {
|
||||||
(void)engine_name;
|
(void)engine_name;
|
||||||
int found_count = 0;
|
int found_count = 0;
|
||||||
xmlXPathContextPtr xpathCtx = xmlXPathNewContext(doc);
|
xmlXPathContextPtr xpathCtx = xmlXPathNewContext(doc);
|
||||||
if (!xpathCtx) {
|
if (!xpathCtx) {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
const char *link_xpath = "//tr[not(contains(@class, 'result-sponsored'))]//a[@class='result-link']";
|
const char *link_xpath = "//tr[not(contains(@class, "
|
||||||
|
"'result-sponsored'))]//a[@class='result-link']";
|
||||||
xmlXPathObjectPtr xpathObj =
|
xmlXPathObjectPtr xpathObj =
|
||||||
xmlXPathEvalExpression((xmlChar *)link_xpath, xpathCtx);
|
xmlXPathEvalExpression((xmlChar *)link_xpath, xpathCtx);
|
||||||
|
|
||||||
if (!xpathObj || !xpathObj->nodesetval || xpathObj->nodesetval->nodeNr == 0) {
|
if (!xpathObj || !xpathObj->nodesetval || xpathObj->nodesetval->nodeNr == 0) {
|
||||||
if (xpathObj) xmlXPathFreeObject(xpathObj);
|
if (xpathObj)
|
||||||
xmlXPathFreeContext(xpathCtx);
|
xmlXPathFreeObject(xpathObj);
|
||||||
return 0;
|
xmlXPathFreeContext(xpathCtx);
|
||||||
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
int num_links = xpathObj->nodesetval->nodeNr;
|
int num_links = xpathObj->nodesetval->nodeNr;
|
||||||
@@ -75,49 +78,54 @@ static int parse_ddg_lite(const char *engine_name, xmlDocPtr doc,
|
|||||||
int actual_alloc = (num_links < max_results) ? num_links : max_results;
|
int actual_alloc = (num_links < max_results) ? num_links : max_results;
|
||||||
*out_results = (SearchResult *)calloc(actual_alloc, sizeof(SearchResult));
|
*out_results = (SearchResult *)calloc(actual_alloc, sizeof(SearchResult));
|
||||||
if (!*out_results) {
|
if (!*out_results) {
|
||||||
xmlXPathFreeObject(xpathObj);
|
xmlXPathFreeObject(xpathObj);
|
||||||
xmlXPathFreeContext(xpathCtx);
|
xmlXPathFreeContext(xpathCtx);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (int i = 0; i < num_links && found_count < max_results; i++) {
|
for (int i = 0; i < num_links && found_count < max_results; i++) {
|
||||||
xmlNodePtr linkNode = xpathObj->nodesetval->nodeTab[i];
|
xmlNodePtr linkNode = xpathObj->nodesetval->nodeTab[i];
|
||||||
char *title = (char *)xmlNodeGetContent(linkNode);
|
char *title = (char *)xmlNodeGetContent(linkNode);
|
||||||
char *url = (char *)xmlGetProp(linkNode, (xmlChar *)"href");
|
char *url = (char *)xmlGetProp(linkNode, (xmlChar *)"href");
|
||||||
char *snippet_text = NULL;
|
char *snippet_text = NULL;
|
||||||
|
|
||||||
xmlNodePtr current = linkNode->parent;
|
xmlNodePtr current = linkNode->parent;
|
||||||
while (current && xmlStrcasecmp(current->name, (const xmlChar *)"tr") != 0)
|
while (current && xmlStrcasecmp(current->name, (const xmlChar *)"tr") != 0)
|
||||||
current = current->parent;
|
current = current->parent;
|
||||||
|
|
||||||
if (current && current->next) {
|
if (current && current->next) {
|
||||||
xmlNodePtr snippetRow = current->next;
|
xmlNodePtr snippetRow = current->next;
|
||||||
while (snippetRow &&
|
while (snippetRow &&
|
||||||
xmlStrcasecmp(snippetRow->name, (const xmlChar *)"tr") != 0)
|
xmlStrcasecmp(snippetRow->name, (const xmlChar *)"tr") != 0)
|
||||||
snippetRow = snippetRow->next;
|
snippetRow = snippetRow->next;
|
||||||
if (snippetRow) {
|
if (snippetRow) {
|
||||||
|
|
||||||
xpathCtx->node = snippetRow;
|
xpathCtx->node = snippetRow;
|
||||||
xmlXPathObjectPtr sObj = xmlXPathEvalExpression(
|
xmlXPathObjectPtr sObj = xmlXPathEvalExpression(
|
||||||
(xmlChar *)".//td[@class='result-snippet']", xpathCtx);
|
(xmlChar *)".//td[@class='result-snippet']", xpathCtx);
|
||||||
if (sObj && sObj->nodesetval && sObj->nodesetval->nodeNr > 0) {
|
if (sObj && sObj->nodesetval && sObj->nodesetval->nodeNr > 0) {
|
||||||
snippet_text = (char *)xmlNodeGetContent(sObj->nodesetval->nodeTab[0]);
|
snippet_text =
|
||||||
|
(char *)xmlNodeGetContent(sObj->nodesetval->nodeTab[0]);
|
||||||
|
}
|
||||||
|
if (sObj)
|
||||||
|
xmlXPathFreeObject(sObj);
|
||||||
|
xpathCtx->node = NULL;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if (sObj) xmlXPathFreeObject(sObj);
|
|
||||||
xpathCtx->node = NULL;
|
|
||||||
|
|
||||||
}
|
(*out_results)[found_count].url = unescape_search_url(url);
|
||||||
}
|
(*out_results)[found_count].title = strdup(title ? title : "No Title");
|
||||||
|
(*out_results)[found_count].snippet =
|
||||||
|
strdup(snippet_text ? snippet_text : "");
|
||||||
|
|
||||||
(*out_results)[found_count].url = unescape_search_url(url);
|
found_count++;
|
||||||
(*out_results)[found_count].title = strdup(title ? title : "No Title");
|
|
||||||
(*out_results)[found_count].snippet = strdup(snippet_text ? snippet_text : "");
|
|
||||||
|
|
||||||
found_count++;
|
if (title)
|
||||||
|
xmlFree(title);
|
||||||
if (title) xmlFree(title);
|
if (url)
|
||||||
if (url) xmlFree(url);
|
xmlFree(url);
|
||||||
if (snippet_text) xmlFree(snippet_text);
|
if (snippet_text)
|
||||||
|
xmlFree(snippet_text);
|
||||||
}
|
}
|
||||||
|
|
||||||
xmlXPathFreeObject(xpathObj);
|
xmlXPathFreeObject(xpathObj);
|
||||||
@@ -126,22 +134,23 @@ static int parse_ddg_lite(const char *engine_name, xmlDocPtr doc,
|
|||||||
}
|
}
|
||||||
|
|
||||||
static int parse_startpage(const char *engine_name, xmlDocPtr doc,
|
static int parse_startpage(const char *engine_name, xmlDocPtr doc,
|
||||||
SearchResult **out_results, int max_results) {
|
SearchResult **out_results, int max_results) {
|
||||||
(void)engine_name;
|
(void)engine_name;
|
||||||
int found_count = 0;
|
int found_count = 0;
|
||||||
xmlXPathContextPtr xpathCtx = xmlXPathNewContext(doc);
|
xmlXPathContextPtr xpathCtx = xmlXPathNewContext(doc);
|
||||||
if (!xpathCtx) {
|
if (!xpathCtx) {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
const char *container_xpath = "//div[contains(@class, 'result')]";
|
const char *container_xpath = "//div[contains(@class, 'result')]";
|
||||||
xmlXPathObjectPtr xpathObj =
|
xmlXPathObjectPtr xpathObj =
|
||||||
xmlXPathEvalExpression((xmlChar *)container_xpath, xpathCtx);
|
xmlXPathEvalExpression((xmlChar *)container_xpath, xpathCtx);
|
||||||
|
|
||||||
if (!xpathObj || !xpathObj->nodesetval || xpathObj->nodesetval->nodeNr == 0) {
|
if (!xpathObj || !xpathObj->nodesetval || xpathObj->nodesetval->nodeNr == 0) {
|
||||||
if (xpathObj) xmlXPathFreeObject(xpathObj);
|
if (xpathObj)
|
||||||
xmlXPathFreeContext(xpathCtx);
|
xmlXPathFreeObject(xpathObj);
|
||||||
return 0;
|
xmlXPathFreeContext(xpathCtx);
|
||||||
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
int num_results = xpathObj->nodesetval->nodeNr;
|
int num_results = xpathObj->nodesetval->nodeNr;
|
||||||
@@ -149,55 +158,61 @@ static int parse_startpage(const char *engine_name, xmlDocPtr doc,
|
|||||||
int actual_alloc = (num_results < max_results) ? num_results : max_results;
|
int actual_alloc = (num_results < max_results) ? num_results : max_results;
|
||||||
*out_results = (SearchResult *)calloc(actual_alloc, sizeof(SearchResult));
|
*out_results = (SearchResult *)calloc(actual_alloc, sizeof(SearchResult));
|
||||||
if (!*out_results) {
|
if (!*out_results) {
|
||||||
xmlXPathFreeObject(xpathObj);
|
xmlXPathFreeObject(xpathObj);
|
||||||
xmlXPathFreeContext(xpathCtx);
|
xmlXPathFreeContext(xpathCtx);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (int i = 0; i < num_results && found_count < max_results; i++) {
|
for (int i = 0; i < num_results && found_count < max_results; i++) {
|
||||||
xmlNodePtr resultNode = xpathObj->nodesetval->nodeTab[i];
|
xmlNodePtr resultNode = xpathObj->nodesetval->nodeTab[i];
|
||||||
xpathCtx->node = resultNode;
|
xpathCtx->node = resultNode;
|
||||||
|
|
||||||
xmlXPathObjectPtr linkObj = xmlXPathEvalExpression(
|
xmlXPathObjectPtr linkObj = xmlXPathEvalExpression(
|
||||||
(xmlChar *)".//a[contains(@class, 'result-link')]", xpathCtx);
|
(xmlChar *)".//a[contains(@class, 'result-link')]", xpathCtx);
|
||||||
char *url =
|
char *url =
|
||||||
(linkObj && linkObj->nodesetval && linkObj->nodesetval->nodeNr > 0)
|
(linkObj && linkObj->nodesetval && linkObj->nodesetval->nodeNr > 0)
|
||||||
? (char *)xmlGetProp(linkObj->nodesetval->nodeTab[0],
|
? (char *)xmlGetProp(linkObj->nodesetval->nodeTab[0],
|
||||||
(xmlChar *)"href")
|
(xmlChar *)"href")
|
||||||
: NULL;
|
: NULL;
|
||||||
|
|
||||||
xmlXPathObjectPtr titleObj = xmlXPathEvalExpression(
|
xmlXPathObjectPtr titleObj = xmlXPathEvalExpression(
|
||||||
(xmlChar *)".//h2[contains(@class, 'wgl-title')]", xpathCtx);
|
(xmlChar *)".//h2[contains(@class, 'wgl-title')]", xpathCtx);
|
||||||
char *title =
|
char *title =
|
||||||
(titleObj && titleObj->nodesetval && titleObj->nodesetval->nodeNr > 0)
|
(titleObj && titleObj->nodesetval && titleObj->nodesetval->nodeNr > 0)
|
||||||
? (char *)xmlNodeGetContent(titleObj->nodesetval->nodeTab[0])
|
? (char *)xmlNodeGetContent(titleObj->nodesetval->nodeTab[0])
|
||||||
: NULL;
|
: NULL;
|
||||||
|
|
||||||
xmlXPathObjectPtr snippetObj = xmlXPathEvalExpression(
|
xmlXPathObjectPtr snippetObj = xmlXPathEvalExpression(
|
||||||
(xmlChar *)".//p[contains(@class, 'description')]", xpathCtx);
|
(xmlChar *)".//p[contains(@class, 'description')]", xpathCtx);
|
||||||
char *snippet_text =
|
char *snippet_text =
|
||||||
(snippetObj && snippetObj->nodesetval &&
|
(snippetObj && snippetObj->nodesetval &&
|
||||||
snippetObj->nodesetval->nodeNr > 0)
|
snippetObj->nodesetval->nodeNr > 0)
|
||||||
? (char *)xmlNodeGetContent(snippetObj->nodesetval->nodeTab[0])
|
? (char *)xmlNodeGetContent(snippetObj->nodesetval->nodeTab[0])
|
||||||
: NULL;
|
: NULL;
|
||||||
|
|
||||||
if (url && title) {
|
if (url && title) {
|
||||||
(*out_results)[found_count].url = strdup(url);
|
(*out_results)[found_count].url = strdup(url);
|
||||||
(*out_results)[found_count].title = strdup(title);
|
(*out_results)[found_count].title = strdup(title);
|
||||||
(*out_results)[found_count].snippet =
|
(*out_results)[found_count].snippet =
|
||||||
strdup(snippet_text ? snippet_text : "");
|
strdup(snippet_text ? snippet_text : "");
|
||||||
found_count++;
|
found_count++;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (title)
|
||||||
|
xmlFree(title);
|
||||||
|
if (url)
|
||||||
|
xmlFree(url);
|
||||||
|
if (snippet_text)
|
||||||
|
xmlFree(snippet_text);
|
||||||
|
if (linkObj)
|
||||||
|
xmlXPathFreeObject(linkObj);
|
||||||
|
if (titleObj)
|
||||||
|
xmlXPathFreeObject(titleObj);
|
||||||
|
if (snippetObj)
|
||||||
|
xmlXPathFreeObject(snippetObj);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (title) xmlFree(title);
|
xpathCtx->node = NULL;
|
||||||
if (url) xmlFree(url);
|
|
||||||
if (snippet_text) xmlFree(snippet_text);
|
|
||||||
if (linkObj) xmlXPathFreeObject(linkObj);
|
|
||||||
if (titleObj) xmlXPathFreeObject(titleObj);
|
|
||||||
if (snippetObj) xmlXPathFreeObject(snippetObj);
|
|
||||||
}
|
|
||||||
|
|
||||||
xpathCtx->node = NULL;
|
|
||||||
|
|
||||||
xmlXPathFreeObject(xpathObj);
|
xmlXPathFreeObject(xpathObj);
|
||||||
xmlXPathFreeContext(xpathCtx);
|
xmlXPathFreeContext(xpathCtx);
|
||||||
@@ -205,22 +220,23 @@ static int parse_startpage(const char *engine_name, xmlDocPtr doc,
|
|||||||
}
|
}
|
||||||
|
|
||||||
static int parse_yahoo(const char *engine_name, xmlDocPtr doc,
|
static int parse_yahoo(const char *engine_name, xmlDocPtr doc,
|
||||||
SearchResult **out_results, int max_results) {
|
SearchResult **out_results, int max_results) {
|
||||||
(void)engine_name;
|
(void)engine_name;
|
||||||
int found_count = 0;
|
int found_count = 0;
|
||||||
xmlXPathContextPtr xpathCtx = xmlXPathNewContext(doc);
|
xmlXPathContextPtr xpathCtx = xmlXPathNewContext(doc);
|
||||||
if (!xpathCtx) {
|
if (!xpathCtx) {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
const char *container_xpath = "//div[contains(@class, 'algo-sr')]";
|
const char *container_xpath = "//div[contains(@class, 'algo-sr')]";
|
||||||
xmlXPathObjectPtr xpathObj =
|
xmlXPathObjectPtr xpathObj =
|
||||||
xmlXPathEvalExpression((xmlChar *)container_xpath, xpathCtx);
|
xmlXPathEvalExpression((xmlChar *)container_xpath, xpathCtx);
|
||||||
|
|
||||||
if (!xpathObj || !xpathObj->nodesetval || xpathObj->nodesetval->nodeNr == 0) {
|
if (!xpathObj || !xpathObj->nodesetval || xpathObj->nodesetval->nodeNr == 0) {
|
||||||
if (xpathObj) xmlXPathFreeObject(xpathObj);
|
if (xpathObj)
|
||||||
xmlXPathFreeContext(xpathCtx);
|
xmlXPathFreeObject(xpathObj);
|
||||||
return 0;
|
xmlXPathFreeContext(xpathCtx);
|
||||||
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
int num_results = xpathObj->nodesetval->nodeNr;
|
int num_results = xpathObj->nodesetval->nodeNr;
|
||||||
@@ -228,53 +244,59 @@ static int parse_yahoo(const char *engine_name, xmlDocPtr doc,
|
|||||||
int actual_alloc = (num_results < max_results) ? num_results : max_results;
|
int actual_alloc = (num_results < max_results) ? num_results : max_results;
|
||||||
*out_results = (SearchResult *)calloc(actual_alloc, sizeof(SearchResult));
|
*out_results = (SearchResult *)calloc(actual_alloc, sizeof(SearchResult));
|
||||||
if (!*out_results) {
|
if (!*out_results) {
|
||||||
xmlXPathFreeObject(xpathObj);
|
xmlXPathFreeObject(xpathObj);
|
||||||
xmlXPathFreeContext(xpathCtx);
|
xmlXPathFreeContext(xpathCtx);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (int i = 0; i < num_results && found_count < max_results; i++) {
|
for (int i = 0; i < num_results && found_count < max_results; i++) {
|
||||||
xmlNodePtr resultNode = xpathObj->nodesetval->nodeTab[i];
|
xmlNodePtr resultNode = xpathObj->nodesetval->nodeTab[i];
|
||||||
xpathCtx->node = resultNode;
|
xpathCtx->node = resultNode;
|
||||||
|
|
||||||
xmlXPathObjectPtr linkObj = xmlXPathEvalExpression(
|
xmlXPathObjectPtr linkObj = xmlXPathEvalExpression(
|
||||||
(xmlChar *)".//div[contains(@class, 'compTitle')]//a[@target='_blank']",
|
(xmlChar *)".//div[contains(@class, 'compTitle')]//a[@target='_blank']",
|
||||||
xpathCtx);
|
xpathCtx);
|
||||||
char *url =
|
char *url =
|
||||||
(linkObj && linkObj->nodesetval && linkObj->nodesetval->nodeNr > 0)
|
(linkObj && linkObj->nodesetval && linkObj->nodesetval->nodeNr > 0)
|
||||||
? (char *)xmlGetProp(linkObj->nodesetval->nodeTab[0],
|
? (char *)xmlGetProp(linkObj->nodesetval->nodeTab[0],
|
||||||
(xmlChar *)"href")
|
(xmlChar *)"href")
|
||||||
: NULL;
|
: NULL;
|
||||||
|
|
||||||
xmlXPathObjectPtr titleObj = xmlXPathEvalExpression(
|
xmlXPathObjectPtr titleObj = xmlXPathEvalExpression(
|
||||||
(xmlChar *)".//h3[contains(@class, 'title')]", xpathCtx);
|
(xmlChar *)".//h3[contains(@class, 'title')]", xpathCtx);
|
||||||
char *title =
|
char *title =
|
||||||
(titleObj && titleObj->nodesetval && titleObj->nodesetval->nodeNr > 0)
|
(titleObj && titleObj->nodesetval && titleObj->nodesetval->nodeNr > 0)
|
||||||
? (char *)xmlNodeGetContent(titleObj->nodesetval->nodeTab[0])
|
? (char *)xmlNodeGetContent(titleObj->nodesetval->nodeTab[0])
|
||||||
: NULL;
|
: NULL;
|
||||||
|
|
||||||
xmlXPathObjectPtr snippetObj = xmlXPathEvalExpression(
|
xmlXPathObjectPtr snippetObj = xmlXPathEvalExpression(
|
||||||
(xmlChar *)".//div[contains(@class, 'compText')]//p", xpathCtx);
|
(xmlChar *)".//div[contains(@class, 'compText')]//p", xpathCtx);
|
||||||
char *snippet_text =
|
char *snippet_text =
|
||||||
(snippetObj && snippetObj->nodesetval &&
|
(snippetObj && snippetObj->nodesetval &&
|
||||||
snippetObj->nodesetval->nodeNr > 0)
|
snippetObj->nodesetval->nodeNr > 0)
|
||||||
? (char *)xmlNodeGetContent(snippetObj->nodesetval->nodeTab[0])
|
? (char *)xmlNodeGetContent(snippetObj->nodesetval->nodeTab[0])
|
||||||
: NULL;
|
: NULL;
|
||||||
|
|
||||||
if (url && title) {
|
if (url && title) {
|
||||||
(*out_results)[found_count].url = unescape_search_url(url);
|
(*out_results)[found_count].url = unescape_search_url(url);
|
||||||
(*out_results)[found_count].title = strdup(title);
|
(*out_results)[found_count].title = strdup(title);
|
||||||
(*out_results)[found_count].snippet =
|
(*out_results)[found_count].snippet =
|
||||||
strdup(snippet_text ? snippet_text : "");
|
strdup(snippet_text ? snippet_text : "");
|
||||||
found_count++;
|
found_count++;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (title) xmlFree(title);
|
if (title)
|
||||||
if (url) xmlFree(url);
|
xmlFree(title);
|
||||||
if (snippet_text) xmlFree(snippet_text);
|
if (url)
|
||||||
if (linkObj) xmlXPathFreeObject(linkObj);
|
xmlFree(url);
|
||||||
if (titleObj) xmlXPathFreeObject(titleObj);
|
if (snippet_text)
|
||||||
if (snippetObj) xmlXPathFreeObject(snippetObj);
|
xmlFree(snippet_text);
|
||||||
|
if (linkObj)
|
||||||
|
xmlXPathFreeObject(linkObj);
|
||||||
|
if (titleObj)
|
||||||
|
xmlXPathFreeObject(titleObj);
|
||||||
|
if (snippetObj)
|
||||||
|
xmlXPathFreeObject(snippetObj);
|
||||||
}
|
}
|
||||||
|
|
||||||
xpathCtx->node = NULL;
|
xpathCtx->node = NULL;
|
||||||
@@ -284,36 +306,36 @@ static int parse_yahoo(const char *engine_name, xmlDocPtr doc,
|
|||||||
}
|
}
|
||||||
|
|
||||||
const SearchEngine ENGINE_REGISTRY[] = {
|
const SearchEngine ENGINE_REGISTRY[] = {
|
||||||
{.name = "DuckDuckGo Lite",
|
{.name = "DuckDuckGo Lite",
|
||||||
.base_url = "https://lite.duckduckgo.com/lite/?q=",
|
.base_url = "https://lite.duckduckgo.com/lite/?q=",
|
||||||
.host_header = "lite.duckduckgo.com",
|
.host_header = "lite.duckduckgo.com",
|
||||||
.referer = "https://lite.duckduckgo.com/",
|
.referer = "https://lite.duckduckgo.com/",
|
||||||
.page_param = "s",
|
.page_param = "s",
|
||||||
.page_multiplier = 30,
|
.page_multiplier = 30,
|
||||||
.page_base = 0,
|
.page_base = 0,
|
||||||
.parser = parse_ddg_lite},
|
.parser = parse_ddg_lite},
|
||||||
{.name = "Startpage",
|
{.name = "Startpage",
|
||||||
.base_url = "https://www.startpage.com/sp/search?q=",
|
.base_url = "https://www.startpage.com/sp/search?q=",
|
||||||
.host_header = "www.startpage.com",
|
.host_header = "www.startpage.com",
|
||||||
.referer = "https://www.startpage.com/",
|
.referer = "https://www.startpage.com/",
|
||||||
.page_param = "page",
|
.page_param = "page",
|
||||||
.page_multiplier = 1,
|
.page_multiplier = 1,
|
||||||
.page_base = 1,
|
.page_base = 1,
|
||||||
.parser = parse_startpage},
|
.parser = parse_startpage},
|
||||||
{.name = "Yahoo",
|
{.name = "Yahoo",
|
||||||
.base_url = "https://search.yahoo.com/search?p=",
|
.base_url = "https://search.yahoo.com/search?p=",
|
||||||
.host_header = "search.yahoo.com",
|
.host_header = "search.yahoo.com",
|
||||||
.referer = "https://search.yahoo.com/",
|
.referer = "https://search.yahoo.com/",
|
||||||
.page_param = "b",
|
.page_param = "b",
|
||||||
.page_multiplier = 10,
|
.page_multiplier = 10,
|
||||||
.page_base = 1,
|
.page_base = 1,
|
||||||
.parser = parse_yahoo}};
|
.parser = parse_yahoo}};
|
||||||
|
|
||||||
const int ENGINE_COUNT = sizeof(ENGINE_REGISTRY) / sizeof(SearchEngine);
|
const int ENGINE_COUNT = sizeof(ENGINE_REGISTRY) / sizeof(SearchEngine);
|
||||||
|
|
||||||
static void configure_curl_handle(CURL *curl, const char *full_url,
|
static void configure_curl_handle(CURL *curl, const char *full_url,
|
||||||
MemoryBuffer *chunk,
|
MemoryBuffer *chunk,
|
||||||
struct curl_slist *headers) {
|
struct curl_slist *headers) {
|
||||||
curl_easy_setopt(curl, CURLOPT_URL, full_url);
|
curl_easy_setopt(curl, CURLOPT_URL, full_url);
|
||||||
curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
|
curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
|
||||||
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, WriteMemoryCallback);
|
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, WriteMemoryCallback);
|
||||||
@@ -340,152 +362,153 @@ int scrape_engines_parallel(ScrapeJob *jobs, int num_jobs) {
|
|||||||
retry:
|
retry:
|
||||||
CURLM *multi_handle = curl_multi_init();
|
CURLM *multi_handle = curl_multi_init();
|
||||||
if (!multi_handle) {
|
if (!multi_handle) {
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (int i = 0; i < num_jobs; i++) {
|
for (int i = 0; i < num_jobs; i++) {
|
||||||
ScrapeJob *job = &jobs[i];
|
ScrapeJob *job = &jobs[i];
|
||||||
|
|
||||||
if (job->handle) {
|
if (job->handle) {
|
||||||
curl_easy_cleanup(job->handle);
|
curl_easy_cleanup(job->handle);
|
||||||
job->handle = NULL;
|
job->handle = NULL;
|
||||||
}
|
}
|
||||||
if (job->response.memory) {
|
if (job->response.memory) {
|
||||||
free(job->response.memory);
|
free(job->response.memory);
|
||||||
|
}
|
||||||
|
|
||||||
|
job->handle = curl_easy_init();
|
||||||
|
if (!job->handle) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
job->response.memory = (char *)malloc(16384);
|
||||||
|
job->response.size = 0;
|
||||||
|
job->response.capacity = 16384;
|
||||||
|
|
||||||
|
char full_url[1024];
|
||||||
|
char *encoded_query = curl_easy_escape(job->handle, job->query, 0);
|
||||||
|
if (!encoded_query) {
|
||||||
|
curl_easy_cleanup(job->handle);
|
||||||
|
job->handle = NULL;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
int page = (job->page < 1) ? 1 : job->page;
|
||||||
|
int page_value =
|
||||||
|
(page - 1) * job->engine->page_multiplier + job->engine->page_base;
|
||||||
|
|
||||||
|
snprintf(full_url, sizeof(full_url), "%s%s&%s=%d", job->engine->base_url,
|
||||||
|
encoded_query, job->engine->page_param, page_value);
|
||||||
|
curl_free(encoded_query);
|
||||||
|
|
||||||
|
struct curl_slist *headers = NULL;
|
||||||
|
char host_buf[256], ref_buf[256];
|
||||||
|
snprintf(host_buf, sizeof(host_buf), "Host: %s", job->engine->host_header);
|
||||||
|
snprintf(ref_buf, sizeof(ref_buf), "Referer: %s", job->engine->referer);
|
||||||
|
headers = curl_slist_append(headers, host_buf);
|
||||||
|
headers = curl_slist_append(headers, ref_buf);
|
||||||
|
headers = curl_slist_append(
|
||||||
|
headers,
|
||||||
|
"Accept: "
|
||||||
|
"text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8");
|
||||||
|
headers = curl_slist_append(headers, "Accept-Language: en-US,en;q=0.5");
|
||||||
|
headers = curl_slist_append(headers, "DNT: 1");
|
||||||
|
|
||||||
|
configure_curl_handle(job->handle, full_url, &job->response, headers);
|
||||||
|
|
||||||
|
curl_easy_setopt(job->handle, CURLOPT_PRIVATE, headers);
|
||||||
|
|
||||||
|
curl_multi_add_handle(multi_handle, job->handle);
|
||||||
}
|
}
|
||||||
|
|
||||||
job->handle = curl_easy_init();
|
usleep(100000 + (rand() % 100000));
|
||||||
if (!job->handle) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
job->response.memory = (char *)malloc(16384);
|
|
||||||
job->response.size = 0;
|
|
||||||
job->response.capacity = 16384;
|
|
||||||
|
|
||||||
char full_url[1024];
|
|
||||||
char *encoded_query = curl_easy_escape(job->handle, job->query, 0);
|
|
||||||
if (!encoded_query) {
|
|
||||||
curl_easy_cleanup(job->handle);
|
|
||||||
job->handle = NULL;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
int page = (job->page < 1) ? 1 : job->page;
|
|
||||||
int page_value = (page - 1) * job->engine->page_multiplier + job->engine->page_base;
|
|
||||||
|
|
||||||
snprintf(full_url, sizeof(full_url), "%s%s&%s=%d",
|
|
||||||
job->engine->base_url,
|
|
||||||
encoded_query,
|
|
||||||
job->engine->page_param,
|
|
||||||
page_value);
|
|
||||||
curl_free(encoded_query);
|
|
||||||
|
|
||||||
struct curl_slist *headers = NULL;
|
|
||||||
char host_buf[256], ref_buf[256];
|
|
||||||
snprintf(host_buf, sizeof(host_buf), "Host: %s", job->engine->host_header);
|
|
||||||
snprintf(ref_buf, sizeof(ref_buf), "Referer: %s", job->engine->referer);
|
|
||||||
headers = curl_slist_append(headers, host_buf);
|
|
||||||
headers = curl_slist_append(headers, ref_buf);
|
|
||||||
headers = curl_slist_append(headers, "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8");
|
|
||||||
headers = curl_slist_append(headers, "Accept-Language: en-US,en;q=0.5");
|
|
||||||
headers = curl_slist_append(headers, "DNT: 1");
|
|
||||||
|
|
||||||
configure_curl_handle(job->handle, full_url, &job->response, headers);
|
|
||||||
|
|
||||||
curl_easy_setopt(job->handle, CURLOPT_PRIVATE, headers);
|
|
||||||
|
|
||||||
curl_multi_add_handle(multi_handle, job->handle);
|
|
||||||
}
|
|
||||||
|
|
||||||
usleep(100000 + (rand() % 100000));
|
|
||||||
|
|
||||||
int still_running = 0;
|
int still_running = 0;
|
||||||
curl_multi_perform(multi_handle, &still_running);
|
curl_multi_perform(multi_handle, &still_running);
|
||||||
|
|
||||||
do {
|
do {
|
||||||
int numfds = 0;
|
int numfds = 0;
|
||||||
CURLMcode mc = curl_multi_wait(multi_handle, NULL, 0, 1000, &numfds);
|
CURLMcode mc = curl_multi_wait(multi_handle, NULL, 0, 1000, &numfds);
|
||||||
|
|
||||||
if (mc != CURLM_OK) {
|
if (mc != CURLM_OK) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
curl_multi_perform(multi_handle, &still_running);
|
curl_multi_perform(multi_handle, &still_running);
|
||||||
} while (still_running);
|
} while (still_running);
|
||||||
|
|
||||||
CURLMsg *msg;
|
CURLMsg *msg;
|
||||||
int msgs_left;
|
int msgs_left;
|
||||||
while ((msg = curl_multi_info_read(multi_handle, &msgs_left))) {
|
while ((msg = curl_multi_info_read(multi_handle, &msgs_left))) {
|
||||||
if (msg->msg == CURLMSG_DONE) {
|
if (msg->msg == CURLMSG_DONE) {
|
||||||
CURL *handle = msg->easy_handle;
|
CURL *handle = msg->easy_handle;
|
||||||
|
|
||||||
for (int i = 0; i < num_jobs; i++) {
|
for (int i = 0; i < num_jobs; i++) {
|
||||||
if (jobs[i].handle && jobs[i].handle == handle) {
|
if (jobs[i].handle && jobs[i].handle == handle) {
|
||||||
ScrapeJob *job = &jobs[i];
|
ScrapeJob *job = &jobs[i];
|
||||||
|
|
||||||
long response_code;
|
long response_code;
|
||||||
curl_easy_getinfo(handle, CURLINFO_RESPONSE_CODE, &response_code);
|
curl_easy_getinfo(handle, CURLINFO_RESPONSE_CODE, &response_code);
|
||||||
|
|
||||||
if (msg->data.result == CURLE_OK && job->response.size > 0) {
|
if (msg->data.result == CURLE_OK && job->response.size > 0) {
|
||||||
xmlDocPtr doc = htmlReadMemory(
|
xmlDocPtr doc = htmlReadMemory(
|
||||||
job->response.memory, job->response.size, NULL, NULL,
|
job->response.memory, job->response.size, NULL, NULL,
|
||||||
HTML_PARSE_RECOVER | HTML_PARSE_NOERROR | HTML_PARSE_NOWARNING);
|
HTML_PARSE_RECOVER | HTML_PARSE_NOERROR | HTML_PARSE_NOWARNING);
|
||||||
|
|
||||||
if (doc) {
|
if (doc) {
|
||||||
job->results_count = job->engine->parser(
|
job->results_count = job->engine->parser(
|
||||||
job->engine->name, doc, job->out_results, job->max_results);
|
job->engine->name, doc, job->out_results, job->max_results);
|
||||||
xmlFreeDoc(doc);
|
xmlFreeDoc(doc);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
job->results_count = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
struct curl_slist *headers;
|
||||||
|
curl_easy_getinfo(handle, CURLINFO_PRIVATE, &headers);
|
||||||
|
if (headers)
|
||||||
|
curl_slist_free_all(headers);
|
||||||
|
|
||||||
|
free(job->response.memory);
|
||||||
|
job->response.memory = NULL;
|
||||||
|
curl_multi_remove_handle(multi_handle, handle);
|
||||||
|
if (handle)
|
||||||
|
curl_easy_cleanup(handle);
|
||||||
|
job->handle = NULL;
|
||||||
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
} else {
|
|
||||||
job->results_count = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
struct curl_slist *headers;
|
|
||||||
curl_easy_getinfo(handle, CURLINFO_PRIVATE, &headers);
|
|
||||||
if (headers) curl_slist_free_all(headers);
|
|
||||||
|
|
||||||
free(job->response.memory);
|
|
||||||
job->response.memory = NULL;
|
|
||||||
curl_multi_remove_handle(multi_handle, handle);
|
|
||||||
if (handle) curl_easy_cleanup(handle);
|
|
||||||
job->handle = NULL;
|
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
curl_multi_cleanup(multi_handle);
|
curl_multi_cleanup(multi_handle);
|
||||||
|
|
||||||
if (retries < max_proxy_retries && proxy_count > 0) {
|
if (retries < max_proxy_retries && proxy_count > 0) {
|
||||||
int any_failed = 0;
|
int any_failed = 0;
|
||||||
for (int i = 0; i < num_jobs; i++) {
|
for (int i = 0; i < num_jobs; i++) {
|
||||||
if (jobs[i].results_count == 0 && jobs[i].response.size == 0) {
|
if (jobs[i].results_count == 0 && jobs[i].response.size == 0) {
|
||||||
any_failed = 1;
|
any_failed = 1;
|
||||||
break;
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (any_failed) {
|
||||||
|
retries++;
|
||||||
|
goto retry;
|
||||||
}
|
}
|
||||||
}
|
|
||||||
if (any_failed) {
|
|
||||||
retries++;
|
|
||||||
goto retry;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
int scrape_engine(const SearchEngine *engine, const char *query,
|
int scrape_engine(const SearchEngine *engine, const char *query,
|
||||||
SearchResult **out_results, int max_results) {
|
SearchResult **out_results, int max_results) {
|
||||||
ScrapeJob job = {
|
ScrapeJob job = {.engine = engine,
|
||||||
.engine = engine,
|
.query = (char *)query,
|
||||||
.query = (char *)query,
|
.out_results = out_results,
|
||||||
.out_results = out_results,
|
.max_results = max_results,
|
||||||
.max_results = max_results,
|
.results_count = 0,
|
||||||
.results_count = 0,
|
.page = 1};
|
||||||
.page = 1
|
|
||||||
};
|
|
||||||
|
|
||||||
scrape_engines_parallel(&job, 1);
|
scrape_engines_parallel(&job, 1);
|
||||||
return job.results_count;
|
return job.results_count;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,8 +1,8 @@
|
|||||||
#ifndef SCRAPING_H
|
#ifndef SCRAPING_H
|
||||||
#define SCRAPING_H
|
#define SCRAPING_H
|
||||||
|
|
||||||
#include <libxml/HTMLparser.h>
|
|
||||||
#include <curl/curl.h>
|
#include <curl/curl.h>
|
||||||
|
#include <libxml/HTMLparser.h>
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
char *url;
|
char *url;
|
||||||
@@ -11,7 +11,7 @@ typedef struct {
|
|||||||
} SearchResult;
|
} SearchResult;
|
||||||
|
|
||||||
typedef int (*ParserFunc)(const char *engine_name, xmlDocPtr doc,
|
typedef int (*ParserFunc)(const char *engine_name, xmlDocPtr doc,
|
||||||
SearchResult **out_results, int max_results);
|
SearchResult **out_results, int max_results);
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
const char *name;
|
const char *name;
|
||||||
@@ -20,8 +20,8 @@ typedef struct {
|
|||||||
const char *referer;
|
const char *referer;
|
||||||
|
|
||||||
const char *page_param;
|
const char *page_param;
|
||||||
int page_multiplier;
|
int page_multiplier;
|
||||||
int page_base;
|
int page_base;
|
||||||
ParserFunc parser;
|
ParserFunc parser;
|
||||||
} SearchEngine;
|
} SearchEngine;
|
||||||
|
|
||||||
@@ -36,7 +36,7 @@ typedef struct {
|
|||||||
char *query;
|
char *query;
|
||||||
SearchResult **out_results;
|
SearchResult **out_results;
|
||||||
int max_results;
|
int max_results;
|
||||||
int page;
|
int page;
|
||||||
CURL *handle;
|
CURL *handle;
|
||||||
MemoryBuffer response;
|
MemoryBuffer response;
|
||||||
int results_count;
|
int results_count;
|
||||||
@@ -46,7 +46,7 @@ extern const SearchEngine ENGINE_REGISTRY[];
|
|||||||
extern const int ENGINE_COUNT;
|
extern const int ENGINE_COUNT;
|
||||||
|
|
||||||
int scrape_engine(const SearchEngine *engine, const char *query,
|
int scrape_engine(const SearchEngine *engine, const char *query,
|
||||||
SearchResult **out_results, int max_results);
|
SearchResult **out_results, int max_results);
|
||||||
|
|
||||||
int scrape_engines_parallel(ScrapeJob *jobs, int num_jobs);
|
int scrape_engines_parallel(ScrapeJob *jobs, int num_jobs);
|
||||||
|
|
||||||
|
|||||||
@@ -5,7 +5,8 @@
|
|||||||
#include <strings.h>
|
#include <strings.h>
|
||||||
|
|
||||||
char *pretty_display_url(const char *input) {
|
char *pretty_display_url(const char *input) {
|
||||||
if (!input) return NULL;
|
if (!input)
|
||||||
|
return NULL;
|
||||||
|
|
||||||
const char *start = input;
|
const char *start = input;
|
||||||
|
|
||||||
@@ -28,7 +29,8 @@ char *pretty_display_url(const char *input) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
char *output = (char *)malloc(strlen(temp) * 3 + 1);
|
char *output = (char *)malloc(strlen(temp) * 3 + 1);
|
||||||
if (!output) return NULL;
|
if (!output)
|
||||||
|
return NULL;
|
||||||
|
|
||||||
size_t j = 0;
|
size_t j = 0;
|
||||||
for (size_t i = 0; temp[i] != '\0'; i++) {
|
for (size_t i = 0; temp[i] != '\0'; i++) {
|
||||||
|
|||||||
@@ -4,7 +4,8 @@
|
|||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
|
||||||
char *unescape_search_url(const char *input) {
|
char *unescape_search_url(const char *input) {
|
||||||
if (!input) return NULL;
|
if (!input)
|
||||||
|
return NULL;
|
||||||
|
|
||||||
const char *key = NULL;
|
const char *key = NULL;
|
||||||
const char *start = NULL;
|
const char *start = NULL;
|
||||||
@@ -14,7 +15,8 @@ char *unescape_search_url(const char *input) {
|
|||||||
if (strstr(input, "uddg=")) {
|
if (strstr(input, "uddg=")) {
|
||||||
key = "uddg=";
|
key = "uddg=";
|
||||||
start = strstr(input, key);
|
start = strstr(input, key);
|
||||||
if (!start) return NULL;
|
if (!start)
|
||||||
|
return NULL;
|
||||||
start += strlen(key);
|
start += strlen(key);
|
||||||
end = strchr(start, '&');
|
end = strchr(start, '&');
|
||||||
len = end ? (size_t)(end - start) : strlen(start);
|
len = end ? (size_t)(end - start) : strlen(start);
|
||||||
@@ -23,7 +25,8 @@ char *unescape_search_url(const char *input) {
|
|||||||
else if (strstr(input, "RU=")) {
|
else if (strstr(input, "RU=")) {
|
||||||
key = "RU=";
|
key = "RU=";
|
||||||
start = strstr(input, key);
|
start = strstr(input, key);
|
||||||
if (!start) return strdup(input);
|
if (!start)
|
||||||
|
return strdup(input);
|
||||||
start += strlen(key);
|
start += strlen(key);
|
||||||
end = strchr(start, '/');
|
end = strchr(start, '/');
|
||||||
len = end ? (size_t)(end - start) : strlen(start);
|
len = end ? (size_t)(end - start) : strlen(start);
|
||||||
@@ -34,7 +37,8 @@ char *unescape_search_url(const char *input) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
char *output = (char *)malloc(len * 3 + 1);
|
char *output = (char *)malloc(len * 3 + 1);
|
||||||
if (!output) return NULL;
|
if (!output)
|
||||||
|
return NULL;
|
||||||
|
|
||||||
size_t i = 0, j = 0;
|
size_t i = 0, j = 0;
|
||||||
while (i < len) {
|
while (i < len) {
|
||||||
@@ -60,7 +64,8 @@ char *unescape_search_url(const char *input) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
char *url_decode_query(const char *src) {
|
char *url_decode_query(const char *src) {
|
||||||
if (!src) return NULL;
|
if (!src)
|
||||||
|
return NULL;
|
||||||
char *res = strdup(src);
|
char *res = strdup(src);
|
||||||
char *p = res;
|
char *p = res;
|
||||||
while (*src) {
|
while (*src) {
|
||||||
|
|||||||
@@ -7,4 +7,3 @@ char *unescape_search_url(const char *input);
|
|||||||
char *url_decode_query(const char *src);
|
char *url_decode_query(const char *src);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|||||||
@@ -1,8 +1,11 @@
|
|||||||
#include "Utility.h"
|
#include "Utility.h"
|
||||||
|
|
||||||
int hex_to_int(char c) {
|
int hex_to_int(char c) {
|
||||||
if (c >= '0' && c <= '9') return c - '0';
|
if (c >= '0' && c <= '9')
|
||||||
if (c >= 'a' && c <= 'f') return c - 'a' + 10;
|
return c - '0';
|
||||||
if (c >= 'A' && c <= 'F') return c - 'A' + 10;
|
if (c >= 'a' && c <= 'f')
|
||||||
|
return c - 'a' + 10;
|
||||||
|
if (c >= 'A' && c <= 'F')
|
||||||
|
return c - 'A' + 10;
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
<?xml version="1.0" encoding="UTF-8"?>
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
<OpenSearchDescription
|
<OpenSearchDescription
|
||||||
xmlns="http://a9.com/-/spec/opensearch/1.1/"
|
xmlns="http://a9.com/-/spec/opensearch/1.1/"
|
||||||
xmlns:moz="http://www.mozilla.org/2006/browser/search/">
|
xmlns:moz="http://www.mozilla.org/2006/browser/search/">
|
||||||
<ShortName>OmniSearch</ShortName>
|
<ShortName>OmniSearch</ShortName>
|
||||||
<Description>Lightweight metasearch engine</Description>
|
<Description>Lightweight metasearch engine</Description>
|
||||||
@@ -8,4 +8,4 @@
|
|||||||
<InputEncoding>UTF-8</InputEncoding>
|
<InputEncoding>UTF-8</InputEncoding>
|
||||||
<OutputEncoding>UTF-8</OutputEncoding>
|
<OutputEncoding>UTF-8</OutputEncoding>
|
||||||
<moz:SearchForm>https://search.bwaaa.monster/</moz:SearchForm>
|
<moz:SearchForm>https://search.bwaaa.monster/</moz:SearchForm>
|
||||||
</OpenSearchDescription>
|
</OpenSearchDescription>
|
||||||
|
|||||||
Reference in New Issue
Block a user