refactor: put HTTP and XML logic into reusable modules
This commit is contained in:
81
src/Utility/HttpClient.c
Normal file
81
src/Utility/HttpClient.c
Normal file
@@ -0,0 +1,81 @@
|
||||
#include "HttpClient.h"
|
||||
#include "../Proxy/Proxy.h"
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
static size_t write_callback(void *contents, size_t size, size_t nmemb,
|
||||
void *userp) {
|
||||
size_t realsize = size * nmemb;
|
||||
HttpResponse *mem = (HttpResponse *)userp;
|
||||
|
||||
if (mem->size + realsize + 1 > mem->capacity) {
|
||||
size_t new_cap = mem->capacity == 0 ? 16384 : mem->capacity * 2;
|
||||
while (new_cap < mem->size + realsize + 1)
|
||||
new_cap *= 2;
|
||||
|
||||
char *ptr = realloc(mem->memory, new_cap);
|
||||
if (!ptr) {
|
||||
return 0;
|
||||
}
|
||||
mem->memory = ptr;
|
||||
mem->capacity = new_cap;
|
||||
}
|
||||
|
||||
memcpy(&(mem->memory[mem->size]), contents, realsize);
|
||||
mem->size += realsize;
|
||||
mem->memory[mem->size] = 0;
|
||||
|
||||
return realsize;
|
||||
}
|
||||
|
||||
HttpResponse http_get(const char *url, const char *user_agent) {
|
||||
HttpResponse resp = {.memory = NULL, .size = 0, .capacity = 0};
|
||||
|
||||
if (!url) {
|
||||
return resp;
|
||||
}
|
||||
|
||||
resp.memory = malloc(16384);
|
||||
if (!resp.memory) {
|
||||
return resp;
|
||||
}
|
||||
resp.capacity = 16384;
|
||||
|
||||
CURL *curl = curl_easy_init();
|
||||
if (!curl) {
|
||||
free(resp.memory);
|
||||
resp.memory = NULL;
|
||||
return resp;
|
||||
}
|
||||
|
||||
curl_easy_setopt(curl, CURLOPT_URL, url);
|
||||
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_callback);
|
||||
curl_easy_setopt(curl, CURLOPT_WRITEDATA, &resp);
|
||||
curl_easy_setopt(curl, CURLOPT_USERAGENT,
|
||||
user_agent ? user_agent : "libcurl-agent/1.0");
|
||||
curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L);
|
||||
curl_easy_setopt(curl, CURLOPT_TIMEOUT, 15L);
|
||||
apply_proxy_settings(curl);
|
||||
|
||||
CURLcode res = curl_easy_perform(curl);
|
||||
curl_easy_cleanup(curl);
|
||||
|
||||
if (res != CURLE_OK) {
|
||||
free(resp.memory);
|
||||
resp.memory = NULL;
|
||||
resp.size = 0;
|
||||
resp.capacity = 0;
|
||||
}
|
||||
|
||||
return resp;
|
||||
}
|
||||
|
||||
/*
 * Releases the buffer held by `resp` and resets every field to zero/NULL,
 * so the structure can be freed again or reused safely.
 *
 * resp - response to clear; NULL is accepted and ignored. The HttpResponse
 *        object itself is not freed.
 */
void http_response_free(HttpResponse *resp) {
  if (resp != NULL) {
    free(resp->memory);
    resp->capacity = 0;
    resp->size = 0;
    resp->memory = NULL;
  }
}
|
||||
16
src/Utility/HttpClient.h
Normal file
16
src/Utility/HttpClient.h
Normal file
@@ -0,0 +1,16 @@
|
||||
#ifndef HTTPCLIENT_H
|
||||
#define HTTPCLIENT_H
|
||||
|
||||
#include <curl/curl.h>
|
||||
#include <stddef.h>
|
||||
|
||||
/**
 * Growable buffer that accumulates the body of an HTTP response.
 *
 * Filled by http_get() and released with http_response_free().
 */
typedef struct {
  /** Heap buffer holding the body; NULL when empty or after a failure. */
  char *memory;
  /** Number of body bytes stored in `memory` (terminating NUL excluded). */
  size_t size;
  /** Number of bytes currently allocated for `memory`. */
  size_t capacity;
} HttpResponse;
|
||||
|
||||
/**
 * Fetch `url` with a blocking HTTP GET.
 *
 * @param url         Address to fetch; NULL yields an empty response.
 * @param user_agent  User-Agent value, or NULL for the built-in default.
 * @return Response owned by the caller. `memory` is NULL when the request
 *         failed; whenever body data was received it is followed by a NUL
 *         terminator at index `size`. Release with http_response_free().
 */
HttpResponse http_get(const char *url, const char *user_agent);

/**
 * Free the buffer held by `resp` and reset its fields to zero/NULL.
 *
 * @param resp  Response to clear; NULL is ignored. The structure itself is
 *              not freed.
 */
void http_response_free(HttpResponse *resp);
|
||||
|
||||
#endif
|
||||
65
src/Utility/XmlHelper.c
Normal file
65
src/Utility/XmlHelper.c
Normal file
@@ -0,0 +1,65 @@
|
||||
#include "XmlHelper.h"
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
SearchResult *xml_result_alloc(int count, int max_results) {
|
||||
if (count <= 0 || max_results <= 0) {
|
||||
return NULL;
|
||||
}
|
||||
int actual = (count < max_results) ? count : max_results;
|
||||
return (SearchResult *)calloc(actual, sizeof(SearchResult));
|
||||
}
|
||||
|
||||
/*
 * Frees an array of SearchResult entries together with the strings they own.
 *
 * results - array to release; NULL is ignored
 * count   - number of entries whose url/title/snippet are freed
 *
 * NOTE(review): the strings are released with free(), so they are presumed
 * to come from malloc-family allocators (e.g. strdup) - confirm at the
 * places that fill the entries.
 */
void xml_result_free(SearchResult *results, int count) {
  if (results == NULL) {
    return;
  }

  SearchResult *entry = results;
  for (int remaining = count; remaining > 0; remaining--, entry++) {
    free(entry->url);
    free(entry->title);
    free(entry->snippet);
  }

  free(results);
}
|
||||
|
||||
/*
 * NULL-safe wrapper around xmlXPathEvalExpression().
 *
 * ctx    - XPath context to evaluate in
 * xpath  - expression text
 *
 * Returns NULL when either argument is NULL; otherwise whatever
 * xmlXPathEvalExpression() returns for the expression.
 */
xmlXPathObjectPtr xml_xpath_eval(xmlXPathContextPtr ctx, const char *xpath) {
  const xmlChar *expr = (const xmlChar *)xpath;

  if (ctx != NULL && expr != NULL) {
    return xmlXPathEvalExpression(expr, ctx);
  }
  return NULL;
}
|
||||
|
||||
/*
 * Returns the text content of `node` as reported by xmlNodeGetContent(),
 * cast to char *, or NULL when `node` is NULL.
 *
 * The returned buffer comes straight from libxml2; xpath_text() releases
 * the result of the same call with xmlFree().
 */
char *xml_node_content(xmlNodePtr node) {
  return node ? (char *)xmlNodeGetContent(node) : NULL;
}
|
||||
|
||||
/*
 * Evaluates `xpath` against `doc` and returns a copy of the text content of
 * the first matching node.
 *
 * doc    - parsed document to query
 * xpath  - expression text
 *
 * Returns a string duplicated with strdup() that the caller releases with
 * free(), or NULL when an argument is NULL, the context cannot be created,
 * evaluation fails, nothing matches, the node has no content, or the copy
 * cannot be allocated.
 *
 * NOTE(review): strdup() is POSIX rather than ISO C11 - confirm the build
 * enables its declaration.
 */
char *xpath_text(xmlDocPtr doc, const char *xpath) {
  if (doc == NULL || xpath == NULL) {
    return NULL;
  }

  xmlXPathContextPtr xpath_ctx = xmlXPathNewContext(doc);
  if (xpath_ctx == NULL) {
    return NULL;
  }

  /* The context is only needed for the evaluation itself. */
  xmlXPathObjectPtr found =
      xmlXPathEvalExpression((const xmlChar *)xpath, xpath_ctx);
  xmlXPathFreeContext(xpath_ctx);
  if (found == NULL) {
    return NULL;
  }

  char *text = NULL;
  if (found->nodesetval != NULL && found->nodesetval->nodeNr != 0) {
    xmlChar *raw = xmlNodeGetContent(found->nodesetval->nodeTab[0]);
    if (raw != NULL) {
      /* Hand back a malloc-owned copy so callers can use plain free(). */
      text = strdup((char *)raw);
      xmlFree(raw);
    }
  }

  xmlXPathFreeObject(found);
  return text;
}
|
||||
14
src/Utility/XmlHelper.h
Normal file
14
src/Utility/XmlHelper.h
Normal file
@@ -0,0 +1,14 @@
|
||||
#ifndef XMLHELPER_H
|
||||
#define XMLHELPER_H
|
||||
|
||||
#include "../Scraping/Scraping.h"
|
||||
#include <libxml/xpath.h>
|
||||
|
||||
SearchResult *xml_result_alloc(int count, int max_results);
|
||||
void xml_result_free(SearchResult *results, int count);
|
||||
|
||||
xmlXPathObjectPtr xml_xpath_eval(xmlXPathContextPtr ctx, const char *xpath);
|
||||
char *xml_node_content(xmlNodePtr node);
|
||||
char *xpath_text(xmlDocPtr doc, const char *xpath);
|
||||
|
||||
#endif
|
||||
Reference in New Issue
Block a user