Object hashes (#104)

* factoring out object_info for general use by several Ray components

* addressing comments

* Replace SHA256 task hash with MD5

Add object hash to object table (always overwrites)

Support for table operations that span multiple asynchronous Redis
commands

Add a new object location in a transaction, using Redis's optimistic
concurrency

Use Redis GETSET instead of transactions and Python frontend code for object hashing

Remove spurious log message

Fix for object_table_add

Revert "Replace SHA256 task hash with MD5"

This reverts commit e599de473c8dad9189ccb0600429534b469b76a2.

Revert to sha256

Test case for illegal puts

Use SETNX to set object hashes

Initialize digest with zeros

Initialize plasma_request with zeros

* Fixes

* replace SHA256 with a faster hash in the object store

* Fix valgrind

* Address Robert's comments

* Check that plasma_compute_object_hash succeeds.

* Don't run test_illegal_put test with valgrind because it causes an intentional crash which causes valgrind to complain.

* Debugging after rebase.

* handling Robert's comments

* Fix bugs after rebase.

* final fixes for Stephanie's PR

* fix
This commit is contained in:
Stephanie Wang 2016-12-08 20:57:08 -08:00 committed by Philipp Moritz
parent 4a62a3c5d7
commit 61904c4c3e
22 changed files with 1713 additions and 100 deletions

View file

@ -14,6 +14,8 @@ const UT_icd object_id_icd = {sizeof(object_id), NULL, NULL, NULL};
const unique_id NIL_ID = {{255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
255, 255, 255, 255, 255, 255, 255, 255, 255, 255}};
const unsigned char NIL_DIGEST[DIGEST_SIZE] = {0};
unique_id globally_unique_id(void) {
/* Use /dev/urandom for "real" randomness. */
int fd;

View file

@ -12,18 +12,27 @@
#endif
#include "utarray.h"
#include "sha256.h"
/** Definitions for Ray logging levels. */
#define RAY_COMMON_DEBUG 0
#define RAY_COMMON_INFO 1
#define RAY_COMMON_WARNING 2
#define RAY_COMMON_ERROR 3
#define RAY_COMMON_FATAL 4
/* Default logging level is INFO. */
/**
* RAY_COMMON_LOG_LEVEL should be defined to one of the above logging level
* integer values. Any logging statement in the code with a logging level
* greater than or equal to RAY_COMMON_LOG_LEVEL will be outputted to stderr.
* The default logging level is INFO. */
#ifndef RAY_COMMON_LOG_LEVEL
#define RAY_COMMON_LOG_LEVEL RAY_COMMON_INFO
#endif
/**
* Macros to enable each level of Ray logging statements depending on the
* current logging level. */
#if (RAY_COMMON_LOG_LEVEL > RAY_COMMON_DEBUG)
#define LOG_DEBUG(M, ...)
#else
@ -74,6 +83,7 @@
} while (0)
#endif
/** Assertion definitions, with optional logging. */
#define CHECKM(COND, M, ...) \
if (!(COND)) { \
LOG_FATAL("Check failure: %s \n" M, #COND, ##__VA_ARGS__); \
@ -97,6 +107,7 @@
* and is responsible for freeing it. */
#define OWNER
/** Definitions for unique ID types. */
#define UNIQUE_ID_SIZE 20
#define UNIQUE_ID_EQ(id1, id2) (memcmp((id1).id, (id2).id, UNIQUE_ID_SIZE) == 0)
@ -152,4 +163,9 @@ bool db_client_ids_equal(db_client_id first_id, db_client_id second_id);
#define MAX(x, y) ((x) >= (y) ? (x) : (y))
#define MIN(x, y) ((x) <= (y) ? (x) : (y))
/** Definitions for computing hash digests. */
#define DIGEST_SIZE SHA256_BLOCK_SIZE
extern const unsigned char NIL_DIGEST[DIGEST_SIZE];
#endif

View file

@ -14,6 +14,7 @@ typedef struct {
int64_t metadata_size;
int64_t create_time;
int64_t construct_duration;
unsigned char digest[DIGEST_SIZE];
} object_info;
#endif

View file

@ -13,11 +13,14 @@ void object_table_lookup(db_handle *db_handle,
void object_table_add(db_handle *db_handle,
object_id object_id,
unsigned char digest[],
retry_info *retry,
object_table_done_callback done_callback,
void *user_context) {
CHECK(db_handle != NULL);
init_table_callback(db_handle, object_id, __func__, NULL, retry,
unsigned char *digest_copy = malloc(DIGEST_SIZE);
memcpy(digest_copy, digest, DIGEST_SIZE);
init_table_callback(db_handle, object_id, __func__, digest_copy, retry,
done_callback, redis_object_table_add, user_context);
}

View file

@ -56,6 +56,7 @@ typedef void (*object_table_done_callback)(object_id object_id,
*/
void object_table_add(db_handle *db_handle,
object_id object_id,
unsigned char digest[],
retry_info *retry,
object_table_done_callback done_callback,
void *user_context);

View file

@ -32,6 +32,20 @@
} \
} while (0)
/**
* A header for callbacks of a single Redis asynchronous command. The user must
* pass in the table operation's timer ID as the asynchronous command's
* privdata field when executing the asynchronous command. The user must define
* variable names for DB and CB_DATA. After this piece of code runs, DB
* will hold a reference to the database handle, CB_DATA will hold a reference
* to the callback data for this table operation. The user must pass in the
* redisReply pointer as the REPLY argument.
*
* This header also short-circuits the entire callback if: (1) there was no
* reply from Redis, or (2) the callback data for this table operation was
* already removed, meaning that the operation was already marked as succeeded
* or failed.
*/
#define REDIS_CALLBACK_HEADER(DB, CB_DATA, REPLY) \
if ((REPLY) == NULL) { \
return; \
@ -47,6 +61,55 @@
do { \
} while (0)
/**
* A data structure to track the status of a table operation attempt that spans
* multiple Redis commands. Each attempt at a table operation is associated
* with a unique redis_requests_info instance. To use this data structure, pass
* it as the `privdata` argument for the callback of each asynchronous Redis
* command.
*/
typedef struct {
/** The timer ID that uniquely identifies this table operation. All retry
* attempts of a table operation share the same timer ID. */
int64_t timer_id;
/** The index of the next command to try for this operation. This may be
* different across different attempts of the same table operation. */
int request_index;
/** Whether the current invocation of the callback was triggered by a reply
* to an asynchronous Redis command. If not, then the callback was called
* directly. */
bool is_redis_reply;
} redis_requests_info;
/**
* A header for callbacks similar to REDIS_CALLBACK_HEADER, but for operations
* that span multiple Redis commands. The differences are:
* - Instead of passing in the table operation's timer ID as the asynchronous
* command callback's `privdata` argument, the user must pass a pointer to a
* redis_requests_info instance.
* - The user must define an additional REQUEST_INFO variable name, which will
* hold a reference to the redis_requests_info passed into the Redis
* asynchronous command.
*/
#define REDIS_MULTI_CALLBACK_HEADER(DB, CB_DATA, REPLY, REQUEST_INFO) \
db_handle *DB = c->data; \
redis_requests_info *REQUEST_INFO = privdata; \
DCHECK(REQUEST_INFO != NULL); \
if ((REPLY) == NULL && REQUEST_INFO->is_redis_reply) { \
free(REQUEST_INFO); \
return; \
} \
table_callback_data *CB_DATA = \
outstanding_callbacks_find(REQUEST_INFO->timer_id); \
if (CB_DATA == NULL) { \
/* the callback data structure has been \
* already freed; just ignore this reply */ \
free(privdata); \
return; \
} \
do { \
} while (0)
db_handle *db_connect(const char *address,
int port,
const char *client_type,
@ -167,6 +230,7 @@ task *parse_redis_task_table_entry(task_id id,
char *key = redis_replies[i]->str;
redisReply *value = redis_replies[i + 1];
if (strcmp(key, "node") == 0) {
DCHECK(value->len == sizeof(node_id));
memcpy(&node, value->str, value->len);
} else if (strcmp(key, "state") == 0) {
int scanned = sscanf(value->str, "%d", (int *) &state);
@ -199,27 +263,105 @@ task *parse_redis_task_table_entry(task_id id,
void redis_object_table_add_callback(redisAsyncContext *c,
void *r,
void *privdata) {
REDIS_CALLBACK_HEADER(db, callback_data, r);
LOG_DEBUG("Calling object table add callback");
REDIS_MULTI_CALLBACK_HEADER(db, callback_data, r, requests_info);
redisReply *reply = r;
object_id id = callback_data->id;
unsigned char *digest = callback_data->data;
#define NUM_CHECK_AND_SET_COMMANDS 3
#define CHECK_AND_SET_SETNX_INDEX 0
#define CHECK_AND_SET_GET_INDEX 1
#define CHECK_AND_SET_SADD_INDEX 2
/* Check that we're at a valid command index. */
int request_index = requests_info->request_index;
LOG_DEBUG("Object table add request index is %d", request_index);
CHECK(request_index <= NUM_CHECK_AND_SET_COMMANDS);
/* If we're on a valid command index, execute the current command and
* register a callback that will execute the next command by incrementing the
* request_index. */
int status = REDIS_OK;
++requests_info->request_index;
if (request_index == CHECK_AND_SET_SETNX_INDEX) {
/* Atomically set the object hash and get the previous value to compare to
* our hash, if a previous value existed. */
requests_info->is_redis_reply = true;
status =
redisAsyncCommand(db->context, redis_object_table_add_callback,
(void *) requests_info, "SETNX objhash:%b %b", id.id,
sizeof(object_id), digest, (size_t) DIGEST_SIZE);
} else if (request_index == CHECK_AND_SET_GET_INDEX) {
/* If there was an object hash in the table previously, check that it's
* equal to ours. */
CHECKM(reply->type == REDIS_REPLY_INTEGER,
"Expected Redis integer, received type %d %s", reply->type,
reply->str);
CHECKM(reply->integer == 0 || reply->integer == 1,
"Expected 0 or 1 from REDIS, received %lld", reply->integer);
if (reply->integer == 1) {
requests_info->is_redis_reply = false;
redis_object_table_add_callback(c, reply, (void *) requests_info);
} else {
requests_info->is_redis_reply = true;
status = redisAsyncCommand(db->context, redis_object_table_add_callback,
(void *) requests_info, "GET objhash:%b",
id.id, sizeof(object_id));
}
} else if (request_index == CHECK_AND_SET_SADD_INDEX) {
if (requests_info->is_redis_reply) {
CHECKM(reply->type == REDIS_REPLY_STRING,
"Expected Redis string, received type %d %s", reply->type,
reply->str);
DCHECK(reply->len == DIGEST_SIZE);
if (memcmp(digest, reply->str, reply->len) != 0) {
/* If our object hash doesn't match the one recorded in the table,
* report the error back to the user and exit immediately. */
LOG_FATAL(
"Found objects with different value but same object ID, most "
"likely because a nondeterministic task was executed twice, either "
"for reconstruction or for speculation.");
}
}
/* Add ourselves to the object's locations. */
requests_info->is_redis_reply = true;
status = redisAsyncCommand(db->context, redis_object_table_add_callback,
(void *) requests_info, "SADD obj:%b %b", id.id,
sizeof(id.id), (char *) db->client.id,
sizeof(db->client.id));
} else {
/* We finished executing all the Redis commands for this attempt at the
* table operation. */
free(requests_info);
/* If the transaction failed, exit and let the table operation's timout
* handler handle it. */
if (reply->type == REDIS_REPLY_NIL) {
return;
}
/* Else, call the done callback and clean up the table state. */
if (callback_data->done_callback) {
task_table_done_callback done_callback = callback_data->done_callback;
done_callback(callback_data->id, callback_data->user_context);
}
destroy_timer_callback(db->loop, callback_data);
}
/* If there was an error executing the current command, this attempt was a
* failure, so clean up the request info. */
if ((status == REDIS_ERR) || db->context->err) {
LOG_REDIS_DEBUG(db->context, "could not add object_table entry");
free(requests_info);
}
}
void redis_object_table_add(table_callback_data *callback_data) {
CHECK(callback_data);
LOG_DEBUG("Calling object table add");
redis_requests_info *requests_info = malloc(sizeof(redis_requests_info));
requests_info->timer_id = callback_data->timer_id;
requests_info->request_index = 0;
requests_info->is_redis_reply = false;
db_handle *db = callback_data->db_handle;
object_id id = callback_data->id;
int status = redisAsyncCommand(db->context, redis_object_table_add_callback,
(void *) callback_data->timer_id,
"SADD obj:%b %b", id.id, sizeof(id.id),
(char *) db->client.id, sizeof(db->client.id));
if ((status == REDIS_ERR) || db->context->err) {
LOG_REDIS_DEBUG(db->context, "could not add object_table entry");
}
redis_object_table_add_callback(db->context, NULL, (void *) requests_info);
}
void redis_object_table_lookup(table_callback_data *callback_data) {
@ -514,17 +656,17 @@ void redis_task_table_publish(table_callback_data *callback_data,
* The first entry in the callback corresponds to RPUSH, and the second entry to
* PUBLISH.
*/
#define NUM_DB_REQUESTS 2
#define PUSH_INDEX 0
#define PUBLISH_INDEX 1
#define NUM_PUBLISH_COMMANDS 2
#define PUBLISH_PUSH_INDEX 0
#define PUBLISH_PUBLISH_INDEX 1
if (callback_data->requests_info == NULL) {
callback_data->requests_info = malloc(NUM_DB_REQUESTS * sizeof(bool));
for (int i = 0; i < NUM_DB_REQUESTS; i++) {
callback_data->requests_info = malloc(NUM_PUBLISH_COMMANDS * sizeof(bool));
for (int i = 0; i < NUM_PUBLISH_COMMANDS; i++) {
((bool *) callback_data->requests_info)[i] = false;
}
}
if (((bool *) callback_data->requests_info)[PUSH_INDEX] == false) {
if (((bool *) callback_data->requests_info)[PUBLISH_PUSH_INDEX] == false) {
/* If the task has already been added to the task table, only update the
* scheduling information fields. */
int status = REDIS_OK;
@ -547,7 +689,7 @@ void redis_task_table_publish(table_callback_data *callback_data,
}
}
if (((bool *) callback_data->requests_info)[PUBLISH_INDEX] == false) {
if (((bool *) callback_data->requests_info)[PUBLISH_PUBLISH_INDEX] == false) {
int status = redisAsyncCommand(
db->context, redis_task_table_publish_publish_callback,
(void *) callback_data->timer_id, "PUBLISH task:%b:%d %b",
@ -575,9 +717,9 @@ void redis_task_table_publish_push_callback(redisAsyncContext *c,
LOG_DEBUG("Calling publish push callback");
REDIS_CALLBACK_HEADER(db, callback_data, r);
CHECK(callback_data->requests_info != NULL);
((bool *) callback_data->requests_info)[PUSH_INDEX] = true;
((bool *) callback_data->requests_info)[PUBLISH_PUSH_INDEX] = true;
if (((bool *) callback_data->requests_info)[PUBLISH_INDEX] == true) {
if (((bool *) callback_data->requests_info)[PUBLISH_PUBLISH_INDEX] == true) {
if (callback_data->done_callback) {
task_table_done_callback done_callback = callback_data->done_callback;
done_callback(callback_data->id, callback_data->user_context);
@ -592,9 +734,9 @@ void redis_task_table_publish_publish_callback(redisAsyncContext *c,
LOG_DEBUG("Calling publish publish callback");
REDIS_CALLBACK_HEADER(db, callback_data, r);
CHECK(callback_data->requests_info != NULL);
((bool *) callback_data->requests_info)[PUBLISH_INDEX] = true;
((bool *) callback_data->requests_info)[PUBLISH_PUBLISH_INDEX] = true;
if (((bool *) callback_data->requests_info)[PUSH_INDEX] == true) {
if (((bool *) callback_data->requests_info)[PUBLISH_PUSH_INDEX] == true) {
if (callback_data->done_callback) {
task_table_done_callback done_callback = callback_data->done_callback;
done_callback(callback_data->id, callback_data->user_context);

View file

@ -101,13 +101,13 @@ task_id compute_task_id(task_spec *spec) {
}
/* Compute a SHA256 hash of the task_spec. */
SHA256_CTX ctx;
BYTE buff[SHA256_BLOCK_SIZE];
BYTE buff[DIGEST_SIZE];
sha256_init(&ctx);
sha256_update(&ctx, (BYTE *) spec, task_spec_size(spec));
sha256_final(&ctx, buff);
/* Create a task ID out of the hash. This will truncate the hash. */
task_id task_id;
CHECK(sizeof(task_id) <= SHA256_BLOCK_SIZE);
CHECK(sizeof(task_id) <= DIGEST_SIZE);
memcpy(&task_id.id, buff, sizeof(task_id.id));
return task_id;
}

View file

@ -77,8 +77,10 @@ TEST object_table_lookup_test(void) {
.timeout = TIMEOUT,
.fail_callback = timeout_callback,
};
object_table_add(db1, id, &retry, add_done_callback, NULL);
object_table_add(db2, id, &retry, add_done_callback, NULL);
object_table_add(db1, id, (unsigned char *) NIL_DIGEST, &retry,
add_done_callback, NULL);
object_table_add(db2, id, (unsigned char *) NIL_DIGEST, &retry,
add_done_callback, NULL);
event_loop_add_timer(loop, 200, (event_loop_timer_handler) timeout_handler,
NULL);
event_loop_run(loop);

View file

@ -146,7 +146,7 @@ int lookup_failed = 0;
void lookup_done_callback(object_id object_id,
int manager_count,
OWNER const char *manager_vector[],
const char *manager_vector[],
void *context) {
/* The done callback should not be called. */
CHECK(0);
@ -202,8 +202,8 @@ TEST add_timeout_test(void) {
retry_info retry = {
.num_retries = 5, .timeout = 100, .fail_callback = add_fail_callback,
};
object_table_add(db, NIL_ID, &retry, add_done_callback,
(void *) add_timeout_context);
object_table_add(db, NIL_ID, (unsigned char *) NIL_DIGEST, &retry,
add_done_callback, (void *) add_timeout_context);
/* Disconnect the database to see if the lookup times out. */
close(db->context->c.fd);
event_loop_run(g_loop);
@ -272,6 +272,7 @@ int64_t reconnect_context_callback(event_loop *loop,
db->sync_context = redisConnect("127.0.0.1", 6379);
/* Re-attach the database to the event loop (the file descriptor changed). */
db_attach(db, loop, true);
LOG_DEBUG("Reconnected to Redis");
return EVENT_LOOP_TIMER_DONE;
}
@ -289,7 +290,7 @@ int lookup_retry_succeeded = 0;
void lookup_retry_done_callback(object_id object_id,
int manager_count,
OWNER const char *manager_vector[],
const char *manager_vector[],
void *context) {
CHECK(context == (void *) lookup_retry_context);
lookup_retry_succeeded = 1;
@ -304,6 +305,7 @@ void lookup_retry_fail_callback(unique_id id,
TEST lookup_retry_test(void) {
g_loop = event_loop_create();
lookup_retry_succeeded = 0;
db_handle *db =
db_connect("127.0.0.1", 6379, "plasma_manager", "127.0.0.1", 11235);
db_attach(db, g_loop, false);
@ -345,7 +347,7 @@ void add_retry_fail_callback(unique_id id,
void *user_context,
void *user_data) {
/* The fail callback should not be called. */
CHECK(0);
LOG_FATAL("add_retry_succeded value was %d", add_retry_succeeded);
}
TEST add_retry_test(void) {
@ -358,8 +360,8 @@ TEST add_retry_test(void) {
.timeout = 100,
.fail_callback = add_retry_fail_callback,
};
object_table_add(db, NIL_ID, &retry, add_retry_done_callback,
(void *) add_retry_context);
object_table_add(db, NIL_ID, (unsigned char *) NIL_DIGEST, &retry,
add_retry_done_callback, (void *) add_retry_context);
/* Disconnect the database to let the add time out the first time. */
close(db->context->c.fd);
/* Install handler for reconnecting the database. */
@ -377,6 +379,54 @@ TEST add_retry_test(void) {
PASS();
}
/* === Test add then lookup retry === */
/* Done callback for the lookup issued by add_lookup_callback. Verifies that
 * the object added earlier is now listed at exactly one manager (ours), then
 * records success in the shared lookup_retry_succeeded flag checked by
 * add_lookup_test. */
void add_lookup_done_callback(object_id object_id,
int manager_count,
const char *manager_vector[],
void *context) {
/* The context threaded through the table operation must be the one we set. */
CHECK(context == (void *) lookup_retry_context);
CHECK(manager_count == 1);
/* The single manager entry should be this test's own address:port. */
CHECK(strcmp(manager_vector[0], "127.0.0.1:11235") == 0);
lookup_retry_succeeded = 1;
}
/* Done callback for the object_table_add in add_lookup_test. Once the add
 * has succeeded, chain a lookup of the same object ID so that
 * add_lookup_done_callback can verify the location was recorded. The db
 * handle is passed through user_context. */
void add_lookup_callback(object_id object_id, void *user_context) {
db_handle *db = user_context;
retry_info retry = {
.num_retries = 5,
.timeout = 100,
.fail_callback = lookup_retry_fail_callback,
};
object_table_lookup(db, NIL_ID, &retry, add_lookup_done_callback,
(void *) lookup_retry_context);
}
/* End-to-end test: add an object location to the object table, then look it
 * up. The add's done callback (add_lookup_callback) issues the lookup, and
 * the lookup's done callback sets lookup_retry_succeeded, which is asserted
 * after the event loop terminates. */
TEST add_lookup_test(void) {
g_loop = event_loop_create();
lookup_retry_succeeded = 0;
db_handle *db =
db_connect("127.0.0.1", 6379, "plasma_manager", "127.0.0.1", 11235);
db_attach(db, g_loop, true);
retry_info retry = {
.num_retries = 5,
.timeout = 100,
.fail_callback = lookup_retry_fail_callback,
};
/* Add the object with the nil digest; on success, add_lookup_callback
 * chains the lookup. The db handle is threaded through as user context. */
object_table_add(db, NIL_ID, (unsigned char *) NIL_DIGEST, &retry,
add_lookup_callback, (void *) db);
/* Install handler for terminating the event loop. */
event_loop_add_timer(g_loop, 750,
(event_loop_timer_handler) terminate_event_loop_callback,
NULL);
event_loop_run(g_loop);
db_disconnect(db);
destroy_outstanding_callbacks(g_loop);
event_loop_destroy(g_loop);
ASSERT(lookup_retry_succeeded);
PASS();
}
/* === Test subscribe retry === */
const char *subscribe_retry_context = "subscribe_retry";
@ -462,7 +512,7 @@ void lookup_late_fail_callback(unique_id id,
void lookup_late_done_callback(object_id object_id,
int manager_count,
OWNER const char *manager_vector[],
const char *manager_vector[],
void *context) {
/* This function should never be called. */
CHECK(0);
@ -518,8 +568,8 @@ TEST add_late_test(void) {
retry_info retry = {
.num_retries = 0, .timeout = 0, .fail_callback = add_late_fail_callback,
};
object_table_add(db, NIL_ID, &retry, add_late_done_callback,
(void *) add_late_context);
object_table_add(db, NIL_ID, (unsigned char *) NIL_DIGEST, &retry,
add_late_done_callback, (void *) add_late_context);
/* Install handler for terminating the event loop. */
event_loop_add_timer(g_loop, 750,
(event_loop_timer_handler) terminate_event_loop_callback,
@ -601,9 +651,10 @@ void subscribe_success_done_callback(object_id object_id,
const char *manager_vector[],
void *user_context) {
retry_info retry = {
.num_retries = 0, .timeout = 0, .fail_callback = NULL,
.num_retries = 0, .timeout = 750, .fail_callback = NULL,
};
object_table_add((db_handle *) user_context, object_id, &retry, NULL, NULL);
object_table_add((db_handle *) user_context, object_id,
(unsigned char *) NIL_DIGEST, &retry, NULL, NULL);
subscribe_success_done = 1;
}
@ -670,7 +721,7 @@ TEST subscribe_object_present_test(void) {
retry_info retry = {
.num_retries = 0, .timeout = 100, .fail_callback = NULL,
};
object_table_add(db, id, &retry, NULL, NULL);
object_table_add(db, id, (unsigned char *) NIL_DIGEST, &retry, NULL, NULL);
object_table_subscribe(
db, id, subscribe_object_present_object_available_callback,
(void *) subscribe_object_present_context, &retry, NULL, (void *) db);
@ -751,7 +802,8 @@ int64_t add_object_callback(event_loop *loop, int64_t timer_id, void *context) {
retry_info retry = {
.num_retries = 0, .timeout = 100, .fail_callback = NULL,
};
object_table_add(db, NIL_ID, &retry, NULL, NULL);
object_table_add(db, NIL_ID, (unsigned char *) NIL_DIGEST, &retry, NULL,
NULL);
/* Reset the timer to this large value, so it doesn't trigger again. */
return 10000;
}
@ -794,6 +846,7 @@ SUITE(object_table_tests) {
RUN_REDIS_TEST(subscribe_timeout_test);
RUN_REDIS_TEST(lookup_retry_test);
RUN_REDIS_TEST(add_retry_test);
RUN_REDIS_TEST(add_lookup_test);
RUN_REDIS_TEST(subscribe_retry_test);
RUN_REDIS_TEST(lookup_late_test);
RUN_REDIS_TEST(add_late_test);

View file

@ -57,7 +57,7 @@ if(APPLE)
SET(CMAKE_SHARED_LIBRARY_SUFFIX ".so")
endif(APPLE)
include_directories("${PYTHON_INCLUDE_DIRS}")
include_directories("${PYTHON_INCLUDE_DIRS}" thirdparty)
set(CMAKE_C_FLAGS "${CMAKE_CXX_FLAGS} --std=c99 -D_XOPEN_SOURCE=500 -D_POSIX_C_SOURCE=200809L")
@ -78,6 +78,7 @@ add_library(plasma SHARED
plasma.c
plasma_extension.c
plasma_client.c
thirdparty/xxhash.c
fling.c)
get_filename_component(PYTHON_SHARED_LIBRARY ${PYTHON_LIBRARIES} NAME)

View file

@ -1,5 +1,5 @@
CC = gcc
CFLAGS = -g -Wall -Wextra -Werror=implicit-function-declaration -Wno-sign-compare -Wno-unused-parameter -Wno-type-limits -Wno-missing-field-initializers --std=c99 -D_XOPEN_SOURCE=500 -D_POSIX_C_SOURCE=200809L -I. -I../common -I../common/thirdparty
CFLAGS = -g -Wall -Wextra -Werror=implicit-function-declaration -Wno-sign-compare -Wno-unused-parameter -Wno-type-limits -Wno-missing-field-initializers --std=c99 -D_XOPEN_SOURCE=500 -D_POSIX_C_SOURCE=200809L -I. -Ithirdparty -I../common -I../common/thirdparty
TEST_CFLAGS = -DPLASMA_TEST=1 -I.
BUILD = build
@ -13,26 +13,26 @@ clean:
cd ../common; make clean
rm -rf $(BUILD)/*
$(BUILD)/manager_tests: test/manager_tests.c plasma.h plasma.c plasma_client.h plasma_client.c plasma_manager.h plasma_manager.c fling.h fling.c common
$(CC) $(CFLAGS) $(TEST_CFLAGS) -o $@ test/manager_tests.c plasma.c plasma_manager.c plasma_client.c fling.c ../common/build/libcommon.a ../common/thirdparty/hiredis/libhiredis.a
$(BUILD)/manager_tests: test/manager_tests.c plasma.h plasma.c plasma_client.h plasma_client.c thirdparty/xxhash.c plasma_manager.h plasma_manager.c fling.h fling.c common
$(CC) $(CFLAGS) $(TEST_CFLAGS) -o $@ test/manager_tests.c plasma.c plasma_manager.c plasma_client.c thirdparty/xxhash.c fling.c ../common/build/libcommon.a ../common/thirdparty/hiredis/libhiredis.a
$(BUILD)/client_tests: test/client_tests.c plasma.h plasma.c plasma_client.h plasma_client.c plasma_manager.h plasma_manager.c fling.h fling.c common
$(CC) $(CFLAGS) $(TEST_CFLAGS) -o $@ test/client_tests.c plasma.c plasma_manager.c plasma_client.c fling.c ../common/build/libcommon.a ../common/thirdparty/hiredis/libhiredis.a
$(BUILD)/client_tests: test/client_tests.c plasma.h plasma.c plasma_client.h plasma_client.c thirdparty/xxhash.c plasma_manager.h plasma_manager.c fling.h fling.c common
$(CC) $(CFLAGS) $(TEST_CFLAGS) -o $@ test/client_tests.c plasma.c plasma_manager.c plasma_client.c thirdparty/xxhash.c fling.c ../common/build/libcommon.a ../common/thirdparty/hiredis/libhiredis.a
$(BUILD)/plasma_store: plasma_store.c plasma.h plasma.c eviction_policy.c fling.h fling.c malloc.c malloc.h thirdparty/dlmalloc.c common
$(CC) $(CFLAGS) plasma_store.c plasma.c eviction_policy.c fling.c malloc.c ../common/build/libcommon.a -o $(BUILD)/plasma_store
$(BUILD)/plasma_manager: plasma_manager.c plasma.h plasma.c plasma_client.c fling.h fling.c common
$(CC) $(CFLAGS) plasma_manager.c plasma.c plasma_client.c fling.c ../common/build/libcommon.a ../common/thirdparty/hiredis/libhiredis.a -o $(BUILD)/plasma_manager
$(BUILD)/plasma_manager: plasma_manager.c plasma.h plasma.c plasma_client.c thirdparty/xxhash.c fling.h fling.c common
$(CC) $(CFLAGS) plasma_manager.c plasma.c plasma_client.c thirdparty/xxhash.c fling.c ../common/build/libcommon.a ../common/thirdparty/hiredis/libhiredis.a -o $(BUILD)/plasma_manager
$(BUILD)/plasma_client.so: plasma.h plasma.c plasma_client.c fling.h fling.c common
$(CC) $(CFLAGS) plasma.c plasma_client.c fling.c ../common/build/libcommon.a -fPIC -shared -o $(BUILD)/plasma_client.so
$(CC) $(CFLAGS) plasma.c plasma_client.c thirdparty/xxhash.c fling.c ../common/build/libcommon.a -fPIC -shared -o $(BUILD)/plasma_client.so
$(BUILD)/libplasma_client.a: plasma.o plasma_client.o fling.o
$(BUILD)/libplasma_client.a: plasma.o plasma_client.o fling.o thirdparty/xxhash.o
ar rcs $@ $^
$(BUILD)/example: plasma_client.c plasma.h plasma.c example.c fling.h fling.c common
$(CC) $(CFLAGS) plasma_client.c plasma.c example.c fling.c ../common/build/libcommon.a -o $(BUILD)/example
$(BUILD)/example: plasma_client.c thirdparty/xxhash.c plasma.h plasma.c example.c fling.h fling.c common
$(CC) $(CFLAGS) plasma_client.c thirdparty/xxhash.c plasma.c example.c fling.c ../common/build/libcommon.a -o $(BUILD)/example
common: FORCE
cd ../common; make

View file

@ -1,6 +1,5 @@
import os
import random
import socket
import subprocess
import time
import libplasma
@ -135,6 +134,18 @@ class PlasmaClient(object):
"""
return libplasma.contains(self.conn, object_id)
def hash(self, object_id):
"""Compute the hash of an object in the object store.
Args:
object_id (str): A string used to identify an object.
Returns:
A string containing the hash digest of the object's data and metadata.
If the object isn't in the object store, None is returned.
"""
return libplasma.hash(self.conn, object_id)
def seal(self, object_id):
"""Seal the buffer in the PlasmaStore for a particular object ID.
@ -209,27 +220,11 @@ class PlasmaClient(object):
def subscribe(self):
"""Subscribe to notifications about sealed objects."""
fd = libplasma.subscribe(self.conn)
self.notification_sock = socket.fromfd(fd, socket.AF_UNIX, socket.SOCK_STREAM)
self.notification_fd = fd
# Make the socket non-blocking.
self.notification_sock.setblocking(0)
self.notification_fd = libplasma.subscribe(self.conn)
def get_next_notification(self):
"""Get the next notification from the notification socket."""
if not self.notification_sock:
raise Exception("To get notifications, first call subscribe.")
# Loop until we've read PLASMA_ID_SIZE bytes from the socket.
while True:
try:
rv = libplasma.receive_notification(self.notification_fd)
obj_id, data_size, metadata_size = rv
except socket.error:
time.sleep(0.001)
else:
assert len(obj_id) == PLASMA_ID_SIZE
break
return obj_id, data_size, metadata_size
return libplasma.receive_notification(self.notification_fd)
DEFAULT_PLASMA_STORE_MEMORY = 10 ** 9

View file

@ -152,6 +152,9 @@ typedef struct {
int port;
/** A number of bytes. This is used for eviction requests. */
int64_t num_bytes;
/** A digest describing the object. This is used for detecting
* nondeterministic tasks. */
unsigned char digest[DIGEST_SIZE];
/** The number of object IDs that will be included in this request. */
int num_object_ids;
/** The object requests that the request is about. */
@ -208,6 +211,8 @@ typedef struct {
UT_array *clients;
/** The state of the object, e.g., whether it is open or sealed. */
object_state state;
/** The digest of the object. Used to see if two objects are the same. */
unsigned char digest[DIGEST_SIZE];
} object_table_entry;
/** The plasma store information that is exposed to the eviction policy. */
@ -216,6 +221,15 @@ typedef struct {
object_table_entry *objects;
} plasma_store_info;
/** A notification message describing a newly sealed object. */
typedef struct {
/** The ID of the object. */
object_id obj_id;
/** The size of the object. */
int64_t object_size;
/** The digest of the object, used to see if two objects are the same. */
unsigned char digest[DIGEST_SIZE];
} object_id_notification;
/**
* Create a plasma request with one object ID on the stack.
*

View file

@ -27,6 +27,12 @@
#include "fling.h"
#include "uthash.h"
#include "utringbuffer.h"
#include "sha256.h"
#define XXH_STATIC_LINKING_ONLY
#include "xxhash.h"
#define XXH64_DEFAULT_SEED 0
/* Number of times we try connecting to a socket. */
#define NUM_CONNECT_ATTEMPTS 50
@ -327,11 +333,49 @@ void plasma_release(plasma_connection *conn, object_id obj_id) {
/**
 * Check whether an object is present in the local object store.
 *
 * First consults the client's local objects_in_use table; only if the object
 * is not already held does it round-trip a PLASMA_CONTAINS request to the
 * store. The result (1 if present, 0 otherwise) is written to *has_object.
 */
void plasma_contains(plasma_connection *conn,
object_id object_id,
int *has_object) {
/* Check if we already have a reference to the object. */
object_in_use_entry *object_entry;
HASH_FIND(hh, conn->objects_in_use, &object_id, sizeof(object_id),
object_entry);
if (object_entry) {
*has_object = 1;
} else {
/* If we don't already have a reference to the object, check with the store
 * to see if we have the object. */
plasma_request req = plasma_make_request(object_id);
CHECK(plasma_send_request(conn->store_conn, PLASMA_CONTAINS, &req) >= 0);
plasma_reply reply;
CHECK(plasma_receive_reply(conn->store_conn, sizeof(reply), &reply) >= 0);
*has_object = reply.has_object;
}
}
/**
 * Compute the hash digest of an object's data and metadata.
 *
 * The object must already be present in the local store; otherwise no digest
 * is produced. The digest buffer is DIGEST_SIZE bytes; the XXH64 hash fills
 * the first 8 bytes and the remainder is zeroed so that digests compare
 * deterministically with memcmp.
 *
 * @param conn The connection to the object store.
 * @param obj_id The ID of the object to hash.
 * @param digest Output buffer of at least DIGEST_SIZE bytes.
 * @return true if the object was present and the digest was written,
 *         false otherwise (digest is left untouched).
 */
bool plasma_compute_object_hash(plasma_connection *conn,
                                object_id obj_id,
                                unsigned char *digest) {
  /* If we don't have the object, report failure. */
  int has_object;
  plasma_contains(conn, obj_id, &has_object);
  if (!has_object) {
    return false;
  }
  /* Get the plasma object data. */
  int64_t size;
  uint8_t *data;
  int64_t metadata_size;
  uint8_t *metadata;
  plasma_get(conn, obj_id, &size, &data, &metadata_size, &metadata);
  /* Compute the hash over the data followed by the metadata. */
  XXH64_state_t hash_state;
  XXH64_reset(&hash_state, XXH64_DEFAULT_SEED);
  XXH64_update(&hash_state, (unsigned char *) data, size);
  XXH64_update(&hash_state, (unsigned char *) metadata, metadata_size);
  uint64_t hash = XXH64_digest(&hash_state);
  DCHECK(DIGEST_SIZE >= sizeof(uint64_t));
  /* The hash is only sizeof(hash) == 8 bytes while the digest buffer is
   * DIGEST_SIZE bytes. Copying DIGEST_SIZE bytes from &hash would read past
   * the end of the local variable (undefined behavior, and the digest tail
   * would be nondeterministic stack garbage). Zero-fill the buffer and copy
   * only the hash bytes. */
  memset(digest, 0, DIGEST_SIZE);
  memcpy(digest, &hash, sizeof(hash));
  /* Release the plasma object. */
  plasma_release(conn, obj_id);
  return true;
}
void plasma_seal(plasma_connection *conn, object_id object_id) {
@ -347,6 +391,7 @@ void plasma_seal(plasma_connection *conn, object_id object_id) {
object_entry->is_sealed = true;
/* Send the seal request to Plasma. */
plasma_request req = plasma_make_request(object_id);
CHECK(plasma_compute_object_hash(conn, object_id, req.digest));
CHECK(plasma_send_request(conn->store_conn, PLASMA_SEAL, &req) >= 0);
}

View file

@ -138,6 +138,19 @@ void plasma_contains(plasma_connection *conn,
object_id object_id,
int *has_object);
/**
* Compute the hash of an object in the object store.
*
* @param conn The object containing the connection state.
* @param object_id The ID of the object we want to hash.
* @param digest A pointer at which to return the hash digest of the object.
* The pointer must have at least DIGEST_SIZE bytes allocated.
* @return A boolean representing whether the hash operation succeeded.
*/
bool plasma_compute_object_hash(plasma_connection *conn,
object_id object_id,
unsigned char *digest);
/**
* Seal an object in the object store. The object will be immutable after this
* call.

View file

@ -78,6 +78,23 @@ PyObject *PyPlasma_create(PyObject *self, PyObject *args) {
return PyBuffer_FromReadWriteMemory((void *) data, (Py_ssize_t) size);
}
PyObject *PyPlasma_hash(PyObject *self, PyObject *args) {
  /* Python binding for plasma_compute_object_hash. Parses (connection,
   * object_id) from args and returns the DIGEST_SIZE-byte digest as a Python
   * string, or None if the object is not present in the local store. */
  plasma_connection *conn;
  object_id object_id;
  if (!PyArg_ParseTuple(args, "O&O&", PyObjectToPlasmaConnection, &conn,
                        PyObjectToUniqueID, &object_id)) {
    /* PyArg_ParseTuple has already set the Python exception. */
    return NULL;
  }
  unsigned char digest[DIGEST_SIZE];
  bool success = plasma_compute_object_hash(conn, object_id, digest);
  if (success) {
    /* Cast to char *: PyString_FromStringAndSize expects a (signed) char
     * buffer, while the digest holds raw unsigned bytes. The implicit
     * unsigned char * -> char * conversion is a constraint violation in C. */
    PyObject *digest_string =
        PyString_FromStringAndSize((char *) digest, DIGEST_SIZE);
    return digest_string;
  } else {
    Py_RETURN_NONE;
  }
}
PyObject *PyPlasma_seal(PyObject *self, PyObject *args) {
plasma_connection *conn;
object_id object_id;
@ -344,6 +361,8 @@ static PyMethodDef plasma_methods[] = {
{"disconnect", PyPlasma_disconnect, METH_VARARGS,
"Disconnect from plasma."},
{"create", PyPlasma_create, METH_VARARGS, "Create a new plasma object."},
{"hash", PyPlasma_hash, METH_VARARGS,
"Compute the hash of a plasma object."},
{"seal", PyPlasma_seal, METH_VARARGS, "Seal a plasma object."},
{"get", PyPlasma_get, METH_VARARGS, "Get a plasma object."},
{"contains", PyPlasma_contains, METH_VARARGS,

View file

@ -1528,7 +1528,7 @@ void process_object_notification(event_loop *loop,
if (state->db) {
/* TODO(swang): Log the error if we fail to add the object, and possibly
* retry later? */
object_table_add(state->db, obj_id, &retry, NULL, NULL);
object_table_add(state->db, obj_id, object_info.digest, &retry, NULL, NULL);
}
/* If we were trying to fetch this object, finish up the fetch request. */

View file

@ -306,7 +306,9 @@ int contains_object(client *client_context, object_id object_id) {
}
/* Seal an object that has been created in the hash table. */
void seal_object(client *client_context, object_id object_id) {
void seal_object(client *client_context,
object_id object_id,
unsigned char digest[]) {
LOG_DEBUG("sealing object"); // TODO(pcm): add object_id here
plasma_store_state *plasma_state = client_context->plasma_state;
object_table_entry *entry;
@ -316,6 +318,8 @@ void seal_object(client *client_context, object_id object_id) {
CHECK(entry->state == PLASMA_CREATED);
/* Set the state of object to SEALED. */
entry->state = PLASMA_SEALED;
/* Set the object digest. */
memcpy(entry->info.digest, digest, DIGEST_SIZE);
/* Inform all subscribers that a new object has been sealed. */
notification_queue *queue, *temp_queue;
HASH_ITER(hh, plasma_state->pending_notifications, queue, temp_queue) {
@ -519,7 +523,7 @@ void process_message(event_loop *loop,
break;
case PLASMA_SEAL:
DCHECK(req->num_object_ids == 1);
seal_object(client_context, req->object_requests[0].object_id);
seal_object(client_context, req->object_requests[0].object_id, req->digest);
break;
case PLASMA_DELETE:
/* TODO(rkn): In the future, we can use this method to give hints to the

View file

@ -73,9 +73,13 @@ void release_object(client *client_context, object_id object_id);
*
* @param client_context The context of the client making this request.
* @param object_id Object ID of the object to be sealed.
* @param digest The digest of the object. This is used to tell if two objects
* with the same object ID are the same.
* @return Void.
*/
void seal_object(client *client_context, object_id object_id);
void seal_object(client *client_context,
object_id object_id,
unsigned char digest[]);
/**
* Check if the plasma store contains an object:

View file

@ -146,6 +146,91 @@ class TestPlasmaClient(unittest.TestCase):
for object_id in real_object_ids:
self.assertTrue(self.plasma_client.contains(object_id))
def test_hash(self):
  """Check that object hashes are deterministic and content-sensitive."""
  # Hashing an object that does not exist should not raise.
  object_id1 = random_object_id()
  h = self.plasma_client.hash(object_id1)
  length = 1000
  # Create an object and check that hashing it twice gives the same value.
  metadata = generate_metadata(length)
  buf = self.plasma_client.create(object_id1, length, metadata)
  for idx in range(length):
    buf[idx] = chr(idx % 256)
  self.plasma_client.seal(object_id1)
  self.assertEqual(self.plasma_client.hash(object_id1),
                   self.plasma_client.hash(object_id1))
  # A second object with identical data and metadata must hash equal.
  object_id2 = random_object_id()
  buf = self.plasma_client.create(object_id2, length, metadata)
  for idx in range(length):
    buf[idx] = chr(idx % 256)
  self.plasma_client.seal(object_id2)
  self.assertEqual(self.plasma_client.hash(object_id1),
                   self.plasma_client.hash(object_id2))
  # A third object with different data must hash differently.
  object_id3 = random_object_id()
  metadata = generate_metadata(length)
  buf = self.plasma_client.create(object_id3, length, metadata)
  for idx in range(length):
    buf[idx] = chr((idx + 1) % 256)
  self.plasma_client.seal(object_id3)
  self.assertNotEqual(self.plasma_client.hash(object_id1),
                      self.plasma_client.hash(object_id3))
  # A fourth object with the same data as the third but different metadata
  # must hash differently from both the first and the third.
  object_id4 = random_object_id()
  metadata4 = generate_metadata(length)
  buf = self.plasma_client.create(object_id4, length, metadata4)
  for idx in range(length):
    buf[idx] = chr((idx + 1) % 256)
  self.plasma_client.seal(object_id4)
  self.assertNotEqual(self.plasma_client.hash(object_id1),
                      self.plasma_client.hash(object_id4))
  self.assertNotEqual(self.plasma_client.hash(object_id3),
                      self.plasma_client.hash(object_id4))
def test_many_hashes(self):
  """Create many near-identical objects and verify all hashes are unique."""
  hashes = []
  length = 2 ** 10
  # Constant-valued objects, one per byte value.
  for value in range(256):
    object_id = random_object_id()
    buf = self.plasma_client.create(object_id, length)
    for pos in range(length):
      buf[pos] = chr(value)
    self.plasma_client.seal(object_id)
    hashes.append(self.plasma_client.hash(object_id))
  # All-zero objects with a single byte set to one; each pair of these
  # differs in exactly two bits.
  for flip in range(length):
    object_id = random_object_id()
    buf = self.plasma_client.create(object_id, length)
    for pos in range(length):
      buf[pos] = chr(0)
    buf[flip] = chr(1)
    self.plasma_client.seal(object_id)
    hashes.append(self.plasma_client.hash(object_id))
  # All-zero objects of every length from 0 to length - 1.
  for size in range(length):
    object_id = random_object_id()
    buf = self.plasma_client.create(object_id, size)
    for pos in range(size):
      buf[pos] = chr(0)
    self.plasma_client.seal(object_id)
    hashes.append(self.plasma_client.hash(object_id))
  # Every hash collected above must be distinct.
  self.assertEqual(len(set(hashes)), 256 + length + length)
# def test_individual_delete(self):
# length = 100
# # Create an object id string.
@ -289,26 +374,24 @@ class TestPlasmaManager(unittest.TestCase):
self.client1 = plasma.PlasmaClient(store_name1, manager_name1)
self.client2 = plasma.PlasmaClient(store_name2, manager_name2)
# Store the processes that will be explicitly killed during tearDown so
# that a test case can remove ones that will be killed during the test.
# NOTE: If this specific order is changed, valgrind will fail.
self.processes_to_kill = [self.p4, self.p5, self.p2, self.p3]
def tearDown(self):
# Kill the PlasmaStore and PlasmaManager processes.
if USE_VALGRIND:
time.sleep(1) # give processes opportunity to finish work
self.p4.send_signal(signal.SIGTERM)
self.p4.wait()
self.p5.send_signal(signal.SIGTERM)
self.p5.wait()
self.p2.send_signal(signal.SIGTERM)
self.p2.wait()
self.p3.send_signal(signal.SIGTERM)
self.p3.wait()
if self.p2.returncode != 0 or self.p3.returncode != 0 or self.p4.returncode != 0 or self.p5.returncode != 0:
for process in self.processes_to_kill:
process.send_signal(signal.SIGTERM)
process.wait()
if process.returncode != 0:
print("aborting due to valgrind error")
os._exit(-1)
else:
self.p2.kill()
self.p3.kill()
self.p4.kill()
self.p5.kill()
for process in self.processes_to_kill:
process.kill()
self.redis_process.kill()
def test_fetch(self):
@ -528,6 +611,39 @@ class TestPlasmaManager(unittest.TestCase):
assert_get_object_equal(self, self.client1, self.client2, object_id2,
memory_buffer=memory_buffer2, metadata=metadata2)
def test_illegal_put(self):
  """
  Test doing a put at the same object ID, but with different object data. The
  first put should succeed. The second put should cause the plasma manager to
  exit with a fatal error.
  """
  if USE_VALGRIND:
    # Don't run this test when we are using valgrind because when processes
    # die without freeing up their state, valgrind complains.
    return
  # Create and seal the first object.
  length = 1000
  object_id = random_object_id()
  buf1 = self.client1.create(object_id, length)
  for idx in range(length):
    buf1[idx] = chr(idx % 256)
  self.client1.seal(object_id)
  # Create and seal the second object under the same ID. Its data matches the
  # first object's except for a single flipped bit at position 0.
  buf2 = self.client2.create(object_id, length)
  for idx in range(length):
    buf2[idx] = chr((idx if idx != 0 else 1) % 256)
  self.client2.seal(object_id)
  # Give the second manager time to process the seal, then verify that it
  # detected the hash mismatch and exited.
  time.sleep(2)
  self.p5.poll()
  self.assertNotEqual(self.p5.returncode, None)
  # p5 is already dead; don't try to kill it again in tearDown.
  self.processes_to_kill.remove(self.p5)
def test_illegal_functionality(self):
# Create an object id string.
object_id = random_object_id()

889
src/plasma/thirdparty/xxhash.c vendored Normal file
View file

@ -0,0 +1,889 @@
/*
* xxHash - Fast Hash algorithm
* Copyright (C) 2012-2016, Yann Collet
*
* BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following disclaimer
* in the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* You can contact the author at :
* - xxHash homepage: http://www.xxhash.com
* - xxHash source repository : https://github.com/Cyan4973/xxHash
*/
/* *************************************
* Tuning parameters
***************************************/
/*!XXH_FORCE_MEMORY_ACCESS :
* By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable.
* Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal.
* The below switch allow to select different access method for improved performance.
* Method 0 (default) : use `memcpy()`. Safe and portable.
* Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable).
* This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`.
* Method 2 : direct access. This method doesn't depend on compiler but violate C standard.
* It can generate buggy code on targets which do not support unaligned memory accesses.
* But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6)
* See http://stackoverflow.com/a/32095106/646947 for details.
* Prefer these methods in priority order (0 > 1 > 2)
*/
#ifndef XXH_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */
# if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
# define XXH_FORCE_MEMORY_ACCESS 2
# elif defined(__INTEL_COMPILER) || \
(defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) ))
# define XXH_FORCE_MEMORY_ACCESS 1
# endif
#endif
/*!XXH_ACCEPT_NULL_INPUT_POINTER :
* If the input pointer is a null pointer, xxHash default behavior is to trigger a memory access error, since it is a bad pointer.
* When this option is enabled, xxHash output for null input pointers will be the same as a null-length input.
* By default, this option is disabled. To enable it, uncomment below define :
*/
/* #define XXH_ACCEPT_NULL_INPUT_POINTER 1 */
/*!XXH_FORCE_NATIVE_FORMAT :
* By default, xxHash library provides endian-independent Hash values, based on little-endian convention.
* Results are therefore identical for little-endian and big-endian CPU.
* This comes at a performance cost for big-endian CPU, since some swapping is required to emulate little-endian format.
* Should endian-independence be of no importance for your application, you may set the #define below to 1,
* to improve speed for Big-endian CPU.
* This option has no impact on Little_Endian CPU.
*/
#ifndef XXH_FORCE_NATIVE_FORMAT /* can be defined externally */
# define XXH_FORCE_NATIVE_FORMAT 0
#endif
/*!XXH_FORCE_ALIGN_CHECK :
* This is a minor performance trick, only useful with lots of very small keys.
* It means : check for aligned/unaligned input.
* The check costs one initial branch per hash; set to 0 when the input data
* is guaranteed to be aligned.
*/
#ifndef XXH_FORCE_ALIGN_CHECK /* can be defined externally */
# if defined(__i386) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64)
# define XXH_FORCE_ALIGN_CHECK 0
# else
# define XXH_FORCE_ALIGN_CHECK 1
# endif
#endif
/* *************************************
* Includes & Memory related functions
***************************************/
/*! Modify the local functions below should you wish to use some other memory routines
* for malloc(), free() */
#include <stdlib.h>
static void* XXH_malloc(size_t s) { return malloc(s); }
static void XXH_free (void* p) { free(p); }
/*! and for memcpy() */
#include <string.h>
static void* XXH_memcpy(void* dest, const void* src, size_t size) { return memcpy(dest,src,size); }
#define XXH_STATIC_LINKING_ONLY
#include "xxhash.h"
/* *************************************
* Compiler Specific Options
***************************************/
#ifdef _MSC_VER /* Visual Studio */
# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
# define FORCE_INLINE static __forceinline
#else
# if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */
# ifdef __GNUC__
# define FORCE_INLINE static inline __attribute__((always_inline))
# else
# define FORCE_INLINE static inline
# endif
# else
# define FORCE_INLINE static
# endif /* __STDC_VERSION__ */
#endif
/* *************************************
* Basic Types
***************************************/
#ifndef MEM_MODULE
# if !defined (__VMS) && (defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
# include <stdint.h>
typedef uint8_t BYTE;
typedef uint16_t U16;
typedef uint32_t U32;
typedef int32_t S32;
# else
typedef unsigned char BYTE;
typedef unsigned short U16;
typedef unsigned int U32;
typedef signed int S32;
# endif
#endif
#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2))
/* Force direct memory access. Only works on CPU which support unaligned memory access in hardware */
static U32 XXH_read32(const void* memPtr) { return *(const U32*) memPtr; }
#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1))
/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */
/* currently only defined for gcc and icc */
typedef union { U32 u32; } __attribute__((packed)) unalign;
static U32 XXH_read32(const void* ptr) { return ((const unalign*)ptr)->u32; }
#else
/* portable and safe solution. Generally efficient.
* see : http://stackoverflow.com/a/32095106/646947
*/
/* Portable unaligned 32-bit read: memcpy() avoids strict-aliasing and
 * alignment UB, and compilers reduce it to a single load where allowed. */
static U32 XXH_read32(const void* memPtr)
{
U32 val;
memcpy(&val, memPtr, sizeof(val));
return val;
}
#endif /* XXH_FORCE_DIRECT_MEMORY_ACCESS */
/* ****************************************
* Compiler-specific Functions and Macros
******************************************/
#define XXH_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
/* Note : although _rotl exists for minGW (GCC under windows), performance seems poor */
#if defined(_MSC_VER)
# define XXH_rotl32(x,r) _rotl(x,r)
# define XXH_rotl64(x,r) _rotl64(x,r)
#else
# define XXH_rotl32(x,r) ((x << r) | (x >> (32 - r)))
# define XXH_rotl64(x,r) ((x << r) | (x >> (64 - r)))
#endif
#if defined(_MSC_VER) /* Visual Studio */
# define XXH_swap32 _byteswap_ulong
#elif XXH_GCC_VERSION >= 403
# define XXH_swap32 __builtin_bswap32
#else
/* Fallback 32-bit byte swap for compilers without a bswap builtin. */
static U32 XXH_swap32 (U32 x)
{
return ((x << 24) & 0xff000000 ) |
((x << 8) & 0x00ff0000 ) |
((x >> 8) & 0x0000ff00 ) |
((x >> 24) & 0x000000ff );
}
#endif
/* *************************************
* Architecture Macros
***************************************/
typedef enum { XXH_bigEndian=0, XXH_littleEndian=1 } XXH_endianess;
/* XXH_CPU_LITTLE_ENDIAN can be defined externally, for example on the compiler command line */
#ifndef XXH_CPU_LITTLE_ENDIAN
static const int g_one = 1;
# define XXH_CPU_LITTLE_ENDIAN (*(const char*)(&g_one))
#endif
/* ***************************
* Memory reads
*****************************/
typedef enum { XXH_aligned, XXH_unaligned } XXH_alignment;
FORCE_INLINE U32 XXH_readLE32_align(const void* ptr, XXH_endianess endian, XXH_alignment align)
{
if (align==XXH_unaligned)
return endian==XXH_littleEndian ? XXH_read32(ptr) : XXH_swap32(XXH_read32(ptr));
else
return endian==XXH_littleEndian ? *(const U32*)ptr : XXH_swap32(*(const U32*)ptr);
}
FORCE_INLINE U32 XXH_readLE32(const void* ptr, XXH_endianess endian)
{
return XXH_readLE32_align(ptr, endian, XXH_unaligned);
}
static U32 XXH_readBE32(const void* ptr)
{
return XXH_CPU_LITTLE_ENDIAN ? XXH_swap32(XXH_read32(ptr)) : XXH_read32(ptr);
}
/* *************************************
* Macros
***************************************/
#define XXH_STATIC_ASSERT(c) { enum { XXH_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */
XXH_PUBLIC_API unsigned XXH_versionNumber (void) { return XXH_VERSION_NUMBER; }
/* *******************************************************************
* 32-bits hash functions
*********************************************************************/
static const U32 PRIME32_1 = 2654435761U;
static const U32 PRIME32_2 = 2246822519U;
static const U32 PRIME32_3 = 3266489917U;
static const U32 PRIME32_4 = 668265263U;
static const U32 PRIME32_5 = 374761393U;
/* One 32-bit mixing step: multiply-add the input lane into the accumulator,
 * rotate, and multiply (unsigned arithmetic wraps by definition). */
static U32 XXH32_round(U32 seed, U32 input)
{
seed += input * PRIME32_2;
seed = XXH_rotl32(seed, 13);
seed *= PRIME32_1;
return seed;
}
/* Core single-shot 32-bit hash, specialized by the compiler for each
 * (endian, align) combination via FORCE_INLINE. */
FORCE_INLINE U32 XXH32_endian_align(const void* input, size_t len, U32 seed, XXH_endianess endian, XXH_alignment align)
{
const BYTE* p = (const BYTE*)input;
const BYTE* bEnd = p + len;
U32 h32;
#define XXH_get32bits(p) XXH_readLE32_align(p, endian, align)
#ifdef XXH_ACCEPT_NULL_INPUT_POINTER
if (p==NULL) {
len=0;
bEnd=p=(const BYTE*)(size_t)16;
}
#endif
/* Main loop: four independent accumulators consume 16-byte stripes. */
if (len>=16) {
const BYTE* const limit = bEnd - 16;
U32 v1 = seed + PRIME32_1 + PRIME32_2;
U32 v2 = seed + PRIME32_2;
U32 v3 = seed + 0;
U32 v4 = seed - PRIME32_1;
do {
v1 = XXH32_round(v1, XXH_get32bits(p)); p+=4;
v2 = XXH32_round(v2, XXH_get32bits(p)); p+=4;
v3 = XXH32_round(v3, XXH_get32bits(p)); p+=4;
v4 = XXH32_round(v4, XXH_get32bits(p)); p+=4;
} while (p<=limit);
h32 = XXH_rotl32(v1, 1) + XXH_rotl32(v2, 7) + XXH_rotl32(v3, 12) + XXH_rotl32(v4, 18);
} else {
h32 = seed + PRIME32_5;
}
h32 += (U32) len;
/* Consume remaining 4-byte words, then trailing bytes. */
while (p+4<=bEnd) {
h32 += XXH_get32bits(p) * PRIME32_3;
h32 = XXH_rotl32(h32, 17) * PRIME32_4 ;
p+=4;
}
while (p<bEnd) {
h32 += (*p) * PRIME32_5;
h32 = XXH_rotl32(h32, 11) * PRIME32_1 ;
p++;
}
/* Final avalanche: spread entropy of every input bit across the result. */
h32 ^= h32 >> 15;
h32 *= PRIME32_2;
h32 ^= h32 >> 13;
h32 *= PRIME32_3;
h32 ^= h32 >> 16;
return h32;
}
/* Public one-shot 32-bit hash: detects endianness (and, when
 * XXH_FORCE_ALIGN_CHECK is set, 4-byte alignment) and dispatches to the
 * matching XXH32_endian_align specialization. */
XXH_PUBLIC_API unsigned int XXH32 (const void* input, size_t len, unsigned int seed)
{
#if 0
/* Simple version, good for code maintenance, but unfortunately slow for small inputs */
XXH32_state_t state;
XXH32_reset(&state, seed);
XXH32_update(&state, input, len);
return XXH32_digest(&state);
#else
XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
if (XXH_FORCE_ALIGN_CHECK) {
if ((((size_t)input) & 3) == 0) { /* Input is 4-bytes aligned, leverage the speed benefit */
if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned);
else
return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned);
} }
if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned);
else
return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned);
#endif
}
/*====== Hash streaming ======*/
XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void)
{
return (XXH32_state_t*)XXH_malloc(sizeof(XXH32_state_t));
}
XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr)
{
XXH_free(statePtr);
return XXH_OK;
}
XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* dstState, const XXH32_state_t* srcState)
{
memcpy(dstState, srcState, sizeof(*dstState));
}
XXH_PUBLIC_API XXH_errorcode XXH32_reset(XXH32_state_t* statePtr, unsigned int seed)
{
XXH32_state_t state; /* using a local state to memcpy() in order to avoid strict-aliasing warnings */
memset(&state, 0, sizeof(state)-4); /* do not write into reserved, for future removal */
state.v1 = seed + PRIME32_1 + PRIME32_2;
state.v2 = seed + PRIME32_2;
state.v3 = seed + 0;
state.v4 = seed - PRIME32_1;
memcpy(statePtr, &state, sizeof(state));
return XXH_OK;
}
/* Streaming 32-bit update: buffers input in state->mem32 until a full
 * 16-byte stripe is available, then feeds stripes to the four accumulators. */
FORCE_INLINE XXH_errorcode XXH32_update_endian (XXH32_state_t* state, const void* input, size_t len, XXH_endianess endian)
{
const BYTE* p = (const BYTE*)input;
const BYTE* const bEnd = p + len;
#ifdef XXH_ACCEPT_NULL_INPUT_POINTER
if (input==NULL) return XXH_ERROR;
#endif
state->total_len_32 += (unsigned)len;
state->large_len |= (len>=16) | (state->total_len_32>=16);
if (state->memsize + len < 16) { /* fill in tmp buffer */
XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, len);
state->memsize += (unsigned)len;
return XXH_OK;
}
if (state->memsize) { /* some data left from previous update */
XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, 16-state->memsize);
{ const U32* p32 = state->mem32;
state->v1 = XXH32_round(state->v1, XXH_readLE32(p32, endian)); p32++;
state->v2 = XXH32_round(state->v2, XXH_readLE32(p32, endian)); p32++;
state->v3 = XXH32_round(state->v3, XXH_readLE32(p32, endian)); p32++;
state->v4 = XXH32_round(state->v4, XXH_readLE32(p32, endian)); p32++;
}
p += 16-state->memsize;
state->memsize = 0;
}
/* Bulk-consume full 16-byte stripes directly from the caller's buffer. */
if (p <= bEnd-16) {
const BYTE* const limit = bEnd - 16;
U32 v1 = state->v1;
U32 v2 = state->v2;
U32 v3 = state->v3;
U32 v4 = state->v4;
do {
v1 = XXH32_round(v1, XXH_readLE32(p, endian)); p+=4;
v2 = XXH32_round(v2, XXH_readLE32(p, endian)); p+=4;
v3 = XXH32_round(v3, XXH_readLE32(p, endian)); p+=4;
v4 = XXH32_round(v4, XXH_readLE32(p, endian)); p+=4;
} while (p<=limit);
state->v1 = v1;
state->v2 = v2;
state->v3 = v3;
state->v4 = v4;
}
/* Stash any trailing partial stripe for the next update/digest call. */
if (p < bEnd) {
XXH_memcpy(state->mem32, p, (size_t)(bEnd-p));
state->memsize = (unsigned)(bEnd-p);
}
return XXH_OK;
}
XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* state_in, const void* input, size_t len)
{
XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
return XXH32_update_endian(state_in, input, len, XXH_littleEndian);
else
return XXH32_update_endian(state_in, input, len, XXH_bigEndian);
}
/* Finalize the streaming 32-bit hash: merge the accumulators (or fall back
 * to the short-input path), consume the buffered tail, and avalanche.
 * The state is not modified, so digest may be called more than once. */
FORCE_INLINE U32 XXH32_digest_endian (const XXH32_state_t* state, XXH_endianess endian)
{
const BYTE * p = (const BYTE*)state->mem32;
const BYTE* const bEnd = (const BYTE*)(state->mem32) + state->memsize;
U32 h32;
if (state->large_len) {
h32 = XXH_rotl32(state->v1, 1) + XXH_rotl32(state->v2, 7) + XXH_rotl32(state->v3, 12) + XXH_rotl32(state->v4, 18);
} else {
h32 = state->v3 /* == seed */ + PRIME32_5;
}
h32 += state->total_len_32;
while (p+4<=bEnd) {
h32 += XXH_readLE32(p, endian) * PRIME32_3;
h32 = XXH_rotl32(h32, 17) * PRIME32_4;
p+=4;
}
while (p<bEnd) {
h32 += (*p) * PRIME32_5;
h32 = XXH_rotl32(h32, 11) * PRIME32_1;
p++;
}
/* Final avalanche, identical to the one-shot path. */
h32 ^= h32 >> 15;
h32 *= PRIME32_2;
h32 ^= h32 >> 13;
h32 *= PRIME32_3;
h32 ^= h32 >> 16;
return h32;
}
XXH_PUBLIC_API unsigned int XXH32_digest (const XXH32_state_t* state_in)
{
XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
return XXH32_digest_endian(state_in, XXH_littleEndian);
else
return XXH32_digest_endian(state_in, XXH_bigEndian);
}
/*====== Canonical representation ======*/
/*! Default XXH result types are basic unsigned 32 and 64 bits.
* The canonical representation follows human-readable write convention, aka big-endian (large digits first).
* These functions allow transformation of hash result into and from its canonical format.
* This way, hash values can be written into a file or buffer, and remain comparable across different systems and programs.
*/
XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash)
{
XXH_STATIC_ASSERT(sizeof(XXH32_canonical_t) == sizeof(XXH32_hash_t));
if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap32(hash);
memcpy(dst, &hash, sizeof(*dst));
}
XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src)
{
return XXH_readBE32(src);
}
#ifndef XXH_NO_LONG_LONG
/* *******************************************************************
* 64-bits hash functions
*********************************************************************/
/*====== Memory access ======*/
#ifndef MEM_MODULE
# define MEM_MODULE
# if !defined (__VMS) && (defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
# include <stdint.h>
typedef uint64_t U64;
# else
typedef unsigned long long U64; /* if your compiler doesn't support unsigned long long, replace by another 64-bit type here. Note that xxhash.h will also need to be updated. */
# endif
#endif
#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2))
/* Force direct memory access. Only works on CPU which support unaligned memory access in hardware */
static U64 XXH_read64(const void* memPtr) { return *(const U64*) memPtr; }
#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1))
/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */
/* currently only defined for gcc and icc */
typedef union { U32 u32; U64 u64; } __attribute__((packed)) unalign64;
static U64 XXH_read64(const void* ptr) { return ((const unalign64*)ptr)->u64; }
#else
/* portable and safe solution. Generally efficient.
* see : http://stackoverflow.com/a/32095106/646947
*/
static U64 XXH_read64(const void* memPtr)
{
U64 val;
memcpy(&val, memPtr, sizeof(val));
return val;
}
#endif /* XXH_FORCE_DIRECT_MEMORY_ACCESS */
#if defined(_MSC_VER) /* Visual Studio */
# define XXH_swap64 _byteswap_uint64
#elif XXH_GCC_VERSION >= 403
# define XXH_swap64 __builtin_bswap64
#else
static U64 XXH_swap64 (U64 x)
{
return ((x << 56) & 0xff00000000000000ULL) |
((x << 40) & 0x00ff000000000000ULL) |
((x << 24) & 0x0000ff0000000000ULL) |
((x << 8) & 0x000000ff00000000ULL) |
((x >> 8) & 0x00000000ff000000ULL) |
((x >> 24) & 0x0000000000ff0000ULL) |
((x >> 40) & 0x000000000000ff00ULL) |
((x >> 56) & 0x00000000000000ffULL);
}
#endif
FORCE_INLINE U64 XXH_readLE64_align(const void* ptr, XXH_endianess endian, XXH_alignment align)
{
if (align==XXH_unaligned)
return endian==XXH_littleEndian ? XXH_read64(ptr) : XXH_swap64(XXH_read64(ptr));
else
return endian==XXH_littleEndian ? *(const U64*)ptr : XXH_swap64(*(const U64*)ptr);
}
FORCE_INLINE U64 XXH_readLE64(const void* ptr, XXH_endianess endian)
{
return XXH_readLE64_align(ptr, endian, XXH_unaligned);
}
static U64 XXH_readBE64(const void* ptr)
{
return XXH_CPU_LITTLE_ENDIAN ? XXH_swap64(XXH_read64(ptr)) : XXH_read64(ptr);
}
/*====== xxh64 ======*/
static const U64 PRIME64_1 = 11400714785074694791ULL;
static const U64 PRIME64_2 = 14029467366897019727ULL;
static const U64 PRIME64_3 = 1609587929392839161ULL;
static const U64 PRIME64_4 = 9650029242287828579ULL;
static const U64 PRIME64_5 = 2870177450012600261ULL;
/* One 64-bit mixing step: multiply-add the input lane into the accumulator,
 * rotate, and multiply. */
static U64 XXH64_round(U64 acc, U64 input)
{
acc += input * PRIME64_2;
acc = XXH_rotl64(acc, 31);
acc *= PRIME64_1;
return acc;
}
/* Fold one accumulator lane into the combined 64-bit hash during
 * finalization. */
static U64 XXH64_mergeRound(U64 acc, U64 val)
{
val = XXH64_round(0, val);
acc ^= val;
acc = acc * PRIME64_1 + PRIME64_4;
return acc;
}
/* Core single-shot 64-bit hash, specialized by the compiler for each
 * (endian, align) combination via FORCE_INLINE. */
FORCE_INLINE U64 XXH64_endian_align(const void* input, size_t len, U64 seed, XXH_endianess endian, XXH_alignment align)
{
const BYTE* p = (const BYTE*)input;
const BYTE* bEnd = p + len;
U64 h64;
#define XXH_get64bits(p) XXH_readLE64_align(p, endian, align)
#ifdef XXH_ACCEPT_NULL_INPUT_POINTER
if (p==NULL) {
len=0;
bEnd=p=(const BYTE*)(size_t)32;
}
#endif
/* Main loop: four independent accumulators consume 32-byte stripes. */
if (len>=32) {
const BYTE* const limit = bEnd - 32;
U64 v1 = seed + PRIME64_1 + PRIME64_2;
U64 v2 = seed + PRIME64_2;
U64 v3 = seed + 0;
U64 v4 = seed - PRIME64_1;
do {
v1 = XXH64_round(v1, XXH_get64bits(p)); p+=8;
v2 = XXH64_round(v2, XXH_get64bits(p)); p+=8;
v3 = XXH64_round(v3, XXH_get64bits(p)); p+=8;
v4 = XXH64_round(v4, XXH_get64bits(p)); p+=8;
} while (p<=limit);
h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18);
h64 = XXH64_mergeRound(h64, v1);
h64 = XXH64_mergeRound(h64, v2);
h64 = XXH64_mergeRound(h64, v3);
h64 = XXH64_mergeRound(h64, v4);
} else {
h64 = seed + PRIME64_5;
}
h64 += (U64) len;
/* Consume remaining 8-byte words, one 4-byte word, then trailing bytes. */
while (p+8<=bEnd) {
U64 const k1 = XXH64_round(0, XXH_get64bits(p));
h64 ^= k1;
h64 = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4;
p+=8;
}
if (p+4<=bEnd) {
h64 ^= (U64)(XXH_get32bits(p)) * PRIME64_1;
h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3;
p+=4;
}
while (p<bEnd) {
h64 ^= (*p) * PRIME64_5;
h64 = XXH_rotl64(h64, 11) * PRIME64_1;
p++;
}
/* Final avalanche: spread entropy of every input bit across the result. */
h64 ^= h64 >> 33;
h64 *= PRIME64_2;
h64 ^= h64 >> 29;
h64 *= PRIME64_3;
h64 ^= h64 >> 32;
return h64;
}
/* Public one-shot 64-bit hash: detects endianness (and, when
 * XXH_FORCE_ALIGN_CHECK is set, 8-byte alignment) and dispatches to the
 * matching XXH64_endian_align specialization. */
XXH_PUBLIC_API unsigned long long XXH64 (const void* input, size_t len, unsigned long long seed)
{
#if 0
/* Simple version, good for code maintenance, but unfortunately slow for small inputs */
XXH64_state_t state;
XXH64_reset(&state, seed);
XXH64_update(&state, input, len);
return XXH64_digest(&state);
#else
XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
if (XXH_FORCE_ALIGN_CHECK) {
if ((((size_t)input) & 7)==0) { /* Input is aligned, let's leverage the speed advantage */
if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
return XXH64_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned);
else
return XXH64_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned);
} }
if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
return XXH64_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned);
else
return XXH64_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned);
#endif
}
/*====== Hash Streaming ======*/
/* Heap-allocate an XXH64 streaming state.
 * The returned memory is uninitialized: callers must invoke XXH64_reset()
 * before use and XXH64_freeState() to release it. */
XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void)
{
    XXH64_state_t* const statePtr = (XXH64_state_t*)XXH_malloc(sizeof(XXH64_state_t));
    return statePtr;
}
/* Release a state obtained from XXH64_createState().
 * This operation cannot fail; it always reports XXH_OK. */
XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr)
{
    XXH_free(statePtr);
    return XXH_OK;
}
/* Clone a streaming state: after the call, dstState yields the same digests
 * as srcState for identical further input. */
XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* dstState, const XXH64_state_t* srcState)
{
    *dstState = *srcState;   /* plain struct assignment copies every member */
}
/* (Re)initialize a streaming state with `seed`.
 * Builds the state locally and memcpy()s it out (avoids strict-aliasing
 * warnings), and deliberately leaves the trailing `reserved[2]` member
 * (8 bytes) untouched -- hence the sizeof(state)-8 in the memset. */
XXH_PUBLIC_API XXH_errorcode XXH64_reset(XXH64_state_t* statePtr, unsigned long long seed)
{
    XXH64_state_t state;   /* using a local state to memcpy() in order to avoid strict-aliasing warnings */
    memset(&state, 0, sizeof(state)-8);   /* do not write into reserved, for future removal */
    /* Seed-derived starting values for the four accumulator lanes. */
    state.v1 = seed + PRIME64_1 + PRIME64_2;
    state.v2 = seed + PRIME64_2;
    state.v3 = seed + 0;   /* v3 starts as exactly the seed (used by digest for short inputs) */
    state.v4 = seed - PRIME64_1;
    memcpy(statePtr, &state, sizeof(state));
    return XXH_OK;
}
/* Core streaming update for a fixed byte order.
 * Buffers input in state->mem64 until a full 32-byte stripe is available,
 * then consumes whole stripes through the four accumulator lanes; any tail
 * (< 32 bytes) is stashed for the next update or for the final digest. */
FORCE_INLINE XXH_errorcode XXH64_update_endian (XXH64_state_t* state, const void* input, size_t len, XXH_endianess endian)
{
    const BYTE* p = (const BYTE*)input;
    const BYTE* const bEnd = p + len;

#ifdef XXH_ACCEPT_NULL_INPUT_POINTER
    if (input==NULL) return XXH_ERROR;
#endif

    state->total_len += len;

    if (state->memsize + len < 32) {  /* fill in tmp buffer */
        /* Not enough for a full stripe yet: just accumulate and return. */
        XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, len);
        state->memsize += (U32)len;
        return XXH_OK;
    }

    if (state->memsize) {   /* tmp buffer is full */
        /* Complete the partially-buffered stripe from the previous call
         * and run one round per 8-byte lane over it. */
        XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, 32-state->memsize);
        state->v1 = XXH64_round(state->v1, XXH_readLE64(state->mem64+0, endian));
        state->v2 = XXH64_round(state->v2, XXH_readLE64(state->mem64+1, endian));
        state->v3 = XXH64_round(state->v3, XXH_readLE64(state->mem64+2, endian));
        state->v4 = XXH64_round(state->v4, XXH_readLE64(state->mem64+3, endian));
        p += 32-state->memsize;
        state->memsize = 0;
    }

    if (p+32 <= bEnd) {
        /* Bulk phase: consume full 32-byte stripes straight from the input,
         * working on local copies of the accumulators. */
        const BYTE* const limit = bEnd - 32;
        U64 v1 = state->v1;
        U64 v2 = state->v2;
        U64 v3 = state->v3;
        U64 v4 = state->v4;

        do {
            v1 = XXH64_round(v1, XXH_readLE64(p, endian)); p+=8;
            v2 = XXH64_round(v2, XXH_readLE64(p, endian)); p+=8;
            v3 = XXH64_round(v3, XXH_readLE64(p, endian)); p+=8;
            v4 = XXH64_round(v4, XXH_readLE64(p, endian)); p+=8;
        } while (p<=limit);

        state->v1 = v1;
        state->v2 = v2;
        state->v3 = v3;
        state->v4 = v4;
    }

    if (p < bEnd) {
        /* Stash the remaining tail (< 32 bytes) for later. */
        XXH_memcpy(state->mem64, p, (size_t)(bEnd-p));
        state->memsize = (unsigned)(bEnd-p);
    }

    return XXH_OK;
}
/* Public update entry point: resolve the host byte order once, then delegate
 * to the fixed-endian worker (keeping `endian` a compile-time constant in
 * each call so the FORCE_INLINE specialization can optimize it away). */
XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* state_in, const void* input, size_t len)
{
    XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;

    if ((endian_detected != XXH_littleEndian) && !XXH_FORCE_NATIVE_FORMAT)
        return XXH64_update_endian(state_in, input, len, XXH_bigEndian);
    return XXH64_update_endian(state_in, input, len, XXH_littleEndian);
}
/* Produce the final 64-bit hash from a streaming state (state is not
 * modified, so hashing can continue afterwards).  Merges the four lane
 * accumulators -- or, for inputs shorter than 32 bytes, starts from
 * state->v3 + PRIME64_5 (v3 still holds the seed; see XXH64_reset) --
 * then folds in the buffered tail bytes and applies the avalanche mix. */
FORCE_INLINE U64 XXH64_digest_endian (const XXH64_state_t* state, XXH_endianess endian)
{
    const BYTE * p = (const BYTE*)state->mem64;
    const BYTE* const bEnd = (const BYTE*)state->mem64 + state->memsize;
    U64 h64;

    if (state->total_len >= 32) {
        /* At least one full stripe was processed: converge the lanes. */
        U64 const v1 = state->v1;
        U64 const v2 = state->v2;
        U64 const v3 = state->v3;
        U64 const v4 = state->v4;

        h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18);
        h64 = XXH64_mergeRound(h64, v1);
        h64 = XXH64_mergeRound(h64, v2);
        h64 = XXH64_mergeRound(h64, v3);
        h64 = XXH64_mergeRound(h64, v4);
    } else {
        h64 = state->v3 + PRIME64_5;   /* short-input path: v3 == seed */
    }

    h64 += (U64) state->total_len;

    /* Fold in the buffered tail: 8-byte words, then a 4-byte word, then
     * single bytes. */
    while (p+8<=bEnd) {
        U64 const k1 = XXH64_round(0, XXH_readLE64(p, endian));
        h64 ^= k1;
        h64 = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4;
        p+=8;
    }

    if (p+4<=bEnd) {
        h64 ^= (U64)(XXH_readLE32(p, endian)) * PRIME64_1;
        h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3;
        p+=4;
    }

    while (p<bEnd) {
        h64 ^= (*p) * PRIME64_5;
        h64 = XXH_rotl64(h64, 11) * PRIME64_1;
        p++;
    }

    /* Final avalanche: xor-shift / multiply mixing. */
    h64 ^= h64 >> 33;
    h64 *= PRIME64_2;
    h64 ^= h64 >> 29;
    h64 *= PRIME64_3;
    h64 ^= h64 >> 32;

    return h64;
}
/* Public digest entry point: resolve the host byte order once, then delegate
 * to the matching fixed-endian finalizer.  The state is left untouched. */
XXH_PUBLIC_API unsigned long long XXH64_digest (const XXH64_state_t* state_in)
{
    XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;

    if ((endian_detected != XXH_littleEndian) && !XXH_FORCE_NATIVE_FORMAT)
        return XXH64_digest_endian(state_in, XXH_bigEndian);
    return XXH64_digest_endian(state_in, XXH_littleEndian);
}
/*====== Canonical representation ======*/
/* Serialize a hash value into its canonical (big-endian) byte form,
 * so it can be stored and compared across platforms. */
XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash)
{
    XXH64_hash_t canonical = hash;
    XXH_STATIC_ASSERT(sizeof(XXH64_canonical_t) == sizeof(XXH64_hash_t));
    if (XXH_CPU_LITTLE_ENDIAN) canonical = XXH_swap64(canonical);
    memcpy(dst, &canonical, sizeof(*dst));
}
/* Decode a canonical (big-endian) representation back into a native
 * 64-bit hash value. */
XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src)
{
    const XXH64_hash_t h = XXH_readBE64(src);
    return h;
}
#endif /* XXH_NO_LONG_LONG */

293
src/plasma/thirdparty/xxhash.h vendored Normal file
View file

@ -0,0 +1,293 @@
/*
xxHash - Extremely Fast Hash algorithm
Header File
Copyright (C) 2012-2016, Yann Collet.
BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
You can contact the author at :
- xxHash source repository : https://github.com/Cyan4973/xxHash
*/
/* Notice extracted from xxHash homepage :
xxHash is an extremely fast Hash algorithm, running at RAM speed limits.
It also successfully passes all tests from the SMHasher suite.
Comparison (single thread, Windows Seven 32 bits, using SMHasher on a Core 2 Duo @3GHz)
Name Speed Q.Score Author
xxHash 5.4 GB/s 10
CrapWow 3.2 GB/s 2 Andrew
MurmurHash 3a 2.7 GB/s 10 Austin Appleby
SpookyHash 2.0 GB/s 10 Bob Jenkins
SBox 1.4 GB/s 9 Bret Mulvey
Lookup3 1.2 GB/s 9 Bob Jenkins
SuperFastHash 1.2 GB/s 1 Paul Hsieh
CityHash64 1.05 GB/s 10 Pike & Alakuijala
FNV 0.55 GB/s 5 Fowler, Noll, Vo
CRC32 0.43 GB/s 9
MD5-32 0.33 GB/s 10 Ronald L. Rivest
SHA1-32 0.28 GB/s 10
Q.Score is a measure of quality of the hash function.
It depends on successfully passing SMHasher test set.
10 is a perfect score.
A 64-bits version, named XXH64, is available since r35.
It offers much better speed, but for 64-bits applications only.
Name Speed on 64 bits Speed on 32 bits
XXH64 13.8 GB/s 1.9 GB/s
XXH32 6.8 GB/s 6.0 GB/s
*/
#ifndef XXHASH_H_5627135585666179
#define XXHASH_H_5627135585666179 1
#if defined (__cplusplus)
extern "C" {
#endif
/* ****************************
* Definitions
******************************/
#include <stddef.h> /* size_t */
typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode;
/* ****************************
* API modifier
******************************/
/** XXH_PRIVATE_API
* This is useful to include xxhash functions in `static` mode
* in order to inline them, and remove their symbol from the public list.
* Methodology :
* #define XXH_PRIVATE_API
* #include "xxhash.h"
* `xxhash.c` is automatically included.
* It's not useful to compile and link it as a separate module.
*/
#ifdef XXH_PRIVATE_API
# ifndef XXH_STATIC_LINKING_ONLY
# define XXH_STATIC_LINKING_ONLY
# endif
# if defined(__GNUC__)
# define XXH_PUBLIC_API static __inline __attribute__((unused))
# elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
# define XXH_PUBLIC_API static inline
# elif defined(_MSC_VER)
# define XXH_PUBLIC_API static __inline
# else
# define XXH_PUBLIC_API static /* this version may generate warnings for unused static functions; disable the relevant warning */
# endif
#else
# define XXH_PUBLIC_API /* do nothing */
#endif /* XXH_PRIVATE_API */
/*!XXH_NAMESPACE, aka Namespace Emulation :
If you want to include _and expose_ xxHash functions from within your own library,
but also want to avoid symbol collisions with other libraries which may also include xxHash,
you can use XXH_NAMESPACE, to automatically prefix any public symbol from xxhash library
with the value of XXH_NAMESPACE (therefore, avoid NULL and numeric values).
Note that no change is required within the calling program as long as it includes `xxhash.h` :
regular symbol name will be automatically translated by this header.
*/
#ifdef XXH_NAMESPACE
# define XXH_CAT(A,B) A##B
# define XXH_NAME2(A,B) XXH_CAT(A,B)
# define XXH_versionNumber XXH_NAME2(XXH_NAMESPACE, XXH_versionNumber)
# define XXH32 XXH_NAME2(XXH_NAMESPACE, XXH32)
# define XXH32_createState XXH_NAME2(XXH_NAMESPACE, XXH32_createState)
# define XXH32_freeState XXH_NAME2(XXH_NAMESPACE, XXH32_freeState)
# define XXH32_reset XXH_NAME2(XXH_NAMESPACE, XXH32_reset)
# define XXH32_update XXH_NAME2(XXH_NAMESPACE, XXH32_update)
# define XXH32_digest XXH_NAME2(XXH_NAMESPACE, XXH32_digest)
# define XXH32_copyState XXH_NAME2(XXH_NAMESPACE, XXH32_copyState)
# define XXH32_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH32_canonicalFromHash)
# define XXH32_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH32_hashFromCanonical)
# define XXH64 XXH_NAME2(XXH_NAMESPACE, XXH64)
# define XXH64_createState XXH_NAME2(XXH_NAMESPACE, XXH64_createState)
# define XXH64_freeState XXH_NAME2(XXH_NAMESPACE, XXH64_freeState)
# define XXH64_reset XXH_NAME2(XXH_NAMESPACE, XXH64_reset)
# define XXH64_update XXH_NAME2(XXH_NAMESPACE, XXH64_update)
# define XXH64_digest XXH_NAME2(XXH_NAMESPACE, XXH64_digest)
# define XXH64_copyState XXH_NAME2(XXH_NAMESPACE, XXH64_copyState)
# define XXH64_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH64_canonicalFromHash)
# define XXH64_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH64_hashFromCanonical)
#endif
/* *************************************
* Version
***************************************/
#define XXH_VERSION_MAJOR 0
#define XXH_VERSION_MINOR 6
#define XXH_VERSION_RELEASE 2
#define XXH_VERSION_NUMBER (XXH_VERSION_MAJOR *100*100 + XXH_VERSION_MINOR *100 + XXH_VERSION_RELEASE)
XXH_PUBLIC_API unsigned XXH_versionNumber (void);
/*-**********************************************************************
* 32-bits hash
************************************************************************/
typedef unsigned int XXH32_hash_t;
/*! XXH32() :
Calculate the 32-bits hash of sequence "length" bytes stored at memory address "input".
The memory between input & input+length must be valid (allocated and read-accessible).
"seed" can be used to alter the result predictably.
Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark) : 5.4 GB/s */
XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t length, unsigned int seed);
/*====== Streaming ======*/
typedef struct XXH32_state_s XXH32_state_t; /* incomplete type */
XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void);
XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr);
XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* dst_state, const XXH32_state_t* src_state);
XXH_PUBLIC_API XXH_errorcode XXH32_reset (XXH32_state_t* statePtr, unsigned int seed);
XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* statePtr, const void* input, size_t length);
XXH_PUBLIC_API XXH32_hash_t XXH32_digest (const XXH32_state_t* statePtr);
/*
These functions generate the xxHash of an input provided in multiple segments.
Note that, for small input, they are slower than single-call functions, due to state management.
For small input, prefer `XXH32()` and `XXH64()` .
XXH state must first be allocated, using XXH*_createState() .
Start a new hash by initializing state with a seed, using XXH*_reset().
Then, feed the hash state by calling XXH*_update() as many times as necessary.
Obviously, input must be allocated and read accessible.
The function returns an error code, with 0 meaning OK, and any other value meaning there is an error.
Finally, a hash value can be produced anytime, by using XXH*_digest().
This function returns the nn-bits hash as an int or long long.
It's still possible to continue inserting input into the hash state after a digest,
and generate some new hashes later on, by calling again XXH*_digest().
When done, free XXH state space if it was allocated dynamically.
*/
/*====== Canonical representation ======*/
typedef struct { unsigned char digest[4]; } XXH32_canonical_t;
XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash);
XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src);
/* Default result type for XXH functions are primitive unsigned 32 and 64 bits.
* The canonical representation uses human-readable write convention, aka big-endian (large digits first).
* These functions allow transformation of hash result into and from its canonical format.
* This way, hash values can be written into a file / memory, and remain comparable on different systems and programs.
*/
#ifndef XXH_NO_LONG_LONG
/*-**********************************************************************
* 64-bits hash
************************************************************************/
typedef unsigned long long XXH64_hash_t;
/*! XXH64() :
Calculate the 64-bits hash of sequence of length "len" stored at memory address "input".
"seed" can be used to alter the result predictably.
This function runs faster on 64-bits systems, but slower on 32-bits systems (see benchmark).
*/
XXH_PUBLIC_API XXH64_hash_t XXH64 (const void* input, size_t length, unsigned long long seed);
/*====== Streaming ======*/
typedef struct XXH64_state_s XXH64_state_t; /* incomplete type */
XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void);
XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr);
XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* dst_state, const XXH64_state_t* src_state);
XXH_PUBLIC_API XXH_errorcode XXH64_reset (XXH64_state_t* statePtr, unsigned long long seed);
XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* statePtr, const void* input, size_t length);
XXH_PUBLIC_API XXH64_hash_t XXH64_digest (const XXH64_state_t* statePtr);
/*====== Canonical representation ======*/
typedef struct { unsigned char digest[8]; } XXH64_canonical_t;
XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash);
XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src);
#endif /* XXH_NO_LONG_LONG */
#ifdef XXH_STATIC_LINKING_ONLY
/* ================================================================================================
This section contains definitions which are not guaranteed to remain stable.
They may change in future versions, becoming incompatible with a different version of the library.
They shall only be used with static linking.
Never use these definitions in association with dynamic linking !
=================================================================================================== */
/* These definitions are only meant to make possible
static allocation of XXH state, on stack or in a struct for example.
Never use members directly. */
/* Internal XXH32 streaming state.  It is exposed only so callers can
 * allocate it statically (on the stack or embedded in another struct);
 * the members are private to the implementation -- never access them
 * directly. */
struct XXH32_state_s {
    unsigned total_len_32;
    unsigned large_len;
    unsigned v1;   /* accumulator lanes -- presumably mirrors the XXH64 layout below; confirm against xxhash.c */
    unsigned v2;
    unsigned v3;
    unsigned v4;
    unsigned mem32[4];   /* buffer defined as U32 for alignment */
    unsigned memsize;
    unsigned reserved;   /* never read nor write, will be removed in a future version */
};   /* typedef'd to XXH32_state_t */
#ifndef XXH_NO_LONG_LONG /* remove 64-bits support */
/* Internal XXH64 streaming state (manipulated by XXH64_reset/update/digest
 * in xxhash.c).  Exposed only to allow static allocation; never access the
 * members directly. */
struct XXH64_state_s {
    unsigned long long total_len;   /* total number of bytes fed via XXH64_update() */
    unsigned long long v1;          /* the four round accumulators */
    unsigned long long v2;
    unsigned long long v3;          /* equals the seed until >= 32 bytes are processed */
    unsigned long long v4;
    unsigned long long mem64[4];   /* buffer defined as U64 for alignment */
    unsigned memsize;              /* bytes currently buffered in mem64 (0..31) */
    unsigned reserved[2];          /* never read nor write, will be removed in a future version */
};   /* typedef'd to XXH64_state_t */
#endif
#ifdef XXH_PRIVATE_API
# include "xxhash.c" /* include xxhash function bodies as `static`, for inlining */
#endif
#endif /* XXH_STATIC_LINKING_ONLY */
#if defined (__cplusplus)
}
#endif
#endif /* XXHASH_H_5627135585666179 */