more blobs poc

This commit is contained in:
Jedd Morgan
2025-11-24 18:28:50 +00:00
parent 94d2a01880
commit a560f7f159
18 changed files with 142 additions and 99 deletions
+65 -22
View File
@@ -1,5 +1,6 @@
using System.Diagnostics.CodeAnalysis;
using System.Diagnostics.Contracts;
using System.Runtime.CompilerServices;
using System.Security.Cryptography;
using System.Text;
#if NET6_0_OR_GREATER
@@ -8,47 +9,58 @@ using System.Runtime.InteropServices;
namespace Speckle.Sdk.Common;
/// <summary>
/// Helpers for hashing data to a hex string
/// </summary>
public static class Sha256
{
/// <summary>Numeric format applied to each hash byte by default: two lower-case hex digits.</summary>
public const string DEFAULT_FORMAT = "x2";
/// <summary>Length, in characters, of a complete SHA-256 digest written as hex.</summary>
public const int HASH_SIZE_CHARS = 64; // SHA256.HashSizeInBytes (32) * 2 hex digits per byte
#if NET6_0_OR_GREATER
/// <param name="input">the value to hash</param>
/// <param name="format"><c>"x2"</c> for lower case, <c>"X2"</c> for uppercase.</param>
/// <param name="length">Desired length of the returned string. Must be 2 &#x2264; Length &#x2264; 64, and must be a multiple of 2</param>
/// <returns><inheritdoc cref="GetString(string, string?, int)"/></returns>
[Pure]
public static string GetString(
ReadOnlySpan<char> input,
[StringSyntax(StringSyntaxAttribute.NumericFormat)] string? format = "x2",
int length = SHA256.HashSizeInBytes * sizeof(char)
)
/// <summary>
/// Hashes the in-memory bytes of <paramref name="input"/> with SHA-256 and writes the digest as hex into <paramref name="destination"/>.
/// </summary>
/// <remarks>
/// The characters are reinterpreted as bytes without an encoding step; the work is delegated to the byte-span overload.
/// </remarks>
/// <param name="formatUpperCase"><see langword="true"/> for upper case hex digits, <see langword="false"/> for lower case</param>
/// <param name="destination">Receives the hex digits; its length must be a multiple of 2, at least 2 and at most 64</param>
public static void Hash(ReadOnlySpan<char> input, bool formatUpperCase, Span<char> destination) =>
  Hash(MemoryMarshal.AsBytes(input), formatUpperCase, destination);
/// <summary>
/// Computes the SHA-256 digest of <paramref name="input"/> and writes it as hex characters into <paramref name="destination"/>.
/// </summary>
/// <param name="input">The bytes to hash</param>
/// <param name="formatUpperCase"><see langword="true"/> for upper case hex digits, <see langword="false"/> for lower case</param>
/// <param name="destination">Receives the hex digits; its length decides how many digits are written</param>
public static void Hash(ReadOnlySpan<byte> input, bool formatUpperCase, Span<char> destination)
{
// Scratch buffer sized for one full SHA-256 digest
Span<byte> hash = stackalloc byte[SHA256.HashSizeInBytes];
// NOTE(review): `inputBytes` and `length` are not declared in this method; the two statements that use them
// look like removed lines carried over from the diff view - confirm before building
SHA256.HashData(inputBytes, hash);
SHA256.HashData(input, hash);
Span<char> output = stackalloc char[length];
// Hex-encode the digest straight into the caller's buffer
FormatHash(hash, formatUpperCase, destination);
}
for (int i = 0, j = 0; j < length; i += sizeof(byte), j += sizeof(char))
/// <summary>
/// Computes the SHA-256 digest of the data read from <paramref name="source"/> and writes it as hex characters into <paramref name="destination"/>.
/// </summary>
/// <remarks>The stream is read but not disposed; the caller keeps ownership of it.</remarks>
/// <param name="source">The stream to hash</param>
/// <param name="formatUpperCase"><see langword="true"/> for upper case hex digits, <see langword="false"/> for lower case</param>
/// <param name="destination">Receives the hex digits; its length must be a multiple of 2, at least 2 and at most 64</param>
public static void Hash(Stream source, bool formatUpperCase, Span<char> destination)
{
  Span<byte> digest = stackalloc byte[SHA256.HashSizeInBytes];
#if NET7_0_OR_GREATER
  SHA256.HashData(source, digest);
#else
  // The static SHA256.HashData(Stream, Span<byte>) overload only exists from .NET 7,
  // so a .NET 6 build has to go through a hasher instance instead
  using (SHA256 hasher = SHA256.Create())
  {
    hasher.ComputeHash(source).AsSpan().CopyTo(digest);
  }
#endif
  FormatHash(digest, formatUpperCase, destination);
}
/// <summary>
/// Writes the leading bytes of <paramref name="input"/> into <paramref name="output"/> as two hex digits per byte,
/// filling <paramref name="output"/> completely.
/// </summary>
/// <param name="input">The raw hash bytes</param>
/// <param name="formatUpperCase"><see langword="true"/> for upper case hex digits, <see langword="false"/> for lower case</param>
/// <param name="output">
/// Receives the hex digits. Its length must be a multiple of 2 and no more than twice the length of
/// <paramref name="input"/>; an empty span results in nothing being written.
/// </param>
/// <exception cref="ArgumentException">
/// The length of <paramref name="output"/> is odd, or is larger than the number of hex digits <paramref name="input"/> can provide
/// </exception>
private static void FormatHash(ReadOnlySpan<byte> input, bool formatUpperCase, Span<char> output)
{
  const int CHARS_PER_BYTE = 2;

  // Reject bad lengths up front: an odd length would otherwise leave the final character unwritten
  // (the failed TryFormat result used to be discarded), and an oversized span would index past the digest
  if (output.Length % CHARS_PER_BYTE != 0 || output.Length > input.Length * CHARS_PER_BYTE)
  {
    throw new ArgumentException(
      $"Length must be a multiple of {CHARS_PER_BYTE} and at most {input.Length * CHARS_PER_BYTE}, but was {output.Length}",
      nameof(output)
    );
  }

  string format = formatUpperCase ? "X2" : "x2";
  int byteCount = output.Length / CHARS_PER_BYTE;
  for (int i = 0; i < byteCount; i++)
  {
    // Cannot fail: the slice is exactly the two characters that "x2"/"X2" produces for a byte
    input[i].TryFormat(output.Slice(i * CHARS_PER_BYTE, CHARS_PER_BYTE), out _, format);
  }
}
#endif
/// <param name="input">the value to hash</param>
/// <param name="format"><c>"x2"</c> for lower case, <c>"X2"</c> for uppercase.</param>
/// <param name="length">Desired length of the returned string</param>
/// <param name="outputLengthChars">Desired length of the returned string</param>
/// <returns>the hash string</returns>
/// <exception cref="FormatException"><paramref name="format"/> is not a recognised numeric format</exception>
/// <exception cref="ArgumentOutOfRangeException"><inheritdoc cref="StringBuilder.ToString(int, int)"/></exception>
[Pure]
public static string GetString(
public static string Hash(
string input,
[StringSyntax(StringSyntaxAttribute.NumericFormat)] string? format = "x2",
int length = 64
[StringSyntax(StringSyntaxAttribute.NumericFormat)] string? format = DEFAULT_FORMAT,
int outputLengthChars = HASH_SIZE_CHARS
)
{
var inputBytes = Encoding.Unicode.GetBytes(input);
@@ -59,12 +71,43 @@ public static class Sha256
byte[] hash = sha256.ComputeHash(inputBytes);
#endif
StringBuilder sb = new(64);
StringBuilder sb = new(HASH_SIZE_CHARS);
foreach (byte b in hash)
{
sb.Append(b.ToString(format));
}
return sb.ToString(0, length);
return sb.ToString(0, outputLengthChars);
}
/// <summary>
/// Computes the SHA-256 digest of the data read from <paramref name="input"/> and returns it as a hex string.
/// </summary>
/// <remarks>The stream is read but not disposed; the caller keeps ownership of it.</remarks>
/// <param name="input">The stream to hash</param>
/// <param name="format"><c>"x2"</c> for lower case, <c>"X2"</c> for uppercase.</param>
/// <param name="outputLengthChars">Desired length of the returned string</param>
/// <returns>The first <paramref name="outputLengthChars"/> characters of the formatted hash</returns>
/// <exception cref="FormatException"><paramref name="format"/> is not a recognised numeric format</exception>
/// <exception cref="ArgumentOutOfRangeException"><inheritdoc cref="StringBuilder.ToString(int, int)"/></exception>
[Pure]
public static string Hash(
  Stream input,
  [StringSyntax(StringSyntaxAttribute.NumericFormat)] string? format = DEFAULT_FORMAT,
  int outputLengthChars = HASH_SIZE_CHARS
)
{
  // The static SHA256.HashData(Stream) overload was only added in .NET 7; anything older
  // (including .NET 6) has to use a hasher instance
#if NET7_0_OR_GREATER
  byte[] hash = SHA256.HashData(input);
#else
  using var sha256 = SHA256.Create();
  byte[] hash = sha256.ComputeHash(input);
#endif
  return FormatHash(hash, format, outputLengthChars);
}
/// <summary>
/// Formats every byte of <paramref name="hash"/> with <paramref name="format"/>, concatenates the results
/// and returns the first <paramref name="outputLengthChars"/> characters.
/// </summary>
/// <param name="hash">The raw hash bytes</param>
/// <param name="format">Numeric format string applied to each byte</param>
/// <param name="outputLengthChars">Number of leading characters to return</param>
/// <returns>The truncated formatted string</returns>
[Pure]
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static string FormatHash(byte[] hash, string? format, int outputLengthChars)
{
  var builder = new StringBuilder(HASH_SIZE_CHARS);
  for (int index = 0; index < hash.Length; index++)
  {
    string formattedByte = hash[index].ToString(format);
    builder.Append(formattedByte);
  }

  // Truncate last, so the prefix is always taken from the fully formatted hash
  return builder.ToString(0, outputLengthChars);
}
}
+1 -4
View File
@@ -14,8 +14,6 @@ public sealed class Blob : Base
private string? _hash;
private bool _isHashExpired = true;
public Blob() { }
[SetsRequiredMembers]
public Blob(string filePath)
{
@@ -32,7 +30,6 @@ public sealed class Blob : Base
_isHashExpired = true;
}
}
public required string originalPath { get; set; }
[JsonIgnore]
@@ -51,7 +48,7 @@ public sealed class Blob : Base
{
if ((_isHashExpired || _hash == null))
{
_hash = HashUtility.HashFile(filePath);
_hash = HashUtility.CalculateBlobHash(filePath);
}
return _hash;
+27 -14
View File
@@ -1,26 +1,39 @@
using System.Diagnostics.CodeAnalysis;
using System.Security.Cryptography;
using System.Diagnostics.Contracts;
using Speckle.Sdk.Common;
using Speckle.Sdk.Serialisation;
namespace Speckle.Sdk.Models;
/// <summary>
/// Helper functions for calculating hash based Ids for Speckle core concepts
/// </summary>
public static class HashUtility
{
public enum HashingFunctions
/// <summary>Number of hex characters in the object ids and blob hashes calculated by this class.</summary>
public const int HASH_LENGTH_CHARS = 32;
/// <summary>
/// Calculates the id of a serialized object: the first <see cref="HASH_LENGTH_CHARS"/> lower-case hex characters
/// of the SHA-256 hash of its JSON text.
/// </summary>
/// <param name="serialized">The serialized JSON to hash</param>
/// <returns>The calculated id</returns>
[Pure]
public static Id ComputeObjectId(Json serialized)
{
// NOTE(review): the two identifiers below look like members of the removed HashingFunctions enum
// carried over from the diff view - confirm before building
SHA256,
MD5,
#if NET6_0_OR_GREATER
// Span path: the destination length decides how many hex characters are written
Span<char> hash = stackalloc char[HASH_LENGTH_CHARS];
Sha256.Hash(serialized.Value.AsSpan(), false, hash);
return new Id(new string(hash));
#else
// String path: default format, truncated to the same length
string hash = Sha256.Hash(serialized.Value, outputLengthChars: HashUtility.HASH_LENGTH_CHARS);
return new Id(hash);
#endif
}
public const int HASH_LENGTH = 32;
[SuppressMessage("Security", "CA5351:Do Not Use Broken Cryptographic Algorithms")]
public static string HashFile(string filePath, HashingFunctions func = HashingFunctions.SHA256)
/// <summary>
/// Calculates the hash of a file on disk: the first <see cref="HASH_LENGTH_CHARS"/> lower-case hex characters
/// of the SHA-256 hash of its content.
/// </summary>
/// <param name="filePath">Path of the file to open and hash</param>
/// <returns>The calculated hash string</returns>
[Pure]
public static string CalculateBlobHash(string filePath)
{
// NOTE(review): the `hashAlgorithm`, `ComputeHash` and `BitConverter` statements below reference the removed
// `func` parameter and `HASH_LENGTH` constant and look like removed lines from the diff view - confirm before building
using HashAlgorithm hashAlgorithm = func == HashingFunctions.MD5 ? MD5.Create() : SHA256.Create();
// Opened read-only and disposed when the method returns
using var stream = File.OpenRead(filePath);
var hash = hashAlgorithm.ComputeHash(stream);
return BitConverter.ToString(hash, 0, HASH_LENGTH).Replace("-", "").ToLowerInvariant();
#if NET6_0_OR_GREATER
// Span path: the destination length decides how many hex characters are written
Span<char> hash = stackalloc char[HASH_LENGTH_CHARS];
Sha256.Hash(stream, false, hash);
return new(hash);
#else
return Sha256.Hash(stream, "x2", HASH_LENGTH_CHARS);
#endif
}
}
@@ -1,19 +0,0 @@
using System.Diagnostics.Contracts;
using Speckle.Sdk.Common;
using Speckle.Sdk.Models;
namespace Speckle.Sdk.Serialisation;
public static class IdGenerator
{
[Pure]
public static Id ComputeId(Json serialized)
{
#if NET6_0_OR_GREATER
string hash = Sha256.GetString(serialized.Value.AsSpan(), length: HashUtility.HASH_LENGTH);
#else
string hash = Sha256.GetString(serialized.Value, length: HashUtility.HASH_LENGTH);
#endif
return new Id(hash);
}
}
@@ -358,7 +358,7 @@ public class SpeckleObjectSerializer
if (writer is SerializerIdWriter serializerIdWriter)
{
(var json, writer) = serializerIdWriter.FinishIdWriter();
id = IdGenerator.ComputeId(json);
id = HashUtility.ComputeObjectId(json);
}
else
{
@@ -1,4 +1,5 @@
using Microsoft.Extensions.Logging;
using Speckle.Sdk.Common;
using Speckle.Sdk.Dependencies;
using Speckle.Sdk.Dependencies.Serialization;
using Speckle.Sdk.SQLite;
@@ -25,7 +26,7 @@ public sealed class ObjectSaver(
IProgress<ProgressArgs>? progress,
ISqLiteJsonCacheManager sqLiteJsonCacheManager,
IServerObjectManager serverObjectManager,
IServerBlobManager serverBlobManager,
IServerBlobManager? serverBlobManager,
ILogger<ObjectSaver> logger,
SerializeProcessOptions options,
CancellationToken cancellationToken
@@ -45,7 +46,10 @@ public sealed class ObjectSaver(
protected override async Task SendBlobToServerInternal(Batch<BlobItem> batch)
{
var objectBatch = batch.Items.Distinct().Select(x => x.Blob).ToList();
// Callers should either set up a blob manager or not try to send blobs
serverBlobManager.NotNull("No blob manager was setup to handle sending blobs");
var objectBatch = batch.Items.Distinct().Select(x => (x.Blob.id.NotNull(), x.Blob.filePath)).ToList();
// var hasObjects = await serverBlobManager
// .HasObjects(objectBatch.Select(x => x.Id.Value).Freeze(), _cancellationTokenSource.Token)
// .ConfigureAwait(false);
@@ -54,9 +58,7 @@ public sealed class ObjectSaver(
{
// Interlocked.Add(ref _uploading, batch.Items.Count);
// progress?.Report(new(ProgressEvent.UploadingObjects, _uploading, null));
await serverBlobManager
.UploadBlobs(objectBatch, true, progress, _cancellationTokenSource.Token)
.ConfigureAwait(false);
await serverBlobManager.UploadBlobs(objectBatch, progress, _cancellationTokenSource.Token).ConfigureAwait(false);
}
}
@@ -343,7 +343,7 @@ public sealed class ObjectSerializer : IObjectSerializer
if (writer is SerializerIdWriter serializerIdWriter)
{
(var json, writer) = serializerIdWriter.FinishIdWriter();
id = IdGenerator.ComputeId(json);
id = HashUtility.ComputeObjectId(json);
}
else
{
@@ -29,13 +29,20 @@ public class SerializeProcessFactory(
var sqLiteJsonCacheManager = sqLiteJsonCacheManagerFactory.CreateFromStream(projectId);
var serverObjectManager = serverObjectManagerFactory.Create(url, projectId, authorizationToken);
var serverBlobManager = serverBlobManagerFactory.Create(url, projectId, authorizationToken);
return CreateSerializeProcess(sqLiteJsonCacheManager, serverObjectManager, serverBlobManager, progress, cancellationToken, options);
return CreateSerializeProcess(
sqLiteJsonCacheManager,
serverObjectManager,
serverBlobManager,
progress,
cancellationToken,
options
);
}
public ISerializeProcess CreateSerializeProcess(
ISqLiteJsonCacheManager sqLiteJsonCacheManager,
IServerObjectManager serverObjectManager,
IServerBlobManager serverBlobManager,
IServerBlobManager? serverBlobManager,
IProgress<ProgressArgs>? progress,
CancellationToken cancellationToken,
SerializeProcessOptions? options = null
@@ -1,12 +1,10 @@
using Speckle.InterfaceGenerator;
using Speckle.Sdk.Helpers;
using Speckle.Sdk.Logging;
namespace Speckle.Sdk.Serialisation.V2;
[GenerateAutoInterface]
public class ServerBlobManagerFactory(ISpeckleHttp speckleHttp, ISdkActivityFactory activityFactory)
: IServerBlobManagerFactory
public sealed class ServerBlobManagerFactory(ISpeckleHttp speckleHttp) : IServerBlobManagerFactory
{
public IServerBlobManager Create(
Uri serverUrl,
@@ -17,6 +15,6 @@ public class ServerBlobManagerFactory(ISpeckleHttp speckleHttp, ISdkActivityFact
{
var client = speckleHttp.CreateHttpClient(authorizationToken: authorizationToken);
client.BaseAddress = serverUrl;
return new ServerBlobManager(client);
return new ServerBlobManager(client, projectId);
}
}
@@ -1,22 +1,13 @@
using Speckle.InterfaceGenerator;
using Speckle.Sdk.Helpers;
using Speckle.Sdk.Transports;
using Speckle.Sdk.Transports.ServerUtils;
namespace Speckle.Sdk.Serialisation.V2;
[GenerateAutoInterface(VisibilityModifier = "public")]
internal sealed class ServerBlobManager : IServerBlobManager
internal sealed class ServerBlobManager(HttpClient authorizedClient, string projectId) : IServerBlobManager
{
private readonly HttpClient _authorizedClient;
public ServerBlobManager(HttpClient authorizedClient)
{
_authorizedClient = authorizedClient;
}
public async Task UploadBlobs(
string projectId,
IReadOnlyCollection<(string blobId, string filePath)> objects,
IProgress<ProgressArgs>? progress,
CancellationToken cancellationToken
@@ -33,9 +24,8 @@ internal sealed class ServerBlobManager : IServerBlobManager
var fileName = Path.GetFileName(filePath);
var stream = File.OpenRead(filePath);
StreamContent fsc = new(stream);
var hash = id.Split(':')[1];
multipartFormDataContent.Add(fsc, $"hash:{hash}", fileName);
multipartFormDataContent.Add(fsc, $"hash:{id}", fileName);
cancellationToken.ThrowIfCancellationRequested();
}
@@ -44,7 +34,7 @@ internal sealed class ServerBlobManager : IServerBlobManager
message.Method = HttpMethod.Post;
message.Content = new ProgressContent(multipartFormDataContent, progress);
using var response = await _authorizedClient.SendAsync(message, cancellationToken).ConfigureAwait(false);
using var response = await authorizedClient.SendAsync(message, cancellationToken).ConfigureAwait(false);
response.EnsureSuccessStatusCode();
}
@@ -59,6 +59,7 @@ public class CancellationTests
new DummySqLiteSendManager(),
new CancellationServerObjectManager(cancellationSource),
null,
null,
cancellationSource.Token,
new SerializeProcessOptions(true, true, false, true)
);
@@ -79,6 +80,7 @@ public class CancellationTests
new DummySqLiteSendManager(),
new CancellationServerObjectManager(cancellationSource),
null,
null,
cancellationSource.Token,
new SerializeProcessOptions(true, true, false, true)
);
@@ -40,6 +40,7 @@ public class DataObjectTests
new MemoryJsonCacheManager(json),
new DummyServerObjectManager(),
null,
null,
default,
new SerializeProcessOptions(false, false, true, true)
);
@@ -37,6 +37,7 @@ public class ExceptionTests
new MemoryJsonCacheManager(objects),
new ExceptionServerObjectManager(),
null,
null,
default,
new SerializeProcessOptions(false, false, false, true)
);
@@ -55,6 +56,7 @@ public class ExceptionTests
new ExceptionSendCacheManager(),
new MemoryServerObjectManager(new()),
null,
null,
default,
new SerializeProcessOptions(false, false, false, true)
);
@@ -92,6 +94,7 @@ public class ExceptionTests
new ExceptionSendCacheManager(exceptionsAfter: 10),
new MemoryServerObjectManager(new()),
null,
null,
default,
new SerializeProcessOptions(false, false, false, true)
{
@@ -146,7 +146,7 @@ public class SerializationTests
jObject.Remove("id");
jObject.Remove("__closure");
var jsonWithoutId = jObject.ToString(Formatting.None);
var newId = IdGenerator.ComputeId(new Json(jsonWithoutId));
var newId = HashUtility.ComputeObjectId(new Json(jsonWithoutId));
id.Should().Be(newId.Value);
}
@@ -227,6 +227,7 @@ public class SerializationTests
SqLiteJsonCacheManager.FromMemory(1),
new MemoryServerObjectManager(newIdToJson),
null,
null,
default,
new SerializeProcessOptions(false, false, false, true) { MaxCacheBatchSize = 1, MaxParallelism = concurrency }
)
@@ -60,7 +60,7 @@ public class BlobApiExceptionalTests : IAsyncLifetime
{
await writer.WriteLineAsync(PAYLOAD);
}
string id = HashUtility.HashFile(filePath);
string id = HashUtility.CalculateBlobHash(filePath);
var ex = await Assert.ThrowsAsync<HttpRequestException>(async () =>
await _sut.UploadBlobs("non-existent-project", [(id, filePath)], null, CancellationToken.None)
);
@@ -34,7 +34,7 @@ public class BlobApiTests : IAsyncLifetime
{
await writer.WriteLineAsync(PAYLOAD);
}
string id = HashUtility.HashFile(filePath);
string id = HashUtility.CalculateBlobHash(filePath);
//act
var preDiff = await _blobApi.HasBlobs(_project.id, [id], CancellationToken.None);
@@ -19,12 +19,14 @@ public class CryptSha256Hash
/// <summary>Benchmarks the string-based hashing helper on <c>testData</c> with its default arguments.</summary>
[Benchmark]
public string Sha256()
{
return Speckle.Sdk.Common.Sha256.GetString(testData);
return Speckle.Sdk.Common.Sha256.Hash(testData);
}
/// <summary>
/// Benchmarks the span-based hashing helper on <c>testData</c>, including the final conversion of the
/// character buffer to a string.
/// </summary>
[Benchmark]
public string Sha256_Span()
{
return Speckle.Sdk.Common.Sha256.GetString(testData.AsSpan());
// Full-length destination buffer, lower-case output
Span<char> resultLowerSpan = stackalloc char[Speckle.Sdk.Common.Sha256.HASH_SIZE_CHARS];
Speckle.Sdk.Common.Sha256.Hash(testData.AsSpan(), false, resultLowerSpan);
return new string(resultLowerSpan);
}
}
@@ -69,8 +69,8 @@ public sealed class HashUtilityTests
[MemberData(nameof(SmallTestCasesSha256))]
public void Sha256(string input, string expected, string _, int length)
{
var resultLower = Speckle.Sdk.Common.Sha256.GetString(input, "x2", length);
var resultUpper = Speckle.Sdk.Common.Sha256.GetString(input, "X2", length);
var resultLower = Speckle.Sdk.Common.Sha256.Hash(input, "x2", length);
var resultUpper = Speckle.Sdk.Common.Sha256.Hash(input, "X2", length);
resultLower.Should().Be(new string(expected.ToLower()[..length]));
@@ -86,19 +86,22 @@ public sealed class HashUtilityTests
int length //Span version of the function must have multiple of 2
)
{
var resultLowerSpan = Speckle.Sdk.Common.Sha256.GetString(input.AsSpan(), "x2", length);
var resultUpperSpan = Speckle.Sdk.Common.Sha256.GetString(input.AsSpan(), "X2", length);
Span<char> resultLowerSpan = stackalloc char[length];
Speckle.Sdk.Common.Sha256.Hash(input.AsSpan(), false, resultLowerSpan);
Span<char> resultUpperSpan = stackalloc char[length];
Speckle.Sdk.Common.Sha256.Hash(input.AsSpan(), true, resultUpperSpan);
resultLowerSpan.Should().Be(new string(expected.ToLower()[..length]));
new string(resultLowerSpan).Should().Be(new string(expected.ToLower()[..length]));
resultUpperSpan.Should().Be(new string(expected.ToUpper()[..length]));
new string(resultUpperSpan).Should().Be(new string(expected.ToUpper()[..length]));
}
[Theory]
[MemberData(nameof(LargeTestCases))]
public void Sha256_LargeDataTests(string input, string expected)
public void Sha256_Span_LargeDataTests(string input, string expected)
{
var computedHash = Speckle.Sdk.Common.Sha256.GetString(input.AsSpan());
computedHash.Should().Be(expected);
Span<char> output = stackalloc char[Speckle.Sdk.Common.Sha256.HASH_SIZE_CHARS];
Speckle.Sdk.Common.Sha256.Hash(input.AsSpan(), false, output);
new string(output).Should().Be(expected);
}
}