[PR]
×
[PR]上記の広告は3ヶ月以上新規記事投稿のないブログに表示されています。新しい記事を書く事で広告が消えます。
プログラミング、3DCGとその他いろいろについて
[PR]上記の広告は3ヶ月以上新規記事投稿のないブログに表示されています。新しい記事を書く事で広告が消えます。
前回は制限(制約)付きボルツマンマシン(Restricted Boltzmann Machine : RBM)の可視ニューロンを(GPUで)更新しました。
今回は隠れニューロンを更新します。
もちろん使うのはGPUです。
using System;
using System.Collections.Generic;
using System.Linq;
using Cloo;

/// <summary>
/// Small convenience layer over Cloo: picks the first platform that exposes a GPU,
/// creates one context over all of its GPU devices and one command queue on the
/// first of them, and offers helpers for sizing work groups and reading buffers.
/// </summary>
public static class EasyOpenCL
{
    /// <summary>Context shared by every buffer, program and kernel in this file.</summary>
    public static ComputeContext ComputeContext;

    /// <summary>GPU devices of the selected platform.</summary>
    public static ComputeDevice[] Devices { get; private set; }

    /// <summary>Command queue bound to <c>Devices[0]</c>.</summary>
    public static ComputeCommandQueue CommandQueue { get; private set; }

    /// <summary>Number of input elements each work-item combines before the local reduction.</summary>
    public const int Multiply = 2;

    /// <summary>
    /// Work-group size used for padding in <see cref="GetStride"/>. Despite the name,
    /// <see cref="GetGroupSize"/> uses it as an upper bound, not a lower one.
    /// </summary>
    public const int MinGroupSize = 256;

    static EasyOpenCL()
    {
        // First() throws when no platform exposes a GPU device.
        ComputePlatform platform = ComputePlatform.Platforms
            .First(p => p.Devices.Any(d => d.Type == ComputeDeviceTypes.Gpu));

        Devices = platform
            .Devices
            .Where(d => d.Type == ComputeDeviceTypes.Gpu)
            .ToArray();

        ComputeContext = new ComputeContext(
            Devices,
            new ComputeContextPropertyList(platform),
            null,
            System.IntPtr.Zero
        );

        CommandQueue = new ComputeCommandQueue(
            EasyOpenCL.ComputeContext,
            EasyOpenCL.Devices[0],
            ComputeCommandQueueFlags.None
        );
    }

    /// <summary>Copies the whole buffer back to the host with a blocking read.</summary>
    /// <param name="buffer">Device buffer to read.</param>
    /// <returns>A new array holding <c>buffer.Count</c> elements.</returns>
    public static T[] Read<T>(this ComputeBuffer<T> buffer) where T : struct
    {
        var result = new T[buffer.Count];
        EasyOpenCL.CommandQueue.ReadFromBuffer(buffer, ref result, true, null);
        return result;
    }

    /// <summary>
    /// Returns the local work-group size to use for <paramref name="kernel"/>: the
    /// largest power of two that does not exceed either <see cref="MinGroupSize"/> or
    /// the kernel's work-group limit on <c>Devices[0]</c>.
    /// </summary>
    /// <remarks>
    /// The reduction kernel halves its active range on every step, and the global size
    /// derived from <see cref="GetStride"/> is a multiple of <see cref="MinGroupSize"/>;
    /// a power-of-two group size satisfies both requirements.
    /// </remarks>
    public static int GetGroupSize(ComputeKernel kernel)
    {
        long limit = Math.Min(MinGroupSize, kernel.GetWorkGroupSize(Devices[0]));

        int groupSize = 1;
        while (groupSize * 2 <= limit)
        {
            groupSize *= 2;
        }

        return groupSize;
    }

    /// <summary>
    /// Rounds <paramref name="elementCount"/> up to a multiple of
    /// <c>Multiply * MinGroupSize</c>.
    /// </summary>
    public static int GetStride(int elementCount)
    {
        int MulFactor = Multiply * MinGroupSize;
        return (((elementCount - 1) / MulFactor) + 1) * MulFactor;
    }
}

/// <summary>
/// Demo program: runs the <c>updateHiddenNeuron</c> kernel once on the GPU and prints
/// its results next to the same sums computed on the CPU.
/// </summary>
class HiddenNeuronUpdaterReduction
{
    const int visibleNeuronCount = 5;
    const int hiddenNeuronCount = 4;

    // Number of kernel launches included in the timed section.
    const int iterationCount = 1;

    // weights[v * hiddenNeuronCount + h] connects visible neuron v to hidden neuron h.
    static float[] weights = Enumerable
        .Range(0, visibleNeuronCount * hiddenNeuronCount)
        .Select(i => (float)i)
        .ToArray();

    static float[] visibleNeuronValues = Enumerable
        .Range(0, visibleNeuronCount)
        .Select(i => (float)i)
        .ToArray();

    static float[] hiddenBiases = new float[hiddenNeuronCount];

    static void Main()
    {
        // Every OpenCL object created during the run is pushed here so that it is
        // released (most recent first) even when the run fails part-way.
        var owned = new Stack<IDisposable>();
        try
        {
            runOnGpu(owned);
        }
        finally
        {
            foreach (var resource in owned)
            {
                resource.Dispose();
            }
        }
    }

    /// <summary>Registers <paramref name="resource"/> for disposal and returns it.</summary>
    private static T track<T>(Stack<IDisposable> owned, T resource) where T : IDisposable
    {
        owned.Push(resource);
        return resource;
    }

    /// <summary>
    /// Builds the kernel, uploads the inputs, launches the hidden-neuron update and
    /// prints the GPU results together with the CPU reference values.
    /// </summary>
    private static void runOnGpu(Stack<IDisposable> owned)
    {
        var context = EasyOpenCL.ComputeContext;

        var program = track(owned, new ComputeProgram(
            context,
            System.IO.File.ReadAllText("hiddenNeuronUpdater.cl")
        ));
        try
        {
            program.Build(EasyOpenCL.Devices, null, null, System.IntPtr.Zero);
        }
        catch
        {
            // Show the compiler output, then propagate: a program that failed to
            // build cannot produce a usable kernel.
            System.Console.WriteLine(program.GetBuildLog(EasyOpenCL.Devices[0]));
            throw;
        }

        var weightBuffer = track(owned, new ComputeBuffer<float>(
            context,
            ComputeMemoryFlags.ReadWrite | ComputeMemoryFlags.CopyHostPointer,
            weights
        ));

        // updateHiddenNeuron
        var updateHiddenNeuronKernel = track(owned, program.CreateKernel("updateHiddenNeuron"));

        var visibleNeuronValueBuffer = track(owned, new ComputeBuffer<float>(
            context,
            ComputeMemoryFlags.ReadWrite | ComputeMemoryFlags.CopyHostPointer,
            visibleNeuronValues
        ));
        var hiddenNeuronBiasBuffer = track(owned, new ComputeBuffer<float>(
            context,
            ComputeMemoryFlags.ReadWrite | ComputeMemoryFlags.CopyHostPointer,
            hiddenBiases
        ));
        var hiddenNeuronValueBuffer = track(owned, new ComputeBuffer<float>(
            context,
            ComputeMemoryFlags.ReadWrite,
            hiddenBiases.Length
        ));
        var hiddenNeuronProbabilityBuffer = track(owned, new ComputeBuffer<float>(
            context,
            ComputeMemoryFlags.ReadWrite,
            hiddenBiases.Length
        ));

        // One independent generator state per hidden neuron, seeded deterministically.
        var random = new Random(0);
        var hiddenNeuronRandoms = Enumerable
            .Range(0, hiddenBiases.Length)
            .Select(i => new Xorshift128Random(random.Next()))
            .ToArray();
        var hiddenNeuronRandomBuffer = track(owned, new ComputeBuffer<Xorshift128Random>(
            context,
            ComputeMemoryFlags.ReadWrite | ComputeMemoryFlags.CopyHostPointer,
            hiddenNeuronRandoms
        ));

        var hiddenNeuronInputStride = EasyOpenCL.GetStride(visibleNeuronCount);
        var hiddenNeuronUpdateGroupSize = EasyOpenCL.GetGroupSize(updateHiddenNeuronKernel);

        // Each work-item consumes Multiply inputs, so the second launch dimension is
        // stride / Multiply; every work group along it writes one partial sum.
        var workItemsPerNeuron = hiddenNeuronInputStride / EasyOpenCL.Multiply;
        var groupsPerNeuron = workItemsPerNeuron / hiddenNeuronUpdateGroupSize;

        var hiddenNeuronIntermediateResultBuffer = track(owned, new ComputeBuffer<float>(
            context,
            ComputeMemoryFlags.ReadWrite,
            hiddenNeuronCount * groupsPerNeuron
        ));

        updateHiddenNeuronKernel.SetMemoryArgument(0, hiddenNeuronValueBuffer);
        updateHiddenNeuronKernel.SetMemoryArgument(1, hiddenNeuronProbabilityBuffer);
        updateHiddenNeuronKernel.SetMemoryArgument(2, hiddenNeuronIntermediateResultBuffer);
        updateHiddenNeuronKernel.SetMemoryArgument(3, hiddenNeuronRandomBuffer);
        updateHiddenNeuronKernel.SetMemoryArgument(4, hiddenNeuronBiasBuffer);
        updateHiddenNeuronKernel.SetMemoryArgument(5, weightBuffer);
        updateHiddenNeuronKernel.SetMemoryArgument(6, visibleNeuronValueBuffer);
        updateHiddenNeuronKernel.SetValueArgument(7, visibleNeuronCount);
        updateHiddenNeuronKernel.SetLocalArgument(8, hiddenNeuronUpdateGroupSize * sizeof(float));

        var stopwatch = new System.Diagnostics.Stopwatch();
        stopwatch.Start();
        for (int i = 0; i < iterationCount; i++)
        {
            EasyOpenCL.CommandQueue.Execute(
                updateHiddenNeuronKernel,
                null,
                new long[] { hiddenNeuronCount, workItemsPerNeuron },
                new long[] { 1, hiddenNeuronUpdateGroupSize },
                null);
        }
        // Wait for the queued launches so the measured time covers their execution.
        EasyOpenCL.CommandQueue.Finish();
        stopwatch.Stop();

        System.Console.WriteLine("calculation on gpu : " + stopwatch.Elapsed.TotalMilliseconds + "[ms]");

        System.Console.WriteLine("visible neuron inputs (cpu)");
        writeNeuronMatrix(getNeuronInputsByCpu(), hiddenNeuronCount);

        System.Console.WriteLine("intermediate result (gpu)");
        var intermediate = hiddenNeuronIntermediateResultBuffer.Read();
        var columns = intermediate.Length / hiddenNeuronCount;
        for (int i = 0; i < hiddenNeuronCount; i++)
        {
            for (int j = 0; j < columns; j++)
            {
                System.Console.Write(intermediate[i * columns + j] + "\t");
            }
            System.Console.WriteLine();
        }

        System.Console.WriteLine("hidden neuron probabilities (gpu)");
        foreach (var number in hiddenNeuronProbabilityBuffer.Read())
        {
            System.Console.Write(number + " ");
        }
        System.Console.WriteLine();

        System.Console.WriteLine("hidden neuron probabilities (cpu)");
        foreach (var number in getNeuronValuesByCpu())
        {
            System.Console.Write(number + " ");
        }
    }

    /// <summary>
    /// Computes every individual product weight * visible value on the CPU.
    /// </summary>
    /// <returns>
    /// Array indexed as <c>visibleNeuronIndex * hiddenNeuronCount + hiddenNeuronIndex</c>.
    /// </returns>
    private static float[] getNeuronInputsByCpu()
    {
        var result = new float[visibleNeuronCount * hiddenNeuronCount];
        for (int visibleNeuronIndex = 0; visibleNeuronIndex < visibleNeuronCount; visibleNeuronIndex++)
        {
            for (int hiddenNeuronIndex = 0; hiddenNeuronIndex < hiddenNeuronCount; hiddenNeuronIndex++)
            {
                int index = visibleNeuronIndex * hiddenNeuronCount + hiddenNeuronIndex;
                result[index] = weights[index] * visibleNeuronValues[visibleNeuronIndex];
            }
        }
        return result;
    }

    /// <summary>
    /// Prints a visibleNeuronCount x hiddenNeuronCount matrix, one tab-separated row
    /// per visible neuron.
    /// </summary>
    /// <param name="neuronMatrix">Row-major matrix data.</param>
    /// <param name="stride">Distance, in elements, between the starts of two rows.</param>
    private static void writeNeuronMatrix(float[] neuronMatrix, int stride)
    {
        for (int i = 0; i < visibleNeuronCount; i++)
        {
            for (int j = 0; j < hiddenNeuronCount; j++)
            {
                System.Console.Write(neuronMatrix[i * stride + j] + "\t");
            }
            System.Console.WriteLine();
        }
    }

    /// <summary>
    /// CPU reference for the kernel output: for each hidden neuron, the sum over all
    /// visible neurons of weight * visible value. Bias and sigmoid are not applied,
    /// matching the kernel, where that step is currently commented out.
    /// </summary>
    private static float[] getNeuronValuesByCpu()
    {
        var result = new float[hiddenNeuronCount];
        for (int hiddenNeuronIndex = 0; hiddenNeuronIndex < hiddenNeuronCount; hiddenNeuronIndex++)
        {
            var sum = 0f;
            for (int visibleNeuronIndex = 0; visibleNeuronIndex < visibleNeuronCount; visibleNeuronIndex++)
            {
                sum += weights[visibleNeuronIndex * hiddenNeuronCount + hiddenNeuronIndex]
                    * visibleNeuronValues[visibleNeuronIndex];
            }
            result[hiddenNeuronIndex] = sum;
        }
        return result;
    }

    /// <summary>Logistic sigmoid; currently unused because the kernel bypasses it too.</summary>
    static float sigmoid(float x)
    {
        return 1f / (1f + (float)Math.Exp(-x));
    }
}
// Logistic sigmoid.
float sigmoid(float x)
{
    return 1.0f / (1.0f + exp(-x));
}

// State of one xorshift128 generator. The host uploads a buffer of these, so the
// field order and 32-bit field width are kept exactly as they were.
typedef struct
{
    int w;
    int x;
    int y;
    int z;
} Xorshift128Random;

// Advances the generator by one step and returns the new 32-bit output.
// All shifts are done on unsigned values: with signed ints the right shifts would
// replicate the sign bit and the left shift could overflow, neither of which is
// part of the xorshift128 recurrence.
int next(Xorshift128Random* random)
{
    uint x = (uint)random->x;
    uint w = (uint)random->w;
    uint t = x ^ (x << 11);

    random->x = random->y;
    random->y = random->z;
    random->z = random->w;

    w = (w ^ (w >> 19)) ^ (t ^ (t >> 8));
    random->w = (int)w;
    return random->w;
}

// Returns a uniformly distributed float in [0, 1).
// The top 24 bits of the output are used so the value is exactly representable and
// never reaches 1.0; treating the output as a signed int (as before) produced
// negative numbers, which made nextBool succeed far more often than `probability`.
float nextFloat(Xorshift128Random* random)
{
    uint bits = (uint)next(random);
    return (float)(bits >> 8) * (1.0f / 16777216.0f);
}

// Draws one float from the generator stored at randoms[index] and writes the
// advanced state back to global memory.
float nextFloatFromRandoms(__global Xorshift128Random *randoms, int index)
{
    Xorshift128Random random = randoms[index];
    float result = nextFloat(&random);
    randoms[index] = random;
    return result;
}

// Returns non-zero with the given probability, using the generator at randoms[index].
int nextBool(__global Xorshift128Random *randoms, int index, float probability)
{
    return nextFloatFromRandoms(randoms, index) < probability;
}

// Computes, for every hidden neuron, the weighted sum of the visible neuron values
// with a work-group tree reduction, then samples the neuron's binary value.
//
// Launch layout (as read from the code below):
//   dimension 0: one index per hidden neuron
//   dimension 1: one work-item per PAIR of visible neurons; the local size along
//                this dimension must be a power of two for the halving loop.
// localData must provide get_local_size(1) floats.
__kernel void updateHiddenNeuron(
    __global float *hiddenNeuronValues,
    __global float *hiddenNeuronProbabilities,
    __global float *intermediateResults,
    __global Xorshift128Random *hiddenNeuronRandoms,
    const __global float *hiddenNeuronBiases,
    const __global float *weights,
    const __global float *visibleNeuronValues,
    int visibleNeuronCount,
    __local float *localData
)
{
    int hiddenNeuronIndex = get_global_id(0);
    int visibleNeuronIndex = get_global_id(1);
    int localID = get_local_id(1);
    int hiddenNeuronCount = get_global_size(0);

    // Each work-item handles visible neurons 2k and 2k+1; indices past the real
    // neuron count are padding and contribute zero.
    int firstVisible = visibleNeuronIndex * 2;
    int secondVisible = firstVisible + 1;

    float firstInput = (firstVisible < visibleNeuronCount)
        ? weights[firstVisible * hiddenNeuronCount + hiddenNeuronIndex] * visibleNeuronValues[firstVisible]
        : 0.0f;
    float secondInput = (secondVisible < visibleNeuronCount)
        ? weights[secondVisible * hiddenNeuronCount + hiddenNeuronIndex] * visibleNeuronValues[secondVisible]
        : 0.0f;

    localData[localID] = firstInput + secondInput;
    barrier(CLK_LOCAL_MEM_FENCE);

    // Tree reduction in local memory: the active range is halved on every step.
    for(int s = get_local_size(1) / 2; 0 < s; s /= 2)
    {
        if(localID < s)
        {
            localData[localID] += localData[localID + s];
        }
        barrier(CLK_LOCAL_MEM_FENCE);
    }

    // One partial sum per work group.
    if(localID == 0)
    {
        intermediateResults[hiddenNeuronIndex * get_num_groups(1) + get_group_id(1)] = localData[0];
    }

    // NOTE(review): barrier() only synchronizes the work-items of ONE work group.
    // The loop below reads the partial sums of every group along dimension 1, so the
    // result is only well-defined while get_num_groups(1) == 1. Supporting more
    // groups needs a second kernel launch (or a host-side sum) for the final step.
    barrier(CLK_GLOBAL_MEM_FENCE);

    if(visibleNeuronIndex == 0)
    {
        float sum = 0;
        for(int i = 0; i < get_num_groups(1); i++)
        {
            sum += intermediateResults[hiddenNeuronIndex * get_num_groups(1) + i];
        }

        // The raw sum is published so it can be compared with the CPU reference;
        // the bias + sigmoid step is disabled for now:
        float probability = sum;//sigmoid(sum + hiddenNeuronBiases[hiddenNeuronIndex]);
        hiddenNeuronProbabilities[hiddenNeuronIndex] = probability;
        hiddenNeuronValues[hiddenNeuronIndex] = nextBool(hiddenNeuronRandoms, hiddenNeuronIndex, probability) ? 1 : 0;
    }
}
実行するとこんな感じになります:
calculation on gpu : 19.8279[ms]
visible neuron inputs (cpu)
0 0 0 0
4 5 6 7
16 18 20 22
36 39 42 45
64 68 72 76
intermediate result (gpu)
120
130
140
150
hidden neuron probabilities (gpu)
120 130 140 150
hidden neuron probabilities (cpu)
120 130 140 150 続行するには何かキーを押してください . . .
上手く動いているようですね。
GPUで計算した結果とCPUで計算した結果が一致しています。