忍者ブログ

Memeplexes

プログラミング、3DCGとその他いろいろについて

GPUでニューラルネットワーク更新(Restricted Boltzmann Machine : RBMの隠れニューロン更新)(OpenCL)

隠れニューロンの更新

前回は制限(制約)付きボルツマンマシン(Restricted Boltzmann Machine : RBM)の可視ニューロンを(GPUで)更新しました。
今回は隠れニューロンを更新します。
もちろん使うのはGPUです。


サンプルコード

HiddenNeuronUpdaterReduction.cs

using System;
using System.Linq;
using Cloo;

/// <summary>
/// Shared OpenCL state (context, GPU devices, command queue) plus a few small
/// helpers for the samples. All state is created once, in the static constructor.
/// </summary>
public static class EasyOpenCL
{
    /// <summary>
    /// Context created over every GPU device of the selected platform.
    /// Read-only from outside, like <see cref="Devices"/> and <see cref="CommandQueue"/>,
    /// so it cannot drift apart from the queue that was created on it.
    /// </summary>
    public static ComputeContext ComputeContext { get; private set; }

    /// <summary>GPU devices of the selected platform.</summary>
    public static ComputeDevice[] Devices { get; private set; }

    /// <summary>Command queue bound to the first GPU device (<c>Devices[0]</c>).</summary>
    public static ComputeCommandQueue CommandQueue { get; private set; }

    /// <summary>
    /// Ratio between the padded element count returned by <see cref="GetStride"/>
    /// and the number of work-items the sample launches along the reduction axis.
    /// </summary>
    public const int Multiply = 2;

    /// <summary>
    /// Despite its name, <see cref="GetGroupSize"/> uses this value as an upper bound
    /// for the work-group size; <see cref="GetStride"/> uses it as padding granularity.
    /// </summary>
    public const int MinGroupSize = 256;

    /// <summary>
    /// Selects the first platform that exposes at least one GPU device and creates
    /// the context and command queue on it.
    /// </summary>
    /// <exception cref="InvalidOperationException">
    /// No platform with a GPU device exists. Because this is a static constructor,
    /// callers observe it wrapped in a <see cref="TypeInitializationException"/>.
    /// </exception>
    static EasyOpenCL()
    {
        ComputePlatform platform = ComputePlatform.Platforms
            .FirstOrDefault(p => p.Devices.Any(d => d.Type == ComputeDeviceTypes.Gpu));

        // Same exception type that First(...) would have thrown, but with a message
        // that tells the user what is actually missing.
        if (platform == null)
        {
            throw new InvalidOperationException(
                "No OpenCL platform with a GPU device was found."
                );
        }

        Devices = platform
            .Devices
            .Where(d => d.Type == ComputeDeviceTypes.Gpu)
            .ToArray();
        ComputeContext = new ComputeContext(
            Devices,
            new ComputeContextPropertyList(platform),
            null,
            System.IntPtr.Zero
            );
        CommandQueue = new ComputeCommandQueue(
            EasyOpenCL.ComputeContext,
            EasyOpenCL.Devices[0],
            ComputeCommandQueueFlags.None
            );
    }

    /// <summary>
    /// Copies the whole buffer back to a new host array using a blocking read
    /// on <see cref="CommandQueue"/>.
    /// </summary>
    /// <param name="buffer">Device buffer to read.</param>
    /// <returns>A new array with <c>buffer.Count</c> elements.</returns>
    public static T[] Read<T>(this ComputeBuffer<T> buffer) where T : struct
    {
        var result = new T[buffer.Count];
        EasyOpenCL.CommandQueue.ReadFromBuffer(buffer, ref result, true, null);
        return result;
    }

    /// <summary>
    /// Returns the work-group size to use for <paramref name="kernel"/>: the size the
    /// kernel reports for <c>Devices[0]</c>, capped at <see cref="MinGroupSize"/>.
    /// </summary>
    public static int GetGroupSize(ComputeKernel kernel)
    {
        return (int)Math.Min(MinGroupSize, kernel.GetWorkGroupSize(Devices[0]));
    }


    /// <summary>
    /// Rounds <paramref name="elementCount"/> up to the next multiple of
    /// <c>Multiply * MinGroupSize</c> (512 with the current constants).
    /// </summary>
    /// <param name="elementCount">Number of real elements.</param>
    /// <returns>The padded element count; never less than one full multiple.</returns>
    public static int GetStride(int elementCount)
    {
        int granularity = Multiply * MinGroupSize;
        return (((elementCount - 1) / granularity) + 1) * granularity;
    }
}

/// <summary>
/// Sample program: runs the "updateHiddenNeuron" OpenCL kernel for a tiny RBM
/// (5 visible, 4 hidden neurons) and prints the GPU results next to a CPU
/// reference calculation.
/// </summary>
class HiddenNeuronUpdaterReduction
{
    const int visibleNeuronCount = 5;
    const int hiddenNeuronCount = 4;

    // Test data 0, 1, 2, ...; weights[v * hiddenNeuronCount + h] is the weight
    // between visible neuron v and hidden neuron h.
    static float[] weights = Enumerable
        .Range(0, visibleNeuronCount * hiddenNeuronCount)
        .Select(i => (float)i)
        .ToArray();
    // Test data: visible neuron v holds the value v.
    static float[] visibleNeuronValues = Enumerable
        .Range(0, visibleNeuronCount)
        .Select(i => (float)i)
        .ToArray();
    // All biases are zero.
    static float[] hiddenBiases = new float[hiddenNeuronCount];

    /// <summary>
    /// Builds the kernel, uploads the test data, runs the kernel once and prints
    /// the timing, the intermediate partial sums and the GPU/CPU results.
    /// </summary>
    static void Main()
    {
        var context = EasyOpenCL.ComputeContext;
        var program = new ComputeProgram(
            context,
            System.IO.File.ReadAllText("hiddenNeuronUpdater.cl")
            );
        try
        {
            program.Build(EasyOpenCL.Devices, null, null, System.IntPtr.Zero);
        }
        catch
        {
            // Show the compiler output, then rethrow: without a successfully built
            // program the CreateKernel call below cannot work, and carrying on
            // would only replace the real error with a confusing secondary one.
            System.Console.WriteLine(program.GetBuildLog(EasyOpenCL.Devices[0]));
            throw;
        }
        var weightBuffer = new ComputeBuffer<float>(
            context,
            ComputeMemoryFlags.ReadWrite | ComputeMemoryFlags.CopyHostPointer,
            weights
            );
        // Allocated but not passed to the kernel in this sample.
        var deltaWeightBuffer = new ComputeBuffer<float>(
            context,
            ComputeMemoryFlags.ReadWrite | ComputeMemoryFlags.CopyHostPointer,
            new float[weightBuffer.Count]
                );

        // updateHiddenNeuron
        var updateHiddenNeuronKernel = program.CreateKernel("updateHiddenNeuron");
        var visibleNeuronValueBuffer = new ComputeBuffer<float>(
            context,
            ComputeMemoryFlags.ReadWrite | ComputeMemoryFlags.CopyHostPointer,
            visibleNeuronValues
            );
        var hiddenNeuronBiasBuffer = new ComputeBuffer<float>(
            context,
            ComputeMemoryFlags.ReadWrite | ComputeMemoryFlags.CopyHostPointer,
            hiddenBiases
            );
        // Allocated but not passed to the kernel in this sample.
        var hiddenNeuronDeltaBiasBuffer = new ComputeBuffer<float>(
            context,
            ComputeMemoryFlags.ReadWrite | ComputeMemoryFlags.CopyHostPointer,
            Enumerable.Range(0, hiddenBiases.Length).Select(i => 0f).ToArray()
            );
        // Output buffers: one element per hidden neuron.
        var hiddenNeuronValueBuffer = new ComputeBuffer<float>(
            context,
            ComputeMemoryFlags.ReadWrite,
            hiddenBiases.Length
            );
        var hiddenNeuronProbabilityBuffer = new ComputeBuffer<float>(
            context,
            ComputeMemoryFlags.ReadWrite,
            hiddenBiases.Length
            );

        // One random number generator state per hidden neuron, seeded from a
        // fixed-seed System.Random so that runs are repeatable.
        var random = new Random(0);
        var hiddenNeuronRandoms = Enumerable
                .Range(0, hiddenBiases.Length)
                .Select(i => new Xorshift128Random(random.Next())).ToArray();
        var hiddenNeuronRandomBuffer = new ComputeBuffer<Xorshift128Random>(
            context,
            ComputeMemoryFlags.ReadWrite | ComputeMemoryFlags.CopyHostPointer,
            hiddenNeuronRandoms
            );
        // Visible neuron count padded up to a multiple of Multiply * MinGroupSize.
        var hiddenNeuronInputStride = EasyOpenCL.GetStride(visibleNeuronCount);
        var hiddenNeuronUpdateGroupSize = EasyOpenCL.GetGroupSize(updateHiddenNeuronKernel);
        // One partial sum per (hidden neuron, work-group along dimension 1).
        var hiddenNeuronIntermediateResultBuffer = new ComputeBuffer<float>(
            context,
            ComputeMemoryFlags.ReadWrite,
            hiddenNeuronCount * hiddenNeuronInputStride / EasyOpenCL.Multiply / hiddenNeuronUpdateGroupSize
            );
        // Argument order must match the parameter list of the kernel.
        updateHiddenNeuronKernel.SetMemoryArgument(0, hiddenNeuronValueBuffer);
        updateHiddenNeuronKernel.SetMemoryArgument(1, hiddenNeuronProbabilityBuffer);
        updateHiddenNeuronKernel.SetMemoryArgument(2, hiddenNeuronIntermediateResultBuffer);
        updateHiddenNeuronKernel.SetMemoryArgument(3, hiddenNeuronRandomBuffer);
        updateHiddenNeuronKernel.SetMemoryArgument(4, hiddenNeuronBiasBuffer);
        updateHiddenNeuronKernel.SetMemoryArgument(5, weightBuffer);
        updateHiddenNeuronKernel.SetMemoryArgument(6, visibleNeuronValueBuffer);
        updateHiddenNeuronKernel.SetValueArgument(7, visibleNeuronCount);
        // Local scratch memory: one float per work-item of a work-group.
        updateHiddenNeuronKernel.SetLocalArgument(8, hiddenNeuronUpdateGroupSize * sizeof(float));

        var stopwatch = new System.Diagnostics.Stopwatch();
        stopwatch.Start();

        for (int i = 0; i < 1; i++)
        {
            // Dimension 0: one slice per hidden neuron.
            // Dimension 1: stride / Multiply work-items, grouped by the chosen group size.
            EasyOpenCL.CommandQueue.Execute(
            updateHiddenNeuronKernel,
            null,
            new long[] { hiddenNeuronCount, hiddenNeuronInputStride / EasyOpenCL.Multiply },
            new long[] { 1, hiddenNeuronUpdateGroupSize },
            null);

        }

        // Wait for the queued kernel so that the stopwatch covers its execution.
        EasyOpenCL.CommandQueue.Finish();

        stopwatch.Stop();
        System.Console.WriteLine("calculation on gpu : " + stopwatch.Elapsed.TotalMilliseconds + "[ms]");
        System.Console.WriteLine("visible neuron inputs (cpu)");
        writeNeuronMatrix(getNeuronInputsByCpu(), hiddenNeuronCount);
        System.Console.WriteLine("intermediate result (gpu)");
        var intermediate = hiddenNeuronIntermediateResultBuffer.Read();

        // One output row per hidden neuron, one column per work-group.
        for (int i = 0; i < hiddenNeuronCount; i++)
        {
            for (int j = 0; j < intermediate.Length / hiddenNeuronCount; j++)
            {
                System.Console.Write(
                    intermediate[i * intermediate.Length / hiddenNeuronCount + j] + "\t"
                    );
            }

            System.Console.WriteLine();
        }

        System.Console.WriteLine("hidden neuron probabilities (gpu)");

        foreach (var number in hiddenNeuronProbabilityBuffer.Read())
        {
            System.Console.Write(number + " ");
        }

        System.Console.WriteLine();
        System.Console.WriteLine("hidden neuron probabilities (cpu)");

        foreach (var number in getNeuronValuesByCpu())
        {
            System.Console.Write(number + " ");
        }
    }

    /// <summary>
    /// CPU reference: the individual products weight * visible value for every
    /// (visible, hidden) pair, laid out like <c>weights</c>.
    /// </summary>
    /// <returns>Array of <c>visibleNeuronCount * hiddenNeuronCount</c> products.</returns>
    private static float[] getNeuronInputsByCpu()
    {
        var result = new float[visibleNeuronCount * hiddenNeuronCount];

        for (int visibleNeuronIndex = 0; visibleNeuronIndex < visibleNeuronCount; visibleNeuronIndex++)
        {
            for (int hiddenNeuronIndex = 0; hiddenNeuronIndex < hiddenNeuronCount; hiddenNeuronIndex++)
            {
                result[visibleNeuronIndex * hiddenNeuronCount + hiddenNeuronIndex]
                    += weights[visibleNeuronIndex * hiddenNeuronCount + hiddenNeuronIndex]
                    * visibleNeuronValues[visibleNeuronIndex];
            }
        }

        return result;
    }

    /// <summary>
    /// Prints <paramref name="neuronMatrix"/> as <c>visibleNeuronCount</c> rows of
    /// <c>hiddenNeuronCount</c> tab-separated values.
    /// </summary>
    /// <param name="neuronMatrix">Row-major matrix data.</param>
    /// <param name="stride">Distance in elements between the starts of two rows.</param>
    private static void writeNeuronMatrix(float[] neuronMatrix, int stride)
    {
        for (int i = 0; i < visibleNeuronCount; i++)
        {
            for (int j = 0; j < hiddenNeuronCount; j++)
            {
                System.Console.Write(neuronMatrix[i * stride + j] + "\t");
            }

            System.Console.WriteLine();
        }
    }

    /// <summary>
    /// CPU reference for the kernel output: for every hidden neuron, the sum over all
    /// visible neurons of weight * visible value. No bias and no sigmoid are applied.
    /// </summary>
    /// <returns>One sum per hidden neuron.</returns>
    private static float[] getNeuronValuesByCpu()
    {
        var result = new float[hiddenNeuronCount];

        for (int hiddenNeuronIndex = 0; hiddenNeuronIndex < hiddenNeuronCount; hiddenNeuronIndex++)
        {
            var sum = 0f;

            for (int visibleNeuronIndex = 0; visibleNeuronIndex < visibleNeuronCount; visibleNeuronIndex++)
            {
                sum += weights[visibleNeuronIndex * hiddenNeuronCount + hiddenNeuronIndex]
                    * visibleNeuronValues[visibleNeuronIndex];
            }

            result[hiddenNeuronIndex] = sum;
        }

        return result;
    }

    /// <summary>Logistic sigmoid 1 / (1 + e^-x). Not called anywhere in this class.</summary>
    static float sigmoid(float x)
    {
        return 1f / (1f + (float)Math.Exp(-x));
    }
}

hiddenNeuronUpdater.cl

// Logistic sigmoid: 1 / (1 + e^(-x)).
float sigmoid(float x)
{
	float denominator = 1.0f + exp(-x);
	return 1.0f / denominator;
}

// State of one xorshift128 pseudo-random number generator: four int words
// that next() rotates and mixes on every call.
// NOTE(review): the host uploads an array of its own Xorshift128Random structs
// into the buffer this type is read from; the field order here presumably has
// to match that host-side struct (not shown in this file) - confirm before
// reordering or adding fields.
typedef struct
{
	int w;
	int x;
	int y;
	int z;	
} Xorshift128Random;

// Advances the generator by one step and returns the new w word.
// The state words shift down (x <- y <- z <- w) and a new w is mixed from the
// old x and the old w. All arithmetic is on signed int, so the right shifts are
// sign-propagating and the returned value can be negative.
int next(Xorshift128Random* random)
{
	int t = (random->x ^ (random->x << 11));
	random->x = random->y;
	random->y = random->z;
	random->z = random->w;
	// Single assignment: the former "w = (w = ...)" stored to w twice within one
	// expression, which is an unsequenced double modification. The stored value
	// is the same as before.
	random->w = (random->w ^ (random->w >> 19)) ^ (t ^ (t >> 8));
	return random->w;
}

// Advances the generator and returns a pseudo-random float in [0, 1].
// next() returns a signed int, so dividing it by INT_MAX directly would give
// values in roughly [-1, 1]; used as a threshold against a probability p
// (see nextBool) that would fire with chance (1 + p) / 2 instead of p.
// Clearing the sign bit first keeps the sample non-negative.
float nextFloat(Xorshift128Random* random)
{
	return ((float)(next(random) & INT_MAX) / INT_MAX);
}

// Draws one float from the generator stored at randoms[index].
// nextFloat takes an unqualified pointer, so the state is copied out of the
// __global array, advanced on the copy, and then written back.
float nextFloatFromRandoms(__global Xorshift128Random *randoms, int index)
{
	__global Xorshift128Random *slot = randoms + index;
	Xorshift128Random state = *slot;
	float value = nextFloat(&state);
	*slot = state;
	return value;
}

// Draws one float from the generator at randoms[index] and returns 1 when it is
// strictly below probability, 0 otherwise.
int nextBool(__global Xorshift128Random *randoms, int index, float probability)
{
	float sample = nextFloatFromRandoms(randoms, index);
	return sample < probability;
}

// Computes, for every hidden neuron, the sum over all visible neurons of
// weight * visible value, then stores that sum and a sampled 0/1 value.
//
// Work layout: dimension 0 selects the hidden neuron; each work-item along
// dimension 1 loads two consecutive visible neurons. Every work-group reduces
// its work-items' values in local memory and writes one partial sum to
// intermediateResults; the work-item with global id 0 along dimension 1 then
// adds up the partial sums of all groups.
//
// hiddenNeuronValues        out: sampled value (1 or 0) per hidden neuron
// hiddenNeuronProbabilities out: the total sum per hidden neuron (the sigmoid
//                           call is currently commented out)
// intermediateResults       scratch: one partial sum per (hidden neuron, group),
//                           indexed hiddenNeuronIndex * num_groups(1) + group id
// hiddenNeuronRandoms       in/out: one generator state per hidden neuron
// hiddenNeuronBiases        only referenced by the commented-out sigmoid line
// weights                   indexed visibleIndex * hiddenNeuronCount + hiddenIndex
// visibleNeuronValues       one value per visible neuron
// visibleNeuronCount        number of real visible neurons; indices at or beyond
//                           it contribute 0
// localData                 local scratch, indexed by local id along dimension 1
__kernel void updateHiddenNeuron(
	__global float *hiddenNeuronValues,
	__global float *hiddenNeuronProbabilities,
	__global float *intermediateResults,
	__global Xorshift128Random *hiddenNeuronRandoms,
	const __global float *hiddenNeuronBiases, 
	const __global float *weights, 
	const __global float *visibleNeuronValues,
	int visibleNeuronCount,
	__local float *localData
	)
{
	int hiddenNeuronIndex = get_global_id(0);
	int visibleNeuronIndex = get_global_id(1);
	int localID = get_local_id(1);
	// Computed but not used below.
	int hiddenNeuronInputStride = get_global_size(1) * 2;
	int hiddenNeuronCount = get_global_size(0);

	// Each work-item handles visible neurons 2*id and 2*id + 1; the bounds checks
	// make the padding work-items contribute 0 without reading out of range.
	float inputs[2];
	inputs[0] = (visibleNeuronIndex * 2 < visibleNeuronCount)
		? 
		weights[visibleNeuronIndex * 2 * hiddenNeuronCount + hiddenNeuronIndex] 
		* visibleNeuronValues[visibleNeuronIndex * 2]
		: 0;
	inputs[1] = ((visibleNeuronIndex * 2 + 1) < visibleNeuronCount)
		?
		weights[(visibleNeuronIndex * 2 + 1)* hiddenNeuronCount + hiddenNeuronIndex] 
		* visibleNeuronValues[visibleNeuronIndex * 2 + 1]
		: 0;
	localData[localID] = inputs[0] + inputs[1];
	barrier(CLK_LOCAL_MEM_FENCE);

	// Reduction in local memory: the active range is halved on every pass until
	// localData[0] holds the group's sum. Every element is only covered when
	// get_local_size(1) is a power of two.
	for(int s = get_local_size(1) / 2; 0 < s; s /= 2) 
	{
		if(localID < s) 
		{
			localData[localID] += localData[localID + s];
		}

		// Outside the if so that every work-item of the group reaches it.
		barrier(CLK_LOCAL_MEM_FENCE);
	}

	// The first work-item of each group publishes the group's partial sum.
	if(localID == 0)
	{
		intermediateResults[hiddenNeuronIndex * get_num_groups(1) + get_group_id(1)] = localData[0];
	}

	// NOTE(review): barrier() synchronizes the work-items of ONE work-group only.
	// The loop below reads the partial sums of every group, so with more than one
	// group along dimension 1 it can run before the other groups have written
	// their results. The sample launch uses a single group, where this is fine;
	// larger inputs would need a second kernel launch (or similar) for this step.
	barrier(CLK_GLOBAL_MEM_FENCE);

	if(visibleNeuronIndex == 0)
	{
		float sum = 0;

		for(int i = 0; i < get_num_groups(1); i++)
		{
			sum += intermediateResults[hiddenNeuronIndex * get_num_groups(1) + i];
		}

		// The raw sum is stored; bias and sigmoid are disabled in the comment below.
		float probability = sum;//sigmoid(sum + hiddenNeuronBiases[hiddenNeuronIndex]);
		hiddenNeuronProbabilities[hiddenNeuronIndex] = probability;
		hiddenNeuronValues[hiddenNeuronIndex] 
			= nextBool(hiddenNeuronRandoms, hiddenNeuronIndex, probability) ? 1 : 0;
	}
}

実行結果

実行するとこんな感じになります:

calculation on gpu : 19.8279[ms]
visible neuron inputs (cpu)
0       0       0       0
4       5       6       7
16      18      20      22
36      39      42      45
64      68      72      76
intermediate result (gpu)
120
130
140
150
hidden neuron probabilities (gpu)
120 130 140 150
hidden neuron probabilities (cpu)
120 130 140 150 続行するには何かキーを押してください . . .

上手く動いているようですね。
GPUで計算した結果とCPUで計算した結果が一致しています。
(なお、カーネル内のsigmoidはコメントアウトしてあるので、ここで「probabilities」として表示されているのは確率ではなく、重み×可視ニューロンの値の合計そのものです。)

拍手[0回]

PR