I have the code for a neural network, and when I run it against the tests it occasionally gives me Net recent average error: -nan. I assume it is a "double" value going out of range or something like that; when I step into the debugger and inspect the double variable, its value shows as 0xfffffffffff. The code is:

    // neural-net-tutorial.cpp
    // David Miller, http://millermattson.com/dave
    // See the associated video for instructions: http://vimeo.com/19569529


    #include <vector>
    #include <iostream>
    #include <cstdlib>
    #include <cassert>
    #include <cmath>
    #include <fstream>
    #include <sstream>

    using namespace std;

    // Silly class to read training data from a text file -- Replace This.
    // Replace class TrainingData with whatever you need to get input data into the
    // program, e.g., connect to a database, or take a stream of data from stdin, or
    // from a file specified by a command line argument, etc.

    class TrainingData
    {
    public:
        TrainingData(const string filename);
        bool isEof(void) { return m_trainingDataFile.eof(); }
        void getTopology(vector<unsigned> &topology);

        // Returns the number of input values read from the file:
        unsigned getNextInputs(vector<double> &inputVals);
        unsigned getTargetOutputs(vector<double> &targetOutputVals);

    private:
        ifstream m_trainingDataFile;
    };

    void TrainingData::getTopology(vector<unsigned> &topology)
    {
        string line;
        string label;

        getline(m_trainingDataFile, line);
        stringstream ss(line);
        ss >> label;
        if (this->isEof() || label.compare("topology:") != 0) {
            abort();
        }

        while (!ss.eof()) {
            unsigned n;
            ss >> n;
            topology.push_back(n);
        }

        return;
    }

    TrainingData::TrainingData(const string filename)
    {
        m_trainingDataFile.open(filename.c_str());
    }

    unsigned TrainingData::getNextInputs(vector<double> &inputVals)
    {
        inputVals.clear();

        string line;
        getline(m_trainingDataFile, line);
        stringstream ss(line);

        string label;
        ss >> label;
        if (label.compare("in:") == 0) {
            double oneValue;
            while (ss >> oneValue) {
                inputVals.push_back(oneValue);
            }
        }

        return inputVals.size();
    }

    unsigned TrainingData::getTargetOutputs(vector<double> &targetOutputVals)
    {
        targetOutputVals.clear();

        string line;
        getline(m_trainingDataFile, line);
        stringstream ss(line);

        string label;
        ss >> label;
        if (label.compare("out:") == 0) {
            double oneValue;
            while (ss >> oneValue) {
                targetOutputVals.push_back(oneValue);
            }
        }

        return targetOutputVals.size();
    }


    struct Connection
    {
        double weight;
        double deltaWeight;
    };


    class Neuron;

    typedef vector<Neuron> Layer;

    // ****************** class Neuron ******************
    class Neuron
    {
    public:
        Neuron(unsigned numOutputs, unsigned myIndex);
        void setOutputVal(double val) { m_outputVal = val; }
        double getOutputVal(void) const { return m_outputVal; }
        void feedForward(const Layer &prevLayer);
        void calcOutputGradients(double targetVal);
        void calcHiddenGradients(const Layer &nextLayer);
        void updateInputWeights(Layer &prevLayer);

    private:
        static double eta;   // [0.0..1.0] overall net training rate
        static double alpha; // [0.0..n] multiplier of last weight change (momentum)
        static double transferFunction(double x);
        static double transferFunctionDerivative(double x);
        static double randomWeight(void) { return rand() / double(RAND_MAX); }
        double sumDOW(const Layer &nextLayer) const;
        double m_outputVal;
        vector<Connection> m_outputWeights;
        unsigned m_myIndex;
        double m_gradient;
    };

    double Neuron::eta = 0.15;    // overall net learning rate, [0.0..1.0]
    double Neuron::alpha = 0.5;   // momentum, multiplier of last deltaWeight, [0.0..1.0]


    void Neuron::updateInputWeights(Layer &prevLayer)
    {
        // The weights to be updated are in the Connection container
        // in the neurons in the preceding layer

        for (unsigned n = 0; n < prevLayer.size(); ++n) {
            Neuron &neuron = prevLayer[n];
            double oldDeltaWeight = neuron.m_outputWeights[m_myIndex].deltaWeight;

            double newDeltaWeight =
                    // Individual input, magnified by the gradient and train rate:
                    eta
                    * neuron.getOutputVal()
                    * m_gradient
                    // Also add momentum = a fraction of the previous delta weight;
                    + alpha
                    * oldDeltaWeight;

            neuron.m_outputWeights[m_myIndex].deltaWeight = newDeltaWeight;
            neuron.m_outputWeights[m_myIndex].weight += newDeltaWeight;
        }
    }

    double Neuron::sumDOW(const Layer &nextLayer) const
    {
        double sum = 0.0;

        // Sum our contributions of the errors at the nodes we feed.

        for (unsigned n = 0; n < nextLayer.size() - 1; ++n) {
            sum += m_outputWeights[n].weight * nextLayer[n].m_gradient;
        }

        return sum;
    }

    void Neuron::calcHiddenGradients(const Layer &nextLayer)
    {
        double dow = sumDOW(nextLayer);
        m_gradient = dow * Neuron::transferFunctionDerivative(m_outputVal);
    }

    void Neuron::calcOutputGradients(double targetVal)
    {
        double delta = targetVal - m_outputVal;
        m_gradient = delta * Neuron::transferFunctionDerivative(m_outputVal);
    }

    double Neuron::transferFunction(double x)
    {
        // tanh - output range [-1.0..1.0]

        return tanh(x);
    }

    double Neuron::transferFunctionDerivative(double x)
    {
        // tanh derivative
        return 1.0 - x * x;
    }

    void Neuron::feedForward(const Layer &prevLayer)
    {
        double sum = 0.0;

        // Sum the previous layer's outputs (which are our inputs)
        // Include the bias node from the previous layer.

        for (unsigned n = 0; n < prevLayer.size(); ++n) {
            sum += prevLayer[n].getOutputVal() *
                    prevLayer[n].m_outputWeights[m_myIndex].weight;
        }

        m_outputVal = Neuron::transferFunction(sum);
    }

    Neuron::Neuron(unsigned numOutputs, unsigned myIndex)
    {
        for (unsigned c = 0; c < numOutputs; ++c) {
            m_outputWeights.push_back(Connection());
            m_outputWeights.back().weight = randomWeight();
        }

        m_myIndex = myIndex;
    }


    // ****************** class Net ******************
    class Net
    {
    public:
        Net(const vector<unsigned> &topology);
        void feedForward(const vector<double> &inputVals);
        void backProp(const vector<double> &targetVals);
        void getResults(vector<double> &resultVals) const;
        double getRecentAverageError(void) const { return m_recentAverageError; }

    private:
        vector<Layer> m_layers; // m_layers[layerNum][neuronNum]
        double m_error;
        double m_recentAverageError;
        static double m_recentAverageSmoothingFactor;
    };


    double Net::m_recentAverageSmoothingFactor = 100.0; // Number of training samples to average over


    void Net::getResults(vector<double> &resultVals) const
    {
        resultVals.clear();

        for (unsigned n = 0; n < m_layers.back().size() - 1; ++n) {
            resultVals.push_back(m_layers.back()[n].getOutputVal());
        }
    }

    void Net::backProp(const vector<double> &targetVals)
    {
        // Calculate overall net error (RMS of output neuron errors)

        Layer &outputLayer = m_layers.back();
        m_error = 0.0;

        for (unsigned n = 0; n < outputLayer.size() - 1; ++n) {
            double delta = targetVals[n] - outputLayer[n].getOutputVal();
            m_error += delta * delta;
        }
        m_error /= outputLayer.size() - 1; // get average error squared
        m_error = sqrt(m_error); // RMS

        // Implement a recent average measurement

        m_recentAverageError =
                (m_recentAverageError * m_recentAverageSmoothingFactor + m_error)
                / (m_recentAverageSmoothingFactor + 1.0);

        // Calculate output layer gradients

        for (unsigned n = 0; n < outputLayer.size() - 1; ++n) {
            outputLayer[n].calcOutputGradients(targetVals[n]);
        }

        // Calculate hidden layer gradients

        for (unsigned layerNum = m_layers.size() - 2; layerNum > 0; --layerNum) {
            Layer &hiddenLayer = m_layers[layerNum];
            Layer &nextLayer = m_layers[layerNum + 1];

            for (unsigned n = 0; n < hiddenLayer.size(); ++n) {
                hiddenLayer[n].calcHiddenGradients(nextLayer);
            }
        }

        // For all layers from outputs to first hidden layer,
        // update connection weights

        for (unsigned layerNum = m_layers.size() - 1; layerNum > 0; --layerNum) {
            Layer &layer = m_layers[layerNum];
            Layer &prevLayer = m_layers[layerNum - 1];

            for (unsigned n = 0; n < layer.size() - 1; ++n) {
                layer[n].updateInputWeights(prevLayer);
            }
        }
    }

    void Net::feedForward(const vector<double> &inputVals)
    {
        assert(inputVals.size() == m_layers[0].size() - 1);

        // Assign (latch) the input values into the input neurons
        for (unsigned i = 0; i < inputVals.size(); ++i) {
            m_layers[0][i].setOutputVal(inputVals[i]);
        }

        // forward propagate
        for (unsigned layerNum = 1; layerNum < m_layers.size(); ++layerNum) {
            Layer &prevLayer = m_layers[layerNum - 1];
            for (unsigned n = 0; n < m_layers[layerNum].size() - 1; ++n) {
                m_layers[layerNum][n].feedForward(prevLayer);
            }
        }
    }

    Net::Net(const vector<unsigned> &topology)
    {
        unsigned numLayers = topology.size();
        for (unsigned layerNum = 0; layerNum < numLayers; ++layerNum) {
            m_layers.push_back(Layer());
            unsigned numOutputs = layerNum == topology.size() - 1 ? 0 : topology[layerNum + 1];

            // We have a new layer, now fill it with neurons, and
            // add a bias neuron in each layer.
            for (unsigned neuronNum = 0; neuronNum <= topology[layerNum]; ++neuronNum) {
                m_layers.back().push_back(Neuron(numOutputs, neuronNum));
                cout << "Made a Neuron!" << endl;
            }

            // Force the bias node's output to 1.0 (it was the last neuron pushed in this layer):
            m_layers.back().back().setOutputVal(1.0);
        }
    }


    void showVectorVals(string label, vector<double> &v)
    {
        cout << label << " ";
        for (unsigned i = 0; i < v.size(); ++i) {
            cout << v[i] << " ";
        }

        cout << endl;
    }


    int main()
    {
        TrainingData trainData("/tmp/trainingData.txt");

        // e.g., { 3, 2, 1 }
        vector<unsigned> topology;
        trainData.getTopology(topology);

        Net myNet(topology);

        vector<double> inputVals, targetVals, resultVals;
        int trainingPass = 0;

        while (!trainData.isEof()) {
            ++trainingPass;
            cout << endl << "Pass " << trainingPass;

            // Get new input data and feed it forward:
            if (trainData.getNextInputs(inputVals) != topology[0]) {
                break;
            }
            showVectorVals(": Inputs:", inputVals);
            myNet.feedForward(inputVals);

            // Collect the net's actual output results:
            myNet.getResults(resultVals);
            showVectorVals("Outputs:", resultVals);

            // Train the net what the outputs should have been:
            trainData.getTargetOutputs(targetVals);
            showVectorVals("Targets:", targetVals);
            assert(targetVals.size() == topology.back());

            myNet.backProp(targetVals);

            // Report how well the training is working, average over recent samples:
            cout << "Net recent average error: "
                    << myNet.getRecentAverageError() << endl;
        }

        cout << endl << "Done" << endl;
    }

The file it reads the training samples from is produced by the following code:

    #include <iostream>
    #include <cmath>
    #include <cstdlib>

    using namespace std;
    int main(){


        cout <<"topology: 2 4 1"<<endl;
        for(int i =2000; i>=0;--i){

            int n1 =(int)(2.0*rand()/double(RAND_MAX));
            int n2 =(int)(2.0*rand()/double(RAND_MAX));
            int t = n1 ^ n2;
            cout <<"in: "<<n1 << ".0 "<<n2 <<".0 "<<endl;
            cout <<"out: "<<t<<".0"<<endl;

        }
    }
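For reference, the generated file encodes the XOR function. It starts something like this (the exact 0/1 sequence depends on the platform's rand() implementation, since the generator never seeds it):

    topology: 2 4 1
    in: 1.0 0.0
    out: 1.0
    in: 1.0 1.0
    out: 0.0
    ...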


k1k4ss0
  • So it works most of the time, then? Sorry, but with this wall of code it will be hard to figure out why you got this `recent average error` – Revolucion for Monica Jan 31 '19 at 14:10

1 Answer

"NaN (Not a Number)" No es un Número.


Some floating-point operations produce NaN when the result has no representation in the stored data type. The usual suspects are division by zero (which yields Inf for a nonzero numerator, and NaN for 0.0/0.0) or the square root of a negative number...
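As a quick illustration (not part of the original answer), a minimal sketch of both failure modes on an IEEE-754 platform, and of std::isnan as the reliable check, since NaN compares unequal to everything, itself included:

    #include <cmath>
    #include <iostream>

    int main()
    {
        double inf  = 1.0 / 0.0;       // nonzero / zero -> Inf on IEEE-754 hardware
        double nan1 = 0.0 / 0.0;       // 0 / 0 -> NaN
        double nan2 = std::sqrt(-1.0); // square root of a negative -> NaN

        std::cout << inf << " " << nan1 << " " << nan2 << std::endl; // typically: inf nan -nan

        // NaN != NaN, so ordinary comparisons won't catch it; std::isnan will.
        // Note that the NaN also propagates through further arithmetic:
        std::cout << std::boolalpha << std::isnan(nan1 + 42.0) << std::endl; // true
    }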

Oh, surprise! There are square roots in your code:

m_error = sqrt(m_error); // RMS

Operating on a NaN propagates the error, but the origin is most likely that operation.

I would debug the code by setting a conditional breakpoint on m_error that fires when the variable goes negative.
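Along the same lines, the check can be built into the code itself; a minimal sketch of a guarded version of that RMS step (checkedRms is a hypothetical helper, not part of the original program):

    #include <cassert>
    #include <cmath>
    #include <cstddef>
    #include <iostream>

    // Guarded version of the RMS step from Net::backProp: trap a negative
    // (or already-NaN) accumulated error before calling sqrt on it.
    double checkedRms(double sumSquaredError, std::size_t numOutputs)
    {
        assert(numOutputs > 0 && "output layer needs at least one non-bias neuron");
        double meanSquared = sumSquaredError / numOutputs;
        // NaN >= 0.0 is false, so this also fires if a NaN sneaked in upstream:
        assert(meanSquared >= 0.0 && "negative mean squared error before sqrt");
        return std::sqrt(meanSquared);
    }

    int main()
    {
        std::cout << checkedRms(0.5, 1) << std::endl; // ~0.707107
    }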

PaperBirdMaster
  • OK, thanks a lot. I'll change the code and see how it goes – k1k4ss0 Jan 31 '19 at 14:43
  • Do let us know how it goes, I'm curious :) – PaperBirdMaster Jan 31 '19 at 15:00
  • Now I don't understand what is going on: I'm compiling the same code on a university computer and that error message doesn't appear. I suppose it's down to the g++ version... Now all the tests pass... By the way, the g++ version I'm currently on is 5.4.0; if you have a different version, try compiling it and see what you get – k1k4ss0 Jan 31 '19 at 16:02
  • When behavior is inconsistent across compilers, you may be entering "*undefined behavior*" or "*implementation-defined behavior*" territory; take a look at [this thread](https://es.stackoverflow.com/q/164188/2742) to learn more about it. – PaperBirdMaster Jan 31 '19 at 19:27
  • I solved it. What I did was add the following line: double Net::m_recentAverageError = 0.0; and make that attribute "static" inside the Net class. The results don't change, and it no longer gives -nan – k1k4ss0 Feb 02 '19 at 02:41
  • From what you describe, the problem was probably related to uninitialized variables. Initializing them is good practice! I encourage you to search for/ask questions about the topic. – PaperBirdMaster Feb 02 '19 at 10:34
  • Thanks, I usually do, but as you can see from the comments in the first lines, the code isn't mine. I use it mostly to understand how neural networks work, and I ran into this problem. Even so, thank you very much for your help!!! – k1k4ss0 Feb 02 '19 at 23:17
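
For completeness, the fix k1k4ss0 describes in the comments would look something like this sketch against the original Net class:

    // In the class definition, declare the member static:
    class Net
    {
        // ... public interface unchanged ...
    private:
        static double m_recentAverageError; // was: double m_recentAverageError;
        // ... other members unchanged ...
    };

    // At namespace scope, define and zero-initialize it, next to the
    // existing definition of m_recentAverageSmoothingFactor:
    double Net::m_recentAverageError = 0.0;

Initializing the plain non-static member in Net's constructor (m_recentAverageError = 0.0;) would work just as well: the root cause is that the recent-average update in backProp reads m_recentAverageError before anything has ever written to it, and an uninitialized double can hold garbage bits that the smoothing formula then turns into NaN.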