
Comments (3)

CasualMathEnjoyer commented on June 3, 2024

I am experiencing the same issue when implementing my own transformer encoder-decoder. So far I am still missing positional encoding and some masking layers; I don't know whether those would affect it in any way.
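
For the missing positional encoding, a learned token-plus-position embedding along these lines might work (a sketch only, assuming Keras 3; the class name TokenAndPositionEmbedding is made up, not part of the code below):

import keras

# Sketch: learned positional embeddings added to token embeddings.
# Assumes Keras 3 (keras.ops); the class name is hypothetical.
class TokenAndPositionEmbedding(keras.layers.Layer):
    def __init__(self, vocab_size, max_len, d_model, **kwargs):
        super().__init__(**kwargs)
        self.token_emb = keras.layers.Embedding(vocab_size, d_model)
        self.pos_emb = keras.layers.Embedding(max_len, d_model)

    def call(self, x):
        # Positions 0..seq_len-1, broadcast across the batch dimension.
        positions = keras.ops.arange(0, keras.ops.shape(x)[-1])
        return self.token_emb(x) + self.pos_emb(positions)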

Here is my code, which shows the same warning with tf-nightly and Python 3.11.9:

import keras
import numpy as np

def positionalEmbedding(input_dim, output_dim):  # TODO: add positional encoding; plain token embedding for now
    return keras.layers.Embedding(input_dim=input_dim, output_dim=output_dim)

def model_func(encoder_vocab_len, decoder_vocab_len, encoder_maxlen, decoder_maxlen, params):
    num_heads, key_dim, d_v, d_ff, d_model, n = params

    encoder_input = keras.Input(shape=(None,))
    decoder_input = keras.Input(shape=(None,))

    # encoder part
    embedded = positionalEmbedding(encoder_vocab_len, d_model)(encoder_input)  # TODO: positional embedding
    embedded = keras.layers.Dropout(0.1)(embedded)

    encoded = embedded
    for i in range(n):
        attended_encoded = keras.layers.MultiHeadAttention(num_heads,
                                        key_dim,
                                        dropout=0.1,
                                        use_bias=True,
                                        output_shape=(d_model,))(encoded, encoded, encoded)  # TODO: padding mask (see note below)
        attended_encoded_d = keras.layers.Dropout(0.1)(attended_encoded)
        add = encoded + attended_encoded_d
        normalised = keras.layers.LayerNormalization()(add)
        fed_f = keras.layers.Dense(d_ff)(normalised)  # feed forward 1 part
        fed_ff = keras.layers.Dense(d_model)(keras.activations.relu(fed_f))  # feed forward 2 part
        fed_ff_d = keras.layers.Dropout(0.1)(fed_ff)

        add2 = normalised + fed_ff_d
        normalised2 = keras.layers.LayerNormalization()(add2)

        encoded = normalised2  # and the loop is repeated

    encoder_output = encoded  # output from encoder

    # decoder part
    de_embed = positionalEmbedding(decoder_vocab_len, d_model)(decoder_input)
    de_embed = keras.layers.Dropout(0.1)(de_embed)

    for i in range(n):
        self_attention = (keras.layers.MultiHeadAttention(num_heads,
                                        key_dim,
                                        dropout=0.1,
                                        use_bias=True,
                                        output_shape=(d_model,))
                            (de_embed, de_embed, de_embed))
        self_attention_d = keras.layers.Dropout(0.1)(self_attention)
        add = de_embed + self_attention_d
        normalised1 = keras.layers.LayerNormalization()(add)
        cross_attention = (keras.layers.MultiHeadAttention(num_heads,
                                        key_dim,
                                        dropout=0.1,
                                        use_bias=True,
                                        output_shape=(d_model,))
                           (normalised1, encoder_output, encoder_output))
        cross_attention_d = keras.layers.Dropout(0.1)(cross_attention)

        add2 = normalised1 + cross_attention_d
        normalised2 = keras.layers.LayerNormalization()(add2)

        fed_f = keras.layers.Dense(d_ff)(normalised2)  # feed forward 1 part
        fed_ff = keras.layers.Dense(d_model)(keras.activations.relu(fed_f))  # feed forward 2 part
        fed_ff_d = keras.layers.Dropout(0.1)(fed_ff)

        add3 = normalised2 + fed_ff_d
        normalised3 = keras.layers.LayerNormalization()(add3)

        de_embed = normalised3

    decoder_dense_output = keras.layers.Dense(decoder_vocab_len, activation='softmax', name='decoder_output')(de_embed)

    return keras.Model(inputs=[encoder_input, decoder_input], outputs=decoder_dense_output)

if __name__ == '__main__':
    params = (8, 64, 64, 256, 512, 6)
    model = model_func(10000, 10000, 100, 100, params)
    model.summary()

    # Generate random input data with appropriate shapes
    encoder_input_data = np.random.randint(0, 10000, (2, 1))  # (batch_size, sequence_length); length 1 triggers the warning below
    decoder_input_data = np.random.randint(0, 10000, (2, 4))  # (batch_size, sequence_length)

    # Call the model with the random input data
    output = model([encoder_input_data, decoder_input_data], training=False)

    # Print the shape of the output
    print(f'Output shape: {output.shape}')
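
An aside on the masking TODOs above: in Keras 3, MultiHeadAttention accepts a use_causal_mask flag at call time, and Embedding can propagate a padding mask via mask_zero=True. A sketch along those lines, reusing the names from the code above (not a confirmed fix):

# Padding mask: let the embedding mark padding tokens (id 0) and
# propagate the mask to downstream attention layers automatically.
embedding = keras.layers.Embedding(input_dim, output_dim, mask_zero=True)

# Causal mask for the decoder self-attention.
self_attention = keras.layers.MultiHeadAttention(
    num_heads, key_dim, dropout=0.1, use_bias=True,
    output_shape=(d_model,)
)(de_embed, de_embed, de_embed, use_causal_mask=True)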

Running the script prints:

UserWarning: You are using a softmax over axis 3 of a tensor of shape (2, 8, 1, 1). This axis has size 1. The softmax operation will always return the value 1, which is likely not what you intended. Did you mean to use a sigmoid instead?
UserWarning: You are using a softmax over axis 3 of a tensor of shape (2, 8, 4, 1). This axis has size 1. The softmax operation will always return the value 1, which is likely not what you intended. Did you mean to use a sigmoid instead?

Output shape: (2, 4, 10000)
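
For what it's worth, the warning shapes (2, 8, 1, 1) and (2, 8, 4, 1) read as (batch, heads, query_len, key_len), and the size-1 last axis matches the encoder test input of length 1: a softmax over a single key trivially returns 1. Under that assumption, a longer encoder sequence should avoid the warning:

# Assumption, not a confirmed fix: use an encoder sequence longer than
# one token so the attention softmax axis is no longer size 1.
encoder_input_data = np.random.randint(0, 10000, (2, 7))
decoder_input_data = np.random.randint(0, 10000, (2, 4))
output = model([encoder_input_data, decoder_input_data], training=False)
print(output.shape)  # expected: (2, 4, 10000)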


Venkat6871 commented on June 3, 2024

Hi @sp00N221,

  • Sorry for the delay. Can you please check with a recent TF version? I tried with TF 2.16.1 and cannot reproduce the error; please see the attached screenshot (test1). Thanks!
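
A quick way to confirm the environment when re-testing; 2.16.1 is simply the version reported above:

import tensorflow as tf

# Print the installed TensorFlow version; the comment above used 2.16.1.
print(tf.__version__)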


sp00N221 commented on June 3, 2024

Hey,

Thank you for taking the time to review my issue. I've had nothing but problems with my task over the past few days. I had a combination of a TransformerBlock and LSTM layers; coupled with Optuna, it was probably just too many variables and possibilities, causing the model to become unstable. I have now switched to this approach:

from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from xgboost import XGBClassifier

def objective(trial, features, target):
    # Hyperparameter search space
    n_estimators = trial.suggest_int('n_estimators', 50, 300)
    max_depth = trial.suggest_int('max_depth', 3, 15)
    learning_rate = trial.suggest_float('learning_rate', 0.01, 0.3, log=True)
    subsample = trial.suggest_float('subsample', 0.5, 1.0)
    colsample_bytree = trial.suggest_float('colsample_bytree', 0.5, 1.0)
    gamma = trial.suggest_float('gamma', 0, 5)
    min_child_weight = trial.suggest_int('min_child_weight', 1, 10)
    reg_lambda = trial.suggest_float('lambda', 1e-8, 10.0, log=True)
    reg_alpha = trial.suggest_float('alpha', 1e-8, 10.0, log=True)

    model = XGBClassifier(
        n_estimators=n_estimators, max_depth=max_depth, learning_rate=learning_rate,
        subsample=subsample, colsample_bytree=colsample_bytree, gamma=gamma,
        min_child_weight=min_child_weight, reg_lambda=reg_lambda, reg_alpha=reg_alpha,
        random_state=42
    )

    x_train, x_test, y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=42)

    # Impute and scale all (numeric) feature columns
    numeric_features = x_train.columns
    preprocessor = ColumnTransformer(
        transformers=[
            ('num', Pipeline(steps=[('imputer', SimpleImputer(strategy='mean')), ('scaler', StandardScaler())]),
             numeric_features)
        ])

    x_train = preprocessor.fit_transform(x_train)
    x_test = preprocessor.transform(x_test)

    model.set_params(early_stopping_rounds=10, eval_metric='logloss')
    model.fit(x_train, y_train, eval_set=[(x_test, y_test)], verbose=False)

    predictions = model.predict(x_test)
    accuracy = accuracy_score(y_test, predictions)

    return accuracy
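
For completeness, a minimal Optuna driver for this objective; features and target stand in for the real dataset (a sketch, not part of the original comment):

import optuna

# Hypothetical driver: maximize accuracy over 50 trials.
study = optuna.create_study(direction='maximize')
study.optimize(lambda trial: objective(trial, features, target), n_trials=50)
print(study.best_params)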

With this, I have no problems.
Have a nice day!


