Comments (3)
I am experiencing the same issue when implementing my own transformer encoder-decoder. So far, I am still missing positional encoding and some masking layers; I don't know whether those would affect it in any way.
Here is my code, which shows the same warning with tf-nightly and Python 3.11.9:
import keras
import numpy as np
def possitionalEmbedding(input_dim, output_dim):  # TODO
    """Build the embedding layer used for both encoder and decoder tokens.

    Despite the name, no positional information is added yet (see the TODO);
    this currently returns a plain ``keras.layers.Embedding``.

    Args:
        input_dim: Forwarded to ``Embedding(input_dim=...)``.
        output_dim: Forwarded to ``Embedding(output_dim=...)``.

    Returns:
        A new, uncalled ``keras.layers.Embedding`` layer.
    """
    embedding_layer = keras.layers.Embedding(
        input_dim=input_dim,
        output_dim=output_dim,
    )
    return embedding_layer
def _attention_sublayer(query, context, num_heads, key_dim, d_model,
                        use_causal_mask=False):
    """Multi-head attention + dropout + residual connection + layer norm.

    ``context`` is used as both value and key. Pass ``query`` itself as
    ``context`` for self-attention.
    """
    attention = keras.layers.MultiHeadAttention(
        num_heads,
        key_dim,
        dropout=0.1,
        use_bias=True,
        output_shape=(d_model,),
    )
    # todo padding_mask
    attended = attention(query, context, context,
                         use_causal_mask=use_causal_mask)
    attended = keras.layers.Dropout(0.1)(attended)
    return keras.layers.LayerNormalization()(query + attended)


def _feed_forward_sublayer(x, d_ff, d_model):
    """Two-layer ReLU feed-forward + dropout + residual connection + layer norm."""
    hidden = keras.layers.Dense(d_ff, activation="relu")(x)  # feed forward 1 part
    projected = keras.layers.Dense(d_model)(hidden)  # feed forward 2 part
    projected = keras.layers.Dropout(0.1)(projected)
    return keras.layers.LayerNormalization()(x + projected)


def model_func(encoder_vocab_len, decoder_vocab_len, encoder_maxlen, decoder_maxlen, params):
    """Build a transformer encoder-decoder as a functional ``keras.Model``.

    Args:
        encoder_vocab_len: Vocabulary size for the encoder embedding.
        decoder_vocab_len: Vocabulary size for the decoder embedding and the
            width of the final softmax layer.
        encoder_maxlen: Currently unused; kept for interface compatibility.
        decoder_maxlen: Currently unused; kept for interface compatibility.
        params: 6-tuple ``(num_heads, key_dim, d_v, d_ff, d_model, n)`` where
            ``n`` is the number of stacked encoder and decoder layers.
            ``d_v`` is unpacked but not used by the model.

    Returns:
        A ``keras.Model`` taking ``[encoder_input, decoder_input]`` (two
        integer inputs declared with ``shape=(None,)``) and producing a
        softmax over ``decoder_vocab_len`` for every decoder position.
    """
    num_heads, key_dim, _d_v, d_ff, d_model, n = params
    encoder_input = keras.Input(shape=(None,))
    decoder_input = keras.Input(shape=(None,))

    # encoder part
    # todo possitional embedding
    encoded = possitionalEmbedding(encoder_vocab_len, d_model)(encoder_input)
    encoded = keras.layers.Dropout(0.1)(encoded)
    for _ in range(n):
        encoded = _attention_sublayer(encoded, encoded, num_heads, key_dim, d_model)
        encoded = _feed_forward_sublayer(encoded, d_ff, d_model)
    encoder_output = encoded  # output from encoder

    # decoder part
    decoded = possitionalEmbedding(decoder_vocab_len, d_model)(decoder_input)
    decoded = keras.layers.Dropout(0.1)(decoded)
    for _ in range(n):
        # Causal mask: a target position must not attend to later positions,
        # otherwise the decoder can read the tokens it is asked to predict.
        decoded = _attention_sublayer(decoded, decoded, num_heads, key_dim,
                                      d_model, use_causal_mask=True)
        # Cross-attention over the encoder output.
        decoded = _attention_sublayer(decoded, encoder_output, num_heads,
                                      key_dim, d_model)
        decoded = _feed_forward_sublayer(decoded, d_ff, d_model)

    decoder_dense_output = keras.layers.Dense(
        decoder_vocab_len, activation='softmax', name='decoder_output'
    )(decoded)
    return keras.Model(inputs=[encoder_input, decoder_input], outputs=decoder_dense_output)
if __name__ == '__main__':
    # Tuple layout expected by model_func: (num_heads, key_dim, d_v, d_ff, d_model, n)
    hyperparameters = (8, 64, 64, 256, 512, 6)
    transformer = model_func(10000, 10000, 100, 100, hyperparameters)
    transformer.summary()

    # Random token ids, each shaped (batch_size, sequence_length).
    source_tokens = np.random.randint(0, 10000, (2, 1))
    target_tokens = np.random.randint(0, 10000, (2, 4))

    # Single forward pass with training disabled, then report the result shape.
    output = transformer.call([source_tokens, target_tokens], training=False)
    print(f'Output shape: {output.shape}')
UserWarning: You are using a softmax over axis 3 of a tensor of shape (2, 8, 1, 1). This axis has size 1. The softmax operation will always return the value 1, which is likely not what you intended. Did you mean to use a sigmoid instead?
warnings.warn(
UserWarning: You are using a softmax over axis 3 of a tensor of shape (2, 8, 4, 1). This axis has size 1. The softmax operation will always return the value 1, which is likely not what you intended. Did you mean to use a sigmoid instead?
warnings.warn(
Output shape: (2, 4, 10000)
from tensorflow.
Hi @sp00N221 ,
Sorry for the delay. Could you please check with a recent, compatible TF version? I tried with TF 2.16.1 and could not reproduce the error.
Please check the screenshot
here. Thanks!
from tensorflow.
Hey,
Thank you for taking the time to review my issue. I've had nothing but problems with my task over the past few days. I had a combination of a TransformerBlock and LSTM layers. Coupled with Optuna, it was probably just too many variables and possibilities, causing the model to become unstable. I have now switched to this task:
def objective(trial, features, target):
    """Optuna objective: train an XGBoost classifier and return its accuracy.

    Hyperparameters are sampled from ``trial``, the data is split 80/20 with a
    fixed seed, every column is mean-imputed and standardised, and the
    classifier is fitted with early stopping.

    NOTE(review): the same held-out split is used for early stopping and for
    the returned accuracy.

    Args:
        trial: Object providing ``suggest_int`` / ``suggest_float``.
        features: Feature table; its split must expose ``.columns``.
        target: Labels aligned with ``features``.

    Returns:
        The accuracy score of the fitted model on the held-out split.
    """
    # Sampling order is kept as-is; dict literals evaluate top to bottom.
    sampled = {
        'n_estimators': trial.suggest_int('n_estimators', 50, 300),
        'max_depth': trial.suggest_int('max_depth', 3, 15),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3, log=True),
        'subsample': trial.suggest_float('subsample', 0.5, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 1.0),
        'gamma': trial.suggest_float('gamma', 0, 5),
        'min_child_weight': trial.suggest_int('min_child_weight', 1, 10),
        'reg_lambda': trial.suggest_float('lambda', 1e-8, 10.0, log=True),
        'reg_alpha': trial.suggest_float('alpha', 1e-8, 10.0, log=True),
    }
    classifier = XGBClassifier(**sampled, random_state=42)

    x_train, x_test, y_train, y_test = train_test_split(
        features, target, test_size=0.2, random_state=42
    )

    # All columns are routed through the same impute-then-scale pipeline.
    numeric_pipeline = Pipeline(
        steps=[('imputer', SimpleImputer(strategy='mean')), ('scaler', StandardScaler())]
    )
    preprocessor = ColumnTransformer(
        transformers=[('num', numeric_pipeline, x_train.columns)]
    )
    train_matrix = preprocessor.fit_transform(x_train)
    test_matrix = preprocessor.transform(x_test)

    classifier.set_params(early_stopping_rounds=10, eval_metric='logloss')
    classifier.fit(
        train_matrix,
        y_train,
        eval_set=[(test_matrix, y_test)],
        verbose=False,
    )

    return accuracy_score(y_test, classifier.predict(test_matrix))
With this, I have no problems.
Have a nice day!
from tensorflow.
Related Issues (20)
- dynamic input shape with InferenceRunner HOT 1
- Trouble Running TensorFlow v2.16.1 with NVIDIA GeForce 940MX GPU #914 HOT 1
- There is no target called wheel HOT 2
- TensorFlow Cuda in Docker under WSL2 not wokring HOT 13
- "CUDA_ERROR_NOT_FOUND: named symbol not found" in Docker container HOT 10
- There was no error when converting the lite model but an error occurred when calling the Interpreter allocate_tensors() method. It will appear if the Conv1D data_format parameter is set to channels_first and the dilation_rate parameter > 1 HOT 2
- Issue with Tesnorflow JS Face Detection on Production HOT 4
- [RNN] LSTM Model conversion error after upgrading to tf 2.16.1 from 2.15 HOT 3
- Training model with the Poisson loss function and the Adam optimizer resulted in NaN loss HOT 2
- Bazel compiling source code failed because of highwayhash/sip_hash.cc HOT 2
- segmentation fault when tf.histogram_fixed_width receives large `value_range` and `nbins` on CPU mode
- Wrong explanation about an argument of tflite interpreter HOT 1
- Not able to build TensorFlow with GPU support HOT 2
- ValueError: `validation_split` is only supported for Tensors or NumPy arrays, found following types in the input: [<class 'int'>] HOT 1
- __add__ with floating point values HOT 1
- TypeError: Expected int32, got 1e-07 of type 'float' instead. HOT 2
- Current tensorflow[and-cuda] installed by pip pulls ptxas which causes Jupyter kernel restart
- tflite-runtime 2.11 python wheel for windows
- Running an Integrated Image Segmenter in Java
- ERROR CONVERTING SYMBOLIC TENSOR TO NUMPY ARRAY
Recommend Projects
-
React
A declarative, efficient, and flexible JavaScript library for building user interfaces.
-
Vue.js
🖖 Vue.js is a progressive, incrementally-adoptable JavaScript framework for building UI on the web.
-
Typescript
TypeScript is a superset of JavaScript that compiles to clean JavaScript output.
-
TensorFlow
An Open Source Machine Learning Framework for Everyone
-
Django
The Web framework for perfectionists with deadlines.
-
Laravel
A PHP framework for web artisans
-
D3
Bring data to life with SVG, Canvas and HTML. 📊📈🎉
-
Recommend Topics
-
javascript
JavaScript (JS) is a lightweight interpreted programming language with first-class functions.
-
web
Some thing interesting about web. New door for the world.
-
server
A server is a program made to process requests and deliver data to clients.
-
Machine learning
Machine learning is a way of modeling and interpreting data that allows a piece of software to respond intelligently.
-
Visualization
Some thing interesting about visualization, use data art
-
Game
Some thing interesting about game, make everyone happy.
Recommend Org
-
Facebook
We are working to build community through open source technology. NB: members must have two-factor auth.
-
Microsoft
Open source projects and samples from Microsoft.
-
Google
Google ❤️ Open Source for everyone.
-
Alibaba
Alibaba Open Source for everyone
-
D3
Data-Driven Documents codes.
-
Tencent
China tencent open source team.
from tensorflow.