Witam, mam problem dokładnie z tym, co napisałem w tytule. Utworzyłem generator, który zapisuje wszystkie ceny bitcoina z Binance w formacie OHLC + wolumen + RSI
do pliku CSV. Następnie dane te przekazywane są do modelu, który ma próbować przewidzieć wzrost lub spadek ceny za 3 następne świeczki. Wszystko działa dobrze, jeżeli nie dodam kolumny z wolumenem. Próbowałem zamienić ją na int oraz szukałem odpowiedzi w Google, ale nic nie pomaga. Ogólnie przerabiam skrypt z tego poradnika, aby czegoś się nauczyć: https://pythonprogramming.net/crypto-rnn-model-deep-learning-python-tensorflow-keras/
Jeśli źle wstawiłem kod, to mnie poprawcie.
KOD ŹRÓDŁOWY DO POBIERANIA DANYCH
from binance.client import Client
import pandas as pd
#import talib as ta
# Download hourly BTCUSDT candles from Binance and store them in btc.csv.
# Empty API key/secret are passed to the client, as in the original script.
client = Client("", "")
klines = client.get_historical_klines("BTCUSDT", Client.KLINE_INTERVAL_1HOUR, "18 Aug, 2017")

# Keep only the first six fields of every candle; the remaining fields
# are not used by the model.
klines = [candle[:6] for candle in klines]

# Binance returns the kline fields in the order
#   open time, open, high, low, close, volume
# so the labels must follow that order (the previous 'low', 'high', 'open'
# labelling swapped the open and low prices).
df = pd.DataFrame(klines, columns=['time', 'open', 'high', 'low', 'close', 'volume'])

# Prices and volume arrive as strings; convert them once here so that any
# indicator computed below (and the CSV consumer) sees real numbers.
numeric_cols = ['open', 'high', 'low', 'close', 'volume']
df[numeric_cols] = df[numeric_cols].astype(float)

# NOTE: the model script selects an 'rsi' column from btc.csv, so this line
# (and the talib import) has to be enabled before the CSV can be used there.
#df['rsi'] = ta.RSI(df['close'])
df.to_csv('btc.csv')
MODEL
import pandas as pd
from collections import deque
import random
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, LSTM, BatchNormalization
from tensorflow.keras.callbacks import TensorBoard
from tensorflow.keras.callbacks import ModelCheckpoint
import time
from sklearn import preprocessing
# Number of consecutive rows that make up one input sequence for the model.
SEQ_LEN = 6
# How many rows ahead the 'future' close price is taken from when labelling.
FUTURE_PERIOD_PREDICT = 3
def classify(current, future):
    """Label a price move as up (1) or not up (0).

    Both arguments are converted with ``float()``, so numeric strings are
    accepted. A NaN ``future`` compares as not greater and therefore yields 0.

    Args:
        current: The current price.
        future: The price to compare against.

    Returns:
        1 if ``future`` is strictly greater than ``current``, otherwise 0.
    """
    if float(future) > float(current):
        return 1
    return 0
# Load the candles from btc.csv, index them by the 'time' column and keep
# only the columns the model consumes.
mdf = pd.read_csv('btc.csv')
mdf.set_index("time", drop=True, inplace=True)
mdf = mdf[['low', 'high', 'open', 'close', 'volume', 'rsi']]
print(mdf)

# Forward-fill gaps, then drop rows that still contain NaN (for example
# leading rows that have nothing to fill from).
main_df = mdf.ffill().dropna()

# 'future' is the close price FUTURE_PERIOD_PREDICT rows ahead. The last
# FUTURE_PERIOD_PREDICT rows have no future value; drop them instead of
# letting classify() silently label them 0.
main_df['future'] = main_df['close'].shift(-FUTURE_PERIOD_PREDICT)
main_df = main_df.dropna(subset=['future']).copy()
main_df['target'] = list(map(classify, main_df['close'], main_df['future']))

# Chronological split: the most recent ~5% of rows become validation data.
times = sorted(main_df.index.values)
if not times:
    raise ValueError("btc.csv contains no usable rows after cleaning")
# At least one row must be held out; int(0.05 * n) is 0 for small frames,
# and times[-0] would put every row into the validation set.
holdout_rows = max(1, int(0.05 * len(times)))
last_5pct = times[-holdout_rows]
validation_main_df = main_df[(main_df.index >= last_5pct)]
main_df = main_df[(main_df.index < last_5pct)]
def classify(current, future):
    """Return 1 when ``future`` is strictly above ``current`` (a buy), else 0.

    NOTE: this repeats the ``classify`` definition that appears earlier in
    the file with the same logic; one of the two can be removed.

    Args:
        current: The current price (anything ``float()`` accepts).
        future: The later price to compare against.

    Returns:
        1 for a rise, 0 for a fall, no change, or a NaN ``future``.
    """
    return 1 if float(future) > float(current) else 0
def preprocess_df(df):
    """Turn a labelled price frame into balanced, shuffled training sequences.

    Steps:
      1. Drop the helper column ``future``.
      2. Replace every feature column by its percentage change and
         standardise it with ``preprocessing.scale``.
      3. Slide a window of ``SEQ_LEN`` rows over the frame; each full window
         is paired with the ``target`` of its last row.
      4. Balance the two classes by truncating the larger one, then shuffle.

    Args:
        df: DataFrame containing the feature columns plus ``future`` and
            ``target`` columns. The input frame is not modified.

    Returns:
        A tuple ``(X, y)`` of numpy arrays: ``X`` holds the sequences,
        ``y`` the matching targets.
    """
    # Keyword form: the positional ``axis`` argument of drop() was removed
    # in pandas 2.0. drop() returns a new frame, so the caller's is untouched.
    df = df.drop(columns="future")

    feature_cols = [col for col in df.columns if col != "target"]

    # Normalise all feature columns in one pass so that only a single leading
    # row is lost (doing it column by column dropped one extra row per column).
    df[feature_cols] = df[feature_cols].pct_change()

    # pct_change() divides by the previous value: a previous value of 0
    # (e.g. an hour with zero volume) produces +/-inf, which
    # preprocessing.scale() rejects. Treat those rows like missing data.
    df = df.replace([np.inf, -np.inf], np.nan).dropna()

    # Standardise each feature column (scale() works column-wise on 2-D input).
    df[feature_cols] = preprocessing.scale(df[feature_cols].values)
    df = df.dropna()  # cleanup again, just in case

    # Select features and target by name rather than relying on 'target'
    # being the last column.
    features = df[feature_cols].values
    targets = df["target"].values

    sequential_data = []  # list of [sequence, target] pairs
    # deque(maxlen=...) discards the oldest row automatically as new rows arrive.
    # NOTE: rows removed above leave gaps, so a window can span them.
    window = deque(maxlen=SEQ_LEN)
    for row, target in zip(features, targets):
        window.append(row)
        if len(window) == SEQ_LEN:  # only emit full windows
            sequential_data.append([np.array(window), target])

    random.shuffle(sequential_data)

    # Split by class; anything that is neither 0 nor 1 is ignored.
    buys = [[seq, target] for seq, target in sequential_data if target == 1]
    sells = [[seq, target] for seq, target in sequential_data if target == 0]

    random.shuffle(buys)
    random.shuffle(sells)

    # Balance the classes by truncating both to the size of the smaller one.
    lower = min(len(buys), len(sells))
    sequential_data = buys[:lower] + sells[:lower]
    # Shuffle again so the model does not see one whole class, then the other.
    random.shuffle(sequential_data)

    X = [seq for seq, _ in sequential_data]
    y = [target for _, target in sequential_data]
    return np.array(X), np.array(y)
# Build the training and validation sets.
train_x, train_y = preprocess_df(main_df)
validation_x, validation_y = preprocess_df(validation_main_df)

# Class-balance diagnostics. train_y / validation_y are numpy arrays, which
# have no .count() method, so the occurrences are counted with numpy.
print(f"train data: {len(train_x)} validation: {len(validation_x)}")
print(f"Dont buys: {np.count_nonzero(train_y == 0)}, buys: {np.count_nonzero(train_y == 1)}")
print(f"VALIDATION Dont buys: {np.count_nonzero(validation_y == 0)}, buys: {np.count_nonzero(validation_y == 1)}")

# Three stacked LSTM layers followed by a small dense classifier head.
model = Sequential()
model.add(LSTM(128, input_shape=(train_x.shape[1:]), return_sequences=True))
model.add(Dropout(0.2))
model.add(BatchNormalization())

model.add(LSTM(128, return_sequences=True))
model.add(Dropout(0.1))
model.add(BatchNormalization())

model.add(LSTM(128))
model.add(Dropout(0.2))
model.add(BatchNormalization())

model.add(Dense(32, activation='relu'))
model.add(Dropout(0.2))

# Two output units: one per class (0 = not buy, 1 = buy).
model.add(Dense(2, activation='softmax'))

# `learning_rate` is the supported name; `lr` is a deprecated alias.
# NOTE(review): newer Keras optimizers no longer accept `decay` -- confirm
# against the installed TensorFlow version.
opt = tf.keras.optimizers.Adam(learning_rate=0.001, decay=1e-6)

# Compile model
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=opt,
    metrics=['accuracy']
)

# Unique file name containing the epoch and that epoch's validation accuracy.
# The placeholder must match the monitored metric name ('val_accuracy').
filepath = "RNN_Final-{epoch:02d}-{val_accuracy:.3f}"
# The keyword arguments belong to ModelCheckpoint, not to str.format():
# with the closing parenthesis in the wrong place they were silently ignored.
# NOTE(review): newer Keras versions may require a '.keras' or '.h5' suffix.
checkpoint = ModelCheckpoint(
    "models/{}.model".format(filepath),
    monitor='val_accuracy',
    verbose=1,
    save_best_only=True,  # saves only the best ones
    mode='max',
)

history = model.fit(
    train_x, train_y,
    batch_size=64,
    epochs=10,
    validation_data=(validation_x, validation_y),
    callbacks=[checkpoint],  # without this the checkpoint is never used
)

# Score model
score = model.evaluate(validation_x, validation_y, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])