我正在尝试将自定义容器部署到 Vertex AI 端点以进行预测。应用程序的内容如下。
- Flask - app.py
import os

import pandas as pd
import tensorflow
from flask import Flask, jsonify, request

import pre_process
import post_process
app = Flask(__name__)

# Cached Keras model. It is filled in on first use so the slow load from disk
# happens once per worker process instead of on every request.
_model = None


def _get_model():
    """Return the Keras model, loading it from the 'model' path on first use."""
    global _model
    if _model is None:
        _model = tensorflow.keras.models.load_model('model')
    return _model


# Vertex AI probes a health route before it sends traffic to the container and
# publishes the configured path in AIP_HEALTH_ROUTE; '/health' is a local
# fallback for runs outside Vertex AI.
@app.route(os.environ.get('AIP_HEALTH_ROUTE', '/health'), methods=['GET'])
def health():
    """Report that the server process is up (HTTP 200)."""
    return jsonify({'status': 'ok'})


@app.route('/predict', methods=['POST'])
def predict():
    """Classify the e-mail text of the first instance in the request.

    Expects a JSON body shaped like ``{"instances": [{"email": "<text>"}]}``.
    Only the first instance is read.

    Returns:
        A JSON response ``{"output": {"doc_class": <label>}}`` on success, or
        a 400 response with an ``error`` message when the body is malformed.
    """
    # silent=True yields None for a missing/invalid JSON body instead of
    # raising, so malformed requests get a 400 rather than a 500.
    payload = request.get_json(silent=True)
    instances = payload.get('instances') if isinstance(payload, dict) else None
    if (
        not isinstance(instances, list)
        or not instances
        or not isinstance(instances[0], dict)
        or not isinstance(instances[0].get('email'), str)
    ):
        return jsonify(
            {'error': 'expected JSON body {"instances": [{"email": "<text>"}]}'}
        ), 400
    input_data = instances[0]['email']

    # Pre-processing: clean the text, then turn it into a padded id sequence.
    text = pre_process.preprocess(input_data)
    vector = pre_process.preprocess_tokenizing(text)

    # Prediction with the cached model.
    prediction = _get_model().predict(vector)

    # Post-processing: map the score vector of the first row to a class label.
    value = post_process.postprocess(list(prediction[0]))

    # NOTE(review): Vertex AI's predict API documents a top-level
    # "predictions" list in the response body; confirm whether this "output"
    # shape is accepted by the endpoint or needs to be migrated.
    return jsonify({'output': {'doc_class': value}})


if __name__ == '__main__':
    # Development server only. Honour the port Vertex AI assigns when it is
    # set, otherwise keep Flask's default of 5000.
    app.run(host='0.0.0.0', port=int(os.environ.get('AIP_HTTP_PORT', 5000)))
- Dockerfile
# Image for the Flask prediction server.
FROM python:3.7
WORKDIR /app
# Copies the whole build context (application code and any model artefacts
# sitting next to it) into the image.
COPY . /app
RUN pip install --trusted-host pypi.python.org -r requirements.txt
# Documents the default port gunicorn binds to below. The previous value,
# 5050, did not match the 5000 used in the bind address.
EXPOSE 5000
# Shell form so the variable is expanded: bind to the port Vertex AI passes in
# AIP_HTTP_PORT, falling back to 5000 for local runs. `exec` keeps gunicorn as
# PID 1 so it receives stop signals directly.
CMD exec gunicorn --bind 0.0.0.0:${AIP_HTTP_PORT:-5000} app:app
- pre_process.py
#import
import pandas as pd
import pickle
import re
import tensorflow as tf
from tensorflow.keras.preprocessing.sequence import pad_sequences
def preprocess(text):
    """Clean raw e-mail text into a lowercase, space-separated string.

    The steps run in a fixed order: header lines and markup are removed,
    contractions are expanded, digits and stray underscores are dropped,
    and finally only words of 3 to 14 characters are kept.

    Args:
        text: Raw e-mail text.

    Returns:
        The cleaned text as a single string (empty if nothing survives).
    """
    # ``flags`` must be passed by keyword: the fourth positional argument of
    # ``re.sub`` is ``count``, so a positional ``re.I`` capped the number of
    # replacements at 2 and never enabled case-insensitive matching.

    # Replace everything from "Subject:" to the end of that line by a space.
    text = re.sub(r"(Subject:).+", " ", text, flags=re.I)
    # Delete everything from "Write to:" or "From:" to the end of that line.
    text = re.sub(r"((Write to:)|(From:)).+", "", text, flags=re.I)
    # Delete tags such as "<anyword>".
    text = re.sub(r"<[^><]+>", "", text)
    # Delete text enclosed in (non-nested) parentheses.
    text = re.sub(r"\([^()]+\)", "", text)
    # Remove newlines, tabs and hyphens.
    text = re.sub(r"[\n\t\-]+", "", text)
    # Remove words that end with ":".
    text = re.sub(r"(\w+:)", "", text)

    # Expand contractions. Order matters: "n't" must be handled before "'t".
    contractions = (
        (r"n\'t", " not"),
        (r"\'re", " are"),
        (r"\'s", " is"),
        (r"\'d", " would"),
        (r"\'ll", " will"),
        (r"\'t", " not"),
        (r"\'ve", " have"),
        (r"\'m", " am"),
    )
    for pattern, replacement in contractions:
        text = re.sub(pattern, replacement, text)

    # Replace runs of digits with a space.
    text = re.sub(r"\d+", " ", text)
    # Drop an underscore that touches whitespace (word starts/ends with "_").
    text = re.sub(r"(\s_)|(_\s)", " ", text)
    # Remove a 1-2 character prefix followed by "_".
    text = re.sub(r"\w{1,2}_", "", text)

    # Lowercase, then keep only words whose length is between 3 and 14.
    text = text.lower()
    text = " ".join(word for word in text.split() if 2 < len(word) < 15)

    # Collapse every run of non-word characters into a single space.
    return re.sub(r"\W+", " ", text)
def preprocess_tokenizing(text, max_length=1019, tokenizer_path='tokenizer.pkl'):
    """Encode cleaned text as a fixed-length sequence of token ids.

    Args:
        text: Pre-processed text to encode.
        max_length: Length the encoded sequence is padded (or truncated) to.
        tokenizer_path: Path of the pickled tokenizer to load.

    Returns:
        The result of ``pad_sequences`` for the single encoded document,
        padded at the end ('post') to ``max_length``.
    """
    # NOTE(review): pickle executes arbitrary code on load; only load a
    # tokenizer file that was produced by a trusted training run.
    # Presumably the pickle holds a fitted Keras Tokenizer - confirm.
    with open(tokenizer_path, 'rb') as handle:
        tokenizer = pickle.load(handle)

    # The tokenizer is deliberately NOT re-fitted here: calling fit_on_texts
    # at inference time changes the vocabulary the model was trained with.
    encoded_docs = tokenizer.texts_to_sequences([text])
    return pad_sequences(encoded_docs, maxlen=max_length, padding='post')
- post_process.py
def postprocess(vector):
    """Return the class label (1-20) for the highest score in ``vector``.

    ``vector`` is a list of scores; position ``i`` corresponds to label
    ``i + 1``. When several entries share the maximum, the first one wins.
    """
    labels = range(1, 21)
    best_score = max(vector)
    winner = vector.index(best_score)
    return labels[winner]
- requirements.txt
# Python dependencies installed by the Dockerfile's `pip install -r` step.
# WSGI server started by the container's CMD.
gunicorn
pandas==1.3.3
numpy==1.19.5
# Web framework that serves the prediction route in app.py.
flask
flask-cors
h5py==3.1.0
scikit-learn==0.24.2
# Used to load the saved model and to pad the token sequences.
tensorflow==2.6.0
- model
- tokenizer.pkl
我参照这篇关于 Vertex AI 部署的博客，使用其中的 gcloud 命令将模型容器化并部署到端点。但是模型部署耗时很长，最终部署失败。
在本地运行该容器时，它可以按预期工作，但无法部署到 Vertex AI 端点。我不清楚问题是出在 Flask 的 app.py 还是 Dockerfile 中，或者出在其他地方。