I currently have a utilities.py file with this machine learning function:
from sklearn.pipeline import Pipeline
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.model_selection import train_test_split
import models
import random

# lowercase word list, used both as positive examples and as the source of scrambles
words = [w.strip() for w in open('words.txt') if w == w.lower()]

def scramble(s):
    return "".join(random.sample(s, len(s)))

@models.db_session
def check_pronounceability(word):
    # training data: real words vs. scrambled (assumed unpronounceable) strings
    scrambled = [scramble(w) for w in words]
    X = words + scrambled
    y = ['word'] * len(words) + ['unpronounceable'] * len(scrambled)
    X_train, X_test, y_train, y_test = train_test_split(X, y)
    # character n-gram Naive Bayes classifier, fitted on every call
    text_clf = Pipeline([
        ('vect', CountVectorizer(analyzer='char', ngram_range=(1, 3))),
        ('clf', MultinomialNB()),
    ])
    text_clf = text_clf.fit(X_train, y_train)
    stuff = text_clf.predict_proba([word])
    pronounceability = round(100 * stuff[0][1], 2)
    models.Word(word=word, pronounceability=pronounceability)
    models.commit()
    return pronounceability
Then I call it from my app.py:
from flask import Flask, render_template, jsonify, request
from rq import Queue
from rq.job import Job
from worker import conn
from flask_cors import CORS
from utilities import check_pronounceability

app = Flask(__name__)
q = Queue(connection=conn)
import models

@app.route('/api/word', methods=['POST', 'GET'])
@models.db_session
def check():
    if request.method == "POST":
        word = request.form['word']
        if not word:
            return render_template('index.html')
        db_word = models.Word.get(word=word)
        if not db_word:
            job = q.enqueue_call(check_pronounceability, args=(word,))
            return jsonify(job=job.id)
After reading the python-rq performance notes, which say: "One pattern you can use to improve the throughput performance of these kinds of jobs is to import the necessary modules before the fork."
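The way I read that pattern is that whatever runs at module import time happens once per worker process, before it forks to run each job, while everything inside the job function runs again for every enqueued job. A tiny illustration of how I understand it, with made-up names (not my real code):

# heavy_module.py -- made-up example of "import before the fork"
import time

# Module-level setup: runs once, when the worker imports this module.
time.sleep(5)                             # stands in for loading data / fitting a model
LOOKUP = {i: i * i for i in range(1_000_000)}

def job(n):
    # Per-job work: this is all that runs for each enqueued job.
    return LOOKUP.get(n)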
I then made my worker.py file look like this:
import os
import redis
from rq import Worker, Queue, Connection

listen = ['default']
redis_url = os.getenv('REDISTOGO_URL', 'redis://localhost:6379')
conn = redis.from_url(redis_url)
# imported at module level so the worker loads utilities before forking for each job
import utilities

if __name__ == '__main__':
    with Connection(conn):
        worker = Worker(list(map(Queue, listen)))
        worker.work()
The problem I'm running into is that this still runs slowly. Am I doing something wrong? Is there any way to speed things up by keeping everything in memory when I check a word? Based on the xpost I made to python-rq, it seems I'm importing it correctly.
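To be concrete about what I mean by "keeping everything in memory": something like the sketch below, where the pipeline is trained once at import time and check_pronounceability only does the prediction and the database write. This is just a rough, untested sketch of the idea, not code I'm currently running:

# utilities.py -- rough sketch of training once at import time (untested)
from sklearn.pipeline import Pipeline
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
import models
import random

words = [w.strip() for w in open('words.txt') if w == w.lower()]

def scramble(s):
    return "".join(random.sample(s, len(s)))

# Trained once, when the worker imports utilities (i.e. before the fork),
# instead of on every call to check_pronounceability.
_scrambled = [scramble(w) for w in words]
_clf = Pipeline([
    ('vect', CountVectorizer(analyzer='char', ngram_range=(1, 3))),
    ('clf', MultinomialNB()),
]).fit(words + _scrambled,
       ['word'] * len(words) + ['unpronounceable'] * len(_scrambled))

@models.db_session
def check_pronounceability(word):
    # Per-job work: a single predict_proba call plus the database write.
    pronounceability = round(100 * _clf.predict_proba([word])[0][1], 2)
    models.Word(word=word, pronounceability=pronounceability)
    models.commit()
    return pronounceability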