I use Google App Engine's Search API and I'm getting some strange results. The number of matching documents it reports is about ten times too high. In my mapreduce job I use an `if` test to check that the entity is visible (a boolean variable) and that the entity was modified during the last 60 days; only entities modified during the last 60 days should be in the index. So what do you think I could be doing wrong? The strange thing is that the reported count is about the same as the total count for a blank query, which should match everything in the index, and it is about ten times as high as it should be. Only the number is wrong, though: when I page through the actual result set it has the correct length, and on the last page the reported total number of matched entities is correct. Can you help me? On every page of the pagination except the last one, the reported number of results is too high.
Only on the last page is the correct total number of entities displayed. Why?
The mapreduce code I use to build the index is:
def _coerce_number(entity, attr, default, cast):
    """Return ``cast(entity.<attr>)``, or *default* if unset or unconvertible.

    Args:
        entity: The datastore entity being indexed.
        attr: Name of the attribute to read from *entity*.
        default: Value returned when the attribute is falsy or conversion fails.
        cast: Callable used for the conversion (e.g. ``int`` or ``long``).
    """
    try:
        raw = getattr(entity, attr)
        if raw:
            return cast(raw)
    except Exception:
        # Deliberately broad: a bad value in one field must not prevent the
        # rest of the entity from being indexed.
        logging.info('%s conversion failed for entity %s', attr,
                     str(entity.key().id()))
    return default


def index(entity):
    """Mapreduce handler that puts one published, recent entity in the index.

    An entity is indexed only when ``entity.published`` is truthy and
    ``entity.modified`` is within the last 60 days; otherwise nothing is
    yielded.

    NOTE(review): entities that fail this check are only skipped here. A
    document put by an earlier run is presumably still in the index afterwards
    -- confirm whether it needs to be deleted explicitly.

    Args:
        entity: The datastore entity handed to the mapper.

    Yields:
        The return value of ``search.Index(name='ads').put(doc)``.

    Any exception raised while building or storing the document is logged
    (with traceback) and swallowed so that the handler itself does not raise.
    """
    try:
        cutoff = datetime.datetime.now() - datetime.timedelta(days=60)
        if not (entity.published and entity.modified > cutoff):
            return

        # Resolve the city / region names to entities so that their numeric
        # ids can be stored next to the names.
        city_entity = montaomodel.City.all().filter(
            'name =', entity.city).get()
        region_entity = montaomodel.Region.all().filter(
            'name =', entity.region).get()

        # Numeric fields fall back to a default (0 for price, -1 for the
        # others) when the value is missing or cannot be converted.
        price = _coerce_number(entity, 'price', 0, long)
        mileage = _coerce_number(entity, 'mileage', -1, int)
        regdate = _coerce_number(entity, 'regdate', -1, int)

        company_ad = 1 if entity.company_ad else 0
        city_id = city_entity.key().id() if city_entity else 0
        region_id = region_entity.key().id() if region_entity else 0
        category = entity.category or 0

        doc = search.Document(
            # Using the entity key as doc_id means a later put for the same
            # entity uses the same document id.
            doc_id=str(entity.key()),
            fields=[
                search.TextField(name='title', value=entity.title),
                search.TextField(name='text', value=entity.text),
                search.TextField(name='city', value=entity.city),
                search.TextField(name='region', value=entity.region),
                search.NumberField(name='cityID', value=int(city_id)),
                search.NumberField(name='regionID', value=int(region_id)),
                search.NumberField(name='category', value=int(category)),
                search.NumberField(name='constant', value=1),
                search.NumberField(name='adID',
                                   value=int(entity.key().id())),
                search.TextField(name='name', value=entity.name),
                search.DateField(name='date',
                                 value=entity.modified.date()),
                search.NumberField(name='price', value=long(price)),
                search.NumberField(name='mileage', value=int(mileage)),
                search.NumberField(name='regdate', value=int(regdate)),
                search.TextField(name='type', value=entity.type),
                search.TextField(name='currency', value=entity.currency),
                search.NumberField(name='company_ad', value=company_ad),
                # The date field holds only the day, so the time of day of
                # the modification is stored in two separate fields.
                search.NumberField(name='hour',
                                   value=entity.modified.hour),
                search.NumberField(name='minute',
                                   value=entity.modified.minute),
            ],
            language='en')
        yield search.Index(name='ads').put(doc)
    except Exception as e:
        # Top-level boundary of the handler: log with traceback, never raise.
        logging.exception('There occurred exception:%s', e)
The search code is:
def find_documents(query_string, limit, cursor, number_found_accuracy=1000):
    """Search the index and return one page of results, newest first.

    Args:
        query_string: Search API query string to match documents against.
        limit: The number of results to return for this page.
        cursor: Search cursor marking where the page starts.
        number_found_accuracy: Minimum accuracy requested for the
            ``number_found`` value of the returned results. Without it the
            Search API only reports an estimate of the match count; with it
            the count is exact up to this many matches. Pass ``None`` to get
            the plain estimate. See the Search API documentation for the
            permitted maximum.

    Returns:
        The object returned by ``Index.search``, or ``None`` if the search
        raised ``search.Error``.
    """
    try:
        # NOTE(review): ``datetime`` is used here as the class
        # (``from datetime import datetime``) -- confirm this matches the
        # import style of the module this function lives in.
        newest_first = search.SortExpression(
            expression='date',
            direction=search.SortExpression.DESCENDING,
            default_value=datetime.now().date())
        # Sort up to 1000 matching results by date in descending order.
        sort = search.SortOptions(expressions=[newest_first], limit=1000)
        options = search.QueryOptions(
            limit=limit,
            cursor=cursor,
            sort_options=sort,
            # Ask for an exact count instead of the default estimate.
            number_found_accuracy=number_found_accuracy,
        )
        query = search.Query(query_string=query_string, options=options)
        return search.Index(name=_INDEX_NAME).search(query)
    except search.Error:
        logging.exception('Search failed')
        return None