-1

首先说明,我是 Python 新手,所学的一切都来自教程。我的问题是关于函数返回值的。我正在编写一个脚本,用来从网站上抓取一些信息。我定义了一个函数:

def MatchPattern(count):
    """Download the page for ``count`` and evaluate the selectors on it.

    The page at ``Link + str(count)`` is fetched and parsed with
    ``etree.HTML``; each selector (``check_reg``, ``no_ks``, ...) is then
    called on the parsed tree and its result stored in a local variable.

    NOTE(review): only the first ``return`` statement below can execute, so
    the caller receives ``expr1`` alone; every later ``return`` is
    unreachable.
    """
    sock = urllib.urlopen(Link+str(count))
    htmlSource = sock.read()                             
    sock.close()
    root = etree.HTML(htmlSource)
    # NOTE(review): repeats the parse on the line above with the same input.
    root = etree.HTML(htmlSource)
    # NOTE(review): `result` is never read anywhere in this function.
    result = etree.tostring(root, pretty_print=True, method="html")
    expr1 = check_reg(root)
    expr2 = check_practice(root)
    D_expr1 = no_ks(root)
    D_expr2 = Registred_by(root)
    D_expr3 = Name_doctor(root)
    D_expr4 = Registration_no(root) 
    D_expr5 = PWZL(root)
    D_expr6 = NIP(root)
    D_expr7 = Spec(root)
    D_expr8 = Start_date(root)

    #-----Reg_practice-----
    R_expr1 = Name_of_practise(root) 
    R_expr2 = TERYT(root) 
    R_expr3 = Street(root)
    R_expr4 = House_no(root)
    R_expr5 = Flat_no(root)
    R_expr6 = Post_code(root)
    R_expr7 = City(root)
    R_expr8 = Practice_no(root)
    R_expr9 = Kind_of_practice(root)

    #------Serv_practice -----
    S_expr1 = TERYT2(root)
    S_expr2 = Street2(root)
    S_expr3 = House_no2(root)
    S_expr4 = Flat_no2(root)
    S_expr5 = Post_code2(root)
    S_expr6 = City2(root)
    S_expr7 = Phone_no(root)

    # The function exits here; nothing after this line runs.
    return expr1
    return expr2
    return D_expr1
    return D_expr2
    return D_expr3
    return D_expr4 
    return D_expr5
    return D_expr6
    return D_expr7
    return D_expr8

    #-----Reg_practice-----
    return R_expr1 
    return R_expr2
    return R_expr3
    return R_expr4
    return R_expr5
    return R_expr6
    return R_expr7
    return R_expr8
    return R_expr9

    #------Serv_practice -----
    return S_expr1
    return S_expr2
    return S_expr3
    return S_expr4
    return S_expr5
    return S_expr6
    return S_expr7

所以现在我想在脚本中检查我的函数返回的 expr1 的值,但不知道该怎么做。你们能帮帮我吗?我的函数写对了吗?

编辑:我无法添加答案,所以我编辑我当前的帖子

这是我的完整脚本。有些注释是用我的母语写的,但我会补充一些英文注释。

#! /usr/bin/env python
#encoding:UTF-8-

# ----------------------------- import the required libraries and scripts -----------------------
# ------------------------------------------------------------------------------------------------
import urllib
from lxml import etree, html
import sys
import re
import MySQLdb as mdb
from TOR_connections import *
from XPathSelection import *
import os

# ------------------------------ Define the XPath selectors ----------------------------------------
# --------------------------------------------------------------------------------------------------
# Each selector is a compiled etree.XPath expression wrapped in string(...);
# MatchPattern calls every one of them on the parsed page.
# -------Doctors -----
check_reg = etree.XPath("string(//html/body/div/table[1]/tr[3]/td[2]/text())") # condition: doctor
check_practice = etree.XPath("string(//html/body/div/table[3]/tr[4]/td[2]/text())") # condition: practice

no_ks = etree.XPath("string(//html/body/div/table[1]/tr[1]/td[2]/text())")
Registred_by = etree.XPath("string(//html/body/div/table[1]/tr[4]/td[2]/text())")
Name_doctor = etree.XPath("string(//html/body/div/table[2]/tr[2]/td[2]/text())")
Registration_no = etree.XPath("string(//html/body/div/table[2]/tr[3]/td[2]/text())") 
PWZL = etree.XPath("string(//html/body/div/table[2]/tr[4]/td[2]/text())") 
NIP = etree.XPath("string(//html/body/div/table[2]/tr[5]/td[2]/text())") 
Spec = etree.XPath("string(//html/body/div/table[2]/tr[18]/td[2]/text())") 
Start_date = etree.XPath("string(//html/body/div/table[2]/tr[20]/td[2]/text())") 

#-----Reg_practice-----
Name_of_practise = etree.XPath("string(//html/body/div/table[2]/tr[1]/td[2]/text())") 
TERYT = etree.XPath("string(//html/body/div/table[2]/tr[7]/td[2]/*/text())") 
Street = etree.XPath("string(//html/body/div/table[2]/tr[8]/td[2]/text())") 
House_no = etree.XPath("string(//html/body/div/table[2]/tr[9]/td[2]/*/text())")
Flat_no = etree.XPath("string(//html/body/div/table[2]/tr[10]/td[2]/*/text())")
Post_code = etree.XPath("string(//html/body/div/table[2]/tr[11]/td[2]/*/text())")
City = etree.XPath("string(//html/body/div/table[2]/tr[12]/td[2]/*/text())") 
# NOTE(review): same XPath expression as check_practice above.
Practice_no = etree.XPath("string(//html/body/div/table[3]/tr[4]/td[2]/text())")
Kind_of_practice = etree.XPath("string(//html/body/div/table[3]/tr[5]/td[2]/text())")

#------Serv_practice -----
TERYT2 = etree.XPath("string(//html/body/div/table[3]/tr[14]/td/table/tr[2]/td[2]/*/text())") 
Street2 = etree.XPath("string(//html/body/div/table[3]/tr[14]/td/table/tr[3]/td[2]/text())") 
House_no2 = etree.XPath("string(//html/body/div/table[3]/tr[14]/td/table/tr[4]/td[2]/*/text())") 
Flat_no2 = etree.XPath("string(//html/body/div/table[3]/tr[14]/td/table/tr[5]/td[2]/i/text())") 
Post_code2 = etree.XPath("string(//html/body/div/table[3]/tr[14]/td/table/tr[6]/td[2]/*/text())") 
City2 = etree.XPath("string(//html/body/div/table[3]/tr[14]/td/table/tr[7]/td[2]/*/text())")
Phone_no = etree.XPath("string(//html/body/div/table[3]/tr[14]/td/table/tr[8]/td[2]/text())")

# --------------------------- global variable declarations -------------------------------------
# ----------------------------------------------------------------------------------------------
# Countdown used by the main loop: decremented per record and reset to 10-1 once it reaches 0.
decrease = 9
# NOTE(review): not referenced anywhere else in the visible script.
No = 1
# Base URL; MatchPattern appends str(count) to it to build the page address.
Link = "http://rpwdl.csioz.gov.pl/rpz/druk/wyswietlKsiegaServletPub?idKsiega="

# --------------------------- defined functions ----------------------------------
# ----------------------------------------------------------------------------------------------
def MatchPattern(count):
    """Download the page for ``count`` and evaluate the selectors on it.

    The page at ``Link + str(count)`` is fetched and parsed with
    ``etree.HTML``; each module-level XPath selector (``check_reg``,
    ``no_ks``, ...) is then called on the parsed tree and its result stored
    in a local variable.

    NOTE(review): only the first ``return`` statement below can execute, so
    the caller receives ``expr1`` alone; every later ``return`` is
    unreachable.
    """
    sock = urllib.urlopen(Link+str(count))
    htmlSource = sock.read()                             
    sock.close()
    root = etree.HTML(htmlSource)
    # NOTE(review): repeats the parse on the line above with the same input.
    root = etree.HTML(htmlSource)
    # NOTE(review): `result` is never read anywhere in this function.
    result = etree.tostring(root, pretty_print=True, method="html")
    expr1 = check_reg(root)
    expr2 = check_practice(root)
    D_expr1 = no_ks(root)
    D_expr2 = Registred_by(root)
    D_expr3 = Name_doctor(root)
    D_expr4 = Registration_no(root) 
    D_expr5 = PWZL(root)
    D_expr6 = NIP(root)
    D_expr7 = Spec(root)
    D_expr8 = Start_date(root)

    #-----Reg_practice-----
    R_expr1 = Name_of_practise(root) 
    R_expr2 = TERYT(root) 
    R_expr3 = Street(root)
    R_expr4 = House_no(root)
    R_expr5 = Flat_no(root)
    R_expr6 = Post_code(root)
    R_expr7 = City(root)
    R_expr8 = Practice_no(root)
    R_expr9 = Kind_of_practice(root)

    #------Serv_practice -----
    S_expr1 = TERYT2(root)
    S_expr2 = Street2(root)
    S_expr3 = House_no2(root)
    S_expr4 = Flat_no2(root)
    S_expr5 = Post_code2(root)
    S_expr6 = City2(root)
    S_expr7 = Phone_no(root)

    # The function exits here; nothing after this line runs.
    return expr1
    return expr2
    return D_expr1
    return D_expr2
    return D_expr3
    return D_expr4 
    return D_expr5
    return D_expr6
    return D_expr7
    return D_expr8

    #-----Reg_practice-----
    return R_expr1 
    return R_expr2
    return R_expr3
    return R_expr4
    return R_expr5
    return R_expr6
    return R_expr7
    return R_expr8
    return R_expr9

    #------Serv_practice -----
    return S_expr1
    return S_expr2
    return S_expr3
    return S_expr4
    return S_expr5
    return S_expr6
    return S_expr7




# --------------------------- establish the database connection --------------------------------
# ----------------------------------------------------------------------------------------------
con = mdb.connect('localhost', 'root', '******', 'SANBROKER', charset='utf8');

# ---------------------------- start of the program --------------------------------------------
# ----------------------------------------------------------------------------------------------

with con: 
    cur = con.cursor()
    cur.execute("SELECT Old_num FROM SANBROKER.Number_of_records;")
    Old_num = cur.fetchone()
    # Start counting from the Old_num value read from the database.
    count = Old_num[0]
    # NOTE(review): this script uses Python 2 print statements; in Python 2,
    # input() evaluates the typed text as an expression (raw_input() returns
    # it as a string).
    counter = input("Input number of rows: ")

    # ----------------------- first connection to TOR ----------------------------------------
    # ----------------------------------------------------------------------------------------
    #connectTor()
    #conn = httplib.HTTPConnection("my-ip.heroku.com")
    #conn.request("GET", "/")
    #response = conn.getresponse()
    #print(response.read())

    while count <= counter: # every tenth number
        # --------------- first write to the database, into Archive ----------------------
        with con:
            cur = con.cursor()
            # NOTE(review): (count) is a parenthesised value, not a 1-tuple; a
            # 1-tuple would be written (count,).
            cur.execute("UPDATE SANBROKER.Number_of_records  SET Archive_num=%s",(count))

        # ---------------------------------------------------------------------------------
        # NOTE(review): the two branches below are identical except for how
        # `decrease` is updated (reset to 10-1 vs. decremented by one).
        if decrease == 0:
            # The value returned by MatchPattern is discarded here.
            MatchPattern(count)

            # Now I wanna check some expresions (2 or 3)
            # After that i wanna write all the values into my database


            #------- final steps:
            # NOTE(review): divides by the constant 100, not by `counter` --
            # confirm this is the intended percentage.
            percentage = count / 100
            print "rekordów: " + str(count) + " z: " + str(counter) + " procent dodanych: " + str(percentage) + "%"  
            with con:
                cur = con.cursor()
                cur.execute("UPDATE SANBROKER.Number_of_records SET Old_num=%s",(count))
            decrease = 10-1
            count +=1
        else:
            # The value returned by MatchPattern is discarded here.
            MatchPattern(count)

            # Now I wanna check some expresions (2 or 3)
            # After that i wanna write all the values into my database

            # ------ final steps:
            # NOTE(review): divides by the constant 100, not by `counter` --
            # confirm this is the intended percentage.
            percentage = count / 100
            print "rekordów: " + str(count) + " z: " + str(counter) + " procent dodanych: " + str(percentage) + "%"
            with con:
                cur = con.cursor()
                cur.execute("UPDATE SANBROKER.Number_of_records SET Old_num=%s",(count))
            decrease -=1
            count +=1
4

3 回答 3

0

好吧,我假设 check_reg 是一个返回布尔值(True 或 False)的函数。

如果是这种情况,可以这样检查返回值:

# Branches on the truthiness of expr1: empty strings, 0, None and False take
# the else branch; any other value takes the first branch.
if expr1:
   print "True."
else: 
   print "False"

做法不止一种,但基本上只需要用 if expr1: 进行判断即可。

于 2013-04-24T06:13:26.743 回答
0

要获取函数的返回值,请用等号把函数调用的结果赋给一个变量名,如下所示:

# Bind whatever the call returns to a name so it can be inspected afterwards.
return_value = somefunction(some_value)
# NOTE(review): with the Python 2 print statement the parentheses form a
# tuple, so this prints the tuple's repr; as the Python 3 print() function it
# prints the two items separated by a space.
print('The return value is ',return_value)

请记住,当执行到第一条 return 语句时,函数就会退出。因此,如果您有多条 return 语句,只有第一条会被执行。

如果要返回多个事物,请将它们添加到列表中,然后返回列表。

这是您的函数的改进版本:

def match_pattern(count):

    sock = urllib.urlopen(Link+str(count))
    htmlsource = sock.read()                             
    sock.close()
    root = etree.HTML(htmlSource)
    # root = etree.HTML(htmlSource) - duplicate line
    # result = etree.tostring(root, pretty_print=True, method="html")
    function_names = [check_reg, check_practice, no_ks, Registered_by, \
                      Name_doctor, Registration_no, PWZL, NIP, Spec, Start_date, \
                      Name_of_practise, TERYT, Street, House_no2, Flat_no, \
                      Post_code2, City2, Phone_no]
    results = []
    for function in function_names:
         results.append(function(root))

    return results

r = match_pattern(1)
print r[0] # this will be the result of check_reg(root)
于 2013-04-24T06:14:12.750 回答
0

您发布的代码结构不太清楚。您能否修复缩进,让我们知道哪些部分属于函数、哪些部分属于脚本?

一个函数只能返回一个值。你不能这样做:

return something
return something_else
return ...

该函数将在返回第一个值时结束。您可以做的是返回包含所有值的列表、元组或字典。例如 :

return (something,something_else,...)

或者

return [something,something_else,...]

在您的情况下,创建一个将您想要的所有值作为属性的类似乎更好,并将此函数转换为设置属性值的方法。

class Example(object):
    """Values extracted from one page, exposed as attributes.

    The constructor downloads ``link + str(count)``, parses it with
    ``etree.HTML`` and stores the result of every module-level XPath
    selector on the instance (``expr1``, ``D_expr1`` ... ``S_expr7``).
    """

    def __init__(self, link, count):
        """Fetch the page and populate the attributes.

        Args:
            link: base URL; ``str(count)`` is appended to it.
            count: record id identifying the page to fetch.
        """
        sock = urllib.urlopen(link + str(count))
        try:
            html_source = sock.read()
        finally:
            # Close the connection even when the read fails.
            sock.close()
        root = etree.HTML(html_source)

        self.expr1 = check_reg(root)
        self.expr2 = check_practice(root)
        self.D_expr1 = no_ks(root)
        self.D_expr2 = Registred_by(root)
        self.D_expr3 = Name_doctor(root)
        self.D_expr4 = Registration_no(root)
        self.D_expr5 = PWZL(root)
        self.D_expr6 = NIP(root)
        self.D_expr7 = Spec(root)
        self.D_expr8 = Start_date(root)

        #-----Reg_practice-----
        self.R_expr1 = Name_of_practise(root)
        self.R_expr2 = TERYT(root)
        self.R_expr3 = Street(root)
        self.R_expr4 = House_no(root)
        self.R_expr5 = Flat_no(root)
        self.R_expr6 = Post_code(root)
        self.R_expr7 = City(root)
        self.R_expr8 = Practice_no(root)
        self.R_expr9 = Kind_of_practice(root)

        #------Serv_practice -----
        self.S_expr1 = TERYT2(root)
        self.S_expr2 = Street2(root)
        self.S_expr3 = House_no2(root)
        self.S_expr4 = Flat_no2(root)
        self.S_expr5 = Post_code2(root)
        self.S_expr6 = City2(root)
        self.S_expr7 = Phone_no(root)

然后你就可以像这样使用这个类:

exampleInstance = Example ( "link you want to use" , 4 ) # the second argument is your 'count' value

# Now you can use attributes of your class to get the values you want
print exampleInstance . expr1
print exampleInstance . S_expr7
于 2013-04-24T06:14:35.343 回答