Ver código fonte

所以查询页面api全部抓取完毕

sptkw 1 ano atrás
pai
commit
3af0d84522

+ 0 - 1
.idea/sqldialects.xml

@@ -1,7 +1,6 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <project version="4">
   <component name="SqlDialectMappings">
-    <file url="file://$PROJECT_DIR$/amac/person_org_registration_user_api.py" dialect="GenericSQL" />
     <file url="PROJECT" dialect="MySQL" />
   </component>
 </project>

+ 4 - 5
amac/main.py

@@ -4,9 +4,9 @@ print("执行 开始")
 
 # 定义要运行的 Python 文件列表
 python_files = [
-    # 'member_api.py',  # 会员机构综合查询-会员表
-    # 'person_org_registration_api.py',  # 基金从业人员资格注册信息-人员机构注册表
-    # 'person_org_registration_user_api.py',  # 基金从业人员资格注册信息-人员机构用户信息表、人员机构用户证书表
+    'member_api_new.py',  # 会员机构综合查询(以及证券公司私募基金子公司管理人信息公示,即整体会员中的其中一部分筛选)-会员表
+    # 'person_org_registration_api_new.py',  # 基金从业人员资格注册信息-人员机构注册表
+    # 'person_org_registration_user_api_new.py',  # 基金从业人员资格注册信息-人员机构用户信息表、人员机构用户证书表
     # 'amac_private_fund_manager_product_api.py'  # 私募基金管理人基金产品
     # 'amac_securities_fund_disclosure_api.py'  # 证券公司集合资管产品公示
     # 'amac_securities_direct_investment_funds_api.py'  # 证券公司直投基金
@@ -19,8 +19,7 @@ python_files = [
     # 'amac_private_fund_registration_process_api.py'  # 私募基金管理人登记办理流程
     # 'amac_suspended_institution_api.py'  # 中止机构-[数据为空,无法知道数据类型]
     # 'amac_terminated_institution_api.py'  # 终止机构
-    # ''  #
-    'amac_canceled_fund_manager_api.py'  # 已注销私募基金管理人
+    # 'amac_canceled_fund_manager_api.py'  # 已注销私募基金管理人
 
 ]
 

+ 43 - 0
amac/member_api_new.py

@@ -0,0 +1,43 @@
+import json
+
+import Utils
+from Config import mysql_pool, conn, headers, page, size
+
+start_time = Utils.data_time()  # start timestamp; printed again by the closing log line of this script
+print("开始时间(精确到毫秒)[会员机构综合查询]:", start_time)
+
+
+# Convert a batch of API items (dict-like) into row dicts and bulk-insert them into amac_member_institution.
+def savetodb(data):
+    # A None payload means there is nothing to store; return without touching the database.
+    if data is None: return
+
+    person_record = []
+    for item in data:
+        # Map the camelCase API fields onto snake_case keys; missing fields become None via .get().
+        this_data = {
+            'user_tenant_id': item.get("userTenantId"),
+            'manager_name': item.get("managerName"),
+            'member_behalf': item.get("memberBehalf"),
+            'member_type': item.get("memberType"),
+            'member_code': item.get("memberCode"),
+            'member_date': item.get("memberDate"),
+            'primary_invest_type': item.get("primaryInvestType"),
+            'mark_star': item.get("markStar")
+        }
+
+        person_record.append(this_data)
+    # Hand the whole batch to mysql_pool.insert in a single call.
+    mysql_pool.insert('amac_member_institution', person_record)
+
+
+this_page = page
+this_size = size
+payload = json.dumps({})
+
+# Endpoint for the member institution comprehensive query
+http_url = "/amac-infodisc/api/pof/pofMember"
+
+Utils.get_page_result(http_url, this_page, this_size, payload, headers, conn, savetodb, __file__)  # savetodb is passed in as an argument; presumably invoked per fetched page — confirm in Utils
+
+print(f"[会员机构综合查询]结束时间(精确到毫秒): {Utils.data_time()} - {start_time}")  # prints the end and start timestamps side by side; no elapsed time is computed

+ 0 - 78
amac/person_org_registration_api.py

@@ -1,78 +0,0 @@
-import http.client
-import json
-import random
-
-from Config import mysql_pool, conn, headers, page, size
-
-# 首先查出所有类型 amac_org_type ,然后根据类型循环遍历入库
-amac_org_type = mysql_pool.fetchall("select * from amac_org_type");
-for amac_org_type_item in amac_org_type:
-    org_type_code = amac_org_type_item.get("org_type_code")
-    org_type_name = amac_org_type_item.get("org_type_name")
-    this_page = page
-    this_size = size
-    # org_type_code = "gmjjglgs"
-    # org_type_name = "公募基金管理公司"
-    print("开始处理类型:", org_type_code, org_type_name)
-
-    payload = json.dumps({"page": 0, "orgType": str(org_type_code), "orgName": ""})
-
-    http_url = "/amac-infodisc/api/pof/personOrg"
-
-    # 生成一个0到1之间的随机浮点数,并格式化为指定的小数位数
-    random_float = round(random.random(), 14)  # 保留14位小数
-    print("请求url=", http_url + "?rand=" + str(random_float) + "&page=" + str(this_page) + "&size=" + str(this_size))
-    conn.request("POST",
-                 http_url + "?rand=(" + str(random_float) + ")&page=" + str(this_page) + "&size=" + str(this_size),
-                 payload,
-                 headers)
-
-    res = conn.getresponse()
-
-    data = res.read()
-
-    json_str = data.decode("utf-8")
-    print("JSON String", json_str)
-
-    try:
-        json_obj = json.loads(json_str)
-        print("获取到总条数为:", json_obj.get("totalPages"))
-
-        # 插入数据示例
-        data_to_insert = json_obj.get("content")
-        print("\n开始插入数据==>" + str(this_page) + "*20\n")
-        mysql_pool.insert('amac_person_org_registration', data_to_insert)
-
-        # 开始循环
-        if (json_obj.get("totalPages") > 1):
-            for i in range(1, json_obj.get("totalPages")):
-                this_page = i
-                # # 将 JSON 字符串转换为 Python 字典
-                # data = json.loads(payload)
-                # # 重新给 'page' 字段赋值
-                # data["page"] = page
-                # # 将修改后的字典转换回 JSON 字符串
-                # payload = json.dumps(data)
-                print("请求url=",
-                      http_url + "?rand=" + str(random_float) + "&page=" + str(this_page) + "&size=" + str(this_size))
-                conn.request("POST",
-                             http_url + "?rand=" + str(random_float) + "&page=" + str(this_page) + "&size=" + str(
-                                 this_size),
-                             payload, headers)
-                res = conn.getresponse()
-                data = res.read()
-                json_str = data.decode("utf-8")
-                print("JSON String", json_str)
-                try:
-                    json_obj_arr = json.loads(json_str)
-                    print("获取到总条数为:", json_obj_arr.get("totalPages"))
-                    # 插入数据示例
-                    data_to_insert = json_obj_arr.get("content")
-                    print("\n开始插入数据==>" + str(i) + "*20\n")
-                    mysql_pool.insert('amac_person_org_registration', data_to_insert)
-
-                except json.JSONDecodeError as e:
-                    print(f"循环 Error decoding JSON: {e}")
-
-    except json.JSONDecodeError as e:
-        print(f"请求Error decoding JSON: {e}")

+ 58 - 0
amac/person_org_registration_api_new.py

@@ -0,0 +1,58 @@
+import json
+
+import Utils
+from Config import mysql_pool, conn, headers, page, size
+
+start_time = Utils.data_time()
+print("开始时间(精确到毫秒)[终止机构]:", start_time)  # NOTE(review): the "[终止机构]" (terminated institutions) tag looks copy-pasted; this script queries /amac-infodisc/api/pof/personOrg — confirm
+
+
+# Convert a batch of API items (dict-like) into row dicts and bulk-insert them into amac_person_org_registration.
+def savetodb(data):
+    # A None payload means there is nothing to store; return without touching the database.
+    if data is None: return
+
+    person_record = []
+    for item in data:
+        # Map the camelCase API fields onto snake_case keys; missing fields become None via .get().
+        this_data = {
+            'user_id': item.get("userId"),
+            'org_code': item.get("orgCode"),
+            'org_type': item.get("orgType"),
+            'org_name': item.get("orgName"),
+            'org_name_chinese_spell': item.get("orgNameChineseSpell"),
+            'worker_total_num': item.get("workerTotalNum"),
+            'oper_num': item.get("operNum"),
+            'salesman_num': item.get("salesmanNum"),
+            'investment_manager_num': item.get("investmentManagerNum"),
+            'fund_manager_num': item.get("fundManagerNum"),
+            'ext_worker_total_num': item.get("extWorkerTotalNum"),
+            'ext_oper_num': item.get("extOperNum"),
+            'ext_salesman_num': item.get("extSalesmanNum"),
+            'ext_investment_manager_num': item.get("extInvestmentManagerNum"),
+            'ext_fund_manager_num': item.get("extFundManagerNum")
+        }
+
+        person_record.append(this_data)
+    # Hand the whole batch to mysql_pool.insert in a single call.
+    mysql_pool.insert('amac_person_org_registration', person_record)
+
+
+# Load every row of amac_org_type first, then call Utils.get_page_result once per org type so savetodb can store the results.
+amac_org_type = mysql_pool.fetchall("select * from amac_org_type");
+for amac_org_type_item in amac_org_type:
+    org_type_code = amac_org_type_item.get("org_type_code")
+    org_type_name = amac_org_type_item.get("org_type_name")
+    this_page = page
+    this_size = size
+    # Debug override (disabled): org_type_code = "gmjjglgs"
+    # Debug override (disabled): org_type_name = "公募基金管理公司"
+    print("开始处理类型:", org_type_code, org_type_name)
+
+    payload = json.dumps({"page": 1, "orgType": str(org_type_code), "orgName": ""})
+
+    http_url = "/amac-infodisc/api/pof/personOrg"
+
+    Utils.get_page_result(http_url, this_page, this_size, payload, headers, conn, savetodb, __file__)
+
+print(f"[终止机构]结束时间(精确到毫秒): {Utils.data_time()} - {start_time}")  # NOTE(review): "[终止机构]" tag looks copy-pasted from another script; prints end and start timestamps, no elapsed time is computed

+ 4 - 47
amac/person_org_registration_user_api.py → amac/person_org_registration_user_api_new.py

@@ -5,7 +5,8 @@ import time
 import Utils
 from Config import mysql_pool, conn, headers, page, size
 
-print("开始时间(精确到毫秒):", time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()), time.time() * 1000)
+start_time = Utils.data_time()
+print("开始时间(精确到毫秒)[终止机构]:", start_time)
 
 
 # 因为这里数据一条人员信息amac_member_user中包含了对应的多个证书,所以需要先循环把证书拿出来
@@ -83,51 +84,7 @@ for amac_org_type_item in amac_org_type:
         {"userName": "", "certCode": "", "certName": "", "userId": str(user_id), "page": "1"})
 
     http_url = "/amac-infodisc/api/pof/person"
-    random_float = round(random.random(), 14)
-    print(f"请求url={http_url}?rand={random_float}&page={this_page}&size={this_size}")
 
-    conn.request("POST",
-                 f"{http_url}?rand={random_float}&page={this_page}&size={this_size}",
-                 payload, headers)
+    Utils.get_page_result(http_url, this_page, this_size, payload, headers, conn, savetodb, __file__)
 
-    res = Utils.get_conn_result(conn, __file__).getresponse()
-    data = res.read()
-    json_str = data.decode("utf-8")
-    # print("JSON String", json_str)
-
-    try:
-        json_obj = json.loads(json_str)
-        print("获取到总条数为:", json_obj.get("totalPages"))
-
-        data_to_insert = json_obj.get("content")
-
-        if data_to_insert is not None:
-            # 先睡一秒,确保峰值不会太高
-            time.sleep(1)
-
-        print(f"\n开始插入数据==>{this_page}*{this_size}\n")
-        savetodb(data_to_insert)
-
-        if json_obj.get("totalPages") > 1:
-            for i in range(1, json_obj.get("totalPages")):
-                this_page = i
-                print(f"请求url={http_url}?rand={random_float}&page={this_page}&size={this_size}")
-                conn.request("POST",
-                             f"{http_url}?rand={random_float}&page={this_page}&size={this_size}",
-                             payload, headers)
-                res = conn.getresponse()
-                data = res.read()
-                json_str = data.decode("utf-8")
-                print("JSON String", json_str)
-                try:
-                    json_obj_arr = json.loads(json_str)
-                    print("获取到总条数为:", json_obj_arr.get("totalPages"))
-                    data_to_insert = json_obj_arr.get("content")
-                    print(f"\n开始插入数据==>{i}*{this_size}\n")
-                    savetodb(data_to_insert)
-                except json.JSONDecodeError as e:
-                    print(f"循环 Error decoding JSON: {e}")
-    except json.JSONDecodeError as e:
-        print(f"请求 Error decoding JSON: {e}")
-
-    print("结束时间(精确到毫秒):", time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()), time.time() * 1000)
+    print(f"[终止机构]结束时间(精确到毫秒): {Utils.data_time()} - {start_time}")