tableau作为商业智能软件的业内翘楚,很多大公司还是会采用的,又或者有些乙方也会推荐给甲方使用。而在某些情况下(例如,不希望联网泄露数据,或者权限处理难度比较大等等),可能需要分发本地数据源。这时候就需要重新处理数据源文件了。
tableau打包文件一般为twbx,改后缀名为zip之类的压缩包之后,hyper文件位于data/Extracts文件夹下。而tableau也提供了对应的api接口。稍稍改动即可使用。
# -*- coding: utf-8 -*-
"""
Created on Sun Jul 19 12:34:37 2020
@author: Roych
"""
import pandas as pd
import shutil
from pathlib import Path
import os
import zipfile
#tableau提供的Python API
from tableauhyperapi import HyperProcess, Telemetry, Connection, TableName
class SplitHyper():
def __init__(self, source_file, unzip_fold, destination_fold, field_name, user_name):
self.source_file = source_file
self.unzip_fold = unzip_fold
self.destination_fold = destination_fold
self.field_name = field_name
self.user_name = user_name
#解压twbx文件
def zip_to_hyper(self):
with zipfile.ZipFile(self.source_file, 'r') as f:
for item in f.namelist():
f.extract(item, self.unzip_fold)
old_name = os.path.join(self.unzip_fold, item)
#对中文命名的twbx进行解码。
new_name = os.path.join(self.unzip_fold, item.encode('cp437').decode())
os.rename(old_name, new_name)
#复制解压后的文件夹到指定位置
def copy_tree(self):
shutil.copytree(self.unzip_fold, self.destination_fold)
#通过sql子句和旧hyper创建指定用户权限下的hyper文件(数据源)
def create_hyper_from_sql(self):
hyper_path = os.path.join(self.destination_fold, 'Data', 'Extracts')
source_hyper_name = [x for x in os.listdir(hyper_path) if x[-6:] == '.hyper']
source_hyper = os.path.join(hyper_path, ''.join(source_hyper_name))
destination_hyper = os.path.join(self.destination_fold, 'Data', 'Extracts', self.user_name + '.hyper')
#构建SQL语句。
sql = '"' + self.field_name + '" like \'%' + self.user_name + '%\''
path_to_database = Path(shutil.copy(src=source_hyper, dst=destination_hyper)).resolve()
with HyperProcess(telemetry=Telemetry.SEND_USAGE_DATA_TO_TABLEAU) as hyper:
with Connection(endpoint=hyper.endpoint, database=path_to_database) as connection:
table_names = connection.catalog.get_table_names(schema="Extract")
for table in table_names:
table_name = TableName(table)
command = f"DELETE FROM {table_name} WHERE NOT " + sql
connection.execute_command(command=command)
os.remove(source_hyper)
#将创建后的hyper文件与其它文件夹打包为twbx文件
def zip_to_twbx(self):
zip_name = os.path.join(self.destination_fold, self.user_name + '.zip')
tbwx_name = os.path.join(self.destination_fold, self.user_name + '.twbx')
with zipfile.ZipFile(zip_name, 'w', zipfile.ZIP_DEFLATED) as f:
for path, dirnames, filenames in os.walk(self.destination_fold):
fpath = path.replace(self.destination_fold, '')
for filename in filenames:
if filename[-4:] != '.zip':
f.write(os.path.join(path, filename), os.path.join(fpath, filename))
os.rename(zip_name, tbwx_name)
#复制twbx文件后,移除所有文件夹。
def remove_tree(self):
dir_name = os.path.dirname(self.destination_fold)
if os.path.exists(self.unzip_fold):
shutil.rmtree(self.unzip_fold)
for item in os.listdir(self.destination_fold):
remove_item = os.path.join(self.destination_fold, item)
if remove_item[-5:] == '.twbx':
shutil.copy(src=remove_item, dst=dir_name)
break
shutil.rmtree(self.destination_fold)
#读取excel文件的用户名,并转为列表
def get_list_from_excel(excel_file, user_col):
df = pd.read_excel(excel_file)
user_list = list(df[user_col])
return user_list
if __name__ == '__main__':
source_file = r'C:\微信文档\WeChat Files\wxid_iuqse7vjqyq821\FileStorage\File\2020-07\业务报表(CARD).twbx'
unzip_fold = r'C:\微信文档\WeChat Files\wxid_iuqse7vjqyq821\FileStorage\File\2020-07\业务报表(CARD)'
destination_fold = r'C:\微信文档\WeChat Files\wxid_iuqse7vjqyq821\FileStorage\File\2020-07\test'
field_name = 'DATA_ACCESS'
excel_file = r'C:\微信文档\WeChat Files\wxid_iuqse7vjqyq821\FileStorage\File\2020-07\permisssion.xlsx'
user_col = 'PROFILE_ID'
user_list = get_list_from_excel(excel_file, user_col)
for user_name in user_list:
test = SplitHyper(source_file=source_file,
unzip_fold=unzip_fold,
destination_fold=destination_fold,
field_name=field_name,
user_name=user_name)
test.zip_to_hyper()
test.copy_tree()
test.create_hyper_from_sql()
test.zip_to_twbx()
test.remove_tree()