日志

利用Python解决tableau数据源分发问题

热度 1已有 2525 次阅读2020-7-19 15:38 |个人分类:随便说说

tableau作为商业智能软件的业内翘楚，很多大公司还是会采用的，又或者有些乙方也会推荐给甲方使用。而在某些情况下（例如，不希望联网泄露数据，或者权限处理难度比较大等等），可能需要分发本地数据源。这时候就需要重新处理数据源文件了。

tableau打包文件一般为twbx，改后缀名为zip之类的压缩包之后，hyper文件位于data/Extracts文件夹下。而tableau也提供了对应的api接口。稍稍改动即可使用。

# -*- coding: utf-8 -*-

"""

Created on Sun Jul 19 12:34:37 2020

@author: Roych

"""

import pandas as pd

import shutil

from pathlib import Path

import os

import zipfile

#tableau提供的Python API

from tableauhyperapi import HyperProcess, Telemetry, Connection, TableName

class SplitHyper():

def __init__(self, source_file, unzip_fold, destination_fold, field_name, user_name):

self.source_file = source_file

self.unzip_fold = unzip_fold

self.destination_fold = destination_fold

self.field_name = field_name

self.user_name = user_name

#解压twbx文件

def zip_to_hyper(self):

with zipfile.ZipFile(self.source_file, 'r') as f:

for item in f.namelist():

f.extract(item, self.unzip_fold)

old_name = os.path.join(self.unzip_fold, item)

#对中文命名的twbx进行解码。

new_name = os.path.join(self.unzip_fold, item.encode('cp437').decode())

os.rename(old_name, new_name)

#复制解压后的文件夹到指定位置

def copy_tree(self):

shutil.copytree(self.unzip_fold, self.destination_fold)

#通过sql子句和旧hyper创建指定用户权限下的hyper文件（数据源）

def create_hyper_from_sql(self):

hyper_path = os.path.join(self.destination_fold, 'Data', 'Extracts')

source_hyper_name = [x for x in os.listdir(hyper_path) if x[-6:] == '.hyper']

source_hyper = os.path.join(hyper_path, ''.join(source_hyper_name))

destination_hyper = os.path.join(self.destination_fold, 'Data', 'Extracts', self.user_name + '.hyper')

#构建SQL语句。

sql = '"' + self.field_name + '" like \'%' + self.user_name + '%\''

path_to_database = Path(shutil.copy(src=source_hyper, dst=destination_hyper)).resolve()

with HyperProcess(telemetry=Telemetry.SEND_USAGE_DATA_TO_TABLEAU) as hyper:

with Connection(endpoint=hyper.endpoint, database=path_to_database) as connection:

table_names = connection.catalog.get_table_names(schema="Extract")

for table in table_names:

table_name = TableName(table)

command = f"DELETE FROM {table_name} WHERE NOT " + sql

connection.execute_command(command=command)

os.remove(source_hyper)

#将创建后的hyper文件与其它文件夹打包为twbx文件

def zip_to_twbx(self):

zip_name = os.path.join(self.destination_fold, self.user_name + '.zip')

tbwx_name = os.path.join(self.destination_fold, self.user_name + '.twbx')

with zipfile.ZipFile(zip_name, 'w', zipfile.ZIP_DEFLATED) as f:

for path, dirnames, filenames in os.walk(self.destination_fold):

fpath = path.replace(self.destination_fold, '')

for filename in filenames:

if filename[-4:] != '.zip':

f.write(os.path.join(path, filename), os.path.join(fpath, filename))

os.rename(zip_name, tbwx_name)

#复制twbx文件后，移除所有文件夹。

def remove_tree(self):

dir_name = os.path.dirname(self.destination_fold)

if os.path.exists(self.unzip_fold):

shutil.rmtree(self.unzip_fold)

for item in os.listdir(self.destination_fold):

remove_item = os.path.join(self.destination_fold, item)

if remove_item[-5:] == '.twbx':

shutil.copy(src=remove_item, dst=dir_name)

break

shutil.rmtree(self.destination_fold)

#读取excel文件的用户名，并转为列表

def get_list_from_excel(excel_file, user_col):

df = pd.read_excel(excel_file)

user_list = list(df[user_col])

return user_list

if __name__ == '__main__':

source_file = r'C:\微信文档\WeChat Files\wxid_iuqse7vjqyq821\FileStorage\File\2020-07\业务报表（CARD）.twbx'

unzip_fold = r'C:\微信文档\WeChat Files\wxid_iuqse7vjqyq821\FileStorage\File\2020-07\业务报表（CARD）'

destination_fold = r'C:\微信文档\WeChat Files\wxid_iuqse7vjqyq821\FileStorage\File\2020-07\test'

field_name = 'DATA_ACCESS'

excel_file = r'C:\微信文档\WeChat Files\wxid_iuqse7vjqyq821\FileStorage\File\2020-07\permisssion.xlsx'

user_col = 'PROFILE_ID'

user_list = get_list_from_excel(excel_file, user_col)

for user_name in user_list:

test = SplitHyper(source_file=source_file,

unzip_fold=unzip_fold,

destination_fold=destination_fold,

field_name=field_name,

user_name=user_name)

test.zip_to_hyper()

test.copy_tree()

test.create_hyper_from_sql()

test.zip_to_twbx()

test.remove_tree()

作为access版块的斑竹，写这些似乎有些不务正业。不过作为个人来说，很久没写日志了，就当娱乐下吧。

收藏分享邀请举报

全部作者的其他最新日志

发表评论评论 (2 个评论)

回复方漠 2021-3-9 16:19: 谢谢分享, 最近也在看 Python 爬虫.

回复 roych 2021-3-9 19:59: 方漠: 谢谢分享, 最近也在看 Python 爬虫.
现在爬虫很难玩了。各种反爬机制，不建议花太多时间在这上面。

roych的个人空间 http://www.office-cn.net/?179386 [收藏] [复制] [分享] [RSS]

日志

利用Python解决tableau数据源分发问题

全部作者的其他最新日志

发表评论 评论 (2 个评论)

roych

发表评论评论 (2 个评论)