Modulo:Webarchive/data

Manipud iti Wikipedia, ti nawaya nga ensiklopedia
Darsen a mapan iti pagdaliasatan Darsen a mapan agbiruk
Dokumentasion ti modulo[kitaen] [urnosen] [pakasaritaan] [purgaen]

Daytoy ket katulong a modulo para iti Modulo:Webarchive.

--[[--------------------------< C O N F I G U R A T I O N >----------------------------------------------------

global configuration settings

]]

local config = {
	-- maximum number of URLs allowed
	['maxurls'] = 10,
	-- name of the calling template; change this when the template is renamed
	['tname'] = 'Webarchive',
	-- see documentation; set to false to disable
	['verifydates'] = true,
}


--[[--------------------------< U N C A T E G O R I Z E D _ N A M E S P A C E S >------------------------------

List of namespaces that should not be included in citation error categories.

Note: Namespace names should use underscores instead of spaces.

]]

local uncategorized_namespaces = {
	-- same list as specified at [[Module:Citation/CS1/Configuration]];
	-- one namespace per line, underscores in place of spaces
	['Agar-aramat'] = true,
	['Tungtungan'] = true,
	['Agar-aramat_tungtungan'] = true,
	['Wikipedia_tungtungan'] = true,
	['Papeles_tungtungan'] = true,
	['Plantilia_tungtungan'] = true,
	['Tulong_tungtungan'] = true,
	['Kategoria_tungtungan'] = true,
	['Modulo_tungtungan'] = true,
	['MediaWiki_tungtungan'] = true,
}

local uncategorized_subpages = {
	-- Lua patterns; a page whose name contains a match is one we should not categorize
	'/[Pp]agipadasan',
	'/[Pp]angsubok',
};

local excepted_pages = {
	-- pages listed here are categorized when their value is true; set an entry to nil to disable it
	-- (test cases pages used during development)
	['Modulo tungtungan:Webarchive/pangsubok'] = true,
	['Plantilia:Webarchive/pangsubok/Produksion'] = true,
}


--[[--------------------------< C A T E G O R I E S >----------------------------------------------------------

this is a table of all categories supported by Module:Webarchive

]]

local categories = {
	-- category key -> full category page name
	['archiveis'] = 'Kategoria:Dagiti silpo ti archiveis iti plantilia ti webarchive',
	['error'] = 'Kategoria:Dagiti biddut iti plantilia ti webarchive',
	['other'] = 'Kategoria:Dagiti sabali nga arkibo iti plantilia ti webarchive',
	['unknown'] = 'Kategoria:Dagiti di ammo nga arkibo iti plantilia ti webarchive',
	['warning'] = 'Kategoria:Dagiti ballaag iti plantilia ti webarchive',
	['wayback'] = 'Kategoria:Dagiti silpo ti wayback iti plantilia ti webarchive',
	['webcite'] = 'Kategoria:Dagiti silpo ti webcite iti plantilia ti webarchive',
}


--[[--------------------------< P R E F I X E S >--------------------------------------------------------------

used only with serviceName(), this table holds the two generic tail-text prefixes specified by services['<service name>'][1]

]]

local prefixes = {
	-- the two generic tail-text prefixes; both are rendered with the same word on this wiki
	['at'] = 'iti',
	['atthe'] = 'iti',
}


--[=[-------------------------< S E R V I C E S >--------------------------------------------------------------

this is a table of tables for archive services.  Each service table has:
	[1]=prefix; may be boolean true or false, or text string where:
		true indicates that the prefix is taken from prefixes.atthe
		false indicates that the prefix is taken from prefixes.at
		'text string' is used in lieu of the typical 'at' or 'at the' prefix
	[2]=wikilink target article that describes the service; set to nil if not used
	[3]=wikilink label; the label in [[target|label]]; set to nil if not used; when there is no article ([2] is nil) use this to name the service; see wikiwix in the table
	[4]=service ID; set to nil if not used
	[5]=tracking category taken from the categories table (for example categories.wayback); set to nil if not used
	[6]=postfix; text string to be appended at the end of the tail string - see webarchive.loc.gov in the table

]=]

local function archive_today_row ()												-- builds a fresh row for one Archive.today domain; every such domain uses identical field values
	return {false, 'Archive.today', nil, 'archiveis', categories.archiveis};
end

local services = {																-- keyed by the archive service's host name (fragment); field layout is described in the comment block above
	['archive.ec'] = archive_today_row(),
	['archive.fo'] = archive_today_row(),
	['archive.is'] = archive_today_row(),
	['archive.li'] = archive_today_row(),
	['archive.md'] = archive_today_row(),
	['archive.org'] = { true, 'Wayback Machine', nil, 'wayback', categories.wayback },
	['archive.ph'] = archive_today_row(),
	['archive.today'] = archive_today_row(),
	['archive.vn'] = archive_today_row(),
	['archive-it.org'] = { false, 'Archive-It', nil, 'archiveit' },
	['arquivo.pt'] = { true, nil, 'Portuguese Web Archive' },
	['bibalex.org'] = { false, 'Bibliotheca Alexandrina#Internet Archive partnership', 'Bibliotheca Alexandrina' },
	['collectionscanada'] = { true, 'Canadian Government Web Archive' },
	['europarchive.org'] = { true, 'National Library of Ireland' },
	['freezepage.com'] = { false, nil, 'Freezepage' },
	['haw.nsk'] = { true, 'Croatian Web Archive (HAW)' },
	['langzeitarchivierung.bib-bvb.de'] = { false, 'Bavarian State Library' },
	['loc.gov'] = { true, 'Library of Congress' },
	['nationalarchives.gov.uk'] = { true, 'UK Government Web Archive' },
	['nlb.gov.sg'] = { false, 'Web Archive Singapore' },
	['pandora.nla.gov.au'] = { false, 'Pandora Archive' },
	['parliament.uk'] = { true, 'UK Parliament\'s Web Archive' },
	['perma.cc'] = { false, 'Perma.cc' },
	['perma-archives.cc'] = { false, 'Perma.cc' },
	['proni.gov'] = { true, 'Public Record Office of Northern Ireland' },
	['screenshots.com'] = { false, nil, 'Screenshots' },
	['stanford.edu'] = { true, 'Stanford University Libraries', 'Stanford Web Archive' },
	['timetravel.mementoweb.org'] = { false, 'Memento Project' },
	['uni-lj.si'] = { true, nil, 'Slovenian Web Archive' },
	['veebiarhiiv.digar.ee'] = { true, nil, 'Estonian Web Archive' },
	['vefsafn.is'] = { true, 'National and University Library of Iceland' },
	['webarchive.bac-lac.gc.ca'] = { false, 'Library and Archives Canada' },
	['webarchive.loc.gov'] = { true, 'Library of Congress', nil, 'locwebarchives', nil, 'Web Archives' },
	['webarchive.org.uk'] = { true, 'UK Web Archive' },
	['webcache.googleusercontent.com'] = { false, nil, 'Google Cache' },
	['webcitation.org'] = { false, 'WebCite', nil, 'webcite', categories.webcite },
	['webharvest.gov'] = { true, 'National Archives and Records Administration' },
	['webrecorder.io'] = { false, 'webrecorder.io' },
	['wikiwix.com'] = { false, nil, 'Wikiwix' },
	['yorku.ca'] = { false, 'York University Libraries', 'York University Digital Library' },
}


--[[--------------------------< S T A T I C   T E X T >--------------------------------------------------------

for internationalization

]]

local s_text = {
	-- static text fragments, translated for the local wiki
	['addlarchives'] = 'Dagiti maipatinayon nga arkibo',
	-- TODO why the &nbsp; there? replace with regular space?
	['addlpages'] = 'Dagiti maipatinayon a panid a naiyarkibo&nbsp;iti',
	['Archive_index'] = 'Indeks ti arkibo',
	['Archived'] = 'Naiyarkibo',
	['archived'] = 'naiyarkibo',
	['archive'] = 'arkibo',
	['Page'] = 'Panid',
}


--[[--------------------------< E R R _ W A R N _ M S G S >----------------------------------------------------

these tables hold error and warning message text

]]

local err_warn_msgs = {
	-- errors; the trailing comment on each line names the function(s) listed for that message
	['date_err'] = '[Biddut iti petsa]',							-- decodeWebciteDate, decodeWaybackDate, decodeArchiveisDate
	['date_miss'] = '[Awan iti petsa]',								-- parseExtraArgs
	['ts_short'] = '[Kaatiddog ti timestamp ti petsa]',				-- decodeWaybackDate; timestamp less than 8 digits
	['ts_date'] = '[Imbalido ti timestamp ti petsa]',				-- decodeWaybackDate; timestamp not a valid date
	['unknown_url'] = '[Biddut: di ammo nga URL ti arkibo]',		-- serviceName
	['unnamed_params'] = '[Di naikaskaso dagiti panangipuesto a parametro]',

	-- warnings
	['mismatch'] = '<sup>[Agsungani iti petsa]</sup>',				-- webarchive
	['ts_len'] = '<sup>[Kaatiddog ti timestamp]</sup>',				-- decodeWaybackDate, decodeArchiveisDate; timestamp not 14 digits
	['ts_cal'] = '<sup>[Kalendario]</sup>',							-- decodeWaybackDate; timestamp has trailing splat
}


local crit_err_msgs = {
	-- critical error messages
	['conflicting'] = 'Agsuppiat a |$1= ken |$2=',
	['empty'] = 'Awan linaon nga url',
	-- TODO: the iabot bugs behind the two disabled messages below were purportedly fixed; removing these causes lua script error
	-- at Template:Webarchive/testcases/Production; resolve that before deleting these messages
--	iabot1 = 'https://web.http',
--	iabot2 = 'Invalid URL',
	['invalid_url'] = 'Imbalido nga URL',
	['ts_nan'] = 'Timestamp saan a bilang',
	['unknown'] = 'Di ammo a parikut. Pangngaasi nga ireporta iti tungtungan a panid ti plantilia',
}



--[[--------------------------< D A T E   I N T E R N A T I O N A L I Z A T I O N >----------------------------

these tables hold data that is used when converting date formats from non-English languages (because mw.language.getContentLanguage():formatDate()
doesn't understand non-English month names)

]]

local month_num = {																-- maps month names (full and abbreviated) to month numbers
-- local wiki month names
	['Enero'] = 1, ['Pebrero'] = 2, ['Marso'] = 3, ['Abril'] = 4, ['Mayo'] = 5, ['Hunio'] = 6, ['Hulio'] = 7, ['Agosto'] = 8, ['Septiembre'] = 9, ['Oktubre'] = 10, ['Nobiembre'] = 11, ['Disiembre'] = 12,
	['Ene'] = 1, ['Peb'] = 2, ['Mar'] = 3, ['Abr'] = 4, ['May'] = 5, ['Hun'] = 6, ['Hul'] = 7, ['Ago'] = 8, ['Sep'] = 9, ['Okt'] = 10, ['Nob'] = 11, ['Dis'] = 12,
-- English language names; retained even though they may not be strictly required on the local wiki, so that
-- dates written with English month names still translate to a month number.
-- 'May' (full and abbreviated), 'Mar' and 'Sep' are not repeated here: the same keys with the same values are already listed above
	['January'] = 1, ['February'] = 2, ['March'] = 3, ['April'] = 4, ['June'] = 6, ['July'] = 7, ['August'] = 8, ['September'] = 9, ['October'] = 10, ['November'] = 11, ['December'] = 12,
	['Jan'] = 1, ['Feb'] = 2, ['Apr'] = 4, ['Jun'] = 6, ['Jul'] = 7, ['Aug'] = 8, ['Oct'] = 10, ['Nov'] = 11, ['Dec'] = 12,
-- add further local wiki month-names to number translation here
--	[''] = 1, [''] = 2, [''] = 3, [''] = 4, [''] = 5, [''] = 6, [''] = 7, [''] = 8, [''] = 9, [''] = 10, [''] = 11, [''] = 12,
	};

																				-- when the local wiki uses non-western digits in dates, local wiki digits must be
																				-- translated to western digits; lua only understands western digits
local digits = {
	-- use this table to aid translation: fill the table indexes below with local digits
--	[''] = 0, [''] = 1, [''] = 2, [''] = 3, [''] = 4, [''] = 5, [''] = 6, [''] = 7, [''] = 8, [''] = 9,
	-- set to true to enable local-digit to western-digit translation
	['enable'] = false,
}


--[[--------------------------< P A R A M E T E R   I N T E R N A T I O N A L I Z A T I O N >------------------

this table holds tables of parameter names and their non-English aliases.  In the enum_params table '#' is a single
character placeholder for 1 or more digit characters

parameter names in this table shall be lowercase
]]

local params = {
	-- canonical parameter name -> list of accepted names (canonical name first, then aliases)
	url = { 'url' },
	date = { 'date', 'datum' },
	title = { 'title', 'titel' },
	nolink = { 'nolink' },
	format = { 'format' },
}

local enum_params = {
	-- enumerated parameters: canonical name -> list of accepted names
	['url#'] = { 'url#' },
	['date#'] = { 'date#', 'datum#' },
	['title#'] = { 'title#', 'titel#' },
}

local format_vals = {
	-- |format= accepts two values; add local language variants to the matching list
	addlpages = { 'addlpages' },
	addlarchives = { 'addlarchives' },
}


--[[--------------------------< E X P O R T E D   T A B L E S >------------------------------------------------
]]

local exported_tables = {														-- every table defined in this file, exported under its own name
	['categories'] = categories,
	['config'] = config,
	['crit_err_msgs'] = crit_err_msgs,
	['digits'] = digits,
	['enum_params'] = enum_params,
	['err_warn_msgs'] = err_warn_msgs,
	['excepted_pages'] = excepted_pages,
	['format_vals'] = format_vals,
	['month_num'] = month_num,
	['params'] = params,
	['prefixes'] = prefixes,
	['services'] = services,
	['s_text'] = s_text,
	['uncategorized_namespaces'] = uncategorized_namespaces,
	['uncategorized_subpages'] = uncategorized_subpages,
}

return exported_tables